From 926c4c223e6ce249c09c01e406dbefc74e55a577 Mon Sep 17 00:00:00 2001 From: Vance Ingalls Date: Thu, 14 May 2026 01:51:26 +0000 Subject: [PATCH 1/3] fix(engine): force screenshot capture when GPU is software (broader than HDR-only guard) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the host has no hardware GPU (SwiftShader software rendering), Chrome's `HeadlessExperimental.beginFrame` stalls the compositor on shader-heavy frames — the CDP call hangs and the render eventually fails with `beginFrame timed out` at protocolTimeout (default 5 min). This was reproduced on a CPU-only Linux sandbox during the hf#677 BeginFrame spike and is likely the same root cause as the earlier Mac-Docker timeout that the HDR-only guard in `captureHdrStage.ts` partially mitigated. The existing per-stage guard (`cfg.forceScreenshot = true` for the layered HDR composite) is too narrow — the issue is GPU-presence, not HDR-vs-SDR. SDR renders also stall on a software-renderer host. Fix: in `acquireBrowser`, resolve `browserGpuMode` (cheap — the probe Promise is cached for the process lifetime) and force `captureMode = "screenshot"` whenever it resolves to "software", regardless of platform or binary. Defensively strip BeginFrame-only chrome flags (`--enable-begin-frame-control` etc.) from the launch args in the same path — leaving them in screenshot mode produces blank captures because the compositor waits for beginFrames the engine no longer sends. Mirror the same gate in `frameCapture.ts`'s preMode computation so the chromeArgs are built without BeginFrame flags up front (the strip in `acquireBrowser` is belt-and-braces for any other caller). A one-time `console.warn` documents the resolution path for operators when the guard actually changed the outcome (Linux + headless-shell only — silent on the always-screenshot platforms and on hardware hosts). Note: on `main` as-is the layered HDR path already forces screenshot mode unconditionally, so the SDR-software-renderer case is the load-bearing one this PR fixes. The broader guard also keeps the engine safe once any future BeginFrame-alpha work lands on top. Tests cover: software → screenshot + BeginFrame flag stripping, hardware → beginframe preserved, forceScreenshot=true still wins, one-time software-GPU warning, no warn on macOS (already screenshot anyway). — Vai --- .../src/services/browserManager.test.ts | 155 ++++++++++++++++++ .../engine/src/services/browserManager.ts | 48 +++++- packages/engine/src/services/frameCapture.ts | 10 +- 3 files changed, 208 insertions(+), 5 deletions(-) diff --git a/packages/engine/src/services/browserManager.test.ts b/packages/engine/src/services/browserManager.test.ts index 08af34802..7b6bcfd58 100644 --- a/packages/engine/src/services/browserManager.test.ts +++ b/packages/engine/src/services/browserManager.test.ts @@ -128,6 +128,161 @@ describe("resolveBrowserGpuMode", () => { }); }); +describe("acquireBrowser — software-renderer guard", () => { + // Tests for the defense-in-depth rule that a "software"-resolved GPU mode + // must force `captureMode = "screenshot"` regardless of platform/binary. + // Without this guard, BeginFrame attempts on a software-rendered compositor + // hang at the CDP protocolTimeout (the hf#677 spike repro). Tests below + // mock puppeteer at the module-import level so no real Chrome is launched. + + const origPlatform = process.platform; + + // Captures the most recent puppeteer.launch call so tests can assert on the + // args/executablePath actually passed to Chrome. + let lastLaunchArgs: string[] | undefined; + let lastLaunchExecutablePath: string | undefined; + + function installPuppeteerMock() { + const browserStub = { + close: vi.fn().mockResolvedValue(undefined), + newPage: vi.fn().mockResolvedValue({ + close: vi.fn().mockResolvedValue(undefined), + createCDPSession: vi.fn().mockResolvedValue({ + send: vi.fn().mockResolvedValue(undefined), + detach: vi.fn().mockResolvedValue(undefined), + }), + }), + }; + const launch = vi.fn(async (opts: { args: string[]; executablePath?: string }) => { + lastLaunchArgs = opts.args; + lastLaunchExecutablePath = opts.executablePath; + return browserStub; + }); + // Both `puppeteer` and `puppeteer-core` are tried by getPuppeteer() — mock + // both so it doesn't matter which one resolves first. + vi.doMock("puppeteer", () => ({ default: { launch } })); + vi.doMock("puppeteer-core", () => ({ default: { launch } })); + return { launch, browserStub }; + } + + beforeEach(() => { + vi.resetModules(); + lastLaunchArgs = undefined; + lastLaunchExecutablePath = undefined; + Object.defineProperty(process, "platform", { value: "linux", configurable: true }); + }); + + afterEach(() => { + Object.defineProperty(process, "platform", { value: origPlatform, configurable: true }); + vi.restoreAllMocks(); + vi.doUnmock("puppeteer"); + vi.doUnmock("puppeteer-core"); + }); + + it("forces screenshot capture mode when browserGpuMode resolves to 'software' on Linux", async () => { + installPuppeteerMock(); + const { acquireBrowser: acquire } = await import("./browserManager.js"); + + // chromePath points to a fake chrome-headless-shell so the Linux + binary + // preconditions for beginframe are satisfied — the software-GPU guard is + // the only thing that should flip captureMode to screenshot. + const chromeArgs = [ + "--no-sandbox", + "--enable-begin-frame-control", + "--deterministic-mode", + "--run-all-compositor-stages-before-draw", + ]; + const result = await acquire(chromeArgs, { + chromePath: "/fake/chrome-headless-shell", + browserGpuMode: "software", + }); + + expect(result.captureMode).toBe("screenshot"); + // BeginFrame-only flags MUST be stripped from the launched args — leaving + // `--enable-begin-frame-control` in produces blank screenshots because the + // compositor waits for beginFrames we'll never send. + expect(lastLaunchArgs).not.toContain("--enable-begin-frame-control"); + expect(lastLaunchArgs).not.toContain("--deterministic-mode"); + expect(lastLaunchArgs).not.toContain("--run-all-compositor-stages-before-draw"); + expect(lastLaunchArgs).toContain("--no-sandbox"); + }); + + it("keeps beginframe capture mode when browserGpuMode is 'hardware' on Linux with headless-shell", async () => { + installPuppeteerMock(); + const { acquireBrowser: acquire } = await import("./browserManager.js"); + + const chromeArgs = ["--no-sandbox", "--enable-begin-frame-control"]; + const result = await acquire(chromeArgs, { + chromePath: "/fake/chrome-headless-shell", + browserGpuMode: "hardware", + }); + + // Hardware GPU + Linux + headless-shell + no forceScreenshot → beginframe. + // (The post-launch probe normally checks HeadlessExperimental.enable; our + // mock CDP session resolves it successfully, so the probe passes.) + expect(result.captureMode).toBe("beginframe"); + // BeginFrame flags preserved in the launched args. + expect(lastLaunchArgs).toContain("--enable-begin-frame-control"); + expect(lastLaunchExecutablePath).toBe("/fake/chrome-headless-shell"); + }); + + it("forces screenshot mode when forceScreenshot=true regardless of GPU mode", async () => { + // Existing behavior — the new software-GPU branch must not regress this. + installPuppeteerMock(); + const { acquireBrowser: acquire } = await import("./browserManager.js"); + + const chromeArgs = ["--no-sandbox", "--enable-begin-frame-control"]; + const result = await acquire(chromeArgs, { + chromePath: "/fake/chrome-headless-shell", + browserGpuMode: "hardware", + forceScreenshot: true, + }); + + expect(result.captureMode).toBe("screenshot"); + expect(lastLaunchArgs).not.toContain("--enable-begin-frame-control"); + }); + + it("emits a one-time software-GPU warning when the guard actually changed the outcome", async () => { + installPuppeteerMock(); + const warnSpy = vi.spyOn(console, "warn").mockImplementation(() => {}); + const { acquireBrowser: acquire, releaseBrowser: release } = + await import("./browserManager.js"); + + const chromeArgs = ["--no-sandbox", "--enable-begin-frame-control"]; + const acquired = await acquire(chromeArgs, { + chromePath: "/fake/chrome-headless-shell", + browserGpuMode: "software", + }); + // release so subsequent acquires don't reuse a pooled browser (we have + // pooling off by default but be explicit). + await release(acquired.browser); + + const messages = warnSpy.mock.calls.map((c) => String(c[0])); + const softwareWarn = messages.find((m) => m.includes("Software GPU detected")); + expect(softwareWarn).toBeDefined(); + expect(softwareWarn).toContain("screenshot"); + }); + + it("does NOT emit the software-GPU warning on macOS (already screenshot anyway)", async () => { + // macOS uses screenshot mode unconditionally — the software-GPU guard + // didn't change the outcome, so the warn should stay silent. + Object.defineProperty(process, "platform", { value: "darwin", configurable: true }); + installPuppeteerMock(); + const warnSpy = vi.spyOn(console, "warn").mockImplementation(() => {}); + const { acquireBrowser: acquire, releaseBrowser: release } = + await import("./browserManager.js"); + + const acquired = await acquire(["--no-sandbox"], { + chromePath: "/fake/chrome-headless-shell", + browserGpuMode: "software", + }); + await release(acquired.browser); + + const messages = warnSpy.mock.calls.map((c) => String(c[0])); + expect(messages.find((m) => m.includes("Software GPU detected"))).toBeUndefined(); + }); +}); + describe("forceReleaseBrowser", () => { it("kills the browser process and disconnects", () => { const killFn = vi.fn(() => true); diff --git a/packages/engine/src/services/browserManager.ts b/packages/engine/src/services/browserManager.ts index 714b9a938..9aca9820c 100644 --- a/packages/engine/src/services/browserManager.ts +++ b/packages/engine/src/services/browserManager.ts @@ -254,7 +254,12 @@ export async function acquireBrowser( config?: Partial< Pick< EngineConfig, - "browserTimeout" | "protocolTimeout" | "enableBrowserPool" | "chromePath" | "forceScreenshot" + | "browserTimeout" + | "protocolTimeout" + | "enableBrowserPool" + | "chromePath" + | "forceScreenshot" + | "browserGpuMode" > >, ): Promise { @@ -271,10 +276,30 @@ export async function acquireBrowser( // BeginFrame requires chrome-headless-shell AND Linux (crashes on macOS/Windows). const isLinux = process.platform === "linux"; const forceScreenshot = config?.forceScreenshot ?? DEFAULT_CONFIG.forceScreenshot; + + // Resolve browserGpuMode. On software-renderer hosts (no hardware GPU / + // SwiftShader) HeadlessExperimental.beginFrame is unreliable — the + // compositor stalls indefinitely on shader-heavy frames and the CDP call + // times out at protocolTimeout (default 5 min). Force screenshot mode for + // the entire process whenever resolveBrowserGpuMode resolves to "software", + // independent of platform/binary. Broader than the per-stage HDR guard in + // the producer (captureHdrStage's `cfg.forceScreenshot = true`) — that + // guard only covered the layered-composite path, not the BeginFrame loop + // for SDR content on a software host. + // + // resolveBrowserGpuMode caches its probe Promise for the process lifetime, + // so calling it here is cheap after the first invocation. + const browserGpuMode = config?.browserGpuMode ?? DEFAULT_CONFIG.browserGpuMode; + const resolvedGpuMode = await resolveBrowserGpuMode(browserGpuMode, { + chromePath: headlessShell ?? undefined, + browserTimeout: config?.browserTimeout, + }); + const isSoftwareRenderer = resolvedGpuMode === "software"; + let captureMode: CaptureMode; let executablePath: string | undefined; - if (headlessShell && isLinux && !forceScreenshot) { + if (headlessShell && isLinux && !forceScreenshot && !isSoftwareRenderer) { captureMode = "beginframe"; executablePath = headlessShell; } else { @@ -283,12 +308,29 @@ export async function acquireBrowser( executablePath = headlessShell ?? undefined; } + // When falling back to screenshot mode the chromeArgs may still contain + // `--enable-begin-frame-control` etc. (caller built them before the GPU + // probe resolved). In beginframe-control mode the compositor blocks waiting + // for a beginFrame we'll never send, producing blank screenshots — strip + // them defensively. No-op if caller already built args for screenshot mode. + const launchArgs = captureMode === "screenshot" ? stripBeginFrameFlags(chromeArgs) : chromeArgs; + + if (captureMode === "screenshot" && isSoftwareRenderer && headlessShell && isLinux) { + // Log once when the software-renderer guard actually changed the outcome + // (Linux + headless-shell available — the only env where beginframe would + // have been the default). Silent on the always-screenshot platforms + // (macOS, Windows) and on hardware hosts. + console.warn( + "[BrowserManager] Software GPU detected; forcing screenshot capture mode (HeadlessExperimental.beginFrame stalls on software-rendered compositors).", + ); + } + const ppt = await getPuppeteer(); const browserTimeout = config?.browserTimeout ?? DEFAULT_CONFIG.browserTimeout; const protocolTimeout = config?.protocolTimeout ?? DEFAULT_CONFIG.protocolTimeout; let browser = await ppt.launch({ headless: true, - args: chromeArgs, + args: launchArgs, defaultViewport: null, executablePath, timeout: browserTimeout, diff --git a/packages/engine/src/services/frameCapture.ts b/packages/engine/src/services/frameCapture.ts index 656c87057..cf1babcb5 100644 --- a/packages/engine/src/services/frameCapture.ts +++ b/packages/engine/src/services/frameCapture.ts @@ -195,13 +195,19 @@ export async function createCaptureSession( // need explicit clip+scale on `Page.captureScreenshot`, so fall back to // the screenshot path for any DPR > 1. const supersampling = (options.deviceScaleFactor ?? 1) > 1; - const preMode: CaptureMode = - headlessShell && isLinux && !forceScreenshot && !supersampling ? "beginframe" : "screenshot"; const requestedGpuMode = config?.browserGpuMode ?? DEFAULT_CONFIG.browserGpuMode; const resolvedGpuMode = await resolveBrowserGpuMode(requestedGpuMode, { chromePath: headlessShell ?? undefined, browserTimeout: config?.browserTimeout, }); + // Mirror the software-renderer guard in `acquireBrowser`: on a software + // host the BeginFrame compositor stalls, so build the chromeArgs without + // BeginFrame flags up-front. acquireBrowser will also strip them as a + // belt-and-braces measure, but doing it here keeps the launch args clean. + const preMode: CaptureMode = + headlessShell && isLinux && !forceScreenshot && !supersampling && resolvedGpuMode !== "software" + ? "beginframe" + : "screenshot"; const chromeArgs = buildChromeArgs( { width: options.width, height: options.height, captureMode: preMode }, { ...config, browserGpuMode: resolvedGpuMode }, From d20e28a623ea6c64539d5ff7ce1e41a8e80b2d69 Mon Sep 17 00:00:00 2001 From: Vance Ingalls Date: Thu, 14 May 2026 04:33:29 +0000 Subject: [PATCH 2/3] fix(engine): gate software-GPU warn + thread resolved mode (staff review) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three fixes from PR #822 self-review: 1. Gate the "Software GPU detected" console.warn so it only fires when the guard actually changed the outcome. Previously the warn condition (captureMode==="screenshot" && isSoftwareRenderer && headlessShell && isLinux) was satisfied even when the caller had explicitly set forceScreenshot=true — misleading operators into thinking the guard kicked in when they had already picked screenshot mode. Add `!forceScreenshot` to the condition and one-shot the warn with a process-level latch to avoid spam across multi-worker renders. 2. Thread the already-resolved browserGpuMode from frameCapture.ts into acquireBrowser via a new AcquireBrowserOptions.resolvedBrowserGpuMode. The Promise cache made the duplicate resolution cheap, but threading the value removes the smell of two parallel resolutions of the same thing with no static guarantee they agree under future refactors. 3. Add fallback-launch-retry test: simulate probeBeginFrameSupport failure via a CDP mock whose `send` rejects, assert the second launch strips beginframe-only flags. Also add positive-control tests for "Linux + hardware + headless-shell does NOT warn" and "forceScreenshot suppresses the software-GPU warn". Co-Authored-By: Claude Opus 4.7 (1M context) --- .../src/services/browserManager.test.ts | 99 +++++++++++++++++++ .../engine/src/services/browserManager.ts | 70 +++++++++---- packages/engine/src/services/frameCapture.ts | 7 +- 3 files changed, 157 insertions(+), 19 deletions(-) diff --git a/packages/engine/src/services/browserManager.test.ts b/packages/engine/src/services/browserManager.test.ts index 7b6bcfd58..b40dac0e6 100644 --- a/packages/engine/src/services/browserManager.test.ts +++ b/packages/engine/src/services/browserManager.test.ts @@ -170,6 +170,9 @@ describe("acquireBrowser — software-renderer guard", () => { lastLaunchArgs = undefined; lastLaunchExecutablePath = undefined; Object.defineProperty(process, "platform", { value: "linux", configurable: true }); + // `vi.resetModules()` already gives each test a fresh `browserManager.js` + // module — so `_softwareGuardWarned` starts at false per test. No + // additional reset needed. }); afterEach(() => { @@ -281,6 +284,102 @@ describe("acquireBrowser — software-renderer guard", () => { const messages = warnSpy.mock.calls.map((c) => String(c[0])); expect(messages.find((m) => m.includes("Software GPU detected"))).toBeUndefined(); }); + + it("does NOT emit the software-GPU warning when caller explicitly set forceScreenshot=true", async () => { + // If the caller already picked screenshot mode, the software-GPU guard + // didn't change the outcome — warning would misleadingly tell operators + // the guard kicked in. Same shape as the macOS test, but here the + // platform satisfies all the original warn-firing preconditions and only + // forceScreenshot suppresses the warn. + installPuppeteerMock(); + const warnSpy = vi.spyOn(console, "warn").mockImplementation(() => {}); + const { acquireBrowser: acquire, releaseBrowser: release } = + await import("./browserManager.js"); + + const acquired = await acquire(["--no-sandbox"], { + chromePath: "/fake/chrome-headless-shell", + browserGpuMode: "software", + forceScreenshot: true, + }); + await release(acquired.browser); + + const messages = warnSpy.mock.calls.map((c) => String(c[0])); + expect(messages.find((m) => m.includes("Software GPU detected"))).toBeUndefined(); + }); + + it("does NOT emit the software-GPU warning on Linux + hardware + headless-shell (positive control)", async () => { + // On the very env where the guard COULD fire (Linux + headless-shell), a + // hardware-resolved GPU mode must NOT trip the warning. Catches the + // failure mode where the gate condition gets inverted in a refactor. + installPuppeteerMock(); + const warnSpy = vi.spyOn(console, "warn").mockImplementation(() => {}); + const { acquireBrowser: acquire, releaseBrowser: release } = + await import("./browserManager.js"); + + const acquired = await acquire(["--no-sandbox"], { + chromePath: "/fake/chrome-headless-shell", + browserGpuMode: "hardware", + }); + await release(acquired.browser); + + const messages = warnSpy.mock.calls.map((c) => String(c[0])); + expect(messages.find((m) => m.includes("Software GPU detected"))).toBeUndefined(); + }); + + it("falls back to screenshot mode + strips beginframe flags when probeBeginFrameSupport fails", async () => { + // Pre-existing defense-in-depth: even if the software-GPU guard misses + // (e.g. resolveBrowserGpuMode returns "hardware" but the binary itself + // has dropped HeadlessExperimental.beginFrame — observed on chrome + // builds 147+), the post-launch probe should detect it, close the + // browser, and re-launch with stripped flags. This test pins that path + // via a CDP session whose `send` rejects. + const launchCalls: Array<{ args: string[]; executablePath?: string }> = []; + const browserStub = { + close: vi.fn().mockResolvedValue(undefined), + newPage: vi.fn().mockResolvedValue({ + close: vi.fn().mockResolvedValue(undefined), + createCDPSession: vi.fn().mockResolvedValue({ + // Probe failure: HeadlessExperimental.enable rejects → probe returns + // false → acquireBrowser closes browser + re-launches with stripped + // flags. + send: vi.fn().mockRejectedValue(new Error("HeadlessExperimental not supported")), + detach: vi.fn().mockResolvedValue(undefined), + }), + }), + }; + const launch = vi.fn(async (opts: { args: string[]; executablePath?: string }) => { + launchCalls.push({ args: [...opts.args], executablePath: opts.executablePath }); + return browserStub; + }); + vi.doMock("puppeteer", () => ({ default: { launch } })); + vi.doMock("puppeteer-core", () => ({ default: { launch } })); + + const { acquireBrowser: acquire } = await import("./browserManager.js"); + + const chromeArgs = [ + "--no-sandbox", + "--enable-begin-frame-control", + "--deterministic-mode", + "--run-all-compositor-stages-before-draw", + ]; + const result = await acquire(chromeArgs, { + chromePath: "/fake/chrome-headless-shell", + browserGpuMode: "hardware", + }); + + // Two launches: first with beginframe flags (initial attempt), then + // re-launch with flags stripped after probe failure. + expect(launchCalls.length).toBe(2); + expect(launchCalls[0]?.args).toContain("--enable-begin-frame-control"); + expect(launchCalls[1]?.args).not.toContain("--enable-begin-frame-control"); + expect(launchCalls[1]?.args).not.toContain("--deterministic-mode"); + expect(launchCalls[1]?.args).not.toContain("--run-all-compositor-stages-before-draw"); + expect(launchCalls[1]?.args).toContain("--no-sandbox"); + // First browser closed before the re-launch. + expect(browserStub.close).toHaveBeenCalled(); + // Final captureMode is screenshot, reflecting the fallback. + expect(result.captureMode).toBe("screenshot"); + }); }); describe("forceReleaseBrowser", () => { diff --git a/packages/engine/src/services/browserManager.ts b/packages/engine/src/services/browserManager.ts index 9aca9820c..7e787d889 100644 --- a/packages/engine/src/services/browserManager.ts +++ b/packages/engine/src/services/browserManager.ts @@ -249,6 +249,29 @@ function logResolvedBrowserGpuMode(resolved: "hardware" | "software", reason: st console.error(`[hyperframes] browserGpuMode auto → ${resolved} (${reason})`); } +/** + * One-shot latch for the "Software GPU detected" warning. The guard fires per + * acquire() (each render worker calls acquireBrowser), so without a latch we'd + * spam the operator with N copies of the same message on a multi-worker run. + * Exported reset for tests. + */ +let _softwareGuardWarned = false; +export function _resetSoftwareGuardWarnedForTests(): void { + _softwareGuardWarned = false; +} + +export interface AcquireBrowserOptions { + /** + * If the caller already resolved `browserGpuMode` (e.g. `frameCapture.ts` + * computes it before building chromeArgs), pass the resolved value here so + * `acquireBrowser` doesn't redundantly re-resolve from the raw config. The + * Promise cache makes the duplicate call cheap, but threading the value + * through removes the smell of two parallel resolutions of the same thing + * with no static guarantee they agree. + */ + resolvedBrowserGpuMode?: "software" | "hardware"; +} + export async function acquireBrowser( chromeArgs: string[], config?: Partial< @@ -262,6 +285,7 @@ export async function acquireBrowser( | "browserGpuMode" > >, + options: AcquireBrowserOptions = {}, ): Promise { const enablePool = config?.enableBrowserPool ?? DEFAULT_CONFIG.enableBrowserPool; @@ -280,20 +304,27 @@ export async function acquireBrowser( // Resolve browserGpuMode. On software-renderer hosts (no hardware GPU / // SwiftShader) HeadlessExperimental.beginFrame is unreliable — the // compositor stalls indefinitely on shader-heavy frames and the CDP call - // times out at protocolTimeout (default 5 min). Force screenshot mode for - // the entire process whenever resolveBrowserGpuMode resolves to "software", - // independent of platform/binary. Broader than the per-stage HDR guard in - // the producer (captureHdrStage's `cfg.forceScreenshot = true`) — that - // guard only covered the layered-composite path, not the BeginFrame loop - // for SDR content on a software host. + // times out at protocolTimeout (default 5 min). Force screenshot mode + // whenever resolveBrowserGpuMode resolves to "software", independent of + // platform/binary. This is a separate defense from the producer-side + // `captureHdrStage` guard (which unconditionally forces screenshot for the + // HDR layered-composite path); this guard covers the SDR-render-on-software- + // host case that captureHdrStage doesn't touch. // - // resolveBrowserGpuMode caches its probe Promise for the process lifetime, - // so calling it here is cheap after the first invocation. - const browserGpuMode = config?.browserGpuMode ?? DEFAULT_CONFIG.browserGpuMode; - const resolvedGpuMode = await resolveBrowserGpuMode(browserGpuMode, { - chromePath: headlessShell ?? undefined, - browserTimeout: config?.browserTimeout, - }); + // If the caller has already resolved the mode (frameCapture.ts does) it + // hands us the value via options.resolvedBrowserGpuMode to avoid a second + // resolution from raw config. The probe Promise is cached for the process + // lifetime so the fallback path is still cheap. + let resolvedGpuMode: "software" | "hardware"; + if (options.resolvedBrowserGpuMode) { + resolvedGpuMode = options.resolvedBrowserGpuMode; + } else { + const browserGpuMode = config?.browserGpuMode ?? DEFAULT_CONFIG.browserGpuMode; + resolvedGpuMode = await resolveBrowserGpuMode(browserGpuMode, { + chromePath: headlessShell ?? undefined, + browserTimeout: config?.browserTimeout, + }); + } const isSoftwareRenderer = resolvedGpuMode === "software"; let captureMode: CaptureMode; @@ -315,11 +346,14 @@ export async function acquireBrowser( // them defensively. No-op if caller already built args for screenshot mode. const launchArgs = captureMode === "screenshot" ? stripBeginFrameFlags(chromeArgs) : chromeArgs; - if (captureMode === "screenshot" && isSoftwareRenderer && headlessShell && isLinux) { - // Log once when the software-renderer guard actually changed the outcome - // (Linux + headless-shell available — the only env where beginframe would - // have been the default). Silent on the always-screenshot platforms - // (macOS, Windows) and on hardware hosts. + // Warn ONLY when the software-renderer guard actually changed the outcome. + // Conditions: Linux + headless-shell available + GPU resolved to software + + // caller didn't already pick screenshot via forceScreenshot. (If the caller + // explicitly set forceScreenshot=true the guard didn't change anything, so + // warning would mislead operators into thinking the guard kicked in.) + // One-shot per process to avoid spamming multi-worker renders. + if (!forceScreenshot && isSoftwareRenderer && headlessShell && isLinux && !_softwareGuardWarned) { + _softwareGuardWarned = true; console.warn( "[BrowserManager] Software GPU detected; forcing screenshot capture mode (HeadlessExperimental.beginFrame stalls on software-rendered compositors).", ); diff --git a/packages/engine/src/services/frameCapture.ts b/packages/engine/src/services/frameCapture.ts index cf1babcb5..b0b07815e 100644 --- a/packages/engine/src/services/frameCapture.ts +++ b/packages/engine/src/services/frameCapture.ts @@ -213,7 +213,12 @@ export async function createCaptureSession( { ...config, browserGpuMode: resolvedGpuMode }, ); - const { browser, captureMode } = await acquireBrowser(chromeArgs, config); + // Thread the already-resolved GPU mode into acquireBrowser so it doesn't + // re-resolve from raw config. Promise-cached anyway, but removes the smell + // of two parallel resolutions that future refactors could let diverge. + const { browser, captureMode } = await acquireBrowser(chromeArgs, config, { + resolvedBrowserGpuMode: resolvedGpuMode, + }); const page = await browser.newPage(); // Polyfill esbuild's keepNames helper inside the page. From e1f63a85e17abac1411a6c92c72b3f96eea8def4 Mon Sep 17 00:00:00 2001 From: Vance Ingalls Date: Thu, 14 May 2026 07:36:20 +0000 Subject: [PATCH 3/3] fix(engine): make software-renderer screenshot guard opt-in (CI regression) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PR #822's software-renderer guard auto-flipped `captureMode` to "screenshot" whenever `resolveBrowserGpuMode` returned "software" — but the WebGL probe classifies GitHub Actions runners (SwiftShader) as "software" too, even though `HeadlessExperimental.beginFrame` works on them. As a result the guard pessimized mainstream CI, exceeding `ffmpegStreamingTimeout` (10 min default) on the largest fixtures and producing this regression on three shards of run 25841853063: - fast → sub-composition-video (390 frames, 1080×1920, video + sub-comp) - styles-e → style-13-prod (12 MB baseline, slow tag) - styles-g → overlay-montage-prod (42.36 s duration, 1080×1920) All three failed with `Streaming encode failed: FFmpeg exited with code 255 ... received signal 15`. Wall-clock for overlay-montage-prod jumped from 9 m 13 s (main, beginframe) to 10 m 38 s (PR-822, screenshot) — just past the streaming timeout. ffmpeg was alive and consuming frames the whole time; the producer side simply couldn't sustain the rate in screenshot mode for these fixtures. Fix: gate the auto-flip behind an opt-in env var `HYPERFRAMES_FORCE_SCREENSHOT_ON_SOFTWARE_GPU`. When unset (default), the GPU probe is skipped entirely and the original (pre-PR-822) capture-mode selection runs — Linux + headless-shell → beginframe, with the existing `probeBeginFrameSupport` fallback handling actual BeginFrame unavailability at runtime. Operators on hosts where BeginFrame is technically present but stalls under shader load (the CPU-only Linux sandbox where hf#677 was reproduced) set the env var to short-circuit BeginFrame entirely. Mirrored the same gate in `frameCapture.ts`'s preMode resolution so the expensive probe Chrome is also skipped on the chromeArgs side when the guard is off. Tests: kept all eight software-renderer tests; flipped the block-scoped default to "guard ON" (set in `beforeEach`) so the pre-existing assertions exercise the guard path. Added a positive-control test that pins the new default behavior: `Linux + software + headless-shell + guard OFF → beginframe` (the regression case). — Vai --- .../src/services/browserManager.test.ts | 48 +++++++++++-- .../engine/src/services/browserManager.ts | 68 +++++++++++++------ packages/engine/src/services/frameCapture.ts | 35 ++++++---- 3 files changed, 111 insertions(+), 40 deletions(-) diff --git a/packages/engine/src/services/browserManager.test.ts b/packages/engine/src/services/browserManager.test.ts index b40dac0e6..1e55f3bfc 100644 --- a/packages/engine/src/services/browserManager.test.ts +++ b/packages/engine/src/services/browserManager.test.ts @@ -128,14 +128,18 @@ describe("resolveBrowserGpuMode", () => { }); }); -describe("acquireBrowser — software-renderer guard", () => { - // Tests for the defense-in-depth rule that a "software"-resolved GPU mode - // must force `captureMode = "screenshot"` regardless of platform/binary. - // Without this guard, BeginFrame attempts on a software-rendered compositor - // hang at the CDP protocolTimeout (the hf#677 spike repro). Tests below - // mock puppeteer at the module-import level so no real Chrome is launched. +describe("acquireBrowser — software-renderer guard (opt-in)", () => { + // Tests for the opt-in rule that, when + // `HYPERFRAMES_FORCE_SCREENSHOT_ON_SOFTWARE_GPU=1` is set, a + // "software"-resolved GPU mode must force `captureMode = "screenshot"` + // regardless of platform/binary. Without the env var the guard is dormant — + // mainstream CI hosts (SwiftShader on GitHub runners) report "software" but + // BeginFrame works on them, and pessimizing those hosts caused regressions + // on the largest fixtures (hf#822). Tests below mock puppeteer at the + // module-import level so no real Chrome is launched. const origPlatform = process.platform; + const origGuardEnv = process.env.HYPERFRAMES_FORCE_SCREENSHOT_ON_SOFTWARE_GPU; // Captures the most recent puppeteer.launch call so tests can assert on the // args/executablePath actually passed to Chrome. @@ -173,16 +177,25 @@ describe("acquireBrowser — software-renderer guard", () => { // `vi.resetModules()` already gives each test a fresh `browserManager.js` // module — so `_softwareGuardWarned` starts at false per test. No // additional reset needed. + // Default: guard ON for the bulk of tests in this block (they exercise + // the guard's behavior). Individual tests opt back to "guard OFF" by + // deleting the env var. + process.env.HYPERFRAMES_FORCE_SCREENSHOT_ON_SOFTWARE_GPU = "1"; }); afterEach(() => { Object.defineProperty(process, "platform", { value: origPlatform, configurable: true }); + if (origGuardEnv === undefined) { + delete process.env.HYPERFRAMES_FORCE_SCREENSHOT_ON_SOFTWARE_GPU; + } else { + process.env.HYPERFRAMES_FORCE_SCREENSHOT_ON_SOFTWARE_GPU = origGuardEnv; + } vi.restoreAllMocks(); vi.doUnmock("puppeteer"); vi.doUnmock("puppeteer-core"); }); - it("forces screenshot capture mode when browserGpuMode resolves to 'software' on Linux", async () => { + it("forces screenshot capture mode when browserGpuMode resolves to 'software' on Linux (guard enabled)", async () => { installPuppeteerMock(); const { acquireBrowser: acquire } = await import("./browserManager.js"); @@ -210,6 +223,27 @@ describe("acquireBrowser — software-renderer guard", () => { expect(lastLaunchArgs).toContain("--no-sandbox"); }); + it("KEEPS beginframe capture mode on Linux+software when the guard is NOT enabled (default)", async () => { + // Mirror of the "forces screenshot" test above with the guard OFF — this + // is the production default that mainstream CI hits. Without this, GH + // Actions runners (SwiftShader, reports "software") regress to screenshot + // mode and the streaming encoder times out on shader-heavy fixtures + // (hf#822 regression on overlay-montage-prod, sub-composition-video, + // style-13-prod). + delete process.env.HYPERFRAMES_FORCE_SCREENSHOT_ON_SOFTWARE_GPU; + installPuppeteerMock(); + const { acquireBrowser: acquire } = await import("./browserManager.js"); + + const chromeArgs = ["--no-sandbox", "--enable-begin-frame-control"]; + const result = await acquire(chromeArgs, { + chromePath: "/fake/chrome-headless-shell", + browserGpuMode: "software", + }); + + expect(result.captureMode).toBe("beginframe"); + expect(lastLaunchArgs).toContain("--enable-begin-frame-control"); + }); + it("keeps beginframe capture mode when browserGpuMode is 'hardware' on Linux with headless-shell", async () => { installPuppeteerMock(); const { acquireBrowser: acquire } = await import("./browserManager.js"); diff --git a/packages/engine/src/services/browserManager.ts b/packages/engine/src/services/browserManager.ts index 7e787d889..fce0f3b9b 100644 --- a/packages/engine/src/services/browserManager.ts +++ b/packages/engine/src/services/browserManager.ts @@ -260,6 +260,33 @@ export function _resetSoftwareGuardWarnedForTests(): void { _softwareGuardWarned = false; } +/** + * Opt-in: force `captureMode = "screenshot"` whenever the GPU resolves to + * "software" (no hardware WebGL). + * + * Why opt-in rather than always-on: GitHub Actions runners and other common + * SwiftShader hosts report "software" from the WebGL probe but + * `HeadlessExperimental.beginFrame` works reliably on them — so a blanket + * software → screenshot flip pessimizes mainstream CI by ~1.5 min on + * shader/composition-heavy fixtures and exceeds `ffmpegStreamingTimeout` on + * the largest renders. + * + * The runtime probe (`probeBeginFrameSupport` below) already catches actual + * BeginFrame *unavailability*. This env var exists for hosts where BeginFrame + * is technically present but the compositor stalls under shader load (the + * CPU-only Linux sandbox where hf#677 was reproduced). Operators on such + * hosts set `HYPERFRAMES_FORCE_SCREENSHOT_ON_SOFTWARE_GPU=1` to short-circuit + * BeginFrame entirely. + * + * Accepts the same truthy values as other engine env flags (`"1"`, `"true"`). + */ +export function isSoftwareScreenshotGuardEnabled(): boolean { + const raw = process.env.HYPERFRAMES_FORCE_SCREENSHOT_ON_SOFTWARE_GPU; + if (!raw) return false; + const v = raw.trim().toLowerCase(); + return v === "1" || v === "true" || v === "yes" || v === "on"; +} + export interface AcquireBrowserOptions { /** * If the caller already resolved `browserGpuMode` (e.g. `frameCapture.ts` @@ -301,31 +328,32 @@ export async function acquireBrowser( const isLinux = process.platform === "linux"; const forceScreenshot = config?.forceScreenshot ?? DEFAULT_CONFIG.forceScreenshot; - // Resolve browserGpuMode. On software-renderer hosts (no hardware GPU / - // SwiftShader) HeadlessExperimental.beginFrame is unreliable — the - // compositor stalls indefinitely on shader-heavy frames and the CDP call - // times out at protocolTimeout (default 5 min). Force screenshot mode - // whenever resolveBrowserGpuMode resolves to "software", independent of - // platform/binary. This is a separate defense from the producer-side - // `captureHdrStage` guard (which unconditionally forces screenshot for the - // HDR layered-composite path); this guard covers the SDR-render-on-software- - // host case that captureHdrStage doesn't touch. + // The software-renderer screenshot guard is opt-in (see + // `isSoftwareScreenshotGuardEnabled` above). Skip the GPU probe entirely + // unless the guard is enabled — the probe launches a throwaway Chrome to + // run a WebGL availability check, so paying for it on every render when + // the result is unused is wasteful. Operators on stall-prone hosts set + // `HYPERFRAMES_FORCE_SCREENSHOT_ON_SOFTWARE_GPU=1` and accept the probe + // cost as part of that contract. // // If the caller has already resolved the mode (frameCapture.ts does) it // hands us the value via options.resolvedBrowserGpuMode to avoid a second // resolution from raw config. The probe Promise is cached for the process - // lifetime so the fallback path is still cheap. - let resolvedGpuMode: "software" | "hardware"; - if (options.resolvedBrowserGpuMode) { - resolvedGpuMode = options.resolvedBrowserGpuMode; - } else { - const browserGpuMode = config?.browserGpuMode ?? DEFAULT_CONFIG.browserGpuMode; - resolvedGpuMode = await resolveBrowserGpuMode(browserGpuMode, { - chromePath: headlessShell ?? undefined, - browserTimeout: config?.browserTimeout, - }); + // lifetime so even when both paths fire, only one Chrome launch happens. + const guardEnabled = isSoftwareScreenshotGuardEnabled(); + let resolvedGpuMode: "software" | "hardware" | undefined; + if (guardEnabled) { + if (options.resolvedBrowserGpuMode) { + resolvedGpuMode = options.resolvedBrowserGpuMode; + } else { + const browserGpuMode = config?.browserGpuMode ?? DEFAULT_CONFIG.browserGpuMode; + resolvedGpuMode = await resolveBrowserGpuMode(browserGpuMode, { + chromePath: headlessShell ?? undefined, + browserTimeout: config?.browserTimeout, + }); + } } - const isSoftwareRenderer = resolvedGpuMode === "software"; + const isSoftwareRenderer = guardEnabled && resolvedGpuMode === "software"; let captureMode: CaptureMode; let executablePath: string | undefined; diff --git a/packages/engine/src/services/frameCapture.ts b/packages/engine/src/services/frameCapture.ts index b0b07815e..e527a45f8 100644 --- a/packages/engine/src/services/frameCapture.ts +++ b/packages/engine/src/services/frameCapture.ts @@ -18,6 +18,7 @@ import { releaseBrowser, forceReleaseBrowser, buildChromeArgs, + isSoftwareScreenshotGuardEnabled, resolveBrowserGpuMode, resolveHeadlessShellPath, type CaptureMode, @@ -195,27 +196,35 @@ export async function createCaptureSession( // need explicit clip+scale on `Page.captureScreenshot`, so fall back to // the screenshot path for any DPR > 1. const supersampling = (options.deviceScaleFactor ?? 1) > 1; + // Mirror the software-renderer guard in `acquireBrowser`. The guard is + // opt-in (env var `HYPERFRAMES_FORCE_SCREENSHOT_ON_SOFTWARE_GPU`); when it + // is off we skip the GPU probe entirely to avoid the extra Chrome launch. + // When on, the probe Promise is cached for the process lifetime, so + // resolving here and threading the result to acquireBrowser collapses to + // a single probe. + const guardEnabled = isSoftwareScreenshotGuardEnabled(); const requestedGpuMode = config?.browserGpuMode ?? DEFAULT_CONFIG.browserGpuMode; - const resolvedGpuMode = await resolveBrowserGpuMode(requestedGpuMode, { - chromePath: headlessShell ?? undefined, - browserTimeout: config?.browserTimeout, - }); - // Mirror the software-renderer guard in `acquireBrowser`: on a software - // host the BeginFrame compositor stalls, so build the chromeArgs without - // BeginFrame flags up-front. acquireBrowser will also strip them as a - // belt-and-braces measure, but doing it here keeps the launch args clean. + const resolvedGpuMode = guardEnabled + ? await resolveBrowserGpuMode(requestedGpuMode, { + chromePath: headlessShell ?? undefined, + browserTimeout: config?.browserTimeout, + }) + : undefined; + const isSoftwareRenderer = guardEnabled && resolvedGpuMode === "software"; const preMode: CaptureMode = - headlessShell && isLinux && !forceScreenshot && !supersampling && resolvedGpuMode !== "software" + headlessShell && isLinux && !forceScreenshot && !supersampling && !isSoftwareRenderer ? "beginframe" : "screenshot"; const chromeArgs = buildChromeArgs( { width: options.width, height: options.height, captureMode: preMode }, - { ...config, browserGpuMode: resolvedGpuMode }, + resolvedGpuMode ? { ...config, browserGpuMode: resolvedGpuMode } : config, ); - // Thread the already-resolved GPU mode into acquireBrowser so it doesn't - // re-resolve from raw config. Promise-cached anyway, but removes the smell - // of two parallel resolutions that future refactors could let diverge. + // Thread the already-resolved GPU mode into acquireBrowser when available + // so it doesn't re-resolve from raw config. (Both sides agree on whether + // the guard is enabled because they read the same env var; if the env var + // flips between these two calls — basically only possible in tests — the + // guard semantics fall back to acquireBrowser's own resolution.) const { browser, captureMode } = await acquireBrowser(chromeArgs, config, { resolvedBrowserGpuMode: resolvedGpuMode, });