Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 13 additions & 7 deletions bun.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion packages/cli/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,8 @@
"vitest": "^3.2.4"
},
"optionalDependencies": {
"@google/genai": "^1.50.1"
"@google/genai": "^1.50.1",
"webgpu": "^0.4.0"
},
"engines": {
"node": ">=22"
Expand Down
20 changes: 20 additions & 0 deletions packages/cli/src/commands/render.ts
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,15 @@ export default defineCommand({
description:
"Force host GPU acceleration for Chrome/WebGL capture. Default: auto (probe on first launch; fall back to software if no GPU). Use --no-browser-gpu to force software (SwiftShader).",
},
"gpu-shader-blend": {
type: "boolean",
default: false,
description:
"EXPERIMENTAL. Use the native WebGPU (Dawn) compositor for shader-transition blends when a GPU is available. " +
"Falls back to CPU when Dawn isn't installed or no GPU adapter is present. " +
"Determinism: PSNR ≥ 50dB vs the CPU canonical path, not byte-equal. " +
"Currently ports a subset of shaders (crossfade); unsupported shaders transparently fall back to CPU.",
},
quiet: {
type: "boolean",
description: "Suppress verbose output",
Expand Down Expand Up @@ -293,6 +302,17 @@ export default defineCommand({
workers = parsed;
}

// ── GPU shader-blend (Dawn/WebGPU, EXPERIMENTAL) ────────────────────
// The flag flips an env var that the shader-blend worker reads on first
// message. We pipe through an env var (rather than threading the flag
// through render orchestrator → captureHdrStage → captureHdrHybridLoop
// → pool → worker) because env vars survive the worker_threads boundary
// unchanged and require zero plumbing. The worker logs once whether it
// could acquire a GPU; if not, the existing CPU path runs as before.
if (args["gpu-shader-blend"] === true) {
process.env.HF_DAWN_WEBGPU = "1";
}

// ── Validate max-concurrent-renders ─────────────────────────────────
if (args["max-concurrent-renders"] != null) {
const parsed = parseInt(args["max-concurrent-renders"], 10);
Expand Down
6 changes: 6 additions & 0 deletions packages/cli/tsup.config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,12 @@ var __dirname = __hf_dirname(__filename);`,
"esbuild",
"giget",
"postcss",
// `webgpu` (Dawn) ships a 70+ MB native .dawn.node binary per
// platform. Keeping it external means tsup won't try to inline it
// and the CLI install resolves it (or doesn't, if the optionalDep
// was skipped) from the user's node_modules. The shader-blend worker
// dynamically imports "webgpu" and falls back to CPU on absence.
"webgpu",
],
noExternal: [
"@hyperframes/core",
Expand Down
8 changes: 4 additions & 4 deletions packages/producer/build.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ await Promise.all([
platform: "node",
target: "node22",
format: "esm",
external: ["puppeteer", "esbuild", "postcss"],
external: ["puppeteer", "esbuild", "postcss", "webgpu"],
plugins: [workspaceAliasPlugin],
minify: false,
sourcemap: true,
Expand All @@ -54,7 +54,7 @@ await Promise.all([
platform: "node",
target: "node22",
format: "esm",
external: ["puppeteer", "esbuild", "postcss"],
external: ["puppeteer", "esbuild", "postcss", "webgpu"],
plugins: [workspaceAliasPlugin],
minify: false,
sourcemap: true,
Expand All @@ -70,7 +70,7 @@ await Promise.all([
platform: "node",
target: "node22",
format: "esm",
external: ["puppeteer", "esbuild", "postcss"],
external: ["puppeteer", "esbuild", "postcss", "webgpu"],
plugins: [workspaceAliasPlugin],
minify: false,
sourcemap: true,
Expand All @@ -86,7 +86,7 @@ await Promise.all([
platform: "node",
target: "node22",
format: "esm",
external: ["puppeteer", "esbuild", "postcss"],
external: ["puppeteer", "esbuild", "postcss", "webgpu"],
plugins: [workspaceAliasPlugin],
minify: false,
sourcemap: true,
Expand Down
3 changes: 3 additions & 0 deletions packages/producer/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,9 @@
"tsx": "^4.21.0",
"typescript": "^5.7.2"
},
"optionalDependencies": {
"webgpu": "^0.4.0"
},
"engines": {
"node": ">=22"
}
Expand Down
149 changes: 149 additions & 0 deletions packages/producer/src/services/shaderTransitionGpu.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
/**
* Tests for the Dawn/WebGPU shader-blend compositor.
*
* We can't depend on a working GPU adapter in CI — the Linux sandbox has
* no Vulkan driver. So these tests focus on the surface that must work
* regardless of host:
*
* 1. `HF_DAWN_FORCE_FAIL=1` short-circuits init to a clean failure (the
* env hook the CLI / worker rely on for fallback testability).
* 2. `initGpuCompositor()` never throws. On a no-GPU host it returns
* `{ ok: false, reason }` and the caller can fall back without
* try/catch.
* 3. When a GPU IS available (Vance's Mac, Linux+GPU), the compositor's
* crossfade output matches the CPU canonical path within PSNR ≥ 50dB.
* This branch is skipped when init fails — the test logs the reason
* instead so a regression on Mac surfaces cleanly without breaking CI
* elsewhere.
*
* Determinism note: we deliberately do NOT pin byte-equality with the CPU
* shader. The whole point of the new path is f32 GPU math + u16 storage,
* which differs from f64 CPU math at the LSB. PSNR is the right pin.
*/

import { afterEach, beforeEach, describe, expect, it } from "vitest";
import { crossfade } from "@hyperframes/engine/shader-transitions";
import { initGpuCompositor } from "./shaderTransitionGpu.js";

// Dimensions of the small frame every buffer fixture in this file uses.
const WIDTH = 32;
const HEIGHT = 16;
// Number of pixels in one frame.
const PX = WIDTH * HEIGHT;
// Byte length of one frame: the fixtures below write three 16-bit
// little-endian values per pixel, i.e. 6 bytes per pixel.
const BYTES = PX * 6;

/**
 * Builds a frame-sized buffer whose contents vary with the pixel index.
 *
 * Each pixel occupies three consecutive 16-bit little-endian values. The
 * first is `index * 1024`, the second `index * 2048` XOR `0xa5a5`, the third
 * `index * 4096` XOR `0x5a5a`, each product masked to 16 bits first.
 *
 * @returns A freshly allocated buffer of `BYTES` bytes.
 */
function fillGradient(): Buffer {
  const frame = Buffer.alloc(BYTES);
  let offset = 0;
  for (let pixel = 0; pixel < PX; pixel += 1) {
    const channels = [
      (pixel * 1024) & 0xffff,
      ((pixel * 2048) & 0xffff) ^ 0xa5a5,
      ((pixel * 4096) & 0xffff) ^ 0x5a5a,
    ];
    for (const value of channels) {
      // writeUInt16LE returns the offset just past the bytes it wrote.
      offset = frame.writeUInt16LE(value, offset);
    }
  }
  return frame;
}

/**
 * Builds a frame-sized buffer in which every pixel carries the same three
 * 16-bit little-endian values, written in the order `r`, `g`, `b`.
 *
 * @param r Value stored in the first 16-bit slot of every pixel.
 * @param g Value stored in the second 16-bit slot of every pixel.
 * @param b Value stored in the third 16-bit slot of every pixel.
 * @returns A freshly allocated buffer of `BYTES` bytes.
 */
function fillSolid(r: number, g: number, b: number): Buffer {
  const frame = Buffer.alloc(BYTES);
  const channels = [r, g, b];
  let offset = 0;
  for (let pixel = 0; pixel < PX; pixel += 1) {
    for (const value of channels) {
      // writeUInt16LE returns the offset just past the bytes it wrote.
      offset = frame.writeUInt16LE(value, offset);
    }
  }
  return frame;
}

/**
 * Computes the peak signal-to-noise ratio, in dB, between two buffers read
 * as sequences of unsigned 16-bit little-endian samples (peak value 65535).
 * The tests in this file treat >= 50 dB as the acceptance bar for the GPU
 * path when compared against the CPU output.
 *
 * @param a First sample buffer.
 * @param b Second sample buffer; must have the same byte length as `a`.
 * @returns The PSNR in dB, or `Infinity` when no sample differs.
 * @throws Error when the two buffers differ in length.
 */
function psnrDb(a: Buffer, b: Buffer): number {
  if (a.length !== b.length) {
    throw new Error("buffer length mismatch");
  }

  const PEAK = 65535;
  const sampleCount = a.length / 2;

  let squaredErrorSum = 0;
  let index = 0;
  while (index < sampleCount) {
    const byteOffset = index * 2;
    const delta = a.readUInt16LE(byteOffset) - b.readUInt16LE(byteOffset);
    squaredErrorSum += delta * delta;
    index += 1;
  }

  // Identical inputs have zero error; report that as an infinite ratio
  // instead of dividing by zero below.
  if (squaredErrorSum === 0) {
    return Infinity;
  }

  const meanSquaredError = squaredErrorSum / sampleCount;
  return 10 * Math.log10((PEAK * PEAK) / meanSquaredError);
}

describe("shaderTransitionGpu", () => {
  // Snapshot of the override as it was when the suite was defined, so the
  // surrounding environment can be restored after every test.
  const originalForceFail = process.env.HF_DAWN_FORCE_FAIL;

  beforeEach(() => {
    // Each test below sets its own value; clear the override first so state
    // can't bleed from one test into the next. The module caches the
    // loadWebgpu() promise, but each test file runs in a fresh vitest worker,
    // so that cache is only shared within this `describe` — fine for these
    // tests.
    delete process.env.HF_DAWN_FORCE_FAIL;
  });

  afterEach(() => {
    // Put back whatever the environment held before the suite ran.
    if (originalForceFail === undefined) {
      delete process.env.HF_DAWN_FORCE_FAIL;
    } else {
      process.env.HF_DAWN_FORCE_FAIL = originalForceFail;
    }
  });

  it("HF_DAWN_FORCE_FAIL short-circuits to a clean failure", async () => {
    process.env.HF_DAWN_FORCE_FAIL = "1";
    const result = await initGpuCompositor();
    expect(result.ok).toBe(false);
    if (!result.ok) {
      expect(result.reason).toMatch(/HF_DAWN_FORCE_FAIL/);
    }
  });

  it("returns ok:false (never throws) on hosts without a GPU adapter", async () => {
    // No assertion on which branch we hit — we just assert the call never
    // throws and returns a structured result. On a host with a GPU this is
    // typically `ok: true`; on the Linux sandbox it is
    // `{ ok: false, reason: "no GPU adapter..." }` or the
    // module-not-installed branch. Both are correct.
    const result = await initGpuCompositor();
    expect(typeof result).toBe("object");
    if (!result.ok) {
      expect(typeof result.reason).toBe("string");
      expect(result.reason.length).toBeGreaterThan(0);
      return;
    }
    const compositor = result.compositor;
    try {
      expect(typeof compositor.supportsShader).toBe("function");
      expect(compositor.supportsShader("crossfade")).toBe(true);
      expect(compositor.supportsShader("not-a-real-shader")).toBe(false);
    } finally {
      // Dispose even when an expectation above throws, so a failing
      // assertion can't leak the compositor (mirrors the crossfade test).
      await compositor.dispose();
    }
  });

  it("crossfade output matches CPU canonical within PSNR >= 50dB when a GPU is available", async () => {
    const result = await initGpuCompositor();
    if (!result.ok) {
      // Skipped — host has no GPU. Log so a regression on a machine where
      // the adapter SHOULD be available is visible in the test output.
      // eslint-disable-next-line no-console
      console.log(`[shaderTransitionGpu.test] GPU branch skipped: ${result.reason}`);
      return;
    }
    const compositor = result.compositor;
    try {
      const from = fillGradient();
      const to = fillSolid(40000, 5000, 25000);
      const outGpu = Buffer.alloc(BYTES);
      const outCpu = Buffer.alloc(BYTES);
      // Run the same 50% blend through both paths and compare the outputs.
      await compositor.blend("crossfade", from, to, outGpu, WIDTH, HEIGHT, 0.5);
      crossfade(from, to, outCpu, WIDTH, HEIGHT, 0.5);
      const psnr = psnrDb(outGpu, outCpu);
      // eslint-disable-next-line no-console
      console.log(`[shaderTransitionGpu.test] crossfade PSNR vs CPU: ${psnr.toFixed(2)} dB`);
      expect(psnr).toBeGreaterThanOrEqual(50);
    } finally {
      await compositor.dispose();
    }
  });
});
Loading
Loading