Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
39cbf70
feat(bench): add FrameLoop-based pan benchmark to grida-dev
softmarshmallow Mar 24, 2026
3453219
docs: update SKILL.md to include `frameloop` benchmark details
softmarshmallow Mar 24, 2026
cc847e1
perf(canvas): use FxHash for NodeId-keyed caches
softmarshmallow Mar 24, 2026
2e20f3b
Merge pull request #600 from gridaco/feature/great-wright
softmarshmallow Mar 24, 2026
c40122b
perf(cg): adaptive stable delay for smooth slow-pan
softmarshmallow Mar 24, 2026
fdb4e46
Merge remote-tracking branch 'origin/main' into canary
softmarshmallow Mar 24, 2026
ba3a1cb
Merge branch 'canary' of https://github.com/gridaco/grida into canary
softmarshmallow Mar 24, 2026
2e89cde
feat(scene): add scene_envelope method for bounding envelope retrieval
softmarshmallow Mar 24, 2026
8b34c73
clean logs
softmarshmallow Mar 24, 2026
00e31dc
refactor(renderer): enhance content caching and apply_changes logic
softmarshmallow Mar 24, 2026
cc9a6a1
feat(hittest): add intersects_topmost method for optimized hit testing
softmarshmallow Mar 24, 2026
4f2be30
refactor(image-loading): streamline raster image handling and URL ext…
softmarshmallow Mar 24, 2026
d875625
feat(native-application): add exiting flag to manage event processing…
softmarshmallow Mar 24, 2026
0639600
perf(layout): skip Taffy for non-flex containers and optimize hot pat…
softmarshmallow Mar 24, 2026
9054d36
clean
softmarshmallow Mar 24, 2026
255c874
feat(emscripten): add new external functions for timing and main loop…
softmarshmallow Mar 24, 2026
8aebabc
feat(perf): enhance WASM performance with DenseNodeMap and timing imp…
softmarshmallow Mar 24, 2026
2fac06e
refactor(embed): update RefigRenderConfig as now it is optimized agai…
softmarshmallow Mar 24, 2026
d02ab7c
fix, enable layout
softmarshmallow Mar 24, 2026
ef781e7
fix docs build
softmarshmallow Mar 24, 2026
8cba8c1
feat(io): add snapshot option to control JSON snapshot generation
softmarshmallow Mar 24, 2026
17d6f87
feat(geometry): implement GeoInput struct and optimize geometry extra…
softmarshmallow Mar 24, 2026
d903261
refactor(fig2grida): optimize document merging with shared buffers
softmarshmallow Mar 24, 2026
58ca791
feat(format): enhance FlatBuffer encoding with position encoding and …
softmarshmallow Mar 24, 2026
7b5de6b
refactor(format): optimize FlatBuffer encoding and transform handling
softmarshmallow Mar 24, 2026
9c525f0
refactor(fig2grida): streamline benchmark tests and update encoding s…
softmarshmallow Mar 24, 2026
2640c91
perf(cg): optimize WASM load_scene with property-split SoA and alloca…
softmarshmallow Mar 25, 2026
6783a31
fix(format): handle null id in NodeIdentifier creation and improve tr…
softmarshmallow Mar 25, 2026
b84ca70
perf logs
softmarshmallow Mar 25, 2026
930180d
chore
softmarshmallow Mar 25, 2026
9db0e99
wasm 0.91.0-canary.13
softmarshmallow Mar 25, 2026
e955d0c
refactor(grida-canvas): remove SmallVec dependency and optimize Paint…
softmarshmallow Mar 25, 2026
edf4ee0
feat(fig2grida): add support for Figma Deck (.deck) files
softmarshmallow Mar 25, 2026
8d08855
fix(grida-canvas): fix 5 bugs found during PR #601 review
softmarshmallow Mar 25, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
102 changes: 99 additions & 3 deletions .agents/skills/cg-perf/SKILL.md
Original file line number Diff line number Diff line change
Expand Up @@ -131,13 +131,24 @@ reports `min/p50/p95/p99/MAX` plus per-stage breakdown and settle cost.
| `zoom` | slow/fast × around-fit/high | Zoom oscillation at different levels |
| `pan_with_settle` | slow/fast × fit/zoomed | Pan with settle frames interleaved every 12 frames |
| `realtime` | fast/slow × fit/zoomed | **Real-time event loop simulation** with sleep, 240Hz tick thread, and settle countdown matching the native viewer |
| `frameloop` | 16/50/80/120/200/300/500ms interval | **Real FrameLoop path** — the only bench that captures stable-frame jank during panning (see below) |
| `resize` | alternating viewport sizes | `--resize` flag. Measures `resize()` + `redraw()` cost per cycle (layout rebuild + cache invalidation + repaint) |

The `realtime` scenarios use actual `thread::sleep()` between frames
and simulate the native viewer's 240Hz tick thread + settle countdown.
These produce frame timings that match what users actually see,
including settle-induced frame drops at their natural frequency.

The `frameloop` scenarios go through the actual `FrameLoop.poll()` /
`complete()` path — the same code path as `Application::frame()`. All
other pan/zoom scenarios bypass `FrameLoop` and call `queue_unstable()`
directly, which means they never produce stable frames mid-interaction.
The `frameloop` scenarios sweep scroll intervals from 16ms (fast flick)
to 500ms (discrete clicks) and reveal how `FrameLoop`'s stable-frame
decisions affect the frame time distribution at each speed. Use these
when investigating panning jank, adaptive timing, or pan/zoom image
cache behavior.

**Choosing scenes:** Use `--list-scenes` to see what's available. Pick
scenes that stress the subsystem you're optimizing. For effects/caching
work, look for scenes with high promoted-node counts. For culling work,
Expand Down Expand Up @@ -174,6 +185,7 @@ of scenes, configs, and operations. The naming convention is
| Does a config toggle actually help? | Both GPU benchmarks + Criterion |
| Does it match what users see in the app? | `realtime` scenarios (sleep + settle simulation) |
| Are there frame drops during gestures? | Check `p99` and `MAX` in scenario stats |
| Is slow panning janky (stable frame spikes)? | `frameloop` scenarios (real FrameLoop path) |
| Is resize janky? | Single-scene GPU bench with `--resize` |

---
Expand Down Expand Up @@ -447,9 +459,19 @@ Back-to-back frame benchmarks (no sleep between frames) can produce
misleadingly fast numbers because they never trigger settle frames.
The native viewer's 240Hz tick thread fires `queue_stable()` ~50ms
after the last interaction, clearing image caches. Use the `realtime`
scenario type to simulate this timing and produce numbers that match
what users actually see. Always check `p99` and `MAX` — not just
`p50` — to catch settle-induced spikes.
or `frameloop` scenario types to produce numbers that match what users
actually see. Always check `p99` and `MAX` — not just `p50` — to
catch settle-induced spikes.

### Most benchmarks bypass FrameLoop

All pan/zoom/circle/zigzag scenarios call `queue_unstable()` directly
— they never go through `FrameLoop.poll()`. This means they never
produce stable frames mid-interaction and cannot capture the jank
pattern where a stable frame interrupts slow panning. Only the
`frameloop` scenarios use the real `FrameLoop` decision path. When
investigating panning smoothness or adaptive timing, always use the
`frameloop` scenarios.

### Stable frames must recapture caches

Expand All @@ -473,3 +495,77 @@ absolute-positioned documents.
thousands of cheap entries, the timing checks themselves can become
significant. Use `elapsed()` checks at reasonable intervals, not every
iteration.

### `Instant::now()` is broken on emscripten

Under emscripten, `Instant::now()` is effectively constant, so durations
collapse to zero. Use `crate::sys::perf_now()` for timing: it maps to
`emscripten_get_now()` (`performance.now()`) on WASM and `Instant` on native.

### WASM/native ratios are stage-dependent

WASM overhead is not a single multiplier. Roughly: simple compute is ~2-3x,
HashMap-heavy traversals can be 10-35x, and after Vec-indexing hot paths,
data-structure-bound stages drop to ~1-2x while compute-heavy stages stay
~5-15x+. Measure per stage.

### Data structures matter much more in WASM

Large `HashMap`s (100K+ entries) may be fine on native but can be extremely
slow in WASM due to linear memory and weaker cache behavior. Prefer dense
Vec-indexed storage (`DenseNodeMap<V>`) for hot paths. See `cache/fast_hash.rs`.

### Native profiles can mis-rank WASM bottlenecks

Native profiling finds per-stage costs, but it cannot show WASM amplification. Example:
the native profile pointed at the layers stage, while WASM time was dominated by the
geometry stage because per-node `HashMap` costs were amplified there. Confirm priorities with WASM data.

---

## WASM Performance

WASM is the primary shipping target. Native benchmarks show the algorithmic
ceiling; WASM benchmarks show delivered performance.

See `docs/wg/feat-2d/wasm-benchmarking.md` for the full strategy and
lessons learned. Key points:

### Measurement inside WASM

`load_scene` emits per-stage timing via `eprintln!` + `sys::perf_now()`.
Read the `[load_scene]` line in browser console (stderr) for
fonts/layout/geometry/effects/layers. This is the primary `load_scene`
WASM measurement path today.

### Three-layer benchmarking model

1. **Native** (`load-bench`, Criterion): algorithmic ceiling + profiling
2. **WASM-on-Node**: real WASM in headless/CI — **implemented**
3. **Browser**: full pipeline (JS encode + WASM load + GPU render)

WASM-on-Node benchmark:

```sh
# Build WASM first
just --justfile crates/grida-canvas-wasm/justfile build

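# Run benchmark (real-scene cases auto-discover .grida files in lib/__test__/fixtures/local/;
# the 136k test uses fixtures/local/perf/local/yrr-main.grida — make it available there)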
cd crates/grida-canvas-wasm && npx vitest run __test__/bench-load-scene.test.ts
```

WASM-on-Node results closely match browser WASM timings, confirming it as
a valid benchmarking layer for compute-heavy stages.

### Known WASM-specific issues

- **GPU-only paths** can have bugs that surface only on WASM (native runs the CPU backend).
  `blit_content_cache` and the overlay-only fast path both had WASM-only bugs.
- **Large enum access** is the dominant WASM bottleneck. The `Node` enum
(15 variants, each hundreds of bytes) causes cache-unfriendly memory access
that WASM amplifies to 30×+ native cost. Fix: Struct-of-Arrays (SoA) —
see `docs/wg/feat-2d/wasm-load-scene-optimization.md`.
- **Deep recursion** (`build_recursive`, `flatten_node`) is costlier in WASM
due to stack-frame overhead in linear memory.
- **JS↔WASM boundary** overhead is small for bulk calls (`switch_scene`), but JS-side
  FlatBuffers encoding is still ~10% of pipeline cost.
18 changes: 15 additions & 3 deletions .ref/figma/fig2kiwi.ts
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,12 @@ import {

// --- Constants ---

// Kiwi archive preludes: the first 8 bytes of the file (see FigmaArchiveParser.parseArchive).
// Each variant uses a different fixed 8-byte ASCII magic string; FigJam's ends with a literal
// period — it is not a typo and must match bytes on disk.
const FIG_KIWI_PRELUDE = "fig-kiwi";
const FIGJAM_KIWI_PRELUDE = "fig-jam.";
const FIGDECK_KIWI_PRELUDE = "fig-deck";
// The bytes "PK\x03\x04" (0x50 = 'P', 0x4b = 'K').
// NOTE(review): presumably compared against the start of the input to detect a
// ZIP-wrapped file — the usage is outside this view; confirm.
const ZIP_SIGNATURE = [0x50, 0x4b, 0x03, 0x04];

// --- Archive Parser (duplicated from main source) ---
Expand Down Expand Up @@ -94,7 +98,11 @@ class FigmaArchiveParser {
const preludeData = parser.read(FIG_KIWI_PRELUDE.length);
const prelude = String.fromCharCode.apply(String, Array.from(preludeData));

if (prelude !== FIG_KIWI_PRELUDE && prelude !== FIGJAM_KIWI_PRELUDE) {
if (
prelude !== FIG_KIWI_PRELUDE &&
prelude !== FIGJAM_KIWI_PRELUDE &&
prelude !== FIGDECK_KIWI_PRELUDE
) {
throw new Error(`Unexpected prelude: "${prelude}"`);
}

Expand Down Expand Up @@ -138,8 +146,12 @@ function readFigFile(data: Uint8Array) {
String,
Array.from(fileData.slice(0, 8))
);
return prelude === FIG_KIWI_PRELUDE || prelude === FIGJAM_KIWI_PRELUDE;
}) || keys.find((k) => k.endsWith(".fig"));
return (
prelude === FIG_KIWI_PRELUDE ||
prelude === FIGJAM_KIWI_PRELUDE ||
prelude === FIGDECK_KIWI_PRELUDE
);
}) || keys.find((k) => k.endsWith(".fig") || k.endsWith(".deck"));

if (!mainFile) {
throw new Error(
Expand Down
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

151 changes: 151 additions & 0 deletions crates/grida-canvas-wasm/lib/__test__/bench-load-scene.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
// @vitest-environment node
//
// WASM-on-Node benchmark for load_scene pipeline.
//
// Measures real WASM execution of the scene loading stages:
// 1. loadSceneGrida — FBS decode + SceneGraph construction
// 2. switchScene — layout + geometry + effects + layers
//
// When `perf` feature is enabled on the cg crate, the Rust side emits
// per-stage timing via eprintln! ([load_scene] line).
// This test measures JS-side wall time for comparison.
//
// Usage:
// pnpm test bench-load-scene
// pnpm vitest run bench-load-scene --reporter=verbose
//
// To benchmark a .grida file, place it in:
// lib/__test__/fixtures/local/
// All .grida files in that directory will be auto-discovered.

import { readFileSync, existsSync, readdirSync } from "node:fs";
import { resolve } from "node:path";
import { beforeAll, describe, expect, it } from "vitest";
import { Scene } from "../modules/canvas";

/** Local (gitignored) benchmark fixtures directory, resolved next to this test file. */
const LOCAL_FIXTURES_DIR = resolve(__dirname, "fixtures/local");

// WASM module instance shared by every test in this file; assigned in beforeAll.
let module: any;

// Load the built package once for the whole file and keep its `module`.
// The hook is given a 30s timeout.
beforeAll(async () => {
  const { default: init } = require("../../dist/index.js") as {
    default: (opts?: unknown) => Promise<any>;
  };
  const { module: wasmModule } = await init();
  module = wasmModule;
}, 30_000);

/**
 * Initialise a raster-backend application in the shared WASM module and wrap
 * it in a `Scene`.
 *
 * @param width  Width forwarded to `_init_with_backend` (default 1000).
 * @param height Height forwarded to `_init_with_backend` (default 1000).
 * @returns A `Scene` bound to the newly created application pointer.
 */
function createRasterScene(width = 1000, height = 1000): Scene {
  const rasterBackendId = 1; // BACKEND_ID.Raster
  const useEmbeddedFonts = 1; // true
  const configFlags = 0;

  const appptr = module._init_with_backend(
    rasterBackendId,
    width,
    height,
    useEmbeddedFonts,
    configFlags
  );
  return new Scene(module, appptr);
}

/**
 * List the `.grida` entries found directly inside the local fixtures directory.
 *
 * @returns One `{ name, path }` record per entry, in sorted name order; an
 *   empty array when the directory does not exist.
 */
function discoverGridaFixtures(): { name: string; path: string }[] {
  const found: { name: string; path: string }[] = [];

  if (existsSync(LOCAL_FIXTURES_DIR)) {
    const names = readdirSync(LOCAL_FIXTURES_DIR).filter((entry) =>
      entry.endsWith(".grida")
    );
    names.sort();
    for (const name of names) {
      found.push({ name, path: resolve(LOCAL_FIXTURES_DIR, name) });
    }
  }

  return found;
}

// Benchmarks for the load_scene pipeline running as real WASM under Node.
//
// Every case creates its own raster scene and disposes it in a `finally`
// block, so a failed assertion or a thrown load error still disposes the
// scene instead of leaving it allocated while the remaining cases run.
describe("bench: load_scene (WASM-on-Node)", () => {
  it("grida1 JSON (rectangle)", async () => {
    const scene = createRasterScene();
    try {
      const doc = readFileSync(
        resolve(process.cwd(), "example/rectangle.grida1"),
        "utf8"
      );

      const t0 = performance.now();
      scene.loadScene(doc);
      const elapsed = performance.now() - t0;

      console.log(`[wasm-bench] rectangle.grida1: ${elapsed.toFixed(0)}ms`);
      expect(elapsed).toBeLessThan(5_000);
    } finally {
      scene.dispose();
    }
  });

  it("synthetic 100x100 grid (10k nodes)", async () => {
    const scene = createRasterScene();
    try {
      const t0 = performance.now();
      scene.loadBenchmarkScene(100, 100);
      const elapsed = performance.now() - t0;

      console.log(
        `[wasm-bench] synthetic 100x100: ${elapsed.toFixed(0)}ms (10k nodes)`
      );
      expect(elapsed).toBeLessThan(30_000);
    } finally {
      scene.dispose();
    }
  }, 60_000);

  it("synthetic 200x200 grid (40k nodes)", async () => {
    const scene = createRasterScene();
    try {
      const t0 = performance.now();
      scene.loadBenchmarkScene(200, 200);
      const elapsed = performance.now() - t0;

      console.log(
        `[wasm-bench] synthetic 200x200: ${elapsed.toFixed(0)}ms (40k nodes)`
      );
      expect(elapsed).toBeLessThan(60_000);
    } finally {
      scene.dispose();
    }
  }, 120_000);

  // Auto-discovered .grida fixtures from fixtures/local/
  const fixtures = discoverGridaFixtures();

  for (const fx of fixtures) {
    it(`grida binary: ${fx.name}`, async () => {
      const data = new Uint8Array(readFileSync(fx.path));
      const scene = createRasterScene();
      try {
        // Phase 1: FBS decode
        const t0 = performance.now();
        scene.loadSceneGrida(data);
        const tLoad = performance.now();

        // Phase 2: switch to the first scene
        const sceneIds = scene.loadedSceneIds();
        expect(sceneIds.length).toBeGreaterThan(0);
        const firstSceneId = sceneIds[0];

        scene.switchScene(firstSceneId);
        const tSwitch = performance.now();

        const loadMs = tLoad - t0;
        const switchMs = tSwitch - tLoad;
        const totalMs = tSwitch - t0;

        console.log(
          `[wasm-bench] ${fx.name} (scene=${firstSceneId}): ` +
            `load=${loadMs.toFixed(0)}ms switch=${switchMs.toFixed(0)}ms total=${totalMs.toFixed(0)}ms`
        );

        expect(totalMs).toBeLessThan(120_000);
      } finally {
        scene.dispose();
      }
    }, 120_000);
  }

  // With no local fixtures, register a placeholder case that only explains
  // where to put them (it reports as a passing test, not a skipped one).
  if (fixtures.length === 0) {
    it("no .grida fixtures found (skipped)", () => {
      console.log(
        "[wasm-bench] No .grida fixtures in lib/__test__/fixtures/local/. " +
          "Place .grida files there to benchmark real scenes."
      );
    });
  }
});
2 changes: 1 addition & 1 deletion crates/grida-canvas-wasm/lib/bin/grida-canvas-wasm.js

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions crates/grida-canvas-wasm/lib/bin/grida_canvas_wasm.wasm
Git LFS file not shown
1 change: 1 addition & 0 deletions crates/grida-canvas-wasm/lib/modules/canvas-bindings.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ declare namespace canvas {
ptr: number,
len: number
): void;
_loaded_scene_ids(state: GridaCanvasApplicationPtr): Ptr;
_drain_missing_images(state: GridaCanvasApplicationPtr): Ptr;
_resolve_image(
state: GridaCanvasApplicationPtr,
Expand Down
13 changes: 13 additions & 0 deletions crates/grida-canvas-wasm/lib/modules/canvas.ts
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,19 @@ export class Scene {
this._free_string(ptr, len);
}

/**
 * List the IDs of every scene decoded by the most recent `loadSceneGrida` call.
 *
 * @returns The scene IDs parsed from the JSON string read at the pointer the
 *   WASM side returns, or an empty array when that pointer is 0.
 */
loadedSceneIds(): string[] {
  this._assertAlive();
  const resultPtr = this.module._loaded_scene_ids(this.appptr);
  if (resultPtr !== 0) {
    const json = ffi.readLenPrefixedString(this.module, resultPtr);
    return JSON.parse(json) as string[];
  }
  return [];
}

/**
* Returns image refs that were needed during the last render but not found.
* Only returns refs not yet reported in a previous call.
Expand Down
2 changes: 2 additions & 0 deletions crates/grida-canvas-wasm/lib/modules/ffi.ts
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ export namespace ffi {
): [ptr: number, len: number] {
const len = bytes.length;
const ptr = module._allocate(len);
// Re-read HEAPU8 after _allocate — if WASM memory grew during
// allocation, the old Uint8Array view is detached.
module.HEAPU8.set(bytes, ptr);
return [ptr, len];
}
Expand Down
4 changes: 2 additions & 2 deletions crates/grida-canvas-wasm/package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "@grida/canvas-wasm",
"version": "0.91.0-canary.12",
"version": "0.91.0-canary.13",
"private": false,
"description": "WASM bindings for Grida Canvas",
"keywords": [
Expand All @@ -23,7 +23,7 @@
"build": "tsup",
"dev": "tsup --watch",
"prepack": "just build",
"prepublishOnly": "[ $(du -sk lib 2>/dev/null | cut -f1) -lt 15360 ]",
"prepublishOnly": "[ $(du -sk dist 2>/dev/null | cut -f1) -lt 15360 ]",
"serve": "serve -p 4020",
"test": "vitest run",
"typecheck": "tsc --noEmit"
Expand Down
Loading
Loading