From f3e29b993f07fed343e6ae0707add2aba1244558 Mon Sep 17 00:00:00 2001 From: Universe Date: Mon, 6 Apr 2026 21:25:44 +0900 Subject: [PATCH 1/3] refactor(cg-perf): update scenario names and descriptions for clarity - Renamed scenarios to remove "BUG" prefix and reflect their purpose as no-cache baselines. - Updated comments in the code to clarify the intent of passing `stable=true` for full draw measurements. - Adjusted the Markdown documentation to align with the new scenario naming conventions. --- .agents/skills/cg-perf/SKILL.md | 26 +++++++++++++------------- crates/grida-dev/src/bench/runner.rs | 12 ++++++------ 2 files changed, 19 insertions(+), 19 deletions(-) diff --git a/.agents/skills/cg-perf/SKILL.md b/.agents/skills/cg-perf/SKILL.md index bfca27c447..e8f79abe6f 100644 --- a/.agents/skills/cg-perf/SKILL.md +++ b/.agents/skills/cg-perf/SKILL.md @@ -123,19 +123,19 @@ reports `min/p50/p95/p99/MAX` plus per-stage breakdown and settle cost. **Scenario types in the expanded matrix:** -| Kind | Scenarios | What it tests | -| ----------------- | --------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------ | -| `pan` | slow/fast × fit/zoomed | Linear back-and-forth panning | -| `circle_pan` | small/large radius × fit/zoomed | Circular trackpad gesture (unpredictable edges) | -| `zigzag` | fast (continuous) / slow (with pauses) × fit/zoomed | Diagonal reading pattern with direction changes | -| `zoom` | slow/fast × around-fit/high | Zoom oscillation at different levels | -| `pan_with_settle` | slow/fast × fit/zoomed | Pan with settle frames interleaved every 12 frames | -| `zoom_with_settle`| slow/fast × fit/high | Zoom with settle frames interleaved every 12 frames — captures cache-cold spike after settle nukes zoom cache | -| `zoom_forced_stable` | slow/fast × fit/high (BUG prefix) | Forces `stable=true` on every zoom frame — reproduces the `redraw()` bug 
for A/B comparison | -| `realtime` | fast/slow × fit/zoomed | **Real-time event loop simulation** with sleep, 240Hz tick thread, and settle countdown matching the native viewer | -| `frameloop` | 16/50/80/120/200/300/500ms interval | **Real FrameLoop path** — the only bench that captures stable-frame jank during panning (see below) | -| `frameloop_zoom` | 16/50/80/120/200/500ms interval | **Real FrameLoop path for zoom** — captures stable-frame intrusion during zoom gestures | -| `resize` | alternating viewport sizes | `--resize` flag. Measures `resize()` + `redraw()` cost per cycle (layout rebuild + cache invalidation + repaint) | +| Kind | Scenarios | What it tests | +| ----------------------- | --------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------ | +| `pan` | slow/fast × fit/zoomed | Linear back-and-forth panning | +| `circle_pan` | small/large radius × fit/zoomed | Circular trackpad gesture (unpredictable edges) | +| `zigzag` | fast (continuous) / slow (with pauses) × fit/zoomed | Diagonal reading pattern with direction changes | +| `zoom` | slow/fast × around-fit/high | Zoom oscillation at different levels | +| `pan_with_settle` | slow/fast × fit/zoomed | Pan with settle frames interleaved every 12 frames | +| `zoom_with_settle` | slow/fast × fit/high | Zoom with settle frames interleaved every 12 frames — captures cache-cold spike after settle nukes zoom cache | +| `baseline_nocache_zoom` | slow/fast × fit/high | Forces `stable=true` on every zoom frame — no-cache baseline measuring raw full-draw cost for A/B comparison | +| `realtime` | fast/slow × fit/zoomed | **Real-time event loop simulation** with sleep, 240Hz tick thread, and settle countdown matching the native viewer | +| `frameloop` | 16/50/80/120/200/300/500ms interval | **Real FrameLoop path** — the only bench that captures stable-frame jank during panning (see below) | +| 
`frameloop_zoom` | 16/50/80/120/200/500ms interval | **Real FrameLoop path for zoom** — captures stable-frame intrusion during zoom gestures | +| `resize` | alternating viewport sizes | `--resize` flag. Measures `resize()` + `redraw()` cost per cycle (layout rebuild + cache invalidation + repaint) | **SurfaceUI overlay measurement (`--overlay`):** diff --git a/crates/grida-dev/src/bench/runner.rs b/crates/grida-dev/src/bench/runner.rs index a5d7922ba0..7a4fb89053 100644 --- a/crates/grida-dev/src/bench/runner.rs +++ b/crates/grida-dev/src/bench/runner.rs @@ -1287,7 +1287,7 @@ fn run_zoom_pass_forced_stable( z = next_z; } renderer.camera.set_zoom(z); - // BUG REPRODUCTION: always pass stable=true, same as redraw() does + // No-cache baseline: always pass stable=true (forces full draw every frame) if let Some((total, q, d, mf, c, f)) = measure_frame(renderer, true, overlay.as_mut()) { frame_times.push(total); queue_us_acc.push(q); @@ -1757,25 +1757,25 @@ fn run_scenarios( let forced_stable_scenarios = vec![ ForcedStableZoomScenario { - name: "BUG_zoom_stable_slow_fit", + name: "baseline_nocache_zoom_slow_fit", step: 0.005, z_min: fs_lo, z_max: fs_hi, }, ForcedStableZoomScenario { - name: "BUG_zoom_stable_fast_fit", + name: "baseline_nocache_zoom_fast_fit", step: 0.05, z_min: fs_lo, z_max: fs_hi, }, ForcedStableZoomScenario { - name: "BUG_zoom_stable_slow_high", + name: "baseline_nocache_zoom_slow_high", step: 0.01, z_min: fs_zoomed_in * 0.5, z_max: fs_zoomed_in, }, ForcedStableZoomScenario { - name: "BUG_zoom_stable_fast_high", + name: "baseline_nocache_zoom_fast_high", step: 0.1, z_min: fs_zoomed_in * 0.5, z_max: fs_zoomed_in, @@ -1791,7 +1791,7 @@ fn run_scenarios( run_zoom_pass_forced_stable(renderer, frames, fss.step, fss.z_min, fss.z_max, ov()); results.push(ScenarioResult { name: fss.name.to_string(), - kind: "zoom_forced_stable".to_string(), + kind: "baseline_nocache_zoom".to_string(), params: ScenarioParams { speed: Some(fss.step), zoom: None, From 
4f00d227ca3b9d5fc36ab9e9db85368c5fd691dd Mon Sep 17 00:00:00 2001 From: Universe Date: Tue, 7 Apr 2026 04:15:37 +0900 Subject: [PATCH 2/3] perf(canvas): add AA toggle, GPU sync for benchmarks, and AA cost findings - Add `force_no_aa` field to `RenderPolicy` with `anti_alias()` method, allowing A/B measurement of anti-aliasing cost at different zoom levels. Paint functions (`sk_solid_paint`, `sk_paint_stack`, `sk_paint_stack_without_images`) now accept an `aa: bool` parameter instead of hardcoding `true`. - Add `sync_gpu` config on `RuntimeRendererConfig`. When enabled, `gpu_flush()` calls `flush_submit_and_sync_cpu()` instead of the async `flush_and_submit()`, making per-stage timing in `FrameFlushStats` reflect actual GPU execution cost. Benchmarks (`bench` and `bench-report`) enable this by default. - Add `--no-aa` flag to the bench CLI for A/B AA cost measurement. - Add `skia_bench_subpixel` isolated GPU benchmark measuring AA cost at sub-pixel scale (0.02x zoom). Key finding: AA on sub-pixel geometry is 3.2x more expensive than AA off. - Add `docs/wg/feat-2d/aa-cost-findings.md` documenting the investigation results, including the GPU flush timing discovery. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- crates/grida-canvas/Cargo.toml | 5 + .../grida-canvas/examples/golden_sk_paints.rs | 2 +- .../skia_bench/skia_bench_subpixel.rs | 157 ++++++++++++++++++ crates/grida-canvas/src/cache/paragraph.rs | 2 +- crates/grida-canvas/src/painter/paint.rs | 14 +- crates/grida-canvas/src/painter/painter.rs | 19 ++- .../grida-canvas/src/painter/text_stroke.rs | 4 +- crates/grida-canvas/src/runtime/config.rs | 7 + .../grida-canvas/src/runtime/render_policy.rs | 12 ++ crates/grida-canvas/src/runtime/scene.rs | 28 +++- .../src/text/attributed_paragraph.rs | 4 +- .../src/vectornetwork/vn_painter.rs | 4 +- crates/grida-dev/src/bench/args.rs | 4 + crates/grida-dev/src/bench/runner.rs | 15 +- docs/wg/feat-2d/aa-cost-findings.md | 116 +++++++++++++ 15 files changed, 363 insertions(+), 30 deletions(-) create mode 100644 crates/grida-canvas/examples/skia_bench/skia_bench_subpixel.rs create mode 100644 docs/wg/feat-2d/aa-cost-findings.md diff --git a/crates/grida-canvas/Cargo.toml b/crates/grida-canvas/Cargo.toml index 40d5e1e313..c3c5ce4478 100644 --- a/crates/grida-canvas/Cargo.toml +++ b/crates/grida-canvas/Cargo.toml @@ -112,6 +112,11 @@ required-features = ["native-gl-context"] # Raw Skia measurements, no engine involvement. # Run: cargo run -p cg --example skia_bench_ --features native-gl-context --release +[[example]] +name = "skia_bench_subpixel" +path = "examples/skia_bench/skia_bench_subpixel.rs" +required-features = ["native-gl-context"] + [[example]] name = "skia_bench_primitives" path = "examples/skia_bench/skia_bench_primitives.rs" diff --git a/crates/grida-canvas/examples/golden_sk_paints.rs b/crates/grida-canvas/examples/golden_sk_paints.rs index 46f4a1e90d..ac46157429 100644 --- a/crates/grida-canvas/examples/golden_sk_paints.rs +++ b/crates/grida-canvas/examples/golden_sk_paints.rs @@ -371,7 +371,7 @@ fn draw_stacked( // Paint order semantics: // - `fills` is bottom → top. 
We pass as-is to the stacker, which composes // each subsequent paint on top of the accumulated background. - if let Some(paint) = paint::sk_paint_stack(fills, size_tuple, images) { + if let Some(paint) = paint::sk_paint_stack(fills, size_tuple, images, true) { canvas.draw_path(&path, &paint); } diff --git a/crates/grida-canvas/examples/skia_bench/skia_bench_subpixel.rs b/crates/grida-canvas/examples/skia_bench/skia_bench_subpixel.rs new file mode 100644 index 0000000000..4fb80f813d --- /dev/null +++ b/crates/grida-canvas/examples/skia_bench/skia_bench_subpixel.rs @@ -0,0 +1,157 @@ +//! Skia GPU Sub-Pixel Rendering Cost Benchmark +//! +//! Measures the actual cost of drawing sub-pixel geometry at low zoom. +//! Compares: +//! A) Drawing N rects at full size (4x4 px each) +//! B) Drawing N rects at 0.02x zoom (0.08x0.08 px each — sub-pixel) +//! C) Skipping N rects entirely (baseline dispatch cost = 0) +//! D) Drawing N rects at full size, AA off +//! E) Drawing N rects at 0.02x zoom, AA off +//! +//! All use pre-recorded SkPictures (matching the real engine path). +//! GPU is synced after each frame for accurate timing. +//! +//! ```bash +//! cargo run -p cg --example skia_bench_subpixel --features native-gl-context --release +//! 
``` + +#[cfg(not(feature = "native-gl-context"))] +fn main() { + eprintln!("This example requires --features native-gl-context"); +} + +#[cfg(feature = "native-gl-context")] +fn main() { + use cg::window::headless::HeadlessGpu; + use skia_safe::Color; + use std::time::Instant; + + let mut gpu = HeadlessGpu::new(1000, 1000).expect("GPU init"); + gpu.print_gl_info(); + println!(); + + let surface = &mut gpu.surface; + let n_iter: u32 = 300; + + for &count in &[1_000, 5_000, 10_000, 40_000] { + let rect_size = 4.0_f32; + let cols = 500usize; // spread across a large world + + let pics_aa = record_rect_pictures(count, cols, rect_size, true); + let pics_noaa = record_rect_pictures(count, cols, rect_size, false); + + flush_gpu(surface); + + // A) Full size, AA on + let avg_full_aa = bench_pictures(surface, n_iter, &pics_aa, 1.0); + + // B) 0.02x zoom, AA on + let avg_zoom_aa = bench_pictures(surface, n_iter, &pics_aa, 0.02); + + // C) Skip (draw nothing, just clear + flush) + let avg_skip = { + flush_gpu(surface); + let start = Instant::now(); + for _ in 0..n_iter { + let canvas = surface.canvas(); + canvas.clear(Color::WHITE); + flush_gpu(surface); + } + start.elapsed() / n_iter + }; + + // D) Full size, AA off + let avg_full_noaa = bench_pictures(surface, n_iter, &pics_noaa, 1.0); + + // E) 0.02x zoom, AA off + let avg_zoom_noaa = bench_pictures(surface, n_iter, &pics_noaa, 0.02); + + println!("x{:<6}", count); + println!( + " full AA on: {:>7} us | AA off: {:>7} us", + avg_full_aa.as_micros(), + avg_full_noaa.as_micros(), + ); + println!( + " 0.02x AA on: {:>7} us | AA off: {:>7} us", + avg_zoom_aa.as_micros(), + avg_zoom_noaa.as_micros(), + ); + println!(" skip (0 draws): {:>5} us", avg_skip.as_micros(),); + let zoom_vs_skip = avg_zoom_aa.as_micros() as f64 - avg_skip.as_micros() as f64; + println!( + " per-node cost at 0.02x: {:.2} us (full: {:.2} us)", + zoom_vs_skip / count as f64, + (avg_full_aa.as_micros() as f64 - avg_skip.as_micros() as f64) / count as f64, + 
); + println!(); + } +} + +#[cfg(feature = "native-gl-context")] +fn record_rect_pictures( + count: usize, + cols: usize, + rect_size: f32, + aa: bool, +) -> Vec { + use skia_safe::{Color, Paint, PictureRecorder, Rect}; + (0..count) + .map(|i| { + let x = (i % cols) as f32 * rect_size; + let y = (i / cols) as f32 * rect_size; + let bounds = Rect::from_xywh(x, y, rect_size, rect_size); + let mut recorder = PictureRecorder::new(); + let canvas = recorder.begin_recording(bounds, false); + let mut paint = Paint::default(); + paint.set_anti_alias(aa); + paint.set_color(Color::from_argb( + 255, + (i * 7 % 256) as u8, + (i * 13 % 256) as u8, + 100, + )); + canvas.draw_rect(bounds, &paint); + recorder.finish_recording_as_picture(Some(&bounds)).unwrap() + }) + .collect() +} + +#[cfg(feature = "native-gl-context")] +fn bench_pictures( + surface: &mut skia_safe::Surface, + n_iter: u32, + pics: &[skia_safe::Picture], + zoom: f32, +) -> std::time::Duration { + use skia_safe::Color; + use std::time::Instant; + + flush_gpu(surface); + let start = Instant::now(); + for _ in 0..n_iter { + let canvas = surface.canvas(); + canvas.clear(Color::WHITE); + if zoom != 1.0 { + canvas.save(); + canvas.scale((zoom, zoom)); + } + for pic in pics { + canvas.draw_picture(pic, None, None); + } + if zoom != 1.0 { + canvas.restore(); + } + flush_gpu(surface); + } + start.elapsed() / n_iter +} + +#[cfg(feature = "native-gl-context")] +fn flush_gpu(surface: &mut skia_safe::Surface) { + if let Some(mut ctx) = surface.recording_context() { + if let Some(mut direct) = ctx.as_direct_context() { + direct.flush_submit_and_sync_cpu(); + } + } +} diff --git a/crates/grida-canvas/src/cache/paragraph.rs b/crates/grida-canvas/src/cache/paragraph.rs index cbccfdb988..46b6cac19d 100644 --- a/crates/grida-canvas/src/cache/paragraph.rs +++ b/crates/grida-canvas/src/cache/paragraph.rs @@ -476,7 +476,7 @@ impl ParagraphCache { // Build the paragraph with paint applied (for rendering) let fill_paint = if 
!fills.is_empty() { // Use sk_paint_stack for all paint types (solid, gradient, image, multiple fills) - paint::sk_paint_stack(fills, layout_size, images) + paint::sk_paint_stack(fills, layout_size, images, true) } else { None }; diff --git a/crates/grida-canvas/src/painter/paint.rs b/crates/grida-canvas/src/painter/paint.rs index 0c469f3f64..e037ac4f25 100644 --- a/crates/grida-canvas/src/painter/paint.rs +++ b/crates/grida-canvas/src/painter/paint.rs @@ -2,10 +2,10 @@ use super::{gradient, image}; use crate::{cg::prelude::*, runtime::image_repository::ImageRepository}; use skia_safe::{self, shaders, Color, Shader}; -pub fn sk_solid_paint(paint: impl Into) -> skia_safe::Paint { +pub fn sk_solid_paint(paint: impl Into, aa: bool) -> skia_safe::Paint { let p: SolidPaint = paint.into(); let mut skia_paint = skia_safe::Paint::default(); - skia_paint.set_anti_alias(true); + skia_paint.set_anti_alias(aa); let CGColor { r, g, b, a } = p.color; let final_alpha = (a as f32 * p.opacity()) as u8; skia_paint.set_color(skia_safe::Color::from_argb(final_alpha, r, g, b)); @@ -28,6 +28,7 @@ pub fn sk_paint_stack( paints: &[Paint], size: (f32, f32), images: &ImageRepository, + aa: bool, ) -> Option { // Fast path: single solid fill — set color directly on the paint, // avoiding shader object allocation and giving Skia's GPU backend @@ -37,7 +38,7 @@ pub fn sk_paint_stack( let CGColor { r, g, b, a } = solid.color; let final_alpha = (a as f32 * solid.opacity()).round() as u8; let mut paint = skia_safe::Paint::default(); - paint.set_anti_alias(true); + paint.set_anti_alias(aa); paint.set_color(Color::from_argb(final_alpha, r, g, b)); paint.set_blend_mode(solid.blend_mode.into()); return Some(paint); @@ -63,7 +64,7 @@ pub fn sk_paint_stack( } } let mut paint = skia_safe::Paint::default(); - paint.set_anti_alias(true); + paint.set_anti_alias(aa); paint.set_shader(shader); // Apply the base paint's blend mode at the paint level so the first // fill can blend with the canvas/background, 
matching editor semantics. @@ -86,6 +87,7 @@ pub fn sk_paint_stack( pub fn sk_paint_stack_without_images( paints: &[Paint], size: (f32, f32), + aa: bool, ) -> Option { // Fast path: single solid fill — direct color, no shader allocation. if paints.len() == 1 { @@ -93,7 +95,7 @@ pub fn sk_paint_stack_without_images( let CGColor { r, g, b, a } = solid.color; let final_alpha = (a as f32 * solid.opacity()).round() as u8; let mut paint = skia_safe::Paint::default(); - paint.set_anti_alias(true); + paint.set_anti_alias(aa); paint.set_color(Color::from_argb(final_alpha, r, g, b)); paint.set_blend_mode(solid.blend_mode.into()); return Some(paint); @@ -112,7 +114,7 @@ pub fn sk_paint_stack_without_images( } } let mut paint = skia_safe::Paint::default(); - paint.set_anti_alias(true); + paint.set_anti_alias(aa); paint.set_shader(shader); // Apply the base paint's blend mode at the paint level so the first // fill can blend with the canvas/background, matching editor semantics. diff --git a/crates/grida-canvas/src/painter/painter.rs b/crates/grida-canvas/src/painter/painter.rs index b0a92ac83e..392263cdfc 100644 --- a/crates/grida-canvas/src/painter/painter.rs +++ b/crates/grida-canvas/src/painter/painter.rs @@ -347,7 +347,7 @@ impl<'a> Painter<'a> { draw_content(); let mut p = SkPaint::default(); - p.set_anti_alias(true); + p.set_anti_alias(self.policy.anti_alias()); p.set_blend_mode(skia_safe::BlendMode::DstIn); p.set_shader(mask_shader); self.canvas.draw_rect(bounds, &p); @@ -730,7 +730,7 @@ impl<'a> Painter<'a> { let mut paint = SkPaint::default(); paint.set_image_filter(shadow::drop_shadow_image_filter(shadow)); - paint.set_anti_alias(true); + paint.set_anti_alias(self.policy.anti_alias()); self.canvas .save_layer(&SaveLayerRec::default().bounds(&bounds).paint(&paint)); self.canvas.translate((0.0, y_offset)); @@ -761,7 +761,7 @@ impl<'a> Painter<'a> { let mut paint = SkPaint::default(); paint.set_image_filter(shadow::inner_shadow_image_filter(shadow)); - 
paint.set_anti_alias(true); + paint.set_anti_alias(self.policy.anti_alias()); self.canvas .save_layer(&SaveLayerRec::default().bounds(&bounds).paint(&paint)); self.canvas.translate((0.0, y_offset)); @@ -984,6 +984,7 @@ impl<'a> Painter<'a> { fills, (shape.rect.width(), shape.rect.height()), self.images, + self.policy.anti_alias(), ) { shape.draw_on_canvas(self.canvas, &paint); } @@ -1003,6 +1004,7 @@ impl<'a> Painter<'a> { fills, (shape.rect.width(), shape.rect.height()), self.images, + self.policy.anti_alias(), ) { self.draw_shape_at_offset(shape, &paint, tx, ty); } @@ -1025,6 +1027,7 @@ impl<'a> Painter<'a> { fills, (shape.rect.width(), shape.rect.height()), self.images, + self.policy.anti_alias(), ) { paint.set_alpha_f(paint.alpha_f() * opacity); self.draw_shape_at_offset(shape, &paint, tx, ty); @@ -1093,6 +1096,7 @@ impl<'a> Painter<'a> { fills, (shape.rect.width(), shape.rect.height()), self.images, + self.policy.anti_alias(), ) { paint.set_alpha_f(paint.alpha_f() * opacity); shape.draw_on_canvas(self.canvas, &paint); @@ -1119,6 +1123,7 @@ impl<'a> Painter<'a> { fills, (shape.rect.width(), shape.rect.height()), self.images, + self.policy.anti_alias(), ) { paint.set_alpha_f(paint.alpha_f() * opacity); self.canvas.draw_path(path, &paint); @@ -1164,6 +1169,7 @@ impl<'a> Painter<'a> { strokes, (shape.rect.width(), shape.rect.height()), self.images, + self.policy.anti_alias(), ) { self.canvas.draw_path(stroke_path, &paint); } @@ -1185,6 +1191,7 @@ impl<'a> Painter<'a> { strokes, (shape.rect.width(), shape.rect.height()), self.images, + self.policy.anti_alias(), ) { paint.set_alpha_f(paint.alpha_f() * opacity); self.canvas.draw_path(stroke_path, &paint); @@ -1207,6 +1214,7 @@ impl<'a> Painter<'a> { strokes, (shape.rect.width(), shape.rect.height()), self.images, + self.policy.anti_alias(), ) { crate::shape::marker::draw_endpoint_decorations( self.canvas, @@ -2083,6 +2091,7 @@ impl<'a> Painter<'a> { &vector_layer.strokes, (shape.rect.width(), shape.rect.height()), 
self.images, + self.policy.anti_alias(), ) { crate::shape::marker::draw_endpoint_decorations( self.canvas, @@ -2275,7 +2284,7 @@ impl<'a> Painter<'a> { paint.set_color(color); paint.set_style(skia_safe::paint::Style::Stroke); paint.set_stroke_width(style.width); - paint.set_anti_alias(true); + paint.set_anti_alias(self.policy.anti_alias()); paint } @@ -2650,7 +2659,7 @@ impl<'a> Painter<'a> { let inner_filter = shadow::inner_shadow_image_filter(is); let mut shadow_paint = SkPaint::default(); shadow_paint.set_image_filter(inner_filter); - shadow_paint.set_anti_alias(true); + shadow_paint.set_anti_alias(self.policy.anti_alias()); canvas.save(); canvas.clip_rect(bounds, None, true); canvas.draw_rect(bounds, &shadow_paint); diff --git a/crates/grida-canvas/src/painter/text_stroke.rs b/crates/grida-canvas/src/painter/text_stroke.rs index 4c96f68198..ba3e2a9538 100644 --- a/crates/grida-canvas/src/painter/text_stroke.rs +++ b/crates/grida-canvas/src/painter/text_stroke.rs @@ -60,7 +60,7 @@ pub fn draw_text_stroke( // Prepare paint for filling the stroke geometry. let bounds = stroke_path.compute_tight_bounds(); let size = (bounds.width(), bounds.height()); - let Some(mut sk_paint) = paint::sk_paint_stack(strokes, size, images) else { + let Some(mut sk_paint) = paint::sk_paint_stack(strokes, size, images, true) else { return; }; sk_paint.set_style(PaintStyle::Fill); @@ -123,7 +123,7 @@ pub fn draw_text_stroke_outside_fast_pre( // Prepare a stroke paint. We double the stroke width so that when the // paragraph is painted afterwards, it covers the inner half leaving only // the "outside" portion visible. 
- let Some(mut sk_paint) = paint::sk_paint_stack(strokes, layout_size, images) else { + let Some(mut sk_paint) = paint::sk_paint_stack(strokes, layout_size, images, true) else { return; }; sk_paint.set_style(PaintStyle::Stroke); diff --git a/crates/grida-canvas/src/runtime/config.rs b/crates/grida-canvas/src/runtime/config.rs index 18eecabbaa..5cbf093390 100644 --- a/crates/grida-canvas/src/runtime/config.rs +++ b/crates/grida-canvas/src/runtime/config.rs @@ -48,6 +48,12 @@ pub struct RuntimeRendererConfig { /// Use this for documents where all positioning is absolute (e.g. SVG). Eliminates the layout phase entirely, /// which is the dominant cost in `load_scene` for large documents. pub skip_layout: bool, + /// When true, GPU flush calls block until the GPU finishes all + /// submitted work. Makes per-stage timing in `FrameFlushStats` + /// reflect actual GPU cost instead of command submission time. + /// + /// **Only enable in benchmarks.** Stalls the CPU/GPU pipeline. + pub sync_gpu: bool, } impl Default for RuntimeRendererConfig { @@ -60,6 +66,7 @@ impl Default for RuntimeRendererConfig { pixel_preview_strategy: PixelPreviewStrategy::Stable, render_policy: Default::default(), skip_layout: false, + sync_gpu: false, } } } diff --git a/crates/grida-canvas/src/runtime/render_policy.rs b/crates/grida-canvas/src/runtime/render_policy.rs index 911475c694..1616c637b1 100644 --- a/crates/grida-canvas/src/runtime/render_policy.rs +++ b/crates/grida-canvas/src/runtime/render_policy.rs @@ -102,6 +102,9 @@ pub struct RenderPolicy { /// Quality level for expensive GPU effects (blur, shadow, noise). /// `Full` for stable frames, `Reduced` for interactive frames. pub effect_quality: EffectQuality, + /// When true, paints that consult `anti_alias()` use `set_anti_alias(false)`; text and vector-network + /// paths still pass `aa = true`. For benchmarking AA cost at different zoom levels. 
+ pub force_no_aa: bool, } impl RenderPolicy { @@ -115,6 +118,7 @@ impl RenderPolicy { compositing: CompositingPolicy::Enabled, ignore_clips_content: false, effect_quality: EffectQuality::Full, + force_no_aa: false, }; /// Convenience preset used by the editor feature \"Show outlines\". @@ -128,8 +132,14 @@ impl RenderPolicy { // Wireframe is primarily used for inspection; by default, ignore clips. ignore_clips_content: true, effect_quality: EffectQuality::Full, + force_no_aa: false, }; + #[inline] + pub fn anti_alias(&self) -> bool { + !self.force_no_aa + } + /// Return a copy of this policy with reduced effect quality. /// Used for unstable (interactive) frames. #[inline] @@ -159,6 +169,7 @@ impl RenderPolicy { } ) && self.compositing == CompositingPolicy::Enabled && !self.ignore_clips_content + && !self.force_no_aa } /// True only for the default renderer behavior (full fills/strokes + effects + compositing). @@ -331,6 +342,7 @@ impl RenderPolicy { compositing, ignore_clips_content, effect_quality: EffectQuality::Full, + force_no_aa: false, } } diff --git a/crates/grida-canvas/src/runtime/scene.rs b/crates/grida-canvas/src/runtime/scene.rs index 160d1e9e39..e8307a4589 100644 --- a/crates/grida-canvas/src/runtime/scene.rs +++ b/crates/grida-canvas/src/runtime/scene.rs @@ -1125,7 +1125,7 @@ impl Renderer { // GPU flush. let mid_flush_start = Instant::now(); - Self::gpu_flush(surface); + self.gpu_flush(surface); let mid_flush_duration = mid_flush_start.elapsed(); let frame_duration = start.elapsed(); @@ -1291,7 +1291,7 @@ impl Renderer { canvas.draw_image(&cache.image, (dx, dy), None); let mid_flush_start = Instant::now(); - Self::gpu_flush(surface); + self.gpu_flush(surface); let mid_flush_duration = mid_flush_start.elapsed(); // Do NOT recapture — keep the original capture intact. @@ -1400,7 +1400,7 @@ impl Renderer { // Mid-frame GPU flush: isolate draw vs compositor GPU work. 
let mid_flush_start = Instant::now(); - Self::gpu_flush(surface); + self.gpu_flush(surface); let mid_flush_duration = mid_flush_start.elapsed(); // Capture composited frame for image caches. @@ -1453,7 +1453,7 @@ impl Renderer { // Final GPU flush. let flush_start = Instant::now(); - Self::gpu_flush(surface); + self.gpu_flush(surface); let flush_duration = flush_start.elapsed(); FrameFlushStats { @@ -1515,7 +1515,7 @@ impl Renderer { canvas.restore(); let mid_flush_start = Instant::now(); - Self::gpu_flush(surface); + self.gpu_flush(surface); let mid_flush_duration = mid_flush_start.elapsed(); let frame_duration = start.elapsed(); @@ -1523,14 +1523,24 @@ impl Renderer { } #[inline] - fn gpu_flush(surface: &mut Surface) { + /// Submit pending GPU work. When `config.sync_gpu` is enabled, + /// blocks until the GPU finishes for accurate per-stage timing. + fn gpu_flush(&self, surface: &mut Surface) { if let Some(mut gr_context) = surface.recording_context() { if let Some(mut direct_context) = gr_context.as_direct_context() { - direct_context.flush_and_submit(); + if self.config.sync_gpu { + direct_context.flush_submit_and_sync_cpu(); + } else { + direct_context.flush_and_submit(); + } } } } + pub fn set_sync_gpu(&mut self, sync: bool) { + self.config.sync_gpu = sync; + } + /// Submit any pending overlay draws to the GPU. /// /// Call this after drawing overlays on [`Self::canvas()`] to make the @@ -1539,7 +1549,7 @@ impl Renderer { /// selection outlines, frame title badges, and the size meter. pub fn flush_overlay(&mut self) { let surface = unsafe { &mut *self.backend.get_surface() }; - Self::gpu_flush(surface); + self.gpu_flush(surface); } /// Invoke the request redraw callback. 
@@ -1807,7 +1817,7 @@ impl Renderer { } } canvas.draw_image(&cache.image, (dx, dy), None); - Self::gpu_flush(surface); + self.gpu_flush(surface); true } diff --git a/crates/grida-canvas/src/text/attributed_paragraph.rs b/crates/grida-canvas/src/text/attributed_paragraph.rs index b7edd84867..b849464a90 100644 --- a/crates/grida-canvas/src/text/attributed_paragraph.rs +++ b/crates/grida-canvas/src/text/attributed_paragraph.rs @@ -41,9 +41,9 @@ fn resolve_fill_paint( images: Option<&ImageRepository>, ) -> Option { if let Some(images) = images { - paint_util::sk_paint_stack(fills, size, images) + paint_util::sk_paint_stack(fills, size, images, true) } else { - paint_util::sk_paint_stack_without_images(fills, size) + paint_util::sk_paint_stack_without_images(fills, size, true) } } diff --git a/crates/grida-canvas/src/vectornetwork/vn_painter.rs b/crates/grida-canvas/src/vectornetwork/vn_painter.rs index a2bd9baabe..d671d53c5c 100644 --- a/crates/grida-canvas/src/vectornetwork/vn_painter.rs +++ b/crates/grida-canvas/src/vectornetwork/vn_painter.rs @@ -259,12 +259,12 @@ impl<'a> VNPainter<'a> { let size = (bounds.width(), bounds.height()); if let Some(images) = self.images { - if let Some(mut paint) = paint::sk_paint_stack(paints, size, images) { + if let Some(mut paint) = paint::sk_paint_stack(paints, size, images, true) { paint.set_style(PaintStyle::Fill); self.canvas.draw_path(path, &paint); } } else { - if let Some(mut paint) = paint::sk_paint_stack_without_images(paints, size) { + if let Some(mut paint) = paint::sk_paint_stack_without_images(paints, size, true) { paint.set_style(PaintStyle::Fill); self.canvas.draw_path(path, &paint); } diff --git a/crates/grida-dev/src/bench/args.rs b/crates/grida-dev/src/bench/args.rs index e5a62f8c77..42abc9d23d 100644 --- a/crates/grida-dev/src/bench/args.rs +++ b/crates/grida-dev/src/bench/args.rs @@ -29,6 +29,10 @@ pub struct BenchArgs { /// Measures the combined cost of content rendering + overlay drawing. 
#[arg(long = "overlay", default_value_t = false)] pub overlay: bool, + /// Disable anti-aliasing on all paint operations. + /// For A/B measurement of AA cost at different zoom levels. + #[arg(long = "no-aa", default_value_t = false)] + pub no_aa: bool, } #[derive(Args, Debug)] diff --git a/crates/grida-dev/src/bench/runner.rs b/crates/grida-dev/src/bench/runner.rs index 7a4fb89053..8518232f73 100644 --- a/crates/grida-dev/src/bench/runner.rs +++ b/crates/grida-dev/src/bench/runner.rs @@ -2248,6 +2248,12 @@ pub async fn run_bench(args: BenchArgs, load_scenes: impl AsyncSceneLoader) -> R gpu.print_gl_info(); let mut renderer = gpu.create_renderer(); + if args.no_aa { + let mut policy = cg::runtime::render_policy::RenderPolicy::STANDARD; + policy.force_no_aa = true; + renderer.set_render_policy(policy); + } + renderer.set_sync_gpu(true); renderer.load_scene(scene); renderer.fit_camera_to_scene(); @@ -2259,9 +2265,13 @@ pub async fn run_bench(args: BenchArgs, load_scenes: impl AsyncSceneLoader) -> R fit_zoom, cam_rect.width, cam_rect.height, ); println!( - "Viewport: {}x{}, frames: {}\n", - args.width, args.height, args.frames + "Viewport: {}x{}, frames: {}{}", + args.width, + args.height, + args.frames, + if args.no_aa { " [NO-AA]" } else { "" }, ); + println!(); warmup(&mut renderer); @@ -2419,6 +2429,7 @@ pub async fn run_bench_report( }; let mut renderer = gpu.create_renderer(); + renderer.set_sync_gpu(true); renderer.load_scene(scene); renderer.fit_camera_to_scene(); let fit_zoom = renderer.camera.get_zoom(); diff --git a/docs/wg/feat-2d/aa-cost-findings.md b/docs/wg/feat-2d/aa-cost-findings.md new file mode 100644 index 0000000000..5d77faebe3 --- /dev/null +++ b/docs/wg/feat-2d/aa-cost-findings.md @@ -0,0 +1,116 @@ +# Anti-Aliasing Cost at Sub-Pixel Scale + +**Date:** 2026-04-07 +**Status:** Investigation findings + +## Summary + +At fit-zoom on large documents (0.02x on 135K nodes), anti-aliased +sub-pixel geometry is the dominant GPU cost. 
Disabling AA on sub-pixel +nodes is a viable path to 2x+ frame time reduction during settle frames. That figure comes from the isolated benchmark; the real-scene gain measured so far is 3-9% (see "Real-scene results"). + +## Discovery + +### Benchmark setup + +Isolated GPU benchmark (`skia_bench_subpixel`): pre-recorded SkPictures +of simple rects drawn at 1.0x vs 0.02x zoom, with AA on vs off. +GPU synced via `flush_submit_and_sync_cpu()` for accurate timing. + +Hardware: Apple M2 Pro, Metal 4.1, 1000x1000 viewport. + +### Results + +| Nodes | full (AA on) | 0.02x (AA on) | 0.02x (AA off) | skip (0 draws) | +|-------|-------------|---------------|----------------|----------------| +| 1,000 | 621 µs | 809 µs | 442 µs | 286 µs | +| 5,000 | 1,923 µs | 3,426 µs | 1,535 µs | 271 µs | +| 10,000 | 2,986 µs | 6,221 µs | 2,180 µs | 458 µs | +| 40,000 | 9,628 µs | 21,878 µs | 6,804 µs | 324 µs | + +### Key findings + +1. **Sub-pixel with AA is 2.3x MORE expensive than full-size with AA.** + At 0.02x zoom, 40K rects: 21,878 µs vs 9,628 µs. Counter-intuitive — + smaller geometry costs more. + +2. **AA is the dominant cost at sub-pixel scale.** + AA on vs AA off at 0.02x: 21,878 vs 6,804 µs = **3.2x overhead**. + Skia's AA rasterizer computes edge coverage for each sub-pixel edge, + and this work is proportionally more expensive when the geometry is + smaller than a pixel. + +3. **Without AA, sub-pixel draws are near-free.** + Per-node cost at 0.02x (40K rects, net of the skip baseline): AA off = 0.16 µs/node vs AA on = 0.54 µs/node. + The AA-off cost approaches the skip baseline (0 draws). + +4. **Text is not the bottleneck.** + A/B test skipping all text nodes (22% of layers) on the 135K fixture + showed 0% frame time difference. At 0.02x zoom, text and shapes have + identical per-node cost — both dominated by AA overhead. + +## Implications for optimization + +### Adaptive AA by screen size + +When a node's screen-space area falls below a threshold (e.g. 4 px²), +disable AA for that node. The visual difference is invisible (the node +is sub-pixel) but the GPU cost drops 3x. 
+ +This is similar to Chromium's approach: content below a certain screen +size gets rasterized with reduced quality during pinch-zoom. + +### Where AA is set + +All `set_anti_alias(true)` calls go through a few central functions in +`crates/grida-canvas/src/painter/`: + +- `paint.rs` — `sk_solid_paint()`, `sk_paint_stack()`, `sk_paint_stack_without_images()` +- `gradient.rs` — gradient paint creation +- `painter.rs` — shadow, inner shadow, outline paints +- `shadow.rs` — drop/inner shadow paints +- `effects_noise.rs` — noise effect paints + +A `force_no_aa` field on `RenderPolicy` controls AA globally. The +bench CLI exposes this as `--no-aa`. For production, the approach +should be per-node based on screen-space size, computed during the +frame plan. + +## Benchmark measurement fix + +During this investigation, we discovered that `gpu_flush()` was using +`flush_and_submit()` (async, non-blocking) instead of +`flush_submit_and_sync_cpu()` (blocking). This meant: + +- `mid_flush_us` measured command buffer submission time, not GPU execution +- Per-stage breakdowns in `FrameFlushStats` were unreliable +- A/B comparisons that changed GPU workload showed false-negative results + +Fixed: added `sync_gpu` config flag on `RuntimeRendererConfig`. +Benchmarks enable this, making per-stage timing accurate. Note: synced +benchmarks serialize CPU/GPU and understate pipelined throughput — they +measure isolated GPU cost, not real-world frame rate. 
+ +## Real-scene results (135K nodes, 01-135k.perf.grida) + +| Scenario | AA on | AA off | Delta | +|----------|-------|--------|-------| +| baseline_nocache_zoom_slow_fit (0.02x) | 62,038 µs | 60,103 µs | **-3%** | +| mid_flush at fit | 50,599 µs | 48,773 µs | -4% | +| baseline_nocache_zoom_slow_high (zoomed in) | 21,190 µs | 19,491 µs | **-8%** | +| mid_flush at high zoom | 16,542 µs | 15,009 µs | -9% | + +The improvement is smaller than the isolated bench predicted (3-9% vs +3.2x) because the real scene has complex Path nodes where picture cache +replay and path tessellation overhead dominate over AA cost. + +**Interpretation:** AA is a contributor but not the primary bottleneck +on real scenes with complex geometry. The dominant cost is the per-node +`draw_picture` dispatch + replay + GPU pipeline overhead for 41K nodes, +regardless of AA state. + +## Related + +- `crates/grida-canvas/examples/skia_bench/skia_bench_subpixel.rs` — isolated benchmark +- `docs/wg/feat-2d/optimization.md` — master optimization catalog +- Chromium pinch-zoom: reduced rasterization quality during interaction From 940031ed510dff92d1bf71594d655c3bff614a0b Mon Sep 17 00:00:00 2001 From: Universe Date: Tue, 7 Apr 2026 04:40:43 +0900 Subject: [PATCH 3/3] fix(render-policy): preserve force_no_aa across flags bridge Add FLAG_FORCE_NO_AA to the RenderPolicyFlags bitmap so that from_flags/to_flags round-trips preserve the AA override across the WASM/host boundary. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- crates/grida-canvas/src/runtime/render_policy.rs | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/crates/grida-canvas/src/runtime/render_policy.rs b/crates/grida-canvas/src/runtime/render_policy.rs index 1616c637b1..7dd88b6c32 100644 --- a/crates/grida-canvas/src/runtime/render_policy.rs +++ b/crates/grida-canvas/src/runtime/render_policy.rs @@ -302,6 +302,7 @@ pub const FLAG_RENDER_OUTLINES_ALWAYS: RenderPolicyFlags = 1 << 2; pub const FLAG_EFFECTS_ENABLED: RenderPolicyFlags = 1 << 3; pub const FLAG_COMPOSITING_ENABLED: RenderPolicyFlags = 1 << 4; pub const FLAG_IGNORE_CLIPS_CONTENT: RenderPolicyFlags = 1 << 5; +pub const FLAG_FORCE_NO_AA: RenderPolicyFlags = 1 << 6; impl RenderPolicy { /// Build a policy from flags. @@ -323,6 +324,7 @@ impl RenderPolicy { }; let ignore_clips_content = (flags & FLAG_IGNORE_CLIPS_CONTENT) != 0; + let force_no_aa = (flags & FLAG_FORCE_NO_AA) != 0; if (flags & FLAG_RENDER_OUTLINES_ALWAYS) != 0 { // Outline style is currently encoded in the preset; can be expanded later. @@ -330,6 +332,7 @@ impl RenderPolicy { p.effects = effects; p.compositing = compositing; p.ignore_clips_content = ignore_clips_content; + p.force_no_aa = force_no_aa; return p; } @@ -342,7 +345,7 @@ impl RenderPolicy { compositing, ignore_clips_content, effect_quality: EffectQuality::Full, - force_no_aa: false, + force_no_aa, } } @@ -358,6 +361,9 @@ impl RenderPolicy { if self.ignore_clips_content { flags |= FLAG_IGNORE_CLIPS_CONTENT; } + if self.force_no_aa { + flags |= FLAG_FORCE_NO_AA; + } match self.content { ContentPolicy::Standard {