diff --git a/pgpu-render/src/lib.rs b/pgpu-render/src/lib.rs index 7d4c60bcd..50462e73f 100644 --- a/pgpu-render/src/lib.rs +++ b/pgpu-render/src/lib.rs @@ -215,7 +215,10 @@ pub struct PgpuRect { /// Computes the bounding box for the glyph after applying the specified /// transform. #[no_mangle] -pub unsafe extern "C" fn pgpu_glyph_bbox(glyph: *const PgpuGlyph, transform: &[f32; 6]) -> PgpuRect { +pub unsafe extern "C" fn pgpu_glyph_bbox( + glyph: *const PgpuGlyph, + transform: &[f32; 6], +) -> PgpuRect { let transform = piet_scene::geometry::Affine::new(transform); let rect = (*glyph).bbox(Some(transform)); PgpuRect { diff --git a/pgpu-render/src/render.rs b/pgpu-render/src/render.rs index 361ef42b5..5b5d328b5 100644 --- a/pgpu-render/src/render.rs +++ b/pgpu-render/src/render.rs @@ -16,8 +16,8 @@ use piet_gpu::{EncodedSceneRef, PixelFormat, RenderConfig}; use piet_gpu_hal::{QueryPool, Session}; -use piet_scene::glyph::pinot::{types::Tag, FontDataRef}; use piet_scene::geometry::{Affine, Rect}; +use piet_scene::glyph::pinot::{types::Tag, FontDataRef}; use piet_scene::glyph::{GlyphContext, GlyphProvider}; use piet_scene::resource::ResourceContext; use piet_scene::scene::{Fragment, Scene}; @@ -214,7 +214,12 @@ pub struct PgpuGlyph { impl PgpuGlyph { pub fn bbox(&self, transform: Option) -> Rect { if let Some(transform) = &transform { - Rect::from_points(self.fragment.points().iter().map(|p| p.transform(transform))) + Rect::from_points( + self.fragment + .points() + .iter() + .map(|p| p.transform(transform)), + ) } else { Rect::from_points(self.fragment.points()) } diff --git a/piet-gpu/bin/cli.rs b/piet-gpu/bin/cli.rs index abe6ae1d1..7d577d3c6 100644 --- a/piet-gpu/bin/cli.rs +++ b/piet-gpu/bin/cli.rs @@ -249,7 +249,8 @@ fn main() -> Result<(), Error> { println!("parsing time: {:?}", start.elapsed()); test_scenes::render_svg(&mut ctx, &svg); } else { - test_scenes::render_scene(&mut ctx); + //test_scenes::render_scene(&mut ctx); + test_scenes::render_blend_grid(&mut ctx); } let mut renderer = Renderer::new(&session, WIDTH, HEIGHT, 1)?; diff --git a/piet-gpu/bin/winit.rs b/piet-gpu/bin/winit.rs index 1642026d2..78867f59c 100644 --- a/piet-gpu/bin/winit.rs +++ b/piet-gpu/bin/winit.rs @@ -125,7 +125,7 @@ fn main() -> Result<(), Error> { } let mut ctx = PietGpuRenderContext::new(); - let test_blend = false; + let test_blend = true; if let Some(svg) = &svg { test_scenes::render_svg(&mut ctx, svg); } else if test_blend { diff --git a/piet-gpu/shader/blend.h b/piet-gpu/shader/blend.h index 1ac4bd6c3..736600646 100644 --- a/piet-gpu/shader/blend.h +++ b/piet-gpu/shader/blend.h @@ -18,6 +18,7 @@ #define Blend_Saturation 13 #define Blend_Color 14 #define Blend_Luminosity 15 +#define Blend_Clip 128 vec3 screen(vec3 cb, vec3 cs) { return cb + cs - (cb * cs); @@ -45,7 +46,7 @@ vec3 hard_light(vec3 cb, vec3 cs) { return mix( screen(cb, 2.0 * cs - 1.0), cb * 2.0 * cs, - vec3(lessThanEqual(cs, vec3(0.5))) + lessThanEqual(cs, vec3(0.5)) ); } @@ -53,12 +54,12 @@ vec3 soft_light(vec3 cb, vec3 cs) { vec3 d = mix( sqrt(cb), ((16.0 * cb - vec3(12.0)) * cb + vec3(4.0)) * cb, - vec3(lessThanEqual(cb, vec3(0.25))) + lessThanEqual(cb, vec3(0.25)) ); return mix( cb + (2.0 * cs - vec3(1.0)) * (d - cb), cb - (vec3(1.0) - 2.0 * cs) * cb * (vec3(1.0) - cb), - vec3(lessThanEqual(cs, vec3(0.5))) + lessThanEqual(cs, vec3(0.5)) ); } @@ -122,6 +123,8 @@ vec3 set_sat(vec3 c, float s) { return c; } +// Blends two RGB colors together. The colors are assumed to be in sRGB +// color space, and this function does not take alpha into account. vec3 mix_blend(vec3 cb, vec3 cs, uint mode) { vec3 b = vec3(0.0); switch (mode) { @@ -190,9 +193,10 @@ vec3 mix_blend(vec3 cb, vec3 cs, uint mode) { #define Comp_DestAtop 10 #define Comp_Xor 11 #define Comp_Plus 12 -#define Comp_PlusDarker 13 -#define Comp_PlusLighter 14 +#define Comp_PlusLighter 13 +// Apply general compositing operation. +// Inputs are separated colors and alpha, output is premultiplied. vec4 mix_compose(vec3 cb, vec3 cs, float ab, float as, uint mode) { float fa = 0.0; float fb = 0.0; @@ -245,16 +249,43 @@ vec4 mix_compose(vec3 cb, vec3 cs, float ab, float as, uint mode) { fa = 1.0; fb = 1.0; break; - case Comp_PlusDarker: - return vec4(max(vec4(0.0), 1.0 - as * vec4(cs, as) + 1.0 - ab * vec4(cb, ab)).xyz, - max(0.0, 1.0 - as + 1.0 - ab)); case Comp_PlusLighter: - return vec4(min(vec4(1.0), as * vec4(cs, as) + ab * vec4(cb, ab)).xyz, - min(1.0, as + ab)); + return min(vec4(1.0), vec4(as * cs + ab * cb, as + ab)); default: break; } - return as * fa * vec4(cs, as) + ab * fb * vec4(cb, ab); + float as_fa = as * fa; + float ab_fb = ab * fb; + vec3 co = as_fa * cs + ab_fb * cb; + return vec4(co, as_fa + ab_fb); } #define BlendComp_default (Blend_Normal << 8 | Comp_SrcOver) +#define BlendComp_clip (Blend_Clip << 8 | Comp_SrcOver) + +// This is added to alpha to prevent divide-by-zero +#define EPSILON 1e-15 + +// Apply blending and composition. Both input and output colors are +// premultiplied RGB. +vec4 mix_blend_compose(vec4 backdrop, vec4 src, uint mode) { + if ((mode & 0x7fff) == BlendComp_default) { + // Both normal+src_over blend and clip case + return backdrop * (1.0 - src.a) + src; + } + // Un-premultiply colors for blending + float inv_src_a = 1.0 / (src.a + EPSILON); + vec3 cs = src.rgb * inv_src_a; + float inv_backdrop_a = 1.0 / (backdrop.a + EPSILON); + vec3 cb = backdrop.rgb * inv_backdrop_a; + uint blend_mode = mode >> 8; + vec3 blended = mix_blend(cb, cs, blend_mode); + cs = mix(cs, blended, backdrop.a); + uint comp_mode = mode & 0xff; + if (comp_mode == Comp_SrcOver) { + vec3 co = mix(backdrop.rgb, cs, src.a); + return vec4(co, src.a + backdrop.a * (1 - src.a)); + } else { + return mix_compose(cb, cs, backdrop.a, src.a, comp_mode); + } +} diff --git a/piet-gpu/shader/build.ninja b/piet-gpu/shader/build.ninja index 60e558268..09b068393 100644 --- a/piet-gpu/shader/build.ninja +++ b/piet-gpu/shader/build.ninja @@ -58,7 +58,7 @@ build gen/kernel4.hlsl: hlsl gen/kernel4.spv build gen/kernel4.dxil: dxil gen/kernel4.hlsl build gen/kernel4.msl: msl gen/kernel4.spv -build gen/kernel4_gray.spv: glsl kernel4.comp | ptcl.h setup.h mem.h +build gen/kernel4_gray.spv: glsl kernel4.comp | blend.h ptcl.h setup.h mem.h flags = -DGRAY build gen/kernel4_gray.hlsl: hlsl gen/kernel4_gray.spv build gen/kernel4_gray.dxil: dxil gen/kernel4_gray.hlsl diff --git a/piet-gpu/shader/coarse.comp b/piet-gpu/shader/coarse.comp index 3abb2e056..1b3f25293 100644 --- a/piet-gpu/shader/coarse.comp +++ b/piet-gpu/shader/coarse.comp @@ -303,7 +303,7 @@ void main() { uint scene_offset = memory[drawmonoid_base + 2]; uint dd = drawdata_start + (scene_offset >> 2); uint blend = scene[dd]; - is_blend = (blend != BlendComp_default); + is_blend = (blend != BlendComp_clip); } include_tile = tile.tile.offset != 0 || (tile.backdrop == 0) == is_clip || is_blend; diff --git a/piet-gpu/shader/gen/backdrop.dxil b/piet-gpu/shader/gen/backdrop.dxil index 0fb9622ed..df2be88d0 100644 Binary files a/piet-gpu/shader/gen/backdrop.dxil and b/piet-gpu/shader/gen/backdrop.dxil differ diff --git a/piet-gpu/shader/gen/backdrop_lg.dxil b/piet-gpu/shader/gen/backdrop_lg.dxil index e24a6d3e8..81f9b6502 100644 Binary files a/piet-gpu/shader/gen/backdrop_lg.dxil and b/piet-gpu/shader/gen/backdrop_lg.dxil differ diff --git a/piet-gpu/shader/gen/bbox_clear.dxil b/piet-gpu/shader/gen/bbox_clear.dxil index 6655b7f9c..6b3efaf96 100644 Binary files a/piet-gpu/shader/gen/bbox_clear.dxil and b/piet-gpu/shader/gen/bbox_clear.dxil differ diff --git a/piet-gpu/shader/gen/clip_leaf.dxil b/piet-gpu/shader/gen/clip_leaf.dxil index 29a158ea0..b681a65dc 100644 Binary files a/piet-gpu/shader/gen/clip_leaf.dxil and b/piet-gpu/shader/gen/clip_leaf.dxil differ diff --git a/piet-gpu/shader/gen/clip_reduce.dxil b/piet-gpu/shader/gen/clip_reduce.dxil index 0dff71b61..0ccaac993 100644 Binary files a/piet-gpu/shader/gen/clip_reduce.dxil and b/piet-gpu/shader/gen/clip_reduce.dxil differ diff --git a/piet-gpu/shader/gen/coarse.dxil b/piet-gpu/shader/gen/coarse.dxil index fdab4447a..c91fcdf9d 100644 Binary files a/piet-gpu/shader/gen/coarse.dxil and b/piet-gpu/shader/gen/coarse.dxil differ diff --git a/piet-gpu/shader/gen/coarse.hlsl b/piet-gpu/shader/gen/coarse.hlsl index 04529bbc7..0331e33b0 100644 --- a/piet-gpu/shader/gen/coarse.hlsl +++ b/piet-gpu/shader/gen/coarse.hlsl @@ -919,26 +919,26 @@ void comp_main() uint scene_offset = _260.Load((drawmonoid_base_1 + 2u) * 4 + 8); uint dd = drawdata_start + (scene_offset >> uint(2)); uint blend = _1372.Load(dd * 4 + 0); - is_blend = blend != 3u; + is_blend = blend != 32771u; } - bool _1692 = tile.tile.offset != 0u; - bool _1701; - if (!_1692) + bool _1693 = tile.tile.offset != 0u; + bool _1702; + if (!_1693) { - _1701 = (tile.backdrop == 0) == is_clip; + _1702 = (tile.backdrop == 0) == is_clip; } else { - _1701 = _1692; + _1702 = _1693; } - include_tile = _1701 || is_blend; + include_tile = _1702 || is_blend; } if (include_tile) { uint el_slice = el_ix / 32u; uint el_mask = 1u << (el_ix & 31u); - uint _1723; - InterlockedOr(sh_bitmaps[el_slice][(y * 16u) + x], el_mask, _1723); + uint _1724; + InterlockedOr(sh_bitmaps[el_slice][(y * 16u) + x], el_mask, _1724); } } GroupMemoryBarrierWithGroupSync(); @@ -967,9 +967,9 @@ void comp_main() { uint param_25 = element_ref_ix; bool param_26 = mem_ok; - TileRef _1800 = { sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u) }; + TileRef _1801 = { sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u) }; Alloc param_27 = read_tile_alloc(param_25, param_26); - TileRef param_28 = _1800; + TileRef param_28 = _1801; Tile tile_1 = Tile_read(param_27, param_28); uint drawmonoid_base_2 = drawmonoid_start + (4u * element_ix_2); uint scene_offset_1 = _260.Load((drawmonoid_base_2 + 2u) * 4 + 8); @@ -984,11 +984,11 @@ void comp_main() Alloc param_29 = cmd_alloc; CmdRef param_30 = cmd_ref; uint param_31 = cmd_limit; - bool _1848 = alloc_cmd(param_29, param_30, param_31); + bool _1849 = alloc_cmd(param_29, param_30, param_31); cmd_alloc = param_29; cmd_ref = param_30; cmd_limit = param_31; - if (!_1848) + if (!_1849) { break; } @@ -999,10 +999,10 @@ void comp_main() write_fill(param_32, param_33, param_34, param_35); cmd_ref = param_33; uint rgba = _1372.Load(dd_1 * 4 + 0); - CmdColor _1871 = { rgba }; + CmdColor _1872 = { rgba }; Alloc param_36 = cmd_alloc; CmdRef param_37 = cmd_ref; - CmdColor param_38 = _1871; + CmdColor param_38 = _1872; Cmd_Color_write(param_36, param_37, param_38); cmd_ref.offset += 8u; break; @@ -1012,11 +1012,11 @@ void comp_main() Alloc param_39 = cmd_alloc; CmdRef param_40 = cmd_ref; uint param_41 = cmd_limit; - bool _1889 = alloc_cmd(param_39, param_40, param_41); + bool _1890 = alloc_cmd(param_39, param_40, param_41); cmd_alloc = param_39; cmd_ref = param_40; cmd_limit = param_41; - if (!_1889) + if (!_1890) { break; } @@ -1043,11 +1043,11 @@ void comp_main() Alloc param_49 = cmd_alloc; CmdRef param_50 = cmd_ref; uint param_51 = cmd_limit; - bool _1953 = alloc_cmd(param_49, param_50, param_51); + bool _1954 = alloc_cmd(param_49, param_50, param_51); cmd_alloc = param_49; cmd_ref = param_50; cmd_limit = param_51; - if (!_1953) + if (!_1954) { break; } @@ -1077,11 +1077,11 @@ void comp_main() Alloc param_59 = cmd_alloc; CmdRef param_60 = cmd_ref; uint param_61 = cmd_limit; - bool _2059 = alloc_cmd(param_59, param_60, param_61); + bool _2060 = alloc_cmd(param_59, param_60, param_61); cmd_alloc = param_59; cmd_ref = param_60; cmd_limit = param_61; - if (!_2059) + if (!_2060) { break; } @@ -1094,27 +1094,27 @@ void comp_main() uint index = _1372.Load(dd_1 * 4 + 0); uint raw1 = _1372.Load((dd_1 + 1u) * 4 + 0); int2 offset_1 = int2(int(raw1 << uint(16)) >> 16, int(raw1) >> 16); - CmdImage _2098 = { index, offset_1 }; + CmdImage _2099 = { index, offset_1 }; Alloc param_66 = cmd_alloc; CmdRef param_67 = cmd_ref; - CmdImage param_68 = _2098; + CmdImage param_68 = _2099; Cmd_Image_write(param_66, param_67, param_68); cmd_ref.offset += 12u; break; } case 5u: { - bool _2112 = tile_1.tile.offset == 0u; - bool _2118; - if (_2112) + bool _2113 = tile_1.tile.offset == 0u; + bool _2119; + if (_2113) { - _2118 = tile_1.backdrop == 0; + _2119 = tile_1.backdrop == 0; } else { - _2118 = _2112; + _2119 = _2113; } - if (_2118) + if (_2119) { clip_zero_depth = clip_depth + 1u; } @@ -1123,11 +1123,11 @@ void comp_main() Alloc param_69 = cmd_alloc; CmdRef param_70 = cmd_ref; uint param_71 = cmd_limit; - bool _2130 = alloc_cmd(param_69, param_70, param_71); + bool _2131 = alloc_cmd(param_69, param_70, param_71); cmd_alloc = param_69; cmd_ref = param_70; cmd_limit = param_71; - if (!_2130) + if (!_2131) { break; } @@ -1145,11 +1145,11 @@ void comp_main() Alloc param_74 = cmd_alloc; CmdRef param_75 = cmd_ref; uint param_76 = cmd_limit; - bool _2158 = alloc_cmd(param_74, param_75, param_76); + bool _2159 = alloc_cmd(param_74, param_75, param_76); cmd_alloc = param_74; cmd_ref = param_75; cmd_limit = param_76; - if (!_2158) + if (!_2159) { break; } @@ -1160,10 +1160,10 @@ void comp_main() write_fill(param_77, param_78, param_79, param_80); cmd_ref = param_78; uint blend_1 = _1372.Load(dd_1 * 4 + 0); - CmdEndClip _2181 = { blend_1 }; + CmdEndClip _2182 = { blend_1 }; Alloc param_81 = cmd_alloc; CmdRef param_82 = cmd_ref; - CmdEndClip param_83 = _2181; + CmdEndClip param_83 = _2182; Cmd_EndClip_write(param_81, param_82, param_83); cmd_ref.offset += 8u; break; @@ -1198,17 +1198,17 @@ void comp_main() break; } } - bool _2228 = (bin_tile_x + tile_x) < _1005.Load(8); - bool _2237; - if (_2228) + bool _2229 = (bin_tile_x + tile_x) < _1005.Load(8); + bool _2238; + if (_2229) { - _2237 = (bin_tile_y + tile_y) < _1005.Load(12); + _2238 = (bin_tile_y + tile_y) < _1005.Load(12); } else { - _2237 = _2228; + _2238 = _2229; } - if (_2237) + if (_2238) { Alloc param_84 = cmd_alloc; CmdRef param_85 = cmd_ref; diff --git a/piet-gpu/shader/gen/coarse.msl b/piet-gpu/shader/gen/coarse.msl index 55812d4b3..854d2435f 100644 --- a/piet-gpu/shader/gen/coarse.msl +++ b/piet-gpu/shader/gen/coarse.msl @@ -942,25 +942,25 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M uint scene_offset = v_260.memory[drawmonoid_base_1 + 2u]; uint dd = drawdata_start + (scene_offset >> uint(2)); uint blend = _1372.scene[dd]; - is_blend = blend != 3u; + is_blend = blend != 32771u; } - bool _1692 = tile.tile.offset != 0u; - bool _1701; - if (!_1692) + bool _1693 = tile.tile.offset != 0u; + bool _1702; + if (!_1693) { - _1701 = (tile.backdrop == 0) == is_clip; + _1702 = (tile.backdrop == 0) == is_clip; } else { - _1701 = _1692; + _1702 = _1693; } - include_tile = _1701 || is_blend; + include_tile = _1702 || is_blend; } if (include_tile) { uint el_slice = el_ix / 32u; uint el_mask = 1u << (el_ix & 31u); - uint _1723 = atomic_fetch_or_explicit((threadgroup atomic_uint*)&sh_bitmaps[el_slice][(y * 16u) + x], el_mask, memory_order_relaxed); + uint _1724 = atomic_fetch_or_explicit((threadgroup atomic_uint*)&sh_bitmaps[el_slice][(y * 16u) + x], el_mask, memory_order_relaxed); } } threadgroup_barrier(mem_flags::mem_threadgroup); @@ -1005,11 +1005,11 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M Alloc param_29 = cmd_alloc; CmdRef param_30 = cmd_ref; uint param_31 = cmd_limit; - bool _1848 = alloc_cmd(param_29, param_30, param_31, v_260, v_260BufferSize); + bool _1849 = alloc_cmd(param_29, param_30, param_31, v_260, v_260BufferSize); cmd_alloc = param_29; cmd_ref = param_30; cmd_limit = param_31; - if (!_1848) + if (!_1849) { break; } @@ -1032,11 +1032,11 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M Alloc param_39 = cmd_alloc; CmdRef param_40 = cmd_ref; uint param_41 = cmd_limit; - bool _1889 = alloc_cmd(param_39, param_40, param_41, v_260, v_260BufferSize); + bool _1890 = alloc_cmd(param_39, param_40, param_41, v_260, v_260BufferSize); cmd_alloc = param_39; cmd_ref = param_40; cmd_limit = param_41; - if (!_1889) + if (!_1890) { break; } @@ -1063,11 +1063,11 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M Alloc param_49 = cmd_alloc; CmdRef param_50 = cmd_ref; uint param_51 = cmd_limit; - bool _1953 = alloc_cmd(param_49, param_50, param_51, v_260, v_260BufferSize); + bool _1954 = alloc_cmd(param_49, param_50, param_51, v_260, v_260BufferSize); cmd_alloc = param_49; cmd_ref = param_50; cmd_limit = param_51; - if (!_1953) + if (!_1954) { break; } @@ -1097,11 +1097,11 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M Alloc param_59 = cmd_alloc; CmdRef param_60 = cmd_ref; uint param_61 = cmd_limit; - bool _2059 = alloc_cmd(param_59, param_60, param_61, v_260, v_260BufferSize); + bool _2060 = alloc_cmd(param_59, param_60, param_61, v_260, v_260BufferSize); cmd_alloc = param_59; cmd_ref = param_60; cmd_limit = param_61; - if (!_2059) + if (!_2060) { break; } @@ -1123,17 +1123,17 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M } case 5u: { - bool _2112 = tile_1.tile.offset == 0u; - bool _2118; - if (_2112) + bool _2113 = tile_1.tile.offset == 0u; + bool _2119; + if (_2113) { - _2118 = tile_1.backdrop == 0; + _2119 = tile_1.backdrop == 0; } else { - _2118 = _2112; + _2119 = _2113; } - if (_2118) + if (_2119) { clip_zero_depth = clip_depth + 1u; } @@ -1142,11 +1142,11 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M Alloc param_69 = cmd_alloc; CmdRef param_70 = cmd_ref; uint param_71 = cmd_limit; - bool _2130 = alloc_cmd(param_69, param_70, param_71, v_260, v_260BufferSize); + bool _2131 = alloc_cmd(param_69, param_70, param_71, v_260, v_260BufferSize); cmd_alloc = param_69; cmd_ref = param_70; cmd_limit = param_71; - if (!_2130) + if (!_2131) { break; } @@ -1164,11 +1164,11 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M Alloc param_74 = cmd_alloc; CmdRef param_75 = cmd_ref; uint param_76 = cmd_limit; - bool _2158 = alloc_cmd(param_74, param_75, param_76, v_260, v_260BufferSize); + bool _2159 = alloc_cmd(param_74, param_75, param_76, v_260, v_260BufferSize); cmd_alloc = param_74; cmd_ref = param_75; cmd_limit = param_76; - if (!_2158) + if (!_2159) { break; } @@ -1216,17 +1216,17 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M break; } } - bool _2228 = (bin_tile_x + tile_x) < _1005.conf.width_in_tiles; - bool _2237; - if (_2228) + bool _2229 = (bin_tile_x + tile_x) < _1005.conf.width_in_tiles; + bool _2238; + if (_2229) { - _2237 = (bin_tile_y + tile_y) < _1005.conf.height_in_tiles; + _2238 = (bin_tile_y + tile_y) < _1005.conf.height_in_tiles; } else { - _2237 = _2228; + _2238 = _2229; } - if (_2237) + if (_2238) { Alloc param_84 = cmd_alloc; CmdRef param_85 = cmd_ref; diff --git a/piet-gpu/shader/gen/coarse.spv b/piet-gpu/shader/gen/coarse.spv index 6d33ee70c..56a87e531 100644 Binary files a/piet-gpu/shader/gen/coarse.spv and b/piet-gpu/shader/gen/coarse.spv differ diff --git a/piet-gpu/shader/gen/kernel4.dxil b/piet-gpu/shader/gen/kernel4.dxil index e6eccc19b..7399fe43e 100644 Binary files a/piet-gpu/shader/gen/kernel4.dxil and b/piet-gpu/shader/gen/kernel4.dxil differ diff --git a/piet-gpu/shader/gen/kernel4.hlsl b/piet-gpu/shader/gen/kernel4.hlsl index 92fe05b5e..4839db2bb 100644 --- a/piet-gpu/shader/gen/kernel4.hlsl +++ b/piet-gpu/shader/gen/kernel4.hlsl @@ -161,8 +161,8 @@ struct Config static const uint3 gl_WorkGroupSize = uint3(8u, 4u, 1u); -RWByteAddressBuffer _291 : register(u0, space0); -ByteAddressBuffer _1666 : register(t1, space0); +RWByteAddressBuffer _297 : register(u0, space0); +ByteAddressBuffer _1681 : register(t1, space0); RWTexture2D image_atlas : register(u3, space0); RWTexture2D gradients : register(u4, space0); RWTexture2D image : register(u2, space0); @@ -189,8 +189,8 @@ float4 spvUnpackUnorm4x8(uint value) Alloc slice_mem(Alloc a, uint offset, uint size) { - Alloc _304 = { a.offset + offset }; - return _304; + Alloc _310 = { a.offset + offset }; + return _310; } bool touch_mem(Alloc alloc, uint offset) @@ -206,7 +206,7 @@ uint read_mem(Alloc alloc, uint offset) { return 0u; } - uint v = _291.Load(offset * 4 + 8); + uint v = _297.Load(offset * 4 + 8); return v; } @@ -215,8 +215,8 @@ CmdTag Cmd_tag(Alloc a, CmdRef ref) Alloc param = a; uint param_1 = ref.offset >> uint(2); uint tag_and_flags = read_mem(param, param_1); - CmdTag _663 = { tag_and_flags & 65535u, tag_and_flags >> uint(16) }; - return _663; + CmdTag _669 = { tag_and_flags & 65535u, tag_and_flags >> uint(16) }; + return _669; } CmdStroke CmdStroke_read(Alloc a, CmdStrokeRef ref) @@ -236,9 +236,9 @@ CmdStroke CmdStroke_read(Alloc a, CmdStrokeRef ref) CmdStroke Cmd_Stroke_read(Alloc a, CmdRef ref) { - CmdStrokeRef _679 = { ref.offset + 4u }; + CmdStrokeRef _685 = { ref.offset + 4u }; Alloc param = a; - CmdStrokeRef param_1 = _679; + CmdStrokeRef param_1 = _685; return CmdStroke_read(param, param_1); } @@ -274,8 +274,8 @@ TileSeg TileSeg_read(Alloc a, TileSegRef ref) s.origin = float2(asfloat(raw0), asfloat(raw1)); s._vector = float2(asfloat(raw2), asfloat(raw3)); s.y_edge = asfloat(raw4); - TileSegRef _820 = { raw5 }; - s.next = _820; + TileSegRef _826 = { raw5 }; + s.next = _826; return s; } @@ -301,9 +301,9 @@ CmdFill CmdFill_read(Alloc a, CmdFillRef ref) CmdFill Cmd_Fill_read(Alloc a, CmdRef ref) { - CmdFillRef _669 = { ref.offset + 4u }; + CmdFillRef _675 = { ref.offset + 4u }; Alloc param = a; - CmdFillRef param_1 = _669; + CmdFillRef param_1 = _675; return CmdFill_read(param, param_1); } @@ -320,9 +320,9 @@ CmdAlpha CmdAlpha_read(Alloc a, CmdAlphaRef ref) CmdAlpha Cmd_Alpha_read(Alloc a, CmdRef ref) { - CmdAlphaRef _689 = { ref.offset + 4u }; + CmdAlphaRef _695 = { ref.offset + 4u }; Alloc param = a; - CmdAlphaRef param_1 = _689; + CmdAlphaRef param_1 = _695; return CmdAlpha_read(param, param_1); } @@ -339,18 +339,15 @@ CmdColor CmdColor_read(Alloc a, CmdColorRef ref) CmdColor Cmd_Color_read(Alloc a, CmdRef ref) { - CmdColorRef _699 = { ref.offset + 4u }; + CmdColorRef _705 = { ref.offset + 4u }; Alloc param = a; - CmdColorRef param_1 = _699; + CmdColorRef param_1 = _705; return CmdColor_read(param, param_1); } float3 fromsRGB(float3 srgb) { - bool3 cutoff = bool3(srgb.x >= 0.040449999272823333740234375f.xxx.x, srgb.y >= 0.040449999272823333740234375f.xxx.y, srgb.z >= 0.040449999272823333740234375f.xxx.z); - float3 below = srgb / 12.9200000762939453125f.xxx; - float3 above = pow((srgb + 0.054999999701976776123046875f.xxx) / 1.05499994754791259765625f.xxx, 2.400000095367431640625f.xxx); - return float3(cutoff.x ? above.x : below.x, cutoff.y ? above.y : below.y, cutoff.z ? above.z : below.z); + return srgb; } float4 unpacksRGB(uint srgba) @@ -385,9 +382,9 @@ CmdLinGrad CmdLinGrad_read(Alloc a, CmdLinGradRef ref) CmdLinGrad Cmd_LinGrad_read(Alloc a, CmdRef ref) { - CmdLinGradRef _709 = { ref.offset + 4u }; + CmdLinGradRef _715 = { ref.offset + 4u }; Alloc param = a; - CmdLinGradRef param_1 = _709; + CmdLinGradRef param_1 = _715; return CmdLinGrad_read(param, param_1); } @@ -439,9 +436,9 @@ CmdRadGrad CmdRadGrad_read(Alloc a, CmdRadGradRef ref) CmdRadGrad Cmd_RadGrad_read(Alloc a, CmdRef ref) { - CmdRadGradRef _719 = { ref.offset + 4u }; + CmdRadGradRef _725 = { ref.offset + 4u }; Alloc param = a; - CmdRadGradRef param_1 = _719; + CmdRadGradRef param_1 = _725; return CmdRadGrad_read(param, param_1); } @@ -462,9 +459,9 @@ CmdImage CmdImage_read(Alloc a, CmdImageRef ref) CmdImage Cmd_Image_read(Alloc a, CmdRef ref) { - CmdImageRef _729 = { ref.offset + 4u }; + CmdImageRef _735 = { ref.offset + 4u }; Alloc param = a; - CmdImageRef param_1 = _729; + CmdImageRef param_1 = _735; return CmdImage_read(param, param_1); } @@ -477,10 +474,10 @@ void fillImage(out float4 spvReturnValue[8], uint2 xy, CmdImage cmd_img) int2 uv = int2(xy + chunk_offset(param)) + cmd_img.offset; float4 fg_rgba = image_atlas[uv]; float3 param_1 = fg_rgba.xyz; - float3 _1638 = fromsRGB(param_1); - fg_rgba.x = _1638.x; - fg_rgba.y = _1638.y; - fg_rgba.z = _1638.z; + float3 _1653 = fromsRGB(param_1); + fg_rgba.x = _1653.x; + fg_rgba.y = _1653.y; + fg_rgba.z = _1653.z; rgba[i] = fg_rgba; } spvReturnValue = rgba; @@ -488,10 +485,7 @@ void fillImage(out float4 spvReturnValue[8], uint2 xy, CmdImage cmd_img) float3 tosRGB(float3 rgb) { - bool3 cutoff = bool3(rgb.x >= 0.003130800090730190277099609375f.xxx.x, rgb.y >= 0.003130800090730190277099609375f.xxx.y, rgb.z >= 0.003130800090730190277099609375f.xxx.z); - float3 below = 12.9200000762939453125f.xxx * rgb; - float3 above = (1.05499994754791259765625f.xxx * pow(rgb, 0.416660010814666748046875f.xxx)) - 0.054999999701976776123046875f.xxx; - return float3(cutoff.x ? above.x : below.x, cutoff.y ? above.y : below.y, cutoff.z ? above.z : below.z); + return rgb; } uint packsRGB(inout float4 rgba) @@ -514,9 +508,9 @@ CmdEndClip CmdEndClip_read(Alloc a, CmdEndClipRef ref) CmdEndClip Cmd_EndClip_read(Alloc a, CmdRef ref) { - CmdEndClipRef _739 = { ref.offset + 4u }; + CmdEndClipRef _745 = { ref.offset + 4u }; Alloc param = a; - CmdEndClipRef param_1 = _739; + CmdEndClipRef param_1 = _745; return CmdEndClip_read(param, param_1); } @@ -529,7 +523,10 @@ float3 hard_light(float3 cb, float3 cs) { float3 param = cb; float3 param_1 = (cs * 2.0f) - 1.0f.xxx; - return lerp(screen(param, param_1), (cb * 2.0f) * cs, float3(bool3(cs.x <= 0.5f.xxx.x, cs.y <= 0.5f.xxx.y, cs.z <= 0.5f.xxx.z))); + float3 _889 = screen(param, param_1); + float3 _893 = (cb * 2.0f) * cs; + bool3 _898 = bool3(cs.x <= 0.5f.xxx.x, cs.y <= 0.5f.xxx.y, cs.z <= 0.5f.xxx.z); + return float3(_898.x ? _893.x : _889.x, _898.y ? _893.y : _889.y, _898.z ? _893.z : _889.z); } float color_dodge(float cb, float cs) @@ -572,8 +569,14 @@ float color_burn(float cb, float cs) float3 soft_light(float3 cb, float3 cs) { - float3 d = lerp(sqrt(cb), ((((cb * 16.0f) - 12.0f.xxx) * cb) + 4.0f.xxx) * cb, float3(bool3(cb.x <= 0.25f.xxx.x, cb.y <= 0.25f.xxx.y, cb.z <= 0.25f.xxx.z))); - return lerp(cb + (((cs * 2.0f) - 1.0f.xxx) * (d - cb)), cb - (((1.0f.xxx - (cs * 2.0f)) * cb) * (1.0f.xxx - cb)), float3(bool3(cs.x <= 0.5f.xxx.x, cs.y <= 0.5f.xxx.y, cs.z <= 0.5f.xxx.z))); + float3 _904 = sqrt(cb); + float3 _917 = ((((cb * 16.0f) - 12.0f.xxx) * cb) + 4.0f.xxx) * cb; + bool3 _921 = bool3(cb.x <= 0.25f.xxx.x, cb.y <= 0.25f.xxx.y, cb.z <= 0.25f.xxx.z); + float3 d = float3(_921.x ? _917.x : _904.x, _921.y ? _917.y : _904.y, _921.z ? _917.z : _904.z); + float3 _932 = cb + (((cs * 2.0f) - 1.0f.xxx) * (d - cb)); + float3 _942 = cb - (((1.0f.xxx - (cs * 2.0f)) * cb) * (1.0f.xxx - cb)); + bool3 _944 = bool3(cs.x <= 0.5f.xxx.x, cs.y <= 0.5f.xxx.y, cs.z <= 0.5f.xxx.z); + return float3(_944.x ? _942.x : _932.x, _944.y ? _942.y : _932.y, _944.z ? _942.z : _932.z); } float sat(float3 c) @@ -706,8 +709,8 @@ float3 set_lum(float3 c, float l) { float3 param = c; float3 param_1 = c + (l - lum(param)).xxx; - float3 _1046 = clip_color(param_1); - return _1046; + float3 _1048 = clip_color(param_1); + return _1048; } float3 mix_blend(float3 cb, float3 cs, uint mode) @@ -795,9 +798,9 @@ float3 mix_blend(float3 cb, float3 cs, uint mode) float3 param_20 = cb; float3 param_21 = cs; float param_22 = sat(param_20); - float3 _1337 = set_sat(param_21, param_22); + float3 _1340 = set_sat(param_21, param_22); float3 param_23 = cb; - float3 param_24 = _1337; + float3 param_24 = _1340; float param_25 = lum(param_23); b = set_lum(param_24, param_25); break; @@ -807,9 +810,9 @@ float3 mix_blend(float3 cb, float3 cs, uint mode) float3 param_26 = cs; float3 param_27 = cb; float param_28 = sat(param_26); - float3 _1351 = set_sat(param_27, param_28); + float3 _1354 = set_sat(param_27, param_28); float3 param_29 = cb; - float3 param_30 = _1351; + float3 param_30 = _1354; float param_31 = lum(param_29); b = set_lum(param_30, param_31); break; @@ -919,18 +922,50 @@ float4 mix_compose(float3 cb, float3 cs, float ab, float as, uint mode) } case 13u: { - return float4(max(0.0f.xxxx, ((1.0f.xxxx - (float4(cs, as) * as)) + 1.0f.xxxx) - (float4(cb, ab) * ab)).xyz, max(0.0f, ((1.0f - as) + 1.0f) - ab)); - } - case 14u: - { - return float4(min(1.0f.xxxx, (float4(cs, as) * as) + (float4(cb, ab) * ab)).xyz, min(1.0f, as + ab)); + return min(1.0f.xxxx, float4((cs * as) + (cb * ab), as + ab)); } default: { break; } } - return (float4(cs, as) * (as * fa)) + (float4(cb, ab) * (ab * fb)); + float as_fa = as * fa; + float ab_fb = ab * fb; + float3 co = (cs * as_fa) + (cb * ab_fb); + return float4(co, as_fa + ab_fb); +} + +float4 mix_blend_compose(float4 backdrop, float4 src, uint mode) +{ + if ((mode & 32767u) == 3u) + { + return (backdrop * (1.0f - src.w)) + src; + } + float inv_src_a = 1.0f / (src.w + 1.0000000036274937255387218471014e-15f); + float3 cs = src.xyz * inv_src_a; + float inv_backdrop_a = 1.0f / (backdrop.w + 1.0000000036274937255387218471014e-15f); + float3 cb = backdrop.xyz * inv_backdrop_a; + uint blend_mode = mode >> uint(8); + float3 param = cb; + float3 param_1 = cs; + uint param_2 = blend_mode; + float3 blended = mix_blend(param, param_1, param_2); + cs = lerp(cs, blended, backdrop.w.xxx); + uint comp_mode = mode & 255u; + if (comp_mode == 3u) + { + float3 co = lerp(backdrop.xyz, cs, src.w.xxx); + return float4(co, src.w + (backdrop.w * (1.0f - src.w))); + } + else + { + float3 param_3 = cb; + float3 param_4 = cs; + float param_5 = backdrop.w; + float param_6 = src.w; + uint param_7 = comp_mode; + return mix_compose(param_3, param_4, param_5, param_6, param_7); + } } CmdJump CmdJump_read(Alloc a, CmdJumpRef ref) @@ -946,24 +981,24 @@ CmdJump CmdJump_read(Alloc a, CmdJumpRef ref) CmdJump Cmd_Jump_read(Alloc a, CmdRef ref) { - CmdJumpRef _749 = { ref.offset + 4u }; + CmdJumpRef _755 = { ref.offset + 4u }; Alloc param = a; - CmdJumpRef param_1 = _749; + CmdJumpRef param_1 = _755; return CmdJump_read(param, param_1); } void comp_main() { - uint tile_ix = (gl_WorkGroupID.y * _1666.Load(8)) + gl_WorkGroupID.x; - Alloc _1681; - _1681.offset = _1666.Load(24); + uint tile_ix = (gl_WorkGroupID.y * _1681.Load(8)) + gl_WorkGroupID.x; + Alloc _1696; + _1696.offset = _1681.Load(24); Alloc param; - param.offset = _1681.offset; + param.offset = _1696.offset; uint param_1 = tile_ix * 1024u; uint param_2 = 1024u; Alloc cmd_alloc = slice_mem(param, param_1, param_2); - CmdRef _1690 = { cmd_alloc.offset }; - CmdRef cmd_ref = _1690; + CmdRef _1705 = { cmd_alloc.offset }; + CmdRef cmd_ref = _1705; uint2 xy_uint = uint2(gl_LocalInvocationID.x + (16u * gl_WorkGroupID.x), gl_LocalInvocationID.y + (16u * gl_WorkGroupID.y)); float2 xy = float2(xy_uint); float4 rgba[8]; @@ -972,7 +1007,7 @@ void comp_main() rgba[i] = 0.0f.xxxx; } uint clip_depth = 0u; - bool mem_ok = _291.Load(4) == 0u; + bool mem_ok = _297.Load(4) == 0u; float df[8]; TileSegRef tile_seg_ref; float area[8]; @@ -997,8 +1032,8 @@ void comp_main() { df[k] = 1000000000.0f; } - TileSegRef _1784 = { stroke.tile_ref }; - tile_seg_ref = _1784; + TileSegRef _1800 = { stroke.tile_ref }; + tile_seg_ref = _1800; do { uint param_7 = tile_seg_ref.offset; @@ -1034,8 +1069,8 @@ void comp_main() { area[k_3] = float(fill.backdrop); } - TileSegRef _1904 = { fill.tile_ref }; - tile_seg_ref = _1904; + TileSegRef _1920 = { fill.tile_ref }; + tile_seg_ref = _1920; do { uint param_15 = tile_seg_ref.offset; @@ -1124,10 +1159,10 @@ void comp_main() int x = int(round(clamp(my_d, 0.0f, 1.0f) * 511.0f)); float4 fg_rgba = gradients[int2(x, int(lin.index))]; float3 param_29 = fg_rgba.xyz; - float3 _2238 = fromsRGB(param_29); - fg_rgba.x = _2238.x; - fg_rgba.y = _2238.y; - fg_rgba.z = _2238.z; + float3 _2254 = fromsRGB(param_29); + fg_rgba.x = _2254.x; + fg_rgba.y = _2254.y; + fg_rgba.z = _2254.z; float4 fg_k_1 = fg_rgba * area[k_9]; rgba[k_9] = (rgba[k_9] * (1.0f - fg_k_1.w)) + fg_k_1; } @@ -1150,10 +1185,10 @@ void comp_main() int x_1 = int(round(clamp(t_2, 0.0f, 1.0f) * 511.0f)); float4 fg_rgba_1 = gradients[int2(x_1, int(rad.index))]; float3 param_33 = fg_rgba_1.xyz; - float3 _2348 = fromsRGB(param_33); - fg_rgba_1.x = _2348.x; - fg_rgba_1.y = _2348.y; - fg_rgba_1.z = _2348.z; + float3 _2364 = fromsRGB(param_33); + fg_rgba_1.x = _2364.x; + fg_rgba_1.y = _2364.y; + fg_rgba_1.z = _2364.z; float4 fg_k_2 = fg_rgba_1 * area[k_10]; rgba[k_10] = (rgba[k_10] * (1.0f - fg_k_2.w)) + fg_k_2; } @@ -1167,9 +1202,9 @@ void comp_main() CmdImage fill_img = Cmd_Image_read(param_34, param_35); uint2 param_36 = xy_uint; CmdImage param_37 = fill_img; - float4 _2391[8]; - fillImage(_2391, param_36, param_37); - float4 img[8] = _2391; + float4 _2407[8]; + fillImage(_2407, param_36, param_37); + float4 img[8] = _2407; for (uint k_11 = 0u; k_11 < 8u; k_11++) { float4 fg_k_3 = img[k_11] * area[k_11]; @@ -1184,8 +1219,8 @@ void comp_main() { uint d_2 = min(clip_depth, 127u); float4 param_38 = float4(rgba[k_12]); - uint _2454 = packsRGB(param_38); - blend_stack[d_2][k_12] = _2454; + uint _2470 = packsRGB(param_38); + blend_stack[d_2][k_12] = _2470; rgba[k_12] = 0.0f.xxxx; } clip_depth++; @@ -1206,32 +1241,20 @@ void comp_main() uint param_41 = blend_stack[d_3][k_13]; float4 bg = unpacksRGB(param_41); float4 fg_1 = rgba[k_13] * area[k_13]; - float3 param_42 = bg.xyz; - float3 param_43 = fg_1.xyz; - uint param_44 = blend_mode; - float3 blend = mix_blend(param_42, param_43, param_44); - float4 _2521 = fg_1; - float _2525 = fg_1.w; - float3 _2532 = lerp(_2521.xyz, blend, float((_2525 * bg.w) > 0.0f).xxx); - fg_1.x = _2532.x; - fg_1.y = _2532.y; - fg_1.z = _2532.z; - float3 param_45 = bg.xyz; - float3 param_46 = fg_1.xyz; - float param_47 = bg.w; - float param_48 = fg_1.w; - uint param_49 = comp_mode; - rgba[k_13] = mix_compose(param_45, param_46, param_47, param_48, param_49); + float4 param_42 = bg; + float4 param_43 = fg_1; + uint param_44 = end_clip.blend; + rgba[k_13] = mix_blend_compose(param_42, param_43, param_44); } cmd_ref.offset += 8u; break; } case 11u: { - Alloc param_50 = cmd_alloc; - CmdRef param_51 = cmd_ref; - CmdRef _2569 = { Cmd_Jump_read(param_50, param_51).new_ref }; - cmd_ref = _2569; + Alloc param_45 = cmd_alloc; + CmdRef param_46 = cmd_ref; + CmdRef _2548 = { Cmd_Jump_read(param_45, param_46).new_ref }; + cmd_ref = _2548; cmd_alloc.offset = cmd_ref.offset; break; } @@ -1239,9 +1262,9 @@ void comp_main() } for (uint i_1 = 0u; i_1 < 8u; i_1++) { - uint param_52 = i_1; - float3 param_53 = rgba[i_1].xyz; - image[int2(xy_uint + chunk_offset(param_52))] = float4(tosRGB(param_53), rgba[i_1].w); + uint param_47 = i_1; + float3 param_48 = rgba[i_1].xyz; + image[int2(xy_uint + chunk_offset(param_47))] = float4(tosRGB(param_48), rgba[i_1].w); } } diff --git a/piet-gpu/shader/gen/kernel4.msl b/piet-gpu/shader/gen/kernel4.msl index 6489563f2..4caeaf066 100644 --- a/piet-gpu/shader/gen/kernel4.msl +++ b/piet-gpu/shader/gen/kernel4.msl @@ -237,7 +237,7 @@ bool touch_mem(thread const Alloc& alloc, thread const uint& offset) } static inline __attribute__((always_inline)) -uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memory& v_291) +uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memory& v_297) { Alloc param = alloc; uint param_1 = offset; @@ -245,29 +245,29 @@ uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memor { return 0u; } - uint v = v_291.memory[offset]; + uint v = v_297.memory[offset]; return v; } static inline __attribute__((always_inline)) -CmdTag Cmd_tag(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_291) +CmdTag Cmd_tag(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_297) { Alloc param = a; uint param_1 = ref.offset >> uint(2); - uint tag_and_flags = read_mem(param, param_1, v_291); + uint tag_and_flags = read_mem(param, param_1, v_297); return CmdTag{ tag_and_flags & 65535u, tag_and_flags >> uint(16) }; } static inline __attribute__((always_inline)) -CmdStroke CmdStroke_read(thread const Alloc& a, thread const CmdStrokeRef& ref, device Memory& v_291) +CmdStroke CmdStroke_read(thread const Alloc& a, thread const CmdStrokeRef& ref, device Memory& v_297) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_291); + uint raw0 = read_mem(param, param_1, v_297); Alloc param_2 = a; uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3, v_291); + uint raw1 = read_mem(param_2, param_3, v_297); CmdStroke s; s.tile_ref = raw0; s.half_width = as_type(raw1); @@ -275,11 +275,11 @@ CmdStroke CmdStroke_read(thread const Alloc& a, thread const CmdStrokeRef& ref, } static inline __attribute__((always_inline)) -CmdStroke Cmd_Stroke_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_291) +CmdStroke Cmd_Stroke_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_297) { Alloc param = a; CmdStrokeRef param_1 = CmdStrokeRef{ ref.offset + 4u }; - return CmdStroke_read(param, param_1, v_291); + return CmdStroke_read(param, param_1, v_297); } static inline __attribute__((always_inline)) @@ -291,27 +291,27 @@ Alloc new_alloc(thread const uint& offset, thread const uint& size, thread const } static inline __attribute__((always_inline)) -TileSeg TileSeg_read(thread const Alloc& a, thread const TileSegRef& ref, device Memory& v_291) +TileSeg TileSeg_read(thread const Alloc& a, thread const TileSegRef& ref, device Memory& v_297) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_291); + uint raw0 = read_mem(param, param_1, v_297); Alloc param_2 = a; uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3, v_291); + uint raw1 = read_mem(param_2, param_3, v_297); Alloc param_4 = a; uint param_5 = ix + 2u; - uint raw2 = read_mem(param_4, param_5, v_291); + uint raw2 = read_mem(param_4, param_5, v_297); Alloc param_6 = a; uint param_7 = ix + 3u; - uint raw3 = read_mem(param_6, param_7, v_291); + uint raw3 = read_mem(param_6, param_7, v_297); Alloc param_8 = a; uint param_9 = ix + 4u; - uint raw4 = read_mem(param_8, param_9, v_291); + uint raw4 = read_mem(param_8, param_9, v_297); Alloc param_10 = a; uint param_11 = ix + 5u; - uint raw5 = read_mem(param_10, param_11, v_291); + uint raw5 = read_mem(param_10, param_11, v_297); TileSeg s; s.origin = float2(as_type(raw0), as_type(raw1)); s.vector = float2(as_type(raw2), as_type(raw3)); @@ -327,15 +327,15 @@ uint2 chunk_offset(thread const uint& i) } static inline __attribute__((always_inline)) -CmdFill CmdFill_read(thread const Alloc& a, thread const CmdFillRef& ref, device Memory& v_291) +CmdFill CmdFill_read(thread const Alloc& a, thread const CmdFillRef& ref, device Memory& v_297) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_291); + uint raw0 = read_mem(param, param_1, v_297); Alloc param_2 = a; uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3, v_291); + uint raw1 = read_mem(param_2, param_3, v_297); CmdFill s; s.tile_ref = raw0; s.backdrop = int(raw1); @@ -343,60 +343,57 @@ CmdFill CmdFill_read(thread const Alloc& a, thread const CmdFillRef& ref, device } static inline __attribute__((always_inline)) -CmdFill Cmd_Fill_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_291) +CmdFill Cmd_Fill_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_297) { Alloc param = a; CmdFillRef param_1 = CmdFillRef{ ref.offset + 4u }; - return CmdFill_read(param, param_1, v_291); + return CmdFill_read(param, param_1, v_297); } static inline __attribute__((always_inline)) -CmdAlpha CmdAlpha_read(thread const Alloc& a, thread const CmdAlphaRef& ref, device Memory& v_291) +CmdAlpha CmdAlpha_read(thread const Alloc& a, thread const CmdAlphaRef& ref, device Memory& v_297) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_291); + uint raw0 = read_mem(param, param_1, v_297); CmdAlpha s; s.alpha = as_type(raw0); return s; } static inline __attribute__((always_inline)) -CmdAlpha Cmd_Alpha_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_291) +CmdAlpha Cmd_Alpha_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_297) { Alloc param = a; CmdAlphaRef param_1 = CmdAlphaRef{ ref.offset + 4u }; - return CmdAlpha_read(param, param_1, v_291); + return CmdAlpha_read(param, param_1, v_297); } static inline __attribute__((always_inline)) -CmdColor CmdColor_read(thread const Alloc& a, thread const CmdColorRef& ref, device Memory& v_291) +CmdColor CmdColor_read(thread const Alloc& a, thread const CmdColorRef& ref, device Memory& v_297) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_291); + uint raw0 = read_mem(param, param_1, v_297); CmdColor s; s.rgba_color = raw0; return s; } static inline __attribute__((always_inline)) -CmdColor Cmd_Color_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_291) +CmdColor Cmd_Color_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_297) { Alloc param = a; CmdColorRef param_1 = CmdColorRef{ ref.offset + 4u }; - return CmdColor_read(param, param_1, v_291); + return CmdColor_read(param, param_1, v_297); } static inline __attribute__((always_inline)) float3 fromsRGB(thread const float3& srgb) { - bool3 cutoff = srgb >= float3(0.040449999272823333740234375); - float3 below = srgb / float3(12.9200000762939453125); - float3 above = pow((srgb + float3(0.054999999701976776123046875)) / float3(1.05499994754791259765625), float3(2.400000095367431640625)); - return select(below, above, cutoff); + return srgb; } static inline __attribute__((always_inline)) @@ -408,21 +405,21 @@ float4 unpacksRGB(thread const uint& srgba) } static inline __attribute__((always_inline)) -CmdLinGrad CmdLinGrad_read(thread const Alloc& a, thread const CmdLinGradRef& ref, device Memory& v_291) +CmdLinGrad CmdLinGrad_read(thread const Alloc& a, thread const CmdLinGradRef& ref, device Memory& v_297) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_291); + uint raw0 = read_mem(param, param_1, v_297); Alloc param_2 = a; uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3, v_291); + uint raw1 = read_mem(param_2, param_3, v_297); Alloc param_4 = a; uint param_5 = ix + 2u; - uint raw2 = read_mem(param_4, param_5, v_291); + uint raw2 = read_mem(param_4, param_5, v_297); Alloc param_6 = a; uint param_7 = ix + 3u; - uint raw3 = read_mem(param_6, param_7, v_291); + uint raw3 = read_mem(param_6, param_7, v_297); CmdLinGrad s; s.index = raw0; s.line_x = as_type(raw1); @@ -432,50 +429,50 @@ CmdLinGrad CmdLinGrad_read(thread const Alloc& a, thread const CmdLinGradRef& re } static inline __attribute__((always_inline)) -CmdLinGrad Cmd_LinGrad_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_291) +CmdLinGrad Cmd_LinGrad_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_297) { Alloc param = a; CmdLinGradRef param_1 = CmdLinGradRef{ ref.offset + 4u }; - return CmdLinGrad_read(param, param_1, v_291); + return CmdLinGrad_read(param, param_1, v_297); } static inline __attribute__((always_inline)) -CmdRadGrad CmdRadGrad_read(thread const Alloc& a, thread const CmdRadGradRef& ref, device Memory& v_291) +CmdRadGrad CmdRadGrad_read(thread const Alloc& a, thread const CmdRadGradRef& ref, device Memory& v_297) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_291); + uint raw0 = read_mem(param, param_1, v_297); Alloc param_2 = a; uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3, v_291); + uint raw1 = read_mem(param_2, param_3, v_297); Alloc param_4 = a; uint param_5 = ix + 2u; - uint raw2 = read_mem(param_4, param_5, v_291); + uint raw2 = read_mem(param_4, param_5, v_297); Alloc param_6 = a; uint param_7 = ix + 3u; - uint raw3 = read_mem(param_6, param_7, v_291); + uint raw3 = read_mem(param_6, param_7, v_297); Alloc param_8 = a; uint param_9 = ix + 4u; - uint raw4 = read_mem(param_8, param_9, v_291); + uint raw4 = read_mem(param_8, param_9, v_297); Alloc param_10 = a; uint param_11 = ix + 5u; - uint raw5 = read_mem(param_10, param_11, v_291); + uint raw5 = read_mem(param_10, param_11, v_297); Alloc param_12 = a; uint param_13 = ix + 6u; - uint raw6 = read_mem(param_12, param_13, v_291); + uint raw6 = read_mem(param_12, param_13, v_297); Alloc param_14 = a; uint param_15 = ix + 7u; - uint raw7 = read_mem(param_14, param_15, v_291); + uint raw7 = read_mem(param_14, param_15, v_297); Alloc param_16 = a; uint param_17 = ix + 8u; - uint raw8 = read_mem(param_16, param_17, v_291); + uint raw8 = read_mem(param_16, param_17, v_297); Alloc param_18 = a; uint param_19 = ix + 9u; - uint raw9 = read_mem(param_18, param_19, v_291); + uint raw9 = read_mem(param_18, param_19, v_297); Alloc param_20 = a; uint param_21 = ix + 10u; - uint raw10 = read_mem(param_20, param_21, v_291); + uint raw10 = read_mem(param_20, param_21, v_297); CmdRadGrad s; s.index = raw0; s.mat = float4(as_type(raw1), as_type(raw2), as_type(raw3), as_type(raw4)); @@ -487,23 +484,23 @@ CmdRadGrad CmdRadGrad_read(thread const Alloc& a, thread const CmdRadGradRef& re } static inline __attribute__((always_inline)) -CmdRadGrad Cmd_RadGrad_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_291) +CmdRadGrad Cmd_RadGrad_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_297) { Alloc param = a; CmdRadGradRef param_1 = CmdRadGradRef{ ref.offset + 4u }; - return CmdRadGrad_read(param, param_1, v_291); + return CmdRadGrad_read(param, param_1, v_297); } static inline __attribute__((always_inline)) -CmdImage CmdImage_read(thread const Alloc& a, thread const CmdImageRef& ref, device Memory& v_291) +CmdImage CmdImage_read(thread const Alloc& a, thread const CmdImageRef& ref, device Memory& v_297) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_291); + uint raw0 = read_mem(param, param_1, v_297); Alloc param_2 = a; uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3, v_291); + uint raw1 = read_mem(param_2, param_3, v_297); CmdImage s; s.index = raw0; s.offset = int2(int(raw1 << uint(16)) >> 16, int(raw1) >> 16); @@ -511,11 +508,11 @@ CmdImage CmdImage_read(thread const Alloc& a, thread const CmdImageRef& ref, dev } static inline __attribute__((always_inline)) -CmdImage Cmd_Image_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_291) +CmdImage Cmd_Image_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_297) { Alloc param = a; CmdImageRef param_1 = CmdImageRef{ ref.offset + 4u }; - return CmdImage_read(param, param_1, v_291); + return CmdImage_read(param, param_1, v_297); } static inline __attribute__((always_inline)) @@ -528,10 +525,10 @@ spvUnsafeArray fillImage(thread const uint2& xy, thread const CmdImag int2 uv = int2(xy + chunk_offset(param)) + cmd_img.offset; float4 fg_rgba = image_atlas.read(uint2(uv)); float3 param_1 = fg_rgba.xyz; - float3 _1638 = fromsRGB(param_1); - fg_rgba.x = _1638.x; - fg_rgba.y = _1638.y; - fg_rgba.z = _1638.z; + float3 _1653 = fromsRGB(param_1); + fg_rgba.x = _1653.x; + fg_rgba.y = _1653.y; + fg_rgba.z = _1653.z; rgba[i] = fg_rgba; } return rgba; @@ -540,10 +537,7 @@ spvUnsafeArray fillImage(thread const uint2& xy, thread const CmdImag static inline __attribute__((always_inline)) float3 tosRGB(thread const float3& rgb) { - bool3 cutoff = rgb >= float3(0.003130800090730190277099609375); - float3 below = float3(12.9200000762939453125) * rgb; - float3 above = (float3(1.05499994754791259765625) * pow(rgb, float3(0.416660010814666748046875))) - float3(0.054999999701976776123046875); - return select(below, above, cutoff); + return rgb; } static inline __attribute__((always_inline)) @@ -555,23 +549,23 @@ uint packsRGB(thread float4& rgba) } static inline __attribute__((always_inline)) -CmdEndClip CmdEndClip_read(thread const Alloc& a, thread const CmdEndClipRef& ref, device Memory& v_291) +CmdEndClip CmdEndClip_read(thread const Alloc& a, thread const CmdEndClipRef& ref, device Memory& v_297) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_291); + uint raw0 = read_mem(param, param_1, v_297); CmdEndClip s; s.blend = raw0; return s; } static inline __attribute__((always_inline)) -CmdEndClip Cmd_EndClip_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_291) +CmdEndClip Cmd_EndClip_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_297) { Alloc param = a; CmdEndClipRef param_1 = CmdEndClipRef{ ref.offset + 4u }; - return CmdEndClip_read(param, param_1, v_291); + return CmdEndClip_read(param, param_1, v_297); } static inline __attribute__((always_inline)) @@ -585,7 +579,7 @@ float3 hard_light(thread const float3& cb, thread const float3& cs) { float3 param = cb; float3 param_1 = (cs * 2.0) - float3(1.0); - return mix(screen(param, param_1), (cb * 2.0) * cs, float3(cs <= float3(0.5))); + return select(screen(param, param_1), (cb * 2.0) * cs, cs <= float3(0.5)); } static inline __attribute__((always_inline)) @@ -631,8 +625,8 @@ float color_burn(thread const float& cb, thread const float& cs) static inline __attribute__((always_inline)) float3 soft_light(thread const float3& cb, thread const float3& cs) { - float3 d = mix(sqrt(cb), ((((cb * 16.0) - float3(12.0)) * cb) + float3(4.0)) * cb, float3(cb <= float3(0.25))); - return mix(cb + (((cs * 2.0) - float3(1.0)) * (d - cb)), cb - (((float3(1.0) - (cs * 2.0)) * cb) * (float3(1.0) - cb)), float3(cs <= float3(0.5))); + float3 d = select(sqrt(cb), ((((cb * 16.0) - float3(12.0)) * cb) + float3(4.0)) * cb, cb <= float3(0.25)); + return select(cb + (((cs * 2.0) - float3(1.0)) * (d - cb)), cb - (((float3(1.0) - (cs * 2.0)) * cb) * (float3(1.0) - cb)), cs <= float3(0.5)); } static inline __attribute__((always_inline)) @@ -771,8 +765,8 @@ float3 set_lum(thread const float3& c, thread const float& l) { float3 param = c; float3 param_1 = c + float3(l - lum(param)); - float3 _1046 = clip_color(param_1); - return _1046; + float3 _1048 = clip_color(param_1); + return _1048; } static inline __attribute__((always_inline)) @@ -861,9 +855,9 @@ float3 mix_blend(thread const float3& cb, thread const float3& cs, thread const float3 param_20 = cb; float3 param_21 = cs; float param_22 = sat(param_20); - float3 _1337 = set_sat(param_21, param_22); + float3 _1340 = set_sat(param_21, param_22); float3 param_23 = cb; - float3 param_24 = _1337; + float3 param_24 = _1340; float param_25 = lum(param_23); b = set_lum(param_24, param_25); break; @@ -873,9 +867,9 @@ float3 mix_blend(thread const float3& cb, thread const float3& cs, thread const float3 param_26 = cs; float3 param_27 = cb; float param_28 = sat(param_26); - float3 _1351 = set_sat(param_27, param_28); + float3 _1354 = set_sat(param_27, param_28); float3 param_29 = cb; - float3 param_30 = _1351; + float3 param_30 = _1354; float param_31 = lum(param_29); b = set_lum(param_30, param_31); break; @@ -986,45 +980,78 @@ float4 mix_compose(thread const float3& cb, thread const float3& cs, thread cons } case 13u: { - return float4(fast::max(float4(0.0), ((float4(1.0) - (float4(cs, as) * as)) + float4(1.0)) - (float4(cb, ab) * ab)).xyz, fast::max(0.0, ((1.0 - as) + 1.0) - ab)); - } - case 14u: - { - return float4(fast::min(float4(1.0), (float4(cs, as) * as) + (float4(cb, ab) * ab)).xyz, fast::min(1.0, as + ab)); + return fast::min(float4(1.0), float4((cs * as) + (cb * ab), as + ab)); } default: { break; } } - return (float4(cs, as) * (as * fa)) + (float4(cb, ab) * (ab * fb)); + float as_fa = as * fa; + float ab_fb = ab * fb; + float3 co = (cs * as_fa) + (cb * ab_fb); + return float4(co, as_fa + ab_fb); +} + +static inline __attribute__((always_inline)) +float4 mix_blend_compose(thread const float4& backdrop, thread const float4& src, thread const uint& mode) +{ + if ((mode & 32767u) == 3u) + { + return (backdrop * (1.0 - src.w)) + src; + } + float inv_src_a = 1.0 / (src.w + 1.0000000036274937255387218471014e-15); + float3 cs = src.xyz * inv_src_a; + float inv_backdrop_a = 1.0 / (backdrop.w + 1.0000000036274937255387218471014e-15); + float3 cb = backdrop.xyz * inv_backdrop_a; + uint blend_mode = mode >> uint(8); + float3 param = cb; + float3 param_1 = cs; + uint param_2 = blend_mode; + float3 blended = mix_blend(param, param_1, param_2); + cs = mix(cs, blended, float3(backdrop.w)); + uint comp_mode = mode & 255u; + if (comp_mode == 3u) + { + float3 co = mix(backdrop.xyz, cs, float3(src.w)); + return float4(co, src.w + (backdrop.w * (1.0 - src.w))); + } + else + { + float3 param_3 = cb; + float3 param_4 = cs; + float param_5 = backdrop.w; + float param_6 = src.w; + uint param_7 = comp_mode; + return mix_compose(param_3, param_4, param_5, param_6, param_7); + } } static inline __attribute__((always_inline)) -CmdJump CmdJump_read(thread const Alloc& a, thread const CmdJumpRef& ref, device Memory& v_291) +CmdJump CmdJump_read(thread const Alloc& a, thread const CmdJumpRef& ref, device Memory& v_297) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_291); + uint raw0 = read_mem(param, param_1, v_297); CmdJump s; s.new_ref = raw0; return s; } static inline __attribute__((always_inline)) -CmdJump Cmd_Jump_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_291) +CmdJump Cmd_Jump_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_297) { Alloc param = a; CmdJumpRef param_1 = CmdJumpRef{ ref.offset + 4u }; - return CmdJump_read(param, param_1, v_291); + return CmdJump_read(param, param_1, v_297); } -kernel void main0(device Memory& v_291 [[buffer(0)]], const device ConfigBuf& _1666 [[buffer(1)]], texture2d image [[texture(2)]], texture2d image_atlas [[texture(3)]], texture2d gradients [[texture(4)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]]) +kernel void main0(device Memory& v_297 [[buffer(0)]], const device ConfigBuf& _1681 [[buffer(1)]], texture2d image [[texture(2)]], texture2d image_atlas [[texture(3)]], texture2d gradients [[texture(4)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]]) { - uint tile_ix = (gl_WorkGroupID.y * _1666.conf.width_in_tiles) + gl_WorkGroupID.x; + uint tile_ix = (gl_WorkGroupID.y * _1681.conf.width_in_tiles) + gl_WorkGroupID.x; Alloc param; - param.offset = _1666.conf.ptcl_alloc.offset; + param.offset = _1681.conf.ptcl_alloc.offset; uint param_1 = tile_ix * 1024u; uint param_2 = 1024u; Alloc cmd_alloc = slice_mem(param, param_1, param_2); @@ -1037,7 +1064,7 @@ kernel void main0(device Memory& v_291 [[buffer(0)]], const device ConfigBuf& _1 rgba[i] = float4(0.0); } uint clip_depth = 0u; - bool mem_ok = v_291.mem_error == 0u; + bool mem_ok = v_297.mem_error == 0u; spvUnsafeArray df; TileSegRef tile_seg_ref; spvUnsafeArray area; @@ -1046,7 +1073,7 @@ kernel void main0(device Memory& v_291 [[buffer(0)]], const device ConfigBuf& _1 { Alloc param_3 = cmd_alloc; CmdRef param_4 = cmd_ref; - uint tag = Cmd_tag(param_3, param_4, v_291).tag; + uint tag = Cmd_tag(param_3, param_4, v_297).tag; if (tag == 0u) { break; @@ -1057,7 +1084,7 @@ kernel void main0(device Memory& v_291 [[buffer(0)]], const device ConfigBuf& _1 { Alloc param_5 = cmd_alloc; CmdRef param_6 = cmd_ref; - CmdStroke stroke = Cmd_Stroke_read(param_5, param_6, v_291); + CmdStroke stroke = Cmd_Stroke_read(param_5, param_6, v_297); for (uint k = 0u; k < 8u; k++) { df[k] = 1000000000.0; @@ -1070,7 +1097,7 @@ kernel void main0(device Memory& v_291 [[buffer(0)]], const device ConfigBuf& _1 bool param_9 = mem_ok; Alloc param_10 = new_alloc(param_7, param_8, param_9); TileSegRef param_11 = tile_seg_ref; - TileSeg seg = TileSeg_read(param_10, param_11, v_291); + TileSeg seg = TileSeg_read(param_10, param_11, v_297); float2 line_vec = seg.vector; for (uint k_1 = 0u; k_1 < 8u; k_1++) { @@ -1093,7 +1120,7 @@ kernel void main0(device Memory& v_291 [[buffer(0)]], const device ConfigBuf& _1 { Alloc param_13 = cmd_alloc; CmdRef param_14 = cmd_ref; - CmdFill fill = Cmd_Fill_read(param_13, param_14, v_291); + CmdFill fill = Cmd_Fill_read(param_13, param_14, v_297); for (uint k_3 = 0u; k_3 < 8u; k_3++) { area[k_3] = float(fill.backdrop); @@ -1106,7 +1133,7 @@ kernel void main0(device Memory& v_291 [[buffer(0)]], const device ConfigBuf& _1 bool param_17 = mem_ok; Alloc param_18 = new_alloc(param_15, param_16, param_17); TileSegRef param_19 = tile_seg_ref; - TileSeg seg_1 = TileSeg_read(param_18, param_19, v_291); + TileSeg seg_1 = TileSeg_read(param_18, param_19, v_297); for (uint k_4 = 0u; k_4 < 8u; k_4++) { uint param_20 = k_4; @@ -1150,7 +1177,7 @@ kernel void main0(device Memory& v_291 [[buffer(0)]], const device ConfigBuf& _1 { Alloc param_21 = cmd_alloc; CmdRef param_22 = cmd_ref; - CmdAlpha alpha = Cmd_Alpha_read(param_21, param_22, v_291); + CmdAlpha alpha = Cmd_Alpha_read(param_21, param_22, v_297); for (uint k_7 = 0u; k_7 < 8u; k_7++) { area[k_7] = alpha.alpha; @@ -1162,7 +1189,7 @@ kernel void main0(device Memory& v_291 [[buffer(0)]], const device ConfigBuf& _1 { Alloc param_23 = cmd_alloc; CmdRef param_24 = cmd_ref; - CmdColor color = Cmd_Color_read(param_23, param_24, v_291); + CmdColor color = Cmd_Color_read(param_23, param_24, v_297); uint param_25 = color.rgba_color; float4 fg = unpacksRGB(param_25); for (uint k_8 = 0u; k_8 < 8u; k_8++) @@ -1177,7 +1204,7 @@ kernel void main0(device Memory& v_291 [[buffer(0)]], const device ConfigBuf& _1 { Alloc param_26 = cmd_alloc; CmdRef param_27 = cmd_ref; - CmdLinGrad lin = Cmd_LinGrad_read(param_26, param_27, v_291); + CmdLinGrad lin = Cmd_LinGrad_read(param_26, param_27, v_297); float d_1 = ((lin.line_x * xy.x) + (lin.line_y * xy.y)) + lin.line_c; for (uint k_9 = 0u; k_9 < 8u; k_9++) { @@ -1187,10 +1214,10 @@ kernel void main0(device Memory& v_291 [[buffer(0)]], const device ConfigBuf& _1 int x = int(round(fast::clamp(my_d, 0.0, 1.0) * 511.0)); float4 fg_rgba = gradients.read(uint2(int2(x, int(lin.index)))); float3 param_29 = fg_rgba.xyz; - float3 _2238 = fromsRGB(param_29); - fg_rgba.x = _2238.x; - fg_rgba.y = _2238.y; - fg_rgba.z = _2238.z; + float3 _2254 = fromsRGB(param_29); + fg_rgba.x = _2254.x; + fg_rgba.y = _2254.y; + fg_rgba.z = _2254.z; float4 fg_k_1 = fg_rgba * area[k_9]; rgba[k_9] = (rgba[k_9] * (1.0 - fg_k_1.w)) + fg_k_1; } @@ -1201,7 +1228,7 @@ kernel void main0(device Memory& v_291 [[buffer(0)]], const device ConfigBuf& _1 { Alloc param_30 = cmd_alloc; CmdRef param_31 = cmd_ref; - CmdRadGrad rad = Cmd_RadGrad_read(param_30, param_31, v_291); + CmdRadGrad rad = Cmd_RadGrad_read(param_30, param_31, v_297); for (uint k_10 = 0u; k_10 < 8u; k_10++) { uint param_32 = k_10; @@ -1213,10 +1240,10 @@ kernel void main0(device Memory& v_291 [[buffer(0)]], const device ConfigBuf& _1 int x_1 = int(round(fast::clamp(t_2, 0.0, 1.0) * 511.0)); float4 fg_rgba_1 = gradients.read(uint2(int2(x_1, int(rad.index)))); float3 param_33 = fg_rgba_1.xyz; - float3 _2348 = fromsRGB(param_33); - fg_rgba_1.x = _2348.x; - fg_rgba_1.y = _2348.y; - fg_rgba_1.z = _2348.z; + float3 _2364 = fromsRGB(param_33); + fg_rgba_1.x = _2364.x; + fg_rgba_1.y = _2364.y; + fg_rgba_1.z = _2364.z; float4 fg_k_2 = fg_rgba_1 * area[k_10]; rgba[k_10] = (rgba[k_10] * (1.0 - fg_k_2.w)) + fg_k_2; } @@ -1227,7 +1254,7 @@ kernel void main0(device Memory& v_291 [[buffer(0)]], const device ConfigBuf& _1 { Alloc param_34 = cmd_alloc; CmdRef param_35 = cmd_ref; - CmdImage fill_img = Cmd_Image_read(param_34, param_35, v_291); + CmdImage fill_img = Cmd_Image_read(param_34, param_35, v_297); uint2 param_36 = xy_uint; CmdImage param_37 = fill_img; spvUnsafeArray img; @@ -1246,8 +1273,8 @@ kernel void main0(device Memory& v_291 [[buffer(0)]], const device ConfigBuf& _1 { uint d_2 = min(clip_depth, 127u); float4 param_38 = float4(rgba[k_12]); - uint _2454 = packsRGB(param_38); - blend_stack[d_2][k_12] = _2454; + uint _2470 = packsRGB(param_38); + blend_stack[d_2][k_12] = _2470; rgba[k_12] = float4(0.0); } clip_depth++; @@ -1258,7 +1285,7 @@ kernel void main0(device Memory& v_291 [[buffer(0)]], const device ConfigBuf& _1 { Alloc param_39 = cmd_alloc; CmdRef param_40 = cmd_ref; - CmdEndClip end_clip = Cmd_EndClip_read(param_39, param_40, v_291); + CmdEndClip end_clip = Cmd_EndClip_read(param_39, param_40, v_297); uint blend_mode = end_clip.blend >> uint(8); uint comp_mode = end_clip.blend & 255u; clip_depth--; @@ -1268,31 +1295,19 @@ kernel void main0(device Memory& v_291 [[buffer(0)]], const device ConfigBuf& _1 uint param_41 = blend_stack[d_3][k_13]; float4 bg = unpacksRGB(param_41); float4 fg_1 = rgba[k_13] * area[k_13]; - float3 param_42 = bg.xyz; - float3 param_43 = fg_1.xyz; - uint param_44 = blend_mode; - float3 blend = mix_blend(param_42, param_43, param_44); - float4 _2521 = fg_1; - float _2525 = fg_1.w; - float3 _2532 = mix(_2521.xyz, blend, float3(float((_2525 * bg.w) > 0.0))); - fg_1.x = _2532.x; - fg_1.y = _2532.y; - fg_1.z = _2532.z; - float3 param_45 = bg.xyz; - float3 param_46 = fg_1.xyz; - float param_47 = bg.w; - float param_48 = fg_1.w; - uint param_49 = comp_mode; - rgba[k_13] = mix_compose(param_45, param_46, param_47, param_48, param_49); + float4 param_42 = bg; + float4 param_43 = fg_1; + uint param_44 = end_clip.blend; + rgba[k_13] = mix_blend_compose(param_42, param_43, param_44); } cmd_ref.offset += 8u; break; } case 11u: { - Alloc param_50 = cmd_alloc; - CmdRef param_51 = cmd_ref; - cmd_ref = CmdRef{ Cmd_Jump_read(param_50, param_51, v_291).new_ref }; + Alloc param_45 = cmd_alloc; + CmdRef param_46 = cmd_ref; + cmd_ref = CmdRef{ Cmd_Jump_read(param_45, param_46, v_297).new_ref }; cmd_alloc.offset = cmd_ref.offset; break; } @@ -1300,9 +1315,9 @@ kernel void main0(device Memory& v_291 [[buffer(0)]], const device ConfigBuf& _1 } for (uint i_1 = 0u; i_1 < 8u; i_1++) { - uint param_52 = i_1; - float3 param_53 = rgba[i_1].xyz; - image.write(float4(tosRGB(param_53), rgba[i_1].w), uint2(int2(xy_uint + chunk_offset(param_52)))); + uint param_47 = i_1; + float3 param_48 = rgba[i_1].xyz; + image.write(float4(tosRGB(param_48), rgba[i_1].w), uint2(int2(xy_uint + chunk_offset(param_47)))); } } diff --git a/piet-gpu/shader/gen/kernel4.spv b/piet-gpu/shader/gen/kernel4.spv index 70612634a..f0e29634b 100644 Binary files a/piet-gpu/shader/gen/kernel4.spv and b/piet-gpu/shader/gen/kernel4.spv differ diff --git a/piet-gpu/shader/gen/kernel4_gray.dxil b/piet-gpu/shader/gen/kernel4_gray.dxil index 046045f51..7b7c19f00 100644 Binary files a/piet-gpu/shader/gen/kernel4_gray.dxil and b/piet-gpu/shader/gen/kernel4_gray.dxil differ diff --git a/piet-gpu/shader/gen/kernel4_gray.hlsl b/piet-gpu/shader/gen/kernel4_gray.hlsl index 019a73cd9..5d9b88d29 100644 --- a/piet-gpu/shader/gen/kernel4_gray.hlsl +++ b/piet-gpu/shader/gen/kernel4_gray.hlsl @@ -161,8 +161,8 @@ struct Config static const uint3 gl_WorkGroupSize = uint3(8u, 4u, 1u); -RWByteAddressBuffer _291 : register(u0, space0); -ByteAddressBuffer _1666 : register(t1, space0); +RWByteAddressBuffer _297 : register(u0, space0); +ByteAddressBuffer _1681 : register(t1, space0); RWTexture2D image_atlas : register(u3, space0); RWTexture2D gradients : register(u4, space0); RWTexture2D image : register(u2, space0); @@ -189,8 +189,8 @@ float4 spvUnpackUnorm4x8(uint value) Alloc slice_mem(Alloc a, uint offset, uint size) { - Alloc _304 = { a.offset + offset }; - return _304; + Alloc _310 = { a.offset + offset }; + return _310; } bool touch_mem(Alloc alloc, uint offset) @@ -206,7 +206,7 @@ uint read_mem(Alloc alloc, uint offset) { return 0u; } - uint v = _291.Load(offset * 4 + 8); + uint v = _297.Load(offset * 4 + 8); return v; } @@ -215,8 +215,8 @@ CmdTag Cmd_tag(Alloc a, CmdRef ref) Alloc param = a; uint param_1 = ref.offset >> uint(2); uint tag_and_flags = read_mem(param, param_1); - CmdTag _663 = { tag_and_flags & 65535u, tag_and_flags >> uint(16) }; - return _663; + CmdTag _669 = { tag_and_flags & 65535u, tag_and_flags >> uint(16) }; + return _669; } CmdStroke CmdStroke_read(Alloc a, CmdStrokeRef ref) @@ -236,9 +236,9 @@ CmdStroke CmdStroke_read(Alloc a, CmdStrokeRef ref) CmdStroke Cmd_Stroke_read(Alloc a, CmdRef ref) { - CmdStrokeRef _679 = { ref.offset + 4u }; + CmdStrokeRef _685 = { ref.offset + 4u }; Alloc param = a; - CmdStrokeRef param_1 = _679; + CmdStrokeRef param_1 = _685; return CmdStroke_read(param, param_1); } @@ -274,8 +274,8 @@ TileSeg TileSeg_read(Alloc a, TileSegRef ref) s.origin = float2(asfloat(raw0), asfloat(raw1)); s._vector = float2(asfloat(raw2), asfloat(raw3)); s.y_edge = asfloat(raw4); - TileSegRef _820 = { raw5 }; - s.next = _820; + TileSegRef _826 = { raw5 }; + s.next = _826; return s; } @@ -301,9 +301,9 @@ CmdFill CmdFill_read(Alloc a, CmdFillRef ref) CmdFill Cmd_Fill_read(Alloc a, CmdRef ref) { - CmdFillRef _669 = { ref.offset + 4u }; + CmdFillRef _675 = { ref.offset + 4u }; Alloc param = a; - CmdFillRef param_1 = _669; + CmdFillRef param_1 = _675; return CmdFill_read(param, param_1); } @@ -320,9 +320,9 @@ CmdAlpha CmdAlpha_read(Alloc a, CmdAlphaRef ref) CmdAlpha Cmd_Alpha_read(Alloc a, CmdRef ref) { - CmdAlphaRef _689 = { ref.offset + 4u }; + CmdAlphaRef _695 = { ref.offset + 4u }; Alloc param = a; - CmdAlphaRef param_1 = _689; + CmdAlphaRef param_1 = _695; return CmdAlpha_read(param, param_1); } @@ -339,18 +339,15 @@ CmdColor CmdColor_read(Alloc a, CmdColorRef ref) CmdColor Cmd_Color_read(Alloc a, CmdRef ref) { - CmdColorRef _699 = { ref.offset + 4u }; + CmdColorRef _705 = { ref.offset + 4u }; Alloc param = a; - CmdColorRef param_1 = _699; + CmdColorRef param_1 = _705; return CmdColor_read(param, param_1); } float3 fromsRGB(float3 srgb) { - bool3 cutoff = bool3(srgb.x >= 0.040449999272823333740234375f.xxx.x, srgb.y >= 0.040449999272823333740234375f.xxx.y, srgb.z >= 0.040449999272823333740234375f.xxx.z); - float3 below = srgb / 12.9200000762939453125f.xxx; - float3 above = pow((srgb + 0.054999999701976776123046875f.xxx) / 1.05499994754791259765625f.xxx, 2.400000095367431640625f.xxx); - return float3(cutoff.x ? above.x : below.x, cutoff.y ? above.y : below.y, cutoff.z ? above.z : below.z); + return srgb; } float4 unpacksRGB(uint srgba) @@ -385,9 +382,9 @@ CmdLinGrad CmdLinGrad_read(Alloc a, CmdLinGradRef ref) CmdLinGrad Cmd_LinGrad_read(Alloc a, CmdRef ref) { - CmdLinGradRef _709 = { ref.offset + 4u }; + CmdLinGradRef _715 = { ref.offset + 4u }; Alloc param = a; - CmdLinGradRef param_1 = _709; + CmdLinGradRef param_1 = _715; return CmdLinGrad_read(param, param_1); } @@ -439,9 +436,9 @@ CmdRadGrad CmdRadGrad_read(Alloc a, CmdRadGradRef ref) CmdRadGrad Cmd_RadGrad_read(Alloc a, CmdRef ref) { - CmdRadGradRef _719 = { ref.offset + 4u }; + CmdRadGradRef _725 = { ref.offset + 4u }; Alloc param = a; - CmdRadGradRef param_1 = _719; + CmdRadGradRef param_1 = _725; return CmdRadGrad_read(param, param_1); } @@ -462,9 +459,9 @@ CmdImage CmdImage_read(Alloc a, CmdImageRef ref) CmdImage Cmd_Image_read(Alloc a, CmdRef ref) { - CmdImageRef _729 = { ref.offset + 4u }; + CmdImageRef _735 = { ref.offset + 4u }; Alloc param = a; - CmdImageRef param_1 = _729; + CmdImageRef param_1 = _735; return CmdImage_read(param, param_1); } @@ -477,10 +474,10 @@ void fillImage(out float4 spvReturnValue[8], uint2 xy, CmdImage cmd_img) int2 uv = int2(xy + chunk_offset(param)) + cmd_img.offset; float4 fg_rgba = image_atlas[uv]; float3 param_1 = fg_rgba.xyz; - float3 _1638 = fromsRGB(param_1); - fg_rgba.x = _1638.x; - fg_rgba.y = _1638.y; - fg_rgba.z = _1638.z; + float3 _1653 = fromsRGB(param_1); + fg_rgba.x = _1653.x; + fg_rgba.y = _1653.y; + fg_rgba.z = _1653.z; rgba[i] = fg_rgba; } spvReturnValue = rgba; @@ -488,10 +485,7 @@ void fillImage(out float4 spvReturnValue[8], uint2 xy, CmdImage cmd_img) float3 tosRGB(float3 rgb) { - bool3 cutoff = bool3(rgb.x >= 0.003130800090730190277099609375f.xxx.x, rgb.y >= 0.003130800090730190277099609375f.xxx.y, rgb.z >= 0.003130800090730190277099609375f.xxx.z); - float3 below = 12.9200000762939453125f.xxx * rgb; - float3 above = (1.05499994754791259765625f.xxx * pow(rgb, 0.416660010814666748046875f.xxx)) - 0.054999999701976776123046875f.xxx; - return float3(cutoff.x ? above.x : below.x, cutoff.y ? above.y : below.y, cutoff.z ? above.z : below.z); + return rgb; } uint packsRGB(inout float4 rgba) @@ -514,9 +508,9 @@ CmdEndClip CmdEndClip_read(Alloc a, CmdEndClipRef ref) CmdEndClip Cmd_EndClip_read(Alloc a, CmdRef ref) { - CmdEndClipRef _739 = { ref.offset + 4u }; + CmdEndClipRef _745 = { ref.offset + 4u }; Alloc param = a; - CmdEndClipRef param_1 = _739; + CmdEndClipRef param_1 = _745; return CmdEndClip_read(param, param_1); } @@ -529,7 +523,10 @@ float3 hard_light(float3 cb, float3 cs) { float3 param = cb; float3 param_1 = (cs * 2.0f) - 1.0f.xxx; - return lerp(screen(param, param_1), (cb * 2.0f) * cs, float3(bool3(cs.x <= 0.5f.xxx.x, cs.y <= 0.5f.xxx.y, cs.z <= 0.5f.xxx.z))); + float3 _889 = screen(param, param_1); + float3 _893 = (cb * 2.0f) * cs; + bool3 _898 = bool3(cs.x <= 0.5f.xxx.x, cs.y <= 0.5f.xxx.y, cs.z <= 0.5f.xxx.z); + return float3(_898.x ? _893.x : _889.x, _898.y ? _893.y : _889.y, _898.z ? _893.z : _889.z); } float color_dodge(float cb, float cs) @@ -572,8 +569,14 @@ float color_burn(float cb, float cs) float3 soft_light(float3 cb, float3 cs) { - float3 d = lerp(sqrt(cb), ((((cb * 16.0f) - 12.0f.xxx) * cb) + 4.0f.xxx) * cb, float3(bool3(cb.x <= 0.25f.xxx.x, cb.y <= 0.25f.xxx.y, cb.z <= 0.25f.xxx.z))); - return lerp(cb + (((cs * 2.0f) - 1.0f.xxx) * (d - cb)), cb - (((1.0f.xxx - (cs * 2.0f)) * cb) * (1.0f.xxx - cb)), float3(bool3(cs.x <= 0.5f.xxx.x, cs.y <= 0.5f.xxx.y, cs.z <= 0.5f.xxx.z))); + float3 _904 = sqrt(cb); + float3 _917 = ((((cb * 16.0f) - 12.0f.xxx) * cb) + 4.0f.xxx) * cb; + bool3 _921 = bool3(cb.x <= 0.25f.xxx.x, cb.y <= 0.25f.xxx.y, cb.z <= 0.25f.xxx.z); + float3 d = float3(_921.x ? _917.x : _904.x, _921.y ? _917.y : _904.y, _921.z ? _917.z : _904.z); + float3 _932 = cb + (((cs * 2.0f) - 1.0f.xxx) * (d - cb)); + float3 _942 = cb - (((1.0f.xxx - (cs * 2.0f)) * cb) * (1.0f.xxx - cb)); + bool3 _944 = bool3(cs.x <= 0.5f.xxx.x, cs.y <= 0.5f.xxx.y, cs.z <= 0.5f.xxx.z); + return float3(_944.x ? _942.x : _932.x, _944.y ? _942.y : _932.y, _944.z ? _942.z : _932.z); } float sat(float3 c) @@ -706,8 +709,8 @@ float3 set_lum(float3 c, float l) { float3 param = c; float3 param_1 = c + (l - lum(param)).xxx; - float3 _1046 = clip_color(param_1); - return _1046; + float3 _1048 = clip_color(param_1); + return _1048; } float3 mix_blend(float3 cb, float3 cs, uint mode) @@ -795,9 +798,9 @@ float3 mix_blend(float3 cb, float3 cs, uint mode) float3 param_20 = cb; float3 param_21 = cs; float param_22 = sat(param_20); - float3 _1337 = set_sat(param_21, param_22); + float3 _1340 = set_sat(param_21, param_22); float3 param_23 = cb; - float3 param_24 = _1337; + float3 param_24 = _1340; float param_25 = lum(param_23); b = set_lum(param_24, param_25); break; @@ -807,9 +810,9 @@ float3 mix_blend(float3 cb, float3 cs, uint mode) float3 param_26 = cs; float3 param_27 = cb; float param_28 = sat(param_26); - float3 _1351 = set_sat(param_27, param_28); + float3 _1354 = set_sat(param_27, param_28); float3 param_29 = cb; - float3 param_30 = _1351; + float3 param_30 = _1354; float param_31 = lum(param_29); b = set_lum(param_30, param_31); break; @@ -919,18 +922,50 @@ float4 mix_compose(float3 cb, float3 cs, float ab, float as, uint mode) } case 13u: { - return float4(max(0.0f.xxxx, ((1.0f.xxxx - (float4(cs, as) * as)) + 1.0f.xxxx) - (float4(cb, ab) * ab)).xyz, max(0.0f, ((1.0f - as) + 1.0f) - ab)); - } - case 14u: - { - return float4(min(1.0f.xxxx, (float4(cs, as) * as) + (float4(cb, ab) * ab)).xyz, min(1.0f, as + ab)); + return min(1.0f.xxxx, float4((cs * as) + (cb * ab), as + ab)); } default: { break; } } - return (float4(cs, as) * (as * fa)) + (float4(cb, ab) * (ab * fb)); + float as_fa = as * fa; + float ab_fb = ab * fb; + float3 co = (cs * as_fa) + (cb * ab_fb); + return float4(co, as_fa + ab_fb); +} + +float4 mix_blend_compose(float4 backdrop, float4 src, uint mode) +{ + if ((mode & 32767u) == 3u) + { + return (backdrop * (1.0f - src.w)) + src; + } + float inv_src_a = 1.0f / (src.w + 1.0000000036274937255387218471014e-15f); + float3 cs = src.xyz * inv_src_a; + float inv_backdrop_a = 1.0f / (backdrop.w + 1.0000000036274937255387218471014e-15f); + float3 cb = backdrop.xyz * inv_backdrop_a; + uint blend_mode = mode >> uint(8); + float3 param = cb; + float3 param_1 = cs; + uint param_2 = blend_mode; + float3 blended = mix_blend(param, param_1, param_2); + cs = lerp(cs, blended, backdrop.w.xxx); + uint comp_mode = mode & 255u; + if (comp_mode == 3u) + { + float3 co = lerp(backdrop.xyz, cs, src.w.xxx); + return float4(co, src.w + (backdrop.w * (1.0f - src.w))); + } + else + { + float3 param_3 = cb; + float3 param_4 = cs; + float param_5 = backdrop.w; + float param_6 = src.w; + uint param_7 = comp_mode; + return mix_compose(param_3, param_4, param_5, param_6, param_7); + } } CmdJump CmdJump_read(Alloc a, CmdJumpRef ref) @@ -946,24 +981,24 @@ CmdJump CmdJump_read(Alloc a, CmdJumpRef ref) CmdJump Cmd_Jump_read(Alloc a, CmdRef ref) { - CmdJumpRef _749 = { ref.offset + 4u }; + CmdJumpRef _755 = { ref.offset + 4u }; Alloc param = a; - CmdJumpRef param_1 = _749; + CmdJumpRef param_1 = _755; return CmdJump_read(param, param_1); } void comp_main() { - uint tile_ix = (gl_WorkGroupID.y * _1666.Load(8)) + gl_WorkGroupID.x; - Alloc _1681; - _1681.offset = _1666.Load(24); + uint tile_ix = (gl_WorkGroupID.y * _1681.Load(8)) + gl_WorkGroupID.x; + Alloc _1696; + _1696.offset = _1681.Load(24); Alloc param; - param.offset = _1681.offset; + param.offset = _1696.offset; uint param_1 = tile_ix * 1024u; uint param_2 = 1024u; Alloc cmd_alloc = slice_mem(param, param_1, param_2); - CmdRef _1690 = { cmd_alloc.offset }; - CmdRef cmd_ref = _1690; + CmdRef _1705 = { cmd_alloc.offset }; + CmdRef cmd_ref = _1705; uint2 xy_uint = uint2(gl_LocalInvocationID.x + (16u * gl_WorkGroupID.x), gl_LocalInvocationID.y + (16u * gl_WorkGroupID.y)); float2 xy = float2(xy_uint); float4 rgba[8]; @@ -972,7 +1007,7 @@ void comp_main() rgba[i] = 0.0f.xxxx; } uint clip_depth = 0u; - bool mem_ok = _291.Load(4) == 0u; + bool mem_ok = _297.Load(4) == 0u; float df[8]; TileSegRef tile_seg_ref; float area[8]; @@ -997,8 +1032,8 @@ void comp_main() { df[k] = 1000000000.0f; } - TileSegRef _1784 = { stroke.tile_ref }; - tile_seg_ref = _1784; + TileSegRef _1800 = { stroke.tile_ref }; + tile_seg_ref = _1800; do { uint param_7 = tile_seg_ref.offset; @@ -1034,8 +1069,8 @@ void comp_main() { area[k_3] = float(fill.backdrop); } - TileSegRef _1904 = { fill.tile_ref }; - tile_seg_ref = _1904; + TileSegRef _1920 = { fill.tile_ref }; + tile_seg_ref = _1920; do { uint param_15 = tile_seg_ref.offset; @@ -1124,10 +1159,10 @@ void comp_main() int x = int(round(clamp(my_d, 0.0f, 1.0f) * 511.0f)); float4 fg_rgba = gradients[int2(x, int(lin.index))]; float3 param_29 = fg_rgba.xyz; - float3 _2238 = fromsRGB(param_29); - fg_rgba.x = _2238.x; - fg_rgba.y = _2238.y; - fg_rgba.z = _2238.z; + float3 _2254 = fromsRGB(param_29); + fg_rgba.x = _2254.x; + fg_rgba.y = _2254.y; + fg_rgba.z = _2254.z; float4 fg_k_1 = fg_rgba * area[k_9]; rgba[k_9] = (rgba[k_9] * (1.0f - fg_k_1.w)) + fg_k_1; } @@ -1150,10 +1185,10 @@ void comp_main() int x_1 = int(round(clamp(t_2, 0.0f, 1.0f) * 511.0f)); float4 fg_rgba_1 = gradients[int2(x_1, int(rad.index))]; float3 param_33 = fg_rgba_1.xyz; - float3 _2348 = fromsRGB(param_33); - fg_rgba_1.x = _2348.x; - fg_rgba_1.y = _2348.y; - fg_rgba_1.z = _2348.z; + float3 _2364 = fromsRGB(param_33); + fg_rgba_1.x = _2364.x; + fg_rgba_1.y = _2364.y; + fg_rgba_1.z = _2364.z; float4 fg_k_2 = fg_rgba_1 * area[k_10]; rgba[k_10] = (rgba[k_10] * (1.0f - fg_k_2.w)) + fg_k_2; } @@ -1167,9 +1202,9 @@ void comp_main() CmdImage fill_img = Cmd_Image_read(param_34, param_35); uint2 param_36 = xy_uint; CmdImage param_37 = fill_img; - float4 _2391[8]; - fillImage(_2391, param_36, param_37); - float4 img[8] = _2391; + float4 _2407[8]; + fillImage(_2407, param_36, param_37); + float4 img[8] = _2407; for (uint k_11 = 0u; k_11 < 8u; k_11++) { float4 fg_k_3 = img[k_11] * area[k_11]; @@ -1184,8 +1219,8 @@ void comp_main() { uint d_2 = min(clip_depth, 127u); float4 param_38 = float4(rgba[k_12]); - uint _2454 = packsRGB(param_38); - blend_stack[d_2][k_12] = _2454; + uint _2470 = packsRGB(param_38); + blend_stack[d_2][k_12] = _2470; rgba[k_12] = 0.0f.xxxx; } clip_depth++; @@ -1206,32 +1241,20 @@ void comp_main() uint param_41 = blend_stack[d_3][k_13]; float4 bg = unpacksRGB(param_41); float4 fg_1 = rgba[k_13] * area[k_13]; - float3 param_42 = bg.xyz; - float3 param_43 = fg_1.xyz; - uint param_44 = blend_mode; - float3 blend = mix_blend(param_42, param_43, param_44); - float4 _2521 = fg_1; - float _2525 = fg_1.w; - float3 _2532 = lerp(_2521.xyz, blend, float((_2525 * bg.w) > 0.0f).xxx); - fg_1.x = _2532.x; - fg_1.y = _2532.y; - fg_1.z = _2532.z; - float3 param_45 = bg.xyz; - float3 param_46 = fg_1.xyz; - float param_47 = bg.w; - float param_48 = fg_1.w; - uint param_49 = comp_mode; - rgba[k_13] = mix_compose(param_45, param_46, param_47, param_48, param_49); + float4 param_42 = bg; + float4 param_43 = fg_1; + uint param_44 = end_clip.blend; + rgba[k_13] = mix_blend_compose(param_42, param_43, param_44); } cmd_ref.offset += 8u; break; } case 11u: { - Alloc param_50 = cmd_alloc; - CmdRef param_51 = cmd_ref; - CmdRef _2569 = { Cmd_Jump_read(param_50, param_51).new_ref }; - cmd_ref = _2569; + Alloc param_45 = cmd_alloc; + CmdRef param_46 = cmd_ref; + CmdRef _2548 = { Cmd_Jump_read(param_45, param_46).new_ref }; + cmd_ref = _2548; cmd_alloc.offset = cmd_ref.offset; break; } @@ -1239,8 +1262,8 @@ void comp_main() } for (uint i_1 = 0u; i_1 < 8u; i_1++) { - uint param_52 = i_1; - image[int2(xy_uint + chunk_offset(param_52))] = rgba[i_1].w.x; + uint param_47 = i_1; + image[int2(xy_uint + chunk_offset(param_47))] = rgba[i_1].w.x; } } diff --git a/piet-gpu/shader/gen/kernel4_gray.msl b/piet-gpu/shader/gen/kernel4_gray.msl index 6402c6fad..8c608c384 100644 --- a/piet-gpu/shader/gen/kernel4_gray.msl +++ b/piet-gpu/shader/gen/kernel4_gray.msl @@ -237,7 +237,7 @@ bool touch_mem(thread const Alloc& alloc, thread const uint& offset) } static inline __attribute__((always_inline)) -uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memory& v_291) +uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memory& v_297) { Alloc param = alloc; uint param_1 = offset; @@ -245,29 +245,29 @@ uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memor { return 0u; } - uint v = v_291.memory[offset]; + uint v = v_297.memory[offset]; return v; } static inline __attribute__((always_inline)) -CmdTag Cmd_tag(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_291) +CmdTag Cmd_tag(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_297) { Alloc param = a; uint param_1 = ref.offset >> uint(2); - uint tag_and_flags = read_mem(param, param_1, v_291); + uint tag_and_flags = read_mem(param, param_1, v_297); return CmdTag{ tag_and_flags & 65535u, tag_and_flags >> uint(16) }; } static inline __attribute__((always_inline)) -CmdStroke CmdStroke_read(thread const Alloc& a, thread const CmdStrokeRef& ref, device Memory& v_291) +CmdStroke CmdStroke_read(thread const Alloc& a, thread const CmdStrokeRef& ref, device Memory& v_297) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_291); + uint raw0 = read_mem(param, param_1, v_297); Alloc param_2 = a; uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3, v_291); + uint raw1 = read_mem(param_2, param_3, v_297); CmdStroke s; s.tile_ref = raw0; s.half_width = as_type(raw1); @@ -275,11 +275,11 @@ CmdStroke CmdStroke_read(thread const Alloc& a, thread const CmdStrokeRef& ref, } static inline __attribute__((always_inline)) -CmdStroke Cmd_Stroke_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_291) +CmdStroke Cmd_Stroke_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_297) { Alloc param = a; CmdStrokeRef param_1 = CmdStrokeRef{ ref.offset + 4u }; - return CmdStroke_read(param, param_1, v_291); + return CmdStroke_read(param, param_1, v_297); } static inline __attribute__((always_inline)) @@ -291,27 +291,27 @@ Alloc new_alloc(thread const uint& offset, thread const uint& size, thread const } static inline __attribute__((always_inline)) -TileSeg TileSeg_read(thread const Alloc& a, thread const TileSegRef& ref, device Memory& v_291) +TileSeg TileSeg_read(thread const Alloc& a, thread const TileSegRef& ref, device Memory& v_297) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_291); + uint raw0 = read_mem(param, param_1, v_297); Alloc param_2 = a; uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3, v_291); + uint raw1 = read_mem(param_2, param_3, v_297); Alloc param_4 = a; uint param_5 = ix + 2u; - uint raw2 = read_mem(param_4, param_5, v_291); + uint raw2 = read_mem(param_4, param_5, v_297); Alloc param_6 = a; uint param_7 = ix + 3u; - uint raw3 = read_mem(param_6, param_7, v_291); + uint raw3 = read_mem(param_6, param_7, v_297); Alloc param_8 = a; uint param_9 = ix + 4u; - uint raw4 = read_mem(param_8, param_9, v_291); + uint raw4 = read_mem(param_8, param_9, v_297); Alloc param_10 = a; uint param_11 = ix + 5u; - uint raw5 = read_mem(param_10, param_11, v_291); + uint raw5 = read_mem(param_10, param_11, v_297); TileSeg s; s.origin = float2(as_type(raw0), as_type(raw1)); s.vector = float2(as_type(raw2), as_type(raw3)); @@ -327,15 +327,15 @@ uint2 chunk_offset(thread const uint& i) } static inline __attribute__((always_inline)) -CmdFill CmdFill_read(thread const Alloc& a, thread const CmdFillRef& ref, device Memory& v_291) +CmdFill CmdFill_read(thread const Alloc& a, thread const CmdFillRef& ref, device Memory& v_297) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_291); + uint raw0 = read_mem(param, param_1, v_297); Alloc param_2 = a; uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3, v_291); + uint raw1 = read_mem(param_2, param_3, v_297); CmdFill s; s.tile_ref = raw0; s.backdrop = int(raw1); @@ -343,60 +343,57 @@ CmdFill CmdFill_read(thread const Alloc& a, thread const CmdFillRef& ref, device } static inline __attribute__((always_inline)) -CmdFill Cmd_Fill_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_291) +CmdFill Cmd_Fill_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_297) { Alloc param = a; CmdFillRef param_1 = CmdFillRef{ ref.offset + 4u }; - return CmdFill_read(param, param_1, v_291); + return CmdFill_read(param, param_1, v_297); } static inline __attribute__((always_inline)) -CmdAlpha CmdAlpha_read(thread const Alloc& a, thread const CmdAlphaRef& ref, device Memory& v_291) +CmdAlpha CmdAlpha_read(thread const Alloc& a, thread const CmdAlphaRef& ref, device Memory& v_297) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_291); + uint raw0 = read_mem(param, param_1, v_297); CmdAlpha s; s.alpha = as_type(raw0); return s; } static inline __attribute__((always_inline)) -CmdAlpha Cmd_Alpha_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_291) +CmdAlpha Cmd_Alpha_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_297) { Alloc param = a; CmdAlphaRef param_1 = CmdAlphaRef{ ref.offset + 4u }; - return CmdAlpha_read(param, param_1, v_291); + return CmdAlpha_read(param, param_1, v_297); } static inline __attribute__((always_inline)) -CmdColor CmdColor_read(thread const Alloc& a, thread const CmdColorRef& ref, device Memory& v_291) +CmdColor CmdColor_read(thread const Alloc& a, thread const CmdColorRef& ref, device Memory& v_297) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_291); + uint raw0 = read_mem(param, param_1, v_297); CmdColor s; s.rgba_color = raw0; return s; } static inline __attribute__((always_inline)) -CmdColor Cmd_Color_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_291) +CmdColor Cmd_Color_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_297) { Alloc param = a; CmdColorRef param_1 = CmdColorRef{ ref.offset + 4u }; - return CmdColor_read(param, param_1, v_291); + return CmdColor_read(param, param_1, v_297); } static inline __attribute__((always_inline)) float3 fromsRGB(thread const float3& srgb) { - bool3 cutoff = srgb >= float3(0.040449999272823333740234375); - float3 below = srgb / float3(12.9200000762939453125); - float3 above = pow((srgb + float3(0.054999999701976776123046875)) / float3(1.05499994754791259765625), float3(2.400000095367431640625)); - return select(below, above, cutoff); + return srgb; } static inline __attribute__((always_inline)) @@ -408,21 +405,21 @@ float4 unpacksRGB(thread const uint& srgba) } static inline __attribute__((always_inline)) -CmdLinGrad CmdLinGrad_read(thread const Alloc& a, thread const CmdLinGradRef& ref, device Memory& v_291) +CmdLinGrad CmdLinGrad_read(thread const Alloc& a, thread const CmdLinGradRef& ref, device Memory& v_297) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_291); + uint raw0 = read_mem(param, param_1, v_297); Alloc param_2 = a; uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3, v_291); + uint raw1 = read_mem(param_2, param_3, v_297); Alloc param_4 = a; uint param_5 = ix + 2u; - uint raw2 = read_mem(param_4, param_5, v_291); + uint raw2 = read_mem(param_4, param_5, v_297); Alloc param_6 = a; uint param_7 = ix + 3u; - uint raw3 = read_mem(param_6, param_7, v_291); + uint raw3 = read_mem(param_6, param_7, v_297); CmdLinGrad s; s.index = raw0; s.line_x = as_type(raw1); @@ -432,50 +429,50 @@ CmdLinGrad CmdLinGrad_read(thread const Alloc& a, thread const CmdLinGradRef& re } static inline __attribute__((always_inline)) -CmdLinGrad Cmd_LinGrad_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_291) +CmdLinGrad Cmd_LinGrad_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_297) { Alloc param = a; CmdLinGradRef param_1 = CmdLinGradRef{ ref.offset + 4u }; - return CmdLinGrad_read(param, param_1, v_291); + return CmdLinGrad_read(param, param_1, v_297); } static inline __attribute__((always_inline)) -CmdRadGrad CmdRadGrad_read(thread const Alloc& a, thread const CmdRadGradRef& ref, device Memory& v_291) +CmdRadGrad CmdRadGrad_read(thread const Alloc& a, thread const CmdRadGradRef& ref, device Memory& v_297) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_291); + uint raw0 = read_mem(param, param_1, v_297); Alloc param_2 = a; uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3, v_291); + uint raw1 = read_mem(param_2, param_3, v_297); Alloc param_4 = a; uint param_5 = ix + 2u; - uint raw2 = read_mem(param_4, param_5, v_291); + uint raw2 = read_mem(param_4, param_5, v_297); Alloc param_6 = a; uint param_7 = ix + 3u; - uint raw3 = read_mem(param_6, param_7, v_291); + uint raw3 = read_mem(param_6, param_7, v_297); Alloc param_8 = a; uint param_9 = ix + 4u; - uint raw4 = read_mem(param_8, param_9, v_291); + uint raw4 = read_mem(param_8, param_9, v_297); Alloc param_10 = a; uint param_11 = ix + 5u; - uint raw5 = read_mem(param_10, param_11, v_291); + uint raw5 = read_mem(param_10, param_11, v_297); Alloc param_12 = a; uint param_13 = ix + 6u; - uint raw6 = read_mem(param_12, param_13, v_291); + uint raw6 = read_mem(param_12, param_13, v_297); Alloc param_14 = a; uint param_15 = ix + 7u; - uint raw7 = read_mem(param_14, param_15, v_291); + uint raw7 = read_mem(param_14, param_15, v_297); Alloc param_16 = a; uint param_17 = ix + 8u; - uint raw8 = read_mem(param_16, param_17, v_291); + uint raw8 = read_mem(param_16, param_17, v_297); Alloc param_18 = a; uint param_19 = ix + 9u; - uint raw9 = read_mem(param_18, param_19, v_291); + uint raw9 = read_mem(param_18, param_19, v_297); Alloc param_20 = a; uint param_21 = ix + 10u; - uint raw10 = read_mem(param_20, param_21, v_291); + uint raw10 = read_mem(param_20, param_21, v_297); CmdRadGrad s; s.index = raw0; s.mat = float4(as_type(raw1), as_type(raw2), as_type(raw3), as_type(raw4)); @@ -487,23 +484,23 @@ CmdRadGrad CmdRadGrad_read(thread const Alloc& a, thread const CmdRadGradRef& re } static inline __attribute__((always_inline)) -CmdRadGrad Cmd_RadGrad_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_291) +CmdRadGrad Cmd_RadGrad_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_297) { Alloc param = a; CmdRadGradRef param_1 = CmdRadGradRef{ ref.offset + 4u }; - return CmdRadGrad_read(param, param_1, v_291); + return CmdRadGrad_read(param, param_1, v_297); } static inline __attribute__((always_inline)) -CmdImage CmdImage_read(thread const Alloc& a, thread const CmdImageRef& ref, device Memory& v_291) +CmdImage CmdImage_read(thread const Alloc& a, thread const CmdImageRef& ref, device Memory& v_297) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_291); + uint raw0 = read_mem(param, param_1, v_297); Alloc param_2 = a; uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3, v_291); + uint raw1 = read_mem(param_2, param_3, v_297); CmdImage s; s.index = raw0; s.offset = int2(int(raw1 << uint(16)) >> 16, int(raw1) >> 16); @@ -511,11 +508,11 @@ CmdImage CmdImage_read(thread const Alloc& a, thread const CmdImageRef& ref, dev } static inline __attribute__((always_inline)) -CmdImage Cmd_Image_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_291) +CmdImage Cmd_Image_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_297) { Alloc param = a; CmdImageRef param_1 = CmdImageRef{ ref.offset + 4u }; - return CmdImage_read(param, param_1, v_291); + return CmdImage_read(param, param_1, v_297); } static inline __attribute__((always_inline)) @@ -528,10 +525,10 @@ spvUnsafeArray fillImage(thread const uint2& xy, thread const CmdImag int2 uv = int2(xy + chunk_offset(param)) + cmd_img.offset; float4 fg_rgba = image_atlas.read(uint2(uv)); float3 param_1 = fg_rgba.xyz; - float3 _1638 = fromsRGB(param_1); - fg_rgba.x = _1638.x; - fg_rgba.y = _1638.y; - fg_rgba.z = _1638.z; + float3 _1653 = fromsRGB(param_1); + fg_rgba.x = _1653.x; + fg_rgba.y = _1653.y; + fg_rgba.z = _1653.z; rgba[i] = fg_rgba; } return rgba; @@ -540,10 +537,7 @@ spvUnsafeArray fillImage(thread const uint2& xy, thread const CmdImag static inline __attribute__((always_inline)) float3 tosRGB(thread const float3& rgb) { - bool3 cutoff = rgb >= float3(0.003130800090730190277099609375); - float3 below = float3(12.9200000762939453125) * rgb; - float3 above = (float3(1.05499994754791259765625) * pow(rgb, float3(0.416660010814666748046875))) - float3(0.054999999701976776123046875); - return select(below, above, cutoff); + return rgb; } static inline __attribute__((always_inline)) @@ -555,23 +549,23 @@ uint packsRGB(thread float4& rgba) } static inline __attribute__((always_inline)) -CmdEndClip CmdEndClip_read(thread const Alloc& a, thread const CmdEndClipRef& ref, device Memory& v_291) +CmdEndClip CmdEndClip_read(thread const Alloc& a, thread const CmdEndClipRef& ref, device Memory& v_297) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_291); + uint raw0 = read_mem(param, param_1, v_297); CmdEndClip s; s.blend = raw0; return s; } static inline __attribute__((always_inline)) -CmdEndClip Cmd_EndClip_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_291) +CmdEndClip Cmd_EndClip_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_297) { Alloc param = a; CmdEndClipRef param_1 = CmdEndClipRef{ ref.offset + 4u }; - return CmdEndClip_read(param, param_1, v_291); + return CmdEndClip_read(param, param_1, v_297); } static inline __attribute__((always_inline)) @@ -585,7 +579,7 @@ float3 hard_light(thread const float3& cb, thread const float3& cs) { float3 param = cb; float3 param_1 = (cs * 2.0) - float3(1.0); - return mix(screen(param, param_1), (cb * 2.0) * cs, float3(cs <= float3(0.5))); + return select(screen(param, param_1), (cb * 2.0) * cs, cs <= float3(0.5)); } static inline __attribute__((always_inline)) @@ -631,8 +625,8 @@ float color_burn(thread const float& cb, thread const float& cs) static inline __attribute__((always_inline)) float3 soft_light(thread const float3& cb, thread const float3& cs) { - float3 d = mix(sqrt(cb), ((((cb * 16.0) - float3(12.0)) * cb) + float3(4.0)) * cb, float3(cb <= float3(0.25))); - return mix(cb + (((cs * 2.0) - float3(1.0)) * (d - cb)), cb - (((float3(1.0) - (cs * 2.0)) * cb) * (float3(1.0) - cb)), float3(cs <= float3(0.5))); + float3 d = select(sqrt(cb), ((((cb * 16.0) - float3(12.0)) * cb) + float3(4.0)) * cb, cb <= float3(0.25)); + return select(cb + (((cs * 2.0) - float3(1.0)) * (d - cb)), cb - (((float3(1.0) - (cs * 2.0)) * cb) * (float3(1.0) - cb)), cs <= float3(0.5)); } static inline __attribute__((always_inline)) @@ -771,8 +765,8 @@ float3 set_lum(thread const float3& c, thread const float& l) { float3 param = c; float3 param_1 = c + float3(l - lum(param)); - float3 _1046 = clip_color(param_1); - return _1046; + float3 _1048 = clip_color(param_1); + return _1048; } static inline __attribute__((always_inline)) @@ -861,9 +855,9 @@ float3 mix_blend(thread const float3& cb, thread const float3& cs, thread const float3 param_20 = cb; float3 param_21 = cs; float param_22 = sat(param_20); - float3 _1337 = set_sat(param_21, param_22); + float3 _1340 = set_sat(param_21, param_22); float3 param_23 = cb; - float3 param_24 = _1337; + float3 param_24 = _1340; float param_25 = lum(param_23); b = set_lum(param_24, param_25); break; @@ -873,9 +867,9 @@ float3 mix_blend(thread const float3& cb, thread const float3& cs, thread const float3 param_26 = cs; float3 param_27 = cb; float param_28 = sat(param_26); - float3 _1351 = set_sat(param_27, param_28); + float3 _1354 = set_sat(param_27, param_28); float3 param_29 = cb; - float3 param_30 = _1351; + float3 param_30 = _1354; float param_31 = lum(param_29); b = set_lum(param_30, param_31); break; @@ -986,45 +980,78 @@ float4 mix_compose(thread const float3& cb, thread const float3& cs, thread cons } case 13u: { - return float4(fast::max(float4(0.0), ((float4(1.0) - (float4(cs, as) * as)) + float4(1.0)) - (float4(cb, ab) * ab)).xyz, fast::max(0.0, ((1.0 - as) + 1.0) - ab)); - } - case 14u: - { - return float4(fast::min(float4(1.0), (float4(cs, as) * as) + (float4(cb, ab) * ab)).xyz, fast::min(1.0, as + ab)); + return fast::min(float4(1.0), float4((cs * as) + (cb * ab), as + ab)); } default: { break; } } - return (float4(cs, as) * (as * fa)) + (float4(cb, ab) * (ab * fb)); + float as_fa = as * fa; + float ab_fb = ab * fb; + float3 co = (cs * as_fa) + (cb * ab_fb); + return float4(co, as_fa + ab_fb); +} + +static inline __attribute__((always_inline)) +float4 mix_blend_compose(thread const float4& backdrop, thread const float4& src, thread const uint& mode) +{ + if ((mode & 32767u) == 3u) + { + return (backdrop * (1.0 - src.w)) + src; + } + float inv_src_a = 1.0 / (src.w + 1.0000000036274937255387218471014e-15); + float3 cs = src.xyz * inv_src_a; + float inv_backdrop_a = 1.0 / (backdrop.w + 1.0000000036274937255387218471014e-15); + float3 cb = backdrop.xyz * inv_backdrop_a; + uint blend_mode = mode >> uint(8); + float3 param = cb; + float3 param_1 = cs; + uint param_2 = blend_mode; + float3 blended = mix_blend(param, param_1, param_2); + cs = mix(cs, blended, float3(backdrop.w)); + uint comp_mode = mode & 255u; + if (comp_mode == 3u) + { + float3 co = mix(backdrop.xyz, cs, float3(src.w)); + return float4(co, src.w + (backdrop.w * (1.0 - src.w))); + } + else + { + float3 param_3 = cb; + float3 param_4 = cs; + float param_5 = backdrop.w; + float param_6 = src.w; + uint param_7 = comp_mode; + return mix_compose(param_3, param_4, param_5, param_6, param_7); + } } static inline __attribute__((always_inline)) -CmdJump CmdJump_read(thread const Alloc& a, thread const CmdJumpRef& ref, device Memory& v_291) +CmdJump CmdJump_read(thread const Alloc& a, thread const CmdJumpRef& ref, device Memory& v_297) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_291); + uint raw0 = read_mem(param, param_1, v_297); CmdJump s; s.new_ref = raw0; return s; } static inline __attribute__((always_inline)) -CmdJump Cmd_Jump_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_291) +CmdJump Cmd_Jump_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_297) { Alloc param = a; CmdJumpRef param_1 = CmdJumpRef{ ref.offset + 4u }; - return CmdJump_read(param, param_1, v_291); + return CmdJump_read(param, param_1, v_297); } -kernel void main0(device Memory& v_291 [[buffer(0)]], const device ConfigBuf& _1666 [[buffer(1)]], texture2d image [[texture(2)]], texture2d image_atlas [[texture(3)]], texture2d gradients [[texture(4)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]]) +kernel void main0(device Memory& v_297 [[buffer(0)]], const device ConfigBuf& _1681 [[buffer(1)]], texture2d image [[texture(2)]], texture2d image_atlas [[texture(3)]], texture2d gradients [[texture(4)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]]) { - uint tile_ix = (gl_WorkGroupID.y * _1666.conf.width_in_tiles) + gl_WorkGroupID.x; + uint tile_ix = (gl_WorkGroupID.y * _1681.conf.width_in_tiles) + gl_WorkGroupID.x; Alloc param; - param.offset = _1666.conf.ptcl_alloc.offset; + param.offset = _1681.conf.ptcl_alloc.offset; uint param_1 = tile_ix * 1024u; uint param_2 = 1024u; Alloc cmd_alloc = slice_mem(param, param_1, param_2); @@ -1037,7 +1064,7 @@ kernel void main0(device Memory& v_291 [[buffer(0)]], const device ConfigBuf& _1 rgba[i] = float4(0.0); } uint clip_depth = 0u; - bool mem_ok = v_291.mem_error == 0u; + bool mem_ok = v_297.mem_error == 0u; spvUnsafeArray df; TileSegRef tile_seg_ref; spvUnsafeArray area; @@ -1046,7 +1073,7 @@ kernel void main0(device Memory& v_291 [[buffer(0)]], const device ConfigBuf& _1 { Alloc param_3 = cmd_alloc; CmdRef param_4 = cmd_ref; - uint tag = Cmd_tag(param_3, param_4, v_291).tag; + uint tag = Cmd_tag(param_3, param_4, v_297).tag; if (tag == 0u) { break; @@ -1057,7 +1084,7 @@ kernel void main0(device Memory& v_291 [[buffer(0)]], const device ConfigBuf& _1 { Alloc param_5 = cmd_alloc; CmdRef param_6 = cmd_ref; - CmdStroke stroke = Cmd_Stroke_read(param_5, param_6, v_291); + CmdStroke stroke = Cmd_Stroke_read(param_5, param_6, v_297); for (uint k = 0u; k < 8u; k++) { df[k] = 1000000000.0; @@ -1070,7 +1097,7 @@ kernel void main0(device Memory& v_291 [[buffer(0)]], const device ConfigBuf& _1 bool param_9 = mem_ok; Alloc param_10 = new_alloc(param_7, param_8, param_9); TileSegRef param_11 = tile_seg_ref; - TileSeg seg = TileSeg_read(param_10, param_11, v_291); + TileSeg seg = TileSeg_read(param_10, param_11, v_297); float2 line_vec = seg.vector; for (uint k_1 = 0u; k_1 < 8u; k_1++) { @@ -1093,7 +1120,7 @@ kernel void main0(device Memory& v_291 [[buffer(0)]], const device ConfigBuf& _1 { Alloc param_13 = cmd_alloc; CmdRef param_14 = cmd_ref; - CmdFill fill = Cmd_Fill_read(param_13, param_14, v_291); + CmdFill fill = Cmd_Fill_read(param_13, param_14, v_297); for (uint k_3 = 0u; k_3 < 8u; k_3++) { area[k_3] = float(fill.backdrop); @@ -1106,7 +1133,7 @@ kernel void main0(device Memory& v_291 [[buffer(0)]], const device ConfigBuf& _1 bool param_17 = mem_ok; Alloc param_18 = new_alloc(param_15, param_16, param_17); TileSegRef param_19 = tile_seg_ref; - TileSeg seg_1 = TileSeg_read(param_18, param_19, v_291); + TileSeg seg_1 = TileSeg_read(param_18, param_19, v_297); for (uint k_4 = 0u; k_4 < 8u; k_4++) { uint param_20 = k_4; @@ -1150,7 +1177,7 @@ kernel void main0(device Memory& v_291 [[buffer(0)]], const device ConfigBuf& _1 { Alloc param_21 = cmd_alloc; CmdRef param_22 = cmd_ref; - CmdAlpha alpha = Cmd_Alpha_read(param_21, param_22, v_291); + CmdAlpha alpha = Cmd_Alpha_read(param_21, param_22, v_297); for (uint k_7 = 0u; k_7 < 8u; k_7++) { area[k_7] = alpha.alpha; @@ -1162,7 +1189,7 @@ kernel void main0(device Memory& v_291 [[buffer(0)]], const device ConfigBuf& _1 { Alloc param_23 = cmd_alloc; CmdRef param_24 = cmd_ref; - CmdColor color = Cmd_Color_read(param_23, param_24, v_291); + CmdColor color = Cmd_Color_read(param_23, param_24, v_297); uint param_25 = color.rgba_color; float4 fg = unpacksRGB(param_25); for (uint k_8 = 0u; k_8 < 8u; k_8++) @@ -1177,7 +1204,7 @@ kernel void main0(device Memory& v_291 [[buffer(0)]], const device ConfigBuf& _1 { Alloc param_26 = cmd_alloc; CmdRef param_27 = cmd_ref; - CmdLinGrad lin = Cmd_LinGrad_read(param_26, param_27, v_291); + CmdLinGrad lin = Cmd_LinGrad_read(param_26, param_27, v_297); float d_1 = ((lin.line_x * xy.x) + (lin.line_y * xy.y)) + lin.line_c; for (uint k_9 = 0u; k_9 < 8u; k_9++) { @@ -1187,10 +1214,10 @@ kernel void main0(device Memory& v_291 [[buffer(0)]], const device ConfigBuf& _1 int x = int(round(fast::clamp(my_d, 0.0, 1.0) * 511.0)); float4 fg_rgba = gradients.read(uint2(int2(x, int(lin.index)))); float3 param_29 = fg_rgba.xyz; - float3 _2238 = fromsRGB(param_29); - fg_rgba.x = _2238.x; - fg_rgba.y = _2238.y; - fg_rgba.z = _2238.z; + float3 _2254 = fromsRGB(param_29); + fg_rgba.x = _2254.x; + fg_rgba.y = _2254.y; + fg_rgba.z = _2254.z; float4 fg_k_1 = fg_rgba * area[k_9]; rgba[k_9] = (rgba[k_9] * (1.0 - fg_k_1.w)) + fg_k_1; } @@ -1201,7 +1228,7 @@ kernel void main0(device Memory& v_291 [[buffer(0)]], const device ConfigBuf& _1 { Alloc param_30 = cmd_alloc; CmdRef param_31 = cmd_ref; - CmdRadGrad rad = Cmd_RadGrad_read(param_30, param_31, v_291); + CmdRadGrad rad = Cmd_RadGrad_read(param_30, param_31, v_297); for (uint k_10 = 0u; k_10 < 8u; k_10++) { uint param_32 = k_10; @@ -1213,10 +1240,10 @@ kernel void main0(device Memory& v_291 [[buffer(0)]], const device ConfigBuf& _1 int x_1 = int(round(fast::clamp(t_2, 0.0, 1.0) * 511.0)); float4 fg_rgba_1 = gradients.read(uint2(int2(x_1, int(rad.index)))); float3 param_33 = fg_rgba_1.xyz; - float3 _2348 = fromsRGB(param_33); - fg_rgba_1.x = _2348.x; - fg_rgba_1.y = _2348.y; - fg_rgba_1.z = _2348.z; + float3 _2364 = fromsRGB(param_33); + fg_rgba_1.x = _2364.x; + fg_rgba_1.y = _2364.y; + fg_rgba_1.z = _2364.z; float4 fg_k_2 = fg_rgba_1 * area[k_10]; rgba[k_10] = (rgba[k_10] * (1.0 - fg_k_2.w)) + fg_k_2; } @@ -1227,7 +1254,7 @@ kernel void main0(device Memory& v_291 [[buffer(0)]], const device ConfigBuf& _1 { Alloc param_34 = cmd_alloc; CmdRef param_35 = cmd_ref; - CmdImage fill_img = Cmd_Image_read(param_34, param_35, v_291); + CmdImage fill_img = Cmd_Image_read(param_34, param_35, v_297); uint2 param_36 = xy_uint; CmdImage param_37 = fill_img; spvUnsafeArray img; @@ -1246,8 +1273,8 @@ kernel void main0(device Memory& v_291 [[buffer(0)]], const device ConfigBuf& _1 { uint d_2 = min(clip_depth, 127u); float4 param_38 = float4(rgba[k_12]); - uint _2454 = packsRGB(param_38); - blend_stack[d_2][k_12] = _2454; + uint _2470 = packsRGB(param_38); + blend_stack[d_2][k_12] = _2470; rgba[k_12] = float4(0.0); } clip_depth++; @@ -1258,7 +1285,7 @@ kernel void main0(device Memory& v_291 [[buffer(0)]], const device ConfigBuf& _1 { Alloc param_39 = cmd_alloc; CmdRef param_40 = cmd_ref; - CmdEndClip end_clip = Cmd_EndClip_read(param_39, param_40, v_291); + CmdEndClip end_clip = Cmd_EndClip_read(param_39, param_40, v_297); uint blend_mode = end_clip.blend >> uint(8); uint comp_mode = end_clip.blend & 255u; clip_depth--; @@ -1268,31 +1295,19 @@ kernel void main0(device Memory& v_291 [[buffer(0)]], const device ConfigBuf& _1 uint param_41 = blend_stack[d_3][k_13]; float4 bg = unpacksRGB(param_41); float4 fg_1 = rgba[k_13] * area[k_13]; - float3 param_42 = bg.xyz; - float3 param_43 = fg_1.xyz; - uint param_44 = blend_mode; - float3 blend = mix_blend(param_42, param_43, param_44); - float4 _2521 = fg_1; - float _2525 = fg_1.w; - float3 _2532 = mix(_2521.xyz, blend, float3(float((_2525 * bg.w) > 0.0))); - fg_1.x = _2532.x; - fg_1.y = _2532.y; - fg_1.z = _2532.z; - float3 param_45 = bg.xyz; - float3 param_46 = fg_1.xyz; - float param_47 = bg.w; - float param_48 = fg_1.w; - uint param_49 = comp_mode; - rgba[k_13] = mix_compose(param_45, param_46, param_47, param_48, param_49); + float4 param_42 = bg; + float4 param_43 = fg_1; + uint param_44 = end_clip.blend; + rgba[k_13] = mix_blend_compose(param_42, param_43, param_44); } cmd_ref.offset += 8u; break; } case 11u: { - Alloc param_50 = cmd_alloc; - CmdRef param_51 = cmd_ref; - cmd_ref = CmdRef{ Cmd_Jump_read(param_50, param_51, v_291).new_ref }; + Alloc param_45 = cmd_alloc; + CmdRef param_46 = cmd_ref; + cmd_ref = CmdRef{ Cmd_Jump_read(param_45, param_46, v_297).new_ref }; cmd_alloc.offset = cmd_ref.offset; break; } @@ -1300,8 +1315,8 @@ kernel void main0(device Memory& v_291 [[buffer(0)]], const device ConfigBuf& _1 } for (uint i_1 = 0u; i_1 < 8u; i_1++) { - uint param_52 = i_1; - image.write(float4(rgba[i_1].w), uint2(int2(xy_uint + chunk_offset(param_52)))); + uint param_47 = i_1; + image.write(float4(rgba[i_1].w), uint2(int2(xy_uint + chunk_offset(param_47)))); } } diff --git a/piet-gpu/shader/gen/kernel4_gray.spv b/piet-gpu/shader/gen/kernel4_gray.spv index 463340187..6ff17912d 100644 Binary files a/piet-gpu/shader/gen/kernel4_gray.spv and b/piet-gpu/shader/gen/kernel4_gray.spv differ diff --git a/piet-gpu/shader/gen/path_coarse.dxil b/piet-gpu/shader/gen/path_coarse.dxil index 9fd593ca4..b6c9398fd 100644 Binary files a/piet-gpu/shader/gen/path_coarse.dxil and b/piet-gpu/shader/gen/path_coarse.dxil differ diff --git a/piet-gpu/shader/gen/pathseg.dxil b/piet-gpu/shader/gen/pathseg.dxil index 613071260..7ce4684b8 100644 Binary files a/piet-gpu/shader/gen/pathseg.dxil and b/piet-gpu/shader/gen/pathseg.dxil differ diff --git a/piet-gpu/shader/gen/pathtag_reduce.dxil b/piet-gpu/shader/gen/pathtag_reduce.dxil index 4c2bd233c..ff544b84c 100644 Binary files a/piet-gpu/shader/gen/pathtag_reduce.dxil and b/piet-gpu/shader/gen/pathtag_reduce.dxil differ diff --git a/piet-gpu/shader/gen/pathtag_root.dxil b/piet-gpu/shader/gen/pathtag_root.dxil index 77f12e6db..48584bd22 100644 Binary files a/piet-gpu/shader/gen/pathtag_root.dxil and b/piet-gpu/shader/gen/pathtag_root.dxil differ diff --git a/piet-gpu/shader/gen/transform_leaf.dxil b/piet-gpu/shader/gen/transform_leaf.dxil index f9f31e6ea..0c1e37624 100644 Binary files a/piet-gpu/shader/gen/transform_leaf.dxil and b/piet-gpu/shader/gen/transform_leaf.dxil differ diff --git a/piet-gpu/shader/gen/transform_reduce.dxil b/piet-gpu/shader/gen/transform_reduce.dxil index 978dd98ac..fc3a311f9 100644 Binary files a/piet-gpu/shader/gen/transform_reduce.dxil and b/piet-gpu/shader/gen/transform_reduce.dxil differ diff --git a/piet-gpu/shader/gen/transform_root.dxil b/piet-gpu/shader/gen/transform_root.dxil index 5b4f05930..a33ff7f18 100644 Binary files a/piet-gpu/shader/gen/transform_root.dxil and b/piet-gpu/shader/gen/transform_root.dxil differ diff --git a/piet-gpu/shader/kernel4.comp b/piet-gpu/shader/kernel4.comp index c49e2fa9d..99fd22ee9 100644 --- a/piet-gpu/shader/kernel4.comp +++ b/piet-gpu/shader/kernel4.comp @@ -9,6 +9,11 @@ #version 450 #extension GL_GOOGLE_include_directive : enable +// We can do rendering either in sRGB colorspace (for compatibility) +// or in a linear colorspace, with conversions to sRGB (which will give +// higher quality antialiasing among other things). +#define DO_SRGB_CONVERSION 0 + #include "mem.h" #include "setup.h" @@ -39,18 +44,26 @@ layout(rgba8, set = 0, binding = 4) uniform restrict readonly image2D gradients; #define MAX_BLEND_STACK 128 mediump vec3 tosRGB(mediump vec3 rgb) { +#if DO_SRGB_CONVERSION bvec3 cutoff = greaterThanEqual(rgb, vec3(0.0031308)); mediump vec3 below = vec3(12.92) * rgb; mediump vec3 above = vec3(1.055) * pow(rgb, vec3(0.41666)) - vec3(0.055); return mix(below, above, cutoff); +#else + return rgb; +#endif } mediump vec3 fromsRGB(mediump vec3 srgb) { +#if DO_SRGB_CONVERSION // Formula from EXT_sRGB. bvec3 cutoff = greaterThanEqual(srgb, vec3(0.04045)); mediump vec3 below = srgb / vec3(12.92); mediump vec3 above = pow((srgb + vec3(0.055)) / vec3(1.055), vec3(2.4)); return mix(below, above, cutoff); +#else + return srgb; +#endif } // unpacksRGB unpacks a color in the sRGB color space to a vec4 in the linear color @@ -242,10 +255,7 @@ void main() { uint d = min(clip_depth, MAX_BLEND_STACK - 1); mediump vec4 bg = unpacksRGB(blend_stack[d][k]); mediump vec4 fg = rgba[k] * area[k]; - vec3 blend = mix_blend(bg.rgb, fg.rgb, blend_mode); - // Apply the blend color only where the foreground and background overlap. - fg.rgb = mix(fg.rgb, blend, float((fg.a * bg.a) > 0.0)); - rgba[k] = mix_compose(bg.rgb, fg.rgb, bg.a, fg.a, comp_mode); + rgba[k] = mix_blend_compose(bg, fg, end_clip.blend); } cmd_ref.offset += 4 + CmdEndClip_size; break; diff --git a/piet-gpu/src/blend.rs b/piet-gpu/src/blend.rs index aacf5973e..f0ca002ed 100644 --- a/piet-gpu/src/blend.rs +++ b/piet-gpu/src/blend.rs @@ -33,6 +33,8 @@ pub enum BlendMode { Saturation = 13, Color = 14, Luminosity = 15, + // Clip is the same as normal, but doesn't always push a blend group. + Clip = 128, } #[derive(Copy, Clone, PartialEq, Eq, Debug)] @@ -51,8 +53,7 @@ pub enum CompositionMode { DestAtop = 10, Xor = 11, Plus = 12, - PlusDarker = 13, - PlusLighter = 14, + PlusLighter = 13, } #[derive(Copy, Clone, PartialEq, Eq, Debug)] @@ -77,7 +78,7 @@ impl Blend { impl Default for Blend { fn default() -> Self { Self { - mode: BlendMode::Normal, + mode: BlendMode::Clip, composition_mode: CompositionMode::SrcOver, } } diff --git a/piet-gpu/src/encoder.rs b/piet-gpu/src/encoder.rs index a24ddbc66..d0ef1eb71 100644 --- a/piet-gpu/src/encoder.rs +++ b/piet-gpu/src/encoder.rs @@ -306,16 +306,21 @@ impl Encoder { self.drawdata_stream.extend(bytemuck::bytes_of(&element)); } - /// Encode a fill radial gradient draw object. /// /// This should be encoded after a path. pub fn fill_rad_gradient(&mut self, index: u32, p0: [f32; 2], p1: [f32; 2], r0: f32, r1: f32) { self.drawtag_stream.push(DRAWTAG_FILLRADGRADIENT); - let element = FillRadGradient { index, p0, p1, r0, r1 }; + let element = FillRadGradient { + index, + p0, + p1, + r0, + r1, + }; self.drawdata_stream.extend(bytemuck::bytes_of(&element)); } - + /// Start a clip. pub fn begin_clip(&mut self, blend: Option) { self.drawtag_stream.push(DRAWTAG_BEGINCLIP); diff --git a/piet-gpu/src/gradient.rs b/piet-gpu/src/gradient.rs index e6559089b..443eaec29 100644 --- a/piet-gpu/src/gradient.rs +++ b/piet-gpu/src/gradient.rs @@ -19,7 +19,7 @@ use std::collections::hash_map::{Entry, HashMap}; use piet::kurbo::Point; -use piet::{Color, FixedLinearGradient, GradientStop, FixedRadialGradient}; +use piet::{Color, FixedLinearGradient, FixedRadialGradient, GradientStop}; /// Radial gradient compatible with COLRv1 spec #[derive(Debug, Clone)] diff --git a/piet-gpu/src/lib.rs b/piet-gpu/src/lib.rs index d32a9c53a..ba06e712d 100644 --- a/piet-gpu/src/lib.rs +++ b/piet-gpu/src/lib.rs @@ -13,8 +13,8 @@ use std::convert::TryInto; pub use blend::{Blend, BlendMode, CompositionMode}; pub use encoder::EncodedSceneRef; -pub use render_ctx::PietGpuRenderContext; pub use gradient::Colrv1RadialGradient; +pub use render_ctx::PietGpuRenderContext; use piet::kurbo::Vec2; use piet::{ImageFormat, RenderContext}; diff --git a/piet-gpu/src/render_ctx.rs b/piet-gpu/src/render_ctx.rs index dca03eb23..14f2561de 100644 --- a/piet-gpu/src/render_ctx.rs +++ b/piet-gpu/src/render_ctx.rs @@ -1,9 +1,12 @@ +// This should match the value in kernel4.comp for correct rendering. +const DO_SRGB_CONVERSION: bool = false; + use std::borrow::Cow; use crate::encoder::GlyphEncoder; use crate::stages::{Config, Transform}; use crate::MAX_BLEND_STACK; -use piet::kurbo::{Affine, Insets, PathEl, Point, Rect, Shape}; +use piet::kurbo::{Affine, PathEl, Point, Rect, Shape}; use piet::{ Color, Error, FixedGradient, ImageFormat, InterpolationMode, IntoBrush, RenderContext, StrokeStyle, @@ -13,7 +16,7 @@ use piet_gpu_hal::BufWrite; use piet_gpu_types::encoder::{Encode, Encoder}; use piet_gpu_types::scene::Element; -use crate::gradient::{LinearGradient, RadialGradient, RampCache, Colrv1RadialGradient}; +use crate::gradient::{Colrv1RadialGradient, LinearGradient, RadialGradient, RampCache}; use crate::text::Font; pub use crate::text::{PietGpuText, PietGpuTextLayout, PietGpuTextLayoutBuilder}; use crate::Blend; @@ -471,19 +474,27 @@ fn rect_to_f32_4(rect: Rect) -> [f32; 4] { } fn to_srgb(f: f64) -> f64 { - if f <= 0.0031308 { - f * 12.92 + if DO_SRGB_CONVERSION { + if f <= 0.0031308 { + f * 12.92 + } else { + let a = 0.055; + (1. + a) * f64::powf(f, f64::recip(2.4)) - a + } } else { - let a = 0.055; - (1. + a) * f64::powf(f, f64::recip(2.4)) - a + f } } fn from_srgb(f: f64) -> f64 { - if f <= 0.04045 { - f / 12.92 + if DO_SRGB_CONVERSION { + if f <= 0.04045 { + f / 12.92 + } else { + let a = 0.055; + f64::powf((f + a) * f64::recip(1. + a), 2.4) + } } else { - let a = 0.055; - f64::powf((f + a) * f64::recip(1. + a), 2.4) + f } } diff --git a/piet-gpu/src/stages/clip.rs b/piet-gpu/src/stages/clip.rs index 2fd195b94..b7b77ebfc 100644 --- a/piet-gpu/src/stages/clip.rs +++ b/piet-gpu/src/stages/clip.rs @@ -16,7 +16,9 @@ //! The clip processing stage (includes substages). -use piet_gpu_hal::{include_shader, BindType, Buffer, ComputePass, DescriptorSet, Pipeline, Session}; +use piet_gpu_hal::{ + include_shader, BindType, Buffer, ComputePass, DescriptorSet, Pipeline, Session, +}; // Note that this isn't the code/stage/binding pattern of most of the other stages // in the new element processing pipeline. We want to move those temporary buffers diff --git a/piet-gpu/src/test_scenes.rs b/piet-gpu/src/test_scenes.rs index bfd2af2d1..e3aeabaf1 100644 --- a/piet-gpu/src/test_scenes.rs +++ b/piet-gpu/src/test_scenes.rs @@ -2,10 +2,10 @@ use rand::{Rng, RngCore}; -use crate::{Blend, BlendMode, CompositionMode, PietGpuRenderContext, Colrv1RadialGradient}; +use crate::{Blend, BlendMode, Colrv1RadialGradient, CompositionMode, PietGpuRenderContext}; use piet::kurbo::{Affine, BezPath, Circle, Line, Point, Rect, Shape}; use piet::{ - Color, FixedGradient, FixedRadialGradient, GradientStop, Text, TextAttribute, TextLayoutBuilder, + Color, GradientStop, LinearGradient, Text, TextAttribute, TextLayoutBuilder, UnitPoint, }; use crate::{PicoSvg, RenderContext, Vec2}; @@ -200,6 +200,113 @@ fn render_tiger(rc: &mut impl RenderContext) { println!("flattening and encoding time: {:?}", start.elapsed()); } +pub fn render_blend_square(rc: &mut PietGpuRenderContext, blend: Blend) { + // Inspired by https://developer.mozilla.org/en-US/docs/Web/CSS/mix-blend-mode + let rect = Rect::new(0., 0., 200., 200.); + let stops = vec![ + GradientStop { + color: Color::BLACK, + pos: 0.0, + }, + GradientStop { + color: Color::WHITE, + pos: 1.0, + }, + ]; + let linear = LinearGradient::new(UnitPoint::LEFT, UnitPoint::RIGHT, stops); + rc.fill(rect, &linear); + const GRADIENTS: &[(f64, f64, Color)] = &[ + (150., 0., Color::rgb8(255, 240, 64)), + (175., 100., Color::rgb8(255, 96, 240)), + (125., 200., Color::rgb8(64, 192, 255)), + ]; + for (x, y, c) in GRADIENTS { + let stops = vec![ + GradientStop { + color: c.clone(), + pos: 0.0, + }, + GradientStop { + color: Color::rgba8(0, 0, 0, 0), + pos: 1.0, + }, + ]; + let rad = Colrv1RadialGradient { + center0: Point::new(*x, *y), + center1: Point::new(*x, *y), + radius0: 0.0, + radius1: 100.0, + stops, + }; + let brush = rc.radial_gradient_colrv1(&rad); + rc.fill(Rect::new(0., 0., 200., 200.), &brush); + } + const COLORS: &[Color] = &[ + Color::rgb8(255, 0, 0), + Color::rgb8(0, 255, 0), + Color::rgb8(0, 0, 255), + ]; + let _ = rc.with_save(|rc| { + // Isolation (this can be removed for non-isolated version) + rc.blend(rect, BlendMode::Normal.into()); + for (i, c) in COLORS.iter().enumerate() { + let stops = vec![ + GradientStop { + color: Color::WHITE, + pos: 0.0, + }, + GradientStop { + color: c.clone(), + pos: 1.0, + }, + ]; + // squash the ellipse + let a = Affine::translate((100., 100.)) + * Affine::rotate(std::f64::consts::FRAC_PI_3 * (i * 2 + 1) as f64) + * Affine::scale_non_uniform(1.0, 0.357) + * Affine::translate((-100., -100.)); + let linear = LinearGradient::new(UnitPoint::TOP, UnitPoint::BOTTOM, stops); + let _ = rc.with_save(|rc| { + rc.blend(rect, blend); + rc.transform(a); + rc.fill(Circle::new((100., 100.), 90.), &linear); + Ok(()) + }); + } + Ok(()) + }); +} + +pub fn render_blend_grid(rc: &mut PietGpuRenderContext) { + const BLEND_MODES: &[BlendMode] = &[ + BlendMode::Normal, + BlendMode::Multiply, + BlendMode::Darken, + BlendMode::Screen, + BlendMode::Lighten, + BlendMode::Overlay, + BlendMode::ColorDodge, + BlendMode::ColorBurn, + BlendMode::HardLight, + BlendMode::SoftLight, + BlendMode::Difference, + BlendMode::Exclusion, + BlendMode::Hue, + BlendMode::Saturation, + BlendMode::Color, + BlendMode::Luminosity, + ]; + for (ix, &blend) in BLEND_MODES.iter().enumerate() { + let _ = rc.with_save(|rc| { + let i = ix % 4; + let j = ix / 4; + rc.transform(Affine::translate((i as f64 * 225., j as f64 * 225.))); + render_blend_square(rc, blend.into()); + Ok(()) + }); + } +} + pub fn render_anim_frame(rc: &mut impl RenderContext, i: usize) { rc.fill( Rect::new(0.0, 0.0, 1000.0, 1000.0), diff --git a/piet-scene/src/glyph/mod.rs b/piet-scene/src/glyph/mod.rs index 3bfa36ce6..81d9735a6 100644 --- a/piet-scene/src/glyph/mod.rs +++ b/piet-scene/src/glyph/mod.rs @@ -114,7 +114,9 @@ impl<'a> GlyphProvider<'a> { }; xform_stack.push(xform); } - Command::PopTransform => { xform_stack.pop(); }, + Command::PopTransform => { + xform_stack.pop(); + } Command::PushClip(path_index) => { let path = glyph.path(*path_index)?; if let Some(xform) = xform_stack.last() {