diff --git a/src/platform/windows/display.h b/src/platform/windows/display.h index 4e802bfb387..b5db7be6ef8 100644 --- a/src/platform/windows/display.h +++ b/src/platform/windows/display.h @@ -346,6 +346,43 @@ namespace platf::dxgi { make_amf_encode_device(pix_fmt_e pix_fmt) override; std::atomic next_image_id; + + protected: + // Shared cursor blending pipeline used by display backends that need to + // composite a software cursor on top of a captured frame (DXGI Desktop + // Duplication, VDD direct-capture). AMD/WGC paths use different schemes + // and shadow these in their own subclasses where applicable. + sampler_state_t sampler_linear; + sampler_state_t sampler_point; + + blend_t blend_alpha; + blend_t blend_invert; + blend_t blend_disable; + + ps_t cursor_ps; + vs_t cursor_vs; + + gpu_cursor_t cursor_alpha; + gpu_cursor_t cursor_xor; + + /** + * @brief Create the cursor blend shaders, blend states, samplers, and + * rotation constant buffer. Caller is responsible for `device` / + * `device_ctx` being valid (typically called from a subclass + * `init()` after `display_base_t::init()` succeeds). + * @return 0 on success, -1 on any failure (errors already logged). + */ + int + init_cursor_pipeline(const ::video::config_t &config); + + /** + * @brief Draw the currently-configured cursor_alpha / cursor_xor onto the + * given render target. Caller must hold the capture mutex for the + * underlying image. After the draw the blend state is reset to + * `blend_disable` and the RTV/SRV slots are cleared. + */ + void + blend_cursor(ID3D11RenderTargetView *capture_rt); }; /** @@ -398,19 +435,6 @@ namespace platf::dxgi { release_snapshot() override; duplication_t dup; - sampler_state_t sampler_linear; - // Point sampler for high-quality resampling shaders (avoid double-filtering). - sampler_state_t sampler_point; - - blend_t blend_alpha; - blend_t blend_invert; - blend_t blend_disable; - - ps_t cursor_ps; - vs_t cursor_vs; - - gpu_cursor_t cursor_alpha; - gpu_cursor_t cursor_xor; texture2d_t old_surface_delayed_destruction; std::chrono::steady_clock::time_point old_surface_timestamp; @@ -585,6 +609,37 @@ namespace platf::dxgi { capture_e release_frame(); + /** + * @brief One snapshot of the producer-published hardware cursor state. + * Mirrors the layout of `CursorSharedMetadata` in ZakoVDD's Driver.cpp. + * `shape_buffer` is owned by the snapshot (copied out of SHM). + */ + struct cursor_snapshot { + bool valid = false; ///< True if at least one publish observed. + bool visible = false; + bool shape_updated = false; ///< True iff shape_id changed since last poll. + bool position_updated = false; + INT32 x = 0; ///< Top-left of cursor image, desktop-relative. + INT32 y = 0; + UINT32 position_id = 0; + UINT32 shape_id = 0; + UINT32 shape_type = 0; ///< IDDCX_CURSOR_SHAPE_TYPE value (0=mono, 1=color, 2=masked color). + UINT32 width = 0; + UINT32 height = 0; + UINT32 pitch = 0; + INT32 xhot = 0; + INT32 yhot = 0; + std::vector shape_buffer; ///< Empty if !shape_updated or shape is uninitialized. + }; + + /** + * @brief Non-blocking poll of the latest cursor state published by the + * driver-side CursorExporter. Returns false if cursor SHM is not + * attached or nothing has been published yet. + */ + bool + poll_cursor(cursor_snapshot &out); + /** * @brief Reported producer-side dimensions / format / HDR metadata. */ @@ -606,6 +661,13 @@ namespace platf::dxgi { keyed_mutex_t m_keyedMutex; bool m_holdsKey = false; + // Cursor SHM (optional; opened on a best-effort basis in init()). + HANDLE m_hCursorMeta = nullptr; + void *m_pCursorMeta = nullptr; + HANDLE m_hCursorEvent = nullptr; // For diagnostic use; poll_cursor() is event-free. + UINT32 m_lastSeenCursorShapeId = 0xFFFFFFFFu; + UINT32 m_lastSeenCursorPositionId = 0xFFFFFFFFu; + UINT m_width = 0; UINT m_height = 0; DXGI_FORMAT m_format = DXGI_FORMAT_UNKNOWN; diff --git a/src/platform/windows/display_vdd.cpp b/src/platform/windows/display_vdd.cpp index 36a51a21c5e..4abd18027eb 100644 --- a/src/platform/windows/display_vdd.cpp +++ b/src/platform/windows/display_vdd.cpp @@ -50,6 +50,34 @@ namespace platf::dxgi { static constexpr UINT32 VDD_META_MAGIC = 0x5A564446; // 'ZVDF' static constexpr UINT32 VDD_META_VERSION = 1; + // Mirror of CursorSharedMetadata in ZakoVDD/Driver.cpp. Layout is + // 4-byte aligned (#pragma pack(push, 4) on the producer side); the + // standard ABI on x64 already aligns the fields below identically. + struct CursorSharedMetadata { + UINT32 Magic; // 'ZVCU' = 0x5A564355 + UINT32 Version; // 1 + UINT32 IsVisible; // 0/1 + INT32 PositionX; // top-left of cursor image (already hot-spot adjusted, DXGI semantics) + INT32 PositionY; + UINT32 PositionId; // monotonic on position change + UINT32 ShapeId; // monotonic on shape change + UINT32 ShapeType; // IDDCX_CURSOR_SHAPE_TYPE value (0=mono, 1=color, 2=masked color) + UINT32 Width; + UINT32 Height; + UINT32 Pitch; + INT32 XHot; + INT32 YHot; + UINT32 SdrWhiteLevelX1000; + UINT32 ShapeBufferSize; + UINT32 Reserved0; + UINT64 LastUpdateQpc; + // Followed by up to 256 KiB of shape pixels. + }; + + static constexpr UINT32 VDD_CURSOR_MAGIC = 0x5A564355; // 'ZVCU' + static constexpr UINT32 VDD_CURSOR_VERSION = 1; + static constexpr UINT32 VDD_CURSOR_MAX_BYTES = 256u * 256u * 4u; // matches driver + vdd_capture_t::vdd_capture_t() = default; vdd_capture_t::~vdd_capture_t() { @@ -76,6 +104,20 @@ namespace platf::dxgi { CloseHandle(m_hEvent); m_hEvent = nullptr; } + if (m_pCursorMeta) { + UnmapViewOfFile(m_pCursorMeta); + m_pCursorMeta = nullptr; + } + if (m_hCursorMeta) { + CloseHandle(m_hCursorMeta); + m_hCursorMeta = nullptr; + } + if (m_hCursorEvent) { + CloseHandle(m_hCursorEvent); + m_hCursorEvent = nullptr; + } + m_lastSeenCursorShapeId = 0xFFFFFFFFu; + m_lastSeenCursorPositionId = 0xFFFFFFFFu; } int @@ -185,6 +227,40 @@ namespace platf::dxgi { << " "sv << m_width << "x"sv << m_height << " fmt="sv << static_cast(m_format) << " hdr="sv << m_is_hdr; + + // Optional: attach to the cursor SHM exported by the driver-side + // CursorExporter. Best-effort -- old driver builds won't have these + // mappings, in which case poll_cursor() returns false and we render + // frames without an overlay cursor (preserving previous behaviour). + { + std::wstring cursor_meta_name = L"Global\\ZakoVDD_CursorMeta_" + std::to_wstring(monitor_idx); + std::wstring cursor_event_name = L"Global\\ZakoVDD_CursorReady_" + std::to_wstring(monitor_idx); + + m_hCursorMeta = OpenFileMappingW(FILE_MAP_READ, FALSE, cursor_meta_name.c_str()); + if (m_hCursorMeta) { + const SIZE_T map_size = sizeof(CursorSharedMetadata) + VDD_CURSOR_MAX_BYTES; + m_pCursorMeta = MapViewOfFile(m_hCursorMeta, FILE_MAP_READ, 0, 0, map_size); + if (!m_pCursorMeta) { + BOOST_LOG(warning) << "[vdd_capture] cursor MapViewOfFile failed: "sv << GetLastError() + << "; cursor overlay disabled."sv; + CloseHandle(m_hCursorMeta); + m_hCursorMeta = nullptr; + } + else { + // Event is purely diagnostic / future poll-driven wait; poll_cursor() + // works directly off the mapping so absence is non-fatal. + m_hCursorEvent = OpenEventW(SYNCHRONIZE, FALSE, cursor_event_name.c_str()); + BOOST_LOG(info) << "[vdd_capture] cursor SHM attached (event="sv + << (m_hCursorEvent ? "yes"sv : "no"sv) << ")"sv; + } + } + else { + BOOST_LOG(info) << "[vdd_capture] cursor SHM not present for monitor "sv + << monitor_idx << " (driver may predate cursor export); "sv + << "clients will see no overlay cursor for this output."sv; + } + } + return 0; } @@ -257,6 +333,102 @@ namespace platf::dxgi { return capture_e::ok; } + bool + vdd_capture_t::poll_cursor(cursor_snapshot &out) { + if (!m_pCursorMeta) { + return false; + } + + auto *meta = static_cast(m_pCursorMeta); + + // Lock-free read pattern: snapshot header, then verify ShapeBufferSize + // sanity. The producer writes payload before flipping ShapeId; if we read + // a torn shape we'll simply pick it up on the next poll (cursor state + // remains valid otherwise). + UINT32 magic = meta->Magic; + UINT32 version = meta->Version; + if (magic != VDD_CURSOR_MAGIC || version != VDD_CURSOR_VERSION) { + return false; // producer hasn't published yet, or version skew + } + + UINT32 shape_id = meta->ShapeId; + UINT32 position_id = meta->PositionId; + UINT32 shape_buffer_size = meta->ShapeBufferSize; + if (shape_buffer_size > VDD_CURSOR_MAX_BYTES) { + return false; // torn read; try again next time + } + + // Snapshot geometry headers and validate them before publishing. A torn + // read of the header could leave Width/Height/Pitch/ShapeType inconsistent + // with ShapeBufferSize; downstream make_cursor_alpha_image() / + // make_cursor_xor_image() do pointer arithmetic against + // Pitch * Height into out.shape_buffer, so any mismatch must be rejected + // here to avoid out-of-bounds reads. + UINT32 shape_type = meta->ShapeType; + UINT32 width = meta->Width; + UINT32 height = meta->Height; + UINT32 pitch = meta->Pitch; + if (shape_type > 2) { + return false; // unknown shape type + } + // Hardware cursor sizes are capped at 256x256 (512 tall for monochrome + // which stores AND+XOR masks back-to-back). Reject anything bigger so a + // bogus header can't get past the size budget check below via overflow. + constexpr UINT32 kMaxCursorDim = 512; + if (width == 0 || width > kMaxCursorDim || height == 0 || height > kMaxCursorDim) { + return false; + } + // Per-type minimum pitch sanity. For color/masked-color each row is RGBA + // (>= 4*width). For monochrome each row is one packed-bit AND/XOR mask + // (>= ceil(width/8)). Height for monochrome encodes AND+XOR stacked, so + // it must be even. + UINT64 required_bytes = 0; + if (shape_type == 0 /* MONOCHROME */) { + if ((height & 1u) != 0u || pitch < (width + 7u) / 8u) { + return false; + } + required_bytes = static_cast(pitch) * height; // AND+XOR stacked + } else { + if (pitch < static_cast(width) * 4u) { + return false; + } + required_bytes = static_cast(pitch) * height; + } + if (required_bytes > VDD_CURSOR_MAX_BYTES || shape_buffer_size < required_bytes) { + return false; // header/payload mismatch — likely torn read + } + + out.valid = true; + out.visible = (meta->IsVisible != 0); + out.x = meta->PositionX; + out.y = meta->PositionY; + out.position_id = position_id; + out.shape_id = shape_id; + out.shape_type = shape_type; + out.width = width; + out.height = height; + out.pitch = pitch; + out.xhot = meta->XHot; + out.yhot = meta->YHot; + out.position_updated = (position_id != m_lastSeenCursorPositionId); + out.shape_updated = (shape_id != m_lastSeenCursorShapeId); + + if (out.shape_updated && shape_buffer_size > 0) { + const auto *payload = reinterpret_cast(meta + 1); + out.shape_buffer.assign(payload, payload + shape_buffer_size); + // Re-check shape id after copy. If it changed mid-copy, drop this shape + // and let the next poll pick up the consistent version. + if (meta->ShapeId != shape_id) { + out.shape_buffer.clear(); + out.shape_updated = false; + } + } + + if (out.shape_updated) m_lastSeenCursorShapeId = shape_id; + if (out.position_updated) m_lastSeenCursorPositionId = position_id; + return true; + } + // =========================================================================== // display_vdd_vram_t // =========================================================================== @@ -382,6 +554,16 @@ namespace platf::dxgi { << " fmt="sv << dxgi_format_to_string(capture_format) << " hdr="sv << dup.is_hdr() << " linear_gamma="sv << capture_linear_gamma; + + // Initialise the shared cursor blend pipeline (same shaders / blend states + // as display_ddup_vram_t). If init fails we leave it for the caller to + // tear down -- there is no degraded mode that's worth supporting because + // the same shaders are required for HDR output anyway. + if (init_cursor_pipeline(config) != 0) { + BOOST_LOG(error) << "[vdd] cursor pipeline init failed"sv; + return -1; + } + return 0; } diff --git a/src/platform/windows/display_vram.cpp b/src/platform/windows/display_vram.cpp index bf3fa108662..76f049ac2f4 100644 --- a/src/platform/windows/display_vram.cpp +++ b/src/platform/windows/display_vram.cpp @@ -2449,38 +2449,6 @@ namespace platf::dxgi { } } - auto blend_cursor = [&](img_d3d_t &d3d_img) { - device_ctx->VSSetShader(cursor_vs.get(), nullptr, 0); - device_ctx->PSSetShader(cursor_ps.get(), nullptr, 0); - device_ctx->OMSetRenderTargets(1, &d3d_img.capture_rt, nullptr); - - if (cursor_alpha.texture.get()) { - // Perform an alpha blending operation - device_ctx->OMSetBlendState(blend_alpha.get(), nullptr, 0xFFFFFFFFu); - - device_ctx->PSSetShaderResources(0, 1, &cursor_alpha.input_res); - device_ctx->RSSetViewports(1, &cursor_alpha.cursor_view); - device_ctx->Draw(3, 0); - } - - if (cursor_xor.texture.get()) { - // Perform an invert blending without touching alpha values - device_ctx->OMSetBlendState(blend_invert.get(), nullptr, 0x00FFFFFFu); - - device_ctx->PSSetShaderResources(0, 1, &cursor_xor.input_res); - device_ctx->RSSetViewports(1, &cursor_xor.cursor_view); - device_ctx->Draw(3, 0); - } - - device_ctx->OMSetBlendState(blend_disable.get(), nullptr, 0xFFFFFFFFu); - - ID3D11RenderTargetView *emptyRenderTarget = nullptr; - device_ctx->OMSetRenderTargets(1, &emptyRenderTarget, nullptr); - device_ctx->RSSetViewports(0, nullptr); - ID3D11ShaderResourceView *emptyShaderResourceView = nullptr; - device_ctx->PSSetShaderResources(0, 1, &emptyShaderResourceView); - }; - switch (out_frame_action) { case ofa::forward_last_img: { auto p_img = std::get_if>(&last_frame_variant); @@ -2512,7 +2480,7 @@ namespace platf::dxgi { BOOST_LOG(error) << "Failed to lock capture texture for cursor blend"; return capture_e::error; } - blend_cursor(*d3d_img); + blend_cursor(d3d_img->capture_rt.get()); } else if (p_surface) { // We have an intermediate surface, copy it first then blend @@ -2522,7 +2490,7 @@ namespace platf::dxgi { if (!d3d_img) return capture_e::error; device_ctx->CopyResource(d3d_img->capture_texture.get(), p_surface->get()); - blend_cursor(*d3d_img); + blend_cursor(d3d_img->capture_rt.get()); } else if (p_img) { // Image is already in use by encoder, we need to get a new one @@ -2540,7 +2508,7 @@ namespace platf::dxgi { if (src_lock_helper.lock()) { device_ctx->CopyResource(d3d_img->capture_texture.get(), d3d_img_src->capture_texture.get()); } - blend_cursor(*d3d_img); + blend_cursor(d3d_img->capture_rt.get()); } else { BOOST_LOG(error) << "Logical error at " << __FILE__ << ":" << __LINE__; @@ -2566,7 +2534,7 @@ namespace platf::dxgi { } if (blend_mouse_cursor_flag) { - blend_cursor(*d3d_img); + blend_cursor(d3d_img->capture_rt.get()); } break; @@ -2591,11 +2559,7 @@ namespace platf::dxgi { } int - display_ddup_vram_t::init(const ::video::config_t &config, const std::string &display_name) { - if (display_base_t::init(config, display_name) || dup.init(this, config)) { - return -1; - } - + display_vram_t::init_cursor_pipeline(const ::video::config_t &config) { D3D11_SAMPLER_DESC sampler_desc {}; sampler_desc.Filter = D3D11_FILTER_MIN_MAG_MIP_LINEAR; sampler_desc.AddressU = D3D11_TEXTURE_ADDRESS_CLAMP; @@ -2679,6 +2643,52 @@ namespace platf::dxgi { return 0; } + void + display_vram_t::blend_cursor(ID3D11RenderTargetView *capture_rt) { + device_ctx->VSSetShader(cursor_vs.get(), nullptr, 0); + device_ctx->PSSetShader(cursor_ps.get(), nullptr, 0); + device_ctx->OMSetRenderTargets(1, &capture_rt, nullptr); + + if (cursor_alpha.texture.get()) { + // Perform an alpha blending operation + device_ctx->OMSetBlendState(blend_alpha.get(), nullptr, 0xFFFFFFFFu); + + device_ctx->PSSetShaderResources(0, 1, &cursor_alpha.input_res); + device_ctx->RSSetViewports(1, &cursor_alpha.cursor_view); + device_ctx->Draw(3, 0); + } + + if (cursor_xor.texture.get()) { + // Perform an invert blending without touching alpha values + device_ctx->OMSetBlendState(blend_invert.get(), nullptr, 0x00FFFFFFu); + + device_ctx->PSSetShaderResources(0, 1, &cursor_xor.input_res); + device_ctx->RSSetViewports(1, &cursor_xor.cursor_view); + device_ctx->Draw(3, 0); + } + + device_ctx->OMSetBlendState(blend_disable.get(), nullptr, 0xFFFFFFFFu); + + ID3D11RenderTargetView *emptyRenderTarget = nullptr; + device_ctx->OMSetRenderTargets(1, &emptyRenderTarget, nullptr); + device_ctx->RSSetViewports(0, nullptr); + ID3D11ShaderResourceView *emptyShaderResourceView = nullptr; + device_ctx->PSSetShaderResources(0, 1, &emptyShaderResourceView); + } + + int + display_ddup_vram_t::init(const ::video::config_t &config, const std::string &display_name) { + if (display_base_t::init(config, display_name) || dup.init(this, config)) { + return -1; + } + + if (init_cursor_pipeline(config) != 0) { + return -1; + } + + return 0; + } + int display_amd_vram_t::init(const ::video::config_t &config, const std::string &display_name) { if (display_base_t::init(config, display_name) || dup.init(this, config, output_index)) { @@ -3427,7 +3437,7 @@ namespace platf::dxgi { capture_e display_vdd_vram_t::snapshot(const pull_free_image_cb_t &pull_free_image_cb, std::shared_ptr &img_out, - std::chrono::milliseconds timeout, bool /*cursor_visible*/) { + std::chrono::milliseconds timeout, bool cursor_visible) { if (current_frame) { // Defensive: caller forgot to call release_snapshot(). Drop the stale ref. dup.release_frame(); @@ -3488,6 +3498,55 @@ namespace platf::dxgi { } device_ctx->CopyResource(d3d_img->capture_texture.get(), current_frame); + // VDD-side hardware cursor is delivered out-of-band via the cursor SHM; + // blend it onto the freshly-copied frame so non-Windows clients (e.g. + // Moonlight Android) see a cursor. The OS desktop framebuffer has the + // cursor stripped out because IddCx HardwareCursor=true routes pointer + // shape/position to an overlay channel rather than the swap chain. + vdd_capture_t::cursor_snapshot cs; + if (cursor_visible && dup.poll_cursor(cs) && cs.valid) { + if (cs.shape_updated && !cs.shape_buffer.empty() && cs.width > 0 && cs.height > 0) { + DXGI_OUTDUPL_POINTER_SHAPE_INFO shape_info{}; + // IDDCX_CURSOR_SHAPE_TYPE 0/1/2 → DXGI 1/2/4 + switch (cs.shape_type) { + case 0: shape_info.Type = DXGI_OUTDUPL_POINTER_SHAPE_TYPE_MONOCHROME; break; + case 1: shape_info.Type = DXGI_OUTDUPL_POINTER_SHAPE_TYPE_COLOR; break; + case 2: + default: shape_info.Type = DXGI_OUTDUPL_POINTER_SHAPE_TYPE_MASKED_COLOR; break; + } + shape_info.Width = cs.width; + shape_info.Height = cs.height; + shape_info.Pitch = cs.pitch; + shape_info.HotSpot.x = cs.xhot; + shape_info.HotSpot.y = cs.yhot; + + util::buffer_t img_data(cs.shape_buffer.size()); + memcpy(std::begin(img_data), cs.shape_buffer.data(), cs.shape_buffer.size()); + + auto alpha_img = make_cursor_alpha_image(img_data, shape_info); + auto xor_img = make_cursor_xor_image(img_data, shape_info); + // If either upload fails, clear both cursor textures and skip blend + // this frame — leaving the previous textures in place would render a + // stale cursor at the new position; an empty SRV would otherwise be + // sampled by blend_cursor() below. + if (!set_cursor_texture(device.get(), cursor_alpha, std::move(alpha_img), shape_info) || + !set_cursor_texture(device.get(), cursor_xor, std::move(xor_img), shape_info)) { + DXGI_OUTDUPL_POINTER_SHAPE_INFO empty_info{}; + set_cursor_texture(device.get(), cursor_alpha, {}, empty_info); + set_cursor_texture(device.get(), cursor_xor, {}, empty_info); + } + } + + // CursorExporter publishes top-left coordinates that already include the + // hot-spot offset (DXGI semantics), so we feed x/y straight through. + cursor_alpha.set_pos(cs.x, cs.y, width, height, display_rotation, cs.visible); + cursor_xor.set_pos(cs.x, cs.y, width, height, display_rotation, cs.visible); + + if (cs.visible && (cursor_alpha.texture || cursor_xor.texture)) { + blend_cursor(d3d_img->capture_rt.get()); + } + } + img_out = img; img_out->frame_timestamp = frame_timestamp; armed = false; // success: ownership of current_frame transfers to release_snapshot()