diff --git a/src/globals.cpp b/src/globals.cpp
index 53501da0f29..07636c81b4f 100644
--- a/src/globals.cpp
+++ b/src/globals.cpp
@@ -7,6 +7,9 @@
 safe::mail_t mail::man;
 thread_pool_util::ThreadPool task_pool;
 bool display_cursor = true;
+std::atomic<bool> capture_input_activity{false};
+std::atomic<platf::high_precision_timer *> active_capture_timer{nullptr};
+std::mutex active_capture_timer_mutex;
 
 #ifdef _WIN32
 nvprefs::nvprefs_interface nvprefs_instance;
diff --git a/src/globals.h b/src/globals.h
index 430851d6721..68c9af25570 100644
--- a/src/globals.h
+++ b/src/globals.h
@@ -4,6 +4,9 @@
  */
 #pragma once
 
+#include <atomic>
+#include <mutex>
+
 #include "entry_handler.h"
 #include "thread_pool.h"
 /**
@@ -21,6 +24,29 @@ extern thread_pool_util::ThreadPool task_pool;
  */
 extern bool display_cursor;
 
+/**
+ * @brief Atomic flag set by the input path to notify the capture thread that input has arrived.
+ * @details The capture thread clears it after an input-triggered wake-up; the wake-up itself is delivered by calling interrupt() on active_capture_timer.
+ */
+extern std::atomic<bool> capture_input_activity;
+
+namespace platf {
+  struct high_precision_timer;
+}
+
+/**
+ * @brief Pointer to the active capture timer, used by the input path to interrupt frame pacing sleep.
+ * @details Set by the capture thread when starting, cleared when stopping. The atomic only makes the
+ *          pointer value itself safe to read; hold active_capture_timer_mutex while dereferencing it
+ *          or changing the registration so the timer cannot be destroyed during an interrupt() call.
+ */
+extern std::atomic<platf::high_precision_timer *> active_capture_timer;
+
+/**
+ * @brief Guards registration of active_capture_timer and calls made through it.
+ */
+extern std::mutex active_capture_timer_mutex;
+
 #ifdef _WIN32
 // Declare global singleton used for NVIDIA control panel modifications
 #include "platform/windows/nvprefs/nvprefs_interface.h"
diff --git a/src/input.cpp b/src/input.cpp
index 5af1a3407b4..bf088098d6a 100644
--- a/src/input.cpp
+++ b/src/input.cpp
@@ -1638,6 +1638,19 @@ namespace input {
       input->input_queue.push_back(std::move(input_data));
     }
     task_pool.push(passthrough_next_message, input);
+
+    // Signal the capture thread that input has arrived to reduce input-to-display latency.
+    // This wakes the capture thread from its frame pacing sleep so it can capture and
+    // encode the next frame as soon as the desktop updates from this input.
+    capture_input_activity.store(true, std::memory_order_release);
+    {
+      // Hold the registration lock so the capture thread cannot unregister and destroy
+      // the timer between the pointer load and the interrupt() call.
+      std::lock_guard<std::mutex> lock(active_capture_timer_mutex);
+      if (auto t = active_capture_timer.load(std::memory_order_acquire)) {
+        t->interrupt();
+      }
+    }
   }
 
   void
diff --git a/src/platform/common.h b/src/platform/common.h
index dc45bcfb75a..383514a9a4f 100644
--- a/src/platform/common.h
+++ b/src/platform/common.h
@@ -1058,6 +1058,32 @@ namespace platf {
     virtual void
     sleep_for(const std::chrono::nanoseconds &duration) = 0;
 
+    /**
+     * @brief Sleep for the duration, but can be interrupted from another thread.
+     * @param duration Sleep duration.
+     * @return true if interrupted early, false if slept full duration.
+     */
+    virtual bool
+    sleep_for_interruptible(const std::chrono::nanoseconds &duration) {
+      sleep_for(duration);
+      return false;
+    }
+
+    /**
+     * @brief Interrupt an in-progress sleep_for_interruptible() call from another thread.
+     */
+    virtual void
+    interrupt() {}
+
+    /**
+     * @brief Get the platform-specific interrupt event handle for use with WaitForMultipleObjects.
+     * @return Opaque handle (HANDLE on Windows), or nullptr if not supported.
+     */
+    virtual void *
+    get_interrupt_event_handle() const {
+      return nullptr;
+    }
+
     /**
      * @brief Check if platform-specific timer backend has been initialized successfully
      * @return `true` on success, `false` on error
diff --git a/src/platform/windows/display.h b/src/platform/windows/display.h
index 112adba00fa..e7600d66b44 100644
--- a/src/platform/windows/display.h
+++ b/src/platform/windows/display.h
@@ -296,6 +296,15 @@ namespace platf::dxgi {
     release_snapshot() = 0;
     virtual int
     complete_img(img_t *img, bool dummy) = 0;
+
+    /**
+     * @brief Get a frame event handle for event-driven capture backends (e.g., WGC).
+     * @return HANDLE to an event signaled on frame arrival, or nullptr for polling backends.
+     */
+    virtual HANDLE
+    get_frame_event_handle() const {
+      return nullptr;
+    }
   };
 
   /**
@@ -428,6 +437,7 @@ namespace platf::dxgi {
     winrt::Windows::Graphics::Capture::Direct3D11CaptureFrame produced_frame { nullptr }, consumed_frame { nullptr };
     SRWLOCK frame_lock = SRWLOCK_INIT;
     CONDITION_VARIABLE frame_present_cv;
+    HANDLE frame_event = nullptr;  // Manual-reset event signaled when a new frame is available
 
     void
     on_frame_arrived(winrt::Windows::Graphics::Capture::Direct3D11CaptureFramePool const &sender, winrt::Windows::Foundation::IInspectable const &);
@@ -447,6 +457,15 @@ namespace platf::dxgi {
     release_frame();
     int
     set_cursor_visible(bool);
+
+    /**
+     * @brief Get the frame event handle for use in WaitForMultipleObjects.
+     * @return HANDLE to a manual-reset event that is signaled when a frame is available.
+     */
+    HANDLE
+    get_frame_event() const {
+      return frame_event;
+    }
 
     /**
      * @brief Check if the captured window is still valid.
@@ -471,6 +490,12 @@ namespace platf::dxgi {
     snapshot(const pull_free_image_cb_t &pull_free_image_cb, std::shared_ptr<platf::img_t> &img_out, std::chrono::milliseconds timeout, bool cursor_visible) override;
     capture_e
     release_snapshot() override;
+
+  protected:
+    HANDLE
+    get_frame_event_handle() const override {
+      return dup.get_frame_event();
+    }
   };
 
   /**
@@ -488,6 +513,12 @@ namespace platf::dxgi {
     snapshot(const pull_free_image_cb_t &pull_free_image_cb, std::shared_ptr<platf::img_t> &img_out, std::chrono::milliseconds timeout, bool cursor_visible) override;
     capture_e
     release_snapshot() override;
+
+  protected:
+    HANDLE
+    get_frame_event_handle() const override {
+      return dup.get_frame_event();
+    }
   };
 
   class amd_capture_t {
diff --git a/src/platform/windows/display_base.cpp b/src/platform/windows/display_base.cpp
index 476d01e5bcf..2653d2a40ce 100644
--- a/src/platform/windows/display_base.cpp
+++ b/src/platform/windows/display_base.cpp
@@ -257,6 +257,7 @@ namespace platf::dxgi {
     };
 
     DXGI_RATIONAL client_frame_rate_adjusted = adjust_client_frame_rate();
+    std::optional<std::chrono::steady_clock::time_point> last_frame_time;
     std::optional<std::chrono::steady_clock::time_point> frame_pacing_group_start;
     uint32_t frame_pacing_group_frames = 0;
 
@@ -271,6 +272,21 @@ namespace platf::dxgi {
 
     sleep_overshoot_logger.reset();
 
+    // Register the timer globally so the input path can interrupt frame pacing sleep.
+    // Registration and removal happen under active_capture_timer_mutex, which the input
+    // path also holds while calling interrupt(), so the timer cannot be destroyed mid-call.
+    {
+      std::lock_guard<std::mutex> lock(active_capture_timer_mutex);
+      active_capture_timer.store(timer.get(), std::memory_order_release);
+    }
+    auto clear_active_timer = util::fail_guard([registered_timer = timer.get()]() {
+      std::lock_guard<std::mutex> lock(active_capture_timer_mutex);
+      // Only clear our own registration; another capture thread may have replaced it.
+      if (active_capture_timer.load(std::memory_order_acquire) == registered_timer) {
+        active_capture_timer.store(nullptr, std::memory_order_release);
+      }
+    });
+
     while (true) {
       // This will return false if the HDR state changes or for any number of other
       // display or GPU changes. We should reinit to examine the updated state of
@@ -316,97 +332,206 @@ namespace platf::dxgi {
       platf::capture_e status = capture_e::ok;
       std::shared_ptr<img_t> img_out;
 
-      // Try to continue frame pacing group, snapshot() is called with zero timeout after waiting for client frame interval
-      if (frame_pacing_group_start) {
-        const uint32_t seconds = (uint64_t) frame_pacing_group_frames * client_frame_rate_adjusted.Denominator / client_frame_rate_adjusted.Numerator;
-        const uint32_t remainder = (uint64_t) frame_pacing_group_frames * client_frame_rate_adjusted.Denominator % client_frame_rate_adjusted.Numerator;
-        const auto sleep_target = *frame_pacing_group_start +
-                                  std::chrono::nanoseconds(1s) * seconds +
-                                  std::chrono::nanoseconds(1s) * remainder / client_frame_rate_adjusted.Numerator;
-        const auto sleep_period = sleep_target - std::chrono::steady_clock::now();
-
-        if (sleep_period <= 0ns) {
-          // We missed next frame time, invalidating current frame pacing group
-          frame_pacing_group_start = std::nullopt;
-          frame_pacing_group_frames = 0;
-          status = capture_e::timeout;
+      // Event-driven capture: poll for frames with short timeouts instead of sleeping
+      // for a fixed frame interval. This minimizes the latency between a desktop update
+      // (e.g., from input injection) and frame capture.
+      //
+      // Short timeouts are required because AcquireNextFrame holds the D3D11 device lock
+      // for the entire duration. Releasing between polls allows the encoding thread to
+      // acquire the lock for GPU operations.
+      auto short_timeout = std::chrono::milliseconds(16);
+      if (display_refresh_rate_rounded > 0) {
+        auto frame_interval_ms = 1000.0 / display_refresh_rate_rounded;
+        short_timeout = std::chrono::milliseconds(std::max(4, std::min(16, static_cast<int>(frame_interval_ms / 2))));
+      }
+
+      // Rate limiting: don't capture faster than the client framerate to avoid
+      // wasting encode/network resources. Calculate minimum frame interval.
+      const auto min_frame_interval = std::chrono::nanoseconds(
+        std::chrono::nanoseconds(1s) * client_frame_rate_adjusted.Denominator / client_frame_rate_adjusted.Numerator
+      );
+
+      // Determine capture strategy based on backend capabilities
+      HANDLE frame_event = get_frame_event_handle();
+
+      if (frame_event) {
+        // WGC path: true event-driven capture using WaitForMultipleObjects.
+        // WGC doesn't hold the D3D11 device lock during the wait, so we can use
+        // long timeouts without starving the encoder.
+        bool rate_limit_handled = false;
+
+        // Rate limiting: wait until min_frame_interval has elapsed before looking for the next frame.
+        // This prevents capturing at the display refresh rate when it exceeds the client framerate.
+        // The wait is interruptible so input can trigger an early capture attempt.
+        if (last_frame_time) {
+          auto now = std::chrono::steady_clock::now();
+          auto elapsed = now - *last_frame_time;
+          if (elapsed < min_frame_interval) {
+            auto remaining = min_frame_interval - elapsed;
+            bool interrupted = timer->sleep_for_interruptible(remaining);
+            if (interrupted) {
+              capture_input_activity.store(false, std::memory_order_release);
+              // Input arrived during rate limit — try to capture immediately
+              status = snapshot(pull_free_image_cb, img_out, 0ms, *cursor);
+              if (status == capture_e::timeout) {
+                status = snapshot(pull_free_image_cb, img_out, short_timeout, *cursor);
+              }
+              if (status == capture_e::ok && img_out) {
+                last_frame_time = img_out->frame_timestamp;
+                if (!last_frame_time) {
+                  last_frame_time = std::chrono::steady_clock::now();
+                }
+              }
+              rate_limit_handled = true;
+            }
+          }
         }
-        else {
-          timer->sleep_for(sleep_period);
-          sleep_overshoot_logger.first_point(sleep_target);
-          sleep_overshoot_logger.second_point_now_and_log();
-
-          // Try with 0ms timeout first (non-blocking check)
-          status = snapshot(pull_free_image_cb, img_out, 0ms, *cursor);
-
-          // If 0ms timeout failed but we're very close to the target time, try once more with a small timeout
-          // This helps catch frames that arrive slightly early or late due to timing variations
-          if (status == capture_e::timeout) {
-            const auto time_since_target = std::chrono::steady_clock::now() - sleep_target;
-            // If we're within 2ms of the target time, try one more time with a small timeout
-            if (time_since_target < 2ms && time_since_target > -2ms) {
-              status = snapshot(pull_free_image_cb, img_out, 2ms, *cursor);
+
+        if (!rate_limit_handled) {
+          // Wait for the next frame event or input interrupt
+          HANDLE handles[2];
+          DWORD handle_count = 0;
+          handles[handle_count++] = frame_event;
+
+          HANDLE interrupt_handle = timer ? static_cast<HANDLE>(timer->get_interrupt_event_handle()) : nullptr;
+          if (interrupt_handle) {
+            handles[handle_count++] = interrupt_handle;
+          }
+
+          auto result = WaitForMultipleObjects(handle_count, handles, FALSE, 200);
+
+          if (result == WAIT_OBJECT_0) {
+            // Frame event signaled — get the frame immediately
+            status = snapshot(pull_free_image_cb, img_out, 0ms, *cursor);
+          }
+          else if (interrupt_handle && result == WAIT_OBJECT_0 + 1) {
+            // Input interrupt — clear flag and try to get current frame
+            capture_input_activity.store(false, std::memory_order_release);
+            status = snapshot(pull_free_image_cb, img_out, 0ms, *cursor);
+            if (status == capture_e::timeout) {
+              status = snapshot(pull_free_image_cb, img_out, short_timeout, *cursor);
             }
           }
+          else {
+            // Timeout — try to get any available frame
+            status = snapshot(pull_free_image_cb, img_out, 0ms, *cursor);
+          }
 
           if (status == capture_e::ok && img_out) {
-            frame_pacing_group_frames += 1;
+            last_frame_time = img_out->frame_timestamp;
+            if (!last_frame_time) {
+              last_frame_time = std::chrono::steady_clock::now();
+            }
           }
-          else {
+        }
+      }
+      else {
+        // DDX path: precise frame pacing with interruptible sleep for input responsiveness.
+        // Uses frame pacing group to track cumulative timing and avoid drift.
+
+        // Try to continue frame pacing group, snapshot() is called with zero timeout after waiting for client frame interval
+        if (frame_pacing_group_start) {
+          const uint32_t seconds = (uint64_t) frame_pacing_group_frames * client_frame_rate_adjusted.Denominator / client_frame_rate_adjusted.Numerator;
+          const uint32_t remainder = (uint64_t) frame_pacing_group_frames * client_frame_rate_adjusted.Denominator % client_frame_rate_adjusted.Numerator;
+          const auto sleep_target = *frame_pacing_group_start +
+                                    std::chrono::nanoseconds(1s) * seconds +
+                                    std::chrono::nanoseconds(1s) * remainder / client_frame_rate_adjusted.Numerator;
+          const auto sleep_period = sleep_target - std::chrono::steady_clock::now();
+
+          if (sleep_period <= 0ns) {
+            // We missed next frame time, invalidating current frame pacing group
             frame_pacing_group_start = std::nullopt;
             frame_pacing_group_frames = 0;
+            status = capture_e::timeout;
           }
-        }
-      }
+          else {
+            // Use interruptible sleep so input can wake us early
+            bool interrupted = timer->sleep_for_interruptible(sleep_period);
+            bool captured_early = false;
+
+            if (interrupted) {
+              capture_input_activity.store(false, std::memory_order_release);
+
+              // Input arrived during sleep — try to capture a frame the input may have triggered
+              status = snapshot(pull_free_image_cb, img_out, 0ms, *cursor);
+              if (status == capture_e::timeout) {
+                // No frame yet — briefly poll to catch the input-response frame
+                status = snapshot(pull_free_image_cb, img_out, short_timeout, *cursor);
+              }
+
+              if (status == capture_e::ok && img_out) {
+                // Got a frame early — advance pacing group without destroying it
+                frame_pacing_group_frames += 1;
+                captured_early = true;
+              }
+              else {
+                // No frame captured after interrupt. Resume sleeping to the original
+                // sleep_target to preserve pacing group timing integrity.
+                auto remaining = sleep_target - std::chrono::steady_clock::now();
+                if (remaining > 0ns) {
+                  timer->sleep_for(remaining);
+                }
+                // Fall through to normal target-time capture below
+              }
+            }
 
-      // Start new frame pacing group if necessary, snapshot() is called with non-zero timeout
-      if (status == capture_e::timeout || (status == capture_e::ok && !frame_pacing_group_start)) {
-        // Optimization: Use short timeout polling instead of long timeout to reduce lock contention.
-        // The D3D11 device is protected by an unfair lock that is held the entire time that
-        // IDXGIOutputDuplication::AcquireNextFrame() is running. Using short timeouts based on
-        // display refresh rate allows us to release the lock more frequently, giving the encoding
-        // thread opportunities to acquire it for operations like creating dummy images or initializing shared state.
-        // This prevents encoder reinitialization from taking several seconds due to lock starvation.
-        //
-        // Calculate optimal short timeout based on display refresh rate (aim for ~half a frame interval)
-        // This ensures we poll frequently enough to catch frames quickly while still releasing the lock regularly.
-        auto short_timeout = std::chrono::milliseconds(16);  // Default to ~60fps frame interval
-        if (display_refresh_rate_rounded > 0) {
-          // Calculate half a frame interval in milliseconds, with minimum of 4ms and maximum of 16ms
-          auto frame_interval_ms = 1000.0 / display_refresh_rate_rounded;
-          short_timeout = std::chrono::milliseconds(std::max(4, std::min(16, static_cast<int>(frame_interval_ms / 2))));
+            if (!captured_early) {
+              // Normal capture at target time (timer expired naturally, or resumed after interrupt)
+              sleep_overshoot_logger.first_point(sleep_target);
+              sleep_overshoot_logger.second_point_now_and_log();
+
+              // Try with 0ms timeout first (non-blocking check)
+              status = snapshot(pull_free_image_cb, img_out, 0ms, *cursor);
+
+              // If 0ms timeout failed but we're very close to the target time, try once more with a small timeout
+              if (status == capture_e::timeout) {
+                const auto time_since_target = std::chrono::steady_clock::now() - sleep_target;
+                if (time_since_target < 2ms && time_since_target > -2ms) {
+                  status = snapshot(pull_free_image_cb, img_out, 2ms, *cursor);
+                }
+              }
+
+              if (status == capture_e::ok && img_out) {
+                frame_pacing_group_frames += 1;
+              }
+              else {
+                frame_pacing_group_start = std::nullopt;
+                frame_pacing_group_frames = 0;
+              }
+            }
+          }
         }
 
-        constexpr auto max_total_timeout = 200ms;
-        const auto max_attempts = static_cast<int>((max_total_timeout.count() + short_timeout.count() - 1) / short_timeout.count());
-        status = capture_e::timeout;
-        for (int attempt = 0; attempt < max_attempts && status == capture_e::timeout; ++attempt) {
-          status = snapshot(pull_free_image_cb, img_out, short_timeout, *cursor);
+        // Start new frame pacing group if necessary, snapshot() is called with non-zero timeout
+        if (status == capture_e::timeout || (status == capture_e::ok && !frame_pacing_group_start)) {
+          constexpr auto max_total_timeout = 200ms;
+          const auto max_attempts = static_cast<int>((max_total_timeout.count() + short_timeout.count() - 1) / short_timeout.count());
 
-          // If we got a frame or error, break immediately
-          if (status != capture_e::timeout) {
-            break;
-          }
+          status = capture_e::timeout;
+          for (int attempt = 0; attempt < max_attempts && status == capture_e::timeout; ++attempt) {
+            status = snapshot(pull_free_image_cb, img_out, short_timeout, *cursor);
+
+            if (status != capture_e::timeout) {
+              break;
+            }
 
-          // Release the snapshot to free the lock before next attempt
-          // This gives encoding thread a chance to acquire the device lock
-          release_snapshot();
+            release_snapshot();
 
-          // Small sleep to yield CPU and allow encoding thread to run
-          if (attempt < max_attempts - 1) {
-            std::this_thread::sleep_for(1ms);
+            if (attempt < max_attempts - 1) {
+              std::this_thread::sleep_for(1ms);
+            }
           }
-        }
 
-        if (status == capture_e::ok && img_out) {
-          frame_pacing_group_start = img_out->frame_timestamp;
+          if (status == capture_e::ok && img_out) {
+            frame_pacing_group_start = img_out->frame_timestamp;
 
-          if (!frame_pacing_group_start) {
-            BOOST_LOG(warning) << "snapshot() provided image without timestamp";
-            frame_pacing_group_start = std::chrono::steady_clock::now();
-          }
+            if (!frame_pacing_group_start) {
+              BOOST_LOG(warning) << "snapshot() provided image without timestamp";
+              frame_pacing_group_start = std::chrono::steady_clock::now();
+            }
 
-          frame_pacing_group_frames = 1;
+            frame_pacing_group_frames = 1;
+          }
         }
       }
 
diff --git a/src/platform/windows/display_wgc.cpp b/src/platform/windows/display_wgc.cpp
index b90d045212d..7a8398c0ed9 100644
--- a/src/platform/windows/display_wgc.cpp
+++ b/src/platform/windows/display_wgc.cpp
@@ -207,6 +207,10 @@ namespace platf::dxgi {
 
   wgc_capture_t::wgc_capture_t() {
     InitializeConditionVariable(&frame_present_cv);
+    frame_event = CreateEvent(nullptr, TRUE, FALSE, nullptr);  // Manual-reset, initially non-signaled
+    if (!frame_event) {
+      BOOST_LOG(warning) << "Failed to create WGC frame event: " << GetLastError();
+    }
   }
 
   wgc_capture_t::~wgc_capture_t() {
@@ -219,6 +223,10 @@ namespace platf::dxgi {
     item = nullptr;
     capture_session = nullptr;
     frame_pool = nullptr;
+    if (frame_event) {
+      CloseHandle(frame_event);
+      frame_event = nullptr;
+    }
   }
 
   /**
@@ -487,6 +495,9 @@ namespace platf::dxgi {
     }
 
     produced_frame = frame;
+    if (frame_event) {
+      SetEvent(frame_event);
+    }
     ReleaseSRWLockExclusive(&frame_lock);
     WakeConditionVariable(&frame_present_cv);
   }
@@ -517,6 +528,10 @@ namespace platf::dxgi {
     if (produced_frame) {
       consumed_frame = produced_frame;
       produced_frame = nullptr;
+      // Reset event under lock so it stays synchronized with produced_frame state
+      if (frame_event) {
+        ResetEvent(frame_event);
+      }
     }
     ReleaseSRWLockExclusive(&frame_lock);
     if (consumed_frame == nullptr) {  // spurious wakeup
diff --git a/src/platform/windows/misc.cpp b/src/platform/windows/misc.cpp
index f97fb7564a7..57adea4a1f4 100644
--- a/src/platform/windows/misc.cpp
+++ b/src/platform/windows/misc.cpp
@@ -2082,10 +2082,15 @@ namespace platf {
           BOOST_LOG(error) << "Unable to create high_precision_timer, CreateWaitableTimerEx() failed: " << GetLastError();
         }
       }
+      interrupt_event = CreateEvent(nullptr, FALSE, FALSE, nullptr);
+      if (!interrupt_event) {
+        BOOST_LOG(warning) << "Unable to create interrupt event for high_precision_timer: " << GetLastError();
+      }
     }
 
     ~win32_high_precision_timer() {
       if (timer) CloseHandle(timer);
+      if (interrupt_event) CloseHandle(interrupt_event);
     }
 
     void
@@ -2109,12 +2114,60 @@ namespace platf {
       WaitForSingleObject(timer, INFINITE);
     }
 
+    /**
+     * @brief Sleep for the duration unless interrupt() is signaled first.
+     * @details interrupt_event is auto-reset, so an interrupt() issued while no sleep is in
+     *          progress stays pending and ends the next call immediately.
+     * @param duration Sleep duration; must be within [0s, 5s].
+     * @return true if woken by interrupt(), false otherwise (including rejected durations).
+     */
+    bool
+    sleep_for_interruptible(const std::chrono::nanoseconds &duration) override {
+      if (!timer) {
+        BOOST_LOG(error) << "Attempting high_precision_timer::sleep_for_interruptible() with uninitialized timer";
+        return false;
+      }
+      if (!interrupt_event) {
+        sleep_for(duration);
+        return false;
+      }
+      if (duration < 0s) {
+        BOOST_LOG(error) << "Attempting high_precision_timer::sleep_for_interruptible() with negative duration";
+        return false;
+      }
+      if (duration > 5s) {
+        BOOST_LOG(error) << "Attempting high_precision_timer::sleep_for_interruptible() with unexpectedly large duration (>5s)";
+        return false;
+      }
+
+      LARGE_INTEGER due_time;
+      due_time.QuadPart = duration.count() / -100;
+      SetWaitableTimer(timer, &due_time, 0, nullptr, nullptr, false);
+
+      HANDLE handles[] = { timer, interrupt_event };
+      auto result = WaitForMultipleObjects(2, handles, FALSE, INFINITE);
+      return result == WAIT_OBJECT_0 + 1;
+    }
+
+    void
+    interrupt() override {
+      if (interrupt_event) {
+        SetEvent(interrupt_event);
+      }
+    }
+
+    void *
+    get_interrupt_event_handle() const override {
+      return interrupt_event;
+    }
+
     operator bool() override {
       return timer != NULL;
     }
 
   private:
     HANDLE timer = NULL;
+    HANDLE interrupt_event = NULL;
   };
 
   std::unique_ptr<high_precision_timer>