diff --git a/CMakeLists.txt b/CMakeLists.txt index 995edaa..416bee3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -17,9 +17,10 @@ find_package(Stb REQUIRED) find_package(spdlog CONFIG REQUIRED) - find_path(CGLTF_INCLUDE_DIRS "cgltf.h") +add_compile_definitions(ENABLE_TELEMETRY) + add_compile_options( $<$:-Wall> $<$:-Wextra> @@ -28,7 +29,4 @@ add_compile_options( $<$:-Wshadow> ) - - add_subdirectory(src) - diff --git a/src/backend/core/vk_backend_ctx.cpp b/src/backend/core/vk_backend_ctx.cpp index 55bf62e..3a33133 100644 --- a/src/backend/core/vk_backend_ctx.cpp +++ b/src/backend/core/vk_backend_ctx.cpp @@ -7,9 +7,6 @@ #include #include -DEFINE_TU_LOGGER("Backend.Ctx"); -#define LOG_TU_LOGGER() ThisLogger() - bool VkBackendCtx::init(std::span platformExtensions, bool enableValidation) { shutdown(); diff --git a/src/backend/core/vk_device.cpp b/src/backend/core/vk_device.cpp index 731a41a..e66834e 100644 --- a/src/backend/core/vk_device.cpp +++ b/src/backend/core/vk_device.cpp @@ -9,9 +9,6 @@ #include #include -DEFINE_TU_LOGGER("Backend.Device"); -#define LOG_TU_LOGGER() ThisLogger() - namespace { #ifdef __APPLE__ diff --git a/src/backend/core/vk_instance.cpp b/src/backend/core/vk_instance.cpp index 14d5f04..a6f65d1 100644 --- a/src/backend/core/vk_instance.cpp +++ b/src/backend/core/vk_instance.cpp @@ -9,9 +9,6 @@ #include #include -DEFINE_TU_LOGGER("Backend.Instance"); -#define LOG_TU_LOGGER() ThisLogger() - #ifndef NDEBUG constexpr bool kEnableValidationLayers = true; #else diff --git a/src/backend/frame/CMakeLists.txt b/src/backend/frame/CMakeLists.txt index f129b7f..3986156 100644 --- a/src/backend/frame/CMakeLists.txt +++ b/src/backend/frame/CMakeLists.txt @@ -12,7 +12,6 @@ target_link_libraries(quark_backend_frame PUBLIC Vulkan::Vulkan quark::backend::core - quark::backend::profiling ) add_library(quark::backend::frame ALIAS quark_backend_frame) diff --git a/src/backend/frame/vk_commands.cpp b/src/backend/frame/vk_commands.cpp index 83dbbdf..4c28d17 
100644 --- a/src/backend/frame/vk_commands.cpp +++ b/src/backend/frame/vk_commands.cpp @@ -1,24 +1,13 @@ #include "vk_commands.hpp" #include "backend/core/vk_backend_ctx.hpp" +#include "engine/logging/log.hpp" #include -#include +#include #include bool VkCommands::init(VkBackendCtx &ctx, VkCommandPoolCreateFlags flags) { - if (ctx.device() == VK_NULL_HANDLE) { - std::cerr << "[Cmd] Device is null\n"; - return false; - } - - uint32_t family = ctx.graphicsQueueFamily(); - if (family == UINT32_MAX) { - std::cerr << "[Cmd] ctx.graphicsQueueFamily invalid\n"; - return false; - } - - // Re-init shutdown(); m_ctx = &ctx; @@ -30,21 +19,21 @@ bool VkCommands::init(VkBackendCtx &ctx, VkCommandPoolCreateFlags flags) { VkResult res = vkCreateCommandPool(ctx.device(), &poolInfo, nullptr, &m_pool); if (res != VK_SUCCESS) { - std::cerr << "[Cmd] vkCreateCommandPool failed: " << res << "\n"; + LOGE("vkCreateCommandPool failed: {}", fmt::underlying(res)); m_pool = VK_NULL_HANDLE; m_ctx = VK_NULL_HANDLE; return false; } - std::cout << "[Cmd] Command pool created\n"; + LOGI("Command pool created"); return true; } bool VkCommands::allocate(uint32_t count, VkCommandBufferLevel level) { VkDevice device = m_ctx->device(); - if (device == VK_NULL_HANDLE || m_pool == VK_NULL_HANDLE) { - std::cerr << "[Cmd] Device or command pool not read\n"; + if (m_pool == VK_NULL_HANDLE) { + LOGE("Command pool not created"); return false; } @@ -60,12 +49,12 @@ bool VkCommands::allocate(uint32_t count, VkCommandBufferLevel level) { VkResult res = vkAllocateCommandBuffers(device, &allocInfo, m_buffers.data()); if (res != VK_SUCCESS) { - std::cerr << "[Cmd] vkAllocateCommandBuffers failed: " << res << "\n"; + LOGE("vkAllocatedCommandBuffers failed: {}", fmt::underlying(res)); m_buffers.clear(); return false; } - std::cout << "[Cmd] Allocated " << m_buffers.size() << " command buffers\n"; + LOGI("Allocated {} command buffers", m_buffers.size()); return true; } @@ -76,6 +65,7 @@ void VkCommands::free() 
noexcept { static_cast(m_buffers.size()), m_buffers.data()); } + m_buffers.clear(); } @@ -84,6 +74,7 @@ void VkCommands::shutdown() noexcept { if (m_ctx != nullptr && m_ctx->device() != VK_NULL_HANDLE && m_pool != VK_NULL_HANDLE) { + LOGD("Destroying command pool"); vkDestroyCommandPool(m_ctx->device(), m_pool, nullptr); } diff --git a/src/backend/frame/vk_commands.hpp b/src/backend/frame/vk_commands.hpp index 98fa883..c617637 100644 --- a/src/backend/frame/vk_commands.hpp +++ b/src/backend/frame/vk_commands.hpp @@ -25,7 +25,9 @@ class VkCommands { m_ctx = std::exchange(other.m_ctx, nullptr); m_pool = std::exchange(other.m_pool, VK_NULL_HANDLE); - m_buffers = std::exchange(other.m_buffers, std::vector{}); + m_buffers = std::move(other.m_buffers); + other.m_buffers.clear(); + return *this; } diff --git a/src/backend/frame/vk_frame_manager.cpp b/src/backend/frame/vk_frame_manager.cpp index 9af476b..4874bb9 100644 --- a/src/backend/frame/vk_frame_manager.cpp +++ b/src/backend/frame/vk_frame_manager.cpp @@ -1,10 +1,11 @@ #include "vk_frame_manager.hpp" -#include "backend/profiling/cpu_profiler.hpp" +#include "backend/profiling/telemetry/telemetry.hpp" +#include "engine/logging/log.hpp" #include #include -#include +#include #include constexpr uint32_t kOneFence = 1; @@ -12,17 +13,11 @@ constexpr VkBool32 kWaitAll = VK_TRUE; bool VkFrameManager::init(VkDevice device, uint32_t framesInFlight, uint32_t swapchainImageCount) { - if (device == VK_NULL_HANDLE) { - std::cerr << "[Frame] Device is null\n"; - return false; - } - if (framesInFlight == 0 || swapchainImageCount == 0) { - std::cerr << "[Frame] Invalid counts\n"; + LOGE("Invalid counts"); return false; } - // Re-init shutdown(); m_device = device; @@ -56,13 +51,13 @@ bool VkFrameManager::createSyncObjects() { for (uint32_t i = 0; i < m_framesInFlight; ++i) { if (vkCreateSemaphore(m_device, &semInfo, nullptr, &m_imageAvailable[i]) != VK_SUCCESS) { - std::cerr << "[Frame] Failed to create imageAvailable semaphore\n"; + 
LOGE("imageAvailable semaphore creation failed"); return false; } if (vkCreateFence(m_device, &fenceInfo, nullptr, &m_inFlightFences[i]) != VK_SUCCESS) { - std::cerr << "[Frame] Failed to create inFlight fence\n"; + LOGE("inFlight fence creation failed"); return false; } } @@ -70,7 +65,7 @@ bool VkFrameManager::createSyncObjects() { for (uint32_t i = 0; i < m_swapchainImageCount; ++i) { if (vkCreateSemaphore(m_device, &semInfo, nullptr, &m_renderFinished[i]) != VK_SUCCESS) { - std::cerr << "[Frame] Failed to create renderFinished semaphore\n"; + LOGE("renderFinished semaphore creation failed"); return false; } } @@ -89,18 +84,21 @@ void VkFrameManager::destroySyncObjects() noexcept { for (VkSemaphore s : m_imageAvailable) { if (s != VK_NULL_HANDLE) { + LOGD("Destroying sempahore"); vkDestroySemaphore(m_device, s, nullptr); } } for (VkSemaphore s : m_renderFinished) { if (s != VK_NULL_HANDLE) { + LOGD("Destroying sempahore"); vkDestroySemaphore(m_device, s, nullptr); } } for (VkFence f : m_inFlightFences) { if (f != VK_NULL_HANDLE) { + LOGD("Destroying sempahore"); vkDestroyFence(m_device, f, nullptr); } } @@ -113,8 +111,7 @@ void VkFrameManager::destroySyncObjects() noexcept { VkFrameManager::FrameStatus VkFrameManager::beginFrame(VkSwapchainKHR swapchain, uint32_t &outImageIndex, - uint64_t timeout, - CpuProfiler *profiler) { + uint64_t timeout) { if (m_device == VK_NULL_HANDLE) { return FrameStatus::Error; } @@ -127,54 +124,47 @@ VkFrameManager::FrameStatus VkFrameManager::beginFrame(VkSwapchainKHR swapchain, // Wait for CPU-frame fence VkResult waitRes = VK_SUCCESS; - if (profiler != nullptr) { - CpuProfiler::Scope s(*profiler, CpuProfiler::Stat::WaitForFence); - waitRes = - vkWaitForFences(m_device, kOneFence, &frameFence, kWaitAll, timeout); - } else { + { + PROFILE_CPU_SCOPE(CpuProfiler::Stat::WaitForFence); waitRes = vkWaitForFences(m_device, kOneFence, &frameFence, kWaitAll, timeout); } + if (waitRes != VK_SUCCESS) { - std::cerr << "[Frame] vkWaitForFences 
failed: " << waitRes << "\n"; + LOGE("vkWaitForFences failed: {}", fmt::underlying(waitRes)); return FrameStatus::Error; } VkResult acq = VK_SUCCESS; - if (profiler != nullptr) { - CpuProfiler::Scope s(*profiler, CpuProfiler::Stat::Acquire); - acq = vkAcquireNextImageKHR(m_device, swapchain, timeout, - m_imageAvailable[m_currentFrame], - VK_NULL_HANDLE, &outImageIndex); - } else { + { + PROFILE_CPU_SCOPE(CpuProfiler::Stat::Acquire); acq = vkAcquireNextImageKHR(m_device, swapchain, timeout, m_imageAvailable[m_currentFrame], VK_NULL_HANDLE, &outImageIndex); } + if (acq == VK_ERROR_OUT_OF_DATE_KHR) { - std::cerr << "[Frame] vkAcquireNextImageKHR returned OUT_OF_DATE\n"; + LOGE("vkAcquireNextImageKHR returned OUT_OF_DATE"); return FrameStatus::OutOfDate; } if (acq == VK_SUBOPTIMAL_KHR) { // TODO: signal recreate swapchain when convienent } else if (acq != VK_SUCCESS) { - std::cerr << "[Frame] vkAcquireNextImageKHR failed: " << acq << "\n"; + LOGE("vkAcquireNextImageKHR failed: {}", fmt::underlying(acq)); return FrameStatus::Error; } if (outImageIndex >= m_imagesInFlight.size()) { - std::cerr << "[Frame] imageIndex out of range\n"; + LOGE("imageIndex out of range"); return FrameStatus::Error; } if (m_imagesInFlight[outImageIndex] != VK_NULL_HANDLE) { VkFence imgFence = m_imagesInFlight[outImageIndex]; - if (profiler != nullptr) { - CpuProfiler::Scope s(*profiler, CpuProfiler::Stat::WaitForFence); - vkWaitForFences(m_device, kOneFence, &imgFence, kWaitAll, timeout); - } else { + { + PROFILE_CPU_SCOPE(CpuProfiler::Stat::WaitForFence); vkWaitForFences(m_device, kOneFence, &imgFence, kWaitAll, timeout); } } @@ -187,15 +177,7 @@ VkFrameManager::FrameStatus VkFrameManager::beginFrame(VkSwapchainKHR swapchain, VkFrameManager::FrameStatus VkFrameManager::submit(VkQueue queue, uint32_t imageIndex, VkCommandBuffer cmd, - VkPipelineStageFlags waitStage, CpuProfiler *profiler) { - // if (m_device == VK_NULL_HANDLE) { - // return FrameStatus::Error; - // } - // - // if (queue == 
VK_NULL_HANDLE) { - // return FrameStatus::Error; - // } - + VkPipelineStageFlags waitStage) { if (imageIndex >= m_renderFinished.size()) { return FrameStatus::Error; } @@ -215,15 +197,13 @@ VkFrameManager::submit(VkQueue queue, uint32_t imageIndex, VkCommandBuffer cmd, submit.pSignalSemaphores = &signalSem; VkResult res = VK_SUCCESS; - if (profiler != nullptr) { - CpuProfiler::Scope s(*profiler, CpuProfiler::Stat::QueueSubmit); - res = vkQueueSubmit(queue, kOneFence, &submit, frameFence); - } else { + { + PROFILE_CPU_SCOPE(CpuProfiler::Stat::QueueSubmit); res = vkQueueSubmit(queue, kOneFence, &submit, frameFence); } if (res != VK_SUCCESS) { - std::cerr << "[Frame] vkQueueSubmit failed: " << res << "\n"; + LOGE("vkQueueSubmit failed: {}", fmt::underlying(res)); return FrameStatus::Error; } @@ -232,8 +212,7 @@ VkFrameManager::submit(VkQueue queue, uint32_t imageIndex, VkCommandBuffer cmd, VkFrameManager::FrameStatus VkFrameManager::present(VkQueue queue, VkSwapchainKHR swapchain, - uint32_t imageIndex, - CpuProfiler *profiler) { + uint32_t imageIndex) { if (imageIndex >= m_renderFinished.size()) { return FrameStatus::Error; } @@ -249,16 +228,13 @@ VkFrameManager::FrameStatus VkFrameManager::present(VkQueue queue, present.pImageIndices = &imageIndex; VkResult res = VK_SUCCESS; - if (profiler != nullptr) { - CpuProfiler::Scope s(*profiler, CpuProfiler::Stat::QueuePresent); - res = vkQueuePresentKHR(queue, &present); - } else { - CpuProfiler::Scope s(*profiler, CpuProfiler::Stat::QueuePresent); + { + PROFILE_CPU_SCOPE(CpuProfiler::Stat::QueuePresent); res = vkQueuePresentKHR(queue, &present); } if (res == VK_ERROR_OUT_OF_DATE_KHR) { - std::cerr << "[Frame] vkQueuePresentKHR returned OUT_OF_DATE\n"; + LOGE("vkQueuePresentKHR returned OUT_OF_DATE"); return FrameStatus::OutOfDate; } @@ -266,7 +242,7 @@ VkFrameManager::FrameStatus VkFrameManager::present(VkQueue queue, if (res == VK_SUBOPTIMAL_KHR) { if (!suboptimal_logged) { - std::cerr << "[Frame] vkQueuePresentKHR 
returned SUBOPTIMAL\n"; + LOGE("vkQueuePresentKHR returned SUBOPTIMAL"); suboptimal_logged = true; } @@ -275,7 +251,7 @@ VkFrameManager::FrameStatus VkFrameManager::present(VkQueue queue, } if (res != VK_SUCCESS) { - std::cerr << "[Frame] vkQueuePresentKHR failed: " << res << "\n"; + LOGE("vkQueuePresentKHR failed: {}", fmt::underlying(res)); return FrameStatus::Error; } @@ -305,7 +281,7 @@ bool VkFrameManager::onSwapchainRecreated(uint32_t newSwapchainImageCount) { for (uint32_t i = 0; i < m_swapchainImageCount; ++i) { if (vkCreateSemaphore(m_device, &semInfo, nullptr, &m_renderFinished[i]) != VK_SUCCESS) { - std::cerr << "[Frame] Failed to recreate renderFinished semaphore\n"; + LOGE("renderFinished semaphore recreated failed"); return false; } } diff --git a/src/backend/frame/vk_frame_manager.hpp b/src/backend/frame/vk_frame_manager.hpp index 8b5d1d6..e753fb2 100644 --- a/src/backend/frame/vk_frame_manager.hpp +++ b/src/backend/frame/vk_frame_manager.hpp @@ -1,14 +1,10 @@ #pragma once -#include "backend/profiling/cpu_profiler.hpp" - #include #include #include #include -class CpuProfiler; - class VkFrameManager { public: VkFrameManager() = default; @@ -48,16 +44,14 @@ class VkFrameManager { bool resizeSwapchainImages(uint32_t swapchainImageCount); FrameStatus beginFrame(VkSwapchainKHR swapchain, uint32_t &outImageIndex, - uint64_t timeout = UINT64_MAX, - CpuProfiler *profiler = nullptr); + uint64_t timeout = UINT64_MAX); FrameStatus submit(VkQueue queue, uint32_t imageIndex, VkCommandBuffer cmd, VkPipelineStageFlags waitStage = - VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, - CpuProfiler *profiler = nullptr); + VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT); FrameStatus present(VkQueue queue, VkSwapchainKHR swapchain, - uint32_t imageIndex, CpuProfiler *profiler = nullptr); + uint32_t imageIndex); [[nodiscard]] uint32_t currentFrameIndex() const { return m_currentFrame; } diff --git a/src/backend/gpu/images/vk_depth_image.cpp 
b/src/backend/gpu/images/vk_depth_image.cpp index e0e8b61..10489da 100644 --- a/src/backend/gpu/images/vk_depth_image.cpp +++ b/src/backend/gpu/images/vk_depth_image.cpp @@ -1,8 +1,10 @@ #include "backend/gpu/images/vk_depth_image.hpp" #include "backend/gpu/images/vk_image.hpp" +#include "engine/logging/log.hpp" #include +#include #include #include #include @@ -26,8 +28,7 @@ bool VkDepthImage::findSupportedDepthFormat(VkPhysicalDevice physicalDevice, const auto features = props.optimalTilingFeatures; if ((features & VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT) != 0) { out = fmt; - std::cout << "[Depth] Chosen depth format: " << static_cast(out) - << "\n"; + LOGD("Chosen depth format: {}", static_cast(out)); return true; } } @@ -35,27 +36,25 @@ bool VkDepthImage::findSupportedDepthFormat(VkPhysicalDevice physicalDevice, return false; } -bool VkDepthImage::init(VmaAllocator allocator, VkPhysicalDevice physicalDevice, - VkDevice device, VkExtent2D extent) { - if (allocator == nullptr || physicalDevice == VK_NULL_HANDLE || - device == VK_NULL_HANDLE || extent.width == 0 || extent.height == 0) { +bool VkDepthImage::init(VkBackendCtx &ctx, VkExtent2D extent) { + if (extent.width == 0 || extent.height == 0) { + LOGE("Invalid init args"); std::cerr << "[Depth] Invalid init args\n"; return false; } shutdown(); - m_allocator = allocator; - m_device = device; + m_ctx = &ctx; m_extent = extent; - if (!findSupportedDepthFormat(physicalDevice, m_format)) { + if (!findSupportedDepthFormat(m_ctx->physicalDevice(), m_format)) { std::cerr << "[Depth] No supported depth format found\n"; shutdown(); return false; } - if (!m_image.init2D(m_allocator, extent.width, extent.height, m_format, + if (!m_image.init2D(m_ctx->allocator(), extent.width, extent.height, m_format, VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT, VK_IMAGE_TILING_OPTIMAL)) { std::cerr << "[Depth] Failed to create depth image\n"; @@ -79,9 +78,10 @@ bool VkDepthImage::init(VmaAllocator allocator, VkPhysicalDevice 
physicalDevice, viewInfo.subresourceRange.baseArrayLayer = 0; viewInfo.subresourceRange.layerCount = 1; - const VkResult res = vkCreateImageView(m_device, &viewInfo, nullptr, &m_view); + const VkResult res = + vkCreateImageView(m_ctx->device(), &viewInfo, nullptr, &m_view); if (res != VK_SUCCESS) { - std::cerr << "[Depth] vkCreateImageView failed: " << res << "\n"; + LOGE("vkCreateImageView failed: {}", fmt::underlying(res)); shutdown(); return false; } @@ -90,13 +90,19 @@ bool VkDepthImage::init(VmaAllocator allocator, VkPhysicalDevice physicalDevice, } void VkDepthImage::shutdown() noexcept { - if (m_device != VK_NULL_HANDLE && m_view != VK_NULL_HANDLE) { - vkDestroyImageView(m_device, m_view, nullptr); + VkDevice device = VK_NULL_HANDLE; + if (m_ctx != nullptr) { + device = m_ctx->device(); + } + + if (device != VK_NULL_HANDLE && m_view != VK_NULL_HANDLE) { + LOGD("Destroying image view"); + vkDestroyImageView(m_ctx->device(), m_view, nullptr); } m_view = VK_NULL_HANDLE; m_image.shutdown(); m_format = VK_FORMAT_UNDEFINED; m_extent = VkExtent2D{.width = 0, .height = 0}; - m_device = VK_NULL_HANDLE; + m_ctx = nullptr; } diff --git a/src/backend/gpu/images/vk_depth_image.hpp b/src/backend/gpu/images/vk_depth_image.hpp index fd0e00c..8521146 100644 --- a/src/backend/gpu/images/vk_depth_image.hpp +++ b/src/backend/gpu/images/vk_depth_image.hpp @@ -1,5 +1,6 @@ #pragma once +#include "backend/core/vk_backend_ctx.hpp" #include "backend/gpu/images/vk_image.hpp" #include @@ -22,8 +23,7 @@ class VkDepthImage { shutdown(); - m_allocator = std::exchange(other.m_allocator, nullptr); - m_device = std::exchange(other.m_device, VK_NULL_HANDLE); + m_ctx = std::exchange(other.m_ctx, nullptr); m_image = std::move(other.m_image); m_view = std::exchange(other.m_view, VK_NULL_HANDLE); m_format = std::exchange(other.m_format, VK_FORMAT_UNDEFINED); @@ -32,8 +32,7 @@ class VkDepthImage { return *this; } - bool init(VmaAllocator allocator, VkPhysicalDevice physicalDevice, - VkDevice 
device, VkExtent2D extent); + bool init(VkBackendCtx &ctx, VkExtent2D extent); void shutdown() noexcept; [[nodiscard]] VkImage image() const noexcept { return m_image.handle(); } @@ -46,8 +45,7 @@ class VkDepthImage { static bool findSupportedDepthFormat(VkPhysicalDevice physicalDevice, VkFormat &out); - VmaAllocator m_allocator = nullptr; // non-owning - VkDevice m_device = VK_NULL_HANDLE; // non-owning + VkBackendCtx *m_ctx = nullptr; // non-owning VkImageObj m_image; // owning VkImageView m_view = VK_NULL_HANDLE; // owning VkFormat m_format = VK_FORMAT_UNDEFINED; diff --git a/src/backend/gpu/upload/CMakeLists.txt b/src/backend/gpu/upload/CMakeLists.txt index 545b2ab..c4c6efa 100644 --- a/src/backend/gpu/upload/CMakeLists.txt +++ b/src/backend/gpu/upload/CMakeLists.txt @@ -19,7 +19,6 @@ target_link_libraries(quark_backend_gpu_upload quark::backend::core quark::backend::frame - quark::backend::profiling quark::backend::gpu::buffers quark::backend::gpu::textures ) diff --git a/src/backend/gpu/upload/vk_buffer_uploader.cpp b/src/backend/gpu/upload/vk_buffer_uploader.cpp index bcf1dbb..0ee1ef5 100644 --- a/src/backend/gpu/upload/vk_buffer_uploader.cpp +++ b/src/backend/gpu/upload/vk_buffer_uploader.cpp @@ -1,7 +1,7 @@ #include "vk_buffer_uploader.hpp" #include "backend/gpu/upload/vk_upload_context.hpp" -#include "backend/profiling/upload_profiler.hpp" +#include "backend/profiling/telemetry/telemetry.hpp" #include #include @@ -9,32 +9,19 @@ #include #include -bool VkBufferUploader::init(VmaAllocator allocator, VkUploadContext *upload, - UploadProfiler *profiler) { - if (allocator == nullptr || upload == nullptr) { - std::cerr << "[Uploader] Invalid init args\n"; - return false; - } - +bool VkBufferUploader::init(VmaAllocator allocator) { m_allocator = allocator; - m_upload = upload; - m_profiler = profiler; return true; } -void VkBufferUploader::shutdown() noexcept { - m_allocator = nullptr; - m_upload = nullptr; - m_profiler = nullptr; -} +void 
VkBufferUploader::shutdown() noexcept { m_allocator = nullptr; } -bool VkBufferUploader::uploadToDeviceLocalBuffer(const void *data, - VkDeviceSize size, - VkBufferUsageFlags finalUsage, - VkBufferObj &outBuffer) { - if (m_allocator == nullptr || m_upload == nullptr) { - std::cerr << "[Uploader] Not initialized\n"; +bool VkBufferUploader::uploadToDeviceLocalBuffer( + VkUploadContext::Recorder recorder, const void *data, VkDeviceSize size, + VkBufferUsageFlags finalUsage, VkBufferObj &outBuffer) { + if (!recorder) { + std::cerr << "[BufferUploader] Invalid recorder\n"; return false; } @@ -43,7 +30,7 @@ bool VkBufferUploader::uploadToDeviceLocalBuffer(const void *data, return false; } - VkStagingAlloc stageAlloc = m_upload->allocStaging(size); + VkStagingAlloc stageAlloc = recorder.allocStaging(size); if (!stageAlloc) { std::cerr << "[Uploader] Out of staging space (increase per-frame budget " "or flush earlier)\n"; @@ -52,10 +39,8 @@ bool VkBufferUploader::uploadToDeviceLocalBuffer(const void *data, std::memcpy(stageAlloc.ptr, data, static_cast(size)); - if (m_profiler != nullptr) { - profilerAdd(m_profiler, UploadProfiler::Stat::UploadMemcpyCount, 1); - profilerAdd(m_profiler, UploadProfiler::Stat::UploadMemcpyBytes, size); - } + PROFILE_UPLOAD_INC(UploadProfiler::Stat::UploadMemcpyCount); + PROFILE_UPLOAD_ADD(UploadProfiler::Stat::UploadMemcpyBytes, size); // Device-local buffer outBuffer.shutdown(); @@ -66,17 +51,13 @@ bool VkBufferUploader::uploadToDeviceLocalBuffer(const void *data, return false; } - if (m_profiler != nullptr) { - profilerAdd(m_profiler, UploadProfiler::Stat::BufferAllocatedBytes, size); - } + PROFILE_UPLOAD_ADD(UploadProfiler::Stat::BufferAllocatedBytes, size); - m_upload->cmdCopyToBuffer(outBuffer.handle(), /*dstOffset=*/0, - /*srcOffset=*/stageAlloc.offset, size); + recorder.cmdCopyToBuffer(outBuffer.handle(), /*dstOffset=*/0, + /*srcOffset=*/stageAlloc.offset, size); - if (m_profiler != nullptr) { - profilerAdd(m_profiler, 
UploadProfiler::Stat::BufferUploadCount, 1); - profilerAdd(m_profiler, UploadProfiler::Stat::BufferUploadBytes, size); - } + PROFILE_UPLOAD_INC(UploadProfiler::Stat::BufferUploadCount); + PROFILE_UPLOAD_ADD(UploadProfiler::Stat::BufferUploadCount, size); return true; } diff --git a/src/backend/gpu/upload/vk_buffer_uploader.hpp b/src/backend/gpu/upload/vk_buffer_uploader.hpp index 8e0516b..ba46d6e 100644 --- a/src/backend/gpu/upload/vk_buffer_uploader.hpp +++ b/src/backend/gpu/upload/vk_buffer_uploader.hpp @@ -2,25 +2,24 @@ #include "backend/gpu/buffers/vk_buffer.hpp" #include "backend/gpu/upload/vk_upload_context.hpp" -#include "backend/profiling/upload_profiler.hpp" #include #include +class UploadProfiler; + class VkBufferUploader { public: VkBufferUploader() = default; - bool init(VmaAllocator allocator, VkUploadContext *upload, - UploadProfiler *profiler); + bool init(VmaAllocator allocator); void shutdown() noexcept; - bool uploadToDeviceLocalBuffer(const void *data, VkDeviceSize size, + bool uploadToDeviceLocalBuffer(VkUploadContext::Recorder recorder, + const void *data, VkDeviceSize size, VkBufferUsageFlags finalUsage, VkBufferObj &outBuffer); private: - VmaAllocator m_allocator = nullptr; // non-owning - VkUploadContext *m_upload = nullptr; // non-owning - UploadProfiler *m_profiler = nullptr; // non-owning + VmaAllocator m_allocator = nullptr; // non-owning }; diff --git a/src/backend/gpu/upload/vk_instance_uploader.cpp b/src/backend/gpu/upload/vk_instance_uploader.cpp index 701d33d..4391d04 100644 --- a/src/backend/gpu/upload/vk_instance_uploader.cpp +++ b/src/backend/gpu/upload/vk_instance_uploader.cpp @@ -1,17 +1,22 @@ #include "backend/gpu/upload/vk_instance_uploader.hpp" -#include "backend/profiling/upload_profiler.hpp" +#include "backend/profiling/telemetry/telemetry.hpp" #include #include #include +bool VkInstanceUploader::init() { return true; } + +void VkInstanceUploader::shutdown() noexcept {} + InstanceUploadResult 
VkInstanceUploader::uploadMat4Instances( - VkBuffer instanceBuffer, VkDeviceSize frameBaseBytes, - VkDeviceSize frameStrideBytes, uint32_t maxInstancesPerFrame, - uint32_t &cursorInstances, std::span models) { + VkUploadContext::Recorder recorder, VkBuffer instanceBuffer, + VkDeviceSize frameBaseBytes, VkDeviceSize frameStrideBytes, + uint32_t maxInstancesPerFrame, uint32_t &cursorInstances, + std::span models) { InstanceUploadResult out{}; - if (m_upload == nullptr || instanceBuffer == VK_NULL_HANDLE) { + if (!recorder || instanceBuffer == VK_NULL_HANDLE) { return out; } @@ -35,7 +40,7 @@ InstanceUploadResult VkInstanceUploader::uploadMat4Instances( return out; } - VkStagingAlloc stageAlloc = m_upload->allocStaging(bytes, /*alignment*/ 16); + VkStagingAlloc stageAlloc = recorder.allocStaging(bytes, /*alignment*/ 16); if (!stageAlloc) { std::cerr << "[InstanceUploader] allocStaging failed for instances\n"; return out; @@ -43,26 +48,21 @@ InstanceUploadResult VkInstanceUploader::uploadMat4Instances( std::memcpy(stageAlloc.ptr, models.data(), static_cast(bytes)); - if (m_profiler != nullptr) { - profilerAdd(m_profiler, UploadProfiler::Stat::UploadMemcpyCount, 1); - profilerAdd(m_profiler, UploadProfiler::Stat::UploadMemcpyBytes, bytes); - } + PROFILE_UPLOAD_INC(UploadProfiler::Stat::UploadMemcpyCount); + PROFILE_UPLOAD_ADD(UploadProfiler::Stat::UploadMemcpyBytes, bytes); const uint32_t base = cursorInstances; const VkDeviceSize dstOffset = frameBaseBytes + (VkDeviceSize(base) * sizeof(glm::mat4)); - m_upload->cmdCopyToBuffer(instanceBuffer, dstOffset, stageAlloc.offset, - bytes); - m_upload->cmdBarrierBufferTransferToShader( + recorder.cmdCopyToBuffer(instanceBuffer, dstOffset, stageAlloc.offset, bytes); + recorder.cmdBarrierBufferTransferToShader( instanceBuffer, dstOffset, bytes, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT); cursorInstances += count; - if (m_profiler != nullptr) { - profilerAdd(m_profiler, UploadProfiler::Stat::InstanceUploadCount, 1); - 
profilerAdd(m_profiler, UploadProfiler::Stat::InstanceUploadBytes, bytes); - } + PROFILE_UPLOAD_INC(UploadProfiler::Stat::InstanceUploadCount); + PROFILE_UPLOAD_ADD(UploadProfiler::Stat::InstanceUploadBytes, bytes); out.baseInstance = base; out.instanceCount = count; diff --git a/src/backend/gpu/upload/vk_instance_uploader.hpp b/src/backend/gpu/upload/vk_instance_uploader.hpp index 4cb950a..52d6809 100644 --- a/src/backend/gpu/upload/vk_instance_uploader.hpp +++ b/src/backend/gpu/upload/vk_instance_uploader.hpp @@ -1,7 +1,6 @@ #pragma once #include "backend/gpu/upload/vk_upload_context.hpp" -#include "backend/profiling/upload_profiler.hpp" #include #include @@ -16,24 +15,16 @@ struct InstanceUploadResult { class VkInstanceUploader { public: - bool init(VkUploadContext *upload, UploadProfiler *profiler) { - m_upload = upload; - m_profiler = profiler; - return m_upload != nullptr; - } - void shutdown() noexcept { - m_upload = nullptr; - m_profiler = nullptr; - } + bool init(); + void shutdown() noexcept; - InstanceUploadResult uploadMat4Instances(VkBuffer instanceBuffer, + // TODO: make cursorInstances multi threaded for parallelized + // batching/instance writes + InstanceUploadResult uploadMat4Instances(VkUploadContext::Recorder recorder, + VkBuffer instanceBuffer, VkDeviceSize frameBaseBytes, VkDeviceSize frameStrideBytes, uint32_t maxInstancesPerFrame, uint32_t &cursorInstances, std::span models); - -private: - VkUploadContext *m_upload = nullptr; // non-owning - UploadProfiler *m_profiler = nullptr; // non-owning }; diff --git a/src/backend/gpu/upload/vk_material_uploader.cpp b/src/backend/gpu/upload/vk_material_uploader.cpp index 1780165..09e4a48 100644 --- a/src/backend/gpu/upload/vk_material_uploader.cpp +++ b/src/backend/gpu/upload/vk_material_uploader.cpp @@ -1,7 +1,7 @@ #include "backend/gpu/upload/vk_material_uploader.hpp" #include "backend/gpu/upload/vk_upload_context.hpp" -#include "backend/profiling/upload_profiler.hpp" +#include 
"backend/profiling/telemetry/telemetry.hpp" #include "render/resources/material_gpu.hpp" #include @@ -9,16 +9,22 @@ #include #include -bool VkMaterialUploader::uploadOne(VkBuffer materialBuffer, +bool VkMaterialUploader::init() { return true; } + +void VkMaterialUploader::shutdown() noexcept {} + +bool VkMaterialUploader::uploadOne(VkUploadContext::Recorder recorder, + VkBuffer materialBuffer, VkDeviceSize dstOffsetBytes, - const MaterialGPU &material) { - if (m_upload == nullptr || materialBuffer == VK_NULL_HANDLE) { + const MaterialGPU &material, + VkPipelineStageFlags dstStage) { + if (!recorder || materialBuffer == VK_NULL_HANDLE) { return false; } constexpr VkDeviceSize bytes = sizeof(MaterialGPU); - VkStagingAlloc stage = m_upload->allocStaging(bytes, /*alignment=*/16); + VkStagingAlloc stage = recorder.allocStaging(bytes, /*alignment=*/16); if (!stage) { std::cerr << "[MaterialUploader] allocStaging failed\n"; return false; @@ -26,22 +32,16 @@ bool VkMaterialUploader::uploadOne(VkBuffer materialBuffer, std::memcpy(stage.ptr, &material, sizeof(MaterialGPU)); - if (m_profiler != nullptr) { - profilerAdd(m_profiler, UploadProfiler::Stat::UploadMemcpyCount, 1); - profilerAdd(m_profiler, UploadProfiler::Stat::UploadMemcpyBytes, bytes); - } + PROFILE_UPLOAD_INC(UploadProfiler::Stat::UploadMemcpyCount); + PROFILE_UPLOAD_ADD(UploadProfiler::Stat::UploadMemcpyBytes, bytes); - m_upload->cmdCopyToBuffer(materialBuffer, dstOffsetBytes, stage.offset, - bytes); - m_upload->cmdBarrierBufferTransferToShader( - materialBuffer, dstOffsetBytes, bytes, - VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT); + recorder.cmdCopyToBuffer(materialBuffer, dstOffsetBytes, stage.offset, bytes); + recorder.cmdBarrierBufferTransferToShader(materialBuffer, dstOffsetBytes, + bytes, dstStage); - if (m_profiler != nullptr) { - profilerAdd(m_profiler, UploadProfiler::Stat::MaterialUploadCount, 1); - profilerAdd(m_profiler, UploadProfiler::Stat::MaterialUploadBytes, - static_cast(bytes)); - } + 
PROFILE_UPLOAD_INC(UploadProfiler::Stat::MaterialUploadCount); + PROFILE_UPLOAD_ADD(UploadProfiler::Stat::MaterialUploadBytes, + static_cast(bytes)); return true; } diff --git a/src/backend/gpu/upload/vk_material_uploader.hpp b/src/backend/gpu/upload/vk_material_uploader.hpp index f30eae5..a5a7e9c 100644 --- a/src/backend/gpu/upload/vk_material_uploader.hpp +++ b/src/backend/gpu/upload/vk_material_uploader.hpp @@ -1,28 +1,19 @@ #pragma once #include "backend/gpu/upload/vk_upload_context.hpp" -#include "backend/profiling/upload_profiler.hpp" #include "render/resources/material_gpu.hpp" #include +class UploadProfiler; + class VkMaterialUploader { public: - bool init(VkUploadContext *upload, UploadProfiler *profiler) { - m_upload = upload; - m_profiler = profiler; - return m_upload != nullptr; - } - - void shutdown() noexcept { - m_upload = nullptr; - m_profiler = nullptr; - } - - bool uploadOne(VkBuffer materialBuffer, VkDeviceSize dstOffsetBytes, - const MaterialGPU &material); + bool init(); + void shutdown() noexcept; -private: - VkUploadContext *m_upload = nullptr; // non-owning - UploadProfiler *m_profiler = nullptr; // non-owning + bool uploadOne( + VkUploadContext::Recorder recorder, VkBuffer materialBuffer, + VkDeviceSize dstOffsetBytes, const MaterialGPU &material, + VkPipelineStageFlags dstStage = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT); }; diff --git a/src/backend/gpu/upload/vk_texture_uploader.cpp b/src/backend/gpu/upload/vk_texture_uploader.cpp index 5525e88..f7c9e2f 100644 --- a/src/backend/gpu/upload/vk_texture_uploader.cpp +++ b/src/backend/gpu/upload/vk_texture_uploader.cpp @@ -1,9 +1,10 @@ #include "vk_texture_uploader.hpp" +#include "backend/core/vk_backend_ctx.hpp" #include "backend/gpu/textures/vk_texture.hpp" #include "backend/gpu/textures/vk_texture_utils.hpp" #include "backend/gpu/upload/vk_upload_context.hpp" -#include "backend/profiling/upload_profiler.hpp" +#include "backend/profiling/telemetry/telemetry.hpp" #include #include @@ -12,35 
+13,20 @@ #include #include -// TODO take in backend ctx -bool VkTextureUploader::init(VmaAllocator allocator, VkDevice device, - VkUploadContext *upload, - UploadProfiler *profiler) { - if (allocator == nullptr || device == VK_NULL_HANDLE || - upload == VK_NULL_HANDLE) { - std::cerr << "[TextureUpload] Invalid init args\n"; - return false; - } - - m_allocator = allocator; - m_device = device; - m_upload = upload; - m_profiler = profiler; +bool VkTextureUploader::init(VkBackendCtx &ctx) { + m_ctx = &ctx; return true; } -void VkTextureUploader::shutdown() noexcept { - m_allocator = nullptr; - m_device = VK_NULL_HANDLE; - m_upload = nullptr; - m_profiler = nullptr; -} +void VkTextureUploader::shutdown() noexcept { m_ctx = nullptr; } -bool VkTextureUploader::uploadRGBA8(const void *rgbaPixels, uint32_t width, - uint32_t height, VkTexture2D &out) { - if (m_allocator == nullptr || m_upload == nullptr) { - std::cerr << "[TextureUpload] Not initialized\n"; +bool VkTextureUploader::uploadRGBA8(VkUploadContext::Recorder recorder, + const void *rgbaPixels, uint32_t width, + uint32_t height, VkTexture2D &out, + VkPipelineStageFlags finalStage) { + if (!recorder) { + std::cerr << "[TextureUpload] Invalid recorder\n"; return false; } @@ -49,10 +35,11 @@ bool VkTextureUploader::uploadRGBA8(const void *rgbaPixels, uint32_t width, return false; } - const VkDeviceSize size = static_cast(width) * - static_cast(height) * 4ULL; + VkDevice device = m_ctx->device(); - VkStagingAlloc stageAlloc = m_upload->allocStaging(size, /*alignment=*/16); + const VkDeviceSize size = VkDeviceSize(width) * VkDeviceSize(height) * 4ULL; + + VkStagingAlloc stageAlloc = recorder.allocStaging(size, /*alignment=*/16); if (!stageAlloc) { std::cerr << "[TextureUpload] Out of staging space (increase per-frame budget " @@ -62,45 +49,38 @@ bool VkTextureUploader::uploadRGBA8(const void *rgbaPixels, uint32_t width, std::memcpy(stageAlloc.ptr, rgbaPixels, static_cast(size)); - if (m_profiler != nullptr) { - 
profilerAdd(m_profiler, UploadProfiler::Stat::UploadMemcpyCount, 1); - profilerAdd(m_profiler, UploadProfiler::Stat::UploadMemcpyBytes, size); - } + PROFILE_UPLOAD_INC(UploadProfiler::Stat::UploadMemcpyCount); + PROFILE_UPLOAD_ADD(UploadProfiler::Stat::UploadMemcpyBytes, size); out.shutdown(); // TODO: check for VK_FORMAT_R8G8B8A8_UNORM - // TODO: move to images/ - if (!out.image.init2D(m_allocator, width, height, VK_FORMAT_R8G8B8A8_SRGB, - VK_IMAGE_USAGE_TRANSFER_DST_BIT | - VK_IMAGE_USAGE_SAMPLED_BIT, - VK_IMAGE_TILING_OPTIMAL)) { + if (!out.image.init2D( + m_ctx->allocator(), width, height, VK_FORMAT_R8G8B8A8_SRGB, + VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, + VK_IMAGE_TILING_OPTIMAL)) { std::cerr << "[TextureUpload] Failed to create device-local image\n"; return false; } - if (m_profiler != nullptr) { - profilerAdd(m_profiler, UploadProfiler::Stat::TextureAllocatedBytes, size); - } + PROFILE_UPLOAD_ADD(UploadProfiler::Stat::TextureAllocatedBytes, size); - m_upload->cmdUploadRGBA8ToImage(out.image.handle(), width, height, - stageAlloc.offset, - VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + recorder.cmdUploadRGBA8ToImage( + out.image.handle(), width, height, stageAlloc.offset, + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, finalStage); - if (m_profiler != nullptr) { - profilerAdd(m_profiler, UploadProfiler::Stat::TextureUploadCount, 1); - profilerAdd(m_profiler, UploadProfiler::Stat::TextureUploadBytes, size); - } + PROFILE_UPLOAD_INC(UploadProfiler::Stat::TextureUploadCount); + PROFILE_UPLOAD_ADD(UploadProfiler::Stat::TextureUploadBytes, size); - out.device = m_device; + out.device = device; - if (!vkCreateTextureView(m_device, out.image.handle(), - VK_FORMAT_R8G8B8A8_SRGB, out.view)) { + if (!vkCreateTextureView(device, out.image.handle(), VK_FORMAT_R8G8B8A8_SRGB, + out.view)) { out.shutdown(); return false; } - if (!vkCreateTextureSampler(m_device, out.sampler)) { + if (!vkCreateTextureSampler(device, out.sampler)) { out.shutdown(); return 
false; } diff --git a/src/backend/gpu/upload/vk_texture_uploader.hpp b/src/backend/gpu/upload/vk_texture_uploader.hpp index 495b1bf..0c472f3 100644 --- a/src/backend/gpu/upload/vk_texture_uploader.hpp +++ b/src/backend/gpu/upload/vk_texture_uploader.hpp @@ -1,10 +1,8 @@ #pragma once +#include "backend/core/vk_backend_ctx.hpp" #include "backend/gpu/textures/vk_texture.hpp" - -class VkUploadContext; -class VkCommands; -class UploadProfiler; +#include "backend/gpu/upload/vk_upload_context.hpp" #include #include @@ -12,18 +10,14 @@ class UploadProfiler; class VkTextureUploader { public: - bool init(VmaAllocator allocator, VkDevice device, VkUploadContext *upload, - UploadProfiler *profiler); + bool init(VkBackendCtx &ctx); void shutdown() noexcept; - bool uploadRGBA8(const void *rgbaPixels, uint32_t width, uint32_t height, - VkTexture2D &out); + bool uploadRGBA8( + VkUploadContext::Recorder recorder, const void *rgbaPixels, + uint32_t width, uint32_t height, VkTexture2D &out, + VkPipelineStageFlags finalStage = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT); private: - VmaAllocator m_allocator = nullptr; // non-owning - VkDevice m_device = VK_NULL_HANDLE; // non-owning - - VkUploadContext *m_upload = nullptr; // non-owning - VkCommands *m_commands = nullptr; // non-owning - UploadProfiler *m_profiler = nullptr; // non-owning + VkBackendCtx *m_ctx = nullptr; // non-owning }; diff --git a/src/backend/gpu/upload/vk_upload_context.cpp b/src/backend/gpu/upload/vk_upload_context.cpp index 3de1927..3961717 100644 --- a/src/backend/gpu/upload/vk_upload_context.cpp +++ b/src/backend/gpu/upload/vk_upload_context.cpp @@ -2,13 +2,17 @@ #include "backend/core/vk_backend_ctx.hpp" #include "backend/gpu/buffers/vk_buffer.hpp" -#include "backend/profiling/upload_profiler.hpp" +#include "backend/profiling/telemetry/telemetry.hpp" #include "util/vk_barrier.hpp" #include +#include #include #include +#include #include +#include +#include #include template static inline void freeArray(T *&p) 
noexcept { @@ -16,6 +20,38 @@ template static inline void freeArray(T *&p) noexcept { p = nullptr; } +VkUploadContext &VkUploadContext::operator=(VkUploadContext &&other) noexcept { + if (this == &other) { + return *this; + } + + shutdown(); + + m_ctx = std::exchange(other.m_ctx, nullptr); + m_mode = std::exchange(other.m_mode, Mode::FrameRing); + + m_framesInFlight = std::exchange(other.m_framesInFlight, 0); + m_threadCount = std::exchange(other.m_threadCount, 0); + m_bytesPerSlice = std::exchange(other.m_bytesPerSlice, 0); + + m_bufCopyAlign = std::exchange(other.m_bufCopyAlign, 1); + m_rowPitchAlign = std::exchange(other.m_rowPitchAlign, 1); + + m_staging = std::move(other.m_staging); + m_stagingMapped = std::exchange(other.m_stagingMapped, nullptr); + + m_pools = std::exchange(other.m_pools, nullptr); + m_cmds = std::exchange(other.m_cmds, nullptr); + m_fences = std::exchange(other.m_fences, nullptr); + m_heads = std::exchange(other.m_heads, nullptr); + m_begun = std::exchange(other.m_begun, nullptr); + m_hadWork = std::exchange(other.m_hadWork, nullptr); + + m_submitScratch = std::move(other.m_submitScratch); + + return *this; +} + VkDeviceSize VkUploadContext::alignUp(VkDeviceSize v, VkDeviceSize a) noexcept { if (a == 0) { return v; @@ -24,20 +60,36 @@ VkDeviceSize VkUploadContext::alignUp(VkDeviceSize v, VkDeviceSize a) noexcept { return (v + (a - 1)) & ~(a - 1); } -bool VkUploadContext::init(VkBackendCtx &ctx, uint32_t framesInflight, - VkDeviceSize perFrameBytes, - UploadProfiler *profiler) { - if (perFrameBytes == 0) { - std::cerr << "[UploadCtx] Invalid init args\n"; - return false; +VkCommandPool VkUploadContext::poolAt(uint32_t frameIndex, + uint32_t threadIndex) const noexcept { + if (m_pools == nullptr) { + return VK_NULL_HANDLE; + } + + return m_pools[idx(frameIndex, threadIndex)]; +} + +VkCommandBuffer VkUploadContext::cmdAt(uint32_t frameIndex, + uint32_t threadIndex) const noexcept { + if (m_cmds == nullptr) { + return VK_NULL_HANDLE; } + return 
m_cmds[idx(frameIndex, threadIndex)]; +} + +bool VkUploadContext::initCommon(VkBackendCtx &ctx, Mode mode, + uint32_t framesInflight, + VkDeviceSize bytesPerFrameSlice, + uint32_t threadCount) { shutdown(); m_ctx = &ctx; + m_mode = mode; + m_framesInFlight = framesInflight; - m_perFrameBytes = perFrameBytes; - m_profiler = profiler; + m_threadCount = threadCount; + m_bytesPerSlice = bytesPerFrameSlice; // Query device limits for alignment VkPhysicalDeviceProperties props{}; @@ -47,14 +99,30 @@ bool VkUploadContext::init(VkBackendCtx &ctx, uint32_t framesInflight, m_rowPitchAlign = std::max( 1, props.limits.optimalBufferCopyRowPitchAlignment); - const VkDeviceSize totalBytes = VkDeviceSize(framesInflight) * perFrameBytes; + const VkDeviceSize totalBytes = + VkDeviceSize(framesInflight) * m_bytesPerSlice; + const uint32_t poolCount = m_framesInFlight * m_threadCount; + + m_pools = (VkCommandPool *)std::calloc(poolCount, sizeof(VkCommandPool)); + m_cmds = (VkCommandBuffer *)std::calloc(poolCount, sizeof(VkCommandBuffer)); + m_fences = (VkFence *)std::calloc(m_framesInFlight, sizeof(VkFence)); + + m_heads = (std::atomic *)std::calloc( + m_framesInFlight, sizeof(std::atomic)); + m_begun = (uint8_t *)std::calloc(poolCount, sizeof(uint8_t)); + m_hadWork = (uint8_t *)std::calloc(poolCount, sizeof(uint8_t)); + + if (m_pools == nullptr || m_cmds == nullptr || m_fences == nullptr || + m_heads == nullptr || m_begun == nullptr || m_hadWork == nullptr) { + std::cerr << "[UploadCtx] Allocation failed\n"; + shutdown(); + return false; + } - // TODO: create per thread - // Create per frame - m_pools = (VkCommandPool *)std::calloc(framesInflight, sizeof(VkCommandPool)); - m_cmds = - (VkCommandBuffer *)std::calloc(framesInflight, sizeof(VkCommandBuffer)); - m_fences = (VkFence *)std::calloc(framesInflight, sizeof(VkFence)); + // Construct atomics + for (uint32_t i = 0; i < m_framesInFlight; ++i) { + new (&m_heads[i]) std::atomic(0); + } if (!m_staging.init(m_ctx->allocator(), 
totalBytes, VK_BUFFER_USAGE_TRANSFER_SRC_BIT, @@ -64,11 +132,8 @@ bool VkUploadContext::init(VkBackendCtx &ctx, uint32_t framesInflight, return false; } - if (m_profiler != nullptr) { - profilerAdd(m_profiler, UploadProfiler::Stat::StagingCreatedCount, 1); - profilerAdd(m_profiler, UploadProfiler::Stat::StagingAllocatedBytes, - totalBytes); - } + PROFILE_UPLOAD_INC(UploadProfiler::Stat::StagingCreatedCount); + PROFILE_UPLOAD_ADD(UploadProfiler::Stat::StagingAllocatedBytes, totalBytes); { void *mapped = nullptr; @@ -83,39 +148,14 @@ bool VkUploadContext::init(VkBackendCtx &ctx, uint32_t framesInflight, m_stagingMapped = mapped; } - for (uint32_t i = 0; i < framesInflight; ++i) { - VkCommandPoolCreateInfo cmdPoolInfo{}; - cmdPoolInfo.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO; - cmdPoolInfo.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT; - cmdPoolInfo.queueFamilyIndex = m_ctx->graphicsQueueFamily(); - - VkResult res = vkCreateCommandPool(m_ctx->device(), &cmdPoolInfo, nullptr, - &m_pools[i]); - if (res != VK_SUCCESS) { - std::cerr << "[UploadCtx] vkCreateComandPool failed: " << res << "\n"; - shutdown(); - return false; - } - - VkCommandBufferAllocateInfo cmdAllocInfo{}; - cmdAllocInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO; - cmdAllocInfo.commandPool = m_pools[i]; - cmdAllocInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY; - cmdAllocInfo.commandBufferCount = 1; - - res = vkAllocateCommandBuffers(m_ctx->device(), &cmdAllocInfo, &m_cmds[i]); - if (res != VK_SUCCESS) { - std::cerr << "[UploadCtx] vkAllocateCommandBuffers failed: " << res - << "\n"; - shutdown(); - return false; - } - + // Fences per frame slot + for (uint32_t fi = 0; fi < framesInflight; ++fi) { VkFenceCreateInfo fenceInfo{}; fenceInfo.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO; fenceInfo.flags = VK_FENCE_CREATE_SIGNALED_BIT; - res = vkCreateFence(m_ctx->device(), &fenceInfo, nullptr, &m_fences[i]); + VkResult res = + vkCreateFence(m_ctx->device(), &fenceInfo, nullptr, 
&m_fences[fi]); if (res != VK_SUCCESS) { std::cerr << "[UploadCtx] vkCreateFence failed: " << res << "\n"; shutdown(); @@ -123,9 +163,72 @@ bool VkUploadContext::init(VkBackendCtx &ctx, uint32_t framesInflight, } } + // Create per-frame/per-thread pools and cmd buffers + for (uint32_t fi = 0; fi < m_framesInFlight; ++fi) { + for (uint32_t ti = 0; ti < m_threadCount; ++ti) { + VkCommandPoolCreateInfo cmdPoolInfo{}; + cmdPoolInfo.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO; + cmdPoolInfo.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT; + cmdPoolInfo.queueFamilyIndex = m_ctx->graphicsQueueFamily(); + + VkCommandPool pool = VK_NULL_HANDLE; + VkResult res = + vkCreateCommandPool(m_ctx->device(), &cmdPoolInfo, nullptr, &pool); + if (res != VK_SUCCESS) { + std::cerr << "[UploadCtx] vkCreateComandPool failed: " << res << "\n"; + shutdown(); + return false; + } + m_pools[idx(fi, ti)] = pool; + + VkCommandBufferAllocateInfo cmdAllocInfo{}; + cmdAllocInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO; + cmdAllocInfo.commandPool = pool; + cmdAllocInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY; + cmdAllocInfo.commandBufferCount = 1; + + VkCommandBuffer cmd = VK_NULL_HANDLE; + res = vkAllocateCommandBuffers(m_ctx->device(), &cmdAllocInfo, &cmd); + if (res != VK_SUCCESS) { + std::cerr << "[UploadCtx] vkAllocateCommandBuffers failed: " << res + << "\n"; + shutdown(); + return false; + } + m_cmds[idx(fi, ti)] = cmd; + } + } + + m_submitScratch.clear(); + m_submitScratch.shrink_to_fit(); + m_submitScratch.reserve(m_threadCount); + return true; } +bool VkUploadContext::initFrameRing(VkBackendCtx &ctx, uint32_t framesInFlight, + VkDeviceSize bytesPerFrameSlice, + uint32_t threadCount) { + if (framesInFlight == 0 || bytesPerFrameSlice == 0 || threadCount == 0) { + std::cerr << "[UploadCtx] Invalid initFrameRing args\n"; + return false; + } + + return initCommon(ctx, Mode::FrameRing, framesInFlight, bytesPerFrameSlice, + threadCount); +} + +bool 
VkUploadContext::initOneShot(VkBackendCtx &ctx, VkDeviceSize totalBytes, + uint32_t threadCount) { + if (totalBytes == 0 || threadCount == 0) { + std::cerr << "[UploadCtx] Invalid initOnShot args\n"; + return false; + } + + return initCommon(ctx, Mode::OneShot, /*framesInflight=*/1, totalBytes, + threadCount); +} + void VkUploadContext::shutdown() noexcept { VkDevice device = VK_NULL_HANDLE; if (m_ctx != nullptr) { @@ -143,19 +246,26 @@ void VkUploadContext::shutdown() noexcept { } if (m_cmds != nullptr && m_pools != nullptr) { - for (uint32_t i = 0; i < m_framesInFlight; ++i) { - if (m_cmds[i] != VK_NULL_HANDLE && m_pools[i] != VK_NULL_HANDLE) { - vkFreeCommandBuffers(device, m_pools[i], 1, &m_cmds[i]); - m_cmds[i] = VK_NULL_HANDLE; + for (uint32_t fi = 0; fi < m_framesInFlight; ++fi) { + for (uint32_t ti = 0; ti < m_threadCount; ++ti) { + VkCommandBuffer cmd = m_cmds[idx(fi, ti)]; + VkCommandPool pool = m_pools[idx(fi, ti)]; + if (cmd != VK_NULL_HANDLE && pool != VK_NULL_HANDLE) { + vkFreeCommandBuffers(device, pool, 1, &cmd); + m_cmds[idx(fi, ti)] = VK_NULL_HANDLE; + } } } } if (m_pools != nullptr) { - for (uint32_t i = 0; i < m_framesInFlight; ++i) { - if (m_pools[i] != VK_NULL_HANDLE) { - vkDestroyCommandPool(device, m_pools[i], nullptr); - m_pools[i] = VK_NULL_HANDLE; + for (uint32_t fi = 0; fi < m_framesInFlight; ++fi) { + for (uint32_t ti = 0; ti < m_threadCount; ++ti) { + VkCommandPool pool = m_pools[idx(fi, ti)]; + if (pool != VK_NULL_HANDLE) { + vkDestroyCommandPool(device, pool, nullptr); + m_pools[idx(fi, ti)] = VK_NULL_HANDLE; + } } } } @@ -165,164 +275,266 @@ void VkUploadContext::shutdown() noexcept { vmaUnmapMemory(m_ctx->allocator(), m_staging.allocation()); } + if (m_heads != nullptr) { + for (uint32_t i = 0; i < m_framesInFlight; ++i) { + m_heads[i].~atomic(); + } + } + + freeArray(m_hadWork); + freeArray(m_begun); + freeArray(m_heads); freeArray(m_fences); freeArray(m_cmds); freeArray(m_pools); - m_fences = nullptr; - m_cmds = nullptr; - 
m_pools = nullptr; - - m_pool = VK_NULL_HANDLE; - m_cmd = VK_NULL_HANDLE; - m_stagingMapped = nullptr; m_staging.shutdown(); m_ctx = nullptr; - m_profiler = nullptr; - + m_mode = Mode::FrameRing; m_framesInFlight = 0; - m_frameIndex = 0; - m_perFrameBytes = 0; + m_threadCount = 0; + m_bytesPerSlice = 0; m_bufCopyAlign = 1; m_rowPitchAlign = 1; - m_sliceBase = 0; - m_sliceHead = 0; - m_recording = false; + m_submitScratch.clear(); } -bool VkUploadContext::beginCmd() { - // TODO: make single method for this, renderer, and command to use - VkCommandBufferBeginInfo bufBeginInfo{}; - bufBeginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; - bufBeginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; +bool VkUploadContext::waitAndReset(uint32_t frameIndex) { + if (m_ctx == nullptr || m_pools == VK_NULL_HANDLE || m_fences == nullptr) { + std::cerr << "[UploadCtx] waitAndReset invalid state\n"; + return false; + } - VkResult res = vkBeginCommandBuffer(m_cmd, &bufBeginInfo); - if (res != VK_SUCCESS) { - std::cerr << "[UploadCtx] vkBeginCommandBuffer failed: " << res << "\n"; + if (frameIndex >= m_framesInFlight) { + std::cerr << "[UploadCtx] waitAndReset frameIndex out of range\n"; return false; } - return true; -} + VkFence fence = m_fences[frameIndex]; -bool VkUploadContext::endCmd() { - VkResult res = vkEndCommandBuffer(m_cmd); + // TODO: use timeline semaphore to not blocking wait + VkResult res = + vkWaitForFences(m_ctx->device(), 1, &fence, VK_TRUE, UINT64_MAX); if (res != VK_SUCCESS) { - std::cerr << "[UploadCtx] vkEndCommandBuffer failed: " << res << "\n"; + std::cerr << "[UploadCtx] vkWaitForFences failed: " << res << "\n"; return false; } + for (uint32_t ti = 0; ti < m_threadCount; ++ti) { + vkResetCommandPool(m_ctx->device(), m_pools[idx(frameIndex, ti)], 0); + } + + { + const uint32_t base = frameIndex * m_threadCount; + std::memset(m_begun + base, 0, m_threadCount * sizeof(uint8_t)); + std::memset(m_hadWork + base, 0, m_threadCount * 
sizeof(uint8_t)); + } + + m_heads[frameIndex].store(0, std::memory_order_release); + return true; } bool VkUploadContext::beginFrame(uint32_t frameIndex) { - if (m_ctx == nullptr || m_pools == VK_NULL_HANDLE || m_fences == nullptr) { - std::cerr << "[UploadCtx] beginFrame invalid state\n"; + if (m_mode != Mode::FrameRing) { + std::cerr << "[UploadCtx] beginFrame called on non-FrameRing context\n"; return false; } + return waitAndReset(frameIndex); +} - if (frameIndex >= m_framesInFlight) { - std::cerr << "[UploadCtx] beginFrame frameIndex out of range\n"; +bool VkUploadContext::beginBatch() { + if (m_mode != Mode::OneShot) { + std::cerr << "[UploadCtx] beginBatch called on non-OneShot context\n"; return false; } + return waitAndReset(0); +} - m_frameIndex = frameIndex; - m_pool = m_pools[frameIndex]; - m_cmd = m_cmds[frameIndex]; +VkUploadContext::Recorder VkUploadContext::recorder(uint32_t frameIndex, + uint32_t threadIndex) { + if (m_ctx == nullptr || m_stagingMapped == nullptr) { + return {}; + } - VkFence fence = m_fences[frameIndex]; + if (threadIndex >= m_threadCount) { + return {}; + } - // TODO: use timeline semaphore to not blocking wait - VkResult res = - vkWaitForFences(m_ctx->device(), 1, &fence, VK_TRUE, UINT64_MAX); + if (m_mode == Mode::OneShot) { + frameIndex = 0; + } + + if (frameIndex >= m_framesInFlight) { + return {}; + } + + return Recorder{this, frameIndex, threadIndex}; +} + +bool VkUploadContext::beginCmd(uint32_t frameIndex, uint32_t threadIndex) { + // TODO: make single method for this, renderer, and command to use + + VkCommandBuffer cmd = cmdAt(frameIndex, threadIndex); + if (cmd == VK_NULL_HANDLE) { + return false; + } + + VkCommandBufferBeginInfo bufBeginInfo{}; + bufBeginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; + bufBeginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; + + VkResult res = vkBeginCommandBuffer(cmd, &bufBeginInfo); if (res != VK_SUCCESS) { - std::cerr << "[UploadCtx] vkWaitForFences failed: " << 
res << "\n"; + std::cerr << "[UploadCtx] vkBeginCommandBuffer failed: " << res << "\n"; return false; } - vkResetCommandPool(m_ctx->device(), m_pools[frameIndex], 0); + m_begun[idx(frameIndex, threadIndex)] = 1; + return true; +} - m_sliceBase = VkDeviceSize(frameIndex) * m_perFrameBytes; - m_sliceHead = 0; +bool VkUploadContext::endCmd(uint32_t frameIndex, uint32_t threadIndex) { + VkCommandBuffer cmd = cmdAt(frameIndex, threadIndex); + if (cmd == VK_NULL_HANDLE) { + return false; + } - if (!beginCmd()) { + VkResult res = vkEndCommandBuffer(cmd); + if (res != VK_SUCCESS) { + std::cerr << "[UploadCtx] vkEndCommandBuffer failed: " << res << "\n"; return false; } - m_recording = true; - m_hadWork = false; return true; } -VkStagingAlloc VkUploadContext::allocStaging(VkDeviceSize size, +VkStagingAlloc VkUploadContext::allocStaging(uint32_t frameIndex, + VkDeviceSize size, VkDeviceSize alignment) { VkStagingAlloc out{}; - - if (!m_recording || m_stagingMapped == nullptr) { + if (m_stagingMapped == nullptr) { return out; } VkDeviceSize a = std::max(alignment, m_bufCopyAlign); - VkDeviceSize alignedHead = alignUp(m_sliceHead, a); + std::atomic &head = m_heads[frameIndex]; - if (alignedHead + size > m_perFrameBytes) { - return out; + // CAS loop to align without wasting slice space + for (;;) { + VkDeviceSize cur = head.load(std::memory_order_acquire); + VkDeviceSize aligned = alignUp(cur, a); + VkDeviceSize next = aligned + size; + + if (next > m_bytesPerSlice) { + return out; + } + + if (head.compare_exchange_weak(cur, next, std::memory_order_acq_rel, + std::memory_order_acquire)) { + VkDeviceSize absOffset = sliceBase(frameIndex) + aligned; + out.ptr = static_cast(m_stagingMapped) + absOffset; + out.offset = absOffset; + out.size = size; + + PROFILE_UPLOAD_ADD(UploadProfiler::Stat::StagingUsedBytes, size); + + return out; + } + } +} + +bool VkUploadContext::submit(uint32_t frameIndex, bool wait) { + if (m_ctx == nullptr) { + return false; } - VkDeviceSize absOffset = 
m_sliceBase + alignedHead; - out.ptr = static_cast(m_stagingMapped) + absOffset; - out.offset = absOffset; - out.size = size; + m_submitScratch.clear(); - m_sliceHead = alignedHead + size; - m_hadWork = true; + for (uint32_t ti = 0; ti < m_threadCount; ++ti) { + const uint32_t k = idx(frameIndex, ti); + if (m_begun[k] == 0) { + continue; + } + + if (m_hadWork[k] == 0) { + if (!endCmd(frameIndex, ti)) { + return false; + } + continue; + } - if (m_profiler != nullptr) { - profilerAdd(m_profiler, UploadProfiler::Stat::StagingUsedBytes, size); + if (!endCmd(frameIndex, ti)) { + return false; + } + m_submitScratch.push_back(cmdAt(frameIndex, ti)); } - return out; -} + if (m_submitScratch.empty()) { + return true; + } -void VkUploadContext::cmdCopyToBuffer(VkBuffer dst, VkDeviceSize dstOffset, - VkDeviceSize srcOffset, - VkDeviceSize size) { - if (!m_recording) { - return; + VkFence fence = m_fences[frameIndex]; + VkResult res = vkResetFences(m_ctx->device(), 1, &fence); + if (res != VK_SUCCESS) { + std::cerr << "[UploadCtx] vkResetFences failed: " << res << "\n"; + return false; } - m_hadWork = true; + VkSubmitInfo submitInfo{}; + submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; + submitInfo.commandBufferCount = static_cast(m_submitScratch.size()); + submitInfo.pCommandBuffers = m_submitScratch.data(); - VkBufferCopy copy{}; - copy.srcOffset = srcOffset; - copy.dstOffset = dstOffset; - copy.size = size; - vkCmdCopyBuffer(m_cmd, m_staging.handle(), dst, 1, ©); + res = vkQueueSubmit(m_ctx->graphicsQueue(), 1, &submitInfo, fence); + if (res != VK_SUCCESS) { + std::cerr << "[UploadCtx] vkQueueSubmit failed: " << res << "\n"; + return false; + } + + PROFILE_UPLOAD_INC(UploadProfiler::Stat::UploadSubmitCount); + + if (wait) { + res = vkWaitForFences(m_ctx->device(), 1, &fence, VK_TRUE, UINT64_MAX); + if (res != VK_SUCCESS) { + std::cerr << "[UploadCtx] vkWaitForFences(wait) failed: " << res << "\n"; + return false; + } + } + + return true; } -void 
VkUploadContext::cmdBarrierBufferTransferToShader( - VkBuffer buffer, VkDeviceSize offset, VkDeviceSize size, - VkPipelineStageFlags dstStage) { +bool VkUploadContext::flushFrame(uint32_t frameIndex, bool wait) { + if (m_mode != Mode::FrameRing) { + std::cerr << "[UploadCtx] flushFrame called on non-FrameRing context\n"; + return false; + } - if (!m_recording) { - return; + if (frameIndex >= m_framesInFlight) { + return false; } - util::cmdBufferBarrier( - m_cmd, buffer, offset, size, VK_ACCESS_TRANSFER_WRITE_BIT, - VK_ACCESS_SHADER_READ_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, dstStage); + return submit(frameIndex, wait); } -void VkUploadContext::transitionImage(VkImage image, VkImageLayout oldLayout, - VkImageLayout newLayout) { - if (!m_recording) { - return; +bool VkUploadContext::flushBatch(bool wait) { + if (m_mode != Mode::OneShot) { + std::cerr << "[UploadCtx] flushBatch called on non-OneShot context\n"; + return false; } + return submit(0, wait); +} + +void VkUploadContext::transitionImage(VkCommandBuffer cmd, VkImage image, + VkImageLayout oldLayout, + VkImageLayout newLayout, + VkPipelineStageFlags finalStage) { VkPipelineStageFlags srcStage = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; VkPipelineStageFlags dstStage = VK_PIPELINE_STAGE_TRANSFER_BIT; VkAccessFlags srcAccess = 0; @@ -337,8 +549,7 @@ void VkUploadContext::transitionImage(VkImage image, VkImageLayout oldLayout, } else if (oldLayout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL && newLayout == VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL) { srcStage = VK_PIPELINE_STAGE_TRANSFER_BIT; - dstStage = - VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; // or make this configurable + dstStage = finalStage; srcAccess = VK_ACCESS_TRANSFER_WRITE_BIT; dstAccess = VK_ACCESS_SHADER_READ_BIT; } else { @@ -347,90 +558,114 @@ void VkUploadContext::transitionImage(VkImage image, VkImageLayout oldLayout, return; } - util::cmdImageBarrier(m_cmd, image, oldLayout, newLayout, srcAccess, - dstAccess, srcStage, dstStage, - 
VK_IMAGE_ASPECT_COLOR_BIT); + util::cmdImageBarrier(cmd, image, oldLayout, newLayout, srcAccess, dstAccess, + srcStage, dstStage, VK_IMAGE_ASPECT_COLOR_BIT); } -void VkUploadContext::cmdUploadRGBA8ToImage(VkImage image, uint32_t width, - uint32_t height, - VkDeviceSize srcOffset, - VkImageLayout finalLayout) { - if (!m_recording) { - return; +VkCommandBuffer VkUploadContext::Recorder::cmd() const noexcept { + if (m_ctx == nullptr) { + return VK_NULL_HANDLE; } - transitionImage(image, VK_IMAGE_LAYOUT_UNDEFINED, - VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); - - VkBufferImageCopy region{}; - region.bufferOffset = srcOffset; - region.bufferRowLength = 0; // tightly packed - region.bufferImageHeight = 0; // tightly packed - region.imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; - region.imageSubresource.mipLevel = 0; - region.imageSubresource.baseArrayLayer = 0; - region.imageSubresource.layerCount = 1; - region.imageOffset = VkOffset3D{0, 0, 0}; - region.imageExtent = VkExtent3D{width, height, 1U}; + return m_ctx->cmdAt(m_frameIndex, m_threadIndex); +} - vkCmdCopyBufferToImage(m_cmd, m_staging.handle(), image, - VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, ®ion); +void VkUploadContext::Recorder::ensureBegun() { + const uint32_t k = m_ctx->idx(m_frameIndex, m_threadIndex); + if (m_ctx->m_begun[k] != 0) { + return; + } - transitionImage(image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, finalLayout); + (void)m_ctx->beginCmd(m_frameIndex, m_threadIndex); } -bool VkUploadContext::flush(bool wait) { - if (!m_recording) { - return true; +VkStagingAlloc VkUploadContext::Recorder::allocStaging(VkDeviceSize size, + VkDeviceSize alignment) { + if (m_ctx == nullptr) { + return {}; } - // End the recording scope if no work done - if (!m_hadWork) { - if (!endCmd()) { - return false; - } + ensureBegun(); - m_recording = false; - return true; + VkStagingAlloc a = m_ctx->allocStaging(m_frameIndex, size, alignment); + + if (a) { + const uint32_t k = m_ctx->idx(m_frameIndex, m_threadIndex); + 
m_ctx->m_hadWork[k] = 1; } - m_recording = false; + return a; +} - if (!endCmd()) { - return false; - } +void VkUploadContext::Recorder::cmdCopyToBuffer(VkBuffer dst, + VkDeviceSize dstOffset, + VkDeviceSize srcOffset, + VkDeviceSize size) { + ensureBegun(); - VkFence fence = m_fences[m_frameIndex]; - VkResult res = vkResetFences(m_ctx->device(), 1, &fence); - if (res != VK_SUCCESS) { - std::cerr << "[UploadCtx] vkResetFences failed: " << res << "\n"; - return false; + VkCommandBuffer c = cmd(); + if (c == VK_NULL_HANDLE) { + return; } - VkSubmitInfo submitInfo{}; - submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; - submitInfo.commandBufferCount = 1; - VkCommandBuffer cmd = m_cmds[m_frameIndex]; - submitInfo.pCommandBuffers = &cmd; + const uint32_t k = m_ctx->idx(m_frameIndex, m_threadIndex); + m_ctx->m_hadWork[k] = 1; - res = vkQueueSubmit(m_ctx->graphicsQueue(), 1, &submitInfo, fence); - if (res != VK_SUCCESS) { - std::cerr << "[UploadCtx] vkQueueSubmit failed: " << res << "\n"; - return false; - } + VkBufferCopy copy{}; + copy.srcOffset = srcOffset; + copy.dstOffset = dstOffset; + copy.size = size; + vkCmdCopyBuffer(c, m_ctx->m_staging.handle(), dst, 1, ©); +} + +void VkUploadContext::Recorder::cmdBarrierBufferTransferToShader( + VkBuffer buffer, VkDeviceSize offset, VkDeviceSize size, + VkPipelineStageFlags dstStage) { + ensureBegun(); - if (m_profiler != nullptr) { - profilerAdd(m_profiler, UploadProfiler::Stat::UploadSubmitCount, 1); + VkCommandBuffer c = cmd(); + if (c == VK_NULL_HANDLE) { + return; } - if (wait) { - vkWaitForFences(m_ctx->device(), 1, &fence, VK_TRUE, UINT64_MAX); - if (res != VK_SUCCESS) { - std::cerr << "[UploadCtx] vkWaitForFences(wait) failed: " << res << "\n"; - return false; - } + const uint32_t k = m_ctx->idx(m_frameIndex, m_threadIndex); + m_ctx->m_hadWork[k] = 1; + + util::cmdBufferBarrier(c, buffer, offset, size, VK_ACCESS_TRANSFER_WRITE_BIT, + VK_ACCESS_SHADER_READ_BIT, + VK_PIPELINE_STAGE_TRANSFER_BIT, dstStage); +} + +void 
VkUploadContext::Recorder::cmdUploadRGBA8ToImage( + VkImage image, uint32_t width, uint32_t height, VkDeviceSize srcOffset, + VkImageLayout finalLayout, VkPipelineStageFlags finalStage) { + ensureBegun(); + + VkCommandBuffer c = cmd(); + if (c == VK_NULL_HANDLE) { + return; } - return true; + const uint32_t k = m_ctx->idx(m_frameIndex, m_threadIndex); + m_ctx->m_hadWork[k] = 1; + + m_ctx->transitionImage(c, image, VK_IMAGE_LAYOUT_UNDEFINED, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, finalStage); + + VkBufferImageCopy region{}; + region.bufferOffset = srcOffset; + region.bufferRowLength = 0; // tightly packed + region.bufferImageHeight = 0; // tightly packed + region.imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + region.imageSubresource.mipLevel = 0; + region.imageSubresource.baseArrayLayer = 0; + region.imageSubresource.layerCount = 1; + region.imageOffset = VkOffset3D{0, 0, 0}; + region.imageExtent = VkExtent3D{width, height, 1U}; + + vkCmdCopyBufferToImage(c, m_ctx->m_staging.handle(), image, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, ®ion); + + m_ctx->transitionImage(c, image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + finalLayout, finalStage); } diff --git a/src/backend/gpu/upload/vk_upload_context.hpp b/src/backend/gpu/upload/vk_upload_context.hpp index 8316b5e..6589907 100644 --- a/src/backend/gpu/upload/vk_upload_context.hpp +++ b/src/backend/gpu/upload/vk_upload_context.hpp @@ -2,10 +2,11 @@ #include "backend/core/vk_backend_ctx.hpp" #include "backend/gpu/buffers/vk_buffer.hpp" -#include "backend/profiling/upload_profiler.hpp" +#include #include #include +#include #include struct VkStagingAlloc { @@ -18,110 +19,136 @@ struct VkStagingAlloc { // TODO: check for transfer queue in queue family and use it // TODO: use timeline semaphore values to know when upload is complete // instead of offloading submits to a different command buffer - class VkUploadContext { public: - VkUploadContext() = default; - ~VkUploadContext() noexcept { shutdown(); } + enum 
class Mode : uint8_t { + FrameRing, // slice per frameIndex in [0, framesInflight) + OneShot, // single reusable batch (not per frame) + }; - VkUploadContext(const VkUploadContext &) = delete; - VkUploadContext &operator=(const VkUploadContext &) = delete; + class Recorder { + public: + Recorder() = default; - VkUploadContext(VkUploadContext &&other) noexcept { - *this = std::move(other); - } - VkUploadContext &operator=(VkUploadContext &&other) noexcept { - if (this == &other) { - return *this; - } + // Allocate space in the staging slice for the current frame. + VkStagingAlloc allocStaging(VkDeviceSize size, VkDeviceSize alignment = 16); + + // Record a copy from staging -> buffer. + void cmdCopyToBuffer(VkBuffer dst, VkDeviceSize dstOffset, + VkDeviceSize srcOffset, VkDeviceSize size); - shutdown(); + // Record a buffer -> image upload for RGBA8 with layout transition: + // UNDEFINED -> TRANSFER_DST_OPTIMAL -> finalLayout + void cmdUploadRGBA8ToImage(VkImage image, uint32_t width, uint32_t height, + VkDeviceSize srcOffset, + VkImageLayout finalLayout, + VkPipelineStageFlags finalStage); - m_ctx = std::exchange(other.m_ctx, nullptr); - m_profiler = std::exchange(other.m_profiler, nullptr); + void cmdBarrierBufferTransferToShader(VkBuffer buffer, VkDeviceSize offset, + VkDeviceSize size, + VkPipelineStageFlags dstStage); - m_framesInFlight = std::exchange(other.m_framesInFlight, 0); - m_frameIndex = std::exchange(other.m_frameIndex, 0); - m_perFrameBytes = std::exchange(other.m_perFrameBytes, 0); + [[nodiscard]] VkCommandBuffer cmd() const noexcept; - m_bufCopyAlign = std::exchange(other.m_bufCopyAlign, 1); - m_rowPitchAlign = std::exchange(other.m_rowPitchAlign, 1); + explicit operator bool() const noexcept { return m_ctx != nullptr; } - m_staging = std::move(other.m_staging); - m_stagingMapped = std::exchange(other.m_stagingMapped, nullptr); + private: + friend class VkUploadContext; + Recorder(VkUploadContext *ctx, uint32_t frameIndex, uint32_t threadIndex) + : 
m_ctx(ctx), m_frameIndex(frameIndex), m_threadIndex(threadIndex) {} - m_pools = std::exchange(other.m_pools, nullptr); - m_cmds = std::exchange(other.m_cmds, nullptr); - m_pool = std::exchange(other.m_pool, VK_NULL_HANDLE); - m_cmd = std::exchange(other.m_cmd, VK_NULL_HANDLE); + void ensureBegun(); - m_fences = std::exchange(other.m_fences, nullptr); + VkUploadContext *m_ctx = nullptr; // non-owning + uint32_t m_frameIndex = 0; + uint32_t m_threadIndex = 0; + }; + + VkUploadContext() = default; + ~VkUploadContext() noexcept { shutdown(); } - m_sliceBase = std::exchange(other.m_sliceBase, 0); - m_sliceHead = std::exchange(other.m_sliceHead, 0); - m_recording = std::exchange(other.m_recording, false); + VkUploadContext(const VkUploadContext &) = delete; + VkUploadContext &operator=(const VkUploadContext &) = delete; - return *this; + VkUploadContext(VkUploadContext &&other) noexcept { + *this = std::move(other); } + VkUploadContext &operator=(VkUploadContext &&other) noexcept; // perFrameBytes: bytes reserved for each frame slice - bool init(VkBackendCtx &ctx, uint32_t framesInflight, - VkDeviceSize perFrameBytes, UploadProfiler *profiler); + bool initFrameRing(VkBackendCtx &ctx, uint32_t framesInFlight, + VkDeviceSize bytesPerFrameSlice, uint32_t threadCount); + + bool initOneShot(VkBackendCtx &ctx, VkDeviceSize totalBytes, + uint32_t threadCount); + void shutdown() noexcept; + [[nodiscard]] Mode mode() const noexcept { return m_mode; } + [[nodiscard]] uint32_t framesInFlight() const noexcept { + return m_framesInFlight; + } + [[nodiscard]] uint32_t threadCount() const noexcept { return m_threadCount; } + + [[nodiscard]] VkBuffer stagingBuffer() const noexcept { + return m_staging.handle(); + } + [[nodiscard]] VkDeviceSize bytesPerSlice() const noexcept { + return m_bytesPerSlice; + } + // This waits for the fence associated with this frame slice, // resets the cmd pool, and beings recording. 
bool beginFrame(uint32_t frameIndex); + bool beginBatch(); - // Allocate space in the staging slice for the current frame. - VkStagingAlloc allocStaging(VkDeviceSize size, VkDeviceSize alignment = 16); + // Get a recorder for this frame/batch and thread index + // Note: for OneShot mode, frameIndex is ignored (use 0) + [[nodiscard]] Recorder recorder(uint32_t frameIndex, uint32_t threadIndex); - // Record a copy from staging -> buffer. - void cmdCopyToBuffer(VkBuffer dst, VkDeviceSize dstOffset, - VkDeviceSize srcOffset, VkDeviceSize size); + // If wait=true, wait for completion. + bool flushFrame(uint32_t frameIndex, bool wait); + bool flushBatch(bool wait); - // Record a buffer -> image upload for RGBA8 with layout transition: - // UNDEFINED -> TRANSFER_DST_OPTIMAL -> finalLayout - void cmdUploadRGBA8ToImage(VkImage image, uint32_t width, uint32_t height, - VkDeviceSize srcOffset, VkImageLayout finalLayout); +private: + bool initCommon(VkBackendCtx &ctx, Mode mode, uint32_t framesInflight, + VkDeviceSize bytesPerFrameSlice, uint32_t threadCount); - void cmdBarrierBufferTransferToShader(VkBuffer buffer, VkDeviceSize offset, - VkDeviceSize size, - VkPipelineStageFlags dstStage); + static VkDeviceSize alignUp(VkDeviceSize v, VkDeviceSize a) noexcept; - // If wait=true, wait for completion. - bool flush(bool wait); + bool waitAndReset(uint32_t frameIndex); + bool submit(uint32_t frameIndex, bool wait); - [[nodiscard]] VkCommandBuffer cmd() const noexcept { - return (m_cmds != nullptr && m_frameIndex < m_framesInFlight) - ? 
m_cmds[m_frameIndex] - : VK_NULL_HANDLE; - } - [[nodiscard]] VkBuffer stagingBuffer() const noexcept { - return m_staging.handle(); - } - [[nodiscard]] VkDeviceSize perFrameBytes() const noexcept { - return m_perFrameBytes; + bool beginCmd(uint32_t frameIndex, uint32_t threadIndex); + bool endCmd(uint32_t frameIndex, uint32_t threadIndex); + + VkStagingAlloc allocStaging(uint32_t frameIndex, VkDeviceSize size, + VkDeviceSize alignment); + + void transitionImage(VkCommandBuffer cmd, VkImage image, + VkImageLayout oldLayout, VkImageLayout newLayout, + VkPipelineStageFlags finalStage); + + [[nodiscard]] uint32_t idx(uint32_t frameIndex, + uint32_t threadIndex) const noexcept { + return (frameIndex * m_threadCount) + threadIndex; } - [[nodiscard]] uint32_t framesInflight() const noexcept { - return m_framesInFlight; + [[nodiscard]] VkDeviceSize sliceBase(uint32_t frameIndex) const noexcept { + return VkDeviceSize(frameIndex) * m_bytesPerSlice; } -private: - static VkDeviceSize alignUp(VkDeviceSize v, VkDeviceSize a) noexcept; - - void transitionImage(VkImage image, VkImageLayout oldLayout, - VkImageLayout newLayout); + [[nodiscard]] VkCommandPool poolAt(uint32_t frameIndex, + uint32_t threadIndex) const noexcept; + [[nodiscard]] VkCommandBuffer cmdAt(uint32_t frameIndex, + uint32_t threadIndex) const noexcept; - bool beginCmd(); - bool endCmd(); + VkBackendCtx *m_ctx = nullptr; // non-owning - VkBackendCtx *m_ctx = nullptr; // non-owning - UploadProfiler *m_profiler = nullptr; // non-owning + Mode m_mode = Mode::FrameRing; uint32_t m_framesInFlight = 0; - uint32_t m_frameIndex = 0; - VkDeviceSize m_perFrameBytes = 0; + uint32_t m_threadCount = 0; + VkDeviceSize m_bytesPerSlice = 0; VkDeviceSize m_bufCopyAlign = 1; VkDeviceSize m_rowPitchAlign = 1; @@ -129,16 +156,15 @@ class VkUploadContext { VkBufferObj m_staging; void *m_stagingMapped = nullptr; - // TODO: make command pool and buffer per thread instead of per frame - VkCommandPool *m_pools = VK_NULL_HANDLE; - 
VkCommandBuffer *m_cmds = VK_NULL_HANDLE; - VkCommandPool m_pool = VK_NULL_HANDLE; - VkCommandBuffer m_cmd = VK_NULL_HANDLE; - + VkCommandPool *m_pools = nullptr; + VkCommandBuffer *m_cmds = nullptr; VkFence *m_fences = nullptr; - VkDeviceSize m_sliceBase = 0; - VkDeviceSize m_sliceHead = 0; - bool m_recording = false; - bool m_hadWork = false; + // Per-frame atomic head into slice (offset within slice) + std::atomic *m_heads = nullptr; + + uint8_t *m_begun = nullptr; + uint8_t *m_hadWork = nullptr; + + std::vector m_submitScratch; }; diff --git a/src/backend/graphics/vk_pipeline.cpp b/src/backend/graphics/vk_pipeline.cpp index 93b278f..6f0c333 100644 --- a/src/backend/graphics/vk_pipeline.cpp +++ b/src/backend/graphics/vk_pipeline.cpp @@ -12,26 +12,13 @@ #include #include -DEFINE_TU_LOGGER("Backend.Graphics.Pipeline"); -#define LOG_TU_LOGGER() ThisLogger() - bool VkGraphicsPipeline::init(VkDevice device, VkFormat colorFormat, VkFormat depthFormat, VkPipelineLayout pipelineLayout, const std::string &vertSpvPath, const std::string &fragSpvPath) { - if (depthFormat == VK_FORMAT_UNDEFINED) { - LOGE("depthFormat is undefeind"); - return false; - } - - if (colorFormat == VK_FORMAT_UNDEFINED) { - LOGE("colorFormat is undefeind"); - return false; - } - if (pipelineLayout == VK_NULL_HANDLE) { - std::cerr << "[Pipeline] pipelineLayout is null\n"; + LOGE("pipelineLayout is null"); return false; } diff --git a/src/backend/presentation/vk_presenter.cpp b/src/backend/presentation/vk_presenter.cpp index 3da5327..e2d56be 100644 --- a/src/backend/presentation/vk_presenter.cpp +++ b/src/backend/presentation/vk_presenter.cpp @@ -1,21 +1,21 @@ #include "vk_presenter.hpp" #include "backend/core/vk_backend_ctx.hpp" +#include "engine/logging/log.hpp" #include "platform/window/glfw_window.hpp" #include -#include #include bool VkPresenter::init(VkBackendCtx &ctx, GlfwWindow *window, uint32_t width, uint32_t height) { if (window == nullptr) { - std::cerr << "[Presenter] window is null\n"; 
+ LOGE("Window is null"); return false; } if (width == 0 || height == 0) { - std::cerr << "[Presenter] window width and height are 0"; + LOGE("Window width and height are 0"); return false; } @@ -25,19 +25,19 @@ bool VkPresenter::init(VkBackendCtx &ctx, GlfwWindow *window, uint32_t width, m_window = window; if (!m_window->createVulkanSurface(m_ctx->instance(), m_surface)) { - std::cerr << "[Presenter] create surface failed\n"; + LOGE("Surface creation failed"); shutdown(); return false; } if (!m_swapchain.init(*m_ctx, m_surface, width, height)) { - std::cerr << "[Presenter] swapchain init failed\n"; + LOGE("Swapchain initialization failed"); shutdown(); return false; } if (!m_swapchain.createSwapchainImageViews(m_ctx->device())) { - std::cerr << "[Presenter] swapchain image views creation failed\n"; + LOGE("Swapchain image views creation failed"); shutdown(); return false; } @@ -60,6 +60,7 @@ void VkPresenter::shutdown() noexcept { } if (instance != VK_NULL_HANDLE && m_surface != VK_NULL_HANDLE) { + LOGD("Destroying surface"); vkDestroySurfaceKHR(instance, m_surface, nullptr); } diff --git a/src/backend/presentation/vk_presenter.hpp b/src/backend/presentation/vk_presenter.hpp index d701db4..4f85a20 100644 --- a/src/backend/presentation/vk_presenter.hpp +++ b/src/backend/presentation/vk_presenter.hpp @@ -10,6 +10,34 @@ #include // Owns VulkanSwapchain and VkSurfaceKHR +/** + * @brief Owns the presentation surface (VkSurfaceKHR) and swapchain resources + * for a window + * + * Responsibilities: + * - Create/destroy the window presentation surface (VkSurfaceKHR) + * - Create/destroy the swapchain and its image views via VkSwapchain + * - Recreate the swapchain when the framebuffer size changes or when the + * swapchain become out-of-date + * + * Typical usage: + * - init() once after the Vulkan device and window are created + * - On resize/out-of-date/suboptimal events, call recreateSwapchain() + * - shutdown() on teardown (idempotent) + * + * Ownership: + * - 
VkPresenter owns VkSurfaceKHR + * - VkPresenter owns VkSwapchain + * - VkBackendCtx and GlfwWindow are non-owning and must outlive VkPresenter + * + * Recreation behavior: + * - recreateSwapchain() queries the current framebuffer size from the window + * - If minimized (0x0 framebuffer), recreation is skipped and returns false + * + * Lifetime: + * - init() must be called before use + * - shutdown() is idempotent + */ class VkPresenter { public: VkPresenter() = default; @@ -34,10 +62,40 @@ class VkPresenter { return *this; } + /** + * @brief Creates the window surface and initializes the swapchain + * + * Responsibilities: + * - shutdown() any existing resources + * - Create VkSurfaceKHR from the window + * - Initialize VkSwapchain and create swapchain image views + * + * Preconditions: + * - window != nullptr + * - width > 0 and height > 0 + * - ctx is initialized + * + * Postconditions: + * - m_surface != VK_NULL_HANDLE + * - swapchain image views are created and accessible via colorViews() + * + * + * @return true on success; false on failure. 
On failure, the object remains + * in shutdown-safe state + */ bool init(VkBackendCtx &ctx, GlfwWindow *window, uint32_t width, uint32_t height); void shutdown() noexcept; + /** + * @brief Recreates the swapchain and its image views using the current + * framebuffer size + * + * Return false if: + * - The presenter is not initialized + * - The window is minimized + * - Swapchain recreation fails + */ [[nodiscard]] bool recreateSwapchain(); [[nodiscard]] VkFormat colorFormat() const { diff --git a/src/backend/presentation/vk_swapchain.cpp b/src/backend/presentation/vk_swapchain.cpp index c85bd2e..968bb39 100644 --- a/src/backend/presentation/vk_swapchain.cpp +++ b/src/backend/presentation/vk_swapchain.cpp @@ -4,17 +4,15 @@ #include #include -#include +#include +#include #include #include -DEFINE_TU_LOGGER("Backend.Instance"); -#define LOG_TU_LOGGER() ThisLogger() - -VkSwapchain::SwapChainSupportDetails +VkSwapchain::SwapchainSupportDetails VkSwapchain::querySwapChainSupport(VkPhysicalDevice device, VkSurfaceKHR surface) { - SwapChainSupportDetails details; + SwapchainSupportDetails details; vkGetPhysicalDeviceSurfaceCapabilitiesKHR(device, surface, &details.capabilities); @@ -36,25 +34,21 @@ VkSwapchain::querySwapChainSupport(VkPhysicalDevice device, device, surface, &presentModeCount, details.presentModes.data()); } - LOGD("Swapchain Capabilites: minImageCount={}, maxImageCount={}", - details.capabilities.minImageCount, details.capabilities.maxImageCount); - - std::cout << "[Swapchain] Capabilites:" - << " minImageCount=" << details.capabilities.minImageCount - << " maxImageCount=" << details.capabilities.maxImageCount - << " currentExtent=(" << details.capabilities.currentExtent.width - << "x" << details.capabilities.currentExtent.height << ")\n"; + LOGD("Swapchain Capabilites: minImageCount={}, maxImageCount={}, " + "currentExtent=({}x{})", + details.capabilities.minImageCount, details.capabilities.maxImageCount, + details.capabilities.currentExtent.width, + 
details.capabilities.currentExtent.height); - std::cout << "[Swapchain] Available formats: " << formatCount << "\n"; + LOGT("Available swapchain formats: {}", formatCount); for (const auto &f : details.formats) { - std::cout << " format=" << f.format << " colorSpace=" << f.colorSpace - << "\n"; + LOGT(" format={} colorSpace={}", fmt::underlying(f.format), + fmt::underlying(f.colorSpace)); } - std::cout << "[Swapchain] Available present modes: " << presentModeCount - << "\n"; + LOGT("Available swapchain present modes {}", presentModeCount); for (const auto &pm : details.presentModes) { - std::cout << " presentMode=" << pm << "\n"; + LOGT("presentMode={}", fmt::underlying(pm)); } return details; @@ -62,14 +56,13 @@ VkSwapchain::querySwapChainSupport(VkPhysicalDevice device, VkSurfaceFormatKHR VkSwapchain::chooseSwapSurfaceFormat( const std::vector &availableFormats) { - // If architecture allows choosing of any format if (availableFormats.size() == 1 && availableFormats[0].format == VK_FORMAT_UNDEFINED) { VkSurfaceFormatKHR format{}; format.format = VK_FORMAT_B8G8R8A8_SRGB; format.colorSpace = VK_COLOR_SPACE_SRGB_NONLINEAR_KHR; - std::cout << "[Swapchain] Chose perferred format (UNDEFINED -> default): " - << format.format << " / " << format.colorSpace << "\n"; + LOGI("Chose preferred format: {} / {}", fmt::underlying(format.format), + fmt::underlying(format.colorSpace)); return format; } @@ -78,9 +71,9 @@ VkSurfaceFormatKHR VkSwapchain::chooseSwapSurfaceFormat( for (const auto &availableFormat : availableFormats) { if (availableFormat.colorSpace == VK_COLOR_SPACE_SRGB_NONLINEAR_KHR && availableFormat.format == VK_FORMAT_B8G8R8A8_SRGB) { - std::cout << "[Swapchain] Chose perferred sRGB format: " - << availableFormat.format << " / " << availableFormat.colorSpace - << "\n"; + LOGI("Chose BGRA8 + sRGB_NONLINEAR format: {} / {}", + fmt::underlying(availableFormat.format), + fmt::underlying(availableFormat.colorSpace)); return availableFormat; } } @@ -89,17 +82,17 @@
VkSurfaceFormatKHR VkSwapchain::chooseSwapSurfaceFormat( for (const auto &availableFormat : availableFormats) { if (availableFormat.colorSpace == VK_COLOR_SPACE_SRGB_NONLINEAR_KHR && availableFormat.format == VK_FORMAT_B8G8R8A8_UNORM) { - std::cout << "[Swapchain] Chose UNORM + sRGB_NONLINEAR: " - << availableFormat.format << " / " << availableFormat.colorSpace - << "\n"; + LOGI("Chose UNORM + sRGB_NONLINEAR: {} / {}", + fmt::underlying(availableFormat.format), + fmt::underlying(availableFormat.colorSpace)); return availableFormat; } } // Fallback to first available format - std::cout << "[Swapchain] Using fallback format: " - << availableFormats[0].format << " / " - << availableFormats[0].colorSpace << "\n"; + LOGI("Using fallback format: {} / {}", + fmt::underlying(availableFormats[0].format), + fmt::underlying(availableFormats[0].colorSpace)); return availableFormats[0]; } @@ -110,15 +103,15 @@ VkPresentModeKHR VkSwapchain::chooseSwapPresentMode( // (https://github.com/KhronosGroup/MoltenVK/issues/581#issuecomment-488903202) for (const auto &presentMode : availablePresentModes) { if (presentMode == VK_PRESENT_MODE_MAILBOX_KHR) { - std::cout << "[Swapchain] Chose present mode: MAILBOX\n"; + LOGI("Chose present mode MAILBOX"); return presentMode; } } - // Fallback: FIFO since it is guaranteed to be supported + // Fallback to FIFO since it is guaranteed to be supported for (const auto &presentMode : availablePresentModes) { if (presentMode == VK_PRESENT_MODE_FIFO_KHR) { - std::cout << "[Swapchain] Chose present mode: FIFO\n"; + LOGI("Chose present mode FIFO"); return presentMode; } } @@ -127,7 +120,7 @@ VkPresentModeKHR VkSwapchain::chooseSwapPresentMode( VkPresentModeKHR fallback = availablePresentModes.empty() ? 
VK_PRESENT_MODE_FIFO_KHR : availablePresentModes[0]; - std::cout << "[Swapchain] Using fallback present mode: " << fallback << "\n"; + LOGI("Using fallback present mode {}", fmt::underlying(fallback)); return fallback; } @@ -135,9 +128,6 @@ VkExtent2D VkSwapchain::chooseSwapExtent(const VkSurfaceCapabilitiesKHR &capabilities, uint32_t width, uint32_t height) { if (capabilities.currentExtent.width != UINT32_MAX) { - std::cout << "[Swapchain] Using currentExtent from capabilities: (" - << capabilities.currentExtent.width << "x" - << capabilities.currentExtent.height << ")\n"; // The surface size is dictated by the window system (common on macOS) return capabilities.currentExtent; } @@ -151,32 +141,33 @@ VkSwapchain::chooseSwapExtent(const VkSurfaceCapabilitiesKHR &capabilities, std::clamp(actualExtent.height, capabilities.minImageExtent.height, capabilities.maxImageExtent.height); - std::cout << "[Swapchain] Using clamped extent: (" << actualExtent.width - << "x" << actualExtent.height << ")\n"; + LOGI("Using clamped extent: ({}x{})", actualExtent.width, + actualExtent.height); return actualExtent; } bool VkSwapchain::init(VkBackendCtx &ctx, VkSurfaceKHR surface, uint32_t width, uint32_t height) { if (surface == VK_NULL_HANDLE) { - std::cerr << "[Swapchain] surface is null\n"; + LOGE("Surface is null"); return false; } VkDevice device = ctx.device(); VkPhysicalDevice physicalDevice = ctx.physicalDevice(); - VkSwapchainKHR old = m_swapChain; + VkSwapchainKHR old = m_swapchain; + LOGD("Destroying swapchain image views"); destroySwapchainImageViews(device); m_surface = surface; - SwapChainSupportDetails support = + SwapchainSupportDetails support = querySwapChainSupport(physicalDevice, m_surface); if (support.formats.empty() || support.presentModes.empty()) { - std::cerr << "[Swapchain] support incomplete\n"; + LOGE("Swapchain support incomplete"); return false; } @@ -187,17 +178,15 @@ bool VkSwapchain::init(VkBackendCtx &ctx, VkSurfaceKHR surface, uint32_t width,
uint32_t imageCount = support.capabilities.minImageCount + 1; if (support.capabilities.maxImageCount > 0 && imageCount > support.capabilities.maxImageCount) { - std::cout << "[Swapchain] Clamping imageCount from" << imageCount - << " to maxImageCount=" << support.capabilities.maxImageCount - << "\n"; + LOGI("Clamping imageCount from {} to maxImageCount={}", imageCount, + support.capabilities.maxImageCount); imageCount = support.capabilities.maxImageCount; } - std::cout << "[Swapchain] Selected parameters:\n"; - std::cout << " imageCount = " << imageCount << "\n"; - std::cout << " colorSpace = " << surfaceFormat.colorSpace << "\n"; - std::cout << " extent = (" << extent.width << "x" << extent.height - << ")\n"; + LOGI("Selected swapchain parameters: imageCount = {} colorSpace = {} extent " + "= ({}x{})", + imageCount, fmt::underlying(surfaceFormat.colorSpace), extent.width, + extent.height); VkSwapchainCreateInfoKHR createInfo{}; createInfo.sType = VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR; @@ -220,82 +209,81 @@ bool VkSwapchain::init(VkBackendCtx &ctx, VkSurfaceKHR surface, uint32_t width, // Clipped pixels are rendered out createInfo.clipped = VK_TRUE; - std::cout << "[Swapchain] preTransform = " - << support.capabilities.currentTransform << "\n"; - std::cout - << "[Swapchain] compositeAlpha = VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR\n"; - std::cout << "[Swapchain] clipped = VK_TRUE\n"; + LOGI("Swapchain preTransform = {}, compositeAlpha = {}, clipped = {}", + fmt::underlying(support.capabilities.currentTransform), + fmt::underlying(createInfo.compositeAlpha), createInfo.clipped); createInfo.oldSwapchain = old; VkSwapchainKHR newSwapchain = VK_NULL_HANDLE; - VkResult result = + VkResult res = vkCreateSwapchainKHR(device, &createInfo, nullptr, &newSwapchain); - if (result != VK_SUCCESS) { - std::cerr << "[Swapchain] vkCreateSwapchainKHR failed: " << result << "\n"; + if (res != VK_SUCCESS) { + LOGE("vkCreateSwapchainKHR failed: {}", fmt::underlying(res)); return false; 
} // Destroy old swapchain if (old != VK_NULL_HANDLE) { + LOGD("Destroying swapchain"); vkDestroySwapchainKHR(device, old, nullptr); } - m_swapChain = newSwapchain; + m_swapchain = newSwapchain; - vkGetSwapchainImagesKHR(device, m_swapChain, &imageCount, nullptr); - m_swapChainImages.resize(imageCount); - vkGetSwapchainImagesKHR(device, m_swapChain, &imageCount, - m_swapChainImages.data()); + vkGetSwapchainImagesKHR(device, m_swapchain, &imageCount, nullptr); + m_swapchainImages.resize(imageCount); + vkGetSwapchainImagesKHR(device, m_swapchain, &imageCount, + m_swapchainImages.data()); - m_swapChainImageFormat = surfaceFormat.format; - m_swapChainExtent = extent; + m_swapchainImageFormat = surfaceFormat.format; + m_swapchainExtent = extent; - std::cout << "[Swapchain] initialized success," << m_swapChainImages.size() - << " images acquired\n"; return true; } void VkSwapchain::shutdown(VkDevice device) noexcept { if (device != VK_NULL_HANDLE) { + LOGD("Destroying swapchain image views"); destroySwapchainImageViews(device); - if (m_swapChain != VK_NULL_HANDLE) { - vkDestroySwapchainKHR(device, m_swapChain, nullptr); + if (m_swapchain != VK_NULL_HANDLE) { + LOGD("Destroying swapchain"); + vkDestroySwapchainKHR(device, m_swapchain, nullptr); } } else { - m_swapChainImageViews.clear(); + m_swapchainImageViews.clear(); } - m_swapChain = VK_NULL_HANDLE; - m_swapChainImages.clear(); + m_swapchain = VK_NULL_HANDLE; + m_swapchainImages.clear(); m_surface = VK_NULL_HANDLE; - m_swapChainImageFormat = VK_FORMAT_UNDEFINED; - m_swapChainExtent = {}; + m_swapchainImageFormat = VK_FORMAT_UNDEFINED; + m_swapchainExtent = {}; } bool VkSwapchain::createSwapchainImageViews(VkDevice device) { destroySwapchainImageViews(device); if (device == VK_NULL_HANDLE) { - std::cerr << "[Swapchain] Device is null\n"; + LOGE("Device is null"); return false; } const auto &images = swapchainImages(); if (images.empty()) { - std::cerr << "[Swapchain] Swapchain images are empty\n"; + LOGE("Swapchain 
images are empty"); return false; } VkFormat format = swapchainImageFormat(); if (format == VK_FORMAT_UNDEFINED) { - std::cerr << "[Swapchain] Swapchain format undefined\n"; + LOGE("Swapchain format undefined"); return false; } - m_swapChainImageViews.resize(images.size(), VK_NULL_HANDLE); + m_swapchainImageViews.resize(images.size(), VK_NULL_HANDLE); for (size_t i = 0; i < images.size(); ++i) { VkImageViewCreateInfo viewInfo{}; @@ -316,30 +304,28 @@ bool VkSwapchain::createSwapchainImageViews(VkDevice device) { viewInfo.subresourceRange.layerCount = 1; VkResult res = vkCreateImageView(device, &viewInfo, nullptr, - &m_swapChainImageViews[i]); + &m_swapchainImageViews[i]); if (res != VK_SUCCESS) { - std::cerr << "[Swapchain] vkCreateImageView() failed at index " << i - << " error=" << res << "\n"; + LOGE("vkCreateImageView() failed at index {} error={}", i, + fmt::underlying(res)); destroySwapchainImageViews(device); return false; } } - std::cout << "[Swapchain] Created " << m_swapChainImageViews.size() - << " swapchain image views\n"; return true; } void VkSwapchain::destroySwapchainImageViews(VkDevice device) noexcept { if (device == VK_NULL_HANDLE) { - m_swapChainImageViews.clear(); + m_swapchainImageViews.clear(); return; } - for (VkImageView v : m_swapChainImageViews) { + for (VkImageView v : m_swapchainImageViews) { if (v != VK_NULL_HANDLE) { vkDestroyImageView(device, v, nullptr); } } - m_swapChainImageViews.clear(); + m_swapchainImageViews.clear(); } diff --git a/src/backend/presentation/vk_swapchain.hpp b/src/backend/presentation/vk_swapchain.hpp index 217f77c..e4d0d13 100644 --- a/src/backend/presentation/vk_swapchain.hpp +++ b/src/backend/presentation/vk_swapchain.hpp @@ -8,6 +8,23 @@ #include #include +/** + * @brief Owns a VkSwapchainKHR and its associated swapchain image views.
+ * + * Responsibilities: + * - Query swapchain support details for a physical device and surface + * - Select surface format, present mode, and extent + * - Create/destroy VkSwapchainKHR + * - Retrieve swapchain images and create/destroy VkImageViews + * + * Presentation behavior: + * - Prefers MAILBOX present mode when available, otherwise FIFO + * - Prefers BGRA8 + sRGB_NONLINEAR surface formats when available + * + * Recreation behavior: + * - shutdown() is idempotent + * - destroySwapchainImageViews() is safe to call independently + */ class VkSwapchain { public: VkSwapchain() = default; @@ -23,62 +40,102 @@ class VkSwapchain { } m_surface = std::exchange(other.m_surface, VK_NULL_HANDLE); - m_swapChain = std::exchange(other.m_swapChain, VK_NULL_HANDLE); - m_swapChainImages = std::exchange(other.m_swapChainImages, {}); - m_swapChainImageViews = std::exchange(other.m_swapChainImageViews, {}); - m_swapChainImageFormat = - std::exchange(other.m_swapChainImageFormat, VK_FORMAT_UNDEFINED); - m_swapChainExtent = std::exchange(other.m_swapChainExtent, VkExtent2D{}); + m_swapchain = std::exchange(other.m_swapchain, VK_NULL_HANDLE); + m_swapchainImages = std::exchange(other.m_swapchainImages, {}); + m_swapchainImageViews = std::exchange(other.m_swapchainImageViews, {}); + m_swapchainImageFormat = + std::exchange(other.m_swapchainImageFormat, VK_FORMAT_UNDEFINED); + m_swapchainExtent = std::exchange(other.m_swapchainExtent, VkExtent2D{}); return *this; } + /** + * @brief Creates or recreates the swapchain and acquire its images + * + * Responsibilities: + * - Query swapchain support + * - Choose format, present mode, and extent + * - Destroy existing swapchain image views + * - Create a new VkSwapchainKHR (passing the old swapchain if present) + * - Retrieve swapchain images + * + * Preconditions: + * - ctx.device() and ctx.physicalDevice() are valid + * - surface != VK_NULL_HANDLE + * + * Postconditions: + * - m_swapchain != VK_NULL_HANDLE + * - all image views are 
valid + * + * @return true on success; false on failure. On failure, all views are + * destroyed and the container is cleared + */ bool init(VkBackendCtx &ctx, VkSurfaceKHR surface, uint32_t width, uint32_t height); + + /** + * @brief Destroys all swapchain image views + * + * Safe to call even if no views exist + */ void shutdown(VkDevice device) noexcept; [[nodiscard]] VkSwapchainKHR swapchain() const noexcept { - return m_swapChain; + return m_swapchain; } [[nodiscard]] VkFormat swapchainImageFormat() const noexcept { - return m_swapChainImageFormat; + return m_swapchainImageFormat; } [[nodiscard]] VkExtent2D swapchainExtent() const noexcept { - return m_swapChainExtent; + return m_swapchainExtent; } bool createSwapchainImageViews(VkDevice device); void destroySwapchainImageViews(VkDevice device) noexcept; [[nodiscard]] const std::vector &swapchainImages() const noexcept { - return m_swapChainImages; + return m_swapchainImages; } [[nodiscard]] const std::vector & swapchainImageViews() const noexcept { - return m_swapChainImageViews; + return m_swapchainImageViews; } private: - struct SwapChainSupportDetails { + struct SwapchainSupportDetails { VkSurfaceCapabilitiesKHR capabilities{}; std::vector formats; std::vector presentModes; }; - static SwapChainSupportDetails querySwapChainSupport(VkPhysicalDevice device, + /** + * @brief Queries swapchain support details for a physical device and surface + */ + static SwapchainSupportDetails querySwapChainSupport(VkPhysicalDevice device, VkSurfaceKHR surface); + /** + * @brief Chooses the preferred surface format from the available set + */ static VkSurfaceFormatKHR chooseSwapSurfaceFormat( const std::vector &availableFormats); + /** + * @brief Chooses the preferred present mode from the available set + */ static VkPresentModeKHR chooseSwapPresentMode( const std::vector &availablePresentModes); + /** + * @brief Chooses the swapchain extent based on surface capabilities and + * requested dimensions + */ static VkExtent2D 
chooseSwapExtent(const VkSurfaceCapabilitiesKHR &capabilities, uint32_t width, uint32_t height); VkSurfaceKHR m_surface = VK_NULL_HANDLE; // non-owning - VkSwapchainKHR m_swapChain = VK_NULL_HANDLE; - std::vector m_swapChainImages; - std::vector m_swapChainImageViews; - VkFormat m_swapChainImageFormat = VK_FORMAT_UNDEFINED; - VkExtent2D m_swapChainExtent{}; + VkSwapchainKHR m_swapchain = VK_NULL_HANDLE; + std::vector m_swapchainImages; + std::vector m_swapchainImageViews; + VkFormat m_swapchainImageFormat = VK_FORMAT_UNDEFINED; + VkExtent2D m_swapchainExtent{}; }; diff --git a/src/backend/profiling/CMakeLists.txt b/src/backend/profiling/CMakeLists.txt index 3fa1501..f1a3bf2 100644 --- a/src/backend/profiling/CMakeLists.txt +++ b/src/backend/profiling/CMakeLists.txt @@ -1,18 +1,12 @@ -add_library(quark_backend_profiling STATIC - vk_gpu_profiler.cpp - upload_profiler.cpp - profiling_logger.cpp -) - -target_include_directories(quark_backend_profiling - PUBLIC - ${CMAKE_SOURCE_DIR}/src -) +add_subdirectory(logging) +add_subdirectory(profilers) +add_subdirectory(telemetry) -target_link_libraries(quark_backend_profiling - PUBLIC - Vulkan::Vulkan - quark::backend::core +add_library(quark_backend_profiling INTERFACE) +target_link_libraries(quark_backend_profiling INTERFACE + quark::backend::profiling::logging + quark::backend::profiling::profilers + quark::backend::profiling::telemetry ) add_library(quark::backend::profiling ALIAS quark_backend_profiling) diff --git a/src/backend/profiling/cpu_profiler.hpp b/src/backend/profiling/cpu_profiler.hpp deleted file mode 100644 index da17f15..0000000 --- a/src/backend/profiling/cpu_profiler.hpp +++ /dev/null @@ -1,148 +0,0 @@ -#pragma once - -#include -#include -#include -#include -#include -#include -#include - -class CpuProfiler { -public: - enum class Stat : uint8_t { - FrameTotal = 0, - Acquire, - WaitForFence, - UpdatePerFrameUBO, - RecordCmd, - QueueSubmit, - QueuePresent, - SwapchainRecreate, - WaitIdle, - Other, - Count - 
}; - - struct FrameStats { - std::array(Stat::Count)> ms{}; - uint32_t drawCalls = 0; - uint64_t triangles = 0; - uint32_t pipelineBinds = 0; - uint32_t descriptorBinds = 0; - uint32_t instances = 0; - }; - - class Scope { - public: - Scope(CpuProfiler &profiler, Stat stat) noexcept - : m_profiler(&profiler), m_stat(stat), m_t0(clock::now()) {} - ~Scope() noexcept { end(); } - - Scope(const Scope &) = delete; - Scope &operator=(const Scope &) = delete; - - Scope(Scope &&) = delete; - Scope &operator=(Scope &&other) noexcept { - if (this == &other) { - return *this; - } - - end(); - - m_profiler = other.m_profiler; - m_stat = other.m_stat; - m_t0 = other.m_t0; - other.m_profiler = nullptr; - - return *this; - } - - private: - using clock = std::chrono::steady_clock; - - void end() noexcept { - if (m_profiler == nullptr) { - return; - } - - const auto t1 = clock::now(); - const double ms = - std::chrono::duration(t1 - m_t0).count(); - - m_profiler->add(m_stat, ms); - m_profiler = nullptr; - } - - CpuProfiler *m_profiler = nullptr; - Stat m_stat{}; - clock::time_point m_t0; - }; - - void endFrame() noexcept { - const auto idx = [](Stat stat) { return static_cast(stat); }; - - const double frame = m_cur.ms[idx(Stat::FrameTotal)]; - - const double accounted = - m_cur.ms[idx(Stat::Acquire)] + m_cur.ms[idx(Stat::WaitForFence)] + - m_cur.ms[idx(Stat::UpdatePerFrameUBO)] + - m_cur.ms[idx(Stat::RecordCmd)] + m_cur.ms[idx(Stat::QueuePresent)] + - m_cur.ms[idx(Stat::QueueSubmit)] + - m_cur.ms[idx(Stat::SwapchainRecreate)]; - - m_cur.ms[idx(Stat::Other)] = std::max(0.0, frame - accounted); - - m_last = m_cur; - resetCurrent(); - } - - // Counters - void incDrawCalls(uint32_t n = 1) noexcept { m_cur.drawCalls += n; } - void addTriangles(uint64_t n) noexcept { m_cur.triangles += n; } - void incPipelineBinds(uint32_t n = 1) noexcept { m_cur.pipelineBinds += n; } - void incDescriptorBinds(uint32_t n = 1) noexcept { - m_cur.descriptorBinds += n; - } - void addInstances(uint32_t n) 
noexcept { m_cur.instances += n; } - - [[nodiscard]] const FrameStats &last() const noexcept { return m_last; } - - // Printing / UI - static constexpr std::string_view name(Stat stat) noexcept { - switch (stat) { - case Stat::FrameTotal: - return "FrameTotal"; - case Stat::Acquire: - return "Acquire"; - case Stat::WaitForFence: - return "WaitForFence"; - case Stat::UpdatePerFrameUBO: - return "UpdatePerFrameUBO"; - case Stat::RecordCmd: - return "RecordCmd"; - case Stat::QueueSubmit: - return "QueueSubmit"; - case Stat::QueuePresent: - return "QueuePresent"; - case Stat::SwapchainRecreate: - return "SwapchainRecreate"; - case Stat::Other: - return "Other"; - default: - return "Unknown"; - } - } - -private: - friend class Scope; - - void add(Stat stat, double ms) noexcept { - m_cur.ms[static_cast(stat)] += ms; - } - - void resetCurrent() noexcept { m_cur = FrameStats{}; } - - FrameStats m_cur{}; - FrameStats m_last{}; -}; diff --git a/src/backend/profiling/logging/CMakeLists.txt b/src/backend/profiling/logging/CMakeLists.txt new file mode 100644 index 0000000..267ddc7 --- /dev/null +++ b/src/backend/profiling/logging/CMakeLists.txt @@ -0,0 +1,15 @@ +add_library(quark_backend_profiling_logging STATIC + profiling_logger.cpp +) + +target_include_directories(quark_backend_profiling_logging + PUBLIC + ${CMAKE_SOURCE_DIR}/src +) + +target_link_libraries(quark_backend_profiling_logging + PUBLIC + quark::backend::profiling::profilers +) + +add_library(quark::backend::profiling::logging ALIAS quark_backend_profiling_logging) diff --git a/src/backend/profiling/profiling_logger.cpp b/src/backend/profiling/logging/profiling_logger.cpp similarity index 95% rename from src/backend/profiling/profiling_logger.cpp rename to src/backend/profiling/logging/profiling_logger.cpp index dcb3d6f..7634c00 100644 --- a/src/backend/profiling/profiling_logger.cpp +++ b/src/backend/profiling/logging/profiling_logger.cpp @@ -1,8 +1,8 @@ -#include "backend/profiling/profiling_logger.hpp" 
+#include "backend/profiling/logging/profiling_logger.hpp" -#include "backend/profiling/cpu_profiler.hpp" -#include "backend/profiling/upload_profiler.hpp" -#include "backend/profiling/vk_gpu_profiler.hpp" +#include "backend/profiling/profilers/cpu_profiler.hpp" +#include "backend/profiling/profilers/upload_profiler.hpp" +#include "backend/profiling/profilers/vk_gpu_profiler.hpp" #include #include @@ -48,7 +48,7 @@ bool FrameLogger::shouldLog() noexcept { return (m_frameCounter % m_period) == 0ULL; } -static inline double msAt(const CpuProfiler::FrameStats &st, +static inline double msAt(const CpuProfiler::Frame &st, CpuProfiler::Stat stat) noexcept { return st.ms[static_cast(stat)]; } @@ -237,9 +237,8 @@ static void logUpload(const UploadProfiler &upload) noexcept { std::cerr << line.data() << "\n"; } -void FrameLogger::logPerFrame(const CpuProfiler &cpu, const VkGpuProfiler &gpu, - const UploadProfiler &upload) noexcept { - +void FrameLogger::logPerFrame(const CpuProfiler *cpu, const VkGpuProfiler &gpu, + const UploadProfiler *upload) noexcept { if (!shouldLog()) { return; } @@ -256,9 +255,9 @@ void FrameLogger::logPerFrame(const CpuProfiler &cpu, const VkGpuProfiler &gpu, // call submit immediate so I would have to pass profiler a lot. 
But it // doesn't matter since eventually submit Immediate will be removed and // we won't have blocking anymore - logCpu(cpu); + logCpu(*cpu); - logUpload(upload); + logUpload(*upload); logGpu(gpu); std::cout << "\n"; diff --git a/src/backend/profiling/profiling_logger.hpp b/src/backend/profiling/logging/profiling_logger.hpp similarity index 84% rename from src/backend/profiling/profiling_logger.hpp rename to src/backend/profiling/logging/profiling_logger.hpp index 9c28435..5820537 100644 --- a/src/backend/profiling/profiling_logger.hpp +++ b/src/backend/profiling/logging/profiling_logger.hpp @@ -1,8 +1,8 @@ #pragma once -#include "backend/profiling/cpu_profiler.hpp" -#include "backend/profiling/upload_profiler.hpp" -#include "backend/profiling/vk_gpu_profiler.hpp" +#include "backend/profiling/profilers/cpu_profiler.hpp" +#include "backend/profiling/profilers/upload_profiler.hpp" +#include "backend/profiling/profilers/vk_gpu_profiler.hpp" #include #include @@ -17,8 +17,8 @@ inline void ignore_snprintf(int rc) noexcept { (void)rc; } class FrameLogger { public: void setPeriod(uint64_t n) noexcept { m_period = n; } - void logPerFrame(const CpuProfiler &cpu, const VkGpuProfiler &gpu, - const UploadProfiler &upload) noexcept; + void logPerFrame(const CpuProfiler *cpu, const VkGpuProfiler &gpu, + const UploadProfiler *upload) noexcept; private: bool shouldLog() noexcept; diff --git a/src/backend/profiling/profilers/CMakeLists.txt b/src/backend/profiling/profilers/CMakeLists.txt new file mode 100644 index 0000000..e955448 --- /dev/null +++ b/src/backend/profiling/profilers/CMakeLists.txt @@ -0,0 +1,18 @@ +add_library(quark_backend_profiling_profilers STATIC + cpu_profiler.cpp + upload_profiler.cpp + vk_gpu_profiler.cpp +) + +target_include_directories(quark_backend_profiling_profilers + PUBLIC + ${CMAKE_SOURCE_DIR}/src +) + +target_link_libraries(quark_backend_profiling_profilers + PUBLIC + Vulkan::Vulkan + quark::backend::core +) + 
+add_library(quark::backend::profiling::profilers ALIAS quark_backend_profiling_profilers) diff --git a/src/backend/profiling/profilers/cpu_profiler.cpp b/src/backend/profiling/profilers/cpu_profiler.cpp new file mode 100644 index 0000000..124851d --- /dev/null +++ b/src/backend/profiling/profilers/cpu_profiler.cpp @@ -0,0 +1,60 @@ +#include "backend/profiling/profilers/cpu_profiler.hpp" + +CpuProfiler::Scope::Scope(CpuProfiler &profiler, Stat stat) noexcept + : m_profiler(&profiler), m_stat(stat), m_t0(clock::now()) {} + +CpuProfiler::Scope::~Scope() noexcept { end(); } + +CpuProfiler::Scope &CpuProfiler::Scope::operator=(Scope &&other) noexcept { + if (this == &other) { + return *this; + } + + end(); + + m_profiler = other.m_profiler; + m_stat = other.m_stat; + m_t0 = other.m_t0; + other.m_profiler = nullptr; + + return *this; +} + +void CpuProfiler::Scope::end() noexcept { + if (m_profiler == nullptr) { + return; + } + + const auto t1 = clock::now(); + const double ms = + std::chrono::duration(t1 - m_t0).count(); + + m_profiler->addMs(m_stat, ms); + m_profiler = nullptr; +} + +void CpuProfiler::beginInterval() noexcept { m_cur = Frame{}; } + +void CpuProfiler::endInterval() noexcept { + finalizeOther(); + m_last = m_cur; + resetCurrent(); +} + +void CpuProfiler::addMs(Stat stat, double ms) noexcept { + m_cur.ms[static_cast(stat)] += ms; +} + +void CpuProfiler::finalizeOther() noexcept { + const auto idx = [](Stat stat) { return static_cast(stat); }; + + const double frame = m_cur.ms[idx(Stat::FrameTotal)]; + + const double accounted = + m_cur.ms[idx(Stat::Acquire)] + m_cur.ms[idx(Stat::WaitForFence)] + + m_cur.ms[idx(Stat::UpdatePerFrameUBO)] + m_cur.ms[idx(Stat::RecordCmd)] + + m_cur.ms[idx(Stat::QueuePresent)] + m_cur.ms[idx(Stat::QueueSubmit)] + + m_cur.ms[idx(Stat::SwapchainRecreate)]; + + m_cur.ms[idx(Stat::Other)] = std::max(0.0, frame - accounted); +} diff --git a/src/backend/profiling/profilers/cpu_profiler.hpp 
b/src/backend/profiling/profilers/cpu_profiler.hpp new file mode 100644 index 0000000..4a9df30 --- /dev/null +++ b/src/backend/profiling/profilers/cpu_profiler.hpp @@ -0,0 +1,78 @@ +#pragma once + +#include +#include +#include +#include +#include + +class CpuProfiler { +public: + enum class Stat : uint8_t { + FrameTotal = 0, + Acquire, + WaitForFence, + UpdatePerFrameUBO, + RecordCmd, + QueueSubmit, + QueuePresent, + SwapchainRecreate, + WaitIdle, + Other, + Count + }; + + struct Frame { + std::array(Stat::Count)> ms{}; + uint32_t drawCalls = 0; + uint64_t triangles = 0; + uint32_t pipelineBinds = 0; + uint32_t descriptorBinds = 0; + uint32_t instances = 0; + }; + + class Scope { + public: + using clock = std::chrono::steady_clock; + + Scope(CpuProfiler &profiler, Stat stat) noexcept; + ~Scope() noexcept; + + Scope(const Scope &) = delete; + Scope &operator=(const Scope &) = delete; + + Scope(Scope &&) = delete; + Scope &operator=(Scope &&other) noexcept; + + private: + void end() noexcept; + + CpuProfiler *m_profiler = nullptr; + Stat m_stat{}; + clock::time_point m_t0; + }; + + void beginInterval() noexcept; + void endInterval() noexcept; + + void incDrawCalls(uint32_t n = 1) noexcept { m_cur.drawCalls += n; } + void addTriangles(uint64_t n) noexcept { m_cur.triangles += n; } + void addInstances(uint32_t n) noexcept { m_cur.instances += n; } + void incPipelineBinds(uint32_t n = 1) noexcept { m_cur.pipelineBinds += n; } + void incDescriptorBinds(uint32_t n = 1) noexcept { + m_cur.descriptorBinds += n; + } + + [[nodiscard]] const Frame &cur() const noexcept { return m_cur; } + [[nodiscard]] const Frame &last() const noexcept { return m_last; } + +private: + friend class Scope; + + void addMs(Stat stat, double ms) noexcept; + void resetCurrent() noexcept { m_cur = Frame{}; } + void finalizeOther() noexcept; + + Frame m_cur{}; + Frame m_last{}; +}; diff --git a/src/backend/profiling/upload_profiler.cpp b/src/backend/profiling/profilers/upload_profiler.cpp similarity 
index 63% rename from src/backend/profiling/upload_profiler.cpp rename to src/backend/profiling/profilers/upload_profiler.cpp index 1dd2ded..55b2958 100644 --- a/src/backend/profiling/upload_profiler.cpp +++ b/src/backend/profiling/profilers/upload_profiler.cpp @@ -1,8 +1,8 @@ -#include "backend/profiling/upload_profiler.hpp" +#include "backend/profiling/profilers/upload_profiler.hpp" #include -static constexpr bool isLifetimeStat(UploadProfiler::Stat stat) noexcept { +bool UploadProfiler::isLifetimeStat(Stat stat) noexcept { using S = UploadProfiler::Stat; switch (stat) { case S::BufferAllocatedBytes: @@ -17,16 +17,16 @@ static constexpr bool isLifetimeStat(UploadProfiler::Stat stat) noexcept { } } -void UploadProfiler::beginFrame() noexcept { resetFrame(); } +void UploadProfiler::beginInterval() noexcept { resetCur(); } -void UploadProfiler::endFrame() noexcept { - m_lastFrame = m_frame; - resetFrame(); +void UploadProfiler::endInterval() noexcept { + m_lastFrame = m_cur; + resetCur(); } void UploadProfiler::add(Stat stat, std::uint64_t value) noexcept { const size_t i = static_cast(stat); - m_frame.v[i] += value; + m_cur.v[i] += value; if (isLifetimeStat(stat)) { m_lifetime.v[i] += value; diff --git a/src/backend/profiling/profilers/upload_profiler.hpp b/src/backend/profiling/profilers/upload_profiler.hpp new file mode 100644 index 0000000..b9ff911 --- /dev/null +++ b/src/backend/profiling/profilers/upload_profiler.hpp @@ -0,0 +1,77 @@ +#pragma once + +#include +#include +#include +#include + +class UploadProfiler { +public: + enum class Stat : uint8_t { + UploadSubmitCount = 0, + + UploadMemcpyCount, + UploadMemcpyBytes, + + StagingCreatedCount, + StagingAllocatedBytes, + StagingUsedBytes, + + BufferUploadCount, + BufferUploadBytes, + BufferAllocatedBytes, + + TextureUploadCount, + TextureUploadBytes, + TextureAllocatedBytes, + + MaterialUploadCount, + MaterialUploadBytes, + MaterialAllocatedBytes, + + InstanceUploadCount, + InstanceUploadBytes, + 
InstanceAllocatedBytes, + + Count + }; + + struct Frame { + std::array(Stat::Count)> v{}; + }; + + void beginInterval() noexcept; + void endInterval() noexcept; + + [[nodiscard]] const Frame &cur() const noexcept { return m_cur; } + [[nodiscard]] const Frame &last() const noexcept { return m_lastFrame; } + [[nodiscard]] const Frame &lifetime() const noexcept { return m_lifetime; } + + void add(Stat stat, std::uint64_t value) noexcept; + +private: + static bool isLifetimeStat(Stat stat) noexcept; + + void resetCur() noexcept { m_cur = Frame{}; } + + Frame m_cur{}; + Frame m_lastFrame{}; + Frame m_lifetime{}; +}; + +inline UploadProfiler::Frame & +operator+=(UploadProfiler::Frame &a, const UploadProfiler::Frame &b) noexcept { + for (size_t i = 0; i < a.v.size(); ++i) { + a.v[i] += b.v[i]; + } + + return a; +} + +static inline void profilerAdd(UploadProfiler *profiler, + UploadProfiler::Stat stat, + std::uint64_t v) noexcept { + if (profiler != nullptr) { + profiler->add(stat, v); + } +} diff --git a/src/backend/profiling/vk_gpu_profiler.cpp b/src/backend/profiling/profilers/vk_gpu_profiler.cpp similarity index 99% rename from src/backend/profiling/vk_gpu_profiler.cpp rename to src/backend/profiling/profilers/vk_gpu_profiler.cpp index 784da65..49d6455 100644 --- a/src/backend/profiling/vk_gpu_profiler.cpp +++ b/src/backend/profiling/profilers/vk_gpu_profiler.cpp @@ -1,4 +1,4 @@ -#include "backend/profiling/vk_gpu_profiler.hpp" +#include "backend/profiling/profilers/vk_gpu_profiler.hpp" #include "backend/core/vk_backend_ctx.hpp" diff --git a/src/backend/profiling/vk_gpu_profiler.hpp b/src/backend/profiling/profilers/vk_gpu_profiler.hpp similarity index 99% rename from src/backend/profiling/vk_gpu_profiler.hpp rename to src/backend/profiling/profilers/vk_gpu_profiler.hpp index 0c5a8c5..8c7b8dd 100644 --- a/src/backend/profiling/vk_gpu_profiler.hpp +++ b/src/backend/profiling/profilers/vk_gpu_profiler.hpp @@ -6,8 +6,6 @@ #include #include -class VkBackendCtx; - class 
VkGpuProfiler { public: enum class Marker : std::uint8_t { diff --git a/src/backend/profiling/telemetry/CMakeLists.txt b/src/backend/profiling/telemetry/CMakeLists.txt new file mode 100644 index 0000000..97bec8f --- /dev/null +++ b/src/backend/profiling/telemetry/CMakeLists.txt @@ -0,0 +1,16 @@ +add_library(quark_backend_profiling_telemetry STATIC + telemetry.cpp + publish.cpp +) + +target_include_directories(quark_backend_profiling_telemetry + PUBLIC + ${CMAKE_SOURCE_DIR}/src +) + +target_link_libraries(quark_backend_profiling_telemetry + PUBLIC + quark::backend::profiling::profilers +) + +add_library(quark::backend::profiling::telemetry ALIAS quark_backend_profiling_telemetry) diff --git a/src/backend/profiling/telemetry/publish.cpp b/src/backend/profiling/telemetry/publish.cpp new file mode 100644 index 0000000..0187b94 --- /dev/null +++ b/src/backend/profiling/telemetry/publish.cpp @@ -0,0 +1,65 @@ +#include "backend/profiling/telemetry/publish.hpp" + +#if defined(ENABLE_TELEMETRY) + +#include "backend/profiling/profilers/cpu_profiler.hpp" +#include "backend/profiling/profilers/upload_profiler.hpp" +#include "backend/profiling/telemetry/telemetry.hpp" + +#include +#include +#include + +namespace profiling { + +static std::atomic g_publishPeriodNs{1'000'000}; // 1ms + +void setPublishPeriod(std::chrono::nanoseconds period) noexcept { + const int64_t ns = period.count(); + g_publishPeriodNs.store(ns > 0 ? 
ns : 0, std::memory_order_relaxed); +} + +static inline std::chrono::nanoseconds publishPeriod() noexcept { + return std::chrono::nanoseconds{ + g_publishPeriodNs.load(std::memory_order_relaxed)}; +} + +static inline void publishSeqLock(PublishedTelemetry &p, + const CpuProfiler::Frame &cpu, + const UploadProfiler::Frame &upl) noexcept { + p.seq.fetch_add(1, std::memory_order_relaxed); // odd + std::atomic_thread_fence(std::memory_order_release); + + p.cpu = cpu; + p.upload = upl; + + std::atomic_thread_fence(std::memory_order_release); + p.seq.fetch_add(1, std::memory_order_relaxed); // even +} + +bool readPublished(const Telemetry &t, CpuProfiler::Frame &outCpu, + UploadProfiler::Frame &outUpload) noexcept { + const PublishedTelemetry &p = t.published; + + for (int tries = 0; tries < 4; ++tries) { + const uint32_t a = p.seq.load(std::memory_order_acquire); + if (a & 1U) { + continue; + } + + outCpu = p.cpu; + outUpload = p.upload; + + std::atomic_thread_fence(std::memory_order_acquire); + const uint32_t b = p.seq.load(std::memory_order_acquire); + if (a == b) { + return true; + } + } + + return false; +} + +} // namespace profiling + +#endif diff --git a/src/backend/profiling/telemetry/publish.hpp b/src/backend/profiling/telemetry/publish.hpp new file mode 100644 index 0000000..30803e3 --- /dev/null +++ b/src/backend/profiling/telemetry/publish.hpp @@ -0,0 +1,23 @@ +#pragma once + +#if defined(ENABLE_TELEMETRY) + +#include "backend/profiling/profilers/cpu_profiler.hpp" +#include "backend/profiling/profilers/upload_profiler.hpp" +#include "backend/profiling/telemetry/telemetry.hpp" + +#include + +namespace profiling { + +// Time-based publish +void setPublishPeriod(std::chrono::nanoseconds period) noexcept; + +// Opportunistic publish +void publishMaybe() noexcept; + +bool readPublished(const Telemetry &t, CpuProfiler::Frame &outCpu, + UploadProfiler::Frame &outUpload) noexcept; + +} // namespace profiling +#endif diff --git 
a/src/backend/profiling/telemetry/telemetry.cpp b/src/backend/profiling/telemetry/telemetry.cpp new file mode 100644 index 0000000..03859a7 --- /dev/null +++ b/src/backend/profiling/telemetry/telemetry.cpp @@ -0,0 +1,10 @@ +#include "backend/profiling/telemetry/telemetry.hpp" + +namespace profiling { + +thread_local Telemetry *g_tls = nullptr; + +Telemetry *tlsTelemetry() noexcept { return g_tls; } +void setTlsTelemetry(Telemetry *t) noexcept { g_tls = t; } + +} // namespace profiling diff --git a/src/backend/profiling/telemetry/telemetry.hpp b/src/backend/profiling/telemetry/telemetry.hpp new file mode 100644 index 0000000..1ba23a8 --- /dev/null +++ b/src/backend/profiling/telemetry/telemetry.hpp @@ -0,0 +1,151 @@ +#pragma once + +namespace profiling { + +struct Telemetry; + +extern thread_local Telemetry *g_tls; + +Telemetry *tlsTelemetry() noexcept; +void setTlsTelemetry(Telemetry *t) noexcept; + +inline Telemetry *telemetry() noexcept { return tlsTelemetry(); } + +} // namespace profiling + +#if defined(TRACY_ENABLE) +#define PROF_FRAME() FrameMark +#define PROF_FRAME_N(name_literal) FrameMarkNamed(name_literal) + +#else +#define PROF_FRAME() ((void)0) +#define PROF_FRAME_N(name_literal) ((void)0) + +#define PROF_SCOPE() ((void)0) +#define PROF_SCOPE_N(name_literal) ((void)0) + +#define PROF_THREAD_NAME(name_literal) ((void)0) + +#endif + +#define GE_JOIN_IMPL(a, b) a##b +#define GE_JOIN(a, b) GE_JOIN_IMPL(a, b) + +#if defined(ENABLE_TELEMETRY) + +#include "backend/profiling/profilers/cpu_profiler.hpp" +#include "backend/profiling/profilers/upload_profiler.hpp" +#include +#include + +namespace profiling { + +struct alignas(64) PublishedTelemetry { + std::atomic seq{0}; // even=stable, odd=writer in progress + CpuProfiler::Frame cpu{}; + UploadProfiler::Frame upload{}; +}; + +struct Telemetry { + Telemetry() = default; + ~Telemetry() = default; + + Telemetry(const Telemetry &) = delete; + Telemetry &operator=(const Telemetry &) = delete; + Telemetry(Telemetry 
&&) = delete; + Telemetry &operator=(Telemetry &&) = delete; + + CpuProfiler cpu; + UploadProfiler upload; + + PublishedTelemetry published{}; + std::chrono::steady_clock::time_point lastPublish; +}; + +inline CpuProfiler *cpuPtr() noexcept { + Telemetry *t = telemetry(); + if (t == nullptr) { + return nullptr; + } + return &t->cpu; +} + +inline UploadProfiler *uploadPtr() noexcept { + Telemetry *t = telemetry(); + if (t == nullptr) { + return nullptr; + } + return &t->upload; +} + +class CpuScope { +public: + CpuScope(CpuProfiler *cpu, CpuProfiler::Stat stat) noexcept { + if (cpu != nullptr) { + m_scope.emplace(*cpu, stat); + } + } + +private: + std::optional m_scope; +}; + +} // namespace profiling + +#define PROFILE_CPU_SCOPE(stat_enum) \ + ::profiling::CpuScope GE_JOIN(_ge_cpu_scope_, __COUNTER__)( \ + ::profiling::cpuPtr(), (stat_enum)) + +#define PROFILE_CPU_INC_DRAW_CALLS(n) \ + do { \ + if (auto *p = ::profiling::cpuPtr()) { \ + p->incDrawCalls((n)); \ + } \ + } while (0) +#define PROFILE_CPU_ADD_TRIANGLES(n) \ + do { \ + if (auto *p = ::profiling::cpuPtr()) { \ + p->addTriangles((n)); \ + } \ + } while (0) + +#define PROFILE_CPU_INC_PIPELINE_BINDS(n) \ + do { \ + if (auto *p = ::profiling::cpuPtr()) { \ + p->incPipelineBinds((n)); \ + } \ + } while (0) + +#define PROFILE_CPU_INC_DESCRIPTOR_BINDS(n) \ + do { \ + if (auto *p = ::profiling::cpuPtr()) { \ + p->incDescriptorBinds((n)); \ + } \ + } while (0) + +#define PROFILE_CPU_ADD_INSTANCES(n) \ + do { \ + if (auto *p = ::profiling::cpuPtr()) { \ + p->addInstances((n)); \ + } \ + } while (0) + +#define PROFILE_UPLOAD_ADD(stat_enum, value_u64) \ + profilerAdd(::profiling::uploadPtr(), (stat_enum), (std::uint64_t)(value_u64)) + +#define PROFILE_UPLOAD_INC(stat_enum) PROFILE_UPLOAD_ADD((stat_enum), 1) + +#else + +#define PROFILE_CPU_SCOPE(stat_enum) ((void)0) + +#define PROFILE_CPU_INC_DRAW_CALLS(n) ((void)0) +#define PROFILE_CPU_ADD_TRIANGLES(n) ((void)0) +#define PROFILE_CPU_INC_PIPELINE_BINDS(n) ((void)0) 
+#define PROFILE_CPU_INC_DESCRIPTOR_BINDS(n) ((void)0) +#define PROFILE_CPU_ADD_INSTANCES(n) ((void)0) + +#define PROFILE_UPLOAD_ADD(stat_enum, value_u64) ((void)0) +#define PROFILE_UPLOAD_INC(stat_enum) ((void)0) + +#endif diff --git a/src/backend/profiling/upload_profiler.hpp b/src/backend/profiling/upload_profiler.hpp deleted file mode 100644 index 28d6db8..0000000 --- a/src/backend/profiling/upload_profiler.hpp +++ /dev/null @@ -1,114 +0,0 @@ -#pragma once - -#include -#include -#include -#include -#include - -class UploadProfiler { -public: - enum class Stat : uint8_t { - UploadSubmitCount = 0, - - UploadMemcpyCount, - UploadMemcpyBytes, - - StagingCreatedCount, - StagingAllocatedBytes, - StagingUsedBytes, - - BufferUploadCount, - BufferUploadBytes, - BufferAllocatedBytes, - - TextureUploadCount, - TextureUploadBytes, - TextureAllocatedBytes, - - MaterialUploadCount, - MaterialUploadBytes, - MaterialAllocatedBytes, - - InstanceUploadCount, - InstanceUploadBytes, - InstanceAllocatedBytes, - - Count - }; - - struct Stats { - std::array(Stat::Count)> v{}; - }; - - void beginFrame() noexcept; - void endFrame() noexcept; - - [[nodiscard]] const Stats &last() const noexcept { return m_lastFrame; } - [[nodiscard]] const Stats &lifetime() const noexcept { return m_lifetime; } - - void add(Stat stat, std::uint64_t value) noexcept; - - static constexpr std::string_view name(Stat stat) noexcept { - switch (stat) { - case Stat::UploadSubmitCount: - return "UploadSubmitCount"; - case Stat::UploadMemcpyCount: - return "UploadMemcpyCount"; - case Stat::UploadMemcpyBytes: - return "UploadMemcpyBytes"; - - case Stat::StagingCreatedCount: - return "StagingCreatedCount"; - case Stat::StagingUsedBytes: - return "StagingUsedBytes"; - case Stat::StagingAllocatedBytes: - return "StatingAllocatedBytes"; - - case Stat::BufferUploadCount: - return "BufferUploadCount"; - case Stat::BufferUploadBytes: - return "BufferUploadBytes"; - case Stat::BufferAllocatedBytes: - return 
"StatingAllocatedBytes"; - - case Stat::TextureUploadCount: - return "TextureUploadCount"; - case Stat::TextureUploadBytes: - return "TextureUploadBytes"; - case Stat::TextureAllocatedBytes: - return "TextureAllocatedBytes"; - - case Stat::MaterialUploadCount: - return "MaterialUploadCount"; - case Stat::MaterialUploadBytes: - return "MaterialUploadBytes"; - case Stat::MaterialAllocatedBytes: - return "MaterialAllocatedBytes"; - - case Stat::InstanceUploadCount: - return "InstanceUploadCount"; - case Stat::InstanceUploadBytes: - return "InstanceUploadBytes"; - case Stat::InstanceAllocatedBytes: - return "InstanceAllocatedBytes"; - default: - return "Unknown"; - } - } - -private: - void resetFrame() noexcept { m_frame = Stats{}; } - - Stats m_frame{}; - Stats m_lastFrame{}; - Stats m_lifetime{}; -}; - -static inline void profilerAdd(UploadProfiler *profiler, - UploadProfiler::Stat stat, - std::uint64_t v) noexcept { - if (profiler != nullptr) { - profiler->add(stat, v); - } -} diff --git a/src/engine/CMakeLists.txt b/src/engine/CMakeLists.txt index ea015c7..b0c5436 100644 --- a/src/engine/CMakeLists.txt +++ b/src/engine/CMakeLists.txt @@ -2,6 +2,7 @@ add_subdirectory(app) add_subdirectory(camera) add_subdirectory(geometry) add_subdirectory(assets) +add_subdirectory(jobs) add_subdirectory(logging) add_library(quark_engine INTERFACE) @@ -10,6 +11,7 @@ target_link_libraries(quark_engine INTERFACE quark::engine::assets quark::engine::camera quark::engine::geometry + quark::engine::jobs quark::engine::logging ) diff --git a/src/engine/app/CMakeLists.txt b/src/engine/app/CMakeLists.txt index 2360549..bc7c4ba 100644 --- a/src/engine/app/CMakeLists.txt +++ b/src/engine/app/CMakeLists.txt @@ -15,6 +15,7 @@ target_link_libraries(quark_engine_app quark::render quark::engine::geometry quark::platform::window + quark::engine::jobs PRIVATE Vulkan::Vulkan ) diff --git a/src/engine/app/app.cpp b/src/engine/app/app.cpp index 2c6f6e9..50d3638 100644 --- a/src/engine/app/app.cpp +++ 
b/src/engine/app/app.cpp @@ -1,5 +1,7 @@ #include "app.hpp" +#include "backend/profiling/telemetry/telemetry.hpp" +#include "engine/jobs/job_system.hpp" #include "engine/logging/log.hpp" #include @@ -10,6 +12,12 @@ bool EngineApp::init(const AppConfig &cfg) { shutdown(); +#if defined(ENABLE_TELEMETRY) + profiling::setTlsTelemetry(&m_profTelemetry); +#else + profiling::setTlsTelemetry(nullptr); +#endif + m_cfg = cfg; if (!m_window.init(cfg.width, cfg.height, cfg.title)) { @@ -17,7 +25,10 @@ bool EngineApp::init(const AppConfig &cfg) { return false; } - LOG_INFO("App initialized"); + if (!m_jobs.init()) { + std::cerr << "[App] Failed to initialize the job system\n"; + return false; + } const auto platformExtensions = m_window.requiredVulkanExtensions(); if (platformExtensions.empty()) { @@ -42,13 +53,15 @@ bool EngineApp::init(const AppConfig &cfg) { } if (!m_renderer.init(m_ctx, m_presenter, cfg.framesInFlight, cfg.vertSpvPath, - cfg.fragSpvPath)) { + cfg.fragSpvPath, m_jobs)) { std::cerr << "[App] Renderer init failed\n"; shutdown(); return false; } m_inited = true; + LOGI("App initialized"); + return true; } @@ -60,9 +73,14 @@ void EngineApp::shutdown() noexcept { m_renderer.shutdown(); m_presenter.shutdown(); m_ctx.shutdown(); + m_jobs.shutdown(); m_window.shutdown(); m_inited = false; + +#if defined(ENABLE_TELEMETRY) + profiling::setTlsTelemetry(nullptr); +#endif } void EngineApp::run(const std::function &tick) { diff --git a/src/engine/app/app.hpp b/src/engine/app/app.hpp index aebe993..0547361 100644 --- a/src/engine/app/app.hpp +++ b/src/engine/app/app.hpp @@ -2,7 +2,9 @@ #include "backend/core/vk_backend_ctx.hpp" #include "backend/presentation/vk_presenter.hpp" +#include "backend/profiling/telemetry/telemetry.hpp" #include "engine/geometry/mesh_factory.hpp" +#include "engine/jobs/job_system.hpp" #include "platform/window/glfw_window.hpp" #include "render/renderer.hpp" @@ -51,12 +53,17 @@ class EngineApp { private: GlfwWindow m_window; + JobSystem m_jobs; 
VkBackendCtx m_ctx; VkPresenter m_presenter; Renderer m_renderer; MeshFactory m_meshes{m_renderer}; +#if defined(ENABLE_TELEMETRY) + profiling::Telemetry m_profTelemetry; +#endif + AppConfig m_cfg{}; bool m_inited = false; }; diff --git a/src/engine/assets/gltf/gltf_cpu_loader.cpp b/src/engine/assets/gltf/gltf_cpu_loader.cpp index 6abbfb4..07be70f 100644 --- a/src/engine/assets/gltf/gltf_cpu_loader.cpp +++ b/src/engine/assets/gltf/gltf_cpu_loader.cpp @@ -119,7 +119,7 @@ static std::string baseColorUri(const cgltf_material *material) { } const cgltf_image *img = tex->image; - if (img->uri != nullptr) { + if (img->uri == nullptr) { return {}; } diff --git a/src/engine/jobs/CMakeLists.txt b/src/engine/jobs/CMakeLists.txt new file mode 100644 index 0000000..ae4746c --- /dev/null +++ b/src/engine/jobs/CMakeLists.txt @@ -0,0 +1,10 @@ +add_library(quark_engine_jobs STATIC + job_system.cpp +) + +target_include_directories(quark_engine_jobs + PUBLIC + ${CMAKE_SOURCE_DIR}/src +) + +add_library(quark::engine::jobs ALIAS quark_engine_jobs) diff --git a/src/engine/jobs/job_system.cpp b/src/engine/jobs/job_system.cpp new file mode 100644 index 0000000..266eeb1 --- /dev/null +++ b/src/engine/jobs/job_system.cpp @@ -0,0 +1,179 @@ +#include "engine/jobs/job_system.hpp" + +#include +#include +#include +#include +#include +#include +#include + +thread_local uint32_t JobSystem::s_tlsWorkerIndex = + JobSystem::kInvalidWorkerIndex; + +uint32_t JobSystem::currentWorkerIndex() noexcept { return s_tlsWorkerIndex; } + +bool JobSystem::init(uint32_t threadCount) { + shutdown(); + + // m_hooks = hooks; + + uint32_t hc = std::max(1U, std::thread::hardware_concurrency()); + if (threadCount == 0) { + threadCount = hc; + } + threadCount = std::max(1U, threadCount); + + m_threadCount = threadCount; + m_stop.store(false, std::memory_order_relaxed); + m_running.store(true, std::memory_order_release); + +#if defined(ENABLE_TELEMETRY) + m_workerTelemetry.clear(); + 
m_workerTelemetry.reserve(threadCount); + for (uint32_t i = 0; i < threadCount; ++i) { + m_workerTelemetry.emplace_back(std::make_unique()); + } +#endif + + m_workers.reserve(threadCount); + for (uint32_t i = 0; i < threadCount; ++i) { + m_workers.emplace_back([this, i] { workerMain(i); }); + } + + return true; +} + +void JobSystem::shutdown() noexcept { + if (!m_running.exchange(false, std::memory_order_acq_rel)) { + return; + } + + m_stop.store(true, std::memory_order_release); + + // Wake all workers to observe stop + m_queueCv.notify_all(); + for (std::thread &thread : m_workers) { + if (thread.joinable()) { + thread.join(); + } + } + m_workers.clear(); + + // Drop remaining jobs + { + std::lock_guard lock(m_queueMutex); + m_queue.clear(); + } + + m_threadCount = 0; + m_stop.store(false, std::memory_order_relaxed); +} + +void JobSystem::enqueue(JobFn fn) { + if (!fn) { + return; + } + + // Execute inline to avoid silent losses + if (!m_running.load(std::memory_order_acquire)) { + fn(); + return; + } + + m_jobsSubmitted.fetch_add(1, std::memory_order_relaxed); + + { + std::lock_guard lock(m_queueMutex); + m_queue.push_back(Job{std::move(fn)}); + } + m_queueCv.notify_one(); +} + +void JobSystem::Group::enqueue(JobFn fn) { + if (!fn || m_sys == nullptr) { + return; + } + + m_pending.fetch_add(1, std::memory_order_relaxed); + + m_sys->enqueue([this, f = std::move(fn)]() mutable { + f(); + + if (m_pending.fetch_sub(1, std::memory_order_acq_rel) == 1) { + std::lock_guard lock(m_waitMutex); + m_waitCv.notify_all(); + } + }); +} + +void JobSystem::Group::wait() { + if (m_pending.load(std::memory_order_acquire) == 0) { + return; + } + + std::unique_lock lock(m_waitMutex); + m_waitCv.wait( + lock, [this] { return m_pending.load(std::memory_order_acquire) == 0; }); +} + +bool JobSystem::popJob(Job &out) { + std::unique_lock lock(m_queueMutex); + + m_queueCv.wait(lock, [this] { + return m_stop.load(std::memory_order_acquire) || !m_queue.empty(); + }); + + if 
(m_stop.load(std::memory_order_acquire)) { + return false; + } + + if (m_queue.empty()) { + return false; + } + + out = std::move(m_queue.front()); + m_queue.pop_front(); + return true; +} + +void JobSystem::workerMain(uint32_t workerIndex) { + s_tlsWorkerIndex = workerIndex; + +#if defined(ENABLE_TELEMETRY) + profiling::setTlsTelemetry(m_workerTelemetry[workerIndex].get()); +#endif + + if (m_hooks.onWorkerStart != nullptr) { + m_hooks.onWorkerStart(workerIndex); + } + + for (;;) { + if (m_stop.load(std::memory_order_acquire)) { + return; + } + + Job job{}; + if (!popJob(job)) { + if (m_stop.load(std::memory_order_acquire)) { + return; + } + continue; + } + + if (job.fn) { + job.fn(); + m_jobsExecuted.fetch_add(1, std::memory_order_release); + } + } + + if (m_hooks.onWorkerStop != nullptr) { + m_hooks.onWorkerStop(workerIndex); + } + +#if defined(ENABLE_TELEMETRY) + profiling::setTlsTelemetry(nullptr); +#endif + + s_tlsWorkerIndex = kInvalidWorkerIndex; +} diff --git a/src/engine/jobs/job_system.hpp b/src/engine/jobs/job_system.hpp new file mode 100644 index 0000000..7c077db --- /dev/null +++ b/src/engine/jobs/job_system.hpp @@ -0,0 +1,147 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if defined(ENABLE_TELEMETRY) +#include "backend/profiling/telemetry/telemetry.hpp" +#endif + +class JobSystem { +public: + struct Hooks { + void (*onWorkerStart)(uint32_t workerIndex) = nullptr; + void (*onWorkerStop)(uint32_t workerIndex) = nullptr; + }; + + using JobFn = std::function; + + static constexpr uint32_t kInvalidWorkerIndex = 0xFFFF'FFFFU; + + JobSystem() = default; + ~JobSystem() noexcept { shutdown(); } + + JobSystem(const JobSystem &) = delete; + JobSystem &operator=(const JobSystem &) = delete; + + JobSystem(JobSystem &&) = delete; + JobSystem &operator=(JobSystem &&) = delete; + + bool init(uint32_t threadCount = 0); + // bool init(uint32_t threadCount = 0, Hooks hooks); + void shutdown() 
noexcept; + + [[nodiscard]] bool initialized() const noexcept { return m_running.load(); } + [[nodiscard]] uint32_t threadCount() const noexcept { return m_threadCount; } + + // TLS worker index for the calling thread + // - Worker threads created by this JobSystem: 0..threadCount-1 + // - Any other thread (main thread, foreign thread): kInvalidWorkerIndex + [[nodiscard]] static uint32_t currentWorkerIndex() noexcept; + + void enqueue(JobFn fn); + + // A join point for a set of jobs + class Group { + public: + Group() = default; + ~Group() noexcept = default; + + Group(const Group &) = delete; + Group &operator=(const Group &) = delete; + + Group(Group &&) noexcept = delete; + Group &operator=(Group &&) noexcept = delete; + + // increments pending + void enqueue(JobFn fn); + + // waits until pending == 0 + void wait(); + + [[nodiscard]] uint32_t pending() const noexcept { + return m_pending.load(std::memory_order_relaxed); + } + + private: + friend class JobSystem; + explicit Group(JobSystem *sys) : m_sys(sys) {} + + JobSystem *m_sys = nullptr; + std::atomic m_pending{0}; + std::mutex m_waitMutex; + std::condition_variable m_waitCv; + }; + + [[nodiscard]] Group makeGroup() { return Group(this); } + + // Splits [0, count) into chunks of size grain. + template void parallel_for(uint32_t count, uint32_t grain, F &&fn) { + if (count == 0) { + return; + } + + if (grain == 0) { + grain = 1; + } + + auto group = makeGroup(); + for (uint32_t start = 0; start < count; start += grain) { + uint32_t end = (start + grain < count) ?
(start + grain) : count; + group.enqueue([start, end, func = std::forward(fn)]() mutable { + for (uint32_t i = start; i < end; ++i) { + func(i); + } + }); + } + + group.wait(); + } + + [[nodiscard]] uint64_t jobsSubmitted() const noexcept { + return m_jobsSubmitted.load(std::memory_order_relaxed); + } + + [[nodiscard]] uint64_t jobsExecuted() const noexcept { + return m_jobsExecuted.load(std::memory_order_relaxed); + } + +private: + Hooks m_hooks{}; + + struct Job { + JobFn fn; + }; + + bool popJob(Job &out); + void workerMain(uint32_t workerIndex); + + // Queue + std::mutex m_queueMutex; + std::condition_variable m_queueCv; + std::deque m_queue; + + std::vector m_workers; + uint32_t m_threadCount = 0; + + std::atomic m_running{false}; + std::atomic m_stop{false}; + + // Debug counters + std::atomic m_jobsSubmitted{0}; + std::atomic m_jobsExecuted{0}; + + static thread_local uint32_t s_tlsWorkerIndex; + +#if defined(ENABLE_TELEMETRY) + std::vector> m_workerTelemetry; +#endif +}; diff --git a/src/engine/logging/log.cpp b/src/engine/logging/log.cpp index 9340864..f722170 100644 --- a/src/engine/logging/log.cpp +++ b/src/engine/logging/log.cpp @@ -14,7 +14,7 @@ namespace log { // Pattern: time | level | thread | logger | msg static constexpr const char *kPattern = - "%Y-%m-%d %H:%M:%S.%e | %^%l%$ | t:%t | %n | %s:%# | %v"; + "%Y-%m-%d %H:%M:%S.%e | %^%l%$ | t:%t | %s:%# | %v"; static std::shared_ptr quark; @@ -61,7 +61,12 @@ void shutdown() { spdlog::drop_all(); } -std::shared_ptr &engine() { return quark; } +std::shared_ptr &engine() { + if (!quark) { + init(); + } + return quark; +} std::shared_ptr get(std::string_view name) { if (!quark) { diff --git a/src/engine/logging/log.hpp b/src/engine/logging/log.hpp index c38e9b7..8483b39 100644 --- a/src/engine/logging/log.hpp +++ b/src/engine/logging/log.hpp @@ -10,40 +10,11 @@ void init(); void shutdown(); std::shared_ptr &engine(); - std::shared_ptr get(std::string_view name); - -inline std::shared_ptr render() { return 
get("Render"); } -inline std::shared_ptr backend() { return get("Backend"); } - } // namespace log -// #ifndef LOG_TU_LOGGER -// #define LOG_TU_LOGGER() ::log::engine() -// #endif - -#define DEFINE_TU_LOGGER(name) \ - static inline std::shared_ptr ThisLogger() { \ - static auto lg = ::log::get(name); \ - return lg; \ - } \ - static_assert(true) +#define LOG_TU_LOGGER() (::log::engine()) -// Default (Engine) logger -#define LOG_TRACE(...) ::log::engine()->trace(__VA_ARGS__) -#define LOG_DEBUG(...) ::log::engine()->debug(__VA_ARGS__) -#define LOG_INFO(...) ::log::engine()->info(__VA_ARGS__) -#define LOG_WARN(...) \ - ::log::engine()->warn( \ - spdlog::source_loc{__FILE__, __LINE__, SPDLOG_FUNCTION}, __VA_ARGS__) -#define LOG_ERROR(...) \ - ::log::engine()->error( \ - spdlog::source_loc{__FILE__, __LINE__, SPDLOG_FUNCTION}, __VA_ARGS__) -#define LOG_CRIT(...) \ - ::log::engine()->critical( \ - spdlog::source_loc{__FILE__, __LINE__, SPDLOG_FUNCTION}, __VA_ARGS__) - -// Logger-explicit #define LOGT(...) (LOG_TU_LOGGER())->trace(__VA_ARGS__) #define LOGD(...) (LOG_TU_LOGGER())->debug(__VA_ARGS__) #define LOGI(...) (LOG_TU_LOGGER())->info(__VA_ARGS__) @@ -51,12 +22,10 @@ inline std::shared_ptr backend() { return get("Backend"); } (LOG_TU_LOGGER()) \ ->log(spdlog::source_loc{__FILE__, __LINE__, SPDLOG_FUNCTION}, \ spdlog::level::warn, __VA_ARGS__) - #define LOGE(...) \ (LOG_TU_LOGGER()) \ ->log(spdlog::source_loc{__FILE__, __LINE__, SPDLOG_FUNCTION}, \ spdlog::level::err, __VA_ARGS__) - #define LOGC(...) 
\ (LOG_TU_LOGGER()) \ ->log(spdlog::source_loc{__FILE__, __LINE__, SPDLOG_FUNCTION}, \ diff --git a/src/main.cpp b/src/main.cpp index 98557aa..a8aad18 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -22,12 +22,12 @@ // TODO: fix bad API class UploadScope { public: - UploadScope(Renderer &r, uint32_t frameIndex) : m_r(&r), m_ok(false) { - m_ok = m_r->beginUpload(frameIndex); + UploadScope(Renderer &r) : m_r(&r), m_ok(false) { + m_ok = m_r->beginStaticUploads(); } ~UploadScope() { if (m_ok) { - (void)m_r->endUpload(/*wait=*/false); + (void)m_r->endStaticUploads(/*wait=*/false); } } explicit operator bool() const noexcept { return m_ok; } @@ -65,7 +65,7 @@ static void pushCubeGrid(std::vector &out, MeshHandle mesh, int main() { log::init(); - LOG_INFO("Engine starting..."); + LOGI("Engine starting..."); EngineApp app; AppConfig cfg{}; @@ -91,7 +91,7 @@ int main() { uint32_t material = UINT32_MAX; { - UploadScope up(app.renderer(), 0); + UploadScope up(app.renderer()); if (!up) { std::cerr << "Failed to begin upload\n"; } diff --git a/src/render/CMakeLists.txt b/src/render/CMakeLists.txt index 25a75c7..a79c706 100644 --- a/src/render/CMakeLists.txt +++ b/src/render/CMakeLists.txt @@ -21,7 +21,9 @@ target_link_libraries(quark_render quark::backend::frame quark::backend::presentation quark::backend::graphics + quark::backend::profiling + quark::backend::profiling::profilers quark::render::rendergraph quark::render::resources @@ -32,6 +34,7 @@ target_link_libraries(quark_render quark::backend::gpu::upload quark::engine::camera + quark::engine::jobs PRIVATE quark::engine::geometry ) diff --git a/src/render/renderer.cpp b/src/render/renderer.cpp index fffc934..1a4c06f 100644 --- a/src/render/renderer.cpp +++ b/src/render/renderer.cpp @@ -1,13 +1,15 @@ #include "renderer.hpp" #include "backend/core/vk_backend_ctx.hpp" +#include "backend/gpu/upload/vk_upload_context.hpp" #include "backend/presentation/vk_presenter.hpp" -#include "backend/profiling/cpu_profiler.hpp" -#include 
"backend/profiling/profiling_logger.hpp" -#include "backend/profiling/vk_gpu_profiler.hpp" +#include "backend/profiling/logging/profiling_logger.hpp" +#include "backend/profiling/profilers/vk_gpu_profiler.hpp" +#include "backend/profiling/telemetry/telemetry.hpp" #include "engine/geometry/transform.hpp" +#include "engine/jobs/job_system.hpp" #include "engine/mesh/mesh_data.hpp" #include "render/rendergraph/swapchain_targets.hpp" @@ -33,23 +35,20 @@ #include #include -DEFINE_TU_LOGGER("Render.Renderer"); -#define LOG_TU_LOGGER() ThisLogger() - static constexpr VkDeviceSize kMiB = 1024ULL * 1024ULL; // 8 MiB -static constexpr VkDeviceSize kUploadStaticBudgetPerFrame = 8ULL * kMiB; +static constexpr VkDeviceSize kUploadStaticBudget = 8ULL * kMiB; // 2 MiB -static constexpr VkDeviceSize kUploadFrameBudgetPerFrame = 2ULL * kMiB; +static constexpr VkDeviceSize kUploadFrameBudget = 2ULL * kMiB; static constexpr uint32_t kRequestedMaxInstancesPerFrame = 16U * 1024U; static constexpr uint32_t kRequestedMaxMaterials = 1024U; bool Renderer::init(VkBackendCtx &ctx, VkPresenter &presenter, uint32_t framesInFlight, const std::string &vertSpvPath, - const std::string &fragSpvPath) { + const std::string &fragSpvPath, JobSystem &jobs) { if (ctx.device() == VK_NULL_HANDLE || ctx.physicalDevice() == VK_NULL_HANDLE || ctx.graphicsQueue() == VK_NULL_HANDLE || @@ -66,15 +65,17 @@ bool Renderer::init(VkBackendCtx &ctx, VkPresenter &presenter, shutdown(); m_ctx = &ctx; + m_jobs = &jobs; m_framesInFlight = framesInFlight; m_vertPath = vertSpvPath; m_fragPath = fragSpvPath; - LOGI("Renderer initialized: framesInFlight={} | shaders: vert='{}' frag='{}' " + LOGI("Renderer initialized: framesInFlight={} | threadCount: {} | shaders: " + "vert='{}' frag='{}' " "| " "uploadMiB: static={} frame={} | caps: instances={} materials={}", - framesInFlight, vertSpvPath, fragSpvPath, - kUploadStaticBudgetPerFrame / kMiB, kUploadFrameBudgetPerFrame / kMiB, + framesInFlight, m_jobs->threadCount(), 
vertSpvPath, fragSpvPath, + kUploadStaticBudget / kMiB, kUploadFrameBudget / kMiB, kRequestedMaxInstancesPerFrame, kRequestedMaxMaterials); VkDevice device = m_ctx->device(); @@ -115,35 +116,28 @@ } LOGI("Main render pass initialized"); - if (!m_uploads.init(*m_ctx, m_framesInFlight, kUploadStaticBudgetPerFrame, - kUploadFrameBudgetPerFrame, &m_uploadProfiler)) { + // TODO: use job system workers instead of hard setting to 1 thread + if (!m_uploads.init(*m_ctx, m_framesInFlight, kUploadStaticBudget, + kUploadFrameBudget, m_jobs->threadCount())) { LOGE("Failed to initialize upload manager"); shutdown(); return false; } - if (!m_uploads.beginFrame(0)) { + if (!m_uploads.beginStatic()) { LOGE("Failed to begin upload frame"); shutdown(); return false; } if (!m_scene.init(*m_ctx, m_framesInFlight, m_interface, - kRequestedMaxInstancesPerFrame, kRequestedMaxMaterials, - &m_uploadProfiler)) { + kRequestedMaxInstancesPerFrame, kRequestedMaxMaterials)) { LOGE("Failed to initialize scene data"); shutdown(); return false; } - if (!m_scene.rebindUpload(m_uploads.frame(), &m_uploadProfiler)) { - LOGE("Failed to bind scene uploader"); - shutdown(); - return false; - } - - if (!m_resources.init(*m_ctx, m_uploads.statik(), m_interface, m_scene, - &m_uploadProfiler)) { + if (!m_resources.init(*m_ctx, m_interface, m_scene)) { LOGE("Failed to initialize resources store"); shutdown(); return false; @@ -153,7 +147,9 @@ m_scene.materialCapacity()); // Create a 1x1 default white texture and material - if (!m_resources.materials().createDefaultMaterial()) { + // TODO: use job system worker instead of hardcoding 0 + if (!m_resources.materials().createDefaultMaterial( + m_uploads.staticRecorder(2))) { LOGE("Failed to create the default material"); shutdown(); return false; @@ -290,7 +286,7 @@ void Renderer::recordFrame(VkCommandBuffer cmd, VkPresenter &presenter,
vkCmdBeginRendering(cmd, &renderingInfo); vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, m_mainPass.pipeline()); - m_cpuProfiler.incPipelineBinds(1); + PROFILE_CPU_INC_PIPELINE_BINDS(1); // Viewport / scissor VkViewport viewport{}; @@ -308,7 +304,7 @@ void Renderer::recordFrame(VkCommandBuffer cmd, VkPresenter &presenter, vkCmdSetScissor(cmd, 0, 1, &scissor); m_scene.bind(cmd, m_interface, m_frames.currentFrameIndex()); - m_cpuProfiler.incDescriptorBinds(1); + PROFILE_CPU_INC_DESCRIPTOR_BINDS(1); // TODO: sort by mesh, material and stream directly into the uploader // without building vectors per batch @@ -351,6 +347,13 @@ void Renderer::drawBatches(VkCommandBuffer cmd, uint32_t frameIndex, uint32_t cursor = 0; // mat4 units within frame slice + // TODO: parallelize batching with each job having its own workerIndex + VkUploadContext::Recorder rec = m_uploads.frameRecorder(/*threadIndex=*/0); + if (!rec) { + LOGW("Frame recorder invalid (frameIndex={})", frameIndex); + return; + } + for (const auto &[key, models] : batches) { const MeshGpu *mesh = m_resources.meshes().get(key.mesh); if (mesh == nullptr) { @@ -359,18 +362,18 @@ void Renderer::drawBatches(VkCommandBuffer cmd, uint32_t frameIndex, std::span modelsSpan(models.data(), models.size()); auto instanceUpload = - m_scene.uploadInstances(frameIndex, cursor, modelsSpan); + m_scene.uploadInstances(rec, frameIndex, cursor, modelsSpan); if (!instanceUpload) { continue; } const uint32_t instanceCount = instanceUpload.instanceCount; - m_cpuProfiler.addInstances(instanceCount); + PROFILE_CPU_ADD_INSTANCES(instanceCount); m_resources.materials().bindMaterial(cmd, m_interface.pipelineLayout(), 1, key.material); - m_cpuProfiler.incDescriptorBinds(1); + PROFILE_CPU_INC_DESCRIPTOR_BINDS(1); DrawPushConstants pushConstants{}; pushConstants.baseInstance = instanceUpload.baseInstance; @@ -384,27 +387,24 @@ void Renderer::drawBatches(VkCommandBuffer cmd, uint32_t frameIndex, VkBuffer vertBuf = mesh->vertex.handle(); 
vkCmdBindVertexBuffers(cmd, 0, 1, &vertBuf, &vertBufOffset); +#if defined(ENABLE_TELEMETRY) + const uint64_t trianglesPerInstance = + static_cast(mesh->indexCount) / 3ULL; + const uint64_t triangles = + trianglesPerInstance * static_cast(instanceCount); +#endif + if (mesh->indexed()) { VkDeviceSize indexBufOffset = 0; vkCmdBindIndexBuffer(cmd, mesh->index.handle(), indexBufOffset, mesh->indexType); vkCmdDrawIndexed(cmd, mesh->indexCount, instanceCount, 0, 0, 0); - m_cpuProfiler.incDrawCalls(1); - - const uint64_t trianglesPerInstance = - static_cast(mesh->indexCount) / 3ULL; - const uint64_t triangles = - trianglesPerInstance * static_cast(instanceCount); - m_cpuProfiler.addTriangles(triangles); + PROFILE_CPU_INC_DRAW_CALLS(1); + PROFILE_CPU_ADD_TRIANGLES(triangles); } else { vkCmdDraw(cmd, mesh->vertexCount, instanceCount, 0, 0); - m_cpuProfiler.incDrawCalls(1); - - const uint64_t trianglesPerInstance = - static_cast(mesh->vertexCount) / 3ULL; - const uint64_t triangles = - trianglesPerInstance * static_cast(instanceCount); - m_cpuProfiler.addTriangles(triangles); + PROFILE_CPU_INC_DRAW_CALLS(1); + PROFILE_CPU_ADD_TRIANGLES(triangles); } } } @@ -420,13 +420,25 @@ bool Renderer::drawFrame(VkPresenter &presenter, MeshHandle mesh) { bool Renderer::drawFrame(VkPresenter &presenter, std::span items) { auto endGuard = makeScopeExit([&] { - m_cpuProfiler.endFrame(); - m_uploadProfiler.endFrame(); - m_profileReporter.logPerFrame(m_cpuProfiler, m_gpuProfiler, - m_uploadProfiler); +#if defined(ENABLE_TELEMETRY) + auto *c = profiling::cpuPtr(); + auto *u = profiling::uploadPtr(); + + if (c) { + c->endInterval(); + } + + if (u) { + u->endInterval(); + } + + if (c && u) { + m_profileReporter.logPerFrame(c, m_gpuProfiler, u); + } +#endif }); - CpuProfiler::Scope frameScope(m_cpuProfiler, CpuProfiler::Stat::FrameTotal); + PROFILE_CPU_SCOPE(CpuProfiler::Stat::FrameTotal); if (m_ctx->device() == VK_NULL_HANDLE) { return false; @@ -437,8 +449,7 @@ bool 
Renderer::drawFrame(VkPresenter &presenter, uint32_t imageIndex = 0; FrameStatus st = FrameStatus::Ok; - st = m_frames.beginFrame(presenter.swapchain(), imageIndex, UINT64_MAX, - &m_cpuProfiler); + st = m_frames.beginFrame(presenter.swapchain(), imageIndex, UINT64_MAX); if (st == FrameStatus::OutOfDate) { (void)recreateSwapchainDependent(presenter, m_vertPath, m_fragPath); @@ -457,7 +468,7 @@ bool Renderer::drawFrame(VkPresenter &presenter, } { - CpuProfiler::Scope s(m_cpuProfiler, CpuProfiler::Stat::UpdatePerFrameUBO); + PROFILE_CPU_SCOPE(CpuProfiler::Stat::UpdatePerFrameUBO); (void)m_scene.update(frameIndex, m_cameraUbo); } @@ -465,24 +476,23 @@ bool Renderer::drawFrame(VkPresenter &presenter, vkResetCommandBuffer(cmd, 0); { - CpuProfiler::Scope s(m_cpuProfiler, CpuProfiler::Stat::RecordCmd); + PROFILE_CPU_SCOPE(CpuProfiler::Stat::RecordCmd); recordFrame(cmd, presenter, m_targets, imageIndex, items); } - if (!m_uploads.flushAll(false)) { - LOGW("Failed to flush"); + if (!m_uploads.flushFrame(false)) { + LOGW("Failed to flush frame uploads"); } - FrameStatus sub = m_frames.submit( - m_ctx->graphicsQueue(), imageIndex, cmd, - VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, &m_cpuProfiler); + FrameStatus sub = + m_frames.submit(m_ctx->graphicsQueue(), imageIndex, cmd, + VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT); if (sub != FrameStatus::Ok) { return false; } - FrameStatus pst = - m_frames.present(m_ctx->graphicsQueue(), presenter.swapchain(), - imageIndex, &m_cpuProfiler); + FrameStatus pst = m_frames.present(m_ctx->graphicsQueue(), + presenter.swapchain(), imageIndex); m_gpuProfiler.onFrameSubmitted(); (void)m_gpuProfiler.tryCollect(frameIndex); @@ -536,12 +546,12 @@ bool Renderer::recreateSwapchainDependent(VkPresenter &presenter, MeshHandle Renderer::createMesh(const engine::Vertex *vertices, uint32_t vertexCount, const uint32_t *indices, uint32_t indexCount) { - return m_resources.meshes().createMesh(vertices, vertexCount, indices, - indexCount); + return 
m_resources.meshes().createMesh(m_uploads.staticRecorder(1), vertices, + vertexCount, indices, indexCount); } MeshHandle Renderer::createMesh(const engine::MeshData &mesh) { - return m_resources.meshes().createMesh(mesh); + return m_resources.meshes().createMesh(m_uploads.staticRecorder(1), mesh); } const MeshGpu *Renderer::get(MeshHandle handle) const { @@ -550,20 +560,28 @@ const MeshGpu *Renderer::get(MeshHandle handle) const { TextureHandle Renderer::createTextureFromFile(const std::string &path, bool flipY) { - return m_resources.materials().createTextureFromFile(path, flipY); + + return m_resources.materials().createTextureFromFile( + m_uploads.staticRecorder(2), path, flipY); } uint32_t Renderer::createMaterialFromTexture(TextureHandle handle) { - return m_resources.materials().createMaterialFromTexture(handle); + // TODO: make logic for if static or frame recorder + LOGI("Creating Material from texture"); + return m_resources.materials().createMaterialFromTexture( + m_uploads.staticRecorder(2), handle); } uint32_t Renderer::createMaterialFromBaseColorFactor(const glm::vec4 &factor) { - return m_resources.materials().createMaterialFromBaseColorFactor(factor); + // TODO: make logic for if static or frame recorder + return m_resources.materials().createMaterialFromBaseColorFactor( + m_uploads.staticRecorder(2), factor); } bool Renderer::createTextureFromImage(const engine::ImageData &img, VkTexture2D &outTex) { - return m_resources.materials().createTextureFromImage(img, outTex); + return m_resources.materials().createTextureFromImage( + m_uploads.staticRecorder(2), img, outTex); } void Renderer::setActiveMaterial(uint32_t materialIndex) { @@ -571,11 +589,17 @@ void Renderer::setActiveMaterial(uint32_t materialIndex) { } bool Renderer::updateMaterialGPU(uint32_t materialId, const MaterialGPU &gpu) { - return m_resources.materials().updateMaterialGPU(materialId, gpu); + // TODO: make logic for if static or frame recorder + return 
m_resources.materials().updateMaterialGPU(m_uploads.staticRecorder(3), + materialId, gpu); } bool Renderer::beginUpload(uint32_t frameIndex) { return m_uploads.beginFrame(frameIndex); } +bool Renderer::endUpload(bool wait) { return m_uploads.flushFrame(wait); } -bool Renderer::endUpload(bool wait) { return m_uploads.flushStatic(wait); } +bool Renderer::beginStaticUploads() { return m_uploads.beginStatic(); } +bool Renderer::endStaticUploads(bool wait) { + return m_uploads.flushStatic(wait); +} diff --git a/src/render/renderer.hpp b/src/render/renderer.hpp index 4f22ce8..7a42372 100644 --- a/src/render/renderer.hpp +++ b/src/render/renderer.hpp @@ -4,10 +4,8 @@ #include "backend/frame/vk_frame_manager.hpp" #include "backend/presentation/vk_presenter.hpp" -#include "backend/profiling/cpu_profiler.hpp" -#include "backend/profiling/profiling_logger.hpp" -#include "backend/profiling/upload_profiler.hpp" -#include "backend/profiling/vk_gpu_profiler.hpp" +#include "backend/profiling/logging/profiling_logger.hpp" +#include "backend/profiling/profilers/vk_gpu_profiler.hpp" #include "render/rendergraph/main_pass.hpp" #include "render/rendergraph/swapchain_targets.hpp" @@ -35,6 +33,7 @@ class VkPresenter; class VkBackendCtx; +class JobSystem; struct DrawItem { MeshHandle mesh{}; @@ -84,9 +83,7 @@ class Renderer { shutdown(); - m_cpuProfiler = std::move(other.m_cpuProfiler); m_gpuProfiler = std::move(other.m_gpuProfiler); - m_uploadProfiler = std::move(other.m_uploadProfiler); m_profileReporter = std::move(other.m_profileReporter); m_framesInFlight = std::exchange(other.m_framesInFlight, 0U); @@ -106,17 +103,12 @@ class Renderer { m_fragPath = std::exchange(other.m_fragPath, {}); m_cameraUbo = other.m_cameraUbo; - // Rebind uploader's inside stores to this renderer's command context - if (m_ctx != nullptr && m_ctx->device() != VK_NULL_HANDLE) { - (void)m_resources.rebind(*m_ctx, m_uploads.statik()); - (void)m_resources.rebind(*m_ctx, m_uploads.frame()); - } - return *this; } 
bool init(VkBackendCtx &ctx, VkPresenter &presenter, uint32_t framesInFlight, - const std::string &vertSpvPath, const std::string &fragSpvPath); + const std::string &vertSpvPath, const std::string &fragSpvPath, + JobSystem &jobs); void shutdown() noexcept; [[nodiscard]] bool drawFrame(VkPresenter &presenter, MeshHandle mesh); @@ -148,6 +140,9 @@ class Renderer { bool beginUpload(uint32_t frameIndex); bool endUpload(bool wait); + bool beginStaticUploads(); + bool endStaticUploads(bool wait); + // TODO: make PImpl private: bool createDefaultMaterial() noexcept; @@ -170,13 +165,12 @@ class Renderer { std::vector m_swapLayouts; - CpuProfiler m_cpuProfiler; VkGpuProfiler m_gpuProfiler; - UploadProfiler m_uploadProfiler; profiling::FrameLogger m_profileReporter{}; uint32_t m_framesInFlight = 0; VkBackendCtx *m_ctx = nullptr; // non-owning + JobSystem *m_jobs = nullptr; // non-owning SwapchainTargets m_targets; VkShaderInterface m_interface; diff --git a/src/render/rendergraph/swapchain_targets.cpp b/src/render/rendergraph/swapchain_targets.cpp index b643db4..48f428a 100644 --- a/src/render/rendergraph/swapchain_targets.cpp +++ b/src/render/rendergraph/swapchain_targets.cpp @@ -1,9 +1,9 @@ #include "render/rendergraph/swapchain_targets.hpp" #include "backend/presentation/vk_presenter.hpp" +#include "engine/logging/log.hpp" #include -#include #include bool SwapchainTargets::init(VkBackendCtx &ctx, VkPresenter &presenter) { @@ -11,12 +11,12 @@ bool SwapchainTargets::init(VkBackendCtx &ctx, VkPresenter &presenter) { const uint32_t imageCount = presenter.imageCount(); if (imageCount == 0) { - std::cerr << "[SwapchainTargets] presenter.imageCount() == 0\n"; + LOGE("presenter.imageCount() == 0"); return false; } if (!rebuildDepth(ctx, presenter.swapchainExtent(), imageCount)) { - std::cerr << "[SwapchainTargets] Failed to create depth images\n"; + LOGE("Depth images creation failed"); shutdown(); return false; } @@ -47,7 +47,7 @@ bool 
SwapchainTargets::recreateIfNeeded(VkBackendCtx &ctx, const VkExtent2D newExtent = presenter.swapchainExtent(); const uint32_t newImageCount = presenter.imageCount(); if (newImageCount == 0) { - std::cerr << "[SwapchainTargets] presenter.imageCount() == 0\n"; + LOGE("presenter.imageCount() == 0"); return false; } @@ -75,10 +75,8 @@ bool SwapchainTargets::rebuildDepth(VkBackendCtx &ctx, VkExtent2D extent, m_depthViews.resize(imageCount); for (uint32_t i = 0; i < imageCount; ++i) { - if (!m_depthImages[i].init(ctx.allocator(), ctx.physicalDevice(), - ctx.device(), extent)) { - std::cerr << "[SwapchainTargets] depth init failed at index " << i - << "\n"; + if (!m_depthImages[i].init(ctx, extent)) { + LOGE("Depth initialization failed at index {}", i); // partial cleanup for (auto &depth : m_depthImages) { diff --git a/src/render/resources/material_system.cpp b/src/render/resources/material_system.cpp index 58a0b86..6a698ff 100644 --- a/src/render/resources/material_system.cpp +++ b/src/render/resources/material_system.cpp @@ -1,8 +1,8 @@ #include "render/resources/material_system.hpp" #include "backend/gpu/upload/vk_upload_context.hpp" -#include "backend/profiling/upload_profiler.hpp" #include "engine/assets/stb_image/stb_image_loader.hpp" +#include "engine/logging/log.hpp" #include "render/resources/material_gpu.hpp" #include @@ -28,23 +28,18 @@ uint32_t clampMateriaCapacity(VkPhysicalDevice physicalDevice, } // namespace -bool MaterialSystem::init(VkBackendCtx &ctx, VkUploadContext &upload, +bool MaterialSystem::init(VkBackendCtx &ctx, VkDescriptorSetLayout materialSetLayout, - uint32_t materialCapacity, UploadProfiler *profiler) { + uint32_t materialCapacity) { shutdown(); - m_uploaderProfiler = profiler; - - VkDevice device = ctx.device(); - VmaAllocator allocator = ctx.allocator(); - - if (!m_textureUploader.init(allocator, device, &upload, m_uploaderProfiler)) { + if (!m_textureUploader.init(ctx)) { std::cerr << "[MaterialSystem] Failed to init texture 
uploader\n"; shutdown(); return false; } - if (!m_materialUploader.init(&upload, m_uploaderProfiler)) { + if (!m_materialUploader.init()) { std::cerr << "[MaterialSystem] Failed to init material uploader\n"; shutdown(); return false; @@ -52,14 +47,13 @@ bool MaterialSystem::init(VkBackendCtx &ctx, VkUploadContext &upload, const uint32_t cappedCapacity = clampMateriaCapacity(ctx.physicalDevice(), materialCapacity); - if (cappedCapacity == 0) { std::cerr << "[MaterialSystem] Material capacity invalid after clamp\n"; shutdown(); return false; } - if (!m_materialSets.init(device, materialSetLayout, materialCapacity)) { + if (!m_materialSets.init(ctx.device(), materialSetLayout, materialCapacity)) { std::cerr << "[MaterialSystem] Failed to init material sets\n"; shutdown(); return false; @@ -85,16 +79,16 @@ void MaterialSystem::shutdown() noexcept { m_defaultMaterial = UINT32_MAX; m_activeMaterial = UINT32_MAX; - - m_uploaderProfiler = nullptr; } -bool MaterialSystem::createDefaultMaterial() noexcept { +bool MaterialSystem::createDefaultMaterial( + VkUploadContext::Recorder staticRecorder) noexcept { VkTexture2D tex; static constexpr std::array kWhiteRGBA8{255, 255, 255, 255}; - if (!m_textureUploader.uploadRGBA8(kWhiteRGBA8.data(), 1, 1, tex)) { + if (!m_textureUploader.uploadRGBA8(staticRecorder, kWhiteRGBA8.data(), 1, 1, + tex)) { std::cerr << "[MaterialSystem] Failed to create default white texture\n"; return false; } @@ -102,7 +96,7 @@ bool MaterialSystem::createDefaultMaterial() noexcept { m_textures.push_back(std::move(tex)); m_whiteTexture = TextureHandle{static_cast(m_textures.size() - 1)}; - m_defaultMaterial = createMaterialFromTexture(m_whiteTexture); + m_defaultMaterial = createMaterialFromTexture(staticRecorder, m_whiteTexture); if (m_defaultMaterial == UINT32_MAX) { std::cerr << "[MaterialSystem] Failed to create default material\n"; return false; @@ -112,8 +106,9 @@ bool MaterialSystem::createDefaultMaterial() noexcept { return true; } -TextureHandle 
MaterialSystem::createTextureFromFile(const std::string &path, - bool flipY) { +TextureHandle +MaterialSystem::createTextureFromFile(VkUploadContext::Recorder staticRecorder, + const std::string &path, bool flipY) { engine::ImageData img; if (!engine::assets::loadImageRGBA8(path, img, flipY)) { std::cerr << "[MaterialSystem] Failed to load image: " << path << "\n"; @@ -121,8 +116,8 @@ TextureHandle MaterialSystem::createTextureFromFile(const std::string &path, } VkTexture2D tex; - if (!m_textureUploader.uploadRGBA8(img.pixels.data(), img.width, img.height, - tex)) { + if (!m_textureUploader.uploadRGBA8(staticRecorder, img.pixels.data(), + img.width, img.height, tex)) { std::cerr << "[MaterialSystem] Failed to create texture from file\n"; return {}; } @@ -131,8 +126,9 @@ TextureHandle MaterialSystem::createTextureFromFile(const std::string &path, return TextureHandle{static_cast(m_textures.size() - 1)}; } -bool MaterialSystem::createTextureFromImage(const engine::ImageData &img, - VkTexture2D &outTex) { +bool MaterialSystem::createTextureFromImage( + VkUploadContext::Recorder staticRecorder, const engine::ImageData &img, + VkTexture2D &outTex) { if (!img.valid()) { std::cerr << "[MaterialSystem] createTextureFromImage invalid image\n"; return false; @@ -145,12 +141,13 @@ bool MaterialSystem::createTextureFromImage(const engine::ImageData &img, return false; } - return m_textureUploader.uploadRGBA8(img.pixels.data(), img.width, img.height, - outTex); + return m_textureUploader.uploadRGBA8(staticRecorder, img.pixels.data(), + img.width, img.height, outTex); } uint32_t -MaterialSystem::createMaterialFromTexture(TextureHandle textureHandle) { +MaterialSystem::createMaterialFromTexture(VkUploadContext::Recorder recorder, + TextureHandle textureHandle) { if (textureHandle.id >= m_textures.size() || !m_textures[textureHandle.id].valid()) { std::cerr << "[MaterialSystem] Invalid texture handle\n"; @@ -165,7 +162,7 @@ MaterialSystem::createMaterialFromTexture(TextureHandle 
textureHandle) { MaterialGPU gpu; - if (!writeMaterialGPU(id, gpu)) { + if (!writeMaterialGPU(recorder, id, gpu)) { std::cerr << "[MaterialSystem] Failed to write material GPU table\n"; return UINT32_MAX; } @@ -173,8 +170,8 @@ MaterialSystem::createMaterialFromTexture(TextureHandle textureHandle) { return id; } -uint32_t -MaterialSystem::createMaterialFromBaseColorFactor(const glm::vec4 &factor) { +uint32_t MaterialSystem::createMaterialFromBaseColorFactor( + VkUploadContext::Recorder recorder, const glm::vec4 &factor) { if (m_whiteTexture.id == UINT32_MAX || m_whiteTexture.id >= m_textures.size() || !m_textures[m_whiteTexture.id].valid()) { @@ -191,7 +188,7 @@ MaterialSystem::createMaterialFromBaseColorFactor(const glm::vec4 &factor) { MaterialGPU gpu{}; gpu.baseColorFactor = factor; - if (!writeMaterialGPU(id, gpu)) { + if (!writeMaterialGPU(recorder, id, gpu)) { std::cerr << "[MaterialSystem] Failed to write material GPU table\n"; return UINT32_MAX; } @@ -228,7 +225,8 @@ void MaterialSystem::bindMaterialTable(VkBuffer materialTableBuffer, m_materialTableCapacity = maxMaterialsInTable; } -bool MaterialSystem::writeMaterialGPU(uint32_t materialId, +bool MaterialSystem::writeMaterialGPU(VkUploadContext::Recorder recorder, + uint32_t materialId, const MaterialGPU &gpu) { if (m_materialTable == VK_NULL_HANDLE) { std::cerr << "[MaterialSystem] Material table not bound\n"; @@ -241,10 +239,12 @@ bool MaterialSystem::writeMaterialGPU(uint32_t materialId, } const VkDeviceSize dstOffset = VkDeviceSize(materialId) * sizeof(MaterialGPU); - return m_materialUploader.uploadOne(m_materialTable, dstOffset, gpu); + return m_materialUploader.uploadOne(recorder, m_materialTable, dstOffset, + gpu); } -bool MaterialSystem::updateMaterialGPU(uint32_t materialId, +bool MaterialSystem::updateMaterialGPU(VkUploadContext::Recorder recorder, + uint32_t materialId, const MaterialGPU &gpu) { - return writeMaterialGPU(materialId, gpu); + return writeMaterialGPU(recorder, materialId, gpu); } 
diff --git a/src/render/resources/material_system.hpp b/src/render/resources/material_system.hpp index a91fb2f..9829319 100644 --- a/src/render/resources/material_system.hpp +++ b/src/render/resources/material_system.hpp @@ -13,25 +13,26 @@ #include #include -class UploadProfiler; - struct TextureHandle { uint32_t id = UINT32_MAX; }; class MaterialSystem { public: - bool init(VkBackendCtx &ctx, VkUploadContext &upload, - VkDescriptorSetLayout materialSetLayout, uint32_t materialCapacity, - UploadProfiler *profiler = nullptr); + bool init(VkBackendCtx &ctx, VkDescriptorSetLayout materialSetLayout, + uint32_t materialCapacity); void shutdown() noexcept; - TextureHandle createTextureFromFile(const std::string &path, bool flipY); - bool createTextureFromImage(const engine::ImageData &img, + TextureHandle createTextureFromFile(VkUploadContext::Recorder staticRec, + const std::string &path, bool flipY); + bool createTextureFromImage(VkUploadContext::Recorder staticRec, + const engine::ImageData &img, VkTexture2D &outTex); - uint32_t createMaterialFromTexture(TextureHandle textureHandle); - uint32_t createMaterialFromBaseColorFactor(const glm::vec4 &factor); + uint32_t createMaterialFromTexture(VkUploadContext::Recorder recorder, + TextureHandle textureHandle); + uint32_t createMaterialFromBaseColorFactor(VkUploadContext::Recorder recorder, + const glm::vec4 &factor); void setActiveMaterial(uint32_t materialIndex); [[nodiscard]] uint32_t setActiveMaterial() const { return m_activeMaterial; } @@ -42,22 +43,16 @@ class MaterialSystem { void bindMaterialTable(VkBuffer materialTableBuffer, uint32_t maxMaterialsInTable) noexcept; - bool updateMaterialGPU(uint32_t materialId, const MaterialGPU &gpu); + bool updateMaterialGPU(VkUploadContext::Recorder recorder, + uint32_t materialId, const MaterialGPU &gpu); - bool createDefaultMaterial() noexcept; + bool createDefaultMaterial(VkUploadContext::Recorder staticRec) noexcept; [[nodiscard]] uint32_t resolveMaterial(uint32_t 
overrideMaterial) const; - bool rebind(VkBackendCtx &ctx, VkUploadContext &upload) { - const bool okTex = m_textureUploader.init(ctx.allocator(), ctx.device(), - &upload, m_uploaderProfiler); - const bool okMat = m_materialUploader.init(&upload, m_uploaderProfiler); - - return okTex && okMat; - } - private: - bool writeMaterialGPU(uint32_t materialId, const MaterialGPU &gpu); + bool writeMaterialGPU(VkUploadContext::Recorder recorder, uint32_t materialId, + const MaterialGPU &gpu); VkTextureUploader m_textureUploader; VkMaterialUploader m_materialUploader; @@ -72,6 +67,4 @@ class MaterialSystem { TextureHandle m_whiteTexture{UINT32_MAX}; uint32_t m_activeMaterial = UINT32_MAX; - - UploadProfiler *m_uploaderProfiler = nullptr; // non-owning }; diff --git a/src/render/resources/mesh_store.cpp b/src/render/resources/mesh_store.cpp index 640c69a..19c7488 100644 --- a/src/render/resources/mesh_store.cpp +++ b/src/render/resources/mesh_store.cpp @@ -1,17 +1,13 @@ #include "render/resources/mesh_store.hpp" #include "backend/gpu/upload/vk_upload_context.hpp" -#include "backend/profiling/upload_profiler.hpp" #include -bool MeshStore::init(VkBackendCtx &ctx, VkUploadContext &upload, - UploadProfiler *profiler) { +bool MeshStore::init(VkBackendCtx &ctx) { shutdown(); - m_uploaderProfiler = profiler; - - if (!m_uploader.init(ctx.allocator(), &upload, m_uploaderProfiler)) { + if (!m_uploader.init(ctx.allocator())) { std::cerr << "[MeshStore] Failed to init uploader\n"; shutdown(); return false; @@ -27,33 +23,39 @@ void MeshStore::shutdown() noexcept { m_meshes.clear(); m_uploader.shutdown(); - m_uploaderProfiler = nullptr; } -MeshHandle MeshStore::createMesh(const engine::Vertex *vertices, +MeshHandle MeshStore::createMesh(VkUploadContext::Recorder staticRecorder, + const engine::Vertex *vertices, uint32_t vertexCount, const uint32_t *indices, uint32_t indexCount) { MeshGpu gpu{}; + if (!staticRecorder) { + std::cerr << "[MeshStore] createMesh requires a valid static 
recorder\n"; + return {}; + } + if (vertices == nullptr || vertexCount == 0) { - std::cerr << "[Renderer] createMesh vertices or vertex count are 0\n"; + std::cerr << "[MeshStore] createMesh vertices or vertex count are 0\n"; return {}; } const VkDeviceSize vbSize = VkDeviceSize(sizeof(engine::Vertex)) * vertexCount; - if (!m_uploader.uploadToDeviceLocalBuffer( - vertices, vbSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, gpu.vertex)) { + if (!m_uploader.uploadToDeviceLocalBuffer(staticRecorder, vertices, vbSize, + VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, + gpu.vertex)) { std::cerr << "[MeshStore] vertex upload failed\n"; return {}; } - gpu.vertexCount = vertexCount; if (indices != nullptr && indexCount > 0) { const VkDeviceSize ibSize = VkDeviceSize(sizeof(uint32_t)) * indexCount; - if (!m_uploader.uploadToDeviceLocalBuffer( - indices, ibSize, VK_BUFFER_USAGE_INDEX_BUFFER_BIT, gpu.index)) { + if (!m_uploader.uploadToDeviceLocalBuffer(staticRecorder, indices, ibSize, + VK_BUFFER_USAGE_INDEX_BUFFER_BIT, + gpu.index)) { std::cerr << "[MeshStore] indice upload failed\n"; gpu.shutdown(); return {}; @@ -68,8 +70,9 @@ MeshHandle MeshStore::createMesh(const engine::Vertex *vertices, return MeshHandle{static_cast(m_meshes.size() - 1)}; } -MeshHandle MeshStore::createMesh(const engine::MeshData &mesh) { - return createMesh(mesh.vertices.data(), +MeshHandle MeshStore::createMesh(VkUploadContext::Recorder staticRec, + const engine::MeshData &mesh) { + return createMesh(staticRec, mesh.vertices.data(), static_cast(mesh.vertices.size()), mesh.indices.empty() ? 
nullptr : mesh.indices.data(), static_cast(mesh.indices.size())); diff --git a/src/render/resources/mesh_store.hpp b/src/render/resources/mesh_store.hpp index a80c973..203ba13 100644 --- a/src/render/resources/mesh_store.hpp +++ b/src/render/resources/mesh_store.hpp @@ -10,30 +10,24 @@ #include #include -class UploadProfiler; - struct MeshHandle { uint32_t id = UINT32_MAX; }; class MeshStore { public: - bool init(VkBackendCtx &ctx, VkUploadContext &upload, - UploadProfiler *profiler); + bool init(VkBackendCtx &ctx); void shutdown() noexcept; - MeshHandle createMesh(const engine::Vertex *vertices, uint32_t vertexCount, + MeshHandle createMesh(VkUploadContext::Recorder staticRecorder, + const engine::Vertex *vertices, uint32_t vertexCount, const uint32_t *indices, uint32_t indexCount); - MeshHandle createMesh(const engine::MeshData &mesh); + MeshHandle createMesh(VkUploadContext::Recorder staticRecorder, + const engine::MeshData &mesh); [[nodiscard]] const MeshGpu *get(MeshHandle handle) const; - bool rebind(VkBackendCtx &ctx, VkUploadContext &upload) { - return m_uploader.init(ctx.allocator(), &upload, m_uploaderProfiler); - } - private: std::vector m_meshes; - VkBufferUploader m_uploader; // non-owning - UploadProfiler *m_uploaderProfiler = nullptr; // non-owning + VkBufferUploader m_uploader; // non-owning }; diff --git a/src/render/resources/resource_store.cpp b/src/render/resources/resource_store.cpp index ec27f05..b4f934d 100644 --- a/src/render/resources/resource_store.cpp +++ b/src/render/resources/resource_store.cpp @@ -2,25 +2,22 @@ #include "backend/core/vk_backend_ctx.hpp" #include "backend/gpu/descriptors/vk_shader_interface.hpp" -#include "backend/gpu/upload/vk_upload_context.hpp" -#include "backend/profiling/upload_profiler.hpp" #include "render/scene/scene_data.hpp" #include -bool ResourceStore::init(VkBackendCtx &ctx, VkUploadContext &upload, - const VkShaderInterface &interface, SceneData &data, - UploadProfiler *profiler) { +bool 
ResourceStore::init(VkBackendCtx &ctx, const VkShaderInterface &interface, + SceneData &data) { shutdown(); - if (!m_meshes.init(ctx, upload, profiler)) { + if (!m_meshes.init(ctx)) { std::cerr << "[ResourceStore] MeshStore init failed\n"; shutdown(); return false; } - if (!m_materials.init(ctx, upload, interface.setLayoutMaterial(), - data.materialCapacity(), profiler)) { + if (!m_materials.init(ctx, interface.setLayoutMaterial(), + data.materialCapacity())) { std::cerr << "[ResourceStore] MaterialSystem init failed\n"; shutdown(); return false; diff --git a/src/render/resources/resource_store.hpp b/src/render/resources/resource_store.hpp index 1392461..b15b89d 100644 --- a/src/render/resources/resource_store.hpp +++ b/src/render/resources/resource_store.hpp @@ -2,8 +2,6 @@ #include "backend/core/vk_backend_ctx.hpp" #include "backend/gpu/descriptors/vk_shader_interface.hpp" -#include "backend/gpu/upload/vk_upload_context.hpp" -#include "backend/profiling/upload_profiler.hpp" #include "render/resources/material_system.hpp" #include "render/resources/mesh_store.hpp" #include "render/scene/scene_data.hpp" @@ -12,9 +10,8 @@ class VkCommands; class ResourceStore { public: - bool init(VkBackendCtx &ctx, VkUploadContext &uploader, - const VkShaderInterface &interface, SceneData &data, - UploadProfiler *profiler); + bool init(VkBackendCtx &ctx, const VkShaderInterface &interface, + SceneData &data); void shutdown() noexcept; MeshStore &meshes() { return m_meshes; } @@ -23,14 +20,6 @@ class ResourceStore { MaterialSystem &materials() { return m_materials; } [[nodiscard]] const MaterialSystem &materials() const { return m_materials; } - bool rebind(VkBackendCtx &ctx, VkUploadContext &upload) { - if (!m_meshes.rebind(ctx, upload)) { - return false; - } - - return m_materials.rebind(ctx, upload); - } - private: MeshStore m_meshes; MaterialSystem m_materials; diff --git a/src/render/scene/CMakeLists.txt b/src/render/scene/CMakeLists.txt index cdeb2f2..f1bee89 100644 --- 
a/src/render/scene/CMakeLists.txt +++ b/src/render/scene/CMakeLists.txt @@ -17,7 +17,6 @@ target_link_libraries(quark_render_scene quark::backend::gpu::buffers quark::backend::gpu::descriptors quark::backend::gpu::upload - quark::backend::profiling quark::engine::camera PRIVATE diff --git a/src/render/scene/scene_data.cpp b/src/render/scene/scene_data.cpp index 6de79fd..a49b915 100644 --- a/src/render/scene/scene_data.cpp +++ b/src/render/scene/scene_data.cpp @@ -3,7 +3,7 @@ #include "backend/core/vk_backend_ctx.hpp" #include "backend/gpu/buffers/vk_buffer.hpp" #include "backend/gpu/descriptors/vk_shader_interface.hpp" -#include "backend/profiling/upload_profiler.hpp" +#include "backend/profiling/telemetry/telemetry.hpp" #include "engine/camera/camera_ubo.hpp" #include "render/resources/material_gpu.hpp" @@ -16,11 +16,9 @@ bool SceneData::init(VkBackendCtx &ctx, uint32_t framesInFlight, const VkShaderInterface &interface, uint32_t requestedMaxInstancesPerFrame, - uint32_t requestedMaxMaterials, UploadProfiler *profiler) { + uint32_t requestedMaxMaterials) { shutdown(); - m_profiler = profiler; - if (framesInFlight == 0) { std::cerr << "[SceneData] framesInFlight must be greater than 0\n"; return false; @@ -62,6 +60,8 @@ bool SceneData::init(VkBackendCtx &ctx, uint32_t framesInFlight, return false; } + (void)m_instanceUploader.init(); + m_initiailized = true; return true; } @@ -123,10 +123,8 @@ bool SceneData::initInstanceBuffer(VmaAllocator allocator, return false; } - if (m_profiler != nullptr) { - profilerAdd(m_profiler, UploadProfiler::Stat::InstanceAllocatedBytes, - static_cast(totalBytes)); - } + PROFILE_UPLOAD_ADD(UploadProfiler::Stat::InstanceAllocatedBytes, + static_cast(totalBytes)); return true; } @@ -158,10 +156,8 @@ bool SceneData::initMaterialBuffer(VmaAllocator allocator, return false; } - if (m_profiler != nullptr) { - profilerAdd(m_profiler, UploadProfiler::Stat::MaterialAllocatedBytes, - static_cast(m_materialTableBytes)); - } + 
PROFILE_UPLOAD_ADD(UploadProfiler::Stat::MaterialAllocatedBytes, + static_cast(m_materialTableBytes)); return true; } @@ -178,6 +174,7 @@ bool SceneData::initDescriptorSets(VkDevice device, } void SceneData::shutdown() noexcept { + m_instanceUploader.shutdown(); m_sets.shutdown(); m_materialBuf.shutdown(); @@ -188,8 +185,6 @@ void SceneData::shutdown() noexcept { m_maxInstancesPerFrame = 0; m_materialTableBytes = 0; - m_profiler = nullptr; - m_initiailized = false; } @@ -216,18 +211,14 @@ void SceneData::bind(VkCommandBuffer cmd, const VkShaderInterface &interface, m_sets.bind(cmd, interface.pipelineLayout(), 0, frameIndex); } -bool SceneData::rebindUpload(VkUploadContext &upload, - UploadProfiler *profiler) { - return m_instanceUploader.init(&upload, profiler); -} - InstanceUploadResult -SceneData::uploadInstances(uint32_t frameIndex, uint32_t &cursorInstances, +SceneData::uploadInstances(VkUploadContext::Recorder recorder, + uint32_t frameIndex, uint32_t &cursorInstances, std::span models) { const VkDeviceSize frameBase = VkDeviceSize(frameIndex) * m_instanceFrameStride; return m_instanceUploader.uploadMat4Instances( - m_instanceBuf.handle(), frameBase, m_instanceFrameStride, + recorder, m_instanceBuf.handle(), frameBase, m_instanceFrameStride, m_maxInstancesPerFrame, cursorInstances, models); } diff --git a/src/render/scene/scene_data.hpp b/src/render/scene/scene_data.hpp index 63b61ea..ecf67f7 100644 --- a/src/render/scene/scene_data.hpp +++ b/src/render/scene/scene_data.hpp @@ -13,8 +13,6 @@ #include #include -class UploadProfiler; - class SceneData { public: SceneData() = default; @@ -29,19 +27,18 @@ class SceneData { bool init(VkBackendCtx &ctx, uint32_t framesInFlight, const VkShaderInterface &interface, uint32_t requestedMaxInstancesPerFrame, - uint32_t requestedMaxMaterials, UploadProfiler *profiler); + uint32_t requestedMaxMaterials); void shutdown() noexcept; bool update(uint32_t frameIndex, const CameraUBO &camera); void bind(VkCommandBuffer cmd, const 
VkShaderInterface &interface, uint32_t frameIndex) const; - InstanceUploadResult uploadInstances(uint32_t frameIndex, + InstanceUploadResult uploadInstances(VkUploadContext::Recorder recorder, + uint32_t frameIndex, uint32_t &cursorInstances, std::span models); - bool rebindUpload(VkUploadContext &upload, UploadProfiler *profiler); - [[nodiscard]] VkBuffer materialBuffer() const noexcept { return m_materialBuf.handle(); } @@ -84,8 +81,6 @@ class SceneData { uint32_t m_maxInstancesPerFrame = 0; VkInstanceUploader m_instanceUploader; - UploadProfiler *m_profiler = nullptr; // non-owning - VkSceneSets m_sets; // set 0 bindings bool m_initiailized = false; }; diff --git a/src/render/upload/upload_manager.cpp b/src/render/upload/upload_manager.cpp index 7ee5f44..640c359 100644 --- a/src/render/upload/upload_manager.cpp +++ b/src/render/upload/upload_manager.cpp @@ -1,14 +1,15 @@ #include "render/upload/upload_manager.hpp" +#include "backend/gpu/upload/vk_upload_context.hpp" + #include #include bool UploadManager::init(VkBackendCtx &ctx, uint32_t framesInFlight, - VkDeviceSize staticBudgetPerFrame, - VkDeviceSize frameBudgetPerFrame, - UploadProfiler *profiler) { - if (framesInFlight == 0 || staticBudgetPerFrame == 0 || - frameBudgetPerFrame == 0) { + VkDeviceSize staticTotalBytes, + VkDeviceSize frameBudget, uint32_t threadCount) { + if (framesInFlight == 0 || staticTotalBytes == 0 || frameBudget == 0 || + threadCount == 0) { std::cerr << "[UploadManager] init invalid args\n"; return false; } @@ -17,14 +18,20 @@ bool UploadManager::init(VkBackendCtx &ctx, uint32_t framesInFlight, m_ctx = &ctx; m_framesInFlight = framesInFlight; + m_threadCount = threadCount; + + m_currentFrameIndex = 0; + m_frameBegun = false; + + m_staticActive = false; - if (!m_static.init(ctx, m_framesInFlight, staticBudgetPerFrame, profiler)) { + if (!m_static.initOneShot(ctx, staticTotalBytes, threadCount)) { std::cerr << "[Renderer] Failed to init static upload context\n"; shutdown(); return 
false; } - if (!m_frame.init(ctx, m_framesInFlight, frameBudgetPerFrame, profiler)) { + if (!m_frame.initFrameRing(ctx, m_framesInFlight, frameBudget, threadCount)) { std::cerr << "[Renderer] Failed to init frame upload context\n"; shutdown(); return false; @@ -36,8 +43,15 @@ bool UploadManager::init(VkBackendCtx &ctx, uint32_t framesInFlight, void UploadManager::shutdown() noexcept { m_frame.shutdown(); m_static.shutdown(); + m_ctx = nullptr; m_framesInFlight = 0; + m_threadCount = 0; + + m_currentFrameIndex = 0; + m_frameBegun = false; + + m_staticActive = false; } bool UploadManager::beginFrame(uint32_t frameIndex) { @@ -45,16 +59,86 @@ bool UploadManager::beginFrame(uint32_t frameIndex) { return false; } - // Begin both lanes for the same frame index - if (!m_static.beginFrame(frameIndex)) { + if (frameIndex >= m_framesInFlight) { + std::cerr << "[UploadManager] beginFrame frameIndex out of range\n"; + return false; + } + + m_currentFrameIndex = frameIndex; + m_frameBegun = false; + + if (!m_frame.beginFrame(frameIndex)) { + std::cerr << "[UploadManager] frame.beginFrame failed\n"; + return false; + } + + m_frameBegun = true; + return true; +} + +bool UploadManager::flushFrame(bool wait) { + if (m_ctx == nullptr) { return false; } - return m_frame.beginFrame(frameIndex); + if (!m_frameBegun) { + return true; + } + + const bool ok = m_frame.flushFrame(m_currentFrameIndex, wait); + m_frameBegun = false; + return ok; +} + +VkUploadContext::Recorder UploadManager::frameRecorder(uint32_t threadIndex) { + if (m_ctx == nullptr || !m_frameBegun) { + return {}; + } + + return m_frame.recorder(m_currentFrameIndex, threadIndex); } -bool UploadManager::flushFrame(bool wait) { return m_frame.flush(wait); } -bool UploadManager::flushStatic(bool wait) { return m_static.flush(wait); } +bool UploadManager::beginStatic() { + if (m_ctx == nullptr) { + return false; + } + + if (m_staticActive) { + std::cerr + << "[UploadManager] beginStatic called while static batch active\n"; + 
return false; + } + + if (!m_static.beginBatch()) { + std::cerr << "[UploadManager] static.beginBatch failed\n"; + return false; + } + + m_staticActive = true; + return true; +} + +bool UploadManager::flushStatic(bool wait) { + if (m_ctx == nullptr) { + return false; + } + + if (!m_staticActive) { + return true; + } + + const bool ok = m_static.flushBatch(wait); + m_staticActive = false; + return ok; +} + +VkUploadContext::Recorder UploadManager::staticRecorder(uint32_t threadIndex) { + if (m_ctx == nullptr || !m_staticActive) { + return {}; + } + + return m_static.recorder(/*frameIndex=*/0, threadIndex); +} bool UploadManager::flushAll(bool wait) { if (!flushFrame(wait)) { diff --git a/src/render/upload/upload_manager.hpp b/src/render/upload/upload_manager.hpp index 44846c4..e3df0c2 100644 --- a/src/render/upload/upload_manager.hpp +++ b/src/render/upload/upload_manager.hpp @@ -2,7 +2,6 @@ #include "backend/core/vk_backend_ctx.hpp" #include "backend/gpu/upload/vk_upload_context.hpp" -#include "backend/profiling/upload_profiler.hpp" #include #include @@ -37,29 +36,38 @@ class UploadManager { } bool init(VkBackendCtx &ctx, uint32_t framesInFlight, - VkDeviceSize staticBudgetPerFrame, VkDeviceSize frameBudgetPerFrame, - UploadProfiler *profiler); + VkDeviceSize staticTotalBytes, VkDeviceSize frameBudget, + uint32_t threadCount); void shutdown() noexcept; - // TODO: make static not per frame after uploader is - // redone to allow for per thread command pools bool beginFrame(uint32_t frameIndex); - bool flushFrame(bool wait); + [[nodiscard]] VkUploadContext::Recorder frameRecorder(uint32_t threadIndex); + + bool beginStatic(); bool flushStatic(bool wait); + [[nodiscard]] VkUploadContext::Recorder staticRecorder(uint32_t threadIndex); bool flushAll(bool wait); - [[nodiscard]] VkUploadContext &statik() noexcept { return m_static; } - [[nodiscard]] VkUploadContext &frame() noexcept { return m_frame; } - + [[nodiscard]] uint32_t currentFrameIndex() const noexcept { + 
return m_currentFrameIndex; + } [[nodiscard]] uint32_t framesInFlight() const noexcept { return m_framesInFlight; } + [[nodiscard]] uint32_t threadCount() const noexcept { return m_threadCount; } private: VkBackendCtx *m_ctx = nullptr; // non-owning + // uint32_t m_framesInFlight = 0; + uint32_t m_threadCount = 0; + + uint32_t m_currentFrameIndex = 0; + bool m_frameBegun = false; + + bool m_staticActive = false; VkUploadContext m_static; VkUploadContext m_frame;