From 918568e7e6f74f8742cbd678c3a33e640dd80512 Mon Sep 17 00:00:00 2001 From: Slendi Date: Thu, 5 Mar 2026 08:35:11 +0200 Subject: [PATCH] Screenshot and stuff Signed-off-by: Slendi --- meson.build | 1 + src/AllocTracker.cpp | 310 +++++++++++++++++++++++++++++++++++++++++ src/AllocTracker.h | 12 ++ src/Application.cpp | 109 ++++++++------- src/Application.h | 6 + src/Types.h | 10 ++ src/Util.cpp | 11 ++ src/Util.h | 20 ++- src/VulkanRenderer.cpp | 241 +++++++++++++++++++++++++++----- src/VulkanRenderer.h | 9 ++ 10 files changed, 650 insertions(+), 79 deletions(-) create mode 100644 src/AllocTracker.cpp create mode 100644 src/AllocTracker.h diff --git a/meson.build b/meson.build index e37cca0..4cb7437 100644 --- a/meson.build +++ b/meson.build @@ -170,6 +170,7 @@ exe = executable('vr-compositor', 'src/main.cpp', 'src/Impls.cpp', 'src/Util.cpp', + 'src/AllocTracker.cpp', 'src/Logger.cpp', 'src/DescriptorLayoutBuilder.cpp', 'src/DescriptorAllocator.cpp', diff --git a/src/AllocTracker.cpp b/src/AllocTracker.cpp new file mode 100644 index 0000000..b99fe86 --- /dev/null +++ b/src/AllocTracker.cpp @@ -0,0 +1,310 @@ +#include "AllocTracker.h" + +#include "AllocTracker.h" + +#include +#include +#include +#include +#include +#include + +#include "Logger.h" + +namespace Lunar { +auto log_top_allocators(::Logger &logger, std::size_t max_entries) -> void; +} + +namespace { + +constexpr std::uintptr_t k_empty = 0; +constexpr std::uintptr_t k_tombstone = 1; +constexpr std::size_t k_ptr_table_size = 1 << 18; +constexpr std::size_t k_site_table_size = 1 << 14; + +struct PtrEntry { + std::uintptr_t ptr { k_empty }; + std::size_t size { 0 }; + std::uintptr_t site { 0 }; +}; + +struct SiteEntry { + std::uintptr_t site { 0 }; + std::size_t live_bytes { 0 }; + std::size_t alloc_bytes { 0 }; + std::size_t alloc_count { 0 }; + std::size_t free_count { 0 }; +}; + +static PtrEntry g_ptr_table[k_ptr_table_size]; +static SiteEntry g_site_table[k_site_table_size]; +static std::atomic_flag 
g_lock = ATOMIC_FLAG_INIT; +static thread_local bool g_tracking_disabled = false; + +auto tracker_enabled() -> bool +{ + static bool enabled = std::getenv("LUNAR_ALLOC_TRACKER") != nullptr; + return enabled; +} + +auto lock_table() -> void +{ + while (g_lock.test_and_set(std::memory_order_acquire)) { + std::this_thread::yield(); + } +} + +auto unlock_table() -> void { g_lock.clear(std::memory_order_release); } + +auto hash_ptr(std::uintptr_t key) -> std::size_t +{ + return (key >> 4) ^ (key >> 9); +} + +auto find_ptr_slot(std::uintptr_t ptr, bool for_insert) -> std::size_t +{ + std::size_t index = hash_ptr(ptr) & (k_ptr_table_size - 1); + std::size_t first_tombstone = k_ptr_table_size; + for (std::size_t probe = 0; probe < k_ptr_table_size; ++probe) { + auto &entry = g_ptr_table[index]; + if (entry.ptr == ptr) { + return index; + } + if (entry.ptr == k_empty) { + return for_insert && first_tombstone != k_ptr_table_size + ? first_tombstone + : index; + } + if (entry.ptr == k_tombstone && first_tombstone == k_ptr_table_size) { + first_tombstone = index; + } + index = (index + 1) & (k_ptr_table_size - 1); + } + return for_insert ? first_tombstone : k_ptr_table_size; /* no empty slot left: an insert reuses the first tombstone seen (still k_ptr_table_size if there was none) */ +} + +auto find_site_slot(std::uintptr_t site, bool for_insert) -> std::size_t +{ + std::size_t index = hash_ptr(site) & (k_site_table_size - 1); + for (std::size_t probe = 0; probe < k_site_table_size; ++probe) { + auto &entry = g_site_table[index]; + if (entry.site == site) { + return index; + } + if (entry.site == 0) { + return for_insert ? 
index : k_site_table_size; + } + index = (index + 1) & (k_site_table_size - 1); + } + return k_site_table_size; +} + +auto track_alloc(void *ptr, std::size_t size, std::uintptr_t site) -> void +{ + if (!ptr || !tracker_enabled() || g_tracking_disabled) { + return; + } + + lock_table(); + auto slot = find_ptr_slot(reinterpret_cast(ptr), true); + if (slot < k_ptr_table_size) { + g_ptr_table[slot].ptr = reinterpret_cast(ptr); + g_ptr_table[slot].size = size; + g_ptr_table[slot].site = site; + } + + auto site_slot = find_site_slot(site, true); + if (site_slot < k_site_table_size) { + auto &entry = g_site_table[site_slot]; + if (entry.site == 0) { + entry.site = site; + } + entry.live_bytes += slot < k_ptr_table_size ? size : 0; /* a pointer that was not recorded can never be subtracted again by track_free */ + entry.alloc_bytes += size; + entry.alloc_count += 1; + } + unlock_table(); +} + +auto track_free(void *ptr) -> void +{ + if (!ptr || !tracker_enabled() || g_tracking_disabled) { + return; + } + + lock_table(); + auto slot = find_ptr_slot(reinterpret_cast(ptr), false); + if (slot < k_ptr_table_size + && g_ptr_table[slot].ptr == reinterpret_cast(ptr)) { + auto size = g_ptr_table[slot].size; + auto site = g_ptr_table[slot].site; + g_ptr_table[slot].ptr = k_tombstone; + g_ptr_table[slot].size = 0; + g_ptr_table[slot].site = 0; + + auto site_slot = find_site_slot(site, false); + if (site_slot < k_site_table_size) { + auto &entry = g_site_table[site_slot]; + if (entry.live_bytes >= size) { + entry.live_bytes -= size; + } else { + entry.live_bytes = 0; + } + entry.free_count += 1; + } + } + unlock_table(); +} + +} // namespace + +void *operator new(std::size_t size) +{ + void *ptr = std::malloc(size); + if (!ptr) { + throw std::bad_alloc(); + } + track_alloc(ptr, size, + reinterpret_cast(__builtin_return_address(0))); + return ptr; +} + +void *operator new[](std::size_t size) +{ + void *ptr = std::malloc(size); + if (!ptr) { + throw std::bad_alloc(); + } + track_alloc(ptr, size, + reinterpret_cast(__builtin_return_address(0))); + return ptr; +} + +void operator delete(void *ptr) 
noexcept +{ + track_free(ptr); + std::free(ptr); +} + +void operator delete[](void *ptr) noexcept +{ + track_free(ptr); + std::free(ptr); +} + +void operator delete(void *ptr, std::size_t) noexcept +{ + track_free(ptr); + std::free(ptr); +} + +void operator delete[](void *ptr, std::size_t) noexcept +{ + track_free(ptr); + std::free(ptr); +} + +void *operator new(std::size_t size, std::nothrow_t const &) noexcept +{ + void *ptr = std::malloc(size); + if (!ptr) { + return nullptr; + } + track_alloc(ptr, size, + reinterpret_cast(__builtin_return_address(0))); + return ptr; +} + +void *operator new[](std::size_t size, std::nothrow_t const &) noexcept +{ + void *ptr = std::malloc(size); + if (!ptr) { + return nullptr; + } + track_alloc(ptr, size, + reinterpret_cast(__builtin_return_address(0))); + return ptr; +} + +void operator delete(void *ptr, std::nothrow_t const &) noexcept +{ + track_free(ptr); + std::free(ptr); +} + +void operator delete[](void *ptr, std::nothrow_t const &) noexcept +{ + track_free(ptr); + std::free(ptr); +} + +namespace Lunar { + +auto alloc_tracker_enabled() -> bool { return tracker_enabled(); } + +auto log_top_allocators(::Logger &logger, std::size_t max_entries) -> void +{ + if (!tracker_enabled()) { + return; + } + + struct Snapshot { + std::uintptr_t site; + std::size_t live_bytes; + std::size_t alloc_bytes; + std::size_t alloc_count; + std::size_t free_count; + }; + constexpr std::size_t max_slots = 64; + Snapshot top[max_slots] {}; + std::size_t count = 0; + + g_tracking_disabled = true; + lock_table(); + for (auto const &entry : g_site_table) { + if (entry.site == 0 || entry.live_bytes == 0) { + continue; + } + std::size_t insert_at = count; + if (insert_at < max_entries && insert_at < max_slots) { + count++; + } else { + insert_at + = max_entries < max_slots ? 
max_entries : max_slots; /* array full: this equals count, which the insert_at < count guard below treats as "drop" unless a larger entry is found */ + } + if (count > max_slots) { + count = max_slots; + } + for (std::size_t i = 0; i < count; ++i) { + if (entry.live_bytes > top[i].live_bytes) { + insert_at = i; + break; + } + } + for (std::size_t i = count; i > insert_at + 1; --i) { + top[i - 1] = top[i - 2]; + } + if (insert_at < count) { + top[insert_at] = Snapshot { + entry.site, + entry.live_bytes, + entry.alloc_bytes, + entry.alloc_count, + entry.free_count, + }; + } + } + unlock_table(); + + std::size_t limit = std::min(count, max_entries); + for (std::size_t i = 0; i < limit; ++i) { + auto const &entry = top[i]; + logger.info("AllocTop[{}]: site=0x{:x} live={} KB allocs={} frees={} " + "total={} KB", + i, entry.site, entry.live_bytes / 1024, entry.alloc_count, + entry.free_count, entry.alloc_bytes / 1024); + } + g_tracking_disabled = false; +} + +} // namespace Lunar diff --git a/src/AllocTracker.h b/src/AllocTracker.h new file mode 100644 index 0000000..ad37a72 --- /dev/null +++ b/src/AllocTracker.h @@ -0,0 +1,12 @@ +#pragma once + +#include + +struct Logger; + +namespace Lunar { + +auto alloc_tracker_enabled() -> bool; +auto log_top_allocators(::Logger &logger, std::size_t max_entries = 10) -> void; + +} // namespace Lunar diff --git a/src/Application.cpp b/src/Application.cpp index 31943be..5ffd829 100644 --- a/src/Application.cpp +++ b/src/Application.cpp @@ -708,7 +708,15 @@ auto Application::run() -> void SDL_Event e; bool const use_sdl { (m_backend == Backend::SDL) }; bool const openxr_enabled { m_openxr != nullptr }; - bool const use_imgui { use_sdl && !openxr_enabled }; + bool const env_disable_imgui { std::getenv("LUNAR_NO_IMGUI") != nullptr }; + m_imgui_allowed = use_sdl && !openxr_enabled && !env_disable_imgui; + bool const use_imgui { m_imgui_allowed }; + if (!m_imgui_allowed) { + m_show_imgui = false; + if (m_renderer) { + m_renderer->set_imgui_enabled(false); + } + } if (use_imgui) { ImGuiIO &io = ImGui::GetIO(); @@ -899,25 +907,16 @@ auto Application::run() 
-> void bool debug_open { ImGui::Begin("Debug Info", nullptr, ImGuiWindowFlags_NoTitleBar | ImGuiWindowFlags_NoResize) }; if (debug_open) { - ImGui::Text("%s", std::format("FPS: {:.2f}", fps).c_str()); - ImGui::Text("%s", - std::format("Cam pos: ({:.2f}, {:.2f}, {:.2f})", - m_camera.position.x(), m_camera.position.y(), - m_camera.position.z()) - .c_str()); - ImGui::Text("%s", - std::format("Cam tgt: ({:.2f}, {:.2f}, {:.2f})", - m_camera.target.x(), m_camera.target.y(), - m_camera.target.z()) - .c_str()); - ImGui::Text("%s", - std::format("Cam up: ({:.2f}, {:.2f}, {:.2f})", - m_camera.up.x(), m_camera.up.y(), m_camera.up.z()) - .c_str()); - ImGui::Text("%s", - std::format("Cursor r/theta/phi: {:.2f}, {:.2f}, {:.2f}", - m_cursor.r, m_cursor.theta, m_cursor.phi) - .c_str()); + ImGui::Text("FPS: %.2f", fps); + ImGui::Text("Cam pos: (%.2f, %.2f, %.2f)", + m_camera.position.x(), m_camera.position.y(), + m_camera.position.z()); + ImGui::Text("Cam tgt: (%.2f, %.2f, %.2f)", m_camera.target.x(), + m_camera.target.y(), m_camera.target.z()); + ImGui::Text("Cam up: (%.2f, %.2f, %.2f)", m_camera.up.x(), + m_camera.up.y(), m_camera.up.z()); + ImGui::Text("Cursor r/theta/phi: %.2f, %.2f, %.2f", m_cursor.r, + m_cursor.theta, m_cursor.phi); } ImGui::End(); ImGui::PopStyleColor(); @@ -1130,8 +1129,9 @@ auto Application::run() -> void } else { m_renderer->render(record_scene); } + handle_pending_screenshot(); #if defined(TRACY_ENABLE) - FrameMark; + GFrameMark(); #endif } } @@ -2124,42 +2124,22 @@ auto Application::handle_keyboard_event(libinput_event_keyboard *event) -> void } } - if (m_backend == Backend::SDL && !m_openxr && pressed && key == KEY_F11 - && m_ctrl_pressed_count > 0) { + if (m_backend == Backend::SDL && !m_openxr && m_imgui_allowed && pressed + && key == KEY_F11 && m_ctrl_pressed_count > 0) { bool const new_show_imgui { !m_show_imgui }; m_show_imgui = new_show_imgui; mouse_captured(!new_show_imgui); } if (pressed && key == KEY_F12) { - auto screenshot { std::optional 
{} }; + m_pending_screenshot = true; if (m_renderer) { - screenshot = m_renderer->get_screenshot_pixels(); - } - - if (!screenshot) { - m_logger.warn("Screenshot not ready"); - return; - } - - auto const extent { screenshot->extent }; - auto const stride { static_cast(extent.width * 4) }; - auto const index { m_screenshot_index++ }; - auto const now { std::chrono::system_clock::now() }; - auto filename { std::format( - "screenshot_{:%Y%m%d_%H%M%S}_{:04}.png", now, index) }; - int const result { stbi_write_png(filename.c_str(), - static_cast(extent.width), static_cast(extent.height), 4, - screenshot->pixels.data(), stride) }; - - if (result == 0) { - m_logger.err("Failed to write screenshot {}", filename); - } else { - m_logger.info("Saved screenshot {}", filename); + m_renderer->request_screenshot(); } + return; } - if (m_backend == Backend::SDL) { + if (m_backend == Backend::SDL && m_imgui_allowed) { if (auto imgui_key { linux_key_to_imgui(key) }) { if (m_show_imgui) ImGui::GetIO().AddKeyEvent(*imgui_key, pressed); @@ -2192,6 +2172,41 @@ auto Application::clamp_mouse_to_window(int width, int height) -> void static_cast(m_mouse_x), static_cast(m_mouse_y)); } +auto Application::handle_pending_screenshot() -> void +{ + if (!m_pending_screenshot || !m_renderer) { + return; + } + + auto screenshot { m_renderer->get_screenshot_pixels() }; + if (!screenshot) { + return; + } + + save_screenshot(*screenshot); + m_pending_screenshot = false; +} + +auto Application::save_screenshot( + VulkanRenderer::ScreenshotPixels const &screenshot) -> void +{ + auto const extent { screenshot.extent }; + auto const stride { static_cast(extent.width * 4) }; + auto const index { m_screenshot_index++ }; + auto const now { std::chrono::system_clock::now() }; + auto filename { std::format( + "screenshot_{:%Y%m%d_%H%M%S}_{:04}.png", now, index) }; + int const result { stbi_write_png(filename.c_str(), + static_cast(extent.width), static_cast(extent.height), 4, + screenshot.pixels.data(), stride) 
}; + + if (result == 0) { + m_logger.err("Failed to write screenshot {}", filename); + } else { + m_logger.info("Saved screenshot {}", filename); + } +} + auto Application::mouse_captured(bool new_state) -> void { if (m_backend != Backend::SDL) { diff --git a/src/Application.h b/src/Application.h index 2935f01..82ac254 100644 --- a/src/Application.h +++ b/src/Application.h @@ -99,9 +99,11 @@ private: bool m_running { true }; bool m_mouse_captured { false }; bool m_show_imgui { false }; + bool m_imgui_allowed { true }; bool m_window_focused { true }; int m_ctrl_pressed_count { 0 }; std::uint32_t m_screenshot_index { 0 }; + bool m_pending_screenshot { false }; double m_mouse_x { 0.0 }; double m_mouse_y { 0.0 }; @@ -115,6 +117,10 @@ private: Camera m_camera; PolarCoordinate m_cursor; + auto handle_pending_screenshot() -> void; + auto save_screenshot(VulkanRenderer::ScreenshotPixels const &screenshot) + -> void; + static inline std::array m_left_joints {}; static inline std::array diff --git a/src/Types.h b/src/Types.h index fb1adec..cd917ee 100644 --- a/src/Types.h +++ b/src/Types.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include @@ -44,6 +45,15 @@ struct FrameData { vk::Extent2D screenshot_extent {}; std::vector screenshot_rgba; bool screenshot_ready { false }; + std::vector gl_staging_pool {}; + std::vector gl_staging_sizes {}; + std::size_t gl_staging_cursor { 0 }; + std::vector gl_vertex_pool {}; + std::vector gl_vertex_sizes {}; + std::size_t gl_vertex_cursor { 0 }; + std::vector gl_index_pool {}; + std::vector gl_index_sizes {}; + std::size_t gl_index_cursor { 0 }; }; struct Vertex { diff --git a/src/Util.cpp b/src/Util.cpp index eadc872..9eaac52 100644 --- a/src/Util.cpp +++ b/src/Util.cpp @@ -1,7 +1,18 @@ #include "Util.h" +#include #include +namespace Lunar { + +auto tracy_runtime_enabled() -> bool +{ + static bool enabled = std::getenv("LUNAR_NO_TRACY") == nullptr; + return enabled; +} + +} // namespace Lunar + namespace vkutil { auto 
transition_image(vk::CommandBuffer cmd, vk::Image image, diff --git a/src/Util.h b/src/Util.h index 8435bf4..0468647 100644 --- a/src/Util.h +++ b/src/Util.h @@ -37,11 +37,29 @@ template privDefer defer_func(F f) { return privDefer(f); } } while (0) #if defined(TRACY_ENABLE) -# define GZoneScopedN(name) ZoneScopedN(name) +# include +namespace Lunar { +auto tracy_runtime_enabled() -> bool; +} +# define GZoneScopedN(name) \ + /* The zone object must live in the caller's scope; a do/while wrapper would end it at once. */ \ + ZoneNamedN( \ + ___tracy_scoped_zone, \ + name, \ + Lunar::tracy_runtime_enabled()) +# define GFrameMark() \ + do { \ + if (Lunar::tracy_runtime_enabled()) { \ + FrameMark; \ + } \ + } while (0) #else # define GZoneScopedN(name) \ do { \ } while (0) +# define GFrameMark() \ + do { \ + } while (0) #endif namespace vkutil { diff --git a/src/VulkanRenderer.cpp b/src/VulkanRenderer.cpp index baca8f2..7f27eb2 100644 --- a/src/VulkanRenderer.cpp +++ b/src/VulkanRenderer.cpp @@ -3,10 +3,12 @@ #include #include +#include #include #include #include #include +#include #include #include #include @@ -14,17 +16,22 @@ #include #include #include +#if defined(__linux__) +# include +#endif #include #include #include #include #include +#include #if defined(TRACY_ENABLE) # include #endif +#include "AllocTracker.h" #include "DescriptorLayoutBuilder.h" #include "DescriptorWriter.h" #include "GraphicsPipelineBuilder.h" @@ -221,8 +228,43 @@ auto VulkanRenderer::GL::flush() -> void auto const index_data_size { m_indices.size() * sizeof(uint32_t) }; auto const staging_size { vertex_data_size + index_data_size }; - auto staging { m_renderer.create_buffer(staging_size, - vk::BufferUsageFlagBits::eTransferSrc, VMA_MEMORY_USAGE_CPU_ONLY) }; + auto &frame { m_renderer.m_vk.get_current_frame() }; + auto acquire_buffer + = [&](std::vector &pool, + std::vector &sizes, std::size_t &cursor, + std::size_t required, vk::BufferUsageFlags usage, + VmaMemoryUsage memory_usage) -> AllocatedBuffer & { + if (cursor < pool.size()) { + if (sizes[cursor] < required) { + 
m_renderer.destroy_buffer(pool[cursor]); + pool[cursor] + = m_renderer.create_buffer(required, usage, memory_usage); + sizes[cursor] = required; + } + return pool[cursor++]; + } + + pool.emplace_back( + m_renderer.create_buffer(required, usage, memory_usage)); + sizes.emplace_back(required); + ++cursor; + return pool.back(); + }; + + auto &staging = acquire_buffer(frame.gl_staging_pool, + frame.gl_staging_sizes, frame.gl_staging_cursor, staging_size, + vk::BufferUsageFlagBits::eTransferSrc, VMA_MEMORY_USAGE_CPU_ONLY); + auto &vertex_buffer = acquire_buffer(frame.gl_vertex_pool, + frame.gl_vertex_sizes, frame.gl_vertex_cursor, vertex_data_size, + vk::BufferUsageFlagBits::eVertexBuffer + | vk::BufferUsageFlagBits::eTransferDst + | vk::BufferUsageFlagBits::eShaderDeviceAddress, + VMA_MEMORY_USAGE_GPU_ONLY); + auto &index_buffer = acquire_buffer(frame.gl_index_pool, + frame.gl_index_sizes, frame.gl_index_cursor, index_data_size, + vk::BufferUsageFlagBits::eIndexBuffer + | vk::BufferUsageFlagBits::eTransferDst, + VMA_MEMORY_USAGE_GPU_ONLY); void *staging_dst = staging.info.pMappedData; bool staging_mapped_here { false }; @@ -232,23 +274,13 @@ auto VulkanRenderer::GL::flush() -> void assert(res == VK_SUCCESS); staging_mapped_here = true; } - memcpy(staging_dst, m_vertices.data(), vertex_data_size); - memcpy(reinterpret_cast(staging_dst) + vertex_data_size, + std::memcpy(staging_dst, m_vertices.data(), vertex_data_size); + std::memcpy(reinterpret_cast(staging_dst) + vertex_data_size, m_indices.data(), index_data_size); if (staging_mapped_here) { vmaUnmapMemory(m_renderer.m_vk.allocator, staging.allocation); } - auto vertex_buffer { m_renderer.create_buffer(vertex_data_size, - vk::BufferUsageFlagBits::eVertexBuffer - | vk::BufferUsageFlagBits::eTransferDst - | vk::BufferUsageFlagBits::eShaderDeviceAddress, - VMA_MEMORY_USAGE_GPU_ONLY) }; - auto index_buffer { m_renderer.create_buffer(index_data_size, - vk::BufferUsageFlagBits::eIndexBuffer - | 
vk::BufferUsageFlagBits::eTransferDst, - VMA_MEMORY_USAGE_GPU_ONLY) }; - m_renderer.immediate_submit( [&](vk::CommandBuffer cmd) { vk::BufferCopy vertex_copy {}; @@ -267,8 +299,6 @@ auto VulkanRenderer::GL::flush() -> void /*flush_frame_deletion_queue=*/false, /*clear_frame_descriptors=*/false); - m_renderer.destroy_buffer(staging); - auto cmd { m_cmd }; bind_pipeline_if_needed(); @@ -312,11 +342,6 @@ auto VulkanRenderer::GL::flush() -> void cmd.bindIndexBuffer(index_buffer.buffer, 0, vk::IndexType::eUint32); cmd.drawIndexed(static_cast(m_indices.size()), 1, 0, 0, 0); - m_renderer.m_vk.get_current_frame().deletion_queue.emplace([=, this]() { - m_renderer.destroy_buffer(index_buffer); - m_renderer.destroy_buffer(vertex_buffer); - }); - m_vertices.clear(); m_indices.clear(); } @@ -659,6 +684,9 @@ VulkanRenderer::VulkanRenderer(SDL_Window *window, Logger &logger, throw std::runtime_error("VulkanRenderer requires a valid window"); } + m_mem_stats_enabled = std::getenv("LUNAR_MEM_STATS") != nullptr; + m_last_mem_stats = std::chrono::steady_clock::now(); + m_use_kms = false; m_imgui_enabled = true; @@ -684,6 +712,8 @@ VulkanRenderer::VulkanRenderer(KmsSurfaceConfig /*config*/, Logger &logger, { m_use_kms = true; m_imgui_enabled = false; + m_mem_stats_enabled = std::getenv("LUNAR_MEM_STATS") != nullptr; + m_last_mem_stats = std::chrono::steady_clock::now(); vk_init(); swapchain_init(); @@ -700,6 +730,27 @@ VulkanRenderer::~VulkanRenderer() for (auto &frame_data : m_vk.frames) { frame_data.deletion_queue.flush(); + for (auto &buffer : frame_data.gl_staging_pool) { + if (buffer.buffer) { + destroy_buffer(buffer); + } + } + for (auto &buffer : frame_data.gl_vertex_pool) { + if (buffer.buffer) { + destroy_buffer(buffer); + } + } + for (auto &buffer : frame_data.gl_index_pool) { + if (buffer.buffer) { + destroy_buffer(buffer); + } + } + frame_data.gl_staging_pool.clear(); + frame_data.gl_staging_sizes.clear(); + frame_data.gl_vertex_pool.clear(); + 
frame_data.gl_vertex_sizes.clear(); + frame_data.gl_index_pool.clear(); + frame_data.gl_index_sizes.clear(); frame_data.main_command_buffer.reset(); frame_data.command_pool.reset(); frame_data.swapchain_semaphore.reset(); @@ -1653,6 +1704,7 @@ auto VulkanRenderer::render(std::function const &record) -> void return; } + log_memory_stats(); process_render_commands(); auto &frame { m_vk.get_current_frame() }; @@ -1660,6 +1712,12 @@ auto VulkanRenderer::render(std::function const &record) -> void m_device.waitForFences(frame.render_fence.get(), true, 1'000'000'000)); frame.deletion_queue.flush(); frame.frame_descriptors.clear_pools(m_vkb.dev.device); + frame.gl_staging_cursor = 0; + frame.gl_vertex_cursor = 0; + frame.gl_index_cursor = 0; + frame.gl_staging_cursor = 0; + frame.gl_vertex_cursor = 0; + frame.gl_index_cursor = 0; emit_frame_screenshot(frame); #if defined(TRACY_ENABLE) emit_tracy_frame_image(frame); @@ -1744,7 +1802,7 @@ auto VulkanRenderer::render(std::function const &record) -> void vk::ImageLayout::eColorAttachmentOptimal, vk::ImageLayout::eTransferSrcOptimal); - if (frame.screenshot_buffer.buffer) { + if (frame.screenshot_buffer.buffer && m_screenshot_requested) { vk::BufferImageCopy screenshot_copy {}; screenshot_copy.imageSubresource.aspectMask = vk::ImageAspectFlagBits::eColor; @@ -1759,6 +1817,7 @@ auto VulkanRenderer::render(std::function const &record) -> void vk::ImageLayout::eTransferSrcOptimal, frame.screenshot_buffer.buffer, screenshot_copy); frame.screenshot_ready = true; + m_screenshot_requested = false; } else { frame.screenshot_ready = false; } @@ -1766,6 +1825,7 @@ auto VulkanRenderer::render(std::function const &record) -> void #if defined(TRACY_ENABLE) constexpr std::uint64_t tracy_frame_stride { 10 }; bool const tracy_capture { TracyIsConnected + && Lunar::tracy_runtime_enabled() && (m_vk.frame_number % tracy_frame_stride == 0) }; frame.tracy_frame_ready = false; frame.frame_image_ready = false; @@ -1876,6 +1936,7 @@ auto 
VulkanRenderer::render_to_image(vk::Image target_image, return; } + log_memory_stats(); process_render_commands(); auto &frame { m_vk.get_current_frame() }; @@ -1963,6 +2024,59 @@ auto VulkanRenderer::draw_imgui( cmd.endRendering(); } +namespace { + +auto read_rss_kb() -> std::optional +{ +#if defined(__linux__) + std::ifstream statm("/proc/self/statm"); + long total_pages { 0 }; + long resident_pages { 0 }; + if (!(statm >> total_pages >> resident_pages)) { + return std::nullopt; + } + long page_size = sysconf(_SC_PAGESIZE); + if (page_size <= 0) { + return std::nullopt; + } + auto bytes = static_cast(resident_pages) + * static_cast(page_size); + return bytes / 1024; +#else + return std::nullopt; +#endif +} + +} // namespace + +auto VulkanRenderer::log_memory_stats() -> void +{ + if (!m_mem_stats_enabled || !m_vk.allocator) { + return; + } + + auto const now = std::chrono::steady_clock::now(); + if (now - m_last_mem_stats < std::chrono::seconds(1)) { + return; + } + m_last_mem_stats = now; + + VmaTotalStatistics stats {}; + vmaCalculateStatistics(m_vk.allocator, &stats); + auto const alloc_bytes = stats.total.statistics.allocationBytes; + auto const block_bytes = stats.total.statistics.blockBytes; + + auto const rss_kb = read_rss_kb(); + if (rss_kb) { + m_logger.info("Mem stats: rss={} KB, vma_alloc={} MB, vma_blocks={} MB", + *rss_kb, alloc_bytes / (1024 * 1024), block_bytes / (1024 * 1024)); + } else { + m_logger.info("Mem stats: vma_alloc={} MB, vma_blocks={} MB", + alloc_bytes / (1024 * 1024), block_bytes / (1024 * 1024)); + } + log_top_allocators(m_logger, 8); +} + auto VulkanRenderer::create_swapchain(uint32_t width, uint32_t height) -> void { vkb::SwapchainBuilder builder { m_vkb.phys_dev, m_vkb.dev, m_vk.surface }; @@ -2188,6 +2302,7 @@ auto VulkanRenderer::destroy_screenshot_buffers() -> void m_latest_screenshot_pixels.clear(); m_latest_screenshot_extent = vk::Extent2D {}; + m_latest_screenshot_layout = vk::ImageLayout::eUndefined; if 
(m_latest_screenshot) { destroy_image(*m_latest_screenshot); m_latest_screenshot.reset(); @@ -2239,19 +2354,82 @@ auto VulkanRenderer::emit_frame_screenshot(FrameData &frame) -> void destination[i + 3] = 0xff; } - auto const screenshot_flags { vk::ImageUsageFlagBits::eSampled }; auto const screenshot_extent { vk::Extent3D { extent.width, extent.height, 1 } }; + auto const same_extent { m_latest_screenshot + && m_latest_screenshot_extent.width == extent.width + && m_latest_screenshot_extent.height == extent.height }; - if (m_latest_screenshot) { - destroy_image(*m_latest_screenshot); - m_latest_screenshot.reset(); + if (!same_extent) { + if (m_latest_screenshot) { + destroy_image(*m_latest_screenshot); + m_latest_screenshot.reset(); + } + auto const screenshot_flags { vk::ImageUsageFlagBits::eSampled + | vk::ImageUsageFlagBits::eTransferDst }; + m_latest_screenshot = create_image( + screenshot_extent, vk::Format::eR8G8B8A8Unorm, screenshot_flags); + m_latest_screenshot_layout = vk::ImageLayout::eUndefined; } - m_latest_screenshot = create_image(destination.data(), screenshot_extent, - vk::Format::eR8G8B8A8Unorm, screenshot_flags); - m_latest_screenshot_pixels = destination; - m_latest_screenshot_extent = extent; + if (m_latest_screenshot) { + auto const upload_buffer { + create_buffer(byte_count, vk::BufferUsageFlagBits::eTransferSrc, + VMA_MEMORY_USAGE_CPU_TO_GPU), + }; + VmaAllocationInfo upload_info {}; + vmaGetAllocationInfo( + m_vk.allocator, upload_buffer.allocation, &upload_info); + + void *upload_mapped { upload_info.pMappedData }; + bool upload_mapped_here { false }; + if (!upload_mapped) { + auto const map_result { vmaMapMemory( + m_vk.allocator, upload_buffer.allocation, &upload_mapped) }; + if (map_result != VK_SUCCESS) { + destroy_buffer(upload_buffer); + if (mapped_here) { + vmaUnmapMemory( + m_vk.allocator, frame.screenshot_buffer.allocation); + } + return; + } + upload_mapped_here = true; + } + std::memcpy(upload_mapped, destination.data(), 
byte_count); + + immediate_submit( + [&](vk::CommandBuffer cmd) { + vkutil::transition_image(cmd, m_latest_screenshot->image, + m_latest_screenshot_layout, + vk::ImageLayout::eTransferDstOptimal); + + vk::BufferImageCopy copy_region {}; + copy_region.imageSubresource.aspectMask + = vk::ImageAspectFlagBits::eColor; + copy_region.imageSubresource.mipLevel = 0; + copy_region.imageSubresource.baseArrayLayer = 0; + copy_region.imageSubresource.layerCount = 1; + copy_region.imageExtent = screenshot_extent; + cmd.copyBufferToImage(upload_buffer.buffer, + m_latest_screenshot->image, + vk::ImageLayout::eTransferDstOptimal, copy_region); + + vkutil::transition_image(cmd, m_latest_screenshot->image, + vk::ImageLayout::eTransferDstOptimal, + vk::ImageLayout::eShaderReadOnlyOptimal); + }, + /*flush_frame_deletion_queue=*/false, + /*clear_frame_descriptors=*/false); + + if (upload_mapped_here) { + vmaUnmapMemory(m_vk.allocator, upload_buffer.allocation); + } + destroy_buffer(upload_buffer); + m_latest_screenshot_layout = vk::ImageLayout::eShaderReadOnlyOptimal; + m_latest_screenshot_pixels = destination; + m_latest_screenshot_extent = extent; + } if (mapped_here) { vmaUnmapMemory(m_vk.allocator, frame.screenshot_buffer.allocation); @@ -2402,7 +2580,8 @@ auto VulkanRenderer::emit_tracy_frame_image(FrameData &frame) -> void destination[i + 3] = source[i + 3]; } - if (!frame.tracy_frame_ready || !TracyIsConnected) { + if (!frame.tracy_frame_ready || !TracyIsConnected + || !Lunar::tracy_runtime_enabled()) { frame.frame_image_ready = false; frame.tracy_frame_ready = false; if (mapped_here) { diff --git a/src/VulkanRenderer.h b/src/VulkanRenderer.h index 535c356..99eb0da 100644 --- a/src/VulkanRenderer.h +++ b/src/VulkanRenderer.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include #include @@ -224,6 +225,9 @@ struct VulkanRenderer { { return m_latest_screenshot; } + auto set_imgui_enabled(bool enabled) -> void { m_imgui_enabled = enabled; } + auto imgui_enabled() const 
-> bool { return m_imgui_enabled; } + auto request_screenshot() -> void { m_screenshot_requested = true; } auto get_screenshot_pixels() const -> std::optional { @@ -246,6 +250,8 @@ struct VulkanRenderer { std::optional m_latest_screenshot {}; std::vector m_latest_screenshot_pixels {}; vk::Extent2D m_latest_screenshot_extent {}; + vk::ImageLayout m_latest_screenshot_layout { vk::ImageLayout::eUndefined }; + bool m_screenshot_requested { false }; private: struct RenderCommand { @@ -270,6 +276,7 @@ private: auto draw_imgui(vk::CommandBuffer cmd, vk::ImageView target_image_view) -> void; + auto log_memory_stats() -> void; auto create_swapchain(uint32_t width, uint32_t height) -> void; auto create_draw_image(uint32_t width, uint32_t height) -> void; @@ -402,6 +409,8 @@ private: std::vector m_pending_render_commands; bool m_use_kms { false }; bool m_imgui_enabled { true }; + bool m_mem_stats_enabled { false }; + std::chrono::steady_clock::time_point m_last_mem_stats {}; std::optional m_kms_state {}; vk::PhysicalDevice m_kms_physical_device {}; vk::Extent2D m_kms_extent {};