Compare commits

...

27 Commits

Author SHA1 Message Date
92912a321c Initial Wayland support
Signed-off-by: Slendi <slendi@socopon.com>
2026-01-27 00:55:28 +02:00
e04f1cf291 Add useful wayland RAII wrappers
Signed-off-by: Slendi <slendi@socopon.com>
2026-01-26 18:27:27 +02:00
efa6e289b6 Add logging for KMS backend
Signed-off-by: Slendi <slendi@socopon.com>
2026-01-17 15:05:25 +02:00
596af80622 Hands and formatting
Signed-off-by: Slendi <slendi@socopon.com>
2026-01-17 14:55:15 +02:00
f4fad2c1ac Hand tracking
Signed-off-by: Slendi <slendi@socopon.com>
2026-01-17 14:40:34 +02:00
e9ae017e9b Make OpenXR work!
Signed-off-by: Slendi <slendi@socopon.com>
2026-01-17 12:42:23 +02:00
cddfa30cfe Add openxr related stuff
Signed-off-by: Slendi <slendi@socopon.com>
2026-01-17 12:36:41 +02:00
5ca02ed9e2 Add untested OpenXR
Signed-off-by: Slendi <slendi@socopon.com>
2026-01-12 19:36:25 +02:00
9f2dab344d DRM/KMS backend
Signed-off-by: Slendi <slendi@socopon.com>
2026-01-12 16:38:28 +02:00
402cdd43da Make the constructor and destructor of Application private
Application is a singleton now, no need to have it be constructed
outside of the().

Signed-off-by: Slendi <slendi@socopon.com>
2026-01-11 17:45:10 +02:00
fc66ce2fd3 Make Application into a singleton
Signed-off-by: Slendi <slendi@socopon.com>
2026-01-11 17:44:02 +02:00
46f5fab55e Fix initializers, more stuff
Signed-off-by: Slendi <slendi@socopon.com>
2026-01-11 16:55:10 +02:00
e55601b5a6 Add skybox
Signed-off-by: Slendi <slendi@socopon.com>
2026-01-11 15:01:48 +02:00
979dab81b1 Add cubemap
Signed-off-by: Slendi <slendi@socopon.com>
2026-01-11 11:56:07 +02:00
26edfcbe89 Fix some bugs
Signed-off-by: Slendi <slendi@socopon.com>
2026-01-11 11:43:06 +02:00
46c428b13a CPU texture
Signed-off-by: Slendi <slendi@socopon.com>
2026-01-11 11:28:48 +02:00
447114e38d Screenshots
Signed-off-by: Slendi <slendi@socopon.com>
2026-01-11 11:00:42 +02:00
7978606a52 Update smath
Signed-off-by: Slendi <slendi@socopon.com>
2026-01-11 10:58:04 +02:00
6b6465b6f3 Various fixes and culling
Signed-off-by: Slendi <slendi@socopon.com>
2026-01-10 17:25:09 +02:00
e0ca1f1043 Figure out stupid math shit
Signed-off-by: Slendi <slendi@socopon.com>
2026-01-10 16:15:36 +02:00
f896ddae74 Add tracy
Signed-off-by: Slendi <slendi@socopon.com>
2026-01-10 15:35:00 +02:00
7a5d4c9045 Update smath
Signed-off-by: Slendi <slendi@socopon.com>
2026-01-10 15:33:37 +02:00
858f848427 stuff
Signed-off-by: Slendi <slendi@socopon.com>
2026-01-10 15:30:45 +02:00
4d9e1f03b0 Fun menu :3
Signed-off-by: Slendi <slendi@socopon.com>
2025-12-17 21:42:59 +02:00
b01a32194e boop
Signed-off-by: Slendi <slendi@socopon.com>
2025-12-17 21:31:11 +02:00
ffe4712663 fix stuff
Signed-off-by: Slendi <slendi@socopon.com>
2025-12-17 18:02:21 +02:00
a99233fcbe hiu
Signed-off-by: Slendi <slendi@socopon.com>
2025-12-17 15:51:38 +02:00
949 changed files with 622607 additions and 437 deletions

1
.gitignore vendored
View File

@@ -3,3 +3,4 @@ result
.cache
.direnv
.clangd
screenshot*

BIN
assets/cubemap.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 17 MiB

10
flake.lock generated
View File

@@ -20,12 +20,12 @@
},
"nixpkgs": {
"locked": {
"lastModified": 1764517877,
"narHash": "sha256-pp3uT4hHijIC8JUK5MEqeAWmParJrgBVzHLNfJDZxg4=",
"rev": "2d293cbfa5a793b4c50d17c05ef9e385b90edf6c",
"revCount": 904649,
"lastModified": 1767892417,
"narHash": "sha256-dhhvQY67aboBk8b0/u0XB6vwHdgbROZT3fJAjyNh5Ww=",
"rev": "3497aa5c9457a9d88d71fa93a4a8368816fbeeba",
"revCount": 924538,
"type": "tarball",
"url": "https://api.flakehub.com/f/pinned/NixOS/nixpkgs/0.1.904649%2Brev-2d293cbfa5a793b4c50d17c05ef9e385b90edf6c/019ad7f2-e8f3-79e9-ad92-dd7a45c069d3/source.tar.gz?rev=2d293cbfa5a793b4c50d17c05ef9e385b90edf6c&revCount=904649"
"url": "https://api.flakehub.com/f/pinned/NixOS/nixpkgs/0.1.924538%2Brev-3497aa5c9457a9d88d71fa93a4a8368816fbeeba/019ba2f6-9b41-7674-b81c-5f768968b13a/source.tar.gz?rev=3497aa5c9457a9d88d71fa93a4a8368816fbeeba&revCount=924538"
},
"original": {
"type": "tarball",

View File

@@ -23,6 +23,7 @@
pkg-config
glslang
shaderc
wayland-scanner
];
buildInputs = with pkgs; [
vulkan-loader
@@ -45,11 +46,13 @@
with pkgs;
[
llvmPackages_21.clang-tools
llvmPackages_21.bintools
lldb
codespell
doxygen
gtest
cppcheck
tracy
]
++ buildInputs
++ nativeBuildInputs

View File

@@ -1,4 +1,4 @@
project('vr-compositor', 'cpp',
project('vr-compositor', 'c', 'cpp',
version: '0.1',
default_options: [
'cpp_std=c++26',
@@ -17,14 +17,32 @@ fastgltf_opts.set_override_option('werror', 'false')
fastgltf = cmake.subproject('fastgltf', options: fastgltf_opts)
cc = meson.get_compiler('cpp')
wl_mod = import('wayland')
wayland_dep = dependency('wayland-server')
vulkan_dep = dependency('vulkan')
openxr_dep = dependency('openxr')
zlib_dep = dependency('zlib')
sdl3_dep = dependency('sdl3')
libinput_dep = dependency('libinput')
libudev_dep = dependency('libudev')
wayland_dep = dependency('wayland-server', include_type: 'system')
wayland_client_dep = dependency('wayland-client', include_type: 'system')
vulkan_dep = dependency('vulkan', include_type: 'system')
openxr_dep = dependency('openxr', include_type: 'system')
zlib_dep = dependency('zlib', include_type: 'system')
sdl3_dep = dependency('sdl3', include_type: 'system')
libinput_dep = dependency('libinput', include_type: 'system')
libudev_dep = dependency('libudev', include_type: 'system')
wayland_protocol = wl_mod.scan_xml(
'protocols/wayland.xml',
client: false,
server: true,
)
wayland_protocol_source = wayland_protocol[0]
wayland_server_header = wayland_protocol[1]
xdg_shell_protocol = wl_mod.scan_xml(
'protocols/xdg-shell.xml',
client: true,
server: true,
)
xdg_shell_protocol_source = xdg_shell_protocol[0]
xdg_shell_client_header = xdg_shell_protocol[1]
xdg_shell_server_header = xdg_shell_protocol[2]
imgui_src = files(
'thirdparty/imgui/imgui.cpp',
'thirdparty/imgui/imgui_draw.cpp',
@@ -37,9 +55,25 @@ fastgltf_dep = fastgltf.dependency('fastgltf')
vkbootstrap_dev = get_option('vkbootstrap_dev')
vkbootstrap_lib = get_option('vkbootstrap_lib')
tracy_enable = get_option('tracy_enable')
if tracy_enable and get_option('buildtype') != 'debugoptimized'
warning('Profiling builds should set --buildtype=debugoptimized')
endif
tracy = disabler()
if tracy_enable
tracy_proj = subproject('tracy', default_options: [
'default_library=static',
'warning_level=0',
'werror=false',
])
tracy = tracy_proj.get_variable('tracy_dep')
endif
vkbootstrap_inc = include_directories(
join_paths(vkbootstrap_dev, 'include')
join_paths(vkbootstrap_dev, 'include'),
is_system: true,
)
vkbootstrap_dep = cc.find_library(
@@ -78,10 +112,14 @@ add_project_arguments(
'-Wno-exit-time-destructors',
'-Wno-zero-as-null-pointer-constant',
'-Wno-unused-macros',
'-Wno-reserved-macro-identifier',
'-Wno-reserved-identifier',
'-Wno-suggest-override',
'-Wno-macro-redefined',
'-DVULKAN_HPP_DEFAULT_DISPATCH_LOADER_DYNAMIC_STORAGE',
'-DVULKAN_HPP_DISPATCH_LOADER_DYNAMIC=1',
# Thanks, fortify source
'-Wno-#warnings'
],
language : 'cpp'
)
@@ -112,6 +150,21 @@ imgui_lib = static_library('imgui',
],
)
exe_deps = [
wayland_dep,
vulkan_dep,
openxr_dep,
vkbootstrap_dep,
zlib_dep,
sdl3_dep,
fastgltf_dep,
libinput_dep,
libudev_dep,
]
if tracy_enable
exe_deps += tracy
endif
exe = executable('vr-compositor',
[
'src/main.cpp',
@@ -125,27 +178,39 @@ exe = executable('vr-compositor',
'src/Pipeline.cpp',
'src/Loader.cpp',
'src/DescriptorWriter.cpp',
'src/CPUTexture.cpp',
'src/wayland/WaylandServer.cpp',
'src/wayland/Surface.cpp',
'src/wayland/Shm.cpp',
'src/wayland/protocols/CompositorProtocol.cpp',
'src/wayland/protocols/ShmProtocol.cpp',
'src/wayland/protocols/XdgShellProtocol.cpp',
'src/Skybox.cpp',
'src/VulkanRenderer.cpp',
'src/Application.cpp',
wayland_protocol_source,
xdg_shell_protocol_source,
wayland_server_header,
xdg_shell_server_header,
],
c_args: ['-Wno-missing-variable-declarations'],
include_directories: [
vkbootstrap_inc,
include_directories('.'),
imgui_inc,
'thirdparty/smath/include'
],
link_with: imgui_lib,
dependencies: [
wayland_dep,
vulkan_dep,
openxr_dep,
vkbootstrap_dep,
zlib_dep,
sdl3_dep,
fastgltf_dep,
libinput_dep,
libudev_dep,
],
dependencies: exe_deps,
cpp_args: [
'--embed-dir=' + join_paths(meson.project_build_root(), 'shaders')
'--embed-dir=' + join_paths(meson.project_build_root(), 'shaders'),
],
)
executable('shm-life',
'tools/shm_life.cpp',
dependencies: [wayland_client_dep],
sources: [xdg_shell_protocol_source, xdg_shell_client_header],
c_args: ['-Wno-missing-variable-declarations'],
cpp_args: ['-Wno-cast-qual'],
)

View File

@@ -1,2 +1,3 @@
option('vkbootstrap_dev', type: 'string', description: 'vk-bootstrap dev output path')
option('vkbootstrap_lib', type: 'string', description: 'vk-bootstrap lib output path')
option('tracy_enable', type: 'boolean', value: false, description: 'Enable profiling')

3314
protocols/wayland.xml Normal file

File diff suppressed because it is too large Load Diff

1418
protocols/xdg-shell.xml Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -1,23 +0,0 @@
#version 460
layout (local_size_x = 16, local_size_y = 16) in;
layout(rgba16f, set = 0, binding = 0) uniform image2D image;
void main() {
ivec2 texelCoord = ivec2(gl_GlobalInvocationID.xy);
ivec2 size = imageSize(image);
if (texelCoord.x >= size.x || texelCoord.y >= size.y)
return;
vec2 uv = (vec2(texelCoord) + 0.5) / vec2(size);
float v = sin(uv.x * 10.0) + cos(uv.y * 10.0);
float r = 0.5 + 0.5 * cos(6.2831 * (uv.x + v));
float g = 0.5 + 0.5 * cos(6.2831 * (uv.y + v + 0.33));
float b = 0.5 + 0.5 * cos(6.2831 * (uv.x - uv.y + 0.66));
vec4 color = vec4(r, g, b, 1.0);
imageStore(image, texelCoord, color);
}

View File

@@ -14,12 +14,13 @@ else
endif
shader_sources = files(
'gradient.comp',
'triangle.frag',
'triangle.vert',
'triangle_mesh.frag',
'triangle_mesh.vert',
'tex_image.frag',
'skybox.frag',
'skybox.vert',
)
spirv_shaders = []

12
shaders/skybox.frag Normal file
View File

@@ -0,0 +1,12 @@
#version 450

// Skybox fragment stage: looks up the environment cube map along the
// interpolated direction received from the vertex stage.
layout(location = 0) in vec3 in_dir;
layout(location = 0) out vec4 out_frag_color;
layout(set = 0, binding = 0) uniform samplerCube environment_map;

void main() {
	// The interpolated direction is re-normalized before sampling; alpha is
	// forced to 1 so the output is always opaque.
	out_frag_color = vec4(texture(environment_map, normalize(in_dir)).rgb, 1.0f);
}

14
shaders/skybox.vert Normal file
View File

@@ -0,0 +1,14 @@
#version 450

// Skybox vertex stage: forwards the vertex position as the cube-map lookup
// direction and projects it with the pushed MVP matrix.
layout(location = 0) in vec3 in_position;
layout(location = 0) out vec3 out_dir;

layout(push_constant) uniform constants {
	mat4 mvp;
} PushConstants;

void main() {
	vec4 clip = PushConstants.mvp * vec4(in_position, 1.0f);

	// The untransformed position doubles as the sampling direction.
	out_dir = in_position;

	// z is replaced by w, so z / w == 1 after the perspective divide.
	gl_Position = clip.xyww;
}

File diff suppressed because it is too large Load Diff

View File

@@ -1,12 +1,24 @@
#pragma once
#include <array>
#include <cstdint>
#include <filesystem>
#include <functional>
#include <memory>
#include <vector>
#include <SDL3/SDL_video.h>
#include "Logger.h"
#include <imgui.h>
#include <linux/input-event-codes.h>
#include <openxr/openxr.h>
#include "smath.hpp"
#include "Loader.h"
#include "Logger.h"
#include "Skybox.h"
#include "Types.h"
#include "wayland/Display.h"
struct libinput;
struct libinput_event_keyboard;
@@ -16,38 +28,99 @@ struct udev;
namespace Lunar {
struct VulkanRenderer;
struct OpenXrState;
namespace Wayland {
struct WaylandServer;
}
struct Application {
Application();
~Application();
auto run() -> void;
auto binary_directory() const -> std::filesystem::path;
auto mouse_captured(bool new_state) -> void;
auto mouse_captured() const -> bool { return m_mouse_captured; }
auto toggle_mouse_captured() -> void { mouse_captured(!m_mouse_captured); }
auto is_key_down(uint32_t key) const -> bool;
auto is_key_up(uint32_t key) const -> bool;
auto is_key_pressed(uint32_t key) const -> bool;
auto is_key_released(uint32_t key) const -> bool;
static auto the() -> Application &;
private:
enum class Backend {
SDL,
KMS,
};
Application();
~Application();
auto init_input() -> void;
auto init_test_meshes() -> void;
auto init_wayland() -> void;
auto asset_directory() -> std::filesystem::path;
auto shutdown_input() -> void;
auto process_libinput_events() -> void;
auto handle_keyboard_event(libinput_event_keyboard *event) -> void;
auto handle_pointer_motion(libinput_event_pointer *event) -> void;
auto handle_pointer_button(libinput_event_pointer *event) -> void;
auto handle_pointer_axis(libinput_event_pointer *event) -> void;
auto handle_pointer_frame() -> void;
auto handle_pointer_end_frame() -> void;
auto handle_pointer_cancel() -> void;
auto handle_keyboard_key(std::optional<uint32_t> key, bool pressed) -> void;
auto clamp_mouse_to_window(int width, int height) -> void;
auto init_openxr() -> void;
auto init_openxr_session() -> void;
auto shutdown_openxr() -> void;
auto poll_openxr_events() -> void;
auto render_openxr_frame(
std::function<void(VulkanRenderer::GL &)> const &record,
float dt_seconds) -> bool;
auto update_camera_from_xr_view(XrView const &view) -> void;
auto update_hands(XrTime display_time) -> void;
auto render_hands(
VulkanRenderer::GL &gl, smath::Mat4 const &view_projection) -> void;
SDL_Window *m_window { nullptr };
Backend m_backend { Backend::SDL };
Logger m_logger { "Lunar" };
std::unique_ptr<VulkanRenderer> m_renderer;
Skybox m_skybox;
std::vector<std::shared_ptr<Mesh>> m_test_meshes;
udev *m_udev { nullptr };
libinput *m_libinput { nullptr };
std::unique_ptr<OpenXrState> m_openxr {};
std::unique_ptr<Wayland::WaylandServer> m_wayland {};
bool m_running { true };
bool m_mouse_captured { false };
bool m_show_imgui { false };
bool m_window_focused { true };
int m_ctrl_pressed_count { 0 };
std::uint32_t m_screenshot_index { 0 };
double m_mouse_x { 0.0 };
double m_mouse_y { 0.0 };
double m_mouse_dx { 0.0 };
double m_mouse_dy { 0.0 };
float m_mouse_sensitivity { 0.001f };
std::array<bool, KEY_MAX + 1> m_key_state {};
std::array<bool, KEY_MAX + 1> m_key_state_previous {};
Camera m_camera;
PolarCoordinate m_cursor;
static inline std::array<smath::Vec3, XR_HAND_JOINT_COUNT_EXT>
m_left_joints {};
static inline std::array<smath::Vec3, XR_HAND_JOINT_COUNT_EXT>
m_right_joints {};
static inline bool m_left_hand_valid { false };
static inline bool m_right_hand_valid { false };
};
} // namespace Lunar

88
src/CPUTexture.cpp Normal file
View File

@@ -0,0 +1,88 @@
#include "CPUTexture.h"
#include <format>
#include <stdexcept>
#if defined(__clang__)
# pragma clang diagnostic push
# pragma clang diagnostic ignored "-Wreserved-identifier"
# pragma clang diagnostic ignored "-Wcast-qual"
# pragma clang diagnostic ignored "-Wimplicit-fallthrough"
# pragma clang diagnostic ignored "-Wmissing-field-initializers"
# pragma clang diagnostic ignored "-Wused-but-marked-unused"
# pragma clang diagnostic ignored "-Wmissing-prototypes"
# pragma clang diagnostic ignored "-Wextra-semi-stmt"
# pragma clang diagnostic ignored "-Wimplicit-int-conversion"
# pragma clang diagnostic ignored "-Wsign-conversion"
# pragma clang diagnostic ignored "-Wshorten-64-to-32"
# pragma clang diagnostic ignored "-Wconversion"
# pragma clang diagnostic ignored "-Wcomma"
# pragma clang diagnostic ignored "-Wdouble-promotion"
# pragma clang diagnostic ignored "-Wimplicit-float-conversion"
# pragma clang diagnostic ignored "-Wunsafe-buffer-usage"
# pragma clang diagnostic ignored "-Wdeprecated-declarations"
# pragma clang diagnostic ignored "-Wdisabled-macro-expansion"
# pragma clang diagnostic ignored "-Wsign-compare"
# pragma clang diagnostic ignored "-Wfloat-equal"
# pragma clang diagnostic ignored "-Wpacked"
# pragma clang diagnostic ignored "-Wold-style-cast"
# pragma clang diagnostic ignored "-Wexit-time-destructors"
# pragma clang diagnostic ignored "-Wglobal-constructors"
# pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant"
# pragma clang diagnostic ignored "-Wcast-align"
# pragma clang diagnostic ignored "-Wcast-qual"
# pragma clang diagnostic ignored "-Wshadow"
# pragma clang diagnostic ignored "-Wnewline-eof"
# pragma clang diagnostic ignored "-Wformat-nonliteral"
# pragma clang diagnostic ignored "-Wswitch-default"
# pragma clang diagnostic ignored "-Wswitch-enum"
# pragma clang diagnostic ignored "-Wcovered-switch-default"
# pragma clang diagnostic ignored "-Wdocumentation"
# pragma clang diagnostic ignored "-Wdocumentation-unknown-command"
# pragma clang diagnostic ignored "-Wextra-semi"
# pragma clang diagnostic ignored "-Wundef"
# pragma clang diagnostic ignored "-Wreserved-macro-identifier"
# pragma clang diagnostic ignored "-Wc++98-compat"
# pragma clang diagnostic ignored "-Wc++98-compat-pedantic"
# pragma clang diagnostic ignored "-Wweak-vtables"
# pragma clang diagnostic ignored "-Wswitch"
# pragma clang diagnostic ignored "-Wunused-macros"
# pragma clang diagnostic ignored "-Wextra"
#endif
#define STB_IMAGE_IMPLEMENTATION
#include "../thirdparty/stb/stb_image.h"
#if defined(__clang__)
# pragma clang diagnostic pop
#endif
namespace Lunar {
/// Loads the image file at `path` through stb_image and stores it as tightly
/// packed 8-bit RGBA (the decoder is asked for STBI_rgb_alpha regardless of
/// the channel count present in the file).
///
/// @param path  Image file to decode.
/// @throws std::runtime_error if stb_image fails to load the file.
/// @throws whatever `std::vector::assign` throws (e.g. std::bad_alloc); the
///         decoder's buffer is released before the exception propagates.
CPUTexture::CPUTexture(std::filesystem::path const &path)
{
	int width_out { 0 };
	int height_out { 0 };
	int channels_out { 0 };
	stbi_uc *data { stbi_load(path.string().c_str(), &width_out, &height_out,
		&channels_out, STBI_rgb_alpha) };
	if (!data) {
		throw std::runtime_error(
		    std::format("Failed to load texture: {}", path.string()));
	}

	width = static_cast<uint32_t>(width_out);
	height = static_cast<uint32_t>(height_out);
	format = vk::Format::eR8G8B8A8Unorm;

	// Compute the byte count in the container's size type so the product is
	// not truncated to 32 bits before it is used as a pointer offset.
	using size_type = std::vector<uint8_t>::size_type;
	size_type const byte_count { static_cast<size_type>(width)
		* static_cast<size_type>(height) * 4 };

	// `data` is owned by stb_image; make sure it is freed even when the copy
	// into `pixels` throws.
	try {
		pixels.assign(data, data + byte_count);
	} catch (...) {
		stbi_image_free(data);
		throw;
	}
	stbi_image_free(data);
}
CPUTexture::CPUTexture(std::vector<uint8_t> pixels, uint32_t width,
uint32_t height, vk::Format format)
: pixels(std::move(pixels))
, width(width)
, height(height)
, format(format)
{
}
} // namespace Lunar

22
src/CPUTexture.h Normal file
View File

@@ -0,0 +1,22 @@
#pragma once
#include <cstdint>
#include <filesystem>
#include <vector>
#include <vulkan/vulkan.hpp>
namespace Lunar {
// Image data held in host memory: a byte buffer plus the dimensions and
// Vulkan format that describe it.
struct CPUTexture {
// Raw pixel bytes. The file-loading constructor fills this with
// width * height * 4 bytes (8-bit RGBA).
std::vector<uint8_t> pixels;
// Image width in pixels.
uint32_t width { 0 };
// Image height in pixels.
uint32_t height { 0 };
// Format of the data in `pixels`; defaults to 8-bit RGBA unorm.
vk::Format format { vk::Format::eR8G8B8A8Unorm };
// Decodes the image file at `path`; throws std::runtime_error when the
// file cannot be loaded.
explicit CPUTexture(std::filesystem::path const &path);
// Adopts an existing pixel buffer. The arguments are stored as given; no
// consistency check between buffer size, dimensions and format is made.
CPUTexture(std::vector<uint8_t> pixels, uint32_t width, uint32_t height,
vk::Format format);
};
} // namespace Lunar

View File

@@ -1,5 +1,7 @@
#include "DescriptorAllocatorGrowable.h"
#include <algorithm>
#include "Logger.h"
#include "Util.h"
@@ -9,6 +11,10 @@ auto DescriptorAllocatorGrowable::init(VkDevice dev, uint32_t max_sets,
std::span<PoolSizeRatio> pool_ratios) -> void
{
m_ratios.clear();
m_current_pool = VK_NULL_HANDLE;
m_full_pools.clear();
m_used_pools.clear();
m_ready_pools.clear();
m_ratios.insert(m_ratios.begin(), pool_ratios.begin(), pool_ratios.end());
@@ -23,31 +29,56 @@ auto DescriptorAllocatorGrowable::init(VkDevice dev, uint32_t max_sets,
auto DescriptorAllocatorGrowable::clear_pools(VkDevice dev) -> void
{
for (auto const p : m_ready_pools)
std::vector<VkDescriptorPool> all_pools;
all_pools.reserve(
m_ready_pools.size() + m_used_pools.size() + m_full_pools.size());
all_pools.insert(
all_pools.end(), m_ready_pools.begin(), m_ready_pools.end());
all_pools.insert(all_pools.end(), m_used_pools.begin(), m_used_pools.end());
all_pools.insert(all_pools.end(), m_full_pools.begin(), m_full_pools.end());
std::sort(all_pools.begin(), all_pools.end());
all_pools.erase(
std::unique(all_pools.begin(), all_pools.end()), all_pools.end());
for (auto const p : all_pools) {
vkResetDescriptorPool(dev, p, 0);
for (auto const p : m_full_pools) {
vkResetDescriptorPool(dev, p, 0);
m_ready_pools.emplace_back(p);
}
m_ready_pools = std::move(all_pools);
m_used_pools.clear();
m_full_pools.clear();
m_current_pool = VK_NULL_HANDLE;
}
auto DescriptorAllocatorGrowable::destroy_pools(VkDevice dev) -> void
{
for (auto const p : m_ready_pools) {
std::vector<VkDescriptorPool> all_pools;
all_pools.reserve(
m_ready_pools.size() + m_used_pools.size() + m_full_pools.size());
all_pools.insert(
all_pools.end(), m_ready_pools.begin(), m_ready_pools.end());
all_pools.insert(all_pools.end(), m_used_pools.begin(), m_used_pools.end());
all_pools.insert(all_pools.end(), m_full_pools.begin(), m_full_pools.end());
std::sort(all_pools.begin(), all_pools.end());
all_pools.erase(
std::unique(all_pools.begin(), all_pools.end()), all_pools.end());
for (auto const p : all_pools) {
vkDestroyDescriptorPool(dev, p, nullptr);
}
m_ready_pools.clear();
for (auto const p : m_full_pools) {
vkDestroyDescriptorPool(dev, p, nullptr);
}
m_used_pools.clear();
m_full_pools.clear();
m_current_pool = VK_NULL_HANDLE;
}
auto DescriptorAllocatorGrowable::allocate(Logger &logger, VkDevice dev,
VkDescriptorSetLayout layout, void *p_next) -> VkDescriptorSet
{
auto pool_to_use = get_pool(dev);
auto pool_to_use { get_pool(dev) };
VkDescriptorSetAllocateInfo alloci {};
alloci.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO;
@@ -60,31 +91,36 @@ auto DescriptorAllocatorGrowable::allocate(Logger &logger, VkDevice dev,
auto const res = vkAllocateDescriptorSets(dev, &alloci, &ds);
if (res == VK_ERROR_OUT_OF_POOL_MEMORY || res == VK_ERROR_FRAGMENTED_POOL) {
m_full_pools.emplace_back(pool_to_use);
if (m_current_pool == pool_to_use) {
m_current_pool = VK_NULL_HANDLE;
}
pool_to_use = get_pool(dev);
alloci.descriptorPool = pool_to_use;
VK_CHECK(logger, vkAllocateDescriptorSets(dev, &alloci, &ds));
}
m_ready_pools.emplace_back(pool_to_use);
return ds;
}
auto DescriptorAllocatorGrowable::get_pool(VkDevice dev) -> VkDescriptorPool
{
VkDescriptorPool new_pool;
if (m_current_pool != VK_NULL_HANDLE) {
return m_current_pool;
}
if (m_ready_pools.empty()) {
new_pool = create_pool(dev, m_sets_per_pool, m_ratios);
if (!m_ready_pools.empty()) {
m_current_pool = m_ready_pools.back();
m_ready_pools.pop_back();
} else {
m_current_pool = create_pool(dev, m_sets_per_pool, m_ratios);
m_sets_per_pool = static_cast<uint32_t>(m_sets_per_pool * 1.5);
if (m_sets_per_pool > 4092)
m_sets_per_pool = 4092;
} else {
new_pool = m_ready_pools.back();
m_ready_pools.pop_back();
}
return new_pool;
m_used_pools.emplace_back(m_current_pool);
return m_current_pool;
}
auto DescriptorAllocatorGrowable::create_pool(VkDevice dev, uint32_t set_count,

View File

@@ -30,7 +30,9 @@ private:
std::span<PoolSizeRatio> pool_ratios) -> VkDescriptorPool;
std::vector<PoolSizeRatio> m_ratios;
VkDescriptorPool m_current_pool { VK_NULL_HANDLE };
std::vector<VkDescriptorPool> m_full_pools;
std::vector<VkDescriptorPool> m_used_pools;
std::vector<VkDescriptorPool> m_ready_pools;
uint32_t m_sets_per_pool;
};

View File

@@ -32,6 +32,8 @@ auto GraphicsPipelineBuilder::clear() -> GraphicsPipelineBuilder &
m_render_info.sType = VK_STRUCTURE_TYPE_PIPELINE_RENDERING_CREATE_INFO;
m_shader_stages.clear();
m_vertex_bindings.clear();
m_vertex_attributes.clear();
return *this;
}
@@ -79,9 +81,15 @@ auto GraphicsPipelineBuilder::set_cull_mode(VkCullModeFlags cull_mode,
auto GraphicsPipelineBuilder::set_multisampling_none()
-> GraphicsPipelineBuilder &
{
return set_multisampling(VK_SAMPLE_COUNT_1_BIT);
}
auto GraphicsPipelineBuilder::set_multisampling(VkSampleCountFlagBits samples)
-> GraphicsPipelineBuilder &
{
m_multisampling.sampleShadingEnable = VK_FALSE;
m_multisampling.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT;
m_multisampling.rasterizationSamples = samples;
m_multisampling.minSampleShading = 1.0f;
m_multisampling.pSampleMask = nullptr;
m_multisampling.alphaToCoverageEnable = VK_FALSE;
@@ -162,6 +170,17 @@ auto GraphicsPipelineBuilder::set_pipeline_layout(VkPipelineLayout layout)
return *this;
}
auto GraphicsPipelineBuilder::set_vertex_input(
std::span<VkVertexInputBindingDescription const> bindings,
std::span<VkVertexInputAttributeDescription const> attributes)
-> GraphicsPipelineBuilder &
{
m_vertex_bindings.assign(bindings.begin(), bindings.end());
m_vertex_attributes.assign(attributes.begin(), attributes.end());
return *this;
}
auto GraphicsPipelineBuilder::disable_depth_testing()
-> GraphicsPipelineBuilder &
{
@@ -217,6 +236,12 @@ auto GraphicsPipelineBuilder::build(VkDevice dev) -> VkPipeline
VkPipelineVertexInputStateCreateInfo vertex_input_ci {};
vertex_input_ci.sType
= VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO;
vertex_input_ci.vertexBindingDescriptionCount
= static_cast<uint32_t>(m_vertex_bindings.size());
vertex_input_ci.pVertexBindingDescriptions = m_vertex_bindings.data();
vertex_input_ci.vertexAttributeDescriptionCount
= static_cast<uint32_t>(m_vertex_attributes.size());
vertex_input_ci.pVertexAttributeDescriptions = m_vertex_attributes.data();
VkGraphicsPipelineCreateInfo pipeline_ci {};
pipeline_ci.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO;

View File

@@ -1,5 +1,6 @@
#pragma once
#include <span>
#include <vector>
#include <vulkan/vulkan_core.h>
@@ -25,6 +26,8 @@ struct GraphicsPipelineBuilder {
auto set_cull_mode(VkCullModeFlags cull_mode, VkFrontFace front_face)
-> GraphicsPipelineBuilder &;
auto set_multisampling_none() -> GraphicsPipelineBuilder &;
auto set_multisampling(VkSampleCountFlagBits samples)
-> GraphicsPipelineBuilder &;
auto disable_blending() -> GraphicsPipelineBuilder &;
auto enable_blending_additive() -> GraphicsPipelineBuilder &;
auto enable_blending_alpha_blend() -> GraphicsPipelineBuilder &;
@@ -33,6 +36,10 @@ struct GraphicsPipelineBuilder {
auto set_depth_format(VkFormat format) -> GraphicsPipelineBuilder &;
auto set_pipeline_layout(VkPipelineLayout layout)
-> GraphicsPipelineBuilder &;
auto set_vertex_input(
std::span<VkVertexInputBindingDescription const> bindings,
std::span<VkVertexInputAttributeDescription const> attributes)
-> GraphicsPipelineBuilder &;
auto disable_depth_testing() -> GraphicsPipelineBuilder &;
auto enable_depth_testing(bool depth_write_enable = true,
VkCompareOp op = VK_COMPARE_OP_LESS_OR_EQUAL)
@@ -50,6 +57,8 @@ private:
VkFormat m_color_attachment_format {};
std::vector<VkPipelineShaderStageCreateInfo> m_shader_stages {};
std::vector<VkVertexInputBindingDescription> m_vertex_bindings {};
std::vector<VkVertexInputAttributeDescription> m_vertex_attributes {};
Logger &m_logger;
};

View File

@@ -61,7 +61,7 @@ auto Mesh::load_gltf_meshes(
{
renderer.logger().debug("Loading GLTF from file: {}", path);
auto data = fastgltf::GltfDataBuffer::FromPath(path);
auto data { fastgltf::GltfDataBuffer::FromPath(path) };
if (data.error() != fastgltf::Error::None) {
renderer.logger().err("Failed to open glTF file: {} (error {})", path,
fastgltf::to_underlying(data.error()));
@@ -98,7 +98,7 @@ auto Mesh::load_gltf_meshes(
new_surface.count = static_cast<uint32_t>(
gltf.accessors[p.indicesAccessor.value()].count);
size_t initial_vertex = vertices.size();
size_t initial_vertex { vertices.size() };
{ // Indices
auto &accessor = gltf.accessors[p.indicesAccessor.value()];
@@ -128,7 +128,7 @@ auto Mesh::load_gltf_meshes(
if (auto attr = p.findAttribute("NORMAL")) { // Normals
auto &accessor = gltf.accessors[attr->accessorIndex];
size_t local_index = 0;
size_t local_index { 0 };
for (auto normal :
fastgltf::iterateAccessor<smath::Vec3>(gltf, accessor)) {
vertices[initial_vertex + local_index].normal = normal;
@@ -138,7 +138,7 @@ auto Mesh::load_gltf_meshes(
if (auto attr = p.findAttribute("TEXCOORD_0")) { // UVs
auto &accessor = gltf.accessors[attr->accessorIndex];
size_t local_index = 0;
size_t local_index { 0 };
for (auto uv :
fastgltf::iterateAccessor<smath::Vec2>(gltf, accessor)) {
uv.unpack(vertices[initial_vertex + local_index].u,
@@ -149,7 +149,7 @@ auto Mesh::load_gltf_meshes(
if (auto attr = p.findAttribute("COLOR_0")) { // Colors
auto &accessor = gltf.accessors[attr->accessorIndex];
size_t local_index = 0;
size_t local_index { 0 };
switch (accessor.type) {
case fastgltf::AccessorType::Vec3: {

View File

@@ -34,7 +34,7 @@
static std::filesystem::path get_log_path(std::string_view app_name)
{
#ifdef _WIN32
PWSTR path = nullptr;
PWSTR path { nullptr };
SHGetKnownFolderPath(FOLDERID_LocalAppData, 0, nullptr, &path);
std::wstring wpath(path);
CoTaskMemFree(path);
@@ -70,7 +70,7 @@ static int compress_file(std::filesystem::path const &input_path,
std::vector<char> buffer(chunk_size);
while (in) {
in.read(buffer.data(), static_cast<std::streamsize>(buffer.size()));
std::streamsize bytes = in.gcount();
std::streamsize bytes { in.gcount() };
if (bytes > 0)
gzwrite(out, buffer.data(), static_cast<unsigned int>(bytes));
}
@@ -99,20 +99,20 @@ Logger::Logger(std::string_view app_name)
if (!file.is_regular_file())
continue;
auto name = file.path().filename().stem().string();
auto name { file.path().filename().stem().string() };
constexpr std::string_view prefix = "log_";
if (name.rfind(prefix, 0) != 0) {
continue;
}
int v = std::stoi(name.substr(prefix.size()));
int v { std::stoi(name.substr(prefix.size())) };
if (v > max)
max = v;
auto ext = file.path().filename().extension().string();
auto ext { file.path().filename().extension().string() };
if (ext == ".txt") {
auto np = file.path();
auto np { file.path() };
np.replace_extension(ext + ".gz");
compress_file(file.path(), np);
}
@@ -153,7 +153,7 @@ static std::string get_current_time_string()
void Logger::log(Level level, std::string_view msg)
{
auto time_str = get_current_time_string();
auto time_str { get_current_time_string() };
std::string level_str;
switch (level) {
case Logger::Level::Debug:

View File

@@ -27,13 +27,13 @@ auto Pipeline::Builder::set_push_constant_ranges(
auto Pipeline::Builder::build_compute(
vk::PipelineShaderStageCreateInfo const &stage) -> Pipeline
{
auto pipeline_layout = build_layout();
auto pipeline_layout { build_layout() };
vk::ComputePipelineCreateInfo pipeline_ci {};
pipeline_ci.layout = pipeline_layout.get();
pipeline_ci.stage = stage;
auto pipeline_ret = m_device.createComputePipelineUnique({}, pipeline_ci);
auto pipeline_ret { m_device.createComputePipelineUnique({}, pipeline_ci) };
VK_CHECK(m_logger, pipeline_ret.result);
return Pipeline {
@@ -46,14 +46,14 @@ auto Pipeline::Builder::build_graphics(
std::function<GraphicsPipelineBuilder &(GraphicsPipelineBuilder &)> const
&configure) -> Pipeline
{
auto pipeline_layout = build_layout();
auto pipeline_layout { build_layout() };
auto builder = GraphicsPipelineBuilder { m_logger };
auto builder { GraphicsPipelineBuilder { m_logger } };
builder.set_pipeline_layout(
static_cast<VkPipelineLayout>(pipeline_layout.get()));
configure(builder);
auto pipeline_handle = builder.build(static_cast<VkDevice>(m_device));
auto pipeline_handle { builder.build(static_cast<VkDevice>(m_device)) };
vk::UniquePipeline pipeline_unique(pipeline_handle,
vk::detail::ObjectDestroy<vk::Device,
VULKAN_HPP_DEFAULT_DISPATCHER_TYPE>(m_device));

314
src/Skybox.cpp Normal file
View File

@@ -0,0 +1,314 @@
#include "Skybox.h"
#include <array>
#include <cstddef>
#include <span>
#include <vector>
#include "CPUTexture.h"
#include "DescriptorWriter.h"
#include "GraphicsPipelineBuilder.h"
#include "Util.h"
namespace Lunar {
namespace {
// Push-constant block for the skybox pipeline: a single 4x4 matrix, matching
// the `mvp` member of the push_constant block in skybox.vert.
struct SkyboxPushConstants {
smath::Mat4 mvp;
};
// Two-dimensional offset of one cube face inside a source image.
// NOTE(review): presumably measured in face-sized tiles (column, row) rather
// than pixels — confirm against the code that consumes CROSS_OFFSETS.
struct FaceOffset {
uint32_t x;
uint32_t y;
};
// Offsets of the six cube faces, listed in +X, -X, +Y, -Y, +Z, -Z order.
// NOTE(review): the values look like a horizontal-cross layout (4 columns by
// 3 rows) — confirm against the cubemap asset.
constexpr std::array<FaceOffset, 6> CROSS_OFFSETS {
FaceOffset { 2, 1 }, // +X
FaceOffset { 0, 1 }, // -X
FaceOffset { 1, 0 }, // +Y
FaceOffset { 1, 2 }, // -Y
FaceOffset { 1, 1 }, // +Z
FaceOffset { 3, 1 }, // -Z
};
} // namespace
// (Re)creates the skybox graphics pipeline from the embedded SPIR-V shaders
// and stores it in m_pipeline, recording the renderer's current MSAA sample
// count in m_pipeline_samples.
//
// Returns false (after logging an error) if either shader module fails to
// load; returns true once the pipeline has been assigned.
auto Skybox::rebuild_pipeline(VulkanRenderer &renderer) -> bool
{
Pipeline::Builder pipeline_builder { renderer.device(), renderer.logger() };
// Shader binaries are compiled into the executable via #embed.
uint8_t skybox_vert_shader_data[] {
#embed "skybox_vert.spv"
};
auto skybox_vert_shader
= vkutil::load_shader_module(std::span<uint8_t>(skybox_vert_shader_data,
sizeof(skybox_vert_shader_data)),
renderer.device());
if (!skybox_vert_shader) {
renderer.logger().err("Failed to load skybox vert shader");
return false;
}
uint8_t skybox_frag_shader_data[] {
#embed "skybox_frag.spv"
};
auto skybox_frag_shader
= vkutil::load_shader_module(std::span<uint8_t>(skybox_frag_shader_data,
sizeof(skybox_frag_shader_data)),
renderer.device());
if (!skybox_frag_shader) {
renderer.logger().err("Failed to load skybox frag shader");
return false;
}
// One push-constant range, visible to the vertex stage only, sized to hold
// SkyboxPushConstants.
vk::PushConstantRange push_constant_range {};
push_constant_range.stageFlags = vk::ShaderStageFlagBits::eVertex;
push_constant_range.offset = 0;
push_constant_range.size = sizeof(SkyboxPushConstants);
std::array push_constant_ranges { push_constant_range };
pipeline_builder.set_push_constant_ranges(push_constant_ranges);
// A single descriptor set layout, taken from the renderer.
std::array descriptor_set_layouts {
renderer.single_image_descriptor_layout()
};
pipeline_builder.set_descriptor_set_layouts(descriptor_set_layouts);
// Vertex input: one per-vertex binding with a stride of sizeof(Vertex), and
// one attribute at location 0 that reads Vertex::position as three floats.
VkVertexInputBindingDescription binding {};
binding.binding = 0;
binding.stride = sizeof(Vertex);
binding.inputRate = VK_VERTEX_INPUT_RATE_VERTEX;
VkVertexInputAttributeDescription attribute {};
attribute.location = 0;
attribute.binding = 0;
attribute.format = VK_FORMAT_R32G32B32_SFLOAT;
attribute.offset = offsetof(Vertex, position);
std::array bindings { binding };
std::array attributes { attribute };
// Fixed-function state: filled triangle lists, front faces culled with
// counter-clockwise winding as front, no blending, and depth testing with
// LESS_OR_EQUAL where the first argument (false) disables depth writes.
// NOTE(review): front-face culling presumably exists because the cube is
// viewed from the inside — confirm against the mesh winding.
m_pipeline = pipeline_builder.build_graphics(
[&](GraphicsPipelineBuilder &builder) -> GraphicsPipelineBuilder & {
builder.set_vertex_input(bindings, attributes);
return builder
.set_shaders(skybox_vert_shader.get(), skybox_frag_shader.get())
.set_input_topology(VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST)
.set_polygon_mode(VK_POLYGON_MODE_FILL)
.set_cull_mode(
VK_CULL_MODE_FRONT_BIT, VK_FRONT_FACE_COUNTER_CLOCKWISE)
.set_multisampling(
static_cast<VkSampleCountFlagBits>(renderer.msaa_samples()))
.disable_blending()
.enable_depth_testing(false, VK_COMPARE_OP_LESS_OR_EQUAL)
.set_color_attachment_format(
static_cast<VkFormat>(renderer.draw_image_format()))
.set_depth_format(
static_cast<VkFormat>(renderer.depth_image_format()));
});
// Remember which sample count this pipeline was built for.
// NOTE(review): presumably compared elsewhere to decide when a rebuild is
// needed — confirm at the call sites.
m_pipeline_samples = renderer.msaa_samples();
return true;
}
// Loads a 4x3 cross-layout image from `path`, repacks its six faces into a
// cubemap, and creates everything draw() needs (sampler, descriptor set, cube
// mesh, pipeline). `ok` is true on return only if every step succeeded.
//
// Any state left by a previous init() — successful or not — is released
// first, and every failure path releases what was created so far. Previously
// a failure after the cubemap or mesh had been created left them allocated
// with `ok == false`, so the next init() skipped destroy() and leaked them.
auto Skybox::init(VulkanRenderer &renderer, std::filesystem::path const &path)
    -> void
{
    // destroy() checks each handle before releasing it and resets `ok`.
    destroy(renderer);

    CPUTexture texture { path };
    if (texture.width == 0 || texture.height == 0) {
        renderer.logger().err("Skybox texture is empty: {}", path.string());
        return;
    }
    if (texture.width % 4 != 0 || texture.height % 3 != 0
        || texture.width / 4 != texture.height / 3) {
        renderer.logger().err(
            "Skybox texture must be 4x3 faces: {}", path.string());
        return;
    }

    // NOTE(review): assumes texture.pixels holds tightly packed 4-byte texels
    // (width * height * 4 bytes) — confirm against CPUTexture.
    constexpr size_t BYTES_PER_PIXEL = 4;
    uint32_t const face_size = texture.width / 4;
    size_t const row_bytes = static_cast<size_t>(face_size) * BYTES_PER_PIXEL;
    size_t const face_bytes = row_bytes * face_size;

    // Each face row is contiguous in the source image, so copy row by row.
    std::vector<uint8_t> cubemap_pixels(face_bytes * CROSS_OFFSETS.size());
    for (size_t face = 0; face < CROSS_OFFSETS.size(); ++face) {
        auto const offset = CROSS_OFFSETS[face];
        uint32_t const src_x = offset.x * face_size;
        for (uint32_t y = 0; y < face_size; ++y) {
            uint32_t const src_y = offset.y * face_size + y;
            size_t const src_index
                = (static_cast<size_t>(src_y) * texture.width + src_x)
                * BYTES_PER_PIXEL;
            size_t const dst_index = face * face_bytes + y * row_bytes;
            std::copy_n(texture.pixels.data() + src_index, row_bytes,
                cubemap_pixels.data() + dst_index);
        }
    }

    m_cubemap = renderer.create_cubemap(cubemap_pixels, face_size,
        texture.format, vk::ImageUsageFlagBits::eSampled);
    if (!m_cubemap.image) {
        renderer.logger().err("Failed to create cubemap image");
        destroy(renderer);
        return;
    }

    vk::SamplerCreateInfo sampler_ci {};
    sampler_ci.magFilter = vk::Filter::eLinear;
    sampler_ci.minFilter = vk::Filter::eLinear;
    sampler_ci.mipmapMode = vk::SamplerMipmapMode::eLinear;
    sampler_ci.addressModeU = vk::SamplerAddressMode::eClampToEdge;
    sampler_ci.addressModeV = vk::SamplerAddressMode::eClampToEdge;
    sampler_ci.addressModeW = vk::SamplerAddressMode::eClampToEdge;
    m_sampler = renderer.device().createSamplerUnique(sampler_ci);

    // A dedicated pool holding exactly the one combined image sampler set.
    vk::DescriptorPoolSize pool_size {};
    pool_size.type = vk::DescriptorType::eCombinedImageSampler;
    pool_size.descriptorCount = 1;
    vk::DescriptorPoolCreateInfo pool_ci {};
    pool_ci.maxSets = 1;
    pool_ci.poolSizeCount = 1;
    pool_ci.pPoolSizes = &pool_size;
    m_descriptor_pool = renderer.device().createDescriptorPoolUnique(pool_ci);

    vk::DescriptorSetLayout layout {
        renderer.single_image_descriptor_layout()
    };
    vk::DescriptorSetAllocateInfo alloc_info {};
    alloc_info.descriptorPool = m_descriptor_pool.get();
    alloc_info.descriptorSetCount = 1;
    alloc_info.pSetLayouts = &layout;
    m_descriptor_set
        = renderer.device().allocateDescriptorSets(alloc_info).front();

    DescriptorWriter()
        .write_image(0, m_cubemap.image_view, m_sampler.get(),
            static_cast<VkImageLayout>(vk::ImageLayout::eShaderReadOnlyOptimal),
            VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER)
        .update_set(renderer.device(), m_descriptor_set);

    // Cube corners; only the position is consumed by the skybox pipeline,
    // the remaining Vertex fields are filled with neutral values.
    std::vector<Vertex> vertices;
    vertices.reserve(8);
    auto push_vertex = [&](smath::Vec3 const &pos) {
        Vertex v {};
        v.position = pos;
        v.u = 0.0f;
        v.v = 0.0f;
        v.normal = smath::Vec3 { 0.0f, 0.0f, 1.0f };
        v.color = smath::Vec4 { 1.0f, 1.0f, 1.0f, 1.0f };
        vertices.emplace_back(v);
    };
    push_vertex(smath::Vec3 { -1.0f, -1.0f, -1.0f });
    push_vertex(smath::Vec3 { 1.0f, -1.0f, -1.0f });
    push_vertex(smath::Vec3 { 1.0f, 1.0f, -1.0f });
    push_vertex(smath::Vec3 { -1.0f, 1.0f, -1.0f });
    push_vertex(smath::Vec3 { -1.0f, -1.0f, 1.0f });
    push_vertex(smath::Vec3 { 1.0f, -1.0f, 1.0f });
    push_vertex(smath::Vec3 { 1.0f, 1.0f, 1.0f });
    push_vertex(smath::Vec3 { -1.0f, 1.0f, 1.0f });

    std::vector<uint32_t> indices {
        4, 5, 6, 4, 6, 7, // +Z
        1, 0, 3, 1, 3, 2, // -Z
        5, 1, 2, 5, 2, 6, // +X
        0, 4, 7, 0, 7, 3, // -X
        7, 6, 2, 7, 2, 3, // +Y
        0, 1, 5, 0, 5, 4, // -Y
    };
    m_index_count = static_cast<uint32_t>(indices.size());
    m_cube_mesh = renderer.upload_mesh(indices, vertices);

    if (!rebuild_pipeline(renderer)) {
        // rebuild_pipeline has already logged the reason.
        destroy(renderer);
        return;
    }
    ok = true;
}
// Releases every GPU resource the skybox holds and returns the object to its
// default, not-ok state. Each renderer-owned handle is checked before it is
// released.
auto Skybox::destroy(VulkanRenderer &renderer) -> void
{
    auto const &index_buffer { m_cube_mesh.index_buffer };
    if (index_buffer.buffer)
        renderer.destroy_buffer(index_buffer);

    auto const &vertex_buffer { m_cube_mesh.vertex_buffer };
    if (vertex_buffer.buffer)
        renderer.destroy_buffer(vertex_buffer);

    if (m_cubemap.image)
        renderer.destroy_image(m_cubemap);

    // Unique handles and the pipeline release themselves on reset().
    m_sampler.reset();
    m_descriptor_pool.reset();
    m_pipeline.reset();

    // Plain value members go back to their defaults.
    m_cube_mesh = {};
    m_cubemap = {};
    m_descriptor_set = vk::DescriptorSet {};
    m_pipeline_samples = vk::SampleCountFlagBits::e1;
    m_index_count = 0;
    ok = false;
}
// Records one indexed draw of the skybox cube with `mvp` as push constants.
// Does nothing unless init() succeeded. If the renderer's MSAA sample count
// differs from the one the pipeline was built for, the pipeline is rebuilt
// first, and the draw is skipped when that rebuild fails.
auto Skybox::draw(VulkanRenderer::GL &gl, VulkanRenderer &renderer,
    smath::Mat4 const &mvp) -> void
{
    if (!ok)
        return;

    bool const pipeline_stale { m_pipeline_samples
        != renderer.msaa_samples() };
    if (pipeline_stale && !rebuild_pipeline(renderer))
        return;

    SkyboxPushConstants constants { mvp };
    auto const constant_bytes { std::as_bytes(std::span { &constants, 1 }) };
    gl.draw_indexed(m_pipeline, m_descriptor_set, m_cube_mesh.vertex_buffer,
        m_cube_mesh.index_buffer, m_index_count, constant_bytes);
}
} // namespace Lunar

36
src/Skybox.h Normal file
View File

@@ -0,0 +1,36 @@
#pragma once
#include <cstdint>
#include <filesystem>
#include <vulkan/vulkan.hpp>
#include "Pipeline.h"
#include "Types.h"
#include "VulkanRenderer.h"
namespace Lunar {
// Cubemap skybox: owns the cubemap image, sampler, descriptor set, cube mesh
// and graphics pipeline needed to draw it through a VulkanRenderer.
struct Skybox {
// Set to true only at the end of a successful init(); draw() returns
// immediately while it is false.
bool ok { false };
// Loads the image at `path` (it must be a 4x3 grid of square faces), builds
// the cubemap and all GPU resources. Failures are logged and leave `ok` false.
auto init(VulkanRenderer &renderer, std::filesystem::path const &path)
-> void;
// Releases all GPU resources and resets every member to its default.
auto destroy(VulkanRenderer &renderer) -> void;
// Draws the cube with `mvp` as push constants, rebuilding the pipeline first
// if the renderer's MSAA sample count has changed since it was built.
auto draw(VulkanRenderer::GL &gl, VulkanRenderer &renderer,
smath::Mat4 const &mvp) -> void;
private:
// (Re)creates m_pipeline for the renderer's current sample count; returns
// false if a shader module could not be loaded.
auto rebuild_pipeline(VulkanRenderer &renderer) -> bool;
Pipeline m_pipeline {};
GPUMeshBuffers m_cube_mesh {};
AllocatedImage m_cubemap {};
vk::UniqueSampler m_sampler {};
// Pool that m_descriptor_set is allocated from.
vk::UniqueDescriptorPool m_descriptor_pool {};
vk::DescriptorSet m_descriptor_set {};
// Sample count m_pipeline was last built for.
vk::SampleCountFlagBits m_pipeline_samples { vk::SampleCountFlagBits::e1 };
uint32_t m_index_count { 0 };
};
} // namespace Lunar

View File

@@ -1,5 +1,9 @@
#pragma once
#include <cmath>
#include <cstdint>
#include <vector>
#include <smath.hpp>
#include <vk_mem_alloc.h>
#include <vulkan/vulkan.hpp>
@@ -31,6 +35,15 @@ struct FrameData {
DeletionQueue deletion_queue;
DescriptorAllocatorGrowable frame_descriptors;
AllocatedBuffer frame_image_buffer {};
vk::Extent2D frame_image_extent {};
std::vector<std::uint8_t> frame_image_rgba;
bool frame_image_ready { false };
bool tracy_frame_ready { false };
AllocatedBuffer screenshot_buffer {};
vk::Extent2D screenshot_extent {};
std::vector<std::uint8_t> screenshot_rgba;
bool screenshot_ready { false };
};
struct Vertex {
@@ -55,4 +68,39 @@ struct GPUSceneData {
smath::Vec4 sunlight_color;
};
// Look-at style camera description: eye position, the point it looks at,
// an up vector, and a vertical field of view.
struct Camera {
smath::Vec3 position {};
smath::Vec3 target { 0, 0, -1 };
smath::Vec3 up { 0, 1, 0 };
// NOTE(review): smath::deg presumably converts 70 degrees to radians — confirm.
float fovy { smath::deg(70.0f) };
};
// Spherical coordinates with Y as the polar axis:
//   r     - distance from the origin,
//   theta - azimuth around Y, measured with atan2(z, x),
//   phi   - angle from the +Y axis, in [0, pi].
struct PolarCoordinate {
    float r, theta, phi;

    // Converts a Cartesian vector. The zero vector maps to r = theta = phi = 0.
    static PolarCoordinate from_vec3(smath::Vec3 const &v)
    {
        PolarCoordinate p;
        p.r = std::sqrt(v.x() * v.x() + v.y() * v.y() + v.z() * v.z());
        if (p.r == 0.0f) {
            p.theta = 0.0f;
            p.phi = 0.0f;
            return p;
        }
        p.theta = std::atan2(v.z(), v.x());

        // Rounding in r (notably with subnormal components) can push y / r
        // marginally outside [-1, 1], where acos returns NaN. Clamp with
        // plain comparisons so a NaN input still propagates.
        float cos_phi = v.y() / p.r;
        if (cos_phi > 1.0f) {
            cos_phi = 1.0f;
        } else if (cos_phi < -1.0f) {
            cos_phi = -1.0f;
        }
        p.phi = std::acos(cos_phi);
        return p;
    }

    // Inverse of from_vec3: converts back to a Cartesian vector.
    smath::Vec3 to_vec3() const
    {
        float const sin_phi { std::sin(phi) };
        return smath::Vec3 { r * sin_phi * std::cos(theta), r * std::cos(phi),
            r * sin_phi * std::sin(theta) };
    }
};
} // namespace Lunar

View File

@@ -89,7 +89,7 @@ auto load_shader_module(std::span<uint8_t> spirv_data, vk::Device device)
namespace vkinit {
auto image_create_info(vk::Format format, vk::ImageUsageFlags usage_flags,
vk::Extent3D extent) -> vk::ImageCreateInfo
vk::Extent3D extent, vk::SampleCountFlagBits samples) -> vk::ImageCreateInfo
{
vk::ImageCreateInfo info {};
info.imageType = vk::ImageType::e2D;
@@ -97,7 +97,7 @@ auto image_create_info(vk::Format format, vk::ImageUsageFlags usage_flags,
info.extent = extent;
info.mipLevels = 1;
info.arrayLayers = 1;
info.samples = vk::SampleCountFlagBits::e1;
info.samples = samples;
info.tiling = vk::ImageTiling::eOptimal;
info.usage = usage_flags;
return info;

View File

@@ -2,7 +2,6 @@
#include <span>
#include <vulkan/vk_enum_string_helper.h>
#include <vulkan/vulkan.hpp>
template<typename F> struct privDefer {
@@ -30,13 +29,21 @@ template<typename F> privDefer<F> defer_func(F f) { return privDefer<F>(f); }
#define VK_CHECK(logger, x) \
do { \
auto err { x }; \
auto result = vk::Result(err); \
auto result { vk::Result(err) }; \
if (result != vk::Result::eSuccess) { \
(logger).err("Detected Vulkan error: {}", vk::to_string(result)); \
throw std::runtime_error("Vulkan error"); \
} \
} while (0)
// GZoneScopedN(name): forwards to Tracy's ZoneScopedN when TRACY_ENABLE is
// defined; otherwise expands to an empty statement so call sites compile
// unchanged in builds without Tracy.
#if defined(TRACY_ENABLE)
# define GZoneScopedN(name) ZoneScopedN(name)
#else
# define GZoneScopedN(name) \
do { \
} while (0)
#endif
namespace vkutil {
auto transition_image(vk::CommandBuffer cmd, vk::Image image,
@@ -52,7 +59,9 @@ auto load_shader_module(std::span<uint8_t> spirv_data, vk::Device device)
namespace vkinit {
auto image_create_info(vk::Format format, vk::ImageUsageFlags usage_flags,
vk::Extent3D extent) -> vk::ImageCreateInfo;
vk::Extent3D extent,
vk::SampleCountFlagBits samples = vk::SampleCountFlagBits::e1)
-> vk::ImageCreateInfo;
auto imageview_create_info(vk::Format format, vk::Image image,
vk::ImageAspectFlags aspect_flags) -> vk::ImageViewCreateInfo;
auto command_buffer_submit_info(vk::CommandBuffer cmd)

File diff suppressed because it is too large Load Diff

View File

@@ -1,8 +1,14 @@
#pragma once
#include <array>
#include <cstddef>
#include <cstdint>
#include <functional>
#include <mutex>
#include <optional>
#include <span>
#include <string>
#include <variant>
#include <vector>
#include <SDL3/SDL_video.h>
@@ -11,9 +17,9 @@
#include <vk_mem_alloc.h>
#include <vulkan/vulkan.hpp>
#include "CPUTexture.h"
#include "Colors.h"
#include "DeletionQueue.h"
#include "DescriptorAllocator.h"
#include "Loader.h"
#include "Logger.h"
#include "Pipeline.h"
@@ -29,7 +35,13 @@ struct GPUDrawPushConstants {
constexpr unsigned FRAME_OVERLAP = 2;
struct VulkanRenderer {
// Read-only view of screenshot pixel bytes together with the image extent.
// The span does not own the bytes; see get_screenshot_pixels() for the source.
struct ScreenshotPixels {
std::span<std::uint8_t const> pixels;
vk::Extent2D extent;
};
struct GL {
enum class GeometryKind {
Triangles,
TriangleStrip,
@@ -65,17 +77,27 @@ struct VulkanRenderer {
auto normal(smath::Vec3 const &normal) -> void;
auto set_texture(std::optional<AllocatedImage const *> texture
= std::nullopt) -> void;
auto set_culling(bool enabled) -> void;
auto draw_rectangle(smath::Vec2 pos, smath::Vec2 size,
smath::Vec4 color = smath::Vec4 { Colors::WHITE, 1.0f },
float rotation = 0.0f) -> void;
auto draw_sphere(smath::Vec3 center, float radius, int rings = 16,
int segments = 32, std::optional<smath::Vec4> sphere_color = {})
-> void;
auto end() -> void;
auto flush() -> void;
auto use_pipeline(Pipeline &pipeline) -> void;
auto set_transform(smath::Mat4 const &transform) -> void;
auto push_transform() -> void;
auto pop_transform() -> void;
auto draw_mesh(GPUMeshBuffers const &mesh, smath::Mat4 const &transform,
uint32_t index_count, uint32_t first_index = 0,
int32_t vertex_offset = 0) -> void;
auto draw_indexed(Pipeline &pipeline, vk::DescriptorSet descriptor_set,
AllocatedBuffer const &vertex_buffer,
AllocatedBuffer const &index_buffer, uint32_t index_count,
std::span<std::byte const> push_constants) -> void;
private:
auto push_vertex(smath::Vec3 const &pos) -> void;
@@ -90,7 +112,9 @@ struct VulkanRenderer {
bool m_inside_primitive { false };
bool m_drawing { false };
Pipeline *m_active_pipeline { nullptr };
bool m_culling_enabled { true };
smath::Mat4 m_transform { smath::Mat4::identity() };
std::vector<smath::Mat4> m_transform_stack;
smath::Vec4 m_current_color { 1.0f, 1.0f, 1.0f, 1.0f };
smath::Vec3 m_current_normal { 0.0f, 0.0f, 1.0f };
smath::Vec2 m_current_uv { 0.0f, 0.0f };
@@ -100,25 +124,51 @@ struct VulkanRenderer {
std::vector<uint32_t> m_indices;
};
VulkanRenderer(SDL_Window *window, Logger &logger);
// Anti-aliasing modes selectable through set_antialiasing():
// none, or multisampling at 2, 4 or 8 samples.
enum class AntiAliasingKind {
NONE,
MSAA_2X,
MSAA_4X,
MSAA_8X,
};
struct KmsSurfaceConfig { };
VulkanRenderer(SDL_Window *window, Logger &logger,
std::span<std::string const> instance_extensions = {},
std::span<std::string const> device_extensions = {});
VulkanRenderer(KmsSurfaceConfig config, Logger &logger,
std::span<std::string const> instance_extensions = {},
std::span<std::string const> device_extensions = {});
~VulkanRenderer();
auto render(std::function<void(GL &)> const &record = {}) -> void;
auto render_to_image(vk::Image target_image, vk::Extent2D target_extent,
std::function<void(GL &)> const &record = {}) -> void;
auto resize(uint32_t width, uint32_t height) -> void;
auto set_offscreen_extent(vk::Extent2D extent) -> void;
auto set_antialiasing(AntiAliasingKind kind) -> void;
auto set_antialiasing_immediate(AntiAliasingKind kind) -> void;
auto antialiasing() const -> AntiAliasingKind
{
return m_vk.antialiasing_kind;
}
auto immediate_submit(std::function<void(vk::CommandBuffer cmd)> &&function,
bool flush_frame_deletion_queue = true,
bool clear_frame_descriptors = true) -> void;
auto upload_mesh(std::span<uint32_t> indices, std::span<Vertex> vertices)
-> GPUMeshBuffers;
auto destroy_buffer(AllocatedBuffer const &buffer) -> void;
auto create_image(CPUTexture const &texture, vk::ImageUsageFlags flags,
bool mipmapped = false) -> AllocatedImage;
auto create_cubemap(std::span<uint8_t const> pixels, uint32_t face_size,
vk::Format format, vk::ImageUsageFlags flags) -> AllocatedImage;
auto destroy_image(AllocatedImage const &img) -> void;
auto destroy_image_later(AllocatedImage img) -> void;
auto rectangle_mesh() const -> GPUMeshBuffers const &
{
return m_vk.rectangle;
}
auto test_meshes() const -> std::vector<std::shared_ptr<Mesh>> const &
{
return m_vk.test_meshes;
}
auto white_texture() const -> AllocatedImage const &
{
return m_vk.white_image;
@@ -137,47 +187,120 @@ struct VulkanRenderer {
}
auto draw_extent() const -> vk::Extent2D { return m_vk.draw_extent; }
auto mesh_pipeline() -> Pipeline & { return m_vk.mesh_pipeline; }
auto wayland_pipeline() -> Pipeline & { return m_vk.wayland_pipeline; }
auto triangle_pipeline() -> Pipeline & { return m_vk.triangle_pipeline; }
auto device() const -> vk::Device { return m_device; }
auto instance() const -> vk::Instance { return m_instance; }
auto physical_device() const -> vk::PhysicalDevice
{
return m_physical_device;
}
auto graphics_queue() const -> vk::Queue { return m_vk.graphics_queue; }
auto graphics_queue_family() const -> uint32_t
{
return m_vk.graphics_queue_family;
}
auto draw_image_format() const -> vk::Format
{
return m_vk.draw_image.format;
}
auto depth_image_format() const -> vk::Format
{
return m_vk.depth_image.format;
}
auto msaa_samples() const -> vk::SampleCountFlagBits
{
return m_vk.msaa_samples;
}
auto single_image_descriptor_layout() const -> vk::DescriptorSetLayout
{
return m_vk.single_image_descriptor_layout;
}
auto gl_api() -> GL & { return gl; }
auto get_screenshot() const -> std::optional<AllocatedImage>
{
return m_latest_screenshot;
}
// Returns a view of the most recent screenshot's bytes and extent, or an
// empty optional when no pixels are stored or either dimension is zero.
// The returned span points into m_latest_screenshot_pixels.
auto get_screenshot_pixels() const
    -> std::optional<VulkanRenderer::ScreenshotPixels>
{
    bool const has_pixels { !m_latest_screenshot_pixels.empty() };
    bool const has_extent { m_latest_screenshot_extent.width != 0
        && m_latest_screenshot_extent.height != 0 };
    if (!(has_pixels && has_extent))
        return std::nullopt;

    std::span<std::uint8_t const> const view(
        m_latest_screenshot_pixels.data(), m_latest_screenshot_pixels.size());
    return ScreenshotPixels { view, m_latest_screenshot_extent };
}
auto logger() const -> Logger & { return m_logger; }
GL gl;
std::optional<AllocatedImage> m_latest_screenshot {};
std::vector<std::uint8_t> m_latest_screenshot_pixels {};
vk::Extent2D m_latest_screenshot_extent {};
private:
// A deferred renderer command, stored in m_pending_render_commands.
// The variant currently has a single alternative: changing the anti-aliasing
// mode.
struct RenderCommand {
struct SetAntiAliasing {
AntiAliasingKind kind;
};
std::variant<SetAntiAliasing> payload;
};
auto vk_init() -> void;
auto swapchain_init() -> void;
auto setup_kms_surface() -> void;
auto commands_init() -> void;
auto sync_init() -> void;
auto descriptors_init() -> void;
auto pipelines_init() -> void;
auto background_pipelines_init() -> void;
auto triangle_pipeline_init() -> void;
auto mesh_pipeline_init() -> void;
auto imgui_init() -> void;
auto default_data_init() -> void;
auto draw_background(vk::CommandBuffer cmd) -> void;
auto draw_imgui(vk::CommandBuffer cmd, vk::ImageView target_image_view)
-> void;
auto create_swapchain(uint32_t width, uint32_t height) -> void;
auto create_draw_image(uint32_t width, uint32_t height) -> void;
auto update_draw_image_descriptor() -> void;
auto create_msaa_color_image(uint32_t width, uint32_t height) -> void;
auto destroy_draw_image() -> void;
auto create_depth_image(uint32_t width, uint32_t height) -> void;
auto destroy_depth_image() -> void;
auto destroy_msaa_color_image() -> void;
auto recreate_swapchain(uint32_t width, uint32_t height) -> void;
auto destroy_swapchain() -> void;
auto ensure_screenshot_buffers(vk::Extent2D extent) -> void;
auto destroy_screenshot_buffers() -> void;
auto emit_frame_screenshot(FrameData &frame) -> void;
#if defined(TRACY_ENABLE)
auto ensure_tracy_frame_buffers(vk::Extent2D extent) -> void;
auto destroy_tracy_frame_buffers() -> void;
auto emit_tracy_frame_image(FrameData &frame) -> void;
#endif
auto create_image(vk::Extent3D size, vk::Format format,
vk::ImageUsageFlags flags, bool mipmapped = false) -> AllocatedImage;
vk::ImageUsageFlags flags,
vk::SampleCountFlagBits samples = vk::SampleCountFlagBits::e1,
bool mipmapped = false) -> AllocatedImage;
auto create_image_no_view(vk::Extent3D size, vk::Format format,
vk::ImageUsageFlags flags,
vk::SampleCountFlagBits samples = vk::SampleCountFlagBits::e1,
bool mipmapped = false) -> AllocatedImage;
auto create_image(void const *data, vk::Extent3D size, vk::Format format,
vk::ImageUsageFlags flags, bool mipmapped = false) -> AllocatedImage;
auto destroy_image(AllocatedImage const &img) -> void;
auto create_buffer(size_t alloc_size, vk::BufferUsageFlags usage,
VmaMemoryUsage memory_usage) -> AllocatedBuffer;
auto destroy_buffer(AllocatedBuffer const &buffer) -> void;
auto enqueue_render_command(RenderCommand &&command) -> void;
auto process_render_commands() -> void;
auto apply_antialiasing(AntiAliasingKind kind) -> void;
vk::Instance m_instance {};
vk::PhysicalDevice m_physical_device {};
@@ -210,23 +333,35 @@ private:
std::array<FrameData, FRAME_OVERLAP> frames;
AllocatedImage draw_image {};
vk::ImageLayout draw_image_layout { vk::ImageLayout::eUndefined };
AllocatedImage msaa_color_image {};
vk::ImageLayout msaa_color_image_layout { vk::ImageLayout::eUndefined };
AllocatedImage depth_image {};
vk::ImageLayout depth_image_layout { vk::ImageLayout::eUndefined };
#if defined(TRACY_ENABLE)
AllocatedImage tracy_capture_image {};
vk::ImageLayout tracy_capture_image_layout {
vk::ImageLayout::eUndefined
};
vk::Extent2D tracy_capture_extent {};
#endif
vk::Extent2D draw_extent {};
AntiAliasingKind antialiasing_kind { AntiAliasingKind::NONE };
vk::SampleCountFlagBits msaa_samples { vk::SampleCountFlagBits::e1 };
vk::SampleCountFlags supported_framebuffer_samples {};
VmaAllocator allocator;
DescriptorAllocator descriptor_allocator;
VkDescriptorSet draw_image_descriptors {};
vk::DescriptorSetLayout draw_image_descriptor_layout {};
GPUSceneData scene_data {};
vk::DescriptorSetLayout gpu_scene_data_descriptor_layout {};
vk::DescriptorSetLayout single_image_descriptor_layout {};
Pipeline gradient_pipeline;
Pipeline triangle_pipeline;
Pipeline triangle_pipeline_culled;
Pipeline mesh_pipeline;
Pipeline mesh_pipeline_culled;
Pipeline wayland_pipeline;
GPUMeshBuffers rectangle;
@@ -240,8 +375,6 @@ private:
uint64_t frame_number { 0 };
std::vector<std::shared_ptr<Mesh>> test_meshes;
AllocatedImage white_image {};
AllocatedImage black_image {};
AllocatedImage gray_image {};
@@ -251,8 +384,27 @@ private:
vk::UniqueSampler default_sampler_nearest;
} m_vk;
// Display selection used when rendering through the KMS path (m_use_kms):
// the chosen Vulkan display, its mode and extent, the plane indices, and the
// display's name.
struct KmsState {
vk::DisplayKHR display {};
vk::DisplayModeKHR mode {};
vk::Extent2D extent {};
uint32_t plane_index { 0 };
uint32_t plane_stack_index { 0 };
std::string display_name {};
};
SDL_Window *m_window { nullptr };
Logger &m_logger;
std::mutex m_command_mutex;
std::vector<RenderCommand> m_pending_render_commands;
bool m_use_kms { false };
bool m_imgui_enabled { true };
std::optional<KmsState> m_kms_state {};
vk::PhysicalDevice m_kms_physical_device {};
vk::Extent2D m_kms_extent {};
bool m_kms_physical_device_set { false };
std::vector<std::string> m_extra_instance_extensions {};
std::vector<std::string> m_extra_device_extensions {};
};
} // namespace Lunar

View File

@@ -1,7 +1,3 @@
#include "src/Application.h"
auto main() -> int
{
Lunar::Application app {};
app.run();
}
auto main() -> int { Lunar::Application::the().run(); }

113
src/wayland/Client.h Normal file
View File

@@ -0,0 +1,113 @@
#pragma once
#include <cassert>
#include <format>
#include <functional>
#include <optional>
#include <tuple>
#include <wayland-server-core.h>
#include "Display.h"
#include "List.h"
namespace Lunar::Wayland {
// Thin, non-owning C++ wrapper around a libwayland `wl_client *`.
// The wrapper never destroys the client; it only forwards to the C API.
struct Client {
    // Wraps an existing client. `client` must not be null.
    Client(wl_client *client)
        : m_client { client }
    {
        assert(m_client);
    }
    ~Client() = default;

    inline auto c_ptr() const -> wl_client * { return m_client; }

    // Recovers the client from its link in the display's client list.
    static auto from_link(wl_list *link) -> Client
    {
        return Client { wl_client_from_link(link) };
    }

    auto flush() { wl_client_flush(m_client); }

    // Returns a non-owning wrapper for the display this client belongs to.
    auto get_display() -> Display
    {
        return Display { wl_client_get_display(m_client) };
    }

    // Returns the (pid, uid, gid) reported by libwayland for this client.
    auto get_credentials() noexcept -> std::tuple<pid_t, uid_t, gid_t>
    {
        std::tuple<pid_t, uid_t, gid_t> ret {};
        auto &[pid, uid, gid] { ret };
        wl_client_get_credentials(m_client, &pid, &uid, &gid);
        return ret;
    }

    auto get_fd() noexcept -> int { return wl_client_get_fd(m_client); }

    // Looks up the resource with object id `id`; empty if there is none.
    auto get_object(uint32_t id) -> std::optional<wl_resource *>
    {
        if (auto *res { wl_client_get_object(m_client, id) }; res != nullptr)
            return res;
        return std::nullopt;
    }

    // Reports an implementation error to the client. The text is passed via
    // "%.*s", so it is never interpreted as a printf format string and does
    // not need to be NUL-terminated.
    auto post_implementation_error(std::string_view string)
    {
        wl_client_post_implementation_error(
            m_client, "%.*s", static_cast<int>(string.size()), string.data());
    }

    // std::format-style convenience overload of the above.
    template<typename... Args>
    auto post_implementation_error(
        std::format_string<Args...> fmt, Args &&...args)
    {
        auto const message { std::format(fmt, std::forward<Args>(args)...) };
        // Pass an explicit string_view so the non-template overload is an
        // exact match and this template can never be selected recursively.
        post_implementation_error(std::string_view { message });
    }

    auto add_destroy_listener(wl_listener *listener)
    {
        wl_client_add_destroy_listener(m_client, listener);
    }

    auto add_destroy_late_listener(wl_listener *listener)
    {
        wl_client_add_destroy_late_listener(m_client, listener);
    }

    auto get_link() -> wl_list * { return wl_client_get_link(m_client); }

    auto add_resource_created_listener(wl_listener *listener)
    {
        wl_client_add_resource_created_listener(m_client, listener);
    }

    // Calls `fn` for each resource of the client; the value `fn` returns is
    // handed back to libwayland as the iterator result.
    auto for_each_resource(
        std::function<wl_iterator_result(wl_resource *)> const &fn) -> void
    {
        using Callback = std::function<wl_iterator_result(wl_resource *)>;
        // The capture-less lambda converts implicitly to the C callback type;
        // `fn` travels through the user-data pointer and is only read.
        wl_client_for_each_resource(
            m_client,
            [](wl_resource *res, void *user_data) -> wl_iterator_result {
                return (*static_cast<Callback const *>(user_data))(res);
            },
            const_cast<Callback *>(&fn));
    }

    auto set_max_buffer_size(size_t max_buffer_size)
    {
        wl_client_set_max_buffer_size(m_client, max_buffer_size);
    }

private:
    wl_client *m_client {};
};
} // namespace Lunar::Wayland

103
src/wayland/Display.h Normal file
View File

@@ -0,0 +1,103 @@
#pragma once
#include <cassert>
#include <format>
#include <stdexcept>
#include <vector>
#include <wayland-server-core.h>
namespace Lunar::Wayland {
// C++ wrapper around a libwayland `wl_display *`.
//
// A default-constructed Display creates and owns the display; on destruction
// it destroys the display's clients and then the display itself. A Display
// built from an existing pointer is a non-owning view and destroys nothing.
//
// The type is move-only: an implicit copy of an owning Display would make two
// objects destroy the same wl_display.
struct Display {
    // Creates and owns a new display. Throws std::runtime_error on failure.
    Display()
        : m_display { wl_display_create() }
    {
        if (!m_display)
            throw std::runtime_error("Failed to create Wayland display");
    }

    // Non-owning view over an existing display.
    Display(wl_display *display)
        : m_display { display }
        , m_should_cleanup { false }
    {
    }

    Display(Display const &) = delete;
    auto operator=(Display const &) -> Display & = delete;

    // Transfers the pointer and ownership flag; `other` is left empty and
    // non-owning.
    Display(Display &&other) noexcept
        : m_display { other.m_display }
        , m_should_cleanup { other.m_should_cleanup }
    {
        other.m_display = nullptr;
        other.m_should_cleanup = false;
    }

    auto operator=(Display &&other) noexcept -> Display &
    {
        if (this != &other) {
            release();
            m_display = other.m_display;
            m_should_cleanup = other.m_should_cleanup;
            other.m_display = nullptr;
            other.m_should_cleanup = false;
        }
        return *this;
    }

    ~Display() { release(); }

    inline auto c_ptr() const -> wl_display * { return m_display; }

    auto set_global_filter(
        wl_display_global_filter_func_t filter, void *data) noexcept
    {
        wl_display_set_global_filter(m_display, filter, data);
    }

    auto next_serial() noexcept -> uint32_t
    {
        return wl_display_next_serial(m_display);
    }

    auto set_default_max_buffer_size(size_t max_buffer_size) noexcept
    {
        wl_display_set_default_max_buffer_size(m_display, max_buffer_size);
    }

    // Throws std::runtime_error if the fd cannot be added.
    auto add_socket_fd(int fd)
    {
        if (wl_display_add_socket_fd(m_display, fd) == -1) {
            throw std::runtime_error(
                "Failed to add socket fd to Wayland display");
        }
    }

    // Adds a named socket; throws std::runtime_error on failure.
    // libwayland accepts a null `name` (it then picks the name itself), so the
    // error message must not hand a null pointer to std::format.
    auto add_socket(char const *name)
    {
        if (wl_display_add_socket(m_display, name) == -1) {
            throw std::runtime_error(
                std::format("Failed to add socket `{}` to Wayland display",
                    name ? name : "<default>"));
        }
    }

    // Throws std::runtime_error if the logger cannot be added.
    auto add_protocol_logger(wl_protocol_logger_func_t func, void *user_data)
        -> wl_protocol_logger *
    {
        auto *logger {
            wl_display_add_protocol_logger(m_display, func, user_data)
        };
        if (logger == nullptr) {
            throw std::runtime_error(
                "Failed to add protocol logger to Wayland display");
        }
        return logger;
    }

    // Throws std::runtime_error if the format cannot be added.
    auto add_shm_format(uint32_t format) -> uint32_t *
    {
        auto *fmt { wl_display_add_shm_format(m_display, format) };
        if (fmt == nullptr) {
            throw std::runtime_error(
                "Failed to add SHM format to Wayland display");
        }
        return fmt;
    }

    // Snapshot of the clients currently in the display's client list.
    auto get_client_list() -> std::vector<wl_client *>
    {
        std::vector<wl_client *> ret {};
        auto const list { wl_display_get_client_list(m_display) };
        assert(list);
        wl_client *client {};
        wl_client_for_each(client, list) { ret.push_back(client); }
        return ret;
    }

private:
    // Destroys the clients and the display if this object owns one, then
    // leaves the object empty so a second call is a no-op.
    auto release() noexcept -> void
    {
        if (m_should_cleanup && m_display) {
            wl_display_destroy_clients(m_display);
            wl_display_destroy(m_display);
        }
        m_display = nullptr;
        m_should_cleanup = false;
    }

    wl_display *m_display {};
    bool m_should_cleanup { true };
};
} // namespace Lunar::Wayland

71
src/wayland/Global.h Normal file
View File

@@ -0,0 +1,71 @@
#pragma once
#include <cassert>
#include <optional>
#include <utility>
#include <wayland-server-core.h>
#include "Client.h"
#include "Display.h"
namespace Lunar::Wayland {
struct Global {
Global() = delete;
explicit Global(Display &display, wl_interface const *interface,
int version, void *data, wl_global_bind_func_t bind)
: m_global {
wl_global_create(display.c_ptr(), interface, version, data, bind),
}
{
}
Global(wl_global *global)
: m_global { std::move(global) }
, m_should_cleanup { false }
{
assert(m_global);
}
~Global()
{
if (!m_should_cleanup)
return;
wl_global_destroy(m_global);
}
inline auto c_ptr() const -> wl_global * { return m_global; }
auto get_name(Client &client) const -> std::optional<uint32_t>
{
if (auto const ret { wl_global_get_name(m_global, client.c_ptr()) };
ret != 0) {
return ret;
} else {
return {};
}
}
inline auto get_version() const -> uint32_t
{
return wl_global_get_version(m_global);
}
inline auto get_display() const -> Display
{
return wl_global_get_display(m_global);
}
inline auto get_interface() const -> wl_interface const *
{
return wl_global_get_interface(m_global);
}
private:
wl_global *m_global {};
bool m_should_cleanup { true };
};
} // namespace Lunar::Wayland

287
src/wayland/List.h Normal file
View File

@@ -0,0 +1,287 @@
#pragma once
#include <cstddef>
#include <iterator>
#include <wayland-server-core.h>
namespace Lunar::Wayland {
namespace detail {
// Byte offset of the `wl_list` member `Member` inside `T`, computed by taking
// the member's address through a null `T` pointer (the classic offsetof
// idiom; `offsetof` itself cannot take a pointer-to-member).
// NOTE(review): the body uses reinterpret_cast, so despite `constexpr` it
// presumably cannot be evaluated in a constant expression — confirm whether
// the specifier is wanted.
template<typename T, wl_list T::*Member>
constexpr std::ptrdiff_t member_offset() noexcept
{
return reinterpret_cast<std::ptrdiff_t>(
&(reinterpret_cast<T const volatile *>(0)->*Member));
}
// Recovers the enclosing `T` from a pointer to its embedded `wl_list` link by
// subtracting the member's offset. `node` must point at the `Member` field of
// a live `T`; passing a list head yields an invalid pointer.
template<typename T, wl_list T::*Member>
inline T *container_of(wl_list *node) noexcept
{
auto *p { reinterpret_cast<std::byte *>(node)
- member_offset<T, Member>() };
return reinterpret_cast<T *>(p);
}
// Const overload of container_of.
template<typename T, wl_list T::*Member>
inline T const *container_of(wl_list const *node) noexcept
{
auto *p { reinterpret_cast<std::byte const *>(node)
- member_offset<T, Member>() };
return reinterpret_cast<T const *>(p);
}
} // namespace detail
template<typename T, wl_list T::*Member> struct List {
struct Iterator {
using iterator_category = std::bidirectional_iterator_tag;
using value_type = T;
using difference_type = std::ptrdiff_t;
using pointer = T *;
using reference = T &;
Iterator() = default;
Iterator(wl_list *cur, wl_list *head)
: m_cur(cur)
, m_head(head)
{
}
auto operator*() const -> reference
{
return *detail::container_of<T, Member>(m_cur);
}
auto operator->() const -> pointer
{
return detail::container_of<T, Member>(m_cur);
}
auto operator++() -> Iterator &
{
m_cur = m_cur->next;
return *this;
}
auto operator++(int) -> Iterator
{
auto t { *this };
++(*this);
return t;
}
auto operator--() -> Iterator &
{
m_cur = (m_cur == m_head) ? m_head->prev : m_cur->prev;
return *this;
}
auto operator--(int) -> Iterator
{
auto t { *this };
--(*this);
return t;
}
friend auto operator==(Iterator a, Iterator b) -> bool
{
return a.m_cur == b.m_cur;
}
friend auto operator!=(Iterator a, Iterator b) -> bool
{
return !(a == b);
}
private:
wl_list *m_cur { nullptr };
wl_list *m_head { nullptr };
};
struct ConstIterator {
using iterator_category = std::bidirectional_iterator_tag;
using value_type = T const;
using difference_type = std::ptrdiff_t;
using pointer = T const *;
using reference = T const &;
ConstIterator() = default;
ConstIterator(wl_list const *cur, wl_list const *head)
: m_cur(cur)
, m_head(head)
{
}
auto operator*() const -> reference
{
return *detail::container_of<T, Member>(m_cur);
}
auto operator->() const -> pointer
{
return detail::container_of<T, Member>(m_cur);
}
auto operator++() -> ConstIterator &
{
m_cur = m_cur->next;
return *this;
}
auto operator++(int) -> ConstIterator
{
auto t { *this };
++(*this);
return t;
}
auto operator--() -> ConstIterator &
{
m_cur = (m_cur == m_head) ? m_head->prev : m_cur->prev;
return *this;
}
auto operator--(int) -> ConstIterator
{
auto t { *this };
--(*this);
return t;
}
friend auto operator==(ConstIterator a, ConstIterator b) -> bool
{
return a.m_cur == b.m_cur;
}
friend auto operator!=(ConstIterator a, ConstIterator b) -> bool
{
return !(a == b);
}
private:
wl_list const *m_cur { nullptr };
wl_list const *m_head { nullptr };
};
List()
: m_head {}
, m_external_head { nullptr }
, m_should_cleanup { true }
{
wl_list_init(&m_head);
}
explicit List(
wl_list *existing_head, bool should_cleanup = false, bool init = false)
: m_head {}
, m_external_head { existing_head }
, m_should_cleanup { should_cleanup }
{
if (init && m_external_head)
wl_list_init(m_external_head);
}
~List()
{
if (!m_should_cleanup)
return;
clear();
if (auto *h { head_ptr() }; h)
wl_list_init(h);
}
List(List const &) = delete;
auto operator=(List const &) -> List & = delete;
List(List &&other) noexcept { move_from(other); }
auto operator=(List &&other) noexcept -> List &
{
if (this != &other) {
this->~List();
move_from(other);
}
return *this;
}
inline auto c_ptr() -> wl_list * { return head_ptr(); }
inline auto c_ptr() const -> wl_list const * { return head_ptr(); }
auto empty() const noexcept -> bool { return wl_list_empty(head_ptr()); }
auto length() const noexcept -> int { return wl_list_length(head_ptr()); }
// Links `elem` directly after the head, making it the first element.
auto push_front(T *elem) noexcept -> void
{
    wl_list *const link { &(elem->*Member) };
    wl_list_insert(head_ptr(), link);
}
// Links `elem` after the current last link (head->prev), making it the last
// element.
auto push_back(T *elem) noexcept -> void
{
    wl_list *const link { &(elem->*Member) };
    wl_list *const tail { head_ptr()->prev };
    wl_list_insert(tail, link);
}
// Unlinks `elem` and re-initialises its link so that it points at itself.
auto remove(T *elem) noexcept -> void
{
    wl_list *const link { &(elem->*Member) };
    wl_list_remove(link);
    wl_list_init(link);
}
auto clear() noexcept -> void
{
auto *h { head_ptr() };
while (!wl_list_empty(h)) {
auto *node { h->next };
wl_list_remove(node);
wl_list_init(node);
}
}
// Iterator on the first element (the link after the head).
auto begin() noexcept -> Iterator
{
    wl_list *const head { head_ptr() };
    return Iterator(head->next, head);
}
// Past-the-end iterator: positioned on the head link itself.
auto end() noexcept -> Iterator
{
    wl_list *const head { head_ptr() };
    return Iterator(head, head);
}
// Const overload of begin().
auto begin() const noexcept -> ConstIterator
{
    wl_list const *const head { head_ptr() };
    return ConstIterator(head->next, head);
}
// Const overload of end().
auto end() const noexcept -> ConstIterator
{
    wl_list const *const head { head_ptr() };
    return ConstIterator(head, head);
}
private:
// Selects the head link in use: the external head when one was supplied,
// otherwise the embedded m_head. Never returns null.
auto head_ptr() noexcept -> wl_list *
{
return m_external_head ? m_external_head : &m_head;
}
// Const overload of head_ptr().
auto head_ptr() const noexcept -> wl_list const *
{
return m_external_head ? m_external_head : &m_head;
}
// Takes over the contents and flags of `other` and leaves `other` as an empty
// list that performs no cleanup. The caller is responsible for having
// released whatever *this held before.
auto move_from(List &other) noexcept -> void
{
    m_external_head = other.m_external_head;
    m_should_cleanup = other.m_should_cleanup;

    // An intrusive head cannot be copied by value: the first and last
    // elements (or, for an empty list, the head itself) store the ADDRESS of
    // the head. Start from an empty embedded head and relink the elements.
    wl_list_init(&m_head);
    if (!m_external_head) {
        // Only an embedded head carries elements. When `other` wraps an
        // external head its m_head may never have been initialised, so it
        // must not be walked. wl_list_insert_list is a no-op for an empty
        // source and otherwise splices all elements in, preserving order.
        wl_list_insert_list(&m_head, &other.m_head);
    }

    other.m_external_head = nullptr;
    other.m_should_cleanup = false;
    // wl_list_insert_list leaves the source head in an invalid state.
    wl_list_init(&other.m_head);
}
private:
// Embedded head link, used when no external head was supplied.
wl_list m_head {};
// Optional head link stored elsewhere; takes precedence over m_head when set.
wl_list *m_external_head { nullptr };
// Whether the destructor unlinks all elements and resets the head.
bool m_should_cleanup { true };
};
} // namespace Lunar::Wayland

43
src/wayland/Region.h Normal file
View File

@@ -0,0 +1,43 @@
#pragma once
#include <cstdint>
#include <vector>
#include <wayland-server-core.h>
namespace Lunar::Wayland {
// Server-side state of one region object: the rectangles that were added and
// the rectangles that were subtracted, each recorded in request order. The
// two lists are only stored here, never combined.
struct Region {
    // Axis-aligned rectangle exactly as supplied by the request.
    struct Box {
        std::int32_t x {};
        std::int32_t y {};
        std::int32_t width {};
        std::int32_t height {};
    };

    // `resource` is stored as-is; this type does not manage its lifetime.
    explicit Region(wl_resource *resource)
        : m_resource(resource)
    {
    }

    auto resource() const -> wl_resource * { return m_resource; }

    // Records a rectangle in the "added" list.
    auto add(std::int32_t x, std::int32_t y, std::int32_t width,
        std::int32_t height) -> void
    {
        Box const box { x, y, width, height };
        m_boxes.push_back(box);
    }

    // Records a rectangle in the "subtracted" list.
    auto subtract(std::int32_t x, std::int32_t y, std::int32_t width,
        std::int32_t height) -> void
    {
        Box const box { x, y, width, height };
        m_subtract_boxes.push_back(box);
    }

private:
    wl_resource *m_resource {};
    std::vector<Box> m_boxes {};
    std::vector<Box> m_subtract_boxes {};
};
} // namespace Lunar::Wayland

96
src/wayland/Shm.cpp Normal file
View File

@@ -0,0 +1,96 @@
#include "Shm.h"
#include <cerrno>
#include <cstring>
#include <sys/mman.h>
#include <unistd.h>
#include <utility>
namespace Lunar::Wayland {
// Maps `size` bytes of `fd` as shared read/write memory. When mmap fails the
// error is logged and the pool is left without a mapping (data() returns
// null); the descriptor is kept either way and closed by the destructor.
ShmPool::ShmPool(int fd, std::size_t size, Logger &logger)
    : m_logger(logger)
    , m_fd(fd)
    , m_size(size)
{
    void *const mapping { mmap(
        nullptr, m_size, PROT_READ | PROT_WRITE, MAP_SHARED, m_fd, 0) };
    if (mapping != MAP_FAILED) {
        m_data = mapping;
        return;
    }
    m_data = nullptr;
    m_logger.err("Failed to mmap shm pool: {}", std::strerror(errno));
}
// Unmaps the memory (if any) and closes the descriptor (if valid).
ShmPool::~ShmPool()
{
    if (m_data != nullptr) {
        munmap(m_data, m_size);
    }
    if (m_fd >= 0) {
        close(m_fd);
    }
}
// Remaps the pool to `new_size` bytes; the mapping may move to a new address.
// Returns false when there is no mapping or when mremap fails (the failure is
// logged and the old mapping and size are kept).
auto ShmPool::resize(std::size_t new_size) -> bool
{
    if (m_data == nullptr) {
        return false;
    }
    void *const remapped { mremap(m_data, m_size, new_size, MREMAP_MAYMOVE) };
    if (remapped != MAP_FAILED) {
        m_data = remapped;
        m_size = new_size;
        return true;
    }
    m_logger.err("Failed to resize shm pool: {}", std::strerror(errno));
    return false;
}
// Start of the mapping as bytes, or null when the pool has no mapping.
// The returned pointer is writable even though the method is const.
auto ShmPool::data() const -> std::byte *
{
return static_cast<std::byte *>(m_data);
}
// Stores the buffer description verbatim; nothing is validated here.
ShmBuffer::ShmBuffer(std::shared_ptr<ShmPool> pool, wl_resource *resource,
    std::int32_t offset, std::int32_t width, std::int32_t height,
    std::int32_t stride, std::uint32_t format)
    : pool(std::move(pool))
    , resource(resource)
    , offset(offset)
    , width(width)
    , height(height)
    , stride(stride)
    , format(format)
{
}
// Address of the buffer's first byte inside the pool mapping, or null when
// there is no pool or the pool has no mapping.
auto ShmBuffer::data() const -> std::byte *
{
    if (!pool) {
        return nullptr;
    }
    if (!pool->data()) {
        return nullptr;
    }
    return pool->data() + offset;
}
// height * stride in bytes, or 0 when either value is not positive.
auto ShmBuffer::byte_size() const -> std::size_t
{
    bool const has_area { height > 0 && stride > 0 };
    if (!has_area) {
        return 0;
    }
    auto const rows { static_cast<std::size_t>(height) };
    auto const row_bytes { static_cast<std::size_t>(stride) };
    return rows * row_bytes;
}
// Returns the ShmBuffer attached to `resource`. The resource's user data is
// interpreted as a heap-allocated std::shared_ptr<ShmBuffer>; a null resource
// or null user data yields an empty pointer.
auto shm_buffer_from_resource(wl_resource *resource)
    -> std::shared_ptr<ShmBuffer>
{
    using Handle = std::shared_ptr<ShmBuffer>;
    if (resource == nullptr) {
        return {};
    }
    auto *const stored { static_cast<Handle *>(
        wl_resource_get_user_data(resource)) };
    return stored != nullptr ? *stored : Handle {};
}
} // namespace Lunar::Wayland

48
src/wayland/Shm.h Normal file
View File

@@ -0,0 +1,48 @@
#pragma once
#include <cstddef>
#include <cstdint>
#include <memory>
#include <wayland-server-core.h>
#include "../Logger.h"
namespace Lunar::Wayland {
// A shared-memory pool: a file descriptor together with its memory mapping.
struct ShmPool {
    // fd:     descriptor of the shared memory file.
    // size:   number of bytes to map.
    // logger: receives error messages; held by reference.
    explicit ShmPool(int fd, std::size_t size, Logger &logger);
    ~ShmPool();

    // The pool holds a file descriptor and a mapping that are released in the
    // destructor, so a copy would release both twice. Copying is disabled.
    ShmPool(ShmPool const &) = delete;
    auto operator=(ShmPool const &) -> ShmPool & = delete;

    // Remaps the pool to new_size bytes; returns false on failure.
    auto resize(std::size_t new_size) -> bool;
    // Start of the mapping.
    auto data() const -> std::byte *;
    // Current size of the mapping in bytes.
    auto size() const -> std::size_t { return m_size; }

private:
    Logger &m_logger;
    int m_fd { -1 };
    std::size_t m_size { 0 };
    void *m_data { nullptr };
};
// Describes one buffer carved out of a ShmPool. The shared_ptr member means a
// buffer keeps its pool alive for as long as the buffer exists.
struct ShmBuffer {
ShmBuffer(std::shared_ptr<ShmPool> pool, wl_resource *resource,
std::int32_t offset, std::int32_t width, std::int32_t height,
std::int32_t stride, std::uint32_t format);
// Address of the buffer's first byte inside the pool mapping.
auto data() const -> std::byte *;
// height * stride.
auto byte_size() const -> std::size_t;
// Pool the buffer lives in.
std::shared_ptr<ShmPool> pool;
// Protocol resource of this buffer; stored as a raw, non-owning pointer.
wl_resource *resource {};
// Offset of the buffer from the start of the pool.
std::int32_t offset {};
std::int32_t width {};
std::int32_t height {};
// Per-row step; multiplied by height in byte_size().
std::int32_t stride {};
// Pixel format value (a WL_SHM_FORMAT_* constant).
std::uint32_t format {};
};
// Looks up the ShmBuffer stored in a resource's user data.
auto shm_buffer_from_resource(wl_resource *resource)
-> std::shared_ptr<ShmBuffer>;
} // namespace Lunar::Wayland

28
src/wayland/Signal.h Normal file
View File

@@ -0,0 +1,28 @@
#pragma once
#include <cassert>
#include <wayland-server-core.h>
namespace Lunar::Wayland {
// Non-owning wrapper around a wl_signal. The wrapped signal is neither
// initialised nor released by this type.
struct Signal {
    // `signal` must not be null (checked with assert).
    // The pointer is copied directly: std::move on a raw pointer does nothing,
    // and this header does not include <utility>.
    Signal(wl_signal *signal)
        : m_signal { signal }
    {
        assert(m_signal);
    }
    ~Signal() = default;

    // Raw access to the wrapped signal.
    inline auto c_ptr() const -> wl_signal * { return m_signal; }

    // Emits the signal via wl_signal_emit_mutable, passing `data` to the
    // listeners as an untyped pointer.
    template<typename T = void> auto flush(T *data)
    {
        wl_signal_emit_mutable(m_signal, (void *)data);
    }

private:
    wl_signal *m_signal {};
};
} // namespace Lunar::Wayland

98
src/wayland/Surface.cpp Normal file
View File

@@ -0,0 +1,98 @@
#include "Surface.h"
#include <algorithm>
#include <utility>
#include "WaylandServer.h"
#include "wayland-server-protocol.h"
namespace Lunar::Wayland {
// Remembers the server and resource, then registers itself with the server.
Surface::Surface(WaylandServer &server, wl_resource *resource)
    : m_server(server)
    , m_resource(resource)
{
    m_server.register_surface(this);
}
// Unregisters from the server, then completes and destroys every frame
// callback that is still queued.
Surface::~Surface()
{
    m_server.unregister_surface(this);
    for (wl_resource *queued : m_frame_callbacks) {
        if (queued == nullptr) {
            continue;
        }
        wl_callback_send_done(queued, m_server.now_ms());
        wl_resource_destroy(queued);
    }
}
// Stores the buffer and offset as pending state; nothing is applied until
// commit(). An empty `buffer` is stored like any other value.
auto Surface::attach(
std::shared_ptr<ShmBuffer> buffer, std::int32_t x, std::int32_t y) -> void
{
m_pending_buffer = std::move(buffer);
m_pending_offset_x = x;
m_pending_offset_y = y;
}
// Damage is currently ignored.
auto Surface::damage(std::int32_t, std::int32_t, std::int32_t, std::int32_t)
-> void
{
}
// Buffer damage is currently ignored.
auto Surface::damage_buffer(
std::int32_t, std::int32_t, std::int32_t, std::int32_t) -> void
{
}
// Queues a frame callback resource; null is ignored. Queued callbacks are
// completed and destroyed by commit() or by the destructor.
auto Surface::frame(wl_resource *callback) -> void
{
if (!callback) {
return;
}
m_frame_callbacks.push_back(callback);
}
auto Surface::commit() -> void
{
auto previous { m_current_buffer };
m_current_buffer = m_pending_buffer;
m_pending_buffer.reset();
if (previous && previous != m_current_buffer && previous->resource) {
wl_buffer_send_release(previous->resource);
}
if (!m_frame_callbacks.empty()) {
auto callbacks { std::move(m_frame_callbacks) };
auto done_time { m_server.now_ms() };
for (auto *callback : callbacks) {
if (callback) {
wl_callback_send_done(callback, done_time);
wl_resource_destroy(callback);
}
}
}
}
// Stores the opaque region; takes effect immediately (no pending slot).
auto Surface::set_opaque_region(std::shared_ptr<Region> region) -> void
{
m_opaque_region = std::move(region);
}
// Stores the input region; takes effect immediately (no pending slot).
auto Surface::set_input_region(std::shared_ptr<Region> region) -> void
{
m_input_region = std::move(region);
}
// Stores the transform value unchanged; it is not validated here.
auto Surface::set_buffer_transform(std::int32_t transform) -> void
{
m_buffer_transform = transform;
}
// Stores the scale, raising values below 1 to 1.
auto Surface::set_buffer_scale(std::int32_t scale) -> void
{
m_buffer_scale = std::max(1, scale);
}
} // namespace Lunar::Wayland

53
src/wayland/Surface.h Normal file
View File

@@ -0,0 +1,53 @@
#pragma once
#include <cstdint>
#include <memory>
#include <vector>
#include <wayland-server-core.h>
#include "Region.h"
#include "Shm.h"
namespace Lunar::Wayland {
struct WaylandServer;
// Server-side state of one surface: its resource, the pending and current
// buffer, regions, queued frame callbacks and buffer transform/scale.
struct Surface {
explicit Surface(WaylandServer &server, wl_resource *resource);
~Surface();
auto resource() const -> wl_resource * { return m_resource; }
// Buffer applied by the most recent commit(); may be empty.
auto current_buffer() const -> std::shared_ptr<ShmBuffer> const &
{
return m_current_buffer;
}
// Sets the pending buffer and offset.
auto attach(std::shared_ptr<ShmBuffer> buffer, std::int32_t x,
std::int32_t y) -> void;
auto damage(std::int32_t x, std::int32_t y, std::int32_t width,
std::int32_t height) -> void;
auto damage_buffer(std::int32_t x, std::int32_t y, std::int32_t width,
std::int32_t height) -> void;
// Queues a frame callback resource.
auto frame(wl_resource *callback) -> void;
// Applies the pending buffer and completes queued frame callbacks.
auto commit() -> void;
auto set_opaque_region(std::shared_ptr<Region> region) -> void;
auto set_input_region(std::shared_ptr<Region> region) -> void;
auto set_buffer_transform(std::int32_t transform) -> void;
auto set_buffer_scale(std::int32_t scale) -> void;
private:
WaylandServer &m_server;
wl_resource *m_resource {};
std::shared_ptr<ShmBuffer> m_pending_buffer {};
std::shared_ptr<ShmBuffer> m_current_buffer {};
std::shared_ptr<Region> m_opaque_region {};
std::shared_ptr<Region> m_input_region {};
// Raw callback resources awaiting completion.
std::vector<wl_resource *> m_frame_callbacks {};
std::int32_t m_buffer_transform { 0 };
std::int32_t m_buffer_scale { 1 };
// Offset passed to the most recent attach().
std::int32_t m_pending_offset_x { 0 };
std::int32_t m_pending_offset_y { 0 };
};
} // namespace Lunar::Wayland

View File

@@ -0,0 +1,81 @@
#include "WaylandServer.h"
#include <algorithm>
#include <chrono>
#include <span>
#include <stdexcept>
namespace Lunar::Wayland {
// Fetches the display's event loop, opens an automatically named listening
// socket and creates the compositor, shm and xdg_wm_base globals.
// Throws std::runtime_error when the event loop or the socket is unavailable.
WaylandServer::WaylandServer(Logger &logger)
    : m_logger(logger)
{
    m_loop = wl_display_get_event_loop(m_display.c_ptr());
    if (m_loop == nullptr) {
        throw std::runtime_error("Failed to get Wayland event loop");
    }

    auto *const name { wl_display_add_socket_auto(m_display.c_ptr()) };
    if (name == nullptr) {
        throw std::runtime_error("Failed to create Wayland socket");
    }
    m_socket_name = name;
    m_logger.info("Wayland listening on {}", m_socket_name);

    m_compositor_global = create_compositor_global();
    m_shm_global = create_shm_global();
    m_xdg_wm_base_global = create_xdg_wm_base_global();
}
WaylandServer::~WaylandServer() = default;
// Runs one event-loop dispatch with a timeout of 0. Does nothing without an
// event loop. The dispatch result is not inspected.
auto WaylandServer::dispatch() -> void
{
if (!m_loop) {
return;
}
wl_event_loop_dispatch(m_loop, 0);
}
// Flushes queued events to all clients of the display.
auto WaylandServer::flush() -> void
{
wl_display_flush_clients(m_display.c_ptr());
}
// Milliseconds elapsed on std::chrono::steady_clock since its epoch, reduced
// to 32 bits by the final cast.
auto WaylandServer::now_ms() const -> std::uint32_t
{
    namespace chrono = std::chrono;
    auto const since_epoch { chrono::steady_clock::now().time_since_epoch() };
    auto const millis {
        chrono::duration_cast<chrono::milliseconds>(since_epoch)
    };
    return static_cast<std::uint32_t>(millis.count());
}
// Appends `surface` to the list of known surfaces; null is ignored. No check
// is made for duplicates.
auto WaylandServer::register_surface(Surface *surface) -> void
{
    if (surface != nullptr) {
        m_surfaces.push_back(surface);
    }
}
// Removes every occurrence of `surface` from the list; null is ignored.
auto WaylandServer::unregister_surface(Surface *surface) -> void
{
    if (surface != nullptr) {
        std::erase(m_surfaces, surface);
    }
}
// Read-only view over the registered surfaces, in registration order.
auto WaylandServer::surfaces() const -> std::span<Surface *const>
{
    return std::span<Surface *const>(m_surfaces.data(), m_surfaces.size());
}
} // namespace Lunar::Wayland
#include "Surface.h"

View File

@@ -0,0 +1,50 @@
#pragma once
#include <cstdint>
#include <memory>
#include <span>
#include <string>
#include <string_view>
#include <vector>
#include <wayland-server-core.h>
#include "../Logger.h"
#include "Display.h"
#include "Global.h"
namespace Lunar::Wayland {
struct Surface;
// Owns the display, its listening socket and the protocol globals, and keeps
// a list of the currently existing surfaces.
struct WaylandServer {
// Sets up the socket and the globals; throws std::runtime_error on failure.
explicit WaylandServer(Logger &logger);
~WaylandServer();
auto display() -> Display & { return m_display; }
auto logger() -> Logger & { return m_logger; }
// Name of the listening socket chosen at construction.
auto socket_name() const -> std::string_view { return m_socket_name; }
// Runs one non-blocking event-loop dispatch.
auto dispatch() -> void;
// Flushes queued events to the clients.
auto flush() -> void;
// Steady-clock time in milliseconds, truncated to 32 bits.
auto now_ms() const -> std::uint32_t;
// Surface bookkeeping; surfaces are stored as raw, non-owning pointers.
auto register_surface(Surface *surface) -> void;
auto unregister_surface(Surface *surface) -> void;
auto surfaces() const -> std::span<Surface *const>;
private:
// One factory per global; each is defined in its own protocol source file.
auto create_compositor_global() -> std::unique_ptr<Global>;
auto create_shm_global() -> std::unique_ptr<Global>;
auto create_xdg_wm_base_global() -> std::unique_ptr<Global>;
Logger &m_logger;
Display m_display {};
wl_event_loop *m_loop { nullptr };
std::string m_socket_name {};
std::unique_ptr<Global> m_compositor_global {};
std::unique_ptr<Global> m_shm_global {};
std::unique_ptr<Global> m_xdg_wm_base_global {};
std::vector<Surface *> m_surfaces {};
};
} // namespace Lunar::Wayland

View File

@@ -0,0 +1,254 @@
#include <algorithm>
#include <cstdint>
#include <memory>
#include <utility>
#include <wayland-server-core.h>
#include "wayland-server-protocol.h"
#include "../Region.h"
#include "../Shm.h"
#include "../Surface.h"
#include "../WaylandServer.h"
namespace Lunar::Wayland {
namespace {
constexpr std::uint32_t COMPOSITOR_VERSION = 4;
// Version of `resource`, capped at COMPOSITOR_VERSION.
auto resource_version(wl_resource *resource) -> std::uint32_t
{
    auto const bound { static_cast<std::uint32_t>(
        wl_resource_get_version(resource)) };
    return bound < COMPOSITOR_VERSION ? bound : COMPOSITOR_VERSION;
}
// Returns the Region attached to `resource`. The user data is interpreted as
// a heap-allocated std::shared_ptr<Region>; a null resource or null user data
// yields an empty pointer.
auto region_from_resource(wl_resource *resource) -> std::shared_ptr<Region>
{
    using Handle = std::shared_ptr<Region>;
    if (resource == nullptr) {
        return {};
    }
    auto *const stored { static_cast<Handle *>(
        wl_resource_get_user_data(resource)) };
    return stored != nullptr ? *stored : Handle {};
}
// Resource destroy hook: frees the heap-allocated shared_ptr handle, dropping
// the resource's reference to the Region.
void region_destroy(wl_resource *resource)
{
    using Handle = std::shared_ptr<Region>;
    delete static_cast<Handle *>(wl_resource_get_user_data(resource));
}
// wl_region.destroy: destroys the resource, which in turn runs its destroy hook.
void region_handle_destroy(wl_client *, wl_resource *resource)
{
wl_resource_destroy(resource);
}
// wl_region.add: forwards the rectangle to the Region, if one is attached.
void region_handle_add(wl_client *, wl_resource *resource, std::int32_t x,
std::int32_t y, std::int32_t width, std::int32_t height)
{
if (auto region { region_from_resource(resource) }) {
region->add(x, y, width, height);
}
}
// wl_region.subtract: forwards the rectangle to the Region, if one is attached.
void region_handle_subtract(wl_client *, wl_resource *resource, std::int32_t x,
std::int32_t y, std::int32_t width, std::int32_t height)
{
if (auto region { region_from_resource(resource) }) {
region->subtract(x, y, width, height);
}
}
// Request table installed on every wl_region resource.
struct wl_region_interface const REGION_INTERFACE = {
.destroy = region_handle_destroy,
.add = region_handle_add,
.subtract = region_handle_subtract,
};
// Interprets the resource's user data as a Surface pointer (no type check).
auto surface_from_resource(wl_resource *resource) -> Surface *
{
return static_cast<Surface *>(wl_resource_get_user_data(resource));
}
// Resource destroy hook: deletes the Surface stored in the user data.
void surface_destroy_resource(wl_resource *resource)
{
auto *surface { surface_from_resource(resource) };
delete surface;
}
// wl_surface.destroy: destroys the resource, which runs the hook above.
void surface_handle_destroy(wl_client *, wl_resource *resource)
{
wl_resource_destroy(resource);
}
// wl_surface.attach: hands the buffer (empty when `buffer_resource` is null
// or carries no handle) and the offset to the Surface.
void surface_handle_attach(wl_client *, wl_resource *resource,
    wl_resource *buffer_resource, std::int32_t x, std::int32_t y)
{
    if (auto *surface { surface_from_resource(resource) }) {
        surface->attach(shm_buffer_from_resource(buffer_resource), x, y);
    }
}
// wl_surface.damage: forwards the rectangle to the Surface.
void surface_handle_damage(wl_client *, wl_resource *resource, std::int32_t x,
    std::int32_t y, std::int32_t width, std::int32_t height)
{
    auto *surface { surface_from_resource(resource) };
    if (!surface) {
        return;
    }
    surface->damage(x, y, width, height);
}
// wl_surface.frame: creates the wl_callback object for `callback_id` and
// queues it on the Surface, which later completes and destroys it.
void surface_handle_frame(
    wl_client *client, wl_resource *resource, std::uint32_t callback_id)
{
    auto *surface { surface_from_resource(resource) };
    if (!surface) {
        return;
    }
    // wl_callback is only defined at version 1; it does not inherit the
    // (possibly higher) version of the surface that created it.
    auto *callback_resource { wl_resource_create(
        client, &wl_callback_interface, 1, callback_id) };
    if (!callback_resource) {
        // Report the failure instead of leaving the client with an object id
        // that was never created.
        wl_client_post_no_memory(client);
        return;
    }
    // wl_callback has no requests, so no request table is needed.
    // NOTE(review): no destroy hook is installed while the Surface keeps the
    // raw pointer - confirm the callback cannot be destroyed before the
    // Surface completes it.
    wl_resource_set_implementation(
        callback_resource, nullptr, nullptr, nullptr);
    surface->frame(callback_resource);
}
// wl_surface.set_opaque_region: a null region resource results in an empty
// region pointer being stored.
void surface_handle_set_opaque_region(
wl_client *, wl_resource *resource, wl_resource *region_resource)
{
if (auto *surface { surface_from_resource(resource) }) {
surface->set_opaque_region(region_from_resource(region_resource));
}
}
// wl_surface.set_input_region: a null region resource results in an empty
// region pointer being stored.
void surface_handle_set_input_region(
wl_client *, wl_resource *resource, wl_resource *region_resource)
{
if (auto *surface { surface_from_resource(resource) }) {
surface->set_input_region(region_from_resource(region_resource));
}
}
// wl_surface.commit: applies the surface's pending state.
void surface_handle_commit(wl_client *, wl_resource *resource)
{
if (auto *surface { surface_from_resource(resource) }) {
surface->commit();
}
}
// wl_surface.set_buffer_transform: forwarded without validation.
void surface_handle_set_buffer_transform(
wl_client *, wl_resource *resource, std::int32_t transform)
{
if (auto *surface { surface_from_resource(resource) }) {
surface->set_buffer_transform(transform);
}
}
// wl_surface.set_buffer_scale: forwarded without validation here.
void surface_handle_set_buffer_scale(
wl_client *, wl_resource *resource, std::int32_t scale)
{
if (auto *surface { surface_from_resource(resource) }) {
surface->set_buffer_scale(scale);
}
}
// wl_surface.damage_buffer: forwards the rectangle to the Surface.
void surface_handle_damage_buffer(wl_client *, wl_resource *resource,
std::int32_t x, std::int32_t y, std::int32_t width, std::int32_t height)
{
if (auto *surface { surface_from_resource(resource) }) {
surface->damage_buffer(x, y, width, height);
}
}
// wl_surface.offset: accepted but ignored.
void surface_handle_offset(
wl_client *, wl_resource *resource, std::int32_t x, std::int32_t y)
{
(void)resource;
(void)x;
(void)y;
}
// Request table installed on every wl_surface resource.
struct wl_surface_interface const SURFACE_INTERFACE = {
.destroy = surface_handle_destroy,
.attach = surface_handle_attach,
.damage = surface_handle_damage,
.frame = surface_handle_frame,
.set_opaque_region = surface_handle_set_opaque_region,
.set_input_region = surface_handle_set_input_region,
.commit = surface_handle_commit,
.set_buffer_transform = surface_handle_set_buffer_transform,
.set_buffer_scale = surface_handle_set_buffer_scale,
.damage_buffer = surface_handle_damage_buffer,
.offset = surface_handle_offset,
};
// wl_compositor.create_surface: creates the wl_surface resource for `id` and
// a Surface object owned by that resource (deleted by the resource's destroy
// hook).
void compositor_handle_create_surface(
    wl_client *client, wl_resource *resource, std::uint32_t id)
{
    auto *server { static_cast<WaylandServer *>(
        wl_resource_get_user_data(resource)) };
    if (!server) {
        // The client expects `id` to exist after this request; report the
        // failure rather than silently leaving it uncreated.
        wl_client_post_no_memory(client);
        return;
    }
    // The surface inherits the compositor's bound version, capped at what
    // this implementation supports.
    auto version { resource_version(resource) };
    auto *surface_resource { wl_resource_create(
        client, &wl_surface_interface, version, id) };
    if (!surface_resource) {
        wl_client_post_no_memory(client);
        return;
    }
    auto *surface { new Surface(*server, surface_resource) };
    wl_resource_set_implementation(surface_resource, &SURFACE_INTERFACE,
        surface, surface_destroy_resource);
}
// wl_compositor.create_region: creates the wl_region resource for `id` and
// attaches a Region to it through a heap-allocated shared_ptr handle (freed
// by region_destroy).
void compositor_handle_create_region(
    wl_client *client, wl_resource *resource, std::uint32_t id)
{
    (void)resource;
    // wl_region is only defined at version 1; it does not inherit the
    // compositor's bound version.
    auto *region_resource { wl_resource_create(
        client, &wl_region_interface, 1, id) };
    if (!region_resource) {
        // Report the failure instead of leaving `id` uncreated.
        wl_client_post_no_memory(client);
        return;
    }
    auto region { std::make_shared<Region>(region_resource) };
    auto *handle { new std::shared_ptr<Region>(std::move(region)) };
    wl_resource_set_implementation(
        region_resource, &REGION_INTERFACE, handle, region_destroy);
}
// Request table installed on every wl_compositor resource.
struct wl_compositor_interface const COMPOSITOR_INTERFACE = {
    .create_surface = compositor_handle_create_surface,
    .create_region = compositor_handle_create_region,
};
// Bind handler of the wl_compositor global.
//   data    - the WaylandServer registered with the global.
//   version - version requested by the client; capped at COMPOSITOR_VERSION.
void bind_compositor(
    wl_client *client, void *data, std::uint32_t version, std::uint32_t id)
{
    auto *server { static_cast<WaylandServer *>(data) };
    auto *resource { wl_resource_create(client, &wl_compositor_interface,
        std::min(version, COMPOSITOR_VERSION), id) };
    if (!resource) {
        // Report the failure instead of leaving the bind silently incomplete.
        wl_client_post_no_memory(client);
        return;
    }
    // No destroy hook: the server outlives the resource and is not owned by it.
    wl_resource_set_implementation(
        resource, &COMPOSITOR_INTERFACE, server, nullptr);
}
} // namespace
// Builds the Global for wl_compositor at COMPOSITOR_VERSION, passing this
// server as data and bind_compositor as the bind handler.
auto WaylandServer::create_compositor_global() -> std::unique_ptr<Global>
{
return std::make_unique<Global>(display(), &wl_compositor_interface,
COMPOSITOR_VERSION, this, bind_compositor);
}
} // namespace Lunar::Wayland

View File

@@ -0,0 +1,171 @@
#include <algorithm>
#include <cstdint>
#include <memory>
#include <unistd.h>
#include <utility>
#include <wayland-server-core.h>
#include "wayland-server-protocol.h"
#include "../Shm.h"
#include "../WaylandServer.h"
namespace Lunar::Wayland {
namespace {
constexpr std::uint32_t SHM_VERSION = 2;
// Returns the ShmPool attached to `resource`. The user data is interpreted as
// a heap-allocated std::shared_ptr<ShmPool>; null user data yields an empty
// pointer.
auto shm_pool_from_resource(wl_resource *resource) -> std::shared_ptr<ShmPool>
{
    using Handle = std::shared_ptr<ShmPool>;
    auto *const stored { static_cast<Handle *>(
        wl_resource_get_user_data(resource)) };
    return stored != nullptr ? *stored : Handle {};
}
// Resource destroy hook: frees the shared_ptr handle, dropping the resource's
// reference to the pool. Other holders of the shared_ptr keep the pool alive.
void shm_pool_destroy_resource(wl_resource *resource)
{
    using Handle = std::shared_ptr<ShmPool>;
    delete static_cast<Handle *>(wl_resource_get_user_data(resource));
}
// Resource destroy hook for wl_buffer: detaches the ShmBuffer from the dying
// resource and frees the shared_ptr handle.
void shm_buffer_destroy_resource(wl_resource *resource)
{
    auto *handle { static_cast<std::shared_ptr<ShmBuffer> *>(
        wl_resource_get_user_data(resource)) };
    // The ShmBuffer can outlive this resource because other shared_ptr copies
    // (e.g. a surface's current buffer) may still reference it. Clear its
    // back-pointer so nobody sends events to a destroyed resource later.
    if (handle && *handle) {
        (*handle)->resource = nullptr;
    }
    delete handle;
}
// wl_shm_pool.destroy: destroys the resource, which runs its destroy hook.
void shm_pool_handle_destroy(wl_client *, wl_resource *resource)
{
    wl_resource_destroy(resource);
}
// wl_shm_pool.resize: grows the pool mapping to `size` bytes.
// The protocol only allows a pool to be made bigger. Shrinking would leave
// existing buffers pointing past the end of the mapping, so it is rejected
// with a protocol error, as are non-positive sizes and remap failures.
void shm_pool_handle_resize(
    wl_client *, wl_resource *resource, std::int32_t size)
{
    auto pool { shm_pool_from_resource(resource) };
    if (!pool) {
        return;
    }
    if (size <= 0 || static_cast<std::size_t>(size) < pool->size()) {
        wl_resource_post_error(resource, WL_SHM_ERROR_INVALID_FD,
            "Shrinking a shm pool is not allowed");
        return;
    }
    if (!pool->resize(static_cast<std::size_t>(size))) {
        wl_resource_post_error(
            resource, WL_SHM_ERROR_INVALID_FD, "Failed to resize shm pool");
    }
}
void shm_handle_release(wl_client *, wl_resource *) { }
void shm_pool_handle_create_buffer(wl_client *client, wl_resource *resource,
std::uint32_t id, std::int32_t offset, std::int32_t width,
std::int32_t height, std::int32_t stride, std::uint32_t format)
{
auto pool { shm_pool_from_resource(resource) };
if (!pool) {
return;
}
if (width <= 0 || height <= 0 || stride <= 0 || offset < 0) {
wl_resource_post_error(resource, WL_SHM_ERROR_INVALID_STRIDE,
"Invalid shm buffer geometry");
return;
}
if (format != WL_SHM_FORMAT_XRGB8888 && format != WL_SHM_FORMAT_ARGB8888) {
wl_resource_post_error(
resource, WL_SHM_ERROR_INVALID_FORMAT, "Unsupported shm format");
return;
}
auto required { static_cast<std::size_t>(offset)
+ static_cast<std::size_t>(height) * static_cast<std::size_t>(stride) };
if (required > pool->size()) {
wl_resource_post_error(resource, WL_SHM_ERROR_INVALID_STRIDE,
"Shm buffer size out of bounds");
return;
}
auto *buffer_resource { wl_resource_create(
client, &wl_buffer_interface, 1, id) };
if (!buffer_resource) {
return;
}
auto buffer { std::make_shared<ShmBuffer>(
pool, buffer_resource, offset, width, height, stride, format) };
auto *handle { new std::shared_ptr<ShmBuffer>(std::move(buffer)) };
wl_resource_set_implementation(
buffer_resource, nullptr, handle, shm_buffer_destroy_resource);
}
// Request table installed on every wl_shm_pool resource.
struct wl_shm_pool_interface const SHM_POOL_INTERFACE = {
.create_buffer = shm_pool_handle_create_buffer,
.destroy = shm_pool_handle_destroy,
.resize = shm_pool_handle_resize,
};
// wl_shm.create_pool: maps `size` bytes of `fd` and creates the wl_shm_pool
// resource for `id`.
// Ownership of `fd` passes to this function: on every early exit it is closed
// here, and once a ShmPool has been constructed the pool closes it.
void shm_handle_create_pool(wl_client *client, wl_resource *resource,
    std::uint32_t id, int fd, std::int32_t size)
{
    auto *server
        = static_cast<WaylandServer *>(wl_resource_get_user_data(resource));
    if (!server) {
        // Do not leak the descriptor received from the client.
        close(fd);
        wl_client_post_no_memory(client);
        return;
    }
    if (size <= 0) {
        wl_resource_post_error(
            resource, WL_SHM_ERROR_INVALID_STRIDE, "Invalid shm pool size");
        close(fd);
        return;
    }
    // From here on the pool owns `fd`.
    auto pool { std::make_shared<ShmPool>(
        fd, static_cast<std::size_t>(size), server->logger()) };
    if (!pool->data()) {
        wl_resource_post_error(
            resource, WL_SHM_ERROR_INVALID_FD, "Failed to mmap shm pool");
        return;
    }
    auto *pool_resource
        = wl_resource_create(client, &wl_shm_pool_interface, 1, id);
    if (!pool_resource) {
        wl_client_post_no_memory(client);
        return;
    }
    // The resource owns one shared_ptr handle, freed by its destroy hook.
    auto *handle { new std::shared_ptr<ShmPool>(std::move(pool)) };
    wl_resource_set_implementation(
        pool_resource, &SHM_POOL_INTERFACE, handle, shm_pool_destroy_resource);
}
// Request table installed on every wl_shm resource.
struct wl_shm_interface const SHM_INTERFACE = {
.create_pool = shm_handle_create_pool,
.release = shm_handle_release,
};
// Bind handler of the wl_shm global: creates the wl_shm resource and
// advertises the two supported pixel formats.
//   data    - the WaylandServer registered with the global.
//   version - version requested by the client; capped at SHM_VERSION.
void bind_shm(
    wl_client *client, void *data, std::uint32_t version, std::uint32_t id)
{
    auto *server { static_cast<WaylandServer *>(data) };
    auto *resource { wl_resource_create(
        client, &wl_shm_interface, std::min(version, SHM_VERSION), id) };
    if (!resource) {
        // Report the failure instead of leaving the bind silently incomplete.
        wl_client_post_no_memory(client);
        return;
    }
    wl_resource_set_implementation(resource, &SHM_INTERFACE, server, nullptr);
    wl_shm_send_format(resource, WL_SHM_FORMAT_XRGB8888);
    wl_shm_send_format(resource, WL_SHM_FORMAT_ARGB8888);
}
} // namespace
// Builds the Global for wl_shm at SHM_VERSION, passing this server as data
// and bind_shm as the bind handler.
auto WaylandServer::create_shm_global() -> std::unique_ptr<Global>
{
return std::make_unique<Global>(
display(), &wl_shm_interface, SHM_VERSION, this, bind_shm);
}
} // namespace Lunar::Wayland

View File

@@ -0,0 +1,327 @@
#include <algorithm>
#include <cstdint>
#include <wayland-server-core.h>
#include "xdg-shell-server-protocol.h"
#include "../WaylandServer.h"
namespace Lunar::Wayland {
namespace {
constexpr std::uint32_t XDG_WM_BASE_VERSION = 7;
// State attached to an xdg_surface resource.
struct XdgSurface {
WaylandServer &server;
// The xdg_surface resource itself.
wl_resource *resource {};
// The wl_surface resource this xdg_surface was created for (raw pointer).
wl_resource *surface_resource {};
// The xdg_toplevel resource created from this surface, or null.
wl_resource *toplevel_resource {};
// Written both when a configure is sent and when the client acknowledges one.
std::uint32_t last_serial { 0 };
};
// State attached to an xdg_toplevel resource.
struct XdgToplevel {
WaylandServer &server;
// The xdg_toplevel resource itself.
wl_resource *resource {};
// The XdgSurface this toplevel was created from (raw, non-owning pointer).
XdgSurface *surface {};
};
extern struct xdg_toplevel_interface const XDG_TOPLEVEL_INTERFACE_IMPL;
// Interprets the resource's user data as an XdgSurface pointer (no type check).
auto xdg_surface_from_resource(wl_resource *resource) -> XdgSurface *
{
return static_cast<XdgSurface *>(wl_resource_get_user_data(resource));
}
// Interprets the resource's user data as an XdgToplevel pointer (no type check).
auto xdg_toplevel_from_resource(wl_resource *resource) -> XdgToplevel *
{
return static_cast<XdgToplevel *>(wl_resource_get_user_data(resource));
}
// Sends a configure sequence for `surface`: when a toplevel exists, first an
// xdg_toplevel.configure with size 0x0 and an empty state array, then the
// xdg_surface.configure carrying a fresh serial. The serial is also stored in
// surface.last_serial.
auto send_configure(XdgSurface &surface) -> void
{
    auto const serial { static_cast<std::uint32_t>(
        surface.server.display().next_serial()) };

    if (wl_resource *const toplevel { surface.toplevel_resource }) {
        wl_array no_states;
        wl_array_init(&no_states);
        xdg_toplevel_send_configure(toplevel, 0, 0, &no_states);
        wl_array_release(&no_states);
    }

    surface.last_serial = serial;
    xdg_surface_send_configure(surface.resource, serial);
}
// xdg_positioner request handlers. Only destroy does any work; every other
// request is accepted and ignored, so no positioner state is kept.
auto xdg_positioner_handle_destroy(wl_client *, wl_resource *resource) -> void
{
wl_resource_destroy(resource);
}
auto xdg_positioner_handle_set_size(
wl_client *, wl_resource *, std::int32_t, std::int32_t) -> void
{
}
auto xdg_positioner_handle_set_anchor_rect(wl_client *, wl_resource *,
std::int32_t, std::int32_t, std::int32_t, std::int32_t) -> void
{
}
auto xdg_positioner_handle_set_anchor(wl_client *, wl_resource *, std::uint32_t)
-> void
{
}
auto xdg_positioner_handle_set_gravity(
wl_client *, wl_resource *, std::uint32_t) -> void
{
}
auto xdg_positioner_handle_set_constraint_adjustment(
wl_client *, wl_resource *, std::uint32_t) -> void
{
}
auto xdg_positioner_handle_set_offset(
wl_client *, wl_resource *, std::int32_t, std::int32_t) -> void
{
}
auto xdg_positioner_handle_set_reactive(wl_client *, wl_resource *) -> void { }
auto xdg_positioner_handle_set_parent_size(
wl_client *, wl_resource *, std::int32_t, std::int32_t) -> void
{
}
auto xdg_positioner_handle_set_parent_configure(
wl_client *, wl_resource *, std::uint32_t) -> void
{
}
// Request table installed on every xdg_positioner resource.
struct xdg_positioner_interface const XDG_POSITIONER_INTERFACE_IMPL = {
.destroy = xdg_positioner_handle_destroy,
.set_size = xdg_positioner_handle_set_size,
.set_anchor_rect = xdg_positioner_handle_set_anchor_rect,
.set_anchor = xdg_positioner_handle_set_anchor,
.set_gravity = xdg_positioner_handle_set_gravity,
.set_constraint_adjustment
= xdg_positioner_handle_set_constraint_adjustment,
.set_offset = xdg_positioner_handle_set_offset,
.set_reactive = xdg_positioner_handle_set_reactive,
.set_parent_size = xdg_positioner_handle_set_parent_size,
.set_parent_configure = xdg_positioner_handle_set_parent_configure,
};
// Resource destroy hook for xdg_surface: deletes the XdgSurface stored in the
// user data.
auto xdg_surface_destroy_resource(wl_resource *resource) -> void
{
    auto *surface { xdg_surface_from_resource(resource) };
    // A toplevel created from this surface holds a raw pointer back to it and
    // dereferences that pointer in its own destroy hook. The xdg_surface can
    // be destroyed first (for example when the client goes away), so sever
    // the link before the XdgSurface is freed.
    if (surface && surface->toplevel_resource) {
        if (auto *toplevel {
                xdg_toplevel_from_resource(surface->toplevel_resource) }) {
            toplevel->surface = nullptr;
        }
    }
    delete surface;
}
// xdg_surface.destroy: destroys the resource, which runs the hook above.
auto xdg_surface_handle_destroy(wl_client *, wl_resource *resource) -> void
{
    wl_resource_destroy(resource);
}
// xdg_surface.get_toplevel: creates the xdg_toplevel resource for `id`, links
// it with the XdgSurface and sends the initial configure sequence.
auto xdg_surface_handle_get_toplevel(
    wl_client *client, wl_resource *resource, std::uint32_t id) -> void
{
    auto *surface { xdg_surface_from_resource(resource) };
    if (!surface) {
        return;
    }
    // An xdg_surface can carry only one role object. Overwriting
    // toplevel_resource would orphan the first toplevel, so a second request
    // is answered with a protocol error.
    if (surface->toplevel_resource) {
        wl_resource_post_error(resource, XDG_SURFACE_ERROR_ALREADY_CONSTRUCTED,
            "xdg_surface already has a toplevel");
        return;
    }
    auto version { wl_resource_get_version(resource) };
    auto *toplevel_resource { wl_resource_create(
        client, &::xdg_toplevel_interface, version, id) };
    if (!toplevel_resource) {
        // Report the failure instead of leaving `id` uncreated.
        wl_client_post_no_memory(client);
        return;
    }
    auto *toplevel { new XdgToplevel {
        surface->server, toplevel_resource, surface } };
    surface->toplevel_resource = toplevel_resource;
    // Destroy hook: unlink from the XdgSurface (if it still points at this
    // resource) and free the XdgToplevel.
    wl_resource_set_implementation(toplevel_resource,
        &XDG_TOPLEVEL_INTERFACE_IMPL, toplevel, [](wl_resource *res) {
            auto *tl { xdg_toplevel_from_resource(res) };
            if (tl && tl->surface && tl->surface->toplevel_resource == res) {
                tl->surface->toplevel_resource = nullptr;
            }
            delete tl;
        });
    send_configure(*surface);
}
// xdg_surface.get_popup: accepted but ignored.
// NOTE(review): no resource is created for the new object id carried by this
// request - confirm how a client that goes on to use the popup is handled.
auto xdg_surface_handle_get_popup(wl_client *, wl_resource *, std::uint32_t,
wl_resource *, wl_resource *) -> void
{
}
// xdg_surface.set_window_geometry: accepted but ignored.
auto xdg_surface_handle_set_window_geometry(wl_client *, wl_resource *,
std::int32_t, std::int32_t, std::int32_t, std::int32_t) -> void
{
}
// xdg_surface.ack_configure: stores the acknowledged serial in last_serial,
// the same field send_configure writes the sent serial to. The serial is not
// validated.
auto xdg_surface_handle_ack_configure(
wl_client *, wl_resource *resource, std::uint32_t serial) -> void
{
if (auto *surface { xdg_surface_from_resource(resource) }) {
surface->last_serial = serial;
}
}
// Request table installed on every xdg_surface resource.
struct xdg_surface_interface const XDG_SURFACE_INTERFACE_IMPL = {
.destroy = xdg_surface_handle_destroy,
.get_toplevel = xdg_surface_handle_get_toplevel,
.get_popup = xdg_surface_handle_get_popup,
.set_window_geometry = xdg_surface_handle_set_window_geometry,
.ack_configure = xdg_surface_handle_ack_configure,
};
// xdg_toplevel request handlers. Only destroy does any work; every other
// request is accepted and ignored, so no toplevel state is kept.
auto xdg_toplevel_handle_destroy(wl_client *, wl_resource *resource) -> void
{
wl_resource_destroy(resource);
}
auto xdg_toplevel_handle_set_parent(wl_client *, wl_resource *, wl_resource *)
-> void
{
}
auto xdg_toplevel_handle_set_title(wl_client *, wl_resource *, char const *)
-> void
{
}
auto xdg_toplevel_handle_set_app_id(wl_client *, wl_resource *, char const *)
-> void
{
}
auto xdg_toplevel_handle_show_window_menu(wl_client *, wl_resource *,
wl_resource *, std::uint32_t, std::int32_t, std::int32_t) -> void
{
}
auto xdg_toplevel_handle_move(
wl_client *, wl_resource *, wl_resource *, std::uint32_t) -> void
{
}
auto xdg_toplevel_handle_resize(wl_client *, wl_resource *, wl_resource *,
std::uint32_t, std::uint32_t) -> void
{
}
auto xdg_toplevel_handle_set_max_size(
wl_client *, wl_resource *, std::int32_t, std::int32_t) -> void
{
}
auto xdg_toplevel_handle_set_min_size(
wl_client *, wl_resource *, std::int32_t, std::int32_t) -> void
{
}
auto xdg_toplevel_handle_set_maximized(wl_client *, wl_resource *) -> void { }
auto xdg_toplevel_handle_unset_maximized(wl_client *, wl_resource *) -> void { }
auto xdg_toplevel_handle_set_fullscreen(
wl_client *, wl_resource *, wl_resource *) -> void
{
}
auto xdg_toplevel_handle_unset_fullscreen(wl_client *, wl_resource *) -> void {
}
auto xdg_toplevel_handle_set_minimized(wl_client *, wl_resource *) -> void { }
// Request table installed on every xdg_toplevel resource.
struct xdg_toplevel_interface const XDG_TOPLEVEL_INTERFACE_IMPL = {
.destroy = xdg_toplevel_handle_destroy,
.set_parent = xdg_toplevel_handle_set_parent,
.set_title = xdg_toplevel_handle_set_title,
.set_app_id = xdg_toplevel_handle_set_app_id,
.show_window_menu = xdg_toplevel_handle_show_window_menu,
.move = xdg_toplevel_handle_move,
.resize = xdg_toplevel_handle_resize,
.set_max_size = xdg_toplevel_handle_set_max_size,
.set_min_size = xdg_toplevel_handle_set_min_size,
.set_maximized = xdg_toplevel_handle_set_maximized,
.unset_maximized = xdg_toplevel_handle_unset_maximized,
.set_fullscreen = xdg_toplevel_handle_set_fullscreen,
.unset_fullscreen = xdg_toplevel_handle_unset_fullscreen,
.set_minimized = xdg_toplevel_handle_set_minimized,
};
// xdg_wm_base.destroy: destroys the resource.
auto xdg_wm_base_handle_destroy(wl_client *, wl_resource *resource) -> void
{
wl_resource_destroy(resource);
}
// xdg_wm_base.create_positioner: creates the xdg_positioner resource for `id`
// at the version of the wm_base resource. No state is attached (null user
// data and no destroy hook). A failed creation returns silently.
auto xdg_wm_base_handle_create_positioner(
wl_client *client, wl_resource *resource, std::uint32_t id) -> void
{
auto version { wl_resource_get_version(resource) };
auto *positioner { wl_resource_create(
client, &::xdg_positioner_interface, version, id) };
if (!positioner) {
return;
}
wl_resource_set_implementation(
positioner, &XDG_POSITIONER_INTERFACE_IMPL, nullptr, nullptr);
}
// xdg_wm_base.get_xdg_surface: creates the xdg_surface resource for `id` and
// an XdgSurface object owned by that resource (deleted by its destroy hook).
// `surface_resource` is the wl_surface the xdg_surface is created for; it is
// stored as a raw pointer.
auto xdg_wm_base_handle_get_xdg_surface(wl_client *client,
    wl_resource *resource, std::uint32_t id, wl_resource *surface_resource)
    -> void
{
    auto *server
        = static_cast<WaylandServer *>(wl_resource_get_user_data(resource));
    if (!server) {
        // The client expects `id` to exist after this request; report the
        // failure rather than silently leaving it uncreated.
        wl_client_post_no_memory(client);
        return;
    }
    auto version { wl_resource_get_version(resource) };
    auto *xdg_surface_resource { wl_resource_create(
        client, &::xdg_surface_interface, version, id) };
    if (!xdg_surface_resource) {
        wl_client_post_no_memory(client);
        return;
    }
    auto *surface { new XdgSurface {
        *server, xdg_surface_resource, surface_resource, nullptr, 0 } };
    wl_resource_set_implementation(xdg_surface_resource,
        &XDG_SURFACE_INTERFACE_IMPL, surface, xdg_surface_destroy_resource);
}
// xdg_wm_base.pong: accepted but ignored.
auto xdg_wm_base_handle_pong(wl_client *, wl_resource *, std::uint32_t) -> void
{
}
// Request table installed on every xdg_wm_base resource.
struct xdg_wm_base_interface const XDG_WM_BASE_INTERFACE_IMPL = {
.destroy = xdg_wm_base_handle_destroy,
.create_positioner = xdg_wm_base_handle_create_positioner,
.get_xdg_surface = xdg_wm_base_handle_get_xdg_surface,
.pong = xdg_wm_base_handle_pong,
};
// Bind handler of the xdg_wm_base global.
//   data    - the WaylandServer registered with the global.
//   version - version requested by the client; capped at XDG_WM_BASE_VERSION.
auto bind_xdg_wm_base(wl_client *client, void *data, std::uint32_t version,
    std::uint32_t id) -> void
{
    auto *server { static_cast<WaylandServer *>(data) };
    auto *resource { wl_resource_create(client, &::xdg_wm_base_interface,
        std::min(version, XDG_WM_BASE_VERSION), id) };
    if (!resource) {
        // Report the failure instead of leaving the bind silently incomplete.
        wl_client_post_no_memory(client);
        return;
    }
    // No destroy hook: the server outlives the resource and is not owned by it.
    wl_resource_set_implementation(
        resource, &XDG_WM_BASE_INTERFACE_IMPL, server, nullptr);
}
} // namespace
/// Builds the Global that advertises xdg_wm_base on this server's display.
///
/// The global is created at XDG_WM_BASE_VERSION, carries `this` as its data
/// pointer, and uses bind_xdg_wm_base as its bind callback. Ownership of the
/// Global is returned to the caller.
auto WaylandServer::create_xdg_wm_base_global() -> std::unique_ptr<Global>
{
    auto wm_base_global { std::make_unique<Global>(display(),
        &::xdg_wm_base_interface, XDG_WM_BASE_VERSION, this,
        bind_xdg_wm_base) };
    return wm_base_global;
}
} // namespace Lunar::Wayland

View File

@@ -0,0 +1,18 @@
# Empirical format config, based on observed style guide
# Use this only as a help to fit the surrounding code style - don't reformat whole files at once
---
BasedOnStyle: LLVM
AllowShortIfStatementsOnASingleLine: WithoutElse
AllowShortLoopsOnASingleLine: true
AlwaysBreakTemplateDeclarations: Yes
BreakBeforeBraces: Allman
BreakConstructorInitializers: BeforeComma
BreakStringLiterals: false
ColumnLimit: 120
FixNamespaceComments: false
IndentPPDirectives: AfterHash
IndentWidth: 4
PointerAlignment: Left
SpaceBeforeParens: Never
SpacesInParentheses: true
TabWidth: 4

View File

@@ -0,0 +1,62 @@
---
Checks:
'
clang-diagnostic-*,
clang-analyzer-*,
bugprone-*,
google-*,
misc-*,
modernize-*,
performance-*,
readability-*,
-bugprone-easily-swappable-parameters,
-bugprone-implicit-widening-of-multiplication-result,
-bugprone-narrowing-conversions,
-bugprone-reserved-identifier,
-google-readability-braces-around-statements,
-google-readability-casting,
-google-readability-function-size,
-google-readability-todo,
-google-readability-namespace-comments,
-misc-confusable-identifiers,
-misc-no-recursion,
-modernize-avoid-c-arrays,
-modernize-deprecated-headers,
-modernize-use-default-member-init,
-modernize-use-trailing-return-type,
-performance-no-int-to-ptr,
-readability-braces-around-statements,
-readability-else-after-return,
-readability-function-cognitive-complexity,
-readability-identifier-length,
-readability-implicit-bool-conversion,
-readability-isolate-declaration,
-readability-magic-numbers,
-readability-qualified-auto,
-readability-uppercase-literal-suffix
'
WarningsAsErrors: ''
HeaderFilterRegex: ''
AnalyzeTemporaryDtors: false
FormatStyle: none
CheckOptions:
llvm-else-after-return.WarnOnConditionVariables: 'false'
modernize-loop-convert.MinConfidence: reasonable
modernize-replace-auto-ptr.IncludeStyle: llvm
modernize-pass-by-value.IncludeStyle: llvm
google-readability-namespace-comments.ShortNamespaceLines: '10'
google-readability-namespace-comments.SpacesBeforeComments: '2'
cppcoreguidelines-non-private-member-variables-in-classes.IgnoreClassesWithAllMemberVariablesBeingPublic: 'true'
google-readability-braces-around-statements.ShortStatementLines: '1'
cert-err33-c.CheckedFunctions: '::aligned_alloc;::asctime_s;::at_quick_exit;::atexit;::bsearch;::bsearch_s;::btowc;::c16rtomb;::c32rtomb;::calloc;::clock;::cnd_broadcast;::cnd_init;::cnd_signal;::cnd_timedwait;::cnd_wait;::ctime_s;::fclose;::fflush;::fgetc;::fgetpos;::fgets;::fgetwc;::fopen;::fopen_s;::fprintf;::fprintf_s;::fputc;::fputs;::fputwc;::fputws;::fread;::freopen;::freopen_s;::fscanf;::fscanf_s;::fseek;::fsetpos;::ftell;::fwprintf;::fwprintf_s;::fwrite;::fwscanf;::fwscanf_s;::getc;::getchar;::getenv;::getenv_s;::gets_s;::getwc;::getwchar;::gmtime;::gmtime_s;::localtime;::localtime_s;::malloc;::mbrtoc16;::mbrtoc32;::mbsrtowcs;::mbsrtowcs_s;::mbstowcs;::mbstowcs_s;::memchr;::mktime;::mtx_init;::mtx_lock;::mtx_timedlock;::mtx_trylock;::mtx_unlock;::printf_s;::putc;::putwc;::raise;::realloc;::remove;::rename;::scanf;::scanf_s;::setlocale;::setvbuf;::signal;::snprintf;::snprintf_s;::sprintf;::sprintf_s;::sscanf;::sscanf_s;::strchr;::strerror_s;::strftime;::strpbrk;::strrchr;::strstr;::strtod;::strtof;::strtoimax;::strtok;::strtok_s;::strtol;::strtold;::strtoll;::strtoul;::strtoull;::strtoumax;::strxfrm;::swprintf;::swprintf_s;::swscanf;::swscanf_s;::thrd_create;::thrd_detach;::thrd_join;::thrd_sleep;::time;::timespec_get;::tmpfile;::tmpfile_s;::tmpnam;::tmpnam_s;::tss_create;::tss_get;::tss_set;::ungetc;::ungetwc;::vfprintf;::vfprintf_s;::vfscanf;::vfscanf_s;::vfwprintf;::vfwprintf_s;::vfwscanf;::vfwscanf_s;::vprintf_s;::vscanf;::vscanf_s;::vsnprintf;::vsnprintf_s;::vsprintf;::vsprintf_s;::vsscanf;::vsscanf_s;::vswprintf;::vswprintf_s;::vswscanf;::vswscanf_s;::vwprintf_s;::vwscanf;::vwscanf_s;::wcrtomb;::wcschr;::wcsftime;::wcspbrk;::wcsrchr;::wcsrtombs;::wcsrtombs_s;::wcsstr;::wcstod;::wcstof;::wcstoimax;::wcstok;::wcstok_s;::wcstol;::wcstold;::wcstoll;::wcstombs;::wcstombs_s;::wcstoul;::wcstoull;::wcstoumax;::wcsxfrm;::wctob;::wctrans;::wctype;::wmemchr;::wprintf_s;::wscanf;::wscanf_s;'
modernize-loop-convert.MaxCopySize: '16'
cert-dcl16-c.NewSuffixes: 'L;LL;LU;LLU'
cert-oop54-cpp.WarnOnlyIfThisHasSuspiciousField: 'false'
cert-str34-c.DiagnoseSignedUnsignedCharComparisons: 'false'
modernize-use-nullptr.NullMacros: 'NULL'
llvm-qualified-auto.AddConstToQualified: 'false'
modernize-loop-convert.NamingStyle: CamelCase
llvm-else-after-return.WarnOnUnfixable: 'false'
google-readability-function-size.StatementThreshold: '800'
...

1
subprojects/tracy/.github/FUNDING.yml vendored Normal file
View File

@@ -0,0 +1 @@
github: wolfpld

BIN
subprojects/tracy/.github/sponsor.png vendored Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.0 KiB

View File

@@ -0,0 +1,27 @@
name: Manual
on:
push:
branches: [ master ]
pull_request:
branches: [ master ]
jobs:
build:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Fix stupidity
run: |
cp LICENSE LICENSE.
- name: Compile LaTeX
uses: xu-cheng/latex-action@v3
with:
working_directory: manual
root_file: tracy.tex
- uses: actions/upload-artifact@v4
with:
name: manual
path: manual/tracy.pdf

View File

@@ -0,0 +1,75 @@
name: linux
on:
push:
branches: [ master ]
pull_request:
branches: [ master ]
jobs:
build:
runs-on: ubuntu-latest
container: archlinux:base-devel
steps:
- name: Install dependencies
run: pacman -Syu --noconfirm && pacman -S --noconfirm --needed freetype2 tbb debuginfod wayland dbus libxkbcommon libglvnd meson cmake git wayland-protocols nodejs
- uses: actions/checkout@v4
- name: Profiler GUI
run: |
cmake -B profiler/build -S profiler -DCMAKE_BUILD_TYPE=Release
cmake --build profiler/build --parallel
- name: Update utility
run: |
cmake -B update/build -S update -DCMAKE_BUILD_TYPE=Release
cmake --build update/build --parallel
- name: Capture utility
run: |
cmake -B capture/build -S capture -DCMAKE_BUILD_TYPE=Release
cmake --build capture/build --parallel
- name: Csvexport utility
run: |
cmake -B csvexport/build -S csvexport -DCMAKE_BUILD_TYPE=Release
cmake --build csvexport/build --parallel
- name: Import utilities
run: |
cmake -B import/build -S import -DCMAKE_BUILD_TYPE=Release
cmake --build import/build --parallel
- name: Library
run: meson setup -Dprefix=$GITHUB_WORKSPACE/bin/lib build && meson compile -C build && meson install -C build
- name: Test application
run: |
# test compilation with different flags
# we clean the build folder to reset cached variables between runs
cmake -B test/build -S test -DCMAKE_BUILD_TYPE=Release
cmake --build test/build --parallel
rm -rf test/build
# same with TRACY_ON_DEMAND
cmake -B test/build -S test -DCMAKE_BUILD_TYPE=Release -DTRACY_ON_DEMAND=ON .
cmake --build test/build --parallel
rm -rf test/build
# same with TRACY_DELAYED_INIT TRACY_MANUAL_LIFETIME
cmake -B test/build -S test -DCMAKE_BUILD_TYPE=Release -DTRACY_DELAYED_INIT=ON -DTRACY_MANUAL_LIFETIME=ON .
cmake --build test/build --parallel
rm -rf test/build
# same with TRACY_DEMANGLE
cmake -B test/build -S test -DCMAKE_BUILD_TYPE=Release -DTRACY_DEMANGLE=ON .
cmake --build test/build --parallel
rm -rf test/build
- name: Find Artifacts
id: find_artifacts
run: |
mkdir -p bin
cp profiler/build/tracy-profiler bin
cp update/build/tracy-update bin
cp capture/build/tracy-capture bin
cp csvexport/build/tracy-csvexport bin
cp import/build/tracy-import-chrome bin
cp import/build/tracy-import-fuchsia bin
strip bin/tracy-*
- uses: actions/upload-artifact@v4
with:
name: arch-linux
path: bin

36
subprojects/tracy/.gitignore vendored Normal file
View File

@@ -0,0 +1,36 @@
.vs
_build
_compiler
tools/*
*.d
*.o
*.so
*.swp
*.obj
imgui.ini
test/tracy_test
test/tracy_test.exe
*/build/unix/*-*
manual/t*.aux
manual/t*.log
manual/t*.out
manual/t*.pdf
manual/t*.synctex.gz
manual/t*.toc
manual/t*.bbl
manual/t*.blg
manual/t*.fdb_latexmk
manual/t*.fls
profiler/build/win32/packages
profiler/build/win32/Tracy.aps
.deps/
.dirstamp
/_*/**
/**/__pycache__/**
extra/vswhere.exe
extra/tracy-build
.cache
compile_commands.json
profiler/build/wasm/Tracy-release.*
profiler/build/wasm/Tracy-debug.*
profiler/build/wasm/embed.tracy

View File

@@ -0,0 +1,7 @@
{
"recommendations": [
"llvm-vs-code-extensions.vscode-clangd",
"vadimcn.vscode-lldb",
"ms-vscode.cmake-tools"
]
}

14
subprojects/tracy/.vscode/launch.json vendored Normal file
View File

@@ -0,0 +1,14 @@
{
"version": "0.2.0",
"configurations": [
{
"name": "Launch",
"type": "lldb",
"request": "launch",
"program": "${command:cmake.launchTargetPath}",
"args": [],
"cwd": "${workspaceFolder}",
"terminal": "console"
}
]
}

20
subprojects/tracy/.vscode/settings.json vendored Normal file
View File

@@ -0,0 +1,20 @@
{
"cmake.configureOnOpen": true,
"cmake.sourceDirectory": [
"${workspaceFolder}/profiler",
"${workspaceFolder}/capture",
"${workspaceFolder}/csvexport",
"${workspaceFolder}/import",
"${workspaceFolder}/update",
"${workspaceFolder}/test",
"${workspaceFolder}",
],
"cmake.buildDirectory": "${sourceDirectory}/build",
"cmake.autoSelectActiveFolder": false,
"cmake.options.advanced": {
"folder": { "statusBarVisibility": "visible" },
"variant": { "statusBarVisibility": "compact" }
},
"cmake.copyCompileCommands": "${workspaceFolder}/compile_commands.json",
"lldb.launch.initCommands": ["command script import ${workspaceRoot}/extra/natvis.py"],
}

View File

@@ -0,0 +1,195 @@
cmake_minimum_required(VERSION 3.10)
# Run version helper script
include(cmake/version.cmake)
project(Tracy LANGUAGES CXX VERSION ${TRACY_VERSION_STRING})
file(GENERATE OUTPUT .gitignore CONTENT "*")
# Choose the default for TRACY_STATIC from BUILD_SHARED_LIBS: static unless
# BUILD_SHARED_LIBS evaluates to true.
if(${BUILD_SHARED_LIBS})
set(DEFAULT_STATIC OFF)
else()
set(DEFAULT_STATIC ON)
endif()
option(TRACY_STATIC "Whether to build Tracy as a static library" ${DEFAULT_STATIC})
find_package(Threads REQUIRED)
set(TRACY_PUBLIC_DIR ${CMAKE_CURRENT_SOURCE_DIR}/public)
# TRACY_VISIBILITY is the library-type keyword passed to add_library() below.
if(TRACY_STATIC)
set(TRACY_VISIBILITY "STATIC")
else()
set(TRACY_VISIBILITY "SHARED")
endif()
add_library(TracyClient ${TRACY_VISIBILITY} "${TRACY_PUBLIC_DIR}/TracyClient.cpp")
target_compile_features(TracyClient PUBLIC cxx_std_11)
target_include_directories(TracyClient SYSTEM PUBLIC
$<BUILD_INTERFACE:${TRACY_PUBLIC_DIR}>
$<INSTALL_INTERFACE:include>)
target_link_libraries(
TracyClient
PUBLIC
Threads::Threads
${CMAKE_DL_LIBS}
)
# Public dependency on some libraries required when using Mingw
if(WIN32 AND ${CMAKE_CXX_COMPILER_ID} MATCHES "GNU|Clang")
target_link_libraries(TracyClient PUBLIC ws2_32 dbghelp)
endif()
if(CMAKE_SYSTEM_NAME MATCHES "FreeBSD")
find_library(EXECINFO_LIBRARY NAMES execinfo REQUIRED)
target_link_libraries(TracyClient PUBLIC ${EXECINFO_LIBRARY})
endif()
if(TRACY_LIBUNWIND_BACKTRACE)
include(FindPkgConfig)
pkg_check_modules(unwind REQUIRED libunwind)
target_include_directories(TracyClient INTERFACE ${unwind_INCLUDE_DIRS})
target_link_libraries(TracyClient INTERFACE ${unwind_LINK_LIBRARIES})
endif()
add_library(Tracy::TracyClient ALIAS TracyClient)
# set_option(<option> <help> <value>)
#
# Declares the boolean option <option> with help text <help> and default
# <value>, prints its resulting state as a STATUS message, and - when the
# option is ON - adds <option> as a PUBLIC compile definition of TracyClient.
macro(set_option option help value)
option(${option} ${help} ${value})
if(${option})
message(STATUS "${option}: ON")
target_compile_definitions(TracyClient PUBLIC ${option})
else()
message(STATUS "${option}: OFF")
endif()
endmacro()
set_option(TRACY_ENABLE "Enable profiling" ON)
set_option(TRACY_ON_DEMAND "On-demand profiling" OFF)
set_option(TRACY_CALLSTACK "Enforce callstack collection for tracy regions" OFF)
set_option(TRACY_NO_CALLSTACK "Disable all callstack related functionality" OFF)
set_option(TRACY_NO_CALLSTACK_INLINES "Disables the inline functions in callstacks" OFF)
set_option(TRACY_ONLY_LOCALHOST "Only listen on the localhost interface" OFF)
set_option(TRACY_NO_BROADCAST "Disable client discovery by broadcast to local network" OFF)
set_option(TRACY_ONLY_IPV4 "Tracy will only accept connections on IPv4 addresses (disable IPv6)" OFF)
set_option(TRACY_NO_CODE_TRANSFER "Disable collection of source code" OFF)
set_option(TRACY_NO_CONTEXT_SWITCH "Disable capture of context switches" OFF)
set_option(TRACY_NO_EXIT "Client executable does not exit until all profile data is sent to server" OFF)
set_option(TRACY_NO_SAMPLING "Disable call stack sampling" OFF)
set_option(TRACY_NO_VERIFY "Disable zone validation for C API" OFF)
set_option(TRACY_NO_VSYNC_CAPTURE "Disable capture of hardware Vsync events" OFF)
set_option(TRACY_NO_FRAME_IMAGE "Disable the frame image support and its thread" OFF)
set_option(TRACY_NO_SYSTEM_TRACING "Disable systrace sampling" OFF)
set_option(TRACY_PATCHABLE_NOPSLEDS "Enable nopsleds for efficient patching by system-level tools (e.g. rr)" OFF)
set_option(TRACY_DELAYED_INIT "Enable delayed initialization of the library (init on first call)" OFF)
set_option(TRACY_MANUAL_LIFETIME "Enable the manual lifetime management of the profile" OFF)
set_option(TRACY_FIBERS "Enable fibers support" OFF)
set_option(TRACY_NO_CRASH_HANDLER "Disable crash handling" OFF)
set_option(TRACY_TIMER_FALLBACK "Use lower resolution timers" OFF)
set_option(TRACY_LIBUNWIND_BACKTRACE "Use libunwind backtracing where supported" OFF)
set_option(TRACY_SYMBOL_OFFLINE_RESOLVE "Instead of full runtime symbol resolution, only resolve the image path and offset to enable offline symbol resolution" OFF)
set_option(TRACY_LIBBACKTRACE_ELF_DYNLOAD_SUPPORT "Enable libbacktrace to support dynamically loaded elfs in symbol resolution resolution after the first symbol resolve operation" OFF)
# advanced
set_option(TRACY_VERBOSE "[advanced] Verbose output from the profiler" OFF)
mark_as_advanced(TRACY_VERBOSE)
set_option(TRACY_DEMANGLE "[advanced] Don't use default demangling function - You'll need to provide your own" OFF)
mark_as_advanced(TRACY_DEMANGLE)
if(NOT TRACY_STATIC)
target_compile_definitions(TracyClient PRIVATE TRACY_EXPORTS)
target_compile_definitions(TracyClient PUBLIC TRACY_IMPORTS)
endif()
include(CMakePackageConfigHelpers)
include(GNUInstallDirs)
set_target_properties(TracyClient PROPERTIES VERSION ${PROJECT_VERSION})
set(tracy_includes
${TRACY_PUBLIC_DIR}/tracy/TracyC.h
${TRACY_PUBLIC_DIR}/tracy/Tracy.hpp
${TRACY_PUBLIC_DIR}/tracy/TracyD3D11.hpp
${TRACY_PUBLIC_DIR}/tracy/TracyD3D12.hpp
${TRACY_PUBLIC_DIR}/tracy/TracyLua.hpp
${TRACY_PUBLIC_DIR}/tracy/TracyOpenCL.hpp
${TRACY_PUBLIC_DIR}/tracy/TracyOpenGL.hpp
${TRACY_PUBLIC_DIR}/tracy/TracyVulkan.hpp)
set(client_includes
${TRACY_PUBLIC_DIR}/client/tracy_concurrentqueue.h
${TRACY_PUBLIC_DIR}/client/tracy_rpmalloc.hpp
${TRACY_PUBLIC_DIR}/client/tracy_SPSCQueue.h
${TRACY_PUBLIC_DIR}/client/TracyKCore.hpp
${TRACY_PUBLIC_DIR}/client/TracyArmCpuTable.hpp
${TRACY_PUBLIC_DIR}/client/TracyCallstack.h
${TRACY_PUBLIC_DIR}/client/TracyCallstack.hpp
${TRACY_PUBLIC_DIR}/client/TracyCpuid.hpp
${TRACY_PUBLIC_DIR}/client/TracyDebug.hpp
${TRACY_PUBLIC_DIR}/client/TracyDxt1.hpp
${TRACY_PUBLIC_DIR}/client/TracyFastVector.hpp
${TRACY_PUBLIC_DIR}/client/TracyLock.hpp
${TRACY_PUBLIC_DIR}/client/TracyProfiler.hpp
${TRACY_PUBLIC_DIR}/client/TracyRingBuffer.hpp
${TRACY_PUBLIC_DIR}/client/TracyScoped.hpp
${TRACY_PUBLIC_DIR}/client/TracyStringHelpers.hpp
${TRACY_PUBLIC_DIR}/client/TracySysPower.hpp
${TRACY_PUBLIC_DIR}/client/TracySysTime.hpp
${TRACY_PUBLIC_DIR}/client/TracySysTrace.hpp
${TRACY_PUBLIC_DIR}/client/TracyThread.hpp)
set(common_includes
${TRACY_PUBLIC_DIR}/common/tracy_lz4.hpp
${TRACY_PUBLIC_DIR}/common/tracy_lz4hc.hpp
${TRACY_PUBLIC_DIR}/common/TracyAlign.hpp
${TRACY_PUBLIC_DIR}/common/TracyAlloc.hpp
${TRACY_PUBLIC_DIR}/common/TracyApi.h
${TRACY_PUBLIC_DIR}/common/TracyColor.hpp
${TRACY_PUBLIC_DIR}/common/TracyForceInline.hpp
${TRACY_PUBLIC_DIR}/common/TracyMutex.hpp
${TRACY_PUBLIC_DIR}/common/TracyProtocol.hpp
${TRACY_PUBLIC_DIR}/common/TracyQueue.hpp
${TRACY_PUBLIC_DIR}/common/TracySocket.hpp
${TRACY_PUBLIC_DIR}/common/TracyStackFrames.hpp
${TRACY_PUBLIC_DIR}/common/TracySystem.hpp
${TRACY_PUBLIC_DIR}/common/TracyUwp.hpp
${TRACY_PUBLIC_DIR}/common/TracyYield.hpp)
install(TARGETS TracyClient
EXPORT TracyConfig
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
COMPONENT lib)
# Export targets to build tree root
export(TARGETS TracyClient
NAMESPACE Tracy::
FILE ${CMAKE_BINARY_DIR}/TracyTargets.cmake)
install(FILES ${tracy_includes}
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/tracy)
install(FILES ${client_includes}
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/client)
install(FILES ${common_includes}
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/common)
install(EXPORT TracyConfig
NAMESPACE Tracy::
FILE TracyTargets.cmake
DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/Tracy)
include(CMakePackageConfigHelpers)
configure_package_config_file(${CMAKE_CURRENT_SOURCE_DIR}/Config.cmake.in
"${CMAKE_CURRENT_BINARY_DIR}/TracyConfig.cmake"
INSTALL_DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/Tracy)
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/TracyConfig.cmake
DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/Tracy)
option(TRACY_CLIENT_PYTHON "Whether to build Tracy python client library" OFF)
if(TRACY_CLIENT_PYTHON)
if(TRACY_STATIC)
message(FATAL_ERROR "Python-bindings require a shared client library")
endif()
add_subdirectory(python)
endif()

View File

@@ -0,0 +1,6 @@
@PACKAGE_INIT@
include(CMakeFindDependencyMacro)
find_dependency(Threads REQUIRED)
include("${CMAKE_CURRENT_LIST_DIR}/TracyTargets.cmake")

27
subprojects/tracy/LICENSE Normal file
View File

@@ -0,0 +1,27 @@
Tracy Profiler (https://github.com/wolfpld/tracy) is licensed under the
3-clause BSD license.
Copyright (c) 2017-2024, Bartosz Taudul <wolf@nereid.pl>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the <organization> nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

1239
subprojects/tracy/NEWS Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,28 @@
# Tracy Profiler
[![Sponsor](.github/sponsor.png)](https://github.com/sponsors/wolfpld/)
### A real time, nanosecond resolution, remote telemetry, hybrid frame and sampling profiler for games and other applications.
Tracy supports profiling CPU (Direct support is provided for C, C++, Lua and Python integration. At the same time, third-party bindings to many other languages exist on the internet, such as [Rust](https://github.com/nagisa/rust_tracy_client), [Zig](https://github.com/nektro/zig-tracy), [C#](https://github.com/clibequilibrium/Tracy-CSharp), [OCaml](https://github.com/imandra-ai/ocaml-tracy), [Odin](https://github.com/oskarnp/odin-tracy), etc.), GPU (All major graphic APIs: OpenGL, Vulkan, Direct3D 11/12, OpenCL.), memory allocations, locks, context switches, automatically attribute screenshots to captured frames, and much more.
- [Documentation](https://github.com/wolfpld/tracy/releases/latest/download/tracy.pdf) for usage and build process instructions
- [Releases](https://github.com/wolfpld/tracy/releases) containing the documentation (`tracy.pdf`) and compiled Windows x64 binaries (`Tracy-<version>.7z`) as assets
- [Changelog](NEWS)
- [Interactive demo](https://tracy.nereid.pl/)
![](doc/profiler.png)
![](doc/profiler2.png)
![](doc/profiler3.png)
[An Introduction to Tracy Profiler in C++ - Marcos Slomp - CppCon 2023](https://youtu.be/ghXk3Bk5F2U?t=37)
[Introduction to Tracy Profiler v0.2](https://www.youtube.com/watch?v=fB5B46lbapc)
[New features in Tracy Profiler v0.3](https://www.youtube.com/watch?v=3SXpDpDh2Uo)
[New features in Tracy Profiler v0.4](https://www.youtube.com/watch?v=eAkgkaO8B9o)
[New features in Tracy Profiler v0.5](https://www.youtube.com/watch?v=P6E7qLMmzTQ)
[New features in Tracy Profiler v0.6](https://www.youtube.com/watch?v=uJkrFgriuOo)
[New features in Tracy Profiler v0.7](https://www.youtube.com/watch?v=_hU7vw00MZ4)
[New features in Tracy Profiler v0.8](https://www.youtube.com/watch?v=30wpRpHTTag)

View File

@@ -0,0 +1,27 @@
cmake_minimum_required(VERSION 3.16)
option(NO_ISA_EXTENSIONS "Disable ISA extensions (don't pass -march=native or -mcpu=native to the compiler)" OFF)
option(NO_STATISTICS "Disable calculation of statistics" ON)
option(NO_PARALLEL_STL "Disable parallel STL" OFF)
include(${CMAKE_CURRENT_LIST_DIR}/../cmake/version.cmake)
set(CMAKE_CXX_STANDARD 20)
project(
tracy-capture
LANGUAGES C CXX
VERSION ${TRACY_VERSION_STRING}
)
include(${CMAKE_CURRENT_LIST_DIR}/../cmake/config.cmake)
include(${CMAKE_CURRENT_LIST_DIR}/../cmake/vendor.cmake)
include(${CMAKE_CURRENT_LIST_DIR}/../cmake/server.cmake)
set(PROGRAM_FILES
src/capture.cpp
)
add_executable(${PROJECT_NAME} ${PROGRAM_FILES} ${COMMON_FILES} ${SERVER_FILES})
target_link_libraries(${PROJECT_NAME} PRIVATE TracyServer TracyGetOpt)
set_property(DIRECTORY ${CMAKE_CURRENT_LIST_DIR} PROPERTY VS_STARTUP_PROJECT ${PROJECT_NAME})

View File

@@ -0,0 +1,364 @@
#ifdef _WIN32
# include <windows.h>
# include <io.h>
#else
# include <unistd.h>
#endif
#include <atomic>
#include <chrono>
#include <inttypes.h>
#include <mutex>
#include <signal.h>
#include <stdarg.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/stat.h>
#include "../../public/common/TracyProtocol.hpp"
#include "../../public/common/TracyStackFrames.hpp"
#include "../../server/TracyFileWrite.hpp"
#include "../../server/TracyMemory.hpp"
#include "../../server/TracyPrint.hpp"
#include "../../server/TracySysUtil.hpp"
#include "../../server/TracyWorker.hpp"
#ifdef _WIN32
# include "../../getopt/getopt.h"
#endif
// Shutdown request flag, raised from the SIGINT handler below and polled by
// the capture loop in main(). Historically a flag written from a signal
// handler had to be `volatile sig_atomic_t`, which formally excluded `bool`
// even though it worked in practice; with C++11 atomics, std::atomic<bool>
// is the properly supported way to do this.
static std::atomic<bool> s_disconnect { false };

// SIGINT handler: flags that the capture should stop.
void SigInt( int /*signum*/ )
{
    // A relaxed store is the nearest equivalent of the traditional `volatile`
    // write; nothing else happens in this handler that would have to be
    // ordered relative to it.
    s_disconnect.store( true, std::memory_order_relaxed );
}
// Cached answer to "is stdout attached to a terminal?". Stays false until
// InitIsStdoutATerminal() has been called.
static bool s_isStdoutATerminal = false;

// Queries the platform once and caches whether stdout is a terminal.
void InitIsStdoutATerminal()
{
    const int stdoutFd = fileno( stdout );
#ifdef _WIN32
    s_isStdoutATerminal = _isatty( stdoutFd ) != 0;
#else
    s_isStdoutATerminal = isatty( stdoutFd ) != 0;
#endif
}

// Returns the value cached by InitIsStdoutATerminal().
bool IsStdoutATerminal()
{
    return s_isStdoutATerminal;
}
// ANSI terminal escape sequences used for the progress and failure output.
// They are string literals so several can be concatenated at compile time
// (e.g. ANSI_CYAN ANSI_BOLD) and passed as the `ansiEscape` of AnsiPrintf().
#define ANSI_RESET "\033[0m"
#define ANSI_BOLD "\033[1m"
#define ANSI_BLACK "\033[30m"
#define ANSI_RED "\033[31m"
#define ANSI_GREEN "\033[32m"
#define ANSI_YELLOW "\033[33m"
#define ANSI_BLUE "\033[34m"
#define ANSI_MAGENTA "\033[35m"
#define ANSI_CYAN "\033[36m"
#define ANSI_ERASE_LINE "\033[2K"
// Like printf, but if stdout is a terminal, prepends the output with
// the given `ansiEscape` and appends ANSI_RESET.
void AnsiPrintf( const char* ansiEscape, const char* format, ... ) {
if( IsStdoutATerminal() )
{
// Prepend ansiEscape and append ANSI_RESET.
char buf[256];
va_list args;
va_start( args, format );
vsnprintf( buf, sizeof buf, format, args );
va_end( args );
printf( "%s%s" ANSI_RESET, ansiEscape, buf );
}
else
{
// Just a normal printf.
va_list args;
va_start( args, format );
vfprintf( stdout, format, args );
va_end( args );
}
}
// Prints the command-line synopsis to stdout and terminates the process with
// exit status 1. Never returns.
[[noreturn]] void Usage()
{
    static const char synopsis[] = "Usage: capture -o output.tracy [-a address] [-p port] [-f] [-s seconds] [-m memlimit]\n";
    fputs( synopsis, stdout );
    exit( 1 );
}
// Entry point of the command-line capture utility.
//
// Parses the options (-o output, -a address, -p port, -f overwrite,
// -s seconds, -m memory limit in percent of physical memory), connects to a
// Tracy client, shows transfer progress while data arrives, reports any
// instrumentation failure, and finally writes the trace to the output file.
//
// Exit status:
//   0  trace captured and saved
//   1  bad usage, or protocol version mismatch during handshake
//   2  client not available (another server already connected)
//   3  client dropped the connection during handshake
//   4  output file exists and -f was not given
//   5  output file cannot be opened for writing
//   6  the trace could not be saved after the capture finished
int main( int argc, char** argv )
{
#ifdef _WIN32
    if( !AttachConsole( ATTACH_PARENT_PROCESS ) )
    {
        AllocConsole();
        SetConsoleMode( GetStdHandle( STD_OUTPUT_HANDLE ), 0x07 );
    }
#endif

    InitIsStdoutATerminal();

    bool overwrite = false;
    const char* address = "127.0.0.1";
    const char* output = nullptr;
    int port = 8086;
    int seconds = -1;
    int64_t memoryLimit = -1;

    int c;
    while( ( c = getopt( argc, argv, "a:o:p:fs:m:" ) ) != -1 )
    {
        switch( c )
        {
        case 'a':
            address = optarg;
            break;
        case 'o':
            output = optarg;
            break;
        case 'p':
            port = atoi( optarg );
            break;
        case 'f':
            overwrite = true;
            break;
        case 's':
            seconds = atoi(optarg);
            break;
        case 'm':
            // Percentage of physical memory, clamped to 1..999, converted to bytes.
            memoryLimit = std::clamp( atoll( optarg ), 1ll, 999ll ) * tracy::GetPhysicalMemorySize() / 100;
            break;
        default:
            Usage();
            break;
        }
    }

    if( !address || !output ) Usage();

    struct stat st;
    if( stat( output, &st ) == 0 && !overwrite )
    {
        printf( "Output file %s already exists! Use -f to force overwrite.\n", output );
        return 4;
    }

    // Make sure the output location is writable before starting the capture;
    // the probe file is removed again right away.
    FILE* test = fopen( output, "wb" );
    if( !test )
    {
        printf( "Cannot open output file %s for writing!\n", output );
        return 5;
    }
    fclose( test );
    unlink( output );

    printf( "Connecting to %s:%i...", address, port );
    fflush( stdout );
    tracy::Worker worker( address, port, memoryLimit );
    while( !worker.HasData() )
    {
        const auto handshake = worker.GetHandshakeStatus();
        if( handshake == tracy::HandshakeProtocolMismatch )
        {
            printf( "\nThe client you are trying to connect to uses incompatible protocol version.\nMake sure you are using the same Tracy version on both client and server.\n" );
            return 1;
        }
        if( handshake == tracy::HandshakeNotAvailable )
        {
            printf( "\nThe client you are trying to connect to is no longer able to sent profiling data,\nbecause another server was already connected to it.\nYou can do the following:\n\n 1. Restart the client application.\n 2. Rebuild the client application with on-demand mode enabled.\n" );
            return 2;
        }
        if( handshake == tracy::HandshakeDropped )
        {
            printf( "\nThe client you are trying to connect to has disconnected during the initial\nconnection handshake. Please check your network configuration.\n" );
            return 3;
        }
        std::this_thread::sleep_for( std::chrono::milliseconds( 100 ) );
    }
    printf( "\nQueue delay: %s\nTimer resolution: %s\n", tracy::TimeToString( worker.GetDelay() ), tracy::TimeToString( worker.GetResolution() ) );

#ifdef _WIN32
    signal( SIGINT, SigInt );
#else
    // The previous disposition is never restored, so it is not requested.
    struct sigaction sigint;
    memset( &sigint, 0, sizeof( sigint ) );
    sigint.sa_handler = SigInt;
    sigaction( SIGINT, &sigint, nullptr );
#endif

    const auto firstTime = worker.GetFirstTime();
    auto& lock = worker.GetMbpsDataLock();

    const auto t0 = std::chrono::high_resolution_clock::now();
    while( worker.IsConnected() )
    {
        // Relaxed order is sufficient here because `s_disconnect` is only ever
        // set by this thread or by the SigInt handler, and that handler does
        // nothing else than storing `s_disconnect`.
        if( s_disconnect.load( std::memory_order_relaxed ) )
        {
            worker.Disconnect();
            // Relaxed order is sufficient because only this thread ever reads
            // this value.
            s_disconnect.store(false, std::memory_order_relaxed );
            break;
        }

        lock.lock();
        const auto mbps = worker.GetMbpsData().back();
        const auto compRatio = worker.GetCompRatio();
        const auto netTotal = worker.GetDataTransferred();
        lock.unlock();

        // Output progress info only if destination is a TTY to avoid bloating
        // log files (so this is not just about usage of ANSI color codes).
        if( IsStdoutATerminal() )
        {
            const char* unit = "Mbps";
            float unitsPerMbps = 1.f;
            if( mbps < 0.1f )
            {
                unit = "Kbps";
                unitsPerMbps = 1000.f;
            }
            AnsiPrintf( ANSI_ERASE_LINE ANSI_CYAN ANSI_BOLD, "\r%7.2f %s", mbps * unitsPerMbps, unit );
            printf( " /");
            AnsiPrintf( ANSI_CYAN ANSI_BOLD, "%5.1f%%", compRatio * 100.f );
            printf( " =");
            AnsiPrintf( ANSI_YELLOW ANSI_BOLD, "%7.2f Mbps", mbps / compRatio );
            printf( " | ");
            AnsiPrintf( ANSI_YELLOW, "Tx: ");
            AnsiPrintf( ANSI_GREEN, "%s", tracy::MemSizeToString( netTotal ) );
            printf( " | ");
            AnsiPrintf( ANSI_RED ANSI_BOLD, "%s", tracy::MemSizeToString( tracy::memUsage.load( std::memory_order_relaxed ) ) );
            if( memoryLimit > 0 )
            {
                printf( " / " );
                AnsiPrintf( ANSI_BLUE ANSI_BOLD, "%s", tracy::MemSizeToString( memoryLimit ) );
            }
            printf( " | ");
            AnsiPrintf( ANSI_RED, "%s", tracy::TimeToString( worker.GetLastTime() - firstTime ) );
            fflush( stdout );
        }

        std::this_thread::sleep_for( std::chrono::milliseconds( 100 ) );
        if( seconds != -1 )
        {
            const auto dur = std::chrono::high_resolution_clock::now() - t0;
            if( std::chrono::duration_cast<std::chrono::seconds>(dur).count() >= seconds )
            {
                // Relaxed order is sufficient because only this thread ever reads
                // this value.
                s_disconnect.store(true, std::memory_order_relaxed );
            }
        }
    }
    const auto t1 = std::chrono::high_resolution_clock::now();

    const auto& failure = worker.GetFailureType();
    if( failure != tracy::Worker::Failure::None )
    {
        AnsiPrintf( ANSI_RED ANSI_BOLD, "\nInstrumentation failure: %s", tracy::Worker::GetFailureString( failure ) );
        auto& fd = worker.GetFailureData();
        if( !fd.message.empty() )
        {
            printf( "\nContext: %s", fd.message.c_str() );
        }
        if( fd.callstack != 0 )
        {
            AnsiPrintf( ANSI_BOLD, "\nFailure callstack:\n" );
            auto& cs = worker.GetCallstack( fd.callstack );
            int fidx = 0;
            for( auto& entry : cs )
            {
                auto frameData = worker.GetCallstackFrame( entry );
                if( !frameData )
                {
                    printf( "%3i. %p\n", fidx++, (void*)worker.GetCanonicalPointer( entry ) );
                }
                else
                {
                    const auto fsz = frameData->size;
                    for( uint8_t f=0; f<fsz; f++ )
                    {
                        const auto& frame = frameData->data[f];
                        auto txt = worker.GetString( frame.name );

                        // Before the first numbered frame is printed, skip
                        // inline frames whose name appears in the
                        // tracy::s_tracyStackFrames list.
                        if( fidx == 0 && f != fsz-1 )
                        {
                            auto knownFrame = tracy::s_tracyStackFrames;
                            bool match = false;
                            do
                            {
                                if( strcmp( txt, *knownFrame ) == 0 )
                                {
                                    match = true;
                                    break;
                                }
                            }
                            while( *++knownFrame );
                            if( match ) continue;
                        }

                        // The last entry of a frame group gets an index; the
                        // preceding entries are marked as inlined.
                        if( f == fsz-1 )
                        {
                            printf( "%3i. ", fidx++ );
                        }
                        else
                        {
                            AnsiPrintf( ANSI_BLACK ANSI_BOLD, "inl. " );
                        }
                        AnsiPrintf( ANSI_CYAN, "%s ", txt );
                        txt = worker.GetString( frame.file );
                        if( frame.line == 0 )
                        {
                            AnsiPrintf( ANSI_YELLOW, "(%s)", txt );
                        }
                        else
                        {
                            AnsiPrintf( ANSI_YELLOW, "(%s:%" PRIu32 ")", txt, frame.line );
                        }
                        if( frameData->imageName.Active() )
                        {
                            AnsiPrintf( ANSI_MAGENTA, " %s\n", worker.GetString( frameData->imageName ) );
                        }
                        else
                        {
                            printf( "\n" );
                        }
                    }
                }
            }
        }
    }

    printf( "\nFrames: %" PRIu64 "\nTime span: %s\nZones: %s\nElapsed time: %s\nSaving trace...",
        worker.GetFrameCount( *worker.GetFramesBase() ), tracy::TimeToString( worker.GetLastTime() - firstTime ), tracy::RealToString( worker.GetZoneCount() ),
        tracy::TimeToString( std::chrono::duration_cast<std::chrono::nanoseconds>( t1 - t0 ).count() ) );
    fflush( stdout );
    auto f = std::unique_ptr<tracy::FileWrite>( tracy::FileWrite::Open( output, tracy::FileCompression::Zstd, 3, 4 ) );
    if( f )
    {
        worker.Write( *f, false );
        AnsiPrintf( ANSI_GREEN ANSI_BOLD, " done!\n" );
        f->Finish();
        const auto stats = f->GetCompressionStatistics();
        printf( "Trace size %s (%.2f%% ratio)\n", tracy::MemSizeToString( stats.second ), 100.f * stats.second / stats.first );
    }
    else
    {
        AnsiPrintf( ANSI_RED ANSI_BOLD, " failed!\n");
        // No trace was written: report that through the exit status so that
        // scripts driving the tool do not mistake this for a successful run.
        return 6;
    }

    return 0;
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,300 @@
#.rst:
# ECMFindModuleHelpers
# --------------------
#
# Helper macros for find modules: ecm_find_package_version_check(),
# ecm_find_package_parse_components() and
# ecm_find_package_handle_library_components().
#
# ::
#
# ecm_find_package_version_check(<name>)
#
# Prints warnings if the CMake version or the project's required CMake version
# is older than that required by extra-cmake-modules.
#
# ::
#
# ecm_find_package_parse_components(<name>
# RESULT_VAR <variable>
# KNOWN_COMPONENTS <component1> [<component2> [...]]
# [DEFAULT_COMPONENTS <component1> [<component2> [...]]]
# [SKIP_DEPENDENCY_HANDLING])
#
# This macro will populate <variable> with a list of components found in
# <name>_FIND_COMPONENTS, after checking that all those components are in the
# list of KNOWN_COMPONENTS; if there are any unknown components, it will print
# an error or warning (depending on the value of <name>_FIND_REQUIRED) and call
# return().
#
# The order of components in <variable> is guaranteed to match the order they
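# are listed in the KNOWN_COMPONENTS argument.
#
# If <name>_FIND_COMPONENTS is empty, <variable> is set to DEFAULT_COMPONENTS,
# which itself defaults to KNOWN_COMPONENTS when not given.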
#
# If SKIP_DEPENDENCY_HANDLING is not set, for each component the variable
# <name>_<component>_component_deps will be checked for dependent components.
# If <component> is listed in <name>_FIND_COMPONENTS, then all its (transitive)
# dependencies will also be added to <variable>.
#
# ::
#
# ecm_find_package_handle_library_components(<name>
# COMPONENTS <component> [<component> [...]]
# [SKIP_DEPENDENCY_HANDLING]
# [SKIP_PKG_CONFIG])
#
# Creates an imported library target for each component. The operation of this
# macro depends on the presence of a number of CMake variables.
#
# The <name>_<component>_lib variable should contain the name of this library,
# and <name>_<component>_header variable should contain the name of a header
# file associated with it (whatever relative path is normally passed to
# '#include'). <name>_<component>_header_subdir variable can be used to specify
# which subdirectory of the include path the headers will be found in.
# ecm_find_package_handle_library_components() will then search for the library
# and include directory (creating appropriate cache variables) and create an
# imported library target named <name>::<component>.
#
# Additional variables can be used to provide additional information:
#
# If SKIP_PKG_CONFIG is not set, the <name>_<component>_pkg_config variable is
# set, and pkg-config is found, the pkg-config module given by
# <name>_<component>_pkg_config will be searched for and used to help locate the
# library and header file. It will also be used to set
# <name>_<component>_VERSION.
#
# Note that if version information is found via pkg-config,
# <name>_<component>_FIND_VERSION can be set to require a particular version
# for each component.
#
# If SKIP_DEPENDENCY_HANDLING is not set, the INTERFACE_LINK_LIBRARIES property
# of the imported target for <component> will be set to contain the imported
# targets for the components listed in <name>_<component>_component_deps.
# <name>_<component>_FOUND will also be set to false if any of the components in
# <name>_<component>_component_deps are not found. This requires the components
# in <name>_<component>_component_deps to be listed before <component> in the
# COMPONENTS argument.
#
# The following variables will be set:
#
# ``<name>_TARGETS``
# the imported targets
# ``<name>_LIBRARIES``
# the found libraries
# ``<name>_INCLUDE_DIRS``
# the combined required include directories for the components
# ``<name>_DEFINITIONS``
# the "other" CFLAGS provided by pkg-config, if any
# ``<name>_VERSION``
# the value of ``<name>_<component>_VERSION`` for the first component that
# has this variable set (note that components are searched for in the order
# they are passed to the macro), although if it is already set, it will not
# be altered
#
# Note that these variables are never cleared, so if
# ecm_find_package_handle_library_components() is called multiple times with
# different components (typically because of multiple find_package() calls) then
# ``<name>_TARGETS``, for example, will contain all the targets found in any
# call (although no duplicates).
#
# Since pre-1.0.0.
#=============================================================================
# Copyright 2014 Alex Merry <alex.merry@kde.org>
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# 3. The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# ecm_find_package_version_check(<module_name>)
#
# Guards Find<module_name>.cmake against CMake releases older than 2.8.12:
#  * aborts with FATAL_ERROR when the running CMake itself is too old;
#  * emits an AUTHOR_WARNING when the project's declared minimum version
#    (CMAKE_MINIMUM_REQUIRED_VERSION) is below 2.8.12.
macro(ecm_find_package_version_check module_name)
    # Hard requirement on the CMake that is actually executing this module.
    if(CMAKE_VERSION VERSION_LESS 2.8.12)
        message(FATAL_ERROR "CMake 2.8.12 is required by Find${module_name}.cmake")
    endif()

    # Soft requirement on what the including project claims to support.
    if(CMAKE_MINIMUM_REQUIRED_VERSION VERSION_LESS 2.8.12)
        message(AUTHOR_WARNING "Your project should require at least CMake 2.8.12 to use Find${module_name}.cmake")
    endif()
endmacro()
# ecm_find_package_parse_components(<module_name>
#     RESULT_VAR <variable>
#     KNOWN_COMPONENTS <component>...
#     [DEFAULT_COMPONENTS <component>...]
#     [SKIP_DEPENDENCY_HANDLING])
#
# Computes the list of components a find module should search for and stores
# it in <variable>.
#
# This is a macro: every ecm_fppc_* / ECM_FPPC_* variable set below remains
# visible in the caller's scope, and the return() near the end returns from
# the caller (the find module), not just from this macro.
macro(ecm_find_package_parse_components module_name)
set(ecm_fppc_options SKIP_DEPENDENCY_HANDLING)
set(ecm_fppc_oneValueArgs RESULT_VAR)
set(ecm_fppc_multiValueArgs KNOWN_COMPONENTS DEFAULT_COMPONENTS)
cmake_parse_arguments(ECM_FPPC "${ecm_fppc_options}" "${ecm_fppc_oneValueArgs}" "${ecm_fppc_multiValueArgs}" ${ARGN})
# Argument validation: RESULT_VAR and KNOWN_COMPONENTS are mandatory.
if(ECM_FPPC_UNPARSED_ARGUMENTS)
message(FATAL_ERROR "Unexpected arguments to ecm_find_package_parse_components: ${ECM_FPPC_UNPARSED_ARGUMENTS}")
endif()
if(NOT ECM_FPPC_RESULT_VAR)
message(FATAL_ERROR "Missing RESULT_VAR argument to ecm_find_package_parse_components")
endif()
if(NOT ECM_FPPC_KNOWN_COMPONENTS)
message(FATAL_ERROR "Missing KNOWN_COMPONENTS argument to ecm_find_package_parse_components")
endif()
# DEFAULT_COMPONENTS falls back to the full KNOWN_COMPONENTS list.
if(NOT ECM_FPPC_DEFAULT_COMPONENTS)
set(ECM_FPPC_DEFAULT_COMPONENTS ${ECM_FPPC_KNOWN_COMPONENTS})
endif()
# Case 1: the find_package() call requested specific components.
if(${module_name}_FIND_COMPONENTS)
set(ecm_fppc_requestedComps ${${module_name}_FIND_COMPONENTS})
if(NOT ECM_FPPC_SKIP_DEPENDENCY_HANDLING)
# Make sure deps are included
# Dependencies come from <module_name>_<component>_component_deps; any
# dependency not already requested is appended to the requested list.
foreach(ecm_fppc_comp ${ecm_fppc_requestedComps})
foreach(ecm_fppc_dep_comp ${${module_name}_${ecm_fppc_comp}_component_deps})
list(FIND ecm_fppc_requestedComps "${ecm_fppc_dep_comp}" ecm_fppc_index)
if("${ecm_fppc_index}" STREQUAL "-1")
if(NOT ${module_name}_FIND_QUIETLY)
message(STATUS "${module_name}: ${ecm_fppc_comp} requires ${${module_name}_${ecm_fppc_comp}_component_deps}")
endif()
list(APPEND ecm_fppc_requestedComps "${ecm_fppc_dep_comp}")
endif()
endforeach()
endforeach()
else()
message(STATUS "Skipping dependency handling for ${module_name}")
endif()
list(REMOVE_DUPLICATES ecm_fppc_requestedComps)
# This makes sure components are listed in the same order as
# KNOWN_COMPONENTS (potentially important for inter-dependencies)
set(${ECM_FPPC_RESULT_VAR})
foreach(ecm_fppc_comp ${ECM_FPPC_KNOWN_COMPONENTS})
list(FIND ecm_fppc_requestedComps "${ecm_fppc_comp}" ecm_fppc_index)
if(NOT "${ecm_fppc_index}" STREQUAL "-1")
list(APPEND ${ECM_FPPC_RESULT_VAR} "${ecm_fppc_comp}")
# Remove each recognised component so that only unknown ones remain.
list(REMOVE_AT ecm_fppc_requestedComps ${ecm_fppc_index})
endif()
endforeach()
# if there are any left, they are unknown components
if(ecm_fppc_requestedComps)
# Unknown components are fatal only when the package is REQUIRED.
set(ecm_fppc_msgType STATUS)
if(${module_name}_FIND_REQUIRED)
set(ecm_fppc_msgType FATAL_ERROR)
endif()
if(NOT ${module_name}_FIND_QUIETLY)
message(${ecm_fppc_msgType} "${module_name}: requested unknown components ${ecm_fppc_requestedComps}")
endif()
# Macro context: this leaves the file that invoked the macro.
return()
endif()
else()
# Case 2: no components requested, so use the defaults.
set(${ECM_FPPC_RESULT_VAR} ${ECM_FPPC_DEFAULT_COMPONENTS})
endif()
endmacro()
# ecm_find_package_handle_library_components(<module_name>
#     COMPONENTS <component>...
#     [SKIP_DEPENDENCY_HANDLING]
#     [SKIP_PKG_CONFIG])
#
# For every listed component this macro
#   1. optionally queries pkg-config (module name taken from
#      <module_name>_<component>_pkg_config),
#   2. locates the header directory and the library,
#   3. runs find_package_handle_standard_args() for the component, and
#   4. on success creates the imported target <module_name>::<component> and
#      appends to <module_name>_LIBRARIES, _INCLUDE_DIRS, _DEFINITIONS and
#      _TARGETS.
#
# This is a macro: all variables set below are visible in the caller's scope.
macro(ecm_find_package_handle_library_components module_name)
    set(ecm_fpwc_options SKIP_PKG_CONFIG SKIP_DEPENDENCY_HANDLING)
    set(ecm_fpwc_oneValueArgs)
    set(ecm_fpwc_multiValueArgs COMPONENTS)
    cmake_parse_arguments(ECM_FPWC "${ecm_fpwc_options}" "${ecm_fpwc_oneValueArgs}" "${ecm_fpwc_multiValueArgs}" ${ARGN})

    if(ECM_FPWC_UNPARSED_ARGUMENTS)
        message(FATAL_ERROR "Unexpected arguments to ecm_find_package_handle_library_components: ${ECM_FPWC_UNPARSED_ARGUMENTS}")
    endif()
    if(NOT ECM_FPWC_COMPONENTS)
        message(FATAL_ERROR "Missing COMPONENTS argument to ecm_find_package_handle_library_components")
    endif()

    include(FindPackageHandleStandardArgs)
    find_package(PkgConfig QUIET)
    foreach(ecm_fpwc_comp ${ECM_FPWC_COMPONENTS})
        set(ecm_fpwc_dep_vars)
        set(ecm_fpwc_dep_targets)
        # Use the parsed option (ECM_FPWC_ prefix). Testing the bare keyword
        # name would read an unrelated caller variable and ignore the option.
        if(NOT ECM_FPWC_SKIP_DEPENDENCY_HANDLING)
            foreach(ecm_fpwc_dep ${${module_name}_${ecm_fpwc_comp}_component_deps})
                list(APPEND ecm_fpwc_dep_vars "${module_name}_${ecm_fpwc_dep}_FOUND")
                list(APPEND ecm_fpwc_dep_targets "${module_name}::${ecm_fpwc_dep}")
            endforeach()
        endif()

        if(NOT ECM_FPWC_SKIP_PKG_CONFIG AND ${module_name}_${ecm_fpwc_comp}_pkg_config)
            pkg_check_modules(PKG_${module_name}_${ecm_fpwc_comp} QUIET
                              ${${module_name}_${ecm_fpwc_comp}_pkg_config})
        endif()

        # pkg-config results (if any) are only hints; the normal search paths
        # are still consulted.
        find_path(${module_name}_${ecm_fpwc_comp}_INCLUDE_DIR
            NAMES ${${module_name}_${ecm_fpwc_comp}_header}
            HINTS ${PKG_${module_name}_${ecm_fpwc_comp}_INCLUDE_DIRS}
            PATH_SUFFIXES ${${module_name}_${ecm_fpwc_comp}_header_subdir}
        )
        find_library(${module_name}_${ecm_fpwc_comp}_LIBRARY
            NAMES ${${module_name}_${ecm_fpwc_comp}_lib}
            HINTS ${PKG_${module_name}_${ecm_fpwc_comp}_LIBRARY_DIRS}
        )

        set(${module_name}_${ecm_fpwc_comp}_VERSION "${PKG_${module_name}_${ecm_fpwc_comp}_VERSION}")
        # The package-wide version is taken from the first component that
        # provides one and is never overwritten afterwards.
        if(NOT ${module_name}_VERSION)
            set(${module_name}_VERSION ${${module_name}_${ecm_fpwc_comp}_VERSION})
        endif()

        # NAME_MISMATCHED is only passed on CMake >= 3.17.
        set(_name_mismatched_arg)
        if(NOT CMAKE_VERSION VERSION_LESS 3.17)
            set(_name_mismatched_arg NAME_MISMATCHED)
        endif()
        find_package_handle_standard_args(${module_name}_${ecm_fpwc_comp}
            FOUND_VAR
                ${module_name}_${ecm_fpwc_comp}_FOUND
            REQUIRED_VARS
                ${module_name}_${ecm_fpwc_comp}_LIBRARY
                ${module_name}_${ecm_fpwc_comp}_INCLUDE_DIR
                ${ecm_fpwc_dep_vars}
            VERSION_VAR
                ${module_name}_${ecm_fpwc_comp}_VERSION
            ${_name_mismatched_arg}
        )

        mark_as_advanced(
            ${module_name}_${ecm_fpwc_comp}_LIBRARY
            ${module_name}_${ecm_fpwc_comp}_INCLUDE_DIR
        )

        if(${module_name}_${ecm_fpwc_comp}_FOUND)
            list(APPEND ${module_name}_LIBRARIES
                        "${${module_name}_${ecm_fpwc_comp}_LIBRARY}")
            list(APPEND ${module_name}_INCLUDE_DIRS
                        "${${module_name}_${ecm_fpwc_comp}_INCLUDE_DIR}")
            set(${module_name}_DEFINITIONS
                    ${${module_name}_DEFINITIONS}
                    ${PKG_${module_name}_${ecm_fpwc_comp}_DEFINITIONS})
            if(NOT TARGET ${module_name}::${ecm_fpwc_comp})
                add_library(${module_name}::${ecm_fpwc_comp} UNKNOWN IMPORTED)
                set_target_properties(${module_name}::${ecm_fpwc_comp} PROPERTIES
                    IMPORTED_LOCATION "${${module_name}_${ecm_fpwc_comp}_LIBRARY}"
                    INTERFACE_COMPILE_OPTIONS "${PKG_${module_name}_${ecm_fpwc_comp}_DEFINITIONS}"
                    INTERFACE_INCLUDE_DIRECTORIES "${${module_name}_${ecm_fpwc_comp}_INCLUDE_DIR}"
                    INTERFACE_LINK_LIBRARIES "${ecm_fpwc_dep_targets}"
                )
            endif()
            list(APPEND ${module_name}_TARGETS
                        "${module_name}::${ecm_fpwc_comp}")
        endif()
    endforeach()

    # The aggregate lists accumulate across calls; keep them duplicate-free.
    if(${module_name}_LIBRARIES)
        list(REMOVE_DUPLICATES ${module_name}_LIBRARIES)
    endif()
    if(${module_name}_INCLUDE_DIRS)
        list(REMOVE_DUPLICATES ${module_name}_INCLUDE_DIRS)
    endif()
    if(${module_name}_DEFINITIONS)
        list(REMOVE_DUPLICATES ${module_name}_DEFINITIONS)
    endif()
    if(${module_name}_TARGETS)
        list(REMOVE_DUPLICATES ${module_name}_TARGETS)
    endif()
endmacro()

View File

@@ -0,0 +1,170 @@
#.rst:
# FindWaylandScanner
# ------------------
#
# Try to find wayland-scanner.
#
# If the wayland-scanner executable is not in your PATH, you can provide
# an alternative name or full path location with the ``WaylandScanner_EXECUTABLE``
# variable.
#
# This will define the following variables:
#
# ``WaylandScanner_FOUND``
# True if wayland-scanner is available.
#
# ``WaylandScanner_EXECUTABLE``
# The wayland-scanner executable.
#
# If ``WaylandScanner_FOUND`` is TRUE, it will also define the following imported
# target:
#
# ``Wayland::Scanner``
# The wayland-scanner executable.
#
# This module provides the following functions to generate C protocol
# implementations:
#
# - ``ecm_add_wayland_client_protocol``
# - ``ecm_add_wayland_server_protocol``
#
# ::
#
# ecm_add_wayland_client_protocol(<source_files_var>
# PROTOCOL <xmlfile>
# BASENAME <basename>)
#
# Generate Wayland client protocol files from ``<xmlfile>`` XML
# definition for the ``<basename>`` interface and append those files
# to ``<source_files_var>``.
#
# ::
#
# ecm_add_wayland_server_protocol(<source_files_var>
# PROTOCOL <xmlfile>
# BASENAME <basename>)
#
# Generate Wayland server protocol files from ``<xmlfile>`` XML
# definition for the ``<basename>`` interface and append those files
# to ``<source_files_var>``.
#
# Since 1.4.0.
#=============================================================================
# Copyright 2012-2014 Pier Luigi Fiorini <pierluigi.fiorini@gmail.com>
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# 3. The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#=============================================================================
# Helper macros shipped next to this module.
include(${CMAKE_CURRENT_LIST_DIR}/ECMFindModuleHelpers.cmake)

ecm_find_package_version_check(WaylandScanner)

# Find wayland-scanner
find_program(WaylandScanner_EXECUTABLE NAMES wayland-scanner)

# WaylandScanner_FOUND is true when the executable was located.
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(WaylandScanner
    FOUND_VAR WaylandScanner_FOUND
    REQUIRED_VARS WaylandScanner_EXECUTABLE
)

mark_as_advanced(WaylandScanner_EXECUTABLE)

# Expose the tool as an imported executable target (created only once).
if(WaylandScanner_FOUND AND NOT TARGET Wayland::Scanner)
    add_executable(Wayland::Scanner IMPORTED)
    set_target_properties(Wayland::Scanner
        PROPERTIES IMPORTED_LOCATION "${WaylandScanner_EXECUTABLE}"
    )
endif()

# Metadata shown by FeatureSummary.
include(FeatureSummary)
set_package_properties(WaylandScanner
    PROPERTIES
        URL "https://wayland.freedesktop.org/"
        DESCRIPTION "Executable that converts XML protocol files to C code"
)
# ecm_add_wayland_client_protocol(<out_var>
#     PROTOCOL <xmlfile>
#     BASENAME <basename>)
#
# Adds custom commands that run wayland-scanner on <xmlfile> to produce
#   ${CMAKE_CURRENT_BINARY_DIR}/wayland-<basename>-client-protocol.h
#   ${CMAKE_CURRENT_BINARY_DIR}/wayland-<basename>-protocol.c
# and appends both paths to the list variable <out_var> in the caller's scope.
# Both PROTOCOL and BASENAME are required.
function(ecm_add_wayland_client_protocol out_var)
    # Parse arguments
    set(oneValueArgs PROTOCOL BASENAME)
    cmake_parse_arguments(ARGS "" "${oneValueArgs}" "" ${ARGN})

    if(ARGS_UNPARSED_ARGUMENTS)
        message(FATAL_ERROR "Unknown keywords given to ecm_add_wayland_client_protocol(): \"${ARGS_UNPARSED_ARGUMENTS}\"")
    endif()
    # Fail early with a clear message. Without this check a missing BASENAME
    # would silently produce files named "wayland--client-protocol.h".
    if(NOT DEFINED ARGS_PROTOCOL)
        message(FATAL_ERROR "Missing PROTOCOL argument to ecm_add_wayland_client_protocol()")
    endif()
    if(NOT DEFINED ARGS_BASENAME)
        message(FATAL_ERROR "Missing BASENAME argument to ecm_add_wayland_client_protocol()")
    endif()

    get_filename_component(_infile "${ARGS_PROTOCOL}" ABSOLUTE)
    set(_client_header "${CMAKE_CURRENT_BINARY_DIR}/wayland-${ARGS_BASENAME}-client-protocol.h")
    set(_code "${CMAKE_CURRENT_BINARY_DIR}/wayland-${ARGS_BASENAME}-protocol.c")

    set_source_files_properties(${_client_header} GENERATED)
    set_source_files_properties(${_code} GENERATED)
    set_property(SOURCE ${_client_header} PROPERTY SKIP_AUTOMOC ON)

    add_custom_command(OUTPUT "${_client_header}"
        COMMAND ${WaylandScanner_EXECUTABLE} client-header ${_infile} ${_client_header}
        DEPENDS ${WaylandScanner_EXECUTABLE} ${_infile}
        VERBATIM
    )

    # The code file additionally depends on the generated header.
    add_custom_command(OUTPUT "${_code}"
        COMMAND ${WaylandScanner_EXECUTABLE} private-code ${_infile} ${_code}
        DEPENDS ${WaylandScanner_EXECUTABLE} ${_infile} ${_client_header}
        VERBATIM
    )

    # Function scope: publish the extended list to the caller explicitly.
    list(APPEND ${out_var} "${_client_header}" "${_code}")
    set(${out_var} ${${out_var}} PARENT_SCOPE)
endfunction()
# ecm_add_wayland_server_protocol(<out_var>
#     PROTOCOL <xmlfile>
#     BASENAME <basename>)
#
# Generates the client protocol files via ecm_add_wayland_client_protocol()
# and, in addition, the server header
#   ${CMAKE_CURRENT_BINARY_DIR}/wayland-<basename>-server-protocol.h
# All generated paths are appended to <out_var> in the caller's scope.
function(ecm_add_wayland_server_protocol out_var)
    # Parse arguments
    set(oneValueArgs PROTOCOL BASENAME)
    cmake_parse_arguments(ARGS "" "${oneValueArgs}" "" ${ARGN})

    if(ARGS_UNPARSED_ARGUMENTS)
        message(FATAL_ERROR "Unknown keywords given to ecm_add_wayland_server_protocol(): \"${ARGS_UNPARSED_ARGUMENTS}\"")
    endif()

    # Client header + code first; this updates ${out_var} in this scope.
    ecm_add_wayland_client_protocol(${out_var}
        PROTOCOL ${ARGS_PROTOCOL}
        BASENAME ${ARGS_BASENAME}
    )

    get_filename_component(_protocol_xml ${ARGS_PROTOCOL} ABSOLUTE)
    set(_header "${CMAKE_CURRENT_BINARY_DIR}/wayland-${ARGS_BASENAME}-server-protocol.h")

    set_property(SOURCE ${_header} PROPERTY SKIP_AUTOMOC ON)
    set_source_files_properties(${_header} GENERATED)

    add_custom_command(
        OUTPUT "${_header}"
        COMMAND ${WaylandScanner_EXECUTABLE} server-header ${_protocol_xml} ${_header}
        DEPENDS ${WaylandScanner_EXECUTABLE} ${_protocol_xml}
        VERBATIM
    )

    # Hand the combined list back to the caller.
    list(APPEND ${out_var} "${_header}")
    set(${out_var} ${${out_var}} PARENT_SCOPE)
endfunction()

View File

@@ -0,0 +1,56 @@
# Host-specific instruction-set tuning, skipped when NO_ISA_EXTENSIONS is set.
# AArch64/arm64 targets use -mcpu=native, everything else uses -march=native;
# each flag is added only if the C++ compiler accepts it.
if (NOT NO_ISA_EXTENSIONS)
    include(CheckCXXCompilerFlag)
    if (CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64" OR CMAKE_SYSTEM_PROCESSOR MATCHES "arm64")
        CHECK_CXX_COMPILER_FLAG("-mcpu=native" COMPILER_SUPPORTS_MCPU_NATIVE)
        # Test the result variable of the check above. The previous code
        # tested COMPILER_SUPPORTS_MARCH_NATIVE, which is never set on this
        # branch, so -mcpu=native was never applied.
        if(COMPILER_SUPPORTS_MCPU_NATIVE)
            set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mcpu=native")
            set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mcpu=native")
        endif()
    else()
        CHECK_CXX_COMPILER_FLAG("-march=native" COMPILER_SUPPORTS_MARCH_NATIVE)
        if(COMPILER_SUPPORTS_MARCH_NATIVE)
            set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=native")
            set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=native")
        endif()
    endif()
    # On Windows, /arch:AVX2 is appended unconditionally.
    if(WIN32)
        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /arch:AVX2")
        set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /arch:AVX2")
    endif()
endif()
# USE_WAYLAND is ON only for Linux builds that did not request LEGACY.
if(NOT LEGACY AND CMAKE_SYSTEM_NAME STREQUAL "Linux")
    set(USE_WAYLAND ON)
else()
    set(USE_WAYLAND OFF)
endif()

# Platform-wide compile settings.
if(NOT WIN32)
    string(APPEND CMAKE_CXX_FLAGS " -fdiagnostics-color=always")
    string(APPEND CMAKE_C_FLAGS " -fdiagnostics-color=always")
else()
    add_definitions(-DNOMINMAX -DWIN32_LEAN_AND_MEAN)
    add_compile_options(/MP)
endif()

# Interprocedural optimization for every non-Debug, non-Emscripten build.
if(NOT CMAKE_BUILD_TYPE STREQUAL "Debug" AND NOT EMSCRIPTEN)
    set(CMAKE_INTERPROCEDURAL_OPTIMIZATION ON)
endif()

# Emscripten builds compile and link with -pthread.
if(EMSCRIPTEN)
    add_compile_options(-pthread)
    add_link_options(-pthread)
endif()

# Clang (outside Emscripten): select the mold linker when it is installed, and
# keep unused debug types in Debug builds.
if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang" AND NOT EMSCRIPTEN)
    find_program(MOLD_LINKER mold)
    if(MOLD_LINKER)
        set(CMAKE_LINKER_TYPE "MOLD")
    endif()

    if(CMAKE_BUILD_TYPE STREQUAL "Debug")
        string(APPEND CMAKE_CXX_FLAGS " -fno-eliminate-unused-debug-types")
        string(APPEND CMAKE_C_FLAGS " -fno-eliminate-unused-debug-types")
    endif()
endif()

# Generates a .gitignore whose content is "*".
# NOTE(review): presumably lands in the build directory so build output is
# ignored by git - confirm.
file(GENERATE OUTPUT .gitignore CONTENT "*")

View File

@@ -0,0 +1,39 @@
# --- Sources taken from public/common -------------------------------------
set(TRACY_COMMON_DIR ${CMAKE_CURRENT_LIST_DIR}/../public/common)
set(TRACY_COMMON_SOURCES
    tracy_lz4.cpp
    tracy_lz4hc.cpp
    TracySocket.cpp
    TracyStackFrames.cpp
    TracySystem.cpp
)
# Turn the bare file names into full paths.
list(TRANSFORM TRACY_COMMON_SOURCES PREPEND "${TRACY_COMMON_DIR}/")

# --- Sources taken from server ---------------------------------------------
set(TRACY_SERVER_DIR ${CMAKE_CURRENT_LIST_DIR}/../server)
set(TRACY_SERVER_SOURCES
    TracyMemory.cpp
    TracyMmap.cpp
    TracyPrint.cpp
    TracySysUtil.cpp
    TracyTaskDispatch.cpp
    TracyTextureCompression.cpp
    TracyThreadCompress.cpp
    TracyWorker.cpp
)
list(TRANSFORM TRACY_SERVER_SOURCES PREPEND "${TRACY_SERVER_DIR}/")

# --- Static library built from both source sets ----------------------------
add_library(TracyServer STATIC
    ${TRACY_COMMON_SOURCES}
    ${TRACY_SERVER_SOURCES}
)
target_include_directories(TracyServer
    PUBLIC ${TRACY_COMMON_DIR} ${TRACY_SERVER_DIR}
)
target_link_libraries(TracyServer
    PUBLIC TracyCapstone TracyZstd
)

# NO_STATISTICS is exported to consumers as TRACY_NO_STATISTICS.
if(NO_STATISTICS)
    target_compile_definitions(TracyServer PUBLIC TRACY_NO_STATISTICS)
endif()

# TracyTbb is linked only on non-Apple, non-Emscripten UNIX when parallel STL
# is enabled.
if(NOT NO_PARALLEL_STL AND UNIX AND NOT APPLE AND NOT EMSCRIPTEN)
    target_link_libraries(TracyServer PRIVATE TracyTbb)
endif()

View File

@@ -0,0 +1,238 @@
# Vendor Specific CMake
# The Tracy project keeps most vendor source locally

# Repository root, relative to this file (note the trailing slash).
set(ROOT_DIR "${CMAKE_CURRENT_LIST_DIR}/../")

# Dependencies are taken from the system first and if not found, they are pulled with CPM and built from source
include(FindPkgConfig)
include(${CMAKE_CURRENT_LIST_DIR}/CPM.cmake)

# When ON, the corresponding dependency is fetched with CPM even if pkg-config
# finds a system copy. Capstone defaults to ON.
option(DOWNLOAD_CAPSTONE "Force download capstone" ON)
option(DOWNLOAD_GLFW     "Force download glfw"     OFF)
option(DOWNLOAD_FREETYPE "Force download freetype" OFF)
# capstone
#
# Defines the INTERFACE target TracyCapstone, backed either by the system
# capstone reported by pkg-config (only when DOWNLOAD_CAPSTONE is OFF) or by a
# copy fetched through CPM.
pkg_check_modules(CAPSTONE capstone)
if(CAPSTONE_FOUND AND NOT DOWNLOAD_CAPSTONE)
    # pkg_check_modules() defines CAPSTONE_VERSION. There is no plain
    # CAPSTONE variable, so the old message always printed an empty value.
    message(STATUS "Capstone found: ${CAPSTONE_VERSION}")
    add_library(TracyCapstone INTERFACE)
    target_include_directories(TracyCapstone INTERFACE ${CAPSTONE_INCLUDE_DIRS})
    target_link_libraries(TracyCapstone INTERFACE ${CAPSTONE_LINK_LIBRARIES})
else()
    CPMAddPackage(
        NAME capstone
        GITHUB_REPOSITORY capstone-engine/capstone
        GIT_TAG 5.0.3
    )
    add_library(TracyCapstone INTERFACE)
    # The include path points at include/capstone inside the fetched sources.
    target_include_directories(TracyCapstone INTERFACE ${capstone_SOURCE_DIR}/include/capstone)
    target_link_libraries(TracyCapstone INTERFACE capstone)
endif()
# GLFW
#
# Only needed when neither Wayland nor Emscripten is in use. Defines the
# INTERFACE target TracyGlfw3 from the system package (pkg-config "glfw3")
# unless DOWNLOAD_GLFW forces a CPM download.
if(NOT USE_WAYLAND AND NOT EMSCRIPTEN)
    pkg_check_modules(GLFW glfw3)

    if(GLFW_FOUND AND NOT DOWNLOAD_GLFW)
        # System copy.
        add_library(TracyGlfw3 INTERFACE)
        target_include_directories(TracyGlfw3 INTERFACE ${GLFW_INCLUDE_DIRS})
        target_link_libraries(TracyGlfw3 INTERFACE ${GLFW_LINK_LIBRARIES})
    else()
        # Fetched copy; examples, tests, docs and install rules are disabled.
        CPMAddPackage(
            NAME glfw
            GITHUB_REPOSITORY glfw/glfw
            GIT_TAG 3.4
            OPTIONS
                "GLFW_BUILD_EXAMPLES OFF"
                "GLFW_BUILD_TESTS OFF"
                "GLFW_BUILD_DOCS OFF"
                "GLFW_INSTALL OFF"
        )
        add_library(TracyGlfw3 INTERFACE)
        target_link_libraries(TracyGlfw3 INTERFACE glfw)
    endif()
endif()
# freetype
#
# Defines the INTERFACE target TracyFreetype from the system package
# (pkg-config "freetype2") unless DOWNLOAD_FREETYPE forces a CPM download.
pkg_check_modules(FREETYPE freetype2)

if(FREETYPE_FOUND AND NOT DOWNLOAD_FREETYPE)
    # System copy.
    add_library(TracyFreetype INTERFACE)
    target_include_directories(TracyFreetype INTERFACE ${FREETYPE_INCLUDE_DIRS})
    target_link_libraries(TracyFreetype INTERFACE ${FREETYPE_LINK_LIBRARIES})
else()
    # Fetched copy, configured without HarfBuzz.
    CPMAddPackage(
        NAME freetype
        GITHUB_REPOSITORY freetype/freetype
        GIT_TAG VER-2-13-2
        OPTIONS
            "FT_DISABLE_HARFBUZZ ON"
            "FT_WITH_HARFBUZZ OFF"
    )
    add_library(TracyFreetype INTERFACE)
    target_link_libraries(TracyFreetype INTERFACE freetype)
endif()
# zstd
#
# Builds the bundled zstd sources as the static library TracyZstd.
set(ZSTD_DIR "${ROOT_DIR}/zstd")

# File names are relative to ZSTD_DIR; they are made absolute below.
set(ZSTD_SOURCES
    decompress/zstd_ddict.c
    decompress/zstd_decompress_block.c
    decompress/huf_decompress.c
    decompress/zstd_decompress.c
    common/zstd_common.c
    common/error_private.c
    common/xxhash.c
    common/entropy_common.c
    common/debug.c
    common/threading.c
    common/pool.c
    common/fse_decompress.c
    compress/zstd_ldm.c
    compress/zstd_compress_superblock.c
    compress/zstd_opt.c
    compress/zstd_compress_sequences.c
    compress/fse_compress.c
    compress/zstd_double_fast.c
    compress/zstd_compress.c
    compress/zstd_compress_literals.c
    compress/hist.c
    compress/zstdmt_compress.c
    compress/zstd_lazy.c
    compress/huf_compress.c
    compress/zstd_fast.c
    dictBuilder/zdict.c
    dictBuilder/cover.c
    dictBuilder/divsufsort.c
    dictBuilder/fastcover.c
)
list(TRANSFORM ZSTD_SOURCES PREPEND "${ZSTD_DIR}/")

# NOTE(review): huf_decompress_amd64.S is not part of ZSTD_SOURCES and
# ZSTD_DISABLE_ASM is defined below, so this property looks unused - confirm.
set_property(
    SOURCE ${ZSTD_DIR}/decompress/huf_decompress_amd64.S
    APPEND PROPERTY COMPILE_OPTIONS "-x" "assembler-with-cpp"
)

add_library(TracyZstd STATIC ${ZSTD_SOURCES})
target_include_directories(TracyZstd PUBLIC ${ZSTD_DIR})
target_compile_definitions(TracyZstd PRIVATE ZSTD_DISABLE_ASM)
# Diff Template Library
#
# Header-only: exposed as the INTERFACE target TracyDtl.
set(DTL_DIR "${ROOT_DIR}/dtl")

# NOTE(review): the pattern "*.hpp" is not an absolute path. RELATIVE only
# changes how results are reported, so this may be globbing the current source
# directory rather than DTL_DIR - confirm against the file(GLOB) documentation.
file(GLOB_RECURSE DTL_HEADERS
    CONFIGURE_DEPENDS
    RELATIVE ${DTL_DIR}
    "*.hpp"
)

add_library(TracyDtl INTERFACE)
target_sources(TracyDtl INTERFACE ${DTL_HEADERS})
target_include_directories(TracyDtl INTERFACE ${DTL_DIR})
# Get Opt
#
# Bundled getopt implementation, built as the static library TracyGetOpt.
set(GETOPT_DIR "${ROOT_DIR}/getopt")
set(GETOPT_SOURCES ${GETOPT_DIR}/getopt.c)
set(GETOPT_HEADERS ${GETOPT_DIR}/getopt.h)

add_library(TracyGetOpt STATIC
    ${GETOPT_SOURCES}
    ${GETOPT_HEADERS}
)
# PUBLIC so that dependants can include the header from GETOPT_DIR.
target_include_directories(TracyGetOpt PUBLIC ${GETOPT_DIR})
# ImGui
#
# Bundled Dear ImGui sources plus the FreeType font backend, built as the
# static library TracyImGui.
set(IMGUI_DIR "${ROOT_DIR}/imgui")

# File names are relative to IMGUI_DIR; they are made absolute below.
set(IMGUI_SOURCES
    imgui_widgets.cpp
    imgui_draw.cpp
    imgui_demo.cpp
    imgui.cpp
    imgui_tables.cpp
    misc/freetype/imgui_freetype.cpp
)
list(TRANSFORM IMGUI_SOURCES PREPEND "${IMGUI_DIR}/")

# add_definitions() applies to the current directory scope, not only to the
# TracyImGui target.
add_definitions(-DIMGUI_ENABLE_FREETYPE)

add_library(TracyImGui STATIC ${IMGUI_SOURCES})
target_include_directories(TracyImGui PUBLIC ${IMGUI_DIR})
target_link_libraries(TracyImGui PUBLIC TracyFreetype)
# NFD
#
# Native file dialog, built as the static library TracyNfd unless
# NO_FILESELECTOR is set or the build targets Emscripten. The backend is chosen
# per platform: Win32, Cocoa, or - on other systems - GTK3 when
# GTK_FILESELECTOR is set and the D-Bus portal otherwise.
if (NOT NO_FILESELECTOR AND NOT EMSCRIPTEN)
    set(NFD_DIR "${ROOT_DIR}/nfd")

    if (WIN32)
        set(NFD_SOURCES "${NFD_DIR}/nfd_win.cpp")
    elseif (APPLE)
        set(NFD_SOURCES "${NFD_DIR}/nfd_cocoa.m")
    else()
        if (GTK_FILESELECTOR)
            set(NFD_SOURCES "${NFD_DIR}/nfd_gtk.cpp")
        else()
            set(NFD_SOURCES "${NFD_DIR}/nfd_portal.cpp")
        endif()
    endif()

    # NOTE(review): the pattern "*.h" is not an absolute path, so this may be
    # globbing the current source directory rather than NFD_DIR - confirm.
    file(GLOB_RECURSE NFD_HEADERS CONFIGURE_DEPENDS RELATIVE ${NFD_DIR} "*.h")

    add_library(TracyNfd STATIC ${NFD_SOURCES} ${NFD_HEADERS})
    target_include_directories(TracyNfd PUBLIC ${NFD_DIR})

    if (APPLE)
        find_library(APPKIT_LIBRARY AppKit)
        find_library(UNIFORMTYPEIDENTIFIERS_LIBRARY UniformTypeIdentifiers)
        target_link_libraries(TracyNfd PUBLIC ${APPKIT_LIBRARY} ${UNIFORMTYPEIDENTIFIERS_LIBRARY})
    elseif (UNIX)
        if (GTK_FILESELECTOR)
            pkg_check_modules(GTK3 gtk+-3.0)
            if (NOT GTK3_FOUND)
                # The message names the variable this file actually tests
                # (GTK_FILESELECTOR), so the user knows what to change.
                message(FATAL_ERROR "GTK3 not found. Please install it or set GTK_FILESELECTOR to OFF.")
            endif()
            add_library(TracyGtk3 INTERFACE)
            target_include_directories(TracyGtk3 INTERFACE ${GTK3_INCLUDE_DIRS})
            target_link_libraries(TracyGtk3 INTERFACE ${GTK3_LINK_LIBRARIES})
            target_link_libraries(TracyNfd PUBLIC TracyGtk3)
        else()
            pkg_check_modules(DBUS dbus-1)
            if (NOT DBUS_FOUND)
                message(FATAL_ERROR "D-Bus not found. Please install it or set GTK_FILESELECTOR to ON.")
            endif()
            add_library(TracyDbus INTERFACE)
            target_include_directories(TracyDbus INTERFACE ${DBUS_INCLUDE_DIRS})
            target_link_libraries(TracyDbus INTERFACE ${DBUS_LINK_LIBRARIES})
            target_link_libraries(TracyNfd PUBLIC TracyDbus)
        endif()
    endif()
endif()
# TBB
#
# With NO_PARALLEL_STL the sources are compiled with -DNO_PARALLEL_SORT.
# Otherwise, on non-Apple, non-Emscripten UNIX, the INTERFACE target TracyTbb
# is defined from the system TBB or from a CPM download.
if(NO_PARALLEL_STL)
    string(APPEND CMAKE_CXX_FLAGS " -DNO_PARALLEL_SORT")
    string(APPEND CMAKE_C_FLAGS " -DNO_PARALLEL_SORT")
elseif(UNIX AND NOT APPLE AND NOT EMSCRIPTEN)
    # Tracy does not use TBB directly, but the implementation of parallel
    # algorithms in some versions of libstdc++ depends on TBB. When it does,
    # you must explicitly link against -ltbb.
    #
    # Some distributions have pkg-config files for TBB, others don't.
    pkg_check_modules(TBB tbb)

    if(TBB_FOUND)
        # System copy.
        add_library(TracyTbb INTERFACE)
        target_include_directories(TracyTbb INTERFACE ${TBB_INCLUDE_DIRS})
        target_link_libraries(TracyTbb INTERFACE ${TBB_LINK_LIBRARIES})
    else()
        # Fetched copy, built without its tests.
        CPMAddPackage(
            NAME tbb
            GITHUB_REPOSITORY oneapi-src/oneTBB
            GIT_TAG v2021.12.0-rc2
            OPTIONS "TBB_TEST OFF"
        )
        add_library(TracyTbb INTERFACE)
        target_link_libraries(TracyTbb INTERFACE tbb)
    endif()
endif()

View File

@@ -0,0 +1,24 @@
# Extracts the Tracy version from public/common/TracyVersion.hpp and defines
# TRACY_VERSION_MAJOR, TRACY_VERSION_MINOR, TRACY_VERSION_PATCH and
# TRACY_VERSION_STRING ("<major>.<minor>.<patch>").
cmake_minimum_required(VERSION 3.10)

message("Parsing public/common/TracyVersion.hpp file")

file(READ "${CMAKE_CURRENT_LIST_DIR}/../public/common/TracyVersion.hpp" version)

# Note: This looks for a specific pattern in TracyVersion.hpp, if it changes
# this needs updating.
#
# The file contents are passed quoted so they reach string() as one argument
# instead of being split into list elements at every ';'.
string(REGEX MATCH "Major = ([0-9]+)" _ "${version}")
# This works due to the above () subexpression selection. See
# https://cmake.org/cmake/help/latest/command/string.html#regex-match for more
# details
set(TRACY_VERSION_MAJOR ${CMAKE_MATCH_1})

string(REGEX MATCH "Minor = ([0-9]+)" _ "${version}")
set(TRACY_VERSION_MINOR ${CMAKE_MATCH_1})

string(REGEX MATCH "Patch = ([0-9]+)" _ "${version}")
set(TRACY_VERSION_PATCH ${CMAKE_MATCH_1})

set(TRACY_VERSION_STRING "${TRACY_VERSION_MAJOR}.${TRACY_VERSION_MINOR}.${TRACY_VERSION_PATCH}")

message("VERSION ${TRACY_VERSION_STRING}")

View File

@@ -0,0 +1,28 @@
# Build script for the tracy-csvexport command-line tool.
cmake_minimum_required(VERSION 3.16)

# User-facing switches consumed by the shared cmake/*.cmake fragments.
option(NO_ISA_EXTENSIONS "Disable ISA extensions (don't pass -march=native or -mcpu=native to the compiler)" OFF)
option(NO_PARALLEL_STL "Disable parallel STL" OFF)

# Statistics are always enabled for this tool.
set(NO_STATISTICS OFF)

# version.cmake defines TRACY_VERSION_STRING, which project() needs below.
include(${CMAKE_CURRENT_LIST_DIR}/../cmake/version.cmake)

set(CMAKE_CXX_STANDARD 20)

project(tracy-csvexport
    LANGUAGES C CXX
    VERSION ${TRACY_VERSION_STRING}
)

# Shared build configuration, vendored dependencies and the server library.
include(${CMAKE_CURRENT_LIST_DIR}/../cmake/config.cmake)
include(${CMAKE_CURRENT_LIST_DIR}/../cmake/vendor.cmake)
include(${CMAKE_CURRENT_LIST_DIR}/../cmake/server.cmake)

set(PROGRAM_FILES
    src/csvexport.cpp
)

# NOTE(review): COMMON_FILES and SERVER_FILES are not set in this file;
# presumably they expand to nothing - confirm.
add_executable(${PROJECT_NAME}
    ${PROGRAM_FILES}
    ${COMMON_FILES}
    ${SERVER_FILES}
)
target_link_libraries(${PROJECT_NAME} PRIVATE TracyServer TracyGetOpt)

set_property(DIRECTORY ${CMAKE_CURRENT_LIST_DIR} PROPERTY VS_STARTUP_PROJECT ${PROJECT_NAME})

View File

@@ -0,0 +1,353 @@
#ifdef _WIN32
# include <windows.h>
#endif
#include <algorithm>
#include <cctype>
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include <math.h>
#include <stdio.h>
#include <stdint.h>
#include "../../server/TracyFileRead.hpp"
#include "../../server/TracyWorker.hpp"
#include "../../getopt/getopt.h"
// Prints the command-line help text to stderr and terminates the process.
//
// e: exit status passed to exit(). This function never returns.
void print_usage_exit(int e)
{
    fprintf(stderr, "Extract statistics from a trace to a CSV format\n");
    fprintf(stderr, "Usage:\n");
    fprintf(stderr, " extract [OPTION...] <trace file>\n");
    fprintf(stderr, "\n");
    fprintf(stderr, " -h, --help Print usage\n");
    // The quotes must be escaped: an unescaped "" inside the literal just
    // splits it into adjacent literals and prints "(default: )".
    fprintf(stderr, " -f, --filter arg Filter zone names (default: \"\")\n");
    fprintf(stderr, " -s, --sep arg CSV separator (default: ,)\n");
    fprintf(stderr, " -c, --case Case sensitive filtering\n");
    fprintf(stderr, " -e, --self Get self times\n");
    fprintf(stderr, " -u, --unwrap Report each zone event\n");
    fprintf(stderr, " -m, --messages Report only messages\n");
    exit(e);
}
// Command-line options collected by parse_args().
//
// NOTE: parse_args() initializes this struct with a positional aggregate
// initializer, so the member order below must not change.
struct Args {
// Zone-name filter (-f/--filter); initialized to "".
const char* filter;
// CSV column separator (-s/--sep); initialized to ",".
const char* separator;
// Trace file path, taken from the single positional argument.
const char* trace_file;
// Set by -c/--case.
bool case_sensitive;
// Set by -e/--self.
bool self_time;
// Set by -u/--unwrap.
bool unwrap;
// Set by -m/--messages.
bool unwrapMessages;
};
// Parses the command line into an Args value.
//
// argc, argv: the arguments received by main().
//
// Returns the parsed options; trace_file is the single positional argument.
// Calls print_usage_exit() - and therefore does not return - when no
// arguments are given, for -h/--help, for an unrecognized option, or when the
// number of positional arguments is not exactly one.
Args parse_args(int argc, char** argv)
{
    if (argc == 1)
    {
        print_usage_exit(1);
    }

    Args args = { "", ",", "", false, false, false, false };

    // --filter and --sep require a value, matching the "f:" and "s:" short
    // options and the help text. With optional_argument, "--filter foo" would
    // leave optarg NULL and store a null pointer in args.
    struct option long_opts[] = {
        { "help", no_argument, NULL, 'h' },
        { "filter", required_argument, NULL, 'f' },
        { "sep", required_argument, NULL, 's' },
        { "case", no_argument, NULL, 'c' },
        { "self", no_argument, NULL, 'e' },
        { "unwrap", no_argument, NULL, 'u' },
        { "messages", no_argument, NULL, 'm' },
        { NULL, 0, NULL, 0 }
    };

    int c;
    while ((c = getopt_long(argc, argv, "hf:s:ceum", long_opts, NULL)) != -1)
    {
        switch (c)
        {
        case 'h':
            print_usage_exit(0);
            break;
        case 'f':
            args.filter = optarg;
            break;
        case 's':
            args.separator = optarg;
            break;
        case 'c':
            args.case_sensitive = true;
            break;
        case 'e':
            args.self_time = true;
            break;
        case 'u':
            args.unwrap = true;
            break;
        case 'm':
            args.unwrapMessages = true;
            break;
        default:
            print_usage_exit(1);
            break;
        }
    }

    // Exactly one positional argument (the trace file) must remain.
    if (argc != optind + 1)
    {
        print_usage_exit(1);
    }
    args.trace_file = argv[optind];

    return args;
}
// Reports whether `term` occurs anywhere inside `s`.
//
// term:           text to look for.
// s:              text to search in.
// case_sensitive: when false (the default) both strings are lower-cased with
//                 std::tolower before the comparison.
bool is_substring(
    const char* term,
    const char* s,
    bool case_sensitive = false
){
    std::string needle(term);
    std::string haystack(s);

    if (!case_sensitive)
    {
        // Lower-case a string in place, one byte at a time.
        const auto to_lower_in_place = [](std::string& text) {
            for (char& ch : text)
            {
                ch = static_cast<char>(std::tolower(static_cast<unsigned char>(ch)));
            }
        };
        to_lower_in_place(needle);
        to_lower_in_place(haystack);
    }

    const bool found = haystack.find(needle) != std::string::npos;
    return found;
}
// Returns the display name for source location `id`: the location's own name
// when it is active, otherwise its function name.
const char* get_name(int32_t id, const tracy::Worker& worker)
{
    const auto& location = worker.GetSourceLocation(id);
    const bool has_own_name = location.name.active;
    return worker.GetString(has_own_name ? location.name : location.function);
}
// Streams every element of `v` into one string, placing `sep` between
// consecutive elements. An empty container yields "".
//
// The first element is tracked with a flag rather than by comparing element
// addresses with &v[0], so any range-for iterable container is accepted, not
// only those providing operator[] over addressable elements.
template <typename T>
std::string join(const T& v, const char* sep) {
    std::ostringstream out;
    bool first = true;
    for (const auto& item : v) {
        if (!first) {
            out << sep;
        }
        first = false;
        out << item;
    }
    return out.str();
}
// From TracyView.cpp
// Sums End() - Start() over the direct children of `zone`.
// Returns 0 when the zone has no children.
int64_t GetZoneChildTimeFast(
    const tracy::Worker& worker,
    const tracy::ZoneEvent& zone
){
    if( !zone.HasChildren() )
    {
        return 0;
    }

    int64_t total = 0;
    auto& children = worker.GetZoneChildren( zone.Child() );
    if( children.is_magic() )
    {
        // A "magic" child list is reinterpreted as a vector of ZoneEvent
        // values; otherwise its elements are dereferenced like pointers.
        auto& events = *(tracy::Vector<tracy::ZoneEvent>*)&children;
        for( auto& child : events )
        {
            assert( child.IsEndValid() );
            total += child.End() - child.Start();
        }
    }
    else
    {
        for( auto& child : children )
        {
            assert( child->IsEndValid() );
            total += child->End() - child->Start();
        }
    }
    return total;
}
// Entry point: loads the trace file named on the command line and prints its
// contents to stdout as rows whose fields are joined with args.separator.
//
// Three output modes exist:
//   * --messages : one row per trace message (name, time);
//   * --unwrap   : one row per zone event of every selected source location;
//   * default    : one row of aggregate statistics per selected source location.
//
// Returns 1 when the trace file cannot be opened, 0 otherwise.
int main(int argc, char** argv)
{
#ifdef _WIN32
    if (!AttachConsole(ATTACH_PARENT_PROCESS))
    {
        AllocConsole();
        SetConsoleMode(GetStdHandle(STD_OUTPUT_HANDLE), 0x07);
    }
#endif

    Args args = parse_args(argc, argv);

    auto f = std::unique_ptr<tracy::FileRead>(
        tracy::FileRead::Open(args.trace_file)
    );
    if (!f)
    {
        fprintf(stderr, "Could not open file %s\n", args.trace_file);
        return 1;
    }

    auto worker = tracy::Worker(*f);

    // Message mode: dump the messages and stop, zones are not touched.
    if (args.unwrapMessages)
    {
        const auto& msgs = worker.GetMessages();

        if (msgs.size() > 0)
        {
            std::vector<const char*> columnsForMessages;
            columnsForMessages = {
                "MessageName", "total_ns"
            };
            std::string headerForMessages = join(columnsForMessages, args.separator);
            printf("%s\n", headerForMessages.data());

            for(auto& it : msgs)
            {
                std::vector<std::string> values(columnsForMessages.size());

                values[0] = worker.GetString(it->ref);
                values[1] = std::to_string(it->time);

                std::string row = join(values, args.separator);
                printf("%s\n", row.data());
            }
        }
        else
        {
            printf("There are currently no messages!\n");
        }

        return 0;
    }

    // Poll until the worker reports the per-source-location zone data ready.
    while (!worker.AreSourceLocationZonesReady())
    {
        std::this_thread::sleep_for(std::chrono::milliseconds(10));
    }

    // Select the source locations with a non-zero total time whose name
    // passes the filter (an empty filter passes everything).
    auto& slz = worker.GetSourceLocationZones();
    tracy::Vector<decltype(slz.begin())> slz_selected;
    // Capacity for every entry is reserved up front, which is what makes the
    // push_back_no_space_check() calls below valid.
    slz_selected.reserve(slz.size());

    uint32_t total_cnt = 0;
    for(auto it = slz.begin(); it != slz.end(); ++it)
    {
        if(it->second.total != 0)
        {
            ++total_cnt;
            if(args.filter[0] == '\0')
            {
                slz_selected.push_back_no_space_check(it);
            }
            else
            {
                auto name = get_name(it->first, worker);
                if(is_substring(args.filter, name, args.case_sensitive))
                {
                    slz_selected.push_back_no_space_check(it);
                }
            }
        }
    }

    std::vector<const char*> columns;
    if (args.unwrap)
    {
        columns = {
            "name", "src_file", "src_line", "ns_since_start", "exec_time_ns", "thread"
        };
    }
    else
    {
        columns = {
            "name", "src_file", "src_line", "total_ns", "total_perc",
            "counts", "mean_ns", "min_ns", "max_ns", "std_ns"
        };
    }
    std::string header = join(columns, args.separator);
    printf("%s\n", header.data());

    const auto last_time = worker.GetLastTime();
    for(auto& it : slz_selected)
    {
        std::vector<std::string> values(columns.size());

        // The first three columns are identical in both modes.
        values[0] = get_name(it->first, worker);

        const auto& srcloc = worker.GetSourceLocation(it->first);
        values[1] = worker.GetString(srcloc.file);
        values[2] = std::to_string(srcloc.line);

        const auto& zone_data = it->second;

        if (args.unwrap)
        {
            // One row per recorded zone event of this source location.
            for (const auto& zone_thread_data : zone_data.zones) {
                const auto zone_event = zone_thread_data.Zone();
                const auto tId = zone_thread_data.Thread();
                const auto start = zone_event->Start();
                const auto end = zone_event->End();

                values[3] = std::to_string(start);

                auto timespan = end - start;
                if (args.self_time) {
                    // Self time excludes the time spent in direct children.
                    timespan -= GetZoneChildTimeFast(worker, *zone_event);
                }
                values[4] = std::to_string(timespan);
                values[5] = std::to_string(tId);

                std::string row = join(values, args.separator);
                printf("%s\n", row.data());
            }
        }
        else
        {
            // Aggregate statistics for this source location.
            const auto time = args.self_time ? zone_data.selfTotal : zone_data.total;
            values[3] = std::to_string(time);
            values[4] = std::to_string(100. * time / last_time);

            values[5] = std::to_string(zone_data.zones.size());

            const auto avg = (args.self_time ? zone_data.selfTotal : zone_data.total)
                / zone_data.zones.size();
            values[6] = std::to_string(avg);

            const auto tmin = args.self_time ? zone_data.selfMin : zone_data.min;
            const auto tmax = args.self_time ? zone_data.selfMax : zone_data.max;
            values[7] = std::to_string(tmin);
            values[8] = std::to_string(tmax);

            // Sample standard deviation from the sum of squares:
            // sum((t - avg)^2) = sumSq - 2 * total * avg + n * avg^2.
            // NOTE(review): sumSq and total are used here even with --self,
            // while avg then comes from selfTotal -- confirm this is intended.
            const auto sz = zone_data.zones.size();
            const auto ss = zone_data.sumSq
                - 2. * zone_data.total * avg
                + avg * avg * sz;
            double std = 0;
            if( sz > 1 )
                std = sqrt(ss / (sz - 1));
            values[9] = std::to_string(std);

            std::string row = join(values, args.separator);
            printf("%s\n", row.data());
        }
    }

    return 0;
}

Binary file not shown.

After

Width:  |  Height:  |  Size: 213 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 250 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 234 KiB

View File

@@ -0,0 +1,706 @@
/**
dtl -- Diff Template Library
In short, Diff Template Library is distributed under so called "BSD license",
Copyright (c) 2015 Tatsuhiko Kubo <cubicdaiya@gmail.com>
All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of the authors nor the names of its contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/* If you use this library, you must include dtl.hpp only. */
#ifndef DTL_DIFF_H
#define DTL_DIFF_H
namespace dtl {
/**
* diff class template
* sequence must support random_access_iterator.
*/
template <typename elem, typename sequence = vector< elem >, typename comparator = Compare< elem > >
class Diff
{
private :
dtl_typedefs(elem, sequence)
sequence A;
sequence B;
size_t M;
size_t N;
size_t delta;
size_t offset;
long long *fp;
long long editDistance;
Lcs< elem > lcs;
Ses< elem > ses;
editPath path;
editPathCordinates pathCordinates;
bool swapped;
bool huge;
bool trivial;
bool editDistanceOnly;
uniHunkVec uniHunks;
comparator cmp;
long long ox;
long long oy;
public :
/**
 * Constructs a diff over two empty sequences.
 * init() is run so that the sizes, the edit distance, the option flags and
 * the fp pointer all hold defined values instead of being left uninitialized.
 */
Diff () {
    init();
}

/**
 * Constructs a diff from a to b.
 */
Diff (const sequence& a,
      const sequence& b) : A(a), B(b), ses(false) {
    init();
}

/**
 * Constructs a diff from a to b; deletesFirst is forwarded to the edit
 * script (Ses) constructor.
 */
Diff (const sequence& a,
      const sequence& b,
      bool deletesFirst) : A(a), B(b), ses(deletesFirst) {
    init();
}

/**
 * Constructs a diff from a to b that compares elements with comp.
 */
Diff (const sequence& a,
      const sequence& b,
      const comparator& comp) : A(a), B(b), ses(false), cmp(comp) {
    init();
}

/**
 * Constructs a diff from a to b with both a Ses ordering flag and a
 * custom comparator.
 */
Diff (const sequence& a,
      const sequence& b,
      bool deleteFirst,
      const comparator& comp) : A(a), B(b), ses(deleteFirst), cmp(comp) {
    init();
}

~Diff() {}
// Edit distance accumulated so far; init() sets it to 0 and compose() adds to it.
long long getEditDistance () const {
return editDistance;
}
// Copy of the longest common subsequence recorded by compose().
Lcs< elem > getLcs () const {
return lcs;
}
// The elements of the longest common subsequence as a plain vector.
elemVec getLcsVec () const {
return lcs.getSequence();
}
// Copy of the shortest edit script recorded by compose().
Ses< elem > getSes () const {
return ses;
}
// Copy of the unified-format hunks built by composeUnifiedHunks().
uniHunkVec getUniHunks () const {
return uniHunks;
}
/* These should be deprecated */
// "huge" flag: when set, compose() reserves MAX_CORDINATES_SIZE path coordinates up front.
bool isHuge () const {
return huge;
}
void onHuge () {
this->huge = true;
}
void offHuge () {
this->huge = false;
}
// "unserious" is the old name of the trivial flag: when set, recordSequence()
// records the unprocessed remainder as plain adds/deletes instead of
// requesting another pass.
bool isUnserious () const {
return trivial;
}
void onUnserious () {
this->trivial = true;
}
void offUnserious () {
this->trivial = false;
}
// Makes compose() compute the edit distance only (no LCS / SES recording).
void onOnlyEditDistance () {
this->editDistanceOnly = true;
}
/* These are the replacements for the above */
// Whether the huge flag is set (compose() then pre-reserves the path coordinates).
bool hugeEnabled () const {
return huge;
}
void enableHuge () {
this->huge = true;
}
void disableHuge () {
this->huge = false;
}
// Whether the trivial flag is set (see recordSequence()).
bool trivialEnabled () const {
return trivial;
}
void enableTrivial () {
this->trivial = true;
}
void disableTrivial () {
this->trivial = false;
}
// NOTE: despite the query-like name this is a setter; it switches
// edit-distance-only mode on and returns nothing.
void editDistanceOnlyEnabled () {
this->editDistanceOnly = true;
}
/**
 * patching with Unified Format Hunks
 *
 * Applies the hunks stored in uniHunks to a copy of seq and returns the
 * patched copy. The stored hunks are left untouched: the shifted start
 * position of each hunk is kept in a local variable, so calling uniPatch()
 * again, or printing the hunks afterwards, sees the original offsets.
 */
sequence uniPatch (const sequence& seq) {
    elemList seqLst(seq.begin(), seq.end());
    sesElemVec shunk;
    sesElemVec_iter vsesIt;
    elemList_iter lstIt = seqLst.begin();
    long long inc_dec_total = 0;   // net length change applied by earlier hunks
    long long gap = 1;             // position right after the previous hunk
    for (uniHunkVec_iter it=uniHunks.begin();it!=uniHunks.end();++it) {
        joinSesVec(shunk, it->common[0]);
        joinSesVec(shunk, it->change);
        joinSesVec(shunk, it->common[1]);
        // start of this hunk, shifted by what earlier hunks added/removed
        const long long a = it->a + inc_dec_total;
        inc_dec_total += it->inc_dec_count;
        // move to the start of the hunk without ever walking past the end
        for (long long i=0;i<a - gap && lstIt != seqLst.end();++i) {
            ++lstIt;
        }
        gap = a + it->b + it->inc_dec_count;
        vsesIt = shunk.begin();
        while (vsesIt!=shunk.end()) {
            switch (vsesIt->second.type) {
            case SES_ADD :
                seqLst.insert(lstIt, vsesIt->first);
                break;
            case SES_DELETE :
                if (lstIt != seqLst.end()) {
                    lstIt = seqLst.erase(lstIt);
                }
                break;
            case SES_COMMON :
                if (lstIt != seqLst.end()) {
                    ++lstIt;
                }
                break;
            default :
                // unknown edit type: nothing to apply
                break;
            }
            ++vsesIt;
        }
        shunk.clear();
    }

    sequence patchedSeq(seqLst.begin(), seqLst.end());
    return patchedSeq;
}
/**
 * patching with Shortest Edit Script (SES)
 *
 * Applies the recorded edit script to a copy of seq and returns the patched
 * copy. Deletions and common steps are skipped once the end of the copy is
 * reached (as uniPatch() does), so a seq shorter than the script expects no
 * longer erases or advances an end iterator.
 */
sequence patch (const sequence& seq) const {
    sesElemVec sesSeq = ses.getSequence();
    elemList seqLst(seq.begin(), seq.end());
    elemList_iter lstIt = seqLst.begin();
    for (sesElemVec_iter sesIt=sesSeq.begin();sesIt!=sesSeq.end();++sesIt) {
        switch (sesIt->second.type) {
        case SES_ADD :
            seqLst.insert(lstIt, sesIt->first);
            break;
        case SES_DELETE :
            if (lstIt != seqLst.end()) {
                lstIt = seqLst.erase(lstIt);
            }
            break;
        case SES_COMMON :
            if (lstIt != seqLst.end()) {
                ++lstIt;
            }
            break;
        default :
            // unknown edit type: nothing to apply
            break;
        }
    }
    sequence patchedSeq(seqLst.begin(), seqLst.end());
    return patchedSeq;
}
/**
* compose Longest Common Subsequence and Shortest Edit Script.
* The algorithm implemented here is based on "An O(NP) Sequence Comparison Algorithm"
* described by Sun Wu, Udi Manber and Gene Myers
*
* The results are accumulated into editDistance, lcs and ses. When
* editDistanceOnly is set, only editDistance is updated.
*/
void compose() {
// In huge mode reserve room for the maximum number of path coordinates up front.
if (isHuge()) {
pathCordinates.reserve(MAX_CORDINATES_SIZE);
}
ox = 0;
oy = 0;
long long p = -1;
// fp holds, per diagonal k (stored at index k + offset), the value returned
// by snake(); path holds the index of the matching entry in pathCordinates.
// Both start out filled with -1.
fp = new long long[M + N + 3];
fill(&fp[0], &fp[M + N + 3], -1);
path = editPath(M + N + 3);
fill(path.begin(), path.end(), -1);
// Restart point: reached again through the goto below when recordSequence()
// reports that only part of the input could be recorded.
ONP:
do {
++p;
// diagonals below delta, in ascending order
for (long long k=-p;k<=static_cast<long long>(delta)-1;++k) {
fp[k+offset] = snake(k, fp[k-1+offset]+1, fp[k+1+offset]);
}
// diagonals above delta, in descending order
for (long long k=static_cast<long long>(delta)+p;k>=static_cast<long long>(delta)+1;--k) {
fp[k+offset] = snake(k, fp[k-1+offset]+1, fp[k+1+offset]);
}
// the delta diagonal itself
fp[delta+offset] = snake(static_cast<long long>(delta), fp[delta-1+offset]+1, fp[delta+1+offset]);
// stop once diagonal delta has reached N, or the coordinate store hit its cap
} while (fp[delta+offset] != static_cast<long long>(N) && pathCordinates.size() < MAX_CORDINATES_SIZE);
editDistance += static_cast<long long>(delta) + 2 * p;
long long r = path[delta+offset];
P cordinate;
editPathCordinates epc(0);
// recording edit distance only
if (editDistanceOnly) {
delete[] this->fp;
return;
}
// Follow the predecessor links (k) back from the end point; epc therefore
// holds the path from last point to first.
while(r != -1) {
cordinate.x = pathCordinates[(size_t)r].x;
cordinate.y = pathCordinates[(size_t)r].y;
epc.push_back(cordinate);
r = pathCordinates[(size_t)r].k;
}
// record Longest Common Subsequence & Shortest Edit Script
if (!recordSequence(epc)) {
// recordSequence() has trimmed A and B to the unrecorded remainder and
// re-allocated fp; run the search again on what is left.
pathCordinates.resize(0);
epc.resize(0);
p = -1;
goto ONP;
}
delete[] this->fp;
}
/**
* print difference between A and B as an SES
*
* Every element of the recorded edit script is passed to a ChangePrinter
* bound to out.
*/
template < typename stream >
void printSES (stream& out) const {
sesElemVec ses_v = ses.getSequence();
for_each(ses_v.begin(), ses_v.end(), ChangePrinter< sesElem, stream >(out));
}
// Convenience overload for ostream; prints to cout by default.
void printSES (ostream& out = cout) const {
printSES< ostream >(out);
}
/**
* print differences given an SES
*
* Static variant: prints the edit script s instead of the one recorded in
* this object.
*/
template < typename stream >
static void printSES (const Ses< elem >& s, stream& out) {
sesElemVec ses_v = s.getSequence();
for_each(ses_v.begin(), ses_v.end(), ChangePrinter< sesElem, stream >(out));
}
// Convenience overload for ostream; prints to cout by default.
static void printSES (const Ses< elem >& s, ostream& out = cout) {
printSES< ostream >(s, out);
}
/**
* print difference between A and B as an SES with custom printer
*
* PT is the printer functor template; it is instantiated as
* PT< sesElem, stream > and constructed from out.
*/
template < typename stream, template < typename SEET, typename STRT > class PT >
void printSES (stream& out) const {
sesElemVec ses_v = ses.getSequence ();
for_each (ses_v.begin (), ses_v.end(), PT < sesElem, stream > (out));
}
/**
* store difference between A and B as an SES with custom storage
*
* ST is the storage functor template; it is instantiated as
* ST< sesElem, storedData >, constructed from sd and applied to every
* element of the edit script.
*/
template < typename storedData, template < typename SEET, typename STRT > class ST >
void storeSES(storedData& sd) const {
sesElemVec ses_v = ses.getSequence();
for_each(ses_v.begin(), ses_v.end(), ST < sesElem, storedData >(sd));
}
/**
 * print difference between A and B in the Unified Format
 *
 * Every hunk built by composeUnifiedHunks() is passed to a UniHunkPrinter
 * bound to out.
 */
template < typename stream >
void printUnifiedFormat (stream& out) const {
    for_each(uniHunks.begin(), uniHunks.end(), UniHunkPrinter< sesElem, stream >(out));
}

// Convenience overload for ostream; prints to cout by default.
void printUnifiedFormat (ostream& out = cout) const {
    printUnifiedFormat< ostream >(out);
}

/**
 * print unified format difference with given unified format hunks
 *
 * The printer is instantiated for the caller's stream type, like the member
 * overload above; the stream template argument used to be omitted here, so
 * the printer fell back to its default stream type whatever stream was passed.
 */
template < typename stream >
static void printUnifiedFormat (const uniHunkVec& hunks, stream& out) {
    for_each(hunks.begin(), hunks.end(), UniHunkPrinter< sesElem, stream >(out));
}

// Convenience overload for ostream; prints to cout by default.
static void printUnifiedFormat (const uniHunkVec& hunks, ostream& out = cout) {
    printUnifiedFormat< ostream >(hunks, out);
}
/**
* compose Unified Format Hunks from Shortest Edit Script
*
* Walks the recorded edit script once and appends one hunk to uniHunks per
* group of changes, together with its surrounding common elements.
*/
void composeUnifiedHunks () {
sesElemVec common[2];
sesElemVec change;
sesElemVec ses_v = ses.getSequence();
long long l_cnt = 1; // 1-based position of the current element in ses_v
long long length = distance(ses_v.begin(), ses_v.end());
long long middle = 0; // common elements seen since the last add/delete
bool isMiddle, isAfter; // inside a change group / change group is complete
elemInfo einfo;
long long a, b, c, d; // @@ -a,b +c,d @@
long long inc_dec_count = 0; // adds minus deletes within the current hunk
uniHunk< sesElem > hunk;
sesElemVec adds;
sesElemVec deletes;
isMiddle = isAfter = false;
a = b = c = d = 0;
for (sesElemVec_iter it=ses_v.begin();it!=ses_v.end();++it, ++l_cnt) {
einfo = it->second;
switch (einfo.type) {
case SES_ADD :
middle = 0;
++inc_dec_count;
adds.push_back(*it);
if (!isMiddle) isMiddle = true;
if (isMiddle) ++d;
// last element of the script: flush the pending changes
if (l_cnt >= length) {
joinSesVec(change, deletes);
joinSesVec(change, adds);
isAfter = true;
}
break;
case SES_DELETE :
middle = 0;
--inc_dec_count;
deletes.push_back(*it);
if (!isMiddle) isMiddle = true;
if (isMiddle) ++b;
// last element of the script: flush the pending changes
if (l_cnt >= length) {
joinSesVec(change, deletes);
joinSesVec(change, adds);
isAfter = true;
}
break;
case SES_COMMON :
++b;++d;
// No change seen yet for this hunk: keep a sliding window of at most
// DTL_CONTEXT_SIZE leading common elements in common[0].
if (common[1].empty() && adds.empty() && deletes.empty() && change.empty()) {
if (static_cast<long long>(common[0].size()) < DTL_CONTEXT_SIZE) {
if (a == 0 && c == 0) {
if (!wasSwapped()) {
a = einfo.beforeIdx;
c = einfo.afterIdx;
} else {
a = einfo.afterIdx;
c = einfo.beforeIdx;
}
}
common[0].push_back(*it);
} else {
// window full: drop its oldest element and shift the hunk start by one
rotate(common[0].begin(), common[0].begin() + 1, common[0].end());
common[0].pop_back();
common[0].push_back(*it);
++a;++c;
--b;--d;
}
}
// A common element inside a change group: flush pending deletes and adds
// (deletes first), then keep the common element as part of the change.
if (isMiddle && !isAfter) {
++middle;
joinSesVec(change, deletes);
joinSesVec(change, adds);
change.push_back(*it);
if (middle >= DTL_SEPARATE_SIZE || l_cnt >= length) {
isAfter = true;
}
adds.clear();
deletes.clear();
}
break;
default :
// unknown edit type: nothing to do
break;
}
// compose unified format hunk
if (isAfter && !change.empty()) {
// Look at the next DTL_SEPARATE_SIZE elements (starting with the current
// one): unless all of them are common, the next change is close enough to
// stay in this hunk.
sesElemVec_iter cit = it;
long long cnt = 0;
for (long long i=0;i<DTL_SEPARATE_SIZE && (cit != ses_v.end());++i, ++cit) {
if (cit->second.type == SES_COMMON) {
++cnt;
}
}
if (cnt < DTL_SEPARATE_SIZE && l_cnt < length) {
middle = 0;
isAfter = false;
continue;
}
// Keep only the last DTL_SEPARATE_SIZE leading common elements and move
// the hunk start forward by the number of elements dropped.
if (static_cast<long long>(common[0].size()) >= DTL_SEPARATE_SIZE) {
long long c0size = static_cast<long long>(common[0].size());
rotate(common[0].begin(),
common[0].begin() + (size_t)c0size - DTL_SEPARATE_SIZE,
common[0].end());
for (long long i=0;i<c0size - DTL_SEPARATE_SIZE;++i) {
common[0].pop_back();
}
a += c0size - DTL_SEPARATE_SIZE;
c += c0size - DTL_SEPARATE_SIZE;
}
// a start position still at 0 is bumped to 1
if (a == 0) ++a;
if (c == 0) ++c;
if (wasSwapped()) swap(a, c);
hunk.a = a;
hunk.b = b;
hunk.c = c;
hunk.d = d;
hunk.common[0] = common[0];
hunk.change = change;
hunk.common[1] = common[1];
hunk.inc_dec_count = inc_dec_count;
uniHunks.push_back(hunk);
// reset all per-hunk state for the next hunk
isMiddle = false;
isAfter = false;
common[0].clear();
common[1].clear();
adds.clear();
deletes.clear();
change.clear();
a = b = c = d = middle = inc_dec_count = 0;
}
}
}
/**
 * compose ses from stream
 *
 * Reads st line by line. The first character of each line is the edit mark
 * (SES_MARK_DELETE, SES_MARK_ADD or SES_MARK_COMMON) and the rest of the line
 * is the element. Lines starting with any other mark are ignored, as are
 * empty lines.
 */
template <typename stream>
static Ses< elem > composeSesFromStream (stream& st)
{
    elem line;
    Ses< elem > ret;
    long long x_idx, y_idx;
    x_idx = y_idx = 1;
    while (getline(st, line)) {
        // An empty line carries no mark; skipping it also avoids forming
        // line.begin() + 1 past the end of the line.
        if (line.begin() == line.end()) {
            continue;
        }
        elem mark(line.begin(), line.begin() + 1);
        elem e(line.begin() + 1, line.end());
        if (mark == SES_MARK_DELETE) {
            ret.addSequence(e, x_idx, 0, SES_DELETE);
            ++x_idx;
        } else if (mark == SES_MARK_ADD) {
            ret.addSequence(e, y_idx, 0, SES_ADD);
            ++y_idx;
        } else if (mark == SES_MARK_COMMON) {
            ret.addSequence(e, x_idx, y_idx, SES_COMMON);
            ++x_idx;
            ++y_idx;
        }
    }
    return ret;
}
private :
/**
 * initialize
 *
 * Measures both sequences and makes sure A is strictly the shorter one,
 * swapping A and B (and remembering that in swapped) otherwise; then resets
 * the edit distance, the option flags and fp.
 */
void init () {
    M = distance(A.begin(), A.end());
    N = distance(B.begin(), B.end());

    swapped = !(M < N);
    if (swapped) {
        swap(A, B);
        swap(M, N);
    }

    delta = N - M;
    offset = M + 1;
    editDistance = 0;
    fp = NULL;

    huge = false;
    trivial = false;
    editDistanceOnly = false;
}
/**
* search shortest path and record the path
*
* Starts on diagonal k at the larger of above and below, then advances
* while the elements of A and B compare equal. The end point is stored in
* pathCordinates together with the index of its predecessor, unless only the
* edit distance is wanted. Returns the y reached.
*/
long long snake(const long long& k, const long long& above, const long long& below) {
// predecessor entry: the neighbouring diagonal this step starts from.
// k may be negative; the (size_t) cast then wraps around and adding offset
// wraps back, presumably always yielding the non-negative index k +/- 1 + offset.
long long r = above > below ? path[(size_t)k-1+offset] : path[(size_t)k+1+offset];
long long y = max(above, below);
long long x = y - k;
// when the inputs were swapped, the comparator still receives its arguments
// in the caller's original order
while ((size_t)x < M && (size_t)y < N && (swapped ? cmp.impl(B[(size_t)y], A[(size_t)x]) : cmp.impl(A[(size_t)x], B[(size_t)y]))) {
++x;++y;
}
// index that the coordinate pushed below gets in pathCordinates
path[(size_t)k+offset] = static_cast<long long>(pathCordinates.size());
if (!editDistanceOnly) {
P p;
p.x = x;p.y = y;p.k = r;
pathCordinates.push_back(p);
}
return y;
}
/**
* record SES and LCS
*
* Replays the path points in v (stored last point first) and appends the
* corresponding add / delete / common entries to ses and lcs.
* Returns true when recording is finished. Returns false when part of A and B
* is still unprocessed; A, B and the search state have then been reset to that
* remainder and the caller is expected to search again.
*/
bool recordSequence (const editPathCordinates& v) {
sequence_const_iter x(A.begin());
sequence_const_iter y(B.begin());
long long x_idx, y_idx; // line number for Unified Format
long long px_idx, py_idx; // cordinates
bool complete = false;
x_idx = y_idx = 1;
px_idx = py_idx = 0;
// v is walked from its last element down to index 0; the complete flag
// ends the loop because the unsigned index cannot go below zero.
for (size_t i=v.size()-1;!complete;--i) {
while(px_idx < v[i].x || py_idx < v[i].y) {
if (v[i].y - v[i].x > py_idx - px_idx) {
// target point lies on a higher diagonal: consume an element of B
if (!wasSwapped()) {
ses.addSequence(*y, 0, y_idx + oy, SES_ADD);
} else {
ses.addSequence(*y, y_idx + oy, 0, SES_DELETE);
}
++y;
++y_idx;
++py_idx;
} else if (v[i].y - v[i].x < py_idx - px_idx) {
// target point lies on a lower diagonal: consume an element of A
if (!wasSwapped()) {
ses.addSequence(*x, x_idx + ox, 0, SES_DELETE);
} else {
ses.addSequence(*x, 0, x_idx + ox, SES_ADD);
}
++x;
++x_idx;
++px_idx;
} else {
// same diagonal: the element is common to both sequences
if (!wasSwapped()) {
lcs.addSequence(*x);
ses.addSequence(*x, x_idx + ox, y_idx + oy, SES_COMMON);
} else {
lcs.addSequence(*y);
ses.addSequence(*y, y_idx + oy, x_idx + ox, SES_COMMON);
}
++x;
++y;
++x_idx;
++y_idx;
++px_idx;
++py_idx;
}
}
if (i == 0) complete = true;
}
if (x_idx > static_cast<long long>(M) && y_idx > static_cast<long long>(N)) {
// all recording succeeded
} else {
// trivial difference
if (trivialEnabled()) {
// record whatever is left of both sequences as plain deletes / adds
if (!wasSwapped()) {
recordOddSequence(x_idx, M, x, SES_DELETE);
recordOddSequence(y_idx, N, y, SES_ADD);
} else {
recordOddSequence(x_idx, M, x, SES_ADD);
recordOddSequence(y_idx, N, y, SES_DELETE);
}
return true;
}
// nontrivial difference
// Cut A and B down to their unprocessed tails, reset the search state and
// remember in ox / oy how many elements were already consumed, so that the
// indices recorded by the next pass continue from here.
sequence A_(A.begin() + (size_t)x_idx - 1, A.end());
sequence B_(B.begin() + (size_t)y_idx - 1, B.end());
A = A_;
B = B_;
M = distance(A.begin(), A.end());
N = distance(B.begin(), B.end());
delta = N - M;
offset = M + 1;
delete[] fp;
fp = new long long[M + N + 3];
fill(&fp[0], &fp[M + N + 3], -1);
fill(path.begin(), path.end(), -1);
ox = x_idx - 1;
oy = y_idx - 1;
return false;
}
return true;
}
/**
 * record odd sequence in SES
 *
 * Records the elements at 1-based positions idx..length (inclusive), read
 * from it onwards, as edits of type et, adding one to editDistance for each.
 * Nothing is recorded when idx is already past length; the previous form
 * always recorded one element after its loop and so dereferenced the end
 * iterator in that case.
 */
void inline recordOddSequence (long long idx, long long length, sequence_const_iter it, const edit_t et) {
    while (idx <= length) {
        ses.addSequence(*it, idx, 0, et);
        ++it;
        ++idx;
        ++editDistance;
    }
}
/**
 * join SES vectors
 *
 * Appends every element of s2, in order, to the end of s1.
 */
void inline joinSesVec (sesElemVec& s1, sesElemVec& s2) const {
    if (s2.empty()) {
        return;
    }
    s1.insert(s1.end(), s2.begin(), s2.end());
}
/**
* check if the sequences have been swapped
*
* Returns the flag set by init(): true when A and B were exchanged there
* because A was not shorter than B.
*/
bool inline wasSwapped () const {
return swapped;
}
};
}
#endif // DTL_DIFF_H

View File

@@ -0,0 +1,245 @@
/**
dtl -- Diff Template Library
In short, Diff Template Library is distributed under so called "BSD license",
Copyright (c) 2015 Tatsuhiko Kubo <cubicdaiya@gmail.com>
All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of the authors nor the names of its contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/* If you use this library, you must include dtl.hpp only. */
#ifndef DTL_DIFF3_H
#define DTL_DIFF3_H
namespace dtl {
/**
* diff3 class template
* sequence must support random_access_iterator.
*/
template <typename elem, typename sequence = vector< elem >, typename comparator = Compare< elem > >
class Diff3
{
private:
dtl_typedefs(elem, sequence)
sequence A;
sequence B;
sequence C;
sequence S;
Diff< elem, sequence, comparator > diff_ba;
Diff< elem, sequence, comparator > diff_bc;
bool conflict;
elem csepabegin;
elem csepa;
elem csepaend;
public :
/**
 * Constructs an empty three-way diff. conflict is initialized so that
 * isConflict() returns a defined value.
 */
Diff3 () : conflict(false) {}

/**
 * Constructs a three-way diff of a, b and c. b is the sequence both
 * two-way diffs start from: diff_ba goes from b to a, diff_bc from b to c.
 */
Diff3 (const sequence& a,
       const sequence& b,
       const sequence& c) : A(a), B(b), C(c),
                            diff_ba(b, a), diff_bc(b, c),
                            conflict(false) {}

~Diff3 () {}
// True once merge_() has met a pair of edits it could not reconcile.
bool isConflict () const {
return conflict;
}
// Copy of the sequence produced by the last merge().
sequence getMergedSequence () const {
return S;
}
/**
 * merge changes B and C into A
 *
 * Stores the result in S. When only one of A and C differs from B, that
 * sequence is taken as is; when both differ, merge_() is run.
 * Returns false only when merge_() reported a conflict.
 */
bool merge () {
    if (diff_ba.getEditDistance() == 0) {
        // A == B: take C (which may itself be equal to B)
        if (diff_bc.getEditDistance() == 0) {
            S = B;
        } else {
            S = C;
        }
        return true;
    }

    if (diff_bc.getEditDistance() == 0) {
        // A != B == C: take A
        S = A;
        return true;
    }

    // A != B != C: a real three-way merge is needed
    S = merge_();
    return !isConflict();
}
/**
* compose differences
*
* Runs compose() on both two-way diffs (B to A and B to C).
*/
void compose () {
diff_ba.compose();
diff_bc.compose();
}
private :
/**
* merge implementation
*
* Walks the two edit scripts (B to A and B to C) side by side and builds the
* merged sequence from them. When a pair of edits cannot be reconciled,
* conflict is set and B is returned unchanged.
*/
sequence merge_ () {
elemVec seq;
Ses< elem > ses_ba = diff_ba.getSes();
Ses< elem > ses_bc = diff_bc.getSes();
sesElemVec ses_ba_v = ses_ba.getSequence();
sesElemVec ses_bc_v = ses_bc.getSequence();
sesElemVec_iter ba_it = ses_ba_v.begin();
sesElemVec_iter bc_it = ses_bc_v.begin();
sesElemVec_iter ba_end = ses_ba_v.end();
sesElemVec_iter bc_end = ses_bc_v.end();
while (!isEnd(ba_end, ba_it) || !isEnd(bc_end, bc_it)) {
// copy the run of elements that both scripts mark as common
while (true) {
if (!isEnd(ba_end, ba_it) &&
!isEnd(bc_end, bc_it) &&
ba_it->first == bc_it->first &&
ba_it->second.type == SES_COMMON &&
bc_it->second.type == SES_COMMON) {
// do nothing
} else {
break;
}
if (!isEnd(ba_end, ba_it)) seq.push_back(ba_it->first);
else if (!isEnd(bc_end, bc_it)) seq.push_back(bc_it->first);
forwardUntilEnd(ba_end, ba_it);
forwardUntilEnd(bc_end, bc_it);
}
// one script is exhausted: the rest of the other is handled after the loop
if (isEnd(ba_end, ba_it) || isEnd(bc_end, bc_it)) break;
if ( ba_it->second.type == SES_COMMON
&& bc_it->second.type == SES_DELETE) {
// deleted on the C side only: drop the element
forwardUntilEnd(ba_end, ba_it);
forwardUntilEnd(bc_end, bc_it);
} else if (ba_it->second.type == SES_COMMON &&
bc_it->second.type == SES_ADD) {
// added on the C side only: take the new element
seq.push_back(bc_it->first);
forwardUntilEnd(bc_end, bc_it);
} else if (ba_it->second.type == SES_DELETE &&
bc_it->second.type == SES_COMMON) {
// deleted on the A side only: drop the element
forwardUntilEnd(ba_end, ba_it);
forwardUntilEnd(bc_end, bc_it);
} else if (ba_it->second.type == SES_DELETE &&
bc_it->second.type == SES_DELETE) {
if (ba_it->first == bc_it->first) {
// both sides delete the same element
forwardUntilEnd(ba_end, ba_it);
forwardUntilEnd(bc_end, bc_it);
} else {
// conflict
conflict = true;
return B;
}
} else if (ba_it->second.type == SES_DELETE &&
bc_it->second.type == SES_ADD) {
// conflict
conflict = true;
return B;
} else if (ba_it->second.type == SES_ADD &&
bc_it->second.type == SES_COMMON) {
// added on the A side only: take the new element
seq.push_back(ba_it->first);
forwardUntilEnd(ba_end, ba_it);
} else if (ba_it->second.type == SES_ADD &&
bc_it->second.type == SES_DELETE) {
// conflict
conflict = true;
return B;
} else if (ba_it->second.type == SES_ADD &&
bc_it->second.type == SES_ADD) {
if (ba_it->first == bc_it->first) {
// both sides add the same element: take it once
seq.push_back(ba_it->first);
forwardUntilEnd(ba_end, ba_it);
forwardUntilEnd(bc_end, bc_it);
} else {
// conflict
conflict = true;
return B;
}
}
}
// append the additions that remain in whichever script is not yet exhausted
if (isEnd(ba_end, ba_it)) {
addDecentSequence(bc_end, bc_it, seq);
} else if (isEnd(bc_end, bc_it)) {
addDecentSequence(ba_end, ba_it, seq);
}
sequence mergedSeq(seq.begin(), seq.end());
return mergedSeq;
}
/**
 * join elem vectors
 *
 * Appends every element of s2, in order, to the end of s1.
 */
void inline joinElemVec (elemVec& s1, elemVec& s2) const {
    if (s2.empty()) {
        return;
    }
    s1.insert(s1.end(), s2.begin(), s2.end());
}
/**
 * check if sequence is at end
 *
 * Returns true when it equals end.
 */
template <typename T_iter>
bool inline isEnd (const T_iter& end, const T_iter& it) const {
    return it == end;
}
/**
 * increment iterator until iterator is at end
 *
 * Advances it by one step, unless it already equals end.
 */
template <typename T_iter>
void inline forwardUntilEnd (const T_iter& end, T_iter& it) const {
    if (isEnd(end, it)) {
        return;
    }
    ++it;
}
/**
 * add elements whose SES's type is ADD
 *
 * Consumes the rest of the edit script from it up to end and appends the
 * element of every SES_ADD entry to seq; it is left equal to end.
 */
void inline addDecentSequence (const sesElemVec_iter& end, sesElemVec_iter& it, elemVec& seq) const {
    for (; !isEnd(end, it); ++it) {
        if (it->second.type != SES_ADD) {
            continue;
        }
        seq.push_back(it->first);
    }
}
};
}
#endif // DTL_DIFF3_H

View File

@@ -0,0 +1,55 @@
/**
dtl -- Diff Template Library
In short, Diff Template Library is distributed under so called "BSD license",
Copyright (c) 2015 Tatsuhiko Kubo <cubicdaiya@gmail.com>
All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of the authors nor the names of its contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/* If you use this library, you must include dtl.hpp only. */
#ifndef DTL_LCS_H
#define DTL_LCS_H
namespace dtl {
/**
* Longest Common Subsequence template class
*
* Adds nothing to Sequence< elem >; it only gives the element container a
* distinct type name.
*/
template <typename elem>
class Lcs : public Sequence< elem >
{
public :
Lcs () {}
~Lcs () {}
};
}
#endif // DTL_LCS_H

View File

@@ -0,0 +1,65 @@
/**
dtl -- Diff Template Library
In short, Diff Template Library is distributed under so called "BSD license",
Copyright (c) 2015 Tatsuhiko Kubo <cubicdaiya@gmail.com>
All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of the authors nor the names of its contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/* If you use this library, you must include dtl.hpp only. */
#ifndef DTL_SEQUENCE_H
#define DTL_SEQUENCE_H
namespace dtl {
/**
* sequence class template
*
* A minimal append-only wrapper around a vector of elements.
*/
template <typename elem>
class Sequence
{
public :
typedef vector< elem > elemVec;
Sequence () {}
virtual ~Sequence () {}
// Returns a copy of the stored elements.
elemVec getSequence () const {
return sequence;
}
// Appends e to the stored elements.
void addSequence (elem e) {
sequence.push_back(e);
}
protected :
// the stored elements, in insertion order
elemVec sequence;
};
}
#endif // DTL_SEQUENCE_H

View File

@@ -0,0 +1,132 @@
/**
dtl -- Diff Template Library
In short, Diff Template Library is distributed under so called "BSD license",
Copyright (c) 2015 Tatsuhiko Kubo <cubicdaiya@gmail.com>
All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of the authors nor the names of its contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/* If you use this library, you must include dtl.hpp only. */
#ifndef DTL_SES_H
#define DTL_SES_H
namespace dtl {
/**
* Shortest Edit Script template class
*/
template <typename elem>
class Ses : public Sequence< elem >
{
private :
    typedef pair< elem, elemInfo > sesElem;
    typedef vector< sesElem > sesElemVec;
public :
    /**
     * Creates an empty script that keeps elements in the order they are added.
     */
    Ses ()
        : onlyAdd(true), onlyDelete(true), onlyCopy(true),
          deletesFirst(false), nextDeleteIdx(0) {}

    /**
     * Creates an empty script. When moveDel is true, deletions are placed
     * ahead of the additions recorded since the last common element.
     */
    Ses (bool moveDel)
        : onlyAdd(true), onlyDelete(true), onlyCopy(true),
          deletesFirst(moveDel), nextDeleteIdx(0) {}

    ~Ses () {}

    /** True while no delete or common element has been recorded. */
    bool isOnlyAdd () const { return onlyAdd; }

    /** True while no add or common element has been recorded. */
    bool isOnlyDelete () const { return onlyDelete; }

    /** True while no add or delete element has been recorded. */
    bool isOnlyCopy () const { return onlyCopy; }

    /** True when at least one of the three "only" flags is still set. */
    bool isOnlyOneOperation () const {
        return onlyAdd || onlyDelete || onlyCopy;
    }

    /** True once any add or delete element has been recorded. */
    bool isChange () const { return !onlyCopy; }

    using Sequence< elem >::addSequence;

    /**
     * Records element e together with its indices and edit type, and updates
     * the "only" flags according to the edit type.
     */
    void addSequence (elem e, long long beforeIdx, long long afterIdx, const edit_t type) {
        elemInfo details = { beforeIdx, afterIdx, type };
        sesElem entry(e, details);

        // Plain mode: every element is appended, whatever its type.
        if (!deletesFirst) {
            sequence.push_back(entry);
        }

        if (type == SES_DELETE) {
            onlyCopy = false;
            onlyAdd  = false;
            if (deletesFirst) {
                // Insert at the current deletion slot so that deletions end up
                // before the additions recorded since the last common element.
                sequence.insert(sequence.begin() + nextDeleteIdx, entry);
                ++nextDeleteIdx;
            }
        } else if (type == SES_COMMON) {
            onlyAdd    = false;
            onlyDelete = false;
            if (deletesFirst) {
                sequence.push_back(entry);
                // Later deletions go after this common element.
                nextDeleteIdx = sequence.size();
            }
        } else if (type == SES_ADD) {
            onlyDelete = false;
            onlyCopy   = false;
            if (deletesFirst) {
                sequence.push_back(entry);
            }
        }
    }

    /** Returns a copy of the recorded (element, info) pairs. */
    sesElemVec getSequence () const {
        return sequence;
    }

private :
    sesElemVec sequence;      // recorded edit script
    bool       onlyAdd;
    bool       onlyDelete;
    bool       onlyCopy;
    bool       deletesFirst;  // place deletions ahead of additions
    size_t     nextDeleteIdx; // insertion slot for the next deletion
};
}
#endif // DTL_SES_H

View File

@@ -0,0 +1,47 @@
/**
dtl -- Diff Template Library
In short, Diff Template Library is distributed under so called "BSD license",
Copyright (c) 2015 Tatsuhiko Kubo <cubicdaiya@gmail.com>
All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of the authors nor the names of its contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef DTL_H
#define DTL_H
#include "variables.hpp"
#include "functors.hpp"
#include "Sequence.hpp"
#include "Lcs.hpp"
#include "Ses.hpp"
#include "Diff.hpp"
#include "Diff3.hpp"
#endif // DTL_H

View File

@@ -0,0 +1,151 @@
/**
dtl -- Diff Template Library
In short, Diff Template Library is distributed under so called "BSD license",
Copyright (c) 2015 Tatsuhiko Kubo <cubicdaiya@gmail.com>
All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of the authors nor the names of its contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/* If you use this library, you must include dtl.hpp only. */
#ifndef DTL_FUNCTORS_H
#define DTL_FUNCTORS_H
namespace dtl {
/**
* printer class template
*/
/**
 * Abstract base for functors that print one SES element to a stream.
 * Holds a reference to the output stream; it does not own it.
 */
template <typename sesElem, typename stream = ostream >
class Printer
{
public :
// Default: print to cout.
Printer () : out_(cout) {}
// Print to the given stream, which must outlive this object.
Printer (stream& out) : out_(out) {}
virtual ~Printer () {}
// Prints a single element; implemented by derived classes.
virtual void operator() (const sesElem& se) const = 0;
protected :
stream& out_;
};
/**
* common element printer class template
*/
/**
 * Prints every element with the "common" mark, regardless of its edit type.
 */
template <typename sesElem, typename stream = ostream >
class CommonPrinter : public Printer < sesElem, stream >
{
public :
CommonPrinter () : Printer < sesElem, stream > () {}
CommonPrinter (stream& out) : Printer < sesElem, stream > (out) {}
~CommonPrinter () {}
// Writes SES_MARK_COMMON, the element value (se.first) and a newline.
void operator() (const sesElem& se) const {
this->out_ << SES_MARK_COMMON << se.first << endl;
}
};
/**
* ses element printer class template
*/
template <typename sesElem, typename stream = ostream >
class ChangePrinter : public Printer < sesElem, stream >
{
public :
    ChangePrinter () : Printer < sesElem, stream > () {}
    ChangePrinter (stream& out) : Printer < sesElem, stream > (out) {}
    ~ChangePrinter () {}

    /**
     * Writes the mark matching the element's edit type, then the element
     * value and a newline. Elements with any other type print nothing.
     */
    void operator() (const sesElem& se) const {
        if (se.second.type == SES_ADD) {
            this->out_ << SES_MARK_ADD << se.first << endl;
        } else if (se.second.type == SES_DELETE) {
            this->out_ << SES_MARK_DELETE << se.first << endl;
        } else if (se.second.type == SES_COMMON) {
            this->out_ << SES_MARK_COMMON << se.first << endl;
        }
    }
};
/**
* unified format element printer class template
*/
template <typename sesElem, typename stream = ostream >
class UniHunkPrinter
{
public :
UniHunkPrinter () : out_(cout) {}
UniHunkPrinter (stream& out) : out_(out) {}
~UniHunkPrinter () {}
void operator() (const uniHunk< sesElem >& hunk) const {
out_ << "@@"
<< " -" << hunk.a << "," << hunk.b
<< " +" << hunk.c << "," << hunk.d
<< " @@" << endl;
for_each(hunk.common[0].begin(), hunk.common[0].end(), CommonPrinter< sesElem, stream >(out_));
for_each(hunk.change.begin(), hunk.change.end(), ChangePrinter< sesElem, stream >(out_));
for_each(hunk.common[1].begin(), hunk.common[1].end(), CommonPrinter< sesElem, stream >(out_));
}
private :
stream& out_;
};
/**
* storage class template
*/
/**
 * Abstract base for functors that process one SES element while holding a
 * reference to caller-provided data (not owned).
 */
template <typename sesElem, typename storedData >
class Storage
{
public:
// sd must outlive this object; only a reference is kept.
Storage(storedData& sd) : storedData_(sd) {}
virtual ~Storage() {}
// Handles a single element; implemented by derived classes.
virtual void operator() (const sesElem& se) const = 0;
protected:
storedData& storedData_;
};
/**
* compare class template
*/
/**
 * Element comparator. The default implementation uses operator==;
 * derived classes may override impl() to supply another equality test.
 */
template <typename elem>
class Compare
{
public :
Compare () {}
virtual ~Compare () {}
// Returns true when e1 and e2 are considered equal.
virtual inline bool impl (const elem& e1, const elem& e2) const {
return e1 == e2;
}
};
}
#endif // DTL_FUNCTORS_H

View File

@@ -0,0 +1,142 @@
/**
dtl -- Diff Template Library
In short, Diff Template Library is distributed under so called "BSD license",
Copyright (c) 2015 Tatsuhiko Kubo <cubicdaiya@gmail.com>
All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of the authors nor the names of its contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/* If you use this library, you must include dtl.hpp only. */
#ifndef DTL_VARIABLES_H
#define DTL_VARIABLES_H
#include <vector>
#include <list>
#include <string>
#include <algorithm>
#include <iostream>
namespace dtl {
using std::vector;
using std::string;
using std::pair;
using std::ostream;
using std::list;
using std::for_each;
using std::distance;
using std::fill;
using std::cout;
using std::endl;
using std::rotate;
using std::swap;
using std::max;
/**
* version string
*/
const string version = "1.20";
/**
* type of edit for SES
*/
typedef int edit_t;
const edit_t SES_DELETE = -1;
const edit_t SES_COMMON = 0;
const edit_t SES_ADD = 1;
/**
* mark of SES
*/
#define SES_MARK_DELETE "-"
#define SES_MARK_COMMON " "
#define SES_MARK_ADD "+"
/**
* info for Unified Format
*/
typedef struct eleminfo {
long long beforeIdx; // index of prev sequence
long long afterIdx; // index of after sequence
edit_t type; // type of edit(Add, Delete, Common)
bool operator==(const eleminfo& other) const{
return (this->beforeIdx == other.beforeIdx && this->afterIdx == other.afterIdx && this->type == other.type);
}
} elemInfo;
const long long DTL_SEPARATE_SIZE = 3;
const long long DTL_CONTEXT_SIZE = 3;
/**
* coordinate for registering route
*/
typedef struct Point {
long long x; // x coordinate
long long y; // y coordinate
long long k; // vertex
} P;
/**
* limit of coordinate size
*/
const unsigned long long MAX_CORDINATES_SIZE = 2000000;
typedef vector< long long > editPath;
typedef vector< P > editPathCordinates;
/**
* Structure of Unified Format Hunk
*/
template <typename sesElem>
struct uniHunk {
long long a, b, c, d; // @@ -a,b +c,d @@
vector< sesElem > common[2]; // common elements before ([0]) and after ([1]) the changes
vector< sesElem > change; // changes
long long inc_dec_count; // count of increase and decrease
};
/**
 * Declares the container and iterator typedefs shared by the dtl class
 * templates for element type `elem` and container type `sequence`.
 * The expansion uses `typename`, so it is meant to be placed where
 * `elem` and `sequence` are template parameters.
 */
#define dtl_typedefs(elem, sequence) \
typedef pair< elem, elemInfo > sesElem; \
typedef vector< sesElem > sesElemVec; \
typedef vector< uniHunk< sesElem > > uniHunkVec; \
typedef list< elem > elemList; \
typedef vector< elem > elemVec; \
typedef typename uniHunkVec::iterator uniHunkVec_iter; \
typedef typename sesElemVec::iterator sesElemVec_iter; \
typedef typename elemList::iterator elemList_iter; \
typedef typename sequence::iterator sequence_iter; \
typedef typename sequence::const_iterator sequence_const_iter; \
typedef typename elemVec::iterator elemVec_iter;
}
#endif // DTL_VARIABLES_H

View File

@@ -0,0 +1,17 @@
# Builds the OpenCLVectorAdd example and links it against a statically built
# Tracy client compiled from the sources two directories up.
cmake_minimum_required(VERSION 3.10)
project(OpenCLVectorAdd)
# Prefer the -pthread flag when resolving the Threads package.
set(THREADS_PREFER_PTHREAD_FLAG ON)
find_package(Threads REQUIRED)
find_package(OpenCL REQUIRED)
add_executable(OpenCLVectorAdd OpenCLVectorAdd.cpp)
# Tracy client library; TracyOpenCL.hpp is listed alongside the source file.
add_library(TracyClient STATIC ../../public/TracyClient.cpp
../../public/tracy/TracyOpenCL.hpp)
# PUBLIC so that the example inherits the include path and TRACY_ENABLE.
target_include_directories(TracyClient PUBLIC ../../public/tracy)
target_compile_definitions(TracyClient PUBLIC TRACY_ENABLE=1)
target_link_libraries(OpenCLVectorAdd PUBLIC OpenCL::OpenCL TracyClient ${CMAKE_DL_LIBS} Threads::Threads)

View File

@@ -0,0 +1,220 @@
#include <algorithm>
#include <cassert>
#include <cmath>
#include <iostream>
#include <numeric>
#include <string>
#include <vector>
#include <CL/cl.h>
#include <Tracy.hpp>
#include <TracyOpenCL.hpp>
// Checks the result of an OpenCL call: on anything other than CL_SUCCESS the
// error code is printed to stderr and assert(false) fires.
// The argument is evaluated exactly once (so a failing call is not re-issued
// while printing), and the do/while(0) wrapper makes the macro behave as a
// single statement, e.g. inside an unbraced if/else.
#define CL_ASSERT(err) \
    do \
    { \
        const cl_int clAssertErr_ = (err); \
        if (clAssertErr_ != CL_SUCCESS) \
        { \
            std::cerr << "OpenCL Call Returned " << clAssertErr_ << std::endl; \
            assert(false); \
        } \
    } while (0)
// OpenCL C source of the vectorAdd kernel: each work-item with global id
// i < N writes C[i] = A[i] + B[i].
const char kernelSource[] =
" void __kernel vectorAdd(global float* C, global float* A, global float* B, int N) "
" { "
" int i = get_global_id(0); "
" if (i < N) { "
" C[i] = A[i] + B[i]; "
" } "
" } ";
int main()
{
cl_platform_id platform;
cl_device_id device;
cl_context context;
cl_command_queue commandQueue;
cl_kernel vectorAddKernel;
cl_program program;
cl_int err;
cl_mem bufferA, bufferB, bufferC;
TracyCLCtx tracyCLCtx;
{
ZoneScopedN("OpenCL Init");
cl_uint numPlatforms = 0;
CL_ASSERT(clGetPlatformIDs(0, nullptr, &numPlatforms));
if (numPlatforms == 0)
{
std::cerr << "Cannot find OpenCL platform to run this application" << std::endl;
return 1;
}
CL_ASSERT(clGetPlatformIDs(1, &platform, nullptr));
size_t platformNameBufferSize = 0;
CL_ASSERT(clGetPlatformInfo(platform, CL_PLATFORM_NAME, 0, nullptr, &platformNameBufferSize));
std::string platformName(platformNameBufferSize, '\0');
CL_ASSERT(clGetPlatformInfo(platform, CL_PLATFORM_NAME, platformNameBufferSize, &platformName[0], nullptr));
std::cout << "OpenCL Platform: " << platformName << std::endl;
CL_ASSERT(clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 1, &device, nullptr));
size_t deviceNameBufferSize = 0;
CL_ASSERT(clGetDeviceInfo(device, CL_DEVICE_NAME, 0, nullptr, &deviceNameBufferSize));
std::string deviceName(deviceNameBufferSize, '\0');
CL_ASSERT(clGetDeviceInfo(device, CL_DEVICE_NAME, deviceNameBufferSize, &deviceName[0], nullptr));
std::cout << "OpenCL Device: " << deviceName << std::endl;
err = CL_SUCCESS;
context = clCreateContext(nullptr, 1, &device, nullptr, nullptr, &err);
CL_ASSERT(err);
size_t kernelSourceLength = sizeof(kernelSource);
const char* kernelSourceArray = { kernelSource };
program = clCreateProgramWithSource(context, 1, &kernelSourceArray, &kernelSourceLength, &err);
CL_ASSERT(err);
if (clBuildProgram(program, 1, &device, nullptr, nullptr, nullptr) != CL_SUCCESS)
{
size_t programBuildLogBufferSize = 0;
CL_ASSERT(clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, 0, nullptr, &programBuildLogBufferSize));
std::string programBuildLog(programBuildLogBufferSize, '\0');
CL_ASSERT(clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, programBuildLogBufferSize, &programBuildLog[0], nullptr));
std::clog << programBuildLog << std::endl;
return 1;
}
vectorAddKernel = clCreateKernel(program, "vectorAdd", &err);
CL_ASSERT(err);
commandQueue = clCreateCommandQueue(context, device, CL_QUEUE_PROFILING_ENABLE, &err);
CL_ASSERT(err);
}
tracyCLCtx = TracyCLContext(context, device);
size_t N = 10 * 1024 * 1024 / sizeof(float); // 10MB of floats
std::vector<float> hostA, hostB, hostC;
{
ZoneScopedN("Host Data Init");
hostA.resize(N);
hostB.resize(N);
hostC.resize(N);
std::iota(std::begin(hostA), std::end(hostA), 0.0f);
std::iota(std::begin(hostB), std::end(hostB), 0.0f);
}
{
ZoneScopedN("Host to Device Memory Copy");
bufferA = clCreateBuffer(context, CL_MEM_READ_WRITE, N * sizeof(float), nullptr, &err);
CL_ASSERT(err);
bufferB = clCreateBuffer(context, CL_MEM_READ_WRITE, N * sizeof(float), nullptr, &err);
CL_ASSERT(err);
bufferC = clCreateBuffer(context, CL_MEM_READ_WRITE, N * sizeof(float), nullptr, &err);
CL_ASSERT(err);
cl_event writeBufferAEvent, writeBufferBEvent;
{
ZoneScopedN("Write Buffer A");
TracyCLZoneS(tracyCLCtx, "Write BufferA", 5);
CL_ASSERT(clEnqueueWriteBuffer(commandQueue, bufferA, CL_FALSE, 0, N * sizeof(float), hostA.data(), 0, nullptr, &writeBufferAEvent));
TracyCLZoneSetEvent(writeBufferAEvent);
}
{
ZoneScopedN("Write Buffer B");
TracyCLZone(tracyCLCtx, "Write BufferB");
CL_ASSERT(clEnqueueWriteBuffer(commandQueue, bufferB, CL_FALSE, 0, N * sizeof(float), hostB.data(), 0, nullptr, &writeBufferBEvent));
TracyCLZoneSetEvent(writeBufferBEvent);
}
}
cl_int clN = static_cast<cl_int>(N);
const int numFrames = 10;
const int launchsPerFrame = 10;
constexpr int numLaunchs = numFrames * launchsPerFrame;
std::vector<cl_event> kernelLaunchEvts;
kernelLaunchEvts.reserve(numLaunchs);
for (int i = 0; i < numFrames; ++i)
{
FrameMark;
for (int j = 0; j < launchsPerFrame; ++j) {
ZoneScopedN("VectorAdd Kernel Launch");
TracyCLZoneC(tracyCLCtx, "VectorAdd Kernel", tracy::Color::Blue4);
CL_ASSERT(clSetKernelArg(vectorAddKernel, 0, sizeof(cl_mem), &bufferC));
CL_ASSERT(clSetKernelArg(vectorAddKernel, 1, sizeof(cl_mem), &bufferA));
CL_ASSERT(clSetKernelArg(vectorAddKernel, 2, sizeof(cl_mem), &bufferB));
CL_ASSERT(clSetKernelArg(vectorAddKernel, 3, sizeof(cl_int), &clN));
cl_event vectorAddKernelEvent;
CL_ASSERT(clEnqueueNDRangeKernel(commandQueue, vectorAddKernel, 1, nullptr, &N, nullptr, 0, nullptr, &vectorAddKernelEvent));
TracyCLZoneSetEvent(vectorAddKernelEvent);
CL_ASSERT(clRetainEvent(vectorAddKernelEvent));
kernelLaunchEvts.push_back(vectorAddKernelEvent);
std::cout << "VectorAdd Kernel Enqueued" << std::endl;
}
{
// Wait frame events to be finished
ZoneScopedN("clFinish");
CL_ASSERT(clFinish(commandQueue));
}
// You should collect on each 'frame' ends, so that streaming can be achieved.
TracyCLCollect(tracyCLCtx);
}
{
ZoneScopedN("Device to Host Memory Copy");
TracyCLZone(tracyCLCtx, "Read Buffer C");
cl_event readbufferCEvent;
CL_ASSERT(clEnqueueReadBuffer(commandQueue, bufferC, CL_TRUE, 0, N * sizeof(float), hostC.data(), 0, nullptr, &readbufferCEvent));
TracyCLZoneSetEvent(readbufferCEvent);
}
CL_ASSERT(clFinish(commandQueue));
std::vector<float> durations(kernelLaunchEvts.size());
for (int i=0; i<kernelLaunchEvts.size(); i++) {
cl_event evt = kernelLaunchEvts[i];
cl_ulong start;
cl_ulong end;
CL_ASSERT(clGetEventProfilingInfo(evt, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &start, nullptr));
CL_ASSERT(clGetEventProfilingInfo(evt, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &end, nullptr));
CL_ASSERT(clReleaseEvent(evt));
durations[i] = (end - start) * 0.001f;
std::cout << "VectorAdd Kernel " << i << " tooks " << static_cast<int>(durations[i]) << "us" << std::endl;
};
float avg = std::accumulate(durations.cbegin(), durations.cend(), 0.0f) / durations.size();
float stddev2 = std::accumulate(durations.cbegin(), durations.cend(), 0.0f, [avg](const float& acc, const float& v) {
auto d = v - avg;
return acc + d*d;
}) / (durations.size() - 1.0f);
std::cout << "VectorAdd runtime avg: " << avg << "us, std: " << sqrt(stddev2) << "us over " << numLaunchs << " runs." << std::endl;
// User should ensure all events are finished, in this case, collect after the clFinish will do the trick.
TracyCLCollect(tracyCLCtx);
{
ZoneScopedN("Checking results");
for (int i = 0; i < N; ++i)
{
assert(hostC[i] == hostA[i] + hostB[i]);
}
}
std::cout << "Results are correct!" << std::endl;
TracyCLDestroy(tracyCLCtx);
return 0;
}

View File

@@ -0,0 +1 @@
Windows/Compiled*Shader.h

View File

@@ -0,0 +1,4 @@
https://github.com/aras-p/ToyPathTracer
Modified to render only 10 frames. Client part requires 12 GB, server part
requires 6.4 GB.

View File

@@ -0,0 +1,33 @@
// Compile-time configuration switches for the path tracer.
#if defined(__APPLE__) && !defined(__METAL_VERSION__)
#include <TargetConditionals.h>
#endif
// Backbuffer size (presumably in pixels; confirm against the renderer).
#define kBackbufferWidth 1280
#define kBackbufferHeight 720
// Emscripten builds disable both SIMD and threads; every other target enables them.
#if defined(__EMSCRIPTEN__)
#define CPU_CAN_DO_SIMD 0
#define CPU_CAN_DO_THREADS 0
#else
#define CPU_CAN_DO_SIMD 1
#define CPU_CAN_DO_THREADS 1
#endif
// Rendering options; their consumers are outside this header.
#define DO_SAMPLES_PER_PIXEL 4
#define DO_ANIMATE_SMOOTHING 0.9f
#define DO_LIGHT_SAMPLING 1
#define DO_MITSUBA_COMPARE 0
// Should path tracing be done on the GPU with a compute shader?
#define DO_COMPUTE_GPU 0
#define kCSGroupSizeX 8
#define kCSGroupSizeY 8
#define kCSMaxObjects 64
// Should float3 struct use SSE/NEON?
// (Only when not computing on the GPU and the CPU supports SIMD; the trailing
// "&& 1" is a manual on/off toggle.)
#define DO_FLOAT3_WITH_SIMD (!(DO_COMPUTE_GPU) && CPU_CAN_DO_SIMD && 1)
// Should HitSpheres function use SSE/NEON?
#define DO_HIT_SPHERES_SIMD (CPU_CAN_DO_SIMD && 1)

View File

@@ -0,0 +1,192 @@
#pragma once
#if defined(_MSC_VER)
#define VM_INLINE __forceinline
#else
#define VM_INLINE __attribute__((unused, always_inline, nodebug)) inline
#endif
#define kSimdWidth 4
#if !defined(__arm__) && !defined(__arm64__) && !defined(__EMSCRIPTEN__)
// ---- SSE implementation
#include <xmmintrin.h>
#include <emmintrin.h>
#include <smmintrin.h>
#define SHUFFLE4(V, X,Y,Z,W) float4(_mm_shuffle_ps((V).m, (V).m, _MM_SHUFFLE(W,Z,Y,X)))
// Four packed floats held in one SSE register (lanes X, Y, Z, W).
struct float4
{
    // Leaves the register uninitialized.
    VM_INLINE float4() {}
    // Loads four floats from p; no alignment requirement.
    VM_INLINE explicit float4(const float *p) : m(_mm_loadu_ps(p)) {}
    // Lane order is X, Y, Z, W from lowest to highest.
    VM_INLINE explicit float4(float x, float y, float z, float w) : m(_mm_setr_ps(x, y, z, w)) {}
    // Broadcasts v to all four lanes.
    VM_INLINE explicit float4(float v) : m(_mm_set1_ps(v)) {}
    VM_INLINE explicit float4(__m128 v) : m(v) {}

    // Lane accessors: broadcast the wanted lane, then read the lowest one.
    VM_INLINE float getX() const { return _mm_cvtss_f32(m); }
    VM_INLINE float getY() const
    {
        const __m128 yyyy = _mm_shuffle_ps(m, m, _MM_SHUFFLE(1, 1, 1, 1));
        return _mm_cvtss_f32(yyyy);
    }
    VM_INLINE float getZ() const
    {
        const __m128 zzzz = _mm_shuffle_ps(m, m, _MM_SHUFFLE(2, 2, 2, 2));
        return _mm_cvtss_f32(zzzz);
    }
    VM_INLINE float getW() const
    {
        const __m128 wwww = _mm_shuffle_ps(m, m, _MM_SHUFFLE(3, 3, 3, 3));
        return _mm_cvtss_f32(wwww);
    }

    __m128 m;
};
typedef float4 bool4;
// Lane-wise arithmetic.
VM_INLINE float4 operator+ (float4 a, float4 b) { return float4(_mm_add_ps(a.m, b.m)); }
VM_INLINE float4 operator- (float4 a, float4 b) { return float4(_mm_sub_ps(a.m, b.m)); }
VM_INLINE float4 operator* (float4 a, float4 b) { return float4(_mm_mul_ps(a.m, b.m)); }
// Lane-wise comparisons: each lane of the result is all-ones when the
// comparison holds and all-zeros otherwise.
VM_INLINE bool4 operator==(float4 a, float4 b) { return bool4(_mm_cmpeq_ps(a.m, b.m)); }
VM_INLINE bool4 operator!=(float4 a, float4 b) { return bool4(_mm_cmpneq_ps(a.m, b.m)); }
VM_INLINE bool4 operator< (float4 a, float4 b) { return bool4(_mm_cmplt_ps(a.m, b.m)); }
VM_INLINE bool4 operator> (float4 a, float4 b) { return bool4(_mm_cmpgt_ps(a.m, b.m)); }
VM_INLINE bool4 operator<=(float4 a, float4 b) { return bool4(_mm_cmple_ps(a.m, b.m)); }
VM_INLINE bool4 operator>=(float4 a, float4 b) { return bool4(_mm_cmpge_ps(a.m, b.m)); }
// Bitwise combination of lane masks.
VM_INLINE bool4 operator&(bool4 a, bool4 b) { return bool4(_mm_and_ps(a.m, b.m)); }
VM_INLINE bool4 operator|(bool4 a, bool4 b) { return bool4(_mm_or_ps(a.m, b.m)); }
// Negation flips the sign bit of every lane.
VM_INLINE float4 operator- (float4 a) { return float4(_mm_xor_ps(a.m, _mm_set1_ps(-0.0f))); }
// Lane-wise minimum / maximum.
VM_INLINE float4 min(float4 a, float4 b) { return float4(_mm_min_ps(a.m, b.m)); }
VM_INLINE float4 max(float4 a, float4 b) { return float4(_mm_max_ps(a.m, b.m)); }
// Horizontal minimum: returns the smallest of the four lanes.
VM_INLINE float hmin(float4 v)
{
    // Lane 0 becomes min(x, z) and lane 1 becomes min(y, w) ...
    const float4 pairMin = min(v, SHUFFLE4(v, 2, 3, 0, 0));
    // ... then lane 0 becomes the minimum of those two.
    const float4 allMin = min(pairMin, SHUFFLE4(pairMin, 1, 0, 0, 0));
    return allMin.getX();
}
// Packs the sign bit of each lane into a 4-bit code (bit0 = X ... bit3 = W).
VM_INLINE unsigned mask(float4 v)
{
    const int signBits = _mm_movemask_ps(v.m);
    return signBits;
}
// Branching helpers for comparison results:
// true when at least one lane is set.
VM_INLINE bool any(bool4 v)
{
    const unsigned bits = mask(v);
    return bits != 0;
}
// true when all four lanes are set.
VM_INLINE bool all(bool4 v)
{
    const unsigned bits = mask(v);
    return bits == 15;
}
// "select", i.e. hibit(cond) ? b : a
// on SSE4.1 and up this can be done easily via "blend" instruction;
// on older SSEs has to do a bunch of hoops, see
// https://fgiesen.wordpress.com/2016/04/03/sse-mind-the-gap/
// Per lane: returns b where the high bit of cond is set, a elsewhere.
VM_INLINE float4 select(float4 a, float4 b, bool4 cond)
{
#if defined(__SSE4_1__) || defined(_MSC_VER) // on windows assume we always have SSE4.1
    return float4(_mm_blendv_ps(a.m, b.m, cond.m));
#else
    // Replicate each lane's sign bit across the lane, then blend with and/andnot/or.
    const __m128 laneMask = _mm_castsi128_ps(_mm_srai_epi32(_mm_castps_si128(cond.m), 31));
    const __m128 fromB = _mm_and_ps(laneMask, b.m);
    const __m128 fromA = _mm_andnot_ps(laneMask, a.m);
    return float4(_mm_or_ps(fromB, fromA));
#endif
}
// Integer overload of select: picks b over a using the float lane mask cond.
VM_INLINE __m128i select(__m128i a, __m128i b, bool4 cond)
{
    const __m128i condBits = _mm_castps_si128(cond.m);
#if defined(__SSE4_1__) || defined(_MSC_VER) // on windows assume we always have SSE4.1
    return _mm_blendv_epi8(a, b, condBits);
#else
    // Replicate each lane's sign bit across the lane, then blend with and/andnot/or.
    const __m128i laneMask = _mm_srai_epi32(condBits, 31);
    const __m128i fromB = _mm_and_si128(laneMask, b);
    const __m128i fromA = _mm_andnot_si128(laneMask, a);
    return _mm_or_si128(fromB, fromA);
#endif
}
// Lane-wise square root.
VM_INLINE float4 sqrtf(float4 v)
{
    v.m = _mm_sqrt_ps(v.m);
    return v;
}
#elif !defined(__EMSCRIPTEN__)
// ---- NEON implementation
#define USE_NEON 1
#include <arm_neon.h>
// Four packed floats held in one NEON register (lanes X, Y, Z, W).
struct float4
{
    // Leaves the register uninitialized.
    VM_INLINE float4() {}
    // Loads four consecutive floats starting at p.
    VM_INLINE explicit float4(const float *p) : m(vld1q_f32(p)) {}
    // Lane order is X, Y, Z, W from lowest to highest.
    VM_INLINE explicit float4(float x, float y, float z, float w)
    {
        const float lanes[4] = {x, y, z, w};
        m = vld1q_f32(lanes);
    }
    // Broadcasts v to all four lanes.
    VM_INLINE explicit float4(float v) : m(vdupq_n_f32(v)) {}
    VM_INLINE explicit float4(float32x4_t v) : m(v) {}

    VM_INLINE float getX() const { return vgetq_lane_f32(m, 0); }
    VM_INLINE float getY() const { return vgetq_lane_f32(m, 1); }
    VM_INLINE float getZ() const { return vgetq_lane_f32(m, 2); }
    VM_INLINE float getW() const { return vgetq_lane_f32(m, 3); }

    float32x4_t m;
};
typedef float4 bool4;
// Lane-wise arithmetic.
VM_INLINE float4 operator+ (float4 a, float4 b) { a.m = vaddq_f32(a.m, b.m); return a; }
VM_INLINE float4 operator- (float4 a, float4 b) { a.m = vsubq_f32(a.m, b.m); return a; }
VM_INLINE float4 operator* (float4 a, float4 b) { a.m = vmulq_f32(a.m, b.m); return a; }
// Lane-wise comparisons: each lane of the result is all-ones when the
// comparison holds and all-zeros otherwise. The NEON compare intrinsics
// return uint32x4_t, so the bits are reinterpreted explicitly as float lanes;
// this no longer relies on the compiler's lax vector conversions.
VM_INLINE bool4 operator==(float4 a, float4 b) { a.m = vreinterpretq_f32_u32(vceqq_f32(a.m, b.m)); return a; }
VM_INLINE bool4 operator!=(float4 a, float4 b) { a.m = vreinterpretq_f32_u32(vmvnq_u32(vceqq_f32(a.m, b.m))); return a; }
VM_INLINE bool4 operator< (float4 a, float4 b) { a.m = vreinterpretq_f32_u32(vcltq_f32(a.m, b.m)); return a; }
VM_INLINE bool4 operator> (float4 a, float4 b) { a.m = vreinterpretq_f32_u32(vcgtq_f32(a.m, b.m)); return a; }
VM_INLINE bool4 operator<=(float4 a, float4 b) { a.m = vreinterpretq_f32_u32(vcleq_f32(a.m, b.m)); return a; }
VM_INLINE bool4 operator>=(float4 a, float4 b) { a.m = vreinterpretq_f32_u32(vcgeq_f32(a.m, b.m)); return a; }
// Bitwise combination of lane masks, done on the integer view of the lanes.
VM_INLINE bool4 operator&(bool4 a, bool4 b) { a.m = vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(a.m), vreinterpretq_u32_f32(b.m))); return a; }
VM_INLINE bool4 operator|(bool4 a, bool4 b) { a.m = vreinterpretq_f32_u32(vorrq_u32(vreinterpretq_u32_f32(a.m), vreinterpretq_u32_f32(b.m))); return a; }
// Lane-wise negation.
VM_INLINE float4 operator- (float4 a) { a.m = vnegq_f32(a.m); return a; }
// Lane-wise minimum / maximum.
VM_INLINE float4 min(float4 a, float4 b) { a.m = vminq_f32(a.m, b.m); return a; }
VM_INLINE float4 max(float4 a, float4 b) { a.m = vmaxq_f32(a.m, b.m); return a; }
// Horizontal minimum: returns the smallest of the four lanes.
VM_INLINE float hmin(float4 v)
{
    const float32x2_t lowHalf = vget_low_f32(v.m);
    const float32x2_t highHalf = vget_high_f32(v.m);
    // Pairwise min: { min(x, y), min(z, w) }.
    const float32x2_t pairMin = vpmin_f32(lowHalf, highHalf);
    // A second pairwise min leaves the overall minimum in lane 0.
    const float32x2_t allMin = vpmin_f32(pairMin, pairMin);
    return vget_lane_f32(allMin, 0);
}
// Packs the sign bit of each lane into a 4-bit code (bit0 = X ... bit3 = W).
VM_INLINE unsigned mask(float4 v)
{
    static const uint32x4_t laneBit = { 1, 2, 4, 8 };
    static const uint32x4_t signBit = { 0x80000000, 0x80000000, 0x80000000, 0x80000000 };
    const uint32x4_t bits = vreinterpretq_u32_f32(v.m);
    // All-ones in every lane whose sign bit is set.
    const uint32x4_t signSet = vtstq_u32(bits, signBit);
    // Keep only the bit that identifies each such lane.
    const uint32x4_t perLane = vandq_u32(signSet, laneBit);
    // OR the four lanes together: first the two halves, then the two remaining lanes.
    const uint32x2_t folded = vorr_u32(vget_low_u32(perLane), vget_high_u32(perLane));
    return vget_lane_u32(folded, 0) | vget_lane_u32(folded, 1);
}
// Branching helpers for comparison results:
// true when at least one lane is set.
VM_INLINE bool any(bool4 v)
{
    const unsigned bits = mask(v);
    return bits != 0;
}
// true when all four lanes are set.
VM_INLINE bool all(bool4 v)
{
    const unsigned bits = mask(v);
    return bits == 15;
}
// "select", i.e. hibit(cond) ? b : a
// on SSE4.1 and up this can be done easily via "blend" instruction;
// on older SSEs has to do a bunch of hoops, see
// https://fgiesen.wordpress.com/2016/04/03/sse-mind-the-gap/
// Bitwise select: every bit set in cond takes the bit from b, every clear bit
// takes it from a. With the all-ones / all-zeros lane masks produced by the
// comparison operators this picks whole lanes.
// vbslq_f32 expects its mask as uint32x4_t, so the float lanes of cond are
// reinterpreted explicitly instead of relying on lax vector conversions.
VM_INLINE float4 select(float4 a, float4 b, bool4 cond)
{
    a.m = vbslq_f32(vreinterpretq_u32_f32(cond.m), b.m, a.m);
    return a;
}
// Integer overload of select: every bit set in cond takes the bit from b,
// every clear bit takes it from a.
// Uses the signed-integer form of the intrinsic with an explicitly
// reinterpreted mask; the original passed int32x4_t operands to vbslq_f32,
// which only compiles with lax vector conversions enabled.
VM_INLINE int32x4_t select(int32x4_t a, int32x4_t b, bool4 cond)
{
    return vbslq_s32(vreinterpretq_u32_f32(cond.m), b, a);
}
// Lane-wise square root computed as v * rsqrt(v): starts from the hardware
// reciprocal-square-root estimate and refines it with three
// vrsqrtsq_f32 (Newton-Raphson) steps.
// NOTE(review): a lane equal to 0 presumably yields NaN rather than 0, since
// the estimate of rsqrt(0) is infinite and 0 * inf is NaN; confirm whether
// callers can pass zero lanes.
VM_INLINE float4 sqrtf(float4 v)
{
float32x4_t V = v.m;
// Initial estimate of 1/sqrt(V).
float32x4_t S0 = vrsqrteq_f32(V);
// Refinement step 1.
float32x4_t P0 = vmulq_f32( V, S0 );
float32x4_t R0 = vrsqrtsq_f32( P0, S0 );
float32x4_t S1 = vmulq_f32( S0, R0 );
// Refinement step 2.
float32x4_t P1 = vmulq_f32( V, S1 );
float32x4_t R1 = vrsqrtsq_f32( P1, S1 );
float32x4_t S2 = vmulq_f32( S1, R1 );
// Refinement step 3.
float32x4_t P2 = vmulq_f32( V, S2 );
float32x4_t R2 = vrsqrtsq_f32( P2, S2 );
float32x4_t S3 = vmulq_f32( S2, R2 );
// sqrt(V) = V * (1/sqrt(V)).
return float4(vmulq_f32(V, S3));
}
// Broadcast a single lane of v to all four lanes of the result.
VM_INLINE float4 splatX(float32x4_t v) { return float4(vdupq_n_f32(vgetq_lane_f32(v, 0))); }
VM_INLINE float4 splatY(float32x4_t v) { return float4(vdupq_n_f32(vgetq_lane_f32(v, 1))); }
VM_INLINE float4 splatZ(float32x4_t v) { return float4(vdupq_n_f32(vgetq_lane_f32(v, 2))); }
VM_INLINE float4 splatW(float32x4_t v) { return float4(vdupq_n_f32(vgetq_lane_f32(v, 3))); }
#endif

View File

@@ -0,0 +1,203 @@
#include "Maths.h"
#include <stdlib.h>
#include <stdint.h>
// Advances the xorshift generator held in `state` (shifts 13, 17, 15) and
// returns the new state. A state of 0 stays 0.
static uint32_t XorShift32(uint32_t& state)
{
    uint32_t next = state;
    next = next ^ (next << 13);
    next = next ^ (next >> 17);
    next = next ^ (next << 15);
    state = next;
    return next;
}
// Returns a pseudo-random float in [0, 1) built from the low 24 bits of the
// next generator output; advances `state`.
float RandomFloat01(uint32_t& state)
{
    const uint32_t low24 = XorShift32(state) & 0xFFFFFF;
    return low24 / 16777216.0f; // 16777216 = 2^24
}
// Rejection sampling: draws points in the square [-1, 1) x [-1, 1) (z = 0)
// until one falls strictly inside the unit disk.
float3 RandomInUnitDisk(uint32_t& state)
{
    float3 p;
    for (;;)
    {
        p = 2.0 * float3(RandomFloat01(state),RandomFloat01(state),0) - float3(1,1,0);
        if (!(dot(p,p) >= 1.0))
            break;
    }
    return p;
}
// Rejection sampling: draws points in the cube [-1, 1)^3 until one falls
// strictly inside the unit sphere.
float3 RandomInUnitSphere(uint32_t& state)
{
    float3 p;
    for (;;)
    {
        p = 2.0*float3(RandomFloat01(state),RandomFloat01(state),RandomFloat01(state)) - float3(1,1,1);
        if (!(sqLength(p) >= 1.0))
            break;
    }
    return p;
}
// Returns a random unit-length vector: z is drawn from [-1, 1), an angle from
// [0, 2*pi), and (x, y) are placed on the circle of radius sqrt(1 - z^2).
float3 RandomUnitVector(uint32_t& state)
{
    const float height = RandomFloat01(state) * 2.0f - 1.0f;
    const float angle = RandomFloat01(state) * 2.0f * kPI;
    const float radius = sqrtf(1.0f - height * height);
    return float3(radius * cosf(angle), radius * sinf(angle), height);
}
// Intersects ray r with every sphere in `spheres` and reports the nearest hit
// whose distance t satisfies tMin < t < tMax.
// Returns the index of the hit sphere and fills outHit (pos, normal, t), or
// returns -1 and leaves outHit untouched when nothing is hit.
// The quadratic is solved as discr = nb*nb - c, i.e. without a dot(dir, dir)
// term; presumably r.dir is expected to be normalized (confirm against callers).
// Two implementations are selected by DO_HIT_SPHERES_SIMD: a 4-wide SSE/NEON
// version and a scalar one.
int HitSpheres(const Ray& r, const SpheresSoA& spheres, float tMin, float tMax, Hit& outHit)
{
#if DO_HIT_SPHERES_SIMD
// Per-lane best hit distance so far, and the sphere index that produced it (-1 = none).
float4 hitT = float4(tMax);
#if USE_NEON
int32x4_t id = vdupq_n_s32(-1);
#else
__m128i id = _mm_set1_epi32(-1);
#endif
// Broadcast each ray component to all four lanes.
#if DO_FLOAT3_WITH_SIMD && !USE_NEON
float4 rOrigX = SHUFFLE4(r.orig, 0, 0, 0, 0);
float4 rOrigY = SHUFFLE4(r.orig, 1, 1, 1, 1);
float4 rOrigZ = SHUFFLE4(r.orig, 2, 2, 2, 2);
float4 rDirX = SHUFFLE4(r.dir, 0, 0, 0, 0);
float4 rDirY = SHUFFLE4(r.dir, 1, 1, 1, 1);
float4 rDirZ = SHUFFLE4(r.dir, 2, 2, 2, 2);
#elif DO_FLOAT3_WITH_SIMD
float4 rOrigX = splatX(r.orig.m);
float4 rOrigY = splatY(r.orig.m);
float4 rOrigZ = splatZ(r.orig.m);
float4 rDirX = splatX(r.dir.m);
float4 rDirY = splatY(r.dir.m);
float4 rDirZ = splatZ(r.dir.m);
#else
float4 rOrigX = float4(r.orig.x);
float4 rOrigY = float4(r.orig.y);
float4 rOrigZ = float4(r.orig.z);
float4 rDirX = float4(r.dir.x);
float4 rDirY = float4(r.dir.y);
float4 rDirZ = float4(r.dir.z);
#endif
float4 tMin4 = float4(tMin);
// Sphere indices handled by lanes 0..3 in the current iteration.
#if USE_NEON
int32x4_t curId = vcombine_u32(vcreate_u32(0ULL | (1ULL<<32)), vcreate_u32(2ULL | (3ULL<<32)));
#else
__m128i curId = _mm_set_epi32(3, 2, 1, 0);
#endif
// process 4 spheres at once
// NOTE(review): four values are loaded at every step up to spheres.simdCount,
// so the SoA arrays are presumably padded to a multiple of kSimdWidth; confirm.
for (int i = 0; i < spheres.simdCount; i += kSimdWidth)
{
// load data for 4 spheres
float4 sCenterX = float4(spheres.centerX + i);
float4 sCenterY = float4(spheres.centerY + i);
float4 sCenterZ = float4(spheres.centerZ + i);
float4 sSqRadius = float4(spheres.sqRadius + i);
// note: we flip this vector and calculate -b (nb) since that happens to be slightly preferable computationally
float4 coX = sCenterX - rOrigX;
float4 coY = sCenterY - rOrigY;
float4 coZ = sCenterZ - rOrigZ;
float4 nb = coX * rDirX + coY * rDirY + coZ * rDirZ;
float4 c = coX * coX + coY * coY + coZ * coZ - sSqRadius;
float4 discr = nb * nb - c;
bool4 discrPos = discr > float4(0.0f);
// if ray hits any of the 4 spheres
if (any(discrPos))
{
float4 discrSq = sqrtf(discr);
// ray could hit spheres at t0 & t1
float4 t0 = nb - discrSq;
float4 t1 = nb + discrSq;
float4 t = select(t1, t0, t0 > tMin4); // if t0 is above min, take it (since it's the earlier hit); else try t1.
// Lanes with a non-positive discriminant are excluded by discrPos here.
bool4 msk = discrPos & (t > tMin4) & (t < hitT);
// if hit, take it
id = select(id, curId, msk);
hitT = select(hitT, t, msk);
}
#if USE_NEON
curId = vaddq_s32(curId, vdupq_n_s32(kSimdWidth));
#else
curId = _mm_add_epi32(curId, _mm_set1_epi32(kSimdWidth));
#endif
}
// now we have up to 4 hits, find and return closest one
float minT = hmin(hitT);
if (minT < tMax) // any actual hits?
{
// Lanes whose distance equals the minimum.
int minMask = mask(hitT == float4(minT));
if (minMask != 0)
{
int id_scalar[4];
float hitT_scalar[4];
#if USE_NEON
vst1q_s32(id_scalar, id);
vst1q_f32(hitT_scalar, hitT.m);
#else
_mm_storeu_si128((__m128i *)id_scalar, id);
_mm_storeu_ps(hitT_scalar, hitT.m);
#endif
// In general, you would do this with a bit scan (first set/trailing zero count).
// But who cares, it's only 16 options.
// laneId[m] is the index of the lowest set bit of m.
static const int laneId[16] =
{
0, 0, 1, 0, // 00xx
2, 0, 1, 0, // 01xx
3, 0, 1, 0, // 10xx
2, 0, 1, 0, // 11xx
};
int lane = laneId[minMask];
int hitId = id_scalar[lane];
float finalHitT = hitT_scalar[lane];
outHit.pos = r.pointAt(finalHitT);
// (pos - center) scaled by 1/radius.
outHit.normal = (outHit.pos - float3(spheres.centerX[hitId], spheres.centerY[hitId], spheres.centerZ[hitId])) * spheres.invRadius[hitId];
outHit.t = finalHitT;
return hitId;
}
}
return -1;
#else // #if DO_HIT_SPHERES_SIMD
// Scalar path: test the spheres one by one, keeping the closest valid hit.
float hitT = tMax;
int id = -1;
for (int i = 0; i < spheres.count; ++i)
{
float coX = spheres.centerX[i] - r.orig.getX();
float coY = spheres.centerY[i] - r.orig.getY();
float coZ = spheres.centerZ[i] - r.orig.getZ();
float nb = coX * r.dir.getX() + coY * r.dir.getY() + coZ * r.dir.getZ();
float c = coX * coX + coY * coY + coZ * coZ - spheres.sqRadius[i];
float discr = nb * nb - c;
if (discr > 0)
{
float discrSq = sqrtf(discr);
// Try earlier t
float t = nb - discrSq;
if (t <= tMin) // before min, try later t!
t = nb + discrSq;
if (t > tMin && t < hitT)
{
id = i;
hitT = t;
}
}
}
if (id != -1)
{
outHit.pos = r.pointAt(hitT);
// (pos - center) scaled by 1/radius.
outHit.normal = (outHit.pos - float3(spheres.centerX[id], spheres.centerY[id], spheres.centerZ[id])) * spheres.invRadius[id];
outHit.t = hitT;
return id;
}
else
return -1;
#endif // #else of #if DO_HIT_SPHERES_SIMD
}

View File

@@ -0,0 +1,436 @@
#pragma once
#include <math.h>
#include <assert.h>
#include <stdint.h>
#include "Config.h"
#include "MathSimd.h"
#define kPI 3.1415926f
// SSE/SIMD vector largely based on http://www.codersnotes.com/notes/maths-lib-2016/
#if DO_FLOAT3_WITH_SIMD
#if !defined(__arm__) && !defined(__arm64__)
// ---- SSE implementation
// SHUFFLE3(v, 0,1,2) leaves the vector unchanged (v.xyz).
// SHUFFLE3(v, 0,0,0) splats the X (v.xxx).
#define SHUFFLE3(V, X,Y,Z) float3(_mm_shuffle_ps((V).m, (V).m, _MM_SHUFFLE(Z,Z,Y,X)))
// 3-component float vector held in an SSE register (SSE build).
// X, Y, Z live in lanes 0, 1, 2. Lane 3 is a don't-care lane: the constructors
// below fill it with a copy of Z (or the splatted value) and helpers such as
// mask() and sum() ignore it.
struct float3
{
// Default construction leaves the register uninitialized.
VM_INLINE float3() {}
// Reads p[0], p[1], p[2]; lane 3 receives a second copy of p[2].
VM_INLINE explicit float3(const float *p) { m = _mm_set_ps(p[2], p[2], p[1], p[0]); }
// Lanes become (x, y, z, z).
VM_INLINE explicit float3(float x, float y, float z) { m = _mm_set_ps(z, z, y, x); }
// Splats v into every lane.
VM_INLINE explicit float3(float v) { m = _mm_set1_ps(v); }
// Wraps an existing register as-is.
VM_INLINE explicit float3(__m128 v) { m = v; }
// Component reads: Y and Z are first broadcast to lane 0, then extracted as a scalar.
VM_INLINE float getX() const { return _mm_cvtss_f32(m); }
VM_INLINE float getY() const { return _mm_cvtss_f32(_mm_shuffle_ps(m, m, _MM_SHUFFLE(1, 1, 1, 1))); }
VM_INLINE float getZ() const { return _mm_cvtss_f32(_mm_shuffle_ps(m, m, _MM_SHUFFLE(2, 2, 2, 2))); }
// Swizzles: yzx() returns (y, z, x), zxy() returns (z, x, y).
VM_INLINE float3 yzx() const { return SHUFFLE3(*this, 1, 2, 0); }
VM_INLINE float3 zxy() const { return SHUFFLE3(*this, 2, 0, 1); }
// Writes exactly three floats to p.
VM_INLINE void store(float *p) const { p[0] = getX(); p[1] = getY(); p[2] = getZ(); }
// Replaces lane 0, keeping the other lanes.
void setX(float x)
{
m = _mm_move_ss(m, _mm_set_ss(x));
}
// Replaces lane 1, keeping the other lanes.
void setY(float y)
{
// t = (y, m1, m2, m3)
__m128 t = _mm_move_ss(m, _mm_set_ss(y));
// t = (y, y, m2, m3)
t = _mm_shuffle_ps(t, t, _MM_SHUFFLE(3, 2, 0, 0));
// m = (m0, y, m2, m3): the original X is moved back into lane 0
m = _mm_move_ss(t, m);
}
// Replaces lane 2, keeping the other lanes.
void setZ(float z)
{
// t = (z, m1, m2, m3)
__m128 t = _mm_move_ss(m, _mm_set_ss(z));
// t = (z, m1, z, m3)
t = _mm_shuffle_ps(t, t, _MM_SHUFFLE(3, 0, 1, 0));
// m = (m0, m1, z, m3): the original X is moved back into lane 0
m = _mm_move_ss(t, m);
}
// Raw register; lane 3 carries no meaning.
__m128 m;
};
typedef float3 bool3;
// ---- Lane-wise arithmetic (SSE). All four lanes are computed; lane 3 is a don't-care.
VM_INLINE float3 operator+ (float3 a, float3 b) { return float3(_mm_add_ps(a.m, b.m)); }
VM_INLINE float3 operator- (float3 a, float3 b) { return float3(_mm_sub_ps(a.m, b.m)); }
VM_INLINE float3 operator* (float3 a, float3 b) { return float3(_mm_mul_ps(a.m, b.m)); }
VM_INLINE float3 operator/ (float3 a, float3 b) { return float3(_mm_div_ps(a.m, b.m)); }
// Vector/scalar forms: the scalar is broadcast to every lane first.
VM_INLINE float3 operator* (float3 a, float b) { return float3(_mm_mul_ps(a.m, _mm_set1_ps(b))); }
VM_INLINE float3 operator/ (float3 a, float b) { return float3(_mm_div_ps(a.m, _mm_set1_ps(b))); }
VM_INLINE float3 operator* (float a, float3 b) { return float3(_mm_mul_ps(_mm_set1_ps(a), b.m)); }
VM_INLINE float3 operator/ (float a, float3 b) { return float3(_mm_div_ps(_mm_set1_ps(a), b.m)); }
// Compound assignment: update the left operand in place and return it.
VM_INLINE float3& operator+= (float3 &a, float3 b) { a.m = _mm_add_ps(a.m, b.m); return a; }
VM_INLINE float3& operator-= (float3 &a, float3 b) { a.m = _mm_sub_ps(a.m, b.m); return a; }
VM_INLINE float3& operator*= (float3 &a, float3 b) { a.m = _mm_mul_ps(a.m, b.m); return a; }
VM_INLINE float3& operator/= (float3 &a, float3 b) { a.m = _mm_div_ps(a.m, b.m); return a; }
VM_INLINE float3& operator*= (float3 &a, float b) { a.m = _mm_mul_ps(a.m, _mm_set1_ps(b)); return a; }
VM_INLINE float3& operator/= (float3 &a, float b) { a.m = _mm_div_ps(a.m, _mm_set1_ps(b)); return a; }
// Comparisons yield a per-lane bit mask (all ones where the relation holds), typed as bool3.
VM_INLINE bool3 operator==(float3 a, float3 b) { return bool3(_mm_cmpeq_ps(a.m, b.m)); }
VM_INLINE bool3 operator!=(float3 a, float3 b) { return bool3(_mm_cmpneq_ps(a.m, b.m)); }
VM_INLINE bool3 operator< (float3 a, float3 b) { return bool3(_mm_cmplt_ps(a.m, b.m)); }
VM_INLINE bool3 operator> (float3 a, float3 b) { return bool3(_mm_cmpgt_ps(a.m, b.m)); }
VM_INLINE bool3 operator<=(float3 a, float3 b) { return bool3(_mm_cmple_ps(a.m, b.m)); }
VM_INLINE bool3 operator>=(float3 a, float3 b) { return bool3(_mm_cmpge_ps(a.m, b.m)); }
// Lane-wise minimum / maximum.
VM_INLINE float3 min(float3 a, float3 b) { return float3(_mm_min_ps(a.m, b.m)); }
VM_INLINE float3 max(float3 a, float3 b) { return float3(_mm_max_ps(a.m, b.m)); }
// Negation, computed as 0 - a.
VM_INLINE float3 operator- (float3 a) { return float3(_mm_sub_ps(_mm_setzero_ps(), a.m)); }
// Horizontal minimum: the smallest of the X, Y and Z components.
VM_INLINE float hmin(float3 v)
{
    // Lane 0 becomes min(x, y); lane 2 stays z (min of z with itself).
    const float3 pairMin = min(v, SHUFFLE3(v, 1, 0, 2));
    // Bring z down to lane 0 and fold it in.
    const float3 zFirst = SHUFFLE3(pairMin, 2, 0, 1);
    return min(pairMin, zFirst).getX();
}
// Horizontal maximum: the largest of the X, Y and Z components.
VM_INLINE float hmax(float3 v)
{
    // Lane 0 becomes max(x, y); lane 2 stays z (max of z with itself).
    const float3 pairMax = max(v, SHUFFLE3(v, 1, 0, 2));
    // Bring z down to lane 0 and fold it in.
    const float3 zFirst = SHUFFLE3(pairMax, 2, 0, 1);
    return max(pairMax, zFirst).getX();
}
// Cross product a x b:
//   x = a.y*b.z - a.z*b.y
//   y = a.z*b.x - a.x*b.z
//   z = a.x*b.y - a.y*b.x
VM_INLINE float3 cross(float3 a, float3 b)
{
    // The difference below holds the result rotated by one lane, i.e. (y, z, x).
    // Computing it in that order needs one shuffle fewer; the last zxy() rotates
    // it back into (x, y, z).
    const float3 lhs = a.zxy() * b;
    const float3 rhs = a * b.zxy();
    return (lhs - rhs).zxy();
}
// Collects the sign bits of X, Y, Z into bits 0, 1, 2 of the result; lane 3 is dropped.
VM_INLINE unsigned mask(float3 v)
{
    const int signBits = _mm_movemask_ps(v.m);
    return signBits & 7;
}
// Branching helpers for comparison results:
// true when at least one of X, Y, Z is set.
VM_INLINE bool any(bool3 v)
{
    const unsigned bits = mask(v);
    return bits != 0;
}
// true when X, Y and Z are all set.
VM_INLINE bool all(bool3 v)
{
    const unsigned bits = mask(v);
    return bits == 7;
}
// Lane-wise clamp: raise t to at least a, then limit it to at most b.
VM_INLINE float3 clamp(float3 t, float3 a, float3 b)
{
    const float3 raised = max(t, a);
    return min(raised, b);
}
// x + y + z.
VM_INLINE float sum(float3 v)
{
    const float x = v.getX();
    const float y = v.getY();
    const float z = v.getZ();
    return x + y + z;
}
// Dot product: sum of the lane-wise product.
VM_INLINE float dot(float3 a, float3 b)
{
    const float3 products = a * b;
    return sum(products);
}
#else // #if !defined(__arm__) && !defined(__arm64__)
// ---- NEON implementation
#include <arm_neon.h>
// 3-component float vector held in a NEON register (ARM build).
// X, Y, Z live in lanes 0, 1, 2. Lane 3 is not part of the value: the
// (x, y, z) and pointer constructors set it to 0, the splat constructor sets
// it to v, and arithmetic leaves whatever the lane-wise operation produced.
struct float3
{
// Default construction leaves the register uninitialized.
VM_INLINE float3() {}
// Reads p[0], p[1], p[2]; lane 3 is set to 0.
VM_INLINE explicit float3(const float *p) { float v[4] = {p[0], p[1], p[2], 0}; m = vld1q_f32(v); }
// Lanes become (x, y, z, 0).
VM_INLINE explicit float3(float x, float y, float z) { float v[4] = {x, y, z, 0}; m = vld1q_f32(v); }
// Splats v into every lane.
VM_INLINE explicit float3(float v) { m = vdupq_n_f32(v); }
// Wraps an existing register as-is.
VM_INLINE explicit float3(float32x4_t v) { m = v; }
VM_INLINE float getX() const { return vgetq_lane_f32(m, 0); }
VM_INLINE float getY() const { return vgetq_lane_f32(m, 1); }
VM_INLINE float getZ() const { return vgetq_lane_f32(m, 2); }
// Swizzle to (y, z, x); lane 3 ends up holding y.
VM_INLINE float3 yzx() const
{
// low = (x, y)
float32x2_t low = vget_low_f32(m);
// vext(low, high, 1) = (y, z); combined with low this gives (y, z, x, y)
float32x4_t yzx = vcombine_f32(vext_f32(low, vget_high_f32(m), 1), low);
return float3(yzx);
}
// Swizzle to (z, x, y); lane 3 keeps its original content.
VM_INLINE float3 zxy() const
{
float32x4_t p = m;
// vextq(p, p, 1) rotates to (y, z, w, x); taking the odd elements of that
// followed by the odd elements of p (val[1] of the unzip) yields (z, x, y, w).
p = vuzpq_f32(vreinterpretq_f32_s32(vextq_s32(vreinterpretq_s32_f32(p), vreinterpretq_s32_f32(p), 1)), p).val[1];
return float3(p);
}
// Writes exactly three floats to p.
VM_INLINE void store(float *p) const { p[0] = getX(); p[1] = getY(); p[2] = getZ(); }
// Single-lane setters; the other lanes are preserved.
void setX(float x)
{
m = vsetq_lane_f32(x, m, 0);
}
void setY(float y)
{
m = vsetq_lane_f32(y, m, 1);
}
void setZ(float z)
{
m = vsetq_lane_f32(z, m, 2);
}
// Raw register; lane 3 carries no meaning.
float32x4_t m;
};
typedef float3 bool3;
// Approximate lane-wise reciprocal 1/v: start from the hardware estimate and
// refine it with two reciprocal-step iterations (vrecpsq supplies the
// correction factor that the estimate is multiplied by).
VM_INLINE float32x4_t rcp_2(float32x4_t v)
{
    float32x4_t estimate = vrecpeq_f32(v);
    for (int step = 0; step < 2; ++step)
        estimate = vmulq_f32(vrecpsq_f32(estimate, v), estimate);
    return estimate;
}
// ---- Lane-wise arithmetic (NEON). All four lanes are computed; lane 3 is a don't-care.
VM_INLINE float3 operator+ (float3 a, float3 b) { a.m = vaddq_f32(a.m, b.m); return a; }
VM_INLINE float3 operator- (float3 a, float3 b) { a.m = vsubq_f32(a.m, b.m); return a; }
VM_INLINE float3 operator* (float3 a, float3 b) { a.m = vmulq_f32(a.m, b.m); return a; }
// Division multiplies by the refined reciprocal estimate from rcp_2(), so the
// result is an approximation rather than an exactly rounded quotient.
VM_INLINE float3 operator/ (float3 a, float3 b) { float32x4_t recip = rcp_2(b.m); a.m = vmulq_f32(a.m, recip); return a; }
VM_INLINE float3 operator* (float3 a, float b) { a.m = vmulq_f32(a.m, vdupq_n_f32(b)); return a; }
VM_INLINE float3 operator/ (float3 a, float b) { float32x4_t recip = rcp_2(vdupq_n_f32(b)); a.m = vmulq_f32(a.m, recip); return a; }
VM_INLINE float3 operator* (float a, float3 b) { b.m = vmulq_f32(vdupq_n_f32(a), b.m); return b; }
VM_INLINE float3 operator/ (float a, float3 b) { float32x4_t recip = rcp_2(b.m); b.m = vmulq_f32(vdupq_n_f32(a), recip); return b; }
// Compound assignment: update the left operand in place and return it.
VM_INLINE float3& operator+= (float3 &a, float3 b) { a = a + b; return a; }
VM_INLINE float3& operator-= (float3 &a, float3 b) { a = a - b; return a; }
VM_INLINE float3& operator*= (float3 &a, float3 b) { a = a * b; return a; }
VM_INLINE float3& operator/= (float3 &a, float3 b) { a = a / b; return a; }
VM_INLINE float3& operator*= (float3 &a, float b) { a = a * b; return a; }
VM_INLINE float3& operator/= (float3 &a, float b) { a = a / b; return a; }
// Comparisons yield a per-lane bit mask (all ones where the relation holds).
// The NEON compare intrinsics return uint32x4_t while the member is
// float32x4_t, so the bits are reinterpreted explicitly; relying on an implicit
// conversion only compiles when lax vector conversions are enabled.
VM_INLINE bool3 operator==(float3 a, float3 b) { a.m = vreinterpretq_f32_u32(vceqq_f32(a.m, b.m)); return a; }
VM_INLINE bool3 operator!=(float3 a, float3 b) { a.m = vreinterpretq_f32_u32(vmvnq_u32(vceqq_f32(a.m, b.m))); return a; }
VM_INLINE bool3 operator< (float3 a, float3 b) { a.m = vreinterpretq_f32_u32(vcltq_f32(a.m, b.m)); return a; }
VM_INLINE bool3 operator> (float3 a, float3 b) { a.m = vreinterpretq_f32_u32(vcgtq_f32(a.m, b.m)); return a; }
VM_INLINE bool3 operator<=(float3 a, float3 b) { a.m = vreinterpretq_f32_u32(vcleq_f32(a.m, b.m)); return a; }
VM_INLINE bool3 operator>=(float3 a, float3 b) { a.m = vreinterpretq_f32_u32(vcgeq_f32(a.m, b.m)); return a; }
// Lane-wise minimum / maximum and negation.
VM_INLINE float3 min(float3 a, float3 b) { a.m = vminq_f32(a.m, b.m); return a; }
VM_INLINE float3 max(float3 a, float3 b) { a.m = vmaxq_f32(a.m, b.m); return a; }
VM_INLINE float3 operator- (float3 a) { a.m = vnegq_f32(a.m); return a; }
// Horizontal minimum: the smallest of the X, Y and Z components.
// Lane 3 is not part of a float3 (the (x, y, z) constructor leaves 0 there), so
// it must not take part in the reduction.
VM_INLINE float hmin(float3 v)
{
    // Pair z with a copy of itself instead of with lane 3.
    float32x2_t zz = vdup_lane_f32(vget_high_f32(v.m), 0);
    // (min(x, y), z)
    float32x2_t minOfPairs = vpmin_f32(vget_low_f32(v.m), zz);
    // lane 0 = min(min(x, y), z)
    float32x2_t minOfAll = vpmin_f32(minOfPairs, minOfPairs);
    return vget_lane_f32(minOfAll, 0);
}
// Horizontal maximum: the largest of the X, Y and Z components.
// Lane 3 is not part of a float3 (the (x, y, z) constructor leaves 0 there), so
// it must not take part in the reduction.
VM_INLINE float hmax(float3 v)
{
    // Pair z with a copy of itself instead of with lane 3.
    float32x2_t zz = vdup_lane_f32(vget_high_f32(v.m), 0);
    // (max(x, y), z)
    float32x2_t maxOfPairs = vpmax_f32(vget_low_f32(v.m), zz);
    // lane 0 = max(max(x, y), z)
    float32x2_t maxOfAll = vpmax_f32(maxOfPairs, maxOfPairs);
    return vget_lane_f32(maxOfAll, 0);
}
// Cross product a x b:
//   x = a.y*b.z - a.z*b.y
//   y = a.z*b.x - a.x*b.z
//   z = a.x*b.y - a.y*b.x
VM_INLINE float3 cross(float3 a, float3 b)
{
    // The difference below holds the result rotated by one lane, i.e. (y, z, x).
    // Computing it in that order needs one swizzle fewer; the last zxy() rotates
    // it back into (x, y, z).
    const float3 lhs = a.zxy() * b;
    const float3 rhs = a * b.zxy();
    return (lhs - rhs).zxy();
}
// Returns a 3-bit code where bit0..bit2 is the sign bit of X..Z.
// Lane 3 is deliberately dropped: it is not part of a float3, and all() below
// compares against 7, so a set lane-3 bit would make all() fail even though
// X, Y and Z are all set.
VM_INLINE unsigned mask(float3 v)
{
    static const uint32x4_t movemask = { 1, 2, 4, 8 };
    static const uint32x4_t highbit = { 0x80000000, 0x80000000, 0x80000000, 0x80000000 };
    uint32x4_t t0 = vreinterpretq_u32_f32(v.m);
    // all-ones in every lane whose sign bit is set
    uint32x4_t t1 = vtstq_u32(t0, highbit);
    // keep one distinct bit per lane
    uint32x4_t t2 = vandq_u32(t1, movemask);
    // OR the four lanes together
    uint32x2_t t3 = vorr_u32(vget_low_u32(t2), vget_high_u32(t2));
    return (vget_lane_u32(t3, 0) | vget_lane_u32(t3, 1)) & 7;
}
// Once we have a comparison, we can branch based on its results:
// any(): at least one of X, Y, Z is set; all(): X, Y and Z are all set.
VM_INLINE bool any(bool3 v) { return mask(v) != 0; }
VM_INLINE bool all(bool3 v) { return mask(v) == 7; }
// Lane-wise clamp: raise t to at least a, then limit it to at most b.
VM_INLINE float3 clamp(float3 t, float3 a, float3 b) { return min(max(t, a), b); }
// x + y + z.
VM_INLINE float sum(float3 v) { return v.getX() + v.getY() + v.getZ(); }
// Dot product: sum of the lane-wise product.
VM_INLINE float dot(float3 a, float3 b) { return sum(a*b); }
#endif // #else of #if !defined(__arm__) && !defined(__arm64__)
#else // #if DO_FLOAT3_WITH_SIMD
// ---- Simple scalar C implementation
// Plain scalar 3-component vector (build without SIMD float3).
struct float3
{
    // Zero vector by default.
    float3() : float3(0, 0, 0) {}
    float3(float x_, float y_, float z_) : x(x_), y(y_), z(z_) {}

    // Component-wise negation.
    float3 operator-() const
    {
        return float3(-x, -y, -z);
    }

    // Component-wise compound assignment.
    float3& operator+=(const float3& o)
    {
        x += o.x;
        y += o.y;
        z += o.z;
        return *this;
    }
    float3& operator-=(const float3& o)
    {
        x -= o.x;
        y -= o.y;
        z -= o.z;
        return *this;
    }
    float3& operator*=(const float3& o)
    {
        x *= o.x;
        y *= o.y;
        z *= o.z;
        return *this;
    }
    // Uniform scale.
    float3& operator*=(float o)
    {
        x *= o;
        y *= o;
        z *= o;
        return *this;
    }

    // Accessors mirroring the SIMD float3 interface.
    VM_INLINE float getX() const { return x; }
    VM_INLINE float getY() const { return y; }
    VM_INLINE float getZ() const { return z; }
    VM_INLINE void setX(float x_) { x = x_; }
    VM_INLINE void setY(float y_) { y = y_; }
    VM_INLINE void setZ(float z_) { z = z_; }

    // Writes exactly three floats to p.
    VM_INLINE void store(float *p) const
    {
        p[0] = x;
        p[1] = y;
        p[2] = z;
    }

    float x, y, z;
};
// Component-wise binary operators, built on the compound-assignment members.
VM_INLINE float3 operator+(const float3& a, const float3& b)
{
    float3 result(a);
    result += b;
    return result;
}
VM_INLINE float3 operator-(const float3& a, const float3& b)
{
    float3 result(a);
    result -= b;
    return result;
}
VM_INLINE float3 operator*(const float3& a, const float3& b)
{
    float3 result(a);
    result *= b;
    return result;
}
// Uniform scale, vector on the left.
VM_INLINE float3 operator*(const float3& a, float b)
{
    float3 result(a);
    result *= b;
    return result;
}
// Uniform scale, scalar on the left.
VM_INLINE float3 operator*(float a, const float3& b)
{
    return float3(a*b.x, a*b.y, a*b.z);
}
// Dot product.
VM_INLINE float dot(const float3& a, const float3& b)
{
    return a.x*b.x + a.y*b.y + a.z*b.z;
}
// Cross product a x b.
VM_INLINE float3 cross(const float3& a, const float3& b)
{
    const float cx = a.y*b.z - a.z*b.y;
    // The middle component is formed as the negation of (a.x*b.z - a.z*b.x).
    const float cyNegated = a.x*b.z - a.z*b.x;
    const float cz = a.x*b.y - a.y*b.x;
    return float3(cx, -cyNegated, cz);
}
#endif // #else of #if DO_FLOAT3_WITH_SIMD
// Euclidean length of v.
VM_INLINE float length(float3 v)
{
    const float squared = dot(v, v);
    return sqrtf(squared);
}
// Squared length of v (no square root).
VM_INLINE float sqLength(float3 v)
{
    return dot(v, v);
}
// v scaled by the reciprocal of its length; no special handling of zero-length input.
VM_INLINE float3 normalize(float3 v)
{
    const float invLen = 1.0f / length(v);
    return v * invLen;
}
// Linear interpolation: a at t = 0, b at t = 1.
VM_INLINE float3 lerp(float3 a, float3 b, float t)
{
    const float3 delta = b - a;
    return a + delta*t;
}
// Debug check that v is (approximately) unit length: its squared length must
// be within 0.01 of 1.
inline void AssertUnit(float3 v)
{
    assert(fabsf(dot(v, v) - 1.0f) < 0.01f);
}
// Mirrors v about the plane with normal n: v - 2*(v.n)*n.
inline float3 reflect(float3 v, float3 n)
{
    const float twiceProjection = 2*dot(v,n);
    return v - twiceProjection*n;
}
// Refracts unit vector v through a surface with normal n.
// nint is the refraction-index ratio applied to the tangential part of v.
// Returns false, leaving outRefracted untouched, when the discriminant is not
// positive (no refracted direction exists); otherwise writes the refracted
// direction and returns true.
inline bool refract(float3 v, float3 n, float nint, float3& outRefracted)
{
    AssertUnit(v);
    const float cosIncident = dot(v, n);
    const float discriminant = 1.0f - nint*nint*(1-cosIncident*cosIncident);
    if (!(discriminant > 0))
        return false;
    outRefracted = nint * (v - n*cosIncident) - n*sqrtf(discriminant);
    return true;
}
// Schlick's approximation of reflectance for refraction index ri at the given
// cosine: r0 + (1 - r0) * (1 - cosine)^5, with r0 = ((1 - ri) / (1 + ri))^2.
inline float schlick(float cosine, float ri)
{
    const float r0Root = (1-ri) / (1+ri);
    const float r0 = r0Root*r0Root;
    return r0 + (1-r0)*powf(1-cosine, 5);
}
// A ray: origin plus direction. The two-argument constructor asserts (in debug
// builds) that the direction is unit length.
struct Ray
{
    Ray() {}
    Ray(float3 orig_, float3 dir_)
        : orig(orig_), dir(dir_)
    {
        AssertUnit(dir);
    }

    // Point reached after travelling distance t along the ray.
    float3 pointAt(float t) const
    {
        const float3 travelled = dir * t;
        return orig + travelled;
    }

    float3 orig;
    float3 dir;
};
// Result of a ray/sphere intersection, filled in by HitSpheres().
struct Hit
{
// Intersection point (ray evaluated at t).
float3 pos;
// Surface normal at pos: (pos - sphere center) scaled by the sphere's inverse radius.
float3 normal;
// Ray parameter of the intersection.
float t;
};
// A sphere plus a cached reciprocal radius. invRadius starts at 0 and is only
// valid after UpdateDerivedData() has been called.
struct Sphere
{
    // Radius 1; center is left default-constructed.
    Sphere()
    {
        radius = 1.0f;
        invRadius = 0.0f;
    }
    Sphere(float3 center_, float radius_)
        : center(center_), radius(radius_), invRadius(0.0f)
    {
    }

    // Recomputes the cached 1/radius.
    void UpdateDerivedData()
    {
        invRadius = 1.0f/radius;
    }

    float3 center;
    float radius;
    float invRadius;
};
// Data for all spheres in a "structure of arrays" layout: one float array per
// attribute, each simdCount elements long. Entries [0, count) are left
// uninitialized by the constructor and must be filled in by the owner.
struct SpheresSoA
{
    SpheresSoA(int c)
    {
        count = c;
        // Spheres are processed in kSimdWidth chunks, so round the allocation up
        // to a whole number of chunks.
        simdCount = (c + (kSimdWidth - 1)) / kSimdWidth * kSimdWidth;
        centerX = new float[simdCount];
        centerY = new float[simdCount];
        centerZ = new float[simdCount];
        sqRadius = new float[simdCount];
        invRadius = new float[simdCount];
        // Put the padding entries into an "impossible sphere" state
        // (far away, zero radius).
        for (int i = count; i < simdCount; ++i)
        {
            centerX[i] = centerY[i] = centerZ[i] = 10000.0f;
            sqRadius[i] = 0.0f;
            invRadius[i] = 0.0f;
        }
    }
    ~SpheresSoA()
    {
        delete[] centerX;
        delete[] centerY;
        delete[] centerZ;
        delete[] sqRadius;
        delete[] invRadius;
    }
    // The arrays are owned through raw pointers, so a member-wise copy would
    // leave two objects calling delete[] on the same memory. Forbid copying.
    SpheresSoA(const SpheresSoA&) = delete;
    SpheresSoA& operator=(const SpheresSoA&) = delete;

    float* centerX;
    float* centerY;
    float* centerZ;
    float* sqRadius;  // squared radius
    float* invRadius; // 1 / radius
    int simdCount;    // allocated element count (count rounded up to kSimdWidth)
    int count;        // number of real spheres
};
// Intersects r with every sphere and reports the closest hit with t inside
// (tMin, tMax). Returns the index of the sphere that was hit and fills outHit,
// or returns -1 when nothing was hit.
int HitSpheres(const Ray& r, const SpheresSoA& spheres, float tMin, float tMax, Hit& outHit);
// Random helpers, defined elsewhere. Each advances the caller-owned generator `state`.
// NOTE(review): value ranges are presumed from the names ([0,1) float, point in
// the unit disk / unit sphere, unit-length vector) - confirm against the definitions.
float RandomFloat01(uint32_t& state);
float3 RandomInUnitDisk(uint32_t& state);
float3 RandomInUnitSphere(uint32_t& state);
float3 RandomUnitVector(uint32_t& state);
// Thin-lens camera: generates rays through a focus plane, jittering the ray
// origin across a lens of radius aperture/2.
struct Camera
{
    Camera() {}

    // vfov is the top-to-bottom field of view in degrees; aspect is width/height.
    Camera(const float3& lookFrom, const float3& lookAt, const float3& vup, float vfov, float aspect, float aperture, float focusDist)
    {
        // Camera basis: w points from the target back towards the eye,
        // u to the right, v up.
        origin = lookFrom;
        w = normalize(lookFrom - lookAt);
        u = normalize(cross(vup, w));
        v = cross(w, u);

        // Half extents of the view at unit distance.
        const float theta = vfov*kPI/180;
        const float halfHeight = tanf(theta/2);
        const float halfWidth = aspect * halfHeight;

        // Focus-plane rectangle, located focusDist in front of the eye.
        horizontal = 2*halfWidth*focusDist*u;
        vertical = 2*halfHeight*focusDist*v;
        lowerLeftCorner = origin - halfWidth*focusDist*u - halfHeight*focusDist*v - focusDist*w;

        lensRadius = aperture / 2;
    }

    // Ray through focus-plane coordinates (s, t), where (0, 0) is the lower-left
    // corner and (1, 1) the opposite one. The origin is offset by a random
    // point on the lens, drawn from `state`.
    Ray GetRay(float s, float t, uint32_t& state) const
    {
        const float3 lensSample = lensRadius * RandomInUnitDisk(state);
        const float3 offset = u * lensSample.getX() + v * lensSample.getY();
        const float3 target = lowerLeftCorner + s*horizontal + t*vertical;
        return Ray(origin + offset, normalize(target - origin - offset));
    }

    float3 origin;
    float3 lowerLeftCorner;
    float3 horizontal;
    float3 vertical;
    float3 u, v, w;
    float lensRadius;
};

View File

@@ -0,0 +1,392 @@
#include "Config.h"
#include "Test.h"
#include "Maths.h"
#include <algorithm>
#if CPU_CAN_DO_THREADS
#include "enkiTS/TaskScheduler_c.h"
#include <thread>
#endif
#include <atomic>
#include "../../../public/tracy/Tracy.hpp"
// 46 spheres (2 emissive) when enabled; 9 spheres (1 emissive) when disabled
#define DO_BIG_SCENE 1
static Sphere s_Spheres[] =
{
{float3(0,-100.5,-1), 100},
{float3(2,0,-1), 0.5f},
{float3(0,0,-1), 0.5f},
{float3(-2,0,-1), 0.5f},
{float3(2,0,1), 0.5f},
{float3(0,0,1), 0.5f},
{float3(-2,0,1), 0.5f},
{float3(0.5f,1,0.5f), 0.5f},
{float3(-1.5f,1.5f,0.f), 0.3f},
#if DO_BIG_SCENE
{float3(4,0,-3), 0.5f}, {float3(3,0,-3), 0.5f}, {float3(2,0,-3), 0.5f}, {float3(1,0,-3), 0.5f}, {float3(0,0,-3), 0.5f}, {float3(-1,0,-3), 0.5f}, {float3(-2,0,-3), 0.5f}, {float3(-3,0,-3), 0.5f}, {float3(-4,0,-3), 0.5f},
{float3(4,0,-4), 0.5f}, {float3(3,0,-4), 0.5f}, {float3(2,0,-4), 0.5f}, {float3(1,0,-4), 0.5f}, {float3(0,0,-4), 0.5f}, {float3(-1,0,-4), 0.5f}, {float3(-2,0,-4), 0.5f}, {float3(-3,0,-4), 0.5f}, {float3(-4,0,-4), 0.5f},
{float3(4,0,-5), 0.5f}, {float3(3,0,-5), 0.5f}, {float3(2,0,-5), 0.5f}, {float3(1,0,-5), 0.5f}, {float3(0,0,-5), 0.5f}, {float3(-1,0,-5), 0.5f}, {float3(-2,0,-5), 0.5f}, {float3(-3,0,-5), 0.5f}, {float3(-4,0,-5), 0.5f},
{float3(4,0,-6), 0.5f}, {float3(3,0,-6), 0.5f}, {float3(2,0,-6), 0.5f}, {float3(1,0,-6), 0.5f}, {float3(0,0,-6), 0.5f}, {float3(-1,0,-6), 0.5f}, {float3(-2,0,-6), 0.5f}, {float3(-3,0,-6), 0.5f}, {float3(-4,0,-6), 0.5f},
{float3(1.5f,1.5f,-2), 0.3f},
#endif // #if DO_BIG_SCENE
};
const int kSphereCount = sizeof(s_Spheres) / sizeof(s_Spheres[0]);
static SpheresSoA s_SpheresSoA(kSphereCount);
// Surface description for one sphere; s_SphereMats[i] belongs to s_Spheres[i].
struct Material
{
// Selects the scattering model used by Scatter().
enum Type { Lambert, Metal, Dielectric };
Type type;
// Attenuation applied to scattered light (Lambert and Metal).
float3 albedo;
// Emitted light; a sphere with any positive component is treated as a light source.
float3 emissive;
// Metal only: scales the random perturbation added to the mirror direction.
float roughness;
// Dielectric only: refraction index.
float ri;
};
static Material s_SphereMats[kSphereCount] =
{
{ Material::Lambert, float3(0.8f, 0.8f, 0.8f), float3(0,0,0), 0, 0, },
{ Material::Lambert, float3(0.8f, 0.4f, 0.4f), float3(0,0,0), 0, 0, },
{ Material::Lambert, float3(0.4f, 0.8f, 0.4f), float3(0,0,0), 0, 0, },
{ Material::Metal, float3(0.4f, 0.4f, 0.8f), float3(0,0,0), 0, 0 },
{ Material::Metal, float3(0.4f, 0.8f, 0.4f), float3(0,0,0), 0, 0 },
{ Material::Metal, float3(0.4f, 0.8f, 0.4f), float3(0,0,0), 0.2f, 0 },
{ Material::Metal, float3(0.4f, 0.8f, 0.4f), float3(0,0,0), 0.6f, 0 },
{ Material::Dielectric, float3(0.4f, 0.4f, 0.4f), float3(0,0,0), 0, 1.5f },
{ Material::Lambert, float3(0.8f, 0.6f, 0.2f), float3(30,25,15), 0, 0 },
#if DO_BIG_SCENE
{ Material::Lambert, float3(0.1f, 0.1f, 0.1f), float3(0,0,0), 0, 0, }, { Material::Lambert, float3(0.2f, 0.2f, 0.2f), float3(0,0,0), 0, 0, }, { Material::Lambert, float3(0.3f, 0.3f, 0.3f), float3(0,0,0), 0, 0, }, { Material::Lambert, float3(0.4f, 0.4f, 0.4f), float3(0,0,0), 0, 0, }, { Material::Lambert, float3(0.5f, 0.5f, 0.5f), float3(0,0,0), 0, 0, }, { Material::Lambert, float3(0.6f, 0.6f, 0.6f), float3(0,0,0), 0, 0, }, { Material::Lambert, float3(0.7f, 0.7f, 0.7f), float3(0,0,0), 0, 0, }, { Material::Lambert, float3(0.8f, 0.8f, 0.8f), float3(0,0,0), 0, 0, }, { Material::Lambert, float3(0.9f, 0.9f, 0.9f), float3(0,0,0), 0, 0, },
{ Material::Metal, float3(0.1f, 0.1f, 0.1f), float3(0,0,0), 0, 0, }, { Material::Metal, float3(0.2f, 0.2f, 0.2f), float3(0,0,0), 0, 0, }, { Material::Metal, float3(0.3f, 0.3f, 0.3f), float3(0,0,0), 0, 0, }, { Material::Metal, float3(0.4f, 0.4f, 0.4f), float3(0,0,0), 0, 0, }, { Material::Metal, float3(0.5f, 0.5f, 0.5f), float3(0,0,0), 0, 0, }, { Material::Metal, float3(0.6f, 0.6f, 0.6f), float3(0,0,0), 0, 0, }, { Material::Metal, float3(0.7f, 0.7f, 0.7f), float3(0,0,0), 0, 0, }, { Material::Metal, float3(0.8f, 0.8f, 0.8f), float3(0,0,0), 0, 0, }, { Material::Metal, float3(0.9f, 0.9f, 0.9f), float3(0,0,0), 0, 0, },
{ Material::Metal, float3(0.8f, 0.1f, 0.1f), float3(0,0,0), 0, 0, }, { Material::Metal, float3(0.8f, 0.5f, 0.1f), float3(0,0,0), 0, 0, }, { Material::Metal, float3(0.8f, 0.8f, 0.1f), float3(0,0,0), 0, 0, }, { Material::Metal, float3(0.4f, 0.8f, 0.1f), float3(0,0,0), 0, 0, }, { Material::Metal, float3(0.1f, 0.8f, 0.1f), float3(0,0,0), 0, 0, }, { Material::Metal, float3(0.1f, 0.8f, 0.5f), float3(0,0,0), 0, 0, }, { Material::Metal, float3(0.1f, 0.8f, 0.8f), float3(0,0,0), 0, 0, }, { Material::Metal, float3(0.1f, 0.1f, 0.8f), float3(0,0,0), 0, 0, }, { Material::Metal, float3(0.5f, 0.1f, 0.8f), float3(0,0,0), 0, 0, },
{ Material::Lambert, float3(0.8f, 0.1f, 0.1f), float3(0,0,0), 0, 0, }, { Material::Lambert, float3(0.8f, 0.5f, 0.1f), float3(0,0,0), 0, 0, }, { Material::Lambert, float3(0.8f, 0.8f, 0.1f), float3(0,0,0), 0, 0, }, { Material::Lambert, float3(0.4f, 0.8f, 0.1f), float3(0,0,0), 0, 0, }, { Material::Lambert, float3(0.1f, 0.8f, 0.1f), float3(0,0,0), 0, 0, }, { Material::Lambert, float3(0.1f, 0.8f, 0.5f), float3(0,0,0), 0, 0, }, { Material::Lambert, float3(0.1f, 0.8f, 0.8f), float3(0,0,0), 0, 0, }, { Material::Lambert, float3(0.1f, 0.1f, 0.8f), float3(0,0,0), 0, 0, }, { Material::Metal, float3(0.5f, 0.1f, 0.8f), float3(0,0,0), 0, 0, },
{ Material::Lambert, float3(0.1f, 0.2f, 0.5f), float3(3,10,20), 0, 0 },
#endif
};
static int s_EmissiveSpheres[kSphereCount];
static int s_EmissiveSphereCount;
static Camera s_Cam;
const float kMinT = 0.001f;
const float kMaxT = 1.0e7f;
const int kMaxDepth = 10;
// Intersects r with the scene. On a hit, fills outHit, stores the index of the
// sphere in outID and returns true; otherwise outID is -1 and the result is false.
bool HitWorld(const Ray& r, float tMin, float tMax, Hit& outHit, int& outID)
{
    const int sphereIndex = HitSpheres(r, s_SpheresSoA, tMin, tMax, outHit);
    outID = sphereIndex;
    return sphereIndex != -1;
}
// Computes how ray r_in scatters at hit `rec` on a surface with material `mat`.
//   attenuation   - receives the color multiplier for light arriving along `scattered`
//   scattered     - receives the outgoing ray
//   outLightE     - receives direct light gathered by explicit light sampling
//                   (Lambert only, when DO_LIGHT_SAMPLING is on; otherwise zero)
//   inoutRayCount - incremented once per shadow ray
//   state         - random generator state, advanced as samples are drawn
// Returns true when `scattered` should be traced further. Metal returns false
// when the perturbed reflection points below the surface; an unknown material
// type yields magenta attenuation and false.
static bool Scatter(const Material& mat, const Ray& r_in, const Hit& rec, float3& attenuation, Ray& scattered, float3& outLightE, int& inoutRayCount, uint32_t& state)
{
    ZoneScoped;
    outLightE = float3(0,0,0);
    if (mat.type == Material::Lambert)
    {
        // random point on unit sphere that is tangent to the hit point
        float3 target = rec.pos + rec.normal + RandomUnitVector(state);
        scattered = Ray(rec.pos, normalize(target - rec.pos));
        attenuation = mat.albedo;
        // sample lights
#if DO_LIGHT_SAMPLING
        for (int j = 0; j < s_EmissiveSphereCount; ++j)
        {
            int i = s_EmissiveSpheres[j];
            const Material& smat = s_SphereMats[i];
            if (&mat == &smat)
                continue; // skip self
            const Sphere& s = s_Spheres[i];
            // create a random direction towards sphere
            // coord system for sampling: sw, su, sv
            float3 sw = normalize(s.center - rec.pos);
            float3 su = normalize(cross(fabs(sw.getX())>0.01f ? float3(0,1,0):float3(1,0,0), sw));
            float3 sv = cross(sw, su);
            // sample sphere by solid angle
            // When the shading point lies inside the light's radius the term under
            // the square root goes negative; clamp it so cosAMax is 0 instead of
            // NaN (which would otherwise end up in outLightE).
            float cosAMax = sqrtf(std::max(0.0f, 1.0f - s.radius*s.radius / sqLength(rec.pos-s.center)));
            float eps1 = RandomFloat01(state), eps2 = RandomFloat01(state);
            float cosA = 1.0f - eps1 + eps1 * cosAMax;
            float sinA = sqrtf(1.0f - cosA*cosA);
            float phi = 2 * kPI * eps2;
            float3 l = su * (cosf(phi) * sinA) + sv * (sinf(phi) * sinA) + sw * cosA;
            //l = normalize(l); // NOTE(fg): This is already normalized, by construction.
            // shoot shadow ray
            Hit lightHit;
            int hitID;
            ++inoutRayCount;
            if (HitWorld(Ray(rec.pos, l), kMinT, kMaxT, lightHit, hitID) && hitID == i)
            {
                // solid angle of the cone the light was sampled from
                float omega = 2 * kPI * (1-cosAMax);
                float3 rdir = r_in.dir;
                AssertUnit(rdir);
                // normal flipped to face the incoming ray
                float3 nl = dot(rec.normal, rdir) < 0 ? rec.normal : -rec.normal;
                outLightE += (mat.albedo * smat.emissive) * (std::max(0.0f, dot(l, nl)) * omega / kPI);
            }
        }
#endif
        return true;
    }
    else if (mat.type == Material::Metal)
    {
        AssertUnit(r_in.dir); AssertUnit(rec.normal);
        float3 refl = reflect(r_in.dir, rec.normal);
        // reflected ray, and random inside of sphere based on roughness
        float roughness = mat.roughness;
#if DO_MITSUBA_COMPARE
        roughness = 0; // until we get better BRDF for metals
#endif
        scattered = Ray(rec.pos, normalize(refl + roughness*RandomInUnitSphere(state)));
        attenuation = mat.albedo;
        return dot(scattered.dir, rec.normal) > 0;
    }
    else if (mat.type == Material::Dielectric)
    {
        AssertUnit(r_in.dir); AssertUnit(rec.normal);
        float3 outwardN;
        float3 rdir = r_in.dir;
        float3 refl = reflect(rdir, rec.normal);
        float nint;
        attenuation = float3(1,1,1);
        float3 refr;
        float reflProb;
        float cosine;
        if (dot(rdir, rec.normal) > 0)
        {
            // ray travels along the normal: flip the normal, use ri as the ratio
            outwardN = -rec.normal;
            nint = mat.ri;
            cosine = mat.ri * dot(rdir, rec.normal);
        }
        else
        {
            // ray travels against the normal: use 1/ri as the ratio
            outwardN = rec.normal;
            nint = 1.0f / mat.ri;
            cosine = -dot(rdir, rec.normal);
        }
        if (refract(rdir, outwardN, nint, refr))
        {
            reflProb = schlick(cosine, mat.ri);
        }
        else
        {
            // no refracted direction: always reflect
            reflProb = 1;
        }
        // choose between reflection and refraction at random
        if (RandomFloat01(state) < reflProb)
            scattered = Ray(rec.pos, normalize(refl));
        else
            scattered = Ray(rec.pos, normalize(refr));
    }
    else
    {
        // unknown material type: magenta marker, stop tracing
        attenuation = float3(1,0,1);
        return false;
    }
    return true;
}
// Returns the light arriving along ray r, recursing through scattered rays
// while depth is below kMaxDepth.
//   inoutRayCount - incremented once for this ray (Scatter adds its shadow rays)
//   state         - random generator state
//   doMaterialE   - when false (and DO_LIGHT_SAMPLING is on), the emission of the
//                   surface hit by this ray is not added on the scattering path
static float3 Trace(const Ray& r, int depth, int& inoutRayCount, uint32_t& state, bool doMaterialE = true)
{
ZoneScoped;
Hit rec;
int id = 0;
++inoutRayCount;
if (HitWorld(r, kMinT, kMaxT, rec, id))
{
Ray scattered;
float3 attenuation;
float3 lightE;
const Material& mat = s_SphereMats[id];
float3 matE = mat.emissive;
if (depth < kMaxDepth && Scatter(mat, r, rec, attenuation, scattered, lightE, inoutRayCount, state))
{
#if DO_LIGHT_SAMPLING
if (!doMaterialE) matE = float3(0,0,0); // don't add material emission if told so
// for Lambert materials, we just did explicit light (emissive) sampling and already
// accounted for their contribution, so if the next ray bounce hits the light again,
// don't add emission
doMaterialE = (mat.type != Material::Lambert);
#endif
return matE + lightE + attenuation * Trace(scattered, depth+1, inoutRayCount, state, doMaterialE);
}
else
{
// depth limit reached or the ray was not scattered: only the surface's own emission
return matE;
}
}
else
{
// sky
#if DO_MITSUBA_COMPARE
return float3(0.15f,0.21f,0.3f); // easier compare with Mitsuba's constant environment light
#else
// vertical gradient between white and light blue based on the ray's Y direction, scaled by 0.3
float3 unitDir = r.dir;
float t = 0.5f*(unitDir.getY() + 1.0f);
return ((1.0f-t)*float3(1.0f, 1.0f, 1.0f) + t*float3(0.5f, 0.7f, 1.0f)) * 0.3f;
#endif
}
}
#if CPU_CAN_DO_THREADS
static enkiTaskScheduler* g_TS;
#endif
// One-time setup: creates the task scheduler used to render rows in parallel
// (only when CPU_CAN_DO_THREADS is enabled).
void InitializeTest()
{
    ZoneScoped;
#if CPU_CAN_DO_THREADS
    g_TS = enkiNewTaskScheduler();
    // hardware_concurrency() is unsigned and may return 0 when the core count is
    // unknown; do the "leave two cores free" arithmetic in signed int so small
    // values cannot wrap around, then never go below two threads.
    const int hardwareThreads = static_cast<int>(std::thread::hardware_concurrency());
    enkiInitTaskSchedulerNumThreads(g_TS, std::max(2, hardwareThreads - 2));
#endif
}
// Tears down what InitializeTest() created. Safe to call more than once or
// without a prior InitializeTest(): the scheduler pointer is checked and reset.
void ShutdownTest()
{
    ZoneScoped;
#if CPU_CAN_DO_THREADS
    if (g_TS)
    {
        enkiDeleteTaskScheduler(g_TS);
        g_TS = nullptr; // do not keep a dangling scheduler pointer around
    }
#endif
}
// Arguments shared by all TraceRowJob invocations of one frame.
struct JobData
{
// Frame time as passed to DrawTest (not read by TraceRowJob).
float time;
// Frame index; feeds the per-row random seed and the progressive blend factor.
int frameCount;
// Image size in pixels.
int screenWidth, screenHeight;
// Output image, 4 floats per pixel (TraceRowJob writes the first three of each pixel).
float* backbuffer;
// Camera used to generate primary rays.
Camera* cam;
// Total rays traced; each job adds its local count once at the end.
std::atomic<int> rayCount;
// Combination of TestFlags bits.
unsigned testFlags;
};
static void TraceRowJob(uint32_t start, uint32_t end, uint32_t threadnum, void* data_)
{
ZoneScoped;
JobData& data = *(JobData*)data_;
float* backbuffer = data.backbuffer + start * data.screenWidth * 4;
float invWidth = 1.0f / data.screenWidth;
float invHeight = 1.0f / data.screenHeight;
float lerpFac = float(data.frameCount) / float(data.frameCount+1);
if (data.testFlags & kFlagAnimate)
lerpFac *= DO_ANIMATE_SMOOTHING;
if (!(data.testFlags & kFlagProgressive))
lerpFac = 0;
int rayCount = 0;
for (uint32_t y = start; y < end; ++y)
{
uint32_t state = (y * 9781 + data.frameCount * 6271) | 1;
for (int x = 0; x < data.screenWidth; ++x)
{
float3 col(0, 0, 0);
for (int s = 0; s < DO_SAMPLES_PER_PIXEL; s++)
{
float u = float(x + RandomFloat01(state)) * invWidth;
float v = float(y + RandomFloat01(state)) * invHeight;
Ray r = data.cam->GetRay(u, v, state);
col += Trace(r, 0, rayCount, state);
}
col *= 1.0f / float(DO_SAMPLES_PER_PIXEL);
float3 prev(backbuffer[0], backbuffer[1], backbuffer[2]);
col = prev * lerpFac + col * (1-lerpFac);
col.store(backbuffer);
backbuffer += 4;
}
}
data.rayCount += rayCount;
}
void UpdateTest(float time, int frameCount, int screenWidth, int screenHeight, unsigned testFlags)
{
ZoneScoped;
if (testFlags & kFlagAnimate)
{
s_Spheres[1].center.setY(cosf(time) + 1.0f);
s_Spheres[8].center.setZ(sinf(time)*0.3f);
}
float3 lookfrom(0, 2, 3);
float3 lookat(0, 0, 0);
float distToFocus = 3;
#if DO_MITSUBA_COMPARE
float aperture = 0.0f;
#else
float aperture = 0.1f;
#endif
#if DO_BIG_SCENE
aperture *= 0.2f;
#endif
s_EmissiveSphereCount = 0;
for (int i = 0; i < kSphereCount; ++i)
{
Sphere& s = s_Spheres[i];
s.UpdateDerivedData();
s_SpheresSoA.centerX[i] = s.center.getX();
s_SpheresSoA.centerY[i] = s.center.getY();
s_SpheresSoA.centerZ[i] = s.center.getZ();
s_SpheresSoA.sqRadius[i] = s.radius * s.radius;
s_SpheresSoA.invRadius[i] = s.invRadius;
// Remember IDs of emissive spheres (light sources)
const Material& smat = s_SphereMats[i];
if (smat.emissive.getX() > 0 || smat.emissive.getY() > 0 || smat.emissive.getZ() > 0)
{
s_EmissiveSpheres[s_EmissiveSphereCount] = i;
s_EmissiveSphereCount++;
}
}
s_Cam = Camera(lookfrom, lookat, float3(0, 1, 0), 60, float(screenWidth) / float(screenHeight), aperture, distToFocus);
}
void DrawTest(float time, int frameCount, int screenWidth, int screenHeight, float* backbuffer, int& outRayCount, unsigned testFlags)
{
ZoneScoped;
JobData args;
args.time = time;
args.frameCount = frameCount;
args.screenWidth = screenWidth;
args.screenHeight = screenHeight;
args.backbuffer = backbuffer;
args.cam = &s_Cam;
args.testFlags = testFlags;
args.rayCount = 0;
#if CPU_CAN_DO_THREADS
enkiTaskSet* task = enkiCreateTaskSet(g_TS, TraceRowJob);
bool threaded = true;
enkiAddTaskSetToPipeMinRange(g_TS, task, &args, screenHeight, threaded ? 4 : screenHeight);
enkiWaitForTaskSet(g_TS, task);
enkiDeleteTaskSet(task);
#else
TraceRowJob(0, screenHeight, 0, &args);
#endif
outRayCount = args.rayCount;
}
// Reports the number of spheres and the byte sizes of the Sphere, Material and
// Camera structures.
void GetObjectCount(int& outCount, int& outObjectSize, int& outMaterialSize, int& outCamSize)
{
    ZoneScoped;
    outCamSize = sizeof(Camera);
    outMaterialSize = sizeof(Material);
    outObjectSize = sizeof(Sphere);
    outCount = kSphereCount;
}
// Copies the raw scene description into caller-provided buffers: all spheres,
// all materials, the camera, and the indices of the emissive spheres (their
// number is written to *outEmissiveCount).
void GetSceneDesc(void* outObjects, void* outMaterials, void* outCam, void* outEmissives, int* outEmissiveCount)
{
    ZoneScoped;
    const int emissiveCount = s_EmissiveSphereCount;
    memcpy(outObjects, s_Spheres, sizeof(s_Spheres));
    memcpy(outMaterials, s_SphereMats, sizeof(s_SphereMats));
    memcpy(outCam, &s_Cam, sizeof(s_Cam));
    memcpy(outEmissives, s_EmissiveSpheres, emissiveCount * sizeof(s_EmissiveSpheres[0]));
    *outEmissiveCount = emissiveCount;
}

View File

@@ -0,0 +1,17 @@
#pragma once
#include <stdint.h>
// Bit flags combined into the `testFlags` argument of UpdateTest / DrawTest.
enum TestFlags
{
// Move some of the spheres over time; also damps the progressive blend factor.
kFlagAnimate = (1 << 0),
// Blend each new frame with the previous backbuffer contents instead of overwriting them.
kFlagProgressive = (1 << 1),
};
// Creates the task scheduler when threading is enabled.
void InitializeTest();
// Destroys what InitializeTest() created.
void ShutdownTest();
// Per-frame scene update (animation, derived sphere data, emissive list, camera).
// testFlags is a combination of TestFlags bits.
void UpdateTest(float time, int frameCount, int screenWidth, int screenHeight, unsigned testFlags);
// Renders one frame into backbuffer (4 floats per pixel) and writes the number
// of rays traced to outRayCount.
void DrawTest(float time, int frameCount, int screenWidth, int screenHeight, float* backbuffer, int& outRayCount, unsigned testFlags);
// Reports the sphere count and the byte sizes of the sphere, material and camera structures.
void GetObjectCount(int& outCount, int& outObjectSize, int& outMaterialSize, int& outCamSize);
// Copies spheres, materials, camera and emissive sphere indices into the given buffers.
// NOTE(review): buffers are presumably sized from GetObjectCount(); outEmissives
// can receive up to one int per sphere - confirm with callers.
void GetSceneDesc(void* outObjects, void* outMaterials, void* outCam, void* outEmissives, int* outEmissiveCount);

View File

@@ -0,0 +1,79 @@
// Copyright (c) 2013 Doug Binks
//
// This software is provided 'as-is', without any express or implied
// warranty. In no event will the authors be held liable for any damages
// arising from the use of this software.
//
// Permission is granted to anyone to use this software for any purpose,
// including commercial applications, and to alter it and redistribute it
// freely, subject to the following restrictions:
//
// 1. The origin of this software must not be misrepresented; you must not
// claim that you wrote the original software. If you use this software
// in a product, an acknowledgement in the product documentation would be
// appreciated but is not required.
// 2. Altered source versions must be plainly marked as such, and must not be
// misrepresented as being the original software.
// 3. This notice may not be removed or altered from any source distribution.
#pragma once
#include <stdint.h>
#ifdef _WIN32
// Windows build: barriers and atomics below are implemented with compiler intrinsics.
#define WIN32_LEAN_AND_MEAN
#include <Windows.h>
// NOTE(review): presumably removes a GetObject macro introduced by Windows.h so that it
// cannot rename identifiers in this project - confirm.
#undef GetObject
#include <intrin.h>
extern "C" void _ReadWriteBarrier();
#pragma intrinsic(_ReadWriteBarrier)
#pragma intrinsic(_InterlockedCompareExchange)
#pragma intrinsic(_InterlockedExchangeAdd)
// Memory Barriers to prevent CPU and Compiler re-ordering
// NOTE(review): _ReadWriteBarrier is documented as a compiler-level barrier; whether that is
// enough for CPU re-ordering depends on the target architecture - confirm for non-x86 targets.
#define BASE_MEMORYBARRIER_ACQUIRE() _ReadWriteBarrier()
#define BASE_MEMORYBARRIER_RELEASE() _ReadWriteBarrier()
// Requests x-byte alignment for the declaration it is attached to.
#define BASE_ALIGN(x) __declspec( align( x ) )
#else
// Non-Windows build (GCC-style syntax): an empty asm statement with a "memory" clobber.
// NOTE(review): this emits no instruction, so it constrains the compiler only; confirm that
// this is sufficient on weakly-ordered CPUs.
#define BASE_MEMORYBARRIER_ACQUIRE() __asm__ __volatile__("": : :"memory")
#define BASE_MEMORYBARRIER_RELEASE() __asm__ __volatile__("": : :"memory")
// Requests x-byte alignment for the declaration it is attached to.
#define BASE_ALIGN(x) __attribute__ ((aligned( x )))
#endif
namespace enki
{
// Atomic 32-bit compare-and-swap.
// If *pDest currently equals compareWith, it is replaced by swapTo.
// Returns the value *pDest held before the call, so a successful swap returns compareWith.
inline uint32_t AtomicCompareAndSwap( volatile uint32_t* pDest, uint32_t swapTo, uint32_t compareWith )
{
#ifdef _WIN32
    // the intrinsic works on long; relies on two's complement so that the
    // unsigned / signed conversion keeps the same bit pattern
    volatile long* pDestAsLong = (volatile long*)pDest;
    return _InterlockedCompareExchange( pDestAsLong, swapTo, compareWith );
#else
    // note the builtin takes ( ptr, expected, desired ) - the reverse of our argument order
    const uint32_t previous = __sync_val_compare_and_swap( pDest, compareWith, swapTo );
    return previous;
#endif
}
// Atomic 64-bit compare-and-swap; same contract as the 32-bit overload.
// If *pDest currently equals compareWith, it is replaced by swapTo.
// Returns the value *pDest held before the call.
inline uint64_t AtomicCompareAndSwap( volatile uint64_t* pDest, uint64_t swapTo, uint64_t compareWith )
{
#ifdef _WIN32
    // the intrinsic works on __int64; relies on two's complement so that the
    // unsigned / signed conversion keeps the same bit pattern
    __int64 volatile* pDestAsInt64 = (__int64 volatile*)pDest;
    return _InterlockedCompareExchange64( pDestAsInt64, swapTo, compareWith );
#else
    // note the builtin takes ( ptr, expected, desired ) - the reverse of our argument order
    const uint64_t previous = __sync_val_compare_and_swap( pDest, compareWith, swapTo );
    return previous;
#endif
}
// Atomic fetch-and-add on a 32-bit signed value.
// Adds value to *pDest and returns the value *pDest held before the addition.
inline int32_t AtomicAdd( volatile int32_t* pDest, int32_t value )
{
#ifdef _WIN32
    const long previous = _InterlockedExchangeAdd( (long*)pDest, value );
    return previous;
#else
    const int32_t previous = __sync_fetch_and_add( pDest, value );
    return previous;
#endif
}
}

View File

@@ -0,0 +1,240 @@
// Copyright (c) 2013 Doug Binks
//
// This software is provided 'as-is', without any express or implied
// warranty. In no event will the authors be held liable for any damages
// arising from the use of this software.
//
// Permission is granted to anyone to use this software for any purpose,
// including commercial applications, and to alter it and redistribute it
// freely, subject to the following restrictions:
//
// 1. The origin of this software must not be misrepresented; you must not
// claim that you wrote the original software. If you use this software
// in a product, an acknowledgement in the product documentation would be
// appreciated but is not required.
// 2. Altered source versions must be plainly marked as such, and must not be
// misrepresented as being the original software.
// 3. This notice may not be removed or altered from any source distribution.
#pragma once
#include <stdint.h>
#include <assert.h>
#include "Atomics.h"
#include <string.h>
namespace enki
{
// LockLessMultiReadPipe - Single writer, multiple reader thread safe pipe using (semi) lockless programming
// Readers can only read from the back of the pipe
// The single writer can write to the front of the pipe, and read from both ends (a writer can be a reader)
// for many of the principles used here, see http://msdn.microsoft.com/en-us/library/windows/desktop/ee418650(v=vs.85).aspx
// Note: using log2 sizes so we do not need to clamp (multi-operation)
// cSizeLog2 is the log2 of the capacity: the pipe stores ( 1 << cSizeLog2 ) entries
// T is the contained type; entries are copied in and out by assignment
// Note this is not truly lockless, as the per-slot flags are used as a form of lock state.
template<uint8_t cSizeLog2, typename T> class LockLessMultiReadPipe
{
public:
LockLessMultiReadPipe();
~LockLessMultiReadPipe() {}
// ReaderTryReadBack returns false if we were unable to read
// This is thread safe for both multiple readers and the writer
bool ReaderTryReadBack( T* pOut );
// WriterTryReadFront returns false if we were unable to read
// This is thread safe for the single writer, but should not be called by readers
bool WriterTryReadFront( T* pOut );
// WriterTryWriteFront returns false if we were unable to write
// This is thread safe for the single writer, but should not be called by readers
bool WriterTryWriteFront( const T& in );
// IsPipeEmpty() is a utility function, not intended for general use
// Should only be used very prudently.
// It compares the write index with the read count using two separate plain reads,
// so the answer is only a snapshot.
bool IsPipeEmpty() const
{
return 0 == m_WriteIndex - m_ReadCount;
}
// Resets all indices and marks every slot writable, using plain (non-atomic) stores.
// NOTE(review): presumably only safe while no other thread is using the pipe - confirm.
void Clear()
{
m_WriteIndex = 0;
m_ReadIndex = 0;
m_ReadCount = 0;
memset( (void*)m_Flags, 0, sizeof( m_Flags ) );
}
private:
// capacity in entries, and the mask used to wrap an index into the buffer
const static uint32_t ms_cSize = ( 1 << cSizeLog2 );
const static uint32_t ms_cIndexMask = ms_cSize - 1;
// per-slot states:
// FLAG_INVALID - slot has been claimed by a reader (set through compare-and-swap)
// FLAG_CAN_WRITE - slot is free; this is also the zero state produced by memset
// FLAG_CAN_READ - slot holds an entry that has not been claimed yet
const static uint32_t FLAG_INVALID = 0xFFFFFFFF; // 32bit for CAS
const static uint32_t FLAG_CAN_WRITE = 0x00000000; // 32bit for CAS
const static uint32_t FLAG_CAN_READ = 0x11111111; // 32bit for CAS
T m_Buffer[ ms_cSize ];
// read and write indexes allow fast access to the pipe, but actual access
// controlled by the access flags.
// m_WriteIndex: only modified by the writer (incremented on write, decremented on front read)
volatile uint32_t BASE_ALIGN(4) m_WriteIndex;
// m_ReadCount: incremented atomically by each successful ReaderTryReadBack
volatile uint32_t BASE_ALIGN(4) m_ReadCount;
volatile uint32_t m_Flags[ ms_cSize ];
// m_ReadIndex: stored by WriterTryReadFront, used by ReaderTryReadBack as its restart position
volatile uint32_t BASE_ALIGN(4) m_ReadIndex;
};
// Constructs an empty pipe: all indices are zero and every slot is marked writable
// (FLAG_CAN_WRITE is zero, so a memset of the flag array is sufficient).
// The initializers are listed in member declaration order (m_WriteIndex, m_ReadCount,
// m_ReadIndex), which is the order the compiler actually initializes them in.
template<uint8_t cSizeLog2, typename T> inline
LockLessMultiReadPipe<cSizeLog2,T>::LockLessMultiReadPipe()
    : m_WriteIndex(0)
    , m_ReadCount(0)
    , m_ReadIndex(0)
{
    // ms_cSize is computed as ( 1 << cSizeLog2 ) in a 32-bit constant
    assert( cSizeLog2 < 32 );
    memset( (void*)m_Flags, 0, sizeof( m_Flags ) );
}
// Tries to take one entry from the back of the pipe and copy it to *pOut.
// Returns true when an entry was copied, false when the pipe was observed empty.
// A slot is claimed by switching its flag from FLAG_CAN_READ to FLAG_INVALID with a
// compare-and-swap; if the claim fails the next slot is tried.
template<uint8_t cSizeLog2, typename T> inline
bool LockLessMultiReadPipe<cSizeLog2,T>::ReaderTryReadBack( T* pOut )
{
uint32_t actualReadIndex;
uint32_t readCount = m_ReadCount;
// We get hold of read index for consistency,
// and do first pass starting at read count
uint32_t readIndexToUse = readCount;
while(true)
{
uint32_t writeIndex = m_WriteIndex;
// power of two sizes ensures we can use a simple calc without modulus
uint32_t numInPipe = writeIndex - readCount;
if( 0 == numInPipe )
{
// nothing left to read
return false;
}
if( readIndexToUse >= writeIndex )
{
// move back to start
readIndexToUse = m_ReadIndex;
}
// power of two sizes ensures we can perform AND for a modulus
actualReadIndex = readIndexToUse & ms_cIndexMask;
// Multiple potential readers mean we should check if the data is valid,
// using an atomic compare exchange
uint32_t previous = AtomicCompareAndSwap( &m_Flags[ actualReadIndex ], FLAG_INVALID, FLAG_CAN_READ );
if( FLAG_CAN_READ == previous )
{
// we now own this slot
break;
}
++readIndexToUse;
//update known readcount
readCount = m_ReadCount;
}
// we update the read index using an atomic add, as we've only read one piece of data.
// this ensure consistency of the read index, and the above loop ensures readers
// only read from unread data
AtomicAdd( (volatile int32_t*)&m_ReadCount, 1 );
BASE_MEMORYBARRIER_ACQUIRE();
// now read data, ensuring we do so after above reads & CAS
*pOut = m_Buffer[ actualReadIndex ];
// hand the slot back to the writer
m_Flags[ actualReadIndex ] = FLAG_CAN_WRITE;
return true;
}
// Writer-only: tries to take back an entry from the front of the pipe, searching
// backwards from the slot just below the write index, and copies it to *pOut.
// Returns true when an entry was copied (the write index is then moved back by one),
// false when the pipe was observed empty or no slot could be claimed.
template<uint8_t cSizeLog2, typename T> inline
bool LockLessMultiReadPipe<cSizeLog2,T>::WriterTryReadFront( T* pOut )
{
uint32_t writeIndex = m_WriteIndex;
uint32_t frontReadIndex = writeIndex;
// Multiple potential readers mean we should check if the data is valid,
// using an atomic compare exchange - which acts as a form of lock (so not quite lockless really).
uint32_t previous = FLAG_INVALID;
uint32_t actualReadIndex = 0;
while( true )
{
// power of two sizes ensures we can use a simple calc without modulus
uint32_t readCount = m_ReadCount;
uint32_t numInPipe = writeIndex - readCount;
if( 0 == numInPipe || 0 == frontReadIndex )
{
// frontReadIndex can get to 0 here if that item was just being read by another thread.
// publish the current read count as the readers' restart position
m_ReadIndex = readCount;
return false;
}
--frontReadIndex;
actualReadIndex = frontReadIndex & ms_cIndexMask;
previous = AtomicCompareAndSwap( &m_Flags[ actualReadIndex ], FLAG_INVALID, FLAG_CAN_READ );
if( FLAG_CAN_READ == previous )
{
// we now own this slot
break;
}
else if( m_ReadIndex >= frontReadIndex )
{
// searched back as far as the published read index without claiming a slot
return false;
}
}
// now read data, ensuring we do so after above reads & CAS
*pOut = m_Buffer[ actualReadIndex ];
m_Flags[ actualReadIndex ] = FLAG_CAN_WRITE;
BASE_MEMORYBARRIER_RELEASE();
// 32-bit aligned stores are atomic, and writer owns the write index
// we only move one back as this is as many as we have read, not where we have read from.
--m_WriteIndex;
return true;
}
// Writer-only: tries to append a copy of 'in' at the front of the pipe.
// Returns true when the entry was stored, false when the slot at the write index is
// not marked FLAG_CAN_WRITE (the pipe is full or a reader still holds that slot).
template<uint8_t cSizeLog2, typename T> inline
bool LockLessMultiReadPipe<cSizeLog2,T>::WriterTryWriteFront( const T& in )
{
// The writer 'owns' the write index, and readers can only reduce
// the amount of data in the pipe.
// We get hold of both values for consistency and to reduce false sharing
// impacting more than one access
uint32_t writeIndex = m_WriteIndex;
// power of two sizes ensures we can perform AND for a modulus
uint32_t actualWriteIndex = writeIndex & ms_cIndexMask;
// a reader may still be reading this item, as there are multiple readers
if( m_Flags[ actualWriteIndex ] != FLAG_CAN_WRITE )
{
return false; // still being read, so have caught up with tail.
}
// as we are the only writer we can update the data without atomics
// whilst the write index has not been updated
m_Buffer[ actualWriteIndex ] = in;
m_Flags[ actualWriteIndex ] = FLAG_CAN_READ;
// We need to ensure the above writes occur prior to updating the write index,
// otherwise another thread might read before it's finished
BASE_MEMORYBARRIER_RELEASE();
// 32-bit aligned stores are atomic, and the writer controls the write index
++writeIndex;
m_WriteIndex = writeIndex;
return true;
}
}

View File

@@ -0,0 +1,437 @@
// Copyright (c) 2013 Doug Binks
//
// This software is provided 'as-is', without any express or implied
// warranty. In no event will the authors be held liable for any damages
// arising from the use of this software.
//
// Permission is granted to anyone to use this software for any purpose,
// including commercial applications, and to alter it and redistribute it
// freely, subject to the following restrictions:
//
// 1. The origin of this software must not be misrepresented; you must not
// claim that you wrote the original software. If you use this software
// in a product, an acknowledgement in the product documentation would be
// appreciated but is not required.
// 2. Altered source versions must be plainly marked as such, and must not be
// misrepresented as being the original software.
// 3. This notice may not be removed or altered from any source distribution.
#include <assert.h>
#include "TaskScheduler.h"
#include "LockLessMultiReadPipe.h"
using namespace enki;
// log2 of the per-thread task pipe capacity (template argument of TaskPipe)
static const uint32_t PIPESIZE_LOG2 = 8;
// number of consecutive failed TryRunTask attempts after which a worker calls WaitForTasks
static const uint32_t SPIN_COUNT = 100;
// after the n-th failed attempt a worker calls Pause() n * SPIN_BACKOFF_MULTIPLIER times
static const uint32_t SPIN_BACKOFF_MULTIPLIER = 10;
// upper bound applied to m_NumInitialPartitions in StartThreads
static const uint32_t MAX_NUM_INITIAL_PARTITIONS = 8;
// each software thread gets its own copy of gtl_threadNum, so this is safe to use as a static variable
static THREAD_LOCAL uint32_t gtl_threadNum = 0;
namespace enki
{
// One unit of work stored in a task pipe: a task set plus the sub-range of it to execute.
struct SubTaskSet
{
ITaskSet* pTask;
TaskSetPartition partition;
};
// we derive class TaskPipe rather than typedef to get forward declaration working easily
class TaskPipe : public LockLessMultiReadPipe<PIPESIZE_LOG2,enki::SubTaskSet> {};
// Argument block passed to TaskingThreadFunction for each created thread.
struct ThreadArgs
{
uint32_t threadNum;
TaskScheduler* pTaskScheduler;
};
}
namespace
{
// Carves the leading part off subTask_ and returns it as a new sub task.
// At most rangeToSplit_ items are taken; if fewer remain, all of them are taken.
// subTask_ is updated in place so that it starts where the returned piece ends.
SubTaskSet SplitTask( SubTaskSet& subTask_, uint32_t rangeToSplit_ )
{
    const uint32_t remaining = subTask_.partition.end - subTask_.partition.start;
    const uint32_t taken     = ( rangeToSplit_ > remaining ) ? remaining : rangeToSplit_;

    SubTaskSet frontPiece = subTask_;
    frontPiece.partition.end = subTask_.partition.start + taken;
    subTask_.partition.start = frontPiece.partition.end;
    return frontPiece;
}
// Pause() is called inside spin-wait loops.
// Exactly one definition is selected for every target; previously a Windows build on a
// non-x86 compiler/architecture combination got no definition at all and failed to compile.
#if defined _WIN32 && ( defined _M_IX86 || defined _M_X64 )
// x86 / x64 with the MSVC-style architecture macros: use the pause intrinsic
#pragma intrinsic(_mm_pause)
inline void Pause() { _mm_pause(); }
#elif defined __i386__ || defined __x86_64__
// x86 / x64 with GCC-style architecture macros: emit the pause instruction directly
inline void Pause() { __asm__ __volatile__("pause;"); }
#else
// any other target: no-op
inline void Pause() { ;} // may have NOP or yield equiv
#endif
}
// Invokes a profiler callback with the given thread number; a null callback is ignored.
static void SafeCallback(ProfilerCallbackFunc func_, uint32_t threadnum_)
{
    if( !func_ )
    {
        return;
    }
    func_( threadnum_ );
}
// Returns a pointer to the scheduler's own callback table so the caller can fill it in.
ProfilerCallbacks* TaskScheduler::GetProfilerCallbacks()
{
return &m_ProfilerCallbacks;
}
// Entry point of every worker thread created by StartThreads.
// pArgs points at a ThreadArgs holding the thread number and the owning scheduler.
// The thread runs tasks until m_bRunning is cleared; when no task is found it backs off
// with an increasing number of Pause() calls and, after SPIN_COUNT consecutive failures,
// calls WaitForTasks. Always returns 0.
THREADFUNC_DECL TaskScheduler::TaskingThreadFunction( void* pArgs )
{
    // take a private copy of the arguments
    const ThreadArgs threadArgs    = *(ThreadArgs*)pArgs;
    TaskScheduler* const pScheduler = threadArgs.pTaskScheduler;
    const uint32_t threadNum        = threadArgs.threadNum;

    gtl_threadNum = threadNum;
    SafeCallback( pScheduler->m_ProfilerCallbacks.threadStart, threadNum );

    uint32_t failedAttempts = 0;
    uint32_t pipeHint       = threadNum + 1; // does not need to be clamped.
    while( pScheduler->m_bRunning )
    {
        if( pScheduler->TryRunTask( threadNum, pipeHint ) )
        {
            failedAttempts = 0;
            continue;
        }

        // no tasks, will spin then wait
        ++failedAttempts;
        if( failedAttempts > SPIN_COUNT )
        {
            pScheduler->WaitForTasks( threadNum );
            failedAttempts = 0;
            continue;
        }

        for( uint32_t pausesLeft = failedAttempts * SPIN_BACKOFF_MULTIPLIER; pausesLeft != 0; --pausesLeft )
        {
            Pause();
        }
    }

    AtomicAdd( &pScheduler->m_NumThreadsRunning, -1 );
    SafeCallback( pScheduler->m_ProfilerCallbacks.threadStop, threadNum );
    return 0;
}
void TaskScheduler::StartThreads()
{
if( m_bHaveThreads )
{
return;
}
m_bRunning = true;
SemaphoreCreate( m_NewTaskSemaphore );
// we create one less thread than m_NumThreads as the main thread counts as one
m_pThreadNumStore = new ThreadArgs[m_NumThreads];
m_pThreadIDs = new threadid_t[m_NumThreads];
m_pThreadNumStore[0].threadNum = 0;
m_pThreadNumStore[0].pTaskScheduler = this;
m_pThreadIDs[0] = 0;
m_NumThreadsWaiting = 0;
m_NumThreadsRunning = 1;// acount for main thread
for( uint32_t thread = 1; thread < m_NumThreads; ++thread )
{
m_pThreadNumStore[thread].threadNum = thread;
m_pThreadNumStore[thread].pTaskScheduler = this;
ThreadCreate( &m_pThreadIDs[thread], TaskingThreadFunction, &m_pThreadNumStore[thread] );
++m_NumThreadsRunning;
}
// ensure we have sufficient tasks to equally fill either all threads including main
// or just the threads we've launched, this is outside the firstinit as we want to be able
// to runtime change it
if( 1 == m_NumThreads )
{
m_NumPartitions = 1;
m_NumInitialPartitions = 1;
}
else
{
m_NumPartitions = m_NumThreads * (m_NumThreads - 1);
m_NumInitialPartitions = m_NumThreads - 1;
if( m_NumInitialPartitions > MAX_NUM_INITIAL_PARTITIONS )
{
m_NumInitialPartitions = MAX_NUM_INITIAL_PARTITIONS;
}
}
m_bHaveThreads = true;
}
// Stops the worker threads and releases the per-thread bookkeeping.
// Does nothing if no threads exist. With bWait_ set, the semaphore is signalled
// repeatedly until every worker has left its loop; ThreadTerminate is then called
// for each created thread in either case.
void TaskScheduler::StopThreads( bool bWait_ )
{
    if( !m_bHaveThreads )
    {
        return;
    }

    // wait for the threads to quit before deleting data
    m_bRunning = false;
    if( bWait_ )
    {
        while( m_NumThreadsRunning > 1 )
        {
            // keep firing event to ensure all threads pick up state of m_bRunning
            SemaphoreSignal( m_NewTaskSemaphore, m_NumThreadsRunning );
        }
    }

    for( uint32_t threadNum = 1; threadNum < m_NumThreads; ++threadNum )
    {
        ThreadTerminate( m_pThreadIDs[threadNum] );
    }
    m_NumThreads = 0;

    delete[] m_pThreadNumStore;
    delete[] m_pThreadIDs;
    m_pThreadNumStore = 0;
    m_pThreadIDs      = 0;

    SemaphoreClose( m_NewTaskSemaphore );

    m_bHaveThreads      = false;
    m_NumThreadsWaiting = 0;
    m_NumThreadsRunning = 0;
}
bool TaskScheduler::TryRunTask( uint32_t threadNum, uint32_t& hintPipeToCheck_io_ )
{
// check for tasks
SubTaskSet subTask;
bool bHaveTask = m_pPipesPerThread[ threadNum ].WriterTryReadFront( &subTask );
uint32_t threadToCheck = hintPipeToCheck_io_;
uint32_t checkCount = 0;
while( !bHaveTask && checkCount < m_NumThreads )
{
threadToCheck = ( hintPipeToCheck_io_ + checkCount ) % m_NumThreads;
if( threadToCheck != threadNum )
{
bHaveTask = m_pPipesPerThread[ threadToCheck ].ReaderTryReadBack( &subTask );
}
++checkCount;
}
if( bHaveTask )
{
// update hint, will preserve value unless actually got task from another thread.
hintPipeToCheck_io_ = threadToCheck;
uint32_t partitionSize = subTask.partition.end - subTask.partition.start;
if( subTask.pTask->m_RangeToRun < partitionSize )
{
SubTaskSet taskToRun = SplitTask( subTask, subTask.pTask->m_RangeToRun );
SplitAndAddTask( gtl_threadNum, subTask, subTask.pTask->m_RangeToRun, 0 );
taskToRun.pTask->ExecuteRange( taskToRun.partition, threadNum );
AtomicAdd( &taskToRun.pTask->m_RunningCount, -1 );
}
else
{
// the task has already been divided up by AddTaskSetToPipe, so just run it
subTask.pTask->ExecuteRange( subTask.partition, threadNum );
AtomicAdd( &subTask.pTask->m_RunningCount, -1 );
}
}
return bHaveTask;
}
// Blocks the calling worker on the new-task semaphore unless some pipe holds work.
// threadNum is only used for the waitStart / waitStop profiler callbacks.
void TaskScheduler::WaitForTasks( uint32_t threadNum )
{
    // We increment the number of threads waiting here in order
    // to ensure that the check for tasks occurs after the increment
    // to prevent a task being added after a check, then the thread waiting.
    // This will occasionally result in threads being mistakenly awoken,
    // but they will then go back to sleep.
    AtomicAdd( &m_NumThreadsWaiting, 1 );

    bool bHaveTasks = false;
    for( uint32_t thread = 0; thread < m_NumThreads; ++thread )
    {
        if( !m_pPipesPerThread[ thread ].IsPipeEmpty() )
        {
            bHaveTasks = true;
            break;
        }
    }

    if( !bHaveTasks )
    {
        SafeCallback( m_ProfilerCallbacks.waitStart, threadNum );
        SemaphoreWait( m_NewTaskSemaphore );
        SafeCallback( m_ProfilerCallbacks.waitStop, threadNum );
    }

    int32_t prev = AtomicAdd( &m_NumThreadsWaiting, -1 );
    assert( prev != 0 );
    (void)prev; // only read by the assert, which compiles away when NDEBUG is defined
}
// Signals the new-task semaphore with the current number of waiting threads.
// NOTE(review): m_NumThreadsWaiting can be 0 here; behaviour of SemaphoreSignal for a
// zero count is defined elsewhere - confirm.
void TaskScheduler::WakeThreads()
{
SemaphoreSignal( m_NewTaskSemaphore, m_NumThreadsWaiting );
}
// Splits subTask_ into pieces of at most rangeToSplit_ items and writes them to the
// pipe of thread threadNum_. When the pipe is full a piece is executed immediately on
// the calling thread instead.
// threadNum_          : thread number of the caller; selects the pipe and is passed to ExecuteRange.
// subTask_            : range still to be distributed (taken by value and consumed here).
// rangeToSplit_       : maximum size of each piece written to the pipe.
// runningCountOffset_ : extra amount added to the task's running count together with the
//                       number of pieces that were queued.
void TaskScheduler::SplitAndAddTask( uint32_t threadNum_, SubTaskSet subTask_,
                                     uint32_t rangeToSplit_, int32_t runningCountOffset_ )
{
    int32_t numAdded = 0;
    while( subTask_.partition.start != subTask_.partition.end )
    {
        SubTaskSet taskToAdd = SplitTask( subTask_, rangeToSplit_ );

        // add the partition to the pipe
        ++numAdded;
        if( !m_pPipesPerThread[ threadNum_ ].WriterTryWriteFront( taskToAdd ) )
        {
            if( numAdded > 1 )
            {
                WakeThreads();
            }
            // Pipe is full: run part of this piece ourselves. Only shrink the piece when
            // it is actually larger than m_RangeToRun. Comparing against rangeToSplit_
            // here would be wrong for a final piece that SplitTask already clamped to
            // fewer items: its end would be pushed past the end of the partition, and
            // the loop condition above would then never become false.
            const uint32_t rangeToRun = taskToAdd.pTask->m_RangeToRun;
            const uint32_t pieceSize  = taskToAdd.partition.end - taskToAdd.partition.start;
            if( rangeToRun < pieceSize )
            {
                taskToAdd.partition.end  = taskToAdd.partition.start + rangeToRun;
                subTask_.partition.start = taskToAdd.partition.end;
            }
            taskToAdd.pTask->ExecuteRange( taskToAdd.partition, threadNum_ );
            --numAdded; // executed inline, so it was never queued
        }
    }

    // increment running count by number added
    AtomicAdd( &subTask_.pTask->m_RunningCount, numAdded + runningCountOffset_ );

    WakeThreads();
}
// Queues a whole task set on the calling thread's pipe.
// The set is cut into initial pieces of m_SetSize / m_NumInitialPartitions items, and
// m_RangeToRun is set to m_SetSize / m_NumPartitions; both are raised to m_MinRange
// when they fall below it.
void TaskScheduler::AddTaskSetToPipe( ITaskSet* pTaskSet )
{
    // set running count to -1 to guarantee it won't be found complete until all subtasks added
    pTaskSet->m_RunningCount = -1;

    const uint32_t setSize  = pTaskSet->m_SetSize;
    const uint32_t minRange = pTaskSet->m_MinRange;

    // size of the ranges that are eventually executed
    const uint32_t perPartition = setSize / m_NumPartitions;
    pTaskSet->m_RangeToRun = ( perPartition < minRange ) ? minRange : perPartition;

    // size of the pieces initially written to the pipe
    const uint32_t perInitialPartition = setSize / m_NumInitialPartitions;
    const uint32_t rangeToSplit = ( perInitialPartition < minRange ) ? minRange : perInitialPartition;

    SubTaskSet wholeSet;
    wholeSet.pTask           = pTaskSet;
    wholeSet.partition.start = 0;
    wholeSet.partition.end   = setSize;
    SplitAndAddTask( gtl_threadNum, wholeSet, rangeToSplit, 1 );
}
// Runs queued tasks on the calling thread until pTaskSet's running count reaches zero.
// With a null pTaskSet it makes a single attempt to run one task and returns.
void TaskScheduler::WaitforTaskSet( const ITaskSet* pTaskSet )
{
    uint32_t pipeHint = gtl_threadNum + 1; // does not need to be clamped.

    if( !pTaskSet )
    {
        TryRunTask( gtl_threadNum, pipeHint );
        return;
    }

    while( pTaskSet->m_RunningCount )
    {
        TryRunTask( gtl_threadNum, pipeHint );
        // should add a spin then wait for task completion event.
    }
}
void TaskScheduler::WaitforAll()
{
bool bHaveTasks = true;
uint32_t hintPipeToCheck_io = gtl_threadNum + 1; // does not need to be clamped.
int32_t threadsRunning = m_NumThreadsRunning - 1;
while( bHaveTasks || m_NumThreadsWaiting < threadsRunning )
{
TryRunTask( gtl_threadNum, hintPipeToCheck_io );
bHaveTasks = false;
for( uint32_t thread = 0; thread < m_NumThreads; ++thread )
{
if( !m_pPipesPerThread[ thread ].IsPipeEmpty() )
{
bHaveTasks = true;
break;
}
}
}
}
// Drains outstanding work, stops the worker threads (waiting for them) and frees the
// per-thread pipes. The pipe pointer is reset so the destructor does not free it again.
void TaskScheduler::WaitforAllAndShutdown()
{
WaitforAll();
StopThreads(true);
delete[] m_pPipesPerThread;
m_pPipesPerThread = 0;
}
// Returns m_NumThreads. StopThreads resets this value to 0.
uint32_t TaskScheduler::GetNumTaskThreads() const
{
return m_NumThreads;
}
// Constructs an idle scheduler: no pipes, no threads and no profiler callbacks.
// Initialize() must be called before tasks are added.
// Every scalar member is given a defined value here, including m_NumInitialPartitions,
// which was previously left uninitialized until StartThreads ran.
TaskScheduler::TaskScheduler()
    : m_pPipesPerThread(NULL)
    , m_NumThreads(0)
    , m_pThreadNumStore(NULL)
    , m_pThreadIDs(NULL)
    , m_bRunning(false)
    , m_NumThreadsRunning(0)
    , m_NumThreadsWaiting(0)
    , m_NumPartitions(0)
    , m_NumInitialPartitions(0)
    , m_bHaveThreads(false)
{
    // all callbacks start out null, which SafeCallback treats as "not set"
    memset(&m_ProfilerCallbacks, 0, sizeof(m_ProfilerCallbacks));
}
// Stops the worker threads (waiting for them to finish) and frees the per-thread pipes.
TaskScheduler::~TaskScheduler()
{
StopThreads( true ); // Stops threads, waiting for them.
delete[] m_pPipesPerThread;
m_pPipesPerThread = 0;
}
void TaskScheduler::Initialize( uint32_t numThreads_ )
{
assert( numThreads_ );
StopThreads( true ); // Stops threads, waiting for them.
delete[] m_pPipesPerThread;
m_NumThreads = numThreads_;
m_pPipesPerThread = new TaskPipe[ m_NumThreads ];
StartThreads();
}
// Initializes the scheduler with the thread count reported by GetNumHardwareThreads().
void TaskScheduler::Initialize()
{
Initialize( GetNumHardwareThreads() );
}

View File

@@ -0,0 +1,177 @@
// Copyright (c) 2013 Doug Binks
//
// This software is provided 'as-is', without any express or implied
// warranty. In no event will the authors be held liable for any damages
// arising from the use of this software.
//
// Permission is granted to anyone to use this software for any purpose,
// including commercial applications, and to alter it and redistribute it
// freely, subject to the following restrictions:
//
// 1. The origin of this software must not be misrepresented; you must not
// claim that you wrote the original software. If you use this software
// in a product, an acknowledgement in the product documentation would be
// appreciated but is not required.
// 2. Altered source versions must be plainly marked as such, and must not be
// misrepresented as being the original software.
// 3. This notice may not be removed or altered from any source distribution.
#pragma once
#include <stdint.h>
#include "Threads.h"
namespace enki
{
// A sub-range of a task set. The scheduler computes a range's size as end - start,
// i.e. start is included and end is not.
struct TaskSetPartition
{
uint32_t start;
uint32_t end;
};
class TaskScheduler;
class TaskPipe;
struct ThreadArgs;
struct SubTaskSet;
// Subclass ITaskSet to create tasks.
// TaskSets can be re-used, but check GetIsComplete() before adding one again:
// AddTaskSetToPipe overwrites the running count.
class ITaskSet
{
public:
    ITaskSet()
        : m_SetSize(1)
        , m_MinRange(1)
        , m_RunningCount(0)
        , m_RangeToRun(1)
    {}
    ITaskSet( uint32_t setSize_ )
        : m_SetSize( setSize_ )
        , m_MinRange(1)
        , m_RunningCount(0)
        , m_RangeToRun(1)
    {}
    ITaskSet( uint32_t setSize_, uint32_t minRange_ )
        : m_SetSize( setSize_ )
        , m_MinRange( minRange_ )
        , m_RunningCount(0)
        , m_RangeToRun(minRange_)
    {}

    // Virtual so that a task set can be destroyed safely through an ITaskSet pointer.
    virtual ~ITaskSet() {}

    // Execute range should be overloaded to process tasks. It will be called with a
    // range where range.start < range.end and range.end <= m_SetSize
    // (the scheduler splits the half-open range [0, m_SetSize) ).
    // The range values should be mapped so that linearly processing them in order is cache friendly
    // i.e. neighbouring values should be close together.
    // threadnum should not be used for changing processing of data, its intended purpose
    // is to allow per-thread data buckets for output.
    virtual void ExecuteRange( TaskSetPartition range, uint32_t threadnum ) = 0;

    // Size of set - usually the number of data items to be processed, see ExecuteRange. Defaults to 1
    uint32_t m_SetSize;

    // Minimum size of a TaskSetPartition range when splitting a task set into partitions.
    // This should be set to a value which results in computation effort of at least 10k
    // clock cycles to minimize task scheduler overhead.
    // NOTE: The last partition will be smaller than m_MinRange if m_SetSize is not a multiple
    // of m_MinRange.
    // Also known as grain size in literature.
    uint32_t m_MinRange;

    // Returns true when the running count is zero. Does not wait.
    bool GetIsComplete() const
    {
        return 0 == m_RunningCount;
    }

private:
    friend class TaskScheduler;
    // number of outstanding sub tasks; maintained by TaskScheduler
    volatile int32_t m_RunningCount;
    // size of the ranges handed to ExecuteRange; set by TaskScheduler::AddTaskSetToPipe
    uint32_t m_RangeToRun;
};
// TaskScheduler implements several callbacks intended for profilers
// Each callback receives the number of the thread it is invoked for.
typedef void (*ProfilerCallbackFunc)( uint32_t threadnum_ );
// Callback table; an entry left null is simply not called.
struct ProfilerCallbacks
{
// called by a worker thread before it enters its task loop
ProfilerCallbackFunc threadStart;
// called by a worker thread after it has left its task loop
ProfilerCallbackFunc threadStop;
// called immediately before a worker blocks on the new-task semaphore
ProfilerCallbackFunc waitStart;
// called immediately after a worker returns from that wait
ProfilerCallbackFunc waitStop;
};
// Task scheduler with one task pipe per thread. The thread that calls Initialize
// counts as thread 0; the remaining threads are created by the scheduler.
class TaskScheduler
{
public:
TaskScheduler();
~TaskScheduler();
// Call either Initialize() or Initialize( numThreads_ ) before adding tasks.
// Initialize() will create GetNumHardwareThreads()-1 threads, which is
// sufficient to fill the system when including the main thread.
// Initialize can be called multiple times - it will wait for completion
// before re-initializing.
void Initialize();
// Initialize( numThreads_ ) - numThreads_ (must be > 0)
// will create numThreads_-1 threads, as thread 0 is
// the thread on which the initialize was called.
void Initialize( uint32_t numThreads_ );
// Adds the TaskSet to pipe and returns if the pipe is not full.
// If the pipe is full, pTaskSet is run.
// should only be called from main thread, or within a task
void AddTaskSetToPipe( ITaskSet* pTaskSet );
// Runs the TaskSets in pipe until true == pTaskSet->GetIsComplete();
// should only be called from thread which created the taskscheduler , or within a task
// if called with 0 it will try to run tasks, and return if none available.
void WaitforTaskSet( const ITaskSet* pTaskSet );
// Waits for all task sets to complete - not guaranteed to work unless we know we
// are in a situation where tasks aren't being continuously added.
void WaitforAll();
// Waits for all task sets to complete and shutdown threads - not guaranteed to work unless we know we
// are in a situation where tasks aren't being continuously added.
void WaitforAllAndShutdown();
// Returns the number of threads created for running tasks + 1
// to account for the main thread.
uint32_t GetNumTaskThreads() const;
// Returns the ProfilerCallbacks structure so that it can be modified to
// set the callbacks.
ProfilerCallbacks* GetProfilerCallbacks();
private:
// thread entry point; pArgs points at a ThreadArgs
static THREADFUNC_DECL TaskingThreadFunction( void* pArgs );
void WaitForTasks( uint32_t threadNum );
bool TryRunTask( uint32_t threadNum, uint32_t& hintPipeToCheck_io_ );
void StartThreads();
void StopThreads( bool bWait_ );
void SplitAndAddTask( uint32_t threadNum_, SubTaskSet subTask_,
uint32_t rangeToSplit_, int32_t runningCountOffset_ );
void WakeThreads();
// one pipe per thread number, allocated in Initialize( numThreads_ )
TaskPipe* m_pPipesPerThread;
uint32_t m_NumThreads;
// per-thread argument blocks and thread ids, allocated in StartThreads
ThreadArgs* m_pThreadNumStore;
threadid_t* m_pThreadIDs;
// cleared by StopThreads to make the worker loops exit
volatile bool m_bRunning;
volatile int32_t m_NumThreadsRunning;
volatile int32_t m_NumThreadsWaiting;
// divisors used by AddTaskSetToPipe for the run range and the initial split range
uint32_t m_NumPartitions;
uint32_t m_NumInitialPartitions;
semaphoreid_t m_NewTaskSemaphore;
bool m_bHaveThreads;
ProfilerCallbacks m_ProfilerCallbacks;
// declared but not defined here: copying is disabled
TaskScheduler( const TaskScheduler& nocopy );
TaskScheduler& operator=( const TaskScheduler& nocopy );
};
}

View File

@@ -0,0 +1,122 @@
// Copyright (c) 2013 Doug Binks
//
// This software is provided 'as-is', without any express or implied
// warranty. In no event will the authors be held liable for any damages
// arising from the use of this software.
//
// Permission is granted to anyone to use this software for any purpose,
// including commercial applications, and to alter it and redistribute it
// freely, subject to the following restrictions:
//
// 1. The origin of this software must not be misrepresented; you must not
// claim that you wrote the original software. If you use this software
// in a product, an acknowledgement in the product documentation would be
// appreciated but is not required.
// 2. Altered source versions must be plainly marked as such, and must not be
// misrepresented as being the original software.
// 3. This notice may not be removed or altered from any source distribution.
#include "TaskScheduler_c.h"
#include "TaskScheduler.h"
#include <assert.h>
using namespace enki;
// Concrete type behind the opaque C handle; adds nothing to enki::TaskScheduler.
struct enkiTaskScheduler : TaskScheduler
{
};
// Concrete type behind the opaque C task-set handle: an ITaskSet that forwards
// ExecuteRange to a C function pointer together with a user argument.
struct enkiTaskSet : ITaskSet
{
enkiTaskSet( enkiTaskExecuteRange taskFun_ ) : taskFun(taskFun_), pArgs(NULL) {}
// unpacks the partition into the ( start, end, threadnum, pArgs ) form of the C callback
virtual void ExecuteRange( TaskSetPartition range, uint32_t threadnum )
{
taskFun( range.start, range.end, threadnum, pArgs );
}
enkiTaskExecuteRange taskFun;
// user argument; assigned by the enkiAddTaskSetToPipe* functions
void* pArgs;
};
// Allocates a new, uninitialized task scheduler. Release it with enkiDeleteTaskScheduler.
enkiTaskScheduler* enkiNewTaskScheduler()
{
    return new enkiTaskScheduler();
}
// Forwards to TaskScheduler::Initialize(). pETS_ is not checked for null.
void enkiInitTaskScheduler( enkiTaskScheduler* pETS_ )
{
pETS_->Initialize();
}
// Forwards to TaskScheduler::Initialize( numThreads_ ). pETS_ is not checked for null.
void enkiInitTaskSchedulerNumThreads( enkiTaskScheduler* pETS_, uint32_t numThreads_ )
{
pETS_->Initialize( numThreads_ );
}
// Destroys a scheduler created by enkiNewTaskScheduler.
void enkiDeleteTaskScheduler( enkiTaskScheduler* pETS_ )
{
delete pETS_;
}
// Allocates a task set that calls taskFunc_ when executed. pETS_ is not used.
// Release the result with enkiDeleteTaskSet.
enkiTaskSet* enkiCreateTaskSet( enkiTaskScheduler* pETS_, enkiTaskExecuteRange taskFunc_ )
{
return new enkiTaskSet( taskFunc_ );
}
// Destroys a task set created by enkiCreateTaskSet.
void enkiDeleteTaskSet( enkiTaskSet* pTaskSet_ )
{
delete pTaskSet_;
}
// Stores setSize_ and pArgs_ in the task set and queues it on the scheduler.
// m_MinRange is left at whatever value the task set currently holds.
void enkiAddTaskSetToPipe( enkiTaskScheduler* pETS_, enkiTaskSet* pTaskSet_, void* pArgs_, uint32_t setSize_ )
{
assert( pTaskSet_ );
assert( pTaskSet_->taskFun );
pTaskSet_->m_SetSize = setSize_;
pTaskSet_->pArgs = pArgs_;
pETS_->AddTaskSetToPipe( pTaskSet_ );
}
// Same as enkiAddTaskSetToPipe, but also stores minRange_ as the task set's m_MinRange.
void enkiAddTaskSetToPipeMinRange(enkiTaskScheduler * pETS_, enkiTaskSet * pTaskSet_, void * pArgs_, uint32_t setSize_, uint32_t minRange_)
{
assert( pTaskSet_ );
assert( pTaskSet_->taskFun );
pTaskSet_->m_SetSize = setSize_;
pTaskSet_->m_MinRange = minRange_;
pTaskSet_->pArgs = pArgs_;
pETS_->AddTaskSetToPipe( pTaskSet_ );
}
// Reports whether the task set has completed: 1 if complete, 0 if not. Does not wait.
// pETS_ is not used.
int enkiIsTaskSetComplete( enkiTaskScheduler* pETS_, enkiTaskSet* pTaskSet_ )
{
    assert( pTaskSet_ );
    if( pTaskSet_->GetIsComplete() )
    {
        return 1;
    }
    return 0;
}
// Forwards to TaskScheduler::WaitforTaskSet. pETS_ is not checked for null.
void enkiWaitForTaskSet( enkiTaskScheduler* pETS_, enkiTaskSet* pTaskSet_ )
{
pETS_->WaitforTaskSet( pTaskSet_ );
}
// Forwards to TaskScheduler::WaitforAll. pETS_ is not checked for null.
void enkiWaitForAll( enkiTaskScheduler* pETS_ )
{
pETS_->WaitforAll();
}
// Forwards to TaskScheduler::GetNumTaskThreads. pETS_ is not checked for null.
uint32_t enkiGetNumTaskThreads( enkiTaskScheduler* pETS_ )
{
return pETS_->GetNumTaskThreads();
}
// Returns the scheduler's callback table reinterpreted as the C struct.
// The cast relies on enkiProfilerCallbacks and enki::ProfilerCallbacks having the same
// layout; the assert only checks that their sizes match, and only in debug builds.
enkiProfilerCallbacks* enkiGetProfilerCallbacks( enkiTaskScheduler* pETS_ )
{
assert( sizeof(enkiProfilerCallbacks) == sizeof(enki::ProfilerCallbacks) );
return (enkiProfilerCallbacks*)pETS_->GetProfilerCallbacks();
}

View File

@@ -0,0 +1,104 @@
// Copyright (c) 2013 Doug Binks
//
// This software is provided 'as-is', without any express or implied
// warranty. In no event will the authors be held liable for any damages
// arising from the use of this software.
//
// Permission is granted to anyone to use this software for any purpose,
// including commercial applications, and to alter it and redistribute it
// freely, subject to the following restrictions:
//
// 1. The origin of this software must not be misrepresented; you must not
// claim that you wrote the original software. If you use this software
// in a product, an acknowledgement in the product documentation would be
// appreciated but is not required.
// 2. Altered source versions must be plainly marked as such, and must not be
// misrepresented as being the original software.
// 3. This notice may not be removed or altered from any source distribution.
#pragma once
#ifdef __cplusplus
extern "C" {
#endif
#include <stdint.h>
// Opaque handles; the structs are defined in the C++ implementation file.
typedef struct enkiTaskScheduler enkiTaskScheduler;
typedef struct enkiTaskSet enkiTaskSet;
// Task function: called with a sub-range [start_, end) of the task set, the number of the
// executing thread, and the pArgs_ pointer given when the task set was added.
typedef void (* enkiTaskExecuteRange)( uint32_t start_, uint32_t end, uint32_t threadnum_, void* pArgs_ );
// Create a new task scheduler. Release it with enkiDeleteTaskScheduler.
// Declared with (void) so that C callers get a true prototype; an empty parameter list
// in C leaves the arguments unchecked. In C++ both spellings mean the same.
enkiTaskScheduler* enkiNewTaskScheduler( void );
// Initialize task scheduler - will create GetNumHardwareThreads()-1 threads, which is
// sufficient to fill the system when including the main thread.
// Initialize can be called multiple times - it will wait for completion
// before re-initializing.
void enkiInitTaskScheduler( enkiTaskScheduler* pETS_ );
// Initialize a task scheduler with numThreads_ (must be > 0)
// will create numThreads_-1 threads, as thread 0 is
// the thread on which the initialize was called.
void enkiInitTaskSchedulerNumThreads( enkiTaskScheduler* pETS_, uint32_t numThreads_ );
// Delete a task scheduler
void enkiDeleteTaskScheduler( enkiTaskScheduler* pETS_ );
// Create a task set.
enkiTaskSet* enkiCreateTaskSet( enkiTaskScheduler* pETS_, enkiTaskExecuteRange taskFunc_ );
// Delete a task set.
void enkiDeleteTaskSet( enkiTaskSet* pTaskSet_ );
// Schedule the task
void enkiAddTaskSetToPipe( enkiTaskScheduler* pETS_, enkiTaskSet* pTaskSet_,
void* pArgs_, uint32_t setSize_ );
// Schedule the task with a minimum range.
// This should be set to a value which results in computation effort of at least 10k
// clock cycles to minimize task scheduler overhead.
// NOTE: The last partition will be smaller than m_MinRange if m_SetSize is not a multiple
// of m_MinRange.
// Also known as grain size in literature.
void enkiAddTaskSetToPipeMinRange( enkiTaskScheduler* pETS_, enkiTaskSet* pTaskSet_,
void* pArgs_, uint32_t setSize_, uint32_t minRange_ );
// Check if TaskSet is complete. Doesn't wait. Returns 1 if complete, 0 if not.
int enkiIsTaskSetComplete( enkiTaskScheduler* pETS_, enkiTaskSet* pTaskSet_ );
// Wait for a given task.
// should only be called from thread which created the taskscheduler , or within a task
// if called with 0 it will try to run tasks, and return if none available.
void enkiWaitForTaskSet( enkiTaskScheduler* pETS_, enkiTaskSet* pTaskSet_ );
// Waits for all task sets to complete - not guaranteed to work unless we know we
// are in a situation where tasks aren't being continuously added.
void enkiWaitForAll( enkiTaskScheduler* pETS_ );
// get number of threads
uint32_t enkiGetNumTaskThreads( enkiTaskScheduler* pETS_ );
// TaskScheduler implements several callbacks intended for profilers.
// Every callback has the same signature: it takes one uint32_t argument,
// threadnum_, and returns nothing.
typedef void (*enkiProfilerCallbackFunc)( uint32_t threadnum_ );
// The set of profiler callbacks, one function pointer per event.
// NOTE(review): the event meanings noted on the fields are inferred from the
// field names only - confirm against the scheduler implementation.
struct enkiProfilerCallbacks
{
enkiProfilerCallbackFunc threadStart; // presumably: a task thread has started
enkiProfilerCallbackFunc threadStop; // presumably: a task thread is stopping
enkiProfilerCallbackFunc waitStart; // presumably: a thread begins waiting
enkiProfilerCallbackFunc waitStop; // presumably: a thread has finished waiting
};
// Get the callback structure of the scheduler pETS_ so that its members can be set.
struct enkiProfilerCallbacks* enkiGetProfilerCallbacks( enkiTaskScheduler* pETS_ );
#ifdef __cplusplus
}
#endif

View File

@@ -0,0 +1,210 @@
// Copyright (c) 2013 Doug Binks
//
// This software is provided 'as-is', without any express or implied
// warranty. In no event will the authors be held liable for any damages
// arising from the use of this software.
//
// Permission is granted to anyone to use this software for any purpose,
// including commercial applications, and to alter it and redistribute it
// freely, subject to the following restrictions:
//
// 1. The origin of this software must not be misrepresented; you must not
// claim that you wrote the original software. If you use this software
// in a product, an acknowledgement in the product documentation would be
// appreciated but is not required.
// 2. Altered source versions must be plainly marked as such, and must not be
// misrepresented as being the original software.
// 3. This notice may not be removed or altered from any source distribution.
#pragma once
#include <assert.h>
#include <errno.h>
#include <stdint.h>
#ifdef _WIN32
#include "Atomics.h"
#define WIN32_LEAN_AND_MEAN
#include <Windows.h>
#define THREADFUNC_DECL DWORD WINAPI
#define THREAD_LOCAL __declspec( thread )
namespace enki
{
    // Thread identifier on Windows: the handle returned by CreateThread.
    typedef HANDLE threadid_t;

    // Starts a new thread running StartFunc( pArg ) (posix equiv pthread_create).
    // Declare the thread start function as:
    //   THREADFUNC_DECL MyThreadStart( void* pArg );
    // On return *returnid holds the handle produced by CreateThread.
    // Returns true on success, false if CreateThread returned NULL.
    inline bool ThreadCreate( threadid_t* returnid, DWORD ( WINAPI *StartFunc) (void* ), void* pArg )
    {
        DWORD threadid;
        *returnid = CreateThread( 0, 0, StartFunc, pArg, 0, &threadid );
        return *returnid != NULL;
    }

    // Releases the thread handle (used where posix calls pthread_cancel).
    // NOTE: CloseHandle only closes the handle; it does not stop the thread.
    // Returns true on success, false on failure - the same convention as
    // ThreadCreate and as the posix implementation of this function.
    inline bool ThreadTerminate( threadid_t threadid )
    {
        // CloseHandle returns nonzero on success and zero on failure, so
        // success is "!= 0" (the previous "== 0" reported failure as success).
        return CloseHandle( threadid ) != 0;
    }

    // Returns the processor count reported by GetSystemInfo.
    inline uint32_t GetNumHardwareThreads()
    {
        SYSTEM_INFO sysInfo;
        GetSystemInfo(&sysInfo);
        return sysInfo.dwNumberOfProcessors;
    }
}
#else // posix
#include <pthread.h>
#include <unistd.h>
#define THREADFUNC_DECL void*
#define THREAD_LOCAL __thread
namespace enki
{
    // Thread identifier on posix systems.
    typedef pthread_t threadid_t;

    // Starts a new thread running StartFunc( pArg ) with default attributes.
    // Declare the thread start function as:
    //   THREADFUNC_DECL MyThreadStart( void* pArg );
    // On success *returnid receives the id of the new thread.
    // Returns true if pthread_create succeeded (returned 0), false otherwise.
    inline bool ThreadCreate( threadid_t* returnid, void* ( *StartFunc) (void* ), void* pArg )
    {
        int32_t retval = pthread_create( returnid, NULL, StartFunc, pArg );
        return retval == 0;
    }

    // Requests cancellation of the given thread via pthread_cancel.
    // Returns true if the request was accepted (pthread_cancel returned 0).
    inline bool ThreadTerminate( threadid_t threadid )
    {
        return pthread_cancel( threadid ) == 0;
    }

    // Returns the number of processors currently online, and never less than 1.
    inline uint32_t GetNumHardwareThreads()
    {
        const long onlineCount = sysconf( _SC_NPROCESSORS_ONLN );
        // sysconf returns -1 on error; casting that straight to uint32_t would
        // report 4294967295 hardware threads, so fall back to a single thread.
        if( onlineCount < 1 )
        {
            return 1;
        }
        return (uint32_t)onlineCount;
    }
}
#endif // posix
// Semaphore implementation
#ifdef _WIN32
namespace enki
{
struct semaphoreid_t
{
HANDLE sem;
};
inline void SemaphoreCreate( semaphoreid_t& semaphoreid )
{
semaphoreid.sem = CreateSemaphore(NULL, 0, MAXLONG, NULL );
}
inline void SemaphoreClose( semaphoreid_t& semaphoreid )
{
CloseHandle( semaphoreid.sem );
}
inline void SemaphoreWait( semaphoreid_t& semaphoreid )
{
DWORD retval = WaitForSingleObject( semaphoreid.sem, INFINITE );
assert( retval != WAIT_FAILED );
}
inline void SemaphoreSignal( semaphoreid_t& semaphoreid, int32_t countWaiting )
{
if( countWaiting )
{
ReleaseSemaphore( semaphoreid.sem, countWaiting, NULL );
}
}
}
#elif defined(__MACH__)
// OS X does not have POSIX semaphores
// see https://developer.apple.com/library/content/documentation/Darwin/Conceptual/KernelProgramming/synchronization/synchronization.html
#include <mach/mach.h>
namespace enki
{
    // Semaphore wrapper around a Mach semaphore_t.
    struct semaphoreid_t
    {
        semaphore_t sem;
    };

    // Creates a semaphore owned by the current task, using the FIFO policy
    // and an initial value of 0.
    inline void SemaphoreCreate( semaphoreid_t& semaphoreid )
    {
        semaphore_create( mach_task_self(), &semaphoreid.sem, SYNC_POLICY_FIFO, 0 );
    }

    // Destroys the semaphore.
    inline void SemaphoreClose( semaphoreid_t& semaphoreid )
    {
        semaphore_destroy( mach_task_self(), semaphoreid.sem );
    }

    // Waits on the semaphore.
    inline void SemaphoreWait( semaphoreid_t& semaphoreid )
    {
        semaphore_wait( semaphoreid.sem );
    }

    // Signals the semaphore countWaiting times, one semaphore_signal call per
    // count. Does nothing when countWaiting is 0 or negative.
    inline void SemaphoreSignal( semaphoreid_t& semaphoreid, int32_t countWaiting )
    {
        for( int32_t signalled = 0; signalled < countWaiting; ++signalled )
        {
            semaphore_signal( semaphoreid.sem );
        }
    }
}
#else // POSIX
#include <semaphore.h>
namespace enki
{
    // Semaphore wrapper around a POSIX unnamed semaphore.
    struct semaphoreid_t
    {
        sem_t sem;
    };

    // Initializes the semaphore as process-private (pshared = 0) with an
    // initial value of 0. Failure is only detected by the assert (debug builds).
    inline void SemaphoreCreate( semaphoreid_t& semaphoreid )
    {
        int err = sem_init( &semaphoreid.sem, 0, 0 );
        assert( err == 0 );
        (void)err; // only read by the assert above; avoids a warning with NDEBUG
    }

    // Destroys the semaphore.
    inline void SemaphoreClose( semaphoreid_t& semaphoreid )
    {
        sem_destroy( &semaphoreid.sem );
    }

    // Blocks until the semaphore can be decremented.
    // sem_wait fails with EINTR when a signal handler interrupts it; that is
    // not an error, so the wait is restarted instead of asserting (debug) or
    // returning without having acquired the semaphore (release).
    inline void SemaphoreWait( semaphoreid_t& semaphoreid )
    {
        int err;
        do
        {
            err = sem_wait( &semaphoreid.sem );
        } while( err == -1 && errno == EINTR );
        assert( err == 0 );
        (void)err; // only read by the assert above; avoids a warning with NDEBUG
    }

    // Posts the semaphore countWaiting times, one sem_post call per count.
    // Does nothing when countWaiting is 0 or negative.
    inline void SemaphoreSignal( semaphoreid_t& semaphoreid, int32_t countWaiting )
    {
        for( int32_t posted = 0; posted < countWaiting; ++posted )
        {
            sem_post( &semaphoreid.sem );
        }
    }
}
#endif

Some files were not shown because too many files have changed in this diff Show More