Figure out stupid math shit

Signed-off-by: Slendi <slendi@socopon.com>
This commit is contained in:
2026-01-10 16:15:36 +02:00
parent f896ddae74
commit e0ca1f1043
475 changed files with 499637 additions and 14 deletions

View File

@@ -0,0 +1,395 @@
#include "../Source/Config.h"
// 32-bit xorshift pseudo-random generator using the shift triple (13, 17, 15).
// Advances `state` in place and returns the new state value.
// A state of zero maps to zero, so the seed must be non-zero.
inline uint RNG(inout uint state)
{
    uint bits = state;
    bits = bits ^ (bits << 13);
    bits = bits ^ (bits >> 17);
    bits = bits ^ (bits << 15);
    state = bits;
    return bits;
}
// Returns a pseudo-random float in [0, 1) built from the low 24 bits of the
// next RNG() output. Advances `state` by one RNG step.
float RandomFloat01(inout uint state)
{
    uint low24 = RNG(state) & 0xFFFFFF;
    return low24 / 16777216.0f;
}
// Returns a random point inside the unit disk in the XY plane (z is 0).
// Consumes two random numbers: first the angle, then the radius.
float3 RandomInUnitDisk(inout uint state)
{
    float angle = RandomFloat01(state) * 2.0f * 3.1415926f;
    float2 onCircle = float2(cos(angle), sin(angle));
    // Square root of the second sample is used as the radial distance.
    float radial = sqrt(RandomFloat01(state));
    return float3(onCircle * radial, 0);
}
// Returns a random point inside the unit sphere.
// Consumes three random numbers in order: height (z), azimuth, radial scale.
float3 RandomInUnitSphere(inout uint state)
{
    float height = RandomFloat01(state) * 2.0f - 1.0f;
    float azimuth = RandomFloat01(state) * 2.0f * 3.1415926f;
    // Radius of the horizontal circle at this height; clamped so sqrt never sees a negative value.
    float ring = sqrt(max(0.0, 1.0f - height * height));
    float3 onSurface = float3(ring * cos(azimuth), ring * sin(azimuth), height);
    // Cube root of a uniform sample pulls the surface point into the interior.
    float scale = pow(RandomFloat01(state), 1.0 / 3.0);
    return onSurface * scale;
}
// Returns a random direction on the unit sphere.
// Consumes two random numbers in order: height (z), then azimuth.
float3 RandomUnitVector(inout uint state)
{
    float height = RandomFloat01(state) * 2.0f - 1.0f;
    float azimuth = RandomFloat01(state) * 2.0f * 3.1415926f;
    // Radius of the horizontal circle at this height.
    float ring = sqrt(1.0f - height * height);
    return float3(ring * cos(azimuth), ring * sin(azimuth), height);
}
// A ray: origin plus direction.
// HitSpheres solves the intersection without a dot(dir, dir) term, so it
// relies on `dir` being unit length.
struct Ray
{
float3 orig;
float3 dir;
};
// Builds a Ray from an origin and a direction. The direction is stored as given.
Ray MakeRay(float3 orig_, float3 dir_)
{
    Ray result;
    result.orig = orig_;
    result.dir = dir_;
    return result;
}
// Returns the point reached after travelling parameter `t` along ray `r`.
float3 RayPointAt(Ray r, float t)
{
    float3 travelled = r.dir * t;
    return r.orig + travelled;
}
// Computes the refracted direction of `v` through a surface with normal `n`.
//   v            - incident direction
//   n            - surface normal; Scatter passes it oriented against the incident ray
//   nint         - refractive index ratio applied to the tangential component
//   outRefracted - refracted direction when the function returns true;
//                  the zero vector when it returns false
// Returns true when a refracted ray exists, false on total internal reflection.
inline bool refract(float3 v, float3 n, float nint, out float3 outRefracted)
{
    // Define the out parameter on every path so a caller can never read an
    // uninitialized value after a `false` return.
    outRefracted = float3(0, 0, 0);

    float dt = dot(v, n);
    float discr = 1.0f - nint * nint * (1 - dt * dt);
    if (discr > 0)
    {
        outRefracted = nint * (v - n * dt) - n * sqrt(discr);
        return true;
    }
    return false;
}
// Schlick-style reflectance approximation.
//   cosine - cosine term supplied by the caller
//   ri     - refractive index
// Returns r0 + (1 - r0) * (1 - cosine)^5 with r0 = ((1 - ri) / (1 + ri))^2.
inline float schlick(float cosine, float ri)
{
    float ratio = (1 - ri) / (1 + ri);
    float r0 = ratio * ratio;
    // saturate guards against tiny negative values of (1 - cosine)
    float falloff = pow(saturate(1 - cosine), 5);
    return r0 + (1 - r0) * falloff;
}
// Result of a ray/sphere intersection, filled in by HitSpheres when it finds a hit.
struct Hit
{
float3 pos; // hit position: ray evaluated at t
float3 normal; // (pos - sphere center) * invRadius
float t; // ray parameter of the hit
};
// Sphere primitive as stored in the g_Spheres structured buffer and its
// group-shared copy. The field layout is shared with the host-side buffer.
struct Sphere
{
float3 center;
float radius;
float invRadius; // scales (pos - center) into the hit normal; presumably 1/radius precomputed by the host — confirm
};
// Material type tags stored in Material.type.
#define MatLambert 0
#define MatMetal 1
#define MatDielectric 2
// Surface description for one object; looked up with the same index as its sphere.
struct Material
{
int type; // one of MatLambert / MatMetal / MatDielectric
float3 albedo; // path attenuation for the Lambert and metal branches of Scatter
float3 emissive; // emitted light added to the path color in Trace
float roughness; // scales the random perturbation of the metal reflection direction
float ri; // refractive index used by the dielectric branch
};
// Group-shared copies of the scene data. Each thread group fills them at the
// start of the compute main() from g_Spheres / g_Materials / g_Emissives.
// s_GroupEmissives holds indices into the sphere/material arrays for the
// objects that light sampling iterates over.
groupshared Sphere s_GroupSpheres[kCSMaxObjects];
groupshared Material s_GroupMaterials[kCSMaxObjects];
groupshared int s_GroupEmissives[kCSMaxObjects];
// Camera description consumed by CameraGetRay.
struct Camera
{
float3 origin; // ray origin before the lens offset is applied
float3 lowerLeftCorner; // point that screen coordinates (s, t) = (0, 0) map to
float3 horizontal; // added scaled by s
float3 vertical; // added scaled by t
float3 u, v, w; // u and v span the lens offset; w is not read by the shader code shown here
float lensRadius; // scales the random lens-disk sample
};
// Builds the camera ray for screen coordinates (s, t).
// The origin is jittered by a random point on the lens disk (scaled by
// cam.lensRadius), and the normalized direction points from that jittered
// origin to the (s, t) point on the image plane.
Ray CameraGetRay(Camera cam, float s, float t, inout uint state)
{
    float3 lensSample = cam.lensRadius * RandomInUnitDisk(state);
    float3 lensOffset = cam.u * lensSample.x + cam.v * lensSample.y;
    float3 rayOrigin = cam.origin + lensOffset;
    float3 towardPlane = cam.lowerLeftCorner + s * cam.horizontal + t * cam.vertical - cam.origin - lensOffset;
    return MakeRay(rayOrigin, normalize(towardPlane));
}
// Finds the closest sphere hit by ray `r` with ray parameter in (tMin, tMax).
// Tests the first `sphereCount` entries of the group-shared sphere array.
// On a hit, fills `outHit` (position, normal, t) and returns the sphere index;
// otherwise returns -1 and leaves `outHit` untouched.
int HitSpheres(Ray r, int sphereCount, float tMin, float tMax, inout Hit outHit)
{
    float closestT = tMax;
    int closestId = -1;
    for (int i = 0; i < sphereCount; ++i)
    {
        Sphere sph = s_GroupSpheres[i];
        float3 toCenter = sph.center - r.orig;
        float proj = dot(toCenter, r.dir);
        float c = dot(toCenter, toCenter) - sph.radius * sph.radius;
        float discr = proj * proj - c;
        // No real roots: the ray misses this sphere.
        if (!(discr > 0))
            continue;

        float root = sqrt(discr);
        // Prefer the nearer root; fall back to the farther one when the
        // nearer lies at or before tMin.
        float nearT = proj - root;
        float t = (nearT <= tMin) ? (proj + root) : nearT;
        if (t > tMin && t < closestT)
        {
            closestId = i;
            closestT = t;
        }
    }

    if (closestId == -1)
        return closestId;

    outHit.pos = RayPointAt(r, closestT);
    outHit.normal = (outHit.pos - s_GroupSpheres[closestId].center) * s_GroupSpheres[closestId].invRadius;
    outHit.t = closestT;
    return closestId;
}
// Per-dispatch parameters; the compute main() reads element 0 of g_Params.
struct Params
{
Camera cam;
int sphereCount; // number of spheres/materials copied to group-shared memory and traced against
int screenWidth; // not read by the shader code shown here
int screenHeight; // not read by the shader code shown here
int frames; // mixed into the per-pixel RNG seed
float invWidth; // multiplies the jittered pixel x to form the camera s coordinate
float invHeight; // multiplies the jittered pixel y to form the camera t coordinate
float lerpFac; // blend weight toward the previous image: lerp(col, prev, lerpFac)
int emissiveCount; // number of entries used from the emissive index list
};
// kMinT / kMaxT: ray parameter interval passed to every HitWorld query.
// kMaxDepth: upper bound on the number of bounce iterations in Trace.
#define kMinT 0.001f
#define kMaxT 1.0e7f
#define kMaxDepth 10
// Scene intersection entry point; forwards to HitSpheres.
// Returns the index of the closest hit object, or -1 when nothing is hit.
static int HitWorld(int sphereCount, Ray r, float tMin, float tMax, inout Hit outHit)
{
    int hitId = HitSpheres(r, sphereCount, tMin, tMax, outHit);
    return hitId;
}
// Computes how a ray interacts with the surface of object `matID` at hit `rec`.
//   sphereCount   - number of objects tested by shadow rays
//   emissiveCount - number of entries of s_GroupEmissives to sample as lights
//   matID         - index of the hit object (material lookup, and skipped as a light)
//   r_in          - incoming ray
//   rec           - hit record for the incoming ray
//   attenuation   - (out) color multiplier applied to the rest of the path
//   scattered     - (out) ray to continue the path with
//   outLightE     - (out) light gathered by explicit light sampling; zero unless
//                   the material is Lambert and DO_LIGHT_SAMPLING is enabled
//   inoutRayCount - incremented once per shadow ray
//   state         - RNG state
// Returns false when the path should stop: a metal reflection that ends up below
// the surface, or an unknown material type. Returns true otherwise.
static bool Scatter(int sphereCount, int emissiveCount, int matID, Ray r_in, Hit rec, out float3 attenuation, out Ray scattered, out float3 outLightE, inout int inoutRayCount, inout uint state)
{
    outLightE = float3(0, 0, 0);
    Material mat = s_GroupMaterials[matID];
    if (mat.type == MatLambert)
    {
        // random point on unit sphere that is tangent to the hit point
        float3 target = rec.pos + rec.normal + RandomUnitVector(state);
        scattered = MakeRay(rec.pos, normalize(target - rec.pos));
        attenuation = mat.albedo;
        // sample lights
#if DO_LIGHT_SAMPLING
        for (int j = 0; j < emissiveCount; ++j)
        {
            int i = s_GroupEmissives[j];
            if (matID == i)
                continue; // skip self
            Material smat = s_GroupMaterials[i];
            Sphere s = s_GroupSpheres[i];
            // create a random direction towards sphere
            // coord system for sampling: sw, su, sv
            float3 sw = normalize(s.center - rec.pos);
            float3 su = normalize(cross(abs(sw.x)>0.01f ? float3(0, 1, 0) : float3(1, 0, 0), sw));
            float3 sv = cross(sw, su);
            // sample sphere by solid angle
            // Clamp before sqrt: when the shaded point lies on or inside the
            // light's sphere, radius^2 / distance^2 reaches or exceeds 1 and an
            // unclamped sqrt would produce NaN in the light contribution.
            float cosAMax = sqrt(max(0.0f, 1.0f - s.radius*s.radius / dot(rec.pos - s.center, rec.pos - s.center)));
            float eps1 = RandomFloat01(state), eps2 = RandomFloat01(state);
            float cosA = 1.0f - eps1 + eps1 * cosAMax;
            float sinA = sqrt(1.0f - cosA * cosA);
            float phi = 2 * 3.1415926 * eps2;
            float3 l = su * cos(phi) * sinA + sv * sin(phi) * sinA + sw * cosA;
            // shoot shadow ray
            Hit lightHit;
            ++inoutRayCount;
            int hitID = HitWorld(sphereCount, MakeRay(rec.pos, l), kMinT, kMaxT, lightHit);
            if (hitID == i)
            {
                // solid angle of the sampled cone
                float omega = 2 * 3.1415926 * (1 - cosAMax);
                float3 rdir = r_in.dir;
                // normal flipped to face the incoming ray
                float3 nl = dot(rec.normal, rdir) < 0 ? rec.normal : -rec.normal;
                outLightE += (mat.albedo * smat.emissive) * (max(0.0f, dot(l, nl)) * omega / 3.1415926);
            }
        }
#endif
        return true;
    }
    else if (mat.type == MatMetal)
    {
        float3 refl = reflect(r_in.dir, rec.normal);
        // reflected ray, and random inside of sphere based on roughness
        float roughness = mat.roughness;
#if DO_MITSUBA_COMPARE
        roughness = 0; // until we get better BRDF for metals
#endif
        scattered = MakeRay(rec.pos, normalize(refl + roughness*RandomInUnitSphere(state)));
        attenuation = mat.albedo;
        // terminate when the perturbed reflection points into the surface
        return dot(scattered.dir, rec.normal) > 0;
    }
    else if (mat.type == MatDielectric)
    {
        float3 outwardN;
        float3 rdir = r_in.dir;
        float3 refl = reflect(rdir, rec.normal);
        float nint;
        attenuation = float3(1, 1, 1);
        float3 refr;
        float reflProb;
        float cosine;
        if (dot(rdir, rec.normal) > 0)
        {
            // ray travels along the normal: flip the normal
            outwardN = -rec.normal;
            nint = mat.ri;
            cosine = mat.ri * dot(rdir, rec.normal);
        }
        else
        {
            // ray travels against the normal
            outwardN = rec.normal;
            nint = 1.0f / mat.ri;
            cosine = -dot(rdir, rec.normal);
        }
        if (refract(rdir, outwardN, nint, refr))
        {
            reflProb = schlick(cosine, mat.ri);
        }
        else
        {
            // total internal reflection: always reflect
            reflProb = 1;
        }
        if (RandomFloat01(state) < reflProb)
            scattered = MakeRay(rec.pos, normalize(refl));
        else
            scattered = MakeRay(rec.pos, normalize(refr));
    }
    else
    {
        // unknown material type: magenta attenuation, dummy ray, stop the path
        attenuation = float3(1, 0, 1);
        scattered = MakeRay(float3(0,0,0), float3(0, 0, 1));
        return false;
    }
    return true;
}
// Traces one path starting with ray `r` and returns the gathered color.
//   sphereCount, emissiveCount - scene sizes forwarded to HitWorld / Scatter
//   inoutRayCount              - incremented once per traced path segment
//                                (Scatter adds its own shadow rays)
//   state                      - RNG state
// Iterates up to kMaxDepth bounces instead of recursing.
static float3 Trace(int sphereCount, int emissiveCount, Ray r, inout int inoutRayCount, inout uint state)
{
    float3 radiance = 0;
    float3 throughput = 1;
    bool doMaterialE = true;
    for (int bounce = 0; bounce < kMaxDepth; ++bounce)
    {
        Hit rec;
        ++inoutRayCount;
        int id = HitWorld(sphereCount, r, kMinT, kMaxT, rec);
        if (id < 0)
        {
            // Nothing hit: add the sky contribution and end the path.
#if DO_MITSUBA_COMPARE
            radiance += throughput * float3(0.15f, 0.21f, 0.3f); // easier compare with Mitsuba's constant environment light
#else
            float3 unitDir = r.dir;
            float blend = 0.5f*(unitDir.y + 1.0f);
            float3 skyCol = ((1.0f - blend)*float3(1.0f, 1.0f, 1.0f) + blend * float3(0.5f, 0.7f, 1.0f)) * 0.3f;
            radiance += throughput * skyCol;
#endif
            break;
        }

        Ray scattered;
        float3 attenuation;
        float3 lightE;
        Material mat = s_GroupMaterials[id];
        float3 matE = mat.emissive;
        if (!Scatter(sphereCount, emissiveCount, id, r, rec, attenuation, scattered, lightE, inoutRayCount, state))
        {
            // Path ends here: only the surface's own emission is added.
            radiance += throughput * matE;
            break;
        }

#if DO_LIGHT_SAMPLING
        // After a Lambert bounce the lights were sampled explicitly, so the
        // next surface's own emission is dropped.
        if (!doMaterialE) matE = 0;
        doMaterialE = (mat.type != MatLambert);
#endif
        radiance += throughput * (matE + lightE);
        throughput *= attenuation;
        r = scattered;
    }
    return radiance;
}
// Resource bindings for the compute pass.
// srcImage: previous image, blended with the new result in main().
Texture2D srcImage : register(t0);
// dstImage: output image written once per thread.
RWTexture2D<float4> dstImage : register(u0);
// Scene data, copied into group-shared memory at the start of main().
StructuredBuffer<Sphere> g_Spheres : register(t1);
StructuredBuffer<Material> g_Materials : register(t2);
// Only element 0 is read.
StructuredBuffer<Params> g_Params : register(t3);
// Indices of emissive objects.
StructuredBuffer<int> g_Emissives : register(t4);
// Ray counter: every thread atomically adds its ray count at byte offset 0.
RWByteAddressBuffer g_OutRayCount : register(u1);
// Compute entry point: one thread per pixel.
//   gid - dispatch-wide thread id, used as the pixel coordinate
//   tid - thread id within the group, used to split the scene copy
// Each group first copies the scene into group-shared memory, then every
// thread traces DO_SAMPLES_PER_PIXEL paths, blends the average with the
// previous image and adds its ray count to the shared counter.
[numthreads(kCSGroupSizeX, kCSGroupSizeY, 1)]
void main(uint3 gid : SV_DispatchThreadID, uint3 tid : SV_GroupThreadID)
{
    // Cooperative copy: each thread handles a contiguous slice of object indices.
    uint flatThread = tid.y * kCSGroupSizeX + tid.x;
    uint threadsPerGroup = kCSGroupSizeX * kCSGroupSizeY;
    uint objCount = g_Params[0].sphereCount;
    uint perThread = (objCount + threadsPerGroup - 1) / threadsPerGroup;
    uint sliceBegin = flatThread * perThread;
    uint sliceEnd = sliceBegin + perThread;
    for (uint io = sliceBegin; io < sliceEnd; ++io)
    {
        if (io < objCount)
        {
            s_GroupSpheres[io] = g_Spheres[io];
            s_GroupMaterials[io] = g_Materials[io];
        }
        if (io < g_Params[0].emissiveCount)
        {
            s_GroupEmissives[io] = g_Emissives[io];
        }
    }
    // All copies must be visible to the whole group before tracing starts.
    GroupMemoryBarrierWithGroupSync();

    Params params = g_Params[0];
    int rayCount = 0;
    float3 accum = 0;
    // Per-pixel, per-frame seed; the low bit is forced on so the seed is never zero.
    uint rngState = (gid.x * 1973 + gid.y * 9277 + params.frames * 26699) | 1;
    for (int s = 0; s < DO_SAMPLES_PER_PIXEL; s++)
    {
        // Jitter inside the pixel: u is drawn first, then v.
        float u = float(gid.x + RandomFloat01(rngState)) * params.invWidth;
        float v = float(gid.y + RandomFloat01(rngState)) * params.invHeight;
        Ray primary = CameraGetRay(params.cam, u, v, rngState);
        accum += Trace(params.sphereCount, params.emissiveCount, primary, rayCount, rngState);
    }
    accum *= 1.0f / float(DO_SAMPLES_PER_PIXEL);

    // Blend with the previous image.
    float3 previous = srcImage.Load(int3(gid.xy,0)).rgb;
    float3 blended = lerp(accum, previous, params.lerpFac);
    dstImage[gid.xy] = float4(blended, 1);
    g_OutRayCount.InterlockedAdd(0, rayCount);
}

View File

@@ -0,0 +1,15 @@
// Approximate linear-to-sRGB encoding: 1.055 * rgb^(1/2.4) - 0.055,
// with the input and the result both clamped to be non-negative.
float3 LinearToSRGB(float3 rgb)
{
    float3 nonNegative = max(rgb, float3(0, 0, 0));
    float3 encoded = 1.055 * pow(nonNegative, 0.416666667) - 0.055;
    return max(encoded, 0.0);
}
// Source texture and the sampler used to read it in the pixel shader main().
Texture2D tex : register(t0);
SamplerState smp : register(s0);
// Pixel shader entry point: samples the source texture at `uv`, converts the
// color from linear to sRGB and outputs it with alpha 1.
float4 main(float2 uv : TEXCOORD0) : SV_Target
{
    float3 linearCol = tex.Sample(smp, uv).rgb;
    float3 srgbCol = LinearToSRGB(linearCol);
    return float4(srgbCol, 1.0f);
}

View File

@@ -0,0 +1,31 @@
Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio Version 16
VisualStudioVersion = 16.0.30907.101
MinimumVisualStudioVersion = 10.0.40219.1
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "TestCpu", "TestCpu.vcxproj", "{4F84B756-87F5-4B92-827B-DA087DAE1900}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|x64 = Debug|x64
Debug|x86 = Debug|x86
Release|x64 = Release|x64
Release|x86 = Release|x86
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{4F84B756-87F5-4B92-827B-DA087DAE1900}.Debug|x64.ActiveCfg = Debug|x64
{4F84B756-87F5-4B92-827B-DA087DAE1900}.Debug|x64.Build.0 = Debug|x64
{4F84B756-87F5-4B92-827B-DA087DAE1900}.Debug|x86.ActiveCfg = Debug|Win32
{4F84B756-87F5-4B92-827B-DA087DAE1900}.Debug|x86.Build.0 = Debug|Win32
{4F84B756-87F5-4B92-827B-DA087DAE1900}.Release|x64.ActiveCfg = Release|x64
{4F84B756-87F5-4B92-827B-DA087DAE1900}.Release|x64.Build.0 = Release|x64
{4F84B756-87F5-4B92-827B-DA087DAE1900}.Release|x86.ActiveCfg = Release|Win32
{4F84B756-87F5-4B92-827B-DA087DAE1900}.Release|x86.Build.0 = Release|Win32
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
SolutionGuid = {067FB780-37B8-465E-AD7E-E7B238B9C04F}
EndGlobalSection
EndGlobal

View File

@@ -0,0 +1,245 @@
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|Win32">
<Configuration>Debug</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|Win32">
<Configuration>Release</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Debug|x64">
<Configuration>Debug</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|x64">
<Configuration>Release</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
</ItemGroup>
<PropertyGroup Label="Globals">
<VCProjectVersion>15.0</VCProjectVersion>
<ProjectGuid>{4F84B756-87F5-4B92-827B-DA087DAE1900}</ProjectGuid>
<Keyword>Win32Proj</Keyword>
<RootNamespace>TestCpu</RootNamespace>
<WindowsTargetPlatformVersion>10.0</WindowsTargetPlatformVersion>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>true</UseDebugLibraries>
<PlatformToolset>v143</PlatformToolset>
<CharacterSet>Unicode</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>false</UseDebugLibraries>
<PlatformToolset>v143</PlatformToolset>
<WholeProgramOptimization>true</WholeProgramOptimization>
<CharacterSet>Unicode</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>true</UseDebugLibraries>
<PlatformToolset>v143</PlatformToolset>
<CharacterSet>Unicode</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>false</UseDebugLibraries>
<PlatformToolset>v143</PlatformToolset>
<WholeProgramOptimization>true</WholeProgramOptimization>
<CharacterSet>Unicode</CharacterSet>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
</ImportGroup>
<ImportGroup Label="Shared">
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<PropertyGroup Label="UserMacros" />
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<LinkIncremental>true</LinkIncremental>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<LinkIncremental>true</LinkIncremental>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<LinkIncremental>false</LinkIncremental>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<LinkIncremental>false</LinkIncremental>
</PropertyGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<Optimization>Disabled</Optimization>
<SDLCheck>true</SDLCheck>
<PreprocessorDefinitions>WIN32;_DEBUG;_WINDOWS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<ConformanceMode>true</ConformanceMode>
<CallingConvention>VectorCall</CallingConvention>
<FloatingPointModel>Fast</FloatingPointModel>
<DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
</ClCompile>
<Link>
<SubSystem>Windows</SubSystem>
<GenerateDebugInformation>true</GenerateDebugInformation>
<AdditionalDependencies>d3d11.lib;kernel32.lib;user32.lib;gdi32.lib;%(AdditionalDependencies)</AdditionalDependencies>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<Optimization>Disabled</Optimization>
<SDLCheck>true</SDLCheck>
<PreprocessorDefinitions>TRACY_ENABLE;_DEBUG;_WINDOWS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<ConformanceMode>true</ConformanceMode>
<CallingConvention>VectorCall</CallingConvention>
<FloatingPointModel>Fast</FloatingPointModel>
<DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
</ClCompile>
<Link>
<SubSystem>Windows</SubSystem>
<GenerateDebugInformation>true</GenerateDebugInformation>
<AdditionalDependencies>d3d11.lib;kernel32.lib;user32.lib;gdi32.lib;%(AdditionalDependencies)</AdditionalDependencies>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<Optimization>MaxSpeed</Optimization>
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
<PreprocessorDefinitions>WIN32;NDEBUG;_WINDOWS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<ConformanceMode>true</ConformanceMode>
<ExceptionHandling>false</ExceptionHandling>
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
<BufferSecurityCheck>false</BufferSecurityCheck>
<CallingConvention>VectorCall</CallingConvention>
<FloatingPointModel>Fast</FloatingPointModel>
</ClCompile>
<Link>
<SubSystem>Windows</SubSystem>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
<GenerateDebugInformation>true</GenerateDebugInformation>
<AdditionalDependencies>d3d11.lib;kernel32.lib;user32.lib;gdi32.lib;%(AdditionalDependencies)</AdditionalDependencies>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<Optimization>MaxSpeed</Optimization>
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
<PreprocessorDefinitions>TRACY_ENABLE;NDEBUG;_WINDOWS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<ConformanceMode>true</ConformanceMode>
<ExceptionHandling>false</ExceptionHandling>
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
<BufferSecurityCheck>false</BufferSecurityCheck>
<CallingConvention>VectorCall</CallingConvention>
<FloatingPointModel>Fast</FloatingPointModel>
</ClCompile>
<Link>
<SubSystem>Windows</SubSystem>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
<GenerateDebugInformation>true</GenerateDebugInformation>
<AdditionalDependencies>d3d11.lib;kernel32.lib;user32.lib;gdi32.lib;%(AdditionalDependencies)</AdditionalDependencies>
</Link>
</ItemDefinitionGroup>
<ItemGroup>
<ClCompile Include="..\..\..\public\TracyClient.cpp" />
<ClCompile Include="..\Source\enkiTS\TaskScheduler.cpp" />
<ClCompile Include="..\Source\enkiTS\TaskScheduler_c.cpp" />
<ClCompile Include="..\Source\Maths.cpp" />
<ClCompile Include="..\Source\Test.cpp" />
<ClCompile Include="TestWin.cpp" />
</ItemGroup>
<ItemGroup>
<ClInclude Include="..\Source\Config.h" />
<ClInclude Include="..\Source\enkiTS\Atomics.h" />
<ClInclude Include="..\Source\enkiTS\LockLessMultiReadPipe.h" />
<ClInclude Include="..\Source\enkiTS\TaskScheduler.h" />
<ClInclude Include="..\Source\enkiTS\TaskScheduler_c.h" />
<ClInclude Include="..\Source\enkiTS\Threads.h" />
<ClInclude Include="..\Source\Maths.h" />
<ClInclude Include="..\Source\MathSimd.h" />
<ClInclude Include="..\Source\Test.h" />
<ClInclude Include="..\Source\stb_image.h" />
</ItemGroup>
<ItemGroup>
<None Include="..\.editorconfig" />
</ItemGroup>
<ItemGroup>
<FxCompile Include="ComputeShader.hlsl">
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Release|x64'">Compute</ShaderType>
<ShaderModel Condition="'$(Configuration)|$(Platform)'=='Release|x64'">5.0</ShaderModel>
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Compute</ShaderType>
<ShaderModel Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">5.0</ShaderModel>
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Compute</ShaderType>
<ShaderModel Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">5.0</ShaderModel>
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Compute</ShaderType>
<ShaderModel Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">5.0</ShaderModel>
<VariableName Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">g_CSBytecode</VariableName>
<HeaderFileOutput Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">CompiledComputeShader.h</HeaderFileOutput>
<VariableName Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">g_CSBytecode</VariableName>
<HeaderFileOutput Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">CompiledComputeShader.h</HeaderFileOutput>
<VariableName Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">g_CSBytecode</VariableName>
<HeaderFileOutput Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">CompiledComputeShader.h</HeaderFileOutput>
<VariableName Condition="'$(Configuration)|$(Platform)'=='Release|x64'">g_CSBytecode</VariableName>
<HeaderFileOutput Condition="'$(Configuration)|$(Platform)'=='Release|x64'">CompiledComputeShader.h</HeaderFileOutput>
</FxCompile>
<FxCompile Include="PixelShader.hlsl">
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Release|x64'">Pixel</ShaderType>
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Pixel</ShaderType>
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Pixel</ShaderType>
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Pixel</ShaderType>
<ShaderModel Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">5.0</ShaderModel>
<ShaderModel Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">5.0</ShaderModel>
<ShaderModel Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">5.0</ShaderModel>
<ShaderModel Condition="'$(Configuration)|$(Platform)'=='Release|x64'">5.0</ShaderModel>
<HeaderFileOutput Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">CompiledPixelShader.h</HeaderFileOutput>
<HeaderFileOutput Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">CompiledPixelShader.h</HeaderFileOutput>
<HeaderFileOutput Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">CompiledPixelShader.h</HeaderFileOutput>
<HeaderFileOutput Condition="'$(Configuration)|$(Platform)'=='Release|x64'">CompiledPixelShader.h</HeaderFileOutput>
<VariableName Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">g_PSBytecode</VariableName>
<VariableName Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">g_PSBytecode</VariableName>
<VariableName Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">g_PSBytecode</VariableName>
<VariableName Condition="'$(Configuration)|$(Platform)'=='Release|x64'">g_PSBytecode</VariableName>
</FxCompile>
<FxCompile Include="VertexShader.hlsl">
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Release|x64'">Vertex</ShaderType>
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Vertex</ShaderType>
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Vertex</ShaderType>
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Vertex</ShaderType>
<ShaderModel Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">5.0</ShaderModel>
<ShaderModel Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">5.0</ShaderModel>
<ShaderModel Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">5.0</ShaderModel>
<ShaderModel Condition="'$(Configuration)|$(Platform)'=='Release|x64'">5.0</ShaderModel>
<HeaderFileOutput Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">CompiledVertexShader.h</HeaderFileOutput>
<HeaderFileOutput Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">CompiledVertexShader.h</HeaderFileOutput>
<HeaderFileOutput Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">CompiledVertexShader.h</HeaderFileOutput>
<HeaderFileOutput Condition="'$(Configuration)|$(Platform)'=='Release|x64'">CompiledVertexShader.h</HeaderFileOutput>
<VariableName Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">g_VSBytecode</VariableName>
<VariableName Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">g_VSBytecode</VariableName>
<VariableName Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">g_VSBytecode</VariableName>
<VariableName Condition="'$(Configuration)|$(Platform)'=='Release|x64'">g_VSBytecode</VariableName>
</FxCompile>
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
</ImportGroup>
</Project>

View File

@@ -0,0 +1,67 @@
<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup>
<ClCompile Include="TestWin.cpp" />
<ClCompile Include="..\Source\Test.cpp">
<Filter>Source</Filter>
</ClCompile>
<ClCompile Include="..\Source\enkiTS\TaskScheduler.cpp">
<Filter>Source\enkiTS</Filter>
</ClCompile>
<ClCompile Include="..\Source\enkiTS\TaskScheduler_c.cpp">
<Filter>Source\enkiTS</Filter>
</ClCompile>
<ClCompile Include="..\Source\Maths.cpp">
<Filter>Source</Filter>
</ClCompile>
<ClCompile Include="..\..\..\public\TracyClient.cpp" />
</ItemGroup>
<ItemGroup>
<Filter Include="Source">
<UniqueIdentifier>{5f19f217-c1c7-4eeb-be61-8b986fee9375}</UniqueIdentifier>
</Filter>
<Filter Include="Source\enkiTS">
<UniqueIdentifier>{38c448a8-1dcc-4116-9410-a9f8d068caff}</UniqueIdentifier>
</Filter>
</ItemGroup>
<ItemGroup>
<ClInclude Include="..\Source\Test.h">
<Filter>Source</Filter>
</ClInclude>
<ClInclude Include="..\Source\stb_image.h">
<Filter>Source</Filter>
</ClInclude>
<ClInclude Include="..\Source\enkiTS\Atomics.h">
<Filter>Source\enkiTS</Filter>
</ClInclude>
<ClInclude Include="..\Source\enkiTS\LockLessMultiReadPipe.h">
<Filter>Source\enkiTS</Filter>
</ClInclude>
<ClInclude Include="..\Source\enkiTS\TaskScheduler.h">
<Filter>Source\enkiTS</Filter>
</ClInclude>
<ClInclude Include="..\Source\enkiTS\TaskScheduler_c.h">
<Filter>Source\enkiTS</Filter>
</ClInclude>
<ClInclude Include="..\Source\enkiTS\Threads.h">
<Filter>Source\enkiTS</Filter>
</ClInclude>
<ClInclude Include="..\Source\Maths.h">
<Filter>Source</Filter>
</ClInclude>
<ClInclude Include="..\Source\Config.h">
<Filter>Source</Filter>
</ClInclude>
<ClInclude Include="..\Source\MathSimd.h">
<Filter>Source</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<None Include="..\.editorconfig" />
</ItemGroup>
<ItemGroup>
<FxCompile Include="VertexShader.hlsl" />
<FxCompile Include="PixelShader.hlsl" />
<FxCompile Include="ComputeShader.hlsl" />
</ItemGroup>
</Project>

View File

@@ -0,0 +1,567 @@
#include <stdint.h>
#define WIN32_LEAN_AND_MEAN
#define NOMINMAX
#include <windows.h>
#include <d3d11_1.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <algorithm>
#include "../Source/Config.h"
#include "../Source/Maths.h"
#include "../Source/Test.h"
#include "CompiledVertexShader.h"
#include "CompiledPixelShader.h"
#include "../../../public/tracy/Tracy.hpp"
#include "../../../public/tracy/TracyD3D11.hpp"
// Application instance and main window handle.
static HINSTANCE g_HInstance;
static HWND g_Wnd;
// Forward declarations; wWinMain calls MyRegisterClass, InitInstance,
// InitD3DDevice and ShutdownD3DDevice.
ATOM MyRegisterClass(HINSTANCE hInstance);
BOOL InitInstance(HINSTANCE, int);
LRESULT CALLBACK WndProc(HWND, UINT, WPARAM, LPARAM);
INT_PTR CALLBACK About(HWND, UINT, WPARAM, LPARAM);
static HRESULT InitD3DDevice();
static void ShutdownD3DDevice();
static void RenderFrame();
// CPU-side image: wWinMain allocates kBackbufferWidth * kBackbufferHeight * 4
// floats and zero-fills them.
static float* g_Backbuffer;
// Core D3D11 objects.
static D3D_FEATURE_LEVEL g_D3D11FeatureLevel = D3D_FEATURE_LEVEL_11_0;
static ID3D11Device* g_D3D11Device = nullptr;
static ID3D11DeviceContext* g_D3D11Ctx = nullptr;
static IDXGISwapChain* g_D3D11SwapChain = nullptr;
static ID3D11RenderTargetView* g_D3D11RenderTarget = nullptr;
// Shaders created in wWinMain from the precompiled bytecode headers.
static ID3D11VertexShader* g_VertexShader;
static ID3D11PixelShader* g_PixelShader;
// Two float RGBA textures created with the same description, plus their views.
// NOTE(review): presumably used as a ping-pong pair selected by g_BackbufferIndex — confirm.
static ID3D11Texture2D *g_BackbufferTexture, *g_BackbufferTexture2;
static ID3D11ShaderResourceView *g_BackbufferSRV, *g_BackbufferSRV2;
static ID3D11UnorderedAccessView *g_BackbufferUAV, *g_BackbufferUAV2;
// Linear-filter, clamp-address sampler and a solid-fill, no-cull rasterizer state.
static ID3D11SamplerState* g_SamplerLinear;
static ID3D11RasterizerState* g_RasterState;
static int g_BackbufferIndex;
// Tracy D3D11 profiling context.
static tracy::D3D11Ctx *g_tracyCtx;
// Everything in this section exists only when the GPU compute path is enabled.
#if DO_COMPUTE_GPU
#include "CompiledComputeShader.h"
// Host-side parameter block; the g_DataParams buffer is created with
// sizeof(ComputeParams) as both its size and its structure stride.
// NOTE(review): the field list mirrors the compute shader's `Params` struct and
// presumably must stay in the same order and size — confirm before editing.
struct ComputeParams
{
Camera cam;
int sphereCount;
int screenWidth;
int screenHeight;
int frames;
float invWidth;
float invHeight;
float lerpFac;
int emissiveCount;
};
static ID3D11ComputeShader* g_ComputeShader;
// Structured buffers and their shader resource views: spheres, materials,
// the single parameter block, and the emissive index list.
static ID3D11Buffer* g_DataSpheres; static ID3D11ShaderResourceView* g_SRVSpheres;
static ID3D11Buffer* g_DataMaterials; static ID3D11ShaderResourceView* g_SRVMaterials;
static ID3D11Buffer* g_DataParams; static ID3D11ShaderResourceView* g_SRVParams;
static ID3D11Buffer* g_DataEmissives; static ID3D11ShaderResourceView* g_SRVEmissives;
// 4-byte raw buffer with an unordered access view, used as a counter.
static ID3D11Buffer* g_DataCounter; static ID3D11UnorderedAccessView* g_UAVCounter;
// Object count and per-object byte sizes, filled in by GetObjectCount in wWinMain.
static int g_SphereCount, g_ObjSize, g_MatSize;
// NOTE(review): presumably timestamp/disjoint queries for GPU timing — confirm against their use.
static ID3D11Query *g_QueryBegin, *g_QueryEnd, *g_QueryDisjoint;
#endif // #if DO_COMPUTE_GPU
int APIENTRY wWinMain(_In_ HINSTANCE hInstance, _In_opt_ HINSTANCE, _In_ LPWSTR, _In_ int nCmdShow)
{
g_Backbuffer = new float[kBackbufferWidth * kBackbufferHeight * 4];
memset(g_Backbuffer, 0, kBackbufferWidth * kBackbufferHeight * 4 * sizeof(g_Backbuffer[0]));
InitializeTest();
MyRegisterClass(hInstance);
if (!InitInstance (hInstance, nCmdShow))
{
return FALSE;
}
if (FAILED(InitD3DDevice()))
{
ShutdownD3DDevice();
return 0;
}
g_D3D11Device->CreateVertexShader(g_VSBytecode, ARRAYSIZE(g_VSBytecode), NULL, &g_VertexShader);
g_D3D11Device->CreatePixelShader(g_PSBytecode, ARRAYSIZE(g_PSBytecode), NULL, &g_PixelShader);
#if DO_COMPUTE_GPU
g_D3D11Device->CreateComputeShader(g_CSBytecode, ARRAYSIZE(g_CSBytecode), NULL, &g_ComputeShader);
#endif
D3D11_TEXTURE2D_DESC texDesc = {};
texDesc.Width = kBackbufferWidth;
texDesc.Height = kBackbufferHeight;
texDesc.MipLevels = 1;
texDesc.ArraySize = 1;
texDesc.Format = DXGI_FORMAT_R32G32B32A32_FLOAT;
texDesc.SampleDesc.Count = 1;
texDesc.SampleDesc.Quality = 0;
#if DO_COMPUTE_GPU
texDesc.Usage = D3D11_USAGE_DEFAULT;
texDesc.BindFlags = D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_UNORDERED_ACCESS;
texDesc.CPUAccessFlags = 0;
#else
texDesc.Usage = D3D11_USAGE_DYNAMIC;
texDesc.BindFlags = D3D11_BIND_SHADER_RESOURCE;
texDesc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
#endif
texDesc.MiscFlags = 0;
g_D3D11Device->CreateTexture2D(&texDesc, NULL, &g_BackbufferTexture);
g_D3D11Device->CreateTexture2D(&texDesc, NULL, &g_BackbufferTexture2);
D3D11_SHADER_RESOURCE_VIEW_DESC srvDesc = {};
srvDesc.Format = texDesc.Format;
srvDesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D;
srvDesc.Texture2D.MipLevels = 1;
srvDesc.Texture2D.MostDetailedMip = 0;
g_D3D11Device->CreateShaderResourceView(g_BackbufferTexture, &srvDesc, &g_BackbufferSRV);
g_D3D11Device->CreateShaderResourceView(g_BackbufferTexture2, &srvDesc, &g_BackbufferSRV2);
D3D11_SAMPLER_DESC smpDesc = {};
smpDesc.Filter = D3D11_FILTER_MIN_MAG_LINEAR_MIP_POINT;
smpDesc.AddressU = smpDesc.AddressV = smpDesc.AddressW = D3D11_TEXTURE_ADDRESS_CLAMP;
g_D3D11Device->CreateSamplerState(&smpDesc, &g_SamplerLinear);
D3D11_RASTERIZER_DESC rasterDesc = {};
rasterDesc.FillMode = D3D11_FILL_SOLID;
rasterDesc.CullMode = D3D11_CULL_NONE;
g_D3D11Device->CreateRasterizerState(&rasterDesc, &g_RasterState);
#if DO_COMPUTE_GPU
D3D11_UNORDERED_ACCESS_VIEW_DESC uavDesc = {};
int camSize;
GetObjectCount(g_SphereCount, g_ObjSize, g_MatSize, camSize);
assert(g_ObjSize == 20);
assert(g_MatSize == 36);
assert(camSize == 88);
D3D11_BUFFER_DESC bdesc = {};
bdesc.ByteWidth = g_SphereCount * g_ObjSize;
bdesc.Usage = D3D11_USAGE_DEFAULT;
bdesc.BindFlags = D3D11_BIND_SHADER_RESOURCE;
bdesc.CPUAccessFlags = 0;
bdesc.MiscFlags = D3D11_RESOURCE_MISC_BUFFER_STRUCTURED;
bdesc.StructureByteStride = g_ObjSize;
g_D3D11Device->CreateBuffer(&bdesc, NULL, &g_DataSpheres);
srvDesc.Format = DXGI_FORMAT_UNKNOWN;
srvDesc.ViewDimension = D3D11_SRV_DIMENSION_BUFFER;
srvDesc.Buffer.FirstElement = 0;
srvDesc.Buffer.NumElements = g_SphereCount;
g_D3D11Device->CreateShaderResourceView(g_DataSpheres, &srvDesc, &g_SRVSpheres);
bdesc.ByteWidth = g_SphereCount * g_MatSize;
bdesc.StructureByteStride = g_MatSize;
g_D3D11Device->CreateBuffer(&bdesc, NULL, &g_DataMaterials);
srvDesc.Buffer.NumElements = g_SphereCount;
g_D3D11Device->CreateShaderResourceView(g_DataMaterials, &srvDesc, &g_SRVMaterials);
bdesc.ByteWidth = sizeof(ComputeParams);
bdesc.StructureByteStride = sizeof(ComputeParams);
g_D3D11Device->CreateBuffer(&bdesc, NULL, &g_DataParams);
srvDesc.Buffer.NumElements = 1;
g_D3D11Device->CreateShaderResourceView(g_DataParams, &srvDesc, &g_SRVParams);
bdesc.ByteWidth = g_SphereCount * 4;
bdesc.StructureByteStride = 4;
g_D3D11Device->CreateBuffer(&bdesc, NULL, &g_DataEmissives);
srvDesc.Buffer.NumElements = g_SphereCount;
g_D3D11Device->CreateShaderResourceView(g_DataEmissives, &srvDesc, &g_SRVEmissives);
bdesc.ByteWidth = 4;
bdesc.BindFlags |= D3D11_BIND_UNORDERED_ACCESS;
bdesc.MiscFlags = D3D11_RESOURCE_MISC_BUFFER_ALLOW_RAW_VIEWS;
bdesc.CPUAccessFlags = D3D11_CPU_ACCESS_READ;
g_D3D11Device->CreateBuffer(&bdesc, NULL, &g_DataCounter);
uavDesc.Format = DXGI_FORMAT_R32_TYPELESS;
uavDesc.ViewDimension = D3D11_UAV_DIMENSION_BUFFER;
uavDesc.Buffer.FirstElement = 0;
uavDesc.Buffer.NumElements = 1;
uavDesc.Buffer.Flags = D3D11_BUFFER_UAV_FLAG_RAW;
g_D3D11Device->CreateUnorderedAccessView(g_DataCounter, &uavDesc, &g_UAVCounter);
uavDesc.Format = DXGI_FORMAT_R32G32B32A32_FLOAT;
uavDesc.ViewDimension = D3D11_UAV_DIMENSION_TEXTURE2D;
uavDesc.Texture2D.MipSlice = 0;
g_D3D11Device->CreateUnorderedAccessView(g_BackbufferTexture, &uavDesc, &g_BackbufferUAV);
g_D3D11Device->CreateUnorderedAccessView(g_BackbufferTexture2, &uavDesc, &g_BackbufferUAV2);
D3D11_QUERY_DESC qDesc = {};
qDesc.Query = D3D11_QUERY_TIMESTAMP;
g_D3D11Device->CreateQuery(&qDesc, &g_QueryBegin);
g_D3D11Device->CreateQuery(&qDesc, &g_QueryEnd);
qDesc.Query = D3D11_QUERY_TIMESTAMP_DISJOINT;
g_D3D11Device->CreateQuery(&qDesc, &g_QueryDisjoint);
#endif // #if DO_COMPUTE_GPU
static int framesLeft = 10;
// Main message loop
MSG msg = { 0 };
while (msg.message != WM_QUIT)
{
if (PeekMessage(&msg, NULL, 0U, 0U, PM_REMOVE))
{
TranslateMessage(&msg);
DispatchMessage(&msg);
}
else
{
RenderFrame();
TracyD3D11Collect(g_tracyCtx);
if( --framesLeft == 0 ) break;
}
}
ShutdownTest();
ShutdownD3DDevice();
return (int) msg.wParam;
}
// Registers the "TestClass" window class, whose messages are handled by WndProc.
// Returns the class atom produced by RegisterClassExW (0 on failure).
ATOM MyRegisterClass(HINSTANCE hInstance)
{
    ZoneScoped;

    // Value-initialisation zeroes every field, so only the non-zero ones are set below.
    WNDCLASSEXW windowClass = {};
    windowClass.cbSize = sizeof(windowClass);
    windowClass.lpszClassName = L"TestClass";
    windowClass.lpfnWndProc = WndProc;
    windowClass.hInstance = hInstance;
    windowClass.style = CS_HREDRAW | CS_VREDRAW;
    windowClass.hbrBackground = (HBRUSH)(COLOR_WINDOW+1);
    windowClass.hCursor = LoadCursor(nullptr, IDC_ARROW);

    return RegisterClassExW(&windowClass);
}
// Creates and shows the main window (class "TestClass", title "Test").
// The outer window size is derived with AdjustWindowRect from a
// kBackbufferWidth x kBackbufferHeight client rectangle.
// Stores the instance handle in g_HInstance and the window handle in g_Wnd.
// Returns FALSE if the window could not be created, TRUE otherwise.
BOOL InitInstance(HINSTANCE hInstance, int nCmdShow)
{
    ZoneScoped;
    g_HInstance = hInstance;

    const DWORD windowStyle = WS_OVERLAPPED | WS_CAPTION | WS_SYSMENU | WS_MINIMIZEBOX;
    RECT bounds = { 0, 0, kBackbufferWidth, kBackbufferHeight };
    AdjustWindowRect(&bounds, windowStyle, FALSE);
    const LONG outerWidth = bounds.right - bounds.left;
    const LONG outerHeight = bounds.bottom - bounds.top;

    HWND window = CreateWindowW(L"TestClass", L"Test", windowStyle, CW_USEDEFAULT, CW_USEDEFAULT, outerWidth, outerHeight, nullptr, nullptr, hInstance, nullptr);
    if (window == nullptr)
    {
        return FALSE;
    }

    g_Wnd = window;
    ShowWindow(window, nCmdShow);
    UpdateWindow(window);
    return TRUE;
}
// Frame statistics and render flags shared by RenderFrame and WndProc.

// Performance-counter ticks accumulated by the CPU path of RenderFrame since the last
// stats report. The DO_COMPUTE_GPU path declares its own local `static float s_Time`,
// which shadows this variable there.
static uint64_t s_Time;
// Number of frames accumulated since the last stats report; reset to 0 after each report.
static int s_Count;
// Scratch buffer for the formatted stats line shown in the window title.
static char s_Buffer[200];
// Bit flags passed to UpdateTest/DrawTest; toggled from WndProc ('a' -> kFlagAnimate,
// 'p' -> kFlagProgressive).
static unsigned s_Flags = kFlagProgressive;
// Frames rendered so far; incremented by RenderFrame and reset to 0 when 'p' is pressed.
static int s_FrameCount = 0;
static void RenderFrame()
{
ZoneScoped;
TracyD3D11Zone(g_tracyCtx, "RenderFrame");
LARGE_INTEGER time1;
#if DO_COMPUTE_GPU
QueryPerformanceCounter(&time1);
float t = float(clock()) / CLOCKS_PER_SEC;
UpdateTest(t, s_FrameCount, kBackbufferWidth, kBackbufferHeight, s_Flags);
g_BackbufferIndex = 1 - g_BackbufferIndex;
void* dataSpheres = alloca(g_SphereCount * g_ObjSize);
void* dataMaterials = alloca(g_SphereCount * g_MatSize);
void* dataEmissives = alloca(g_SphereCount * 4);
ComputeParams dataParams;
GetSceneDesc(dataSpheres, dataMaterials, &dataParams.cam, dataEmissives, &dataParams.emissiveCount);
dataParams.sphereCount = g_SphereCount;
dataParams.screenWidth = kBackbufferWidth;
dataParams.screenHeight = kBackbufferHeight;
dataParams.frames = s_FrameCount;
dataParams.invWidth = 1.0f / kBackbufferWidth;
dataParams.invHeight = 1.0f / kBackbufferHeight;
float lerpFac = float(s_FrameCount) / float(s_FrameCount + 1);
if (s_Flags & kFlagAnimate)
lerpFac *= DO_ANIMATE_SMOOTHING;
if (!(s_Flags & kFlagProgressive))
lerpFac = 0;
dataParams.lerpFac = lerpFac;
g_D3D11Ctx->UpdateSubresource(g_DataSpheres, 0, NULL, dataSpheres, 0, 0);
g_D3D11Ctx->UpdateSubresource(g_DataMaterials, 0, NULL, dataMaterials, 0, 0);
g_D3D11Ctx->UpdateSubresource(g_DataParams, 0, NULL, &dataParams, 0, 0);
g_D3D11Ctx->UpdateSubresource(g_DataEmissives, 0, NULL, dataEmissives, 0, 0);
ID3D11ShaderResourceView* srvs[] = {
g_BackbufferIndex == 0 ? g_BackbufferSRV2 : g_BackbufferSRV,
g_SRVSpheres,
g_SRVMaterials,
g_SRVParams,
g_SRVEmissives
};
g_D3D11Ctx->CSSetShaderResources(0, ARRAYSIZE(srvs), srvs);
ID3D11UnorderedAccessView* uavs[] = {
g_BackbufferIndex == 0 ? g_BackbufferUAV : g_BackbufferUAV2,
g_UAVCounter
};
g_D3D11Ctx->CSSetUnorderedAccessViews(0, ARRAYSIZE(uavs), uavs, NULL);
g_D3D11Ctx->CSSetShader(g_ComputeShader, NULL, 0);
g_D3D11Ctx->Begin(g_QueryDisjoint);
g_D3D11Ctx->End(g_QueryBegin);
g_D3D11Ctx->Dispatch(kBackbufferWidth/kCSGroupSizeX, kBackbufferHeight/kCSGroupSizeY, 1);
g_D3D11Ctx->End(g_QueryEnd);
uavs[0] = NULL;
g_D3D11Ctx->CSSetUnorderedAccessViews(0, ARRAYSIZE(uavs), uavs, NULL);
++s_FrameCount;
#else
QueryPerformanceCounter(&time1);
float t = float(clock()) / CLOCKS_PER_SEC;
static size_t s_RayCounter = 0;
int rayCount;
UpdateTest(t, s_FrameCount, kBackbufferWidth, kBackbufferHeight, s_Flags);
DrawTest(t, s_FrameCount, kBackbufferWidth, kBackbufferHeight, g_Backbuffer, rayCount, s_Flags);
s_FrameCount++;
s_RayCounter += rayCount;
LARGE_INTEGER time2;
QueryPerformanceCounter(&time2);
uint64_t dt = time2.QuadPart - time1.QuadPart;
++s_Count;
s_Time += dt;
if (s_Count > 10)
{
LARGE_INTEGER frequency;
QueryPerformanceFrequency(&frequency);
double s = double(s_Time) / double(frequency.QuadPart) / s_Count;
sprintf_s(s_Buffer, sizeof(s_Buffer), "%.2fms (%.1f FPS) %.1fMrays/s %.2fMrays/frame frames %i\n", s * 1000.0f, 1.f / s, s_RayCounter / s_Count / s * 1.0e-6f, s_RayCounter / s_Count * 1.0e-6f, s_FrameCount);
SetWindowTextA(g_Wnd, s_Buffer);
OutputDebugStringA(s_Buffer);
s_Count = 0;
s_Time = 0;
s_RayCounter = 0;
}
D3D11_MAPPED_SUBRESOURCE mapped;
g_D3D11Ctx->Map(g_BackbufferTexture, 0, D3D11_MAP_WRITE_DISCARD, 0, &mapped);
const uint8_t* src = (const uint8_t*)g_Backbuffer;
uint8_t* dst = (uint8_t*)mapped.pData;
for (int y = 0; y < kBackbufferHeight; ++y)
{
memcpy(dst, src, kBackbufferWidth * 16);
src += kBackbufferWidth * 16;
dst += mapped.RowPitch;
}
g_D3D11Ctx->Unmap(g_BackbufferTexture, 0);
#endif
g_D3D11Ctx->VSSetShader(g_VertexShader, NULL, 0);
g_D3D11Ctx->PSSetShader(g_PixelShader, NULL, 0);
g_D3D11Ctx->PSSetShaderResources(0, 1, g_BackbufferIndex == 0 ? &g_BackbufferSRV : &g_BackbufferSRV2);
g_D3D11Ctx->PSSetSamplers(0, 1, &g_SamplerLinear);
g_D3D11Ctx->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
g_D3D11Ctx->RSSetState(g_RasterState);
g_D3D11Ctx->Draw(3, 0);
g_D3D11SwapChain->Present(0, 0);
FrameMark;
#if DO_COMPUTE_GPU
g_D3D11Ctx->End(g_QueryDisjoint);
// get GPU times
while (g_D3D11Ctx->GetData(g_QueryDisjoint, NULL, 0, 0) == S_FALSE) { Sleep(0); }
D3D10_QUERY_DATA_TIMESTAMP_DISJOINT tsDisjoint;
g_D3D11Ctx->GetData(g_QueryDisjoint, &tsDisjoint, sizeof(tsDisjoint), 0);
if (!tsDisjoint.Disjoint)
{
UINT64 tsBegin, tsEnd;
// Note: on some GPUs/drivers, even when the disjoint query above already said "yeah I have data",
// might still not return "I have data" for timestamp queries before it.
while (g_D3D11Ctx->GetData(g_QueryBegin, &tsBegin, sizeof(tsBegin), 0) == S_FALSE) { Sleep(0); }
while (g_D3D11Ctx->GetData(g_QueryEnd, &tsEnd, sizeof(tsEnd), 0) == S_FALSE) { Sleep(0); }
float s = float(tsEnd - tsBegin) / float(tsDisjoint.Frequency);
static uint64_t s_RayCounter;
D3D11_MAPPED_SUBRESOURCE mapped;
g_D3D11Ctx->Map(g_DataCounter, 0, D3D11_MAP_READ, 0, &mapped);
s_RayCounter += *(const int*)mapped.pData;
g_D3D11Ctx->Unmap(g_DataCounter, 0);
int zeroCount = 0;
g_D3D11Ctx->UpdateSubresource(g_DataCounter, 0, NULL, &zeroCount, 0, 0);
static float s_Time;
++s_Count;
s_Time += s;
if (s_Count > 150)
{
s = s_Time / s_Count;
sprintf_s(s_Buffer, sizeof(s_Buffer), "%.2fms (%.1f FPS) %.1fMrays/s %.2fMrays/frame frames %i\n", s * 1000.0f, 1.f / s, s_RayCounter / s_Count / s * 1.0e-6f, s_RayCounter / s_Count * 1.0e-6f, s_FrameCount);
SetWindowTextA(g_Wnd, s_Buffer);
s_Count = 0;
s_Time = 0;
s_RayCounter = 0;
}
}
#endif // #if DO_COMPUTE_GPU
}
// Window procedure for the main window.
//   WM_PAINT   - begins and ends painting without drawing anything.
//   WM_DESTROY - posts WM_QUIT so the message loop ends.
//   WM_CHAR    - 'a' toggles kFlagAnimate; 'p' toggles kFlagProgressive and restarts
//                the frame count.
// Handled messages return 0; everything else is forwarded to DefWindowProc.
LRESULT CALLBACK WndProc(HWND hWnd, UINT message, WPARAM wParam, LPARAM lParam)
{
    switch (message)
    {
    case WM_PAINT:
        {
            // The device context returned by BeginPaint is not needed: nothing is drawn here.
            PAINTSTRUCT ps;
            BeginPaint(hWnd, &ps);
            EndPaint(hWnd, &ps);
        }
        break;
    case WM_DESTROY:
        PostQuitMessage(0);
        break;
    case WM_CHAR:
        if (wParam == 'a')
            s_Flags = s_Flags ^ kFlagAnimate;
        if (wParam == 'p')
        {
            s_Flags = s_Flags ^ kFlagProgressive;
            s_FrameCount = 0;
        }
        break;
    default:
        return DefWindowProc(hWnd, message, wParam, lParam);
    }
    return 0;
}
// Creates the D3D11 device and immediate context, a windowed swap chain for g_Wnd sized
// to its client area, the render target view for the swap chain's buffer 0, a matching
// viewport, and the Tracy D3D11 profiling context.
// Returns S_OK on success, or the first failing HRESULT. Objects created before the
// failure are left in their globals.
static HRESULT InitD3DDevice()
{
    ZoneScoped;

    RECT clientRect;
    GetClientRect(g_Wnd, &clientRect);
    UINT width = clientRect.right - clientRect.left;
    UINT height = clientRect.bottom - clientRect.top;

    UINT deviceFlags = 0;
#ifdef _DEBUG
    deviceFlags |= D3D11_CREATE_DEVICE_DEBUG;
#endif

    // Only feature level 11_0 is requested.
    D3D_FEATURE_LEVEL requestedLevels[] = { D3D_FEATURE_LEVEL_11_0 };
    HRESULT hr = D3D11CreateDevice(nullptr, D3D_DRIVER_TYPE_HARDWARE, nullptr, deviceFlags, requestedLevels, ARRAYSIZE(requestedLevels), D3D11_SDK_VERSION, &g_D3D11Device, &g_D3D11FeatureLevel, &g_D3D11Ctx);
    if (FAILED(hr))
        return hr;

    // Walk device -> adapter -> factory to get the DXGI factory.
    IDXGIFactory1* factory = nullptr;
    IDXGIDevice* dxgiDevice = nullptr;
    hr = g_D3D11Device->QueryInterface(__uuidof(IDXGIDevice), reinterpret_cast<void**>(&dxgiDevice));
    if (FAILED(hr))
        return hr;
    IDXGIAdapter* dxgiAdapter = nullptr;
    hr = dxgiDevice->GetAdapter(&dxgiAdapter);
    if (SUCCEEDED(hr))
    {
        hr = dxgiAdapter->GetParent(__uuidof(IDXGIFactory1), reinterpret_cast<void**>(&factory));
        dxgiAdapter->Release();
    }
    dxgiDevice->Release();
    if (FAILED(hr))
        return hr;

    // Swap chain: one windowed RGBA8 buffer.
    DXGI_SWAP_CHAIN_DESC swapDesc = {};
    swapDesc.OutputWindow = g_Wnd;
    swapDesc.Windowed = TRUE;
    swapDesc.BufferCount = 1;
    swapDesc.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT;
    swapDesc.BufferDesc.Width = width;
    swapDesc.BufferDesc.Height = height;
    swapDesc.BufferDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
    swapDesc.BufferDesc.RefreshRate.Numerator = 60;
    swapDesc.BufferDesc.RefreshRate.Denominator = 1;
    swapDesc.SampleDesc.Count = 1;
    swapDesc.SampleDesc.Quality = 0;
    hr = factory->CreateSwapChain(g_D3D11Device, &swapDesc, &g_D3D11SwapChain);
    // Prevent Alt-Enter
    factory->MakeWindowAssociation(g_Wnd, DXGI_MWA_NO_ALT_ENTER);
    factory->Release();
    if (FAILED(hr))
        return hr;

    // Render target view over the swap chain's buffer 0.
    ID3D11Texture2D* swapBuffer = nullptr;
    hr = g_D3D11SwapChain->GetBuffer(0, __uuidof(ID3D11Texture2D), reinterpret_cast<void**>(&swapBuffer));
    if (FAILED(hr))
        return hr;
    hr = g_D3D11Device->CreateRenderTargetView(swapBuffer, nullptr, &g_D3D11RenderTarget);
    swapBuffer->Release();
    if (FAILED(hr))
        return hr;
    g_D3D11Ctx->OMSetRenderTargets(1, &g_D3D11RenderTarget, nullptr);

    // Viewport covering the whole client area.
    D3D11_VIEWPORT viewport;
    viewport.TopLeftX = 0;
    viewport.TopLeftY = 0;
    viewport.Width = (float)width;
    viewport.Height = (float)height;
    viewport.MinDepth = 0.0f;
    viewport.MaxDepth = 1.0f;
    g_D3D11Ctx->RSSetViewports(1, &viewport);

    // Tracy GPU profiling context, named "D3D11".
    g_tracyCtx = TracyD3D11Context(g_D3D11Device, g_D3D11Ctx);
    const char* tracyName = "D3D11";
    TracyD3D11ContextName(g_tracyCtx, tracyName, (uint16_t)strlen(tracyName));
    return S_OK;
}
// Destroys the Tracy D3D11 context and releases the render target view, swap chain,
// immediate context and device, in that order.
// Each global is reset to nullptr after it is released, so no dangling pointer is left
// behind and calling this function again is a no-op.
static void ShutdownD3DDevice()
{
    ZoneScoped;
    if (g_tracyCtx)
    {
        TracyD3D11Destroy(g_tracyCtx);
        g_tracyCtx = nullptr;
    }
    // Clear all pipeline bindings on the context before the objects are released.
    if (g_D3D11Ctx)
        g_D3D11Ctx->ClearState();
    if (g_D3D11RenderTarget)
    {
        g_D3D11RenderTarget->Release();
        g_D3D11RenderTarget = nullptr;
    }
    if (g_D3D11SwapChain)
    {
        g_D3D11SwapChain->Release();
        g_D3D11SwapChain = nullptr;
    }
    if (g_D3D11Ctx)
    {
        g_D3D11Ctx->Release();
        g_D3D11Ctx = nullptr;
    }
    if (g_D3D11Device)
    {
        g_D3D11Device->Release();
        g_D3D11Device = nullptr;
    }
}

View File

@@ -0,0 +1,13 @@
// Output of the vertex shader entry point `main`
// (named vs2ps, so presumably the input of the matching pixel shader).
struct vs2ps
{
// Texture coordinate, passed through the TEXCOORD0 semantic.
float2 uv : TEXCOORD0;
// Vertex position, bound to the SV_Position system-value semantic.
float4 pos : SV_Position;
};
// Generates a vertex purely from its index; no vertex buffer input is read.
// For vertex ids 0, 1, 2 the uv values are (0,0), (2,0), (0,2) and the positions are
// (-1,-1), (3,-1), (-1,3): a single triangle that covers the whole [-1,1] x [-1,1] square.
vs2ps main(uint vid : SV_VertexID)
{
    // Bit 0 of the id selects u, bit 1 selects v; each is either 0 or 2.
    uint u = (vid << 1) & 2;
    uint v = vid & 2;

    vs2ps result;
    result.uv = float2(u, v);
    // Remap uv from [0,2] to [-1,3]: scale by 2, then shift by -1.
    result.pos = float4(2.0f * result.uv - 1.0f, 0, 1);
    return result;
}