summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTim Foley <tfoleyNV@users.noreply.github.com>2021-03-12 11:58:14 -0800
committerGitHub <noreply@github.com>2021-03-12 11:58:14 -0800
commitd6a37a0f151e390808f196998c48a341bc4c7b60 (patch)
treec1c6e3af434cb3627af67ecc8706124e4b8c7fb1
parent9ffe2f3ef245034a2dae42017a9059dfe4d02647 (diff)
Add a CPU renderer implementation (#1750)
* Add a CPU renderer implementation This change adds a CPU back-end to `gfx` and ensures that most of our existing CPU tests pass when using it. Detailed notes: * Most of the CPU renderer implementation is copy-pasted from the CUDA case, so they share a lot of similar logic * The main addition to the CPU renderer is a semi-complete implementation of host-memory textures. The logic here handles all the main shapes (Buffer, 1D, 2D, 3D, Cube) and all the currently-supported `Format`s that are sample-able as-is (no D24S8). The implementation is not intended to be fast, and it currently only does nearest-neighbor sampling, but otherwise it tries to avoid cutting too many corners and should be ar reasonable starting point for a more complete (but not performance-oriented) implementation. * Refactored the CPU prelude `IRWTexture` interface to inherit from `ITexture`, since in most cases a single type will end up implementing both. It might be worth it to collapse it all down to a single interface later. * Changed the CPU prelude `ITexture`/`IRWTexture` interface so that it takes both a pointer *and* a size for output arguments. This change seems necessary to allow a shader variable declared as a `Texture2D<float>` to fetch a single `float` when the underlying texture might be using RGBA32F. * Added to the `IComponentType` public API so that we can query a "host callable" for an entry point and not just a binary. * Turned off the `-shaderobj` flag on two tests that weren't yet compatible with shader objects but still had the flag left in on the path (since previously the CPU path always used the non-`gfx` non-shader-object logic anyway) * Disabled one test (`dynamic-dispatch-11`) that relied on the `ConstantBuffer<IInterface>` idiom that we know we are planning to chagne soon anyway. * Made a few changes to the CUDA path to bring it into line with what I added for the CPU path. These were mostly bug fixes around indexing logic for sub-objects and resources. * fixup
-rw-r--r--build/visual-studio/gfx/gfx.vcxproj2
-rw-r--r--build/visual-studio/gfx/gfx.vcxproj.filters6
-rw-r--r--prelude/slang-cpp-types.h60
-rw-r--r--premake5.lua1
-rw-r--r--slang.h17
-rwxr-xr-xsource/slang/slang-compiler.h23
-rw-r--r--source/slang/slang.cpp32
-rw-r--r--tests/compute/dynamic-dispatch-11.slang6
-rw-r--r--tests/compute/performance-profile.slang2
-rw-r--r--tests/compute/unbounded-array-of-array-syntax.slang2
-rw-r--r--tools/gfx/cpu/render-cpu.cpp1796
-rw-r--r--tools/gfx/cpu/render-cpu.h11
-rw-r--r--tools/gfx/cuda/render-cuda.cpp75
-rw-r--r--tools/gfx/render-graphics-common.cpp31
-rw-r--r--tools/gfx/render.cpp7
-rw-r--r--tools/gfx/renderer-shared.h24
-rw-r--r--tools/render-test/cpu-compute-util.cpp14
-rw-r--r--tools/render-test/render-test-main.cpp2
-rw-r--r--tools/render-test/shader-renderer-util.h4
19 files changed, 2011 insertions, 104 deletions
diff --git a/build/visual-studio/gfx/gfx.vcxproj b/build/visual-studio/gfx/gfx.vcxproj
index 5f05d7586..08786b5cf 100644
--- a/build/visual-studio/gfx/gfx.vcxproj
+++ b/build/visual-studio/gfx/gfx.vcxproj
@@ -181,6 +181,7 @@
<ItemGroup>
<ClInclude Include="..\..\..\slang-gfx.h" />
<ClInclude Include="..\..\..\tools\gfx\command-writer.h" />
+ <ClInclude Include="..\..\..\tools\gfx\cpu\render-cpu.h" />
<ClInclude Include="..\..\..\tools\gfx\cuda\render-cuda.h" />
<ClInclude Include="..\..\..\tools\gfx\d3d\d3d-swapchain.h" />
<ClInclude Include="..\..\..\tools\gfx\d3d\d3d-util.h" />
@@ -206,6 +207,7 @@
<ClInclude Include="..\..\..\tools\gfx\vulkan\vk-util.h" />
</ItemGroup>
<ItemGroup>
+ <ClCompile Include="..\..\..\tools\gfx\cpu\render-cpu.cpp" />
<ClCompile Include="..\..\..\tools\gfx\cuda\render-cuda.cpp" />
<ClCompile Include="..\..\..\tools\gfx\d3d\d3d-swapchain.cpp" />
<ClCompile Include="..\..\..\tools\gfx\d3d\d3d-util.cpp" />
diff --git a/build/visual-studio/gfx/gfx.vcxproj.filters b/build/visual-studio/gfx/gfx.vcxproj.filters
index c7836d62f..cff8cc95a 100644
--- a/build/visual-studio/gfx/gfx.vcxproj.filters
+++ b/build/visual-studio/gfx/gfx.vcxproj.filters
@@ -15,6 +15,9 @@
<ClInclude Include="..\..\..\tools\gfx\command-writer.h">
<Filter>Header Files</Filter>
</ClInclude>
+ <ClInclude Include="..\..\..\tools\gfx\cpu\render-cpu.h">
+ <Filter>Header Files</Filter>
+ </ClInclude>
<ClInclude Include="..\..\..\tools\gfx\cuda\render-cuda.h">
<Filter>Header Files</Filter>
</ClInclude>
@@ -86,6 +89,9 @@
</ClInclude>
</ItemGroup>
<ItemGroup>
+ <ClCompile Include="..\..\..\tools\gfx\cpu\render-cpu.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
<ClCompile Include="..\..\..\tools\gfx\cuda\render-cuda.cpp">
<Filter>Source Files</Filter>
</ClCompile>
diff --git a/prelude/slang-cpp-types.h b/prelude/slang-cpp-types.h
index 9c8fb3dec..99e8f5097 100644
--- a/prelude/slang-cpp-types.h
+++ b/prelude/slang-cpp-types.h
@@ -446,9 +446,9 @@ struct TextureDimensions
struct ITexture
{
virtual TextureDimensions GetDimensions(int mipLevel = -1) = 0;
- virtual void Load(const int* v, void* out) = 0;
- virtual void Sample(SamplerState samplerState, const float* loc, void* out) = 0;
- virtual void SampleLevel(SamplerState samplerState, const float* loc, float level, void* out) = 0;
+ virtual void Load(const int32_t* v, void* outData, size_t dataSize) = 0;
+ virtual void Sample(SamplerState samplerState, const float* loc, void* outData, size_t dataSize) = 0;
+ virtual void SampleLevel(SamplerState samplerState, const float* loc, float level, void* outData, size_t dataSize) = 0;
};
template <typename T>
@@ -470,9 +470,9 @@ struct Texture1D
*outNumberOfLevels = dims.numberOfLevels;
}
- T Load(const int2& loc) const { T out; texture->Load(&loc.x, &out); return out; }
- T Sample(SamplerState samplerState, float loc) const { T out; texture->Sample(samplerState, &loc, &out); return out; }
- T SampleLevel(SamplerState samplerState, float loc, float level) { T out; texture->SampleLevel(samplerState, &loc, level, &out); return out; }
+ T Load(const int2& loc) const { T out; texture->Load(&loc.x, &out, sizeof(out)); return out; }
+ T Sample(SamplerState samplerState, float loc) const { T out; texture->Sample(samplerState, &loc, &out, sizeof(out)); return out; }
+ T SampleLevel(SamplerState samplerState, float loc, float level) { T out; texture->SampleLevel(samplerState, &loc, level, &out, sizeof(out)); return out; }
ITexture* texture;
};
@@ -507,9 +507,9 @@ struct Texture2D
*outNumberOfLevels = dims.numberOfLevels;
}
- T Load(const int3& loc) const { T out; texture->Load(&loc.x, &out); return out; }
- T Sample(SamplerState samplerState, const float2& loc) const { T out; texture->Sample(samplerState, &loc.x, &out); return out; }
- T SampleLevel(SamplerState samplerState, const float2& loc, float level) { T out; texture->SampleLevel(samplerState, &loc.x, level, &out); return out; }
+ T Load(const int3& loc) const { T out; texture->Load(&loc.x, &out, sizeof(out)); return out; }
+ T Sample(SamplerState samplerState, const float2& loc) const { T out; texture->Sample(samplerState, &loc.x, &out, sizeof(out)); return out; }
+ T SampleLevel(SamplerState samplerState, const float2& loc, float level) { T out; texture->SampleLevel(samplerState, &loc.x, level, &out, sizeof(out)); return out; }
ITexture* texture;
};
@@ -548,9 +548,9 @@ struct Texture3D
*outNumberOfLevels = dims.numberOfLevels;
}
- T Load(const int4& loc) const { T out; texture->Load(&loc.x, &out); return out; }
- T Sample(SamplerState samplerState, const float3& loc) const { T out; texture->Sample(samplerState, &loc.x, &out); return out; }
- T SampleLevel(SamplerState samplerState, const float3& loc, float level) { T out; texture->SampleLevel(samplerState, &loc.x, level, &out); return out; }
+ T Load(const int4& loc) const { T out; texture->Load(&loc.x, &out, sizeof(out)); return out; }
+ T Sample(SamplerState samplerState, const float3& loc) const { T out; texture->Sample(samplerState, &loc.x, &out, sizeof(out)); return out; }
+ T SampleLevel(SamplerState samplerState, const float3& loc, float level) { T out; texture->SampleLevel(samplerState, &loc.x, level, &out, sizeof(out)); return out; }
ITexture* texture;
};
@@ -585,8 +585,8 @@ struct TextureCube
*outNumberOfLevels = dims.numberOfLevels;
}
- T Sample(SamplerState samplerState, const float3& loc) const { T out; texture->Sample(samplerState, &loc.x, &out); return out; }
- T SampleLevel(SamplerState samplerState, const float3& loc, float level) { T out; texture->SampleLevel(samplerState, &loc.x, level, &out); return out; }
+ T Sample(SamplerState samplerState, const float3& loc) const { T out; texture->Sample(samplerState, &loc.x, &out, sizeof(out)); return out; }
+ T SampleLevel(SamplerState samplerState, const float3& loc, float level) { T out; texture->SampleLevel(samplerState, &loc.x, level, &out, sizeof(out)); return out; }
ITexture* texture;
};
@@ -611,9 +611,9 @@ struct Texture1DArray
*outElements = dims.arrayElementCount;
}
- T Load(const int3& loc) const { T out; texture->Load(&loc.x, &out); return out; }
- T Sample(SamplerState samplerState, const float2& loc) const { T out; texture->Sample(samplerState, &loc.x, &out); return out; }
- T SampleLevel(SamplerState samplerState, const float2& loc, float level) { T out; texture->SampleLevel(samplerState, &loc.x, level, &out); return out; }
+ T Load(const int3& loc) const { T out; texture->Load(&loc.x, &out, sizeof(out)); return out; }
+ T Sample(SamplerState samplerState, const float2& loc) const { T out; texture->Sample(samplerState, &loc.x, &out, sizeof(out)); return out; }
+ T SampleLevel(SamplerState samplerState, const float2& loc, float level) { T out; texture->SampleLevel(samplerState, &loc.x, level, &out, sizeof(out)); return out; }
ITexture* texture;
};
@@ -653,9 +653,9 @@ struct Texture2DArray
*outNumberOfLevels = dims.numberOfLevels;
}
- T Load(const int4& loc) const { T out; texture->Load(&loc.x, &out); return out; }
- T Sample(SamplerState samplerState, const float3& loc) const { T out; texture->Sample(samplerState, &loc.x, &out); return out; }
- T SampleLevel(SamplerState samplerState, const float3& loc, float level) { T out; texture->SampleLevel(samplerState, &loc.x, level, &out); return out; }
+ T Load(const int4& loc) const { T out; texture->Load(&loc.x, &out, sizeof(out)); return out; }
+ T Sample(SamplerState samplerState, const float3& loc) const { T out; texture->Sample(samplerState, &loc.x, &out, sizeof(out)); return out; }
+ T SampleLevel(SamplerState samplerState, const float3& loc, float level) { T out; texture->SampleLevel(samplerState, &loc.x, level, &out, sizeof(out)); return out; }
ITexture* texture;
};
@@ -695,20 +695,16 @@ struct TextureCubeArray
*outNumberOfLevels = dims.numberOfLevels;
}
- T Sample(SamplerState samplerState, const float4& loc) const { T out; texture->Sample(samplerState, &loc.x, &out); return out; }
- T SampleLevel(SamplerState samplerState, const float4& loc, float level) { T out; texture->SampleLevel(samplerState, &loc.x, level, &out); return out; }
+ T Sample(SamplerState samplerState, const float4& loc) const { T out; texture->Sample(samplerState, &loc.x, &out, sizeof(out)); return out; }
+ T SampleLevel(SamplerState samplerState, const float4& loc, float level) { T out; texture->SampleLevel(samplerState, &loc.x, level, &out, sizeof(out)); return out; }
ITexture* texture;
};
/* !!!!!!!!!!!!!!!!!!!!!!!!!!! RWTexture !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! */
-struct IRWTexture
+struct IRWTexture : ITexture
{
- virtual TextureDimensions GetDimensions(int mipLevel = -1) = 0;
-
- /// Load at specified location.
- virtual void Load(const int32_t* loc, void* out) = 0;
/// Get the reference to the element at loc.
virtual void* refAt(const uint32_t* loc) = 0;
};
@@ -722,7 +718,7 @@ struct RWTexture1D
void GetDimensions(float* outWidth) { *outWidth = texture->GetDimensions().width; }
void GetDimensions(uint32_t mipLevel, float* outWidth, float* outNumberOfLevels) { auto dims = texture->GetDimensions(mipLevel); *outWidth = dims.width; *outNumberOfLevels = dims.numberOfLevels; }
- T Load(int32_t loc) const { T out; texture->Load(&loc, &out); return out; }
+ T Load(int32_t loc) const { T out; texture->Load(&loc, &out, sizeof(out)); return out; }
T& operator[](uint32_t loc) { return *(T*)texture->refAt(&loc); }
IRWTexture* texture;
};
@@ -757,7 +753,7 @@ struct RWTexture2D
*outNumberOfLevels = dims.numberOfLevels;
}
- T Load(const int2& loc) const { T out; texture->Load(&loc.x, &out); return out; }
+ T Load(const int2& loc) const { T out; texture->Load(&loc.x, &out, sizeof(out)); return out; }
T& operator[](const uint2& loc) { return *(T*)texture->refAt(&loc.x); }
IRWTexture* texture;
};
@@ -796,7 +792,7 @@ struct RWTexture3D
*outNumberOfLevels = dims.numberOfLevels;
}
- T Load(const int3& loc) const { T out; texture->Load(&loc.x, &out); return out; }
+ T Load(const int3& loc) const { T out; texture->Load(&loc.x, &out, sizeof(out)); return out; }
T& operator[](const uint3& loc) { return *(T*)texture->refAt(&loc.x); }
IRWTexture* texture;
};
@@ -832,7 +828,7 @@ struct RWTexture1DArray
*outNumberOfLevels = dims.numberOfLevels;
}
- T Load(int2 loc) const { T out; texture->Load(&loc.x, &out); return out; }
+ T Load(int2 loc) const { T out; texture->Load(&loc.x, &out, sizeof(out)); return out; }
T& operator[](uint2 loc) { return *(T*)texture->refAt(&loc.x); }
IRWTexture* texture;
@@ -872,7 +868,7 @@ struct RWTexture2DArray
*outNumberOfLevels = dims.numberOfLevels;
}
- T Load(const int3& loc) const { T out; texture->Load(&loc.x, &out); return out; }
+ T Load(const int3& loc) const { T out; texture->Load(&loc.x, &out, sizeof(out)); return out; }
T& operator[](const uint3& loc) { return *(T*)texture->refAt(&loc.x); }
IRWTexture* texture;
diff --git a/premake5.lua b/premake5.lua
index a5a94971d..13debb57b 100644
--- a/premake5.lua
+++ b/premake5.lua
@@ -767,6 +767,7 @@ tool "gfx"
files {"slang-gfx.h"}
-- Will compile across targets
+ addSourceDir "tools/gfx/cpu"
addSourceDir "tools/gfx/nvapi"
-- To special case that we may be building using cygwin on windows. If 'true windows' we build for dx12/vk and run the script
diff --git a/slang.h b/slang.h
index f0abc135a..b354e6b12 100644
--- a/slang.h
+++ b/slang.h
@@ -4055,6 +4055,23 @@ namespace slang
virtual SLANG_NO_THROW SlangResult SLANG_MCALL link(
IComponentType** outLinkedComponentType,
ISlangBlob** outDiagnostics = nullptr) = 0;
+
+ /** Get entry point 'callable' functions accessible through the ISlangSharedLibrary interface.
+
+ The functions remain in scope as long as the ISlangSharedLibrary interface is in scope.
+
+ NOTE! Requires a compilation target of SLANG_HOST_CALLABLE.
+
+ @param entryPointIndex The index of the entry point to get code for.
+ @param targetIndex The index of the target to get code for (default: zero).
+ @param outSharedLibrary A pointer to a ISharedLibrary interface which functions can be queried on.
+ @returns A `SlangResult` to indicate success or failure.
+ */
+ virtual SLANG_NO_THROW SlangResult SLANG_MCALL getEntryPointHostCallable(
+ int entryPointIndex,
+ int targetIndex,
+ ISlangSharedLibrary** outSharedLibrary,
+ slang::IBlob** outDiagnostics = 0) = 0;
};
#define SLANG_UUID_IComponentType { 0x5bc42be8, 0x5c50, 0x4929, { 0x9e, 0x5e, 0xd1, 0x5e, 0x7c, 0x24, 0x1, 0x5f } };
diff --git a/source/slang/slang-compiler.h b/source/slang/slang-compiler.h
index 0eb6f992d..01bdd8502 100755
--- a/source/slang/slang-compiler.h
+++ b/source/slang/slang-compiler.h
@@ -294,6 +294,11 @@ namespace Slang
SLANG_NO_THROW SlangResult SLANG_MCALL link(
slang::IComponentType** outLinkedComponentType,
ISlangBlob** outDiagnostics) SLANG_OVERRIDE;
+ SLANG_NO_THROW SlangResult SLANG_MCALL getEntryPointHostCallable(
+ int entryPointIndex,
+ int targetIndex,
+ ISlangSharedLibrary** outSharedLibrary,
+ slang::IBlob** outDiagnostics) SLANG_OVERRIDE;
/// Get the linkage (aka "session" in the public API) for this component type.
Linkage* getLinkage() { return m_linkage; }
@@ -705,6 +710,15 @@ namespace Slang
outDiagnostics);
}
+ SLANG_NO_THROW SlangResult SLANG_MCALL getEntryPointHostCallable(
+ int entryPointIndex,
+ int targetIndex,
+ ISlangSharedLibrary** outSharedLibrary,
+ slang::IBlob** outDiagnostics) SLANG_OVERRIDE
+ {
+ return Super::getEntryPointHostCallable(entryPointIndex, targetIndex, outSharedLibrary, outDiagnostics);
+ }
+
/// Create an entry point that refers to the given function.
static RefPtr<EntryPoint> create(
Linkage* linkage,
@@ -912,6 +926,15 @@ namespace Slang
outDiagnostics);
}
+ SLANG_NO_THROW SlangResult SLANG_MCALL getEntryPointHostCallable(
+ int entryPointIndex,
+ int targetIndex,
+ ISlangSharedLibrary** outSharedLibrary,
+ slang::IBlob** outDiagnostics) SLANG_OVERRIDE
+ {
+ return Super::getEntryPointHostCallable(entryPointIndex, targetIndex, outSharedLibrary, outDiagnostics);
+ }
+
SLANG_NO_THROW SlangResult SLANG_MCALL findEntryPointByName(
char const* name,
slang::IEntryPoint** outEntryPoint) SLANG_OVERRIDE
diff --git a/source/slang/slang.cpp b/source/slang/slang.cpp
index 2ad5d25f8..ae3a1f419 100644
--- a/source/slang/slang.cpp
+++ b/source/slang/slang.cpp
@@ -2733,6 +2733,33 @@ SLANG_NO_THROW SlangResult SLANG_MCALL ComponentType::getEntryPointCode(
return SLANG_OK;
}
+SLANG_NO_THROW SlangResult SLANG_MCALL ComponentType::getEntryPointHostCallable(
+ int entryPointIndex,
+ int targetIndex,
+ ISlangSharedLibrary** outSharedLibrary,
+ slang::IBlob** outDiagnostics)
+{
+ auto linkage = getLinkage();
+ if(targetIndex < 0 || targetIndex >= linkage->targets.getCount())
+ return SLANG_E_INVALID_ARG;
+ auto target = linkage->targets[targetIndex];
+
+ auto targetProgram = getTargetProgram(target);
+
+ DiagnosticSink sink(linkage->getSourceManager(), Lexer::sourceLocationLexer);
+ auto& entryPointResult = targetProgram->getOrCreateEntryPointResult(entryPointIndex, &sink);
+ sink.getBlobIfNeeded(outDiagnostics);
+
+ if(entryPointResult.format == ResultFormat::None )
+ return SLANG_FAIL;
+
+ ComPtr<ISlangSharedLibrary> sharedLibrary;
+ SLANG_RETURN_ON_FAIL(entryPointResult.getSharedLibrary(sharedLibrary));
+
+ *outSharedLibrary = sharedLibrary.detach();
+ return SLANG_OK;
+}
+
RefPtr<ComponentType> ComponentType::specialize(
SpecializationArg const* inSpecializationArgs,
SlangInt specializationArgCount,
@@ -4385,7 +4412,10 @@ SlangReflection* EndToEndCompileRequest::getReflection()
auto targetReq = linkage->targets[targetIndex];
auto targetProgram = program->getTargetProgram(targetReq);
- auto programLayout = targetProgram->getExistingLayout();
+
+
+ DiagnosticSink sink(linkage->getSourceManager(), Lexer::sourceLocationLexer);
+ auto programLayout = targetProgram->getOrCreateLayout(&sink);
return (SlangReflection*)programLayout;
}
diff --git a/tests/compute/dynamic-dispatch-11.slang b/tests/compute/dynamic-dispatch-11.slang
index 964431aaf..d6f64aa99 100644
--- a/tests/compute/dynamic-dispatch-11.slang
+++ b/tests/compute/dynamic-dispatch-11.slang
@@ -1,8 +1,12 @@
// Test using interface typed shader parameters with dynamic dispatch.
+// TODO: This test has been disabled because it relies on
+// `ConstantBuffer<IInterface>` which we expect to change
+// implementation approaches for soon.
+
//DISABLE_TEST(compute):COMPARE_COMPUTE:-dx11 -shaderobj
//DISABLE_TEST(compute):COMPARE_COMPUTE:-vk -shaderobj
-//TEST(compute):COMPARE_COMPUTE:-cpu -xslang -disable-specialization -shaderobj
+//DISABLE_TEST(compute):COMPARE_COMPUTE:-cpu -xslang -disable-specialization -shaderobj
//DISABLE_TEST(compute):COMPARE_COMPUTE:-cuda -xslang -disable-specialization -shaderobj
[anyValueSize(8)]
diff --git a/tests/compute/performance-profile.slang b/tests/compute/performance-profile.slang
index d8b9e31ae..24b0d04bd 100644
--- a/tests/compute/performance-profile.slang
+++ b/tests/compute/performance-profile.slang
@@ -1,5 +1,5 @@
//TEST(compute):PERFORMANCE_PROFILE:-cpu -compute -compile-arg -O3 -compute-dispatch 256,1,1 -shaderobj
-//TEST(compute):PERFORMANCE_PROFILE:-cpu -compute -source-language cpp -compile-arg -O3 -compute-dispatch 256,1,1 -shaderobj
+//TEST(compute):PERFORMANCE_PROFILE:-cpu -compute -source-language cpp -compile-arg -O3 -compute-dispatch 256,1,1
//TEST(compute):PERFORMANCE_PROFILE:-slang -compute -compute-dispatch 256,1,1 -shaderobj
//TEST(compute):PERFORMANCE_PROFILE:-slang -compute -dx12 -compute-dispatch 256,1,1 -shaderobj
//TEST(compute, vulkan):PERFORMANCE_PROFILE:-vk -compute -compute-dispatch 256,1,1 -shaderobj
diff --git a/tests/compute/unbounded-array-of-array-syntax.slang b/tests/compute/unbounded-array-of-array-syntax.slang
index 6a5f4ea6e..08ed17106 100644
--- a/tests/compute/unbounded-array-of-array-syntax.slang
+++ b/tests/compute/unbounded-array-of-array-syntax.slang
@@ -1,5 +1,5 @@
//IGNORE_TEST:CPU_REFLECTION: -profile cs_5_0 -entry computeMain -target cpp
-//TEST(compute):COMPARE_COMPUTE_EX:-cpu -compute -shaderobj
+//TEST(compute):COMPARE_COMPUTE_EX:-cpu -compute
//TEST:CROSS_COMPILE:-target dxbc-assembly -entry computeMain -profile cs_5_1
//TEST:CROSS_COMPILE:-target spirv-assembly -entry computeMain -profile cs_5_1
//TEST(compute):COMPARE_COMPUTE_EX:-cuda -compute
diff --git a/tools/gfx/cpu/render-cpu.cpp b/tools/gfx/cpu/render-cpu.cpp
new file mode 100644
index 000000000..faa6b3092
--- /dev/null
+++ b/tools/gfx/cpu/render-cpu.cpp
@@ -0,0 +1,1796 @@
+// render-cpu.cpp
+#include "render-cpu.h"
+
+#include "slang.h"
+#include "slang-com-ptr.h"
+#include "slang-com-helper.h"
+#include "core/slang-basic.h"
+#include "core/slang-blob.h"
+
+#include "../command-writer.h"
+#include "../renderer-shared.h"
+#include "../slang-context.h"
+
+#define SLANG_PRELUDE_NAMESPACE slang_prelude
+#include "prelude/slang-cpp-types.h"
+
+namespace gfx
+{
+using namespace Slang;
+
+class CPUBufferResource : public BufferResource
+{
+public:
+ CPUBufferResource(const Desc& _desc)
+ : BufferResource(_desc)
+ {}
+
+ ~CPUBufferResource()
+ {
+ if (m_data)
+ {
+ free(m_data);
+ }
+ }
+
+ SlangResult init()
+ {
+ m_data = malloc(m_desc.sizeInBytes);
+ if(!m_data) return SLANG_E_OUT_OF_MEMORY;
+ return SLANG_OK;
+ }
+
+ SlangResult setData(size_t offset, size_t size, void const* data)
+ {
+ memcpy((char*)m_data + offset, data, size);
+ return SLANG_OK;
+ }
+
+ void* m_data = nullptr;
+};
+
+struct CPUTextureBaseShapeInfo
+{
+ int32_t rank;
+ int32_t baseCoordCount;
+ int32_t implicitArrayElementCount;
+};
+
+static const CPUTextureBaseShapeInfo kCPUTextureBaseShapeInfos[(int)ITextureResource::Type::CountOf] =
+{
+ /* Unknown */ { 0, 0, 0 },
+ /* Buffer */ { 1, 1, 1 },
+ /* Texture1D */ { 1, 1, 1 },
+ /* Texture2D */ { 2, 2, 1 },
+ /* Texture3D */ { 3, 3, 1 },
+ /* TextureCube */ { 2, 3, 6 },
+};
+
+static CPUTextureBaseShapeInfo const* _getBaseShapeInfo(ITextureResource::Type baseShape)
+{
+ return &kCPUTextureBaseShapeInfos[(int)baseShape];
+}
+
+typedef void (*CPUTextureUnpackFunc)(void const* texelData, void* outData, size_t outSize);
+
+struct CPUTextureFormatInfo
+{
+ CPUTextureUnpackFunc unpackFunc;
+};
+
+template<int N>
+void _unpackFloatTexel(void const* texelData, void* outData, size_t outSize)
+{
+ auto input = (float const*) texelData;
+
+ float temp[4] = { 0.0f, 0.0f, 0.0f, 1.0f };
+ for(int i = 0; i < N; ++i)
+ temp[i] = input[i];
+
+ memcpy(outData, temp, outSize);
+}
+
+static inline float _unpackUnorm8Value(uint8_t value)
+{
+ return value / 255.0f;
+}
+
+template<int N>
+void _unpackUnorm8Texel(void const* texelData, void* outData, size_t outSize)
+{
+ auto input = (uint8_t const*) texelData;
+
+ float temp[4] = { 0.0f, 0.0f, 0.0f, 1.0f };
+ for(int i = 0; i < N; ++i)
+ temp[i] = _unpackUnorm8Value(input[i]);
+
+ memcpy(outData, temp, outSize);
+}
+
+void _unpackUnormBGRA8Texel(void const* texelData, void* outData, size_t outSize)
+{
+ auto input = (uint8_t const*) texelData;
+
+ float temp[4];
+ temp[0] = _unpackUnorm8Value(input[2]);
+ temp[1] = _unpackUnorm8Value(input[1]);
+ temp[2] = _unpackUnorm8Value(input[0]);
+ temp[3] = _unpackUnorm8Value(input[3]);
+
+ memcpy(outData, temp, outSize);
+}
+
+template<int N>
+void _unpackUInt16Texel(void const* texelData, void* outData, size_t outSize)
+{
+ auto input = (uint16_t const*) texelData;
+
+ uint32_t temp[4] = { 0, 0, 0, 0 };
+ for(int i = 0; i < N; ++i)
+ temp[i] = input[i];
+
+ memcpy(outData, temp, outSize);
+}
+
+template<int N>
+void _unpackUInt32Texel(void const* texelData, void* outData, size_t outSize)
+{
+ auto input = (uint32_t const*) texelData;
+
+ uint32_t temp[4] = { 0, 0, 0, 0 };
+ for(int i = 0; i < N; ++i)
+ temp[i] = input[i];
+
+ memcpy(outData, temp, outSize);
+}
+
+#define TEXTURE_FORMAT_INFO(FORMAT) static const CPUTextureFormatInfo kCPUTextureFormatInfo_##FORMAT
+
+TEXTURE_FORMAT_INFO(RGBA_Float32) = { &_unpackFloatTexel<4> };
+TEXTURE_FORMAT_INFO(RGB_Float32) = { &_unpackFloatTexel<3> };
+TEXTURE_FORMAT_INFO(RG_Float32) = { &_unpackFloatTexel<2> };
+TEXTURE_FORMAT_INFO(R_Float32) = { &_unpackFloatTexel<1> };
+TEXTURE_FORMAT_INFO(RGBA_Unorm_UInt8) = { &_unpackUnorm8Texel<4> };
+TEXTURE_FORMAT_INFO(BGRA_Unorm_UInt8) = { &_unpackUnormBGRA8Texel };
+TEXTURE_FORMAT_INFO(R_UInt16) = { &_unpackUInt16Texel<1> };
+TEXTURE_FORMAT_INFO(R_UInt32) = { &_unpackUInt32Texel<1> };
+TEXTURE_FORMAT_INFO(D_Float32) = { &_unpackFloatTexel<1> };
+
+#undef TEXTURE_FORMAT_INFO
+
+static CPUTextureFormatInfo const* _getFormatInfo(Format format)
+{
+ switch(format)
+ {
+ case Format::D_Unorm24_S8:
+ default:
+ return nullptr;
+
+
+#define CASE(FORMAT) case Format::FORMAT: return &kCPUTextureFormatInfo_##FORMAT;
+ CASE(RGBA_Float32)
+ CASE(RGB_Float32)
+ CASE(RG_Float32)
+ CASE(R_Float32)
+ CASE(RGBA_Unorm_UInt8)
+ CASE(BGRA_Unorm_UInt8)
+ CASE(R_UInt16)
+ CASE(R_UInt32)
+ CASE(D_Float32)
+
+#undef CASE
+ }
+}
+
+class CPUTextureResource : public TextureResource
+{
+ enum { kMaxRank = 3 };
+
+public:
+ CPUTextureResource(const TextureResource::Desc& desc)
+ : TextureResource(desc)
+ {}
+ ~CPUTextureResource()
+ {
+ }
+
+ Result init(ITextureResource::SubresourceData const* initData)
+ {
+ auto desc = m_desc;
+
+ // The format of the texture will determine the
+ // size of the texels we allocate.
+ //
+ // TODO: Compressed formats usually work in terms
+ // of a fixed block size, so that we cannot actually
+ // compute a simple `texelSize` like this. Instead
+ // we should be computing a `blockSize` and then
+ // a `blockExtents` value that gives the extent
+ // in texels of each block. For uncompressed formats
+ // the block extents would be 1 along each axis.
+ //
+ auto format = desc.format;
+ auto texelSize = gfxGetFormatSize(format);
+ m_texelSize = (int32_t) texelSize;
+
+ int32_t formatBlockSize[kMaxRank] = { 1, 1, 1 };
+
+ auto baseShapeInfo = _getBaseShapeInfo(desc.type);
+ m_baseShape = baseShapeInfo;
+ if(!baseShapeInfo)
+ return SLANG_FAIL;
+
+ auto formatInfo = _getFormatInfo(desc.format);
+ m_formatInfo = formatInfo;
+ if(!formatInfo)
+ return SLANG_FAIL;
+
+ int32_t rank = baseShapeInfo->rank;
+ int32_t effectiveArrayElementCount = desc.arraySize ? desc.arraySize : 1;
+ effectiveArrayElementCount *= baseShapeInfo->implicitArrayElementCount;
+ m_effectiveArrayElementCount = effectiveArrayElementCount;
+
+ int32_t extents[kMaxRank];
+ extents[0] = desc.size.width;
+ extents[1] = desc.size.height;
+ extents[2] = desc.size.depth;
+
+ for(int32_t axis = rank; axis < kMaxRank; ++axis)
+ extents[axis] = 1;
+
+ int32_t levelCount = desc.numMipLevels;
+
+ m_mipLevels.setCount(levelCount);
+
+ int64_t totalDataSize = 0;
+ for( int32_t levelIndex = 0; levelIndex < levelCount; ++levelIndex )
+ {
+ auto& level = m_mipLevels[levelIndex];
+
+ for( int32_t axis = 0; axis < kMaxRank; ++axis )
+ {
+ int32_t extent = extents[axis] >> levelIndex;
+ if(extent < 1) extent = 1;
+ level.extents[axis] = extent;
+ }
+
+ level.strides[0] = texelSize;
+ for( int32_t axis = 1; axis < kMaxRank+1; ++axis)
+ {
+ level.strides[axis] = level.strides[axis-1]*level.extents[axis-1];
+ }
+
+ int64_t levelDataSize = texelSize;
+ levelDataSize *= effectiveArrayElementCount;
+ for( int32_t axis = 0; axis < rank; ++axis)
+ levelDataSize *= int64_t(level.extents[axis]);
+
+ level.offset = totalDataSize;
+ totalDataSize += levelDataSize;
+ }
+
+ void* textureData = malloc(totalDataSize);
+ m_data = textureData;
+
+ if( initData )
+ {
+ int32_t subResourceCounter = 0;
+ for(int32_t arrayElementIndex = 0; arrayElementIndex < effectiveArrayElementCount; ++arrayElementIndex)
+ {
+ for(int32_t mipLevel = 0; mipLevel < m_desc.numMipLevels; ++mipLevel)
+ {
+ int32_t subResourceIndex = subResourceCounter++;
+
+ auto dstRowStride = m_mipLevels[mipLevel].strides[1];
+ auto dstLayerStride = m_mipLevels[mipLevel].strides[2];
+ auto dstArrayStride = m_mipLevels[mipLevel].strides[3];
+
+ auto textureRowSize = m_mipLevels[mipLevel].extents[0]*texelSize;
+
+ auto rowCount = m_mipLevels[mipLevel].extents[1];
+ auto depthLayerCount = m_mipLevels[mipLevel].extents[2];
+
+ auto& srcImage = initData[subResourceIndex];
+ ptrdiff_t srcRowStride = ptrdiff_t(srcImage.strideY);
+ ptrdiff_t srcLayerStride = ptrdiff_t(srcImage.strideZ);
+
+ char* dstLevel = (char*)textureData + m_mipLevels[mipLevel].offset;
+ char* dstImage = dstLevel + dstArrayStride*arrayElementIndex;
+
+ const char* srcLayer = (const char*) srcImage.data;
+ char* dstLayer = dstImage;
+
+ for(int32_t depthLayer = 0; depthLayer < depthLayerCount; ++depthLayer)
+ {
+ const char* srcRow = srcLayer;
+ char* dstRow = dstLayer;
+
+ for(int32_t row = 0; row < rowCount; ++row)
+ {
+ memcpy(dstRow, srcRow, textureRowSize);
+
+ srcRow += srcRowStride;
+ dstRow += dstRowStride;
+ }
+
+ srcLayer += srcLayerStride;
+ dstLayer += dstLayerStride;
+ }
+ }
+ }
+ }
+
+ return SLANG_OK;
+ }
+
+ Desc const& _getDesc() { return m_desc; }
+ Format getFormat() { return m_desc.format; }
+ int32_t getRank() { return m_baseShape->rank; }
+
+ CPUTextureBaseShapeInfo const* m_baseShape;
+ CPUTextureFormatInfo const* m_formatInfo;
+ int32_t m_effectiveArrayElementCount = 0;
+ int32_t m_texelSize = 0;
+
+ struct MipLevel
+ {
+ int32_t extents[kMaxRank];
+ int64_t strides[kMaxRank+1];
+ int64_t offset;
+ };
+ List<MipLevel> m_mipLevels;
+ void* m_data = nullptr;
+};
+
+class CPUResourceView : public IResourceView, public RefObject
+{
+public:
+ enum class Kind
+ {
+ Buffer,
+ Texture,
+ };
+
+ SLANG_REF_OBJECT_IUNKNOWN_ALL
+ IResourceView* getInterface(const Guid& guid)
+ {
+ if (guid == GfxGUID::IID_ISlangUnknown || guid == GfxGUID::IID_IResourceView)
+ return static_cast<IResourceView*>(this);
+ return nullptr;
+ }
+
+ Kind getViewKind() const { return m_kind; }
+ Desc const& getDesc() const { return m_desc; }
+
+protected:
+ CPUResourceView(Kind kind, Desc const& desc)
+ : m_kind(kind)
+ , m_desc(desc)
+ {}
+
+private:
+ Kind m_kind;
+ Desc m_desc;
+};
+
+class CPUBufferView : public CPUResourceView
+{
+public:
+ CPUBufferView(Desc const& desc, CPUBufferResource* buffer)
+ : CPUResourceView(Kind::Buffer, desc)
+ , m_buffer(buffer)
+ {}
+
+ CPUBufferResource* getBuffer() const { return m_buffer; }
+
+private:
+ RefPtr<CPUBufferResource> m_buffer;
+};
+
+class CPUTextureView : public CPUResourceView, public slang_prelude::IRWTexture
+{
+public:
+ CPUTextureView(Desc const& desc, CPUTextureResource* texture)
+ : CPUResourceView(Kind::Texture, desc)
+ , m_texture(texture)
+ {}
+
+ CPUTextureResource* getTexture() const { return m_texture; }
+
+ //
+ // ITexture interface
+ //
+
+ slang_prelude::TextureDimensions GetDimensions(int mipLevel = -1) SLANG_OVERRIDE
+ {
+ slang_prelude::TextureDimensions dimensions = {};
+
+ CPUTextureResource* texture = m_texture;
+ auto& desc = texture->_getDesc();
+ auto baseShape = texture->m_baseShape;
+
+ dimensions.arrayElementCount = desc.arraySize;
+ dimensions.numberOfLevels = desc.numMipLevels;
+ dimensions.shape = baseShape->rank;
+ dimensions.width = desc.size.width;
+ dimensions.height = desc.size.height;
+ dimensions.depth = desc.size.depth;
+
+ return dimensions;
+ }
+
+ void Load(const int32_t* texelCoords, void* outData, size_t dataSize) SLANG_OVERRIDE
+ {
+ void* texelPtr = _getTexelPtr(texelCoords);
+
+ m_texture->m_formatInfo->unpackFunc(texelPtr, outData, dataSize);
+ }
+
+ void Sample(slang_prelude::SamplerState samplerState, const float* coords, void* outData, size_t dataSize) SLANG_OVERRIDE
+ {
+ // We have no access to information from fragment quads, so we cannot
+ // compute the finite-difference derivatives needed from `coords`.
+ //
+ // The only reasonable thing to do is to sample mip level zero.
+ //
+ SampleLevel(samplerState, coords, 0.0f, outData, dataSize);
+ }
+
+ void SampleLevel(slang_prelude::SamplerState samplerState, const float* coords, float level, void* outData, size_t dataSize) SLANG_OVERRIDE
+ {
+ CPUTextureResource* texture = m_texture;
+ auto baseShape = texture->m_baseShape;
+ auto& desc = texture->_getDesc();
+ int32_t rank = baseShape->rank;
+ int32_t baseCoordCount = baseShape->baseCoordCount;
+
+ int32_t integerMipLevel = int32_t(level + 0.5f);
+ if(integerMipLevel >= desc.numMipLevels) integerMipLevel = desc.numMipLevels-1;
+ if(integerMipLevel < 0) integerMipLevel = 0;
+
+ auto& mipLevelInfo = texture->m_mipLevels[integerMipLevel];
+
+ bool isArray = (desc.arraySize != 0) || (desc.type == ITextureResource::Type::TextureCube);
+ int32_t effectiveArrayElementCount = texture->m_effectiveArrayElementCount;
+ int32_t coordIndex = baseCoordCount;
+ int32_t elementIndex = 0;
+ if( isArray )
+ {
+ elementIndex = int32_t(coords[coordIndex++] + 0.5f);
+ }
+ if(elementIndex >= effectiveArrayElementCount) elementIndex = effectiveArrayElementCount-1;
+ if(elementIndex < 0) elementIndex = 0;
+
+ // Note: for now we are just going to do nearest-neighbor sampling
+ //
+ int64_t texelOffset = mipLevelInfo.offset;
+ texelOffset += elementIndex * mipLevelInfo.strides[3];
+ for(int32_t axis = 0; axis < rank; ++axis)
+ {
+ int32_t extent = mipLevelInfo.extents[axis];
+
+ float coord = coords[axis];
+
+ // TODO: deal with wrap/clamp/repeat if `coord < 0` or `coord > 1`
+
+ int32_t integerCoord = int32_t(coord*(extent-1) + 0.5f);
+
+ if(integerCoord >= extent) integerCoord = extent-1;
+ if(integerCoord < 0) integerCoord = 0;
+
+ texelOffset += integerCoord * mipLevelInfo.strides[axis];
+ }
+
+ auto texelPtr = (char const*)texture->m_data + texelOffset;
+
+ m_texture->m_formatInfo->unpackFunc(texelPtr, outData, dataSize);
+ }
+
+ //
+ // IRWTexture interface
+ //
+
+ void* refAt(const uint32_t* texelCoords) SLANG_OVERRIDE
+ {
+ return _getTexelPtr((int32_t const*)texelCoords);
+ }
+
+private:
+ RefPtr<CPUTextureResource> m_texture;
+
+ void* _getTexelPtr(int32_t const* texelCoords)
+ {
+ CPUTextureResource* texture = m_texture;
+ auto baseShape = texture->m_baseShape;
+ auto& desc = texture->_getDesc();
+
+ int32_t rank = baseShape->rank;
+ int32_t baseCoordCount = baseShape->baseCoordCount;
+
+ bool isArray = (desc.arraySize != 0) || (desc.type == ITextureResource::Type::TextureCube);
+ bool isMultisample = desc.sampleDesc.numSamples > 1;
+ bool isBuffer = desc.type == ITextureResource::Type::Buffer;
+ bool hasMipLevels = !(isMultisample || isBuffer);
+
+ int32_t effectiveArrayElementCount = texture->m_effectiveArrayElementCount;
+
+ int32_t coordIndex = baseCoordCount;
+ int32_t elementIndex = 0;
+ if( isArray )
+ {
+ elementIndex = texelCoords[coordIndex++];
+ }
+ if(elementIndex >= effectiveArrayElementCount) elementIndex = effectiveArrayElementCount-1;
+ if(elementIndex < 0) elementIndex = 0;
+
+ int32_t mipLevel = 0;
+ if(!hasMipLevels)
+ {
+ mipLevel = texelCoords[coordIndex++];
+ }
+ if(mipLevel >= desc.numMipLevels) mipLevel = desc.numMipLevels-1;
+ if(mipLevel < 0) mipLevel = 0;
+
+ auto& mipLevelInfo = texture->m_mipLevels[mipLevel];
+
+ int64_t texelOffset = mipLevelInfo.offset;
+ texelOffset += elementIndex * mipLevelInfo.strides[3];
+ for(int32_t axis = 0; axis < rank; ++axis)
+ {
+ int32_t coord = texelCoords[axis];
+ if(coord >= mipLevelInfo.extents[axis]) coord = mipLevelInfo.extents[axis]-1;
+ if(coord < 0) coord = 0;
+
+ texelOffset += texelCoords[axis] * mipLevelInfo.strides[axis];
+ }
+
+ return (char*)texture->m_data + texelOffset;
+ }
+};
+
+class CPUShaderObjectLayout : public ShaderObjectLayoutBase
+{
+public:
+
+ // TODO: Once memory lifetime stuff is handled, there is
+ // no specific need to even track binding or sub-object
+ // ranges for CPU.
+
+ struct BindingRangeInfo
+ {
+ slang::BindingType bindingType;
+ Index count;
+ Index baseIndex; // Flat index for sub-ojects
+
+ // TODO: The `uniformOffset` field should be removed,
+ // since it cannot be supported by the Slang reflection
+ // API once we fix some design issues.
+ //
+ // It is only being used today for pre-allocation of sub-objects
+ // for constant buffers and parameter blocks (which should be
+ // deprecated/removed anyway).
+ //
+ // Note: We would need to bring this field back, plus
+ // a lot of other complexity, if we ever want to support
+ // setting of resources/buffers directly by a binding
+ // range index and array index.
+ //
+ Index uniformOffset; // Uniform offset for a resource typed field.
+ };
+
+ struct SubObjectRangeInfo
+ {
+ RefPtr<CPUShaderObjectLayout> layout;
+ Index bindingRangeIndex;
+ };
+
+ size_t m_size = 0;
+ List<SubObjectRangeInfo> subObjectRanges;
+ List<BindingRangeInfo> m_bindingRanges;
+
+ Index m_subObjectCount = 0;
+ Index m_resourceCount = 0;
+
+ CPUShaderObjectLayout(RendererBase* renderer, slang::TypeLayoutReflection* layout)
+ {
+ initBase(renderer, layout);
+
+ Index subObjectCount = 0;
+ Index resourceCount = 0;
+
+ m_elementTypeLayout = _unwrapParameterGroups(layout);
+ m_size = m_elementTypeLayout->getSize();
+
+ // Compute the binding ranges that are used to store
+ // the logical contents of the object in memory. These will relate
+ // to the descriptor ranges in the various sets, but not always
+ // in a one-to-one fashion.
+
+ SlangInt bindingRangeCount = m_elementTypeLayout->getBindingRangeCount();
+ for (SlangInt r = 0; r < bindingRangeCount; ++r)
+ {
+ slang::BindingType slangBindingType = m_elementTypeLayout->getBindingRangeType(r);
+ SlangInt count = m_elementTypeLayout->getBindingRangeBindingCount(r);
+ slang::TypeLayoutReflection* slangLeafTypeLayout =
+ m_elementTypeLayout->getBindingRangeLeafTypeLayout(r);
+
+ SlangInt descriptorSetIndex = m_elementTypeLayout->getBindingRangeDescriptorSetIndex(r);
+ SlangInt rangeIndexInDescriptorSet =
+ m_elementTypeLayout->getBindingRangeFirstDescriptorRangeIndex(r);
+
+ // TODO: This logic assumes that for any binding range that might consume
+ // multiple kinds of resources, the descriptor range for its uniform
+ // usage will be the first one in the range.
+ //
+ // We need to decide whether that assumption is one we intend to support
+ // applications making, or whether they should be forced to perform a
+ // linear search over the descriptor ranges for a specific binding range.
+ //
+ auto uniformOffset = m_elementTypeLayout->getDescriptorSetDescriptorRangeIndexOffset(
+ descriptorSetIndex, rangeIndexInDescriptorSet);
+
+ Index baseIndex = 0;
+ switch (slangBindingType)
+ {
+ case slang::BindingType::ConstantBuffer:
+ case slang::BindingType::ParameterBlock:
+ case slang::BindingType::ExistentialValue:
+ baseIndex = subObjectCount;
+ subObjectCount += count;
+ break;
+
+ default:
+ baseIndex = resourceCount;
+ resourceCount += count;
+ break;
+ }
+
+ BindingRangeInfo bindingRangeInfo;
+ bindingRangeInfo.bindingType = slangBindingType;
+ bindingRangeInfo.count = count;
+ bindingRangeInfo.baseIndex = baseIndex;
+ bindingRangeInfo.uniformOffset = uniformOffset;
+ m_bindingRanges.add(bindingRangeInfo);
+ }
+
+ m_subObjectCount = subObjectCount;
+ m_resourceCount = resourceCount;
+
+ SlangInt subObjectRangeCount = m_elementTypeLayout->getSubObjectRangeCount();
+ for (SlangInt r = 0; r < subObjectRangeCount; ++r)
+ {
+ SlangInt bindingRangeIndex = m_elementTypeLayout->getSubObjectRangeBindingRangeIndex(r);
+ auto slangBindingType = m_elementTypeLayout->getBindingRangeType(bindingRangeIndex);
+ slang::TypeLayoutReflection* slangLeafTypeLayout =
+ m_elementTypeLayout->getBindingRangeLeafTypeLayout(bindingRangeIndex);
+
+ // A sub-object range can either represent a sub-object of a known
+ // type, like a `ConstantBuffer<Foo>` or `ParameterBlock<Foo>`
+ // (in which case we can pre-compute a layout to use, based on
+ // the type `Foo`) *or* it can represent a sub-object of some
+ // existential type (e.g., `IBar`) in which case we cannot
+ // know the appropraite type/layout of sub-object to allocate.
+ //
+ RefPtr<CPUShaderObjectLayout> subObjectLayout;
+ if (slangBindingType != slang::BindingType::ExistentialValue)
+ {
+ subObjectLayout =
+ new CPUShaderObjectLayout(renderer, slangLeafTypeLayout->getElementTypeLayout());
+ }
+
+ SubObjectRangeInfo subObjectRange;
+ subObjectRange.bindingRangeIndex = bindingRangeIndex;
+ subObjectRange.layout = subObjectLayout;
+ subObjectRanges.add(subObjectRange);
+ }
+ }
+
+ size_t getSize() { return m_size; }
+ Index getResourceCount() const { return m_resourceCount; }
+ Index getSubObjectCount() const { return m_subObjectCount; }
+};
+
+class CPUEntryPointLayout : public CPUShaderObjectLayout
+{
+private:
+ slang::EntryPointLayout* m_entryPointLayout = nullptr;
+
+public:
+ CPUEntryPointLayout(
+ RendererBase* renderer,
+ slang::EntryPointLayout* entryPointLayout)
+ : CPUShaderObjectLayout(renderer, entryPointLayout->getTypeLayout())
+ , m_entryPointLayout(entryPointLayout)
+ {}
+
+ const char* getEntryPointName() { return m_entryPointLayout->getName(); }
+};
+
+class CPUProgramLayout : public CPUShaderObjectLayout
+{
+public:
+ slang::ProgramLayout* m_programLayout = nullptr;
+ List<RefPtr<CPUEntryPointLayout>> m_entryPointLayouts;
+
+ CPUProgramLayout(RendererBase* renderer, slang::ProgramLayout* programLayout)
+ : CPUShaderObjectLayout(renderer, programLayout->getGlobalParamsTypeLayout())
+ , m_programLayout(programLayout)
+ {
+ for (UInt i =0; i< programLayout->getEntryPointCount(); i++)
+ {
+ m_entryPointLayouts.add(new CPUEntryPointLayout(
+ renderer,
+ programLayout->getEntryPointByIndex(i)));
+ }
+
+ }
+
+ int getKernelIndex(UnownedStringSlice kernelName)
+ {
+ auto entryPointCount = (int) m_programLayout->getEntryPointCount();
+ for(int i = 0; i < entryPointCount; i++)
+ {
+ auto entryPoint = m_programLayout->getEntryPointByIndex(i);
+ if (kernelName == entryPoint->getName())
+ {
+ return i;
+ }
+ }
+ return -1;
+ }
+
+ void getKernelThreadGroupSize(int kernelIndex, UInt* threadGroupSizes)
+ {
+ auto entryPoint = m_programLayout->getEntryPointByIndex(kernelIndex);
+ entryPoint->getComputeThreadGroupSize(3, threadGroupSizes);
+ }
+
+ CPUEntryPointLayout* getEntryPoint(Index index) { return m_entryPointLayouts[index]; }
+};
+
+class CPUShaderObject : public ShaderObjectBase
+{
+public:
+ void* m_data = nullptr;
+
+ ~CPUShaderObject()
+ {
+ free(m_data);
+ }
+
+ List<RefPtr<CPUShaderObject>> m_objects;
+ List<RefPtr<CPUResourceView>> m_resources;
+
+ virtual SLANG_NO_THROW Result SLANG_MCALL
+ init(IDevice* device, CPUShaderObjectLayout* typeLayout);
+
+ CPUShaderObjectLayout* getLayout()
+ {
+ return static_cast<CPUShaderObjectLayout*>(m_layout.Ptr());
+ }
+
+#if 0
+ virtual SLANG_NO_THROW Result SLANG_MCALL initBuffer(IDevice* device, size_t bufferSize)
+ {
+ BufferResource::Desc bufferDesc;
+ bufferDesc.init(bufferSize);
+ bufferDesc.cpuAccessFlags |= IResource::AccessFlag::Write;
+ ComPtr<IBufferResource> constantBuffer;
+ SLANG_RETURN_ON_FAIL(renderer->createBufferResource(
+ IResource::Usage::ConstantBuffer, bufferDesc, nullptr, constantBuffer.writeRef()));
+ bufferResource = dynamic_cast<MemoryCUDAResource*>(constantBuffer.get());
+ return SLANG_OK;
+ }
+#endif
+
+#if 0
+ virtual SLANG_NO_THROW void* SLANG_MCALL getBuffer()
+ {
+ return bufferResource ? bufferResource->m_cudaMemory : nullptr;
+ }
+
+ virtual SLANG_NO_THROW size_t SLANG_MCALL getBufferSize()
+ {
+ return bufferResource ? bufferResource->getDesc()->sizeInBytes : 0;
+ }
+#endif
+
+ virtual SLANG_NO_THROW slang::TypeLayoutReflection* SLANG_MCALL getElementTypeLayout() override
+ {
+ return getLayout()->getElementTypeLayout();
+ }
+
+ virtual SLANG_NO_THROW UInt SLANG_MCALL getEntryPointCount() override { return 0; }
+ virtual SLANG_NO_THROW Result SLANG_MCALL
+ getEntryPoint(UInt index, IShaderObject** outEntryPoint) override
+ {
+ *outEntryPoint = nullptr;
+ return SLANG_OK;
+ }
+ virtual SLANG_NO_THROW Result SLANG_MCALL
+ setData(ShaderOffset const& offset, void const* data, size_t size)
+ {
+ size = Math::Min(size, getLayout()->getSize() - offset.uniformOffset);
+ memcpy((char*)m_data + offset.uniformOffset, data, size);
+ return SLANG_OK;
+ }
+ virtual SLANG_NO_THROW Result SLANG_MCALL getObject(
+ ShaderOffset const& offset,
+ IShaderObject** outObject)
+ {
+ auto layout = getLayout();
+
+ auto bindingRangeIndex = offset.bindingRangeIndex;
+ SLANG_ASSERT(bindingRangeIndex >= 0);
+ SLANG_ASSERT(bindingRangeIndex < layout->m_bindingRanges.getCount());
+
+ auto& bindingRange = layout->m_bindingRanges[bindingRangeIndex];
+ auto subObjectIndex = bindingRange.baseIndex + offset.bindingArrayIndex;
+ CPUShaderObject* subObject = m_objects[subObjectIndex];
+
+ *outObject = ComPtr<IShaderObject>(subObject).detach();
+
+ return SLANG_OK;
+ }
+ virtual SLANG_NO_THROW Result SLANG_MCALL setObject(
+ ShaderOffset const& offset,
+ IShaderObject* object)
+ {
+ auto layout = getLayout();
+
+ auto bindingRangeIndex = offset.bindingRangeIndex;
+ SLANG_ASSERT(bindingRangeIndex >= 0);
+ SLANG_ASSERT(bindingRangeIndex < layout->m_bindingRanges.getCount());
+
+ auto& bindingRange = layout->m_bindingRanges[bindingRangeIndex];
+ auto subObjectIndex = bindingRange.baseIndex + offset.bindingArrayIndex;
+
+ CPUShaderObject* subObject = static_cast<CPUShaderObject*>(object);
+ m_objects[subObjectIndex] = subObject;
+
+ switch( bindingRange.bindingType )
+ {
+ default:
+ SLANG_RETURN_ON_FAIL(setData(offset, &subObject->m_data, sizeof(void*)));
+ break;
+
+ // If the range being assigned into represents an interface/existential-type leaf field,
+ // then we need to consider how the `object` being assigned here affects specialization.
+ // We may also need to assign some data from the sub-object into the ordinary data
+ // buffer for the parent object.
+ //
+ case slang::BindingType::ExistentialValue:
+ {
+ auto renderer = getRenderer();
+
+ ComPtr<slang::ISession> slangSession;
+ SLANG_RETURN_ON_FAIL(renderer->getSlangSession(slangSession.writeRef()));
+
+ // A leaf field of interface type is laid out inside of the parent object
+ // as a tuple of `(RTTI, WitnessTable, Payload)`. The layout of these fields
+ // is a contract between the compiler and any runtime system, so we will
+ // need to rely on details of the binary layout.
+
+ // We start by querying the layout/type of the concrete value that the application
+ // is trying to store into the field, and also the layout/type of the leaf
+ // existential-type field itself.
+ //
+ auto concreteTypeLayout = subObject->getElementTypeLayout();
+ auto concreteType = concreteTypeLayout->getType();
+ //
+ auto existentialTypeLayout = layout->getElementTypeLayout()->getBindingRangeLeafTypeLayout(bindingRangeIndex);
+ auto existentialType = existentialTypeLayout->getType();
+
+ // The first field of the tuple (offset zero) is the run-time type information (RTTI)
+ // ID for the concrete type being stored into the field.
+ //
+ // TODO: We need to be able to gather the RTTI type ID from `object` and then
+ // use `setData(offset, &TypeID, sizeof(TypeID))`.
+
+ // The second field of the tuple (offset 8) is the ID of the "witness" for the
+ // conformance of the concrete type to the interface used by this field.
+ //
+ auto witnessTableOffset = offset;
+ witnessTableOffset.uniformOffset += 8;
+ //
+ // Conformances of a type to an interface are computed and then stored by the
+ // Slang runtime, so we can look up the ID for this particular conformance (which
+ // will create it on demand).
+ //
+ // Note: If the type doesn't actually conform to the required interface for
+ // this sub-object range, then this is the point where we will detect that
+ // fact and error out.
+ //
+ uint32_t conformanceID = 0xFFFFFFFF;
+ SLANG_RETURN_ON_FAIL(slangSession->getTypeConformanceWitnessSequentialID(
+ concreteType, existentialType, &conformanceID));
+ //
+ // Once we have the conformance ID, then we can write it into the object
+ // at the required offset.
+ //
+ SLANG_RETURN_ON_FAIL(setData(witnessTableOffset, &conformanceID, sizeof(conformanceID)));
+
+ // The third field of the tuple (offset 16) is the "payload" that is supposed to
+ // hold the data for a value of the given concrete type.
+ //
+ auto payloadOffset = offset;
+ payloadOffset.uniformOffset += 16;
+
+ // There are two cases we need to consider here for how the payload might be used:
+ //
+ // * If the concrete type of the value being bound is one that can "fit" into the
+ // available payload space, then it should be stored in the payload.
+ //
+ // * If the concrete type of the value cannot fit in the payload space, then it
+ // will need to be stored somewhere else.
+ //
+ if(_doesValueFitInExistentialPayload(concreteTypeLayout, existentialTypeLayout))
+ {
+ // If the value can fit in the payload area, then we will go ahead and copy
+ // its bytes into that area.
+ //
+ auto valueSize = concreteTypeLayout->getSize();
+ SLANG_RETURN_ON_FAIL(setData(payloadOffset, subObject->m_data, valueSize));
+ }
+ else
+ {
+ // If the value cannot fit in the payload area, then we will pass a pointer
+ // to the sub-object instead.
+ //
+ // Note: The Slang compiler does not currently emit code that handles the
+ // pointer case, but that is the expected implementation for values
+ // that do not fit into the fixed-size payload.
+ //
+ SLANG_RETURN_ON_FAIL(setData(payloadOffset, &subObject->m_data, sizeof(void*)));
+ }
+ }
+ break;
+ }
+
+ return SLANG_OK;
+ }
+ virtual SLANG_NO_THROW Result SLANG_MCALL
+ setResource(ShaderOffset const& offset, IResourceView* inView)
+ {
+ auto layout = getLayout();
+
+ auto bindingRangeIndex = offset.bindingRangeIndex;
+ SLANG_ASSERT(bindingRangeIndex >= 0);
+ SLANG_ASSERT(bindingRangeIndex < layout->m_bindingRanges.getCount());
+
+ auto& bindingRange = layout->m_bindingRanges[bindingRangeIndex];
+ auto viewIndex = bindingRange.baseIndex + offset.bindingArrayIndex;
+
+
+ auto view = static_cast<CPUResourceView*>(inView);
+ m_resources[viewIndex] = view;
+
+ switch( view->getViewKind() )
+ {
+ case CPUResourceView::Kind::Texture:
+ {
+ auto textureView = static_cast<CPUTextureView*>(view);
+
+ slang_prelude::IRWTexture* textureObj = textureView;
+ SLANG_RETURN_ON_FAIL(setData(offset, &textureObj, sizeof(textureObj)));
+ }
+ break;
+
+ case CPUResourceView::Kind::Buffer:
+ {
+ auto bufferView = static_cast<CPUBufferView*>(view);
+ auto buffer = bufferView->getBuffer();
+ auto desc = *buffer->getDesc();
+
+ void* dataPtr = buffer->m_data;
+ size_t size = desc.sizeInBytes;
+ if (desc.elementSize > 1)
+ size /= desc.elementSize;
+
+ auto ptrOffset = offset;
+ SLANG_RETURN_ON_FAIL(setData(ptrOffset, &dataPtr, sizeof(dataPtr)));
+
+ auto sizeOffset = offset;
+ sizeOffset.uniformOffset += sizeof(dataPtr);
+ SLANG_RETURN_ON_FAIL(setData(sizeOffset, &size, sizeof(size)));
+ }
+ break;
+ }
+
+ return SLANG_OK;
+ }
+ virtual SLANG_NO_THROW Result SLANG_MCALL
+ setSampler(ShaderOffset const& offset, ISamplerState* sampler)
+ {
+ SLANG_UNUSED(sampler);
+ SLANG_UNUSED(offset);
+ return SLANG_OK;
+ }
+ virtual SLANG_NO_THROW Result SLANG_MCALL setCombinedTextureSampler(
+ ShaderOffset const& offset, IResourceView* textureView, ISamplerState* sampler)
+ {
+ SLANG_UNUSED(sampler);
+ setResource(offset, textureView);
+ return SLANG_OK;
+ }
+
+ // Appends all types that are used to specialize the element type of this shader object in `args` list.
+ virtual Result collectSpecializationArgs(ExtendedShaderObjectTypeList& args) override
+ {
+ // TODO: the logic here is a copy-paste of `GraphicsCommonShaderObject::collectSpecializationArgs`,
+ // consider moving the implementation to `ShaderObjectBase` and share the logic among different implementations.
+
+ auto& subObjectRanges = getLayout()->subObjectRanges;
+ // The following logic is built on the assumption that all fields that involve existential types (and
+ // therefore require specialization) will results in a sub-object range in the type layout.
+ // This allows us to simply scan the sub-object ranges to find out all specialization arguments.
+ for (Index subObjIndex = 0; subObjIndex < subObjectRanges.getCount(); subObjIndex++)
+ {
+ // Retrieve the corresponding binding range of the sub object.
+ auto bindingRange = getLayout()->m_bindingRanges[subObjectRanges[subObjIndex].bindingRangeIndex];
+ switch (bindingRange.bindingType)
+ {
+ case slang::BindingType::ExistentialValue:
+ {
+ // A binding type of `ExistentialValue` means the sub-object represents a interface-typed field.
+ // In this case the specialization argument for this field is the actual specialized type of the bound
+ // shader object. If the shader object's type is an ordinary type without existential fields, then the
+ // type argument will simply be the ordinary type. But if the sub object's type is itself a specialized
+ // type, we need to make sure to use that type as the specialization argument.
+
+ // TODO: need to implement the case where the field is an array of existential values.
+ SLANG_ASSERT(bindingRange.count == 1);
+ ExtendedShaderObjectType specializedSubObjType;
+ SLANG_RETURN_ON_FAIL(m_objects[subObjIndex]->getSpecializedShaderObjectType(&specializedSubObjType));
+ args.add(specializedSubObjType);
+ break;
+ }
+ case slang::BindingType::ParameterBlock:
+ case slang::BindingType::ConstantBuffer:
+ // Currently we only handle the case where the field's type is
+ // `ParameterBlock<SomeStruct>` or `ConstantBuffer<SomeStruct>`, where `SomeStruct` is a struct type
+ // (not directly an interface type). In this case, we just recursively collect the specialization arguments
+ // from the bound sub object.
+ SLANG_RETURN_ON_FAIL(m_objects[subObjIndex]->collectSpecializationArgs(args));
+ // TODO: we need to handle the case where the field is of the form `ParameterBlock<IFoo>`. We should treat
+ // this case the same way as the `ExistentialValue` case here, but currently we lack a mechanism to distinguish
+ // the two scenarios.
+ break;
+ }
+ // TODO: need to handle another case where specialization happens on resources fields e.g. `StructuredBuffer<IFoo>`.
+ }
+ return SLANG_OK;
+ }
+};
+
+class CPUEntryPointShaderObject : public CPUShaderObject
+{
+public:
+ CPUEntryPointLayout* getLayout() { return static_cast<CPUEntryPointLayout*>(m_layout.Ptr()); }
+};
+
+class CPURootShaderObject : public CPUShaderObject
+{
+public:
+ SlangResult init(IDevice* device, CPUProgramLayout* programLayout);
+
+ CPUProgramLayout* getLayout() { return static_cast<CPUProgramLayout*>(m_layout.Ptr()); }
+
+ CPUEntryPointShaderObject* getEntryPoint(Index index) { return m_entryPoints[index]; }
+
+ List<RefPtr<CPUEntryPointShaderObject>> m_entryPoints;
+
+ virtual SLANG_NO_THROW UInt SLANG_MCALL getEntryPointCount() override { return m_entryPoints.getCount(); }
+ virtual SLANG_NO_THROW Result SLANG_MCALL
+ getEntryPoint(UInt index, IShaderObject** outEntryPoint) override
+ {
+ *outEntryPoint = ComPtr<IShaderObject>(m_entryPoints[index]).detach();
+ return SLANG_OK;
+ }
+ virtual Result collectSpecializationArgs(ExtendedShaderObjectTypeList& args) override
+ {
+ SLANG_RETURN_ON_FAIL(CPUShaderObject::collectSpecializationArgs(args));
+ for (auto& entryPoint : m_entryPoints)
+ {
+ SLANG_RETURN_ON_FAIL(entryPoint->collectSpecializationArgs(args));
+ }
+ return SLANG_OK;
+ }
+};
+
+class CPUShaderProgram : public ShaderProgramBase
+{
+public:
+ RefPtr<CPUProgramLayout> layout;
+
+ ~CPUShaderProgram()
+ {
+ }
+};
+
+class CPUPipelineState : public PipelineStateBase
+{
+public:
+ CPUShaderProgram* getProgram() { return static_cast<CPUShaderProgram*>(m_program.get()); }
+
+ void init(const ComputePipelineStateDesc& inDesc)
+ {
+ PipelineStateDesc pipelineDesc;
+ pipelineDesc.type = PipelineType::Compute;
+ pipelineDesc.compute = inDesc;
+ initializeBase(pipelineDesc);
+ }
+};
+
+class CPUDevice : public RendererBase
+{
+private:
+ RefPtr<CPUPipelineState> m_currentPipeline = nullptr;
+ RefPtr<CPURootShaderObject> m_currentRootObject = nullptr;
+ DeviceInfo m_info;
+
+ class CommandQueueImpl;
+
+ class CommandBufferImpl
+ : public ICommandBuffer
+ , public CommandWriter
+ , public RefObject
+ {
+ public:
+ SLANG_REF_OBJECT_IUNKNOWN_ALL
+ ICommandBuffer* getInterface(const Guid& guid)
+ {
+ if (guid == GfxGUID::IID_ISlangUnknown || guid == GfxGUID::IID_ICommandBuffer)
+ return static_cast<ICommandBuffer*>(this);
+ return nullptr;
+ }
+ public:
+ virtual SLANG_NO_THROW void SLANG_MCALL encodeRenderCommands(
+ IRenderPassLayout* renderPass,
+ IFramebuffer* framebuffer,
+ IRenderCommandEncoder** outEncoder) override
+ {
+ SLANG_UNUSED(renderPass);
+ SLANG_UNUSED(framebuffer);
+ *outEncoder = nullptr;
+ }
+
+ class ComputeCommandEncoderImpl
+ : public IComputeCommandEncoder
+ {
+ public:
+ virtual SLANG_NO_THROW SlangResult SLANG_MCALL
+ queryInterface(SlangUUID const& uuid, void** outObject) override
+ {
+ if (uuid == GfxGUID::IID_ISlangUnknown ||
+ uuid == GfxGUID::IID_IComputeCommandEncoder)
+ {
+ *outObject = static_cast<IComputeCommandEncoder*>(this);
+ return SLANG_OK;
+ }
+ *outObject = nullptr;
+ return SLANG_E_NO_INTERFACE;
+ }
+ virtual SLANG_NO_THROW uint32_t SLANG_MCALL addRef() { return 1; }
+ virtual SLANG_NO_THROW uint32_t SLANG_MCALL release() { return 1; }
+
+ public:
+ CommandWriter* m_writer;
+
+ virtual SLANG_NO_THROW void SLANG_MCALL endEncoding() override {}
+ void init(CommandBufferImpl* cmdBuffer)
+ {
+ m_writer = cmdBuffer;
+ }
+
+ virtual SLANG_NO_THROW void SLANG_MCALL setPipelineState(IPipelineState* state) override
+ {
+ m_writer->setPipelineState(state);
+ }
+ virtual SLANG_NO_THROW void SLANG_MCALL
+ bindRootShaderObject(IShaderObject* object) override
+ {
+ m_writer->bindRootShaderObject(PipelineType::Compute, object);
+ }
+
+ virtual SLANG_NO_THROW void SLANG_MCALL setDescriptorSet(
+ IPipelineLayout* layout,
+ UInt index,
+ IDescriptorSet* descriptorSet) override
+ {
+ m_writer->setDescriptorSet(PipelineType::Compute, layout, index, descriptorSet);
+ }
+
+ virtual SLANG_NO_THROW void SLANG_MCALL dispatchCompute(int x, int y, int z) override
+ {
+ m_writer->dispatchCompute(x, y, z);
+ }
+ };
+
+ ComputeCommandEncoderImpl m_computeCommandEncoder;
+ virtual SLANG_NO_THROW void SLANG_MCALL
+ encodeComputeCommands(IComputeCommandEncoder** outEncoder) override
+ {
+ m_computeCommandEncoder.init(this);
+ *outEncoder = &m_computeCommandEncoder;
+ }
+
+ class ResourceCommandEncoderImpl
+ : public IResourceCommandEncoder
+ {
+ public:
+ virtual SLANG_NO_THROW SlangResult SLANG_MCALL
+ queryInterface(SlangUUID const& uuid, void** outObject) override
+ {
+ if (uuid == GfxGUID::IID_ISlangUnknown ||
+ uuid == GfxGUID::IID_IResourceCommandEncoder)
+ {
+ *outObject = static_cast<IResourceCommandEncoder*>(this);
+ return SLANG_OK;
+ }
+ *outObject = nullptr;
+ return SLANG_E_NO_INTERFACE;
+ }
+ virtual SLANG_NO_THROW uint32_t SLANG_MCALL addRef() { return 1; }
+ virtual SLANG_NO_THROW uint32_t SLANG_MCALL release() { return 1; }
+
+ public:
+ CommandWriter* m_writer;
+
+ void init(CommandBufferImpl* cmdBuffer)
+ {
+ m_writer = cmdBuffer;
+ }
+
+ virtual SLANG_NO_THROW void SLANG_MCALL endEncoding() override {}
+ virtual SLANG_NO_THROW void SLANG_MCALL copyBuffer(
+ IBufferResource* dst,
+ size_t dstOffset,
+ IBufferResource* src,
+ size_t srcOffset,
+ size_t size) override
+ {
+ m_writer->copyBuffer(dst, dstOffset, src, srcOffset, size);
+ }
+
+ virtual SLANG_NO_THROW void SLANG_MCALL
+ uploadBufferData(IBufferResource* dst, size_t offset, size_t size, void* data)
+ {
+ m_writer->uploadBufferData(dst, offset, size, data);
+ }
+ };
+
+ ResourceCommandEncoderImpl m_resourceCommandEncoder;
+
+ virtual SLANG_NO_THROW void SLANG_MCALL
+ encodeResourceCommands(IResourceCommandEncoder** outEncoder) override
+ {
+ m_resourceCommandEncoder.init(this);
+ *outEncoder = &m_resourceCommandEncoder;
+ }
+
+ virtual SLANG_NO_THROW void SLANG_MCALL close() override {}
+ };
+
+ class CommandQueueImpl
+ : public ICommandQueue
+ , public RefObject
+ {
+ public:
+ SLANG_REF_OBJECT_IUNKNOWN_ALL
+ ICommandQueue* getInterface(const Guid& guid)
+ {
+ if (guid == GfxGUID::IID_ISlangUnknown || guid == GfxGUID::IID_ICommandQueue)
+ return static_cast<ICommandQueue*>(this);
+ return nullptr;
+ }
+
+ public:
+ RefPtr<CPUPipelineState> currentPipeline;
+ RefPtr<CPURootShaderObject> currentRootObject;
+ RefPtr<CPUDevice> renderer;
+ Desc m_desc;
+ public:
+ void init(CPUDevice* inRenderer)
+ {
+ renderer = inRenderer;
+ m_desc.type = ICommandQueue::QueueType::Graphics;
+ }
+ ~CommandQueueImpl()
+ {
+ currentPipeline = nullptr;
+ currentRootObject = nullptr;
+ }
+
+ public:
+ virtual SLANG_NO_THROW const Desc& SLANG_MCALL getDesc() override
+ {
+ return m_desc;
+ }
+ virtual SLANG_NO_THROW Result SLANG_MCALL
+ createCommandBuffer(ICommandBuffer** outCommandBuffer) override
+ {
+ RefPtr<CommandBufferImpl> result = new CommandBufferImpl();
+ *outCommandBuffer = result.detach();
+ return SLANG_OK;
+ }
+
+ virtual SLANG_NO_THROW void SLANG_MCALL
+ executeCommandBuffers(uint32_t count, ICommandBuffer* const* commandBuffers) override
+ {
+ for (uint32_t i = 0; i < count; i++)
+ {
+ execute(static_cast<CommandBufferImpl*>(commandBuffers[i]));
+ }
+ }
+
+ virtual SLANG_NO_THROW void SLANG_MCALL wait() override
+ {}
+
+ public:
+ void setPipelineState(IPipelineState* state)
+ {
+ currentPipeline = static_cast<CPUPipelineState*>(state);
+ }
+
+ Result bindRootShaderObject(PipelineType pipelineType, IShaderObject* object)
+ {
+ currentRootObject = static_cast<CPURootShaderObject*>(object);
+ if (currentRootObject)
+ return SLANG_OK;
+ return SLANG_E_INVALID_ARG;
+ }
+
+ void dispatchCompute(int x, int y, int z)
+ {
+ int entryPointIndex = 0;
+ int targetIndex = 0;
+
+ // Specialize the compute kernel based on the shader object bindings.
+ RefPtr<PipelineStateBase> newPipeline;
+ renderer->maybeSpecializePipeline(currentPipeline, currentRootObject, newPipeline);
+ currentPipeline = static_cast<CPUPipelineState*>(newPipeline.Ptr());
+
+ auto program = currentPipeline->getProgram();
+ auto entryPointLayout = currentRootObject->getLayout()->getEntryPoint(entryPointIndex);
+ auto entryPointName = entryPointLayout->getEntryPointName();
+
+ auto entryPointObject = currentRootObject->getEntryPoint(entryPointIndex);
+
+ ComPtr<ISlangSharedLibrary> sharedLibrary;
+ program->slangProgram->getEntryPointHostCallable(entryPointIndex, targetIndex, sharedLibrary.writeRef());
+
+ auto func = (slang_prelude::ComputeFunc) sharedLibrary->findSymbolAddressByName(entryPointName);
+
+ slang_prelude::ComputeVaryingInput varyingInput;
+ varyingInput.startGroupID.x = 0;
+ varyingInput.startGroupID.y = 0;
+ varyingInput.startGroupID.z = 0;
+ varyingInput.endGroupID.x = x;
+ varyingInput.endGroupID.y = y;
+ varyingInput.endGroupID.z = z;
+
+ auto globalParamsData = currentRootObject->m_data;
+ auto entryPointParamsData = entryPointObject->m_data;
+ func(&varyingInput, entryPointParamsData, globalParamsData);
+ }
+
+ void copyBuffer(
+ IBufferResource* dst,
+ size_t dstOffset,
+ IBufferResource* src,
+ size_t srcOffset,
+ size_t size)
+ {
+ auto dstImpl = static_cast<CPUBufferResource*>(dst);
+ auto srcImpl = static_cast<CPUBufferResource*>(src);
+ memcpy(
+ (uint8_t*)dstImpl->m_data + dstOffset,
+ (uint8_t*)srcImpl->m_data + srcOffset,
+ size);
+ }
+
+ void uploadBufferData(IBufferResource* dst, size_t offset, size_t size, void* data)
+ {
+ auto dstImpl = static_cast<CPUBufferResource*>(dst);
+ memcpy((uint8_t*)dstImpl->m_data + offset, data, size);
+ }
+
+ void execute(CommandBufferImpl* commandBuffer)
+ {
+ for (auto& cmd : commandBuffer->m_commands)
+ {
+ switch (cmd.name)
+ {
+ case CommandName::SetPipelineState:
+ setPipelineState(commandBuffer->getObject<IPipelineState>(cmd.operands[0]));
+ break;
+ case CommandName::BindRootShaderObject:
+ bindRootShaderObject(
+ (PipelineType)cmd.operands[0],
+ commandBuffer->getObject<IShaderObject>(cmd.operands[1]));
+ break;
+ case CommandName::DispatchCompute:
+ dispatchCompute(
+ int(cmd.operands[0]), int(cmd.operands[1]), int(cmd.operands[2]));
+ break;
+ case CommandName::CopyBuffer:
+ copyBuffer(
+ commandBuffer->getObject<IBufferResource>(cmd.operands[0]),
+ cmd.operands[1],
+ commandBuffer->getObject<IBufferResource>(cmd.operands[2]),
+ cmd.operands[3],
+ cmd.operands[4]);
+ break;
+ case CommandName::UploadBufferData:
+ uploadBufferData(
+ commandBuffer->getObject<IBufferResource>(cmd.operands[0]),
+ cmd.operands[1],
+ cmd.operands[2],
+ commandBuffer->getData<uint8_t>(cmd.operands[3]));
+ break;
+ }
+ }
+ }
+ };
+
+public:
+ ~CPUDevice()
+ {
+ m_currentPipeline = nullptr;
+ m_currentRootObject = nullptr;
+ }
+
+ virtual SLANG_NO_THROW SlangResult SLANG_MCALL initialize(const Desc& desc) override
+ {
+ SLANG_RETURN_ON_FAIL(slangContext.initialize(desc.slang, SLANG_HOST_CALLABLE, "sm_5_1"));
+
+ SLANG_RETURN_ON_FAIL(RendererBase::initialize(desc));
+
+ // Initialize DeviceInfo
+ {
+ m_info.deviceType = DeviceType::CPU;
+ m_info.bindingStyle = BindingStyle::CUDA;
+ m_info.projectionStyle = ProjectionStyle::DirectX;
+ m_info.apiName = "CPU";
+ static const float kIdentity[] = {1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1};
+ ::memcpy(m_info.identityProjectionMatrix, kIdentity, sizeof(kIdentity));
+ m_info.adapterName = "CPU";
+ }
+
+ return SLANG_OK;
+ }
+
+ virtual SLANG_NO_THROW Result SLANG_MCALL createTextureResource(
+ IResource::Usage initialUsage,
+ const ITextureResource::Desc& desc,
+ const ITextureResource::SubresourceData* initData,
+ ITextureResource** outResource) override
+ {
+ RefPtr<CPUTextureResource> texture = new CPUTextureResource(desc);
+
+ SLANG_RETURN_ON_FAIL(texture->init(initData));
+
+ *outResource = texture.detach();
+ return SLANG_OK;
+ }
+
+ virtual SLANG_NO_THROW Result SLANG_MCALL createBufferResource(
+ IResource::Usage initialUsage,
+ const IBufferResource::Desc& desc,
+ const void* initData,
+ IBufferResource** outResource) override
+ {
+ RefPtr<CPUBufferResource> resource = new CPUBufferResource(desc);
+ SLANG_RETURN_ON_FAIL(resource->init());
+ if (initData)
+ {
+ SLANG_RETURN_ON_FAIL(resource->setData(0, desc.sizeInBytes, initData));
+ }
+ *outResource = resource.detach();
+ return SLANG_OK;
+ }
+
+ virtual SLANG_NO_THROW Result SLANG_MCALL createTextureView(
+ ITextureResource* inTexture, IResourceView::Desc const& desc, IResourceView** outView) override
+ {
+ auto texture = static_cast<CPUTextureResource*>(inTexture);
+ RefPtr<CPUTextureView> view = new CPUTextureView(desc, texture);
+ *outView = view.detach();
+ return SLANG_OK;
+ }
+
+ virtual SLANG_NO_THROW Result SLANG_MCALL createBufferView(
+ IBufferResource* inBuffer, IResourceView::Desc const& desc, IResourceView** outView) override
+ {
+ auto buffer = static_cast<CPUBufferResource*>(inBuffer);
+ RefPtr<CPUBufferView> view = new CPUBufferView(desc, buffer);
+ *outView = view.detach();
+ return SLANG_OK;
+ }
+
+ virtual Result createShaderObjectLayout(
+ slang::TypeLayoutReflection* typeLayout,
+ ShaderObjectLayoutBase** outLayout) override
+ {
+ RefPtr<CPUShaderObjectLayout> cpuLayout = new CPUShaderObjectLayout(this, typeLayout);
+ *outLayout = cpuLayout.detach();
+
+ return SLANG_OK;
+ }
+
+ virtual Result createShaderObject(
+ ShaderObjectLayoutBase* layout,
+ IShaderObject** outObject) override
+ {
+ auto cpuLayout = static_cast<CPUShaderObjectLayout*>(layout);
+
+ RefPtr<CPUShaderObject> result = new CPUShaderObject();
+ SLANG_RETURN_ON_FAIL(result->init(this, cpuLayout));
+ *outObject = result.detach();
+
+ return SLANG_OK;
+ }
+
+ virtual SLANG_NO_THROW Result SLANG_MCALL
+ createRootShaderObject(IShaderProgram* program, IShaderObject** outObject) override
+ {
+ auto cpuProgram = static_cast<CPUShaderProgram*>(program);
+ auto cpuProgramLayout = cpuProgram->layout;
+
+ RefPtr<CPURootShaderObject> result = new CPURootShaderObject();
+ SLANG_RETURN_ON_FAIL(result->init(this, cpuProgramLayout));
+ *outObject = result.detach();
+ return SLANG_OK;
+ }
+
+ virtual SLANG_NO_THROW Result SLANG_MCALL
+ createProgram(const IShaderProgram::Desc& desc, IShaderProgram** outProgram) override
+ {
+ if( desc.kernelCount == 0 )
+ {
+ return createProgramFromSlang(this, desc, outProgram);
+ }
+
+ if (desc.kernelCount != 1)
+ return SLANG_E_INVALID_ARG;
+
+ RefPtr<CPUShaderProgram> cpuProgram = new CPUShaderProgram();
+
+ // TODO: stuff?
+
+ auto slangProgram = desc.slangProgram;
+ if( slangProgram )
+ {
+ cpuProgram->slangProgram = slangProgram;
+
+ auto slangProgramLayout = slangProgram->getLayout();
+ if(!slangProgramLayout)
+ return SLANG_FAIL;
+
+ RefPtr<CPUProgramLayout> cpuProgramLayout = new CPUProgramLayout(this, slangProgramLayout);
+ cpuProgramLayout->m_programLayout = slangProgramLayout;
+
+ cpuProgram->layout = cpuProgramLayout;
+ }
+
+ *outProgram = cpuProgram.detach();
+ return SLANG_OK;
+ }
+
+ virtual SLANG_NO_THROW Result SLANG_MCALL createComputePipelineState(
+ const ComputePipelineStateDesc& desc, IPipelineState** outState) override
+ {
+ RefPtr<CPUPipelineState> state = new CPUPipelineState();
+ state->init(desc);
+ *outState = state.detach();
+ return Result();
+ }
+
+ virtual SLANG_NO_THROW const DeviceInfo& SLANG_MCALL getDeviceInfo() const override
+ {
+ return m_info;
+ }
+
+public:
+ virtual SLANG_NO_THROW Result SLANG_MCALL
+ createCommandQueue(const ICommandQueue::Desc& desc, ICommandQueue** outQueue) override
+ {
+ RefPtr<CommandQueueImpl> queue = new CommandQueueImpl();
+ queue->init(this);
+ *outQueue = queue.detach();
+ return SLANG_OK;
+ }
+ virtual SLANG_NO_THROW Result SLANG_MCALL createSwapchain(
+ const ISwapchain::Desc& desc, WindowHandle window, ISwapchain** outSwapchain) override
+ {
+ SLANG_UNUSED(desc);
+ SLANG_UNUSED(window);
+ SLANG_UNUSED(outSwapchain);
+ return SLANG_FAIL;
+ }
+ virtual SLANG_NO_THROW Result SLANG_MCALL createFramebufferLayout(
+ const IFramebufferLayout::Desc& desc, IFramebufferLayout** outLayout) override
+ {
+ SLANG_UNUSED(desc);
+ SLANG_UNUSED(outLayout);
+ return SLANG_FAIL;
+ }
+ virtual SLANG_NO_THROW Result SLANG_MCALL
+ createFramebuffer(const IFramebuffer::Desc& desc, IFramebuffer** outFramebuffer) override
+ {
+ SLANG_UNUSED(desc);
+ SLANG_UNUSED(outFramebuffer);
+ return SLANG_FAIL;
+ }
+ virtual SLANG_NO_THROW Result SLANG_MCALL createRenderPassLayout(
+ const IRenderPassLayout::Desc& desc,
+ IRenderPassLayout** outRenderPassLayout) override
+ {
+ SLANG_UNUSED(desc);
+ SLANG_UNUSED(outRenderPassLayout);
+ return SLANG_FAIL;
+ }
+ virtual SLANG_NO_THROW Result SLANG_MCALL
+ createSamplerState(ISamplerState::Desc const& desc, ISamplerState** outSampler) override
+ {
+ SLANG_UNUSED(desc);
+ *outSampler = nullptr;
+ return SLANG_OK;
+ }
+
+ virtual SLANG_NO_THROW Result SLANG_MCALL createInputLayout(
+ const InputElementDesc* inputElements,
+ UInt inputElementCount,
+ IInputLayout** outLayout) override
+ {
+ SLANG_UNUSED(inputElements);
+ SLANG_UNUSED(inputElementCount);
+ SLANG_UNUSED(outLayout);
+ return SLANG_E_NOT_AVAILABLE;
+ }
+ virtual SLANG_NO_THROW Result SLANG_MCALL createDescriptorSetLayout(
+ const IDescriptorSetLayout::Desc& desc, IDescriptorSetLayout** outLayout) override
+ {
+ SLANG_UNUSED(desc);
+ SLANG_UNUSED(outLayout);
+ return SLANG_E_NOT_AVAILABLE;
+ }
+ virtual SLANG_NO_THROW Result SLANG_MCALL
+ createPipelineLayout(const IPipelineLayout::Desc& desc, IPipelineLayout** outLayout) override
+ {
+ SLANG_UNUSED(desc);
+ SLANG_UNUSED(outLayout);
+ return SLANG_E_NOT_AVAILABLE;
+ }
+ virtual SLANG_NO_THROW Result SLANG_MCALL
+ createDescriptorSet(IDescriptorSetLayout* layout, IDescriptorSet::Flag::Enum flags, IDescriptorSet** outDescriptorSet) override
+ {
+ SLANG_UNUSED(layout);
+ SLANG_UNUSED(flags);
+ SLANG_UNUSED(outDescriptorSet);
+ return SLANG_E_NOT_AVAILABLE;
+ }
+ virtual SLANG_NO_THROW Result SLANG_MCALL createGraphicsPipelineState(
+ const GraphicsPipelineStateDesc& desc, IPipelineState** outState) override
+ {
+ SLANG_UNUSED(desc);
+ SLANG_UNUSED(outState);
+ return SLANG_E_NOT_AVAILABLE;
+ }
+ virtual SLANG_NO_THROW SlangResult SLANG_MCALL readTextureResource(
+ ITextureResource* texture,
+ ResourceState state,
+ ISlangBlob** outBlob,
+ size_t* outRowPitch,
+ size_t* outPixelSize) override
+ {
+ SLANG_UNUSED(texture);
+ SLANG_UNUSED(outBlob);
+ SLANG_UNUSED(outRowPitch);
+ SLANG_UNUSED(outPixelSize);
+
+ return SLANG_E_NOT_AVAILABLE;
+ }
+ virtual SLANG_NO_THROW Result SLANG_MCALL readBufferResource(
+ IBufferResource* buffer,
+ size_t offset,
+ size_t size,
+ ISlangBlob** outBlob) override
+ {
+ auto bufferImpl = static_cast<CPUBufferResource*>(buffer);
+ RefPtr<ListBlob> blob = new ListBlob();
+ blob->m_data.setCount((Index)size);
+ memcpy(
+ blob->m_data.getBuffer(),
+ (uint8_t*)bufferImpl->m_data + offset,
+ size);
+ *outBlob = blob.detach();
+ return SLANG_OK;
+ }
+};
+
+SlangResult CPUShaderObject::init(IDevice* device, CPUShaderObjectLayout* typeLayout)
+{
+ m_layout = typeLayout;
+
+ // If the layout tells us that there is any uniform data,
+ // then we need to allocate a constant buffer to hold that data.
+ //
+ // TODO: Do we need to allocate a shadow copy for use from
+ // the CPU?
+ //
+ // TODO: When/where do we bind this constant buffer into
+ // a descriptor set for later use?
+ //
+ auto slangLayout = getLayout()->getElementTypeLayout();
+ size_t uniformSize = slangLayout->getSize();
+ if (uniformSize)
+ {
+ m_data = malloc(uniformSize);
+ }
+
+ // If the layout specifies that we have any resources or sub-objects,
+ // then we need to size the appropriate arrays to account for them.
+ //
+ // Note: the counts here are the *total* number of resources/sub-objects
+ // and not just the number of resource/sub-object ranges.
+ //
+ m_resources.setCount(typeLayout->getResourceCount());
+ m_objects.setCount(typeLayout->getSubObjectCount());
+
+ for (auto subObjectRange : getLayout()->subObjectRanges)
+ {
+ RefPtr<CPUShaderObjectLayout> subObjectLayout = subObjectRange.layout;
+
+ // In the case where the sub-object range represents an
+ // existential-type leaf field (e.g., an `IBar`), we
+ // cannot pre-allocate the object(s) to go into that
+ // range, since we can't possibly know what to allocate
+ // at this point.
+ //
+ if (!subObjectLayout)
+ continue;
+ //
+ // Otherwise, we will allocate a sub-object to fill
+ // in each entry in this range, based on the layout
+ // information we already have.
+
+ auto& bindingRangeInfo = getLayout()->m_bindingRanges[subObjectRange.bindingRangeIndex];
+ for (Index i = 0; i < bindingRangeInfo.count; ++i)
+ {
+ RefPtr<CPUShaderObject> subObject = new CPUShaderObject();
+ SLANG_RETURN_ON_FAIL(subObject->init(device, subObjectLayout));
+
+ ShaderOffset offset;
+ offset.uniformOffset = bindingRangeInfo.uniformOffset + sizeof(void*) * i;
+ offset.bindingRangeIndex = subObjectRange.bindingRangeIndex;
+ offset.bindingArrayIndex = i;
+
+ SLANG_RETURN_ON_FAIL(setObject(offset, subObject));
+ }
+ }
+ return SLANG_OK;
+}
+
+SlangResult CPURootShaderObject::init(IDevice* device, CPUProgramLayout* programLayout)
+{
+ SLANG_RETURN_ON_FAIL(CPUShaderObject::init(device, programLayout));
+ for (auto& entryPoint : programLayout->m_entryPointLayouts)
+ {
+ RefPtr<CPUEntryPointShaderObject> object = new CPUEntryPointShaderObject();
+ SLANG_RETURN_ON_FAIL(object->init(device, entryPoint));
+ m_entryPoints.add(object);
+ }
+ return SLANG_OK;
+}
+
+SlangResult SLANG_MCALL createCPUDevice(const IDevice::Desc* desc, IDevice** outDevice)
+{
+ RefPtr<CPUDevice> result = new CPUDevice();
+ SLANG_RETURN_ON_FAIL(result->initialize(*desc));
+ *outDevice = result.detach();
+ return SLANG_OK;
+}
+
+}
diff --git a/tools/gfx/cpu/render-cpu.h b/tools/gfx/cpu/render-cpu.h
new file mode 100644
index 000000000..fca57aa4d
--- /dev/null
+++ b/tools/gfx/cpu/render-cpu.h
@@ -0,0 +1,11 @@
+// render-cpu.h
+#pragma once
+
+#include "../renderer-shared.h"
+
+namespace gfx
+{
+
+SlangResult SLANG_MCALL createCPUDevice(const IDevice::Desc* desc, IDevice** outDevice);
+
+}
diff --git a/tools/gfx/cuda/render-cuda.cpp b/tools/gfx/cuda/render-cuda.cpp
index d13045359..89aaa33aa 100644
--- a/tools/gfx/cuda/render-cuda.cpp
+++ b/tools/gfx/cuda/render-cuda.cpp
@@ -278,36 +278,17 @@ public:
List<SubObjectRangeInfo> subObjectRanges;
List<BindingRangeInfo> m_bindingRanges;
- slang::TypeLayoutReflection* unwrapParameterGroups(slang::TypeLayoutReflection* typeLayout)
- {
- for (;;)
- {
- if (!typeLayout->getType())
- {
- if (auto elementTypeLayout = typeLayout->getElementTypeLayout())
- typeLayout = elementTypeLayout;
- }
-
- switch (typeLayout->getKind())
- {
- default:
- return typeLayout;
-
- case slang::TypeReflection::Kind::ConstantBuffer:
- case slang::TypeReflection::Kind::ParameterBlock:
- typeLayout = typeLayout->getElementTypeLayout();
- continue;
- }
- }
- }
+ Index m_subObjectCount = 0;
+ Index m_resourceCount = 0;
CUDAShaderObjectLayout(RendererBase* renderer, slang::TypeLayoutReflection* layout)
{
initBase(renderer, layout);
Index subObjectCount = 0;
+ Index resourceCount = 0;
- m_elementTypeLayout = unwrapParameterGroups(layout);
+ m_elementTypeLayout = _unwrapParameterGroups(layout);
// Compute the binding ranges that are used to store
// the logical contents of the object in memory. These will relate
@@ -348,6 +329,8 @@ public:
break;
default:
+ baseIndex = resourceCount;
+ resourceCount += count;
break;
}
@@ -359,6 +342,9 @@ public:
m_bindingRanges.add(bindingRangeInfo);
}
+ m_subObjectCount = subObjectCount;
+ m_resourceCount = resourceCount;
+
SlangInt subObjectRangeCount = m_elementTypeLayout->getSubObjectRangeCount();
for (SlangInt r = 0; r < subObjectRangeCount; ++r)
{
@@ -387,6 +373,9 @@ public:
subObjectRanges.add(subObjectRange);
}
}
+
+ Index getResourceCount() const { return m_resourceCount; }
+ Index getSubObjectCount() const { return m_subObjectCount; }
};
class CUDAProgramLayout : public CUDAShaderObjectLayout
@@ -503,6 +492,11 @@ public:
{
auto subObjectIndex =
getLayout()->m_bindingRanges[offset.bindingRangeIndex].baseIndex + offset.bindingArrayIndex;
+
+ SLANG_ASSERT(subObjectIndex < objects.getCount());
+ if(subObjectIndex >= objects.getCount())
+ return SLANG_E_INVALID_ARG;
+
if (subObjectIndex >= objects.getCount())
{
*object = nullptr;
@@ -525,8 +519,6 @@ public:
auto subObjectIndex = bindingRange.baseIndex + offset.bindingArrayIndex;
auto subObject = dynamic_cast<CUDAShaderObject*>(object);
- if (subObjectIndex >= objects.getCount())
- objects.setCount(subObjectIndex + 1);
// TODO: We should really not need to retain the objects here
objects[subObjectIndex] = subObject;
@@ -635,10 +627,19 @@ public:
virtual SLANG_NO_THROW Result SLANG_MCALL
setResource(ShaderOffset const& offset, IResourceView* resourceView)
{
+ auto layout = getLayout();
+
+ auto bindingRangeIndex = offset.bindingRangeIndex;
+ SLANG_ASSERT(bindingRangeIndex >= 0);
+ SLANG_ASSERT(bindingRangeIndex < layout->m_bindingRanges.getCount());
+
+ auto& bindingRange = layout->m_bindingRanges[bindingRangeIndex];
+
+ auto viewIndex = bindingRange.baseIndex + offset.bindingArrayIndex;
auto cudaView = dynamic_cast<CUDAResourceView*>(resourceView);
- if (offset.bindingRangeIndex >= resources.getCount())
- resources.setCount(offset.bindingRangeIndex + 1);
- resources[offset.bindingRangeIndex] = cudaView;
+
+ resources[viewIndex] = cudaView;
+
if (cudaView->textureResource)
{
if (cudaView->desc.type == IResourceView::Type::UnorderedAccess)
@@ -2059,9 +2060,15 @@ SlangResult CUDAShaderObject::init(IDevice* device, CUDAShaderObjectLayout* type
initBuffer(device, uniformSize);
}
- // If the layout specifies that we have any sub-objects, then
- // we need to size the array to account for them.
+ // If the layout specifies that we have any resources or sub-objects,
+ // then we need to size the appropriate arrays to account for them.
+ //
+ // Note: the counts here are the *total* number of resources/sub-objects
+ // and not just the number of resource/sub-object ranges.
//
+ resources.setCount(typeLayout->getResourceCount());
+ objects.setCount(typeLayout->getSubObjectCount());
+
Index subObjectCount = slangLayout->getSubObjectRangeCount();
objects.setCount(subObjectCount);
@@ -2087,11 +2094,13 @@ SlangResult CUDAShaderObject::init(IDevice* device, CUDAShaderObjectLayout* type
{
RefPtr<CUDAShaderObject> subObject = new CUDAShaderObject();
SLANG_RETURN_ON_FAIL(subObject->init(device, subObjectLayout));
- objects[bindingRangeInfo.baseIndex + i] = subObject;
+
ShaderOffset offset;
offset.uniformOffset = bindingRangeInfo.uniformOffset + sizeof(void*) * i;
- if (subObject->bufferResource)
- SLANG_RETURN_ON_FAIL(setData(offset, &subObject->bufferResource->m_cudaMemory, sizeof(void*)));
+ offset.bindingRangeIndex = subObjectRange.bindingRangeIndex;
+ offset.bindingArrayIndex = i;
+
+ SLANG_RETURN_ON_FAIL(setObject(offset, subObject));
}
}
return SLANG_OK;
diff --git a/tools/gfx/render-graphics-common.cpp b/tools/gfx/render-graphics-common.cpp
index 10713e92b..5ae148ea0 100644
--- a/tools/gfx/render-graphics-common.cpp
+++ b/tools/gfx/render-graphics-common.cpp
@@ -109,29 +109,6 @@ public:
}
}
- slang::TypeLayoutReflection* unwrapParameterGroups(slang::TypeLayoutReflection* typeLayout)
- {
- for (;;)
- {
- if (!typeLayout->getType())
- {
- if (auto elementTypeLayout = typeLayout->getElementTypeLayout())
- typeLayout = elementTypeLayout;
- }
-
- switch (typeLayout->getKind())
- {
- default:
- return typeLayout;
-
- case slang::TypeReflection::Kind::ConstantBuffer:
- case slang::TypeReflection::Kind::ParameterBlock:
- typeLayout = typeLayout->getElementTypeLayout();
- continue;
- }
- }
- }
-
void _addDescriptorSets(
slang::TypeLayoutReflection* typeLayout,
slang::VariableLayoutReflection* varLayout = nullptr)
@@ -178,7 +155,7 @@ public:
Result setElementTypeLayout(slang::TypeLayoutReflection* typeLayout)
{
- typeLayout = unwrapParameterGroups(typeLayout);
+ typeLayout = _unwrapParameterGroups(typeLayout);
m_elementTypeLayout = typeLayout;
@@ -414,8 +391,8 @@ public:
struct Builder : Super::Builder
{
- Builder(IDevice* renderer)
- : Super::Builder(static_cast<RendererBase*>(renderer))
+ Builder(IDevice* device)
+ : Super::Builder(static_cast<RendererBase*>(device))
{}
Result build(EntryPointLayout** outLayout)
@@ -1249,7 +1226,7 @@ protected:
return SLANG_OK;
// Once we have computed how large the buffer should be, we can allocate
- // it using the existing public `IRenderer` API.
+ // it using the existing public `IDevice` API.
//
IDevice* device = getRenderer();
IBufferResource::Desc bufferDesc;
diff --git a/tools/gfx/render.cpp b/tools/gfx/render.cpp
index 4ecb52287..e7d30b728 100644
--- a/tools/gfx/render.cpp
+++ b/tools/gfx/render.cpp
@@ -7,6 +7,7 @@
#include "open-gl/render-gl.h"
#include "vulkan/render-vk.h"
#include "cuda/render-cuda.h"
+#include "cpu/render-cpu.h"
#include <cstring>
namespace gfx {
@@ -97,6 +98,11 @@ extern "C"
return createVKDevice(desc, outDevice);
}
#endif
+ case DeviceType::CPU:
+ {
+ return createCPUDevice(desc, outDevice);
+ }
+ break;
default:
return SLANG_FAIL;
@@ -154,5 +160,4 @@ extern "C"
}
}
-
} // renderer_test
diff --git a/tools/gfx/renderer-shared.h b/tools/gfx/renderer-shared.h
index 9fe9768f4..2a77dcb93 100644
--- a/tools/gfx/renderer-shared.h
+++ b/tools/gfx/renderer-shared.h
@@ -149,6 +149,30 @@ protected:
slang::TypeLayoutReflection* m_elementTypeLayout = nullptr;
ShaderComponentID m_componentID = 0;
+ static slang::TypeLayoutReflection* _unwrapParameterGroups(slang::TypeLayoutReflection* typeLayout)
+ {
+ for (;;)
+ {
+ if (!typeLayout->getType())
+ {
+ if (auto elementTypeLayout = typeLayout->getElementTypeLayout())
+ typeLayout = elementTypeLayout;
+ }
+
+ switch (typeLayout->getKind())
+ {
+ default:
+ return typeLayout;
+
+ case slang::TypeReflection::Kind::ConstantBuffer:
+ case slang::TypeReflection::Kind::ParameterBlock:
+ typeLayout = typeLayout->getElementTypeLayout();
+ continue;
+ }
+ }
+ }
+
+
public:
RendererBase* getDevice() { return m_renderer; }
diff --git a/tools/render-test/cpu-compute-util.cpp b/tools/render-test/cpu-compute-util.cpp
index 7c9103cb3..6682eef1a 100644
--- a/tools/render-test/cpu-compute-util.cpp
+++ b/tools/render-test/cpu-compute-util.cpp
@@ -52,15 +52,15 @@ struct ValueTexture : public CPUComputeUtil::Resource, public CPPPrelude::ITextu
{
return _calcMipDims(mipLevel, m_dims);
}
- virtual void Load(const int32_t* loc, void* out) SLANG_OVERRIDE
+ virtual void Load(const int32_t* loc, void* out, size_t dataSize) SLANG_OVERRIDE
{
_set(out);
}
- virtual void Sample(CPPPrelude::SamplerState samplerState, const float* loc, void* out) SLANG_OVERRIDE
+ virtual void Sample(CPPPrelude::SamplerState samplerState, const float* loc, void* out, size_t dataSize) SLANG_OVERRIDE
{
_set(out);
}
- virtual void SampleLevel(CPPPrelude::SamplerState samplerState, const float* loc, float level, void* out) SLANG_OVERRIDE
+ virtual void SampleLevel(CPPPrelude::SamplerState samplerState, const float* loc, float level, void* out, size_t dataSize) SLANG_OVERRIDE
{
_set(out);
}
@@ -201,9 +201,15 @@ struct FloatRWTexture : public CPUComputeUtil::Resource, public CPPPrelude::IRWT
{
return _calcMipDims(mipLevel, m_dims);
}
- virtual void Load(const int32_t* loc, void* out) SLANG_OVERRIDE { m_data.getAt((const uint32_t*)loc, (float*)out); }
+ virtual void Load(const int32_t* loc, void* out, size_t dataSize) SLANG_OVERRIDE { m_data.getAt((const uint32_t*)loc, (float*)out); }
virtual void* refAt(const uint32_t* loc) SLANG_OVERRIDE { return m_data.getAt(loc); }
+ virtual void Sample(CPPPrelude::SamplerState samplerState, const float* loc, void* out, size_t dataSize) SLANG_OVERRIDE
+ {}
+
+ virtual void SampleLevel(CPPPrelude::SamplerState samplerState, const float* loc, float level, void* out, size_t dataSize) SLANG_OVERRIDE
+ {}
+
FloatRWTexture(int elementCount, const CPPPrelude::TextureDimensions& inDims, float initialValue):
m_dims(inDims)
{
diff --git a/tools/render-test/render-test-main.cpp b/tools/render-test/render-test-main.cpp
index e13642c5c..15100e2a5 100644
--- a/tools/render-test/render-test-main.cpp
+++ b/tools/render-test/render-test-main.cpp
@@ -1294,7 +1294,7 @@ static SlangResult _innerMain(Slang::StdWriters* stdWriters, SlangSession* sessi
}
// If it's CPU testing we don't need a window or a renderer
- if (options.deviceType == DeviceType::CPU)
+ if (options.deviceType == DeviceType::CPU && !options.useShaderObjects)
{
// Check we have all the required features
for (const auto& renderFeature : options.renderFeatures)
diff --git a/tools/render-test/shader-renderer-util.h b/tools/render-test/shader-renderer-util.h
index ecb8fc8bb..9d583331f 100644
--- a/tools/render-test/shader-renderer-util.h
+++ b/tools/render-test/shader-renderer-util.h
@@ -73,13 +73,13 @@ struct ShaderRendererUtil
bool isOutput,
size_t bufferSize,
const void* initData,
- IDevice* renderer,
+ IDevice* device,
ComPtr<IBufferResource>& bufferOut);
/// Create BindingState::Desc from the contents of layout
static Slang::Result createBindingState(
const ShaderInputLayout& layout,
- IDevice* renderer,
+ IDevice* device,
IBufferResource* addedConstantBuffer,
BindingStateImpl** outBindingState);
};