From 3fe4f5398d524333e955ecb91be5646e86f3b2da Mon Sep 17 00:00:00 2001 From: Dietrich Geisler Date: Wed, 24 Jun 2020 17:22:58 -0400 Subject: Heterogeneous example (#1399) * Introduced heterogeneous example. Example includes C++ source and header files, and does not currently make use of the associated slang file when building. The intent of this commit is to introduce the example as a baseline for later updates as the heterogeneous model is expanded. * Changing namespace * Renamed and rewrote README * Updated example to account for compiler updates * Updated path Co-authored-by: Tim Foley --- examples/cpu-hello-world/main.cpp | 2 + examples/heterogeneous-hello-world/README.md | 4 + .../heterogeneous-hello-world.vcxproj | 185 +++++++++++++++++++++ .../heterogeneous-hello-world.vcxproj.filters | 21 +++ examples/heterogeneous-hello-world/main.cpp | 108 ++++++++++++ examples/heterogeneous-hello-world/shader.cpp | 126 ++++++++++++++ examples/heterogeneous-hello-world/shader.slang | 16 ++ premake5.lua | 3 + slang.sln | 11 ++ tools/render-test/cpu-compute-util.cpp | 2 + 10 files changed, 478 insertions(+) create mode 100644 examples/heterogeneous-hello-world/README.md create mode 100644 examples/heterogeneous-hello-world/heterogeneous-hello-world.vcxproj create mode 100644 examples/heterogeneous-hello-world/heterogeneous-hello-world.vcxproj.filters create mode 100644 examples/heterogeneous-hello-world/main.cpp create mode 100644 examples/heterogeneous-hello-world/shader.cpp create mode 100644 examples/heterogeneous-hello-world/shader.slang diff --git a/examples/cpu-hello-world/main.cpp b/examples/cpu-hello-world/main.cpp index 8f22aac6d..898347b47 100644 --- a/examples/cpu-hello-world/main.cpp +++ b/examples/cpu-hello-world/main.cpp @@ -40,6 +40,8 @@ using namespace Slang; #define SLANG_PRELUDE_NAMESPACE CPPPrelude #include "../../prelude/slang-cpp-types.h" +struct UniformState; + static SlangResult _innerMain(int argc, char** argv) { // First, we need to create a 
"session" for interacting with the Slang diff --git a/examples/heterogeneous-hello-world/README.md b/examples/heterogeneous-hello-world/README.md new file mode 100644 index 000000000..709652922 --- /dev/null +++ b/examples/heterogeneous-hello-world/README.md @@ -0,0 +1,4 @@ +Slang "CPU Hello World Heterogeneous" Example +=============================== + +This example is a work-in-progress to illustrate how a heterogeneous programming example might work. It should NOT be used as a reference for working Slang code yet. \ No newline at end of file diff --git a/examples/heterogeneous-hello-world/heterogeneous-hello-world.vcxproj b/examples/heterogeneous-hello-world/heterogeneous-hello-world.vcxproj new file mode 100644 index 000000000..d80fbc30b --- /dev/null +++ b/examples/heterogeneous-hello-world/heterogeneous-hello-world.vcxproj @@ -0,0 +1,185 @@ + + + + + Debug + Win32 + + + Debug + x64 + + + Release + Win32 + + + Release + x64 + + + + {150CAA5A-0177-6A66-AA92-CFCB96DC2D49} + true + Win32Proj + heterogeneous-hello-world + + + + Application + true + Unicode + v140 + + + Application + true + Unicode + v140 + + + Application + false + Unicode + v140 + + + Application + false + Unicode + v140 + + + + + + + + + + + + + + + + + + + true + ..\..\bin\windows-x86\debug\ + ..\..\intermediate\windows-x86\debug\heterogeneous-hello-world\ + heterogeneous-hello-world + .exe + + + true + ..\..\bin\windows-x64\debug\ + ..\..\intermediate\windows-x64\debug\heterogeneous-hello-world\ + heterogeneous-hello-world + .exe + + + false + ..\..\bin\windows-x86\release\ + ..\..\intermediate\windows-x86\release\heterogeneous-hello-world\ + heterogeneous-hello-world + .exe + + + false + ..\..\bin\windows-x64\release\ + ..\..\intermediate\windows-x64\release\heterogeneous-hello-world\ + heterogeneous-hello-world + .exe + + + + NotUsing + Level3 + _DEBUG;%(PreprocessorDefinitions) + ..\..;..\..\tools;%(AdditionalIncludeDirectories) + EditAndContinue + Disabled + MultiThreadedDebug + + + 
Console + true + + + + + NotUsing + Level3 + _DEBUG;%(PreprocessorDefinitions) + ..\..;..\..\tools;%(AdditionalIncludeDirectories) + EditAndContinue + Disabled + MultiThreadedDebug + + + Console + true + + + + + NotUsing + Level3 + NDEBUG;%(PreprocessorDefinitions) + ..\..;..\..\tools;%(AdditionalIncludeDirectories) + Full + true + true + false + true + MultiThreaded + + + Console + true + true + + + + + NotUsing + Level3 + NDEBUG;%(PreprocessorDefinitions) + ..\..;..\..\tools;%(AdditionalIncludeDirectories) + Full + true + true + false + true + MultiThreaded + + + Console + true + true + + + + + + + + + + + + {DB00DA62-0533-4AFD-B59F-A67D5B3A0808} + + + {F9BE7957-8399-899E-0C49-E714FDDD4B65} + + + {222F7498-B40C-4F3F-A704-DDEB91A4484A} + + + + + + \ No newline at end of file diff --git a/examples/heterogeneous-hello-world/heterogeneous-hello-world.vcxproj.filters b/examples/heterogeneous-hello-world/heterogeneous-hello-world.vcxproj.filters new file mode 100644 index 000000000..0d3970688 --- /dev/null +++ b/examples/heterogeneous-hello-world/heterogeneous-hello-world.vcxproj.filters @@ -0,0 +1,21 @@ + + + + + {E9C7FDCE-D52A-8D73-7EB0-C5296AF258F6} + + + + + Source Files + + + Source Files + + + + + Source Files + + + \ No newline at end of file diff --git a/examples/heterogeneous-hello-world/main.cpp b/examples/heterogeneous-hello-world/main.cpp new file mode 100644 index 000000000..c6475e6f5 --- /dev/null +++ b/examples/heterogeneous-hello-world/main.cpp @@ -0,0 +1,108 @@ +// main.cpp + +#include + +// This file implements an extremely simple example of loading and +// executing a Slang shader program on the CPU. +// +// More information about generation C++ or CPU code can be found in docs/cpu-target.md +// +// NOTE! This test will only run on a system correctly where slang can find a suitable +// C++ compiler - such as clang/gcc/visual studio +// +// The comments in the file will attempt to explain concepts as +// they are introduced. 
+// +// Of course, in order to use the Slang API, we need to include +// its header. We have set up the build options for this project +// so that it is as simple as: +#include + +// Allows use of ComPtr - which we can use to scope any 'com-like' pointers easily +#include +// Provides macros for handling SlangResult values easily +#include + +// This includes a useful small function for setting up the prelude (described more further below). +#include "../../source/core/slang-test-tool-util.h" + +// Slang namespace is used for elements support code (like core) which we use here +// for ComPtr<> and TestToolUtil +using namespace Slang; + +// Slang source is converted into C++ code which is compiled by a backend compiler. +// That process uses a 'prelude' which defines types and functions that are needed +// for everything else to work. +// +// We include the prelude here, so we can directly use the types as were used by the +// compiled code. It is not necessary to include the prelude, as long as memory is +// laid out in the manner that the generated slang code expects. +#define SLANG_PRELUDE_NAMESPACE CPPPrelude +#include "../../prelude/slang-cpp-types.h" + +struct UniformState +{ + CPPPrelude::RWStructuredBuffer ioBuffer; +}; + +extern"C" void computeMain(CPPPrelude::ComputeVaryingInput* varyingInput, void* params, void* uniformState); + + +static SlangResult _innerMain(int argc, char** argv) +{ + + // the uniformState will be passed as a pointer to the CPU code + UniformState uniformState; + + // The contents of the buffer are modified, so we'll copy it + const float startBufferContents[] = { 2.0f, -10.0f, -3.0f, 5.0f }; + float bufferContents[SLANG_COUNT_OF(startBufferContents)]; + memcpy(bufferContents, startBufferContents, sizeof(startBufferContents)); + + // Set up the ioBuffer such that it uses bufferContents. It is important to set the .count + // such that bounds checking can be performed in the kernel. 
+ uniformState.ioBuffer.data = bufferContents; + uniformState.ioBuffer.count = SLANG_COUNT_OF(bufferContents); + + // In shader.slang, the entry point is attributed with `[numthreads(4, 1, 1)]` meaning each group + // consists of 4 'threads' in x. Our input buffer is 4 wide, and we index the input array via `SV_DispatchThreadID` + // so we only need to run a single group to execute over all of the 4 elements here. + // The group range from { 0, 0, 0 } -> { 1, 1, 1 } means it will execute over the single group { 0, 0, 0 }. + + const CPPPrelude::uint3 startGroupID = { 0, 0, 0}; + const CPPPrelude::uint3 endGroupID = { 1, 1, 1 }; + + CPPPrelude::ComputeVaryingInput varyingInput; + varyingInput.startGroupID = startGroupID; + varyingInput.endGroupID = endGroupID; + + // We don't have any entry point parameters so that's passed as NULL + // We need to cast our definition of the uniform state to the undefined CPPPrelude::UniformState as + // that type is just a name to indicate what kind of thing needs to be passed in. + computeMain(&varyingInput, NULL, (UniformState*)&uniformState); + + // bufferContents holds the output + + // Print out the values before the computation + printf("Before:\n"); + for (float v : startBufferContents) + { + printf("%f, ", v); + } + printf("\n"); + + // Print out the values that the kernel produced + printf("After: \n"); + for (float v : bufferContents) + { + printf("%f, ", v); + } + printf("\n"); + + return SLANG_OK; +} + +int main(int argc, char** argv) +{ + return SLANG_SUCCEEDED(_innerMain(argc, argv)) ?
0 : -1; +} diff --git a/examples/heterogeneous-hello-world/shader.cpp b/examples/heterogeneous-hello-world/shader.cpp new file mode 100644 index 000000000..396b78cb4 --- /dev/null +++ b/examples/heterogeneous-hello-world/shader.cpp @@ -0,0 +1,126 @@ +#include "../../prelude/slang-cpp-prelude.h" + + +namespace { // anonymous + +#ifdef SLANG_PRELUDE_NAMESPACE +using namespace SLANG_PRELUDE_NAMESPACE; +#endif + +struct KernelContext; + + +#line 13 "shader.slang" +struct UniformState +{ + +#line 4 + RWStructuredBuffer ioBuffer_0; + + + +}; + +struct KernelContext +{ + UniformState* uniformState; + uint3 dispatchThreadID; + uint3 groupID; + uint3 groupDispatchThreadID; + uint3 calcGroupThreadID() const + { + uint3 v = { dispatchThreadID.x - groupDispatchThreadID.x, dispatchThreadID.y - groupDispatchThreadID.y, dispatchThreadID.z - groupDispatchThreadID.z }; + return v; + } + +#line 8 + void _computeMain() + + { + +#line 10 + uint32_t tid_0 = dispatchThreadID.x; + + float i_0 = (uniformState->ioBuffer_0)[tid_0]; + bool _S1 = i_0 < 0.50000000000000000000f; + +#line 13 + float _S2 = i_0 + i_0; + +#line 13 + float _S3 = (F32_sqrt((i_0))); + +#line 13 + float o_0 = _S1 ? 
_S2 : _S3; + + (uniformState->ioBuffer_0)[tid_0] = o_0; + +#line 8 + return; + } + +}; + +} // anonymous + +// [numthreads(4, 1, 1)] +SLANG_PRELUDE_EXPORT +void computeMain_Thread(ComputeThreadVaryingInput* varyingInput, void* params, void* uniformState) +{ + KernelContext context = {}; + context.uniformState = (UniformState*)uniformState; + context.dispatchThreadID = { + varyingInput->groupID.x * 4 + varyingInput->groupThreadID.x, + varyingInput->groupID.y * 1 + varyingInput->groupThreadID.y, + varyingInput->groupID.z * 1 + varyingInput->groupThreadID.z + }; + context._computeMain(); +} +// [numthreads(4, 1, 1)] +SLANG_PRELUDE_EXPORT +void computeMain_Group(ComputeVaryingInput* varyingInput, void* params, void* uniformState) +{ + KernelContext context = {}; + context.uniformState = (UniformState*)uniformState; + const uint3 start = { + varyingInput->startGroupID.x * 4, + varyingInput->startGroupID.y * 1, + varyingInput->startGroupID.z * 1 + }; + context.dispatchThreadID = start; + for (uint32_t x = start.x; x < start.x + 4; ++x) + { + context.dispatchThreadID.x = x; + context._computeMain(); + } +} +// [numthreads(4, 1, 1)] +SLANG_PRELUDE_EXPORT +void computeMain(ComputeVaryingInput* varyingInput, void* params, void* uniformState) +{ + KernelContext context = {}; + context.uniformState = (UniformState*)uniformState; + const uint3 start = { + varyingInput->startGroupID.x * 4, + varyingInput->startGroupID.y * 1, + varyingInput->startGroupID.z * 1 + }; + const uint3 end = { + varyingInput->endGroupID.x * 4, + varyingInput->endGroupID.y * 1, + varyingInput->endGroupID.z * 1 + }; + for (uint32_t z = start.z; z < end.z; ++z) + { + context.dispatchThreadID.z = z; + for (uint32_t y = start.y; y < end.y; ++y) + { + context.dispatchThreadID.y = y; + for (uint32_t x = start.x; x < end.x; ++x) + { + context.dispatchThreadID.x = x; + context._computeMain(); + } + } + } +} diff --git a/examples/heterogeneous-hello-world/shader.slang 
b/examples/heterogeneous-hello-world/shader.slang new file mode 100644 index 000000000..f650c3481 --- /dev/null +++ b/examples/heterogeneous-hello-world/shader.slang @@ -0,0 +1,16 @@ +// shader.slang + +//TEST_INPUT:ubuffer(random(float, 4096, -1.0, 1.0), stride=4):name=ioBuffer +RWStructuredBuffer ioBuffer; + +[shader("compute")] +[numthreads(4, 1, 1)] +void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) +{ + uint tid = dispatchThreadID.x; + + float i = ioBuffer[tid]; + float o = i < 0.5 ? (i + i) : sqrt(i); + + ioBuffer[tid] = o; +} diff --git a/premake5.lua b/premake5.lua index 2b3385b1c..b963d44df 100644 --- a/premake5.lua +++ b/premake5.lua @@ -489,6 +489,9 @@ end example "cpu-hello-world" kind "ConsoleApp" +example "heterogeneous-hello-world" + kind "ConsoleApp" + -- Most of the other projects have more interesting configuration going -- on, so let's walk through them in order of increasing complexity. -- diff --git a/slang.sln b/slang.sln index 40e7beaa0..411d9c3a8 100644 --- a/slang.sln +++ b/slang.sln @@ -11,6 +11,8 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "gpu-printing", "examples\gp EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "hello-world", "examples\hello-world\hello-world.vcxproj", "{010BE414-ED5B-CF56-16C0-BD18027062C0}" EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "heterogeneous-hello-world", "examples\heterogeneous-hello-world\heterogeneous-hello-world.vcxproj", "{150CAA5A-0177-6A66-AA92-CFCB96DC2D49}" +EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "model-viewer", "examples\model-viewer\model-viewer.vcxproj", "{2F8724C6-1BC3-2730-84D5-3F277030D04A}" EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "slang", "source\slang\slang.vcxproj", "{DB00DA62-0533-4AFD-B59F-A67D5B3A0808}" @@ -77,6 +79,14 @@ Global {010BE414-ED5B-CF56-16C0-BD18027062C0}.Release|Win32.Build.0 = Release|Win32 {010BE414-ED5B-CF56-16C0-BD18027062C0}.Release|x64.ActiveCfg = Release|x64 
{010BE414-ED5B-CF56-16C0-BD18027062C0}.Release|x64.Build.0 = Release|x64 + {150CAA5A-0177-6A66-AA92-CFCB96DC2D49}.Debug|Win32.ActiveCfg = Debug|Win32 + {150CAA5A-0177-6A66-AA92-CFCB96DC2D49}.Debug|Win32.Build.0 = Debug|Win32 + {150CAA5A-0177-6A66-AA92-CFCB96DC2D49}.Debug|x64.ActiveCfg = Debug|x64 + {150CAA5A-0177-6A66-AA92-CFCB96DC2D49}.Debug|x64.Build.0 = Debug|x64 + {150CAA5A-0177-6A66-AA92-CFCB96DC2D49}.Release|Win32.ActiveCfg = Release|Win32 + {150CAA5A-0177-6A66-AA92-CFCB96DC2D49}.Release|Win32.Build.0 = Release|Win32 + {150CAA5A-0177-6A66-AA92-CFCB96DC2D49}.Release|x64.ActiveCfg = Release|x64 + {150CAA5A-0177-6A66-AA92-CFCB96DC2D49}.Release|x64.Build.0 = Release|x64 {2F8724C6-1BC3-2730-84D5-3F277030D04A}.Debug|Win32.ActiveCfg = Debug|Win32 {2F8724C6-1BC3-2730-84D5-3F277030D04A}.Debug|Win32.Build.0 = Debug|Win32 {2F8724C6-1BC3-2730-84D5-3F277030D04A}.Debug|x64.ActiveCfg = Debug|x64 @@ -157,6 +167,7 @@ Global {4B47A364-37C4-96A7-6041-97BB4C1D333B} = {EB5FC2C6-D72D-B6CC-C0C1-26F3AC2E9231} {57C81DD3-4304-213D-AC16-39349871C957} = {EB5FC2C6-D72D-B6CC-C0C1-26F3AC2E9231} {010BE414-ED5B-CF56-16C0-BD18027062C0} = {EB5FC2C6-D72D-B6CC-C0C1-26F3AC2E9231} + {150CAA5A-0177-6A66-AA92-CFCB96DC2D49} = {EB5FC2C6-D72D-B6CC-C0C1-26F3AC2E9231} {2F8724C6-1BC3-2730-84D5-3F277030D04A} = {EB5FC2C6-D72D-B6CC-C0C1-26F3AC2E9231} {61F7EB00-7281-4BF3-9470-7C2EA92620C3} = {57B5AA5E-C340-1823-CC51-9B17385C7423} {C5ACCA6E-C04D-4B36-8516-3752B3C13C2F} = {57B5AA5E-C340-1823-CC51-9B17385C7423} diff --git a/tools/render-test/cpu-compute-util.cpp b/tools/render-test/cpu-compute-util.cpp index dc3ea1afa..70536afe9 100644 --- a/tools/render-test/cpu-compute-util.cpp +++ b/tools/render-test/cpu-compute-util.cpp @@ -12,6 +12,8 @@ #define SLANG_PRELUDE_NAMESPACE CPPPrelude #include "../../prelude/slang-cpp-types.h" +struct UniformState; + namespace renderer_test { using namespace Slang; -- cgit v1.2.3