From 3fe4f5398d524333e955ecb91be5646e86f3b2da Mon Sep 17 00:00:00 2001 From: Dietrich Geisler Date: Wed, 24 Jun 2020 17:22:58 -0400 Subject: Heterogeneous example (#1399) * Introduced heterogeneous example. Example includes C++ source and header files, and does not currently make use of the associated slang file when building. The intent of this commit is to introduce the example as a baseline for later updates as the heterogeneous model is expanded. * Changing namespace * Renamed and rewrote README * Updated example to account for compiler updates * Updated path Co-authored-by: Tim Foley --- examples/cpu-hello-world/main.cpp | 2 + examples/heterogeneous-hello-world/README.md | 4 + .../heterogeneous-hello-world.vcxproj | 185 +++++++++++++++++++++ .../heterogeneous-hello-world.vcxproj.filters | 21 +++ examples/heterogeneous-hello-world/main.cpp | 108 ++++++++++++ examples/heterogeneous-hello-world/shader.cpp | 126 ++++++++++++++ examples/heterogeneous-hello-world/shader.slang | 16 ++ 7 files changed, 462 insertions(+) create mode 100644 examples/heterogeneous-hello-world/README.md create mode 100644 examples/heterogeneous-hello-world/heterogeneous-hello-world.vcxproj create mode 100644 examples/heterogeneous-hello-world/heterogeneous-hello-world.vcxproj.filters create mode 100644 examples/heterogeneous-hello-world/main.cpp create mode 100644 examples/heterogeneous-hello-world/shader.cpp create mode 100644 examples/heterogeneous-hello-world/shader.slang (limited to 'examples') diff --git a/examples/cpu-hello-world/main.cpp b/examples/cpu-hello-world/main.cpp index 8f22aac6d..898347b47 100644 --- a/examples/cpu-hello-world/main.cpp +++ b/examples/cpu-hello-world/main.cpp @@ -40,6 +40,8 @@ using namespace Slang; #define SLANG_PRELUDE_NAMESPACE CPPPrelude #include "../../prelude/slang-cpp-types.h" +struct UniformState; + static SlangResult _innerMain(int argc, char** argv) { // First, we need to create a "session" for interacting with the Slang diff --git 
a/examples/heterogeneous-hello-world/README.md b/examples/heterogeneous-hello-world/README.md new file mode 100644 index 000000000..709652922 --- /dev/null +++ b/examples/heterogeneous-hello-world/README.md @@ -0,0 +1,4 @@ +Slang "CPU Hello World Heterogeneous" Example +=============================== + +This example is a work-in-progress to illustrate how a heterogeneous programming example might work. It should NOT be used as a reference for working Slang code yet. \ No newline at end of file diff --git a/examples/heterogeneous-hello-world/heterogeneous-hello-world.vcxproj b/examples/heterogeneous-hello-world/heterogeneous-hello-world.vcxproj new file mode 100644 index 000000000..d80fbc30b --- /dev/null +++ b/examples/heterogeneous-hello-world/heterogeneous-hello-world.vcxproj @@ -0,0 +1,185 @@ + + + + + Debug + Win32 + + + Debug + x64 + + + Release + Win32 + + + Release + x64 + + + + {150CAA5A-0177-6A66-AA92-CFCB96DC2D49} + true + Win32Proj + heterogeneous-hello-world + + + + Application + true + Unicode + v140 + + + Application + true + Unicode + v140 + + + Application + false + Unicode + v140 + + + Application + false + Unicode + v140 + + + + + + + + + + + + + + + + + + + true + ..\..\bin\windows-x86\debug\ + ..\..\intermediate\windows-x86\debug\heterogeneous-hello-world\ + heterogeneous-hello-world + .exe + + + true + ..\..\bin\windows-x64\debug\ + ..\..\intermediate\windows-x64\debug\heterogeneous-hello-world\ + heterogeneous-hello-world + .exe + + + false + ..\..\bin\windows-x86\release\ + ..\..\intermediate\windows-x86\release\heterogeneous-hello-world\ + heterogeneous-hello-world + .exe + + + false + ..\..\bin\windows-x64\release\ + ..\..\intermediate\windows-x64\release\heterogeneous-hello-world\ + heterogeneous-hello-world + .exe + + + + NotUsing + Level3 + _DEBUG;%(PreprocessorDefinitions) + ..\..;..\..\tools;%(AdditionalIncludeDirectories) + EditAndContinue + Disabled + MultiThreadedDebug + + + Console + true + + + + + NotUsing + Level3 + 
_DEBUG;%(PreprocessorDefinitions) + ..\..;..\..\tools;%(AdditionalIncludeDirectories) + EditAndContinue + Disabled + MultiThreadedDebug + + + Console + true + + + + + NotUsing + Level3 + NDEBUG;%(PreprocessorDefinitions) + ..\..;..\..\tools;%(AdditionalIncludeDirectories) + Full + true + true + false + true + MultiThreaded + + + Console + true + true + + + + + NotUsing + Level3 + NDEBUG;%(PreprocessorDefinitions) + ..\..;..\..\tools;%(AdditionalIncludeDirectories) + Full + true + true + false + true + MultiThreaded + + + Console + true + true + + + + + + + + + + + + {DB00DA62-0533-4AFD-B59F-A67D5B3A0808} + + + {F9BE7957-8399-899E-0C49-E714FDDD4B65} + + + {222F7498-B40C-4F3F-A704-DDEB91A4484A} + + + + + + \ No newline at end of file diff --git a/examples/heterogeneous-hello-world/heterogeneous-hello-world.vcxproj.filters b/examples/heterogeneous-hello-world/heterogeneous-hello-world.vcxproj.filters new file mode 100644 index 000000000..0d3970688 --- /dev/null +++ b/examples/heterogeneous-hello-world/heterogeneous-hello-world.vcxproj.filters @@ -0,0 +1,21 @@ + + + + + {E9C7FDCE-D52A-8D73-7EB0-C5296AF258F6} + + + + + Source Files + + + Source Files + + + + + Source Files + + + \ No newline at end of file diff --git a/examples/heterogeneous-hello-world/main.cpp b/examples/heterogeneous-hello-world/main.cpp new file mode 100644 index 000000000..c6475e6f5 --- /dev/null +++ b/examples/heterogeneous-hello-world/main.cpp @@ -0,0 +1,108 @@ +// main.cpp + +#include + +// This file implements an extremely simple example of loading and +// executing a Slang shader program on the CPU. +// +// More information about generating C++ or CPU code can be found in docs/cpu-target.md +// +// NOTE! This test will only run correctly on a system where slang can find a suitable +// C++ compiler - such as clang/gcc/visual studio +// +// The comments in the file will attempt to explain concepts as +// they are introduced.
+// +// Of course, in order to use the Slang API, we need to include +// its header. We have set up the build options for this project +// so that it is as simple as: +#include + +// Allows use of ComPtr - which we can use to scope any 'com-like' pointers easily +#include +// Provides macros for handling SlangResult values easily +#include + +// This includes a useful small function for setting up the prelude (described further below). +#include "../../source/core/slang-test-tool-util.h" + +// The Slang namespace is used for elements of the support code (like core) which we use here +// for ComPtr<> and TestToolUtil +using namespace Slang; + +// Slang source is converted into C++ code which is compiled by a backend compiler. +// That process uses a 'prelude' which defines types and functions that are needed +// for everything else to work. +// +// We include the prelude here, so we can directly use the same types as are used by the +// compiled code. It is not necessary to include the prelude, as long as memory is +// laid out in the manner that the generated slang code expects. +#define SLANG_PRELUDE_NAMESPACE CPPPrelude +#include "../../prelude/slang-cpp-types.h" + +struct UniformState +{ + CPPPrelude::RWStructuredBuffer ioBuffer; +}; + +extern"C" void computeMain(CPPPrelude::ComputeVaryingInput* varyingInput, void* params, void* uniformState); + + +static SlangResult _innerMain(int argc, char** argv) +{ + + // The uniformState will be passed as a pointer to the CPU code + UniformState uniformState; + + // The contents of the buffer are modified, so we'll copy them + const float startBufferContents[] = { 2.0f, -10.0f, -3.0f, 5.0f }; + float bufferContents[SLANG_COUNT_OF(startBufferContents)]; + memcpy(bufferContents, startBufferContents, sizeof(startBufferContents)); + + // Set up the ioBuffer such that it uses bufferContents. It is important to set the .count + // such that bounds checking can be performed in the kernel.
+ uniformState.ioBuffer.data = bufferContents; + uniformState.ioBuffer.count = SLANG_COUNT_OF(bufferContents); + + // In shader.slang, the entry point is attributed with `[numthreads(4, 1, 1)]` meaning each group + // consists of 4 'threads' in x. Our input buffer is 4 wide, and we index the input array via `SV_DispatchThreadID` + // so we only need to run a single group to execute over all of the 4 elements here. + // The group range from { 0, 0, 0 } -> { 1, 1, 1 } means it will execute over the single group { 0, 0, 0 }. + + const CPPPrelude::uint3 startGroupID = { 0, 0, 0}; + const CPPPrelude::uint3 endGroupID = { 1, 1, 1 }; + + CPPPrelude::ComputeVaryingInput varyingInput; + varyingInput.startGroupID = startGroupID; + varyingInput.endGroupID = endGroupID; + + // We don't have any entry point parameters, so that is passed as NULL. + // computeMain is declared above with a `void*` uniform-state parameter, so the cast to our own + // UniformState* only documents what kind of thing is being passed in. + computeMain(&varyingInput, NULL, (UniformState*)&uniformState); + + // bufferContents holds the output + + // Print out the values before the computation + printf("Before:\n"); + for (float v : startBufferContents) + { + printf("%f, ", v); + } + printf("\n"); + + // Print out the values that the kernel produced + printf("After: \n"); + for (float v : bufferContents) + { + printf("%f, ", v); + } + printf("\n"); + + return SLANG_OK; +} + +int main(int argc, char** argv) +{ + return SLANG_SUCCEEDED(_innerMain(argc, argv)) ?
0 : -1; +} diff --git a/examples/heterogeneous-hello-world/shader.cpp b/examples/heterogeneous-hello-world/shader.cpp new file mode 100644 index 000000000..396b78cb4 --- /dev/null +++ b/examples/heterogeneous-hello-world/shader.cpp @@ -0,0 +1,126 @@ +#include "../../prelude/slang-cpp-prelude.h" + + +namespace { // anonymous + +#ifdef SLANG_PRELUDE_NAMESPACE +using namespace SLANG_PRELUDE_NAMESPACE; +#endif + +struct KernelContext; + + +#line 13 "shader.slang" +struct UniformState +{ + +#line 4 + RWStructuredBuffer ioBuffer_0; + + + +}; + +struct KernelContext +{ + UniformState* uniformState; + uint3 dispatchThreadID; + uint3 groupID; + uint3 groupDispatchThreadID; + uint3 calcGroupThreadID() const + { + uint3 v = { dispatchThreadID.x - groupDispatchThreadID.x, dispatchThreadID.y - groupDispatchThreadID.y, dispatchThreadID.z - groupDispatchThreadID.z }; + return v; + } + +#line 8 + void _computeMain() + + { + +#line 10 + uint32_t tid_0 = dispatchThreadID.x; + + float i_0 = (uniformState->ioBuffer_0)[tid_0]; + bool _S1 = i_0 < 0.50000000000000000000f; + +#line 13 + float _S2 = i_0 + i_0; + +#line 13 + float _S3 = (F32_sqrt((i_0))); + +#line 13 + float o_0 = _S1 ?
_S2 : _S3; + + (uniformState->ioBuffer_0)[tid_0] = o_0; + +#line 8 + return; + } + +}; + +} // anonymous + +// [numthreads(4, 1, 1)] - per-thread entry point: runs the kernel once for the thread given by groupID and groupThreadID +SLANG_PRELUDE_EXPORT +void computeMain_Thread(ComputeThreadVaryingInput* varyingInput, void* params, void* uniformState) +{ + KernelContext context = {}; + context.uniformState = (UniformState*)uniformState; + context.dispatchThreadID = { + varyingInput->groupID.x * 4 + varyingInput->groupThreadID.x, + varyingInput->groupID.y * 1 + varyingInput->groupThreadID.y, + varyingInput->groupID.z * 1 + varyingInput->groupThreadID.z + }; + context._computeMain(); +} +// [numthreads(4, 1, 1)] - per-group entry point: runs the kernel for the 4 x-threads of the group at startGroupID +SLANG_PRELUDE_EXPORT +void computeMain_Group(ComputeVaryingInput* varyingInput, void* params, void* uniformState) +{ + KernelContext context = {}; + context.uniformState = (UniformState*)uniformState; + const uint3 start = { + varyingInput->startGroupID.x * 4, + varyingInput->startGroupID.y * 1, + varyingInput->startGroupID.z * 1 + }; + context.dispatchThreadID = start; + for (uint32_t x = start.x; x < start.x + 4; ++x) + { + context.dispatchThreadID.x = x; + context._computeMain(); + } +} +// [numthreads(4, 1, 1)] - range entry point: runs the kernel for every thread of the groups in [startGroupID, endGroupID) +SLANG_PRELUDE_EXPORT +void computeMain(ComputeVaryingInput* varyingInput, void* params, void* uniformState) +{ + KernelContext context = {}; + context.uniformState = (UniformState*)uniformState; + const uint3 start = { + varyingInput->startGroupID.x * 4, + varyingInput->startGroupID.y * 1, + varyingInput->startGroupID.z * 1 + }; + const uint3 end = { + varyingInput->endGroupID.x * 4, + varyingInput->endGroupID.y * 1, + varyingInput->endGroupID.z * 1 + }; + for (uint32_t z = start.z; z < end.z; ++z) + { + context.dispatchThreadID.z = z; + for (uint32_t y = start.y; y < end.y; ++y) + { + context.dispatchThreadID.y = y; + for (uint32_t x = start.x; x < end.x; ++x) + { + context.dispatchThreadID.x = x; + context._computeMain(); + } + } + } +} diff --git a/examples/heterogeneous-hello-world/shader.slang
b/examples/heterogeneous-hello-world/shader.slang new file mode 100644 index 000000000..f650c3481 --- /dev/null +++ b/examples/heterogeneous-hello-world/shader.slang @@ -0,0 +1,16 @@ +// shader.slang + +//TEST_INPUT:ubuffer(random(float, 4096, -1.0, 1.0), stride=4):name=ioBuffer +RWStructuredBuffer ioBuffer; + +[shader("compute")] +[numthreads(4, 1, 1)] +void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) +{ + uint tid = dispatchThreadID.x; // each thread handles the buffer element at its x dispatch index + + float i = ioBuffer[tid]; + float o = i < 0.5 ? (i + i) : sqrt(i); // double values below 0.5, otherwise take the square root + + ioBuffer[tid] = o; +} -- cgit v1.2.3