From dc76577e2f1d851d6eb4963fa24d310d847b6786 Mon Sep 17 00:00:00 2001 From: jsmall-nvidia Date: Mon, 23 Sep 2019 17:42:14 -0400 Subject: CPU Hello World (#1065) * First pass on cpu-hello-world application. * Improvements to cpu-hello-world * Improved documentation around cpu-hello-world. Added information about C++/CPU targets to README.md Referenced cpu-target. --- examples/cpu-hello-world/README.md | 10 + examples/cpu-hello-world/cpu-hello-world.vcxproj | 184 ++++++++++++++++++ .../cpu-hello-world.vcxproj.filters | 18 ++ examples/cpu-hello-world/main.cpp | 210 +++++++++++++++++++++ examples/cpu-hello-world/shader.slang | 15 ++ 5 files changed, 437 insertions(+) create mode 100644 examples/cpu-hello-world/README.md create mode 100644 examples/cpu-hello-world/cpu-hello-world.vcxproj create mode 100644 examples/cpu-hello-world/cpu-hello-world.vcxproj.filters create mode 100644 examples/cpu-hello-world/main.cpp create mode 100644 examples/cpu-hello-world/shader.slang (limited to 'examples') diff --git a/examples/cpu-hello-world/README.md b/examples/cpu-hello-world/README.md new file mode 100644 index 000000000..32a8bf805 --- /dev/null +++ b/examples/cpu-hello-world/README.md @@ -0,0 +1,10 @@ +Slang "CPU Hello World" Example +=============================== + +The goal of this example is to demonstrate an almost minimal application that uses Slang to produce and use a kernel that is run on CPU. + +The `shader.slang` file contains a compute shader entry point. The shader code should compile as either Slang or HLSL code (that is, this example does not show off any new Slang language features). + +The `main.cpp` file contains the C++ application code, showing how to use the Slang API to load and compile the shader code to produce and execute CPU code. 
+ +This example is not necessarily representative of best practices for integrating Slang into a production engine; the goal is merely to use the minimum amount of code possible to demonstrate a complete application that uses Slang. diff --git a/examples/cpu-hello-world/cpu-hello-world.vcxproj b/examples/cpu-hello-world/cpu-hello-world.vcxproj new file mode 100644 index 000000000..d150aa51d --- /dev/null +++ b/examples/cpu-hello-world/cpu-hello-world.vcxproj @@ -0,0 +1,184 @@ + + + + + Debug + Win32 + + + Debug + x64 + + + Release + Win32 + + + Release + x64 + + + + {4B47A364-37C4-96A7-6041-97BB4C1D333B} + true + Win32Proj + cpu-hello-world + + + + Application + true + Unicode + v140 + + + Application + true + Unicode + v140 + + + Application + false + Unicode + v140 + + + Application + false + Unicode + v140 + + + + + + + + + + + + + + + + + + + true + ..\..\bin\windows-x86\debug\ + ..\..\intermediate\windows-x86\debug\cpu-hello-world\ + cpu-hello-world + .exe + + + true + ..\..\bin\windows-x64\debug\ + ..\..\intermediate\windows-x64\debug\cpu-hello-world\ + cpu-hello-world + .exe + + + false + ..\..\bin\windows-x86\release\ + ..\..\intermediate\windows-x86\release\cpu-hello-world\ + cpu-hello-world + .exe + + + false + ..\..\bin\windows-x64\release\ + ..\..\intermediate\windows-x64\release\cpu-hello-world\ + cpu-hello-world + .exe + + + + NotUsing + Level3 + _DEBUG;%(PreprocessorDefinitions) + ..\..;..\..\tools;%(AdditionalIncludeDirectories) + EditAndContinue + Disabled + MultiThreadedDebug + + + Console + true + + + + + NotUsing + Level3 + _DEBUG;%(PreprocessorDefinitions) + ..\..;..\..\tools;%(AdditionalIncludeDirectories) + EditAndContinue + Disabled + MultiThreadedDebug + + + Console + true + + + + + NotUsing + Level3 + NDEBUG;%(PreprocessorDefinitions) + ..\..;..\..\tools;%(AdditionalIncludeDirectories) + Full + true + true + false + true + MultiThreaded + + + Console + true + true + + + + + NotUsing + Level3 + NDEBUG;%(PreprocessorDefinitions) + 
..\..;..\..\tools;%(AdditionalIncludeDirectories) + Full + true + true + false + true + MultiThreaded + + + Console + true + true + + + + + + + + + + + {DB00DA62-0533-4AFD-B59F-A67D5B3A0808} + + + {F9BE7957-8399-899E-0C49-E714FDDD4B65} + + + {222F7498-B40C-4F3F-A704-DDEB91A4484A} + + + + + + \ No newline at end of file diff --git a/examples/cpu-hello-world/cpu-hello-world.vcxproj.filters b/examples/cpu-hello-world/cpu-hello-world.vcxproj.filters new file mode 100644 index 000000000..1ceeb57d7 --- /dev/null +++ b/examples/cpu-hello-world/cpu-hello-world.vcxproj.filters @@ -0,0 +1,18 @@ + + + + + {E9C7FDCE-D52A-8D73-7EB0-C5296AF258F6} + + + + + Source Files + + + + + Source Files + + + \ No newline at end of file diff --git a/examples/cpu-hello-world/main.cpp b/examples/cpu-hello-world/main.cpp new file mode 100644 index 000000000..4ece04da2 --- /dev/null +++ b/examples/cpu-hello-world/main.cpp @@ -0,0 +1,210 @@ +// main.cpp + +#include <stdio.h> + +// This file implements an extremely simple example of loading and +// executing a Slang shader program on the CPU. +// +// More information about generating C++ or CPU code can be found in docs/cpu-target.md +// +// NOTE! This test will only run correctly on a system where slang can find a suitable +// C++ compiler - such as clang/gcc/visual studio +// +// The comments in the file will attempt to explain concepts as +// they are introduced. +// +// Of course, in order to use the Slang API, we need to include +// its header. We have set up the build options for this project +// so that it is as simple as: +#include <slang.h> + +// Allows use of ComPtr - which we can use to scope any 'com-like' pointers easily +#include <slang-com-ptr.h> +// Provides macros for handling SlangResult values easily +#include <slang-com-helper.h> + +// This includes a useful small function for setting up the prelude (described further below). 
+#include "../../source/core/slang-test-tool-util.h"
+
+// The Slang namespace is used by the support code (like core), which we use here
+// for ComPtr<> and TestToolUtil
+using namespace Slang;
+
+// Slang source is converted into C++ code which is compiled by a backend compiler.
+// That process uses a 'prelude' which defines types and functions that are needed
+// for everything else to work.
+//
+// We include the prelude here, so we can directly use the same types that are used by the
+// compiled code. It is not necessary to include the prelude, as long as memory is
+// laid out in the manner that the generated slang code expects.
+#define SLANG_PRELUDE_NAMESPACE CPPPrelude
+#include "../../prelude/slang-cpp-types.h"
+
+// Compiles `shader.slang` to host-callable CPU code through the Slang API, runs its
+// `computeMain` entry point over a small in-memory buffer, and prints the buffer
+// contents before and after execution.
+//
+// argc is unused. argv[0] is handed to TestToolUtil::setSessionDefaultPrelude, which
+// sets up the default prelude for the session.
+//
+// Returns SLANG_OK on success. Returns the failing SlangResult if compilation or
+// retrieval of the host-callable code fails, and SLANG_FAIL if the session cannot be
+// created or the entry point function cannot be found.
+static SlangResult _innerMain(int argc, char** argv)
+{
+    (void)argc;
+
+    // First, we need to create a "session" for interacting with the Slang
+    // compiler. This scopes all of our application's interactions
+    // with the Slang library. At the moment, creating a session causes
+    // Slang to load and validate its standard library, so this is a
+    // somewhat heavy-weight operation. When possible, an application
+    // should try to re-use the same session across multiple compiles.
+    //
+    // NOTE(review): per slang-com-ptr.h, constructing a ComPtr from a raw pointer adds a
+    // reference, whereas the pointer returned by spCreateSession is understood to already
+    // own one. We therefore attach() the pointer so the session is released when
+    // slangSession goes out of scope - confirm against the Slang version in use.
+    ComPtr<slang::IGlobalSession> slangSession;
+    slangSession.attach(spCreateSession(nullptr));
+    if (!slangSession)
+    {
+        return SLANG_FAIL;
+    }
+
+    // As touched on earlier, in order to generate the final executable code,
+    // the slang code is converted into C++, and that C++ needs a 'prelude' which
+    // is just the definitions that the generated code needs to work correctly.
+    // There is a simple default definition of a prelude provided in the prelude
+    // directory called 'slang-cpp-prelude.h'.
+    //
+    // We need to tell slang either the contents of the prelude, or suitable include/s
+    // that will work. The actual API call to set the prelude is `setDownstreamCompilerPrelude`
+    // and this just sets for a specific backend a bit of text placed before generated code.
+    //
+    // Most downstream C++ compilers work on files. In that case slang may generate temporary
+    // files that contain the generated code. Typically the generated files will not be in the
+    // same directory as the original source so handling includes becomes awkward. The mechanism used here
+    // is for the prelude code to be an *absolute* path to the 'slang-cpp-prelude.h' - which means
+    // this will work wherever the generated code is, and allows accessing other files via relative paths.
+    //
+    // Look at the source to TestToolUtil::setSessionDefaultPrelude to see what's involved.
+    TestToolUtil::setSessionDefaultPrelude(argv[0], slangSession);
+
+    // A compile request represents a single invocation of the compiler,
+    // to process some inputs and produce outputs (or errors).
+    //
+    SlangCompileRequest* slangRequest = spCreateCompileRequest(slangSession);
+
+    // We would like to request CPU code that can be executed directly on the host -
+    // which is the 'SLANG_HOST_CALLABLE' target.
+    // If we wanted just a shared library/dll, we could have used SLANG_SHARED_LIBRARY.
+    const int targetIndex = spAddCodeGenTarget(slangRequest, SLANG_HOST_CALLABLE);
+
+    // A compile request can include one or more "translation units," which more or
+    // less amount to individual source files (think `.c` files, not the `.h` files they
+    // might include).
+    //
+    // For this example, our code will all be in the Slang language. The user may
+    // also specify HLSL input here, but that currently doesn't affect the compiler's
+    // behavior much.
+    //
+    const int translationUnitIndex = spAddTranslationUnit(slangRequest, SLANG_SOURCE_LANGUAGE_SLANG, nullptr);
+
+    // We will load source code for our translation unit from the file `shader.slang`.
+    // There are also variations of this API for adding source code from application-provided buffers.
+    //
+    spAddTranslationUnitSourceFile(slangRequest, translationUnitIndex, "shader.slang");
+
+    // Next we will specify the entry points we'd like to compile.
+    // It is often convenient to put more than one entry point in the same file,
+    // and the Slang API makes it convenient to use a single run of the compiler
+    // to compile all entry points.
+    //
+    const char entryPointName[] = "computeMain";
+    const int computeIndex = spAddEntryPoint(slangRequest, translationUnitIndex, entryPointName, SLANG_STAGE_COMPUTE);
+
+    // Once all of the input options for the compiler have been specified,
+    // we can invoke `spCompile` to run the compiler and see if any errors
+    // were detected.
+    //
+    const SlangResult compileRes = spCompile(slangRequest);
+
+    // Even if there were no errors that forced compilation to fail, the
+    // compiler may have produced "diagnostic" output such as warnings.
+    // We will go ahead and print that output here.
+    //
+    if (auto diagnostics = spGetDiagnosticOutput(slangRequest))
+    {
+        printf("%s", diagnostics);
+    }
+
+    // If compilation failed, there is no point in continuing any further.
+    if (SLANG_FAILED(compileRes))
+    {
+        spDestroyCompileRequest(slangRequest);
+        return compileRes;
+    }
+
+    // Get the 'shared library' (note that this doesn't necessarily have to be implemented as a shared library
+    // it's just an interface to executable code). We ask for the entry point and target we added above
+    // rather than hard coding their indices.
+    ComPtr<ISlangSharedLibrary> sharedLibrary;
+    const SlangResult callableRes = spGetEntryPointHostCallable(slangRequest, computeIndex, targetIndex, sharedLibrary.writeRef());
+
+    // Whether or not that succeeded, we no longer need the request
+    // unless we want to use reflection, for example to work out how 'UniformState' and 'UniformEntryPointParams' are laid out
+    // at runtime. We don't do that here - as we hard code the structures.
+    //
+    // The request is destroyed exactly once, here, so that no later exit path
+    // can either leak it or destroy it a second time.
+    spDestroyCompileRequest(slangRequest);
+    slangRequest = nullptr;
+
+    SLANG_RETURN_ON_FAIL(callableRes);
+
+    // Get the function we are going to execute
+    const auto func = reinterpret_cast<CPPPrelude::ComputeFunc>(sharedLibrary->findFuncByName(entryPointName));
+    if (!func)
+    {
+        return SLANG_FAIL;
+    }
+
+    // Define the uniform state structure that is *specific* to our shader defined in shader.slang
+    // The layout of the structure can be determined through reflection, or can be inferred from
+    // the original slang source. Look at the documentation in docs/cpu-target.md which describes
+    // how different resources map.
+    // The order of the resources is in the order that they are defined in the source.
+    struct UniformState
+    {
+        CPPPrelude::RWStructuredBuffer<float> ioBuffer;
+    };
+
+    // the uniformState will be passed as a pointer to the CPU code
+    UniformState uniformState;
+
+    // The contents of the buffer are modified, so we'll copy it
+    const float startBufferContents[] = { 2.0f, -10.0f, -3.0f, 5.0f };
+    float bufferContents[SLANG_COUNT_OF(startBufferContents)];
+    memcpy(bufferContents, startBufferContents, sizeof(startBufferContents));
+
+    // Set up the ioBuffer such that it uses bufferContents. It is important to set the .count
+    // such that bounds checking can be performed in the kernel.
+    uniformState.ioBuffer.data = bufferContents;
+    uniformState.ioBuffer.count = SLANG_COUNT_OF(bufferContents);
+
+    // In shader.slang, the entry point is attributed with `[numthreads(4, 1, 1)]` meaning each group
+    // consists of 4 'threads' in x. Our input buffer is 4 wide, and we index the input array via `SV_DispatchThreadID`
+    // so we only need to run a single group to execute over all of the 4 elements here.
+    // The group range from { 0, 0, 0 } -> { 1, 1, 1 } means it will execute over the single group { 0, 0, 0 }.
+
+    const CPPPrelude::uint3 startGroupID = { 0, 0, 0 };
+    const CPPPrelude::uint3 endGroupID = { 1, 1, 1 };
+
+    CPPPrelude::ComputeVaryingInput varyingInput;
+    varyingInput.startGroupID = startGroupID;
+    varyingInput.endGroupID = endGroupID;
+
+    // We don't have any entry point parameters so that's passed as nullptr
+    // We need to cast our definition of the uniform state to the undefined CPPPrelude::UniformState as
+    // that type is just a name to indicate what kind of thing needs to be passed in.
+    func(&varyingInput, nullptr, reinterpret_cast<CPPPrelude::UniformState*>(&uniformState));
+
+    // bufferContents holds the output
+
+    // Print out the values before the computation
+    printf("Before:\n");
+    for (float v : startBufferContents)
+    {
+        printf("%f, ", v);
+    }
+    printf("\n");
+
+    // Print out the values that the kernel produced
+    printf("After: \n");
+    for (float v : bufferContents)
+    {
+        printf("%f, ", v);
+    }
+    printf("\n");
+
+    return SLANG_OK;
+}
+
+int main(int argc, char** argv)
+{
+    return SLANG_SUCCEEDED(_innerMain(argc, argv)) ? 0 : -1;
+}
diff --git a/examples/cpu-hello-world/shader.slang b/examples/cpu-hello-world/shader.slang
new file mode 100644
index 000000000..bac5832a8
--- /dev/null
+++ b/examples/cpu-hello-world/shader.slang
@@ -0,0 +1,15 @@
+// shader.slang
+
+//TEST_INPUT:ubuffer(random(float, 4096, -1.0, 1.0), stride=4):dxbinding(0),glbinding(0),name=ioBuffer
+RWStructuredBuffer<float> ioBuffer;
+
+[numthreads(4, 1, 1)]
+void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID)
+{
+    uint tid = dispatchThreadID.x;
+
+    float i = ioBuffer[tid];
+    float o = i < 0.5 ? (i + i) : sqrt(i);
+
+    ioBuffer[tid] = o;
+}
--
cgit v1.2.3