summaryrefslogtreecommitdiffstats
path: root/examples
diff options
context:
space:
mode:
authorDietrich Geisler <dag368@cornell.edu>2020-07-24 01:50:53 -0400
committerGitHub <noreply@github.com>2020-07-23 22:50:53 -0700
commit7e952cde57719169bd8384427842cba033c9f80c (patch)
treefa50d6c0fdea37e6b3538af57b0f11274d87d0b9 /examples
parent61be38f39cc96ad9644f17f6ab8d262875e99e9e (diff)
CPU/GPU Compute Shader Example (#1451)
* CPU/GPU Compute Shader Example This PR introduces an example to run a simple compute shader on the GPU in the heterogeneous-hello-world example. All loading code is currently run in C++, so the heterogeneity of this example is still a work in progress. This change updates exactly this example, and so should not cause issues elsewhere in the codebase. * Small fix * Added gfx to help the linker * Added back the struct * Updated premake to respect windows conditions * Completely removed het-example * Re-added example Co-authored-by: Tim Foley <tfoleyNV@users.noreply.github.com>
Diffstat (limited to 'examples')
-rw-r--r--examples/heterogeneous-hello-world/main.cpp392
-rw-r--r--examples/heterogeneous-hello-world/shader.slang6
2 files changed, 327 insertions, 71 deletions
diff --git a/examples/heterogeneous-hello-world/main.cpp b/examples/heterogeneous-hello-world/main.cpp
index c6475e6f5..8d5540a32 100644
--- a/examples/heterogeneous-hello-world/main.cpp
+++ b/examples/heterogeneous-hello-world/main.cpp
@@ -1,14 +1,11 @@
// main.cpp
-#include <stdio.h>
-
// This file implements an extremely simple example of loading and
-// executing a Slang shader program on the CPU.
-//
-// More information about generation C++ or CPU code can be found in docs/cpu-target.md
-//
-// NOTE! This test will only run on a system correctly where slang can find a suitable
-// C++ compiler - such as clang/gcc/visual studio
+// executing a Slang shader program. This is primarily an example
+// of how to use Slang as a "drop-in" replacement for an existing
+// HLSL compiler like the `D3DCompile` API. More advanced use
+// of Slang language and API features is left to the
+// next example.
//
// The comments in the file will attempt to explain concepts as
// they are introduced.
@@ -16,93 +13,354 @@
// Of course, in order to use the Slang API, we need to include
// its header. We have set up the build options for this project
// so that it is as simple as:
+//
#include <slang.h>
+//
+// Other build setups are possible, and Slang doesn't assume that
+// its include directory must be added to your global include
+// path.
+
+// For the purposes of keeping the demo code as simple as possible,
+// while still retaining some level of portability, our examples
+// make use of a small platform and graphics API abstraction layer,
+// which is included in the Slang source distribution under the
+// `tools/` directory.
+//
+// Applications can of course use Slang without ever touching this
+// abstraction layer, so we will not focus on it when explaining
+// examples, except in places where best practices for interacting
+// with Slang may depend on an application/engine making certain
+// design choices in their abstraction layer.
+//
+#include "gfx/render.h"
+#include "gfx/d3d11/render-d3d11.h"
+#include "gfx/window.h"
+using namespace gfx;
-// Allows use of ComPtr - which we can use to scope any 'com-like' pointers easily
-#include <slang-com-ptr.h>
-// Provides macros for handling SlangResult values easily
-#include <slang-com-helper.h>
-
-// This includes a useful small function for setting up the prelude (described more further below).
-#include "../../source/core/slang-test-tool-util.h"
-
-// Slang namespace is used for elements support code (like core) which we use here
-// for ComPtr<> and TestToolUtil
-using namespace Slang;
-
-// Slang source is converted into C++ code which is compiled by a backend compiler.
-// That process uses a 'prelude' which defines types and functions that are needed
-// for everything else to work.
-//
-// We include the prelude here, so we can directly use the types as were used by the
-// compiled code. It is not necessary to include the prelude, as long as memory is
-// laid out in the manner that the generated slang code expects.
-#define SLANG_PRELUDE_NAMESPACE CPPPrelude
-#include "../../prelude/slang-cpp-types.h"
-
-struct UniformState
+// We will start with a function that will invoke the Slang compiler
+// to generate target-specific code from a shader file, and then
+// use that to initialize an API shader program.
+//
+// Note that `Renderer` and `ShaderProgram` here are types from
+// the graphics API abstraction layer, and *not* part of the
+// Slang API. This function is representative of code that a user
+// might write to integrate Slang into their renderer/engine.
+//
+RefPtr<gfx::ShaderProgram> loadShaderProgram(gfx::Renderer* renderer)
{
- CPPPrelude::RWStructuredBuffer<float> ioBuffer;
-};
+ // First, we need to create a "session" for interacting with the Slang
+ // compiler. This scopes all of our application's interactions
+ // with the Slang library. At the moment, creating a session causes
+ // Slang to load and validate its standard library, so this is a
+ // somewhat heavy-weight operation. When possible, an application
+ // should try to re-use the same session across multiple compiles.
+ //
+ SlangSession* slangSession = spCreateSession(NULL);
+
+ // A compile request represents a single invocation of the compiler,
+ // to process some inputs and produce outputs (or errors).
+ //
+ SlangCompileRequest* slangRequest = spCreateCompileRequest(slangSession);
+
+ // We would like to request a single target (output) format: DirectX shader bytecode (DXBC)
+ int targetIndex = spAddCodeGenTarget(slangRequest, SLANG_DXBC);
+
+ // We will specify the desired "profile" for this one target in terms of the
+ // DirectX "shader model" that should be supported.
+ //
+ spSetTargetProfile(slangRequest, targetIndex, spFindProfile(slangSession, "sm_4_0"));
+
+ // A compile request can include one or more "translation units," which more or
+ // less amount to individual source files (think `.c` files, not the `.h` files they
+ // might include).
+ //
+ // For this example, our code will all be in the Slang language. The user may
+ // also specify HLSL input here, but that currently doesn't affect the compiler's
+ // behavior much.
+ //
+ int translationUnitIndex = spAddTranslationUnit(slangRequest, SLANG_SOURCE_LANGUAGE_SLANG, nullptr);
+
+ // We will load source code for our translation unit from the file `shader.slang`.
+ // There are also variations of this API for adding source code from application-provided buffers.
+ //
+ spAddTranslationUnitSourceFile(slangRequest, translationUnitIndex, "shader.slang");
+
+ // Next we will specify the entry point we'd like to compile.
+ // It is often convenient to put more than one entry point in the same file,
+ // and the Slang API makes it convenient to use a single run of the compiler
+ // to compile all entry points; this example only has a single compute kernel.
+ //
+ // For each entry point, we need to specify the name of a function, the
+ // translation unit in which that function can be found, and the stage
+ // that we need to compile for (e.g., vertex, fragment, compute, ...).
+ //
+ char const* computeEntryPointName = "computeMain";
+ int computeIndex = spAddEntryPoint(slangRequest, translationUnitIndex, computeEntryPointName, SLANG_STAGE_COMPUTE);
+
+ // Once all of the input options for the compiler have been specified,
+ // we can invoke `spCompile` to run the compiler and see if any errors
+ // were detected.
+ //
+ const SlangResult compileRes = spCompile(slangRequest);
+
+ // Even if there were no errors that forced compilation to fail, the
+ // compiler may have produced "diagnostic" output such as warnings.
+ // We will go ahead and print that output here.
+ //
+ if(auto diagnostics = spGetDiagnosticOutput(slangRequest))
+ {
+ reportError("%s", diagnostics);
+ }
+
+ // If compilation failed, there is no point in continuing any further.
+ if(SLANG_FAILED(compileRes))
+ {
+ spDestroyCompileRequest(slangRequest);
+ spDestroySession(slangSession);
+ return nullptr;
+ }
+
+ // If compilation was successful, then we will extract the code for
+ // our compute entry point as a "blob".
+ //
+ // If you are using a D3D API, then your application may want to
+ // take advantage of the fact that these blobs are binary compatible
+ // with the `ID3DBlob`, `ID3D10Blob`, etc. interfaces.
+ //
+
+ ISlangBlob* computeShaderBlob = nullptr;
+ spGetEntryPointCodeBlob(slangRequest, computeIndex, 0, &computeShaderBlob);
+ if(!computeShaderBlob) { spDestroyCompileRequest(slangRequest); spDestroySession(slangSession); return nullptr; }
+ // We extract the begin/end pointers to the output code buffer
+ // using operations on the `ISlangBlob` interface.
+ //
+ char const* computeCode = (char const*) computeShaderBlob->getBufferPointer();
+ char const* computeCodeEnd = computeCode + computeShaderBlob->getBufferSize();
+
+ // Once we have extracted the output blobs, it is safe to destroy
+ // the compile request and even the session.
+ //
+ spDestroyCompileRequest(slangRequest);
+ spDestroySession(slangSession);
+
+ // Now we use the operations of the example graphics API abstraction
+ // layer to load shader code into the underlying API.
+ //
+ // Reminder: this section does not involve the Slang API at all.
+ //
+
+ gfx::ShaderProgram::KernelDesc kernelDescs[] =
+ {
+ { gfx::StageType::Compute, computeCode, computeCodeEnd },
+ };
+
+ gfx::ShaderProgram::Desc programDesc;
+ programDesc.pipelineType = gfx::PipelineType::Compute;
+ programDesc.kernels = &kernelDescs[0];
+ programDesc.kernelCount = 1; // must match the number of entries in `kernelDescs`
-extern"C" void computeMain(CPPPrelude::ComputeVaryingInput* varyingInput, void* params, void* uniformState);
+ auto shaderProgram = renderer->createProgram(programDesc);
+ // Once we've used the output blob from the Slang compiler to initialize
+ // the API-specific shader program, we can release its memory.
+ //
+ computeShaderBlob->release();
-static SlangResult _innerMain(int argc, char** argv)
+ return shaderProgram;
+}
+
+// Now that we've covered the function that actually loads and
+// compiles our Slang shader code, we can go through the rest
+// of the application code without as much commentary.
+//
+Result computeMain()
{
+ // We will hard-code the size of our rendering window and initial array.
+ //
+ int gWindowWidth = 1024;
+ int gWindowHeight = 768;
+ float initialArray[4] = { 3.0f, -20.0f, -6.0f, 8.0f };
+
+ // We will declare local variables for the various platform and
+ // graphics API objects that our application needs:
+ //
+ // As a reminder, *none* of these are Slang API objects. All
+ // of them come from the utility library we are using to simplify
+ // building an example program.
+ //
+ gfx::ApplicationContext* gAppContext;
+ gfx::Window* gWindow;
+ RefPtr<gfx::Renderer> gRenderer;
+
+ RefPtr<gfx::BufferResource> gUnorderedAccess;
+ RefPtr<gfx::BufferResource> gReadBuffer;
+
+ RefPtr<gfx::PipelineLayout> gPipelineLayout;
+ RefPtr<gfx::PipelineState> gPipelineState;
+ RefPtr<gfx::DescriptorSet> gDescriptorSet;
+
+ // Create a window for our application; the renderer is
+ // initialized against its platform window handle below.
+ WindowDesc windowDesc;
+ windowDesc.title = "Hello, World!";
+ windowDesc.width = gWindowWidth;
+ windowDesc.height = gWindowHeight;
+ gWindow = createWindow(windowDesc);
+
+ // Initialize the rendering layer.
+ //
+ // Note: for now we are hard-coding logic to use the
+ // Direct3D11 back-end for the graphics API abstraction.
+ // A future version of this example may support multiple
+ // platforms/APIs.
+ //
+ gRenderer = createD3D11Renderer();
+ Renderer::Desc rendererDesc;
+ rendererDesc.width = gWindowWidth;
+ rendererDesc.height = gWindowHeight;
+ {
+ Result res = gRenderer->initialize(rendererDesc, getPlatformWindowHandle(gWindow));
+ if(SLANG_FAILED(res)) return res;
+ }
+
+ // Create a structured buffer for passing the compute data
+ //
+ int structuredBufferSize = 4 * sizeof(float);
+
+ BufferResource::Desc structuredBufferDesc;
+ structuredBufferDesc.init(structuredBufferSize);
+ structuredBufferDesc.setDefaults(Resource::Usage::UnorderedAccess);
+ structuredBufferDesc.elementSize = 4;
+ structuredBufferDesc.cpuAccessFlags = Resource::AccessFlag::Read;
+
+ gUnorderedAccess = gRenderer->createBufferResource(
+ Resource::Usage::UnorderedAccess,
+ structuredBufferDesc,
+ initialArray);
+ if(!gUnorderedAccess) return SLANG_FAIL;
+
+ // Now we will use our `loadShaderProgram` function to load
+ // the code from `shader.slang` into the graphics API.
+ //
+ RefPtr<ShaderProgram> shaderProgram = loadShaderProgram(gRenderer);
+ if(!shaderProgram) return SLANG_FAIL;
+
+ // Our example graphics API uses a "modern" D3D12/Vulkan style
+ // of resource binding, so now we will dive into describing and
+ // allocating "descriptor sets."
+ //
+ // First, we need to construct a descriptor set *layout*.
+ DescriptorSetLayout::SlotRangeDesc slotRanges[] =
+ {
+ DescriptorSetLayout::SlotRangeDesc(DescriptorSlotType::StorageBuffer),
+ };
+ DescriptorSetLayout::Desc descriptorSetLayoutDesc;
+ descriptorSetLayoutDesc.slotRangeCount = 1;
+ descriptorSetLayoutDesc.slotRanges = &slotRanges[0];
+ auto descriptorSetLayout = gRenderer->createDescriptorSetLayout(descriptorSetLayoutDesc);
+ if(!descriptorSetLayout) return SLANG_FAIL;
+
+ // Next we will allocate a pipeline layout, which specifies
+ // that we will run with only a single descriptor set bound.
+ //
+
+ PipelineLayout::DescriptorSetDesc descriptorSets[] =
+ {
+ PipelineLayout::DescriptorSetDesc( descriptorSetLayout ),
+ };
+ PipelineLayout::Desc pipelineLayoutDesc;
+ pipelineLayoutDesc.renderTargetCount = 1;
+ pipelineLayoutDesc.descriptorSetCount = 1;
+ pipelineLayoutDesc.descriptorSets = &descriptorSets[0];
+ auto pipelineLayout = gRenderer->createPipelineLayout(pipelineLayoutDesc);
+ if(!pipelineLayout) return SLANG_FAIL;
- // the uniformState will be passed as a pointer to the CPU code
- UniformState uniformState;
+ gPipelineLayout = pipelineLayout;
- // The contents of the buffer are modified, so we'll copy it
- const float startBufferContents[] = { 2.0f, -10.0f, -3.0f, 5.0f };
- float bufferContents[SLANG_COUNT_OF(startBufferContents)];
- memcpy(bufferContents, startBufferContents, sizeof(startBufferContents));
+ // Once we have the descriptor set layout, we can allocate
+ // and fill in a descriptor set to hold our parameters.
+ //
+ auto descriptorSet = gRenderer->createDescriptorSet(descriptorSetLayout);
+ if(!descriptorSet) return SLANG_FAIL;
- // Set up the ioBuffer such that it uses bufferContents. It is important to set the .count
- // such that bounds checking can be performed in the kernel.
- uniformState.ioBuffer.data = bufferContents;
- uniformState.ioBuffer.count = SLANG_COUNT_OF(bufferContents);
+ // Now that the buffer resource exists, we create an unordered-access
+ // view of it and bind that view into slot 0 of the descriptor set.
+ //
+ ResourceView::Desc resourceViewDesc;
+ resourceViewDesc.type = ResourceView::Type::UnorderedAccess;
+ auto resourceView = gRenderer->createBufferView(gUnorderedAccess, resourceViewDesc);
+ descriptorSet->setResource(0, 0, resourceView);
- // In shader.slang, then entry point is attributed with `[numthreads(4, 1, 1)]` meaning each group
- // consists of 4 'thread' in x. Our input buffer is 4 wide, and we index the input array via `SV_DispatchThreadID`
- // so we only need to run a single group to execute over all of the 4 elements here.
- // The group range from { 0, 0, 0 } -> { 1, 1, 1 } means it will execute over the single group { 0, 0, 0 }.
+ gDescriptorSet = descriptorSet;
- const CPPPrelude::uint3 startGroupID = { 0, 0, 0};
- const CPPPrelude::uint3 endGroupID = { 1, 1, 1 };
+ // Following the D3D12/Vulkan style of API, we need a pipeline state object
+ // (PSO) to encapsulate the configuration of the compute pipeline.
+ //
+ ComputePipelineStateDesc desc;
+ desc.pipelineLayout = gPipelineLayout;
+ desc.program = shaderProgram;
+ auto pipelineState = gRenderer->createComputePipelineState(desc);
+ if(!pipelineState) return SLANG_FAIL;
- CPPPrelude::ComputeVaryingInput varyingInput;
- varyingInput.startGroupID = startGroupID;
- varyingInput.endGroupID = endGroupID;
+ gPipelineState = pipelineState;
- // We don't have any entry point parameters so that's passed as NULL
- // We need to cast our definition of the uniform state to the undefined CPPPrelude::UniformState as
- // that type is just a name to indicate what kind of thing needs to be passed in.
- computeMain(&varyingInput, NULL, (UniformState*)&uniformState);
+ // Once we've initialized all the graphics API objects, we are ready
+ // to run the kernel. The window is not shown for this compute-only example.
+ //
+ //showWindow(gWindow);
- // bufferContents holds the output
+ // Now we configure the compute pipeline by setting the
+ // PSO and binding our descriptor set (which references the
+ // structured buffer that we created above), and then we
+ // dispatch the kernel and read its results back on the CPU.
+ //
// Print out the values before the computation
printf("Before:\n");
- for (float v : startBufferContents)
+ for (float v : initialArray)
{
printf("%f, ", v);
}
printf("\n");
- // Print out the values the the kernel produced
- printf("After: \n");
- for (float v : bufferContents)
+ gRenderer->setPipelineState(PipelineType::Compute, gPipelineState);
+ gRenderer->setDescriptorSet(PipelineType::Compute, gPipelineLayout, 0, gDescriptorSet);
+ // `shader.slang` declares `[numthreads(4, 1, 1)]`, so one thread group covers all 4 elements.
+ gRenderer->dispatchCompute(1, 1, 1);
+
+ if(float* outputData = (float*) gRenderer->map(gUnorderedAccess, MapFlavor::HostRead))
{
- printf("%f, ", v);
+ // Print out the values that the kernel produced
+ printf("After: \n");
+ for (int i = 0; i < 4; i++)
+ {
+ printf("%f, ", outputData[i]);
+ }
+ printf("\n");
+
+ gRenderer->unmap(gUnorderedAccess);
}
- printf("\n");
return SLANG_OK;
}
-int main(int argc, char** argv)
+// This "inner" main function is used by the platform abstraction
+// layer to deal with differences in how an entry point needs
+// to be defined for different platforms.
+//
+void innerMain(ApplicationContext* context)
{
- return SLANG_SUCCEEDED(_innerMain(argc, argv)) ? 0 : -1;
+ // Run the compute example via `computeMain`. If it reports
+ // failure, hand control to `exitApplication` with code 1;
+ // on success this function simply returns.
+
+ if (SLANG_FAILED(computeMain()))
+ {
+ return exitApplication(context, 1);
+ }
}
+
+// This macro instantiates an appropriate main function to
+// invoke the `innerMain` above.
+//
+GFX_CONSOLE_MAIN(innerMain)
diff --git a/examples/heterogeneous-hello-world/shader.slang b/examples/heterogeneous-hello-world/shader.slang
index a032f66ac..036f63c85 100644
--- a/examples/heterogeneous-hello-world/shader.slang
+++ b/examples/heterogeneous-hello-world/shader.slang
@@ -3,6 +3,8 @@
//TEST_INPUT:ubuffer(random(float, 4096, -1.0, 1.0), stride=4):name=ioBuffer
RWStructuredBuffer<float> ioBuffer;
+// There's some weird duplication going on here. It's not clear how we should introduce UniformState
+
[numthreads(4, 1, 1)]
public void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID)
{
@@ -13,7 +15,3 @@ public void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID)
ioBuffer[tid] = o;
}
-
-public int prepMain() {
- return 5;
-}