summary | refs | log | tree | commit | diff | stats
path: root/examples
diff options
context:
space:
mode:
author: Dietrich Geisler <dag368@cornell.edu> 2020-07-31 17:51:52 -0400
committer: GitHub <noreply@github.com> 2020-07-31 14:51:52 -0700
commit: 011a743668e7cd0b7cf97d27e3bed7d519794aeb (patch)
tree: 49cf484df958aa705ff910631e8f732a6f0a57b9 /examples
parent: 4549597709e29b85b5f95503f4f2258c16db12be (diff)
Binary for Heterogeneous Example (#1467)
* Binary Heterogeneous Example

  This PR introduces the ability to insert the binary of a non-CPU target by using the -heterogeneous flag. Specifically, this PR updates the emitting logic to produce a variable of name `__[name_of_entryPoint]` when the heterogeneous flag is present.

* Prelude path fix

Co-authored-by: Tim Foley <tfoleyNV@users.noreply.github.com>
Diffstat (limited to 'examples')
-rw-r--r-- examples/heterogeneous-hello-world/main.cpp 103
-rw-r--r-- examples/heterogeneous-hello-world/shader.cpp 132
2 files changed, 125 insertions, 110 deletions
diff --git a/examples/heterogeneous-hello-world/main.cpp b/examples/heterogeneous-hello-world/main.cpp
index a590f8c4b..47df20dc5 100644
--- a/examples/heterogeneous-hello-world/main.cpp
+++ b/examples/heterogeneous-hello-world/main.cpp
@@ -63,103 +63,15 @@ struct gfx_DescriptorSet_0;
struct gfx_PipelineState_0;
bool executeComputation_0();
+extern unsigned char __computeMain[];
+extern size_t __computeMainSize;
gfx::ShaderProgram* loadShaderProgram(gfx::Renderer* renderer)
{
- // First, we need to create a "session" for interacting with the Slang
- // compiler. This scopes all of our application's interactions
- // with the Slang library. At the moment, creating a session causes
- // Slang to load and validate its standard library, so this is a
- // somewhat heavy-weight operation. When possible, an application
- // should try to re-use the same session across multiple compiles.
+ // We extract the begin/end pointers to the output code buffers directly
//
- SlangSession* slangSession = spCreateSession(NULL);
-
- // A compile request represents a single invocation of the compiler,
- // to process some inputs and produce outputs (or errors).
- //
- SlangCompileRequest* slangRequest = spCreateCompileRequest(slangSession);
-
- // We would like to request a single target (output) format: DirectX shader bytecode (DXBC)
- int targetIndex = spAddCodeGenTarget(slangRequest, SLANG_DXBC);
-
- // We will specify the desired "profile" for this one target in terms of the
- // DirectX "shader model" that should be supported.
- //
- spSetTargetProfile(slangRequest, targetIndex, spFindProfile(slangSession, "sm_4_0"));
-
- // A compile request can include one or more "translation units," which more or
- // less amount to individual source files (think `.c` files, not the `.h` files they
- // might include).
- //
- // For this example, our code will all be in the Slang language. The user may
- // also specify HLSL input here, but that currently doesn't affect the compiler's
- // behavior much.
- //
- int translationUnitIndex = spAddTranslationUnit(slangRequest, SLANG_SOURCE_LANGUAGE_SLANG, nullptr);
-
- // We will load source code for our translation unit from the file `shaders.slang`.
- // There are also variations of this API for adding source code from application-provided buffers.
- //
- spAddTranslationUnitSourceFile(slangRequest, translationUnitIndex, "shader.slang");
-
- // Next we will specify the entry points we'd like to compile.
- // It is often convenient to put more than one entry point in the same file,
- // and the Slang API makes it convenient to use a single run of the compiler
- // to compile all entry points.
- //
- // For each entry point, we need to specify the name of a function, the
- // translation unit in which that function can be found, and the stage
- // that we need to compile for (e.g., vertex, fragment, geometry, ...).
- //
- char const* computeEntryPointName = "computeMain";
- int computeIndex = spAddEntryPoint(slangRequest, translationUnitIndex, computeEntryPointName, SLANG_STAGE_COMPUTE);
-
- // Once all of the input options for the compiler have been specified,
- // we can invoke `spCompile` to run the compiler and see if any errors
- // were detected.
- //
- const SlangResult compileRes = spCompile(slangRequest);
-
- // Even if there were no errors that forced compilation to fail, the
- // compiler may have produced "diagnostic" output such as warnings.
- // We will go ahead and print that output here.
- //
- if(auto diagnostics = spGetDiagnosticOutput(slangRequest))
- {
- reportError("%s", diagnostics);
- }
-
- // If compilation failed, there is no point in continuing any further.
- if(SLANG_FAILED(compileRes))
- {
- spDestroyCompileRequest(slangRequest);
- spDestroySession(slangSession);
- return nullptr;
- }
-
- // If compilation was successful, then we will extract the code for
- // our two entry points as "blobs".
- //
- // If you are using a D3D API, then your application may want to
- // take advantage of the fact taht these blobs are binary compatible
- // with the `ID3DBlob`, `ID3D10Blob`, etc. interfaces.
- //
-
- ISlangBlob* computeShaderBlob = nullptr;
- spGetEntryPointCodeBlob(slangRequest, computeIndex, 0, &computeShaderBlob);
-
- // We extract the begin/end pointers to the output code buffers
- // using operations on the `ISlangBlob` interface.
- //
- char const* computeCode = (char const*) computeShaderBlob->getBufferPointer();
- char const* computeCodeEnd = computeCode + computeShaderBlob->getBufferSize();
-
- // Once we have extracted the output blobs, it is safe to destroy
- // the compile request and even the session.
- //
- spDestroyCompileRequest(slangRequest);
- spDestroySession(slangSession);
+ char unsigned const* computeCode = __computeMain;
+ char unsigned const* computeCodeEnd = computeCode + __computeMainSize;
// Now we use the operations of the example graphics API abstraction
// layer to load shader code into the underlying API.
@@ -179,11 +91,6 @@ gfx::ShaderProgram* loadShaderProgram(gfx::Renderer* renderer)
gShaderProgram = renderer->createProgram(programDesc);
- // Once we've used the output blobs from the Slang compiler to initialize
- // the API-specific shader program, we can release their memory.
- //
- computeShaderBlob->release();
-
return gShaderProgram;
}
diff --git a/examples/heterogeneous-hello-world/shader.cpp b/examples/heterogeneous-hello-world/shader.cpp
index e8656bed7..d489f7136 100644
--- a/examples/heterogeneous-hello-world/shader.cpp
+++ b/examples/heterogeneous-hello-world/shader.cpp
@@ -1,4 +1,4 @@
-#include "../../prelude/slang-cpp-prelude.h"
+#include "../../slang/prelude/slang-cpp-prelude.h"
//namespace { // anonymous
@@ -7,8 +7,43 @@
using namespace SLANG_PRELUDE_NAMESPACE;
#endif
+Vector<uint32_t, 3> operator+(Vector<uint32_t, 3> a, Vector<uint32_t, 3> b)
+{
+ Vector<uint32_t, 3> r;
+ r.x = a.x + b.x;
+ r.y = a.y + b.y;
+ r.z = a.z + b.z;
+ return r;
+}
+
+Vector<uint32_t, 3> operator*(Vector<uint32_t, 3> a, Vector<uint32_t, 3> b)
+{
+ Vector<uint32_t, 3> r;
+ r.x = a.x * b.x;
+ r.y = a.y * b.y;
+ r.z = a.z * b.z;
+ return r;
+}
+
+Vector<uint32_t, 3> make_VecU3(uint32_t a, uint32_t b, uint32_t c)
+{
+ return Vector<uint32_t, 3>{ a, b, c};
+}
+
+size_t __computeMainSize = 652;
+unsigned char __computeMain[] = {68, 88, 66, 67, 85, 217, 21, 44, 5, 208, 4, 46, 7, 254, 139, 84, 132, 65, 108, 79, 1, 0, 0, 0, 140, 2, 0, 0, 5, 0, 0, 0, 52, 0, 0, 0, 248, 0, 0, 0, 8, 1, 0, 0, 24, 1, 0, 0, 16, 2, 0, 0, 82, 68, 69, 70, 188, 0, 0, 0, 1, 0, 0, 0, 72, 0, 0, 0, 1, 0, 0, 0, 28, 0, 0, 0, 0, 4, 83, 67, 0, 9, 16, 0, 148, 0, 0, 0, 60, 0, 0, 0, 6, 0, 0, 0, 6, 0, 0, 0, 1, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 105, 111, 66, 117, 102, 102, 101, 114, 95, 48, 0, 171, 60, 0, 0, 0, 1, 0, 0, 0, 96, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 120, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 2, 0, 0, 0, 132, 0, 0, 0, 0, 0, 0, 0, 36, 69, 108, 101, 109, 101, 110, 116, 0, 171, 171, 171, 0, 0, 3, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 77, 105, 99, 114, 111, 115, 111, 102, 116, 32, 40, 82, 41, 32, 72, 76, 83, 76, 32, 83, 104, 97, 100, 101, 114, 32, 67, 111, 109, 112, 105, 108, 101, 114, 32, 49, 48, 46, 49, 0, 73, 83, 71, 78, 8, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 79, 83, 71, 78, 8, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 83, 72, 69, 88, 240, 0, 0, 0, 64, 0, 5, 0, 60, 0, 0, 0, 106, 8, 0, 1, 158, 0, 0, 4, 0, 224, 17, 0, 0, 0, 0, 0, 4, 0, 0, 0, 95, 0, 0, 2, 18, 0, 2, 0, 104, 0, 0, 2, 1, 0, 0, 0, 155, 0, 0, 4, 4, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 167, 0, 0, 8, 18, 0, 16, 0, 0, 0, 0, 0, 10, 0, 2, 0, 1, 64, 0, 0, 0, 0, 0, 0, 6, 224, 17, 0, 0, 0, 0, 0, 49, 0, 0, 7, 34, 0, 16, 0, 0, 0, 0, 0, 10, 0, 16, 0, 0, 0, 0, 0, 1, 64, 0, 0, 0, 0, 0, 63, 0, 0, 0, 7, 66, 0, 16, 0, 0, 0, 0, 0, 10, 0, 16, 0, 0, 0, 0, 0, 10, 0, 16, 0, 0, 0, 0, 0, 75, 0, 0, 5, 18, 0, 16, 0, 0, 0, 0, 0, 10, 0, 16, 0, 0, 0, 0, 0, 55, 0, 0, 9, 18, 0, 16, 0, 0, 0, 0, 0, 26, 0, 16, 0, 0, 0, 0, 0, 42, 0, 16, 0, 0, 0, 0, 0, 10, 0, 16, 0, 0, 0, 0, 0, 168, 0, 0, 8, 18, 224, 17, 0, 0, 0, 0, 0, 10, 0, 2, 0, 1, 64, 0, 0, 0, 0, 0, 0, 10, 0, 16, 0, 0, 0, 0, 0, 62, 0, 0, 1, 83, 84, 65, 84, 116, 0, 0, 0, 7, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+
+#line 11 "shader.slang"
+struct GlobalParams_0
+{
+ RWStructuredBuffer<float> ioBuffer_0;
+};
+
+struct KernelContext_0
+{
+ GlobalParams_0* globalParams_0;
+};
-#line 21 "../../examples/heterogeneous-hello-world/shader.slang"
struct gfx_Window_0
{
};
@@ -55,6 +90,41 @@ struct gfx_PipelineState_0
};
+#line 7
+void _computeMain(void* _S1, void* entryPointParams_0, void* _S2)
+{
+ ComputeThreadVaryingInput* _S3 = ((ComputeThreadVaryingInput*)(_S1));
+ KernelContext_0 kernelContext_0;
+ *(&(&kernelContext_0)->globalParams_0) = ((GlobalParams_0*)(_S2));
+
+#line 9
+ uint32_t tid_0 = (*(&_S3->groupID) * make_VecU3(4U, 1U, 1U) + *(&_S3->groupThreadID)).x;
+
+ float* _S4 = &(*(&(*(&(&kernelContext_0)->globalParams_0))->ioBuffer_0))[tid_0];
+
+#line 11
+ float i_0 = *_S4;
+ bool _S5 = i_0 < 0.50000000000000000000f;
+
+#line 12
+ float _S6 = i_0 + i_0;
+
+#line 12
+ float _S7 = (F32_sqrt((i_0)));
+
+#line 12
+ float o_0 = _S5 ? _S6 : _S7;
+
+ float* _S8 = &(*(&(*(&(&kernelContext_0)->globalParams_0))->ioBuffer_0))[tid_0];
+
+#line 14
+ *_S8 = o_0;
+
+#line 7
+ return;
+}
+
+
#line 34
gfx_Window_0* createWindow_0(int32_t _0, int32_t _1);
@@ -110,17 +180,17 @@ bool executeComputation_0()
FixedArray<float, 4> initialArray_0 = { 3.00000000000000000000f, -20.00000000000000000000f, -6.00000000000000000000f, 8.00000000000000000000f };
- gfx_Window_0* _S1 = createWindow_0(int(1024), int(768));
- gfx_Renderer_0* _S2 = createRenderer_0(int(1024), int(768), _S1);
- gfx_BufferResource_0* _S3 = createStructuredBuffer_0(_S2, initialArray_0);
- gfx_ShaderProgram_0* _S4 = loadShaderProgram_0(_S2);
- gfx_DescriptorSetLayout_0* _S5 = buildDescriptorSetLayout_0(_S2);
- gfx_PipelineLayout_0* _S6 = buildPipeline_0(_S2, _S5);
- gfx_DescriptorSet_0* _S7 = buildDescriptorSet_0(_S2, _S5, _S3);
- gfx_PipelineState_0* _S8 = buildPipelineState_0(_S4, _S2, _S6);
+ gfx_Window_0* _S9 = createWindow_0(int(1024), int(768));
+ gfx_Renderer_0* _S10 = createRenderer_0(int(1024), int(768), _S9);
+ gfx_BufferResource_0* _S11 = createStructuredBuffer_0(_S10, initialArray_0);
+ gfx_ShaderProgram_0* _S12 = loadShaderProgram_0(_S10);
+ gfx_DescriptorSetLayout_0* _S13 = buildDescriptorSetLayout_0(_S10);
+ gfx_PipelineLayout_0* _S14 = buildPipeline_0(_S10, _S13);
+ gfx_DescriptorSet_0* _S15 = buildDescriptorSet_0(_S10, _S13, _S11);
+ gfx_PipelineState_0* _S16 = buildPipelineState_0(_S12, _S10, _S14);
printInitialValues_0(initialArray_0, int(4));
- dispatchComputation_0(_S2, _S8, _S6, _S7);
- print_output_0(_S2, _S3, int(4));
+ dispatchComputation_0(_S10, _S16, _S14, _S15);
+ print_output_0(_S10, _S11, int(4));
return true;
@@ -128,3 +198,41 @@ bool executeComputation_0()
//} // anonymous
+// [numthreads(4, 1, 1)]
+SLANG_PRELUDE_EXPORT
+void computeMain_Thread(ComputeThreadVaryingInput* varyingInput, void* entryPointParams, void* globalParams)
+{
+ _computeMain(varyingInput, entryPointParams, globalParams);
+}
+// [numthreads(4, 1, 1)]
+SLANG_PRELUDE_EXPORT
+void computeMain_Group(ComputeVaryingInput* varyingInput, void* entryPointParams, void* globalParams)
+{
+ ComputeThreadVaryingInput threadInput = {};
+ threadInput.groupID = varyingInput->startGroupID;
+ for (uint32_t x = 0; x < 4; ++x)
+ {
+ threadInput.groupThreadID.x = x;
+ _computeMain(&threadInput, entryPointParams, globalParams);
+ }
+}
+// [numthreads(4, 1, 1)]
+SLANG_PRELUDE_EXPORT
+void computeMain(ComputeVaryingInput* varyingInput, void* entryPointParams, void* globalParams)
+{
+ ComputeVaryingInput vi = *varyingInput;
+ ComputeVaryingInput groupVaryingInput = {};
+ for (uint32_t z = vi.startGroupID.z; z < vi.endGroupID.z; ++z)
+ {
+ groupVaryingInput.startGroupID.z = z;
+ for (uint32_t y = vi.startGroupID.y; y < vi.endGroupID.y; ++y)
+ {
+ groupVaryingInput.startGroupID.y = y;
+ for (uint32_t x = vi.startGroupID.x; x < vi.endGroupID.x; ++x)
+ {
+ groupVaryingInput.startGroupID.x = x;
+ computeMain_Group(&groupVaryingInput, entryPointParams, globalParams);
+ }
+ }
+ }
+}