diff options
| author | Dietrich Geisler <dag368@cornell.edu> | 2020-07-31 17:51:52 -0400 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2020-07-31 14:51:52 -0700 |
| commit | 011a743668e7cd0b7cf97d27e3bed7d519794aeb (patch) | |
| tree | 49cf484df958aa705ff910631e8f732a6f0a57b9 /examples | |
| parent | 4549597709e29b85b5f95503f4f2258c16db12be (diff) | |
Binary for Heterogeneous Example (#1467)
* Binary Heterogeneous Example
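This PR introduces the ability to embed the compiled binary of a non-CPU target
in the emitted output by using the -heterogeneous flag. Specifically, this PR
updates the emitting logic to produce a variable named `__[name_of_entryPoint]`
holding the binary (with its byte count in a companion variable, e.g.
`__computeMain` and `__computeMainSize` in the example below) when the
heterogeneous flag is present.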
* Prelude path fix
Co-authored-by: Tim Foley <tfoleyNV@users.noreply.github.com>
Diffstat (limited to 'examples')
| -rw-r--r-- | examples/heterogeneous-hello-world/main.cpp | 103 | ||||
| -rw-r--r-- | examples/heterogeneous-hello-world/shader.cpp | 132 |
2 files changed, 125 insertions, 110 deletions
diff --git a/examples/heterogeneous-hello-world/main.cpp b/examples/heterogeneous-hello-world/main.cpp index a590f8c4b..47df20dc5 100644 --- a/examples/heterogeneous-hello-world/main.cpp +++ b/examples/heterogeneous-hello-world/main.cpp @@ -63,103 +63,15 @@ struct gfx_DescriptorSet_0; struct gfx_PipelineState_0; bool executeComputation_0(); +extern unsigned char __computeMain[]; +extern size_t __computeMainSize; gfx::ShaderProgram* loadShaderProgram(gfx::Renderer* renderer) { - // First, we need to create a "session" for interacting with the Slang - // compiler. This scopes all of our application's interactions - // with the Slang library. At the moment, creating a session causes - // Slang to load and validate its standard library, so this is a - // somewhat heavy-weight operation. When possible, an application - // should try to re-use the same session across multiple compiles. + // We extract the begin/end pointers to the output code buffers directly // - SlangSession* slangSession = spCreateSession(NULL); - - // A compile request represents a single invocation of the compiler, - // to process some inputs and produce outputs (or errors). - // - SlangCompileRequest* slangRequest = spCreateCompileRequest(slangSession); - - // We would like to request a single target (output) format: DirectX shader bytecode (DXBC) - int targetIndex = spAddCodeGenTarget(slangRequest, SLANG_DXBC); - - // We will specify the desired "profile" for this one target in terms of the - // DirectX "shader model" that should be supported. - // - spSetTargetProfile(slangRequest, targetIndex, spFindProfile(slangSession, "sm_4_0")); - - // A compile request can include one or more "translation units," which more or - // less amount to individual source files (think `.c` files, not the `.h` files they - // might include). - // - // For this example, our code will all be in the Slang language. 
The user may - // also specify HLSL input here, but that currently doesn't affect the compiler's - // behavior much. - // - int translationUnitIndex = spAddTranslationUnit(slangRequest, SLANG_SOURCE_LANGUAGE_SLANG, nullptr); - - // We will load source code for our translation unit from the file `shaders.slang`. - // There are also variations of this API for adding source code from application-provided buffers. - // - spAddTranslationUnitSourceFile(slangRequest, translationUnitIndex, "shader.slang"); - - // Next we will specify the entry points we'd like to compile. - // It is often convenient to put more than one entry point in the same file, - // and the Slang API makes it convenient to use a single run of the compiler - // to compile all entry points. - // - // For each entry point, we need to specify the name of a function, the - // translation unit in which that function can be found, and the stage - // that we need to compile for (e.g., vertex, fragment, geometry, ...). - // - char const* computeEntryPointName = "computeMain"; - int computeIndex = spAddEntryPoint(slangRequest, translationUnitIndex, computeEntryPointName, SLANG_STAGE_COMPUTE); - - // Once all of the input options for the compiler have been specified, - // we can invoke `spCompile` to run the compiler and see if any errors - // were detected. - // - const SlangResult compileRes = spCompile(slangRequest); - - // Even if there were no errors that forced compilation to fail, the - // compiler may have produced "diagnostic" output such as warnings. - // We will go ahead and print that output here. - // - if(auto diagnostics = spGetDiagnosticOutput(slangRequest)) - { - reportError("%s", diagnostics); - } - - // If compilation failed, there is no point in continuing any further. 
- if(SLANG_FAILED(compileRes)) - { - spDestroyCompileRequest(slangRequest); - spDestroySession(slangSession); - return nullptr; - } - - // If compilation was successful, then we will extract the code for - // our two entry points as "blobs". - // - // If you are using a D3D API, then your application may want to - // take advantage of the fact taht these blobs are binary compatible - // with the `ID3DBlob`, `ID3D10Blob`, etc. interfaces. - // - - ISlangBlob* computeShaderBlob = nullptr; - spGetEntryPointCodeBlob(slangRequest, computeIndex, 0, &computeShaderBlob); - - // We extract the begin/end pointers to the output code buffers - // using operations on the `ISlangBlob` interface. - // - char const* computeCode = (char const*) computeShaderBlob->getBufferPointer(); - char const* computeCodeEnd = computeCode + computeShaderBlob->getBufferSize(); - - // Once we have extracted the output blobs, it is safe to destroy - // the compile request and even the session. - // - spDestroyCompileRequest(slangRequest); - spDestroySession(slangSession); + char unsigned const* computeCode = __computeMain; + char unsigned const* computeCodeEnd = computeCode + __computeMainSize; // Now we use the operations of the example graphics API abstraction // layer to load shader code into the underlying API. @@ -179,11 +91,6 @@ gfx::ShaderProgram* loadShaderProgram(gfx::Renderer* renderer) gShaderProgram = renderer->createProgram(programDesc); - // Once we've used the output blobs from the Slang compiler to initialize - // the API-specific shader program, we can release their memory. 
- // - computeShaderBlob->release(); - return gShaderProgram; } diff --git a/examples/heterogeneous-hello-world/shader.cpp b/examples/heterogeneous-hello-world/shader.cpp index e8656bed7..d489f7136 100644 --- a/examples/heterogeneous-hello-world/shader.cpp +++ b/examples/heterogeneous-hello-world/shader.cpp @@ -1,4 +1,4 @@ -#include "../../prelude/slang-cpp-prelude.h" +#include "../../slang/prelude/slang-cpp-prelude.h" //namespace { // anonymous @@ -7,8 +7,43 @@ using namespace SLANG_PRELUDE_NAMESPACE; #endif +Vector<uint32_t, 3> operator+(Vector<uint32_t, 3> a, Vector<uint32_t, 3> b) +{ + Vector<uint32_t, 3> r; + r.x = a.x + b.x; + r.y = a.y + b.y; + r.z = a.z + b.z; + return r; +} + +Vector<uint32_t, 3> operator*(Vector<uint32_t, 3> a, Vector<uint32_t, 3> b) +{ + Vector<uint32_t, 3> r; + r.x = a.x * b.x; + r.y = a.y * b.y; + r.z = a.z * b.z; + return r; +} + +Vector<uint32_t, 3> make_VecU3(uint32_t a, uint32_t b, uint32_t c) +{ + return Vector<uint32_t, 3>{ a, b, c}; +} + +size_t __computeMainSize = 652; +unsigned char __computeMain[] = {68, 88, 66, 67, 85, 217, 21, 44, 5, 208, 4, 46, 7, 254, 139, 84, 132, 65, 108, 79, 1, 0, 0, 0, 140, 2, 0, 0, 5, 0, 0, 0, 52, 0, 0, 0, 248, 0, 0, 0, 8, 1, 0, 0, 24, 1, 0, 0, 16, 2, 0, 0, 82, 68, 69, 70, 188, 0, 0, 0, 1, 0, 0, 0, 72, 0, 0, 0, 1, 0, 0, 0, 28, 0, 0, 0, 0, 4, 83, 67, 0, 9, 16, 0, 148, 0, 0, 0, 60, 0, 0, 0, 6, 0, 0, 0, 6, 0, 0, 0, 1, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 105, 111, 66, 117, 102, 102, 101, 114, 95, 48, 0, 171, 60, 0, 0, 0, 1, 0, 0, 0, 96, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 120, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 2, 0, 0, 0, 132, 0, 0, 0, 0, 0, 0, 0, 36, 69, 108, 101, 109, 101, 110, 116, 0, 171, 171, 171, 0, 0, 3, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 77, 105, 99, 114, 111, 115, 111, 102, 116, 32, 40, 82, 41, 32, 72, 76, 83, 76, 32, 83, 104, 97, 100, 101, 114, 32, 67, 111, 109, 112, 105, 108, 101, 114, 32, 49, 48, 46, 49, 0, 73, 83, 71, 78, 8, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 
79, 83, 71, 78, 8, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 83, 72, 69, 88, 240, 0, 0, 0, 64, 0, 5, 0, 60, 0, 0, 0, 106, 8, 0, 1, 158, 0, 0, 4, 0, 224, 17, 0, 0, 0, 0, 0, 4, 0, 0, 0, 95, 0, 0, 2, 18, 0, 2, 0, 104, 0, 0, 2, 1, 0, 0, 0, 155, 0, 0, 4, 4, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 167, 0, 0, 8, 18, 0, 16, 0, 0, 0, 0, 0, 10, 0, 2, 0, 1, 64, 0, 0, 0, 0, 0, 0, 6, 224, 17, 0, 0, 0, 0, 0, 49, 0, 0, 7, 34, 0, 16, 0, 0, 0, 0, 0, 10, 0, 16, 0, 0, 0, 0, 0, 1, 64, 0, 0, 0, 0, 0, 63, 0, 0, 0, 7, 66, 0, 16, 0, 0, 0, 0, 0, 10, 0, 16, 0, 0, 0, 0, 0, 10, 0, 16, 0, 0, 0, 0, 0, 75, 0, 0, 5, 18, 0, 16, 0, 0, 0, 0, 0, 10, 0, 16, 0, 0, 0, 0, 0, 55, 0, 0, 9, 18, 0, 16, 0, 0, 0, 0, 0, 26, 0, 16, 0, 0, 0, 0, 0, 42, 0, 16, 0, 0, 0, 0, 0, 10, 0, 16, 0, 0, 0, 0, 0, 168, 0, 0, 8, 18, 224, 17, 0, 0, 0, 0, 0, 10, 0, 2, 0, 1, 64, 0, 0, 0, 0, 0, 0, 10, 0, 16, 0, 0, 0, 0, 0, 62, 0, 0, 1, 83, 84, 65, 84, 116, 0, 0, 0, 7, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + +#line 11 "shader.slang" +struct GlobalParams_0 +{ + RWStructuredBuffer<float> ioBuffer_0; +}; + +struct KernelContext_0 +{ + GlobalParams_0* globalParams_0; +}; -#line 21 "../../examples/heterogeneous-hello-world/shader.slang" struct gfx_Window_0 { }; @@ -55,6 +90,41 @@ struct gfx_PipelineState_0 }; +#line 7 +void _computeMain(void* _S1, void* entryPointParams_0, void* _S2) +{ + ComputeThreadVaryingInput* _S3 = ((ComputeThreadVaryingInput*)(_S1)); + KernelContext_0 kernelContext_0; + *(&(&kernelContext_0)->globalParams_0) = ((GlobalParams_0*)(_S2)); + +#line 9 + uint32_t tid_0 = (*(&_S3->groupID) * make_VecU3(4U, 1U, 1U) + *(&_S3->groupThreadID)).x; + + float* _S4 = &(*(&(*(&(&kernelContext_0)->globalParams_0))->ioBuffer_0))[tid_0]; + +#line 
11 + float i_0 = *_S4; + bool _S5 = i_0 < 0.50000000000000000000f; + +#line 12 + float _S6 = i_0 + i_0; + +#line 12 + float _S7 = (F32_sqrt((i_0))); + +#line 12 + float o_0 = _S5 ? _S6 : _S7; + + float* _S8 = &(*(&(*(&(&kernelContext_0)->globalParams_0))->ioBuffer_0))[tid_0]; + +#line 14 + *_S8 = o_0; + +#line 7 + return; +} + + #line 34 gfx_Window_0* createWindow_0(int32_t _0, int32_t _1); @@ -110,17 +180,17 @@ bool executeComputation_0() FixedArray<float, 4> initialArray_0 = { 3.00000000000000000000f, -20.00000000000000000000f, -6.00000000000000000000f, 8.00000000000000000000f }; - gfx_Window_0* _S1 = createWindow_0(int(1024), int(768)); - gfx_Renderer_0* _S2 = createRenderer_0(int(1024), int(768), _S1); - gfx_BufferResource_0* _S3 = createStructuredBuffer_0(_S2, initialArray_0); - gfx_ShaderProgram_0* _S4 = loadShaderProgram_0(_S2); - gfx_DescriptorSetLayout_0* _S5 = buildDescriptorSetLayout_0(_S2); - gfx_PipelineLayout_0* _S6 = buildPipeline_0(_S2, _S5); - gfx_DescriptorSet_0* _S7 = buildDescriptorSet_0(_S2, _S5, _S3); - gfx_PipelineState_0* _S8 = buildPipelineState_0(_S4, _S2, _S6); + gfx_Window_0* _S9 = createWindow_0(int(1024), int(768)); + gfx_Renderer_0* _S10 = createRenderer_0(int(1024), int(768), _S9); + gfx_BufferResource_0* _S11 = createStructuredBuffer_0(_S10, initialArray_0); + gfx_ShaderProgram_0* _S12 = loadShaderProgram_0(_S10); + gfx_DescriptorSetLayout_0* _S13 = buildDescriptorSetLayout_0(_S10); + gfx_PipelineLayout_0* _S14 = buildPipeline_0(_S10, _S13); + gfx_DescriptorSet_0* _S15 = buildDescriptorSet_0(_S10, _S13, _S11); + gfx_PipelineState_0* _S16 = buildPipelineState_0(_S12, _S10, _S14); printInitialValues_0(initialArray_0, int(4)); - dispatchComputation_0(_S2, _S8, _S6, _S7); - print_output_0(_S2, _S3, int(4)); + dispatchComputation_0(_S10, _S16, _S14, _S15); + print_output_0(_S10, _S11, int(4)); return true; @@ -128,3 +198,41 @@ bool executeComputation_0() //} // anonymous +// [numthreads(4, 1, 1)] +SLANG_PRELUDE_EXPORT +void 
computeMain_Thread(ComputeThreadVaryingInput* varyingInput, void* entryPointParams, void* globalParams) +{ + _computeMain(varyingInput, entryPointParams, globalParams); +} +// [numthreads(4, 1, 1)] +SLANG_PRELUDE_EXPORT +void computeMain_Group(ComputeVaryingInput* varyingInput, void* entryPointParams, void* globalParams) +{ + ComputeThreadVaryingInput threadInput = {}; + threadInput.groupID = varyingInput->startGroupID; + for (uint32_t x = 0; x < 4; ++x) + { + threadInput.groupThreadID.x = x; + _computeMain(&threadInput, entryPointParams, globalParams); + } +} +// [numthreads(4, 1, 1)] +SLANG_PRELUDE_EXPORT +void computeMain(ComputeVaryingInput* varyingInput, void* entryPointParams, void* globalParams) +{ + ComputeVaryingInput vi = *varyingInput; + ComputeVaryingInput groupVaryingInput = {}; + for (uint32_t z = vi.startGroupID.z; z < vi.endGroupID.z; ++z) + { + groupVaryingInput.startGroupID.z = z; + for (uint32_t y = vi.startGroupID.y; y < vi.endGroupID.y; ++y) + { + groupVaryingInput.startGroupID.y = y; + for (uint32_t x = vi.startGroupID.x; x < vi.endGroupID.x; ++x) + { + groupVaryingInput.startGroupID.x = x; + computeMain_Group(&groupVaryingInput, entryPointParams, globalParams); + } + } + } +} |
