diff options
| author | Dietrich Geisler <dag368@cornell.edu> | 2020-08-17 12:50:44 -0400 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2020-08-17 09:50:44 -0700 |
| commit | ff2d490dc120708a2fcb6eea5880a6b7c6586a4b (patch) | |
| tree | 4bab7a7353ed5e9149510a48227da029d40d9d97 | |
| parent | 0640a10ab85f8be3c3c925cb70711560265e6548 (diff) | |
GPU Foreach Loop (#1498)
* GPU Foreach Loop
This PR introduces the completed GPU foreach loop and updates the
heterogeneous-hello-world example to use it. This PR builds on the
previous introduction of the GPU Foreach loop parsing and semantic
checking PR (#1482) by introducing IR lowering and emitting. The new
feature can be used by having a GPU_Foreach loop interacting with a
named non-CPP entry point, and using the -heterogeneous flag.
* Fix to path
Co-authored-by: Tim Foley <tfoleyNV@users.noreply.github.com>
| -rw-r--r-- | examples/heterogeneous-hello-world/main.cpp | 35 | ||||
| -rw-r--r-- | examples/heterogeneous-hello-world/shader.cpp | 116 | ||||
| -rw-r--r-- | examples/heterogeneous-hello-world/shader.slang | 21 | ||||
| -rw-r--r-- | prelude/slang-cpp-prelude.h | 16 | ||||
| -rw-r--r-- | source/slang/slang-emit-c-like.cpp | 25 | ||||
| -rw-r--r-- | source/slang/slang-emit-cpp.cpp | 36 | ||||
| -rw-r--r-- | source/slang/slang-ir-inst-defs.h | 3 | ||||
| -rw-r--r-- | source/slang/slang-ir-insts.h | 2 | ||||
| -rw-r--r-- | source/slang/slang-ir-link.cpp | 22 | ||||
| -rw-r--r-- | source/slang/slang-ir.cpp | 11 | ||||
| -rw-r--r-- | source/slang/slang-lower-to-ir.cpp | 32 |
11 files changed, 190 insertions, 129 deletions
diff --git a/examples/heterogeneous-hello-world/main.cpp b/examples/heterogeneous-hello-world/main.cpp index 6bb1bc071..6159f8d44 100644 --- a/examples/heterogeneous-hello-world/main.cpp +++ b/examples/heterogeneous-hello-world/main.cpp @@ -66,12 +66,11 @@ bool executeComputation_0(); extern unsigned char __computeMain[]; extern size_t __computeMainSize; -gfx::ShaderProgram* loadShaderProgram(gfx::Renderer* renderer) +gfx::ShaderProgram* loadShaderProgram(gfx::Renderer* renderer, unsigned char computeCode[], size_t computeCodeSize) { // We extract the begin/end pointers to the output code buffers directly // - char unsigned const* computeCode = __computeMain; - char unsigned const* computeCodeEnd = computeCode + __computeMainSize; + char unsigned const* computeCodeEnd = computeCode + computeCodeSize; // Now we use the operations of the example graphics API abstraction // layer to load shader code into the underlying API. @@ -87,7 +86,7 @@ gfx::ShaderProgram* loadShaderProgram(gfx::Renderer* renderer) gfx::ShaderProgram::Desc programDesc; programDesc.pipelineType = gfx::PipelineType::Compute; programDesc.kernels = &kernelDescs[0]; - programDesc.kernelCount = 2; + programDesc.kernelCount = 1; gShaderProgram = renderer->createProgram(programDesc); @@ -242,13 +241,16 @@ void dispatchComputation( gfx::Renderer* gRenderer, gfx::PipelineState* gPipelineState, gfx::PipelineLayout* gPipelineLayout, - gfx::DescriptorSet* gDescriptorSet) + gfx::DescriptorSet* gDescriptorSet, + unsigned int gridDimsX, + unsigned int gridDimsY, + unsigned int gridDimsZ) { gRenderer->setPipelineState(PipelineType::Compute, gPipelineState); gRenderer->setDescriptorSet(PipelineType::Compute, gPipelineLayout, 0, gDescriptorSet); - gRenderer->dispatchCompute(4, 1, 1); + gRenderer->dispatchCompute(gridDimsX, gridDimsY, gridDimsZ); } void print_output( @@ -286,9 +288,9 @@ gfx_BufferResource_0* createStructuredBuffer_0(gfx_Renderer_0* _0, FixedArray<fl return 
(gfx_BufferResource_0*)createStructuredBuffer((gfx::Renderer*)_0, (float*)&_1); } -gfx_ShaderProgram_0* loadShaderProgram_0(gfx_Renderer_0* _0) +gfx_ShaderProgram_0* loadShaderProgram_0(gfx_Renderer_0* _0, unsigned char _1[], size_t _2) { - return (gfx_ShaderProgram_0*)loadShaderProgram((gfx::Renderer*)_0); + return (gfx_ShaderProgram_0*)loadShaderProgram((gfx::Renderer*)_0, _1, _2); } gfx_DescriptorSetLayout_0* buildDescriptorSetLayout_0(gfx_Renderer_0* _0) @@ -322,13 +324,26 @@ void printInitialValues_0(FixedArray<float, 4> _0, int32_t _1) printInitialValues((float*)&_0, _1); } -void dispatchComputation_0(gfx_Renderer_0* _0, gfx_PipelineState_0* _1, gfx_PipelineLayout_0* _2, gfx_DescriptorSet_0* _3) +void dispatchComputation_0(gfx_Renderer_0* _0, gfx_PipelineState_0* _1, gfx_PipelineLayout_0* _2, gfx_DescriptorSet_0* _3, unsigned int gridDimsX, unsigned int gridDimsY, unsigned int gridDimsZ) { dispatchComputation( (gfx::Renderer*)_0, (gfx::PipelineState*)_1, (gfx::PipelineLayout*)_2, - (gfx::DescriptorSet*)_3); + (gfx::DescriptorSet*)_3, + gridDimsX, + gridDimsY, + gridDimsZ); +} + +RWStructuredBuffer<float> convertBuffer_0(gfx_BufferResource_0* _0) { + RWStructuredBuffer<float> result; + result.data = (float*)_0; + return result; +} + +gfx_BufferResource_0* unconvertBuffer_0(RWStructuredBuffer<float> _0) { + return (gfx_BufferResource_0*)(_0.data); } void print_output_0(gfx_Renderer_0* _0, gfx_BufferResource_0* _1, int32_t _2) diff --git a/examples/heterogeneous-hello-world/shader.cpp b/examples/heterogeneous-hello-world/shader.cpp index d489f7136..640e8aa3c 100644 --- a/examples/heterogeneous-hello-world/shader.cpp +++ b/examples/heterogeneous-hello-world/shader.cpp @@ -1,27 +1,25 @@ -#include "../../slang/prelude/slang-cpp-prelude.h" +#include "../../prelude/slang-cpp-prelude.h" -//namespace { // anonymous - #ifdef SLANG_PRELUDE_NAMESPACE using namespace SLANG_PRELUDE_NAMESPACE; #endif -Vector<uint32_t, 3> operator+(Vector<uint32_t, 3> a, Vector<uint32_t, 3> 
b) +Vector<uint32_t, 3> operator*(Vector<uint32_t, 3> a, Vector<uint32_t, 3> b) { Vector<uint32_t, 3> r; - r.x = a.x + b.x; - r.y = a.y + b.y; - r.z = a.z + b.z; + r.x = a.x * b.x; + r.y = a.y * b.y; + r.z = a.z * b.z; return r; } -Vector<uint32_t, 3> operator*(Vector<uint32_t, 3> a, Vector<uint32_t, 3> b) +Vector<uint32_t, 3> operator+(Vector<uint32_t, 3> a, Vector<uint32_t, 3> b) { Vector<uint32_t, 3> r; - r.x = a.x * b.x; - r.y = a.y * b.y; - r.z = a.z * b.z; + r.x = a.x + b.x; + r.y = a.y + b.y; + r.z = a.z + b.z; return r; } @@ -30,20 +28,31 @@ Vector<uint32_t, 3> make_VecU3(uint32_t a, uint32_t b, uint32_t c) return Vector<uint32_t, 3>{ a, b, c}; } -size_t __computeMainSize = 652; -unsigned char __computeMain[] = {68, 88, 66, 67, 85, 217, 21, 44, 5, 208, 4, 46, 7, 254, 139, 84, 132, 65, 108, 79, 1, 0, 0, 0, 140, 2, 0, 0, 5, 0, 0, 0, 52, 0, 0, 0, 248, 0, 0, 0, 8, 1, 0, 0, 24, 1, 0, 0, 16, 2, 0, 0, 82, 68, 69, 70, 188, 0, 0, 0, 1, 0, 0, 0, 72, 0, 0, 0, 1, 0, 0, 0, 28, 0, 0, 0, 0, 4, 83, 67, 0, 9, 16, 0, 148, 0, 0, 0, 60, 0, 0, 0, 6, 0, 0, 0, 6, 0, 0, 0, 1, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 105, 111, 66, 117, 102, 102, 101, 114, 95, 48, 0, 171, 60, 0, 0, 0, 1, 0, 0, 0, 96, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 120, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 2, 0, 0, 0, 132, 0, 0, 0, 0, 0, 0, 0, 36, 69, 108, 101, 109, 101, 110, 116, 0, 171, 171, 171, 0, 0, 3, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 77, 105, 99, 114, 111, 115, 111, 102, 116, 32, 40, 82, 41, 32, 72, 76, 83, 76, 32, 83, 104, 97, 100, 101, 114, 32, 67, 111, 109, 112, 105, 108, 101, 114, 32, 49, 48, 46, 49, 0, 73, 83, 71, 78, 8, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 79, 83, 71, 78, 8, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 83, 72, 69, 88, 240, 0, 0, 0, 64, 0, 5, 0, 60, 0, 0, 0, 106, 8, 0, 1, 158, 0, 0, 4, 0, 224, 17, 0, 0, 0, 0, 0, 4, 0, 0, 0, 95, 0, 0, 2, 18, 0, 2, 0, 104, 0, 0, 2, 1, 0, 0, 0, 155, 0, 0, 4, 4, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 167, 0, 0, 8, 18, 0, 16, 0, 0, 0, 0, 0, 10, 0, 2, 
0, 1, 64, 0, 0, 0, 0, 0, 0, 6, 224, 17, 0, 0, 0, 0, 0, 49, 0, 0, 7, 34, 0, 16, 0, 0, 0, 0, 0, 10, 0, 16, 0, 0, 0, 0, 0, 1, 64, 0, 0, 0, 0, 0, 63, 0, 0, 0, 7, 66, 0, 16, 0, 0, 0, 0, 0, 10, 0, 16, 0, 0, 0, 0, 0, 10, 0, 16, 0, 0, 0, 0, 0, 75, 0, 0, 5, 18, 0, 16, 0, 0, 0, 0, 0, 10, 0, 16, 0, 0, 0, 0, 0, 55, 0, 0, 9, 18, 0, 16, 0, 0, 0, 0, 0, 26, 0, 16, 0, 0, 0, 0, 0, 42, 0, 16, 0, 0, 0, 0, 0, 10, 0, 16, 0, 0, 0, 0, 0, 168, 0, 0, 8, 18, 224, 17, 0, 0, 0, 0, 0, 10, 0, 2, 0, 1, 64, 0, 0, 0, 0, 0, 0, 10, 0, 16, 0, 0, 0, 0, 0, 62, 0, 0, 1, 83, 84, 65, 84, 116, 0, 0, 0, 7, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; +size_t __computeMainSize = 668; +unsigned char __computeMain[] = {68, 88, 66, 67, 87, 111, 81, 164, 2, 29, 72, 42, 151, 28, 13, 217, 55, 37, 7, 95, 1, 0, 0, 0, 156, 2, 0, 0, 5, 0, 0, 0, 52, 0, 0, 0, 8, 1, 0, 0, 24, 1, 0, 0, 40, 1, 0, 0, 32, 2, 0, 0, 82, 68, 69, 70, 204, 0, 0, 0, 1, 0, 0, 0, 88, 0, 0, 0, 1, 0, 0, 0, 28, 0, 0, 0, 0, 4, 83, 67, 0, 9, 16, 0, 164, 0, 0, 0, 60, 0, 0, 0, 6, 0, 0, 0, 6, 0, 0, 0, 1, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 101, 110, 116, 114, 121, 80, 111, 105, 110, 116, 80, 97, 114, 97, 109, 115, 95, 105, 111, 66, 117, 102, 102, 101, 114, 95, 48, 0, 60, 0, 0, 0, 1, 0, 0, 0, 112, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 136, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 2, 0, 0, 0, 148, 0, 0, 0, 0, 0, 0, 0, 36, 69, 108, 101, 109, 101, 110, 116, 0, 171, 171, 171, 0, 0, 3, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 77, 105, 99, 114, 111, 115, 111, 102, 116, 32, 40, 82, 41, 32, 72, 76, 83, 76, 32, 83, 104, 97, 100, 101, 114, 32, 67, 111, 109, 112, 105, 108, 101, 114, 32, 49, 48, 46, 49, 0, 73, 83, 71, 78, 8, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 
79, 83, 71, 78, 8, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 83, 72, 69, 88, 240, 0, 0, 0, 64, 0, 5, 0, 60, 0, 0, 0, 106, 8, 0, 1, 158, 0, 0, 4, 0, 224, 17, 0, 0, 0, 0, 0, 4, 0, 0, 0, 95, 0, 0, 2, 18, 0, 2, 0, 104, 0, 0, 2, 1, 0, 0, 0, 155, 0, 0, 4, 4, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 167, 0, 0, 8, 18, 0, 16, 0, 0, 0, 0, 0, 10, 0, 2, 0, 1, 64, 0, 0, 0, 0, 0, 0, 6, 224, 17, 0, 0, 0, 0, 0, 49, 0, 0, 7, 34, 0, 16, 0, 0, 0, 0, 0, 10, 0, 16, 0, 0, 0, 0, 0, 1, 64, 0, 0, 0, 0, 0, 63, 0, 0, 0, 7, 66, 0, 16, 0, 0, 0, 0, 0, 10, 0, 16, 0, 0, 0, 0, 0, 10, 0, 16, 0, 0, 0, 0, 0, 75, 0, 0, 5, 18, 0, 16, 0, 0, 0, 0, 0, 10, 0, 16, 0, 0, 0, 0, 0, 55, 0, 0, 9, 18, 0, 16, 0, 0, 0, 0, 0, 26, 0, 16, 0, 0, 0, 0, 0, 42, 0, 16, 0, 0, 0, 0, 0, 10, 0, 16, 0, 0, 0, 0, 0, 168, 0, 0, 8, 18, 224, 17, 0, 0, 0, 0, 0, 10, 0, 2, 0, 1, 64, 0, 0, 0, 0, 0, 0, 10, 0, 16, 0, 0, 0, 0, 0, 62, 0, 0, 1, 83, 84, 65, 84, 116, 0, 0, 0, 7, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; +void computeMain_wrapper(gfx_Renderer_0* renderer, Vector<uint32_t, 3> gridDims, + RWStructuredBuffer<float> buffer) +{ + gfx_ShaderProgram_0* shaderProgram = loadShaderProgram_0(renderer, __computeMain, __computeMainSize); + gfx_DescriptorSetLayout_0* setLayout = buildDescriptorSetLayout_0(renderer); + gfx_PipelineLayout_0* pipelineLayout = buildPipeline_0(renderer, setLayout); + gfx_DescriptorSet_0* descriptorSet = buildDescriptorSet_0(renderer, setLayout, unconvertBuffer_0(buffer)); + gfx_PipelineState_0* pipelineState = buildPipelineState_0(shaderProgram, renderer, pipelineLayout); + dispatchComputation_0(renderer, pipelineState, pipelineLayout, descriptorSet, gridDims.x, gridDims.y, gridDims.z); +} -#line 11 "shader.slang" -struct 
GlobalParams_0 +#line 7 "../../examples/heterogeneous-hello-world/shader.slang" +struct EntryPointParams_0 { RWStructuredBuffer<float> ioBuffer_0; }; struct KernelContext_0 { - GlobalParams_0* globalParams_0; }; + +#line 21 struct gfx_Window_0 { }; @@ -61,46 +70,16 @@ struct gfx_BufferResource_0 }; -struct gfx_ShaderProgram_0 -{ -}; - - -#line 26 -struct gfx_DescriptorSetLayout_0 -{ -}; - - -#line 24 -struct gfx_PipelineLayout_0 -{ -}; - - -#line 27 -struct gfx_DescriptorSet_0 -{ -}; - - -#line 25 -struct gfx_PipelineState_0 -{ -}; - - #line 7 void _computeMain(void* _S1, void* entryPointParams_0, void* _S2) { ComputeThreadVaryingInput* _S3 = ((ComputeThreadVaryingInput*)(_S1)); KernelContext_0 kernelContext_0; - *(&(&kernelContext_0)->globalParams_0) = ((GlobalParams_0*)(_S2)); #line 9 uint32_t tid_0 = (*(&_S3->groupID) * make_VecU3(4U, 1U, 1U) + *(&_S3->groupThreadID)).x; - float* _S4 = &(*(&(*(&(&kernelContext_0)->globalParams_0))->ioBuffer_0))[tid_0]; + float* _S4 = &(*(&((EntryPointParams_0*)(entryPointParams_0))->ioBuffer_0))[tid_0]; #line 11 float i_0 = *_S4; @@ -115,7 +94,7 @@ void _computeMain(void* _S1, void* entryPointParams_0, void* _S2) #line 12 float o_0 = _S5 ? 
_S6 : _S7; - float* _S8 = &(*(&(*(&(&kernelContext_0)->globalParams_0))->ioBuffer_0))[tid_0]; + float* _S8 = &(*(&((EntryPointParams_0*)(entryPointParams_0))->ioBuffer_0))[tid_0]; #line 14 *_S8 = o_0; @@ -137,36 +116,15 @@ gfx_Renderer_0* createRenderer_0(int32_t _0, int32_t _1, gfx_Window_0* _2); gfx_BufferResource_0* createStructuredBuffer_0(gfx_Renderer_0* _0, FixedArray<float, 4> _1); -#line 33 -gfx_ShaderProgram_0* loadShaderProgram_0(gfx_Renderer_0* _0); +#line 4 +RWStructuredBuffer<float> convertBuffer_0(gfx_BufferResource_0* _0); #line 40 -gfx_DescriptorSetLayout_0* buildDescriptorSetLayout_0(gfx_Renderer_0* _0); - - -#line 41 -gfx_PipelineLayout_0* buildPipeline_0(gfx_Renderer_0* _0, gfx_DescriptorSetLayout_0* _1); - - -#line 42 -gfx_DescriptorSet_0* buildDescriptorSet_0(gfx_Renderer_0* _0, gfx_DescriptorSetLayout_0* _1, gfx_BufferResource_0* _2); - - - -gfx_PipelineState_0* buildPipelineState_0(gfx_ShaderProgram_0* _0, gfx_Renderer_0* _1, gfx_PipelineLayout_0* _2); - - - void printInitialValues_0(FixedArray<float, 4> _0, int32_t _1); -#line 51 -void dispatchComputation_0(gfx_Renderer_0* _0, gfx_PipelineState_0* _1, gfx_PipelineLayout_0* _2, gfx_DescriptorSet_0* _3); - - - - +#line 41 void print_output_0(gfx_Renderer_0* _0, gfx_BufferResource_0* _1, int32_t _2); @@ -183,21 +141,19 @@ bool executeComputation_0() gfx_Window_0* _S9 = createWindow_0(int(1024), int(768)); gfx_Renderer_0* _S10 = createRenderer_0(int(1024), int(768), _S9); gfx_BufferResource_0* _S11 = createStructuredBuffer_0(_S10, initialArray_0); - gfx_ShaderProgram_0* _S12 = loadShaderProgram_0(_S10); - gfx_DescriptorSetLayout_0* _S13 = buildDescriptorSetLayout_0(_S10); - gfx_PipelineLayout_0* _S14 = buildPipeline_0(_S10, _S13); - gfx_DescriptorSet_0* _S15 = buildDescriptorSet_0(_S10, _S13, _S11); - gfx_PipelineState_0* _S16 = buildPipelineState_0(_S12, _S10, _S14); + Vector<uint32_t, 3> _S12 = make_VecU3(uint32_t(int(4)), uint32_t(int(1)), uint32_t(int(1))); + RWStructuredBuffer<float> _S13 = 
convertBuffer_0(_S11); + +#line 57 + computeMain_wrapper(_S10, _S12, _S13); + printInitialValues_0(initialArray_0, int(4)); - dispatchComputation_0(_S10, _S16, _S14, _S15); print_output_0(_S10, _S11, int(4)); return true; } -//} // anonymous - // [numthreads(4, 1, 1)] SLANG_PRELUDE_EXPORT void computeMain_Thread(ComputeThreadVaryingInput* varyingInput, void* entryPointParams, void* globalParams) diff --git a/examples/heterogeneous-hello-world/shader.slang b/examples/heterogeneous-hello-world/shader.slang index 6b56c8700..ec3831788 100644 --- a/examples/heterogeneous-hello-world/shader.slang +++ b/examples/heterogeneous-hello-world/shader.slang @@ -37,22 +37,7 @@ Ptr<gfx::Renderer> createRenderer( int gWindowHeight, Ptr<gfx::Window> gWindow); Ptr<gfx::BufferResource> createStructuredBuffer(Ptr<gfx::Renderer> gRenderer, float[4] initialArray); -Ptr<gfx::DescriptorSetLayout> buildDescriptorSetLayout(Ptr<gfx::Renderer> gRenderer); -Ptr<gfx::PipelineLayout> buildPipeline(Ptr<gfx::Renderer> gRenderer, Ptr<gfx::DescriptorSetLayout> descriptorSetLayout); -Ptr<gfx::DescriptorSet> buildDescriptorSet( - Ptr<gfx::Renderer> gRenderer, - Ptr<gfx::DescriptorSetLayout> descriptorSetLayout, - Ptr<gfx::BufferResource> gStructuredBuffer); -Ptr<gfx::PipelineState> buildPipelineState( - Ptr<gfx::ShaderProgram> shaderProgram, - Ptr<gfx::Renderer> gRenderer, - Ptr<gfx::PipelineLayout> gPipelineLayout); void printInitialValues(float[4] initialArray, int length); -void dispatchComputation( - Ptr<gfx::Renderer> gRenderer, - Ptr<gfx::PipelineState> gPipelineState, - Ptr<gfx::PipelineLayout> gPipelineLayout, - Ptr<gfx::DescriptorSet> gDescriptorSet); void print_output( Ptr<gfx::Renderer> gRenderer, Ptr<gfx::BufferResource> gStructuredBuffer, @@ -71,13 +56,7 @@ public bool executeComputation() { let structuredBuffer = createStructuredBuffer(renderer, initialArray); __GPU_FOREACH(renderer, uint3(4, 1, 1), LAMBDA(uint3 dispatchThreadID) { computeMain(convertBuffer(structuredBuffer), 
dispatchThreadID) ; }); - let shaderProgram = loadShaderProgram(renderer); - let descriptorSetLayout = buildDescriptorSetLayout(renderer); - let pipelineLayout = buildPipeline(renderer, descriptorSetLayout); - let descriptorSet = buildDescriptorSet(renderer, descriptorSetLayout, structuredBuffer); - let pipelineState = buildPipelineState(shaderProgram, renderer, pipelineLayout); printInitialValues(initialArray, 4); - dispatchComputation(renderer, pipelineState, pipelineLayout, descriptorSet); print_output(renderer, structuredBuffer, 4); diff --git a/prelude/slang-cpp-prelude.h b/prelude/slang-cpp-prelude.h index 4fcddda7c..b00f34d8f 100644 --- a/prelude/slang-cpp-prelude.h +++ b/prelude/slang-cpp-prelude.h @@ -50,4 +50,20 @@ # define SLANG_UNROLL #endif +struct gfx_Renderer_0; +struct gfx_BufferResource_0; +struct gfx_ShaderProgram_0; +struct gfx_DescriptorSetLayout_0; +struct gfx_PipelineLayout_0; +struct gfx_DescriptorSet_0; +struct gfx_BufferResource_0; +struct gfx_PipelineState_0; +gfx_ShaderProgram_0* loadShaderProgram_0(gfx_Renderer_0* _0, unsigned char _1[], size_t _2); +gfx_DescriptorSetLayout_0* buildDescriptorSetLayout_0(gfx_Renderer_0* _0); +gfx_PipelineLayout_0* buildPipeline_0(gfx_Renderer_0* _0, gfx_DescriptorSetLayout_0* _1); +gfx_DescriptorSet_0* buildDescriptorSet_0(gfx_Renderer_0* _0, gfx_DescriptorSetLayout_0* _1, gfx_BufferResource_0* _2); +gfx_PipelineState_0* buildPipelineState_0(gfx_ShaderProgram_0* _0, gfx_Renderer_0* _1, gfx_PipelineLayout_0* _2); +void dispatchComputation_0(gfx_Renderer_0* _0, gfx_PipelineState_0* _1, gfx_PipelineLayout_0* _2, gfx_DescriptorSet_0* _3, uint32_t _4, uint32_t _5, uint32_t _6); +gfx_BufferResource_0* unconvertBuffer_0(RWStructuredBuffer<float> _0); + #endif
\ No newline at end of file diff --git a/source/slang/slang-emit-c-like.cpp b/source/slang/slang-emit-c-like.cpp index 0708bc9a8..9e9117dcd 100644 --- a/source/slang/slang-emit-c-like.cpp +++ b/source/slang/slang-emit-c-like.cpp @@ -2425,6 +2425,31 @@ void CLikeSourceEmitter::defaultEmitInstExpr(IRInst* inst, const EmitOpInfo& inO m_writer->emit(")"); break; } + case kIROp_GpuForeach: + { + auto operand = inst->getOperand(2); + if (as<IRFunc>(operand)) + { + //emitOperand(operand->findDecoration<IREntryPointDecoration>(), getInfo(EmitOp::General)); + emitOperand(operand, getInfo(EmitOp::General)); + } + else + { + SLANG_UNEXPECTED("Expected 3rd operand to be a function"); + } + m_writer->emit("_wrapper("); + emitOperand(inst->getOperand(0), getInfo(EmitOp::General)); + m_writer->emit(", "); + emitOperand(inst->getOperand(1), getInfo(EmitOp::General)); + UInt argCount = inst->getOperandCount(); + for (UInt aa = 3; aa < argCount; ++aa) + { + m_writer->emit(", "); + emitOperand(inst->getOperand(aa), getInfo(EmitOp::General)); + } + m_writer->emit(")"); + break; + } default: diagnoseUnhandledInst(inst); break; diff --git a/source/slang/slang-emit-cpp.cpp b/source/slang/slang-emit-cpp.cpp index ebb6bda2d..0c2b4cd93 100644 --- a/source/slang/slang-emit-cpp.cpp +++ b/source/slang/slang-emit-cpp.cpp @@ -2277,8 +2277,8 @@ void CPPSourceEmitter::emitPreprocessorDirectivesImpl() { // Put all into an anonymous namespace // This includes any generated types, and generated intrinsics - - m_writer->emit("namespace { // anonymous \n\n"); + if (!m_compileRequest->getLinkage()->m_heterogeneous) + m_writer->emit("namespace { // anonymous \n\n"); m_writer->emit("#ifdef SLANG_PRELUDE_NAMESPACE\n"); m_writer->emit("using namespace SLANG_PRELUDE_NAMESPACE;\n"); m_writer->emit("#endif\n\n"); @@ -2556,11 +2556,11 @@ void CPPSourceEmitter::emitModuleImpl(IRModule* module) continue; if (auto entryPointDecoration = func->findDecoration<IREntryPointDecoration>()) { - String someName = 
entryPointDecoration->getName()->getStringSlice(); + String entryPointName = entryPointDecoration->getName()->getStringSlice(); for (int index = 0; index < program->getEntryPointCount(); index++) { auto entryPoint = program->getEntryPoint(index); - if (someName == entryPoint->getName()->text) + if (entryPointName == entryPoint->getName()->text) { for (auto targetRequest : linkage->targets) { @@ -2586,13 +2586,13 @@ void CPPSourceEmitter::emitModuleImpl(IRModule* module) auto ptr = (const unsigned char*)blob->getBufferPointer(); m_writer->emit("size_t __"); - m_writer->emit(someName); + m_writer->emit(entryPointName ); m_writer->emit("Size = "); m_writer->emitInt64(blob->getBufferSize()); m_writer->emit(";\n"); m_writer->emit("unsigned char __"); - m_writer->emit(someName); + m_writer->emit(entryPointName ); m_writer->emit("[] = {"); for (unsigned int i = 0; i < blob->getBufferSize() - 1; i++) { m_writer->emitUInt64(ptr[i]); @@ -2602,6 +2602,26 @@ void CPPSourceEmitter::emitModuleImpl(IRModule* module) m_writer->emit("};\n"); } } + // Emit a wrapper function for calling the shader blob + m_writer->emit("void "); + m_writer->emit(entryPointName); + m_writer->emit("_wrapper(gfx_Renderer_0* renderer, Vector<uint32_t, 3> gridDims, \n"); + m_writer->emit("\tRWStructuredBuffer<float> buffer)\n{"); + m_writer->emit("\n\tgfx_ShaderProgram_0* shaderProgram = loadShaderProgram_0(renderer, __"); + m_writer->emit(entryPointName); + m_writer->emit(", __"); + m_writer->emit(entryPointName); + m_writer->emit("Size);"); + m_writer->emit("\n\tgfx_DescriptorSetLayout_0* setLayout = buildDescriptorSetLayout_0(renderer);"); + m_writer->emit("\n\tgfx_PipelineLayout_0* pipelineLayout = buildPipeline_0(renderer, setLayout);"); + m_writer->emit("\n\tgfx_DescriptorSet_0* descriptorSet = "); + m_writer->emit("buildDescriptorSet_0(renderer, setLayout, unconvertBuffer_0(buffer));"); + m_writer->emit("\n\tgfx_PipelineState_0* pipelineState = "); + 
m_writer->emit("buildPipelineState_0(shaderProgram, renderer, pipelineLayout);"); + + m_writer->emit("\n\tdispatchComputation_0(renderer, pipelineState, pipelineLayout, "); + m_writer->emit("descriptorSet, gridDims.x, gridDims.y, gridDims.z);"); + m_writer->emit("\n}\n"); } } } @@ -2645,8 +2665,8 @@ void CPPSourceEmitter::emitModuleImpl(IRModule* module) if (m_target == CodeGenTarget::CPPSource) { // Need to close the anonymous namespace when outputting for C++ - - m_writer->emit("} // anonymous\n\n"); + if (!linkage->m_heterogeneous) + m_writer->emit("} // anonymous\n\n"); } // Finally we need to output dll entry points diff --git a/source/slang/slang-ir-inst-defs.h b/source/slang/slang-ir-inst-defs.h index 3761828b9..74bd15531 100644 --- a/source/slang/slang-ir-inst-defs.h +++ b/source/slang/slang-ir-inst-defs.h @@ -467,6 +467,9 @@ INST(SampleGrad, sampleGrad, 4, 0) INST(GroupMemoryBarrierWithGroupSync, GroupMemoryBarrierWithGroupSync, 0, 0) +// GPU_FOREACH loop of the form +INST(GpuForeach, gpuForeach, 3, 0) + /* Decoration */ INST(HighLevelDeclDecoration, highLevelDecl, 1, 0) diff --git a/source/slang/slang-ir-insts.h b/source/slang/slang-ir-insts.h index 55af7db78..a66ca8f7a 100644 --- a/source/slang/slang-ir-insts.h +++ b/source/slang/slang-ir-insts.h @@ -1946,6 +1946,8 @@ struct IRBuilder return emitWrapExistential(type, value, slotArgCount, slotArgVals.getBuffer()); } + IRInst* emitGpuForeach(List<IRInst*> args); + IRUndefined* emitUndefined(IRType* type); IRInst* findOrAddInst( diff --git a/source/slang/slang-ir-link.cpp b/source/slang/slang-ir-link.cpp index 9de2b5d1c..274fbf6d9 100644 --- a/source/slang/slang-ir-link.cpp +++ b/source/slang/slang-ir-link.cpp @@ -1491,19 +1491,21 @@ LinkedIR linkIR( cloneValue(context, bindInst); } } - - for (IRModule* irModule : irModules) + if (target == CodeGenTarget::CPPSource) { - for (auto inst : irModule->getGlobalInsts()) + for (IRModule* irModule : irModules) { - auto hasPublic = 
inst->findDecoration<IRPublicDecoration>(); - if (!hasPublic) - continue; - - auto cloned = cloneValue(context, inst); - if (!cloned->findDecorationImpl(kIROp_KeepAliveDecoration)) + for (auto inst : irModule->getGlobalInsts()) { - context->builder->addKeepAliveDecoration(cloned); + auto hasPublic = inst->findDecoration<IRPublicDecoration>(); + if (!hasPublic) + continue; + + auto cloned = cloneValue(context, inst); + if (!cloned->findDecorationImpl(kIROp_KeepAliveDecoration)) + { + context->builder->addKeepAliveDecoration(cloned); + } } } } diff --git a/source/slang/slang-ir.cpp b/source/slang/slang-ir.cpp index 13840a84a..2a1db6310 100644 --- a/source/slang/slang-ir.cpp +++ b/source/slang/slang-ir.cpp @@ -3752,6 +3752,17 @@ namespace Slang return inst; } + IRInst* IRBuilder::emitGpuForeach(List<IRInst*> args) + { + auto inst = createInst<IRInst>( + this, + kIROp_GpuForeach, + getVoidType(), + args.getCount(), + args.getBuffer()); + addInst(inst); + return inst; + } // // Decorations diff --git a/source/slang/slang-lower-to-ir.cpp b/source/slang/slang-lower-to-ir.cpp index 9c4808f31..864491f7e 100644 --- a/source/slang/slang-lower-to-ir.cpp +++ b/source/slang/slang-lower-to-ir.cpp @@ -4116,7 +4116,39 @@ struct StmtLoweringVisitor : StmtVisitor<StmtLoweringVisitor> void visitGpuForeachStmt(GpuForeachStmt* stmt) { + auto builder = getBuilder(); startBlockIfNeeded(stmt); + + auto renderer = getSimpleVal(context, lowerRValueExpr(context, stmt->renderer)); + auto gridDims = getSimpleVal(context, lowerRValueExpr(context, stmt->gridDims)); + + List<IRInst*> irArgs; + if (auto callExpr = as<InvokeExpr>(stmt->kernelCall)) + { + irArgs.add(renderer); + irArgs.add(gridDims); + auto fref = getSimpleVal(context, lowerRValueExpr(context, callExpr->functionExpr)); + irArgs.add(fref); + for (auto arg : callExpr->arguments) + { + // if a reference to dispatchThreadID, don't emit + if (auto declRefExpr = as<DeclRefExpr>(arg)) + { + if (declRefExpr->declRef.getDecl() == 
stmt->dispatchThreadID) + { + continue; + } + } + auto irArg = getSimpleVal(context, lowerRValueExpr(context, arg)); + irArgs.add(irArg); + } + } + else + { + SLANG_UNEXPECTED("GPUForeach parsing produced an invalid result"); + } + + builder->emitGpuForeach(irArgs); return; } |
