From ff2d490dc120708a2fcb6eea5880a6b7c6586a4b Mon Sep 17 00:00:00 2001 From: Dietrich Geisler Date: Mon, 17 Aug 2020 12:50:44 -0400 Subject: GPU Foreach Loop (#1498) * GPU Foreach Loop This PR introduces the completed GPU foreach loop and updates the heterogeneous-hello-world example to use it. This PR builds on the previous introduction of the GPU Foreach loop parsing and semantic checking PR (#1482) by introducing IR lowering and emitting. The new feature can be used by having a GPU_Foreach loop interacting with a named non-CPP entry point, and using the -heterogeneous flag. * Fix to path Co-authored-by: Tim Foley --- examples/heterogeneous-hello-world/main.cpp | 35 +++++-- examples/heterogeneous-hello-world/shader.cpp | 116 ++++++++---------------- examples/heterogeneous-hello-world/shader.slang | 21 ----- 3 files changed, 61 insertions(+), 111 deletions(-) (limited to 'examples') diff --git a/examples/heterogeneous-hello-world/main.cpp b/examples/heterogeneous-hello-world/main.cpp index 6bb1bc071..6159f8d44 100644 --- a/examples/heterogeneous-hello-world/main.cpp +++ b/examples/heterogeneous-hello-world/main.cpp @@ -66,12 +66,11 @@ bool executeComputation_0(); extern unsigned char __computeMain[]; extern size_t __computeMainSize; -gfx::ShaderProgram* loadShaderProgram(gfx::Renderer* renderer) +gfx::ShaderProgram* loadShaderProgram(gfx::Renderer* renderer, unsigned char computeCode[], size_t computeCodeSize) { // We extract the begin/end pointers to the output code buffers directly // - char unsigned const* computeCode = __computeMain; - char unsigned const* computeCodeEnd = computeCode + __computeMainSize; + char unsigned const* computeCodeEnd = computeCode + computeCodeSize; // Now we use the operations of the example graphics API abstraction // layer to load shader code into the underlying API. 
@@ -87,7 +86,7 @@ gfx::ShaderProgram* loadShaderProgram(gfx::Renderer* renderer) gfx::ShaderProgram::Desc programDesc; programDesc.pipelineType = gfx::PipelineType::Compute; programDesc.kernels = &kernelDescs[0]; - programDesc.kernelCount = 2; + programDesc.kernelCount = 1; gShaderProgram = renderer->createProgram(programDesc); @@ -242,13 +241,16 @@ void dispatchComputation( gfx::Renderer* gRenderer, gfx::PipelineState* gPipelineState, gfx::PipelineLayout* gPipelineLayout, - gfx::DescriptorSet* gDescriptorSet) + gfx::DescriptorSet* gDescriptorSet, + unsigned int gridDimsX, + unsigned int gridDimsY, + unsigned int gridDimsZ) { gRenderer->setPipelineState(PipelineType::Compute, gPipelineState); gRenderer->setDescriptorSet(PipelineType::Compute, gPipelineLayout, 0, gDescriptorSet); - gRenderer->dispatchCompute(4, 1, 1); + gRenderer->dispatchCompute(gridDimsX, gridDimsY, gridDimsZ); } void print_output( @@ -286,9 +288,9 @@ gfx_BufferResource_0* createStructuredBuffer_0(gfx_Renderer_0* _0, FixedArray _0, int32_t _1) printInitialValues((float*)&_0, _1); } -void dispatchComputation_0(gfx_Renderer_0* _0, gfx_PipelineState_0* _1, gfx_PipelineLayout_0* _2, gfx_DescriptorSet_0* _3) +void dispatchComputation_0(gfx_Renderer_0* _0, gfx_PipelineState_0* _1, gfx_PipelineLayout_0* _2, gfx_DescriptorSet_0* _3, unsigned int gridDimsX, unsigned int gridDimsY, unsigned int gridDimsZ) { dispatchComputation( (gfx::Renderer*)_0, (gfx::PipelineState*)_1, (gfx::PipelineLayout*)_2, - (gfx::DescriptorSet*)_3); + (gfx::DescriptorSet*)_3, + gridDimsX, + gridDimsY, + gridDimsZ); +} + +RWStructuredBuffer convertBuffer_0(gfx_BufferResource_0* _0) { + RWStructuredBuffer result; + result.data = (float*)_0; + return result; +} + +gfx_BufferResource_0* unconvertBuffer_0(RWStructuredBuffer _0) { + return (gfx_BufferResource_0*)(_0.data); } void print_output_0(gfx_Renderer_0* _0, gfx_BufferResource_0* _1, int32_t _2) diff --git a/examples/heterogeneous-hello-world/shader.cpp 
b/examples/heterogeneous-hello-world/shader.cpp index d489f7136..640e8aa3c 100644 --- a/examples/heterogeneous-hello-world/shader.cpp +++ b/examples/heterogeneous-hello-world/shader.cpp @@ -1,27 +1,25 @@ -#include "../../slang/prelude/slang-cpp-prelude.h" +#include "../../prelude/slang-cpp-prelude.h" -//namespace { // anonymous - #ifdef SLANG_PRELUDE_NAMESPACE using namespace SLANG_PRELUDE_NAMESPACE; #endif -Vector operator+(Vector a, Vector b) +Vector operator*(Vector a, Vector b) { Vector r; - r.x = a.x + b.x; - r.y = a.y + b.y; - r.z = a.z + b.z; + r.x = a.x * b.x; + r.y = a.y * b.y; + r.z = a.z * b.z; return r; } -Vector operator*(Vector a, Vector b) +Vector operator+(Vector a, Vector b) { Vector r; - r.x = a.x * b.x; - r.y = a.y * b.y; - r.z = a.z * b.z; + r.x = a.x + b.x; + r.y = a.y + b.y; + r.z = a.z + b.z; return r; } @@ -30,20 +28,31 @@ Vector make_VecU3(uint32_t a, uint32_t b, uint32_t c) return Vector{ a, b, c}; } -size_t __computeMainSize = 652; -unsigned char __computeMain[] = {68, 88, 66, 67, 85, 217, 21, 44, 5, 208, 4, 46, 7, 254, 139, 84, 132, 65, 108, 79, 1, 0, 0, 0, 140, 2, 0, 0, 5, 0, 0, 0, 52, 0, 0, 0, 248, 0, 0, 0, 8, 1, 0, 0, 24, 1, 0, 0, 16, 2, 0, 0, 82, 68, 69, 70, 188, 0, 0, 0, 1, 0, 0, 0, 72, 0, 0, 0, 1, 0, 0, 0, 28, 0, 0, 0, 0, 4, 83, 67, 0, 9, 16, 0, 148, 0, 0, 0, 60, 0, 0, 0, 6, 0, 0, 0, 6, 0, 0, 0, 1, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 105, 111, 66, 117, 102, 102, 101, 114, 95, 48, 0, 171, 60, 0, 0, 0, 1, 0, 0, 0, 96, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 120, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 2, 0, 0, 0, 132, 0, 0, 0, 0, 0, 0, 0, 36, 69, 108, 101, 109, 101, 110, 116, 0, 171, 171, 171, 0, 0, 3, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 77, 105, 99, 114, 111, 115, 111, 102, 116, 32, 40, 82, 41, 32, 72, 76, 83, 76, 32, 83, 104, 97, 100, 101, 114, 32, 67, 111, 109, 112, 105, 108, 101, 114, 32, 49, 48, 46, 49, 0, 73, 83, 71, 78, 8, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 79, 83, 71, 78, 8, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 
0, 83, 72, 69, 88, 240, 0, 0, 0, 64, 0, 5, 0, 60, 0, 0, 0, 106, 8, 0, 1, 158, 0, 0, 4, 0, 224, 17, 0, 0, 0, 0, 0, 4, 0, 0, 0, 95, 0, 0, 2, 18, 0, 2, 0, 104, 0, 0, 2, 1, 0, 0, 0, 155, 0, 0, 4, 4, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 167, 0, 0, 8, 18, 0, 16, 0, 0, 0, 0, 0, 10, 0, 2, 0, 1, 64, 0, 0, 0, 0, 0, 0, 6, 224, 17, 0, 0, 0, 0, 0, 49, 0, 0, 7, 34, 0, 16, 0, 0, 0, 0, 0, 10, 0, 16, 0, 0, 0, 0, 0, 1, 64, 0, 0, 0, 0, 0, 63, 0, 0, 0, 7, 66, 0, 16, 0, 0, 0, 0, 0, 10, 0, 16, 0, 0, 0, 0, 0, 10, 0, 16, 0, 0, 0, 0, 0, 75, 0, 0, 5, 18, 0, 16, 0, 0, 0, 0, 0, 10, 0, 16, 0, 0, 0, 0, 0, 55, 0, 0, 9, 18, 0, 16, 0, 0, 0, 0, 0, 26, 0, 16, 0, 0, 0, 0, 0, 42, 0, 16, 0, 0, 0, 0, 0, 10, 0, 16, 0, 0, 0, 0, 0, 168, 0, 0, 8, 18, 224, 17, 0, 0, 0, 0, 0, 10, 0, 2, 0, 1, 64, 0, 0, 0, 0, 0, 0, 10, 0, 16, 0, 0, 0, 0, 0, 62, 0, 0, 1, 83, 84, 65, 84, 116, 0, 0, 0, 7, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; +size_t __computeMainSize = 668; +unsigned char __computeMain[] = {68, 88, 66, 67, 87, 111, 81, 164, 2, 29, 72, 42, 151, 28, 13, 217, 55, 37, 7, 95, 1, 0, 0, 0, 156, 2, 0, 0, 5, 0, 0, 0, 52, 0, 0, 0, 8, 1, 0, 0, 24, 1, 0, 0, 40, 1, 0, 0, 32, 2, 0, 0, 82, 68, 69, 70, 204, 0, 0, 0, 1, 0, 0, 0, 88, 0, 0, 0, 1, 0, 0, 0, 28, 0, 0, 0, 0, 4, 83, 67, 0, 9, 16, 0, 164, 0, 0, 0, 60, 0, 0, 0, 6, 0, 0, 0, 6, 0, 0, 0, 1, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 101, 110, 116, 114, 121, 80, 111, 105, 110, 116, 80, 97, 114, 97, 109, 115, 95, 105, 111, 66, 117, 102, 102, 101, 114, 95, 48, 0, 60, 0, 0, 0, 1, 0, 0, 0, 112, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 136, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 2, 0, 0, 0, 148, 0, 0, 0, 0, 0, 0, 0, 36, 69, 108, 101, 109, 101, 110, 116, 0, 171, 171, 171, 
0, 0, 3, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 77, 105, 99, 114, 111, 115, 111, 102, 116, 32, 40, 82, 41, 32, 72, 76, 83, 76, 32, 83, 104, 97, 100, 101, 114, 32, 67, 111, 109, 112, 105, 108, 101, 114, 32, 49, 48, 46, 49, 0, 73, 83, 71, 78, 8, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 79, 83, 71, 78, 8, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 83, 72, 69, 88, 240, 0, 0, 0, 64, 0, 5, 0, 60, 0, 0, 0, 106, 8, 0, 1, 158, 0, 0, 4, 0, 224, 17, 0, 0, 0, 0, 0, 4, 0, 0, 0, 95, 0, 0, 2, 18, 0, 2, 0, 104, 0, 0, 2, 1, 0, 0, 0, 155, 0, 0, 4, 4, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 167, 0, 0, 8, 18, 0, 16, 0, 0, 0, 0, 0, 10, 0, 2, 0, 1, 64, 0, 0, 0, 0, 0, 0, 6, 224, 17, 0, 0, 0, 0, 0, 49, 0, 0, 7, 34, 0, 16, 0, 0, 0, 0, 0, 10, 0, 16, 0, 0, 0, 0, 0, 1, 64, 0, 0, 0, 0, 0, 63, 0, 0, 0, 7, 66, 0, 16, 0, 0, 0, 0, 0, 10, 0, 16, 0, 0, 0, 0, 0, 10, 0, 16, 0, 0, 0, 0, 0, 75, 0, 0, 5, 18, 0, 16, 0, 0, 0, 0, 0, 10, 0, 16, 0, 0, 0, 0, 0, 55, 0, 0, 9, 18, 0, 16, 0, 0, 0, 0, 0, 26, 0, 16, 0, 0, 0, 0, 0, 42, 0, 16, 0, 0, 0, 0, 0, 10, 0, 16, 0, 0, 0, 0, 0, 168, 0, 0, 8, 18, 224, 17, 0, 0, 0, 0, 0, 10, 0, 2, 0, 1, 64, 0, 0, 0, 0, 0, 0, 10, 0, 16, 0, 0, 0, 0, 0, 62, 0, 0, 1, 83, 84, 65, 84, 116, 0, 0, 0, 7, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; +void computeMain_wrapper(gfx_Renderer_0* renderer, Vector gridDims, + RWStructuredBuffer buffer) +{ + gfx_ShaderProgram_0* shaderProgram = loadShaderProgram_0(renderer, __computeMain, __computeMainSize); + gfx_DescriptorSetLayout_0* setLayout = buildDescriptorSetLayout_0(renderer); + gfx_PipelineLayout_0* pipelineLayout = buildPipeline_0(renderer, setLayout); + gfx_DescriptorSet_0* descriptorSet = buildDescriptorSet_0(renderer, setLayout, unconvertBuffer_0(buffer)); + 
gfx_PipelineState_0* pipelineState = buildPipelineState_0(shaderProgram, renderer, pipelineLayout); + dispatchComputation_0(renderer, pipelineState, pipelineLayout, descriptorSet, gridDims.x, gridDims.y, gridDims.z); +} -#line 11 "shader.slang" -struct GlobalParams_0 +#line 7 "../../examples/heterogeneous-hello-world/shader.slang" +struct EntryPointParams_0 { RWStructuredBuffer ioBuffer_0; }; struct KernelContext_0 { - GlobalParams_0* globalParams_0; }; + +#line 21 struct gfx_Window_0 { }; @@ -61,46 +70,16 @@ struct gfx_BufferResource_0 }; -struct gfx_ShaderProgram_0 -{ -}; - - -#line 26 -struct gfx_DescriptorSetLayout_0 -{ -}; - - -#line 24 -struct gfx_PipelineLayout_0 -{ -}; - - -#line 27 -struct gfx_DescriptorSet_0 -{ -}; - - -#line 25 -struct gfx_PipelineState_0 -{ -}; - - #line 7 void _computeMain(void* _S1, void* entryPointParams_0, void* _S2) { ComputeThreadVaryingInput* _S3 = ((ComputeThreadVaryingInput*)(_S1)); KernelContext_0 kernelContext_0; - *(&(&kernelContext_0)->globalParams_0) = ((GlobalParams_0*)(_S2)); #line 9 uint32_t tid_0 = (*(&_S3->groupID) * make_VecU3(4U, 1U, 1U) + *(&_S3->groupThreadID)).x; - float* _S4 = &(*(&(*(&(&kernelContext_0)->globalParams_0))->ioBuffer_0))[tid_0]; + float* _S4 = &(*(&((EntryPointParams_0*)(entryPointParams_0))->ioBuffer_0))[tid_0]; #line 11 float i_0 = *_S4; @@ -115,7 +94,7 @@ void _computeMain(void* _S1, void* entryPointParams_0, void* _S2) #line 12 float o_0 = _S5 ? 
_S6 : _S7; - float* _S8 = &(*(&(*(&(&kernelContext_0)->globalParams_0))->ioBuffer_0))[tid_0]; + float* _S8 = &(*(&((EntryPointParams_0*)(entryPointParams_0))->ioBuffer_0))[tid_0]; #line 14 *_S8 = o_0; @@ -137,36 +116,15 @@ gfx_Renderer_0* createRenderer_0(int32_t _0, int32_t _1, gfx_Window_0* _2); gfx_BufferResource_0* createStructuredBuffer_0(gfx_Renderer_0* _0, FixedArray _1); -#line 33 -gfx_ShaderProgram_0* loadShaderProgram_0(gfx_Renderer_0* _0); +#line 4 +RWStructuredBuffer convertBuffer_0(gfx_BufferResource_0* _0); #line 40 -gfx_DescriptorSetLayout_0* buildDescriptorSetLayout_0(gfx_Renderer_0* _0); - - -#line 41 -gfx_PipelineLayout_0* buildPipeline_0(gfx_Renderer_0* _0, gfx_DescriptorSetLayout_0* _1); - - -#line 42 -gfx_DescriptorSet_0* buildDescriptorSet_0(gfx_Renderer_0* _0, gfx_DescriptorSetLayout_0* _1, gfx_BufferResource_0* _2); - - - -gfx_PipelineState_0* buildPipelineState_0(gfx_ShaderProgram_0* _0, gfx_Renderer_0* _1, gfx_PipelineLayout_0* _2); - - - void printInitialValues_0(FixedArray _0, int32_t _1); -#line 51 -void dispatchComputation_0(gfx_Renderer_0* _0, gfx_PipelineState_0* _1, gfx_PipelineLayout_0* _2, gfx_DescriptorSet_0* _3); - - - - +#line 41 void print_output_0(gfx_Renderer_0* _0, gfx_BufferResource_0* _1, int32_t _2); @@ -183,21 +141,19 @@ bool executeComputation_0() gfx_Window_0* _S9 = createWindow_0(int(1024), int(768)); gfx_Renderer_0* _S10 = createRenderer_0(int(1024), int(768), _S9); gfx_BufferResource_0* _S11 = createStructuredBuffer_0(_S10, initialArray_0); - gfx_ShaderProgram_0* _S12 = loadShaderProgram_0(_S10); - gfx_DescriptorSetLayout_0* _S13 = buildDescriptorSetLayout_0(_S10); - gfx_PipelineLayout_0* _S14 = buildPipeline_0(_S10, _S13); - gfx_DescriptorSet_0* _S15 = buildDescriptorSet_0(_S10, _S13, _S11); - gfx_PipelineState_0* _S16 = buildPipelineState_0(_S12, _S10, _S14); + Vector _S12 = make_VecU3(uint32_t(int(4)), uint32_t(int(1)), uint32_t(int(1))); + RWStructuredBuffer _S13 = convertBuffer_0(_S11); + +#line 57 + 
computeMain_wrapper(_S10, _S12, _S13); + printInitialValues_0(initialArray_0, int(4)); - dispatchComputation_0(_S10, _S16, _S14, _S15); print_output_0(_S10, _S11, int(4)); return true; } -//} // anonymous - // [numthreads(4, 1, 1)] SLANG_PRELUDE_EXPORT void computeMain_Thread(ComputeThreadVaryingInput* varyingInput, void* entryPointParams, void* globalParams) diff --git a/examples/heterogeneous-hello-world/shader.slang b/examples/heterogeneous-hello-world/shader.slang index 6b56c8700..ec3831788 100644 --- a/examples/heterogeneous-hello-world/shader.slang +++ b/examples/heterogeneous-hello-world/shader.slang @@ -37,22 +37,7 @@ Ptr createRenderer( int gWindowHeight, Ptr gWindow); Ptr createStructuredBuffer(Ptr gRenderer, float[4] initialArray); -Ptr buildDescriptorSetLayout(Ptr gRenderer); -Ptr buildPipeline(Ptr gRenderer, Ptr descriptorSetLayout); -Ptr buildDescriptorSet( - Ptr gRenderer, - Ptr descriptorSetLayout, - Ptr gStructuredBuffer); -Ptr buildPipelineState( - Ptr shaderProgram, - Ptr gRenderer, - Ptr gPipelineLayout); void printInitialValues(float[4] initialArray, int length); -void dispatchComputation( - Ptr gRenderer, - Ptr gPipelineState, - Ptr gPipelineLayout, - Ptr gDescriptorSet); void print_output( Ptr gRenderer, Ptr gStructuredBuffer, @@ -71,13 +56,7 @@ public bool executeComputation() { let structuredBuffer = createStructuredBuffer(renderer, initialArray); __GPU_FOREACH(renderer, uint3(4, 1, 1), LAMBDA(uint3 dispatchThreadID) { computeMain(convertBuffer(structuredBuffer), dispatchThreadID) ; }); - let shaderProgram = loadShaderProgram(renderer); - let descriptorSetLayout = buildDescriptorSetLayout(renderer); - let pipelineLayout = buildPipeline(renderer, descriptorSetLayout); - let descriptorSet = buildDescriptorSet(renderer, descriptorSetLayout, structuredBuffer); - let pipelineState = buildPipelineState(shaderProgram, renderer, pipelineLayout); printInitialValues(initialArray, 4); - dispatchComputation(renderer, pipelineState, pipelineLayout, 
descriptorSet); print_output(renderer, structuredBuffer, 4); -- cgit v1.2.3