summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Dietrich Geisler <dag368@cornell.edu> 2020-08-17 12:50:44 -0400
committer: GitHub <noreply@github.com> 2020-08-17 09:50:44 -0700
commit: ff2d490dc120708a2fcb6eea5880a6b7c6586a4b (patch)
tree: 4bab7a7353ed5e9149510a48227da029d40d9d97
parent: 0640a10ab85f8be3c3c925cb70711560265e6548 (diff)
GPU Foreach Loop (#1498)
* GPU Foreach Loop

This PR introduces the completed GPU foreach loop and updates the heterogeneous-hello-world example to use it. This PR builds on the previous introduction of the GPU Foreach loop parsing and semantic checking PR (#1482) by introducing IR lowering and emitting. The new feature can be used by having a GPU_Foreach loop interacting with a named non-CPP entry point, and using the -heterogeneous flag.

* Fix to path

Co-authored-by: Tim Foley <tfoleyNV@users.noreply.github.com>
-rw-r--r-- examples/heterogeneous-hello-world/main.cpp 35
-rw-r--r-- examples/heterogeneous-hello-world/shader.cpp 116
-rw-r--r-- examples/heterogeneous-hello-world/shader.slang 21
-rw-r--r-- prelude/slang-cpp-prelude.h 16
-rw-r--r-- source/slang/slang-emit-c-like.cpp 25
-rw-r--r-- source/slang/slang-emit-cpp.cpp 36
-rw-r--r-- source/slang/slang-ir-inst-defs.h 3
-rw-r--r-- source/slang/slang-ir-insts.h 2
-rw-r--r-- source/slang/slang-ir-link.cpp 22
-rw-r--r-- source/slang/slang-ir.cpp 11
-rw-r--r-- source/slang/slang-lower-to-ir.cpp 32
11 files changed, 190 insertions, 129 deletions
diff --git a/examples/heterogeneous-hello-world/main.cpp b/examples/heterogeneous-hello-world/main.cpp
index 6bb1bc071..6159f8d44 100644
--- a/examples/heterogeneous-hello-world/main.cpp
+++ b/examples/heterogeneous-hello-world/main.cpp
@@ -66,12 +66,11 @@ bool executeComputation_0();
extern unsigned char __computeMain[];
extern size_t __computeMainSize;
-gfx::ShaderProgram* loadShaderProgram(gfx::Renderer* renderer)
+gfx::ShaderProgram* loadShaderProgram(gfx::Renderer* renderer, unsigned char computeCode[], size_t computeCodeSize)
{
// We extract the begin/end pointers to the output code buffers directly
//
- char unsigned const* computeCode = __computeMain;
- char unsigned const* computeCodeEnd = computeCode + __computeMainSize;
+ char unsigned const* computeCodeEnd = computeCode + computeCodeSize;
// Now we use the operations of the example graphics API abstraction
// layer to load shader code into the underlying API.
@@ -87,7 +86,7 @@ gfx::ShaderProgram* loadShaderProgram(gfx::Renderer* renderer)
gfx::ShaderProgram::Desc programDesc;
programDesc.pipelineType = gfx::PipelineType::Compute;
programDesc.kernels = &kernelDescs[0];
- programDesc.kernelCount = 2;
+ programDesc.kernelCount = 1;
gShaderProgram = renderer->createProgram(programDesc);
@@ -242,13 +241,16 @@ void dispatchComputation(
gfx::Renderer* gRenderer,
gfx::PipelineState* gPipelineState,
gfx::PipelineLayout* gPipelineLayout,
- gfx::DescriptorSet* gDescriptorSet)
+ gfx::DescriptorSet* gDescriptorSet,
+ unsigned int gridDimsX,
+ unsigned int gridDimsY,
+ unsigned int gridDimsZ)
{
gRenderer->setPipelineState(PipelineType::Compute, gPipelineState);
gRenderer->setDescriptorSet(PipelineType::Compute, gPipelineLayout, 0, gDescriptorSet);
- gRenderer->dispatchCompute(4, 1, 1);
+ gRenderer->dispatchCompute(gridDimsX, gridDimsY, gridDimsZ);
}
void print_output(
@@ -286,9 +288,9 @@ gfx_BufferResource_0* createStructuredBuffer_0(gfx_Renderer_0* _0, FixedArray<fl
return (gfx_BufferResource_0*)createStructuredBuffer((gfx::Renderer*)_0, (float*)&_1);
}
-gfx_ShaderProgram_0* loadShaderProgram_0(gfx_Renderer_0* _0)
+gfx_ShaderProgram_0* loadShaderProgram_0(gfx_Renderer_0* _0, unsigned char _1[], size_t _2)
{
- return (gfx_ShaderProgram_0*)loadShaderProgram((gfx::Renderer*)_0);
+ return (gfx_ShaderProgram_0*)loadShaderProgram((gfx::Renderer*)_0, _1, _2);
}
gfx_DescriptorSetLayout_0* buildDescriptorSetLayout_0(gfx_Renderer_0* _0)
@@ -322,13 +324,26 @@ void printInitialValues_0(FixedArray<float, 4> _0, int32_t _1)
printInitialValues((float*)&_0, _1);
}
-void dispatchComputation_0(gfx_Renderer_0* _0, gfx_PipelineState_0* _1, gfx_PipelineLayout_0* _2, gfx_DescriptorSet_0* _3)
+void dispatchComputation_0(gfx_Renderer_0* _0, gfx_PipelineState_0* _1, gfx_PipelineLayout_0* _2, gfx_DescriptorSet_0* _3, unsigned int gridDimsX, unsigned int gridDimsY, unsigned int gridDimsZ)
{
dispatchComputation(
(gfx::Renderer*)_0,
(gfx::PipelineState*)_1,
(gfx::PipelineLayout*)_2,
- (gfx::DescriptorSet*)_3);
+ (gfx::DescriptorSet*)_3,
+ gridDimsX,
+ gridDimsY,
+ gridDimsZ);
+}
+
+RWStructuredBuffer<float> convertBuffer_0(gfx_BufferResource_0* _0) {
+ RWStructuredBuffer<float> result;
+ result.data = (float*)_0;
+ return result;
+}
+
+gfx_BufferResource_0* unconvertBuffer_0(RWStructuredBuffer<float> _0) {
+ return (gfx_BufferResource_0*)(_0.data);
}
void print_output_0(gfx_Renderer_0* _0, gfx_BufferResource_0* _1, int32_t _2)
diff --git a/examples/heterogeneous-hello-world/shader.cpp b/examples/heterogeneous-hello-world/shader.cpp
index d489f7136..640e8aa3c 100644
--- a/examples/heterogeneous-hello-world/shader.cpp
+++ b/examples/heterogeneous-hello-world/shader.cpp
@@ -1,27 +1,25 @@
-#include "../../slang/prelude/slang-cpp-prelude.h"
+#include "../../prelude/slang-cpp-prelude.h"
-//namespace { // anonymous
-
#ifdef SLANG_PRELUDE_NAMESPACE
using namespace SLANG_PRELUDE_NAMESPACE;
#endif
-Vector<uint32_t, 3> operator+(Vector<uint32_t, 3> a, Vector<uint32_t, 3> b)
+Vector<uint32_t, 3> operator*(Vector<uint32_t, 3> a, Vector<uint32_t, 3> b)
{
Vector<uint32_t, 3> r;
- r.x = a.x + b.x;
- r.y = a.y + b.y;
- r.z = a.z + b.z;
+ r.x = a.x * b.x;
+ r.y = a.y * b.y;
+ r.z = a.z * b.z;
return r;
}
-Vector<uint32_t, 3> operator*(Vector<uint32_t, 3> a, Vector<uint32_t, 3> b)
+Vector<uint32_t, 3> operator+(Vector<uint32_t, 3> a, Vector<uint32_t, 3> b)
{
Vector<uint32_t, 3> r;
- r.x = a.x * b.x;
- r.y = a.y * b.y;
- r.z = a.z * b.z;
+ r.x = a.x + b.x;
+ r.y = a.y + b.y;
+ r.z = a.z + b.z;
return r;
}
@@ -30,20 +28,31 @@ Vector<uint32_t, 3> make_VecU3(uint32_t a, uint32_t b, uint32_t c)
return Vector<uint32_t, 3>{ a, b, c};
}
-size_t __computeMainSize = 652;
-unsigned char __computeMain[] = {68, 88, 66, 67, 85, 217, 21, 44, 5, 208, 4, 46, 7, 254, 139, 84, 132, 65, 108, 79, 1, 0, 0, 0, 140, 2, 0, 0, 5, 0, 0, 0, 52, 0, 0, 0, 248, 0, 0, 0, 8, 1, 0, 0, 24, 1, 0, 0, 16, 2, 0, 0, 82, 68, 69, 70, 188, 0, 0, 0, 1, 0, 0, 0, 72, 0, 0, 0, 1, 0, 0, 0, 28, 0, 0, 0, 0, 4, 83, 67, 0, 9, 16, 0, 148, 0, 0, 0, 60, 0, 0, 0, 6, 0, 0, 0, 6, 0, 0, 0, 1, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 105, 111, 66, 117, 102, 102, 101, 114, 95, 48, 0, 171, 60, 0, 0, 0, 1, 0, 0, 0, 96, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 120, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 2, 0, 0, 0, 132, 0, 0, 0, 0, 0, 0, 0, 36, 69, 108, 101, 109, 101, 110, 116, 0, 171, 171, 171, 0, 0, 3, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 77, 105, 99, 114, 111, 115, 111, 102, 116, 32, 40, 82, 41, 32, 72, 76, 83, 76, 32, 83, 104, 97, 100, 101, 114, 32, 67, 111, 109, 112, 105, 108, 101, 114, 32, 49, 48, 46, 49, 0, 73, 83, 71, 78, 8, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 79, 83, 71, 78, 8, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 83, 72, 69, 88, 240, 0, 0, 0, 64, 0, 5, 0, 60, 0, 0, 0, 106, 8, 0, 1, 158, 0, 0, 4, 0, 224, 17, 0, 0, 0, 0, 0, 4, 0, 0, 0, 95, 0, 0, 2, 18, 0, 2, 0, 104, 0, 0, 2, 1, 0, 0, 0, 155, 0, 0, 4, 4, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 167, 0, 0, 8, 18, 0, 16, 0, 0, 0, 0, 0, 10, 0, 2, 0, 1, 64, 0, 0, 0, 0, 0, 0, 6, 224, 17, 0, 0, 0, 0, 0, 49, 0, 0, 7, 34, 0, 16, 0, 0, 0, 0, 0, 10, 0, 16, 0, 0, 0, 0, 0, 1, 64, 0, 0, 0, 0, 0, 63, 0, 0, 0, 7, 66, 0, 16, 0, 0, 0, 0, 0, 10, 0, 16, 0, 0, 0, 0, 0, 10, 0, 16, 0, 0, 0, 0, 0, 75, 0, 0, 5, 18, 0, 16, 0, 0, 0, 0, 0, 10, 0, 16, 0, 0, 0, 0, 0, 55, 0, 0, 9, 18, 0, 16, 0, 0, 0, 0, 0, 26, 0, 16, 0, 0, 0, 0, 0, 42, 0, 16, 0, 0, 0, 0, 0, 10, 0, 16, 0, 0, 0, 0, 0, 168, 0, 0, 8, 18, 224, 17, 0, 0, 0, 0, 0, 10, 0, 2, 0, 1, 64, 0, 0, 0, 0, 0, 0, 10, 0, 16, 0, 0, 0, 0, 0, 62, 0, 0, 1, 83, 84, 65, 84, 116, 0, 0, 0, 7, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+size_t __computeMainSize = 668;
+unsigned char __computeMain[] = {68, 88, 66, 67, 87, 111, 81, 164, 2, 29, 72, 42, 151, 28, 13, 217, 55, 37, 7, 95, 1, 0, 0, 0, 156, 2, 0, 0, 5, 0, 0, 0, 52, 0, 0, 0, 8, 1, 0, 0, 24, 1, 0, 0, 40, 1, 0, 0, 32, 2, 0, 0, 82, 68, 69, 70, 204, 0, 0, 0, 1, 0, 0, 0, 88, 0, 0, 0, 1, 0, 0, 0, 28, 0, 0, 0, 0, 4, 83, 67, 0, 9, 16, 0, 164, 0, 0, 0, 60, 0, 0, 0, 6, 0, 0, 0, 6, 0, 0, 0, 1, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 101, 110, 116, 114, 121, 80, 111, 105, 110, 116, 80, 97, 114, 97, 109, 115, 95, 105, 111, 66, 117, 102, 102, 101, 114, 95, 48, 0, 60, 0, 0, 0, 1, 0, 0, 0, 112, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 136, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 2, 0, 0, 0, 148, 0, 0, 0, 0, 0, 0, 0, 36, 69, 108, 101, 109, 101, 110, 116, 0, 171, 171, 171, 0, 0, 3, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 77, 105, 99, 114, 111, 115, 111, 102, 116, 32, 40, 82, 41, 32, 72, 76, 83, 76, 32, 83, 104, 97, 100, 101, 114, 32, 67, 111, 109, 112, 105, 108, 101, 114, 32, 49, 48, 46, 49, 0, 73, 83, 71, 78, 8, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 79, 83, 71, 78, 8, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 83, 72, 69, 88, 240, 0, 0, 0, 64, 0, 5, 0, 60, 0, 0, 0, 106, 8, 0, 1, 158, 0, 0, 4, 0, 224, 17, 0, 0, 0, 0, 0, 4, 0, 0, 0, 95, 0, 0, 2, 18, 0, 2, 0, 104, 0, 0, 2, 1, 0, 0, 0, 155, 0, 0, 4, 4, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 167, 0, 0, 8, 18, 0, 16, 0, 0, 0, 0, 0, 10, 0, 2, 0, 1, 64, 0, 0, 0, 0, 0, 0, 6, 224, 17, 0, 0, 0, 0, 0, 49, 0, 0, 7, 34, 0, 16, 0, 0, 0, 0, 0, 10, 0, 16, 0, 0, 0, 0, 0, 1, 64, 0, 0, 0, 0, 0, 63, 0, 0, 0, 7, 66, 0, 16, 0, 0, 0, 0, 0, 10, 0, 16, 0, 0, 0, 0, 0, 10, 0, 16, 0, 0, 0, 0, 0, 75, 0, 0, 5, 18, 0, 16, 0, 0, 0, 0, 0, 10, 0, 16, 0, 0, 0, 0, 0, 55, 0, 0, 9, 18, 0, 16, 0, 0, 0, 0, 0, 26, 0, 16, 0, 0, 0, 0, 0, 42, 0, 16, 0, 0, 0, 0, 0, 10, 0, 16, 0, 0, 0, 0, 0, 168, 0, 0, 8, 18, 224, 17, 0, 0, 0, 0, 0, 10, 0, 2, 0, 1, 64, 0, 0, 0, 0, 0, 0, 10, 0, 16, 0, 0, 0, 0, 0, 62, 0, 0, 1, 83, 84, 65, 84, 116, 0, 0, 0, 7, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 3, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+void computeMain_wrapper(gfx_Renderer_0* renderer, Vector<uint32_t, 3> gridDims,
+ RWStructuredBuffer<float> buffer)
+{
+ gfx_ShaderProgram_0* shaderProgram = loadShaderProgram_0(renderer, __computeMain, __computeMainSize);
+ gfx_DescriptorSetLayout_0* setLayout = buildDescriptorSetLayout_0(renderer);
+ gfx_PipelineLayout_0* pipelineLayout = buildPipeline_0(renderer, setLayout);
+ gfx_DescriptorSet_0* descriptorSet = buildDescriptorSet_0(renderer, setLayout, unconvertBuffer_0(buffer));
+ gfx_PipelineState_0* pipelineState = buildPipelineState_0(shaderProgram, renderer, pipelineLayout);
+ dispatchComputation_0(renderer, pipelineState, pipelineLayout, descriptorSet, gridDims.x, gridDims.y, gridDims.z);
+}
-#line 11 "shader.slang"
-struct GlobalParams_0
+#line 7 "../../examples/heterogeneous-hello-world/shader.slang"
+struct EntryPointParams_0
{
RWStructuredBuffer<float> ioBuffer_0;
};
struct KernelContext_0
{
- GlobalParams_0* globalParams_0;
};
+
+#line 21
struct gfx_Window_0
{
};
@@ -61,46 +70,16 @@ struct gfx_BufferResource_0
};
-struct gfx_ShaderProgram_0
-{
-};
-
-
-#line 26
-struct gfx_DescriptorSetLayout_0
-{
-};
-
-
-#line 24
-struct gfx_PipelineLayout_0
-{
-};
-
-
-#line 27
-struct gfx_DescriptorSet_0
-{
-};
-
-
-#line 25
-struct gfx_PipelineState_0
-{
-};
-
-
#line 7
void _computeMain(void* _S1, void* entryPointParams_0, void* _S2)
{
ComputeThreadVaryingInput* _S3 = ((ComputeThreadVaryingInput*)(_S1));
KernelContext_0 kernelContext_0;
- *(&(&kernelContext_0)->globalParams_0) = ((GlobalParams_0*)(_S2));
#line 9
uint32_t tid_0 = (*(&_S3->groupID) * make_VecU3(4U, 1U, 1U) + *(&_S3->groupThreadID)).x;
- float* _S4 = &(*(&(*(&(&kernelContext_0)->globalParams_0))->ioBuffer_0))[tid_0];
+ float* _S4 = &(*(&((EntryPointParams_0*)(entryPointParams_0))->ioBuffer_0))[tid_0];
#line 11
float i_0 = *_S4;
@@ -115,7 +94,7 @@ void _computeMain(void* _S1, void* entryPointParams_0, void* _S2)
#line 12
float o_0 = _S5 ? _S6 : _S7;
- float* _S8 = &(*(&(*(&(&kernelContext_0)->globalParams_0))->ioBuffer_0))[tid_0];
+ float* _S8 = &(*(&((EntryPointParams_0*)(entryPointParams_0))->ioBuffer_0))[tid_0];
#line 14
*_S8 = o_0;
@@ -137,36 +116,15 @@ gfx_Renderer_0* createRenderer_0(int32_t _0, int32_t _1, gfx_Window_0* _2);
gfx_BufferResource_0* createStructuredBuffer_0(gfx_Renderer_0* _0, FixedArray<float, 4> _1);
-#line 33
-gfx_ShaderProgram_0* loadShaderProgram_0(gfx_Renderer_0* _0);
+#line 4
+RWStructuredBuffer<float> convertBuffer_0(gfx_BufferResource_0* _0);
#line 40
-gfx_DescriptorSetLayout_0* buildDescriptorSetLayout_0(gfx_Renderer_0* _0);
-
-
-#line 41
-gfx_PipelineLayout_0* buildPipeline_0(gfx_Renderer_0* _0, gfx_DescriptorSetLayout_0* _1);
-
-
-#line 42
-gfx_DescriptorSet_0* buildDescriptorSet_0(gfx_Renderer_0* _0, gfx_DescriptorSetLayout_0* _1, gfx_BufferResource_0* _2);
-
-
-
-gfx_PipelineState_0* buildPipelineState_0(gfx_ShaderProgram_0* _0, gfx_Renderer_0* _1, gfx_PipelineLayout_0* _2);
-
-
-
void printInitialValues_0(FixedArray<float, 4> _0, int32_t _1);
-#line 51
-void dispatchComputation_0(gfx_Renderer_0* _0, gfx_PipelineState_0* _1, gfx_PipelineLayout_0* _2, gfx_DescriptorSet_0* _3);
-
-
-
-
+#line 41
void print_output_0(gfx_Renderer_0* _0, gfx_BufferResource_0* _1, int32_t _2);
@@ -183,21 +141,19 @@ bool executeComputation_0()
gfx_Window_0* _S9 = createWindow_0(int(1024), int(768));
gfx_Renderer_0* _S10 = createRenderer_0(int(1024), int(768), _S9);
gfx_BufferResource_0* _S11 = createStructuredBuffer_0(_S10, initialArray_0);
- gfx_ShaderProgram_0* _S12 = loadShaderProgram_0(_S10);
- gfx_DescriptorSetLayout_0* _S13 = buildDescriptorSetLayout_0(_S10);
- gfx_PipelineLayout_0* _S14 = buildPipeline_0(_S10, _S13);
- gfx_DescriptorSet_0* _S15 = buildDescriptorSet_0(_S10, _S13, _S11);
- gfx_PipelineState_0* _S16 = buildPipelineState_0(_S12, _S10, _S14);
+ Vector<uint32_t, 3> _S12 = make_VecU3(uint32_t(int(4)), uint32_t(int(1)), uint32_t(int(1)));
+ RWStructuredBuffer<float> _S13 = convertBuffer_0(_S11);
+
+#line 57
+ computeMain_wrapper(_S10, _S12, _S13);
+
printInitialValues_0(initialArray_0, int(4));
- dispatchComputation_0(_S10, _S16, _S14, _S15);
print_output_0(_S10, _S11, int(4));
return true;
}
-//} // anonymous
-
// [numthreads(4, 1, 1)]
SLANG_PRELUDE_EXPORT
void computeMain_Thread(ComputeThreadVaryingInput* varyingInput, void* entryPointParams, void* globalParams)
diff --git a/examples/heterogeneous-hello-world/shader.slang b/examples/heterogeneous-hello-world/shader.slang
index 6b56c8700..ec3831788 100644
--- a/examples/heterogeneous-hello-world/shader.slang
+++ b/examples/heterogeneous-hello-world/shader.slang
@@ -37,22 +37,7 @@ Ptr<gfx::Renderer> createRenderer(
int gWindowHeight,
Ptr<gfx::Window> gWindow);
Ptr<gfx::BufferResource> createStructuredBuffer(Ptr<gfx::Renderer> gRenderer, float[4] initialArray);
-Ptr<gfx::DescriptorSetLayout> buildDescriptorSetLayout(Ptr<gfx::Renderer> gRenderer);
-Ptr<gfx::PipelineLayout> buildPipeline(Ptr<gfx::Renderer> gRenderer, Ptr<gfx::DescriptorSetLayout> descriptorSetLayout);
-Ptr<gfx::DescriptorSet> buildDescriptorSet(
- Ptr<gfx::Renderer> gRenderer,
- Ptr<gfx::DescriptorSetLayout> descriptorSetLayout,
- Ptr<gfx::BufferResource> gStructuredBuffer);
-Ptr<gfx::PipelineState> buildPipelineState(
- Ptr<gfx::ShaderProgram> shaderProgram,
- Ptr<gfx::Renderer> gRenderer,
- Ptr<gfx::PipelineLayout> gPipelineLayout);
void printInitialValues(float[4] initialArray, int length);
-void dispatchComputation(
- Ptr<gfx::Renderer> gRenderer,
- Ptr<gfx::PipelineState> gPipelineState,
- Ptr<gfx::PipelineLayout> gPipelineLayout,
- Ptr<gfx::DescriptorSet> gDescriptorSet);
void print_output(
Ptr<gfx::Renderer> gRenderer,
Ptr<gfx::BufferResource> gStructuredBuffer,
@@ -71,13 +56,7 @@ public bool executeComputation() {
let structuredBuffer = createStructuredBuffer(renderer, initialArray);
__GPU_FOREACH(renderer, uint3(4, 1, 1), LAMBDA(uint3 dispatchThreadID)
{ computeMain(convertBuffer(structuredBuffer), dispatchThreadID) ; });
- let shaderProgram = loadShaderProgram(renderer);
- let descriptorSetLayout = buildDescriptorSetLayout(renderer);
- let pipelineLayout = buildPipeline(renderer, descriptorSetLayout);
- let descriptorSet = buildDescriptorSet(renderer, descriptorSetLayout, structuredBuffer);
- let pipelineState = buildPipelineState(shaderProgram, renderer, pipelineLayout);
printInitialValues(initialArray, 4);
- dispatchComputation(renderer, pipelineState, pipelineLayout, descriptorSet);
print_output(renderer, structuredBuffer, 4);
diff --git a/prelude/slang-cpp-prelude.h b/prelude/slang-cpp-prelude.h
index 4fcddda7c..b00f34d8f 100644
--- a/prelude/slang-cpp-prelude.h
+++ b/prelude/slang-cpp-prelude.h
@@ -50,4 +50,20 @@
# define SLANG_UNROLL
#endif
+struct gfx_Renderer_0;
+struct gfx_BufferResource_0;
+struct gfx_ShaderProgram_0;
+struct gfx_DescriptorSetLayout_0;
+struct gfx_PipelineLayout_0;
+struct gfx_DescriptorSet_0;
+struct gfx_BufferResource_0;
+struct gfx_PipelineState_0;
+gfx_ShaderProgram_0* loadShaderProgram_0(gfx_Renderer_0* _0, unsigned char _1[], size_t _2);
+gfx_DescriptorSetLayout_0* buildDescriptorSetLayout_0(gfx_Renderer_0* _0);
+gfx_PipelineLayout_0* buildPipeline_0(gfx_Renderer_0* _0, gfx_DescriptorSetLayout_0* _1);
+gfx_DescriptorSet_0* buildDescriptorSet_0(gfx_Renderer_0* _0, gfx_DescriptorSetLayout_0* _1, gfx_BufferResource_0* _2);
+gfx_PipelineState_0* buildPipelineState_0(gfx_ShaderProgram_0* _0, gfx_Renderer_0* _1, gfx_PipelineLayout_0* _2);
+void dispatchComputation_0(gfx_Renderer_0* _0, gfx_PipelineState_0* _1, gfx_PipelineLayout_0* _2, gfx_DescriptorSet_0* _3, uint32_t _4, uint32_t _5, uint32_t _6);
+gfx_BufferResource_0* unconvertBuffer_0(RWStructuredBuffer<float> _0);
+
#endif \ No newline at end of file
diff --git a/source/slang/slang-emit-c-like.cpp b/source/slang/slang-emit-c-like.cpp
index 0708bc9a8..9e9117dcd 100644
--- a/source/slang/slang-emit-c-like.cpp
+++ b/source/slang/slang-emit-c-like.cpp
@@ -2425,6 +2425,31 @@ void CLikeSourceEmitter::defaultEmitInstExpr(IRInst* inst, const EmitOpInfo& inO
m_writer->emit(")");
break;
}
+ case kIROp_GpuForeach:
+ {
+ auto operand = inst->getOperand(2);
+ if (as<IRFunc>(operand))
+ {
+ //emitOperand(operand->findDecoration<IREntryPointDecoration>(), getInfo(EmitOp::General));
+ emitOperand(operand, getInfo(EmitOp::General));
+ }
+ else
+ {
+ SLANG_UNEXPECTED("Expected 3rd operand to be a function");
+ }
+ m_writer->emit("_wrapper(");
+ emitOperand(inst->getOperand(0), getInfo(EmitOp::General));
+ m_writer->emit(", ");
+ emitOperand(inst->getOperand(1), getInfo(EmitOp::General));
+ UInt argCount = inst->getOperandCount();
+ for (UInt aa = 3; aa < argCount; ++aa)
+ {
+ m_writer->emit(", ");
+ emitOperand(inst->getOperand(aa), getInfo(EmitOp::General));
+ }
+ m_writer->emit(")");
+ break;
+ }
default:
diagnoseUnhandledInst(inst);
break;
diff --git a/source/slang/slang-emit-cpp.cpp b/source/slang/slang-emit-cpp.cpp
index ebb6bda2d..0c2b4cd93 100644
--- a/source/slang/slang-emit-cpp.cpp
+++ b/source/slang/slang-emit-cpp.cpp
@@ -2277,8 +2277,8 @@ void CPPSourceEmitter::emitPreprocessorDirectivesImpl()
{
// Put all into an anonymous namespace
// This includes any generated types, and generated intrinsics
-
- m_writer->emit("namespace { // anonymous \n\n");
+ if (!m_compileRequest->getLinkage()->m_heterogeneous)
+ m_writer->emit("namespace { // anonymous \n\n");
m_writer->emit("#ifdef SLANG_PRELUDE_NAMESPACE\n");
m_writer->emit("using namespace SLANG_PRELUDE_NAMESPACE;\n");
m_writer->emit("#endif\n\n");
@@ -2556,11 +2556,11 @@ void CPPSourceEmitter::emitModuleImpl(IRModule* module)
continue;
if (auto entryPointDecoration = func->findDecoration<IREntryPointDecoration>())
{
- String someName = entryPointDecoration->getName()->getStringSlice();
+ String entryPointName = entryPointDecoration->getName()->getStringSlice();
for (int index = 0; index < program->getEntryPointCount(); index++)
{
auto entryPoint = program->getEntryPoint(index);
- if (someName == entryPoint->getName()->text)
+ if (entryPointName == entryPoint->getName()->text)
{
for (auto targetRequest : linkage->targets)
{
@@ -2586,13 +2586,13 @@ void CPPSourceEmitter::emitModuleImpl(IRModule* module)
auto ptr = (const unsigned char*)blob->getBufferPointer();
m_writer->emit("size_t __");
- m_writer->emit(someName);
+ m_writer->emit(entryPointName );
m_writer->emit("Size = ");
m_writer->emitInt64(blob->getBufferSize());
m_writer->emit(";\n");
m_writer->emit("unsigned char __");
- m_writer->emit(someName);
+ m_writer->emit(entryPointName );
m_writer->emit("[] = {");
for (unsigned int i = 0; i < blob->getBufferSize() - 1; i++) {
m_writer->emitUInt64(ptr[i]);
@@ -2602,6 +2602,26 @@ void CPPSourceEmitter::emitModuleImpl(IRModule* module)
m_writer->emit("};\n");
}
}
+ // Emit a wrapper function for calling the shader blob
+ m_writer->emit("void ");
+ m_writer->emit(entryPointName);
+ m_writer->emit("_wrapper(gfx_Renderer_0* renderer, Vector<uint32_t, 3> gridDims, \n");
+ m_writer->emit("\tRWStructuredBuffer<float> buffer)\n{");
+ m_writer->emit("\n\tgfx_ShaderProgram_0* shaderProgram = loadShaderProgram_0(renderer, __");
+ m_writer->emit(entryPointName);
+ m_writer->emit(", __");
+ m_writer->emit(entryPointName);
+ m_writer->emit("Size);");
+ m_writer->emit("\n\tgfx_DescriptorSetLayout_0* setLayout = buildDescriptorSetLayout_0(renderer);");
+ m_writer->emit("\n\tgfx_PipelineLayout_0* pipelineLayout = buildPipeline_0(renderer, setLayout);");
+ m_writer->emit("\n\tgfx_DescriptorSet_0* descriptorSet = ");
+ m_writer->emit("buildDescriptorSet_0(renderer, setLayout, unconvertBuffer_0(buffer));");
+ m_writer->emit("\n\tgfx_PipelineState_0* pipelineState = ");
+ m_writer->emit("buildPipelineState_0(shaderProgram, renderer, pipelineLayout);");
+
+ m_writer->emit("\n\tdispatchComputation_0(renderer, pipelineState, pipelineLayout, ");
+ m_writer->emit("descriptorSet, gridDims.x, gridDims.y, gridDims.z);");
+ m_writer->emit("\n}\n");
}
}
}
@@ -2645,8 +2665,8 @@ void CPPSourceEmitter::emitModuleImpl(IRModule* module)
if (m_target == CodeGenTarget::CPPSource)
{
// Need to close the anonymous namespace when outputting for C++
-
- m_writer->emit("} // anonymous\n\n");
+ if (!linkage->m_heterogeneous)
+ m_writer->emit("} // anonymous\n\n");
}
// Finally we need to output dll entry points
diff --git a/source/slang/slang-ir-inst-defs.h b/source/slang/slang-ir-inst-defs.h
index 3761828b9..74bd15531 100644
--- a/source/slang/slang-ir-inst-defs.h
+++ b/source/slang/slang-ir-inst-defs.h
@@ -467,6 +467,9 @@ INST(SampleGrad, sampleGrad, 4, 0)
INST(GroupMemoryBarrierWithGroupSync, GroupMemoryBarrierWithGroupSync, 0, 0)
+// GPU_FOREACH loop of the form: gpuForeach(renderer, gridDims, kernelFunc, kernelArgs...)
+INST(GpuForeach, gpuForeach, 3, 0)
+
/* Decoration */
INST(HighLevelDeclDecoration, highLevelDecl, 1, 0)
diff --git a/source/slang/slang-ir-insts.h b/source/slang/slang-ir-insts.h
index 55af7db78..a66ca8f7a 100644
--- a/source/slang/slang-ir-insts.h
+++ b/source/slang/slang-ir-insts.h
@@ -1946,6 +1946,8 @@ struct IRBuilder
return emitWrapExistential(type, value, slotArgCount, slotArgVals.getBuffer());
}
+ IRInst* emitGpuForeach(List<IRInst*> args);
+
IRUndefined* emitUndefined(IRType* type);
IRInst* findOrAddInst(
diff --git a/source/slang/slang-ir-link.cpp b/source/slang/slang-ir-link.cpp
index 9de2b5d1c..274fbf6d9 100644
--- a/source/slang/slang-ir-link.cpp
+++ b/source/slang/slang-ir-link.cpp
@@ -1491,19 +1491,21 @@ LinkedIR linkIR(
cloneValue(context, bindInst);
}
}
-
- for (IRModule* irModule : irModules)
+ if (target == CodeGenTarget::CPPSource)
{
- for (auto inst : irModule->getGlobalInsts())
+ for (IRModule* irModule : irModules)
{
- auto hasPublic = inst->findDecoration<IRPublicDecoration>();
- if (!hasPublic)
- continue;
-
- auto cloned = cloneValue(context, inst);
- if (!cloned->findDecorationImpl(kIROp_KeepAliveDecoration))
+ for (auto inst : irModule->getGlobalInsts())
{
- context->builder->addKeepAliveDecoration(cloned);
+ auto hasPublic = inst->findDecoration<IRPublicDecoration>();
+ if (!hasPublic)
+ continue;
+
+ auto cloned = cloneValue(context, inst);
+ if (!cloned->findDecorationImpl(kIROp_KeepAliveDecoration))
+ {
+ context->builder->addKeepAliveDecoration(cloned);
+ }
}
}
}
diff --git a/source/slang/slang-ir.cpp b/source/slang/slang-ir.cpp
index 13840a84a..2a1db6310 100644
--- a/source/slang/slang-ir.cpp
+++ b/source/slang/slang-ir.cpp
@@ -3752,6 +3752,17 @@ namespace Slang
return inst;
}
+ IRInst* IRBuilder::emitGpuForeach(List<IRInst*> args)
+ {
+ auto inst = createInst<IRInst>(
+ this,
+ kIROp_GpuForeach,
+ getVoidType(),
+ args.getCount(),
+ args.getBuffer());
+ addInst(inst);
+ return inst;
+ }
//
// Decorations
diff --git a/source/slang/slang-lower-to-ir.cpp b/source/slang/slang-lower-to-ir.cpp
index 9c4808f31..864491f7e 100644
--- a/source/slang/slang-lower-to-ir.cpp
+++ b/source/slang/slang-lower-to-ir.cpp
@@ -4116,7 +4116,39 @@ struct StmtLoweringVisitor : StmtVisitor<StmtLoweringVisitor>
void visitGpuForeachStmt(GpuForeachStmt* stmt)
{
+ auto builder = getBuilder();
startBlockIfNeeded(stmt);
+
+ auto renderer = getSimpleVal(context, lowerRValueExpr(context, stmt->renderer));
+ auto gridDims = getSimpleVal(context, lowerRValueExpr(context, stmt->gridDims));
+
+ List<IRInst*> irArgs;
+ if (auto callExpr = as<InvokeExpr>(stmt->kernelCall))
+ {
+ irArgs.add(renderer);
+ irArgs.add(gridDims);
+ auto fref = getSimpleVal(context, lowerRValueExpr(context, callExpr->functionExpr));
+ irArgs.add(fref);
+ for (auto arg : callExpr->arguments)
+ {
+ // if a reference to dispatchThreadID, don't emit
+ if (auto declRefExpr = as<DeclRefExpr>(arg))
+ {
+ if (declRefExpr->declRef.getDecl() == stmt->dispatchThreadID)
+ {
+ continue;
+ }
+ }
+ auto irArg = getSimpleVal(context, lowerRValueExpr(context, arg));
+ irArgs.add(irArg);
+ }
+ }
+ else
+ {
+ SLANG_UNEXPECTED("GPUForeach parsing produced an invalid result");
+ }
+
+ builder->emitGpuForeach(irArgs);
return;
}