From 502aa3812a82cf0d091cff0c67804e4ee448ac78 Mon Sep 17 00:00:00 2001 From: David Siher <32305650+dsiher@users.noreply.github.com> Date: Tue, 14 Sep 2021 12:59:55 -0400 Subject: Bring heterogeneous-hello-world back up to date. (#1935) * Bring heterogeneous-hello-world back up to date. * Reintroduced heterogeneous-hello-world into the premake * No longer uses compiled bytecode for entry point, instead a loadModule call is hardcoded with the slang file name. * Entry point is, similarly, hardcoded for now. * Added a bypass to slang-legalize-types for an unneeded GPUForeach check * Run premake and change to relative path * Removed experimental and added README Co-authored-by: Yong He --- examples/heterogeneous-hello-world/shader.cpp | 215 ++++++++++++++++++++++++++ 1 file changed, 215 insertions(+) create mode 100644 examples/heterogeneous-hello-world/shader.cpp (limited to 'examples/heterogeneous-hello-world/shader.cpp') diff --git a/examples/heterogeneous-hello-world/shader.cpp b/examples/heterogeneous-hello-world/shader.cpp new file mode 100644 index 000000000..0c0c24ebc --- /dev/null +++ b/examples/heterogeneous-hello-world/shader.cpp @@ -0,0 +1,215 @@ +#include "../../prelude/slang-cpp-prelude.h" + + +#ifdef SLANG_PRELUDE_NAMESPACE +using namespace SLANG_PRELUDE_NAMESPACE; +#endif + +Vector operator+(Vector a, Vector b) +{ + Vector r; + r.x = a.x + b.x; + r.y = a.y + b.y; + r.z = a.z + b.z; + return r; +} + +Vector operator*(Vector a, Vector b) +{ + Vector r; + r.x = a.x * b.x; + r.y = a.y * b.y; + r.z = a.z * b.z; + return r; +} + +Vector make_VecU3(uint32_t a, uint32_t b, uint32_t c) +{ + return Vector{ a, b, c}; +} + +size_t __computeMainSize = 668; +unsigned char __computeMain[] = {68, 88, 66, 67, 87, 111, 81, 164, 2, 29, 72, 42, 151, 28, 13, 217, 55, 37, 7, 95, 1, +0, 0, 0, 156, 2, 0, 0, 5, 0, 0, 0, 52, 0, 0, 0, 8, 1, 0, 0, 24, +1, 0, 0, 40, 1, 0, 0, 32, 2, 0, 0, 82, 68, 69, 70, 204, 0, 0, 0, 1, +0, 0, 0, 88, 0, 0, 0, 1, 0, 0, 0, 28, 0, 0, 0, 0, 4, 83, 67, 
0, +9, 16, 0, 164, 0, 0, 0, 60, 0, 0, 0, 6, 0, 0, 0, 6, 0, 0, 0, 1, +0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 101, +110, 116, 114, 121, 80, 111, 105, 110, 116, 80, 97, 114, 97, 109, 115, 95, 105, 111, 66, 117, +102, 102, 101, 114, 95, 48, 0, 60, 0, 0, 0, 1, 0, 0, 0, 112, 0, 0, 0, 4, +0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 136, 0, 0, 0, 0, 0, 0, 0, 4, +0, 0, 0, 2, 0, 0, 0, 148, 0, 0, 0, 0, 0, 0, 0, 36, 69, 108, 101, 109, +101, 110, 116, 0, 171, 171, 171, 0, 0, 3, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, +0, 0, 0, 77, 105, 99, 114, 111, 115, 111, 102, 116, 32, 40, 82, 41, 32, 72, 76, 83, +76, 32, 83, 104, 97, 100, 101, 114, 32, 67, 111, 109, 112, 105, 108, 101, 114, 32, 49, 48, +46, 49, 0, 73, 83, 71, 78, 8, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 79, +83, 71, 78, 8, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 83, 72, 69, 88, 240, +0, 0, 0, 64, 0, 5, 0, 60, 0, 0, 0, 106, 8, 0, 1, 158, 0, 0, 4, 0, +224, 17, 0, 0, 0, 0, 0, 4, 0, 0, 0, 95, 0, 0, 2, 18, 0, 2, 0, 104, +0, 0, 2, 1, 0, 0, 0, 155, 0, 0, 4, 4, 0, 0, 0, 1, 0, 0, 0, 1, +0, 0, 0, 167, 0, 0, 8, 18, 0, 16, 0, 0, 0, 0, 0, 10, 0, 2, 0, 1, +64, 0, 0, 0, 0, 0, 0, 6, 224, 17, 0, 0, 0, 0, 0, 49, 0, 0, 7, 34, +0, 16, 0, 0, 0, 0, 0, 10, 0, 16, 0, 0, 0, 0, 0, 1, 64, 0, 0, 0, +0, 0, 63, 0, 0, 0, 7, 66, 0, 16, 0, 0, 0, 0, 0, 10, 0, 16, 0, 0, +0, 0, 0, 10, 0, 16, 0, 0, 0, 0, 0, 75, 0, 0, 5, 18, 0, 16, 0, 0, +0, 0, 0, 10, 0, 16, 0, 0, 0, 0, 0, 55, 0, 0, 9, 18, 0, 16, 0, 0, +0, 0, 0, 26, 0, 16, 0, 0, 0, 0, 0, 42, 0, 16, 0, 0, 0, 0, 0, 10, +0, 16, 0, 0, 0, 0, 0, 168, 0, 0, 8, 18, 224, 17, 0, 0, 0, 0, 0, 10, +0, 2, 0, 1, 64, 0, 0, 0, 0, 0, 0, 10, 0, 16, 0, 0, 0, 0, 0, 62, +0, 0, 1, 83, 84, 65, 84, 116, 0, 0, 0, 7, 0, 0, 0, 1, 0, 0, 0, 0, +0, 0, 0, 1, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 
0, 0, 0, 0, }; +void computeMain_wrapper(gfx_Device_0* device, Vector gridDims, + RWStructuredBuffer buffer) +{ + gfx_ShaderProgram_0* shaderProgram = loadShaderProgram_0(device); + gfx_TransientResourceHeap_0* transientHeap = buildTransientHeap_0(device); + gfx_PipelineState_0* pipelineState = buildPipelineState_0(device, shaderProgram); + gfx_ResourceView_0* bufferView = createBufferView_0(device, unconvertBuffer_0(buffer)); + dispatchComputation_0(device, transientHeap, pipelineState, bufferView, gridDims.x, gridDims.y, gridDims.z); +} + +#line 8 "../../../examples/heterogeneous-hello-world/shader.slang" +struct EntryPointParams_0 +{ + RWStructuredBuffer ioBuffer_0; +}; + + +#line 21 +struct gfx_Device_0 +{ +}; + + +#line 22 +struct gfx_BufferResource_0 +{ +}; + + +#line 23 +struct gfx_ResourceView_0 +{ +}; + + +#line 8 +void _computeMain(void* _S1, void* entryPointParams_0, void* _S2) +{ + +#line 8 + ComputeThreadVaryingInput* _S3 = (slang_bit_cast(_S1)); + + uint32_t tid_0 = (*(&_S3->groupID) * make_VecU3(4U, 1U, 1U) + *(&_S3->groupThreadID)).x; + + float* _S4 = &(*(&(slang_bit_cast(entryPointParams_0))->ioBuffer_0))[tid_0]; + +#line 12 + float i_0 = *_S4; + bool _S5 = i_0 < 0.50000000000000000000f; + +#line 13 + float _S6 = i_0 + i_0; + +#line 13 + float _S7 = (F32_sqrt((i_0))); + +#line 13 + float o_0 = _S5 ? 
_S6 : _S7; + + float* _S8 = &(*(&(slang_bit_cast(entryPointParams_0))->ioBuffer_0))[tid_0]; + +#line 15 + *_S8 = o_0; + return; +} + + +#line 31 +gfx_Device_0* createDevice_0(); + +gfx_BufferResource_0* createStructuredBuffer_0(gfx_Device_0* _0, FixedArray _1); + + +gfx_ResourceView_0* createBufferView_0(gfx_Device_0* _0, gfx_BufferResource_0* _1); + + +#line 4 +RWStructuredBuffer convertBuffer_0(gfx_BufferResource_0* _0); + + +#line 44 +void printInitialValues_0(FixedArray _0, int32_t _1); + + +#line 50 +bool printOutputValues_0(gfx_Device_0* _0, gfx_BufferResource_0* _1, int32_t _2); + + + + +bool executeComputation_0() +{ + + FixedArray initialArray_0 = { 3.00000000000000000000f, -20.00000000000000000000f, -6.00000000000000000000f, 8.00000000000000000000f }; + + + gfx_Device_0* _S9 = createDevice_0(); + gfx_BufferResource_0* _S10 = createStructuredBuffer_0(_S9, initialArray_0); + gfx_ResourceView_0* _S11 = createBufferView_0(_S9, _S10); + Vector _S12 = make_VecU3(uint32_t(int(4)), uint32_t(int(1)), uint32_t(int(1))); + RWStructuredBuffer _S13 = convertBuffer_0(_S10); + +#line 64 + computeMain_wrapper(_S9, _S12, _S13); + + printInitialValues_0(initialArray_0, int(4)); + bool _S14 = printOutputValues_0(_S9, _S10, int(4)); + + + return true; +} + +// [numthreads(4, 1, 1)] +SLANG_PRELUDE_EXPORT +void computeMain_Thread(ComputeThreadVaryingInput* varyingInput, void* entryPointParams, void* globalParams) +{ + _computeMain(varyingInput, entryPointParams, globalParams); +} +// [numthreads(4, 1, 1)] +SLANG_PRELUDE_EXPORT +void computeMain_Group(ComputeVaryingInput* varyingInput, void* entryPointParams, void* globalParams) +{ + ComputeThreadVaryingInput threadInput = {}; + threadInput.groupID = varyingInput->startGroupID; + for (uint32_t x = 0; x < 4; ++x) + { + threadInput.groupThreadID.x = x; + _computeMain(&threadInput, entryPointParams, globalParams); + } +} +// [numthreads(4, 1, 1)] +SLANG_PRELUDE_EXPORT +void computeMain(ComputeVaryingInput* varyingInput, void* 
entryPointParams, void* globalParams) +{ + ComputeVaryingInput vi = *varyingInput; + ComputeVaryingInput groupVaryingInput = {}; + for (uint32_t z = vi.startGroupID.z; z < vi.endGroupID.z; ++z) + { + groupVaryingInput.startGroupID.z = z; + for (uint32_t y = vi.startGroupID.y; y < vi.endGroupID.y; ++y) + { + groupVaryingInput.startGroupID.y = y; + for (uint32_t x = vi.startGroupID.x; x < vi.endGroupID.x; ++x) + { + groupVaryingInput.startGroupID.x = x; + computeMain_Group(&groupVaryingInput, entryPointParams, globalParams); + } + } + } +} -- cgit v1.2.3