diff options
| author | David Siher <32305650+dsiher@users.noreply.github.com> | 2021-09-14 12:59:55 -0400 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2021-09-14 09:59:55 -0700 |
| commit | 502aa3812a82cf0d091cff0c67804e4ee448ac78 (patch) | |
| tree | 8ac8def3a30a6531cee7f6b0380d8929811fade5 /examples | |
| parent | d9d42879c4b6c0202732897ec60a355ccc91f243 (diff) | |
Bring heterogeneous-hello-world back up to date. (#1935)
* Bring heterogeneous-hello-world back up to date.
* Reintroduced heterogeneous-hello-world into the premake
* No longer uses compiled bytecode for entry point, instead a loadModule
call is hardcoded with the slang file name.
* Entry point is, similarly, hardcoded for now.
* Added a bypass to slang-legalize-types for an unneeded GPUForeach check
* Run premake and change to relative path
* Removed experimental and added README
Co-authored-by: Yong He <yonghe@outlook.com>
Diffstat (limited to 'examples')
| -rw-r--r-- | examples/experimental/heterogeneous-hello-world/main.cpp | 380 | ||||
| -rw-r--r-- | examples/experimental/heterogeneous-hello-world/shader.cpp | 197 | ||||
| -rw-r--r-- | examples/experimental/heterogeneous-hello-world/shader.slang | 65 | ||||
| -rw-r--r-- | examples/heterogeneous-hello-world/README.md (renamed from examples/experimental/heterogeneous-hello-world/README.md) | 0 | ||||
| -rw-r--r-- | examples/heterogeneous-hello-world/main.cpp | 335 | ||||
| -rw-r--r-- | examples/heterogeneous-hello-world/shader.cpp | 215 | ||||
| -rw-r--r-- | examples/heterogeneous-hello-world/shader.slang | 71 |
7 files changed, 621 insertions, 642 deletions
diff --git a/examples/experimental/heterogeneous-hello-world/main.cpp b/examples/experimental/heterogeneous-hello-world/main.cpp deleted file mode 100644 index 372fcd615..000000000 --- a/examples/experimental/heterogeneous-hello-world/main.cpp +++ /dev/null @@ -1,380 +0,0 @@ -// This example is out of date and currently disabled from build. -// The `gfx` layer has been refactored with a new command list based -// model. The example must be updated to use the new `gfx` interface -// before it can be included in build. - -#if 0 -// main.cpp - -// This file implements an extremely simple example of loading and -// executing a Slang shader program. This is primarily an example -// of how to use Slang as a "drop-in" replacement for an existing -// HLSL compiler like the `D3DCompile` API. More advanced usage -// of advanced Slang language and API features is left to the -// next example. -// -// The comments in the file will attempt to explain concepts as -// they are introduced. -// -// Of course, in order to use the Slang API, we need to include -// its header. We have set up the build options for this project -// so that it is as simple as: -// -#include <slang.h> -// -// Other build setups are possible, and Slang doesn't assume that -// its include directory must be added to your global include -// path. - -// For the purposes of keeping the demo code as simple as possible, -// while still retaining some level of portability, our examples -// make use of a small platform and graphics API abstraction layer, -// which is included in the Slang source distribution under the -// `tools/` directory. -// -// Applications can of course use Slang without ever touching this -// abstraction layer, so we will not focus on it when explaining -// examples, except in places where best practices for interacting -// with Slang may depend on an application/engine making certain -// design choices in their abstraction layer. 
-// -#include "slang-com-ptr.h" -#include "slang-gfx.h" -#include "tools/graphics-app-framework/window.h" -#include "../../prelude/slang-cpp-types.h" -#include "source/core/slang-basic.h" - -using namespace gfx; - -// We create global ref pointers to avoid dereferencing values -// -ComPtr<gfx::IShaderProgram> gShaderProgram; -Slang::ComPtr<gfx::IRenderer> gRenderer; - -ComPtr<gfx::IBufferResource> gStructuredBuffer; - -ComPtr<gfx::IPipelineLayout> gPipelineLayout; -ComPtr<gfx::IPipelineState> gPipelineState; -ComPtr<gfx::IDescriptorSetLayout> gDescriptorSetLayout; -ComPtr<gfx::IDescriptorSet> gDescriptorSet; - -// Boilerplate types to help the slan-generated file -// -struct gfx_Window_0; -struct gfx_Renderer_0; -struct gfx_BufferResource_0; -struct gfx_ShaderProgram_0; -struct gfx_DescriptorSetLayout_0; -struct gfx_PipelineLayout_0; -struct gfx_DescriptorSet_0; -struct gfx_PipelineState_0; - -bool executeComputation_0(); -extern unsigned char __computeMain[]; -extern size_t __computeMainSize; - -gfx::IShaderProgram* loadShaderProgram(gfx::IRenderer* renderer, unsigned char computeCode[], size_t computeCodeSize) -{ - // We extract the begin/end pointers to the output code buffers directly - // - char unsigned const* computeCodeEnd = computeCode + computeCodeSize; - - // Now we use the operations of the example graphics API abstraction - // layer to load shader code into the underlying API. - // - // Reminder: this section does not involve the Slang API at all. 
- // - - gfx::IShaderProgram::KernelDesc kernelDescs[] = - { - { gfx::StageType::Compute, computeCode, computeCodeEnd }, - }; - - gfx::IShaderProgram::Desc programDesc = {}; - programDesc.pipelineType = gfx::PipelineType::Compute; - programDesc.kernels = &kernelDescs[0]; - programDesc.kernelCount = 1; - - gShaderProgram = renderer->createProgram(programDesc); - - return gShaderProgram; -} - -// Now that we've covered the function that actually loads and -// compiles our Slang shade code, we can go through the rest -// of the application code without as much commentary. -// -gfx::Window* createWindow(int windowWidth, int windowHeight) -{ - // Create a window for our application to render into. - // - WindowDesc windowDesc; - windowDesc.title = "Hello, World!"; - windowDesc.width = windowWidth; - windowDesc.height = windowHeight; - return createWindow(windowDesc); - //return globalWindow; -} - -gfx::IRenderer* createRenderer( - int windowWidth, - int windowHeight, - gfx::Window* window) -{ - // Initialize the rendering layer. - // - // Note: for now we are hard-coding logic to use the - // Direct3D11 back-end for the graphics API abstraction. - // A future version of this example may support multiple - // platforms/APIs. 
- // - IRenderer::Desc rendererDesc = {}; - rendererDesc.rendererType = gfx::RendererType::DirectX11; - Result res = gfxCreateRenderer(&rendererDesc, gRenderer.writeRef()); - - if (SLANG_FAILED(res)) return nullptr; - return gRenderer; -} - -gfx::IBufferResource* createStructuredBuffer(gfx::IRenderer* renderer, float* initialArray) -{ - // Create a structured buffer for storing the data for computation - // - int structuredBufferSize = 4 * sizeof(float); - - IBufferResource::Desc structuredBufferDesc; - structuredBufferDesc.init(structuredBufferSize); - structuredBufferDesc.setDefaults(IResource::Usage::UnorderedAccess); - structuredBufferDesc.elementSize = 4; - structuredBufferDesc.cpuAccessFlags = IResource::AccessFlag::Read; - - gStructuredBuffer = renderer->createBufferResource( - IResource::Usage::UnorderedAccess, - structuredBufferDesc, - initialArray); - return gStructuredBuffer; -} - -gfx::IDescriptorSetLayout* buildDescriptorSetLayout(gfx::IRenderer* renderer) -{ - // Our example graphics API usess a "modern" D3D12/Vulkan style - // of resource binding, so now we will dive into describing and - // allocating "descriptor sets." - // - // First, we need to construct a descriptor set *layout*. - // - IDescriptorSetLayout::SlotRangeDesc slotRanges[] = - { - IDescriptorSetLayout::SlotRangeDesc(DescriptorSlotType::StorageBuffer), - }; - IDescriptorSetLayout::Desc descriptorSetLayoutDesc; - descriptorSetLayoutDesc.slotRangeCount = 1; - descriptorSetLayoutDesc.slotRanges = &slotRanges[0]; - gDescriptorSetLayout = renderer->createDescriptorSetLayout(descriptorSetLayoutDesc); - return gDescriptorSetLayout; -} - -gfx::IPipelineLayout* buildPipeline(gfx::IRenderer* renderer, gfx::IDescriptorSetLayout* descriptorSetLayout) -{ - // Next we will allocate a pipeline layout, which specifies - // that we will render with only a single descriptor set bound. 
- // - - IPipelineLayout::DescriptorSetDesc descriptorSets[] = - { - IPipelineLayout::DescriptorSetDesc(descriptorSetLayout), - }; - IPipelineLayout::Desc pipelineLayoutDesc; - pipelineLayoutDesc.renderTargetCount = 1; - pipelineLayoutDesc.descriptorSetCount = 1; - pipelineLayoutDesc.descriptorSets = &descriptorSets[0]; - gPipelineLayout = renderer->createPipelineLayout(pipelineLayoutDesc); - - return gPipelineLayout; -} - -gfx::IDescriptorSet* buildDescriptorSet( - gfx::IRenderer* renderer, - gfx::IDescriptorSetLayout* descriptorSetLayout, - gfx::IBufferResource* structuredBuffer) -{ - // Once we have the descriptor set layout, we can allocate - // and fill in a descriptor set to hold our parameters. - // - gDescriptorSet = renderer->createDescriptorSet(descriptorSetLayout, gfx::IDescriptorSet::Flag::Transient); - if(!gDescriptorSet) return nullptr; - - // Once we have the bufferResource created, we can fill in - // a descriptor set for creating a structured buffer - // - IResourceView::Desc resourceViewDesc; - resourceViewDesc.type = IResourceView::Type::UnorderedAccess; - auto resourceView = renderer->createBufferView(structuredBuffer, resourceViewDesc); - gDescriptorSet->setResource(0, 0, resourceView); - - return gDescriptorSet; -} - -gfx::IPipelineState* buildPipelineState( - gfx::IShaderProgram* shaderProgram, - gfx::IRenderer* renderer, - gfx::IPipelineLayout* pipelineLayout) -{ - // Following the D3D12/Vulkan style of API, we need a pipeline state object - // (PSO) to encapsulate the configuration of the overall graphics pipeline. 
- // - ComputePipelineStateDesc desc; - desc.pipelineLayout = pipelineLayout; - desc.program = shaderProgram; - gPipelineState = renderer->createComputePipelineState(desc); - return gPipelineState; -} - -void printInitialValues(float* initialArray, int length) -{ - // Print out the values before the computation - printf("Before:\n"); - for (int i = 0; i < length; i++) - { - printf("%f, ", initialArray[i]); - } - printf("\n"); -} - -void dispatchComputation( - gfx::ICommandQueue* gQueue, - gfx::IPipelineState* gPipelineState, - gfx::IPipelineLayout* gPipelineLayout, - gfx::IDescriptorSet* gDescriptorSet, - unsigned int gridDimsX, - unsigned int gridDimsY, - unsigned int gridDimsZ) -{ - auto cmdBuf = gQueue->createCommandBuffer(); - auto encoder = cmdBuf->encodeComputeCommands(); - encoder->setPipelineState(gPipelineState); - encoder->setDescriptorSet(PipelineType::Compute, gPipelineLayout, 0, gDescriptorSet); - encoder->dispatchCompute(gridDimsX, gridDimsY, gridDimsZ); - encoder->endEncoding(); - gQueue->executeCommandBuffer(cmdBuf); -} - -void print_output( - gfx::IRenderer* renderer, - gfx::IBufferResource* structuredBuffer, - int length) -{ - ComPtr<ISlangBlob> blob; - renderer->readBufferResource(structuredBuffer, 0, length * sizeof(float), blob.writeRef()); - if (float* outputData = (float*)blob->getBufferPointer()) - { - // Print out the values the the kernel produced - printf("After: \n"); - for (int i = 0; i < 4; i++) - { - printf("%f, ", outputData[i]); - } - printf("\n"); - } -} - -// Boilerplate functions to help the slang-generated file and types -gfx_Window_0* createWindow_0(int32_t _0, int32_t _1) -{ - return (gfx_Window_0*)createWindow(_0, _1); -} - -gfx_Renderer_0* createRenderer_0(int32_t _0, int32_t _1, gfx_Window_0* _2) -{ - return (gfx_Renderer_0*)createRenderer(_0, _1, (gfx::Window*)_2); -} - -gfx_BufferResource_0* createStructuredBuffer_0(gfx_Renderer_0* _0, FixedArray<float, 4> _1) -{ - return 
(gfx_BufferResource_0*)createStructuredBuffer((gfx::IRenderer*)_0, (float*)&_1); -} - -gfx_ShaderProgram_0* loadShaderProgram_0(gfx_Renderer_0* _0, unsigned char _1[], size_t _2) -{ - return (gfx_ShaderProgram_0*)loadShaderProgram((gfx::IRenderer*)_0, _1, _2); -} - -gfx_DescriptorSetLayout_0* buildDescriptorSetLayout_0(gfx_Renderer_0* _0) -{ - return (gfx_DescriptorSetLayout_0*)buildDescriptorSetLayout((gfx::IRenderer*)_0); -} - -gfx_PipelineLayout_0* buildPipeline_0(gfx_Renderer_0* _0, gfx_DescriptorSetLayout_0* _1) -{ - return (gfx_PipelineLayout_0*)buildPipeline((gfx::IRenderer*)_0, (gfx::IDescriptorSetLayout*)_1); -} - -gfx_DescriptorSet_0* buildDescriptorSet_0(gfx_Renderer_0* _0, gfx_DescriptorSetLayout_0* _1, gfx_BufferResource_0* _2) -{ - return (gfx_DescriptorSet_0*)buildDescriptorSet( - (gfx::IRenderer*)_0, - (gfx::IDescriptorSetLayout*)_1, - (gfx::IBufferResource*)_2); -} - -gfx_PipelineState_0* buildPipelineState_0(gfx_ShaderProgram_0* _0, gfx_Renderer_0* _1, gfx_PipelineLayout_0* _2) -{ - return (gfx_PipelineState_0*)buildPipelineState( - (gfx::IShaderProgram*)_0, (gfx::IRenderer*)_1, - (gfx::IPipelineLayout*)_2); -} - -void printInitialValues_0(FixedArray<float, 4> _0, int32_t _1) -{ - printInitialValues((float*)&_0, _1); -} - -void dispatchComputation_0(gfx_CommandQueue_0* _0, gfx_PipelineState_0* _1, gfx_PipelineLayout_0* _2, gfx_DescriptorSet_0* _3, unsigned int gridDimsX, unsigned int gridDimsY, unsigned int gridDimsZ) -{ - dispatchComputation( - (gfx::ICommandQueue*)_0, - (gfx::IPipelineState*)_1, - (gfx::IPipelineLayout*)_2, - (gfx::IDescriptorSet*)_3, - gridDimsX, - gridDimsY, - gridDimsZ); -} - -RWStructuredBuffer<float> convertBuffer_0(gfx_BufferResource_0* _0) { - RWStructuredBuffer<float> result; - result.data = (float*)_0; - return result; -} - -gfx_BufferResource_0* unconvertBuffer_0(RWStructuredBuffer<float> _0) { - return (gfx_BufferResource_0*)(_0.data); -} - -void print_output_0(gfx_CommandQueue_0* _0, gfx_BufferResource_0* _1, int32_t 
_2) -{ - print_output((gfx::ICommandQueue*)_0, (gfx::IBufferResource*)_1, _2); -} - -// This "inner" main function is used by the platform abstraction -// layer to deal with differences in how an entry point needs -// to be defined for different platforms. -// -void innerMain(ApplicationContext* context) -{ - // We construct an instance of our example application - // `struct` type, and then walk through the lifecyle - // of the application. - - if (!(executeComputation_0())) - { - return exitApplication(context, 1); - } -} - -// This macro instantiates an appropriate main function to -// invoke the `innerMain` above. -// -GFX_CONSOLE_MAIN(innerMain) - -#endif diff --git a/examples/experimental/heterogeneous-hello-world/shader.cpp b/examples/experimental/heterogeneous-hello-world/shader.cpp deleted file mode 100644 index 5a8dd7815..000000000 --- a/examples/experimental/heterogeneous-hello-world/shader.cpp +++ /dev/null @@ -1,197 +0,0 @@ -#if 0 -#include "../../prelude/slang-cpp-prelude.h" - - -#ifdef SLANG_PRELUDE_NAMESPACE -using namespace SLANG_PRELUDE_NAMESPACE; -#endif - -Vector<uint32_t, 3> operator*(Vector<uint32_t, 3> a, Vector<uint32_t, 3> b) -{ - Vector<uint32_t, 3> r; - r.x = a.x * b.x; - r.y = a.y * b.y; - r.z = a.z * b.z; - return r; -} - -Vector<uint32_t, 3> operator+(Vector<uint32_t, 3> a, Vector<uint32_t, 3> b) -{ - Vector<uint32_t, 3> r; - r.x = a.x + b.x; - r.y = a.y + b.y; - r.z = a.z + b.z; - return r; -} - -Vector<uint32_t, 3> make_VecU3(uint32_t a, uint32_t b, uint32_t c) -{ - return Vector<uint32_t, 3>{ a, b, c}; -} - -size_t __computeMainSize = 668; -unsigned char __computeMain[] = {68, 88, 66, 67, 87, 111, 81, 164, 2, 29, 72, 42, 151, 28, 13, 217, 55, 37, 7, 95, 1, 0, 0, 0, 156, 2, 0, 0, 5, 0, 0, 0, 52, 0, 0, 0, 8, 1, 0, 0, 24, 1, 0, 0, 40, 1, 0, 0, 32, 2, 0, 0, 82, 68, 69, 70, 204, 0, 0, 0, 1, 0, 0, 0, 88, 0, 0, 0, 1, 0, 0, 0, 28, 0, 0, 0, 0, 4, 83, 67, 0, 9, 16, 0, 164, 0, 0, 0, 60, 0, 0, 0, 6, 0, 0, 0, 6, 0, 0, 0, 1, 0, 0, 0, 4, 0, 0, 0, 
0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 101, 110, 116, 114, 121, 80, 111, 105, 110, 116, 80, 97, 114, 97, 109, 115, 95, 105, 111, 66, 117, 102, 102, 101, 114, 95, 48, 0, 60, 0, 0, 0, 1, 0, 0, 0, 112, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 136, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 2, 0, 0, 0, 148, 0, 0, 0, 0, 0, 0, 0, 36, 69, 108, 101, 109, 101, 110, 116, 0, 171, 171, 171, 0, 0, 3, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 77, 105, 99, 114, 111, 115, 111, 102, 116, 32, 40, 82, 41, 32, 72, 76, 83, 76, 32, 83, 104, 97, 100, 101, 114, 32, 67, 111, 109, 112, 105, 108, 101, 114, 32, 49, 48, 46, 49, 0, 73, 83, 71, 78, 8, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 79, 83, 71, 78, 8, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 83, 72, 69, 88, 240, 0, 0, 0, 64, 0, 5, 0, 60, 0, 0, 0, 106, 8, 0, 1, 158, 0, 0, 4, 0, 224, 17, 0, 0, 0, 0, 0, 4, 0, 0, 0, 95, 0, 0, 2, 18, 0, 2, 0, 104, 0, 0, 2, 1, 0, 0, 0, 155, 0, 0, 4, 4, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 167, 0, 0, 8, 18, 0, 16, 0, 0, 0, 0, 0, 10, 0, 2, 0, 1, 64, 0, 0, 0, 0, 0, 0, 6, 224, 17, 0, 0, 0, 0, 0, 49, 0, 0, 7, 34, 0, 16, 0, 0, 0, 0, 0, 10, 0, 16, 0, 0, 0, 0, 0, 1, 64, 0, 0, 0, 0, 0, 63, 0, 0, 0, 7, 66, 0, 16, 0, 0, 0, 0, 0, 10, 0, 16, 0, 0, 0, 0, 0, 10, 0, 16, 0, 0, 0, 0, 0, 75, 0, 0, 5, 18, 0, 16, 0, 0, 0, 0, 0, 10, 0, 16, 0, 0, 0, 0, 0, 55, 0, 0, 9, 18, 0, 16, 0, 0, 0, 0, 0, 26, 0, 16, 0, 0, 0, 0, 0, 42, 0, 16, 0, 0, 0, 0, 0, 10, 0, 16, 0, 0, 0, 0, 0, 168, 0, 0, 8, 18, 224, 17, 0, 0, 0, 0, 0, 10, 0, 2, 0, 1, 64, 0, 0, 0, 0, 0, 0, 10, 0, 16, 0, 0, 0, 0, 0, 62, 0, 0, 1, 83, 84, 65, 84, 116, 0, 0, 0, 7, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; -void computeMain_wrapper(gfx_Renderer_0* renderer, Vector<uint32_t, 3> gridDims, - 
RWStructuredBuffer<float> buffer) -{ - gfx_ShaderProgram_0* shaderProgram = loadShaderProgram_0(renderer, __computeMain, __computeMainSize); - gfx_DescriptorSetLayout_0* setLayout = buildDescriptorSetLayout_0(renderer); - gfx_PipelineLayout_0* pipelineLayout = buildPipeline_0(renderer, setLayout); - gfx_DescriptorSet_0* descriptorSet = buildDescriptorSet_0(renderer, setLayout, unconvertBuffer_0(buffer)); - gfx_PipelineState_0* pipelineState = buildPipelineState_0(shaderProgram, renderer, pipelineLayout); - dispatchComputation_0(renderer, pipelineState, pipelineLayout, descriptorSet, gridDims.x, gridDims.y, gridDims.z); -} - -#line 7 "../../examples/heterogeneous-hello-world/shader.slang" -struct EntryPointParams_0 -{ - RWStructuredBuffer<float> ioBuffer_0; -}; - -struct KernelContext_0 -{ -}; - - -#line 21 -struct gfx_Window_0 -{ -}; - - -#line 22 -struct gfx_Renderer_0 -{ -}; - - -#line 23 -struct gfx_BufferResource_0 -{ -}; - - -#line 7 -void _computeMain(void* _S1, void* entryPointParams_0, void* _S2) -{ - ComputeThreadVaryingInput* _S3 = ((ComputeThreadVaryingInput*)(_S1)); - KernelContext_0 kernelContext_0; - -#line 9 - uint32_t tid_0 = (*(&_S3->groupID) * make_VecU3(4U, 1U, 1U) + *(&_S3->groupThreadID)).x; - - float* _S4 = &(*(&((EntryPointParams_0*)(entryPointParams_0))->ioBuffer_0))[tid_0]; - -#line 11 - float i_0 = *_S4; - bool _S5 = i_0 < 0.50000000000000000000f; - -#line 12 - float _S6 = i_0 + i_0; - -#line 12 - float _S7 = (F32_sqrt((i_0))); - -#line 12 - float o_0 = _S5 ? 
_S6 : _S7; - - float* _S8 = &(*(&((EntryPointParams_0*)(entryPointParams_0))->ioBuffer_0))[tid_0]; - -#line 14 - *_S8 = o_0; - -#line 7 - return; -} - - -#line 34 -gfx_Window_0* createWindow_0(int32_t _0, int32_t _1); - - -#line 35 -gfx_Renderer_0* createRenderer_0(int32_t _0, int32_t _1, gfx_Window_0* _2); - - - -gfx_BufferResource_0* createStructuredBuffer_0(gfx_Renderer_0* _0, FixedArray<float, 4> _1); - - -#line 4 -RWStructuredBuffer<float> convertBuffer_0(gfx_BufferResource_0* _0); - - -#line 40 -void printInitialValues_0(FixedArray<float, 4> _0, int32_t _1); - - -#line 41 -void print_output_0(gfx_Renderer_0* _0, gfx_BufferResource_0* _1, int32_t _2); - - - - -bool executeComputation_0() -{ - - - - FixedArray<float, 4> initialArray_0 = { 3.00000000000000000000f, -20.00000000000000000000f, -6.00000000000000000000f, 8.00000000000000000000f }; - - - gfx_Window_0* _S9 = createWindow_0(int(1024), int(768)); - gfx_Renderer_0* _S10 = createRenderer_0(int(1024), int(768), _S9); - gfx_CommandQueue_0* _ - gfx_BufferResource_0* _S11 = createStructuredBuffer_0(_S10, initialArray_0); - Vector<uint32_t, 3> _S12 = make_VecU3(uint32_t(int(4)), uint32_t(int(1)), uint32_t(int(1))); - RWStructuredBuffer<float> _S13 = convertBuffer_0(_S11); - -#line 57 - computeMain_wrapper(_S10, _S12, _S13); - - printInitialValues_0(initialArray_0, int(4)); - print_output_0(_S10, _S11, int(4)); - - - return true; -} - -// [numthreads(4, 1, 1)] -SLANG_PRELUDE_EXPORT -void computeMain_Thread(ComputeThreadVaryingInput* varyingInput, void* entryPointParams, void* globalParams) -{ - _computeMain(varyingInput, entryPointParams, globalParams); -} -// [numthreads(4, 1, 1)] -SLANG_PRELUDE_EXPORT -void computeMain_Group(ComputeVaryingInput* varyingInput, void* entryPointParams, void* globalParams) -{ - ComputeThreadVaryingInput threadInput = {}; - threadInput.groupID = varyingInput->startGroupID; - for (uint32_t x = 0; x < 4; ++x) - { - threadInput.groupThreadID.x = x; - _computeMain(&threadInput, 
entryPointParams, globalParams); - } -} -// [numthreads(4, 1, 1)] -SLANG_PRELUDE_EXPORT -void computeMain(ComputeVaryingInput* varyingInput, void* entryPointParams, void* globalParams) -{ - ComputeVaryingInput vi = *varyingInput; - ComputeVaryingInput groupVaryingInput = {}; - for (uint32_t z = vi.startGroupID.z; z < vi.endGroupID.z; ++z) - { - groupVaryingInput.startGroupID.z = z; - for (uint32_t y = vi.startGroupID.y; y < vi.endGroupID.y; ++y) - { - groupVaryingInput.startGroupID.y = y; - for (uint32_t x = vi.startGroupID.x; x < vi.endGroupID.x; ++x) - { - groupVaryingInput.startGroupID.x = x; - computeMain_Group(&groupVaryingInput, entryPointParams, globalParams); - } - } - } -} -#endif diff --git a/examples/experimental/heterogeneous-hello-world/shader.slang b/examples/experimental/heterogeneous-hello-world/shader.slang deleted file mode 100644 index 47c883b39..000000000 --- a/examples/experimental/heterogeneous-hello-world/shader.slang +++ /dev/null @@ -1,65 +0,0 @@ -// shader.slang - -//TEST_INPUT:ubuffer(random(float, 4096, -1.0, 1.0), stride=4):name=ioBuffer -RWStructuredBuffer<float> convertBuffer(Ptr<gfx::BufferResource> x); - -[shader("compute")] -[numthreads(4, 1, 1)] -void computeMain(uniform RWStructuredBuffer<float> ioBuffer, uint3 dispatchThreadID : SV_DispatchThreadID) -{ - uint tid = dispatchThreadID.x; - - float i = ioBuffer[tid]; - float o = i < 0.5 ? 
(i + i) : sqrt(i); - - ioBuffer[tid] = o; -} - -// Forward declarations of gfx types -// -namespace gfx { - struct ApplicationContext{}; - struct Window{}; - struct Renderer{}; - struct BufferResource{}; - struct PipelineLayout{}; - struct PipelineState{}; - struct DescriptorSetLayout{}; - struct DescriptorSet{}; - struct ShaderProgram{}; -} - -// Forward declarations of cpp functions -// -Ptr<gfx::ShaderProgram> loadShaderProgram(Ptr<gfx::Renderer> renderer); -Ptr<gfx::Window> createWindow(int gWindowWidth, int gWindowHeight); -Ptr<gfx::Renderer> createRenderer( - int gWindowWidth, - int gWindowHeight, - Ptr<gfx::Window> gWindow); -Ptr<gfx::BufferResource> createStructuredBuffer(Ptr<gfx::Renderer> gRenderer, float[4] initialArray); -void printInitialValues(float[4] initialArray, int length); -void print_output( - Ptr<gfx::Renderer> gRenderer, - Ptr<gfx::BufferResource> gStructuredBuffer, - int length); - -public bool executeComputation() { - // We will hard-code the size of our rendering window and initial array. 
- // - int windowWidth = 1024; - int windowHeight = 768; - float initialArray[4] = { 3.0f, -20.0f, -6.0f, 8.0f }; - - // Declare functions - let window = createWindow(windowWidth, windowHeight); - let renderer = createRenderer(windowWidth, windowHeight, window); - let structuredBuffer = createStructuredBuffer(renderer, initialArray); - __GPU_FOREACH(renderer, uint3(4, 1, 1), LAMBDA(uint3 dispatchThreadID) - { computeMain(convertBuffer(structuredBuffer), dispatchThreadID) ; }); - printInitialValues(initialArray, 4); - print_output(renderer, structuredBuffer, 4); - - - return true; -} diff --git a/examples/experimental/heterogeneous-hello-world/README.md b/examples/heterogeneous-hello-world/README.md index 709652922..709652922 100644 --- a/examples/experimental/heterogeneous-hello-world/README.md +++ b/examples/heterogeneous-hello-world/README.md diff --git a/examples/heterogeneous-hello-world/main.cpp b/examples/heterogeneous-hello-world/main.cpp new file mode 100644 index 000000000..9e0bb8b0f --- /dev/null +++ b/examples/heterogeneous-hello-world/main.cpp @@ -0,0 +1,335 @@ +// main.cpp + +// This example uses the Slang gfx layer to target different APIs and execute +// both CPU and GPU code from a single Slang file (?) 
+// +#include <slang.h> +#include <slang-com-ptr.h> +using Slang::ComPtr; + +#include "slang-gfx.h" +#include "gfx-util/shader-cursor.h" +#include "source/core/slang-basic.h" +#include "../../prelude/slang-cpp-types.h" + +using namespace gfx; +using namespace Slang; + +// Creating global ref pointers to avoid dereferencing values +// +ComPtr<gfx::IDevice> gDevice; +ComPtr<gfx::IShaderProgram> gProgram; +ComPtr<gfx::IBufferResource> gBufferResource; +ComPtr<gfx::IResourceView> gResourceView; +ComPtr<gfx::ITransientResourceHeap> gTransientHeap; +ComPtr<gfx::IPipelineState> gPipelineState; +ComPtr<gfx::ICommandQueue> gQueue; + +// Boilerplate types to help the slang-generated file +// +struct gfx_Device_0; +struct gfx_BufferResource_0; +struct gfx_ShaderProgram_0; +struct gfx_ResourceView_0; +struct gfx_TransientResourceHeap_0; +struct gfx_PipelineState_0; +bool executeComputation_0(); + +// Many Slang API functions return detailed diagnostic information +// (error messages, warnings, etc.) as a "blob" of data, or return +// a null blob pointer instead if there were no issues. +// +// For convenience, we define a subroutine that will dump the information +// in a diagnostic blob if one is produced, and skip it otherwise. +// +void diagnoseIfNeeded(slang::IBlob *diagnosticsBlob) +{ + if (diagnosticsBlob != nullptr) + { + printf("%s", (const char *)diagnosticsBlob->getBufferPointer()); + } +} + +gfx::IDevice* createDevice() +{ + ComPtr<gfx::IDevice> device; + IDevice::Desc deviceDesc = {}; + // Changing device type would happen here. For example: + //deviceDesc.deviceType = DeviceType::CUDA; + SLANG_RETURN_NULL_ON_FAIL(gfxCreateDevice(&deviceDesc, gDevice.writeRef())); + return gDevice; +} + +// Loads the shader code defined in `shader.slang` for use by the `gfx` layer. 
+// +gfx::IShaderProgram* loadShaderProgram(gfx::IDevice *device) +{ + // We need to obtain a compilation session (`slang::ISession`) that will provide + // a scope to all the compilation and loading of code we do. + // + ComPtr<slang::ISession> slangSession; + SLANG_RETURN_NULL_ON_FAIL(device->getSlangSession(slangSession.writeRef())); + + // We can now start loading code into the slang session. + // + // The simplest way to load code is by calling `loadModule` with the name of a Slang + // module. A call to `loadModule("MyStuff")` will behave more or less as if you + // wrote: + // + // import MyStuff; + // + // In a Slang shader file. The compiler will use its search paths to try to locate + // `MyModule.slang`, then compile and load that file. If a matching module had + // already been loaded previously, that would be used directly. + // + ComPtr<slang::IBlob> diagnosticsBlob; + slang::IModule *module = slangSession->loadModule("shader", diagnosticsBlob.writeRef()); + diagnoseIfNeeded(diagnosticsBlob); + if (!module) + return NULL; + + // Look up entry point (hardcoded for now) + // + char const *computeEntryPointName = "computeMain"; + ComPtr<slang::IEntryPoint> computeEntryPoint; + SLANG_RETURN_NULL_ON_FAIL( + module->findEntryPointByName(computeEntryPointName, computeEntryPoint.writeRef())); + + // At this point we have a few different Slang API objects that represent + // pieces of our code: `module`, `vertexEntryPoint`, and `fragmentEntryPoint`. + // + // A single Slang module could contain many different entry points (e.g., + // four vertex entry points, three fragment entry points, and two compute + // shaders), and before we try to generate output code for our target API + // we need to identify which entry points we plan to use together. + // + // Modules and entry points are both examples of *component types* in the + // Slang API. 
The API also provides a way to build a *composite* out of + // other pieces, and that is what we are going to do with our module + // and entry points. + // + Slang::List<slang::IComponentType *> componentTypes; + componentTypes.add(module); + componentTypes.add(computeEntryPoint); + + // Actually creating the composite component type is a single operation + // on the Slang session, but the operation could potentially fail if + // something about the composite was invalid (e.g., you are trying to + // combine multiple copies of the same module), so we need to deal + // with the possibility of diagnostic output. + // + ComPtr<slang::IComponentType> composedProgram; + SlangResult result = slangSession->createCompositeComponentType( + componentTypes.getBuffer(), + componentTypes.getCount(), + composedProgram.writeRef(), + diagnosticsBlob.writeRef()); + diagnoseIfNeeded(diagnosticsBlob); + SLANG_RETURN_NULL_ON_FAIL(result); + + // At this point, `composedProgram` represents the shader program + // we want to run, and the compute shader there have been checked. + // We can create a `gfx::IShaderProgram` object from `composedProgram` + // so it may be used by the graphics layer. 
+ gfx::IShaderProgram::Desc programDesc = {}; + programDesc.pipelineType = gfx::PipelineType::Compute; + programDesc.slangProgram = composedProgram.get(); + + gProgram = device->createProgram(programDesc); + + return gProgram; +} + +gfx::IBufferResource* createStructuredBuffer( + gfx::IDevice *device, + float *initialData) +{ + // Create a structured buffer for storing computation data + // + const int numberCount = 4; + int structuredBufferSize = numberCount * sizeof(float); + + IBufferResource::Desc bufferDesc = {}; + bufferDesc.sizeInBytes = numberCount * sizeof(float); + bufferDesc.format = gfx::Format::Unknown; + bufferDesc.elementSize = sizeof(float); + bufferDesc.allowedStates = ResourceStateSet(ResourceState::ShaderResource, + ResourceState::UnorderedAccess, + ResourceState::CopyDestination, + ResourceState::CopySource); + bufferDesc.defaultState = ResourceState::UnorderedAccess; + bufferDesc.cpuAccessFlags = AccessFlag::Write | AccessFlag::Read; + + SlangResult result = device->createBufferResource(bufferDesc, + (void *)initialData, + gBufferResource.writeRef()); + SLANG_RETURN_NULL_ON_FAIL(result); + return gBufferResource; +} + +gfx::IResourceView* createBufferView( + gfx::IDevice* device, + gfx::IBufferResource* buffer) +{ + // Create a resource view for the structured buffer + // + gfx::IResourceView::Desc viewDesc = {}; + viewDesc.type = gfx::IResourceView::Type::UnorderedAccess; + viewDesc.format = gfx::Format::Unknown; + SLANG_RETURN_NULL_ON_FAIL(device->createBufferView(buffer, viewDesc, gResourceView.writeRef())); + return gResourceView; +} + +gfx::ITransientResourceHeap* buildTransientHeap(gfx::IDevice *device) +{ + ITransientResourceHeap::Desc transientHeapDesc = {}; + transientHeapDesc.constantBufferSize = 4096; + SLANG_RETURN_NULL_ON_FAIL( + device->createTransientResourceHeap(transientHeapDesc, gTransientHeap.writeRef())); + return gTransientHeap; +} + +gfx::IPipelineState* buildPipelineState( + gfx::IDevice *device, + gfx::IShaderProgram* 
shaderProgram) +{ + gfx::ComputePipelineStateDesc pipelineDesc = {}; + pipelineDesc.program = shaderProgram; + SLANG_RETURN_NULL_ON_FAIL( + device->createComputePipelineState(pipelineDesc, gPipelineState.writeRef())); + return gPipelineState; +} + +void printInitialValues(float *initialArray, int length) +{ + printf("Before:\n"); + for (int i = 0; i < length; i++) + { + printf("%f, ", initialArray[i]); + } + printf("\n"); +} + +void dispatchComputation( + gfx::IDevice* device, + gfx::ITransientResourceHeap* transientHeap, + gfx::IPipelineState* pipelineState, + gfx::IResourceView* bufferView, + unsigned int gridDimsX, + unsigned int gridDimsY, + unsigned int gridDimsZ) +{ + ICommandQueue::Desc queueDesc = {ICommandQueue::QueueType::Graphics}; + gQueue = device->createCommandQueue(queueDesc); + + auto commandBuffer = transientHeap->createCommandBuffer(); + auto encoder = commandBuffer->encodeComputeCommands(); + + // First, obtain a root shader object from command encoder to start parameter binding. + auto rootObject = encoder->bindPipeline(pipelineState); + + gfx::ShaderCursor entryPointCursor( + rootObject->getEntryPoint(0)); // get a cursor the the first entry-point. + // Bind buffer view to the entry point. 
+ entryPointCursor.getPath("ioBuffer").setResource(bufferView); + + encoder->dispatchCompute(gridDimsX, gridDimsY, gridDimsZ); + encoder->endEncoding(); + commandBuffer->close(); + gQueue->executeCommandBuffer(commandBuffer); + gQueue->wait(); +} + +bool printOutputValues( + gfx::IDevice *device, + gfx::IBufferResource *buffer, + int length) +{ + ComPtr<ISlangBlob> resultBlob; + SLANG_RETURN_FALSE_ON_FAIL(device->readBufferResource( + buffer, 0, length * sizeof(float), resultBlob.writeRef())); + auto result = reinterpret_cast<const float *>(resultBlob->getBufferPointer()); + printf("After: \n"); + for (int i = 0; i < length; i++) + { + printf("%f, ", result[i]); + } + printf("\n"); + return true; +} + +// Boilerplate functions to help the slang-generated file and types + +gfx_Device_0* createDevice_0() +{ + return (gfx_Device_0*)createDevice(); +} + +gfx_BufferResource_0* createStructuredBuffer_0(gfx_Device_0* _0, FixedArray<float, 4> _1) +{ + return (gfx_BufferResource_0*)createStructuredBuffer((gfx::IDevice*)_0, (float*)&_1); +} + +gfx_ShaderProgram_0* loadShaderProgram_0(gfx_Device_0* _0) +{ + return (gfx_ShaderProgram_0*)loadShaderProgram((gfx::IDevice*)_0); +} + +gfx_ResourceView_0* createBufferView_0(gfx_Device_0* _0, gfx_BufferResource_0* _1) +{ + return (gfx_ResourceView_0*)createBufferView((gfx::IDevice*)_0, (gfx::IBufferResource*)_1); +} + +gfx_TransientResourceHeap_0* buildTransientHeap_0(gfx_Device_0* _0) +{ + return (gfx_TransientResourceHeap_0*)buildTransientHeap((gfx::IDevice*)_0); +} + +gfx_PipelineState_0* buildPipelineState_0(gfx_Device_0* _0, gfx_ShaderProgram_0* _1) +{ + return (gfx_PipelineState_0*)buildPipelineState((gfx::IDevice*)_0, (gfx::IShaderProgram*)_1); +} + +void printInitialValues_0(FixedArray<float, 4> _0, int32_t _1) +{ + printInitialValues((float*)&_0, _1); +} + +void dispatchComputation_0(gfx_Device_0* _0, gfx_TransientResourceHeap_0* _1, gfx_PipelineState_0* _2, gfx_ResourceView_0* _3, unsigned int gridDimsX, unsigned int 
gridDimsY, unsigned int gridDimsZ) +{ + dispatchComputation( + (gfx::IDevice*)_0, + (gfx::ITransientResourceHeap*)_1, + (gfx::IPipelineState*)_2, + (gfx::IResourceView*)_3, + gridDimsX, + gridDimsY, + gridDimsZ); +} + +RWStructuredBuffer<float> convertBuffer_0(gfx_BufferResource_0* _0) { + RWStructuredBuffer<float> result; + result.data = (float*)_0; + return result; +} + +gfx_BufferResource_0* unconvertBuffer_0(RWStructuredBuffer<float> _0) { + return (gfx_BufferResource_0*)(_0.data); +} + +bool printOutputValues_0(gfx_Device_0* _0, gfx_BufferResource_0* _1, int32_t _2) +{ + return printOutputValues((gfx::IDevice*)_0, (gfx::IBufferResource*)_1, _2); +} + +int main() +{ + // We construct an instance of our example application + // `struct` type, and then walk through the lifecycle + // of the application. + + if (!(executeComputation_0())) + { + return -1; + } +} diff --git a/examples/heterogeneous-hello-world/shader.cpp b/examples/heterogeneous-hello-world/shader.cpp new file mode 100644 index 000000000..0c0c24ebc --- /dev/null +++ b/examples/heterogeneous-hello-world/shader.cpp @@ -0,0 +1,215 @@ +#include "../../prelude/slang-cpp-prelude.h" + + +#ifdef SLANG_PRELUDE_NAMESPACE +using namespace SLANG_PRELUDE_NAMESPACE; +#endif + +Vector<uint32_t, 3> operator+(Vector<uint32_t, 3> a, Vector<uint32_t, 3> b) +{ + Vector<uint32_t, 3> r; + r.x = a.x + b.x; + r.y = a.y + b.y; + r.z = a.z + b.z; + return r; +} + +Vector<uint32_t, 3> operator*(Vector<uint32_t, 3> a, Vector<uint32_t, 3> b) +{ + Vector<uint32_t, 3> r; + r.x = a.x * b.x; + r.y = a.y * b.y; + r.z = a.z * b.z; + return r; +} + +Vector<uint32_t, 3> make_VecU3(uint32_t a, uint32_t b, uint32_t c) +{ + return Vector<uint32_t, 3>{ a, b, c}; +} + +size_t __computeMainSize = 668; +unsigned char __computeMain[] = {68, 88, 66, 67, 87, 111, 81, 164, 2, 29, 72, 42, 151, 28, 13, 217, 55, 37, 7, 95, 1, +0, 0, 0, 156, 2, 0, 0, 5, 0, 0, 0, 52, 0, 0, 0, 8, 1, 0, 0, 24, +1, 0, 0, 40, 1, 0, 0, 32, 2, 0, 0, 82, 68, 69, 70, 204, 0, 
0, 0, 1, +0, 0, 0, 88, 0, 0, 0, 1, 0, 0, 0, 28, 0, 0, 0, 0, 4, 83, 67, 0, +9, 16, 0, 164, 0, 0, 0, 60, 0, 0, 0, 6, 0, 0, 0, 6, 0, 0, 0, 1, +0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 101, +110, 116, 114, 121, 80, 111, 105, 110, 116, 80, 97, 114, 97, 109, 115, 95, 105, 111, 66, 117, +102, 102, 101, 114, 95, 48, 0, 60, 0, 0, 0, 1, 0, 0, 0, 112, 0, 0, 0, 4, +0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 136, 0, 0, 0, 0, 0, 0, 0, 4, +0, 0, 0, 2, 0, 0, 0, 148, 0, 0, 0, 0, 0, 0, 0, 36, 69, 108, 101, 109, +101, 110, 116, 0, 171, 171, 171, 0, 0, 3, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, +0, 0, 0, 77, 105, 99, 114, 111, 115, 111, 102, 116, 32, 40, 82, 41, 32, 72, 76, 83, +76, 32, 83, 104, 97, 100, 101, 114, 32, 67, 111, 109, 112, 105, 108, 101, 114, 32, 49, 48, +46, 49, 0, 73, 83, 71, 78, 8, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 79, +83, 71, 78, 8, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 83, 72, 69, 88, 240, +0, 0, 0, 64, 0, 5, 0, 60, 0, 0, 0, 106, 8, 0, 1, 158, 0, 0, 4, 0, +224, 17, 0, 0, 0, 0, 0, 4, 0, 0, 0, 95, 0, 0, 2, 18, 0, 2, 0, 104, +0, 0, 2, 1, 0, 0, 0, 155, 0, 0, 4, 4, 0, 0, 0, 1, 0, 0, 0, 1, +0, 0, 0, 167, 0, 0, 8, 18, 0, 16, 0, 0, 0, 0, 0, 10, 0, 2, 0, 1, +64, 0, 0, 0, 0, 0, 0, 6, 224, 17, 0, 0, 0, 0, 0, 49, 0, 0, 7, 34, +0, 16, 0, 0, 0, 0, 0, 10, 0, 16, 0, 0, 0, 0, 0, 1, 64, 0, 0, 0, +0, 0, 63, 0, 0, 0, 7, 66, 0, 16, 0, 0, 0, 0, 0, 10, 0, 16, 0, 0, +0, 0, 0, 10, 0, 16, 0, 0, 0, 0, 0, 75, 0, 0, 5, 18, 0, 16, 0, 0, +0, 0, 0, 10, 0, 16, 0, 0, 0, 0, 0, 55, 0, 0, 9, 18, 0, 16, 0, 0, +0, 0, 0, 26, 0, 16, 0, 0, 0, 0, 0, 42, 0, 16, 0, 0, 0, 0, 0, 10, +0, 16, 0, 0, 0, 0, 0, 168, 0, 0, 8, 18, 224, 17, 0, 0, 0, 0, 0, 10, +0, 2, 0, 1, 64, 0, 0, 0, 0, 0, 0, 10, 0, 16, 0, 0, 0, 0, 0, 62, +0, 0, 1, 83, 84, 65, 84, 116, 0, 0, 0, 7, 0, 0, 0, 1, 0, 0, 0, 0, +0, 0, 0, 1, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, 0, 0, 0, }; +void computeMain_wrapper(gfx_Device_0* device, Vector<uint32_t, 3> gridDims, + RWStructuredBuffer<float> buffer) +{ + gfx_ShaderProgram_0* shaderProgram = loadShaderProgram_0(device); + gfx_TransientResourceHeap_0* transientHeap = buildTransientHeap_0(device); + gfx_PipelineState_0* pipelineState = buildPipelineState_0(device, shaderProgram); + gfx_ResourceView_0* bufferView = createBufferView_0(device, unconvertBuffer_0(buffer)); + dispatchComputation_0(device, transientHeap, pipelineState, bufferView, gridDims.x, gridDims.y, gridDims.z); +} + +#line 8 "../../../examples/heterogeneous-hello-world/shader.slang" +struct EntryPointParams_0 +{ + RWStructuredBuffer<float> ioBuffer_0; +}; + + +#line 21 +struct gfx_Device_0 +{ +}; + + +#line 22 +struct gfx_BufferResource_0 +{ +}; + + +#line 23 +struct gfx_ResourceView_0 +{ +}; + + +#line 8 +void _computeMain(void* _S1, void* entryPointParams_0, void* _S2) +{ + +#line 8 + ComputeThreadVaryingInput* _S3 = (slang_bit_cast<ComputeThreadVaryingInput*>(_S1)); + + uint32_t tid_0 = (*(&_S3->groupID) * make_VecU3(4U, 1U, 1U) + *(&_S3->groupThreadID)).x; + + float* _S4 = &(*(&(slang_bit_cast<EntryPointParams_0*>(entryPointParams_0))->ioBuffer_0))[tid_0]; + +#line 12 + float i_0 = *_S4; + bool _S5 = i_0 < 0.50000000000000000000f; + +#line 13 + float _S6 = i_0 + i_0; + +#line 13 + float _S7 = (F32_sqrt((i_0))); + +#line 13 + float o_0 = _S5 ? 
_S6 : _S7; + + float* _S8 = &(*(&(slang_bit_cast<EntryPointParams_0*>(entryPointParams_0))->ioBuffer_0))[tid_0]; + +#line 15 + *_S8 = o_0; + return; +} + + +#line 31 +gfx_Device_0* createDevice_0(); + +gfx_BufferResource_0* createStructuredBuffer_0(gfx_Device_0* _0, FixedArray<float, 4> _1); + + +gfx_ResourceView_0* createBufferView_0(gfx_Device_0* _0, gfx_BufferResource_0* _1); + + +#line 4 +RWStructuredBuffer<float> convertBuffer_0(gfx_BufferResource_0* _0); + + +#line 44 +void printInitialValues_0(FixedArray<float, 4> _0, int32_t _1); + + +#line 50 +bool printOutputValues_0(gfx_Device_0* _0, gfx_BufferResource_0* _1, int32_t _2); + + + + +bool executeComputation_0() +{ + + FixedArray<float, 4> initialArray_0 = { 3.00000000000000000000f, -20.00000000000000000000f, -6.00000000000000000000f, 8.00000000000000000000f }; + + + gfx_Device_0* _S9 = createDevice_0(); + gfx_BufferResource_0* _S10 = createStructuredBuffer_0(_S9, initialArray_0); + gfx_ResourceView_0* _S11 = createBufferView_0(_S9, _S10); + Vector<uint32_t, 3> _S12 = make_VecU3(uint32_t(int(4)), uint32_t(int(1)), uint32_t(int(1))); + RWStructuredBuffer<float> _S13 = convertBuffer_0(_S10); + +#line 64 + computeMain_wrapper(_S9, _S12, _S13); + + printInitialValues_0(initialArray_0, int(4)); + bool _S14 = printOutputValues_0(_S9, _S10, int(4)); + + + return true; +} + +// [numthreads(4, 1, 1)] +SLANG_PRELUDE_EXPORT +void computeMain_Thread(ComputeThreadVaryingInput* varyingInput, void* entryPointParams, void* globalParams) +{ + _computeMain(varyingInput, entryPointParams, globalParams); +} +// [numthreads(4, 1, 1)] +SLANG_PRELUDE_EXPORT +void computeMain_Group(ComputeVaryingInput* varyingInput, void* entryPointParams, void* globalParams) +{ + ComputeThreadVaryingInput threadInput = {}; + threadInput.groupID = varyingInput->startGroupID; + for (uint32_t x = 0; x < 4; ++x) + { + threadInput.groupThreadID.x = x; + _computeMain(&threadInput, entryPointParams, globalParams); + } +} +// [numthreads(4, 1, 1)] 
+SLANG_PRELUDE_EXPORT +void computeMain(ComputeVaryingInput* varyingInput, void* entryPointParams, void* globalParams) +{ + ComputeVaryingInput vi = *varyingInput; + ComputeVaryingInput groupVaryingInput = {}; + for (uint32_t z = vi.startGroupID.z; z < vi.endGroupID.z; ++z) + { + groupVaryingInput.startGroupID.z = z; + for (uint32_t y = vi.startGroupID.y; y < vi.endGroupID.y; ++y) + { + groupVaryingInput.startGroupID.y = y; + for (uint32_t x = vi.startGroupID.x; x < vi.endGroupID.x; ++x) + { + groupVaryingInput.startGroupID.x = x; + computeMain_Group(&groupVaryingInput, entryPointParams, globalParams); + } + } + } +} diff --git a/examples/heterogeneous-hello-world/shader.slang b/examples/heterogeneous-hello-world/shader.slang new file mode 100644 index 000000000..b66640e3d --- /dev/null +++ b/examples/heterogeneous-hello-world/shader.slang @@ -0,0 +1,71 @@ +// shader.slang + +//TEST_INPUT:ubuffer(random(float, 4096, -1.0, 1.0), stride=4):name=ioBuffer +RWStructuredBuffer<float> convertBuffer(Ptr<gfx::BufferResource> x); + +[shader("compute")] +[numthreads(4, 1, 1)] +void computeMain(uniform RWStructuredBuffer<float> ioBuffer, uint3 dispatchThreadID : SV_DispatchThreadID) +{ + uint tid = dispatchThreadID.x; + + float i = ioBuffer[tid]; + float o = i < 0.5 ? 
(i + i) : sqrt(i); + + ioBuffer[tid] = o; +} + +// Forward declarations of gfx types +// +namespace gfx { + struct Device{}; + struct BufferResource{}; + struct ResourceView{}; + struct TransientResourceHeap{}; + struct PipelineState{}; + struct ShaderProgram{}; +} + +// Forward declarations of cpp functions +// +Ptr<gfx::Device> createDevice(); +Ptr<gfx::ShaderProgram> loadShaderProgram(Ptr<gfx::Device> device); +Ptr<gfx::BufferResource> createStructuredBuffer( + Ptr<gfx::Device> device, + float[4] initialData); +Ptr<gfx::ResourceView> createBufferView( + Ptr<gfx::Device> device, + Ptr<gfx::BufferResource> buffer); +Ptr<gfx::TransientResourceHeap> buildTransientHeap( + Ptr<gfx::Device> device); +Ptr<gfx::PipelineState> buildPipelineState( + Ptr<gfx::Device> device, + Ptr<gfx::ShaderProgram> shaderProgram); +void printInitialValues(float[4] initialArray, int length); +void dispatchComputation( + Ptr<gfx::Device> device, + Ptr<gfx::TransientResourceHeap> transientHeap, + Ptr<gfx::PipelineState> pipelineState, + Ptr<gfx::ResourceView> bufferView); +bool printOutputValues( + Ptr<gfx::Device> device, + Ptr<gfx::BufferResource> buffer, + int length); + +public bool executeComputation() { + // We will hard-code the size of our initial array. + // + float initialArray[4] = { 3.0f, -20.0f, -6.0f, 8.0f }; + + // Declare functions + let device = createDevice(); + let structuredBuffer = createStructuredBuffer(device, initialArray); + let bufferView = createBufferView(device, structuredBuffer); + __GPU_FOREACH(device, uint3(4, 1, 1), LAMBDA(uint3 dispatchThreadID) + { computeMain(convertBuffer(structuredBuffer), dispatchThreadID) ; }); + printInitialValues(initialArray, 4); + printOutputValues(device, structuredBuffer, 4); + + + return true; +} |
