Bring heterogeneous-hello-world back up to date. (#1935)

* Bring heterogeneous-hello-world back up to date. * Reintroduced heterogeneous-hello-world into the premake * No longer uses compiled bytecode for entry point, instead a loadModule call is hardcoded with the slang file name. * Entry point is, similarly, hardcoded for now. * Added a bypass to slang-legalize-types for an unneeded GPUForeach check * Run premake and change to relative path * Removed experimental and added README Co-authored-by: Yong He <yonghe@outlook.com>
author: David Siher <32305650+dsiher@users.noreply.github.com> 2021-09-14 12:59:55 -0400
committer: GitHub <noreply@github.com> 2021-09-14 09:59:55 -0700
commit: 502aa3812a82cf0d091cff0c67804e4ee448ac78 (patch)
tree: 8ac8def3a30a6531cee7f6b0380d8929811fade5 /examples
parent: d9d42879c4b6c0202732897ec60a355ccc91f243 (diff)
7 files changed, 621 insertions, 642 deletions
diff --git a/examples/experimental/heterogeneous-hello-world/main.cpp b/examples/experimental/heterogeneous-hello-world/main.cpp
deleted file mode 100644
index 372fcd615..000000000
--- a/examples/experimental/heterogeneous-hello-world/main.cpp
+++ /dev/null
@@ -1,380 +0,0 @@
-// This example is out of date and currently disabled from build.
-// The `gfx` layer has been refactored with a new command list based
-// model. The example must be updated to use the new `gfx` interface
-// before it can be included in build.
-
-#if 0
-// main.cpp
-
-// This file implements an extremely simple example of loading and
-// executing a Slang shader program. This is primarily an example
-// of how to use Slang as a "drop-in" replacement for an existing
-// HLSL compiler like the `D3DCompile` API. More advanced usage
-// of advanced Slang language and API features is left to the
-// next example.
-//
-// The comments in the file will attempt to explain concepts as
-// they are introduced.
-//
-// Of course, in order to use the Slang API, we need to include
-// its header. We have set up the build options for this project
-// so that it is as simple as:
-//
-#include <slang.h>
-//
-// Other build setups are possible, and Slang doesn't assume that
-// its include directory must be added to your global include
-// path.
-
-// For the purposes of keeping the demo code as simple as possible,
-// while still retaining some level of portability, our examples
-// make use of a small platform and graphics API abstraction layer,
-// which is included in the Slang source distribution under the
-// `tools/` directory.
-//
-// Applications can of course use Slang without ever touching this
-// abstraction layer, so we will not focus on it when explaining
-// examples, except in places where best practices for interacting
-// with Slang may depend on an application/engine making certain
-// design choices in their abstraction layer.
-//
-#include "slang-com-ptr.h"
-#include "slang-gfx.h"
-#include "tools/graphics-app-framework/window.h"
-#include "../../prelude/slang-cpp-types.h"
-#include "source/core/slang-basic.h"
-
-using namespace gfx;
-
-// We create global ref pointers to avoid dereferencing values
-//
-ComPtr<gfx::IShaderProgram>         gShaderProgram;
-Slang::ComPtr<gfx::IRenderer>      gRenderer;
-
-ComPtr<gfx::IBufferResource> gStructuredBuffer;
-
-ComPtr<gfx::IPipelineLayout> gPipelineLayout;
-ComPtr<gfx::IPipelineState> gPipelineState;
-ComPtr<gfx::IDescriptorSetLayout> gDescriptorSetLayout;
-ComPtr<gfx::IDescriptorSet> gDescriptorSet;
-
-// Boilerplate types to help the slan-generated file
-//
-struct gfx_Window_0;
-struct gfx_Renderer_0;
-struct gfx_BufferResource_0;
-struct gfx_ShaderProgram_0;
-struct gfx_DescriptorSetLayout_0;
-struct gfx_PipelineLayout_0;
-struct gfx_DescriptorSet_0;
-struct gfx_PipelineState_0;
-
-bool executeComputation_0();
-extern unsigned char __computeMain[];
-extern size_t __computeMainSize;
-
-gfx::IShaderProgram* loadShaderProgram(gfx::IRenderer* renderer, unsigned char computeCode[], size_t computeCodeSize)
-{
-    // We extract the begin/end pointers to the output code buffers directly
-    //
-    char unsigned const* computeCodeEnd = computeCode + computeCodeSize;
-
-    // Now we use the operations of the example graphics API abstraction
-    // layer to load shader code into the underlying API.
-    //
-    // Reminder: this section does not involve the Slang API at all.
-    //
-
-    gfx::IShaderProgram::KernelDesc kernelDescs[] =
-    {
-        { gfx::StageType::Compute,    computeCode,     computeCodeEnd },
-    };
-
-    gfx::IShaderProgram::Desc programDesc = {};
-    programDesc.pipelineType = gfx::PipelineType::Compute;
-    programDesc.kernels = &kernelDescs[0];
-    programDesc.kernelCount = 1;
-
-    gShaderProgram = renderer->createProgram(programDesc);
-
-    return gShaderProgram;
-}
-
-// Now that we've covered the function that actually loads and
-// compiles our Slang shade code, we can go through the rest
-// of the application code without as much commentary.
-//
-gfx::Window* createWindow(int windowWidth, int windowHeight)
-{
-    // Create a window for our application to render into.
-    //
-    WindowDesc windowDesc;
-    windowDesc.title = "Hello, World!";
-    windowDesc.width = windowWidth;
-    windowDesc.height = windowHeight;
-    return createWindow(windowDesc);
-    //return globalWindow;
-}
-
-gfx::IRenderer* createRenderer(
-    int windowWidth,
-    int windowHeight,
-    gfx::Window* window)
-{
-    // Initialize the rendering layer.
-    //
-    // Note: for now we are hard-coding logic to use the
-    // Direct3D11 back-end for the graphics API abstraction.
-    // A future version of this example may support multiple
-    // platforms/APIs.
-    //
-    IRenderer::Desc rendererDesc = {};
-    rendererDesc.rendererType = gfx::RendererType::DirectX11;
-    Result res = gfxCreateRenderer(&rendererDesc, gRenderer.writeRef());
-
-    if (SLANG_FAILED(res)) return nullptr;
-    return gRenderer;
-}
-
-gfx::IBufferResource* createStructuredBuffer(gfx::IRenderer* renderer, float* initialArray)
-{
-    // Create a structured buffer for storing the data for computation
-    //
-    int structuredBufferSize = 4 * sizeof(float);
-
-    IBufferResource::Desc structuredBufferDesc;
-    structuredBufferDesc.init(structuredBufferSize);
-    structuredBufferDesc.setDefaults(IResource::Usage::UnorderedAccess);
-    structuredBufferDesc.elementSize = 4;
-    structuredBufferDesc.cpuAccessFlags = IResource::AccessFlag::Read;
-
-    gStructuredBuffer = renderer->createBufferResource(
-        IResource::Usage::UnorderedAccess,
-        structuredBufferDesc,
-        initialArray);
-    return gStructuredBuffer;
-}
-
-gfx::IDescriptorSetLayout* buildDescriptorSetLayout(gfx::IRenderer* renderer)
-{
-    // Our example graphics API usess a "modern" D3D12/Vulkan style
-    // of resource binding, so now we will dive into describing and
-    // allocating "descriptor sets."
-    //
-    // First, we need to construct a descriptor set *layout*.
-    //
-    IDescriptorSetLayout::SlotRangeDesc slotRanges[] =
-    {
-        IDescriptorSetLayout::SlotRangeDesc(DescriptorSlotType::StorageBuffer),
-    };
-    IDescriptorSetLayout::Desc descriptorSetLayoutDesc;
-    descriptorSetLayoutDesc.slotRangeCount = 1;
-    descriptorSetLayoutDesc.slotRanges = &slotRanges[0];
-    gDescriptorSetLayout = renderer->createDescriptorSetLayout(descriptorSetLayoutDesc);
-    return gDescriptorSetLayout;
-}
-
-gfx::IPipelineLayout* buildPipeline(gfx::IRenderer* renderer, gfx::IDescriptorSetLayout* descriptorSetLayout)
-{
-    // Next we will allocate a pipeline layout, which specifies
-    // that we will render with only a single descriptor set bound.
-    //
-
-    IPipelineLayout::DescriptorSetDesc descriptorSets[] =
-    {
-        IPipelineLayout::DescriptorSetDesc(descriptorSetLayout),
-    };
-    IPipelineLayout::Desc pipelineLayoutDesc;
-    pipelineLayoutDesc.renderTargetCount = 1;
-    pipelineLayoutDesc.descriptorSetCount = 1;
-    pipelineLayoutDesc.descriptorSets = &descriptorSets[0];
-    gPipelineLayout = renderer->createPipelineLayout(pipelineLayoutDesc);
-
-    return gPipelineLayout;
-}
-
-gfx::IDescriptorSet* buildDescriptorSet(
-    gfx::IRenderer* renderer,
-    gfx::IDescriptorSetLayout* descriptorSetLayout,
-    gfx::IBufferResource* structuredBuffer)
-{
-    // Once we have the descriptor set layout, we can allocate
-    // and fill in a descriptor set to hold our parameters.
-    //
-    gDescriptorSet = renderer->createDescriptorSet(descriptorSetLayout, gfx::IDescriptorSet::Flag::Transient);
-    if(!gDescriptorSet) return nullptr;
-
-    // Once we have the bufferResource created, we can fill in
-    // a descriptor set for creating a structured buffer
-    //
-    IResourceView::Desc resourceViewDesc;
-    resourceViewDesc.type = IResourceView::Type::UnorderedAccess;
-    auto resourceView = renderer->createBufferView(structuredBuffer, resourceViewDesc);
-    gDescriptorSet->setResource(0, 0, resourceView);
-
-    return gDescriptorSet;
-}
-
-gfx::IPipelineState* buildPipelineState(
-    gfx::IShaderProgram* shaderProgram,
-    gfx::IRenderer* renderer,
-    gfx::IPipelineLayout* pipelineLayout)
-{
-    // Following the D3D12/Vulkan style of API, we need a pipeline state object
-    // (PSO) to encapsulate the configuration of the overall graphics pipeline.
-    //
-    ComputePipelineStateDesc desc;
-    desc.pipelineLayout = pipelineLayout;
-    desc.program = shaderProgram;
-    gPipelineState = renderer->createComputePipelineState(desc);
-    return gPipelineState;
-}
-
-void printInitialValues(float* initialArray, int length)
-{
-    // Print out the values before the computation
-    printf("Before:\n");
-    for (int i = 0; i < length; i++)
-    {
-        printf("%f, ", initialArray[i]);
-    }
-    printf("\n");
-}
-
-void dispatchComputation(
-    gfx::ICommandQueue* gQueue,
-    gfx::IPipelineState* gPipelineState,
-    gfx::IPipelineLayout* gPipelineLayout,
-    gfx::IDescriptorSet* gDescriptorSet,
-    unsigned int gridDimsX,
-    unsigned int gridDimsY,
-    unsigned int gridDimsZ)
-{
-    auto cmdBuf = gQueue->createCommandBuffer();
-    auto encoder = cmdBuf->encodeComputeCommands();
-    encoder->setPipelineState(gPipelineState);
-    encoder->setDescriptorSet(PipelineType::Compute, gPipelineLayout, 0, gDescriptorSet);
-    encoder->dispatchCompute(gridDimsX, gridDimsY, gridDimsZ);
-    encoder->endEncoding();
-    gQueue->executeCommandBuffer(cmdBuf);
-}
-
-void print_output(
-    gfx::IRenderer* renderer,
-    gfx::IBufferResource* structuredBuffer,
-    int length)
-{
-    ComPtr<ISlangBlob> blob;
-    renderer->readBufferResource(structuredBuffer, 0, length * sizeof(float), blob.writeRef());
-    if (float* outputData = (float*)blob->getBufferPointer())
-    {
-        // Print out the values the the kernel produced
-        printf("After: \n");
-        for (int i = 0; i < 4; i++)
-        {
-            printf("%f, ", outputData[i]);
-        }
-        printf("\n");
-    }
-}
-
-// Boilerplate functions to help the slang-generated file and types
-gfx_Window_0* createWindow_0(int32_t _0, int32_t _1)
-{
-    return (gfx_Window_0*)createWindow(_0, _1);
-}
-
-gfx_Renderer_0* createRenderer_0(int32_t _0, int32_t _1, gfx_Window_0* _2)
-{
-    return (gfx_Renderer_0*)createRenderer(_0, _1, (gfx::Window*)_2);
-}
-
-gfx_BufferResource_0* createStructuredBuffer_0(gfx_Renderer_0* _0, FixedArray<float, 4> _1)
-{
-    return (gfx_BufferResource_0*)createStructuredBuffer((gfx::IRenderer*)_0, (float*)&_1);
-}
-
-gfx_ShaderProgram_0* loadShaderProgram_0(gfx_Renderer_0* _0, unsigned char _1[], size_t _2)
-{
-    return (gfx_ShaderProgram_0*)loadShaderProgram((gfx::IRenderer*)_0, _1, _2);
-}
-
-gfx_DescriptorSetLayout_0* buildDescriptorSetLayout_0(gfx_Renderer_0* _0)
-{
-    return (gfx_DescriptorSetLayout_0*)buildDescriptorSetLayout((gfx::IRenderer*)_0);
-}
-
-gfx_PipelineLayout_0* buildPipeline_0(gfx_Renderer_0* _0, gfx_DescriptorSetLayout_0* _1)
-{
-    return (gfx_PipelineLayout_0*)buildPipeline((gfx::IRenderer*)_0, (gfx::IDescriptorSetLayout*)_1);
-}
-
-gfx_DescriptorSet_0* buildDescriptorSet_0(gfx_Renderer_0* _0, gfx_DescriptorSetLayout_0* _1, gfx_BufferResource_0* _2)
-{
-    return (gfx_DescriptorSet_0*)buildDescriptorSet(
-        (gfx::IRenderer*)_0,
-        (gfx::IDescriptorSetLayout*)_1,
-        (gfx::IBufferResource*)_2);
-}
-
-gfx_PipelineState_0* buildPipelineState_0(gfx_ShaderProgram_0* _0, gfx_Renderer_0* _1, gfx_PipelineLayout_0* _2)
-{
-    return (gfx_PipelineState_0*)buildPipelineState(
-        (gfx::IShaderProgram*)_0, (gfx::IRenderer*)_1,
-        (gfx::IPipelineLayout*)_2);
-}
-
-void printInitialValues_0(FixedArray<float, 4> _0, int32_t _1)
-{
-    printInitialValues((float*)&_0, _1);
-}
-
-void dispatchComputation_0(gfx_CommandQueue_0* _0, gfx_PipelineState_0* _1, gfx_PipelineLayout_0* _2, gfx_DescriptorSet_0* _3, unsigned int gridDimsX, unsigned int gridDimsY, unsigned int gridDimsZ)
-{
-    dispatchComputation(
-        (gfx::ICommandQueue*)_0,
-        (gfx::IPipelineState*)_1,
-        (gfx::IPipelineLayout*)_2,
-        (gfx::IDescriptorSet*)_3,
-        gridDimsX,
-        gridDimsY,
-        gridDimsZ);
-}
-
-RWStructuredBuffer<float> convertBuffer_0(gfx_BufferResource_0* _0) {
-    RWStructuredBuffer<float> result;
-    result.data = (float*)_0;
-    return result;
-}
-
-gfx_BufferResource_0* unconvertBuffer_0(RWStructuredBuffer<float> _0) {
-    return (gfx_BufferResource_0*)(_0.data);
-}
-
-void print_output_0(gfx_CommandQueue_0* _0, gfx_BufferResource_0* _1, int32_t _2)
-{
-    print_output((gfx::ICommandQueue*)_0, (gfx::IBufferResource*)_1, _2);
-}
-
-// This "inner" main function is used by the platform abstraction
-// layer to deal with differences in how an entry point needs
-// to be defined for different platforms.
-//
-void innerMain(ApplicationContext* context)
-{
-    // We construct an instance of our example application
-    // `struct` type, and then walk through the lifecyle
-    // of the application.
-
-    if (!(executeComputation_0()))
-    {
-        return exitApplication(context, 1);
-    }
-}
-
-// This macro instantiates an appropriate main function to
-// invoke the `innerMain` above.
-//
-GFX_CONSOLE_MAIN(innerMain)
-
-#endif
diff --git a/examples/experimental/heterogeneous-hello-world/shader.cpp b/examples/experimental/heterogeneous-hello-world/shader.cpp
deleted file mode 100644
index 5a8dd7815..000000000
--- a/examples/experimental/heterogeneous-hello-world/shader.cpp
+++ /dev/null
@@ -1,197 +0,0 @@
-#if 0
-#include "../../prelude/slang-cpp-prelude.h"
-
-
-#ifdef SLANG_PRELUDE_NAMESPACE
-using namespace SLANG_PRELUDE_NAMESPACE;
-#endif
-
-Vector<uint32_t, 3> operator*(Vector<uint32_t, 3> a, Vector<uint32_t, 3> b)
-{
-    Vector<uint32_t, 3> r;
-    r.x = a.x * b.x;
-    r.y = a.y * b.y;
-    r.z = a.z * b.z;
-    return r;
-}
-
-Vector<uint32_t, 3> operator+(Vector<uint32_t, 3> a, Vector<uint32_t, 3> b)
-{
-    Vector<uint32_t, 3> r;
-    r.x = a.x + b.x;
-    r.y = a.y + b.y;
-    r.z = a.z + b.z;
-    return r;
-}
-
-Vector<uint32_t, 3> make_VecU3(uint32_t a, uint32_t b, uint32_t c)
-{
-    return Vector<uint32_t, 3>{ a, b, c};
-}
-
-size_t __computeMainSize = 668;
-unsigned char __computeMain[] = {68, 88, 66, 67, 87, 111, 81, 164, 2, 29, 72, 42, 151, 28, 13, 217, 55, 37, 7, 95, 1, 0, 0, 0, 156, 2, 0, 0, 5, 0, 0, 0, 52, 0, 0, 0, 8, 1, 0, 0, 24, 1, 0, 0, 40, 1, 0, 0, 32, 2, 0, 0, 82, 68, 69, 70, 204, 0, 0, 0, 1, 0, 0, 0, 88, 0, 0, 0, 1, 0, 0, 0, 28, 0, 0, 0, 0, 4, 83, 67, 0, 9, 16, 0, 164, 0, 0, 0, 60, 0, 0, 0, 6, 0, 0, 0, 6, 0, 0, 0, 1, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 101, 110, 116, 114, 121, 80, 111, 105, 110, 116, 80, 97, 114, 97, 109, 115, 95, 105, 111, 66, 117, 102, 102, 101, 114, 95, 48, 0, 60, 0, 0, 0, 1, 0, 0, 0, 112, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 136, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 2, 0, 0, 0, 148, 0, 0, 0, 0, 0, 0, 0, 36, 69, 108, 101, 109, 101, 110, 116, 0, 171, 171, 171, 0, 0, 3, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 77, 105, 99, 114, 111, 115, 111, 102, 116, 32, 40, 82, 41, 32, 72, 76, 83, 76, 32, 83, 104, 97, 100, 101, 114, 32, 67, 111, 109, 112, 105, 108, 101, 114, 32, 49, 48, 46, 49, 0, 73, 83, 71, 78, 8, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 79, 83, 71, 78, 8, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 83, 72, 69, 88, 240, 0, 0, 0, 64, 0, 5, 0, 60, 0, 0, 0, 106, 8, 0, 1, 158, 0, 0, 4, 0, 224, 17, 0, 0, 0, 0, 0, 4, 0, 0, 0, 95, 0, 0, 2, 18, 0, 2, 0, 104, 0, 0, 2, 1, 0, 0, 0, 155, 0, 0, 4, 4, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 167, 0, 0, 8, 18, 0, 16, 0, 0, 0, 0, 0, 10, 0, 2, 0, 1, 64, 0, 0, 0, 0, 0, 0, 6, 224, 17, 0, 0, 0, 0, 0, 49, 0, 0, 7, 34, 0, 16, 0, 0, 0, 0, 0, 10, 0, 16, 0, 0, 0, 0, 0, 1, 64, 0, 0, 0, 0, 0, 63, 0, 0, 0, 7, 66, 0, 16, 0, 0, 0, 0, 0, 10, 0, 16, 0, 0, 0, 0, 0, 10, 0, 16, 0, 0, 0, 0, 0, 75, 0, 0, 5, 18, 0, 16, 0, 0, 0, 0, 0, 10, 0, 16, 0, 0, 0, 0, 0, 55, 0, 0, 9, 18, 0, 16, 0, 0, 0, 0, 0, 26, 0, 16, 0, 0, 0, 0, 0, 42, 0, 16, 0, 0, 0, 0, 0, 10, 0, 16, 0, 0, 0, 0, 0, 168, 0, 0, 8, 18, 224, 17, 0, 0, 0, 0, 0, 10, 0, 2, 0, 1, 64, 0, 0, 0, 0, 0, 0, 10, 0, 16, 0, 0, 0, 0, 0, 62, 0, 0, 1, 83, 84, 65, 84, 116, 0, 0, 0, 7, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 3, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
-void computeMain_wrapper(gfx_Renderer_0* renderer, Vector<uint32_t, 3> gridDims, 
-	RWStructuredBuffer<float> buffer)
-{
-	gfx_ShaderProgram_0* shaderProgram = loadShaderProgram_0(renderer, __computeMain, __computeMainSize);
-	gfx_DescriptorSetLayout_0* setLayout = buildDescriptorSetLayout_0(renderer);
-	gfx_PipelineLayout_0* pipelineLayout = buildPipeline_0(renderer, setLayout);
-	gfx_DescriptorSet_0* descriptorSet = buildDescriptorSet_0(renderer, setLayout, unconvertBuffer_0(buffer));
-	gfx_PipelineState_0* pipelineState = buildPipelineState_0(shaderProgram, renderer, pipelineLayout);
-	dispatchComputation_0(renderer, pipelineState, pipelineLayout, descriptorSet, gridDims.x, gridDims.y, gridDims.z);
-}
-
-#line 7 "../../examples/heterogeneous-hello-world/shader.slang"
-struct EntryPointParams_0
-{
-    RWStructuredBuffer<float> ioBuffer_0;
-};
-
-struct KernelContext_0
-{
-};
-
-
-#line 21
-struct gfx_Window_0
-{
-};
-
-
-#line 22
-struct gfx_Renderer_0
-{
-};
-
-
-#line 23
-struct gfx_BufferResource_0
-{
-};
-
-
-#line 7
-void _computeMain(void* _S1, void* entryPointParams_0, void* _S2)
-{
-    ComputeThreadVaryingInput* _S3 = ((ComputeThreadVaryingInput*)(_S1));
-    KernelContext_0 kernelContext_0;
-
-#line 9
-    uint32_t tid_0 = (*(&_S3->groupID) * make_VecU3(4U, 1U, 1U) + *(&_S3->groupThreadID)).x;
-
-    float* _S4 = &(*(&((EntryPointParams_0*)(entryPointParams_0))->ioBuffer_0))[tid_0];
-
-#line 11
-    float i_0 = *_S4;
-    bool _S5 = i_0 < 0.50000000000000000000f;
-
-#line 12
-    float _S6 = i_0 + i_0;
-
-#line 12
-    float _S7 = (F32_sqrt((i_0)));
-
-#line 12
-    float o_0 = _S5 ? _S6 : _S7;
-
-    float* _S8 = &(*(&((EntryPointParams_0*)(entryPointParams_0))->ioBuffer_0))[tid_0];
-
-#line 14
-    *_S8 = o_0;
-
-#line 7
-    return;
-}
-
-
-#line 34
-gfx_Window_0* createWindow_0(int32_t _0, int32_t _1);
-
-
-#line 35
-gfx_Renderer_0* createRenderer_0(int32_t _0, int32_t _1, gfx_Window_0* _2);
-
-
-
-gfx_BufferResource_0* createStructuredBuffer_0(gfx_Renderer_0* _0, FixedArray<float, 4> _1);
-
-
-#line 4
-RWStructuredBuffer<float> convertBuffer_0(gfx_BufferResource_0* _0);
-
-
-#line 40
-void printInitialValues_0(FixedArray<float, 4> _0, int32_t _1);
-
-
-#line 41
-void print_output_0(gfx_Renderer_0* _0, gfx_BufferResource_0* _1, int32_t _2);
-
-
-
-
-bool executeComputation_0()
-{
-
-
-
-    FixedArray<float, 4> initialArray_0 = { 3.00000000000000000000f, -20.00000000000000000000f, -6.00000000000000000000f, 8.00000000000000000000f };
-
-
-    gfx_Window_0* _S9 = createWindow_0(int(1024), int(768));
-    gfx_Renderer_0* _S10 = createRenderer_0(int(1024), int(768), _S9);
-    gfx_CommandQueue_0* _
-    gfx_BufferResource_0* _S11 = createStructuredBuffer_0(_S10, initialArray_0);
-    Vector<uint32_t, 3> _S12 = make_VecU3(uint32_t(int(4)), uint32_t(int(1)), uint32_t(int(1)));
-    RWStructuredBuffer<float> _S13 = convertBuffer_0(_S11);
-
-#line 57
-    computeMain_wrapper(_S10, _S12, _S13);
-
-    printInitialValues_0(initialArray_0, int(4));
-    print_output_0(_S10, _S11, int(4));
-
-
-    return true;
-}
-
-// [numthreads(4, 1, 1)]
-SLANG_PRELUDE_EXPORT
-void computeMain_Thread(ComputeThreadVaryingInput* varyingInput, void* entryPointParams, void* globalParams)
-{
-    _computeMain(varyingInput, entryPointParams, globalParams);
-}
-// [numthreads(4, 1, 1)]
-SLANG_PRELUDE_EXPORT
-void computeMain_Group(ComputeVaryingInput* varyingInput, void* entryPointParams, void* globalParams)
-{
-    ComputeThreadVaryingInput threadInput = {};
-    threadInput.groupID = varyingInput->startGroupID;
-    for (uint32_t x = 0; x < 4; ++x)
-    {
-        threadInput.groupThreadID.x = x;
-        _computeMain(&threadInput, entryPointParams, globalParams);
-    }
-}
-// [numthreads(4, 1, 1)]
-SLANG_PRELUDE_EXPORT
-void computeMain(ComputeVaryingInput* varyingInput, void* entryPointParams, void* globalParams)
-{
-    ComputeVaryingInput vi = *varyingInput;
-    ComputeVaryingInput groupVaryingInput = {};
-    for (uint32_t z = vi.startGroupID.z; z < vi.endGroupID.z; ++z)
-    {
-        groupVaryingInput.startGroupID.z = z;
-        for (uint32_t y = vi.startGroupID.y; y < vi.endGroupID.y; ++y)
-        {
-            groupVaryingInput.startGroupID.y = y;
-            for (uint32_t x = vi.startGroupID.x; x < vi.endGroupID.x; ++x)
-            {
-                groupVaryingInput.startGroupID.x = x;
-                computeMain_Group(&groupVaryingInput, entryPointParams, globalParams);
-            }
-        }
-    }
-}
-#endif
diff --git a/examples/experimental/heterogeneous-hello-world/shader.slang b/examples/experimental/heterogeneous-hello-world/shader.slang
deleted file mode 100644
index 47c883b39..000000000
--- a/examples/experimental/heterogeneous-hello-world/shader.slang
+++ /dev/null
@@ -1,65 +0,0 @@
-// shader.slang
-
-//TEST_INPUT:ubuffer(random(float, 4096, -1.0, 1.0), stride=4):name=ioBuffer
-RWStructuredBuffer<float> convertBuffer(Ptr<gfx::BufferResource> x);
-
-[shader("compute")]
-[numthreads(4, 1, 1)]
-void computeMain(uniform RWStructuredBuffer<float> ioBuffer, uint3 dispatchThreadID : SV_DispatchThreadID)
-{
-    uint tid = dispatchThreadID.x;
-
-    float i = ioBuffer[tid];
-    float o = i < 0.5 ? (i + i) : sqrt(i);
-
-    ioBuffer[tid] = o;
-}
-
-// Forward declarations of gfx types
-//
-namespace gfx {
-    struct ApplicationContext{};
-    struct Window{};
-    struct Renderer{};
-    struct BufferResource{};
-    struct PipelineLayout{};
-    struct PipelineState{};
-    struct DescriptorSetLayout{};
-    struct DescriptorSet{};
-    struct ShaderProgram{};
-}
-
-// Forward declarations of cpp functions
-//
-Ptr<gfx::ShaderProgram> loadShaderProgram(Ptr<gfx::Renderer> renderer);
-Ptr<gfx::Window> createWindow(int gWindowWidth, int gWindowHeight);
-Ptr<gfx::Renderer> createRenderer(
-    int gWindowWidth,
-    int gWindowHeight,
-    Ptr<gfx::Window> gWindow);
-Ptr<gfx::BufferResource> createStructuredBuffer(Ptr<gfx::Renderer> gRenderer, float[4] initialArray);
-void printInitialValues(float[4] initialArray, int length);
-void print_output(
-    Ptr<gfx::Renderer> gRenderer,
-    Ptr<gfx::BufferResource> gStructuredBuffer,
-    int length);
-
-public bool executeComputation() {
-    // We will hard-code the size of our rendering window and initial array.
-    //
-    int     windowWidth = 1024;
-    int     windowHeight = 768;
-    float   initialArray[4] = { 3.0f, -20.0f, -6.0f, 8.0f };
-
-    // Declare functions
-    let window = createWindow(windowWidth, windowHeight);
-    let renderer = createRenderer(windowWidth, windowHeight, window);
-    let structuredBuffer = createStructuredBuffer(renderer, initialArray);
-    __GPU_FOREACH(renderer, uint3(4, 1, 1), LAMBDA(uint3 dispatchThreadID)
-        { computeMain(convertBuffer(structuredBuffer), dispatchThreadID) ; });
-    printInitialValues(initialArray, 4);
-    print_output(renderer, structuredBuffer, 4);
-
-
-    return true;
-}
diff --git a/examples/experimental/heterogeneous-hello-world/README.md b/examples/heterogeneous-hello-world/README.md
index 709652922..709652922 100644
--- a/examples/experimental/heterogeneous-hello-world/README.md
+++ b/examples/heterogeneous-hello-world/README.md
diff --git a/examples/heterogeneous-hello-world/main.cpp b/examples/heterogeneous-hello-world/main.cpp
new file mode 100644
index 000000000..9e0bb8b0f
--- /dev/null
+++ b/examples/heterogeneous-hello-world/main.cpp
@@ -0,0 +1,335 @@
+// main.cpp
+
+// This example uses the Slang gfx layer to target different APIs and execute
+// both CPU and GPU code from a single Slang file.
+//
+#include <slang.h>
+#include <slang-com-ptr.h>
+using Slang::ComPtr;
+
+#include "slang-gfx.h"
+#include "gfx-util/shader-cursor.h"
+#include "source/core/slang-basic.h"
+#include "../../prelude/slang-cpp-types.h"
+
+using namespace gfx;
+using namespace Slang;
+
+// Global ref-counted pointers own the gfx objects; the functions below return raw pointers to them.
+//
+ComPtr<gfx::IDevice> gDevice;
+ComPtr<gfx::IShaderProgram> gProgram;
+ComPtr<gfx::IBufferResource> gBufferResource;
+ComPtr<gfx::IResourceView> gResourceView;
+ComPtr<gfx::ITransientResourceHeap> gTransientHeap;
+ComPtr<gfx::IPipelineState> gPipelineState;
+ComPtr<gfx::ICommandQueue> gQueue;
+
+// Boilerplate types to help the slang-generated file
+//
+struct gfx_Device_0;
+struct gfx_BufferResource_0;
+struct gfx_ShaderProgram_0;
+struct gfx_ResourceView_0;
+struct gfx_TransientResourceHeap_0;
+struct gfx_PipelineState_0;
+bool executeComputation_0();
+
+// Many Slang API functions return detailed diagnostic information
+// (error messages, warnings, etc.) as a "blob" of data, or return
+// a null blob pointer instead if there were no issues.
+//
+// For convenience, we define a subroutine that will dump the information
+// in a diagnostic blob if one is produced, and skip it otherwise.
+//
+void diagnoseIfNeeded(slang::IBlob *diagnosticsBlob)
+{
+    if (diagnosticsBlob != nullptr)
+    {
+        printf("%s", (const char *)diagnosticsBlob->getBufferPointer());
+    }
+}
+
+gfx::IDevice* createDevice()
+{
+    ComPtr<gfx::IDevice> device;
+    IDevice::Desc deviceDesc = {};
+    // Changing device type would happen here. For example:
+    //deviceDesc.deviceType = DeviceType::CUDA;
+    SLANG_RETURN_NULL_ON_FAIL(gfxCreateDevice(&deviceDesc, gDevice.writeRef()));
+    return gDevice;
+}
+
+// Loads the shader code defined in `shader.slang` for use by the `gfx` layer.
+//
+gfx::IShaderProgram* loadShaderProgram(gfx::IDevice *device)
+{
+    // We need to obtain a compilation session (`slang::ISession`) that will provide
+    // a scope to all the compilation and loading of code we do.
+    //
+    ComPtr<slang::ISession> slangSession;
+    SLANG_RETURN_NULL_ON_FAIL(device->getSlangSession(slangSession.writeRef()));
+
+    // We can now start loading code into the slang session.
+    //
+    // The simplest way to load code is by calling `loadModule` with the name of a Slang
+    // module. A call to `loadModule("MyStuff")` will behave more or less as if you
+    // wrote:
+    //
+    //      import MyStuff;
+    //
+    // in a Slang shader file. The compiler will use its search paths to try to locate
+    // `MyStuff.slang`, then compile and load that file. If a matching module had
+    // already been loaded previously, that would be used directly.
+    //
+    ComPtr<slang::IBlob> diagnosticsBlob;
+    slang::IModule *module = slangSession->loadModule("shader", diagnosticsBlob.writeRef());
+    diagnoseIfNeeded(diagnosticsBlob);
+    if (!module)
+        return NULL;
+
+    // Look up entry point (hardcoded for now)
+    //
+    char const *computeEntryPointName = "computeMain";
+    ComPtr<slang::IEntryPoint> computeEntryPoint;
+    SLANG_RETURN_NULL_ON_FAIL(
+        module->findEntryPointByName(computeEntryPointName, computeEntryPoint.writeRef()));
+
+    // At this point we have two Slang API objects that represent
+    // pieces of our code: `module` and `computeEntryPoint`.
+    //
+    // A single Slang module could contain many different entry points (e.g.,
+    // four vertex entry points, three fragment entry points, and two compute
+    // shaders), and before we try to generate output code for our target API
+    // we need to identify which entry points we plan to use together.
+    //
+    // Modules and entry points are both examples of *component types* in the
+    // Slang API. The API also provides a way to build a *composite* out of
+    // other pieces, and that is what we are going to do with our module
+    // and entry points.
+    //
+    Slang::List<slang::IComponentType *> componentTypes;
+    componentTypes.add(module);
+    componentTypes.add(computeEntryPoint);
+
+    // Actually creating the composite component type is a single operation
+    // on the Slang session, but the operation could potentially fail if
+    // something about the composite was invalid (e.g., you are trying to
+    // combine multiple copies of the same module), so we need to deal
+    // with the possibility of diagnostic output.
+    //
+    ComPtr<slang::IComponentType> composedProgram;
+    SlangResult result = slangSession->createCompositeComponentType(
+        componentTypes.getBuffer(),
+        componentTypes.getCount(),
+        composedProgram.writeRef(),
+        diagnosticsBlob.writeRef());
+    diagnoseIfNeeded(diagnosticsBlob);
+    SLANG_RETURN_NULL_ON_FAIL(result);
+
+    // At this point, `composedProgram` represents the shader program
+    // we want to run, and the compute shader in it has been checked.
+    // We can create a `gfx::IShaderProgram` object from `composedProgram`
+    // so it may be used by the graphics layer.
+    gfx::IShaderProgram::Desc programDesc = {};
+    programDesc.pipelineType = gfx::PipelineType::Compute;
+    programDesc.slangProgram = composedProgram.get();
+
+    gProgram = device->createProgram(programDesc);
+
+    return gProgram;
+}
+
+gfx::IBufferResource* createStructuredBuffer(
+    gfx::IDevice *device,
+    float *initialData)
+{
+    // Create a structured buffer for storing computation data
+    //
+    const int numberCount = 4;
+    int structuredBufferSize = numberCount * sizeof(float);
+
+    IBufferResource::Desc bufferDesc = {};
+    bufferDesc.sizeInBytes = numberCount * sizeof(float);
+    bufferDesc.format = gfx::Format::Unknown;
+    bufferDesc.elementSize = sizeof(float);
+    bufferDesc.allowedStates = ResourceStateSet(ResourceState::ShaderResource,
+                                                ResourceState::UnorderedAccess,
+                                                ResourceState::CopyDestination,
+                                                ResourceState::CopySource);
+    bufferDesc.defaultState = ResourceState::UnorderedAccess;
+    bufferDesc.cpuAccessFlags = AccessFlag::Write | AccessFlag::Read;
+
+    SlangResult result = device->createBufferResource(bufferDesc,
+                                                      (void *)initialData,
+                                                      gBufferResource.writeRef());
+    SLANG_RETURN_NULL_ON_FAIL(result);
+    return gBufferResource;
+}
+
+// Builds an unordered-access view of `buffer` and keeps it in the global
+// `gResourceView`; yields null when the device reports a failure.
+gfx::IResourceView* createBufferView(gfx::IDevice* device, gfx::IBufferResource* buffer)
+{
+    gfx::IResourceView::Desc uavDesc = {};
+    uavDesc.format = gfx::Format::Unknown;
+    uavDesc.type = gfx::IResourceView::Type::UnorderedAccess;
+
+    const SlangResult status = device->createBufferView(buffer, uavDesc, gResourceView.writeRef());
+    SLANG_RETURN_NULL_ON_FAIL(status);
+    return gResourceView;
+}
+
+// Creates the transient resource heap into the global `gTransientHeap` (null on failure).
+gfx::ITransientResourceHeap* buildTransientHeap(gfx::IDevice *device)
+{
+    ITransientResourceHeap::Desc heapDesc = {};
+    heapDesc.constantBufferSize = 4096;
+    SLANG_RETURN_NULL_ON_FAIL(device->createTransientResourceHeap(heapDesc, gTransientHeap.writeRef()));
+    return gTransientHeap;
+}
+
+// Wraps `shaderProgram` in a compute pipeline state kept in the global
+// `gPipelineState`; null is returned if the device call fails.
+gfx::IPipelineState* buildPipelineState(gfx::IDevice *device, gfx::IShaderProgram* shaderProgram)
+{
+    gfx::ComputePipelineStateDesc computeDesc = {};
+    computeDesc.program = shaderProgram;
+    const SlangResult status = device->createComputePipelineState(computeDesc, gPipelineState.writeRef());
+    SLANG_RETURN_NULL_ON_FAIL(status);
+    return gPipelineState;
+}
+
+// Prints a "Before:" header followed by the first `length` floats of `initialArray`.
+void printInitialValues(float *initialArray, int length)
+{
+    printf("Before:\n");
+    const float *cursor = initialArray;
+    for (int remaining = length; remaining > 0; --remaining)
+        printf("%f, ", *cursor++);
+    printf("\n");
+}
+
+// Records one compute dispatch with `bufferView` bound to the entry point's "ioBuffer",
+// submits it on a newly created queue (kept in `gQueue`) and waits on that queue.
+void dispatchComputation(
+    gfx::IDevice* device,
+    gfx::ITransientResourceHeap* transientHeap,
+    gfx::IPipelineState* pipelineState,
+    gfx::IResourceView* bufferView,
+    unsigned int gridDimsX,
+    unsigned int gridDimsY,
+    unsigned int gridDimsZ)
+{
+    ICommandQueue::Desc graphicsQueueDesc = {ICommandQueue::QueueType::Graphics};
+    gQueue = device->createCommandQueue(graphicsQueueDesc);
+
+    auto cmdBuffer = transientHeap->createCommandBuffer();
+    auto computeEncoder = cmdBuffer->encodeComputeCommands();
+
+    // Binding the pipeline yields the root shader object used for parameter binding.
+    auto rootShaderObject = computeEncoder->bindPipeline(pipelineState);
+    // Point a cursor at the first entry point and attach the buffer view to it.
+    gfx::ShaderCursor firstEntryPoint(rootShaderObject->getEntryPoint(0));
+    firstEntryPoint.getPath("ioBuffer").setResource(bufferView);
+
+    computeEncoder->dispatchCompute(gridDimsX, gridDimsY, gridDimsZ);
+    computeEncoder->endEncoding();
+    cmdBuffer->close();
+    gQueue->executeCommandBuffer(cmdBuffer);
+    gQueue->wait();
+}
+
+// Reads `length` floats back from `buffer` through the device and prints them after an
+// "After: " header. Returns false when the read-back fails, true otherwise.
+bool printOutputValues(gfx::IDevice *device, gfx::IBufferResource *buffer, int length)
+{
+    ComPtr<ISlangBlob> readback;
+    const auto byteCount = length * sizeof(float);
+    const SlangResult status = device->readBufferResource(buffer, 0, byteCount, readback.writeRef());
+    SLANG_RETURN_FALSE_ON_FAIL(status);
+
+    const float *values = reinterpret_cast<const float *>(readback->getBufferPointer());
+    printf("After: \n");
+    // All values go on one line, each followed by ", ".
+    for (int index = 0; index < length; ++index)
+        printf("%f, ", values[index]);
+    printf("\n");
+    return true;
+}
+
+// Boilerplate adapters called by the Slang-generated code: they translate between its opaque gfx_*_0 / FixedArray types and the gfx-based helpers above.
+
+gfx_Device_0* createDevice_0() { // adapter: createDevice() re-typed as the generated handle type
+    auto nativeDevice = createDevice();
+    return (gfx_Device_0*)nativeDevice;
+}
+
+gfx_BufferResource_0* createStructuredBuffer_0(gfx_Device_0* _0, FixedArray<float, 4> _1) {
+    float* initialFloats = reinterpret_cast<float*>(&_1); // view the by-value array copy as raw floats
+    return reinterpret_cast<gfx_BufferResource_0*>(createStructuredBuffer(reinterpret_cast<gfx::IDevice*>(_0), initialFloats));
+}
+
+gfx_ShaderProgram_0* loadShaderProgram_0(gfx_Device_0* _0) { // adapter for the generated code
+    auto nativeProgram = loadShaderProgram(reinterpret_cast<gfx::IDevice*>(_0));
+    return (gfx_ShaderProgram_0*)nativeProgram;
+}
+
+gfx_ResourceView_0* createBufferView_0(gfx_Device_0* _0, gfx_BufferResource_0* _1) { // adapter for the generated code
+    gfx::IResourceView* nativeView = createBufferView(reinterpret_cast<gfx::IDevice*>(_0), reinterpret_cast<gfx::IBufferResource*>(_1));
+    return reinterpret_cast<gfx_ResourceView_0*>(nativeView);
+}
+
+gfx_TransientResourceHeap_0* buildTransientHeap_0(gfx_Device_0* _0) { // adapter for the generated code
+    gfx::ITransientResourceHeap* nativeHeap = buildTransientHeap(reinterpret_cast<gfx::IDevice*>(_0));
+    return reinterpret_cast<gfx_TransientResourceHeap_0*>(nativeHeap);
+}
+
+gfx_PipelineState_0* buildPipelineState_0(gfx_Device_0* _0, gfx_ShaderProgram_0* _1) { // adapter for the generated code
+    gfx::IPipelineState* nativePipeline = buildPipelineState(reinterpret_cast<gfx::IDevice*>(_0), reinterpret_cast<gfx::IShaderProgram*>(_1));
+    return reinterpret_cast<gfx_PipelineState_0*>(nativePipeline);
+}
+
+void printInitialValues_0(FixedArray<float, 4> _0, int32_t _1) {
+    float* values = reinterpret_cast<float*>(&_0); // view the by-value array copy as raw floats
+    printInitialValues(values, _1);
+}
+
+// Adapter for the Slang-generated code: converts the opaque handles back to the
+// gfx interface pointers and forwards the grid dimensions unchanged.
+void dispatchComputation_0(gfx_Device_0* _0, gfx_TransientResourceHeap_0* _1, gfx_PipelineState_0* _2, gfx_ResourceView_0* _3, unsigned int gridDimsX, unsigned int gridDimsY, unsigned int gridDimsZ)
+{
+    gfx::IDevice* device = reinterpret_cast<gfx::IDevice*>(_0);
+    gfx::ITransientResourceHeap* transientHeap = reinterpret_cast<gfx::ITransientResourceHeap*>(_1);
+    gfx::IPipelineState* pipelineState = reinterpret_cast<gfx::IPipelineState*>(_2);
+    gfx::IResourceView* bufferView = reinterpret_cast<gfx::IResourceView*>(_3);
+
+    dispatchComputation(device, transientHeap, pipelineState, bufferView, gridDimsX, gridDimsY, gridDimsZ);
+}
+
+RWStructuredBuffer<float> convertBuffer_0(gfx_BufferResource_0* _0) {
+    RWStructuredBuffer<float> result{}; // value-initialize so no member is returned with an indeterminate value
+    result.data = reinterpret_cast<float*>(_0); // the handle only travels in `data`; it is not real float storage
+    return result;
+}
+
+gfx_BufferResource_0* unconvertBuffer_0(RWStructuredBuffer<float> _0) { // recovers the handle stored in `data`
+    return reinterpret_cast<gfx_BufferResource_0*>(_0.data);
+}
+
+bool printOutputValues_0(gfx_Device_0* _0, gfx_BufferResource_0* _1, int32_t _2) {
+    gfx::IBufferResource* nativeBuffer = reinterpret_cast<gfx::IBufferResource*>(_1); // undo the opaque-handle typing
+    return printOutputValues(reinterpret_cast<gfx::IDevice*>(_0), nativeBuffer, _2);
+}
+
+int main()
+{
+    // The whole example is driven by `executeComputation_0()`, the function
+    // generated from shader.slang: it creates the device and buffer,
+    // dispatches the compute shader and prints the values.
+    //
+    // Exit status: 0 when it reports success, -1 otherwise.
+    const bool succeeded = executeComputation_0();
+    if (succeeded) return 0;
+    return -1;
+}
diff --git a/examples/heterogeneous-hello-world/shader.cpp b/examples/heterogeneous-hello-world/shader.cpp
new file mode 100644
index 000000000..0c0c24ebc
--- /dev/null
+++ b/examples/heterogeneous-hello-world/shader.cpp
@@ -0,0 +1,215 @@
+#include "../../prelude/slang-cpp-prelude.h"
+
+
+#ifdef SLANG_PRELUDE_NAMESPACE
+using namespace SLANG_PRELUDE_NAMESPACE;
+#endif
+
+// Component-wise sum of two 3-component uint32_t vectors.
+Vector<uint32_t, 3> operator+(Vector<uint32_t, 3> a, Vector<uint32_t, 3> b)
+{
+    Vector<uint32_t, 3> sum;
+    sum.z = a.z + b.z;
+    sum.y = a.y + b.y;
+    sum.x = a.x + b.x;
+    return sum;
+}
+
+// Component-wise product of two 3-component uint32_t vectors.
+Vector<uint32_t, 3> operator*(Vector<uint32_t, 3> a, Vector<uint32_t, 3> b)
+{
+    Vector<uint32_t, 3> product;
+    product.z = a.z * b.z;
+    product.y = a.y * b.y;
+    product.x = a.x * b.x;
+    return product;
+}
+
+Vector<uint32_t, 3> make_VecU3(uint32_t a, uint32_t b, uint32_t c) {
+    Vector<uint32_t, 3> packed{ a, b, c }; // brace-initialized from the three arguments, in order
+    return packed;
+}
+
+size_t __computeMainSize = 668; // declared byte length of the __computeMain blob that follows
+unsigned char __computeMain[] = {68, 88, 66, 67, 87, 111, 81, 164, 2, 29, 72, 42, 151, 28, 13, 217, 55, 37, 7, 95, 1, 
+0, 0, 0, 156, 2, 0, 0, 5, 0, 0, 0, 52, 0, 0, 0, 8, 1, 0, 0, 24, 
+1, 0, 0, 40, 1, 0, 0, 32, 2, 0, 0, 82, 68, 69, 70, 204, 0, 0, 0, 1, 
+0, 0, 0, 88, 0, 0, 0, 1, 0, 0, 0, 28, 0, 0, 0, 0, 4, 83, 67, 0, 
+9, 16, 0, 164, 0, 0, 0, 60, 0, 0, 0, 6, 0, 0, 0, 6, 0, 0, 0, 1, 
+0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 101, 
+110, 116, 114, 121, 80, 111, 105, 110, 116, 80, 97, 114, 97, 109, 115, 95, 105, 111, 66, 117, 
+102, 102, 101, 114, 95, 48, 0, 60, 0, 0, 0, 1, 0, 0, 0, 112, 0, 0, 0, 4, 
+0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 136, 0, 0, 0, 0, 0, 0, 0, 4, 
+0, 0, 0, 2, 0, 0, 0, 148, 0, 0, 0, 0, 0, 0, 0, 36, 69, 108, 101, 109, 
+101, 110, 116, 0, 171, 171, 171, 0, 0, 3, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 
+0, 0, 0, 77, 105, 99, 114, 111, 115, 111, 102, 116, 32, 40, 82, 41, 32, 72, 76, 83, 
+76, 32, 83, 104, 97, 100, 101, 114, 32, 67, 111, 109, 112, 105, 108, 101, 114, 32, 49, 48, 
+46, 49, 0, 73, 83, 71, 78, 8, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 79, 
+83, 71, 78, 8, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 83, 72, 69, 88, 240, 
+0, 0, 0, 64, 0, 5, 0, 60, 0, 0, 0, 106, 8, 0, 1, 158, 0, 0, 4, 0, 
+224, 17, 0, 0, 0, 0, 0, 4, 0, 0, 0, 95, 0, 0, 2, 18, 0, 2, 0, 104, 
+0, 0, 2, 1, 0, 0, 0, 155, 0, 0, 4, 4, 0, 0, 0, 1, 0, 0, 0, 1, 
+0, 0, 0, 167, 0, 0, 8, 18, 0, 16, 0, 0, 0, 0, 0, 10, 0, 2, 0, 1, 
+64, 0, 0, 0, 0, 0, 0, 6, 224, 17, 0, 0, 0, 0, 0, 49, 0, 0, 7, 34, 
+0, 16, 0, 0, 0, 0, 0, 10, 0, 16, 0, 0, 0, 0, 0, 1, 64, 0, 0, 0, 
+0, 0, 63, 0, 0, 0, 7, 66, 0, 16, 0, 0, 0, 0, 0, 10, 0, 16, 0, 0, 
+0, 0, 0, 10, 0, 16, 0, 0, 0, 0, 0, 75, 0, 0, 5, 18, 0, 16, 0, 0, 
+0, 0, 0, 10, 0, 16, 0, 0, 0, 0, 0, 55, 0, 0, 9, 18, 0, 16, 0, 0, 
+0, 0, 0, 26, 0, 16, 0, 0, 0, 0, 0, 42, 0, 16, 0, 0, 0, 0, 0, 10, 
+0, 16, 0, 0, 0, 0, 0, 168, 0, 0, 8, 18, 224, 17, 0, 0, 0, 0, 0, 10, 
+0, 2, 0, 1, 64, 0, 0, 0, 0, 0, 0, 10, 0, 16, 0, 0, 0, 0, 0, 62, 
+0, 0, 1, 83, 84, 65, 84, 116, 0, 0, 0, 7, 0, 0, 0, 1, 0, 0, 0, 0, 
+0, 0, 0, 1, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
+0, 0, 0, 0, 0, 0, 0, }; // blob starts with ASCII "DXBC" and embeds "Microsoft (R) HLSL Shader Compiler 10.1"; nothing else in this file refers to it
+// Loads the program, builds the heap, pipeline state and buffer view, then dispatches gridDims.x/y/z.
+void computeMain_wrapper(gfx_Device_0* device, Vector<uint32_t, 3> gridDims, RWStructuredBuffer<float> buffer)
+{
+	gfx_ShaderProgram_0* program = loadShaderProgram_0(device);
+	gfx_TransientResourceHeap_0* heap = buildTransientHeap_0(device);
+	gfx_PipelineState_0* pipeline = buildPipelineState_0(device, program);
+	gfx_ResourceView_0* view = createBufferView_0(device, unconvertBuffer_0(buffer));
+	dispatchComputation_0(device, heap, pipeline, view, gridDims.x, gridDims.y, gridDims.z);
+}
+
+#line 8 "../../../examples/heterogeneous-hello-world/shader.slang"
+struct EntryPointParams_0 { // layout that _computeMain casts its `entryPointParams_0` argument to
+    // Corresponds to the `ioBuffer` uniform of computeMain in shader.slang.
+    RWStructuredBuffer<float> ioBuffer_0;
+};
+
+
+#line 21
+struct gfx_Device_0 {
+    // Intentionally empty: stand-in for Slang's `gfx::Device`, used in this file only through pointers.
+};
+
+
+#line 22
+struct gfx_BufferResource_0 {
+    // Intentionally empty: stand-in for Slang's `gfx::BufferResource`, used in this file only through pointers.
+};
+
+
+#line 23
+struct gfx_ResourceView_0 {
+    // Intentionally empty: stand-in for Slang's `gfx::ResourceView`, used in this file only through pointers.
+};
+
+
+#line 8
+void _computeMain(void* _S1, void* entryPointParams_0, void* _S2)
+{
+    // Kernel body for one thread. `_S1` is read as the thread's varying input; `_S2` is unused.
+#line 8
+    ComputeThreadVaryingInput* _S3 = (slang_bit_cast<ComputeThreadVaryingInput*>(_S1));
+    // Flat thread index along x: groupID * (4, 1, 1) + groupThreadID.
+    uint32_t tid_0 = (*(&_S3->groupID) * make_VecU3(4U, 1U, 1U) + *(&_S3->groupThreadID)).x;
+    // Address of this thread's element in the entry point's ioBuffer.
+    float* _S4 = &(*(&(slang_bit_cast<EntryPointParams_0*>(entryPointParams_0))->ioBuffer_0))[tid_0];
+
+#line 12
+    float i_0 = *_S4;
+    bool _S5 = i_0 < 0.50000000000000000000f;
+    // Both candidate results are evaluated up front; the select below picks one.
+#line 13
+    float _S6 = i_0 + i_0;
+
+#line 13
+    float _S7 = (F32_sqrt((i_0)));
+
+#line 13
+    float o_0 = _S5 ? _S6 : _S7;
+    // The element address is derived a second time for the write-back.
+    float* _S8 = &(*(&(slang_bit_cast<EntryPointParams_0*>(entryPointParams_0))->ioBuffer_0))[tid_0];
+
+#line 15
+    *_S8 = o_0;
+    return;
+}
+
+
+#line 31
+gfx_Device_0* createDevice_0();
+// Host-side helpers: only declared in this generated file, defined by the hand-written gfx wrapper code.
+gfx_BufferResource_0* createStructuredBuffer_0(gfx_Device_0* _0, FixedArray<float, 4> _1);
+
+
+gfx_ResourceView_0* createBufferView_0(gfx_Device_0* _0, gfx_BufferResource_0* _1);
+
+// The #line directives below give line numbers in shader.slang.
+#line 4
+RWStructuredBuffer<float> convertBuffer_0(gfx_BufferResource_0* _0);
+
+
+#line 44
+void printInitialValues_0(FixedArray<float, 4> _0, int32_t _1);
+
+
+#line 50
+bool printOutputValues_0(gfx_Device_0* _0, gfx_BufferResource_0* _1, int32_t _2);
+
+
+
+
+bool executeComputation_0()
+{
+    // Host-side driver generated from executeComputation() in shader.slang.
+    FixedArray<float, 4> initialArray_0 = { 3.00000000000000000000f, -20.00000000000000000000f, -6.00000000000000000000f, 8.00000000000000000000f };
+    // The array is handed to the helpers by value, so this host copy keeps its initial values.
+    // _S11 is not used again in this function; computeMain_wrapper creates its own buffer view.
+    gfx_Device_0* _S9 = createDevice_0();
+    gfx_BufferResource_0* _S10 = createStructuredBuffer_0(_S9, initialArray_0);
+    gfx_ResourceView_0* _S11 = createBufferView_0(_S9, _S10);
+    Vector<uint32_t, 3> _S12 = make_VecU3(uint32_t(int(4)), uint32_t(int(1)), uint32_t(int(1)));
+    RWStructuredBuffer<float> _S13 = convertBuffer_0(_S10);
+
+#line 64
+    computeMain_wrapper(_S9, _S12, _S13);
+    // "Before" is printed from the host copy after the dispatch; "After" is read back from the buffer.
+    printInitialValues_0(initialArray_0, int(4));
+    bool _S14 = printOutputValues_0(_S9, _S10, int(4));
+    // NOTE(review): _S14 is never read, so a failed read-back still returns true —
+    // confirm that ignoring it is intended.
+    return true;
+}
+
+// [numthreads(4, 1, 1)]
+// Per-thread entry: runs the kernel body once for the thread described by `varyingInput`.
+SLANG_PRELUDE_EXPORT
+void computeMain_Thread(ComputeThreadVaryingInput* varyingInput, void* entryPointParams, void* globalParams) {
+    _computeMain(varyingInput, entryPointParams, globalParams);
+}
+// [numthreads(4, 1, 1)]
+// Per-group entry: runs the kernel body for threads x = 0..3 of group `varyingInput->startGroupID`.
+SLANG_PRELUDE_EXPORT
+void computeMain_Group(ComputeVaryingInput* varyingInput, void* entryPointParams, void* globalParams)
+{
+    ComputeThreadVaryingInput perThread = {};
+    perThread.groupID = varyingInput->startGroupID;
+    for (uint32_t lane = 0; lane != 4; ++lane) {
+        perThread.groupThreadID.x = lane;
+        _computeMain(&perThread, entryPointParams, globalParams);
+    }
+}
+// [numthreads(4, 1, 1)]
+// Range entry: invokes computeMain_Group for every group ID in the half-open box
+// [startGroupID, endGroupID), iterating x fastest, then y, then z.
+SLANG_PRELUDE_EXPORT
+void computeMain(ComputeVaryingInput* varyingInput, void* entryPointParams, void* globalParams)
+{
+    const ComputeVaryingInput range = *varyingInput; // snapshot the bounds before looping
+    // Only startGroupID is filled in; computeMain_Group reads nothing else from it.
+    ComputeVaryingInput current = {};
+    for (uint32_t gz = range.startGroupID.z; gz < range.endGroupID.z; ++gz) {
+        current.startGroupID.z = gz;
+        for (uint32_t gy = range.startGroupID.y; gy < range.endGroupID.y; ++gy) {
+            current.startGroupID.y = gy;
+            for (uint32_t gx = range.startGroupID.x; gx < range.endGroupID.x; ++gx) {
+                current.startGroupID.x = gx;
+                computeMain_Group(&current, entryPointParams, globalParams);
+            }
+        }
+    }
+}
diff --git a/examples/heterogeneous-hello-world/shader.slang b/examples/heterogeneous-hello-world/shader.slang
new file mode 100644
index 000000000..b66640e3d
--- /dev/null
+++ b/examples/heterogeneous-hello-world/shader.slang
@@ -0,0 +1,71 @@
+// shader.slang
+
+//TEST_INPUT:ubuffer(random(float, 4096, -1.0, 1.0), stride=4):name=ioBuffer
+RWStructuredBuffer<float> convertBuffer(Ptr<gfx::BufferResource> x); // declaration only; the C++ host code supplies the body as convertBuffer_0
+
+[shader("compute")]
+[numthreads(4, 1, 1)]
+void computeMain(uniform RWStructuredBuffer<float> ioBuffer, uint3 dispatchThreadID : SV_DispatchThreadID)
+{
+    uint tid = dispatchThreadID.x;
+    // In-place update of element `tid`: below 0.5 the value is doubled, otherwise square-rooted.
+    float i = ioBuffer[tid];
+    float o = i < 0.5 ? (i + i) : sqrt(i);
+    // Store the result over the input value.
+    ioBuffer[tid] = o;
+}
+
+// Empty placeholder definitions standing in for the gfx types
+//
+namespace gfx { // empty stand-in types; this file refers to them only through Ptr<...>
+    struct Device{};
+    struct BufferResource{};
+    struct ResourceView{};
+    struct TransientResourceHeap{};
+    struct PipelineState{};
+    struct ShaderProgram{};
+}
+
+// Forward declarations of cpp functions
+//
+Ptr<gfx::Device> createDevice(); // each function in this group is implemented in C++ under the same name plus a `_0` suffix
+Ptr<gfx::ShaderProgram> loadShaderProgram(Ptr<gfx::Device> device);
+Ptr<gfx::BufferResource> createStructuredBuffer(
+    Ptr<gfx::Device> device,
+    float[4] initialData);
+Ptr<gfx::ResourceView> createBufferView(
+    Ptr<gfx::Device> device,
+    Ptr<gfx::BufferResource> buffer);
+Ptr<gfx::TransientResourceHeap> buildTransientHeap(
+    Ptr<gfx::Device> device);
+Ptr<gfx::PipelineState> buildPipelineState(
+    Ptr<gfx::Device> device,
+    Ptr<gfx::ShaderProgram> shaderProgram);
+void printInitialValues(float[4] initialArray, int length);
+void dispatchComputation(
+    Ptr<gfx::Device> device,
+    Ptr<gfx::TransientResourceHeap> transientHeap,
+    Ptr<gfx::PipelineState> pipelineState,
+    Ptr<gfx::ResourceView> bufferView); // NOTE(review): the C++ dispatchComputation_0 also takes gridDimsX/Y/Z — confirm this 4-parameter declaration is intended
+bool printOutputValues(
+    Ptr<gfx::Device> device,
+    Ptr<gfx::BufferResource> buffer,
+    int length);
+
+public bool executeComputation() {
+    // We will hard-code the size of our initial array.
+    //
+    float   initialArray[4] = { 3.0f, -20.0f, -6.0f, 8.0f };
+    // NOTE(review): uint3(4, 1, 1) reaches dispatchCompute unchanged; if that counts thread groups, 16 threads touch this 4-element buffer — confirm.
+    // Create the device, the buffer and its view, then run computeMain via __GPU_FOREACH.
+    let device = createDevice();
+    let structuredBuffer = createStructuredBuffer(device, initialArray);
+    let bufferView = createBufferView(device, structuredBuffer);
+    __GPU_FOREACH(device, uint3(4, 1, 1), LAMBDA(uint3 dispatchThreadID)
+        { computeMain(convertBuffer(structuredBuffer), dispatchThreadID) ; });
+    printInitialValues(initialArray, 4);
+    printOutputValues(device, structuredBuffer, 4);
+    // NOTE(review): the bool returned by printOutputValues is dropped, so a failed
+    // read-back still yields `true` below — confirm this is intended.
+    return true;
+}
author	David Siher <32305650+dsiher@users.noreply.github.com>	2021-09-14 12:59:55 -0400
committer	GitHub <noreply@github.com>	2021-09-14 09:59:55 -0700
commit	502aa3812a82cf0d091cff0c67804e4ee448ac78 (patch)
tree	8ac8def3a30a6531cee7f6b0380d8929811fade5 /examples
parent	d9d42879c4b6c0202732897ec60a355ccc91f243 (diff)