8 files changed, 1198 insertions, 336 deletions
diff --git a/examples/hello-world/README.md b/examples/hello-world/README.md
index ba377b8cb..2bd6d4b45 100644
--- a/examples/hello-world/README.md
+++ b/examples/hello-world/README.md
@@ -1,12 +1,5 @@
 Slang "Hello World" Example
-===========================
+==========================
 
-The goal of this example is to demonstrate an almost minimal application that uses Slang for shading.
-
-The `shaders.slang` file contains simple vertex and fragment shader entry points. The shader code should compile as either Slang or HLSL code (that is, this example does not show off any new Slang language features).
-
-The `main.cpp` file contains the C++ application code, showing how to use the Slang API to load and compile the shader code to DirectX shader bytecode (DXBC).
-The application perform rendering using the D3D11 API, through a platform and graphics API abstraction layer that is implemented in `tools/gfx`.
-Note that this abstraction layer is *not* required in order to work with Slang, and it is just there to help us write example and test applications more conveniently.
-
-This example is not necessarily representative of best practices for integrating Slang into a production engine; the goal is merely to use the minimum amount of code possible to demonstrate a complete applicaiton that uses Slang.
+This example shows how to use the Slang API to compile a simple
+compute shader written in Slang into SPIRV and run it in Vulkan.
+\ No newline at end of file
diff --git a/examples/hello-world/hello-world.slang b/examples/hello-world/hello-world.slang
new file mode 100644
index 000000000..565924035
--- /dev/null
+++ b/examples/hello-world/hello-world.slang
@@ -0,0 +1,12 @@
+// hello-world.slang
+StructuredBuffer<float> buffer0;
+StructuredBuffer<float> buffer1;
+RWStructuredBuffer<float> result;
+
+[shader("compute")]
+[numthreads(1,1,1)]
+void computeMain(uint3 threadId : SV_DispatchThreadID)
+{
+    uint index = threadId.x;
+    result[index] = buffer0[index] + buffer1[index];
+}
diff --git a/examples/hello-world/main.cpp b/examples/hello-world/main.cpp
index 6b9104072..da149a8e0 100644
--- a/examples/hello-world/main.cpp
+++ b/examples/hello-world/main.cpp
@@ -1,137 +1,157 @@
 // main.cpp
 
-// This file implements an extremely simple example of loading and
-// executing a Slang shader program. This is primarily an example
-// of how to use Slang as a "drop-in" replacement for an existing
-// HLSL compiler like the `D3DCompile` API. More advanced usage
-// of advanced Slang language and API features is left to the
-// next example.
+// This file provides the application code for the `hello-world` example.
 //
-// The comments in the file will attempt to explain concepts as
-// they are introduced.
-//
-// Of course, in order to use the Slang API, we need to include
-// its header. We have set up the build options for this project
-// so that it is as simple as:
+
+// This example uses Vulkan to run a simple compute shader written in Slang.
+// The goal is to demonstrate how to use the Slang API to cross compile
+// shader code.
 //
 #include <slang.h>
-//
-// Other build setups are possible, and Slang doesn't assume that
-// its include directory must be added to your global include
-// path.
-
-// For the purposes of keeping the demo code as simple as possible,
-// while still retaining some level of portability, our examples
-// make use of a small platform and graphics API abstraction layer,
-// which is included in the Slang source distribution under the
-// `tools/` directory.
-//
-// Applications can of course use Slang without ever touching this
-// abstraction layer, so we will not focus on it when explaining
-// examples, except in places where best practices for interacting
-// with Slang may depend on an application/engine making certain
-// design choices in their abstraction layer.
-//
-#include "slang-gfx.h"
-#include "gfx-util/shader-cursor.h"
-#include "tools/platform/window.h"
-#include "slang-com-ptr.h"
-#include "source/core/slang-basic.h"
+#include <slang-com-ptr.h>
+
+#include "vulkan-api.h"
 #include "examples/example-base/example-base.h"
 
-using namespace gfx;
-using namespace Slang;
+using Slang::ComPtr;
 
-// For the purposes of a small example, we will define the vertex data for a
-// single triangle directly in the source file. It should be easy to extend
-// this example to load data from an external source, if desired.
-//
-struct Vertex
+struct HelloWorldExample
 {
-    float position[3];
-    float color[3];
+    // The Vulkan function pointers obtained by loading the Vulkan library.
+    VulkanAPI vkAPI;
+
+    // Vulkan objects used in this example.
+    VkQueue queue;
+    VkCommandPool commandPool = VK_NULL_HANDLE;
+
+    // Input and output buffers.
+    VkBuffer inOutBuffers[3] = {};
+    VkDeviceMemory bufferMemories[3] = {};
+
+    const size_t inputElementCount = 16;
+    const size_t bufferSize = sizeof(float) * inputElementCount;
+
+    // We use a staging buffer allocated on host-visible memory to
+    // upload/download data from GPU.
+    VkBuffer stagingBuffer = VK_NULL_HANDLE;
+    VkDeviceMemory stagingMemory = VK_NULL_HANDLE;
+
+    VkDescriptorSetLayout descriptorSetLayout = VK_NULL_HANDLE;
+    VkPipelineLayout pipelineLayout = VK_NULL_HANDLE;
+    VkPipeline pipeline = VK_NULL_HANDLE;
+
+    // Initializes the Vulkan instance and device.
+    int initVulkanInstanceAndDevice();
+
+    // This function contains the most interesting part of this example.
+    // It loads the `hello-world.slang` shader and compiles it using the Slang API
+    // into a SPIRV module, then creates a Vulkan pipeline from the compiled shader.
+    int createComputePipelineFromShader();
+
+    // Creates the input and output buffers.
+    int createInOutBuffers();
+
+    // Sets up descriptor set bindings and dispatches the compute task.
+    int dispatchCompute();
+
+    // Reads back and prints the result of the compute task.
+    int printComputeResults();
+
+    // Main logic of this example.
+    int run();
+
+    ~HelloWorldExample();
+
 };
 
-static const int kVertexCount = 3;
-static const Vertex kVertexData[kVertexCount] =
+int main()
 {
-    { { 0,  0, 0.5 }, { 1, 0, 0 } },
-    { { 0,  1, 0.5 }, { 0, 0, 1 } },
-    { { 1,  0, 0.5 }, { 0, 1, 0 } },
-};
+    HelloWorldExample example;
+    return example.run();
+}
 
-// The example application will be implemented as a `struct`, so that
-// we can scope the resources it allocates without using global variables.
-//
-struct HelloWorld : public WindowedAppBase
+/************************************************************/
+/* HelloWorldExample Implementation */
+/************************************************************/
+
+int HelloWorldExample::run()
 {
+    RETURN_ON_FAIL(initVulkanInstanceAndDevice());
+    RETURN_ON_FAIL(createComputePipelineFromShader());
+    RETURN_ON_FAIL(createInOutBuffers());
+    RETURN_ON_FAIL(dispatchCompute());
+    RETURN_ON_FAIL(printComputeResults());
+    return 0;
+}
 
-// Many Slang API functions return detailed diagnostic information
-// (error messages, warnings, etc.) as a "blob" of data, or return
-// a null blob pointer instead if there were no issues.
-//
-// For convenience, we define a subroutine that will dump the information
-// in a diagnostic blob if one is produced, and skip it otherwise.
-//
-void diagnoseIfNeeded(slang::IBlob* diagnosticsBlob)
+int HelloWorldExample::initVulkanInstanceAndDevice()
 {
-    if( diagnosticsBlob != nullptr )
+    if (initializeVulkanDevice(vkAPI) != 0)
     {
-        printf("%s", (const char*) diagnosticsBlob->getBufferPointer());
+        printf("Failed to load Vulkan.\n");
+        return -1;
     }
+
+    VkCommandPoolCreateInfo poolCreateInfo = {};
+    poolCreateInfo.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
+    poolCreateInfo.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT;
+    poolCreateInfo.queueFamilyIndex = vkAPI.queueFamilyIndex;
+    RETURN_ON_FAIL(vkAPI.vkCreateCommandPool(
+        vkAPI.device, &poolCreateInfo, nullptr, &commandPool));
+
+    vkAPI.vkGetDeviceQueue(vkAPI.device, vkAPI.queueFamilyIndex, 0, &queue);
+    return 0;
 }
 
-// The main task an application cares about is compiling shader code
-// from souce (if needed) and loading it through the chosen graphics API.
-//
-// In addition, an application may want to receive reflection information
-// about the program, which is what a `slang::ProgramLayout` provides.
-//
-gfx::Result loadShaderProgram(
-    gfx::IDevice*         device,
-    gfx::IShaderProgram**   outProgram)
+int HelloWorldExample::createComputePipelineFromShader()
 {
-    // We need to obatin a compilation session (`slang::ISession`) that will provide
-    // a scope to all the compilation and loading of code we do.
-    //
-    // Our example application uses the `gfx` graphics API abstraction layer, which already
-    // creates a Slang compilation session for us, so we just grab and use it here.
-    ComPtr<slang::ISession> slangSession;
-    slangSession = device->getSlangSession();
+    // First we need to create a Slang global session to work with the Slang API.
+    ComPtr<slang::IGlobalSession> slangGlobalSession;
+    RETURN_ON_FAIL(slang::createGlobalSession(slangGlobalSession.writeRef()));
+
+    // Next we create a compilation session to generate SPIRV code from Slang source.
+    slang::SessionDesc sessionDesc = {};
+    slang::TargetDesc targetDesc = {};
+    targetDesc.format = SLANG_SPIRV;
+    targetDesc.profile = slangGlobalSession->findProfile("glsl440");
+    sessionDesc.targetCount = 1;
+    sessionDesc.targets = &targetDesc;
+
+    ComPtr<slang::ISession> session;
+    RETURN_ON_FAIL(slangGlobalSession->createSession(sessionDesc, session.writeRef()));
 
-    // We can now start loading code into the slang session.
+    // Once the session has been obtained, we can start loading code into it.
     //
     // The simplest way to load code is by calling `loadModule` with the name of a Slang
-    // module. A call to `loadModule("MyStuff")` will behave more or less as if you
+    // module. A call to `loadModule("hello-world")` will behave more or less as if you
     // wrote:
     //
-    //      import MyStuff;
+    //      import hello_world;
     //
     // In a Slang shader file. The compiler will use its search paths to try to locate
-    // `MyModule.slang`, then compile and load that file. If a matching module had
+    // `hello-world.slang`, then compile and load that file. If a matching module had
     // already been loaded previously, that would be used directly.
-    //
-    ComPtr<slang::IBlob> diagnosticsBlob;
-    slang::IModule* module = slangSession->loadModule("shaders", diagnosticsBlob.writeRef());
-    diagnoseIfNeeded(diagnosticsBlob);
-    if(!module)
-        return SLANG_FAIL;
+    slang::IModule* slangModule = nullptr;
+    {
+        ComPtr<slang::IBlob> diagnosticBlob;
+        slangModule = session->loadModule("hello-world", diagnosticBlob.writeRef());
+        diagnoseIfNeeded(diagnosticBlob);
+        if (!slangModule)
+            return -1;
+    }
 
-    // Loading the `shaders` module will compile and check all the shader code in it,
+    // Loading the `hello-world` module will compile and check all the shader code in it,
     // including the shader entry points we want to use. Now that the module is loaded
     // we can look up those entry points by name.
     //
     // Note: If you are using this `loadModule` approach to load your shader code it is
     // important to tag your entry point functions with the `[shader("...")]` attribute
-    // (e.g., `[shader("vertex")] void vertexMain(...)`). Without that information there
+    // (e.g., `[shader("compute")] void computeMain(...)`). Without that information there
     // is no umambiguous way for the compiler to know which functions represent entry
     // points when it parses your code via `loadModule()`.
     //
-    ComPtr<slang::IEntryPoint> vertexEntryPoint;
-    SLANG_RETURN_ON_FAIL(module->findEntryPointByName("vertexMain", vertexEntryPoint.writeRef()));
-    //
-    ComPtr<slang::IEntryPoint> fragmentEntryPoint;
-    SLANG_RETURN_ON_FAIL(module->findEntryPointByName("fragmentMain", fragmentEntryPoint.writeRef()));
+    ComPtr<slang::IEntryPoint> entryPoint;
+    slangModule->findEntryPointByName("computeMain", entryPoint.writeRef());
 
     // At this point we have a few different Slang API objects that represent
     // pieces of our code: `module`, `vertexEntryPoint`, and `fragmentEntryPoint`.
@@ -147,18 +167,8 @@ gfx::Result loadShaderProgram(
     // and entry points.
     //
     Slang::List<slang::IComponentType*> componentTypes;
-    componentTypes.add(module);
-
-    // Later on when we go to extract compiled kernel code for our vertex
-    // and fragment shaders, we will need to make use of their order within
-    // the composition, so we will record the relative ordering of the entry
-    // points here as we add them.
-    int entryPointCount = 0;
-    int vertexEntryPointIndex = entryPointCount++;
-    componentTypes.add(vertexEntryPoint);
-
-    int fragmentEntryPointIndex = entryPointCount++;
-    componentTypes.add(fragmentEntryPoint);
+    componentTypes.add(slangModule);
+    componentTypes.add(entryPoint);
 
     // Actually creating the composite component type is a single operation
     // on the Slang session, but the operation could potentially fail if
@@ -166,233 +176,310 @@ gfx::Result loadShaderProgram(
     // combine multiple copies of the same module), so we need to deal
     // with the possibility of diagnostic output.
     //
-    ComPtr<slang::IComponentType> linkedProgram;
-    SlangResult result = slangSession->createCompositeComponentType(
-        componentTypes.getBuffer(),
-        componentTypes.getCount(),
-        linkedProgram.writeRef(),
-        diagnosticsBlob.writeRef());
-    diagnoseIfNeeded(diagnosticsBlob);
-    SLANG_RETURN_ON_FAIL(result);
-
-    // Once we've described the particular composition of entry points
-    // that we want to compile, we defer to the graphics API layer
-    // to extract compiled kernel code and load it into the API-specific
-    // program representation.
-    //
-    gfx::IShaderProgram::Desc programDesc = {};
-    programDesc.pipelineType = gfx::PipelineType::Graphics;
-    programDesc.slangProgram = linkedProgram;
-    SLANG_RETURN_ON_FAIL(device->createProgram(programDesc, outProgram));
-
-    return SLANG_OK;
-}
+    ComPtr<slang::IComponentType> composedProgram;
+    {
+        ComPtr<slang::IBlob> diagnosticsBlob;
+        SlangResult result = session->createCompositeComponentType(
+            componentTypes.getBuffer(),
+            componentTypes.getCount(),
+            composedProgram.writeRef(),
+            diagnosticsBlob.writeRef());
+        diagnoseIfNeeded(diagnosticsBlob);
+        RETURN_ON_FAIL(result);
+    }
 
-//
-// The above function shows the core of what is required to use the
-// Slang API as a simple compiler (e.g., a drop-in replacement for
-// fxc or dxc).
-//
-// The rest of this file implements an extremely simple rendering application
-// that will execute the vertex/fragment shaders loaded with the function
-// we have just defined.
-//
+    // Now we can call `composedProgram->getEntryPointCode()` to retrieve the
+    // compiled SPIRV code that we will use to create a vulkan compute pipeline.
+    // This will trigger the final Slang compilation and spirv code generation.
+    ComPtr<slang::IBlob> spirvCode;
+    {
+        ComPtr<slang::IBlob> diagnosticsBlob;
+        SlangResult result = composedProgram->getEntryPointCode(
+            0, 0, spirvCode.writeRef(), diagnosticsBlob.writeRef());
+        diagnoseIfNeeded(diagnosticsBlob);
+        RETURN_ON_FAIL(result);
+    }
 
-// We will define global variables for the various platform and
-// graphics API objects that our application needs:
-//
-// As a reminder, *none* of these are Slang API objects. All
-// of them come from the utility library we are using to simplify
-// building an example program.
-//
-ComPtr<gfx::IPipelineState> gPipelineState;
-ComPtr<gfx::IBufferResource> gVertexBuffer;
+    // The following steps are all Vulkan API calls to create a pipeline.
+
+    // First we need to create a descriptor set layout and a pipeline layout.
+    // In this example, the pipeline layout is simple: we have a single descriptor
+    // set with three buffer descriptors for our input/output storage buffers.
+    // General applications typically have much more complicated pipeline layouts,
+    // and should consider using Slang's reflection API to learn about the shader
+    // parameter layout of a shader program. However, Slang's reflection API is
+    // out of scope of this example.
+    VkDescriptorSetLayoutCreateInfo descSetLayoutCreateInfo = {
+        VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO};
+    descSetLayoutCreateInfo.bindingCount = 3;
+    VkDescriptorSetLayoutBinding bindings[3];
+    for (int i = 0; i < 3; i++)
+    {
+        auto& binding = bindings[i];
+        binding.binding = i;
+        binding.descriptorCount = 1;
+        binding.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
+        binding.stageFlags = VK_SHADER_STAGE_ALL;
+        binding.pImmutableSamplers = nullptr;
+    }
+    descSetLayoutCreateInfo.pBindings = bindings;
+    RETURN_ON_FAIL(vkAPI.vkCreateDescriptorSetLayout(
+        vkAPI.device, &descSetLayoutCreateInfo, nullptr, &descriptorSetLayout));
+    VkPipelineLayoutCreateInfo pipelineLayoutCreateInfo = {
+        VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO};
+    pipelineLayoutCreateInfo.setLayoutCount = 1;
+    pipelineLayoutCreateInfo.pSetLayouts = &descriptorSetLayout;
+    RETURN_ON_FAIL(vkAPI.vkCreatePipelineLayout(
+        vkAPI.device, &pipelineLayoutCreateInfo, nullptr, &pipelineLayout));
+
+    // Next we create a shader module from the compiled SPIRV code.
+    VkShaderModuleCreateInfo shaderCreateInfo = {VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO};
+    shaderCreateInfo.codeSize = spirvCode->getBufferSize();
+    shaderCreateInfo.pCode = static_cast<const uint32_t*>(spirvCode->getBufferPointer());
+    VkShaderModule vkShaderModule;
+    RETURN_ON_FAIL(
+        vkAPI.vkCreateShaderModule(vkAPI.device, &shaderCreateInfo, nullptr, &vkShaderModule));
+
+    // Now we have all we need to create a compute pipeline.
+    VkComputePipelineCreateInfo pipelineCreateInfo = {
+        VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO};
+    pipelineCreateInfo.stage.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
+    pipelineCreateInfo.stage.module = vkShaderModule;
+    pipelineCreateInfo.stage.stage = VK_SHADER_STAGE_COMPUTE_BIT;
+    pipelineCreateInfo.stage.pName = "main";
+    pipelineCreateInfo.layout = pipelineLayout;
+    RETURN_ON_FAIL(vkAPI.vkCreateComputePipelines(
+        vkAPI.device, VK_NULL_HANDLE, 1, &pipelineCreateInfo, nullptr, &pipeline));
+
+    // We can destroy the shader module now since it will no longer be used.
+    vkAPI.vkDestroyShaderModule(vkAPI.device, vkShaderModule, nullptr);
+
+    return 0;
+}
 
-// Now that we've covered the function that actually loads and
-// compiles our Slang shade code, we can go through the rest
-// of the application code without as much commentary.
-//
-Slang::Result initialize()
+int HelloWorldExample::createInOutBuffers()
 {
-    // Create a window for our application to render into.
-    //
-    initializeBase("hello-world", 1024, 768);
-
-    // We will create objects needed to configur the "input assembler"
-    // (IA) stage of the D3D pipeline.
-    //
-    // First, we create an input layout:
-    //
-    InputElementDesc inputElements[] = {
-        { "POSITION", 0, Format::RGB_Float32, offsetof(Vertex, position) },
-        { "COLOR",    0, Format::RGB_Float32, offsetof(Vertex, color) },
-    };
-    auto inputLayout = gDevice->createInputLayout(
-        &inputElements[0],
-        2);
-    if(!inputLayout) return SLANG_FAIL;
-
-    // Next we allocate a vertex buffer for our pre-initialized
-    // vertex data.
-    //
-    IBufferResource::Desc vertexBufferDesc;
-    vertexBufferDesc.init(kVertexCount * sizeof(Vertex));
-    vertexBufferDesc.setDefaults(IResource::Usage::VertexBuffer);
-    gVertexBuffer = gDevice->createBufferResource(
-        IResource::Usage::VertexBuffer,
-        vertexBufferDesc,
-        &kVertexData[0]);
-    if(!gVertexBuffer) return SLANG_FAIL;
-
-    // Now we will use our `loadShaderProgram` function to load
-    // the code from `shaders.slang` into the graphics API.
-    //
-    ComPtr<IShaderProgram> shaderProgram;
-    SLANG_RETURN_ON_FAIL(loadShaderProgram(gDevice, shaderProgram.writeRef()));
-
-    // Following the D3D12/Vulkan style of API, we need a pipeline state object
-    // (PSO) to encapsulate the configuration of the overall graphics pipeline.
-    //
-    GraphicsPipelineStateDesc desc;
-    desc.inputLayout = inputLayout;
-    desc.program = shaderProgram;
-    desc.framebufferLayout = gFramebufferLayout;
-    auto pipelineState = gDevice->createGraphicsPipelineState(desc);
-    if (!pipelineState)
-        return SLANG_FAIL;
+    // Create input and output buffers that reside in device-local memory.
+    for (int i = 0; i < 3; i++)
+    {
+        VkBufferCreateInfo bufferCreateInfo = {VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO};
+        bufferCreateInfo.size = bufferSize;
+        bufferCreateInfo.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
+                                 VK_BUFFER_USAGE_TRANSFER_SRC_BIT |
+                                 VK_BUFFER_USAGE_TRANSFER_DST_BIT;
+        RETURN_ON_FAIL(
+            vkAPI.vkCreateBuffer(vkAPI.device, &bufferCreateInfo, nullptr, &inOutBuffers[i]));
+        VkMemoryRequirements memoryReqs = {};
+        vkAPI.vkGetBufferMemoryRequirements(vkAPI.device, inOutBuffers[i], &memoryReqs);
+
+        int memoryTypeIndex = vkAPI.findMemoryTypeIndex(
+            memoryReqs.memoryTypeBits, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
+        assert(memoryTypeIndex >= 0);
+
+        VkMemoryPropertyFlags actualMemoryProperites =
+            vkAPI.deviceMemoryProperties.memoryTypes[memoryTypeIndex].propertyFlags;
+
+        VkMemoryAllocateInfo allocateInfo = {VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO};
+        allocateInfo.allocationSize = memoryReqs.size;
+        allocateInfo.memoryTypeIndex = memoryTypeIndex;
+        RETURN_ON_FAIL(
+            vkAPI.vkAllocateMemory(vkAPI.device, &allocateInfo, nullptr, &bufferMemories[i]));
+        RETURN_ON_FAIL(
+            vkAPI.vkBindBufferMemory(vkAPI.device, inOutBuffers[i], bufferMemories[i], 0));
+    }
 
-    gPipelineState = pipelineState;
+    // Create the device memory and buffer object used for reading/writing
+    // data to/from the device local buffers.
+    {
+        VkBufferCreateInfo bufferCreateInfo = {VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO};
+        bufferCreateInfo.size = bufferSize;
+        bufferCreateInfo.usage =
+            VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
+        RETURN_ON_FAIL(
+            vkAPI.vkCreateBuffer(vkAPI.device, &bufferCreateInfo, nullptr, &stagingBuffer));
+        VkMemoryRequirements memoryReqs = {};
+        vkAPI.vkGetBufferMemoryRequirements(vkAPI.device, stagingBuffer, &memoryReqs);
+
+        int memoryTypeIndex = vkAPI.findMemoryTypeIndex(
+            memoryReqs.memoryTypeBits,
+            VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);
+        assert(memoryTypeIndex >= 0);
+
+        VkMemoryPropertyFlags actualMemoryProperites =
+            vkAPI.deviceMemoryProperties.memoryTypes[memoryTypeIndex].propertyFlags;
+
+        VkMemoryAllocateInfo allocateInfo = {VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO};
+        allocateInfo.allocationSize = memoryReqs.size;
+        allocateInfo.memoryTypeIndex = memoryTypeIndex;
+        RETURN_ON_FAIL(
+            vkAPI.vkAllocateMemory(vkAPI.device, &allocateInfo, nullptr, &stagingMemory));
+        RETURN_ON_FAIL(vkAPI.vkBindBufferMemory(vkAPI.device, stagingBuffer, stagingMemory, 0));
+    }
 
-    return SLANG_OK;
+    // Map the staging buffer and write in the initial input content.
+    float* stagingBufferData = nullptr;
+    vkAPI.vkMapMemory(vkAPI.device, stagingMemory, 0, bufferSize, 0, (void**)&stagingBufferData);
+    if (!stagingBufferData)
+        return -1;
+    for (size_t i = 0; i < inputElementCount; i++)
+        stagingBufferData[i] = static_cast<float>(i);
+    vkAPI.vkUnmapMemory(vkAPI.device, stagingMemory);
+
+    // Create a temporary command buffer for recording commands that write initial
+    // data into the input buffers.
+    VkCommandBuffer uploadCommandBuffer;
+    VkCommandBufferAllocateInfo commandBufferAllocInfo = {
+        VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO};
+    commandBufferAllocInfo.commandBufferCount = 1;
+    commandBufferAllocInfo.commandPool = commandPool;
+    commandBufferAllocInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
+    RETURN_ON_FAIL(vkAPI.vkAllocateCommandBuffers(vkAPI.device, &commandBufferAllocInfo, &uploadCommandBuffer));
+
+    VkCommandBufferBeginInfo beginInfo = {VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO};
+    vkAPI.vkBeginCommandBuffer(uploadCommandBuffer, &beginInfo);
+    VkBufferCopy bufferCopy = {};
+    bufferCopy.size = bufferSize;
+    vkAPI.vkCmdCopyBuffer(uploadCommandBuffer, stagingBuffer, inOutBuffers[0], 1, &bufferCopy);
+    vkAPI.vkCmdCopyBuffer(uploadCommandBuffer, stagingBuffer, inOutBuffers[1], 1, &bufferCopy);
+    vkAPI.vkEndCommandBuffer(uploadCommandBuffer);
+    VkSubmitInfo submitInfo = {VK_STRUCTURE_TYPE_SUBMIT_INFO};
+    submitInfo.commandBufferCount = 1;
+    submitInfo.pCommandBuffers = &uploadCommandBuffer;
+    vkAPI.vkQueueSubmit(queue, 1, &submitInfo, VK_NULL_HANDLE);
+    vkAPI.vkQueueWaitIdle(queue);
+    vkAPI.vkFreeCommandBuffers(vkAPI.device, commandPool, 1, &uploadCommandBuffer);
+    return 0;
 }
 
-// With the initialization out of the way, we can now turn our attention
-// to the per-frame rendering logic. As with the initialization, there is
-// nothing really Slang-specific here, so the commentary doesn't need
-// to be very detailed.
-//
-virtual void renderFrame(int frameBufferIndex) override
+int HelloWorldExample::dispatchCompute()
 {
-    ComPtr<ICommandBuffer> commandBuffer = gTransientHeaps[frameBufferIndex]->createCommandBuffer();
-    auto renderEncoder = commandBuffer->encodeRenderCommands(gRenderPass, gFramebuffers[frameBufferIndex]);
-
-    gfx::Viewport viewport = {};
-    viewport.maxZ = 1.0f;
-    viewport.extentX = (float)windowWidth;
-    viewport.extentY = (float)windowHeight;
-    renderEncoder->setViewportAndScissor(viewport);
-
-    // In order to bind shader parameters to the pipeline, we need
-    // to know how those parameters were assigned to locations/bindings/registers
-    // for the target graphics API.
-    //
-    // The Slang compiler assigns locations to parameters in a deterministic
-    // fashion, so it is possible for a programmer to hard-code locations
-    // into their application code that will match up with their shaders.
-    //
-    // Hard-coding of locations can become intractable as an application needs
-    // to support more different target platforms and graphics APIs, as well
-    // as more shaders with different specialized variants.
-    //
-    // Rather than rely on hard-coded locations, our examples will make use of
-    // reflection information provided by the Slang compiler (see `programLayout`
-    // above), and our example graphics API layer will translate that reflection
-    // information into a layout for a "root shader object."
-    //
-    // The root object will store values/bindings for all of the parameters in
-    // the `IShaderProgram` used to create the pipeline state. At a conceptual
-    // level we can think of `rootObject` as representing the "global scope" of
-    // the shader program that was loaded; it has entries for each global shader
-    // parameter that was declared.
-    //
-    // Readers who are familiar with D3D12 or Vulkan might think of this root
-    // layout as being similar in spirit to a "root signature" or "pipeline layout."
-    //
-    // We start parameter binding by binding the pipeline state in command encoder.
-    // This method will return a transient root shader object for us to write our
-    // shader parameters into.
-    //
-    auto rootObject = renderEncoder->bindPipeline(gPipelineState);
-
-    // We will update the model-view-projection matrix that is passed
-    // into the shader code via the `Uniforms` buffer on a per-frame
-    // basis, even though the data that is loaded does not change
-    // per-frame (we always use an identity matrix).
-    //
-    auto deviceInfo = gDevice->getDeviceInfo();
-
-    // We know that `rootObject` is a root shader object created
-    // from our program, and that it is set up to hold values for
-    // all the parameter of that program. In order to actually
-    // set values, we need to be able to look up the location
-    // of speciic parameter that we want to set.
-    //
-    // Our example graphics API layer supports this operation
-    // with the idea of a *shader cursor* which can be thought
-    // of as pointing "into" a particular shader object at
-    // some location/offset. This design choice abstracts over
-    // the many ways that different platforms and APIs represent
-    // the necessary offset information.
-    //
-    // We construct an initial shader cursor that points at the
-    // entire shader program. You can think of this as akin to
-    // a diretory path of `/` for the root directory in a file
-    // system.
-    //
-    ShaderCursor rootCursor(rootObject);
-    //
-    // Next, we use a convenience overload of `operator[]` to
-    // navigate from the root cursor down to the parameter we
-    // want to set.
-    //
-    // The operation `rootCursor["Uniforms"]` looks up the
-    // offset/location of the global shader parameter `Uniforms`
-    // (which is a uniform/constant buffer), and the subsequent
-    // `["modelViewProjection"]` step navigates from there down
-    // to the member named `modelViewProjection` in that buffer.
-    //
-    // Once we have formed a cursor that "points" at the
-    // model-view projection matrix, we can set its data directly.
-    //
-    rootCursor["Uniforms"]["modelViewProjection"].setData(
-        deviceInfo.identityProjectionMatrix, sizeof(float) * 16);
-    //
-    // Some readers might be concerned about the performance o
-    // the above operations because of the use of strings. For
-    // those readers, here are two things to note:
-    //
-    // * While these `operator[]` steps do need to perform string
-    //   comparisons, they do *not* make copies of the strings or
-    //   perform any heap allocation.
-    //
-    // * There are other overloads of `operator[]` that use the
-    //   *index* of a parameter/field instead of its name, and those
-    //   operations have fixed/constant overhead and perform no
-    //   string comparisons. The indices used are independent of
-    //   the target platform and graphics API, and can thus be
-    //   hard-coded even in cross-platform code.
-    //
-
-    // We also need to set up a few pieces of fixed-function pipeline
-    // state that are not bound by the pipeline state above.
-    //
-    renderEncoder->setVertexBuffer(0, gVertexBuffer, sizeof(Vertex));
-    renderEncoder->setPrimitiveTopology(PrimitiveTopology::TriangleList);
-
-    // Finally, we are ready to issue a draw call for a single triangle.
-    //
-    renderEncoder->draw(3);
-    renderEncoder->endEncoding();
-    commandBuffer->close();
-    gQueue->executeCommandBuffer(commandBuffer);
-
-    // With that, we are done drawing for one frame, and ready for the next.
-    //
-    gSwapchain->present();
+    // Create a descriptor pool.
+    VkDescriptorPoolCreateInfo descriptorPoolCreateInfo = {
+        VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO};
+    VkDescriptorPoolSize poolSizes[] = {
+        VkDescriptorPoolSize{VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 16}};
+    descriptorPoolCreateInfo.maxSets = 4;
+    descriptorPoolCreateInfo.poolSizeCount = sizeof(poolSizes) / sizeof(VkDescriptorPoolSize);
+    descriptorPoolCreateInfo.pPoolSizes = poolSizes;
+    descriptorPoolCreateInfo.flags = 0;
+    VkDescriptorPool descriptorPool = VK_NULL_HANDLE;
+    RETURN_ON_FAIL(vkAPI.vkCreateDescriptorPool(
+        vkAPI.device, &descriptorPoolCreateInfo, nullptr, &descriptorPool));
+
+    // Allocate descriptor set.
+    VkDescriptorSetAllocateInfo descSetAllocInfo = {VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO};
+    descSetAllocInfo.descriptorPool = descriptorPool;
+    descSetAllocInfo.descriptorSetCount = 1;
+    descSetAllocInfo.pSetLayouts = &descriptorSetLayout;
+    VkDescriptorSet descriptorSet = VK_NULL_HANDLE;
+    RETURN_ON_FAIL(vkAPI.vkAllocateDescriptorSets(vkAPI.device, &descSetAllocInfo, &descriptorSet));
+
+    // Write descriptor set.
+    VkWriteDescriptorSet descriptorSetWrites[3] = {};
+    VkDescriptorBufferInfo bufferInfo[3];
+    for (int i = 0; i < 3; i++)
+    {
+        bufferInfo[i].buffer = inOutBuffers[i];
+        bufferInfo[i].offset = 0;
+        bufferInfo[i].range = bufferSize;
+
+        descriptorSetWrites[i].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
+        descriptorSetWrites[i].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
+        descriptorSetWrites[i].descriptorCount = 1;
+        descriptorSetWrites[i].dstBinding = i;
+        descriptorSetWrites[i].dstSet = descriptorSet;
+        descriptorSetWrites[i].pBufferInfo = &bufferInfo[i];
+    }
+    vkAPI.vkUpdateDescriptorSets(vkAPI.device, 3, descriptorSetWrites, 0, nullptr);
+
+    // Allocate command buffer and record dispatch commands.
+    VkCommandBuffer commandBuffer;
+    VkCommandBufferAllocateInfo commandBufferAllocInfo = {
+        VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO};
+    commandBufferAllocInfo.commandBufferCount = 1;
+    commandBufferAllocInfo.commandPool = commandPool;
+    commandBufferAllocInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
+    RETURN_ON_FAIL(
+        vkAPI.vkAllocateCommandBuffers(vkAPI.device, &commandBufferAllocInfo, &commandBuffer));
+    VkCommandBufferBeginInfo beginInfo = {VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO};
+    vkAPI.vkBeginCommandBuffer(commandBuffer, &beginInfo);
+    vkAPI.vkCmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
+    vkAPI.vkCmdBindDescriptorSets(
+        commandBuffer,
+        VK_PIPELINE_BIND_POINT_COMPUTE,
+        pipelineLayout,
+        0,
+        1,
+        &descriptorSet,
+        0,
+        nullptr);
+    vkAPI.vkCmdDispatch(commandBuffer, (uint32_t)inputElementCount, 1, 1);
+    vkAPI.vkEndCommandBuffer(commandBuffer);
+
+    // Submit command buffer and wait.
+    VkSubmitInfo submitInfo = {VK_STRUCTURE_TYPE_SUBMIT_INFO};
+    submitInfo.commandBufferCount = 1;
+    submitInfo.pCommandBuffers = &commandBuffer;
+    vkAPI.vkQueueSubmit(queue, 1, &submitInfo, VK_NULL_HANDLE);
+    vkAPI.vkQueueWaitIdle(queue);
+    vkAPI.vkFreeCommandBuffers(vkAPI.device, commandPool, 1, &commandBuffer);
+
+    // Clean up.
+    vkAPI.vkDestroyDescriptorPool(vkAPI.device, descriptorPool, nullptr);
+    return 0;
 }
 
-};
+int HelloWorldExample::printComputeResults()
+{
+    // Read back and print the results: first allocate a command buffer for the copy.
+    VkCommandBuffer commandBuffer;
+    VkCommandBufferAllocateInfo commandBufferAllocInfo = {
+        VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO};
+    commandBufferAllocInfo.commandBufferCount = 1;
+    commandBufferAllocInfo.commandPool = commandPool;
+    commandBufferAllocInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
+    RETURN_ON_FAIL(
+        vkAPI.vkAllocateCommandBuffers(vkAPI.device, &commandBufferAllocInfo, &commandBuffer));
+
+    // Record commands to copy output buffer into staging buffer.
+    VkCommandBufferBeginInfo beginInfo = {VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO};
+    vkAPI.vkBeginCommandBuffer(commandBuffer, &beginInfo);
+    VkBufferCopy bufferCopy = {};
+    bufferCopy.size = bufferSize;
+    vkAPI.vkCmdCopyBuffer(commandBuffer, inOutBuffers[2], stagingBuffer, 1, &bufferCopy);
+    vkAPI.vkEndCommandBuffer(commandBuffer);
+
+    // Execute command buffer and wait.
+    VkSubmitInfo submitInfo = {VK_STRUCTURE_TYPE_SUBMIT_INFO};
+    submitInfo.commandBufferCount = 1;
+    submitInfo.pCommandBuffers = &commandBuffer;
+    vkAPI.vkQueueSubmit(queue, 1, &submitInfo, VK_NULL_HANDLE);
+    vkAPI.vkQueueWaitIdle(queue);
+    vkAPI.vkFreeCommandBuffers(vkAPI.device, commandPool, 1, &commandBuffer);
+
+    // Map and read back staging buffer.
+    float* stagingBufferData = nullptr;
+    vkAPI.vkMapMemory(vkAPI.device, stagingMemory, 0, bufferSize, 0, (void**)&stagingBufferData);
+    if (!stagingBufferData)
+        return -1;
+    for (size_t i = 0; i < inputElementCount; i++)
+        printf("%f\n", stagingBufferData[i]);
+    // Unmap again: mapping a memory object that is still mapped is invalid, so a second call would fail.
+    vkAPI.vkUnmapMemory(vkAPI.device, stagingMemory);
+    return 0;
+}
 
-// This macro instantiates an appropriate main function to
-// run the application defined above.
-PLATFORM_UI_MAIN(innerMain<HelloWorld>)
+HelloWorldExample::~HelloWorldExample() // Destroys the Vulkan objects created by the example.
+{
+    vkAPI.vkDestroyPipeline(vkAPI.device, pipeline, nullptr); // NOTE(review): device procs stay nullptr until initDeviceProcs() runs - confirm this is never reached after a failed init.
+    for (int i = 0; i < 3; i++) // The three buffers written into the descriptor set, each with its memory.
+    {
+        vkAPI.vkDestroyBuffer(vkAPI.device, inOutBuffers[i], nullptr);
+        vkAPI.vkFreeMemory(vkAPI.device, bufferMemories[i], nullptr);
+    }
+    vkAPI.vkDestroyBuffer(vkAPI.device, stagingBuffer, nullptr); // Read-back buffer used by printComputeResults().
+    vkAPI.vkFreeMemory(vkAPI.device, stagingMemory, nullptr);
+    vkAPI.vkDestroyPipelineLayout(vkAPI.device, pipelineLayout, nullptr);
+    vkAPI.vkDestroyDescriptorSetLayout(vkAPI.device, descriptorSetLayout, nullptr);
+    vkAPI.vkDestroyCommandPool(vkAPI.device, commandPool, nullptr);
+}
diff --git a/examples/hello-world/vulkan-api.cpp b/examples/hello-world/vulkan-api.cpp
new file mode 100644
index 000000000..529ca7196
--- /dev/null
+++ b/examples/hello-world/vulkan-api.cpp
@@ -0,0 +1,228 @@
+#include "vulkan-api.h"
+#include "slang.h"
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <assert.h>
+#include <vector>
+
+#if SLANG_WINDOWS_FAMILY
+#    include <windows.h>
+#else
+#    include <dlfcn.h>
+#endif
+
+#if _DEBUG
+#define ENABLE_VALIDATION_LAYER 1
+#endif
+
+VKAPI_ATTR VkBool32 VKAPI_CALL debugMessageCallback( // Debug-report callback: prints each message to stdout.
+    VkDebugReportFlagsEXT /*flags*/,
+    VkDebugReportObjectTypeEXT /*objType*/,
+    uint64_t /*srcObject*/,
+    size_t /*location*/,
+    int32_t /*msgCode*/,
+    const char* pLayerPrefix,
+    const char* pMsg,
+    void* /*pUserData*/)
+{
+    printf("[%s]: %s\n", pLayerPrefix, pMsg);
+    return VK_FALSE; // Applications should always return VK_FALSE; VK_TRUE is reserved for layer development.
+}
+
+int initializeVulkanDevice(VulkanAPI& api) // Returns 0 on success and -1 on any failure.
+{
+    // Load vulkan library.
+    const char* dynamicLibraryName = "Unknown";
+
+#if SLANG_WINDOWS_FAMILY
+    dynamicLibraryName = "vulkan-1.dll";
+    HMODULE module = ::LoadLibraryA(dynamicLibraryName);
+    api.vulkanLibraryHandle = (void*)module;
+#define VK_API_GET_GLOBAL_PROC(x) api.x = (PFN_##x)GetProcAddress(module, #x);
+#else
+    dynamicLibraryName = "libvulkan.so.1";
+    api.vulkanLibraryHandle = dlopen(dynamicLibraryName, RTLD_NOW);
+#define VK_API_GET_GLOBAL_PROC(x) api.x = (PFN_##x)dlsym(api.vulkanLibraryHandle, #x);
+#endif
+
+    // Initialize all the global functions.
+    VK_API_ALL_GLOBAL_PROCS(VK_API_GET_GLOBAL_PROC)
+    if (!api.vkCreateInstance)
+        return -1;
+
+    // Create Vulkan Instance.
+    VkApplicationInfo applicationInfo = {VK_STRUCTURE_TYPE_APPLICATION_INFO};
+    applicationInfo.pApplicationName = "slang-hello-world";
+    applicationInfo.pEngineName = "slang-hello-world";
+    applicationInfo.apiVersion = VK_API_VERSION_1_0;
+    applicationInfo.engineVersion = 1;
+    applicationInfo.applicationVersion = 1;
+    const char* instanceExtensions[] = {
+        VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME,
+        VK_EXT_DEBUG_REPORT_EXTENSION_NAME,
+    };
+    VkInstanceCreateInfo instanceCreateInfo = {VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO};
+    instanceCreateInfo.pApplicationInfo = &applicationInfo;
+    instanceCreateInfo.enabledExtensionCount = SLANG_COUNT_OF(instanceExtensions);
+    instanceCreateInfo.ppEnabledExtensionNames = &instanceExtensions[0];
+    std::vector<const char*> layers;
+#ifdef ENABLE_VALIDATION_LAYER
+    layers.push_back("VK_LAYER_KHRONOS_validation");
+#endif
+    if (layers.size())
+    {
+        instanceCreateInfo.ppEnabledLayerNames = &layers[0];
+        instanceCreateInfo.enabledLayerCount = (uint32_t)layers.size();
+    }
+    if (api.vkCreateInstance(&instanceCreateInfo, nullptr, &api.instance) != 0)
+        return -1;
+
+    // Load instance functions.
+    api.initInstanceProcs();
+
+    // Create debug report callback.
+    if (api.vkCreateDebugReportCallbackEXT)
+    {
+        VkDebugReportFlagsEXT debugFlags =
+            VK_DEBUG_REPORT_ERROR_BIT_EXT | VK_DEBUG_REPORT_WARNING_BIT_EXT;
+
+        VkDebugReportCallbackCreateInfoEXT debugCreateInfo = {
+            VK_STRUCTURE_TYPE_DEBUG_REPORT_CREATE_INFO_EXT};
+        debugCreateInfo.pfnCallback = &debugMessageCallback;
+        debugCreateInfo.pUserData = nullptr;
+        debugCreateInfo.flags = debugFlags;
+
+        RETURN_ON_FAIL(api.vkCreateDebugReportCallbackEXT(
+            api.instance, &debugCreateInfo, nullptr, &api.debugReportCallback));
+    }
+
+    // Enumerate physical devices.
+    uint32_t numPhysicalDevices = 0;
+    RETURN_ON_FAIL(api.vkEnumeratePhysicalDevices(api.instance, &numPhysicalDevices, nullptr));
+    if (numPhysicalDevices == 0)
+        return -1; // No Vulkan device present; indexing element 0 below would be out of bounds.
+    std::vector<VkPhysicalDevice> physicalDevices(numPhysicalDevices);
+    RETURN_ON_FAIL(api.vkEnumeratePhysicalDevices(
+        api.instance, &numPhysicalDevices, physicalDevices.data()));
+
+    // We will use device 0.
+    api.initPhysicalDevice(physicalDevices[0]);
+
+    VkDeviceCreateInfo deviceCreateInfo = {VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO};
+    deviceCreateInfo.queueCreateInfoCount = 1;
+    deviceCreateInfo.pEnabledFeatures = &api.deviceFeatures;
+
+    // Find proper queue family index.
+    uint32_t numQueueFamilies = 0;
+    api.vkGetPhysicalDeviceQueueFamilyProperties(api.physicalDevice, &numQueueFamilies, nullptr);
+
+    std::vector<VkQueueFamilyProperties> queueFamilies;
+    queueFamilies.resize(numQueueFamilies);
+    api.vkGetPhysicalDeviceQueueFamilyProperties(
+        api.physicalDevice, &numQueueFamilies, queueFamilies.data()); // data() is safe on an empty vector.
+
+    // Find a queue that can service our needs.
+    auto requiredQueueFlags = VK_QUEUE_COMPUTE_BIT;
+    for (int i = 0; i < int(numQueueFamilies); ++i)
+    {
+        if ((queueFamilies[i].queueFlags & requiredQueueFlags) == requiredQueueFlags)
+        {
+            api.queueFamilyIndex = i;
+            break;
+        }
+    }
+    if (api.queueFamilyIndex == -1)
+        return -1;
+
+    VkDeviceQueueCreateInfo queueCreateInfo = {VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO};
+    float queuePriority = 0.0f;
+    queueCreateInfo.queueFamilyIndex = api.queueFamilyIndex;
+    queueCreateInfo.queueCount = 1;
+    queueCreateInfo.pQueuePriorities = &queuePriority;
+    deviceCreateInfo.pQueueCreateInfos = &queueCreateInfo;
+    deviceCreateInfo.enabledExtensionCount = 0;
+    deviceCreateInfo.ppEnabledExtensionNames = nullptr;
+    RETURN_ON_FAIL(api.vkCreateDevice(api.physicalDevice, &deviceCreateInfo, nullptr, &api.device));
+
+    // Load device functions.
+    api.initDeviceProcs();
+
+    return 0;
+}
+
+int VulkanAPI::initInstanceProcs() // Loads the instance-level function pointers; always returns 0.
+{
+    assert(instance && vkGetInstanceProcAddr != nullptr);
+    // Each listed name `x` is assigned from vkGetInstanceProcAddr(instance, "x").
+#define VK_API_GET_INSTANCE_PROC(x) x = (PFN_##x)vkGetInstanceProcAddr(instance, #x);
+
+    VK_API_ALL_INSTANCE_PROCS(VK_API_GET_INSTANCE_PROC)
+    // Optional procs are loaded the same way; nothing here checks whether they resolved.
+    VK_API_INSTANCE_PROCS_OPT(VK_API_GET_INSTANCE_PROC)
+
+#undef VK_API_GET_INSTANCE_PROC
+
+    return 0;
+}
+
+int VulkanAPI::initPhysicalDevice(VkPhysicalDevice inPhysicalDevice) // Always returns 0.
+{
+    assert(physicalDevice == VK_NULL_HANDLE); // A physical device must not have been selected already.
+    physicalDevice = inPhysicalDevice;
+    // Query the device's properties, features and memory properties into the matching members.
+    vkGetPhysicalDeviceProperties(physicalDevice, &deviceProperties);
+    vkGetPhysicalDeviceFeatures(physicalDevice, &deviceFeatures);
+    vkGetPhysicalDeviceMemoryProperties(physicalDevice, &deviceMemoryProperties);
+
+    return 0;
+}
+
+int VulkanAPI::initDeviceProcs() // Loads the device-level function pointers; always returns 0.
+{
+    assert(instance && device && vkGetDeviceProcAddr != nullptr);
+    // Each name `x` in VK_API_DEVICE_PROCS is assigned from vkGetDeviceProcAddr(device, "x").
+#define VK_API_GET_DEVICE_PROC(x) x = (PFN_##x)vkGetDeviceProcAddr(device, #x);
+    VK_API_DEVICE_PROCS(VK_API_GET_DEVICE_PROC)
+#undef VK_API_GET_DEVICE_PROC
+
+    return 0;
+}
+
+int VulkanAPI::findMemoryTypeIndex(uint32_t typeBits, VkMemoryPropertyFlags properties)
+{
+    assert(typeBits);
+    // Returns the first memory type index enabled in typeBits whose flags contain all of `properties`, or -1.
+    const int numMemoryTypes = int(deviceMemoryProperties.memoryTypeCount);
+
+    // `bit` is the mask tested against typeBits for memory type i, i.e. bit == 1 << i.
+
+    uint32_t bit = 1;
+    for (int i = 0; i < numMemoryTypes; ++i, bit += bit) // bit += bit doubles the mask each iteration.
+    {
+        auto const& memoryType = deviceMemoryProperties.memoryTypes[i];
+        if ((typeBits & bit) && (memoryType.propertyFlags & properties) == properties)
+        {
+            return i;
+        }
+    }
+
+    // assert(!"failed to find a usable memory type");
+    return -1;
+}
+
+VulkanAPI::~VulkanAPI() // Destroys device, debug callback and instance; vulkanLibraryHandle is not closed here.
+{
+    if (vkDestroyDevice) // Each step is skipped while its function pointer is still nullptr.
+    {
+        vkDestroyDevice(device, nullptr);
+    }
+    if (vkDestroyDebugReportCallbackEXT)
+    {
+        vkDestroyDebugReportCallbackEXT(instance, debugReportCallback, nullptr);
+    }
+    if (vkDestroyInstance) // The instance goes last, after the objects created from it.
+    {
+        vkDestroyInstance(instance, nullptr);
+    }
+}
diff --git a/examples/hello-world/vulkan-api.h b/examples/hello-world/vulkan-api.h
new file mode 100644
index 000000000..17d0465a6
--- /dev/null
+++ b/examples/hello-world/vulkan-api.h
@@ -0,0 +1,132 @@
+#pragma once
+
+#define VK_NO_PROTOTYPES
+#include "vulkan/vulkan.h"
+
+// This file provides basic loading and helper functions for using
+// the Vulkan API.
+
+// The Vulkan function pointers used in this example, as X-macro lists: `x` is applied to each name.
+#define VK_API_GLOBAL_PROCS(x) \
+    x(vkGetInstanceProcAddr) \
+    x(vkCreateInstance) \
+    x(vkEnumerateInstanceLayerProperties) \
+    x(vkDestroyInstance) \
+    /* global procs: looked up directly in the Vulkan library (GetProcAddress / dlsym) */
+
+#define VK_API_INSTANCE_PROCS_OPT(x) \
+    x(vkGetPhysicalDeviceFeatures2) \
+    x(vkGetPhysicalDeviceProperties2) \
+    x(vkCreateDebugReportCallbackEXT) \
+    x(vkDestroyDebugReportCallbackEXT) \
+    x(vkDebugReportMessageEXT) \
+    /* optional instance procs: loaded via vkGetInstanceProcAddr, callers test them for null */
+
+#define VK_API_INSTANCE_PROCS(x) \
+    x(vkCreateDevice) \
+    x(vkDestroyDevice) \
+    x(vkEnumeratePhysicalDevices) \
+    x(vkGetPhysicalDeviceProperties) \
+    x(vkGetPhysicalDeviceFeatures) \
+    x(vkGetPhysicalDeviceMemoryProperties) \
+    x(vkGetPhysicalDeviceQueueFamilyProperties) \
+    x(vkGetPhysicalDeviceFormatProperties) \
+    x(vkGetDeviceProcAddr) \
+    /* instance procs: loaded via vkGetInstanceProcAddr in initInstanceProcs() */
+
+#define VK_API_DEVICE_PROCS(x) \
+    x(vkCreateDescriptorPool) \
+    x(vkDestroyDescriptorPool) \
+    x(vkGetDeviceQueue) \
+    x(vkQueueSubmit) \
+    x(vkQueueWaitIdle) \
+    x(vkCreateBuffer) \
+    x(vkAllocateMemory) \
+    x(vkMapMemory) \
+    x(vkUnmapMemory) \
+    x(vkCmdCopyBuffer) \
+    x(vkDestroyBuffer) \
+    x(vkFreeMemory) \
+    x(vkCreateDescriptorSetLayout) \
+    x(vkDestroyDescriptorSetLayout) \
+    x(vkAllocateDescriptorSets) \
+    x(vkUpdateDescriptorSets) \
+    x(vkCreatePipelineLayout) \
+    x(vkDestroyPipelineLayout) \
+    x(vkCreateComputePipelines) \
+    x(vkDestroyPipeline) \
+    x(vkCreateShaderModule) \
+    x(vkDestroyShaderModule) \
+    x(vkCreateCommandPool) \
+    x(vkDestroyCommandPool) \
+    \
+    x(vkGetBufferMemoryRequirements) \
+    \
+    x(vkCmdBindPipeline) \
+    x(vkCmdBindDescriptorSets) \
+    x(vkCmdDispatch) \
+    \
+    x(vkFreeCommandBuffers) \
+    x(vkAllocateCommandBuffers) \
+    x(vkBeginCommandBuffer) \
+    x(vkEndCommandBuffer) \
+    x(vkBindBufferMemory) \
+    /* device procs: loaded via vkGetDeviceProcAddr in initDeviceProcs() */
+
+#define VK_API_ALL_GLOBAL_PROCS(x) /* every proc loaded directly from the library */ \
+    VK_API_GLOBAL_PROCS(x)
+
+#define VK_API_ALL_INSTANCE_PROCS(x) /* every required instance-level proc */ \
+    VK_API_INSTANCE_PROCS(x)
+
+#define VK_API_ALL_PROCS(x) /* every proc that VulkanAPI declares a pointer for */ \
+    VK_API_ALL_GLOBAL_PROCS(x) \
+    VK_API_ALL_INSTANCE_PROCS(x) \
+    VK_API_DEVICE_PROCS(x) \
+    VK_API_INSTANCE_PROCS_OPT(x) \
+    /* */
+
+#define VK_API_DECLARE_PROC(NAME) PFN_##NAME NAME = nullptr; /* declares one pointer member, null until loaded */
+
+struct VulkanAPI
+{
+    VkInstance instance = VK_NULL_HANDLE;
+    VkDevice device = VK_NULL_HANDLE;
+    VkPhysicalDevice physicalDevice = VK_NULL_HANDLE;
+    VkDebugReportCallbackEXT debugReportCallback = VK_NULL_HANDLE;
+    void* vulkanLibraryHandle = nullptr;
+    VkPhysicalDeviceProperties deviceProperties = {}; // Zeroed until initPhysicalDevice() fills it in.
+    VkPhysicalDeviceFeatures deviceFeatures = {};
+    VkPhysicalDeviceMemoryProperties deviceMemoryProperties = {}; // memoryTypeCount == 0 before init.
+    int queueFamilyIndex = -1; // -1 means no suitable queue family has been selected.
+
+    VK_API_ALL_PROCS(VK_API_DECLARE_PROC)
+
+    /// Initialize the instance functions
+    int initInstanceProcs();
+
+    /// Called before initDevice
+    int initPhysicalDevice(VkPhysicalDevice physicalDevice);
+
+    /// Initialize the device functions
+    int initDeviceProcs();
+
+    /// Finds the proper memory type index for a given requirement.
+    int findMemoryTypeIndex(uint32_t typeBits, VkMemoryPropertyFlags properties);
+
+    /// Clean up and frees device and instance.
+    ~VulkanAPI();
+};
+
+#define RETURN_ON_FAIL(x) /* returns -1 from the enclosing function when (x) != 0 */ \
+    do {                        \
+        auto _res = (x);        \
+        if (_res != 0)          \
+        {                       \
+            return -1;          \
+        }                       \
+    } while (0)
+
+// Loads Vulkan library and creates a VkDevice.
+// Returns 0 if successful.
+int initializeVulkanDevice(VulkanAPI& api);
diff --git a/examples/triangle/README.md b/examples/triangle/README.md
new file mode 100644
index 000000000..ba377b8cb
--- /dev/null
+++ b/examples/triangle/README.md
@@ -0,0 +1,12 @@
+Slang "Hello World" Example
+===========================
+
+The goal of this example is to demonstrate an almost minimal application that uses Slang for shading.
+
+The `shaders.slang` file contains simple vertex and fragment shader entry points. The shader code should compile as either Slang or HLSL code (that is, this example does not show off any new Slang language features).
+
+The `main.cpp` file contains the C++ application code, showing how to use the Slang API to load and compile the shader code to DirectX shader bytecode (DXBC).
+The application performs rendering using the D3D11 API, through a platform and graphics API abstraction layer that is implemented in `tools/gfx`.
+Note that this abstraction layer is *not* required in order to work with Slang, and it is just there to help us write example and test applications more conveniently.
+
+This example is not necessarily representative of best practices for integrating Slang into a production engine; the goal is merely to use the minimum amount of code possible to demonstrate a complete application that uses Slang.
diff --git a/examples/triangle/main.cpp b/examples/triangle/main.cpp
new file mode 100644
index 000000000..6b9104072
--- /dev/null
+++ b/examples/triangle/main.cpp
@@ -0,0 +1,398 @@
+// main.cpp
+
+// This file implements an extremely simple example of loading and
+// executing a Slang shader program. This is primarily an example
+// of how to use Slang as a "drop-in" replacement for an existing
+// HLSL compiler like the `D3DCompile` API. More advanced usage
+// of advanced Slang language and API features is left to the
+// next example.
+//
+// The comments in the file will attempt to explain concepts as
+// they are introduced.
+//
+// Of course, in order to use the Slang API, we need to include
+// its header. We have set up the build options for this project
+// so that it is as simple as:
+//
+#include <slang.h>
+//
+// Other build setups are possible, and Slang doesn't assume that
+// its include directory must be added to your global include
+// path.
+
+// For the purposes of keeping the demo code as simple as possible,
+// while still retaining some level of portability, our examples
+// make use of a small platform and graphics API abstraction layer,
+// which is included in the Slang source distribution under the
+// `tools/` directory.
+//
+// Applications can of course use Slang without ever touching this
+// abstraction layer, so we will not focus on it when explaining
+// examples, except in places where best practices for interacting
+// with Slang may depend on an application/engine making certain
+// design choices in their abstraction layer.
+//
+#include "slang-gfx.h"
+#include "gfx-util/shader-cursor.h"
+#include "tools/platform/window.h"
+#include "slang-com-ptr.h"
+#include "source/core/slang-basic.h"
+#include "examples/example-base/example-base.h"
+
+using namespace gfx;
+using namespace Slang;
+
+// For the purposes of a small example, we will define the vertex data for a
+// single triangle directly in the source file. It should be easy to extend
+// this example to load data from an external source, if desired.
+//
+struct Vertex // Layout of one element of kVertexData.
+{
+    float position[3]; // Described to the input layout as "POSITION" (Format::RGB_Float32).
+    float color[3];    // Described to the input layout as "COLOR" (Format::RGB_Float32).
+};
+
+static const int kVertexCount = 3; // Number of entries in kVertexData (one triangle).
+static const Vertex kVertexData[kVertexCount] =
+{
+    { { 0,  0, 0.5 }, { 1, 0, 0 } }, // Each row is { position }, { color }.
+    { { 0,  1, 0.5 }, { 0, 0, 1 } },
+    { { 1,  0, 0.5 }, { 0, 1, 0 } },
+};
+
+// The example application will be implemented as a `struct`, so that
+// we can scope the resources it allocates without using global variables.
+//
+struct HelloWorld : public WindowedAppBase
+{
+
+// Many Slang API functions return detailed diagnostic information
+// (error messages, warnings, etc.) as a "blob" of data, or return
+// a null blob pointer instead if there were no issues.
+//
+// For convenience, we define a subroutine that will dump the information
+// in a diagnostic blob if one is produced, and skip it otherwise.
+//
+void diagnoseIfNeeded(slang::IBlob* diagnosticsBlob)
+{
+    if( !diagnosticsBlob )
+        return; // A null blob means there is nothing to report.
+    const char* diagnosticsText = (const char*) diagnosticsBlob->getBufferPointer();
+    printf("%s", diagnosticsText);
+}
+
+// The main task an application cares about is compiling shader code
+// from source (if needed) and loading it through the chosen graphics API.
+//
+// In addition, an application may want to receive reflection information
+// about the program, which is what a `slang::ProgramLayout` provides.
+//
+gfx::Result loadShaderProgram(
+    gfx::IDevice*         device,
+    gfx::IShaderProgram**   outProgram)
+{
+    // We need to obtain a compilation session (`slang::ISession`) that will provide
+    // a scope to all the compilation and loading of code we do.
+    //
+    // Our example application uses the `gfx` graphics API abstraction layer, which already
+    // creates a Slang compilation session for us, so we just grab and use it here.
+    ComPtr<slang::ISession> slangSession;
+    slangSession = device->getSlangSession();
+
+    // We can now start loading code into the slang session.
+    //
+    // The simplest way to load code is by calling `loadModule` with the name of a Slang
+    // module. A call to `loadModule("MyStuff")` will behave more or less as if you
+    // wrote:
+    //
+    //      import MyStuff;
+    //
+    // In a Slang shader file. The compiler will use its search paths to try to locate
+    // `MyStuff.slang`, then compile and load that file. If a matching module had
+    // already been loaded previously, that would be used directly.
+    //
+    ComPtr<slang::IBlob> diagnosticsBlob;
+    slang::IModule* module = slangSession->loadModule("shaders", diagnosticsBlob.writeRef());
+    diagnoseIfNeeded(diagnosticsBlob);
+    if(!module)
+        return SLANG_FAIL;
+
+    // Loading the `shaders` module will compile and check all the shader code in it,
+    // including the shader entry points we want to use. Now that the module is loaded
+    // we can look up those entry points by name.
+    //
+    // Note: If you are using this `loadModule` approach to load your shader code it is
+    // important to tag your entry point functions with the `[shader("...")]` attribute
+    // (e.g., `[shader("vertex")] void vertexMain(...)`). Without that information there
+    // is no unambiguous way for the compiler to know which functions represent entry
+    // points when it parses your code via `loadModule()`.
+    //
+    ComPtr<slang::IEntryPoint> vertexEntryPoint;
+    SLANG_RETURN_ON_FAIL(module->findEntryPointByName("vertexMain", vertexEntryPoint.writeRef()));
+    //
+    ComPtr<slang::IEntryPoint> fragmentEntryPoint;
+    SLANG_RETURN_ON_FAIL(module->findEntryPointByName("fragmentMain", fragmentEntryPoint.writeRef()));
+
+    // At this point we have a few different Slang API objects that represent
+    // pieces of our code: `module`, `vertexEntryPoint`, and `fragmentEntryPoint`.
+    //
+    // A single Slang module could contain many different entry points (e.g.,
+    // four vertex entry points, three fragment entry points, and two compute
+    // shaders), and before we try to generate output code for our target API
+    // we need to identify which entry points we plan to use together.
+    //
+    // Modules and entry points are both examples of *component types* in the
+    // Slang API. The API also provides a way to build a *composite* out of
+    // other pieces, and that is what we are going to do with our module
+    // and entry points.
+    //
+    Slang::List<slang::IComponentType*> componentTypes;
+    componentTypes.add(module);
+
+    // Later on when we go to extract compiled kernel code for our vertex
+    // and fragment shaders, we will need to make use of their order within
+    // the composition, so we will record the relative ordering of the entry
+    // points here as we add them.
+    int entryPointCount = 0;
+    int vertexEntryPointIndex = entryPointCount++;
+    componentTypes.add(vertexEntryPoint);
+
+    int fragmentEntryPointIndex = entryPointCount++;
+    componentTypes.add(fragmentEntryPoint);
+
+    // Actually creating the composite component type is a single operation
+    // on the Slang session, but the operation could potentially fail if
+    // something about the composite was invalid (e.g., you are trying to
+    // combine multiple copies of the same module), so we need to deal
+    // with the possibility of diagnostic output.
+    //
+    ComPtr<slang::IComponentType> linkedProgram;
+    SlangResult result = slangSession->createCompositeComponentType(
+        componentTypes.getBuffer(),
+        componentTypes.getCount(),
+        linkedProgram.writeRef(),
+        diagnosticsBlob.writeRef());
+    diagnoseIfNeeded(diagnosticsBlob);
+    SLANG_RETURN_ON_FAIL(result);
+
+    // Once we've described the particular composition of entry points
+    // that we want to compile, we defer to the graphics API layer
+    // to extract compiled kernel code and load it into the API-specific
+    // program representation.
+    //
+    gfx::IShaderProgram::Desc programDesc = {};
+    programDesc.pipelineType = gfx::PipelineType::Graphics;
+    programDesc.slangProgram = linkedProgram;
+    SLANG_RETURN_ON_FAIL(device->createProgram(programDesc, outProgram));
+
+    return SLANG_OK;
+}
+
+//
+// The above function shows the core of what is required to use the
+// Slang API as a simple compiler (e.g., a drop-in replacement for
+// fxc or dxc).
+//
+// The rest of this file implements an extremely simple rendering application
+// that will execute the vertex/fragment shaders loaded with the function
+// we have just defined.
+//
+
+// We will define global variables for the various platform and
+// graphics API objects that our application needs:
+//
+// As a reminder, *none* of these are Slang API objects. All
+// of them come from the utility library we are using to simplify
+// building an example program.
+//
+ComPtr<gfx::IPipelineState> gPipelineState;
+ComPtr<gfx::IBufferResource> gVertexBuffer;
+
+// Now that we've covered the function that actually loads and
+// compiles our Slang shader code, we can go through the rest
+// of the application code without as much commentary.
+//
+Slang::Result initialize()
+{
+    // Create a window for our application to render into.
+    //
+    initializeBase("hello-world", 1024, 768);
+
+    // We will create objects needed to configure the "input assembler"
+    // (IA) stage of the D3D pipeline.
+    //
+    // First, we create an input layout:
+    //
+    InputElementDesc inputElements[] = {
+        { "POSITION", 0, Format::RGB_Float32, offsetof(Vertex, position) },
+        { "COLOR",    0, Format::RGB_Float32, offsetof(Vertex, color) },
+    };
+    auto inputLayout = gDevice->createInputLayout(
+        &inputElements[0],
+        2);
+    if(!inputLayout) return SLANG_FAIL;
+
+    // Next we allocate a vertex buffer for our pre-initialized
+    // vertex data.
+    //
+    IBufferResource::Desc vertexBufferDesc;
+    vertexBufferDesc.init(kVertexCount * sizeof(Vertex));
+    vertexBufferDesc.setDefaults(IResource::Usage::VertexBuffer);
+    gVertexBuffer = gDevice->createBufferResource(
+        IResource::Usage::VertexBuffer,
+        vertexBufferDesc,
+        &kVertexData[0]);
+    if(!gVertexBuffer) return SLANG_FAIL;
+
+    // Now we will use our `loadShaderProgram` function to load
+    // the code from `shaders.slang` into the graphics API.
+    //
+    ComPtr<IShaderProgram> shaderProgram;
+    SLANG_RETURN_ON_FAIL(loadShaderProgram(gDevice, shaderProgram.writeRef()));
+
+    // Following the D3D12/Vulkan style of API, we need a pipeline state object
+    // (PSO) to encapsulate the configuration of the overall graphics pipeline.
+    //
+    GraphicsPipelineStateDesc desc;
+    desc.inputLayout = inputLayout;
+    desc.program = shaderProgram;
+    desc.framebufferLayout = gFramebufferLayout;
+    auto pipelineState = gDevice->createGraphicsPipelineState(desc);
+    if (!pipelineState)
+        return SLANG_FAIL;
+
+    gPipelineState = pipelineState;
+
+    return SLANG_OK;
+}
+
+// With the initialization out of the way, we can now turn our attention
+// to the per-frame rendering logic. As with the initialization, there is
+// nothing really Slang-specific here, so the commentary doesn't need
+// to be very detailed.
+//
+virtual void renderFrame(int frameBufferIndex) override
+{
+    ComPtr<ICommandBuffer> commandBuffer = gTransientHeaps[frameBufferIndex]->createCommandBuffer();
+    auto renderEncoder = commandBuffer->encodeRenderCommands(gRenderPass, gFramebuffers[frameBufferIndex]);
+
+    gfx::Viewport viewport = {};
+    viewport.maxZ = 1.0f;
+    viewport.extentX = (float)windowWidth;
+    viewport.extentY = (float)windowHeight;
+    renderEncoder->setViewportAndScissor(viewport);
+
+    // In order to bind shader parameters to the pipeline, we need
+    // to know how those parameters were assigned to locations/bindings/registers
+    // for the target graphics API.
+    //
+    // The Slang compiler assigns locations to parameters in a deterministic
+    // fashion, so it is possible for a programmer to hard-code locations
+    // into their application code that will match up with their shaders.
+    //
+    // Hard-coding of locations can become intractable as an application needs
+    // to support more different target platforms and graphics APIs, as well
+    // as more shaders with different specialized variants.
+    //
+    // Rather than rely on hard-coded locations, our examples will make use of
+    // reflection information provided by the Slang compiler (see `programLayout`
+    // above), and our example graphics API layer will translate that reflection
+    // information into a layout for a "root shader object."
+    //
+    // The root object will store values/bindings for all of the parameters in
+    // the `IShaderProgram` used to create the pipeline state. At a conceptual
+    // level we can think of `rootObject` as representing the "global scope" of
+    // the shader program that was loaded; it has entries for each global shader
+    // parameter that was declared.
+    //
+    // Readers who are familiar with D3D12 or Vulkan might think of this root
+    // layout as being similar in spirit to a "root signature" or "pipeline layout."
+    //
+    // We start parameter binding by binding the pipeline state in the command
+    // encoder. This method will return a transient root shader object for us to
+    // write our shader parameters into.
+    //
+    auto rootObject = renderEncoder->bindPipeline(gPipelineState);
+
+    // We will update the model-view-projection matrix that is passed
+    // into the shader code via the `Uniforms` buffer on a per-frame
+    // basis, even though the data that is loaded does not change
+    // per-frame (we always use an identity matrix).
+    //
+    auto deviceInfo = gDevice->getDeviceInfo();
+
+    // We know that `rootObject` is a root shader object created
+    // from our program, and that it is set up to hold values for
+    // all the parameters of that program. In order to actually
+    // set values, we need to be able to look up the location
+    // of the specific parameter that we want to set.
+    //
+    // Our example graphics API layer supports this operation
+    // with the idea of a *shader cursor* which can be thought
+    // of as pointing "into" a particular shader object at
+    // some location/offset. This design choice abstracts over
+    // the many ways that different platforms and APIs represent
+    // the necessary offset information.
+    //
+    // We construct an initial shader cursor that points at the
+    // entire shader program. You can think of this as akin to
+    // a directory path of `/` for the root directory in a file
+    // system.
+    //
+    ShaderCursor rootCursor(rootObject);
+    //
+    // Next, we use a convenience overload of `operator[]` to
+    // navigate from the root cursor down to the parameter we
+    // want to set.
+    //
+    // The operation `rootCursor["Uniforms"]` looks up the
+    // offset/location of the global shader parameter `Uniforms`
+    // (which is a uniform/constant buffer), and the subsequent
+    // `["modelViewProjection"]` step navigates from there down
+    // to the member named `modelViewProjection` in that buffer.
+    //
+    // Once we have formed a cursor that "points" at the
+    // model-view-projection matrix, we can set its data directly.
+    //
+    rootCursor["Uniforms"]["modelViewProjection"].setData(
+        deviceInfo.identityProjectionMatrix, sizeof(float) * 16);
+    //
+    // Some readers might be concerned about the performance of
+    // the above operations because of the use of strings. For
+    // those readers, here are two things to note:
+    //
+    // * While these `operator[]` steps do need to perform string
+    //   comparisons, they do *not* make copies of the strings or
+    //   perform any heap allocation.
+    //
+    // * There are other overloads of `operator[]` that use the
+    //   *index* of a parameter/field instead of its name, and those
+    //   operations have fixed/constant overhead and perform no
+    //   string comparisons. The indices used are independent of
+    //   the target platform and graphics API, and can thus be
+    //   hard-coded even in cross-platform code.
+    //
+
+    // We also need to set up a few pieces of fixed-function pipeline
+    // state that are not bound by the pipeline state above.
+    //
+    renderEncoder->setVertexBuffer(0, gVertexBuffer, sizeof(Vertex));
+    renderEncoder->setPrimitiveTopology(PrimitiveTopology::TriangleList);
+
+    // Finally, we are ready to issue a draw call for a single triangle.
+    //
+    renderEncoder->draw(3);
+    renderEncoder->endEncoding();
+    commandBuffer->close();
+    gQueue->executeCommandBuffer(commandBuffer);
+
+    // With that, we are done drawing for one frame, and ready for the next.
+    //
+    gSwapchain->present();
+}
+
+};
+
+// This macro instantiates an appropriate main function that runs
+// the `HelloWorld` application defined above through `innerMain`.
+PLATFORM_UI_MAIN(innerMain<HelloWorld>)
diff --git a/examples/hello-world/shaders.slang b/examples/triangle/shaders.slang
index fe35db9c8..fe35db9c8 100644
--- a/examples/hello-world/shaders.slang
+++ b/examples/triangle/shaders.slang