Add a CPU renderer implementation (#1750)

* Add a CPU renderer implementation This change adds a CPU back-end to `gfx` and ensures that most of our existing CPU tests pass when using it. Detailed notes: * Most of the CPU renderer implementation is copy-pasted from the CUDA case, so they share a lot of similar logic * The main addition to the CPU renderer is a semi-complete implementation of host-memory textures. The logic here handles all the main shapes (Buffer, 1D, 2D, 3D, Cube) and all the currently-supported `Format`s that are sample-able as-is (no D24S8). The implementation is not intended to be fast, and it currently only does nearest-neighbor sampling, but otherwise it tries to avoid cutting too many corners and should be ar reasonable starting point for a more complete (but not performance-oriented) implementation. * Refactored the CPU prelude `IRWTexture` interface to inherit from `ITexture`, since in most cases a single type will end up implementing both. It might be worth it to collapse it all down to a single interface later. * Changed the CPU prelude `ITexture`/`IRWTexture` interface so that it takes both a pointer *and* a size for output arguments. This change seems necessary to allow a shader variable declared as a `Texture2D<float>` to fetch a single `float` when the underlying texture might be using RGBA32F. * Added to the `IComponentType` public API so that we can query a "host callable" for an entry point and not just a binary. * Turned off the `-shaderobj` flag on two tests that weren't yet compatible with shader objects but still had the flag left in on the path (since previously the CPU path always used the non-`gfx` non-shader-object logic anyway) * Disabled one test (`dynamic-dispatch-11`) that relied on the `ConstantBuffer<IInterface>` idiom that we know we are planning to chagne soon anyway. * Made a few changes to the CUDA path to bring it into line with what I added for the CPU path. These were mostly bug fixes around indexing logic for sub-objects and resources. * fixup
author: Tim Foley <tfoleyNV@users.noreply.github.com> 2021-03-12 11:58:14 -0800
committer: GitHub <noreply@github.com> 2021-03-12 11:58:14 -0800
commit: d6a37a0f151e390808f196998c48a341bc4c7b60 (patch)
tree: c1c6e3af434cb3627af67ecc8706124e4b8c7fb1 /tools/gfx/cpu
parent: 9ffe2f3ef245034a2dae42017a9059dfe4d02647 (diff)
2 files changed, 1807 insertions, 0 deletions
diff --git a/tools/gfx/cpu/render-cpu.cpp b/tools/gfx/cpu/render-cpu.cpp
new file mode 100644
index 000000000..faa6b3092
--- /dev/null
+++ b/tools/gfx/cpu/render-cpu.cpp
@@ -0,0 +1,1796 @@
+// render-cpu.cpp
+#include "render-cpu.h"
+
+#include "slang.h"
+#include "slang-com-ptr.h"
+#include "slang-com-helper.h"
+#include "core/slang-basic.h"
+#include "core/slang-blob.h"
+
+#include "../command-writer.h"
+#include "../renderer-shared.h"
+#include "../slang-context.h"
+
+#define SLANG_PRELUDE_NAMESPACE slang_prelude
+#include "prelude/slang-cpp-types.h"
+
+namespace gfx
+{
+using namespace Slang;
+
+class CPUBufferResource : public BufferResource
+{
+public:
+    CPUBufferResource(const Desc& _desc)
+        : BufferResource(_desc)
+    {}
+
+    ~CPUBufferResource()
+    {
+        if (m_data)
+        {
+            free(m_data);
+        }
+    }
+
+    SlangResult init()
+    {
+        m_data = malloc(m_desc.sizeInBytes);
+        if(!m_data) return SLANG_E_OUT_OF_MEMORY;
+        return SLANG_OK;
+    }
+
+    SlangResult setData(size_t offset, size_t size, void const* data)
+    {
+        memcpy((char*)m_data + offset, data, size);
+        return SLANG_OK;
+    }
+
+    void* m_data = nullptr;
+};
+
+struct CPUTextureBaseShapeInfo
+{
+    int32_t rank;
+    int32_t baseCoordCount;
+    int32_t implicitArrayElementCount;
+};
+
+static const CPUTextureBaseShapeInfo kCPUTextureBaseShapeInfos[(int)ITextureResource::Type::CountOf] =
+{
+    /* Unknown */       { 0, 0, 0 },
+    /* Buffer */        { 1, 1, 1 },
+    /* Texture1D */     { 1, 1, 1 },
+    /* Texture2D */     { 2, 2, 1 },
+    /* Texture3D */     { 3, 3, 1 },
+    /* TextureCube */   { 2, 3, 6 },
+};
+
+static CPUTextureBaseShapeInfo const* _getBaseShapeInfo(ITextureResource::Type baseShape)
+{
+    return &kCPUTextureBaseShapeInfos[(int)baseShape];
+}
+
+typedef void (*CPUTextureUnpackFunc)(void const* texelData, void* outData, size_t outSize);
+
+struct CPUTextureFormatInfo
+{
+    CPUTextureUnpackFunc unpackFunc;
+};
+
+template<int N>
+void _unpackFloatTexel(void const* texelData, void* outData, size_t outSize)
+{
+    auto input = (float const*) texelData;
+
+    float temp[4] = { 0.0f, 0.0f, 0.0f, 1.0f };
+    for(int i = 0; i < N; ++i)
+        temp[i] = input[i];
+
+    memcpy(outData, temp, outSize);
+}
+
+static inline float _unpackUnorm8Value(uint8_t value)
+{
+    return value / 255.0f;
+}
+
+template<int N>
+void _unpackUnorm8Texel(void const* texelData, void* outData, size_t outSize)
+{
+    auto input = (uint8_t const*) texelData;
+
+    float temp[4] = { 0.0f, 0.0f, 0.0f, 1.0f };
+    for(int i = 0; i < N; ++i)
+        temp[i] = _unpackUnorm8Value(input[i]);
+
+    memcpy(outData, temp, outSize);
+}
+
+void _unpackUnormBGRA8Texel(void const* texelData, void* outData, size_t outSize)
+{
+    auto input = (uint8_t const*) texelData;
+
+    float temp[4];
+    temp[0] = _unpackUnorm8Value(input[2]);
+    temp[1] = _unpackUnorm8Value(input[1]);
+    temp[2] = _unpackUnorm8Value(input[0]);
+    temp[3] = _unpackUnorm8Value(input[3]);
+
+    memcpy(outData, temp, outSize);
+}
+
+template<int N>
+void _unpackUInt16Texel(void const* texelData, void* outData, size_t outSize)
+{
+    auto input = (uint16_t const*) texelData;
+
+    uint32_t temp[4] = { 0, 0, 0, 0 };
+    for(int i = 0; i < N; ++i)
+        temp[i] = input[i];
+
+    memcpy(outData, temp, outSize);
+}
+
+template<int N>
+void _unpackUInt32Texel(void const* texelData, void* outData, size_t outSize)
+{
+    auto input = (uint32_t const*) texelData;
+
+    uint32_t temp[4] = { 0, 0, 0, 0 };
+    for(int i = 0; i < N; ++i)
+        temp[i] = input[i];
+
+    memcpy(outData, temp, outSize);
+}
+
+#define TEXTURE_FORMAT_INFO(FORMAT) static const CPUTextureFormatInfo kCPUTextureFormatInfo_##FORMAT
+
+TEXTURE_FORMAT_INFO(RGBA_Float32)      = { &_unpackFloatTexel<4> };
+TEXTURE_FORMAT_INFO(RGB_Float32)       = { &_unpackFloatTexel<3> };
+TEXTURE_FORMAT_INFO(RG_Float32)        = { &_unpackFloatTexel<2> };
+TEXTURE_FORMAT_INFO(R_Float32)         = { &_unpackFloatTexel<1> };
+TEXTURE_FORMAT_INFO(RGBA_Unorm_UInt8)  = { &_unpackUnorm8Texel<4> };
+TEXTURE_FORMAT_INFO(BGRA_Unorm_UInt8)  = { &_unpackUnormBGRA8Texel };
+TEXTURE_FORMAT_INFO(R_UInt16)          = { &_unpackUInt16Texel<1> };
+TEXTURE_FORMAT_INFO(R_UInt32)          = { &_unpackUInt32Texel<1> };
+TEXTURE_FORMAT_INFO(D_Float32)         = { &_unpackFloatTexel<1> };
+
+#undef TEXTURE_FORMAT_INFO
+
+static CPUTextureFormatInfo const* _getFormatInfo(Format format)
+{
+    switch(format)
+    {
+    case Format::D_Unorm24_S8:
+    default:
+        return nullptr;
+
+
+#define CASE(FORMAT) case Format::FORMAT: return &kCPUTextureFormatInfo_##FORMAT;
+    CASE(RGBA_Float32)
+    CASE(RGB_Float32)
+    CASE(RG_Float32)
+    CASE(R_Float32)
+    CASE(RGBA_Unorm_UInt8)
+    CASE(BGRA_Unorm_UInt8)
+    CASE(R_UInt16)
+    CASE(R_UInt32)
+    CASE(D_Float32)
+
+#undef CASE
+    }
+}
+
+class CPUTextureResource : public TextureResource
+{
+    enum { kMaxRank = 3 };
+
+public:
+    CPUTextureResource(const TextureResource::Desc& desc)
+        : TextureResource(desc)
+    {}
+    ~CPUTextureResource()
+    {
+    }
+
+    Result init(ITextureResource::SubresourceData const* initData)
+    {
+        auto desc = m_desc;
+
+        // The format of the texture will determine the
+        // size of the texels we allocate.
+        //
+        // TODO: Compressed formats usually work in terms
+        // of a fixed block size, so that we cannot actually
+        // compute a simple `texelSize` like this. Instead
+        // we should be computing a `blockSize` and then
+        // a `blockExtents` value that gives the extent
+        // in texels of each block. For uncompressed formats
+        // the block extents would be 1 along each axis.
+        //
+        auto format = desc.format;
+        auto texelSize = gfxGetFormatSize(format);
+        m_texelSize = (int32_t) texelSize;
+
+        int32_t formatBlockSize[kMaxRank] = { 1, 1, 1 };
+
+        auto baseShapeInfo = _getBaseShapeInfo(desc.type);
+        m_baseShape = baseShapeInfo;
+        if(!baseShapeInfo)
+            return SLANG_FAIL;
+
+        auto formatInfo = _getFormatInfo(desc.format);
+        m_formatInfo = formatInfo;
+        if(!formatInfo)
+            return SLANG_FAIL;
+
+        int32_t rank = baseShapeInfo->rank;
+        int32_t effectiveArrayElementCount = desc.arraySize ? desc.arraySize : 1;
+        effectiveArrayElementCount *= baseShapeInfo->implicitArrayElementCount;
+        m_effectiveArrayElementCount = effectiveArrayElementCount;
+
+        int32_t extents[kMaxRank];
+        extents[0] = desc.size.width;
+        extents[1] = desc.size.height;
+        extents[2] = desc.size.depth;
+
+        for(int32_t axis = rank; axis < kMaxRank; ++axis)
+            extents[axis] = 1;
+
+        int32_t levelCount = desc.numMipLevels;
+
+        m_mipLevels.setCount(levelCount);
+
+        int64_t totalDataSize = 0;
+        for( int32_t levelIndex = 0; levelIndex < levelCount; ++levelIndex )
+        {
+            auto& level = m_mipLevels[levelIndex];
+
+            for( int32_t axis = 0; axis < kMaxRank; ++axis )
+            {
+                int32_t extent = extents[axis] >> levelIndex;
+                if(extent < 1) extent = 1;
+                level.extents[axis] = extent;
+            }
+
+            level.strides[0] = texelSize;
+            for( int32_t axis = 1; axis < kMaxRank+1; ++axis)
+            {
+                level.strides[axis] = level.strides[axis-1]*level.extents[axis-1];
+            }
+
+            int64_t levelDataSize = texelSize;
+            levelDataSize *= effectiveArrayElementCount;
+            for( int32_t axis = 0; axis < rank; ++axis)
+                levelDataSize *= int64_t(level.extents[axis]);
+
+            level.offset = totalDataSize;
+            totalDataSize += levelDataSize;
+        }
+
+        void* textureData = malloc(totalDataSize);
+        m_data = textureData;
+
+        if( initData )
+        {
+            int32_t subResourceCounter = 0;
+            for(int32_t arrayElementIndex = 0; arrayElementIndex < effectiveArrayElementCount; ++arrayElementIndex)
+            {
+                for(int32_t mipLevel = 0; mipLevel < m_desc.numMipLevels; ++mipLevel)
+                {
+                    int32_t subResourceIndex = subResourceCounter++;
+
+                    auto dstRowStride = m_mipLevels[mipLevel].strides[1];
+                    auto dstLayerStride = m_mipLevels[mipLevel].strides[2];
+                    auto dstArrayStride = m_mipLevels[mipLevel].strides[3];
+
+                    auto textureRowSize = m_mipLevels[mipLevel].extents[0]*texelSize;
+
+                    auto rowCount = m_mipLevels[mipLevel].extents[1];
+                    auto depthLayerCount = m_mipLevels[mipLevel].extents[2];
+
+                    auto& srcImage = initData[subResourceIndex];
+                    ptrdiff_t srcRowStride = ptrdiff_t(srcImage.strideY);
+                    ptrdiff_t srcLayerStride = ptrdiff_t(srcImage.strideZ);
+
+                    char* dstLevel = (char*)textureData + m_mipLevels[mipLevel].offset;
+                    char* dstImage = dstLevel + dstArrayStride*arrayElementIndex;
+
+                    const char* srcLayer = (const char*) srcImage.data;
+                    char* dstLayer = dstImage;
+
+                    for(int32_t depthLayer = 0; depthLayer < depthLayerCount; ++depthLayer)
+                    {
+                        const char* srcRow = srcLayer;
+                        char* dstRow = dstLayer;
+
+                        for(int32_t row = 0; row < rowCount; ++row)
+                        {
+                            memcpy(dstRow, srcRow, textureRowSize);
+
+                            srcRow += srcRowStride;
+                            dstRow += dstRowStride;
+                        }
+
+                        srcLayer += srcLayerStride;
+                        dstLayer += dstLayerStride;
+                    }
+                }
+            }
+        }
+
+        return SLANG_OK;
+    }
+
+    Desc const& _getDesc() { return m_desc; }
+    Format getFormat() { return m_desc.format; }
+    int32_t getRank() { return m_baseShape->rank; }
+
+    CPUTextureBaseShapeInfo const* m_baseShape;
+    CPUTextureFormatInfo const* m_formatInfo;
+    int32_t m_effectiveArrayElementCount = 0;
+    int32_t m_texelSize = 0;
+
+    struct MipLevel
+    {
+        int32_t extents[kMaxRank];
+        int64_t strides[kMaxRank+1];
+        int64_t offset;
+    };
+    List<MipLevel>  m_mipLevels;
+    void*           m_data = nullptr;
+};
+
+class CPUResourceView : public IResourceView, public RefObject
+{
+public:
+    enum class Kind
+    {
+        Buffer,
+        Texture,
+    };
+
+    SLANG_REF_OBJECT_IUNKNOWN_ALL
+    IResourceView* getInterface(const Guid& guid)
+    {
+        if (guid == GfxGUID::IID_ISlangUnknown || guid == GfxGUID::IID_IResourceView)
+            return static_cast<IResourceView*>(this);
+        return nullptr;
+    }
+
+    Kind getViewKind() const { return m_kind; }
+    Desc const& getDesc() const { return m_desc; }
+
+protected:
+    CPUResourceView(Kind kind, Desc const& desc)
+        : m_kind(kind)
+        , m_desc(desc)
+    {}
+
+private:
+    Kind m_kind;
+    Desc m_desc;
+};
+
+class CPUBufferView : public CPUResourceView
+{
+public:
+    CPUBufferView(Desc const& desc, CPUBufferResource* buffer)
+        : CPUResourceView(Kind::Buffer, desc)
+        , m_buffer(buffer)
+    {}
+
+    CPUBufferResource* getBuffer() const { return m_buffer; }
+
+private:
+    RefPtr<CPUBufferResource> m_buffer;
+};
+
+class CPUTextureView : public CPUResourceView, public slang_prelude::IRWTexture
+{
+public:
+    CPUTextureView(Desc const& desc, CPUTextureResource* texture)
+        : CPUResourceView(Kind::Texture, desc)
+        , m_texture(texture)
+    {}
+
+    CPUTextureResource* getTexture() const { return m_texture; }
+
+    //
+    // ITexture interface
+    //
+
+    slang_prelude::TextureDimensions GetDimensions(int mipLevel = -1) SLANG_OVERRIDE
+    {
+        slang_prelude::TextureDimensions dimensions = {};
+
+        CPUTextureResource* texture = m_texture;
+        auto& desc = texture->_getDesc();
+        auto baseShape = texture->m_baseShape;
+
+        dimensions.arrayElementCount = desc.arraySize;
+        dimensions.numberOfLevels = desc.numMipLevels;
+        dimensions.shape = baseShape->rank;
+        dimensions.width = desc.size.width;
+        dimensions.height = desc.size.height;
+        dimensions.depth = desc.size.depth;
+
+        return dimensions;
+    }
+
+    void Load(const int32_t* texelCoords, void* outData, size_t dataSize) SLANG_OVERRIDE
+    {
+        void* texelPtr = _getTexelPtr(texelCoords);
+
+        m_texture->m_formatInfo->unpackFunc(texelPtr, outData, dataSize);
+    }
+
+    void Sample(slang_prelude::SamplerState samplerState, const float* coords, void* outData, size_t dataSize) SLANG_OVERRIDE
+    {
+        // We have no access to information from fragment quads, so we cannot
+        // compute the finite-difference derivatives needed from `coords`.
+        //
+        // The only reasonable thing to do is to sample mip level zero.
+        //
+        SampleLevel(samplerState, coords, 0.0f, outData, dataSize);
+    }
+
+    void SampleLevel(slang_prelude::SamplerState samplerState, const float* coords, float level, void* outData, size_t dataSize) SLANG_OVERRIDE
+    {
+        CPUTextureResource* texture = m_texture;
+        auto baseShape = texture->m_baseShape;
+        auto& desc = texture->_getDesc();
+        int32_t rank = baseShape->rank;
+        int32_t baseCoordCount = baseShape->baseCoordCount;
+
+        int32_t integerMipLevel = int32_t(level + 0.5f);
+        if(integerMipLevel >= desc.numMipLevels) integerMipLevel = desc.numMipLevels-1;
+        if(integerMipLevel < 0) integerMipLevel = 0;
+
+        auto& mipLevelInfo = texture->m_mipLevels[integerMipLevel];
+
+        bool isArray = (desc.arraySize != 0) || (desc.type == ITextureResource::Type::TextureCube);
+        int32_t effectiveArrayElementCount = texture->m_effectiveArrayElementCount;
+        int32_t coordIndex = baseCoordCount;
+        int32_t elementIndex = 0;
+        if( isArray )
+        {
+            elementIndex = int32_t(coords[coordIndex++] + 0.5f);
+        }
+        if(elementIndex >= effectiveArrayElementCount) elementIndex = effectiveArrayElementCount-1;
+        if(elementIndex < 0) elementIndex = 0;
+
+        // Note: for now we are just going to do nearest-neighbor sampling
+        //
+        int64_t texelOffset = mipLevelInfo.offset;
+        texelOffset += elementIndex * mipLevelInfo.strides[3];
+        for(int32_t axis = 0; axis < rank; ++axis)
+        {
+            int32_t extent = mipLevelInfo.extents[axis];
+
+            float coord = coords[axis];
+
+            // TODO: deal with wrap/clamp/repeat if `coord < 0` or `coord > 1`
+
+            int32_t integerCoord = int32_t(coord*(extent-1) + 0.5f);
+
+            if(integerCoord >= extent) integerCoord = extent-1;
+            if(integerCoord < 0) integerCoord = 0;
+
+            texelOffset += integerCoord * mipLevelInfo.strides[axis];
+        }
+
+        auto texelPtr = (char const*)texture->m_data + texelOffset;
+
+        m_texture->m_formatInfo->unpackFunc(texelPtr, outData, dataSize);
+    }
+
+    //
+    // IRWTexture interface
+    //
+
+    void* refAt(const uint32_t* texelCoords) SLANG_OVERRIDE
+    {
+        return _getTexelPtr((int32_t const*)texelCoords);
+    }
+
+private:
+    RefPtr<CPUTextureResource> m_texture;
+
+    void* _getTexelPtr(int32_t const* texelCoords)
+    {
+        CPUTextureResource* texture = m_texture;
+        auto baseShape = texture->m_baseShape;
+        auto& desc = texture->_getDesc();
+
+        int32_t rank = baseShape->rank;
+        int32_t baseCoordCount = baseShape->baseCoordCount;
+
+        bool isArray = (desc.arraySize != 0) || (desc.type == ITextureResource::Type::TextureCube);
+        bool isMultisample = desc.sampleDesc.numSamples > 1;
+        bool isBuffer = desc.type == ITextureResource::Type::Buffer;
+        bool hasMipLevels = !(isMultisample || isBuffer);
+
+        int32_t effectiveArrayElementCount = texture->m_effectiveArrayElementCount;
+
+        int32_t coordIndex = baseCoordCount;
+        int32_t elementIndex = 0;
+        if( isArray )
+        {
+            elementIndex = texelCoords[coordIndex++];
+        }
+        if(elementIndex >= effectiveArrayElementCount) elementIndex = effectiveArrayElementCount-1;
+        if(elementIndex < 0) elementIndex = 0;
+
+        int32_t mipLevel = 0;
+        if(!hasMipLevels)
+        {
+            mipLevel = texelCoords[coordIndex++];
+        }
+        if(mipLevel >= desc.numMipLevels) mipLevel = desc.numMipLevels-1;
+        if(mipLevel < 0) mipLevel = 0;
+
+        auto& mipLevelInfo = texture->m_mipLevels[mipLevel];
+
+        int64_t texelOffset = mipLevelInfo.offset;
+        texelOffset += elementIndex * mipLevelInfo.strides[3];
+        for(int32_t axis = 0; axis < rank; ++axis)
+        {
+            int32_t coord = texelCoords[axis];
+            if(coord >= mipLevelInfo.extents[axis]) coord = mipLevelInfo.extents[axis]-1;
+            if(coord < 0) coord = 0;
+
+            texelOffset += texelCoords[axis] * mipLevelInfo.strides[axis];
+        }
+
+        return (char*)texture->m_data + texelOffset;
+    }
+};
+
+class CPUShaderObjectLayout : public ShaderObjectLayoutBase
+{
+public:
+
+    // TODO: Once memory lifetime stuff is handled, there is
+    // no specific need to even track binding or sub-object
+    // ranges for CPU.
+
+    struct BindingRangeInfo
+    {
+        slang::BindingType bindingType;
+        Index count;
+        Index baseIndex; // Flat index for sub-ojects
+
+        // TODO: The `uniformOffset` field should be removed,
+        // since it cannot be supported by the Slang reflection
+        // API once we fix some design issues.
+        //
+        // It is only being used today for pre-allocation of sub-objects
+        // for constant buffers and parameter blocks (which should be
+        // deprecated/removed anyway).
+        //
+        // Note: We would need to bring this field back, plus
+        // a lot of other complexity, if we ever want to support
+        // setting of resources/buffers directly by a binding
+        // range index and array index.
+        //
+        Index uniformOffset; // Uniform offset for a resource typed field.
+    };
+
+    struct SubObjectRangeInfo
+    {
+        RefPtr<CPUShaderObjectLayout> layout;
+        Index bindingRangeIndex;
+    };
+
+    size_t m_size = 0;
+    List<SubObjectRangeInfo> subObjectRanges;
+    List<BindingRangeInfo> m_bindingRanges;
+
+    Index m_subObjectCount = 0;
+    Index m_resourceCount = 0;
+
+    CPUShaderObjectLayout(RendererBase* renderer, slang::TypeLayoutReflection* layout)
+    {
+        initBase(renderer, layout);
+
+        Index subObjectCount = 0;
+        Index resourceCount = 0;
+
+        m_elementTypeLayout = _unwrapParameterGroups(layout);
+        m_size = m_elementTypeLayout->getSize();
+
+        // Compute the binding ranges that are used to store
+        // the logical contents of the object in memory. These will relate
+        // to the descriptor ranges in the various sets, but not always
+        // in a one-to-one fashion.
+
+        SlangInt bindingRangeCount = m_elementTypeLayout->getBindingRangeCount();
+        for (SlangInt r = 0; r < bindingRangeCount; ++r)
+        {
+            slang::BindingType slangBindingType = m_elementTypeLayout->getBindingRangeType(r);
+            SlangInt count = m_elementTypeLayout->getBindingRangeBindingCount(r);
+            slang::TypeLayoutReflection* slangLeafTypeLayout =
+                m_elementTypeLayout->getBindingRangeLeafTypeLayout(r);
+
+            SlangInt descriptorSetIndex = m_elementTypeLayout->getBindingRangeDescriptorSetIndex(r);
+            SlangInt rangeIndexInDescriptorSet =
+                m_elementTypeLayout->getBindingRangeFirstDescriptorRangeIndex(r);
+
+            // TODO: This logic assumes that for any binding range that might consume
+            // multiple kinds of resources, the descriptor range for its uniform
+            // usage will be the first one in the range.
+            //
+            // We need to decide whether that assumption is one we intend to support
+            // applications making, or whether they should be forced to perform a
+            // linear search over the descriptor ranges for a specific binding range.
+            //
+            auto uniformOffset = m_elementTypeLayout->getDescriptorSetDescriptorRangeIndexOffset(
+                descriptorSetIndex, rangeIndexInDescriptorSet);
+
+            Index baseIndex = 0;
+            switch (slangBindingType)
+            {
+            case slang::BindingType::ConstantBuffer:
+            case slang::BindingType::ParameterBlock:
+            case slang::BindingType::ExistentialValue:
+                baseIndex = subObjectCount;
+                subObjectCount += count;
+                break;
+
+            default:
+                baseIndex = resourceCount;
+                resourceCount += count;
+                break;
+            }
+
+            BindingRangeInfo bindingRangeInfo;
+            bindingRangeInfo.bindingType = slangBindingType;
+            bindingRangeInfo.count = count;
+            bindingRangeInfo.baseIndex = baseIndex;
+            bindingRangeInfo.uniformOffset = uniformOffset;
+            m_bindingRanges.add(bindingRangeInfo);
+        }
+
+        m_subObjectCount = subObjectCount;
+        m_resourceCount = resourceCount;
+
+        SlangInt subObjectRangeCount = m_elementTypeLayout->getSubObjectRangeCount();
+        for (SlangInt r = 0; r < subObjectRangeCount; ++r)
+        {
+            SlangInt bindingRangeIndex = m_elementTypeLayout->getSubObjectRangeBindingRangeIndex(r);
+            auto slangBindingType = m_elementTypeLayout->getBindingRangeType(bindingRangeIndex);
+            slang::TypeLayoutReflection* slangLeafTypeLayout =
+                m_elementTypeLayout->getBindingRangeLeafTypeLayout(bindingRangeIndex);
+
+            // A sub-object range can either represent a sub-object of a known
+            // type, like a `ConstantBuffer<Foo>` or `ParameterBlock<Foo>`
+            // (in which case we can pre-compute a layout to use, based on
+            // the type `Foo`) *or* it can represent a sub-object of some
+            // existential type (e.g., `IBar`) in which case we cannot
+            // know the appropraite type/layout of sub-object to allocate.
+            //
+            RefPtr<CPUShaderObjectLayout> subObjectLayout;
+            if (slangBindingType != slang::BindingType::ExistentialValue)
+            {
+                subObjectLayout =
+                    new CPUShaderObjectLayout(renderer, slangLeafTypeLayout->getElementTypeLayout());
+            }
+
+            SubObjectRangeInfo subObjectRange;
+            subObjectRange.bindingRangeIndex = bindingRangeIndex;
+            subObjectRange.layout = subObjectLayout;
+            subObjectRanges.add(subObjectRange);
+        }
+    }
+
+    size_t getSize() { return m_size; }
+    Index getResourceCount() const { return m_resourceCount; }
+    Index getSubObjectCount() const { return m_subObjectCount; }
+};
+
+class CPUEntryPointLayout : public CPUShaderObjectLayout
+{
+private:
+    slang::EntryPointLayout* m_entryPointLayout = nullptr;
+
+public:
+    CPUEntryPointLayout(
+        RendererBase*               renderer,
+        slang::EntryPointLayout*    entryPointLayout)
+        : CPUShaderObjectLayout(renderer, entryPointLayout->getTypeLayout())
+        , m_entryPointLayout(entryPointLayout)
+    {}
+
+    const char* getEntryPointName() { return m_entryPointLayout->getName(); }
+};
+
+class CPUProgramLayout : public CPUShaderObjectLayout
+{
+public:
+    slang::ProgramLayout* m_programLayout = nullptr;
+    List<RefPtr<CPUEntryPointLayout>> m_entryPointLayouts;
+
+    CPUProgramLayout(RendererBase* renderer, slang::ProgramLayout* programLayout)
+        : CPUShaderObjectLayout(renderer, programLayout->getGlobalParamsTypeLayout())
+        , m_programLayout(programLayout)
+    {
+        for (UInt i =0; i< programLayout->getEntryPointCount(); i++)
+        {
+            m_entryPointLayouts.add(new CPUEntryPointLayout(
+                renderer,
+                programLayout->getEntryPointByIndex(i)));
+        }
+
+    }
+
+    int getKernelIndex(UnownedStringSlice kernelName)
+    {
+        auto entryPointCount = (int) m_programLayout->getEntryPointCount();
+        for(int i = 0; i < entryPointCount; i++)
+        {
+            auto entryPoint = m_programLayout->getEntryPointByIndex(i);
+            if (kernelName == entryPoint->getName())
+            {
+                return i;
+            }
+        }
+        return -1;
+    }
+
+    void getKernelThreadGroupSize(int kernelIndex, UInt* threadGroupSizes)
+    {
+        auto entryPoint = m_programLayout->getEntryPointByIndex(kernelIndex);
+        entryPoint->getComputeThreadGroupSize(3, threadGroupSizes);
+    }
+
+    CPUEntryPointLayout* getEntryPoint(Index index) { return m_entryPointLayouts[index]; }
+};
+
+class CPUShaderObject : public ShaderObjectBase
+{
+public:
+    void* m_data = nullptr;
+
+    ~CPUShaderObject()
+    {
+        free(m_data);
+    }
+
+    List<RefPtr<CPUShaderObject>> m_objects;
+    List<RefPtr<CPUResourceView>> m_resources;
+
+    virtual SLANG_NO_THROW Result SLANG_MCALL
+        init(IDevice* device, CPUShaderObjectLayout* typeLayout);
+
+    CPUShaderObjectLayout* getLayout()
+    {
+        return static_cast<CPUShaderObjectLayout*>(m_layout.Ptr());
+    }
+
+#if 0
+    virtual SLANG_NO_THROW Result SLANG_MCALL initBuffer(IDevice* device, size_t bufferSize)
+    {
+        BufferResource::Desc bufferDesc;
+        bufferDesc.init(bufferSize);
+        bufferDesc.cpuAccessFlags |= IResource::AccessFlag::Write;
+        ComPtr<IBufferResource> constantBuffer;
+        SLANG_RETURN_ON_FAIL(renderer->createBufferResource(
+            IResource::Usage::ConstantBuffer, bufferDesc, nullptr, constantBuffer.writeRef()));
+        bufferResource = dynamic_cast<MemoryCUDAResource*>(constantBuffer.get());
+        return SLANG_OK;
+    }
+#endif
+
+#if 0
+    virtual SLANG_NO_THROW void* SLANG_MCALL getBuffer()
+    {
+        return bufferResource ? bufferResource->m_cudaMemory : nullptr;
+    }
+
+    virtual SLANG_NO_THROW size_t SLANG_MCALL getBufferSize()
+    {
+        return bufferResource ? bufferResource->getDesc()->sizeInBytes : 0;
+    }
+#endif
+
+    virtual SLANG_NO_THROW slang::TypeLayoutReflection* SLANG_MCALL getElementTypeLayout() override
+    {
+        return getLayout()->getElementTypeLayout();
+    }
+
+    virtual SLANG_NO_THROW UInt SLANG_MCALL getEntryPointCount() override { return 0; }
+    virtual SLANG_NO_THROW Result SLANG_MCALL
+        getEntryPoint(UInt index, IShaderObject** outEntryPoint) override
+    {
+        *outEntryPoint = nullptr;
+        return SLANG_OK;
+    }
+    virtual SLANG_NO_THROW Result SLANG_MCALL
+        setData(ShaderOffset const& offset, void const* data, size_t size)
+    {
+        size = Math::Min(size, getLayout()->getSize() - offset.uniformOffset);
+        memcpy((char*)m_data + offset.uniformOffset, data, size);
+        return SLANG_OK;
+    }
+    virtual SLANG_NO_THROW Result SLANG_MCALL getObject(
+        ShaderOffset const& offset,
+        IShaderObject**     outObject)
+    {
+        auto layout = getLayout();
+
+        auto bindingRangeIndex = offset.bindingRangeIndex;
+        SLANG_ASSERT(bindingRangeIndex >= 0);
+        SLANG_ASSERT(bindingRangeIndex < layout->m_bindingRanges.getCount());
+
+        auto& bindingRange = layout->m_bindingRanges[bindingRangeIndex];
+        auto subObjectIndex = bindingRange.baseIndex + offset.bindingArrayIndex;
+        CPUShaderObject* subObject = m_objects[subObjectIndex];
+
+        *outObject = ComPtr<IShaderObject>(subObject).detach();
+
+        return SLANG_OK;
+    }
+    virtual SLANG_NO_THROW Result SLANG_MCALL setObject(
+        ShaderOffset const& offset,
+        IShaderObject*      object)
+    {
+        auto layout = getLayout();
+
+        auto bindingRangeIndex = offset.bindingRangeIndex;
+        SLANG_ASSERT(bindingRangeIndex >= 0);
+        SLANG_ASSERT(bindingRangeIndex < layout->m_bindingRanges.getCount());
+
+        auto& bindingRange = layout->m_bindingRanges[bindingRangeIndex];
+        auto subObjectIndex = bindingRange.baseIndex + offset.bindingArrayIndex;
+
+        CPUShaderObject* subObject = static_cast<CPUShaderObject*>(object);
+        m_objects[subObjectIndex] = subObject;
+
+        switch( bindingRange.bindingType )
+        {
+        default:
+            SLANG_RETURN_ON_FAIL(setData(offset, &subObject->m_data, sizeof(void*)));
+            break;
+
+        // If the range being assigned into represents an interface/existential-type leaf field,
+        // then we need to consider how the `object` being assigned here affects specialization.
+        // We may also need to assign some data from the sub-object into the ordinary data
+        // buffer for the parent object.
+        //
+        case slang::BindingType::ExistentialValue:
+            {
+                auto renderer = getRenderer();
+
+                ComPtr<slang::ISession> slangSession;
+                SLANG_RETURN_ON_FAIL(renderer->getSlangSession(slangSession.writeRef()));
+
+                // A leaf field of interface type is laid out inside of the parent object
+                // as a tuple of `(RTTI, WitnessTable, Payload)`. The layout of these fields
+                // is a contract between the compiler and any runtime system, so we will
+                // need to rely on details of the binary layout.
+
+                // We start by querying the layout/type of the concrete value that the application
+                // is trying to store into the field, and also the layout/type of the leaf
+                // existential-type field itself.
+                //
+                auto concreteTypeLayout = subObject->getElementTypeLayout();
+                auto concreteType = concreteTypeLayout->getType();
+                //
+                auto existentialTypeLayout = layout->getElementTypeLayout()->getBindingRangeLeafTypeLayout(bindingRangeIndex);
+                auto existentialType = existentialTypeLayout->getType();
+
+                // The first field of the tuple (offset zero) is the run-time type information (RTTI)
+                // ID for the concrete type being stored into the field.
+                //
+                // TODO: We need to be able to gather the RTTI type ID from `object` and then
+                // use `setData(offset, &TypeID, sizeof(TypeID))`.
+
+                // The second field of the tuple (offset 8) is the ID of the "witness" for the
+                // conformance of the concrete type to the interface used by this field.
+                //
+                auto witnessTableOffset = offset;
+                witnessTableOffset.uniformOffset += 8;
+                //
+                // Conformances of a type to an interface are computed and then stored by the
+                // Slang runtime, so we can look up the ID for this particular conformance (which
+                // will create it on demand).
+                //
+                // Note: If the type doesn't actually conform to the required interface for
+                // this sub-object range, then this is the point where we will detect that
+                // fact and error out.
+                //
+                uint32_t conformanceID = 0xFFFFFFFF;
+                SLANG_RETURN_ON_FAIL(slangSession->getTypeConformanceWitnessSequentialID(
+                    concreteType, existentialType, &conformanceID));
+                //
+                // Once we have the conformance ID, then we can write it into the object
+                // at the required offset.
+                //
+                SLANG_RETURN_ON_FAIL(setData(witnessTableOffset, &conformanceID, sizeof(conformanceID)));
+
+                // The third field of the tuple (offset 16) is the "payload" that is supposed to
+                // hold the data for a value of the given concrete type.
+                //
+                auto payloadOffset = offset;
+                payloadOffset.uniformOffset += 16;
+
+                // There are two cases we need to consider here for how the payload might be used:
+                //
+                // * If the concrete type of the value being bound is one that can "fit" into the
+                //   available payload space,  then it should be stored in the payload.
+                //
+                // * If the concrete type of the value cannot fit in the payload space, then it
+                //   will need to be stored somewhere else.
+                //
+                if(_doesValueFitInExistentialPayload(concreteTypeLayout, existentialTypeLayout))
+                {
+                    // If the value can fit in the payload area, then we will go ahead and copy
+                    // its bytes into that area.
+                    //
+                    auto valueSize = concreteTypeLayout->getSize();
+                    SLANG_RETURN_ON_FAIL(setData(payloadOffset, subObject->m_data, valueSize));
+                }
+                else
+                {
+                    // If the value cannot fit in the payload area, then we will pass a pointer
+                    // to the sub-object instead.
+                    //
+                    // Note: The Slang compiler does not currently emit code that handles the
+                    // pointer case, but that is the expected implementation for values
+                    // that do not fit into the fixed-size payload.
+                    //
+                    SLANG_RETURN_ON_FAIL(setData(payloadOffset, &subObject->m_data, sizeof(void*)));
+                }
+            }
+            break;
+        }
+
+        return SLANG_OK;
+    }
+    virtual SLANG_NO_THROW Result SLANG_MCALL
+        setResource(ShaderOffset const& offset, IResourceView* inView)
+    {
+        auto layout = getLayout();
+
+        auto bindingRangeIndex = offset.bindingRangeIndex;
+        SLANG_ASSERT(bindingRangeIndex >= 0);
+        SLANG_ASSERT(bindingRangeIndex < layout->m_bindingRanges.getCount());
+
+        auto& bindingRange = layout->m_bindingRanges[bindingRangeIndex];
+        auto viewIndex = bindingRange.baseIndex + offset.bindingArrayIndex;
+
+
+        auto view = static_cast<CPUResourceView*>(inView);
+        m_resources[viewIndex] = view;
+
+        switch( view->getViewKind() )
+        {
+        case CPUResourceView::Kind::Texture:
+            {
+                auto textureView = static_cast<CPUTextureView*>(view);
+
+                slang_prelude::IRWTexture* textureObj = textureView;
+                SLANG_RETURN_ON_FAIL(setData(offset, &textureObj, sizeof(textureObj)));
+            }
+            break;
+
+        case CPUResourceView::Kind::Buffer:
+            {
+                auto bufferView = static_cast<CPUBufferView*>(view);
+                auto buffer = bufferView->getBuffer();
+                auto desc = *buffer->getDesc();
+
+                void* dataPtr = buffer->m_data;
+                size_t size = desc.sizeInBytes;
+                if (desc.elementSize > 1)
+                    size /= desc.elementSize;
+
+                auto ptrOffset = offset;
+                SLANG_RETURN_ON_FAIL(setData(ptrOffset, &dataPtr, sizeof(dataPtr)));
+
+                auto sizeOffset = offset;
+                sizeOffset.uniformOffset += sizeof(dataPtr);
+                SLANG_RETURN_ON_FAIL(setData(sizeOffset, &size, sizeof(size)));
+            }
+            break;
+        }
+
+        return SLANG_OK;
+    }
+    virtual SLANG_NO_THROW Result SLANG_MCALL
+        setSampler(ShaderOffset const& offset, ISamplerState* sampler)
+    {
+        SLANG_UNUSED(sampler);
+        SLANG_UNUSED(offset);
+        return SLANG_OK;
+    }
+    virtual SLANG_NO_THROW Result SLANG_MCALL setCombinedTextureSampler(
+        ShaderOffset const& offset, IResourceView* textureView, ISamplerState* sampler)
+    {
+        SLANG_UNUSED(sampler);
+        setResource(offset, textureView);
+        return SLANG_OK;
+    }
+
+    // Appends all types that are used to specialize the element type of this shader object in `args` list.
+    virtual Result collectSpecializationArgs(ExtendedShaderObjectTypeList& args) override
+    {
+        // TODO: the logic here is a copy-paste of `GraphicsCommonShaderObject::collectSpecializationArgs`,
+        // consider moving the implementation to `ShaderObjectBase` and share the logic among different implementations.
+
+        auto& subObjectRanges = getLayout()->subObjectRanges;
+        // The following logic is built on the assumption that all fields that involve existential types (and
+        // therefore require specialization) will results in a sub-object range in the type layout.
+        // This allows us to simply scan the sub-object ranges to find out all specialization arguments.
+        for (Index subObjIndex = 0; subObjIndex < subObjectRanges.getCount(); subObjIndex++)
+        {
+            // Retrieve the corresponding binding range of the sub object.
+            auto bindingRange = getLayout()->m_bindingRanges[subObjectRanges[subObjIndex].bindingRangeIndex];
+            switch (bindingRange.bindingType)
+            {
+            case slang::BindingType::ExistentialValue:
+            {
+                // A binding type of `ExistentialValue` means the sub-object represents a interface-typed field.
+                // In this case the specialization argument for this field is the actual specialized type of the bound
+                // shader object. If the shader object's type is an ordinary type without existential fields, then the
+                // type argument will simply be the ordinary type. But if the sub object's type is itself a specialized
+                // type, we need to make sure to use that type as the specialization argument.
+
+                // TODO: need to implement the case where the field is an array of existential values.
+                SLANG_ASSERT(bindingRange.count == 1);
+                ExtendedShaderObjectType specializedSubObjType;
+                SLANG_RETURN_ON_FAIL(m_objects[subObjIndex]->getSpecializedShaderObjectType(&specializedSubObjType));
+                args.add(specializedSubObjType);
+                break;
+            }
+            case slang::BindingType::ParameterBlock:
+            case slang::BindingType::ConstantBuffer:
+                // Currently we only handle the case where the field's type is
+                // `ParameterBlock<SomeStruct>` or `ConstantBuffer<SomeStruct>`, where `SomeStruct` is a struct type
+                // (not directly an interface type). In this case, we just recursively collect the specialization arguments
+                // from the bound sub object.
+                SLANG_RETURN_ON_FAIL(m_objects[subObjIndex]->collectSpecializationArgs(args));
+                // TODO: we need to handle the case where the field is of the form `ParameterBlock<IFoo>`. We should treat
+                // this case the same way as the `ExistentialValue` case here, but currently we lack a mechanism to distinguish
+                // the two scenarios.
+                break;
+            }
+            // TODO: need to handle another case where specialization happens on resources fields e.g. `StructuredBuffer<IFoo>`.
+        }
+        return SLANG_OK;
+    }
+};
+
+class CPUEntryPointShaderObject : public CPUShaderObject
+{
+public:
+    CPUEntryPointLayout* getLayout() { return static_cast<CPUEntryPointLayout*>(m_layout.Ptr()); }
+};
+
+class CPURootShaderObject : public CPUShaderObject
+{
+public:
+    SlangResult init(IDevice* device, CPUProgramLayout* programLayout);
+
+    CPUProgramLayout* getLayout() { return static_cast<CPUProgramLayout*>(m_layout.Ptr()); }
+
+    CPUEntryPointShaderObject* getEntryPoint(Index index) { return m_entryPoints[index]; }
+
+    List<RefPtr<CPUEntryPointShaderObject>> m_entryPoints;
+
+    virtual SLANG_NO_THROW UInt SLANG_MCALL getEntryPointCount() override { return m_entryPoints.getCount(); }
+    virtual SLANG_NO_THROW Result SLANG_MCALL
+        getEntryPoint(UInt index, IShaderObject** outEntryPoint) override
+    {
+        *outEntryPoint = ComPtr<IShaderObject>(m_entryPoints[index]).detach();
+        return SLANG_OK;
+    }
+    virtual Result collectSpecializationArgs(ExtendedShaderObjectTypeList& args) override
+    {
+        SLANG_RETURN_ON_FAIL(CPUShaderObject::collectSpecializationArgs(args));
+        for (auto& entryPoint : m_entryPoints)
+        {
+            SLANG_RETURN_ON_FAIL(entryPoint->collectSpecializationArgs(args));
+        }
+        return SLANG_OK;
+    }
+};
+
+class CPUShaderProgram : public ShaderProgramBase
+{
+public:
+    RefPtr<CPUProgramLayout> layout;
+
+    ~CPUShaderProgram()
+    {
+    }
+};
+
+class CPUPipelineState : public PipelineStateBase
+{
+public:
+    CPUShaderProgram* getProgram() { return static_cast<CPUShaderProgram*>(m_program.get()); }
+
+    void init(const ComputePipelineStateDesc& inDesc)
+    {
+        PipelineStateDesc pipelineDesc;
+        pipelineDesc.type = PipelineType::Compute;
+        pipelineDesc.compute = inDesc;
+        initializeBase(pipelineDesc);
+    }
+};
+
+class CPUDevice : public RendererBase
+{
+private:
+    RefPtr<CPUPipelineState> m_currentPipeline = nullptr;
+    RefPtr<CPURootShaderObject> m_currentRootObject = nullptr;
+    DeviceInfo m_info;
+
+    class CommandQueueImpl;
+
+    class CommandBufferImpl
+        : public ICommandBuffer
+        , public CommandWriter
+        , public RefObject
+    {
+    public:
+        SLANG_REF_OBJECT_IUNKNOWN_ALL
+        ICommandBuffer* getInterface(const Guid& guid)
+        {
+            if (guid == GfxGUID::IID_ISlangUnknown || guid == GfxGUID::IID_ICommandBuffer)
+                return static_cast<ICommandBuffer*>(this);
+            return nullptr;
+        }
+    public:
+        virtual SLANG_NO_THROW void SLANG_MCALL encodeRenderCommands(
+            IRenderPassLayout* renderPass,
+            IFramebuffer* framebuffer,
+            IRenderCommandEncoder** outEncoder) override
+        {
+            SLANG_UNUSED(renderPass);
+            SLANG_UNUSED(framebuffer);
+            *outEncoder = nullptr;
+        }
+
+        class ComputeCommandEncoderImpl
+            : public IComputeCommandEncoder
+        {
+        public:
+            virtual SLANG_NO_THROW SlangResult SLANG_MCALL
+                queryInterface(SlangUUID const& uuid, void** outObject) override
+            {
+                if (uuid == GfxGUID::IID_ISlangUnknown ||
+                    uuid == GfxGUID::IID_IComputeCommandEncoder)
+                {
+                    *outObject = static_cast<IComputeCommandEncoder*>(this);
+                    return SLANG_OK;
+                }
+                *outObject = nullptr;
+                return SLANG_E_NO_INTERFACE;
+            }
+            virtual SLANG_NO_THROW uint32_t SLANG_MCALL addRef() { return 1; }
+            virtual SLANG_NO_THROW uint32_t SLANG_MCALL release() { return 1; }
+
+        public:
+            CommandWriter* m_writer;
+
+            virtual SLANG_NO_THROW void SLANG_MCALL endEncoding() override {}
+            void init(CommandBufferImpl* cmdBuffer)
+            {
+                m_writer = cmdBuffer;
+            }
+
+            virtual SLANG_NO_THROW void SLANG_MCALL setPipelineState(IPipelineState* state) override
+            {
+                m_writer->setPipelineState(state);
+            }
+            virtual SLANG_NO_THROW void SLANG_MCALL
+                bindRootShaderObject(IShaderObject* object) override
+            {
+                m_writer->bindRootShaderObject(PipelineType::Compute, object);
+            }
+
+            virtual SLANG_NO_THROW void SLANG_MCALL setDescriptorSet(
+                IPipelineLayout* layout,
+                UInt index,
+                IDescriptorSet* descriptorSet) override
+            {
+                m_writer->setDescriptorSet(PipelineType::Compute, layout, index, descriptorSet);
+            }
+
+            virtual SLANG_NO_THROW void SLANG_MCALL dispatchCompute(int x, int y, int z) override
+            {
+                m_writer->dispatchCompute(x, y, z);
+            }
+        };
+
+        ComputeCommandEncoderImpl m_computeCommandEncoder;
+        virtual SLANG_NO_THROW void SLANG_MCALL
+            encodeComputeCommands(IComputeCommandEncoder** outEncoder) override
+        {
+            m_computeCommandEncoder.init(this);
+            *outEncoder = &m_computeCommandEncoder;
+        }
+
+        class ResourceCommandEncoderImpl
+            : public IResourceCommandEncoder
+        {
+        public:
+            virtual SLANG_NO_THROW SlangResult SLANG_MCALL
+                queryInterface(SlangUUID const& uuid, void** outObject) override
+            {
+                if (uuid == GfxGUID::IID_ISlangUnknown ||
+                    uuid == GfxGUID::IID_IResourceCommandEncoder)
+                {
+                    *outObject = static_cast<IResourceCommandEncoder*>(this);
+                    return SLANG_OK;
+                }
+                *outObject = nullptr;
+                return SLANG_E_NO_INTERFACE;
+            }
+            virtual SLANG_NO_THROW uint32_t SLANG_MCALL addRef() { return 1; }
+            virtual SLANG_NO_THROW uint32_t SLANG_MCALL release() { return 1; }
+
+        public:
+            CommandWriter* m_writer;
+
+            void init(CommandBufferImpl* cmdBuffer)
+            {
+                m_writer = cmdBuffer;
+            }
+
+            virtual SLANG_NO_THROW void SLANG_MCALL endEncoding() override {}
+            virtual SLANG_NO_THROW void SLANG_MCALL copyBuffer(
+                IBufferResource* dst,
+                size_t dstOffset,
+                IBufferResource* src,
+                size_t srcOffset,
+                size_t size) override
+            {
+                m_writer->copyBuffer(dst, dstOffset, src, srcOffset, size);
+            }
+
+            virtual SLANG_NO_THROW void SLANG_MCALL
+                uploadBufferData(IBufferResource* dst, size_t offset, size_t size, void* data)
+            {
+                m_writer->uploadBufferData(dst, offset, size, data);
+            }
+        };
+
+        ResourceCommandEncoderImpl m_resourceCommandEncoder;
+
+        virtual SLANG_NO_THROW void SLANG_MCALL
+            encodeResourceCommands(IResourceCommandEncoder** outEncoder) override
+        {
+            m_resourceCommandEncoder.init(this);
+            *outEncoder = &m_resourceCommandEncoder;
+        }
+
+        virtual SLANG_NO_THROW void SLANG_MCALL close() override {}
+    };
+
+    class CommandQueueImpl
+        : public ICommandQueue
+        , public RefObject
+    {
+    public:
+        SLANG_REF_OBJECT_IUNKNOWN_ALL
+        ICommandQueue* getInterface(const Guid& guid)
+        {
+            if (guid == GfxGUID::IID_ISlangUnknown || guid == GfxGUID::IID_ICommandQueue)
+                return static_cast<ICommandQueue*>(this);
+            return nullptr;
+        }
+
+    public:
+        RefPtr<CPUPipelineState> currentPipeline;
+        RefPtr<CPURootShaderObject> currentRootObject;
+        RefPtr<CPUDevice> renderer;
+        Desc m_desc;
+    public:
+        void init(CPUDevice* inRenderer)
+        {
+            renderer = inRenderer;
+            m_desc.type = ICommandQueue::QueueType::Graphics;
+        }
+        ~CommandQueueImpl()
+        {
+            currentPipeline = nullptr;
+            currentRootObject = nullptr;
+        }
+
+    public:
+        virtual SLANG_NO_THROW const Desc& SLANG_MCALL getDesc() override
+        {
+            return m_desc;
+        }
+        virtual SLANG_NO_THROW Result SLANG_MCALL
+            createCommandBuffer(ICommandBuffer** outCommandBuffer) override
+        {
+            RefPtr<CommandBufferImpl> result = new CommandBufferImpl();
+            *outCommandBuffer = result.detach();
+            return SLANG_OK;
+        }
+
+        virtual SLANG_NO_THROW void SLANG_MCALL
+            executeCommandBuffers(uint32_t count, ICommandBuffer* const* commandBuffers) override
+        {
+            for (uint32_t i = 0; i < count; i++)
+            {
+                execute(static_cast<CommandBufferImpl*>(commandBuffers[i]));
+            }
+        }
+
+        virtual SLANG_NO_THROW void SLANG_MCALL wait() override
+        {}
+
+    public:
+        void setPipelineState(IPipelineState* state)
+        {
+            currentPipeline = static_cast<CPUPipelineState*>(state);
+        }
+
+        Result bindRootShaderObject(PipelineType pipelineType, IShaderObject* object)
+        {
+            currentRootObject = static_cast<CPURootShaderObject*>(object);
+            if (currentRootObject)
+                return SLANG_OK;
+            return SLANG_E_INVALID_ARG;
+        }
+
+        void dispatchCompute(int x, int y, int z)
+        {
+            int entryPointIndex = 0;
+            int targetIndex = 0;
+
+            // Specialize the compute kernel based on the shader object bindings.
+            RefPtr<PipelineStateBase> newPipeline;
+            renderer->maybeSpecializePipeline(currentPipeline, currentRootObject, newPipeline);
+            currentPipeline = static_cast<CPUPipelineState*>(newPipeline.Ptr());
+
+            auto program = currentPipeline->getProgram();
+            auto entryPointLayout = currentRootObject->getLayout()->getEntryPoint(entryPointIndex);
+            auto entryPointName = entryPointLayout->getEntryPointName();
+
+            auto entryPointObject = currentRootObject->getEntryPoint(entryPointIndex);
+
+            ComPtr<ISlangSharedLibrary> sharedLibrary;
+            program->slangProgram->getEntryPointHostCallable(entryPointIndex, targetIndex, sharedLibrary.writeRef());
+
+            auto func = (slang_prelude::ComputeFunc) sharedLibrary->findSymbolAddressByName(entryPointName);
+
+            slang_prelude::ComputeVaryingInput varyingInput;
+            varyingInput.startGroupID.x = 0;
+            varyingInput.startGroupID.y = 0;
+            varyingInput.startGroupID.z = 0;
+            varyingInput.endGroupID.x = x;
+            varyingInput.endGroupID.y = y;
+            varyingInput.endGroupID.z = z;
+
+            auto globalParamsData = currentRootObject->m_data;
+            auto entryPointParamsData = entryPointObject->m_data;
+            func(&varyingInput, entryPointParamsData, globalParamsData);
+        }
+
+        void copyBuffer(
+            IBufferResource* dst,
+            size_t dstOffset,
+            IBufferResource* src,
+            size_t srcOffset,
+            size_t size)
+        {
+            auto dstImpl = static_cast<CPUBufferResource*>(dst);
+            auto srcImpl = static_cast<CPUBufferResource*>(src);
+            memcpy(
+                (uint8_t*)dstImpl->m_data + dstOffset,
+                (uint8_t*)srcImpl->m_data + srcOffset,
+                size);
+        }
+
+        void uploadBufferData(IBufferResource* dst, size_t offset, size_t size, void* data)
+        {
+            auto dstImpl = static_cast<CPUBufferResource*>(dst);
+            memcpy((uint8_t*)dstImpl->m_data + offset, data, size);
+        }
+
+        void execute(CommandBufferImpl* commandBuffer)
+        {
+            for (auto& cmd : commandBuffer->m_commands)
+            {
+                switch (cmd.name)
+                {
+                case CommandName::SetPipelineState:
+                    setPipelineState(commandBuffer->getObject<IPipelineState>(cmd.operands[0]));
+                    break;
+                case CommandName::BindRootShaderObject:
+                    bindRootShaderObject(
+                        (PipelineType)cmd.operands[0],
+                        commandBuffer->getObject<IShaderObject>(cmd.operands[1]));
+                    break;
+                case CommandName::DispatchCompute:
+                    dispatchCompute(
+                        int(cmd.operands[0]), int(cmd.operands[1]), int(cmd.operands[2]));
+                    break;
+                case CommandName::CopyBuffer:
+                    copyBuffer(
+                        commandBuffer->getObject<IBufferResource>(cmd.operands[0]),
+                        cmd.operands[1],
+                        commandBuffer->getObject<IBufferResource>(cmd.operands[2]),
+                        cmd.operands[3],
+                        cmd.operands[4]);
+                    break;
+                case CommandName::UploadBufferData:
+                    uploadBufferData(
+                        commandBuffer->getObject<IBufferResource>(cmd.operands[0]),
+                        cmd.operands[1],
+                        cmd.operands[2],
+                        commandBuffer->getData<uint8_t>(cmd.operands[3]));
+                    break;
+                }
+            }
+        }
+    };
+
+public:
+    ~CPUDevice()
+    {
+        m_currentPipeline = nullptr;
+        m_currentRootObject = nullptr;
+    }
+
+    virtual SLANG_NO_THROW SlangResult SLANG_MCALL initialize(const Desc& desc) override
+    {
+        SLANG_RETURN_ON_FAIL(slangContext.initialize(desc.slang, SLANG_HOST_CALLABLE, "sm_5_1"));
+
+        SLANG_RETURN_ON_FAIL(RendererBase::initialize(desc));
+
+        // Initialize DeviceInfo
+        {
+            m_info.deviceType = DeviceType::CPU;
+            m_info.bindingStyle = BindingStyle::CUDA;
+            m_info.projectionStyle = ProjectionStyle::DirectX;
+            m_info.apiName = "CPU";
+            static const float kIdentity[] = {1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1};
+            ::memcpy(m_info.identityProjectionMatrix, kIdentity, sizeof(kIdentity));
+            m_info.adapterName = "CPU";
+        }
+
+        return SLANG_OK;
+    }
+
+    virtual SLANG_NO_THROW Result SLANG_MCALL createTextureResource(
+        IResource::Usage initialUsage,
+        const ITextureResource::Desc& desc,
+        const ITextureResource::SubresourceData* initData,
+        ITextureResource** outResource) override
+    {
+        RefPtr<CPUTextureResource> texture = new CPUTextureResource(desc);
+
+        SLANG_RETURN_ON_FAIL(texture->init(initData));
+
+        *outResource = texture.detach();
+        return SLANG_OK;
+    }
+
+    virtual SLANG_NO_THROW Result SLANG_MCALL createBufferResource(
+        IResource::Usage initialUsage,
+        const IBufferResource::Desc& desc,
+        const void* initData,
+        IBufferResource** outResource) override
+    {
+        RefPtr<CPUBufferResource> resource = new CPUBufferResource(desc);
+        SLANG_RETURN_ON_FAIL(resource->init());
+        if (initData)
+        {
+            SLANG_RETURN_ON_FAIL(resource->setData(0, desc.sizeInBytes, initData));
+        }
+        *outResource = resource.detach();
+        return SLANG_OK;
+    }
+
+    virtual SLANG_NO_THROW Result SLANG_MCALL createTextureView(
+        ITextureResource* inTexture, IResourceView::Desc const& desc, IResourceView** outView) override
+    {
+        auto texture = static_cast<CPUTextureResource*>(inTexture);
+        RefPtr<CPUTextureView> view = new CPUTextureView(desc, texture);
+        *outView = view.detach();
+        return SLANG_OK;
+    }
+
+    virtual SLANG_NO_THROW Result SLANG_MCALL createBufferView(
+        IBufferResource* inBuffer, IResourceView::Desc const& desc, IResourceView** outView) override
+    {
+        auto buffer = static_cast<CPUBufferResource*>(inBuffer);
+        RefPtr<CPUBufferView> view = new CPUBufferView(desc, buffer);
+        *outView = view.detach();
+        return SLANG_OK;
+    }
+
+    virtual Result createShaderObjectLayout(
+        slang::TypeLayoutReflection*    typeLayout,
+        ShaderObjectLayoutBase**        outLayout) override
+    {
+        RefPtr<CPUShaderObjectLayout> cpuLayout = new CPUShaderObjectLayout(this, typeLayout);
+        *outLayout = cpuLayout.detach();
+
+        return SLANG_OK;
+    }
+
+    virtual Result createShaderObject(
+        ShaderObjectLayoutBase* layout,
+        IShaderObject**         outObject) override
+    {
+        auto cpuLayout = static_cast<CPUShaderObjectLayout*>(layout);
+
+        RefPtr<CPUShaderObject> result = new CPUShaderObject();
+        SLANG_RETURN_ON_FAIL(result->init(this, cpuLayout));
+        *outObject = result.detach();
+
+        return SLANG_OK;
+    }
+
+    virtual SLANG_NO_THROW Result SLANG_MCALL
+        createRootShaderObject(IShaderProgram* program, IShaderObject** outObject) override
+    {
+        auto cpuProgram = static_cast<CPUShaderProgram*>(program);
+        auto cpuProgramLayout = cpuProgram->layout;
+
+        RefPtr<CPURootShaderObject> result = new CPURootShaderObject();
+        SLANG_RETURN_ON_FAIL(result->init(this, cpuProgramLayout));
+        *outObject = result.detach();
+        return SLANG_OK;
+    }
+
+    virtual SLANG_NO_THROW Result SLANG_MCALL
+        createProgram(const IShaderProgram::Desc& desc, IShaderProgram** outProgram) override
+    {
+        if( desc.kernelCount == 0 )
+        {
+            return createProgramFromSlang(this, desc, outProgram);
+        }
+
+        if (desc.kernelCount != 1)
+            return SLANG_E_INVALID_ARG;
+
+        RefPtr<CPUShaderProgram> cpuProgram = new CPUShaderProgram();
+
+        // TODO: stuff?
+
+        auto slangProgram = desc.slangProgram;
+        if( slangProgram )
+        {
+            cpuProgram->slangProgram = slangProgram;
+
+            auto slangProgramLayout = slangProgram->getLayout();
+            if(!slangProgramLayout)
+                return SLANG_FAIL;
+
+            RefPtr<CPUProgramLayout> cpuProgramLayout = new CPUProgramLayout(this, slangProgramLayout);
+            cpuProgramLayout->m_programLayout = slangProgramLayout;
+
+            cpuProgram->layout = cpuProgramLayout;
+        }
+
+        *outProgram = cpuProgram.detach();
+        return SLANG_OK;
+    }
+
+    virtual SLANG_NO_THROW Result SLANG_MCALL createComputePipelineState(
+        const ComputePipelineStateDesc& desc, IPipelineState** outState) override
+    {
+        RefPtr<CPUPipelineState> state = new CPUPipelineState();
+        state->init(desc);
+        *outState = state.detach();
+        return Result();
+    }
+
+    virtual SLANG_NO_THROW const DeviceInfo& SLANG_MCALL getDeviceInfo() const override
+    {
+        return m_info;
+    }
+
+public:
+    virtual SLANG_NO_THROW Result SLANG_MCALL
+        createCommandQueue(const ICommandQueue::Desc& desc, ICommandQueue** outQueue) override
+    {
+        RefPtr<CommandQueueImpl> queue = new CommandQueueImpl();
+        queue->init(this);
+        *outQueue = queue.detach();
+        return SLANG_OK;
+    }
+    virtual SLANG_NO_THROW Result SLANG_MCALL createSwapchain(
+        const ISwapchain::Desc& desc, WindowHandle window, ISwapchain** outSwapchain) override
+    {
+        SLANG_UNUSED(desc);
+        SLANG_UNUSED(window);
+        SLANG_UNUSED(outSwapchain);
+        return SLANG_FAIL;
+    }
+    virtual SLANG_NO_THROW Result SLANG_MCALL createFramebufferLayout(
+        const IFramebufferLayout::Desc& desc, IFramebufferLayout** outLayout) override
+    {
+        SLANG_UNUSED(desc);
+        SLANG_UNUSED(outLayout);
+        return SLANG_FAIL;
+    }
+    virtual SLANG_NO_THROW Result SLANG_MCALL
+        createFramebuffer(const IFramebuffer::Desc& desc, IFramebuffer** outFramebuffer) override
+    {
+        SLANG_UNUSED(desc);
+        SLANG_UNUSED(outFramebuffer);
+        return SLANG_FAIL;
+    }
+    virtual SLANG_NO_THROW Result SLANG_MCALL createRenderPassLayout(
+        const IRenderPassLayout::Desc& desc,
+        IRenderPassLayout** outRenderPassLayout) override
+    {
+        SLANG_UNUSED(desc);
+        SLANG_UNUSED(outRenderPassLayout);
+        return SLANG_FAIL;
+    }
+    virtual SLANG_NO_THROW Result SLANG_MCALL
+        createSamplerState(ISamplerState::Desc const& desc, ISamplerState** outSampler) override
+    {
+        SLANG_UNUSED(desc);
+        *outSampler = nullptr;
+        return SLANG_OK;
+    }
+    
+    virtual SLANG_NO_THROW Result SLANG_MCALL createInputLayout(
+        const InputElementDesc* inputElements,
+        UInt inputElementCount,
+        IInputLayout** outLayout) override
+    {
+        SLANG_UNUSED(inputElements);
+        SLANG_UNUSED(inputElementCount);
+        SLANG_UNUSED(outLayout);
+        return SLANG_E_NOT_AVAILABLE;
+    }
+    virtual SLANG_NO_THROW Result SLANG_MCALL createDescriptorSetLayout(
+        const IDescriptorSetLayout::Desc& desc, IDescriptorSetLayout** outLayout) override
+    {
+        SLANG_UNUSED(desc);
+        SLANG_UNUSED(outLayout);
+        return SLANG_E_NOT_AVAILABLE;
+    }
+    virtual SLANG_NO_THROW Result SLANG_MCALL
+        createPipelineLayout(const IPipelineLayout::Desc& desc, IPipelineLayout** outLayout) override
+    {
+        SLANG_UNUSED(desc);
+        SLANG_UNUSED(outLayout);
+        return SLANG_E_NOT_AVAILABLE;
+    }
+    virtual SLANG_NO_THROW Result SLANG_MCALL
+        createDescriptorSet(IDescriptorSetLayout* layout, IDescriptorSet::Flag::Enum flags, IDescriptorSet** outDescriptorSet) override
+    {
+        SLANG_UNUSED(layout);
+        SLANG_UNUSED(flags);
+        SLANG_UNUSED(outDescriptorSet);
+        return SLANG_E_NOT_AVAILABLE;
+    }
+    virtual SLANG_NO_THROW Result SLANG_MCALL createGraphicsPipelineState(
+        const GraphicsPipelineStateDesc& desc, IPipelineState** outState) override
+    {
+        SLANG_UNUSED(desc);
+        SLANG_UNUSED(outState);
+        return SLANG_E_NOT_AVAILABLE;
+    }
+    virtual SLANG_NO_THROW SlangResult SLANG_MCALL readTextureResource(
+        ITextureResource* texture,
+        ResourceState state,
+        ISlangBlob** outBlob,
+        size_t* outRowPitch,
+        size_t* outPixelSize) override
+    {
+        SLANG_UNUSED(texture);
+        SLANG_UNUSED(outBlob);
+        SLANG_UNUSED(outRowPitch);
+        SLANG_UNUSED(outPixelSize);
+
+        return SLANG_E_NOT_AVAILABLE;
+    }
+    virtual SLANG_NO_THROW Result SLANG_MCALL readBufferResource(
+        IBufferResource* buffer,
+        size_t offset,
+        size_t size,
+        ISlangBlob** outBlob) override
+    {
+        auto bufferImpl = static_cast<CPUBufferResource*>(buffer);
+        RefPtr<ListBlob> blob = new ListBlob();
+        blob->m_data.setCount((Index)size);
+        memcpy(
+            blob->m_data.getBuffer(),
+            (uint8_t*)bufferImpl->m_data + offset,
+            size);
+        *outBlob = blob.detach();
+        return SLANG_OK;
+    }
+};
+
+SlangResult CPUShaderObject::init(IDevice* device, CPUShaderObjectLayout* typeLayout)
+{
+    m_layout = typeLayout;
+
+    // If the layout tells us that there is any uniform data,
+    // then we need to allocate a constant buffer to hold that data.
+    //
+    // TODO: Do we need to allocate a shadow copy for use from
+    // the CPU?
+    //
+    // TODO: When/where do we bind this constant buffer into
+    // a descriptor set for later use?
+    //
+    auto slangLayout = getLayout()->getElementTypeLayout();
+    size_t uniformSize = slangLayout->getSize();
+    if (uniformSize)
+    {
+        m_data = malloc(uniformSize);
+    }
+
+    // If the layout specifies that we have any resources or sub-objects,
+    // then we need to size the appropriate arrays to account for them.
+    //
+    // Note: the counts here are the *total* number of resources/sub-objects
+    // and not just the number of resource/sub-object ranges.
+    //
+    m_resources.setCount(typeLayout->getResourceCount());
+    m_objects.setCount(typeLayout->getSubObjectCount());
+
+    for (auto subObjectRange : getLayout()->subObjectRanges)
+    {
+        RefPtr<CPUShaderObjectLayout> subObjectLayout = subObjectRange.layout;
+
+        // In the case where the sub-object range represents an
+        // existential-type leaf field (e.g., an `IBar`), we
+        // cannot pre-allocate the object(s) to go into that
+        // range, since we can't possibly know what to allocate
+        // at this point.
+        //
+        if (!subObjectLayout)
+            continue;
+        //
+        // Otherwise, we will allocate a sub-object to fill
+        // in each entry in this range, based on the layout
+        // information we already have.
+
+        auto& bindingRangeInfo = getLayout()->m_bindingRanges[subObjectRange.bindingRangeIndex];
+        for (Index i = 0; i < bindingRangeInfo.count; ++i)
+        {
+            RefPtr<CPUShaderObject> subObject = new CPUShaderObject();
+            SLANG_RETURN_ON_FAIL(subObject->init(device, subObjectLayout));
+
+            ShaderOffset offset;
+            offset.uniformOffset = bindingRangeInfo.uniformOffset + sizeof(void*) * i;
+            offset.bindingRangeIndex = subObjectRange.bindingRangeIndex;
+            offset.bindingArrayIndex = i;
+
+            SLANG_RETURN_ON_FAIL(setObject(offset, subObject));
+        }
+    }
+    return SLANG_OK;
+}
+
+SlangResult CPURootShaderObject::init(IDevice* device, CPUProgramLayout* programLayout)
+{
+    SLANG_RETURN_ON_FAIL(CPUShaderObject::init(device, programLayout));
+    for (auto& entryPoint : programLayout->m_entryPointLayouts)
+    {
+        RefPtr<CPUEntryPointShaderObject> object = new CPUEntryPointShaderObject();
+        SLANG_RETURN_ON_FAIL(object->init(device, entryPoint));
+        m_entryPoints.add(object);
+    }
+    return SLANG_OK;
+}
+
+SlangResult SLANG_MCALL createCPUDevice(const IDevice::Desc* desc, IDevice** outDevice)
+{
+    RefPtr<CPUDevice> result = new CPUDevice();
+    SLANG_RETURN_ON_FAIL(result->initialize(*desc));
+    *outDevice = result.detach();
+    return SLANG_OK;
+}
+
+}
diff --git a/tools/gfx/cpu/render-cpu.h b/tools/gfx/cpu/render-cpu.h
new file mode 100644
index 000000000..fca57aa4d
--- /dev/null
+++ b/tools/gfx/cpu/render-cpu.h
@@ -0,0 +1,11 @@
+// render-cpu.h
+#pragma once
+
+#include "../renderer-shared.h"
+
+namespace gfx
+{
+
+SlangResult SLANG_MCALL createCPUDevice(const IDevice::Desc* desc, IDevice** outDevice);
+
+}
author	Tim Foley <tfoleyNV@users.noreply.github.com>	2021-03-12 11:58:14 -0800
committer	GitHub <noreply@github.com>	2021-03-12 11:58:14 -0800
commit	d6a37a0f151e390808f196998c48a341bc4c7b60 (patch)
tree	c1c6e3af434cb3627af67ecc8706124e4b8c7fb1 /tools/gfx/cpu
parent	9ffe2f3ef245034a2dae42017a9059dfe4d02647 (diff)