summaryrefslogtreecommitdiffstats
path: root/tools/gfx/cpu
diff options
context:
space:
mode:
authorTim Foley <tfoleyNV@users.noreply.github.com>2021-03-12 11:58:14 -0800
committerGitHub <noreply@github.com>2021-03-12 11:58:14 -0800
commitd6a37a0f151e390808f196998c48a341bc4c7b60 (patch)
treec1c6e3af434cb3627af67ecc8706124e4b8c7fb1 /tools/gfx/cpu
parent9ffe2f3ef245034a2dae42017a9059dfe4d02647 (diff)
Add a CPU renderer implementation (#1750)
* Add a CPU renderer implementation This change adds a CPU back-end to `gfx` and ensures that most of our existing CPU tests pass when using it. Detailed notes: * Most of the CPU renderer implementation is copy-pasted from the CUDA case, so they share a lot of similar logic * The main addition to the CPU renderer is a semi-complete implementation of host-memory textures. The logic here handles all the main shapes (Buffer, 1D, 2D, 3D, Cube) and all the currently-supported `Format`s that are sample-able as-is (no D24S8). The implementation is not intended to be fast, and it currently only does nearest-neighbor sampling, but otherwise it tries to avoid cutting too many corners and should be a reasonable starting point for a more complete (but not performance-oriented) implementation. * Refactored the CPU prelude `IRWTexture` interface to inherit from `ITexture`, since in most cases a single type will end up implementing both. It might be worth it to collapse it all down to a single interface later. * Changed the CPU prelude `ITexture`/`IRWTexture` interface so that it takes both a pointer *and* a size for output arguments. This change seems necessary to allow a shader variable declared as a `Texture2D<float>` to fetch a single `float` when the underlying texture might be using RGBA32F. * Added to the `IComponentType` public API so that we can query a "host callable" for an entry point and not just a binary. * Turned off the `-shaderobj` flag on two tests that weren't yet compatible with shader objects but still had the flag left in on the path (since previously the CPU path always used the non-`gfx` non-shader-object logic anyway) * Disabled one test (`dynamic-dispatch-11`) that relied on the `ConstantBuffer<IInterface>` idiom that we know we are planning to change soon anyway. * Made a few changes to the CUDA path to bring it into line with what I added for the CPU path. These were mostly bug fixes around indexing logic for sub-objects and resources. * fixup
Diffstat (limited to 'tools/gfx/cpu')
-rw-r--r--tools/gfx/cpu/render-cpu.cpp1796
-rw-r--r--tools/gfx/cpu/render-cpu.h11
2 files changed, 1807 insertions, 0 deletions
diff --git a/tools/gfx/cpu/render-cpu.cpp b/tools/gfx/cpu/render-cpu.cpp
new file mode 100644
index 000000000..faa6b3092
--- /dev/null
+++ b/tools/gfx/cpu/render-cpu.cpp
@@ -0,0 +1,1796 @@
+// render-cpu.cpp
+#include "render-cpu.h"
+
+#include "slang.h"
+#include "slang-com-ptr.h"
+#include "slang-com-helper.h"
+#include "core/slang-basic.h"
+#include "core/slang-blob.h"
+
+#include "../command-writer.h"
+#include "../renderer-shared.h"
+#include "../slang-context.h"
+
+#define SLANG_PRELUDE_NAMESPACE slang_prelude
+#include "prelude/slang-cpp-types.h"
+
+namespace gfx
+{
+using namespace Slang;
+
+// A buffer resource whose storage is a single `malloc`-ed block of host memory.
+class CPUBufferResource : public BufferResource
+{
+public:
+    CPUBufferResource(const Desc& _desc)
+        : BufferResource(_desc)
+    {}
+
+    ~CPUBufferResource()
+    {
+        // `free(nullptr)` is a no-op, so no null check is required.
+        free(m_data);
+    }
+
+    // Allocates the backing store of `m_desc.sizeInBytes` bytes.
+    //
+    // Returns `SLANG_E_OUT_OF_MEMORY` if the allocation fails.
+    SlangResult init()
+    {
+        // `malloc(0)` is allowed to return null, which must not be reported
+        // as an out-of-memory failure, so always request at least one byte.
+        const size_t allocationSize = m_desc.sizeInBytes ? m_desc.sizeInBytes : 1;
+        m_data = malloc(allocationSize);
+        if(!m_data) return SLANG_E_OUT_OF_MEMORY;
+        return SLANG_OK;
+    }
+
+    // Copies `size` bytes from `data` into the buffer, starting at byte `offset`.
+    //
+    // Returns `SLANG_E_INVALID_ARG` (without writing anything) if the
+    // destination range does not fit inside the buffer, or if a non-empty
+    // copy is requested with a null source or before `init` succeeded.
+    SlangResult setData(size_t offset, size_t size, void const* data)
+    {
+        const size_t capacity = m_desc.sizeInBytes;
+
+        // Written as two comparisons so that `offset + size` cannot overflow.
+        if(offset > capacity || size > capacity - offset)
+            return SLANG_E_INVALID_ARG;
+
+        if(size == 0)
+            return SLANG_OK;
+
+        if(!data || !m_data)
+            return SLANG_E_INVALID_ARG;
+
+        memcpy((char*)m_data + offset, data, size);
+        return SLANG_OK;
+    }
+
+    // Host storage for the buffer contents; owned by this object.
+    void* m_data = nullptr;
+};
+
+// Describes how one base texture shape (1D, 2D, cube, ...) maps onto the
+// generic texel storage used by `CPUTextureResource`.
+struct CPUTextureBaseShapeInfo
+{
+ // Number of leading axes (width, height, depth) that are meaningful for
+ // the shape; `CPUTextureResource::init` forces the extent of every
+ // remaining axis to 1.
+ int32_t rank;
+ // Index, within a coordinate array passed to `CPUTextureView`, of the
+ // first coordinate that follows the per-texel base coordinates (this is
+ // where the array element index is read from for array textures).
+ int32_t baseCoordCount;
+ // Factor by which the declared array size is multiplied to get the number
+ // of stored array elements (6 for cube maps in the table below, 1 for the
+ // other known shapes).
+ int32_t implicitArrayElementCount;
+};
+
+// Per-shape layout information, indexed by `(int)ITextureResource::Type`
+// (see `_getBaseShapeInfo`). Each row is
+// `{ rank, baseCoordCount, implicitArrayElementCount }`.
+//
+// NOTE(review): the rows are assumed to be listed in the same order as the
+// enumerators of `ITextureResource::Type` -- confirm whenever the enum changes.
+static const CPUTextureBaseShapeInfo kCPUTextureBaseShapeInfos[(int)ITextureResource::Type::CountOf] =
+{
+ /* Unknown */ { 0, 0, 0 },
+ /* Buffer */ { 1, 1, 1 },
+ /* Texture1D */ { 1, 1, 1 },
+ /* Texture2D */ { 2, 2, 1 },
+ /* Texture3D */ { 3, 3, 1 },
+ /* TextureCube */ { 2, 3, 6 },
+};
+
+// Looks up the layout information for `baseShape`.
+//
+// Returns null when `baseShape` is not a valid index into the table, so that
+// callers which test the result for null (as `CPUTextureResource::init`
+// does) actually get a chance to fail instead of reading out of bounds.
+static CPUTextureBaseShapeInfo const* _getBaseShapeInfo(ITextureResource::Type baseShape)
+{
+    const int shapeIndex = (int)baseShape;
+    if(shapeIndex < 0 || shapeIndex >= (int)ITextureResource::Type::CountOf)
+        return nullptr;
+
+    return &kCPUTextureBaseShapeInfos[shapeIndex];
+}
+
+// Signature of a function that converts one stored texel into the value
+// handed back to the caller: it reads the texel at `texelData` and writes
+// `outSize` bytes of the converted value to `outData`.
+typedef void (*CPUTextureUnpackFunc)(void const* texelData, void* outData, size_t outSize);
+
+// Per-format information needed to read texels of that format.
+struct CPUTextureFormatInfo
+{
+ // Converts a stored texel of this format; see `CPUTextureUnpackFunc`.
+ CPUTextureUnpackFunc unpackFunc;
+};
+
+// Unpacks a texel made of `N` 32-bit floats.
+//
+// The texel is expanded to four components, with missing components filled
+// in from `(0, 0, 0, 1)`, and the first `outSize` bytes of that expanded
+// value are written to `outData`. At most 16 bytes (four floats) are ever
+// written, even if `outSize` is larger.
+template<int N>
+void _unpackFloatTexel(void const* texelData, void* outData, size_t outSize)
+{
+    static_assert(N >= 1 && N <= 4, "a texel has between one and four components");
+
+    float temp[4] = { 0.0f, 0.0f, 0.0f, 1.0f };
+
+    // Copy bytewise so that the stored texel does not need to be aligned
+    // for `float` access.
+    memcpy(temp, texelData, N * sizeof(float));
+
+    // Never read past the end of `temp`, whatever size the caller asks for.
+    if(outSize > sizeof(temp))
+        outSize = sizeof(temp);
+
+    memcpy(outData, temp, outSize);
+}
+
+// Maps an 8-bit unsigned-normalized value onto the range [0, 1].
+static inline float _unpackUnorm8Value(uint8_t value)
+{
+    const float maxValue = 255.0f;
+    return value / maxValue;
+}
+
+// Unpacks a texel made of `N` 8-bit unsigned-normalized components.
+//
+// Each stored component is converted to a float in [0, 1]; missing
+// components are filled in from `(0, 0, 0, 1)`. The first `outSize` bytes of
+// the resulting four-float value are written to `outData`; at most 16 bytes
+// are ever written, even if `outSize` is larger.
+template<int N>
+void _unpackUnorm8Texel(void const* texelData, void* outData, size_t outSize)
+{
+    static_assert(N >= 1 && N <= 4, "a texel has between one and four components");
+
+    auto input = (uint8_t const*) texelData;
+
+    float temp[4] = { 0.0f, 0.0f, 0.0f, 1.0f };
+    for(int i = 0; i < N; ++i)
+        temp[i] = _unpackUnorm8Value(input[i]);
+
+    // Never read past the end of `temp`, whatever size the caller asks for.
+    if(outSize > sizeof(temp))
+        outSize = sizeof(temp);
+
+    memcpy(outData, temp, outSize);
+}
+
+// Unpacks a texel stored as four 8-bit unsigned-normalized components in
+// B, G, R, A order.
+//
+// The components are converted to floats in [0, 1] and reordered to
+// R, G, B, A. The first `outSize` bytes of that four-float value are written
+// to `outData`; at most 16 bytes are ever written, even if `outSize` is larger.
+void _unpackUnormBGRA8Texel(void const* texelData, void* outData, size_t outSize)
+{
+    auto input = (uint8_t const*) texelData;
+
+    float temp[4];
+    temp[0] = _unpackUnorm8Value(input[2]);
+    temp[1] = _unpackUnorm8Value(input[1]);
+    temp[2] = _unpackUnorm8Value(input[0]);
+    temp[3] = _unpackUnorm8Value(input[3]);
+
+    // Never read past the end of `temp`, whatever size the caller asks for.
+    if(outSize > sizeof(temp))
+        outSize = sizeof(temp);
+
+    memcpy(outData, temp, outSize);
+}
+
+// Unpacks a texel made of `N` 16-bit unsigned integers.
+//
+// Each stored component is widened to 32 bits; missing components are zero.
+// The first `outSize` bytes of the resulting four-component value are
+// written to `outData`; at most 16 bytes are ever written, even if `outSize`
+// is larger.
+template<int N>
+void _unpackUInt16Texel(void const* texelData, void* outData, size_t outSize)
+{
+    static_assert(N >= 1 && N <= 4, "a texel has between one and four components");
+
+    // Copy bytewise so that the stored texel does not need to be aligned
+    // for `uint16_t` access.
+    uint16_t packed[N];
+    memcpy(packed, texelData, sizeof(packed));
+
+    uint32_t temp[4] = { 0, 0, 0, 0 };
+    for(int i = 0; i < N; ++i)
+        temp[i] = packed[i];
+
+    // Never read past the end of `temp`, whatever size the caller asks for.
+    if(outSize > sizeof(temp))
+        outSize = sizeof(temp);
+
+    memcpy(outData, temp, outSize);
+}
+
+// Unpacks a texel made of `N` 32-bit unsigned integers.
+//
+// Missing components are zero. The first `outSize` bytes of the resulting
+// four-component value are written to `outData`; at most 16 bytes are ever
+// written, even if `outSize` is larger.
+template<int N>
+void _unpackUInt32Texel(void const* texelData, void* outData, size_t outSize)
+{
+    static_assert(N >= 1 && N <= 4, "a texel has between one and four components");
+
+    uint32_t temp[4] = { 0, 0, 0, 0 };
+
+    // Copy bytewise so that the stored texel does not need to be aligned
+    // for `uint32_t` access.
+    memcpy(temp, texelData, N * sizeof(uint32_t));
+
+    // Never read past the end of `temp`, whatever size the caller asks for.
+    if(outSize > sizeof(temp))
+        outSize = sizeof(temp);
+
+    memcpy(outData, temp, outSize);
+}
+
+// Declares one file-local `CPUTextureFormatInfo` constant per supported
+// format: `TEXTURE_FORMAT_INFO(X)` expands to the declaration of
+// `kCPUTextureFormatInfo_X`, which is then initialized with the unpack
+// function to use for texels of that format. `_getFormatInfo` maps a
+// `Format` value to the matching constant.
+#define TEXTURE_FORMAT_INFO(FORMAT) static const CPUTextureFormatInfo kCPUTextureFormatInfo_##FORMAT
+
+TEXTURE_FORMAT_INFO(RGBA_Float32) = { &_unpackFloatTexel<4> };
+TEXTURE_FORMAT_INFO(RGB_Float32) = { &_unpackFloatTexel<3> };
+TEXTURE_FORMAT_INFO(RG_Float32) = { &_unpackFloatTexel<2> };
+TEXTURE_FORMAT_INFO(R_Float32) = { &_unpackFloatTexel<1> };
+TEXTURE_FORMAT_INFO(RGBA_Unorm_UInt8) = { &_unpackUnorm8Texel<4> };
+TEXTURE_FORMAT_INFO(BGRA_Unorm_UInt8) = { &_unpackUnormBGRA8Texel };
+TEXTURE_FORMAT_INFO(R_UInt16) = { &_unpackUInt16Texel<1> };
+TEXTURE_FORMAT_INFO(R_UInt32) = { &_unpackUInt32Texel<1> };
+// A 32-bit float depth texel is read the same way as a one-component float texel.
+TEXTURE_FORMAT_INFO(D_Float32) = { &_unpackFloatTexel<1> };
+
+#undef TEXTURE_FORMAT_INFO
+
+// Returns the unpacking information for `format`, or null when texels of
+// that format cannot be read by the CPU texture implementation (this
+// includes `D_Unorm24_S8` and any format not listed here).
+static CPUTextureFormatInfo const* _getFormatInfo(Format format)
+{
+#define FORMAT_INFO_CASE(FORMAT) case Format::FORMAT: return &kCPUTextureFormatInfo_##FORMAT;
+
+    switch(format)
+    {
+        FORMAT_INFO_CASE(RGBA_Float32)
+        FORMAT_INFO_CASE(RGB_Float32)
+        FORMAT_INFO_CASE(RG_Float32)
+        FORMAT_INFO_CASE(R_Float32)
+        FORMAT_INFO_CASE(RGBA_Unorm_UInt8)
+        FORMAT_INFO_CASE(BGRA_Unorm_UInt8)
+        FORMAT_INFO_CASE(R_UInt16)
+        FORMAT_INFO_CASE(R_UInt32)
+        FORMAT_INFO_CASE(D_Float32)
+
+    // Formats without an unpack function fall through to "unsupported".
+    case Format::D_Unorm24_S8:
+    default:
+        return nullptr;
+    }
+
+#undef FORMAT_INFO_CASE
+}
+
+// A texture resource whose texels live in one `malloc`-ed block of host memory.
+//
+// The block holds the mip levels one after another. Within a level the array
+// elements (including the implicit faces of a cube map) are stored
+// consecutively, and each element is tightly packed along x, then y, then z.
+class CPUTextureResource : public TextureResource
+{
+    enum { kMaxRank = 3 };
+
+public:
+    CPUTextureResource(const TextureResource::Desc& desc)
+        : TextureResource(desc)
+    {}
+    ~CPUTextureResource()
+    {
+        // Release the texel storage allocated by `init`;
+        // `free(nullptr)` is a no-op.
+        free(m_data);
+    }
+
+    // Computes the per-mip-level layout, allocates the texel storage and,
+    // when `initData` is non-null, fills the storage from it.
+    //
+    // `initData` is read as one entry per sub-resource, ordered with the
+    // array element as the outer index and the mip level as the inner index.
+    //
+    // Returns `SLANG_FAIL` if the shape or format of the texture is not
+    // supported, and `SLANG_E_OUT_OF_MEMORY` if the storage cannot be allocated.
+    Result init(ITextureResource::SubresourceData const* initData)
+    {
+        auto const& desc = m_desc;
+
+        // The format of the texture will determine the
+        // size of the texels we allocate.
+        //
+        // TODO: Compressed formats usually work in terms
+        // of a fixed block size, so that we cannot actually
+        // compute a simple `texelSize` like this. Instead
+        // we should be computing a `blockSize` and then
+        // a `blockExtents` value that gives the extent
+        // in texels of each block. For uncompressed formats
+        // the block extents would be 1 along each axis.
+        //
+        auto format = desc.format;
+        auto texelSize = gfxGetFormatSize(format);
+        m_texelSize = (int32_t) texelSize;
+
+        auto baseShapeInfo = _getBaseShapeInfo(desc.type);
+        m_baseShape = baseShapeInfo;
+        if(!baseShapeInfo)
+            return SLANG_FAIL;
+
+        auto formatInfo = _getFormatInfo(desc.format);
+        m_formatInfo = formatInfo;
+        if(!formatInfo)
+            return SLANG_FAIL;
+
+        int32_t rank = baseShapeInfo->rank;
+
+        // An array size of zero means "not an array", which still stores one
+        // element; cube maps additionally store six faces per element.
+        int32_t effectiveArrayElementCount = desc.arraySize ? desc.arraySize : 1;
+        effectiveArrayElementCount *= baseShapeInfo->implicitArrayElementCount;
+        m_effectiveArrayElementCount = effectiveArrayElementCount;
+
+        int32_t extents[kMaxRank];
+        extents[0] = desc.size.width;
+        extents[1] = desc.size.height;
+        extents[2] = desc.size.depth;
+
+        // Axes beyond the rank of the shape do not contribute any texels.
+        for(int32_t axis = rank; axis < kMaxRank; ++axis)
+            extents[axis] = 1;
+
+        int32_t levelCount = desc.numMipLevels;
+
+        m_mipLevels.setCount(levelCount);
+
+        int64_t totalDataSize = 0;
+        for( int32_t levelIndex = 0; levelIndex < levelCount; ++levelIndex )
+        {
+            auto& level = m_mipLevels[levelIndex];
+
+            // Each level halves every extent, but never goes below one texel.
+            for( int32_t axis = 0; axis < kMaxRank; ++axis )
+            {
+                int32_t extent = extents[axis] >> levelIndex;
+                if(extent < 1) extent = 1;
+                level.extents[axis] = extent;
+            }
+
+            // `strides[0]` steps between texels, `strides[1]` between rows,
+            // `strides[2]` between depth layers and `strides[3]` between
+            // array elements.
+            level.strides[0] = texelSize;
+            for( int32_t axis = 1; axis < kMaxRank+1; ++axis)
+            {
+                level.strides[axis] = level.strides[axis-1]*level.extents[axis-1];
+            }
+
+            int64_t levelDataSize = texelSize;
+            levelDataSize *= effectiveArrayElementCount;
+            for( int32_t axis = 0; axis < rank; ++axis)
+                levelDataSize *= int64_t(level.extents[axis]);
+
+            level.offset = totalDataSize;
+            totalDataSize += levelDataSize;
+        }
+
+        // `malloc(0)` is allowed to return null, which must not be mistaken
+        // for an allocation failure, so always request at least one byte.
+        void* textureData = malloc(totalDataSize > 0 ? size_t(totalDataSize) : size_t(1));
+        if(!textureData)
+            return SLANG_E_OUT_OF_MEMORY;
+        m_data = textureData;
+
+        if( initData )
+        {
+            int32_t subResourceCounter = 0;
+            for(int32_t arrayElementIndex = 0; arrayElementIndex < effectiveArrayElementCount; ++arrayElementIndex)
+            {
+                for(int32_t mipLevel = 0; mipLevel < m_desc.numMipLevels; ++mipLevel)
+                {
+                    int32_t subResourceIndex = subResourceCounter++;
+
+                    auto dstRowStride = m_mipLevels[mipLevel].strides[1];
+                    auto dstLayerStride = m_mipLevels[mipLevel].strides[2];
+                    auto dstArrayStride = m_mipLevels[mipLevel].strides[3];
+
+                    auto textureRowSize = m_mipLevels[mipLevel].extents[0]*texelSize;
+
+                    auto rowCount = m_mipLevels[mipLevel].extents[1];
+                    auto depthLayerCount = m_mipLevels[mipLevel].extents[2];
+
+                    auto& srcImage = initData[subResourceIndex];
+                    ptrdiff_t srcRowStride = ptrdiff_t(srcImage.strideY);
+                    ptrdiff_t srcLayerStride = ptrdiff_t(srcImage.strideZ);
+
+                    char* dstLevel = (char*)textureData + m_mipLevels[mipLevel].offset;
+                    char* dstImage = dstLevel + dstArrayStride*arrayElementIndex;
+
+                    const char* srcLayer = (const char*) srcImage.data;
+                    char* dstLayer = dstImage;
+
+                    // The source image may be padded, so copy one row at a
+                    // time using the source strides for the source pointer.
+                    for(int32_t depthLayer = 0; depthLayer < depthLayerCount; ++depthLayer)
+                    {
+                        const char* srcRow = srcLayer;
+                        char* dstRow = dstLayer;
+
+                        for(int32_t row = 0; row < rowCount; ++row)
+                        {
+                            memcpy(dstRow, srcRow, textureRowSize);
+
+                            srcRow += srcRowStride;
+                            dstRow += dstRowStride;
+                        }
+
+                        srcLayer += srcLayerStride;
+                        dstLayer += dstLayerStride;
+                    }
+                }
+            }
+        }
+
+        return SLANG_OK;
+    }
+
+    Desc const& _getDesc() { return m_desc; }
+    Format getFormat() { return m_desc.format; }
+    int32_t getRank() { return m_baseShape->rank; }
+
+    // Shape and format information looked up by `init`; null until then (and
+    // null afterwards if the shape or format is not supported).
+    CPUTextureBaseShapeInfo const* m_baseShape = nullptr;
+    CPUTextureFormatInfo const* m_formatInfo = nullptr;
+    // Number of stored array elements, including implicit cube faces.
+    int32_t m_effectiveArrayElementCount = 0;
+    // Size in bytes of one texel.
+    int32_t m_texelSize = 0;
+
+    // Layout of one mip level inside `m_data`.
+    struct MipLevel
+    {
+        // Extent in texels along each axis (1 for unused axes).
+        int32_t extents[kMaxRank];
+        // Byte strides: texel, row, depth layer, array element.
+        int64_t strides[kMaxRank+1];
+        // Byte offset of the level from the start of `m_data`.
+        int64_t offset;
+    };
+    List<MipLevel> m_mipLevels;
+    // Texel storage for all levels and array elements; owned by this object.
+    void* m_data = nullptr;
+};
+
+// Common base class of the CPU resource views. It records which kind of
+// resource the view refers to, so that a `CPUResourceView*` can be
+// downcast to the matching derived view type.
+class CPUResourceView : public IResourceView, public RefObject
+{
+public:
+    enum class Kind
+    {
+        Buffer,
+        Texture,
+    };
+
+    SLANG_REF_OBJECT_IUNKNOWN_ALL
+    IResourceView* getInterface(const Guid& guid)
+    {
+        const bool isSupported =
+            (guid == GfxGUID::IID_ISlangUnknown) || (guid == GfxGUID::IID_IResourceView);
+        return isSupported ? static_cast<IResourceView*>(this) : nullptr;
+    }
+
+    // Kind of resource this view refers to.
+    Kind getViewKind() const
+    {
+        return m_kind;
+    }
+
+    // Description the view was created from.
+    Desc const& getDesc() const
+    {
+        return m_desc;
+    }
+
+protected:
+    // Only derived view types are meant to be instantiated.
+    CPUResourceView(Kind kind, Desc const& desc)
+        : m_kind(kind)
+        , m_desc(desc)
+    {
+    }
+
+private:
+    Kind m_kind;
+    Desc m_desc;
+};
+
+// A resource view onto a `CPUBufferResource`; the view keeps the buffer
+// alive through a reference-counted pointer.
+class CPUBufferView : public CPUResourceView
+{
+    RefPtr<CPUBufferResource> m_buffer;
+
+public:
+    CPUBufferView(Desc const& desc, CPUBufferResource* buffer)
+        : CPUResourceView(Kind::Buffer, desc)
+        , m_buffer(buffer)
+    {
+    }
+
+    // The buffer this view refers to.
+    CPUBufferResource* getBuffer() const
+    {
+        return m_buffer;
+    }
+};
+
+// A resource view onto a `CPUTextureResource`.
+//
+// The view also implements the CPU prelude texture interface
+// (`slang_prelude::IRWTexture`), which is what allows a pointer to it to be
+// written into shader parameter data for compiled CPU code to call.
+class CPUTextureView : public CPUResourceView, public slang_prelude::IRWTexture
+{
+public:
+    CPUTextureView(Desc const& desc, CPUTextureResource* texture)
+        : CPUResourceView(Kind::Texture, desc)
+        , m_texture(texture)
+    {}
+
+    CPUTextureResource* getTexture() const { return m_texture; }
+
+    //
+    // ITexture interface
+    //
+
+    // Reports the dimensions of the texture as given in its description.
+    //
+    // NOTE(review): `mipLevel` is currently ignored, so the extents of the
+    // most detailed level are reported for every level.
+    slang_prelude::TextureDimensions GetDimensions(int mipLevel = -1) SLANG_OVERRIDE
+    {
+        slang_prelude::TextureDimensions dimensions = {};
+
+        CPUTextureResource* texture = m_texture;
+        auto& desc = texture->_getDesc();
+        auto baseShape = texture->m_baseShape;
+
+        dimensions.arrayElementCount = desc.arraySize;
+        dimensions.numberOfLevels = desc.numMipLevels;
+        dimensions.shape = baseShape->rank;
+        dimensions.width = desc.size.width;
+        dimensions.height = desc.size.height;
+        dimensions.depth = desc.size.depth;
+
+        return dimensions;
+    }
+
+    // Reads the texel addressed by the integer coordinates `texelCoords`
+    // and writes `dataSize` bytes of its unpacked value to `outData`.
+    void Load(const int32_t* texelCoords, void* outData, size_t dataSize) SLANG_OVERRIDE
+    {
+        void* texelPtr = _getTexelPtr(texelCoords);
+
+        m_texture->m_formatInfo->unpackFunc(texelPtr, outData, dataSize);
+    }
+
+    void Sample(slang_prelude::SamplerState samplerState, const float* coords, void* outData, size_t dataSize) SLANG_OVERRIDE
+    {
+        // We have no access to information from fragment quads, so we cannot
+        // compute the finite-difference derivatives needed from `coords`.
+        //
+        // The only reasonable thing to do is to sample mip level zero.
+        //
+        SampleLevel(samplerState, coords, 0.0f, outData, dataSize);
+    }
+
+    // Samples the texture at normalized coordinates `coords`, using the mip
+    // level nearest to `level`, and writes `dataSize` bytes of the unpacked
+    // texel to `outData`. The sampler state is not consulted.
+    void SampleLevel(slang_prelude::SamplerState samplerState, const float* coords, float level, void* outData, size_t dataSize) SLANG_OVERRIDE
+    {
+        CPUTextureResource* texture = m_texture;
+        auto baseShape = texture->m_baseShape;
+        auto& desc = texture->_getDesc();
+        int32_t rank = baseShape->rank;
+        int32_t baseCoordCount = baseShape->baseCoordCount;
+
+        // Round to the nearest level and clamp to the levels that exist.
+        int32_t integerMipLevel = int32_t(level + 0.5f);
+        if(integerMipLevel >= desc.numMipLevels) integerMipLevel = desc.numMipLevels-1;
+        if(integerMipLevel < 0) integerMipLevel = 0;
+
+        auto& mipLevelInfo = texture->m_mipLevels[integerMipLevel];
+
+        // For array-like textures the element index follows the base coordinates.
+        bool isArray = (desc.arraySize != 0) || (desc.type == ITextureResource::Type::TextureCube);
+        int32_t effectiveArrayElementCount = texture->m_effectiveArrayElementCount;
+        int32_t coordIndex = baseCoordCount;
+        int32_t elementIndex = 0;
+        if( isArray )
+        {
+            elementIndex = int32_t(coords[coordIndex++] + 0.5f);
+        }
+        if(elementIndex >= effectiveArrayElementCount) elementIndex = effectiveArrayElementCount-1;
+        if(elementIndex < 0) elementIndex = 0;
+
+        // Note: for now we are just going to do nearest-neighbor sampling
+        //
+        int64_t texelOffset = mipLevelInfo.offset;
+        texelOffset += elementIndex * mipLevelInfo.strides[3];
+        for(int32_t axis = 0; axis < rank; ++axis)
+        {
+            int32_t extent = mipLevelInfo.extents[axis];
+
+            float coord = coords[axis];
+
+            // TODO: deal with wrap/clamp/repeat if `coord < 0` or `coord > 1`
+
+            int32_t integerCoord = int32_t(coord*(extent-1) + 0.5f);
+
+            if(integerCoord >= extent) integerCoord = extent-1;
+            if(integerCoord < 0) integerCoord = 0;
+
+            texelOffset += integerCoord * mipLevelInfo.strides[axis];
+        }
+
+        auto texelPtr = (char const*)texture->m_data + texelOffset;
+
+        m_texture->m_formatInfo->unpackFunc(texelPtr, outData, dataSize);
+    }
+
+    //
+    // IRWTexture interface
+    //
+
+    // Returns a pointer to the stored texel addressed by `texelCoords`.
+    void* refAt(const uint32_t* texelCoords) SLANG_OVERRIDE
+    {
+        return _getTexelPtr((int32_t const*)texelCoords);
+    }
+
+private:
+    RefPtr<CPUTextureResource> m_texture;
+
+    // Computes the address of the stored texel addressed by the integer
+    // coordinates `texelCoords`. Every index is clamped to the valid range,
+    // so the returned pointer always refers to a texel inside the texture.
+    void* _getTexelPtr(int32_t const* texelCoords)
+    {
+        CPUTextureResource* texture = m_texture;
+        auto baseShape = texture->m_baseShape;
+        auto& desc = texture->_getDesc();
+
+        int32_t rank = baseShape->rank;
+        int32_t baseCoordCount = baseShape->baseCoordCount;
+
+        bool isArray = (desc.arraySize != 0) || (desc.type == ITextureResource::Type::TextureCube);
+        bool isMultisample = desc.sampleDesc.numSamples > 1;
+        bool isBuffer = desc.type == ITextureResource::Type::Buffer;
+        bool hasMipLevels = !(isMultisample || isBuffer);
+
+        int32_t effectiveArrayElementCount = texture->m_effectiveArrayElementCount;
+
+        int32_t coordIndex = baseCoordCount;
+        int32_t elementIndex = 0;
+        if( isArray )
+        {
+            elementIndex = texelCoords[coordIndex++];
+        }
+        if(elementIndex >= effectiveArrayElementCount) elementIndex = effectiveArrayElementCount-1;
+        if(elementIndex < 0) elementIndex = 0;
+
+        // NOTE(review): the mip level coordinate is read when the texture
+        // does *not* have mip levels, which looks inverted. It is left as-is
+        // because the coordinate layout used by the callers (`Load` versus
+        // `refAt`) cannot be confirmed from this file -- please verify.
+        int32_t mipLevel = 0;
+        if(!hasMipLevels)
+        {
+            mipLevel = texelCoords[coordIndex++];
+        }
+        if(mipLevel >= desc.numMipLevels) mipLevel = desc.numMipLevels-1;
+        if(mipLevel < 0) mipLevel = 0;
+
+        auto& mipLevelInfo = texture->m_mipLevels[mipLevel];
+
+        int64_t texelOffset = mipLevelInfo.offset;
+        texelOffset += elementIndex * mipLevelInfo.strides[3];
+        for(int32_t axis = 0; axis < rank; ++axis)
+        {
+            int32_t coord = texelCoords[axis];
+            if(coord >= mipLevelInfo.extents[axis]) coord = mipLevelInfo.extents[axis]-1;
+            if(coord < 0) coord = 0;
+
+            // Use the clamped coordinate, so that out-of-range input can
+            // never produce an address outside of the texture storage.
+            texelOffset += coord * mipLevelInfo.strides[axis];
+        }
+
+        return (char*)texture->m_data + texelOffset;
+    }
+};
+
+// Layout information for a shader object on the CPU back-end.
+//
+// The constructor walks the binding ranges and sub-object ranges reported by
+// the Slang reflection API for the (unwrapped) element type, and records:
+// the size in bytes of the element type, a flat index for every resource and
+// sub-object slot, and a pre-computed layout for each sub-object range whose
+// type is statically known.
+class CPUShaderObjectLayout : public ShaderObjectLayoutBase
+{
+public:
+
+ // TODO: Once memory lifetime stuff is handled, there is
+ // no specific need to even track binding or sub-object
+ // ranges for CPU.
+
+ struct BindingRangeInfo
+ {
+ slang::BindingType bindingType;
+ Index count;
+ Index baseIndex; // Flat index for sub-objects
+
+ // TODO: The `uniformOffset` field should be removed,
+ // since it cannot be supported by the Slang reflection
+ // API once we fix some design issues.
+ //
+ // It is only being used today for pre-allocation of sub-objects
+ // for constant buffers and parameter blocks (which should be
+ // deprecated/removed anyway).
+ //
+ // Note: We would need to bring this field back, plus
+ // a lot of other complexity, if we ever want to support
+ // setting of resources/buffers directly by a binding
+ // range index and array index.
+ //
+ Index uniformOffset; // Uniform offset for a resource typed field.
+ };
+
+ struct SubObjectRangeInfo
+ {
+ // Layout of the sub-objects in the range; left null for ranges of
+ // existential type, whose concrete type is not known up front.
+ RefPtr<CPUShaderObjectLayout> layout;
+ // Index of the binding range that this sub-object range corresponds to.
+ Index bindingRangeIndex;
+ };
+
+ // Size of the element type, as reported by `getSize()` on its type layout.
+ size_t m_size = 0;
+ List<SubObjectRangeInfo> subObjectRanges;
+ List<BindingRangeInfo> m_bindingRanges;
+
+ // Total number of sub-object slots and of resource slots, summed over
+ // all binding ranges.
+ Index m_subObjectCount = 0;
+ Index m_resourceCount = 0;
+
+ CPUShaderObjectLayout(RendererBase* renderer, slang::TypeLayoutReflection* layout)
+ {
+ initBase(renderer, layout);
+
+ Index subObjectCount = 0;
+ Index resourceCount = 0;
+
+ m_elementTypeLayout = _unwrapParameterGroups(layout);
+ m_size = m_elementTypeLayout->getSize();
+
+ // Compute the binding ranges that are used to store
+ // the logical contents of the object in memory. These will relate
+ // to the descriptor ranges in the various sets, but not always
+ // in a one-to-one fashion.
+
+ SlangInt bindingRangeCount = m_elementTypeLayout->getBindingRangeCount();
+ for (SlangInt r = 0; r < bindingRangeCount; ++r)
+ {
+ slang::BindingType slangBindingType = m_elementTypeLayout->getBindingRangeType(r);
+ SlangInt count = m_elementTypeLayout->getBindingRangeBindingCount(r);
+ slang::TypeLayoutReflection* slangLeafTypeLayout =
+ m_elementTypeLayout->getBindingRangeLeafTypeLayout(r);
+
+ SlangInt descriptorSetIndex = m_elementTypeLayout->getBindingRangeDescriptorSetIndex(r);
+ SlangInt rangeIndexInDescriptorSet =
+ m_elementTypeLayout->getBindingRangeFirstDescriptorRangeIndex(r);
+
+ // TODO: This logic assumes that for any binding range that might consume
+ // multiple kinds of resources, the descriptor range for its uniform
+ // usage will be the first one in the range.
+ //
+ // We need to decide whether that assumption is one we intend to support
+ // applications making, or whether they should be forced to perform a
+ // linear search over the descriptor ranges for a specific binding range.
+ //
+ auto uniformOffset = m_elementTypeLayout->getDescriptorSetDescriptorRangeIndexOffset(
+ descriptorSetIndex, rangeIndexInDescriptorSet);
+
+ // Sub-object-like ranges and resource ranges are numbered
+ // independently, each with its own running counter.
+ Index baseIndex = 0;
+ switch (slangBindingType)
+ {
+ case slang::BindingType::ConstantBuffer:
+ case slang::BindingType::ParameterBlock:
+ case slang::BindingType::ExistentialValue:
+ baseIndex = subObjectCount;
+ subObjectCount += count;
+ break;
+
+ default:
+ baseIndex = resourceCount;
+ resourceCount += count;
+ break;
+ }
+
+ BindingRangeInfo bindingRangeInfo;
+ bindingRangeInfo.bindingType = slangBindingType;
+ bindingRangeInfo.count = count;
+ bindingRangeInfo.baseIndex = baseIndex;
+ bindingRangeInfo.uniformOffset = uniformOffset;
+ m_bindingRanges.add(bindingRangeInfo);
+ }
+
+ m_subObjectCount = subObjectCount;
+ m_resourceCount = resourceCount;
+
+ SlangInt subObjectRangeCount = m_elementTypeLayout->getSubObjectRangeCount();
+ for (SlangInt r = 0; r < subObjectRangeCount; ++r)
+ {
+ SlangInt bindingRangeIndex = m_elementTypeLayout->getSubObjectRangeBindingRangeIndex(r);
+ auto slangBindingType = m_elementTypeLayout->getBindingRangeType(bindingRangeIndex);
+ slang::TypeLayoutReflection* slangLeafTypeLayout =
+ m_elementTypeLayout->getBindingRangeLeafTypeLayout(bindingRangeIndex);
+
+ // A sub-object range can either represent a sub-object of a known
+ // type, like a `ConstantBuffer<Foo>` or `ParameterBlock<Foo>`
+ // (in which case we can pre-compute a layout to use, based on
+ // the type `Foo`) *or* it can represent a sub-object of some
+ // existential type (e.g., `IBar`) in which case we cannot
+ // know the appropriate type/layout of sub-object to allocate.
+ //
+ RefPtr<CPUShaderObjectLayout> subObjectLayout;
+ if (slangBindingType != slang::BindingType::ExistentialValue)
+ {
+ subObjectLayout =
+ new CPUShaderObjectLayout(renderer, slangLeafTypeLayout->getElementTypeLayout());
+ }
+
+ SubObjectRangeInfo subObjectRange;
+ subObjectRange.bindingRangeIndex = bindingRangeIndex;
+ subObjectRange.layout = subObjectLayout;
+ subObjectRanges.add(subObjectRange);
+ }
+ }
+
+ size_t getSize() { return m_size; }
+ Index getResourceCount() const { return m_resourceCount; }
+ Index getSubObjectCount() const { return m_subObjectCount; }
+};
+
+// Shader object layout for the parameters of a single entry point; it also
+// remembers the entry point's reflection data so that its name can be queried.
+class CPUEntryPointLayout : public CPUShaderObjectLayout
+{
+public:
+    CPUEntryPointLayout(
+        RendererBase* renderer,
+        slang::EntryPointLayout* entryPointLayout)
+        : CPUShaderObjectLayout(renderer, entryPointLayout->getTypeLayout())
+        , m_entryPointLayout(entryPointLayout)
+    {
+    }
+
+    // Name of the entry point, as reported by reflection.
+    const char* getEntryPointName()
+    {
+        const char* const name = m_entryPointLayout->getName();
+        return name;
+    }
+
+private:
+    slang::EntryPointLayout* m_entryPointLayout = nullptr;
+};
+
+// Shader object layout for the global parameters of a program, together
+// with one `CPUEntryPointLayout` per entry point of the program.
+class CPUProgramLayout : public CPUShaderObjectLayout
+{
+public:
+    slang::ProgramLayout* m_programLayout = nullptr;
+    List<RefPtr<CPUEntryPointLayout>> m_entryPointLayouts;
+
+    CPUProgramLayout(RendererBase* renderer, slang::ProgramLayout* programLayout)
+        : CPUShaderObjectLayout(renderer, programLayout->getGlobalParamsTypeLayout())
+        , m_programLayout(programLayout)
+    {
+        // Build the entry point layouts in reflection order, so that
+        // `getEntryPoint(i)` matches `getEntryPointByIndex(i)`.
+        const UInt entryPointCount = programLayout->getEntryPointCount();
+        for (UInt entryPointIndex = 0; entryPointIndex < entryPointCount; ++entryPointIndex)
+        {
+            auto entryPointReflection = programLayout->getEntryPointByIndex(entryPointIndex);
+            m_entryPointLayouts.add(new CPUEntryPointLayout(renderer, entryPointReflection));
+        }
+    }
+
+    // Returns the index of the first entry point named `kernelName`,
+    // or -1 if the program has no such entry point.
+    int getKernelIndex(UnownedStringSlice kernelName)
+    {
+        const int entryPointCount = (int) m_programLayout->getEntryPointCount();
+
+        int foundIndex = -1;
+        for (int candidate = 0; candidate < entryPointCount && foundIndex < 0; ++candidate)
+        {
+            auto entryPoint = m_programLayout->getEntryPointByIndex(candidate);
+            if (kernelName == entryPoint->getName())
+                foundIndex = candidate;
+        }
+        return foundIndex;
+    }
+
+    // Writes the three thread group extents of entry point `kernelIndex`
+    // into `threadGroupSizes`.
+    void getKernelThreadGroupSize(int kernelIndex, UInt* threadGroupSizes)
+    {
+        const int axisCount = 3;
+        m_programLayout->getEntryPointByIndex(kernelIndex)
+            ->getComputeThreadGroupSize(axisCount, threadGroupSizes);
+    }
+
+    CPUEntryPointLayout* getEntryPoint(Index index)
+    {
+        return m_entryPointLayouts[index];
+    }
+};
+
+class CPUShaderObject : public ShaderObjectBase
+{
+public:
+ void* m_data = nullptr;
+
+ // Releases the object's parameter data (`m_data`); `free` accepts a null
+ // pointer, so this is safe even if no data was ever allocated.
+ ~CPUShaderObject()
+ {
+ free(m_data);
+ }
+
+ List<RefPtr<CPUShaderObject>> m_objects;
+ List<RefPtr<CPUResourceView>> m_resources;
+
+ virtual SLANG_NO_THROW Result SLANG_MCALL
+ init(IDevice* device, CPUShaderObjectLayout* typeLayout);
+
+ // Returns the layout of this object, viewed as the CPU-specific layout type.
+ CPUShaderObjectLayout* getLayout()
+ {
+     auto* const baseLayout = m_layout.Ptr();
+     return static_cast<CPUShaderObjectLayout*>(baseLayout);
+ }
+
+#if 0
+ virtual SLANG_NO_THROW Result SLANG_MCALL initBuffer(IDevice* device, size_t bufferSize)
+ {
+ BufferResource::Desc bufferDesc;
+ bufferDesc.init(bufferSize);
+ bufferDesc.cpuAccessFlags |= IResource::AccessFlag::Write;
+ ComPtr<IBufferResource> constantBuffer;
+ SLANG_RETURN_ON_FAIL(renderer->createBufferResource(
+ IResource::Usage::ConstantBuffer, bufferDesc, nullptr, constantBuffer.writeRef()));
+ bufferResource = dynamic_cast<MemoryCUDAResource*>(constantBuffer.get());
+ return SLANG_OK;
+ }
+#endif
+
+#if 0
+ virtual SLANG_NO_THROW void* SLANG_MCALL getBuffer()
+ {
+ return bufferResource ? bufferResource->m_cudaMemory : nullptr;
+ }
+
+ virtual SLANG_NO_THROW size_t SLANG_MCALL getBufferSize()
+ {
+ return bufferResource ? bufferResource->getDesc()->sizeInBytes : 0;
+ }
+#endif
+
+ // Returns the reflection layout of the element type stored in this object.
+ virtual SLANG_NO_THROW slang::TypeLayoutReflection* SLANG_MCALL getElementTypeLayout() override
+ {
+     CPUShaderObjectLayout* const layout = getLayout();
+     return layout->getElementTypeLayout();
+ }
+
+ // This shader object type does not report any entry points.
+ virtual SLANG_NO_THROW UInt SLANG_MCALL getEntryPointCount() override { return 0; }
+ // This shader object type has no entry points, so the result is always
+ // null (reported as success), whatever `index` is.
+ virtual SLANG_NO_THROW Result SLANG_MCALL
+     getEntryPoint(UInt index, IShaderObject** outEntryPoint) override
+ {
+     SLANG_UNUSED(index);
+
+     IShaderObject* const noEntryPoint = nullptr;
+     *outEntryPoint = noEntryPoint;
+     return SLANG_OK;
+ }
+ // Copies `size` bytes from `data` into this object's parameter data,
+ // starting at `offset.uniformOffset`.
+ //
+ // The copy is truncated so that it never extends past the size of the
+ // object's layout; a write that starts at or beyond the end of the object
+ // copies nothing. Both cases still report success.
+ virtual SLANG_NO_THROW Result SLANG_MCALL
+     setData(ShaderOffset const& offset, void const* data, size_t size)
+ {
+     size_t const layoutSize = getLayout()->getSize();
+     size_t const startOffset = size_t(offset.uniformOffset);
+
+     // Compare before subtracting: `layoutSize - startOffset` would wrap
+     // around for an offset past the end (or a negative offset), which
+     // would defeat the clamp below and write out of bounds.
+     if (startOffset >= layoutSize)
+         return SLANG_OK;
+
+     size_t const available = layoutSize - startOffset;
+     size_t const copySize = size < available ? size : available;
+
+     memcpy((char*)m_data + startOffset, data, copySize);
+     return SLANG_OK;
+ }
+ // Retrieves the sub-object bound at `offset` (binding range index plus
+ // index within the range).
+ //
+ // On success `*outObject` receives a new reference to the sub-object (or
+ // null if nothing is bound there). Returns `SLANG_E_INVALID_ARG` if
+ // `outObject` is null or if `offset` does not address a sub-object slot.
+ virtual SLANG_NO_THROW Result SLANG_MCALL getObject(
+     ShaderOffset const& offset,
+     IShaderObject** outObject)
+ {
+     if (!outObject)
+         return SLANG_E_INVALID_ARG;
+     *outObject = nullptr;
+
+     auto layout = getLayout();
+
+     auto bindingRangeIndex = offset.bindingRangeIndex;
+     SLANG_ASSERT(bindingRangeIndex >= 0);
+     SLANG_ASSERT(bindingRangeIndex < layout->m_bindingRanges.getCount());
+
+     // The asserts above may be compiled out, so also check at run time.
+     if (bindingRangeIndex < 0 || bindingRangeIndex >= layout->m_bindingRanges.getCount())
+         return SLANG_E_INVALID_ARG;
+
+     auto& bindingRange = layout->m_bindingRanges[bindingRangeIndex];
+     auto subObjectIndex = bindingRange.baseIndex + offset.bindingArrayIndex;
+     if (subObjectIndex < 0 || subObjectIndex >= m_objects.getCount())
+         return SLANG_E_INVALID_ARG;
+
+     CPUShaderObject* subObject = m_objects[subObjectIndex];
+
+     // The `ComPtr` adds the reference that is handed over to the caller.
+     *outObject = ComPtr<IShaderObject>(subObject).detach();
+
+     return SLANG_OK;
+ }
+ // Binds `object` as the sub-object at `offset` (binding range index plus
+ // index within the range), and writes whatever the parent needs to refer
+ // to it into the parent's own parameter data at `offset.uniformOffset`.
+ //
+ // Returns `SLANG_E_INVALID_ARG` if `object` is null or if `offset` does
+ // not address a sub-object slot; otherwise any failure of the underlying
+ // operations is passed on.
+ virtual SLANG_NO_THROW Result SLANG_MCALL setObject(
+     ShaderOffset const& offset,
+     IShaderObject* object)
+ {
+     // Everything below reads from the sub-object, so null cannot be bound.
+     if (!object)
+         return SLANG_E_INVALID_ARG;
+
+     auto layout = getLayout();
+
+     auto bindingRangeIndex = offset.bindingRangeIndex;
+     SLANG_ASSERT(bindingRangeIndex >= 0);
+     SLANG_ASSERT(bindingRangeIndex < layout->m_bindingRanges.getCount());
+
+     // The asserts above may be compiled out, so also check at run time.
+     if (bindingRangeIndex < 0 || bindingRangeIndex >= layout->m_bindingRanges.getCount())
+         return SLANG_E_INVALID_ARG;
+
+     auto& bindingRange = layout->m_bindingRanges[bindingRangeIndex];
+     auto subObjectIndex = bindingRange.baseIndex + offset.bindingArrayIndex;
+     if (subObjectIndex < 0 || subObjectIndex >= m_objects.getCount())
+         return SLANG_E_INVALID_ARG;
+
+     CPUShaderObject* subObject = static_cast<CPUShaderObject*>(object);
+     m_objects[subObjectIndex] = subObject;
+
+     switch( bindingRange.bindingType )
+     {
+     default:
+         // For every other kind of range the parent just stores a pointer
+         // to the data of the sub-object.
+         SLANG_RETURN_ON_FAIL(setData(offset, &subObject->m_data, sizeof(void*)));
+         break;
+
+     // If the range being assigned into represents an interface/existential-type leaf field,
+     // then we need to consider how the `object` being assigned here affects specialization.
+     // We may also need to assign some data from the sub-object into the ordinary data
+     // buffer for the parent object.
+     //
+     case slang::BindingType::ExistentialValue:
+         {
+             auto renderer = getRenderer();
+
+             ComPtr<slang::ISession> slangSession;
+             SLANG_RETURN_ON_FAIL(renderer->getSlangSession(slangSession.writeRef()));
+
+             // A leaf field of interface type is laid out inside of the parent object
+             // as a tuple of `(RTTI, WitnessTable, Payload)`. The layout of these fields
+             // is a contract between the compiler and any runtime system, so we will
+             // need to rely on details of the binary layout.
+
+             // We start by querying the layout/type of the concrete value that the application
+             // is trying to store into the field, and also the layout/type of the leaf
+             // existential-type field itself.
+             //
+             auto concreteTypeLayout = subObject->getElementTypeLayout();
+             auto concreteType = concreteTypeLayout->getType();
+             //
+             auto existentialTypeLayout = layout->getElementTypeLayout()->getBindingRangeLeafTypeLayout(bindingRangeIndex);
+             auto existentialType = existentialTypeLayout->getType();
+
+             // The first field of the tuple (offset zero) is the run-time type information (RTTI)
+             // ID for the concrete type being stored into the field.
+             //
+             // TODO: We need to be able to gather the RTTI type ID from `object` and then
+             // use `setData(offset, &TypeID, sizeof(TypeID))`.
+
+             // The second field of the tuple (offset 8) is the ID of the "witness" for the
+             // conformance of the concrete type to the interface used by this field.
+             //
+             auto witnessTableOffset = offset;
+             witnessTableOffset.uniformOffset += 8;
+             //
+             // Conformances of a type to an interface are computed and then stored by the
+             // Slang runtime, so we can look up the ID for this particular conformance (which
+             // will create it on demand).
+             //
+             // Note: If the type doesn't actually conform to the required interface for
+             // this sub-object range, then this is the point where we will detect that
+             // fact and error out.
+             //
+             uint32_t conformanceID = 0xFFFFFFFF;
+             SLANG_RETURN_ON_FAIL(slangSession->getTypeConformanceWitnessSequentialID(
+                 concreteType, existentialType, &conformanceID));
+             //
+             // Once we have the conformance ID, then we can write it into the object
+             // at the required offset.
+             //
+             SLANG_RETURN_ON_FAIL(setData(witnessTableOffset, &conformanceID, sizeof(conformanceID)));
+
+             // The third field of the tuple (offset 16) is the "payload" that is supposed to
+             // hold the data for a value of the given concrete type.
+             //
+             auto payloadOffset = offset;
+             payloadOffset.uniformOffset += 16;
+
+             // There are two cases we need to consider here for how the payload might be used:
+             //
+             // * If the concrete type of the value being bound is one that can "fit" into the
+             //   available payload space, then it should be stored in the payload.
+             //
+             // * If the concrete type of the value cannot fit in the payload space, then it
+             //   will need to be stored somewhere else.
+             //
+             if(_doesValueFitInExistentialPayload(concreteTypeLayout, existentialTypeLayout))
+             {
+                 // If the value can fit in the payload area, then we will go ahead and copy
+                 // its bytes into that area.
+                 //
+                 auto valueSize = concreteTypeLayout->getSize();
+                 SLANG_RETURN_ON_FAIL(setData(payloadOffset, subObject->m_data, valueSize));
+             }
+             else
+             {
+                 // If the value cannot fit in the payload area, then we will pass a pointer
+                 // to the sub-object instead.
+                 //
+                 // Note: The Slang compiler does not currently emit code that handles the
+                 // pointer case, but that is the expected implementation for values
+                 // that do not fit into the fixed-size payload.
+                 //
+                 SLANG_RETURN_ON_FAIL(setData(payloadOffset, &subObject->m_data, sizeof(void*)));
+             }
+         }
+         break;
+     }
+
+     return SLANG_OK;
+ }
+ // Binds the resource view `inView` at `offset` (binding range index plus
+ // index within the range), and writes what compiled shader code needs in
+ // order to access the resource into this object's parameter data at
+ // `offset.uniformOffset`:
+ //
+ // * for a texture view, a pointer to its `slang_prelude::IRWTexture` interface;
+ // * for a buffer view, the data pointer followed by the element count.
+ //
+ // Returns `SLANG_E_INVALID_ARG` if `inView` is null or if `offset` does
+ // not address a resource slot.
+ virtual SLANG_NO_THROW Result SLANG_MCALL
+     setResource(ShaderOffset const& offset, IResourceView* inView)
+ {
+     // The kind of the view is queried below, so null cannot be bound.
+     if (!inView)
+         return SLANG_E_INVALID_ARG;
+
+     auto layout = getLayout();
+
+     auto bindingRangeIndex = offset.bindingRangeIndex;
+     SLANG_ASSERT(bindingRangeIndex >= 0);
+     SLANG_ASSERT(bindingRangeIndex < layout->m_bindingRanges.getCount());
+
+     // The asserts above may be compiled out, so also check at run time.
+     if (bindingRangeIndex < 0 || bindingRangeIndex >= layout->m_bindingRanges.getCount())
+         return SLANG_E_INVALID_ARG;
+
+     auto& bindingRange = layout->m_bindingRanges[bindingRangeIndex];
+     auto viewIndex = bindingRange.baseIndex + offset.bindingArrayIndex;
+     if (viewIndex < 0 || viewIndex >= m_resources.getCount())
+         return SLANG_E_INVALID_ARG;
+
+     // Keep the view alive for as long as it is bound to this object.
+     auto view = static_cast<CPUResourceView*>(inView);
+     m_resources[viewIndex] = view;
+
+     switch( view->getViewKind() )
+     {
+     case CPUResourceView::Kind::Texture:
+         {
+             auto textureView = static_cast<CPUTextureView*>(view);
+
+             // Convert to the interface pointer first, so that the bytes
+             // written are those of the `IRWTexture*`, not of the view pointer.
+             slang_prelude::IRWTexture* textureObj = textureView;
+             SLANG_RETURN_ON_FAIL(setData(offset, &textureObj, sizeof(textureObj)));
+         }
+         break;
+
+     case CPUResourceView::Kind::Buffer:
+         {
+             auto bufferView = static_cast<CPUBufferView*>(view);
+             auto buffer = bufferView->getBuffer();
+             auto const& desc = *buffer->getDesc();
+
+             void* dataPtr = buffer->m_data;
+
+             // The count that is written is in elements when an element
+             // size is given, and in bytes otherwise.
+             size_t size = desc.sizeInBytes;
+             if (desc.elementSize > 1)
+                 size /= desc.elementSize;
+
+             auto ptrOffset = offset;
+             SLANG_RETURN_ON_FAIL(setData(ptrOffset, &dataPtr, sizeof(dataPtr)));
+
+             auto sizeOffset = offset;
+             sizeOffset.uniformOffset += sizeof(dataPtr);
+             SLANG_RETURN_ON_FAIL(setData(sizeOffset, &size, sizeof(size)));
+         }
+         break;
+     }
+
+     return SLANG_OK;
+ }
+    /// Accepts a sampler binding and discards it.
+    ///
+    /// Neither argument is inspected or recorded; the call always reports `SLANG_OK`.
+    virtual SLANG_NO_THROW Result SLANG_MCALL
+        setSampler(ShaderOffset const& offset, ISamplerState* sampler)
+    {
+        SLANG_UNUSED(offset);
+        SLANG_UNUSED(sampler);
+        return SLANG_OK;
+    }
+    /// Binds the texture half of a combined texture/sampler pair.
+    ///
+    /// The sampler is ignored; the texture view is bound exactly as `setResource`
+    /// would bind it, and the result of that call is returned so that a failed
+    /// binding is reported to the caller instead of being masked by `SLANG_OK`.
+    virtual SLANG_NO_THROW Result SLANG_MCALL setCombinedTextureSampler(
+        ShaderOffset const& offset, IResourceView* textureView, ISamplerState* sampler)
+    {
+        SLANG_UNUSED(sampler);
+        return setResource(offset, textureView);
+    }
+
+    // Appends all types that are used to specialize the element type of this shader object to the `args` list.
+    //
+    // Returns `SLANG_FAIL` when a sub-object that has to contribute specialization arguments
+    // has not been bound yet, and otherwise propagates failures from the sub-objects.
+    virtual Result collectSpecializationArgs(ExtendedShaderObjectTypeList& args) override
+    {
+        // TODO: the logic here is a copy-paste of `GraphicsCommonShaderObject::collectSpecializationArgs`,
+        // consider moving the implementation to `ShaderObjectBase` and share the logic among different implementations.
+
+        auto layout = getLayout();
+        auto& subObjectRanges = layout->subObjectRanges;
+
+        // The following logic is built on the assumption that all fields that involve existential types (and
+        // therefore require specialization) will result in a sub-object range in the type layout.
+        // This allows us to simply scan the sub-object ranges to find all specialization arguments.
+        for (Index subObjIndex = 0; subObjIndex < subObjectRanges.getCount(); subObjIndex++)
+        {
+            // Retrieve the corresponding binding range of the sub-object (by reference, to avoid a copy per iteration).
+            const auto& bindingRange = layout->m_bindingRanges[subObjectRanges[subObjIndex].bindingRangeIndex];
+            switch (bindingRange.bindingType)
+            {
+            case slang::BindingType::ExistentialValue:
+                {
+                    // A binding type of `ExistentialValue` means the sub-object represents an interface-typed field.
+                    // In this case the specialization argument for this field is the actual specialized type of the bound
+                    // shader object. If the shader object's type is an ordinary type without existential fields, then the
+                    // type argument will simply be the ordinary type. But if the sub-object's type is itself a specialized
+                    // type, we need to make sure to use that type as the specialization argument.
+
+                    // TODO: need to implement the case where the field is an array of existential values.
+                    SLANG_ASSERT(bindingRange.count == 1);
+
+                    // Slots of existential ranges stay empty until the application binds an object,
+                    // so an unbound field must be reported rather than dereferenced.
+                    auto& subObject = m_objects[subObjIndex];
+                    if (!subObject)
+                        return SLANG_FAIL;
+
+                    ExtendedShaderObjectType specializedSubObjType;
+                    SLANG_RETURN_ON_FAIL(subObject->getSpecializedShaderObjectType(&specializedSubObjType));
+                    args.add(specializedSubObjType);
+                    break;
+                }
+            case slang::BindingType::ParameterBlock:
+            case slang::BindingType::ConstantBuffer:
+                {
+                    // Currently we only handle the case where the field's type is
+                    // `ParameterBlock<SomeStruct>` or `ConstantBuffer<SomeStruct>`, where `SomeStruct` is a struct type
+                    // (not directly an interface type). In this case, we just recursively collect the specialization arguments
+                    // from the bound sub-object.
+                    auto& subObject = m_objects[subObjIndex];
+                    if (!subObject)
+                        return SLANG_FAIL;
+                    SLANG_RETURN_ON_FAIL(subObject->collectSpecializationArgs(args));
+                    // TODO: we need to handle the case where the field is of the form `ParameterBlock<IFoo>`. We should treat
+                    // this case the same way as the `ExistentialValue` case here, but currently we lack a mechanism to distinguish
+                    // the two scenarios.
+                    break;
+                }
+            }
+            // TODO: need to handle another case where specialization happens on resource fields e.g. `StructuredBuffer<IFoo>`.
+        }
+        return SLANG_OK;
+    }
+};
+
+/// Shader object type instantiated once per entry-point layout of a program.
+class CPUEntryPointShaderObject : public CPUShaderObject
+{
+public:
+    /// Returns the stored layout, downcast to the entry-point layout type.
+    CPUEntryPointLayout* getLayout()
+    {
+        auto storedLayout = m_layout.Ptr();
+        return static_cast<CPUEntryPointLayout*>(storedLayout);
+    }
+};
+
+/// Root shader object: the global-scope parameters of a program plus one
+/// entry-point shader object per entry point of the program layout.
+class CPURootShaderObject : public CPUShaderObject
+{
+public:
+    /// Initializes the global-scope object and creates the entry-point objects.
+    SlangResult init(IDevice* device, CPUProgramLayout* programLayout);
+
+    /// Returns the stored layout, downcast to the program layout type.
+    CPUProgramLayout* getLayout() { return static_cast<CPUProgramLayout*>(m_layout.Ptr()); }
+
+    /// Internal accessor; `index` is not range-checked.
+    CPUEntryPointShaderObject* getEntryPoint(Index index) { return m_entryPoints[index]; }
+
+    /// Entry-point objects, in the order of the program layout's entry points.
+    List<RefPtr<CPUEntryPointShaderObject>> m_entryPoints;
+
+    virtual SLANG_NO_THROW UInt SLANG_MCALL getEntryPointCount() override { return m_entryPoints.getCount(); }
+
+    /// Returns an owning reference to the entry-point object at `index`.
+    ///
+    /// Yields `SLANG_E_INVALID_ARG` when `outEntryPoint` is null or `index` is out of
+    /// range; in the latter case `*outEntryPoint` is cleared.
+    virtual SLANG_NO_THROW Result SLANG_MCALL
+        getEntryPoint(UInt index, IShaderObject** outEntryPoint) override
+    {
+        if (!outEntryPoint)
+            return SLANG_E_INVALID_ARG;
+
+        // This overload is reachable through the public interface, so validate the index.
+        if (index >= UInt(m_entryPoints.getCount()))
+        {
+            *outEntryPoint = nullptr;
+            return SLANG_E_INVALID_ARG;
+        }
+
+        *outEntryPoint = ComPtr<IShaderObject>(m_entryPoints[index]).detach();
+        return SLANG_OK;
+    }
+
+    /// Collects the specialization arguments of the global scope first, then those
+    /// of every entry point in order.
+    virtual Result collectSpecializationArgs(ExtendedShaderObjectTypeList& args) override
+    {
+        SLANG_RETURN_ON_FAIL(CPUShaderObject::collectSpecializationArgs(args));
+        for (auto& entryPoint : m_entryPoints)
+        {
+            SLANG_RETURN_ON_FAIL(entryPoint->collectSpecializationArgs(args));
+        }
+        return SLANG_OK;
+    }
+};
+
+/// Shader program of the CPU back-end: the shared base-class state plus the
+/// program layout built for it.
+class CPUShaderProgram : public ShaderProgramBase
+{
+public:
+    ~CPUShaderProgram() {}
+
+    /// Layout created from the Slang program layout; left empty when the program
+    /// was created without a Slang program.
+    RefPtr<CPUProgramLayout> layout;
+};
+
+/// Pipeline state of the CPU back-end; only compute pipelines are set up here.
+class CPUPipelineState : public PipelineStateBase
+{
+public:
+    /// Wraps `inDesc` into a compute-typed pipeline description and hands it to the base class.
+    void init(const ComputePipelineStateDesc& inDesc)
+    {
+        PipelineStateDesc baseDesc;
+        baseDesc.type = PipelineType::Compute;
+        baseDesc.compute = inDesc;
+        initializeBase(baseDesc);
+    }
+
+    /// Returns the program of this pipeline, downcast to the CPU program type.
+    CPUShaderProgram* getProgram()
+    {
+        auto baseProgram = m_program.get();
+        return static_cast<CPUShaderProgram*>(baseProgram);
+    }
+};
+
+class CPUDevice : public RendererBase
+{
+private:
+ RefPtr<CPUPipelineState> m_currentPipeline = nullptr;
+ RefPtr<CPURootShaderObject> m_currentRootObject = nullptr;
+ DeviceInfo m_info;
+
+ class CommandQueueImpl;
+
+    /// Command buffer of the CPU device.
+    ///
+    /// The encoders handed out by this class are embedded members; each call made on
+    /// them is forwarded to the `CommandWriter` base of the owning command buffer.
+    class CommandBufferImpl
+        : public ICommandBuffer
+        , public CommandWriter
+        , public RefObject
+    {
+    public:
+        SLANG_REF_OBJECT_IUNKNOWN_ALL
+        ICommandBuffer* getInterface(const Guid& guid)
+        {
+            const bool isSupported =
+                guid == GfxGUID::IID_ISlangUnknown || guid == GfxGUID::IID_ICommandBuffer;
+            return isSupported ? static_cast<ICommandBuffer*>(this) : nullptr;
+        }
+
+    public:
+        /// Render passes are not available; the returned encoder is always null.
+        virtual SLANG_NO_THROW void SLANG_MCALL encodeRenderCommands(
+            IRenderPassLayout* renderPass,
+            IFramebuffer* framebuffer,
+            IRenderCommandEncoder** outEncoder) override
+        {
+            SLANG_UNUSED(framebuffer);
+            SLANG_UNUSED(renderPass);
+            *outEncoder = nullptr;
+        }
+
+        /// Compute encoder that forwards to the command writer it was initialized with.
+        class ComputeCommandEncoderImpl
+            : public IComputeCommandEncoder
+        {
+        public:
+            virtual SLANG_NO_THROW SlangResult SLANG_MCALL
+                queryInterface(SlangUUID const& uuid, void** outObject) override
+            {
+                const bool matches =
+                    uuid == GfxGUID::IID_ISlangUnknown ||
+                    uuid == GfxGUID::IID_IComputeCommandEncoder;
+                if (!matches)
+                {
+                    *outObject = nullptr;
+                    return SLANG_E_NO_INTERFACE;
+                }
+                *outObject = static_cast<IComputeCommandEncoder*>(this);
+                return SLANG_OK;
+            }
+            // The encoder keeps no reference count of its own; both calls report 1.
+            virtual SLANG_NO_THROW uint32_t SLANG_MCALL addRef() { return 1; }
+            virtual SLANG_NO_THROW uint32_t SLANG_MCALL release() { return 1; }
+
+        public:
+            CommandWriter* m_writer;
+
+            void init(CommandBufferImpl* cmdBuffer)
+            {
+                m_writer = cmdBuffer;
+            }
+            virtual SLANG_NO_THROW void SLANG_MCALL endEncoding() override {}
+
+            virtual SLANG_NO_THROW void SLANG_MCALL setPipelineState(IPipelineState* state) override
+            {
+                m_writer->setPipelineState(state);
+            }
+
+            virtual SLANG_NO_THROW void SLANG_MCALL
+                bindRootShaderObject(IShaderObject* object) override
+            {
+                m_writer->bindRootShaderObject(PipelineType::Compute, object);
+            }
+
+            virtual SLANG_NO_THROW void SLANG_MCALL setDescriptorSet(
+                IPipelineLayout* layout,
+                UInt index,
+                IDescriptorSet* descriptorSet) override
+            {
+                m_writer->setDescriptorSet(PipelineType::Compute, layout, index, descriptorSet);
+            }
+
+            virtual SLANG_NO_THROW void SLANG_MCALL dispatchCompute(int x, int y, int z) override
+            {
+                m_writer->dispatchCompute(x, y, z);
+            }
+        };
+
+        ComputeCommandEncoderImpl m_computeCommandEncoder;
+
+        /// Points the embedded compute encoder at this command buffer and returns it.
+        virtual SLANG_NO_THROW void SLANG_MCALL
+            encodeComputeCommands(IComputeCommandEncoder** outEncoder) override
+        {
+            m_computeCommandEncoder.init(this);
+            *outEncoder = &m_computeCommandEncoder;
+        }
+
+        /// Resource encoder that forwards to the command writer it was initialized with.
+        class ResourceCommandEncoderImpl
+            : public IResourceCommandEncoder
+        {
+        public:
+            virtual SLANG_NO_THROW SlangResult SLANG_MCALL
+                queryInterface(SlangUUID const& uuid, void** outObject) override
+            {
+                const bool matches =
+                    uuid == GfxGUID::IID_ISlangUnknown ||
+                    uuid == GfxGUID::IID_IResourceCommandEncoder;
+                if (!matches)
+                {
+                    *outObject = nullptr;
+                    return SLANG_E_NO_INTERFACE;
+                }
+                *outObject = static_cast<IResourceCommandEncoder*>(this);
+                return SLANG_OK;
+            }
+            // The encoder keeps no reference count of its own; both calls report 1.
+            virtual SLANG_NO_THROW uint32_t SLANG_MCALL addRef() { return 1; }
+            virtual SLANG_NO_THROW uint32_t SLANG_MCALL release() { return 1; }
+
+        public:
+            CommandWriter* m_writer;
+
+            void init(CommandBufferImpl* cmdBuffer)
+            {
+                m_writer = cmdBuffer;
+            }
+
+            virtual SLANG_NO_THROW void SLANG_MCALL endEncoding() override {}
+
+            virtual SLANG_NO_THROW void SLANG_MCALL copyBuffer(
+                IBufferResource* dst,
+                size_t dstOffset,
+                IBufferResource* src,
+                size_t srcOffset,
+                size_t size) override
+            {
+                m_writer->copyBuffer(dst, dstOffset, src, srcOffset, size);
+            }
+
+            virtual SLANG_NO_THROW void SLANG_MCALL
+                uploadBufferData(IBufferResource* dst, size_t offset, size_t size, void* data)
+            {
+                m_writer->uploadBufferData(dst, offset, size, data);
+            }
+        };
+
+        ResourceCommandEncoderImpl m_resourceCommandEncoder;
+
+        /// Points the embedded resource encoder at this command buffer and returns it.
+        virtual SLANG_NO_THROW void SLANG_MCALL
+            encodeResourceCommands(IResourceCommandEncoder** outEncoder) override
+        {
+            m_resourceCommandEncoder.init(this);
+            *outEncoder = &m_resourceCommandEncoder;
+        }
+
+        /// Nothing needs to be finalized when recording ends.
+        virtual SLANG_NO_THROW void SLANG_MCALL close() override {}
+    };
+
+    /// Command queue of the CPU device.
+    ///
+    /// Command buffers are replayed synchronously inside `executeCommandBuffers`,
+    /// which is why `wait` has nothing to do.
+    class CommandQueueImpl
+        : public ICommandQueue
+        , public RefObject
+    {
+    public:
+        SLANG_REF_OBJECT_IUNKNOWN_ALL
+        ICommandQueue* getInterface(const Guid& guid)
+        {
+            if (guid == GfxGUID::IID_ISlangUnknown || guid == GfxGUID::IID_ICommandQueue)
+                return static_cast<ICommandQueue*>(this);
+            return nullptr;
+        }
+
+    public:
+        /// State carried from one replayed command to the next.
+        RefPtr<CPUPipelineState> currentPipeline;
+        RefPtr<CPURootShaderObject> currentRootObject;
+        RefPtr<CPUDevice> renderer;
+        Desc m_desc;
+
+    public:
+        void init(CPUDevice* inRenderer)
+        {
+            renderer = inRenderer;
+            m_desc.type = ICommandQueue::QueueType::Graphics;
+        }
+        ~CommandQueueImpl()
+        {
+            currentPipeline = nullptr;
+            currentRootObject = nullptr;
+        }
+
+    public:
+        virtual SLANG_NO_THROW const Desc& SLANG_MCALL getDesc() override
+        {
+            return m_desc;
+        }
+
+        virtual SLANG_NO_THROW Result SLANG_MCALL
+            createCommandBuffer(ICommandBuffer** outCommandBuffer) override
+        {
+            RefPtr<CommandBufferImpl> result = new CommandBufferImpl();
+            *outCommandBuffer = result.detach();
+            return SLANG_OK;
+        }
+
+        /// Replays the given command buffers one after another, in order.
+        virtual SLANG_NO_THROW void SLANG_MCALL
+            executeCommandBuffers(uint32_t count, ICommandBuffer* const* commandBuffers) override
+        {
+            for (uint32_t i = 0; i < count; i++)
+            {
+                execute(static_cast<CommandBufferImpl*>(commandBuffers[i]));
+            }
+        }
+
+        virtual SLANG_NO_THROW void SLANG_MCALL wait() override
+        {}
+
+    public:
+        void setPipelineState(IPipelineState* state)
+        {
+            currentPipeline = static_cast<CPUPipelineState*>(state);
+        }
+
+        /// Makes `object` the current root object; a null object yields `SLANG_E_INVALID_ARG`.
+        Result bindRootShaderObject(PipelineType pipelineType, IShaderObject* object)
+        {
+            SLANG_UNUSED(pipelineType);
+            currentRootObject = static_cast<CPURootShaderObject*>(object);
+            if (currentRootObject)
+                return SLANG_OK;
+            return SLANG_E_INVALID_ARG;
+        }
+
+        /// Runs the first entry point of the current pipeline over the group range
+        /// `[0, x) x [0, y) x [0, z)`.
+        ///
+        /// The function cannot report errors, so the dispatch is skipped whenever a
+        /// prerequisite is missing: no pipeline or root object bound, no entry point,
+        /// pipeline specialization produced nothing, the program has no Slang component,
+        /// no host-callable library was produced, or the kernel symbol was not found.
+        void dispatchCompute(int x, int y, int z)
+        {
+            int entryPointIndex = 0;
+            int targetIndex = 0;
+
+            if (!currentPipeline || !currentRootObject)
+                return;
+            if (currentRootObject->m_entryPoints.getCount() <= entryPointIndex)
+                return;
+
+            // Specialize the compute kernel based on the shader object bindings.
+            RefPtr<PipelineStateBase> newPipeline;
+            renderer->maybeSpecializePipeline(currentPipeline, currentRootObject, newPipeline);
+            if (!newPipeline)
+                return;
+            currentPipeline = static_cast<CPUPipelineState*>(newPipeline.Ptr());
+
+            auto program = currentPipeline->getProgram();
+            if (!program || !program->slangProgram)
+                return;
+
+            auto entryPointLayout = currentRootObject->getLayout()->getEntryPoint(entryPointIndex);
+            auto entryPointName = entryPointLayout->getEntryPointName();
+
+            auto entryPointObject = currentRootObject->getEntryPoint(entryPointIndex);
+
+            // The library stays empty when no host-callable code could be produced.
+            ComPtr<ISlangSharedLibrary> sharedLibrary;
+            program->slangProgram->getEntryPointHostCallable(entryPointIndex, targetIndex, sharedLibrary.writeRef());
+            if (!sharedLibrary)
+                return;
+
+            auto func = (slang_prelude::ComputeFunc) sharedLibrary->findSymbolAddressByName(entryPointName);
+            if (!func)
+                return;
+
+            slang_prelude::ComputeVaryingInput varyingInput;
+            varyingInput.startGroupID.x = 0;
+            varyingInput.startGroupID.y = 0;
+            varyingInput.startGroupID.z = 0;
+            varyingInput.endGroupID.x = x;
+            varyingInput.endGroupID.y = y;
+            varyingInput.endGroupID.z = z;
+
+            auto globalParamsData = currentRootObject->m_data;
+            auto entryPointParamsData = entryPointObject->m_data;
+            func(&varyingInput, entryPointParamsData, globalParamsData);
+        }
+
+        /// Copies `size` bytes between two CPU buffers; offsets and size are not range-checked here.
+        void copyBuffer(
+            IBufferResource* dst,
+            size_t dstOffset,
+            IBufferResource* src,
+            size_t srcOffset,
+            size_t size)
+        {
+            auto dstImpl = static_cast<CPUBufferResource*>(dst);
+            auto srcImpl = static_cast<CPUBufferResource*>(src);
+            memcpy(
+                (uint8_t*)dstImpl->m_data + dstOffset,
+                (uint8_t*)srcImpl->m_data + srcOffset,
+                size);
+        }
+
+        /// Copies `size` bytes from `data` into the buffer at `offset`; not range-checked here.
+        void uploadBufferData(IBufferResource* dst, size_t offset, size_t size, void* data)
+        {
+            auto dstImpl = static_cast<CPUBufferResource*>(dst);
+            memcpy((uint8_t*)dstImpl->m_data + offset, data, size);
+        }
+
+        /// Replays every recorded command of `commandBuffer` in recording order.
+        /// Commands other than the ones listed below are skipped.
+        void execute(CommandBufferImpl* commandBuffer)
+        {
+            for (auto& cmd : commandBuffer->m_commands)
+            {
+                switch (cmd.name)
+                {
+                case CommandName::SetPipelineState:
+                    setPipelineState(commandBuffer->getObject<IPipelineState>(cmd.operands[0]));
+                    break;
+                case CommandName::BindRootShaderObject:
+                    bindRootShaderObject(
+                        (PipelineType)cmd.operands[0],
+                        commandBuffer->getObject<IShaderObject>(cmd.operands[1]));
+                    break;
+                case CommandName::DispatchCompute:
+                    dispatchCompute(
+                        int(cmd.operands[0]), int(cmd.operands[1]), int(cmd.operands[2]));
+                    break;
+                case CommandName::CopyBuffer:
+                    copyBuffer(
+                        commandBuffer->getObject<IBufferResource>(cmd.operands[0]),
+                        cmd.operands[1],
+                        commandBuffer->getObject<IBufferResource>(cmd.operands[2]),
+                        cmd.operands[3],
+                        cmd.operands[4]);
+                    break;
+                case CommandName::UploadBufferData:
+                    uploadBufferData(
+                        commandBuffer->getObject<IBufferResource>(cmd.operands[0]),
+                        cmd.operands[1],
+                        cmd.operands[2],
+                        commandBuffer->getData<uint8_t>(cmd.operands[3]));
+                    break;
+                }
+            }
+        }
+    };
+
+public:
+    /// Drops the device's references to the current pipeline and root object,
+    /// pipeline first.
+    ~CPUDevice()
+    {
+        m_currentPipeline = nullptr;
+        m_currentRootObject = nullptr;
+    }
+
+    /// Initializes the Slang context for host-callable output, then the shared
+    /// renderer state, and finally fills in the static device information.
+    virtual SLANG_NO_THROW SlangResult SLANG_MCALL initialize(const Desc& desc) override
+    {
+        SLANG_RETURN_ON_FAIL(slangContext.initialize(desc.slang, SLANG_HOST_CALLABLE, "sm_5_1"));
+
+        SLANG_RETURN_ON_FAIL(RendererBase::initialize(desc));
+
+        // Describe this device to clients.
+        static const float kIdentity[] = {1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1};
+        ::memcpy(m_info.identityProjectionMatrix, kIdentity, sizeof(kIdentity));
+
+        m_info.deviceType = DeviceType::CPU;
+        m_info.bindingStyle = BindingStyle::CUDA;
+        m_info.projectionStyle = ProjectionStyle::DirectX;
+        m_info.apiName = "CPU";
+        m_info.adapterName = "CPU";
+
+        return SLANG_OK;
+    }
+
+    /// Creates a host-memory texture from `desc` and initializes it with `initData`.
+    /// `initialUsage` is not consulted.
+    virtual SLANG_NO_THROW Result SLANG_MCALL createTextureResource(
+        IResource::Usage initialUsage,
+        const ITextureResource::Desc& desc,
+        const ITextureResource::SubresourceData* initData,
+        ITextureResource** outResource) override
+    {
+        SLANG_UNUSED(initialUsage);
+
+        RefPtr<CPUTextureResource> newTexture = new CPUTextureResource(desc);
+        SLANG_RETURN_ON_FAIL(newTexture->init(initData));
+
+        *outResource = newTexture.detach();
+        return SLANG_OK;
+    }
+
+    /// Creates a host-memory buffer from `desc`; when `initData` is given, the
+    /// first `desc.sizeInBytes` bytes of it are copied into the new buffer.
+    /// `initialUsage` is not consulted.
+    virtual SLANG_NO_THROW Result SLANG_MCALL createBufferResource(
+        IResource::Usage initialUsage,
+        const IBufferResource::Desc& desc,
+        const void* initData,
+        IBufferResource** outResource) override
+    {
+        SLANG_UNUSED(initialUsage);
+
+        RefPtr<CPUBufferResource> newBuffer = new CPUBufferResource(desc);
+        SLANG_RETURN_ON_FAIL(newBuffer->init());
+
+        if (initData)
+        {
+            SLANG_RETURN_ON_FAIL(newBuffer->setData(0, desc.sizeInBytes, initData));
+        }
+
+        *outResource = newBuffer.detach();
+        return SLANG_OK;
+    }
+
+    /// Creates a view over a texture that was created by this back-end.
+    virtual SLANG_NO_THROW Result SLANG_MCALL createTextureView(
+        ITextureResource* inTexture, IResourceView::Desc const& desc, IResourceView** outView) override
+    {
+        auto cpuTexture = static_cast<CPUTextureResource*>(inTexture);
+
+        RefPtr<CPUTextureView> newView = new CPUTextureView(desc, cpuTexture);
+        *outView = newView.detach();
+        return SLANG_OK;
+    }
+
+    /// Creates a view over a buffer that was created by this back-end.
+    virtual SLANG_NO_THROW Result SLANG_MCALL createBufferView(
+        IBufferResource* inBuffer, IResourceView::Desc const& desc, IResourceView** outView) override
+    {
+        auto cpuBuffer = static_cast<CPUBufferResource*>(inBuffer);
+
+        RefPtr<CPUBufferView> newView = new CPUBufferView(desc, cpuBuffer);
+        *outView = newView.detach();
+        return SLANG_OK;
+    }
+
+    /// Builds the CPU shader-object layout for the given Slang type layout.
+    virtual Result createShaderObjectLayout(
+        slang::TypeLayoutReflection* typeLayout,
+        ShaderObjectLayoutBase** outLayout) override
+    {
+        RefPtr<CPUShaderObjectLayout> newLayout = new CPUShaderObjectLayout(this, typeLayout);
+        *outLayout = newLayout.detach();
+        return SLANG_OK;
+    }
+
+    /// Creates and initializes a shader object for a layout produced by this back-end.
+    virtual Result createShaderObject(
+        ShaderObjectLayoutBase* layout,
+        IShaderObject** outObject) override
+    {
+        RefPtr<CPUShaderObject> newObject = new CPUShaderObject();
+        SLANG_RETURN_ON_FAIL(newObject->init(this, static_cast<CPUShaderObjectLayout*>(layout)));
+
+        *outObject = newObject.detach();
+        return SLANG_OK;
+    }
+
+    /// Creates the root shader object for `program`, based on the program's layout.
+    virtual SLANG_NO_THROW Result SLANG_MCALL
+        createRootShaderObject(IShaderProgram* program, IShaderObject** outObject) override
+    {
+        auto programImpl = static_cast<CPUShaderProgram*>(program);
+        auto programLayout = programImpl->layout;
+
+        RefPtr<CPURootShaderObject> rootObject = new CPURootShaderObject();
+        SLANG_RETURN_ON_FAIL(rootObject->init(this, programLayout));
+
+        *outObject = rootObject.detach();
+        return SLANG_OK;
+    }
+
+    /// Creates a shader program.
+    ///
+    /// * With no kernels in `desc`, creation is delegated to `createProgramFromSlang`.
+    /// * With exactly one kernel, a program is created and, when a Slang program is
+    ///   attached, a program layout is built from its reflection data.
+    /// * Any other kernel count is rejected with `SLANG_E_INVALID_ARG`.
+    virtual SLANG_NO_THROW Result SLANG_MCALL
+        createProgram(const IShaderProgram::Desc& desc, IShaderProgram** outProgram) override
+    {
+        if (desc.kernelCount == 0)
+            return createProgramFromSlang(this, desc, outProgram);
+
+        if (desc.kernelCount != 1)
+            return SLANG_E_INVALID_ARG;
+
+        RefPtr<CPUShaderProgram> newProgram = new CPUShaderProgram();
+
+        // TODO: stuff?
+
+        auto slangProgram = desc.slangProgram;
+        if (slangProgram)
+        {
+            newProgram->slangProgram = slangProgram;
+
+            auto reflection = slangProgram->getLayout();
+            if (!reflection)
+                return SLANG_FAIL;
+
+            RefPtr<CPUProgramLayout> newLayout = new CPUProgramLayout(this, reflection);
+            newLayout->m_programLayout = reflection;
+
+            newProgram->layout = newLayout;
+        }
+
+        *outProgram = newProgram.detach();
+        return SLANG_OK;
+    }
+
+    /// Creates a compute pipeline state from `desc`.
+    ///
+    /// Success is reported with the named `SLANG_OK` code, as in every other factory
+    /// method of this device, rather than with a value-initialized `Result`.
+    virtual SLANG_NO_THROW Result SLANG_MCALL createComputePipelineState(
+        const ComputePipelineStateDesc& desc, IPipelineState** outState) override
+    {
+        RefPtr<CPUPipelineState> state = new CPUPipelineState();
+        state->init(desc);
+        *outState = state.detach();
+        return SLANG_OK;
+    }
+
+    /// Returns the device description filled in by `initialize`.
+    virtual SLANG_NO_THROW const DeviceInfo& SLANG_MCALL getDeviceInfo() const override
+    {
+        const DeviceInfo& info = m_info;
+        return info;
+    }
+
+public:
+    /// Creates a command queue bound to this device. `desc` is not consulted.
+    virtual SLANG_NO_THROW Result SLANG_MCALL
+        createCommandQueue(const ICommandQueue::Desc& desc, ICommandQueue** outQueue) override
+    {
+        SLANG_UNUSED(desc);
+
+        RefPtr<CommandQueueImpl> newQueue = new CommandQueueImpl();
+        newQueue->init(this);
+
+        *outQueue = newQueue.detach();
+        return SLANG_OK;
+    }
+    /// Swapchains are not provided by this device; always fails with `SLANG_FAIL`
+    /// and leaves `outSwapchain` untouched.
+    virtual SLANG_NO_THROW Result SLANG_MCALL createSwapchain(
+        const ISwapchain::Desc& desc, WindowHandle window, ISwapchain** outSwapchain) override
+    {
+        SLANG_UNUSED(outSwapchain);
+        SLANG_UNUSED(window);
+        SLANG_UNUSED(desc);
+        return SLANG_FAIL;
+    }
+    /// Framebuffer layouts are not provided by this device; always fails with
+    /// `SLANG_FAIL` and leaves `outLayout` untouched.
+    virtual SLANG_NO_THROW Result SLANG_MCALL createFramebufferLayout(
+        const IFramebufferLayout::Desc& desc, IFramebufferLayout** outLayout) override
+    {
+        SLANG_UNUSED(outLayout);
+        SLANG_UNUSED(desc);
+        return SLANG_FAIL;
+    }
+    /// Framebuffers are not provided by this device; always fails with `SLANG_FAIL`
+    /// and leaves `outFramebuffer` untouched.
+    virtual SLANG_NO_THROW Result SLANG_MCALL
+        createFramebuffer(const IFramebuffer::Desc& desc, IFramebuffer** outFramebuffer) override
+    {
+        SLANG_UNUSED(outFramebuffer);
+        SLANG_UNUSED(desc);
+        return SLANG_FAIL;
+    }
+    /// Render pass layouts are not provided by this device; always fails with
+    /// `SLANG_FAIL` and leaves `outRenderPassLayout` untouched.
+    virtual SLANG_NO_THROW Result SLANG_MCALL createRenderPassLayout(
+        const IRenderPassLayout::Desc& desc,
+        IRenderPassLayout** outRenderPassLayout) override
+    {
+        SLANG_UNUSED(outRenderPassLayout);
+        SLANG_UNUSED(desc);
+        return SLANG_FAIL;
+    }
+    /// Reports success without creating anything: `*outSampler` is set to null
+    /// and `desc` is not consulted.
+    virtual SLANG_NO_THROW Result SLANG_MCALL
+        createSamplerState(ISamplerState::Desc const& desc, ISamplerState** outSampler) override
+    {
+        SLANG_UNUSED(desc);
+
+        *outSampler = nullptr;
+        return SLANG_OK;
+    }
+
+    /// Input layouts are not available on this device; always returns
+    /// `SLANG_E_NOT_AVAILABLE` and leaves `outLayout` untouched.
+    virtual SLANG_NO_THROW Result SLANG_MCALL createInputLayout(
+        const InputElementDesc* inputElements,
+        UInt inputElementCount,
+        IInputLayout** outLayout) override
+    {
+        SLANG_UNUSED(outLayout);
+        SLANG_UNUSED(inputElementCount);
+        SLANG_UNUSED(inputElements);
+        return SLANG_E_NOT_AVAILABLE;
+    }
+    /// Descriptor set layouts are not available on this device; always returns
+    /// `SLANG_E_NOT_AVAILABLE` and leaves `outLayout` untouched.
+    virtual SLANG_NO_THROW Result SLANG_MCALL createDescriptorSetLayout(
+        const IDescriptorSetLayout::Desc& desc, IDescriptorSetLayout** outLayout) override
+    {
+        SLANG_UNUSED(outLayout);
+        SLANG_UNUSED(desc);
+        return SLANG_E_NOT_AVAILABLE;
+    }
+    /// Pipeline layouts are not available on this device; always returns
+    /// `SLANG_E_NOT_AVAILABLE` and leaves `outLayout` untouched.
+    virtual SLANG_NO_THROW Result SLANG_MCALL
+        createPipelineLayout(const IPipelineLayout::Desc& desc, IPipelineLayout** outLayout) override
+    {
+        SLANG_UNUSED(outLayout);
+        SLANG_UNUSED(desc);
+        return SLANG_E_NOT_AVAILABLE;
+    }
+    /// Descriptor sets are not available on this device; always returns
+    /// `SLANG_E_NOT_AVAILABLE` and leaves `outDescriptorSet` untouched.
+    virtual SLANG_NO_THROW Result SLANG_MCALL
+        createDescriptorSet(IDescriptorSetLayout* layout, IDescriptorSet::Flag::Enum flags, IDescriptorSet** outDescriptorSet) override
+    {
+        SLANG_UNUSED(outDescriptorSet);
+        SLANG_UNUSED(flags);
+        SLANG_UNUSED(layout);
+        return SLANG_E_NOT_AVAILABLE;
+    }
+    /// Graphics pipelines are not available on this device; always returns
+    /// `SLANG_E_NOT_AVAILABLE` and leaves `outState` untouched.
+    virtual SLANG_NO_THROW Result SLANG_MCALL createGraphicsPipelineState(
+        const GraphicsPipelineStateDesc& desc, IPipelineState** outState) override
+    {
+        SLANG_UNUSED(outState);
+        SLANG_UNUSED(desc);
+        return SLANG_E_NOT_AVAILABLE;
+    }
+    /// Texture read-back is not available on this device; always returns
+    /// `SLANG_E_NOT_AVAILABLE` and leaves all output parameters untouched.
+    ///
+    /// Every parameter, including `state`, is explicitly marked as unused so that
+    /// the stub is consistent with the other unsupported entry points.
+    virtual SLANG_NO_THROW SlangResult SLANG_MCALL readTextureResource(
+        ITextureResource* texture,
+        ResourceState state,
+        ISlangBlob** outBlob,
+        size_t* outRowPitch,
+        size_t* outPixelSize) override
+    {
+        SLANG_UNUSED(texture);
+        SLANG_UNUSED(state);
+        SLANG_UNUSED(outBlob);
+        SLANG_UNUSED(outRowPitch);
+        SLANG_UNUSED(outPixelSize);
+
+        return SLANG_E_NOT_AVAILABLE;
+    }
+    /// Copies `size` bytes starting at `offset` out of `buffer` into a newly created blob.
+    ///
+    /// Returns `SLANG_E_INVALID_ARG` when `buffer` or `outBlob` is null, or when the
+    /// requested range does not lie inside the buffer as described by its `sizeInBytes`.
+    virtual SLANG_NO_THROW Result SLANG_MCALL readBufferResource(
+        IBufferResource* buffer,
+        size_t offset,
+        size_t size,
+        ISlangBlob** outBlob) override
+    {
+        if (!buffer || !outBlob)
+            return SLANG_E_INVALID_ARG;
+
+        auto bufferImpl = static_cast<CPUBufferResource*>(buffer);
+
+        // Validate the range before touching memory. The comparison is arranged so
+        // that `offset + size` is never computed and therefore cannot wrap around.
+        const size_t bufferSize = size_t(bufferImpl->getDesc()->sizeInBytes);
+        if (offset > bufferSize || size > bufferSize - offset)
+            return SLANG_E_INVALID_ARG;
+
+        RefPtr<ListBlob> blob = new ListBlob();
+        blob->m_data.setCount((Index)size);
+        memcpy(
+            blob->m_data.getBuffer(),
+            (uint8_t*)bufferImpl->m_data + offset,
+            size);
+        *outBlob = blob.detach();
+        return SLANG_OK;
+    }
+};
+
+/// Initializes this shader object for `typeLayout`.
+///
+/// Allocates zero-filled storage for the uniform data of the element type, sizes
+/// the resource and sub-object tables, and pre-creates a sub-object for every
+/// sub-object range whose layout is known up front.
+///
+/// Returns `SLANG_E_OUT_OF_MEMORY` when the uniform storage cannot be allocated,
+/// and otherwise propagates failures from sub-object creation and binding.
+SlangResult CPUShaderObject::init(IDevice* device, CPUShaderObjectLayout* typeLayout)
+{
+    m_layout = typeLayout;
+
+    // If the layout tells us that there is any uniform data,
+    // then we need to allocate a constant buffer to hold that data.
+    //
+    // TODO: Do we need to allocate a shadow copy for use from
+    // the CPU?
+    //
+    // TODO: When/where do we bind this constant buffer into
+    // a descriptor set for later use?
+    //
+    auto slangLayout = getLayout()->getElementTypeLayout();
+    size_t uniformSize = slangLayout->getSize();
+    if (uniformSize)
+    {
+        // Zero-fill the storage so that fields the application never sets
+        // (including pointer-sized handles) read as zero instead of garbage.
+        m_data = calloc(1, uniformSize);
+        if (!m_data)
+            return SLANG_E_OUT_OF_MEMORY;
+    }
+
+    // If the layout specifies that we have any resources or sub-objects,
+    // then we need to size the appropriate arrays to account for them.
+    //
+    // Note: the counts here are the *total* number of resources/sub-objects
+    // and not just the number of resource/sub-object ranges.
+    //
+    m_resources.setCount(typeLayout->getResourceCount());
+    m_objects.setCount(typeLayout->getSubObjectCount());
+
+    // Iterate by reference: each range holds a ref-counted layout pointer,
+    // so copying the entries would only add reference-count traffic.
+    for (const auto& subObjectRange : getLayout()->subObjectRanges)
+    {
+        RefPtr<CPUShaderObjectLayout> subObjectLayout = subObjectRange.layout;
+
+        // In the case where the sub-object range represents an
+        // existential-type leaf field (e.g., an `IBar`), we
+        // cannot pre-allocate the object(s) to go into that
+        // range, since we can't possibly know what to allocate
+        // at this point.
+        //
+        if (!subObjectLayout)
+            continue;
+        //
+        // Otherwise, we will allocate a sub-object to fill
+        // in each entry in this range, based on the layout
+        // information we already have.
+
+        auto& bindingRangeInfo = getLayout()->m_bindingRanges[subObjectRange.bindingRangeIndex];
+        for (Index i = 0; i < bindingRangeInfo.count; ++i)
+        {
+            RefPtr<CPUShaderObject> subObject = new CPUShaderObject();
+            SLANG_RETURN_ON_FAIL(subObject->init(device, subObjectLayout));
+
+            // Each element of the range occupies one pointer-sized slot in the uniform data.
+            ShaderOffset offset;
+            offset.uniformOffset = bindingRangeInfo.uniformOffset + sizeof(void*) * i;
+            offset.bindingRangeIndex = subObjectRange.bindingRangeIndex;
+            offset.bindingArrayIndex = i;
+
+            SLANG_RETURN_ON_FAIL(setObject(offset, subObject));
+        }
+    }
+    return SLANG_OK;
+}
+
+/// Initializes the global-scope part of the root object and then creates one
+/// entry-point shader object per entry-point layout of `programLayout`.
+SlangResult CPURootShaderObject::init(IDevice* device, CPUProgramLayout* programLayout)
+{
+    SLANG_RETURN_ON_FAIL(CPUShaderObject::init(device, programLayout));
+
+    for (auto& entryPointLayout : programLayout->m_entryPointLayouts)
+    {
+        RefPtr<CPUEntryPointShaderObject> entryPointObject = new CPUEntryPointShaderObject();
+        SLANG_RETURN_ON_FAIL(entryPointObject->init(device, entryPointLayout));
+        m_entryPoints.add(entryPointObject);
+    }
+
+    return SLANG_OK;
+}
+
+/// Creates a CPU device, initializes it from `*desc`, and on success hands an
+/// owning reference back through `outDevice`.
+SlangResult SLANG_MCALL createCPUDevice(const IDevice::Desc* desc, IDevice** outDevice)
+{
+    RefPtr<CPUDevice> device = new CPUDevice();
+    SLANG_RETURN_ON_FAIL(device->initialize(*desc));
+
+    *outDevice = device.detach();
+    return SLANG_OK;
+}
+
+}
diff --git a/tools/gfx/cpu/render-cpu.h b/tools/gfx/cpu/render-cpu.h
new file mode 100644
index 000000000..fca57aa4d
--- /dev/null
+++ b/tools/gfx/cpu/render-cpu.h
@@ -0,0 +1,11 @@
+// render-cpu.h
+#pragma once
+
+#include "../renderer-shared.h"
+
+namespace gfx
+{
+
+SlangResult SLANG_MCALL createCPUDevice(const IDevice::Desc* desc, IDevice** outDevice); // Creates a CPU device initialized from `*desc`; on success the device is returned through `outDevice`.
+
+}