From d6a37a0f151e390808f196998c48a341bc4c7b60 Mon Sep 17 00:00:00 2001 From: Tim Foley Date: Fri, 12 Mar 2021 11:58:14 -0800 Subject: Add a CPU renderer implementation (#1750) * Add a CPU renderer implementation This change adds a CPU back-end to `gfx` and ensures that most of our existing CPU tests pass when using it. Detailed notes: * Most of the CPU renderer implementation is copy-pasted from the CUDA case, so they share a lot of similar logic * The main addition to the CPU renderer is a semi-complete implementation of host-memory textures. The logic here handles all the main shapes (Buffer, 1D, 2D, 3D, Cube) and all the currently-supported `Format`s that are sample-able as-is (no D24S8). The implementation is not intended to be fast, and it currently only does nearest-neighbor sampling, but otherwise it tries to avoid cutting too many corners and should be a reasonable starting point for a more complete (but not performance-oriented) implementation. * Refactored the CPU prelude `IRWTexture` interface to inherit from `ITexture`, since in most cases a single type will end up implementing both. It might be worth it to collapse it all down to a single interface later. * Changed the CPU prelude `ITexture`/`IRWTexture` interface so that it takes both a pointer *and* a size for output arguments. This change seems necessary to allow a shader variable declared as a `Texture2D` to fetch a single `float` when the underlying texture might be using RGBA32F. * Added to the `IComponentType` public API so that we can query a "host callable" for an entry point and not just a binary. * Turned off the `-shaderobj` flag on two tests that weren't yet compatible with shader objects but still had the flag left in on the path (since previously the CPU path always used the non-`gfx` non-shader-object logic anyway) * Disabled one test (`dynamic-dispatch-11`) that relied on the `ConstantBuffer` idiom that we know we are planning to change soon anyway. 
* Made a few changes to the CUDA path to bring it into line with what I added for the CPU path. These were mostly bug fixes around indexing logic for sub-objects and resources. * fixup --- tools/gfx/cuda/render-cuda.cpp | 75 +++++++++++++++++++++++------------------- 1 file changed, 42 insertions(+), 33 deletions(-) (limited to 'tools/gfx/cuda') diff --git a/tools/gfx/cuda/render-cuda.cpp b/tools/gfx/cuda/render-cuda.cpp index d13045359..89aaa33aa 100644 --- a/tools/gfx/cuda/render-cuda.cpp +++ b/tools/gfx/cuda/render-cuda.cpp @@ -278,36 +278,17 @@ public: List subObjectRanges; List m_bindingRanges; - slang::TypeLayoutReflection* unwrapParameterGroups(slang::TypeLayoutReflection* typeLayout) - { - for (;;) - { - if (!typeLayout->getType()) - { - if (auto elementTypeLayout = typeLayout->getElementTypeLayout()) - typeLayout = elementTypeLayout; - } - - switch (typeLayout->getKind()) - { - default: - return typeLayout; - - case slang::TypeReflection::Kind::ConstantBuffer: - case slang::TypeReflection::Kind::ParameterBlock: - typeLayout = typeLayout->getElementTypeLayout(); - continue; - } - } - } + Index m_subObjectCount = 0; + Index m_resourceCount = 0; CUDAShaderObjectLayout(RendererBase* renderer, slang::TypeLayoutReflection* layout) { initBase(renderer, layout); Index subObjectCount = 0; + Index resourceCount = 0; - m_elementTypeLayout = unwrapParameterGroups(layout); + m_elementTypeLayout = _unwrapParameterGroups(layout); // Compute the binding ranges that are used to store // the logical contents of the object in memory. 
These will relate @@ -348,6 +329,8 @@ public: break; default: + baseIndex = resourceCount; + resourceCount += count; break; } @@ -359,6 +342,9 @@ public: m_bindingRanges.add(bindingRangeInfo); } + m_subObjectCount = subObjectCount; + m_resourceCount = resourceCount; + SlangInt subObjectRangeCount = m_elementTypeLayout->getSubObjectRangeCount(); for (SlangInt r = 0; r < subObjectRangeCount; ++r) { @@ -387,6 +373,9 @@ public: subObjectRanges.add(subObjectRange); } } + + Index getResourceCount() const { return m_resourceCount; } + Index getSubObjectCount() const { return m_subObjectCount; } }; class CUDAProgramLayout : public CUDAShaderObjectLayout @@ -503,6 +492,11 @@ public: { auto subObjectIndex = getLayout()->m_bindingRanges[offset.bindingRangeIndex].baseIndex + offset.bindingArrayIndex; + + SLANG_ASSERT(subObjectIndex < objects.getCount()); + if(subObjectIndex >= objects.getCount()) + return SLANG_E_INVALID_ARG; + if (subObjectIndex >= objects.getCount()) { *object = nullptr; @@ -525,8 +519,6 @@ public: auto subObjectIndex = bindingRange.baseIndex + offset.bindingArrayIndex; auto subObject = dynamic_cast(object); - if (subObjectIndex >= objects.getCount()) - objects.setCount(subObjectIndex + 1); // TODO: We should really not need to retain the objects here objects[subObjectIndex] = subObject; @@ -635,10 +627,19 @@ public: virtual SLANG_NO_THROW Result SLANG_MCALL setResource(ShaderOffset const& offset, IResourceView* resourceView) { + auto layout = getLayout(); + + auto bindingRangeIndex = offset.bindingRangeIndex; + SLANG_ASSERT(bindingRangeIndex >= 0); + SLANG_ASSERT(bindingRangeIndex < layout->m_bindingRanges.getCount()); + + auto& bindingRange = layout->m_bindingRanges[bindingRangeIndex]; + + auto viewIndex = bindingRange.baseIndex + offset.bindingArrayIndex; auto cudaView = dynamic_cast(resourceView); - if (offset.bindingRangeIndex >= resources.getCount()) - resources.setCount(offset.bindingRangeIndex + 1); - resources[offset.bindingRangeIndex] = cudaView; + 
+ resources[viewIndex] = cudaView; + if (cudaView->textureResource) { if (cudaView->desc.type == IResourceView::Type::UnorderedAccess) @@ -2059,9 +2060,15 @@ SlangResult CUDAShaderObject::init(IDevice* device, CUDAShaderObjectLayout* type initBuffer(device, uniformSize); } - // If the layout specifies that we have any sub-objects, then - // we need to size the array to account for them. + // If the layout specifies that we have any resources or sub-objects, + // then we need to size the appropriate arrays to account for them. + // + // Note: the counts here are the *total* number of resources/sub-objects + // and not just the number of resource/sub-object ranges. // + resources.setCount(typeLayout->getResourceCount()); + objects.setCount(typeLayout->getSubObjectCount()); + Index subObjectCount = slangLayout->getSubObjectRangeCount(); objects.setCount(subObjectCount); @@ -2087,11 +2094,13 @@ SlangResult CUDAShaderObject::init(IDevice* device, CUDAShaderObjectLayout* type { RefPtr subObject = new CUDAShaderObject(); SLANG_RETURN_ON_FAIL(subObject->init(device, subObjectLayout)); - objects[bindingRangeInfo.baseIndex + i] = subObject; + ShaderOffset offset; offset.uniformOffset = bindingRangeInfo.uniformOffset + sizeof(void*) * i; - if (subObject->bufferResource) - SLANG_RETURN_ON_FAIL(setData(offset, &subObject->bufferResource->m_cudaMemory, sizeof(void*))); + offset.bindingRangeIndex = subObjectRange.bindingRangeIndex; + offset.bindingArrayIndex = i; + + SLANG_RETURN_ON_FAIL(setObject(offset, subObject)); } } return SLANG_OK; -- cgit v1.2.3