diff options
| author | Tim Foley <tfoleyNV@users.noreply.github.com> | 2021-03-12 11:58:14 -0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2021-03-12 11:58:14 -0800 |
| commit | d6a37a0f151e390808f196998c48a341bc4c7b60 (patch) | |
| tree | c1c6e3af434cb3627af67ecc8706124e4b8c7fb1 /tools/gfx/cuda/render-cuda.cpp | |
| parent | 9ffe2f3ef245034a2dae42017a9059dfe4d02647 (diff) | |
Add a CPU renderer implementation (#1750)
* Add a CPU renderer implementation
This change adds a CPU back-end to `gfx` and ensures that most of our existing CPU tests pass when using it.
Detailed notes:
* Most of the CPU renderer implementation is copy-pasted from the CUDA case, so they share a lot of similar logic
* The main addition to the CPU renderer is a semi-complete implementation of host-memory textures. The logic here handles all the main shapes (Buffer, 1D, 2D, 3D, Cube) and all the currently-supported `Format`s that are sample-able as-is (no D24S8). The implementation is not intended to be fast, and it currently only does nearest-neighbor sampling, but otherwise it tries to avoid cutting too many corners and should be a reasonable starting point for a more complete (but not performance-oriented) implementation.
* Refactored the CPU prelude `IRWTexture` interface to inherit from `ITexture`, since in most cases a single type will end up implementing both. It might be worth it to collapse it all down to a single interface later.
* Changed the CPU prelude `ITexture`/`IRWTexture` interface so that it takes both a pointer *and* a size for output arguments. This change seems necessary to allow a shader variable declared as a `Texture2D<float>` to fetch a single `float` when the underlying texture might be using RGBA32F.
* Added to the `IComponentType` public API so that we can query a "host callable" for an entry point and not just a binary.
* Turned off the `-shaderobj` flag on two tests that weren't yet compatible with shader objects but still had the flag left in on the path (since previously the CPU path always used the non-`gfx` non-shader-object logic anyway)
* Disabled one test (`dynamic-dispatch-11`) that relied on the `ConstantBuffer<IInterface>` idiom that we know we are planning to change soon anyway.
* Made a few changes to the CUDA path to bring it into line with what I added for the CPU path. These were mostly bug fixes around indexing logic for sub-objects and resources.
* fixup
Diffstat (limited to 'tools/gfx/cuda/render-cuda.cpp')
| -rw-r--r-- | tools/gfx/cuda/render-cuda.cpp | 75 |
1 files changed, 42 insertions, 33 deletions
diff --git a/tools/gfx/cuda/render-cuda.cpp b/tools/gfx/cuda/render-cuda.cpp index d13045359..89aaa33aa 100644 --- a/tools/gfx/cuda/render-cuda.cpp +++ b/tools/gfx/cuda/render-cuda.cpp @@ -278,36 +278,17 @@ public: List<SubObjectRangeInfo> subObjectRanges; List<BindingRangeInfo> m_bindingRanges; - slang::TypeLayoutReflection* unwrapParameterGroups(slang::TypeLayoutReflection* typeLayout) - { - for (;;) - { - if (!typeLayout->getType()) - { - if (auto elementTypeLayout = typeLayout->getElementTypeLayout()) - typeLayout = elementTypeLayout; - } - - switch (typeLayout->getKind()) - { - default: - return typeLayout; - - case slang::TypeReflection::Kind::ConstantBuffer: - case slang::TypeReflection::Kind::ParameterBlock: - typeLayout = typeLayout->getElementTypeLayout(); - continue; - } - } - } + Index m_subObjectCount = 0; + Index m_resourceCount = 0; CUDAShaderObjectLayout(RendererBase* renderer, slang::TypeLayoutReflection* layout) { initBase(renderer, layout); Index subObjectCount = 0; + Index resourceCount = 0; - m_elementTypeLayout = unwrapParameterGroups(layout); + m_elementTypeLayout = _unwrapParameterGroups(layout); // Compute the binding ranges that are used to store // the logical contents of the object in memory. 
These will relate @@ -348,6 +329,8 @@ public: break; default: + baseIndex = resourceCount; + resourceCount += count; break; } @@ -359,6 +342,9 @@ public: m_bindingRanges.add(bindingRangeInfo); } + m_subObjectCount = subObjectCount; + m_resourceCount = resourceCount; + SlangInt subObjectRangeCount = m_elementTypeLayout->getSubObjectRangeCount(); for (SlangInt r = 0; r < subObjectRangeCount; ++r) { @@ -387,6 +373,9 @@ public: subObjectRanges.add(subObjectRange); } } + + Index getResourceCount() const { return m_resourceCount; } + Index getSubObjectCount() const { return m_subObjectCount; } }; class CUDAProgramLayout : public CUDAShaderObjectLayout @@ -503,6 +492,11 @@ public: { auto subObjectIndex = getLayout()->m_bindingRanges[offset.bindingRangeIndex].baseIndex + offset.bindingArrayIndex; + + SLANG_ASSERT(subObjectIndex < objects.getCount()); + if(subObjectIndex >= objects.getCount()) + return SLANG_E_INVALID_ARG; + if (subObjectIndex >= objects.getCount()) { *object = nullptr; @@ -525,8 +519,6 @@ public: auto subObjectIndex = bindingRange.baseIndex + offset.bindingArrayIndex; auto subObject = dynamic_cast<CUDAShaderObject*>(object); - if (subObjectIndex >= objects.getCount()) - objects.setCount(subObjectIndex + 1); // TODO: We should really not need to retain the objects here objects[subObjectIndex] = subObject; @@ -635,10 +627,19 @@ public: virtual SLANG_NO_THROW Result SLANG_MCALL setResource(ShaderOffset const& offset, IResourceView* resourceView) { + auto layout = getLayout(); + + auto bindingRangeIndex = offset.bindingRangeIndex; + SLANG_ASSERT(bindingRangeIndex >= 0); + SLANG_ASSERT(bindingRangeIndex < layout->m_bindingRanges.getCount()); + + auto& bindingRange = layout->m_bindingRanges[bindingRangeIndex]; + + auto viewIndex = bindingRange.baseIndex + offset.bindingArrayIndex; auto cudaView = dynamic_cast<CUDAResourceView*>(resourceView); - if (offset.bindingRangeIndex >= resources.getCount()) - resources.setCount(offset.bindingRangeIndex + 1); - 
resources[offset.bindingRangeIndex] = cudaView; + + resources[viewIndex] = cudaView; + if (cudaView->textureResource) { if (cudaView->desc.type == IResourceView::Type::UnorderedAccess) @@ -2059,9 +2060,15 @@ SlangResult CUDAShaderObject::init(IDevice* device, CUDAShaderObjectLayout* type initBuffer(device, uniformSize); } - // If the layout specifies that we have any sub-objects, then - // we need to size the array to account for them. + // If the layout specifies that we have any resources or sub-objects, + // then we need to size the appropriate arrays to account for them. + // + // Note: the counts here are the *total* number of resources/sub-objects + // and not just the number of resource/sub-object ranges. // + resources.setCount(typeLayout->getResourceCount()); + objects.setCount(typeLayout->getSubObjectCount()); + Index subObjectCount = slangLayout->getSubObjectRangeCount(); objects.setCount(subObjectCount); @@ -2087,11 +2094,13 @@ SlangResult CUDAShaderObject::init(IDevice* device, CUDAShaderObjectLayout* type { RefPtr<CUDAShaderObject> subObject = new CUDAShaderObject(); SLANG_RETURN_ON_FAIL(subObject->init(device, subObjectLayout)); - objects[bindingRangeInfo.baseIndex + i] = subObject; + ShaderOffset offset; offset.uniformOffset = bindingRangeInfo.uniformOffset + sizeof(void*) * i; - if (subObject->bufferResource) - SLANG_RETURN_ON_FAIL(setData(offset, &subObject->bufferResource->m_cudaMemory, sizeof(void*))); + offset.bindingRangeIndex = subObjectRange.bindingRangeIndex; + offset.bindingArrayIndex = i; + + SLANG_RETURN_ON_FAIL(setObject(offset, subObject)); } } return SLANG_OK; |
