summaryrefslogtreecommitdiffstats
path: root/tools/gfx/cuda/render-cuda.cpp
diff options
context:
space:
mode:
authorTim Foley <tfoleyNV@users.noreply.github.com>2021-03-12 11:58:14 -0800
committerGitHub <noreply@github.com>2021-03-12 11:58:14 -0800
commitd6a37a0f151e390808f196998c48a341bc4c7b60 (patch)
treec1c6e3af434cb3627af67ecc8706124e4b8c7fb1 /tools/gfx/cuda/render-cuda.cpp
parent9ffe2f3ef245034a2dae42017a9059dfe4d02647 (diff)
Add a CPU renderer implementation (#1750)
* Add a CPU renderer implementation This change adds a CPU back-end to `gfx` and ensures that most of our existing CPU tests pass when using it. Detailed notes: * Most of the CPU renderer implementation is copy-pasted from the CUDA case, so they share a lot of similar logic * The main addition to the CPU renderer is a semi-complete implementation of host-memory textures. The logic here handles all the main shapes (Buffer, 1D, 2D, 3D, Cube) and all the currently-supported `Format`s that are sample-able as-is (no D24S8). The implementation is not intended to be fast, and it currently only does nearest-neighbor sampling, but otherwise it tries to avoid cutting too many corners and should be a reasonable starting point for a more complete (but not performance-oriented) implementation. * Refactored the CPU prelude `IRWTexture` interface to inherit from `ITexture`, since in most cases a single type will end up implementing both. It might be worth it to collapse it all down to a single interface later. * Changed the CPU prelude `ITexture`/`IRWTexture` interface so that it takes both a pointer *and* a size for output arguments. This change seems necessary to allow a shader variable declared as a `Texture2D<float>` to fetch a single `float` when the underlying texture might be using RGBA32F. * Added to the `IComponentType` public API so that we can query a "host callable" for an entry point and not just a binary. * Turned off the `-shaderobj` flag on two tests that weren't yet compatible with shader objects but still had the flag left in on the path (since previously the CPU path always used the non-`gfx` non-shader-object logic anyway) * Disabled one test (`dynamic-dispatch-11`) that relied on the `ConstantBuffer<IInterface>` idiom that we know we are planning to change soon anyway. * Made a few changes to the CUDA path to bring it into line with what I added for the CPU path. These were mostly bug fixes around indexing logic for sub-objects and resources. * fixup
Diffstat (limited to 'tools/gfx/cuda/render-cuda.cpp')
-rw-r--r--tools/gfx/cuda/render-cuda.cpp75
1 files changed, 42 insertions, 33 deletions
diff --git a/tools/gfx/cuda/render-cuda.cpp b/tools/gfx/cuda/render-cuda.cpp
index d13045359..89aaa33aa 100644
--- a/tools/gfx/cuda/render-cuda.cpp
+++ b/tools/gfx/cuda/render-cuda.cpp
@@ -278,36 +278,17 @@ public:
List<SubObjectRangeInfo> subObjectRanges;
List<BindingRangeInfo> m_bindingRanges;
- slang::TypeLayoutReflection* unwrapParameterGroups(slang::TypeLayoutReflection* typeLayout)
- {
- for (;;)
- {
- if (!typeLayout->getType())
- {
- if (auto elementTypeLayout = typeLayout->getElementTypeLayout())
- typeLayout = elementTypeLayout;
- }
-
- switch (typeLayout->getKind())
- {
- default:
- return typeLayout;
-
- case slang::TypeReflection::Kind::ConstantBuffer:
- case slang::TypeReflection::Kind::ParameterBlock:
- typeLayout = typeLayout->getElementTypeLayout();
- continue;
- }
- }
- }
+ Index m_subObjectCount = 0;
+ Index m_resourceCount = 0;
CUDAShaderObjectLayout(RendererBase* renderer, slang::TypeLayoutReflection* layout)
{
initBase(renderer, layout);
Index subObjectCount = 0;
+ Index resourceCount = 0;
- m_elementTypeLayout = unwrapParameterGroups(layout);
+ m_elementTypeLayout = _unwrapParameterGroups(layout);
// Compute the binding ranges that are used to store
// the logical contents of the object in memory. These will relate
@@ -348,6 +329,8 @@ public:
break;
default:
+ baseIndex = resourceCount;
+ resourceCount += count;
break;
}
@@ -359,6 +342,9 @@ public:
m_bindingRanges.add(bindingRangeInfo);
}
+ m_subObjectCount = subObjectCount;
+ m_resourceCount = resourceCount;
+
SlangInt subObjectRangeCount = m_elementTypeLayout->getSubObjectRangeCount();
for (SlangInt r = 0; r < subObjectRangeCount; ++r)
{
@@ -387,6 +373,9 @@ public:
subObjectRanges.add(subObjectRange);
}
}
+
+ Index getResourceCount() const { return m_resourceCount; }
+ Index getSubObjectCount() const { return m_subObjectCount; }
};
class CUDAProgramLayout : public CUDAShaderObjectLayout
@@ -503,6 +492,11 @@ public:
{
auto subObjectIndex =
getLayout()->m_bindingRanges[offset.bindingRangeIndex].baseIndex + offset.bindingArrayIndex;
+
+ SLANG_ASSERT(subObjectIndex < objects.getCount());
+ if(subObjectIndex >= objects.getCount())
+ return SLANG_E_INVALID_ARG;
+
if (subObjectIndex >= objects.getCount())
{
*object = nullptr;
@@ -525,8 +519,6 @@ public:
auto subObjectIndex = bindingRange.baseIndex + offset.bindingArrayIndex;
auto subObject = dynamic_cast<CUDAShaderObject*>(object);
- if (subObjectIndex >= objects.getCount())
- objects.setCount(subObjectIndex + 1);
// TODO: We should really not need to retain the objects here
objects[subObjectIndex] = subObject;
@@ -635,10 +627,19 @@ public:
virtual SLANG_NO_THROW Result SLANG_MCALL
setResource(ShaderOffset const& offset, IResourceView* resourceView)
{
+ auto layout = getLayout();
+
+ auto bindingRangeIndex = offset.bindingRangeIndex;
+ SLANG_ASSERT(bindingRangeIndex >= 0);
+ SLANG_ASSERT(bindingRangeIndex < layout->m_bindingRanges.getCount());
+
+ auto& bindingRange = layout->m_bindingRanges[bindingRangeIndex];
+
+ auto viewIndex = bindingRange.baseIndex + offset.bindingArrayIndex;
auto cudaView = dynamic_cast<CUDAResourceView*>(resourceView);
- if (offset.bindingRangeIndex >= resources.getCount())
- resources.setCount(offset.bindingRangeIndex + 1);
- resources[offset.bindingRangeIndex] = cudaView;
+
+ resources[viewIndex] = cudaView;
+
if (cudaView->textureResource)
{
if (cudaView->desc.type == IResourceView::Type::UnorderedAccess)
@@ -2059,9 +2060,15 @@ SlangResult CUDAShaderObject::init(IDevice* device, CUDAShaderObjectLayout* type
initBuffer(device, uniformSize);
}
- // If the layout specifies that we have any sub-objects, then
- // we need to size the array to account for them.
+ // If the layout specifies that we have any resources or sub-objects,
+ // then we need to size the appropriate arrays to account for them.
+ //
+ // Note: the counts here are the *total* number of resources/sub-objects
+ // and not just the number of resource/sub-object ranges.
//
+ resources.setCount(typeLayout->getResourceCount());
+ objects.setCount(typeLayout->getSubObjectCount());
+
Index subObjectCount = slangLayout->getSubObjectRangeCount();
objects.setCount(subObjectCount);
@@ -2087,11 +2094,13 @@ SlangResult CUDAShaderObject::init(IDevice* device, CUDAShaderObjectLayout* type
{
RefPtr<CUDAShaderObject> subObject = new CUDAShaderObject();
SLANG_RETURN_ON_FAIL(subObject->init(device, subObjectLayout));
- objects[bindingRangeInfo.baseIndex + i] = subObject;
+
ShaderOffset offset;
offset.uniformOffset = bindingRangeInfo.uniformOffset + sizeof(void*) * i;
- if (subObject->bufferResource)
- SLANG_RETURN_ON_FAIL(setData(offset, &subObject->bufferResource->m_cudaMemory, sizeof(void*)));
+ offset.bindingRangeIndex = subObjectRange.bindingRangeIndex;
+ offset.bindingArrayIndex = i;
+
+ SLANG_RETURN_ON_FAIL(setObject(offset, subObject));
}
}
return SLANG_OK;