summary refs log tree commit diff
path: root/tools
diff options
context:
space:
mode:
Diffstat (limited to 'tools')
-rw-r--r-- tools/gfx/cuda/render-cuda.cpp 77
-rw-r--r-- tools/gfx/render-graphics-common.cpp 19
-rw-r--r-- tools/render-test/render-test-main.cpp 34
-rw-r--r-- tools/render-test/shader-renderer-util.cpp 11
4 files changed, 120 insertions, 21 deletions
diff --git a/tools/gfx/cuda/render-cuda.cpp b/tools/gfx/cuda/render-cuda.cpp
index 69166c6b6..bf316546e 100644
--- a/tools/gfx/cuda/render-cuda.cpp
+++ b/tools/gfx/cuda/render-cuda.cpp
@@ -549,12 +549,12 @@ public:
{
if (cudaView->desc.type == IResourceView::Type::UnorderedAccess)
{
- auto handle = cudaView->textureResource->getBindlessHandle();
+ auto handle = cudaView->textureResource->m_cudaSurfObj;
setData(offset, &handle, sizeof(uint64_t));
}
else
{
- auto handle = cudaView->textureResource->m_cudaSurfObj;
+ auto handle = cudaView->textureResource->getBindlessHandle();
setData(offset, &handle, sizeof(uint64_t));
}
}
@@ -814,6 +814,38 @@ private:
CUresourcetype resourceType;
size_t elementSize = 0;
+ // Our `ITextureResource::Desc` uses an enumeration to specify
+ // the "shape"/rank of a texture (1D, 2D, 3D, Cube), but CUDA's
+ // `cuMipmappedArrayCreate` seemingly relies on a policy where
+ // the extents of the array in dimensions above the rank are
+ // specified as zero (e.g., a 1D texture requires `height==0`).
+ //
+ // We will start by massaging the extents as specified by the
+ // user into a form that CUDA wants/expects, based on the
+ // texture shape as specified in the `desc`.
+ //
+ int width = desc.size.width;
+ int height = desc.size.height;
+ int depth = desc.size.depth;
+ switch (desc.type)
+ {
+ case IResource::Type::Texture1D:
+ height = 0;
+ depth = 0;
+ break;
+
+ case IResource::Type::Texture2D:
+ depth = 0;
+ break;
+
+ case IResource::Type::Texture3D:
+ break;
+
+ case IResource::Type::TextureCube:
+ depth = 1;
+ break;
+ }
+
{
CUarray_format format = CU_AD_FORMAT_FLOAT;
int numChannels = 0;
@@ -848,9 +880,9 @@ private:
CUDA_ARRAY3D_DESCRIPTOR arrayDesc;
memset(&arrayDesc, 0, sizeof(arrayDesc));
- arrayDesc.Width = desc.size.width;
- arrayDesc.Height = desc.size.height;
- arrayDesc.Depth = desc.size.depth;
+ arrayDesc.Width = width;
+ arrayDesc.Height = height;
+ arrayDesc.Depth = depth;
arrayDesc.Format = format;
arrayDesc.NumChannels = numChannels;
arrayDesc.Flags = 0;
@@ -904,8 +936,8 @@ private:
arrayDesc.Depth *= 6;
}
- arrayDesc.Height = desc.size.height;
- arrayDesc.Width = desc.size.width;
+ arrayDesc.Height = height;
+ arrayDesc.Width = width;
arrayDesc.Format = format;
arrayDesc.NumChannels = numChannels;
@@ -922,9 +954,9 @@ private:
CUDA_ARRAY3D_DESCRIPTOR arrayDesc;
memset(&arrayDesc, 0, sizeof(arrayDesc));
- arrayDesc.Depth = desc.size.depth;
- arrayDesc.Height = desc.size.height;
- arrayDesc.Width = desc.size.width;
+ arrayDesc.Depth = depth;
+ arrayDesc.Height = height;
+ arrayDesc.Width = width;
arrayDesc.Format = format;
arrayDesc.NumChannels = numChannels;
@@ -944,8 +976,8 @@ private:
CUDA_ARRAY_DESCRIPTOR arrayDesc;
memset(&arrayDesc, 0, sizeof(arrayDesc));
- arrayDesc.Height = desc.size.height;
- arrayDesc.Width = desc.size.width;
+ arrayDesc.Height = height;
+ arrayDesc.Width = width;
arrayDesc.Format = format;
arrayDesc.NumChannels = numChannels;
@@ -957,9 +989,6 @@ private:
// Work space for holding data for uploading if it needs to be rearranged
List<uint8_t> workspace;
- auto width = desc.size.width;
- auto height = desc.size.height;
- auto depth = desc.size.depth;
for (int mipLevel = 0; mipLevel < desc.numMipLevels; ++mipLevel)
{
int mipWidth = width >> mipLevel;
@@ -1152,8 +1181,22 @@ private:
resDesc.res.mipmap.hMipmappedArray = tex->m_cudaMipMappedArray;
}
- // Create handle for uav.
- SLANG_CUDA_RETURN_ON_FAIL(cuSurfObjectCreate(&tex->m_cudaSurfObj, &resDesc));
+ // If the texture might be used as a UAV, then we need to allocate
+ // a CUDA "surface" for it.
+ //
+ // Note: We cannot do this unconditionally, because it will fail
+ // on surfaces that are not usable as UAVs (e.g., those with
+ // mipmaps).
+ //
+ // TODO: We should really only be allocating the array at the
+ // time we create a resource, and then allocate the surface or
+ // texture objects as part of view creation.
+ //
+ if( desc.bindFlags & IResource::BindFlag::UnorderedAccess )
+ {
+ SLANG_CUDA_RETURN_ON_FAIL(cuSurfObjectCreate(&tex->m_cudaSurfObj, &resDesc));
+ }
+
// Create handle for sampling.
CUDA_TEXTURE_DESC texDesc;
diff --git a/tools/gfx/render-graphics-common.cpp b/tools/gfx/render-graphics-common.cpp
index 811602ffc..92ccc376c 100644
--- a/tools/gfx/render-graphics-common.cpp
+++ b/tools/gfx/render-graphics-common.cpp
@@ -592,6 +592,10 @@ public:
{
info.rangeOffset = m_descriptorSetBuildInfos[0]->slotRangeDescs.getCount();
}
+ else
+ {
+ info.rangeOffset = 0;
+ }
auto slangEntryPointLayout = entryPointLayout->getSlangLayout();
_addDescriptorSets(
@@ -647,7 +651,20 @@ protected:
}
IPipelineLayout::Desc pipelineLayoutDesc;
- pipelineLayoutDesc.renderTargetCount = m_renderTargetCount;
+
+ // HACK: we set `renderTargetCount` to zero here because otherwise the D3D12
+ // render back-end will adjust all UAV registers by this value to account
+ // for the `SV_Target<N>` outputs implicitly consuming `u<N>` registers for
+ // Shader Model 5.0.
+ //
+ // When using the shader object path, all registers are being set via Slang
+ // reflection information, and we do not need/want the automatic adjustment.
+ //
+ // TODO: Once we eliminate the non-shader-object path, this whole issue should
+ // be moot, because the `ProgramLayout` should own/be the pipeline layout anyway.
+ //
+ pipelineLayoutDesc.renderTargetCount = 0;
+
pipelineLayoutDesc.descriptorSetCount = pipelineDescriptorSets.getCount();
pipelineLayoutDesc.descriptorSets = pipelineDescriptorSets.getBuffer();
diff --git a/tools/render-test/render-test-main.cpp b/tools/render-test/render-test-main.cpp
index 4c05422ee..89bb25871 100644
--- a/tools/render-test/render-test-main.cpp
+++ b/tools/render-test/render-test-main.cpp
@@ -392,8 +392,38 @@ SlangResult _assignVarsFromLayout(
case ShaderInputType::Object:
{
auto typeName = entry.objectDesc.typeName;
- auto slangType = slangReflection->findTypeByName(typeName.getBuffer());
- auto slangTypeLayout = slangReflection->getTypeLayout(slangType);
+ slang::TypeLayoutReflection* slangTypeLayout = nullptr;
+ if(typeName.getLength() != 0)
+ {
+ // If the input line specified the name of the type
+ // to allocate, then we use it directly.
+ //
+ auto slangType = slangReflection->findTypeByName(typeName.getBuffer());
+ slangTypeLayout = slangReflection->getTypeLayout(slangType);
+ }
+ else
+ {
+ // If the user did not specify what type to allocate,
+ // then we will infer the type from the type of the
+ // value pointed to by `entryCursor`.
+ //
+ slangTypeLayout = entryCursor.getTypeLayout();
+ switch(slangTypeLayout->getKind())
+ {
+ default:
+ break;
+
+ case slang::TypeReflection::Kind::ConstantBuffer:
+ case slang::TypeReflection::Kind::ParameterBlock:
+ // If the cursor is pointing at a constant buffer
+ // or parameter block, then we assume the user
+ // actually means to allocate an object based on
+ // the element type of the block.
+ //
+ slangTypeLayout = slangTypeLayout->getElementTypeLayout();
+ break;
+ }
+ }
ComPtr<IShaderObjectLayout> shaderObjectLayout = renderer->createShaderObjectLayout(slangTypeLayout);
ComPtr<IShaderObject> shaderObject =
diff --git a/tools/render-test/shader-renderer-util.cpp b/tools/render-test/shader-renderer-util.cpp
index c90d197f4..ad3812ae3 100644
--- a/tools/render-test/shader-renderer-util.cpp
+++ b/tools/render-test/shader-renderer-util.cpp
@@ -182,6 +182,7 @@ ComPtr<ISamplerState> _createSamplerState(IRenderer* renderer,
const int textureBindFlags = IResource::BindFlag::NonPixelShaderResource | IResource::BindFlag::PixelShaderResource;
List<IDescriptorSetLayout::SlotRangeDesc> slotRangeDescs;
+ List<Index> mapEntryToSlotRange;
if(addedConstantBuffer)
{
@@ -196,6 +197,7 @@ ComPtr<ISamplerState> _createSamplerState(IRenderer* renderer,
const ShaderInputLayoutEntry& srcEntry = srcEntries[i];
SLANG_ASSERT(srcEntry.onlyCPULikeBinding == false);
+ mapEntryToSlotRange.add(slotRangeDescs.getCount());
IDescriptorSetLayout::SlotRangeDesc slotRangeDesc;
switch (srcEntry.type)
@@ -252,6 +254,10 @@ ComPtr<ISamplerState> _createSamplerState(IRenderer* renderer,
slotRangeDesc.type = DescriptorSlotType::Sampler;
break;
+ case ShaderInputType::Object:
+ // We ignore the `Object` case here, knowing that it is meant for the shader-object path.
+ continue;
+
default:
assert(!"Unhandled type");
return SLANG_FAIL;
@@ -290,7 +296,7 @@ ComPtr<ISamplerState> _createSamplerState(IRenderer* renderer,
{
const ShaderInputLayoutEntry& srcEntry = srcEntries[i];
- auto rangeIndex = i + (addedConstantBuffer ? 1 : 0);
+ auto rangeIndex = mapEntryToSlotRange[i];
switch (srcEntry.type)
{
@@ -407,6 +413,9 @@ ComPtr<ISamplerState> _createSamplerState(IRenderer* renderer,
}
break;
+ case ShaderInputType::Object:
+ break;
+
default:
assert(!"Unhandled type");
return SLANG_FAIL;