diff options
Diffstat (limited to 'tools')
| -rw-r--r-- | tools/gfx/cuda/render-cuda.cpp | 77 | ||||
| -rw-r--r-- | tools/gfx/render-graphics-common.cpp | 19 | ||||
| -rw-r--r-- | tools/render-test/render-test-main.cpp | 34 | ||||
| -rw-r--r-- | tools/render-test/shader-renderer-util.cpp | 11 |
4 files changed, 120 insertions, 21 deletions
diff --git a/tools/gfx/cuda/render-cuda.cpp b/tools/gfx/cuda/render-cuda.cpp index 69166c6b6..bf316546e 100644 --- a/tools/gfx/cuda/render-cuda.cpp +++ b/tools/gfx/cuda/render-cuda.cpp @@ -549,12 +549,12 @@ public: { if (cudaView->desc.type == IResourceView::Type::UnorderedAccess) { - auto handle = cudaView->textureResource->getBindlessHandle(); + auto handle = cudaView->textureResource->m_cudaSurfObj; setData(offset, &handle, sizeof(uint64_t)); } else { - auto handle = cudaView->textureResource->m_cudaSurfObj; + auto handle = cudaView->textureResource->getBindlessHandle(); setData(offset, &handle, sizeof(uint64_t)); } } @@ -814,6 +814,38 @@ private: CUresourcetype resourceType; size_t elementSize = 0; + // Our `ITextureResource::Desc` uses an enumeration to specify + // the "shape"/rank of a texture (1D, 2D, 3D, Cube), but CUDA's + // `cuMipmappedArrayCreate` seemingly relies on a policy where + // the extents of the array in dimensions above the rank are + // specified as zero (e.g., a 1D texture requires `height==0`). + // + // We will start by massaging the extents as specified by the + // user into a form that CUDA wants/expects, based on the + // texture shape as specified in the `desc`. 
+ // + int width = desc.size.width; + int height = desc.size.height; + int depth = desc.size.depth; + switch (desc.type) + { + case IResource::Type::Texture1D: + height = 0; + depth = 0; + break; + + case IResource::Type::Texture2D: + depth = 0; + break; + + case IResource::Type::Texture3D: + break; + + case IResource::Type::TextureCube: + depth = 1; + break; + } + { CUarray_format format = CU_AD_FORMAT_FLOAT; int numChannels = 0; @@ -848,9 +880,9 @@ private: CUDA_ARRAY3D_DESCRIPTOR arrayDesc; memset(&arrayDesc, 0, sizeof(arrayDesc)); - arrayDesc.Width = desc.size.width; - arrayDesc.Height = desc.size.height; - arrayDesc.Depth = desc.size.depth; + arrayDesc.Width = width; + arrayDesc.Height = height; + arrayDesc.Depth = depth; arrayDesc.Format = format; arrayDesc.NumChannels = numChannels; arrayDesc.Flags = 0; @@ -904,8 +936,8 @@ private: arrayDesc.Depth *= 6; } - arrayDesc.Height = desc.size.height; - arrayDesc.Width = desc.size.width; + arrayDesc.Height = height; + arrayDesc.Width = width; arrayDesc.Format = format; arrayDesc.NumChannels = numChannels; @@ -922,9 +954,9 @@ private: CUDA_ARRAY3D_DESCRIPTOR arrayDesc; memset(&arrayDesc, 0, sizeof(arrayDesc)); - arrayDesc.Depth = desc.size.depth; - arrayDesc.Height = desc.size.height; - arrayDesc.Width = desc.size.width; + arrayDesc.Depth = depth; + arrayDesc.Height = height; + arrayDesc.Width = width; arrayDesc.Format = format; arrayDesc.NumChannels = numChannels; @@ -944,8 +976,8 @@ private: CUDA_ARRAY_DESCRIPTOR arrayDesc; memset(&arrayDesc, 0, sizeof(arrayDesc)); - arrayDesc.Height = desc.size.height; - arrayDesc.Width = desc.size.width; + arrayDesc.Height = height; + arrayDesc.Width = width; arrayDesc.Format = format; arrayDesc.NumChannels = numChannels; @@ -957,9 +989,6 @@ private: // Work space for holding data for uploading if it needs to be rearranged List<uint8_t> workspace; - auto width = desc.size.width; - auto height = desc.size.height; - auto depth = desc.size.depth; for (int mipLevel = 0; mipLevel < 
desc.numMipLevels; ++mipLevel) { int mipWidth = width >> mipLevel; @@ -1152,8 +1181,22 @@ private: resDesc.res.mipmap.hMipmappedArray = tex->m_cudaMipMappedArray; } - // Create handle for uav. - SLANG_CUDA_RETURN_ON_FAIL(cuSurfObjectCreate(&tex->m_cudaSurfObj, &resDesc)); + // If the texture might be used as a UAV, then we need to allocate + // a CUDA "surface" for it. + // + // Note: We cannot do this unconditionally, because it will fail + // on surfaces that are not usable as UAVs (e.g., those with + // mipmaps). + // + // TODO: We should really only be allocating the array at the + // time we create a resource, and then allocate the surface or + // texture objects as part of view creation. + // + if( desc.bindFlags & IResource::BindFlag::UnorderedAccess ) + { + SLANG_CUDA_RETURN_ON_FAIL(cuSurfObjectCreate(&tex->m_cudaSurfObj, &resDesc)); + } + // Create handle for sampling. CUDA_TEXTURE_DESC texDesc; diff --git a/tools/gfx/render-graphics-common.cpp b/tools/gfx/render-graphics-common.cpp index 811602ffc..92ccc376c 100644 --- a/tools/gfx/render-graphics-common.cpp +++ b/tools/gfx/render-graphics-common.cpp @@ -592,6 +592,10 @@ public: { info.rangeOffset = m_descriptorSetBuildInfos[0]->slotRangeDescs.getCount(); } + else + { + info.rangeOffset = 0; + } auto slangEntryPointLayout = entryPointLayout->getSlangLayout(); _addDescriptorSets( @@ -647,7 +651,20 @@ protected: } IPipelineLayout::Desc pipelineLayoutDesc; - pipelineLayoutDesc.renderTargetCount = m_renderTargetCount; + + // HACK: we set `renderTargetCount` to zero here because otherwise the D3D12 + // render back-end will adjust all UAV registers by this value to account + // for the `SV_Target<N>` outputs implicitly consuming `u<N>` registers for + // Shader Model 5.0. + // + // When using the shader object path, all registers are being set via Slang + // reflection information, and we do not need/want the automatic adjustment. 
+ // + // TODO: Once we eliminate the non-shader-object path, this whole issue should + // be moot, because the `ProgramLayout` should own/be the pipeline layout anyway. + // + pipelineLayoutDesc.renderTargetCount = 0; + pipelineLayoutDesc.descriptorSetCount = pipelineDescriptorSets.getCount(); pipelineLayoutDesc.descriptorSets = pipelineDescriptorSets.getBuffer(); diff --git a/tools/render-test/render-test-main.cpp b/tools/render-test/render-test-main.cpp index 4c05422ee..89bb25871 100644 --- a/tools/render-test/render-test-main.cpp +++ b/tools/render-test/render-test-main.cpp @@ -392,8 +392,38 @@ SlangResult _assignVarsFromLayout( case ShaderInputType::Object: { auto typeName = entry.objectDesc.typeName; - auto slangType = slangReflection->findTypeByName(typeName.getBuffer()); - auto slangTypeLayout = slangReflection->getTypeLayout(slangType); + slang::TypeLayoutReflection* slangTypeLayout = nullptr; + if(typeName.getLength() != 0) + { + // If the input line specified the name of the type + // to allocate, then we use it directly. + // + auto slangType = slangReflection->findTypeByName(typeName.getBuffer()); + slangTypeLayout = slangReflection->getTypeLayout(slangType); + } + else + { + // if the user did not specify what type to allocate, + // then we will infer the type from the type of the + // value pointed to by `entryCursor`. + // + slangTypeLayout = entryCursor.getTypeLayout(); + switch(slangTypeLayout->getKind()) + { + default: + break; + + case slang::TypeReflection::Kind::ConstantBuffer: + case slang::TypeReflection::Kind::ParameterBlock: + // If the cursor is pointing at a constant buffer + // or parameter block, then we assume the user + // actually means to allocate an object based on + // the element type of the block. 
+ // + slangTypeLayout = slangTypeLayout->getElementTypeLayout(); + break; + } + } ComPtr<IShaderObjectLayout> shaderObjectLayout = renderer->createShaderObjectLayout(slangTypeLayout); ComPtr<IShaderObject> shaderObject = diff --git a/tools/render-test/shader-renderer-util.cpp b/tools/render-test/shader-renderer-util.cpp index c90d197f4..ad3812ae3 100644 --- a/tools/render-test/shader-renderer-util.cpp +++ b/tools/render-test/shader-renderer-util.cpp @@ -182,6 +182,7 @@ ComPtr<ISamplerState> _createSamplerState(IRenderer* renderer, const int textureBindFlags = IResource::BindFlag::NonPixelShaderResource | IResource::BindFlag::PixelShaderResource; List<IDescriptorSetLayout::SlotRangeDesc> slotRangeDescs; + List<Index> mapEntryToSlotRange; if(addedConstantBuffer) { @@ -196,6 +197,7 @@ ComPtr<ISamplerState> _createSamplerState(IRenderer* renderer, const ShaderInputLayoutEntry& srcEntry = srcEntries[i]; SLANG_ASSERT(srcEntry.onlyCPULikeBinding == false); + mapEntryToSlotRange.add(slotRangeDescs.getCount()); IDescriptorSetLayout::SlotRangeDesc slotRangeDesc; switch (srcEntry.type) @@ -252,6 +254,10 @@ ComPtr<ISamplerState> _createSamplerState(IRenderer* renderer, slotRangeDesc.type = DescriptorSlotType::Sampler; break; + case ShaderInputType::Object: + // We ignore the `Object` case here, knowing that it is meant for the shader-object path. + continue; + default: assert(!"Unhandled type"); return SLANG_FAIL; @@ -290,7 +296,7 @@ ComPtr<ISamplerState> _createSamplerState(IRenderer* renderer, { const ShaderInputLayoutEntry& srcEntry = srcEntries[i]; - auto rangeIndex = i + (addedConstantBuffer ? 1 : 0); + auto rangeIndex = mapEntryToSlotRange[i]; switch (srcEntry.type) { @@ -407,6 +413,9 @@ ComPtr<ISamplerState> _createSamplerState(IRenderer* renderer, } break; + case ShaderInputType::Object: + break; + default: assert(!"Unhandled type"); return SLANG_FAIL; |
