diff options
| author | Tim Foley <tfoleyNV@users.noreply.github.com> | 2021-05-04 10:36:57 -0700 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2021-05-04 10:36:57 -0700 |
| commit | 7d52d3bd8905dfdf3018c41c9cad4685a98eb009 (patch) | |
| tree | 6d98592b88e6bef883ce3708d01348a370228996 | |
| parent | 1a4a51301d084dd1c8c5906eb810eb6caf6f3963 (diff) | |
Cleanup work on D3D12 shader object static specialization (#1830)
* Cleanup work on D3D12 shader object static specialization
This builds on PR #1829 in a small way (because that PR adds `getStride()` to Slang type layout reflection). The only relevant changes here are in the `render-d3d12.cpp` file.
The basic idea here is to clean up the D3D12 path to be more in line with the cleanups made for D3D11 and Vulkan. The way that D3D12 shader parameter binding goes through a root signature means that some of the details that were required for those APIs (in particular, tracking both "primary" and "pending" offsets during multiple steps) are not required for the critical-path binding stuff on D3D12.
There is some subtlety to the handling of the "ordinary" data buffer in the `bindAsValue()` case that I don't like, and that I'm not 100% confident I've gotten right. We may find that we have to revisit that logic as we add more tests.
* fixup
| -rw-r--r-- | tools/gfx/d3d12/render-d3d12.cpp | 792 |
1 file changed, 443 insertions, 349 deletions
diff --git a/tools/gfx/d3d12/render-d3d12.cpp b/tools/gfx/d3d12/render-d3d12.cpp index 67223088a..da169c03b 100644 --- a/tools/gfx/d3d12/render-d3d12.cpp +++ b/tools/gfx/d3d12/render-d3d12.cpp @@ -604,56 +604,94 @@ public: struct DescriptorTable { - DescriptorHeapReference heap; - uint32_t table; + DescriptorHeapReference m_heap; + uint32_t m_offset = 0; + uint32_t m_count = 0; + + SLANG_FORCE_INLINE uint32_t getDescriptorCount() const { return m_count; } /// Get the GPU handle at the specified index - SLANG_FORCE_INLINE D3D12_GPU_DESCRIPTOR_HANDLE getGpuHandle(uint32_t index) const + SLANG_FORCE_INLINE D3D12_GPU_DESCRIPTOR_HANDLE getGpuHandle(uint32_t index = 0) const { - return heap.getGpuHandle(table + index); + SLANG_ASSERT(index < getDescriptorCount()); + return m_heap.getGpuHandle(m_offset + index); } /// Get the CPU handle at the specified index - SLANG_FORCE_INLINE D3D12_CPU_DESCRIPTOR_HANDLE getCpuHandle(uint32_t index) const + SLANG_FORCE_INLINE D3D12_CPU_DESCRIPTOR_HANDLE getCpuHandle(uint32_t index = 0) const { - return heap.getCpuHandle(table + index); + SLANG_ASSERT(index < getDescriptorCount()); + return m_heap.getCpuHandle(m_offset + index); + } + + void freeIfSupported() + { + if(m_count) + { + m_heap.freeIfSupported(m_offset, m_count); + m_offset = 0; + m_count = 0; + } + } + + void allocate(uint32_t count) + { + m_offset = m_heap.allocate(count); + m_count = count; + } + + void allocate(DescriptorHeapReference heap, uint32_t count) + { + m_heap = heap; + m_offset = heap.allocate(count); + m_count = count; } }; - struct RootBindingState + /// Contextual data and operations required when binding shader objects to the pipeline state + struct BindingContext { - TransientResourceHeapImpl* transientHeap; - D3D12Device* device; - ArrayView<DescriptorTable> descriptorTables; + PipelineCommandEncoder* encoder; + Submitter* submitter; + TransientResourceHeapImpl* transientHeap; + D3D12Device* device; }; - struct BindingCounts + /// A representation of the 
offset at which to bind a shader parameter or sub-object + struct BindingOffset { + // Note: When we actually bind a shader object to the pipeline we do not care about + // HLSL-specific notions like `t` registers and `space`s. Those concepts are all + // mediated by the root signature. + // + // Instead, we need to consider the offsets at which the object will be bound + // into the actual D3D12 API state, which consists of the index of the current + // root parameter to bind from, as well as indices into the current descriptor + // tables (for resource views and samplers). + uint32_t rootParam = 0; uint32_t resource = 0; uint32_t sampler = 0; - BindingCounts() - {} - }; - - struct HeapBindingIndex - { - uint32_t descriptorTableIndex = 0; - uint32_t descriptorIndex = 0; + void operator+=(BindingOffset const& offset) + { + rootParam += offset.rootParam; + resource += offset.resource; + sampler += offset.sampler; + } }; - struct RootBindingIndex + /// A reprsentation of an allocated descriptor set, consisting of an option resource table and an optional sampler table + struct DescriptorSet { - uint32_t rootParamIndex = 0; - HeapBindingIndex resource; - HeapBindingIndex sampler; - }; + DescriptorTable resourceTable; + DescriptorTable samplerTable; - struct DescriptorSetInfo - { - uint32_t resourceDescriptorCount = 0; - uint32_t samplerDescriptorCount = 0; + void freeIfSupported() + { + resourceTable.freeIfSupported(); + samplerTable .freeIfSupported(); + } }; // Provides information on how binding ranges are stored in descriptor tables for @@ -684,7 +722,7 @@ public: }; /// Offset information for a sub-object range - struct SubObjectRangeOffset : BindingCounts + struct SubObjectRangeOffset : BindingOffset { SubObjectRangeOffset() {} @@ -702,7 +740,7 @@ public: }; /// Stride information for a sub-object range - struct SubObjectRangeStride + struct SubObjectRangeStride : BindingOffset { SubObjectRangeStride() {} @@ -751,43 +789,51 @@ public: slang::TypeLayoutReflection* 
m_elementTypeLayout; List<BindingRangeInfo> m_bindingRanges; List<SubObjectRangeInfo> m_subObjectRanges; -// DescriptorSetInfo m_descriptorSetInfo; -// uint32_t m_resourceSlotCount = 0; -// uint32_t m_samplerSlotCount = 0; - uint32_t m_subObjectSlotCount = 0; + /// The number of sub-objects (not just sub-object *ranges*) stored in instances of this layout + uint32_t m_subObjectCount = 0; - BindingCounts m_ownCounts; -// BindingCounts m_childCounts; - BindingCounts m_totalCounts; + /// Counters for the number of root parameters, resources, and samplers in this object itself + BindingOffset m_ownCounts; - uint32_t m_childRootParameterCount = 0; + /// Counters for the number of root parameters, resources, and sampler in this object and transitive sub-objects + BindingOffset m_totalCounts; - uint32_t m_ordinaryDataBufferCount = 0; + /// The number of root parameter consumed by (transitive) sub-objects + uint32_t m_childRootParameterCount = 0; + /// The total size in bytes of the ordinary data for this object and transitive sub-objects uint32_t m_totalOrdinaryDataSize = 0; - -// uint32_t m_totalResourceDescriptorCount = 0; -// uint32_t m_totalSamplerDescriptorCount = 0; - -// uint32_t m_ownRootParameterCount = 0; -// uint32_t m_childRootParameterCount = 0; -// uint32_t m_totalRootParameterCount = 0; - - void addBindingRangesOfType(slang::TypeLayoutReflection* typeLayout) + Result setElementTypeLayout(slang::TypeLayoutReflection* typeLayout) { - SlangInt bindingRangeCount = typeLayout->getBindingRangeCount(); + typeLayout = _unwrapParameterGroups(typeLayout); + m_elementTypeLayout = typeLayout; - // Reserve CBV slot for the implicit constant buffer if the type contains - // ordinary uniform data fields. - if (typeLayout->getSize(slang::ParameterCategory::Uniform) != 0) + // If the type contains any ordinary data, then we must reserve a buffer + // descriptor to hold it when binding as a parameter block. 
+ // + m_totalOrdinaryDataSize = (uint32_t) typeLayout->getSize(); + if (m_totalOrdinaryDataSize != 0) { - m_ordinaryDataBufferCount++; m_ownCounts.resource++; -// m_descriptorSetInfo.resourceDescriptorCount = 1; } + // We will scan over the reflected Slang binding ranges and add them + // to our array. There are two main things we compute along the way: + // + // * For each binding range we compute a `flatIndex` that can be + // used to identify where the values for the given range begin + // in the flattened arrays (e.g., `m_objects`) and descriptor + // tables that hold the state of a shader object. + // + // * We also update the various counters taht keep track of the number + // of sub-objects, resources, samplers, etc. that are being + // consumed. These counters will contribute to figuring out + // the descriptor table(s) that might be needed to represent + // the object. + // + SlangInt bindingRangeCount = typeLayout->getBindingRangeCount(); for (SlangInt r = 0; r < bindingRangeCount; ++r) { slang::BindingType slangBindingType = typeLayout->getBindingRangeType(r); @@ -798,15 +844,13 @@ public: bindingRangeInfo.bindingType = slangBindingType; bindingRangeInfo.count = count; -// bindingRangeInfo.flatIndex = m_flatResourceCount; - switch (slangBindingType) { case slang::BindingType::ConstantBuffer: case slang::BindingType::ParameterBlock: case slang::BindingType::ExistentialValue: - bindingRangeInfo.flatIndex = m_subObjectSlotCount; - m_subObjectSlotCount += count; + bindingRangeInfo.flatIndex = m_subObjectCount; + m_subObjectCount += count; break; case slang::BindingType::Sampler: @@ -829,20 +873,13 @@ public: } m_bindingRanges.add(bindingRangeInfo); } - } - Result setElementTypeLayout(slang::TypeLayoutReflection* typeLayout) - { - typeLayout = _unwrapParameterGroups(typeLayout); - - m_elementTypeLayout = typeLayout; - - m_totalOrdinaryDataSize = (uint32_t) typeLayout->getSize(); - - // Compute the binding ranges that are used to store - // the logical 
contents of the object in memory. - - addBindingRangesOfType(typeLayout); + // At this point we've computed the number of resources/samplers that + // the type needs to represent its *own* state, and stored those counts + // in `m_ownCounts`. Next we need to consider any resources/samplers + // and root parameters needed to represent the state of the transitive + // sub-objects of this objet, so that we can compute the total size + // of the object when bound to the pipeline. m_totalCounts = m_ownCounts; @@ -884,16 +921,42 @@ public: SubObjectRangeInfo subObjectRange; subObjectRange.bindingRangeIndex = bindingRangeIndex; subObjectRange.layout = subObjectLayout; -// subObjectRange.bindingType = slangBindingType; + // The Slang reflection API stors offset information for sub-object ranges, + // and we care about *some* of that information: in particular, we need + // the offset of sub-objects in terms of uniform/ordinary data for the + // cases where we need to fill in "pending" data in our ordinary buffer. + // subObjectRange.offset = SubObjectRangeOffset(typeLayout->getSubObjectRangeOffset(r)); subObjectRange.stride = SubObjectRangeStride(slangLeafTypeLayout); + // The remaining offset information is computed based on the counters + // we are generating here, which depend only on the in-memory layout + // decisions being made in our implementation. Remember that the + // `register` and `space` values coming from DXBC/DXIL do *not* + // dictate the in-memory layout we use. + // + // Note: One subtle point here is that the `.rootParam` offset we are computing + // here does *not* include any root parameters that would be allocated + // for the parent object type itself (e.g., for descriptor tables + // used if it were bound as a parameter block). The later logic when + // we actually go to bind things will need to apply those offsets. 
+ // + // Note: An even *more* subtle point is that the `.resource` offset + // being computed here *does* include the resource descriptor allocated + // for holding the ordinary data buffer, if any. The implications of + // this for later offset math is subtle. + // subObjectRange.offset.rootParam = m_childRootParameterCount; subObjectRange.offset.resource = m_totalCounts.resource; subObjectRange.offset.sampler = m_totalCounts.sampler; - BindingCounts objectCounts; + // Along with the offset information, we also need to compute the + // "stride" between consecutive sub-objects in the range. The actual + // size/stride of a single object depends on the type of range we + // are dealing with. + // + BindingOffset objectCounts; switch(slangBindingType) { default: @@ -908,20 +971,57 @@ public: // parent type, and we need to account for them // whenever we allocate storage. // - objectCounts.resource = subObjectLayout->getTotalResourceDescriptorCount(); objectCounts.sampler = subObjectLayout->getTotalSamplerDescriptorCount(); objectCounts.rootParam = subObjectRange.layout->getChildRootParameterCount(); } break; + case slang::BindingType::ParameterBlock: + { + SLANG_ASSERT(subObjectLayout); + + // In contrast to a constant buffer, a parameter block can hide + // the resource and sampler descriptor allocation it uses (since they + // are allocated into the tables that make up the parameter block. + // + // The only resource usage that leaks into the surrounding context + // is the number of root parameters consumed. + // + objectCounts.rootParam = subObjectRange.layout->getTotalRootParameterCount(); + } + break; + case slang::BindingType::ExistentialValue: + // An unspecialized existential/interface value cannot consume any resources + // as part of the parent object (it needs to fit inside the fixed-size + // represnetation of existential types). 
+ // + // However, if we are statically specializing to a type that doesn't "fit" + // we may need to account for additional information that needs to be + // allocaated. + // if(subObjectLayout) { + // The ordinary data for an existential-type value is allocated into + // the same buffer as the parent object, so we only want to consider + // the resource descriptors *other than* the ordinary data buffer. + // + // Otherwise the logic here is identical to the constant buffer case. + // objectCounts.resource = subObjectLayout->getTotalResourceDescriptorCountWithoutOrdinaryDataBuffer(); objectCounts.sampler = subObjectLayout->getTotalSamplerDescriptorCount(); objectCounts.rootParam = subObjectRange.layout->getChildRootParameterCount(); + // Note: In the implementation for some other graphics API (e.g., Vulkan) there + // needs to be more work done to handle the fact that "pending" data from + // interface-type sub-objects get allocated to a distinct offset after all the + // "primary" data. We are consciously ignoring that issue here, and the physical + // layout of a shader object into the D3D12 binding state may end up interleaving + // resources/samplers for "primary" and "pending" data. + // + // If this choice ever causes issues, we can revisit the approach here. + // An interface-type range that includes ordinary data can // increase the size of the ordinary data buffer we need to // allocate for the parent object. @@ -935,29 +1035,31 @@ public: } } break; - - case slang::BindingType::ParameterBlock: - { - SLANG_ASSERT(subObjectLayout); - - objectCounts.rootParam = subObjectRange.layout->getTotalRootParameterCount(); - } - break; } - auto rangeResourceCount = count * objectCounts.resource; - auto rangeSamplerCount = count * objectCounts.sampler; - auto rangeRootParamCount = count * objectCounts.rootParam; + // Once we've computed the usage for each object in the range, we can + // easily compute the rusage for the entire range. 
+ // + auto rangeResourceCount = count * objectCounts.resource; + auto rangeSamplerCount = count * objectCounts.sampler; + auto rangeRootParamCount = count * objectCounts.rootParam; - m_totalCounts.resource += rangeResourceCount; - m_totalCounts.sampler += rangeSamplerCount; - m_childRootParameterCount += rangeRootParamCount; + m_totalCounts.resource += rangeResourceCount; + m_totalCounts.sampler += rangeSamplerCount; + m_childRootParameterCount += rangeRootParamCount; m_subObjectRanges.add(subObjectRange); } - if(m_totalCounts.resource) m_ownCounts.rootParam++; - if(m_totalCounts.sampler) m_ownCounts.rootParam++; + // Once we have added up the resource usage from all the sub-objects + // we can look at the total number of resources and samplers that + // need to be bound as part of this objects descriptor tables and + // that will allow us to decide whether we need to allocate a root + // parameter for a resource table or not, ans similarly for a + // sampler table. + // + if(m_totalCounts.resource) m_ownCounts.rootParam++; + if(m_totalCounts.sampler) m_ownCounts.rootParam++; m_totalCounts.rootParam = m_ownCounts.rootParam + m_childRootParameterCount; @@ -990,21 +1092,19 @@ public: BindingRangeInfo const& getBindingRange(Index index) { return m_bindingRanges[index]; } -// DescriptorSetInfo getDescriptorSetInfo() { return m_descriptorSetInfo; } - slang::TypeLayoutReflection* getElementTypeLayout() { return m_elementTypeLayout; } uint32_t getResourceSlotCount() { return m_ownCounts.resource; } uint32_t getSamplerSlotCount() { return m_ownCounts.sampler; } - Index getSubObjectSlotCount() { return m_subObjectSlotCount; } + Index getSubObjectSlotCount() { return m_subObjectCount; } uint32_t getTotalResourceDescriptorCount() { return m_totalCounts.resource; } uint32_t getTotalSamplerDescriptorCount() { return m_totalCounts.sampler; } - uint32_t getOrdinaryDataBufferCount() { return m_ordinaryDataBufferCount; } - bool hasOrdinaryDataBuffer() { return 
m_ordinaryDataBufferCount != 0; } + uint32_t getOrdinaryDataBufferCount() { return m_totalOrdinaryDataSize ? 1 : 0; } + bool hasOrdinaryDataBuffer() { return m_totalOrdinaryDataSize != 0; } - uint32_t getTotalResourceDescriptorCountWithoutOrdinaryDataBuffer() { return m_totalCounts.resource - m_ordinaryDataBufferCount; } + uint32_t getTotalResourceDescriptorCountWithoutOrdinaryDataBuffer() { return m_totalCounts.resource - getOrdinaryDataBufferCount(); } uint32_t getTotalRootParameterCount() { return m_totalCounts.rootParam; } uint32_t getChildRootParameterCount() { return m_childRootParameterCount; } @@ -1028,53 +1128,27 @@ public: initBase(renderer, builder->m_elementTypeLayout); -// m_descriptorSetInfo = builder->m_descriptorSetInfo; m_bindingRanges = _Move(builder->m_bindingRanges); m_subObjectRanges = builder->m_subObjectRanges; m_ownCounts = builder->m_ownCounts; m_totalCounts = builder->m_totalCounts; - m_subObjectSlotCount = builder->m_subObjectSlotCount; + m_subObjectCount = builder->m_subObjectCount; m_childRootParameterCount = builder->m_childRootParameterCount; - m_ordinaryDataBufferCount = builder->m_ordinaryDataBufferCount; m_totalOrdinaryDataSize = builder->m_totalOrdinaryDataSize; -#if 0 - m_resourceSlotCount = builder->m_resourceSlotCount; - m_samplerSlotCount = builder->m_samplerSlotCount; - m_subObjectSlotCount = builder->m_subObjectSlotCount; - - m_totalResourceDescriptorCount = builder->m_totalResourceDescriptorCount; - m_totalSamplerDescriptorCount = builder->m_totalSamplerDescriptorCount; - - m_childRootParameterCount = builder->m_childRootParameterCount; - m_totalRootParameterCount = builder->m_totalRootParameterCount; -#endif - return SLANG_OK; } List<BindingRangeInfo> m_bindingRanges; List<SubObjectRangeInfo> m_subObjectRanges; + BindingOffset m_ownCounts; + BindingOffset m_totalCounts; -// DescriptorSetInfo m_descriptorSetInfo; -// Index m_subObjectCount = 0; - - BindingCounts m_ownCounts; - BindingCounts m_totalCounts; - -// uint32_t 
m_resourceSlotCount; -// uint32_t m_samplerSlotCount; - uint32_t m_subObjectSlotCount; - -// uint32_t m_totalResourceDescriptorCount; -// uint32_t m_totalSamplerDescriptorCount; - + uint32_t m_subObjectCount = 0; uint32_t m_childRootParameterCount = 0; -// uint32_t m_totalRootParameterCount = 0; - uint32_t m_ordinaryDataBufferCount = 0; uint32_t m_totalOrdinaryDataSize = 0; }; @@ -1086,7 +1160,7 @@ public: struct EntryPointInfo { RefPtr<ShaderObjectLayoutImpl> layout; - BindingCounts offset; + BindingOffset offset; }; struct Builder : Super::Builder @@ -1759,16 +1833,15 @@ public: } #endif - D3D12_ROOT_SIGNATURE_DESC& build( - List<D3D12Device::DescriptorSetInfo>& outRootDescriptorSetInfos) + D3D12_ROOT_SIGNATURE_DESC& build() { for (Index i = 0; i < m_descriptorSets.getCount(); i++) { auto& descriptorSet = m_descriptorSets[i]; - D3D12Device::DescriptorSetInfo setInfo; - setInfo.resourceDescriptorCount = descriptorSet.m_resourceCount; - setInfo.samplerDescriptorCount = descriptorSet.m_samplerCount; - outRootDescriptorSetInfos.add(setInfo); +// D3D12Device::DescriptorSetInfo setInfo; +// setInfo.resourceDescriptorCount = descriptorSet.m_resourceCount; +// setInfo.samplerDescriptorCount = descriptorSet.m_samplerCount; +// outRootDescriptorSetInfos.add(setInfo); if (descriptorSet.m_resourceRanges.getCount()) { D3D12_ROOT_PARAMETER rootParam = {}; @@ -1812,8 +1885,7 @@ public: D3D12Device* device, RootShaderObjectLayoutImpl* rootLayout, slang::IComponentType* program, - ID3D12RootSignature** outRootSignature, - List<DescriptorSetInfo>& outRootDescriptorSetInfos) + ID3D12RootSignature** outRootSignature) { // We are going to build up the root signature by adding // binding/descritpor ranges and nested parameter blocks @@ -1857,7 +1929,7 @@ public: builder.addAsValue(entryPoint->getVarLayout(), rootDescriptorSetIndex); } - auto& rootSignatureDesc = builder.build(outRootDescriptorSetInfos); + auto& rootSignatureDesc = builder.build(); ComPtr<ID3DBlob> signature; 
ComPtr<ID3DBlob> error; @@ -1917,8 +1989,7 @@ public: device, layout, program, - layout->m_rootSignature.writeRef(), - layout->m_gpuDescriptorSetInfos)); + layout->m_rootSignature.writeRef())); } *outLayout = layout.detach(); @@ -1949,7 +2020,7 @@ public: public: ComPtr<ID3D12RootSignature> m_rootSignature; - List<DescriptorSetInfo> m_gpuDescriptorSetInfos; +// List<DescriptorSetInfo> m_gpuDescriptorSetInfos; }; class ShaderProgramImpl : public ShaderProgramBase @@ -1979,18 +2050,7 @@ public: ~ShaderObjectImpl() { - auto layoutImpl = getLayout(); - if(layoutImpl) - { - if(auto resourceCount = layoutImpl->getResourceSlotCount()) - { - m_resourceHeap.freeIfSupported(m_descriptorSet.m_resourceTable, resourceCount); - } - if(auto samplerCount = layoutImpl->getSamplerSlotCount()) - { - m_samplerHeap.freeIfSupported(m_descriptorSet.m_samplerTable, samplerCount); - } - } + m_descriptorSet.freeIfSupported(); } RendererBase* getDevice() { return m_device.get(); } @@ -2189,8 +2249,7 @@ public: ID3D12Device* d3dDevice = static_cast<D3D12Device*>(getDevice())->m_device; d3dDevice->CopyDescriptorsSimple( 1, - m_resourceHeap.getCpuHandle( - m_descriptorSet.m_resourceTable + + m_descriptorSet.resourceTable.getCpuHandle( bindingRange.flatIndex + (int32_t)offset.bindingArrayIndex), resourceViewImpl->m_descriptor.cpuHandle, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); @@ -2210,8 +2269,7 @@ public: ID3D12Device* d3dDevice = static_cast<D3D12Device*>(getDevice())->m_device; d3dDevice->CopyDescriptorsSimple( 1, - m_samplerHeap.getCpuHandle( - m_descriptorSet.m_samplerTable + + m_descriptorSet.samplerTable.getCpuHandle( bindingRange.flatIndex + (int32_t)offset.bindingArrayIndex), samplerImpl->m_descriptor.cpuHandle, @@ -2354,12 +2412,9 @@ public: // but does *not* include any descriptors that are managed // as part of sub-objects. 
// - m_resourceHeap = viewHeap; - m_samplerHeap = samplerHeap; - if(auto resourceCount = layout->getResourceSlotCount()) { - m_descriptorSet.m_resourceTable = viewHeap.allocate(resourceCount); + m_descriptorSet.resourceTable.allocate(viewHeap, resourceCount); // We must also ensure that the memory for any resources // referenced by descriptors in this object does not get @@ -2367,10 +2422,9 @@ public: // m_boundResources.setCount(resourceCount); } - if(auto samplerCount = layout->getSamplerSlotCount()) { - m_descriptorSet.m_samplerTable = samplerHeap.allocate(samplerCount); + m_descriptorSet.samplerTable.allocate(samplerHeap, samplerCount); } @@ -2583,7 +2637,7 @@ public: // We always know that the ordinary data buffer will be the first descriptor // in the table of resource views. // - auto descriptorTable = m_descriptorSet.m_resourceTable; + auto descriptorTable = m_descriptorSet.resourceTable; D3D12_CONSTANT_BUFFER_VIEW_DESC viewDesc = {}; viewDesc.BufferLocation = static_cast<BufferResourceImpl*>(m_constantBufferWeakPtr) ->m_resource.getResource() @@ -2591,141 +2645,254 @@ public: m_constantBufferOffset; viewDesc.SizeInBytes = (UINT)alignedConstantBufferSize; encoder->m_device->CreateConstantBufferView( - &viewDesc, m_resourceHeap.getCpuHandle(descriptorTable)); + &viewDesc, descriptorTable.getCpuHandle()); } return SLANG_OK; } public: - RootBindingIndex prepareToBindAsParameterBlock(uint32_t inRootParameterIndex) + + /// Prepare to bind this object as a parameter block. + /// + /// This involves allocating and binding any descriptor tables necessary + /// to to store the state of the object. The function returns a descriptor + /// set formed from any table(s) allocated. In addition, the `ioOffset` + /// parameter will be adjusted to be correct for binding values into + /// the resulting descriptor set. 
+ /// + DescriptorSet prepareToBindAsParameterBlock( + BindingContext* context, + BindingOffset& ioOffset, + ShaderObjectLayoutImpl* specializedLayout) { - ShaderObjectLayoutImpl* layout = getLayout(); + auto transientHeap = context->transientHeap; + auto submitter = context->submitter; - auto rootParameterIndex = inRootParameterIndex; + // When writing into the new descriptor set, resource and sampler + // descriptors will need to start at index zero in the respective + // tables. + // + ioOffset.resource = 0; + ioOffset.sampler = 0; - RootBindingIndex rootBindingIndex; + // The index of the next root parameter to bind will be maintained, + // but needs to be incremented by the number of descriptor tables + // we allocate (zero or one resource table and zero or one sampler + // table). + // + auto& rootParamIndex = ioOffset.rootParam; + DescriptorSet descriptorSet; - if(layout->getTotalResourceDescriptorCount()) + if(auto descriptorCount = specializedLayout->getTotalResourceDescriptorCount()) { - rootBindingIndex.resource.descriptorTableIndex = rootParameterIndex++; + // There is a non-zero number of resource descriptors needed, + // so we will allocate a table out of the appropriate heap, + // and store it into the appropriate part of `descriptorSet`. + // + auto descriptorHeap = &transientHeap->m_viewHeap; + auto& table = descriptorSet.resourceTable; + + // Allocate the table. + // + table.allocate(descriptorHeap, descriptorCount); + + // Bind the table to the pipeline, consuming the next available + // root parameter. 
+ // + auto tableRootParamIndex = rootParamIndex++; + submitter->setRootDescriptorTable(tableRootParamIndex, table.getGpuHandle()); } - if(layout->getTotalSamplerDescriptorCount()) + if(auto descriptorCount = specializedLayout->getTotalSamplerDescriptorCount()) { - rootBindingIndex.sampler.descriptorTableIndex = rootParameterIndex++; + // There is a non-zero number of sampler descriptors needed, + // so we will allocate a table out of the appropriate heap, + // and store it into the appropriate part of `descriptorSet`. + // + auto descriptorHeap = &transientHeap->m_samplerHeap; + auto& table = descriptorSet.samplerTable; + + // Allocate the table. + // + table.allocate(descriptorHeap, descriptorCount); + + // Bind the table to the pipeline, consuming the next available + // root parameter. + // + auto tableRootParamIndex = rootParamIndex++; + submitter->setRootDescriptorTable(tableRootParamIndex, table.getGpuHandle()); } - rootBindingIndex.rootParamIndex = rootParameterIndex; - return rootBindingIndex; + return descriptorSet; } - Result bindAsParameterBlock(PipelineCommandEncoder* encoder, RootBindingState& bindingState, uint32_t rootParameterIndex, ShaderObjectLayoutImpl* layout) + /// Bind this object as a `ParameterBlock<X>` + Result bindAsParameterBlock( + BindingContext* context, + BindingOffset const& offset, + ShaderObjectLayoutImpl* specializedLayout) { - auto rootBindingIndex = prepareToBindAsParameterBlock(rootParameterIndex); - SLANG_RETURN_ON_FAIL(bindAsConstantBuffer(encoder, bindingState, rootBindingIndex, layout)); + // The first step to binding an object as a parameter block is to allocate a descriptor + // set (consisting of zero or one resource descriptor table and zero or one sampler + // descriptor table) to represent its values. 
+ // + BindingOffset subOffset = offset; + auto descriptorSet = prepareToBindAsParameterBlock(context, /* inout */ subOffset, specializedLayout); + + // Next we bind the object into that descriptor set as if it were being used + // as a `ConstantBuffer<X>`. + // + SLANG_RETURN_ON_FAIL(bindAsConstantBuffer(context, descriptorSet, subOffset, specializedLayout)); return SLANG_OK; } + /// Bind this object as a `ConstantBuffer<X>` Result bindAsConstantBuffer( - PipelineCommandEncoder* encoder, - RootBindingState& bindingState, - RootBindingIndex const& rootBindingIndex, - ShaderObjectLayoutImpl* specializedLayout) + BindingContext* context, + DescriptorSet const& descriptorSet, + BindingOffset const& offset, + ShaderObjectLayoutImpl* specializedLayout) { - SLANG_RETURN_ON_FAIL(_ensureOrdinaryDataBufferCreatedIfNeeded(encoder, specializedLayout)); - SLANG_RETURN_ON_FAIL(_bindImpl(encoder, bindingState, rootBindingIndex, specializedLayout, 0)); + // If we are to bind as a constant buffer we first need to ensure that + // the ordinary data buffer is created, if this object needs one. + // + SLANG_RETURN_ON_FAIL(_ensureOrdinaryDataBufferCreatedIfNeeded(context->encoder, specializedLayout)); + + // Next, we need to bind all of the resource descriptors for this object + // (including any ordinary data buffer) into the provided `descriptorSet`. + // + auto resourceCount = specializedLayout->getResourceSlotCount(); + if(resourceCount) + { + auto& dstTable = descriptorSet.resourceTable; + auto& srcTable = m_descriptorSet.resourceTable; + + context->device->m_device->CopyDescriptorsSimple( + UINT(resourceCount), + dstTable.getCpuHandle(offset.resource), + srcTable.getCpuHandle(), + D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + } + + // Finally, we delegate to `_bindImpl` to bind samplers and sub-objects, + // since the logic is shared with the `bindAsValue()` case below. 
+ // + SLANG_RETURN_ON_FAIL(_bindImpl(context, descriptorSet, offset, specializedLayout)); return SLANG_OK; } + /// Bind this object as a value (for an interface-type parameter) Result bindAsValue( - PipelineCommandEncoder* encoder, - RootBindingState& bindingState, - RootBindingIndex const& rootBindingIndex, - ShaderObjectLayoutImpl* layout) + BindingContext* context, + DescriptorSet const& descriptorSet, + BindingOffset const& offset, + ShaderObjectLayoutImpl* specializedLayout) { - SLANG_RETURN_ON_FAIL(_bindImpl(encoder, bindingState, rootBindingIndex, layout, layout->getOrdinaryDataBufferCount())); - return SLANG_OK; - } - - Result _bindImpl( - PipelineCommandEncoder* encoder, - RootBindingState& bindingState, - RootBindingIndex const& rootBindingIndex, - ShaderObjectLayoutImpl* layout, - uint32_t skipResourceCount) - { - auto& descSet = m_descriptorSet; - if(auto resourceCount = (layout->getResourceSlotCount() - skipResourceCount)) + // When binding a value for an interface-type field we do *not* want + // to bind a buffer for the ordinary data (if there is any) because + // ordinary data for interface-type fields gets allocated into the + // parent object's ordinary data buffer. + // + // This CPU-memory descriptor table that holds resource descriptors + // will have already been allocated to have space for an ordinary data + // buffer (if needed), so we need to take care to skip over that + // descriptor when copying descriptors from the CPU-memory set + // to the GPU-memory `descriptorSet`. 
+ // + auto skipResourceCount = specializedLayout->getOrdinaryDataBufferCount(); + auto resourceCount = specializedLayout->getResourceSlotCount() - skipResourceCount; + if(resourceCount) { - auto dstIndex = rootBindingIndex.resource; - auto& dstTable = bindingState.descriptorTables[dstIndex.descriptorTableIndex]; - auto& cpuHeap = m_resourceHeap; - auto cpuDescriptorTable = descSet.m_resourceTable; + auto& dstTable = descriptorSet.resourceTable; + auto& srcTable = m_descriptorSet.resourceTable; - bindingState.device->m_device->CopyDescriptorsSimple( + context->device->m_device->CopyDescriptorsSimple( UINT(resourceCount), - dstTable.getCpuHandle(dstIndex.descriptorIndex), - cpuHeap.getCpuHandle(cpuDescriptorTable + skipResourceCount), + dstTable.getCpuHandle(offset.resource), + srcTable.getCpuHandle(skipResourceCount), D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); } - if (auto samplerCount = layout->getSamplerSlotCount()) + + // Finally, we delegate to `_bindImpl` to bind samplers and sub-objects, + // since the logic is shared with the `bindAsConstantBuffer()` case above. + // + // Note: Just like we had to do some subtle handling of the ordinary data buffer + // above, here we need to contend with the fact that the `offset.resource` fields + // computed for sub-object ranges were baked to take the ordinary data buffer + // into account, so that if `skipResourceCount` is non-zero then they are all + // too high by `skipResourceCount`. + // + // We will address the problem here by computing a modified offset that adjusts + // for the ordinary data buffer that we have not bound after all. 
+ // + BindingOffset subOffset = offset; + subOffset.resource -= skipResourceCount; + SLANG_RETURN_ON_FAIL(_bindImpl(context, descriptorSet, subOffset, specializedLayout)); + return SLANG_OK; + } + + /// Shared logic for `bindAsConstantBuffer()` and `bindAsValue()` + Result _bindImpl( + BindingContext* context, + DescriptorSet const& descriptorSet, + BindingOffset const& offset, + ShaderObjectLayoutImpl* specializedLayout) + { + // We start by binding all the sampler descriptors, if needed. + // + // Note: resource descriptors were handled in either `bindAsConstantBuffer()` + // or `bindAsValue()` before calling into `_bindImpl()`. + // + if (auto samplerCount = specializedLayout->getSamplerSlotCount()) { - auto dstIndex = rootBindingIndex.sampler; - auto& dstTable = bindingState.descriptorTables[dstIndex.descriptorTableIndex]; - auto& cpuHeap = m_samplerHeap; - auto cpuDescriptorTable = (int)descSet.m_samplerTable; + auto& dstTable = descriptorSet.samplerTable; + auto& srcTable = m_descriptorSet.samplerTable; - bindingState.device->m_device->CopyDescriptorsSimple( + context->device->m_device->CopyDescriptorsSimple( UINT(samplerCount), - dstTable.getCpuHandle(dstIndex.descriptorIndex), - cpuHeap.getCpuHandle(cpuDescriptorTable), + dstTable.getCpuHandle(offset.sampler), + srcTable.getCpuHandle(), D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER); } - auto& subObjectRanges = layout->getSubObjectRanges(); + // Next we iterate over the sub-object ranges and bind anything they require. 
+ // + auto& subObjectRanges = specializedLayout->getSubObjectRanges(); auto subObjectRangeCount = subObjectRanges.getCount(); for (Index i = 0; i < subObjectRangeCount; i++) { - auto subObjectRange = layout->getSubObjectRange(i); - auto bindingRange = layout->getBindingRange(subObjectRange.bindingRangeIndex); + auto& subObjectRange = specializedLayout->getSubObjectRange(i); + auto& bindingRange = specializedLayout->getBindingRange(subObjectRange.bindingRangeIndex); + auto baseIndex = bindingRange.flatIndex; auto subObjectLayout = subObjectRange.layout.Ptr(); + + BindingOffset rangeOffset = offset; + rangeOffset += subObjectRange.offset; + + BindingOffset rangeStride = subObjectRange.stride; + switch(bindingRange.bindingType) { case slang::BindingType::ConstantBuffer: { - auto baseIndex = bindingRange.flatIndex; + auto objOffset = rangeOffset; for (uint32_t j = 0; j < bindingRange.count; j++) { auto& object = m_objects[baseIndex + j]; - - RootBindingIndex subIndex = rootBindingIndex; - subIndex.rootParamIndex += subObjectRange.offset.rootParam; - subIndex.resource.descriptorIndex += subObjectRange.offset.resource; - subIndex.sampler.descriptorTableIndex += subObjectRange.offset.sampler; - - object->bindAsConstantBuffer( - encoder, - bindingState, - subIndex, - subObjectLayout); + object->bindAsConstantBuffer(context, descriptorSet, objOffset, subObjectLayout); + objOffset += rangeStride; } } break; case slang::BindingType::ParameterBlock: { - auto baseIndex = bindingRange.flatIndex; + auto objOffset = rangeOffset; for (uint32_t j = 0; j < bindingRange.count; j++) { auto& object = m_objects[baseIndex + j]; - - auto subRootParamIndex = rootBindingIndex.rootParamIndex + subObjectRange.offset.rootParam; - - object->bindAsParameterBlock( - encoder, - bindingState, - subRootParamIndex, - subObjectLayout); + object->bindAsParameterBlock(context, objOffset, subObjectLayout); + objOffset += rangeStride; } } break; @@ -2733,94 +2900,27 @@ public: case 
slang::BindingType::ExistentialValue: if(subObjectLayout) { - auto baseIndex = bindingRange.flatIndex; + auto objOffset = rangeOffset; for (uint32_t j = 0; j < bindingRange.count; j++) { auto& object = m_objects[baseIndex + j]; - - RootBindingIndex subIndex = rootBindingIndex; - subIndex.rootParamIndex += subObjectRange.offset.rootParam; - subIndex.resource.descriptorIndex += subObjectRange.offset.resource; - subIndex.sampler.descriptorTableIndex += subObjectRange.offset.sampler; - - object->bindAsValue( - encoder, - bindingState, - subIndex, - subObjectLayout); + object->bindAsValue(context, descriptorSet, objOffset, subObjectLayout); + objOffset += rangeStride; } } break; } } -// SLANG_RETURN_ON_FAIL(bindChildRootParameters(encoder, bindingState, rootParameterIndex)); - return SLANG_OK; } -#if 0 - Result bindChildRootParameters(PipelineCommandEncoder* encoder, RootBindingState& bindingState, SlangInt rootParameterIndex) - { - ShaderObjectLayoutImpl* layout = getLayout(); - auto& subObjectRanges = layout->getSubObjectRanges(); - auto subObjectRangeCount = subObjectRanges.getCount(); - for (Index i = 0; i < subObjectRangeCount; i++) - { - auto subObjectRange = layout->getSubObjectRange(i); - auto bindingRange = layout->getBindingRange(subObjectRange.bindingRangeIndex); - switch(bindingRange.bindingType) - { - case slang::BindingType::ParameterBlock: - { - auto baseIndex = bindingRange.flatIndex; - auto subRootParameterIndex = rootParameterIndex + subObjectRange.childRootParameterOffset; - for (uint32_t j = 0; j < bindingRange.count; j++) - { - auto& object = m_objects[baseIndex + j]; - - object->bindAsParameterBlock(encoder, bindingState, subRootParameterIndex); - subRootParameterIndex += subObjectRange.layout->getTotalRootParameterCount(); - } - } - break; - case slang::BindingType::ConstantBuffer: - { - // TODO: - } - break; - case slang::BindingType::ExistentialValue: - // If the existential object contains only ordinary data fields, - // the data is already 
written into m_ordinaryDataBuffer during `setObject`, - // so we don't need to do anything here. - // If the existential object has resource fields, this is the time to set - // those fields as in the "pendingLayout" section. - // TODO: implement resource fields binding for inline existential values. - default: - break; - } - } - - return SLANG_OK; - } -#endif - /// Any "ordinary" / uniform data for this object List<char> m_ordinaryData; List<RefPtr<ShaderObjectImpl>> m_objects; - // The resource and sampler heaps used to allocate the descriptor tables. - DescriptorHeapReference m_resourceHeap; - DescriptorHeapReference m_samplerHeap; - - struct DescriptorSet - { - int32_t m_resourceTable = 0; - int32_t m_samplerTable = 0; -// uint32_t m_resourceCount = 0; -// uint32_t m_samplerCount = 0; - }; + /// A CPU-memory descriptor set holding any descriptors used to represent the resources/samplers in this object's state DescriptorSet m_descriptorSet; ShortList<RefPtr<Resource>, 8> m_boundResources; @@ -2913,24 +3013,33 @@ public: } public: - Result bindRootObject(PipelineCommandEncoder* encoder, RootBindingState& bindingState, RootShaderObjectLayoutImpl* layout) + Result bindAsRoot( + BindingContext* context, + RootShaderObjectLayoutImpl* specializedLayout) { - auto rootBindingIndex = prepareToBindAsParameterBlock(0); + // A root shader object always binds as if it were a parameter block, + // insofar as it needs to allocate a descriptor set to hold the bindings + // for its own state and any sub-objects. + // + // Note: We do not directly use `bindAsParameterBlock` here because we also + // need to bind the entry points into the same descriptor set that is + // being used for the root object. 
+ // + BindingOffset rootOffset; + auto descriptorSet = prepareToBindAsParameterBlock(context, /* inout */ rootOffset, specializedLayout); - SLANG_RETURN_ON_FAIL(Super::bindAsConstantBuffer(encoder, bindingState, rootBindingIndex, layout)); + SLANG_RETURN_ON_FAIL(Super::bindAsConstantBuffer(context, descriptorSet, rootOffset, specializedLayout)); auto entryPointCount = m_entryPoints.getCount(); for (Index i = 0; i < entryPointCount; ++i) { auto entryPoint = m_entryPoints[i]; - auto& entryPointInfo = layout->getEntryPoint(i); + auto& entryPointInfo = specializedLayout->getEntryPoint(i); - auto entryPointBindingIndex = rootBindingIndex; - entryPointBindingIndex.rootParamIndex += entryPointInfo.offset.rootParam; - entryPointBindingIndex.resource.descriptorIndex += entryPointInfo.offset.resource; - entryPointBindingIndex.sampler.descriptorIndex += entryPointInfo.offset.sampler; + auto entryPointOffset = rootOffset; + entryPointOffset += entryPointInfo.offset; - SLANG_RETURN_ON_FAIL(entryPoint->bindAsConstantBuffer(encoder, bindingState, entryPointBindingIndex, entryPointInfo.layout)); + SLANG_RETURN_ON_FAIL(entryPoint->bindAsConstantBuffer(context, descriptorSet, entryPointOffset, entryPointInfo.layout)); } return SLANG_OK; @@ -3939,38 +4048,23 @@ Result D3D12Device::PipelineCommandEncoder::_bindRenderState(Submitter* submitte RootShaderObjectLayoutImpl* rootLayoutImpl = static_cast<RootShaderObjectLayoutImpl*>(specializedRootLayout.Ptr()); - ShortList<DescriptorTable> descriptorTables; -// auto descSetInfo = rootLayoutImpl->getDescriptorSetInfo(); - auto heap = m_commandBuffer->m_transientHeap; - for (auto& descSet : rootLayoutImpl->m_gpuDescriptorSetInfos) - { - if (descSet.resourceDescriptorCount) - { - DescriptorTable table; - table.heap = &heap->m_viewHeap; - table.table = heap->m_viewHeap.allocate((int)descSet.resourceDescriptorCount); - descriptorTables.add(table); - } - if (descSet.samplerDescriptorCount) - { - DescriptorTable table; - table.heap = 
&heap->m_samplerHeap; - table.table = heap->m_samplerHeap.allocate((int)descSet.samplerDescriptorCount); - descriptorTables.add(table); - } - } - RootBindingState bindState = {}; - bindState.device = m_renderer; - bindState.transientHeap = m_transientHeap; - auto descTablesView = descriptorTables.getArrayView(); - bindState.descriptorTables = descTablesView.arrayView; - SLANG_RETURN_ON_FAIL(rootObjectImpl->bindRootObject(this, bindState, rootLayoutImpl)); + // We need to set up a context for binding shader objects to the pipeline state. + // This type mostly exists to bundle together a bunch of parameters that would + // otherwise need to be tunneled down through all the shader object binding + // logic. + // + BindingContext context = {}; + context.encoder = this; + context.submitter = submitter; + context.device = m_renderer; + context.transientHeap = m_transientHeap; + + // We kick off binding of shader objects at the root object, and the objects + // themselves will be responsible for allocating, binding, and filling in + // any descriptor tables or other root parameters needed. + // + SLANG_RETURN_ON_FAIL(rootObjectImpl->bindAsRoot(&context, rootLayoutImpl)); - for (Index i = 0; i < descriptorTables.getCount(); i++) - { - submitter->setRootDescriptorTable( - (int)i, descriptorTables[i].heap.getGpuHandle(descriptorTables[i].table)); - } return SLANG_OK; } |
