diff options
| -rw-r--r-- | tools/gfx/d3d12/render-d3d12.cpp | 792 |
1 files changed, 443 insertions, 349 deletions
diff --git a/tools/gfx/d3d12/render-d3d12.cpp b/tools/gfx/d3d12/render-d3d12.cpp index 67223088a..da169c03b 100644 --- a/tools/gfx/d3d12/render-d3d12.cpp +++ b/tools/gfx/d3d12/render-d3d12.cpp @@ -604,56 +604,94 @@ public: struct DescriptorTable { - DescriptorHeapReference heap; - uint32_t table; + DescriptorHeapReference m_heap; + uint32_t m_offset = 0; + uint32_t m_count = 0; + + SLANG_FORCE_INLINE uint32_t getDescriptorCount() const { return m_count; } /// Get the GPU handle at the specified index - SLANG_FORCE_INLINE D3D12_GPU_DESCRIPTOR_HANDLE getGpuHandle(uint32_t index) const + SLANG_FORCE_INLINE D3D12_GPU_DESCRIPTOR_HANDLE getGpuHandle(uint32_t index = 0) const { - return heap.getGpuHandle(table + index); + SLANG_ASSERT(index < getDescriptorCount()); + return m_heap.getGpuHandle(m_offset + index); } /// Get the CPU handle at the specified index - SLANG_FORCE_INLINE D3D12_CPU_DESCRIPTOR_HANDLE getCpuHandle(uint32_t index) const + SLANG_FORCE_INLINE D3D12_CPU_DESCRIPTOR_HANDLE getCpuHandle(uint32_t index = 0) const { - return heap.getCpuHandle(table + index); + SLANG_ASSERT(index < getDescriptorCount()); + return m_heap.getCpuHandle(m_offset + index); + } + + void freeIfSupported() + { + if(m_count) + { + m_heap.freeIfSupported(m_offset, m_count); + m_offset = 0; + m_count = 0; + } + } + + void allocate(uint32_t count) + { + m_offset = m_heap.allocate(count); + m_count = count; + } + + void allocate(DescriptorHeapReference heap, uint32_t count) + { + m_heap = heap; + m_offset = heap.allocate(count); + m_count = count; } }; - struct RootBindingState + /// Contextual data and operations required when binding shader objects to the pipeline state + struct BindingContext { - TransientResourceHeapImpl* transientHeap; - D3D12Device* device; - ArrayView<DescriptorTable> descriptorTables; + PipelineCommandEncoder* encoder; + Submitter* submitter; + TransientResourceHeapImpl* transientHeap; + D3D12Device* device; }; - struct BindingCounts + /// A representation of the 
offset at which to bind a shader parameter or sub-object + struct BindingOffset { + // Note: When we actually bind a shader object to the pipeline we do not care about + // HLSL-specific notions like `t` registers and `space`s. Those concepts are all + // mediated by the root signature. + // + // Instead, we need to consider the offsets at which the object will be bound + // into the actual D3D12 API state, which consists of the index of the current + // root parameter to bind from, as well as indices into the current descriptor + // tables (for resource views and samplers). + uint32_t rootParam = 0; uint32_t resource = 0; uint32_t sampler = 0; - BindingCounts() - {} - }; - - struct HeapBindingIndex - { - uint32_t descriptorTableIndex = 0; - uint32_t descriptorIndex = 0; + void operator+=(BindingOffset const& offset) + { + rootParam += offset.rootParam; + resource += offset.resource; + sampler += offset.sampler; + } }; - struct RootBindingIndex + /// A representation of an allocated descriptor set, consisting of an optional resource table and an optional sampler table + struct DescriptorSet { - uint32_t rootParamIndex = 0; - HeapBindingIndex resource; - HeapBindingIndex sampler; - }; + DescriptorTable resourceTable; + DescriptorTable samplerTable; - struct DescriptorSetInfo - { - uint32_t resourceDescriptorCount = 0; - uint32_t samplerDescriptorCount = 0; + void freeIfSupported() + { + resourceTable.freeIfSupported(); + samplerTable .freeIfSupported(); + } }; // Provides information on how binding ranges are stored in descriptor tables for @@ -684,7 +722,7 @@ public: }; /// Offset information for a sub-object range - struct SubObjectRangeOffset : BindingCounts + struct SubObjectRangeOffset : BindingOffset { SubObjectRangeOffset() {} @@ -702,7 +740,7 @@ public: }; /// Stride information for a sub-object range - struct SubObjectRangeStride + struct SubObjectRangeStride : BindingOffset { SubObjectRangeStride() {} @@ -751,43 +789,51 @@ public: slang::TypeLayoutReflection* 
m_elementTypeLayout; List<BindingRangeInfo> m_bindingRanges; List<SubObjectRangeInfo> m_subObjectRanges; -// DescriptorSetInfo m_descriptorSetInfo; -// uint32_t m_resourceSlotCount = 0; -// uint32_t m_samplerSlotCount = 0; - uint32_t m_subObjectSlotCount = 0; + /// The number of sub-objects (not just sub-object *ranges*) stored in instances of this layout + uint32_t m_subObjectCount = 0; - BindingCounts m_ownCounts; -// BindingCounts m_childCounts; - BindingCounts m_totalCounts; + /// Counters for the number of root parameters, resources, and samplers in this object itself + BindingOffset m_ownCounts; - uint32_t m_childRootParameterCount = 0; + /// Counters for the number of root parameters, resources, and samplers in this object and transitive sub-objects + BindingOffset m_totalCounts; - uint32_t m_ordinaryDataBufferCount = 0; + /// The number of root parameters consumed by (transitive) sub-objects + uint32_t m_childRootParameterCount = 0; + /// The total size in bytes of the ordinary data for this object and transitive sub-objects uint32_t m_totalOrdinaryDataSize = 0; - -// uint32_t m_totalResourceDescriptorCount = 0; -// uint32_t m_totalSamplerDescriptorCount = 0; - -// uint32_t m_ownRootParameterCount = 0; -// uint32_t m_childRootParameterCount = 0; -// uint32_t m_totalRootParameterCount = 0; - - void addBindingRangesOfType(slang::TypeLayoutReflection* typeLayout) + Result setElementTypeLayout(slang::TypeLayoutReflection* typeLayout) { - SlangInt bindingRangeCount = typeLayout->getBindingRangeCount(); + typeLayout = _unwrapParameterGroups(typeLayout); + m_elementTypeLayout = typeLayout; - // Reserve CBV slot for the implicit constant buffer if the type contains - // ordinary uniform data fields. - if (typeLayout->getSize(slang::ParameterCategory::Uniform) != 0) + // If the type contains any ordinary data, then we must reserve a buffer + // descriptor to hold it when binding as a parameter block. 
+ // + m_totalOrdinaryDataSize = (uint32_t) typeLayout->getSize(); + if (m_totalOrdinaryDataSize != 0) { - m_ordinaryDataBufferCount++; m_ownCounts.resource++; -// m_descriptorSetInfo.resourceDescriptorCount = 1; } + // We will scan over the reflected Slang binding ranges and add them + // to our array. There are two main things we compute along the way: + // + // * For each binding range we compute a `flatIndex` that can be + // used to identify where the values for the given range begin + // in the flattened arrays (e.g., `m_objects`) and descriptor + // tables that hold the state of a shader object. + // + // * We also update the various counters that keep track of the number + // of sub-objects, resources, samplers, etc. that are being + // consumed. These counters will contribute to figuring out + // the descriptor table(s) that might be needed to represent + // the object. + // + SlangInt bindingRangeCount = typeLayout->getBindingRangeCount(); for (SlangInt r = 0; r < bindingRangeCount; ++r) { slang::BindingType slangBindingType = typeLayout->getBindingRangeType(r); @@ -798,15 +844,13 @@ public: bindingRangeInfo.bindingType = slangBindingType; bindingRangeInfo.count = count; -// bindingRangeInfo.flatIndex = m_flatResourceCount; - switch (slangBindingType) { case slang::BindingType::ConstantBuffer: case slang::BindingType::ParameterBlock: case slang::BindingType::ExistentialValue: - bindingRangeInfo.flatIndex = m_subObjectSlotCount; - m_subObjectSlotCount += count; + bindingRangeInfo.flatIndex = m_subObjectCount; + m_subObjectCount += count; break; case slang::BindingType::Sampler: @@ -829,20 +873,13 @@ public: } m_bindingRanges.add(bindingRangeInfo); } - } - Result setElementTypeLayout(slang::TypeLayoutReflection* typeLayout) - { - typeLayout = _unwrapParameterGroups(typeLayout); - - m_elementTypeLayout = typeLayout; - - m_totalOrdinaryDataSize = (uint32_t) typeLayout->getSize(); - - // Compute the binding ranges that are used to store - // the logical 
contents of the object in memory. - - addBindingRangesOfType(typeLayout); + // At this point we've computed the number of resources/samplers that + // the type needs to represent its *own* state, and stored those counts + // in `m_ownCounts`. Next we need to consider any resources/samplers + // and root parameters needed to represent the state of the transitive + // sub-objects of this object, so that we can compute the total size + // of the object when bound to the pipeline. m_totalCounts = m_ownCounts; @@ -884,16 +921,42 @@ public: SubObjectRangeInfo subObjectRange; subObjectRange.bindingRangeIndex = bindingRangeIndex; subObjectRange.layout = subObjectLayout; -// subObjectRange.bindingType = slangBindingType; + // The Slang reflection API stores offset information for sub-object ranges, + // and we care about *some* of that information: in particular, we need + // the offset of sub-objects in terms of uniform/ordinary data for the + // cases where we need to fill in "pending" data in our ordinary buffer. + // subObjectRange.offset = SubObjectRangeOffset(typeLayout->getSubObjectRangeOffset(r)); subObjectRange.stride = SubObjectRangeStride(slangLeafTypeLayout); + // The remaining offset information is computed based on the counters + // we are generating here, which depend only on the in-memory layout + // decisions being made in our implementation. Remember that the + // `register` and `space` values coming from DXBC/DXIL do *not* + // dictate the in-memory layout we use. + // + // Note: One subtle point here is that the `.rootParam` offset we are computing + // here does *not* include any root parameters that would be allocated + // for the parent object type itself (e.g., for descriptor tables + // used if it were bound as a parameter block). The later logic when + // we actually go to bind things will need to apply those offsets. 
+ // + // Note: An even *more* subtle point is that the `.resource` offset + // being computed here *does* include the resource descriptor allocated + // for holding the ordinary data buffer, if any. The implications of + // this for later offset math are subtle. + // subObjectRange.offset.rootParam = m_childRootParameterCount; subObjectRange.offset.resource = m_totalCounts.resource; subObjectRange.offset.sampler = m_totalCounts.sampler; - BindingCounts objectCounts; + // Along with the offset information, we also need to compute the + // "stride" between consecutive sub-objects in the range. The actual + // size/stride of a single object depends on the type of range we + // are dealing with. + // + BindingOffset objectCounts; switch(slangBindingType) { default: @@ -908,20 +971,57 @@ public: // parent type, and we need to account for them // whenever we allocate storage. // - objectCounts.resource = subObjectLayout->getTotalResourceDescriptorCount(); objectCounts.sampler = subObjectLayout->getTotalSamplerDescriptorCount(); objectCounts.rootParam = subObjectRange.layout->getChildRootParameterCount(); } break; + case slang::BindingType::ParameterBlock: + { + SLANG_ASSERT(subObjectLayout); + + // In contrast to a constant buffer, a parameter block can hide + // the resource and sampler descriptor allocation it uses (since they + // are allocated into the tables that make up the parameter block). + // + // The only resource usage that leaks into the surrounding context + // is the number of root parameters consumed. + // + objectCounts.rootParam = subObjectRange.layout->getTotalRootParameterCount(); + } + break; + case slang::BindingType::ExistentialValue: + // An unspecialized existential/interface value cannot consume any resources + // as part of the parent object (it needs to fit inside the fixed-size + // representation of existential types). 
+ // + // However, if we are statically specializing to a type that doesn't "fit" + // we may need to account for additional information that needs to be + // allocated. + // if(subObjectLayout) { + // The ordinary data for an existential-type value is allocated into + // the same buffer as the parent object, so we only want to consider + // the resource descriptors *other than* the ordinary data buffer. + // + // Otherwise the logic here is identical to the constant buffer case. + // objectCounts.resource = subObjectLayout->getTotalResourceDescriptorCountWithoutOrdinaryDataBuffer(); objectCounts.sampler = subObjectLayout->getTotalSamplerDescriptorCount(); objectCounts.rootParam = subObjectRange.layout->getChildRootParameterCount(); + // Note: In the implementation for some other graphics API (e.g., Vulkan) there + // needs to be more work done to handle the fact that "pending" data from + // interface-type sub-objects get allocated to a distinct offset after all the + // "primary" data. We are consciously ignoring that issue here, and the physical + // layout of a shader object into the D3D12 binding state may end up interleaving + // resources/samplers for "primary" and "pending" data. + // + // If this choice ever causes issues, we can revisit the approach here. + // An interface-type range that includes ordinary data can // increase the size of the ordinary data buffer we need to // allocate for the parent object. @@ -935,29 +1035,31 @@ public: } } break; - - case slang::BindingType::ParameterBlock: - { - SLANG_ASSERT(subObjectLayout); - - objectCounts.rootParam = subObjectRange.layout->getTotalRootParameterCount(); - } - break; } - auto rangeResourceCount = count * objectCounts.resource; - auto rangeSamplerCount = count * objectCounts.sampler; - auto rangeRootParamCount = count * objectCounts.rootParam; + // Once we've computed the usage for each object in the range, we can + // easily compute the usage for the entire range. 
+ // + auto rangeResourceCount = count * objectCounts.resource; + auto rangeSamplerCount = count * objectCounts.sampler; + auto rangeRootParamCount = count * objectCounts.rootParam; - m_totalCounts.resource += rangeResourceCount; - m_totalCounts.sampler += rangeSamplerCount; - m_childRootParameterCount += rangeRootParamCount; + m_totalCounts.resource += rangeResourceCount; + m_totalCounts.sampler += rangeSamplerCount; + m_childRootParameterCount += rangeRootParamCount; m_subObjectRanges.add(subObjectRange); } - if(m_totalCounts.resource) m_ownCounts.rootParam++; - if(m_totalCounts.sampler) m_ownCounts.rootParam++; + // Once we have added up the resource usage from all the sub-objects + // we can look at the total number of resources and samplers that + // need to be bound as part of this object's descriptor tables and + // that will allow us to decide whether we need to allocate a root + // parameter for a resource table or not, and similarly for a + // sampler table. + // + if(m_totalCounts.resource) m_ownCounts.rootParam++; + if(m_totalCounts.sampler) m_ownCounts.rootParam++; m_totalCounts.rootParam = m_ownCounts.rootParam + m_childRootParameterCount; @@ -990,21 +1092,19 @@ public: BindingRangeInfo const& getBindingRange(Index index) { return m_bindingRanges[index]; } -// DescriptorSetInfo getDescriptorSetInfo() { return m_descriptorSetInfo; } - slang::TypeLayoutReflection* getElementTypeLayout() { return m_elementTypeLayout; } uint32_t getResourceSlotCount() { return m_ownCounts.resource; } uint32_t getSamplerSlotCount() { return m_ownCounts.sampler; } - Index getSubObjectSlotCount() { return m_subObjectSlotCount; } + Index getSubObjectSlotCount() { return m_subObjectCount; } uint32_t getTotalResourceDescriptorCount() { return m_totalCounts.resource; } uint32_t getTotalSamplerDescriptorCount() { return m_totalCounts.sampler; } - uint32_t getOrdinaryDataBufferCount() { return m_ordinaryDataBufferCount; } - bool hasOrdinaryDataBuffer() { return 
m_ordinaryDataBufferCount != 0; } + uint32_t getOrdinaryDataBufferCount() { return m_totalOrdinaryDataSize ? 1 : 0; } + bool hasOrdinaryDataBuffer() { return m_totalOrdinaryDataSize != 0; } - uint32_t getTotalResourceDescriptorCountWithoutOrdinaryDataBuffer() { return m_totalCounts.resource - m_ordinaryDataBufferCount; } + uint32_t getTotalResourceDescriptorCountWithoutOrdinaryDataBuffer() { return m_totalCounts.resource - getOrdinaryDataBufferCount(); } uint32_t getTotalRootParameterCount() { return m_totalCounts.rootParam; } uint32_t getChildRootParameterCount() { return m_childRootParameterCount; } @@ -1028,53 +1128,27 @@ public: initBase(renderer, builder->m_elementTypeLayout); -// m_descriptorSetInfo = builder->m_descriptorSetInfo; m_bindingRanges = _Move(builder->m_bindingRanges); m_subObjectRanges = builder->m_subObjectRanges; m_ownCounts = builder->m_ownCounts; m_totalCounts = builder->m_totalCounts; - m_subObjectSlotCount = builder->m_subObjectSlotCount; + m_subObjectCount = builder->m_subObjectCount; m_childRootParameterCount = builder->m_childRootParameterCount; - m_ordinaryDataBufferCount = builder->m_ordinaryDataBufferCount; m_totalOrdinaryDataSize = builder->m_totalOrdinaryDataSize; -#if 0 - m_resourceSlotCount = builder->m_resourceSlotCount; - m_samplerSlotCount = builder->m_samplerSlotCount; - m_subObjectSlotCount = builder->m_subObjectSlotCount; - - m_totalResourceDescriptorCount = builder->m_totalResourceDescriptorCount; - m_totalSamplerDescriptorCount = builder->m_totalSamplerDescriptorCount; - - m_childRootParameterCount = builder->m_childRootParameterCount; - m_totalRootParameterCount = builder->m_totalRootParameterCount; -#endif - return SLANG_OK; } List<BindingRangeInfo> m_bindingRanges; List<SubObjectRangeInfo> m_subObjectRanges; + BindingOffset m_ownCounts; + BindingOffset m_totalCounts; -// DescriptorSetInfo m_descriptorSetInfo; -// Index m_subObjectCount = 0; - - BindingCounts m_ownCounts; - BindingCounts m_totalCounts; - -// uint32_t 
m_resourceSlotCount; -// uint32_t m_samplerSlotCount; - uint32_t m_subObjectSlotCount; - -// uint32_t m_totalResourceDescriptorCount; -// uint32_t m_totalSamplerDescriptorCount; - + uint32_t m_subObjectCount = 0; uint32_t m_childRootParameterCount = 0; -// uint32_t m_totalRootParameterCount = 0; - uint32_t m_ordinaryDataBufferCount = 0; uint32_t m_totalOrdinaryDataSize = 0; }; @@ -1086,7 +1160,7 @@ public: struct EntryPointInfo { RefPtr<ShaderObjectLayoutImpl> layout; - BindingCounts offset; + BindingOffset offset; }; struct Builder : Super::Builder @@ -1759,16 +1833,15 @@ public: } #endif - D3D12_ROOT_SIGNATURE_DESC& build( - List<D3D12Device::DescriptorSetInfo>& outRootDescriptorSetInfos) + D3D12_ROOT_SIGNATURE_DESC& build() { for (Index i = 0; i < m_descriptorSets.getCount(); i++) { auto& descriptorSet = m_descriptorSets[i]; - D3D12Device::DescriptorSetInfo setInfo; - setInfo.resourceDescriptorCount = descriptorSet.m_resourceCount; - setInfo.samplerDescriptorCount = descriptorSet.m_samplerCount; - outRootDescriptorSetInfos.add(setInfo); +// D3D12Device::DescriptorSetInfo setInfo; +// setInfo.resourceDescriptorCount = descriptorSet.m_resourceCount; +// setInfo.samplerDescriptorCount = descriptorSet.m_samplerCount; +// outRootDescriptorSetInfos.add(setInfo); if (descriptorSet.m_resourceRanges.getCount()) { D3D12_ROOT_PARAMETER rootParam = {}; @@ -1812,8 +1885,7 @@ public: D3D12Device* device, RootShaderObjectLayoutImpl* rootLayout, slang::IComponentType* program, - ID3D12RootSignature** outRootSignature, - List<DescriptorSetInfo>& outRootDescriptorSetInfos) + ID3D12RootSignature** outRootSignature) { // We are going to build up the root signature by adding // binding/descritpor ranges and nested parameter blocks @@ -1857,7 +1929,7 @@ public: builder.addAsValue(entryPoint->getVarLayout(), rootDescriptorSetIndex); } - auto& rootSignatureDesc = builder.build(outRootDescriptorSetInfos); + auto& rootSignatureDesc = builder.build(); ComPtr<ID3DBlob> signature; 
ComPtr<ID3DBlob> error; @@ -1917,8 +1989,7 @@ public: device, layout, program, - layout->m_rootSignature.writeRef(), - layout->m_gpuDescriptorSetInfos)); + layout->m_rootSignature.writeRef())); } *outLayout = layout.detach(); @@ -1949,7 +2020,7 @@ public: public: ComPtr<ID3D12RootSignature> m_rootSignature; - List<DescriptorSetInfo> m_gpuDescriptorSetInfos; +// List<DescriptorSetInfo> m_gpuDescriptorSetInfos; }; class ShaderProgramImpl : public ShaderProgramBase @@ -1979,18 +2050,7 @@ public: ~ShaderObjectImpl() { - auto layoutImpl = getLayout(); - if(layoutImpl) - { - if(auto resourceCount = layoutImpl->getResourceSlotCount()) - { - m_resourceHeap.freeIfSupported(m_descriptorSet.m_resourceTable, resourceCount); - } - if(auto samplerCount = layoutImpl->getSamplerSlotCount()) - { - m_samplerHeap.freeIfSupported(m_descriptorSet.m_samplerTable, samplerCount); - } - } + m_descriptorSet.freeIfSupported(); } RendererBase* getDevice() { return m_device.get(); } @@ -2189,8 +2249,7 @@ public: ID3D12Device* d3dDevice = static_cast<D3D12Device*>(getDevice())->m_device; d3dDevice->CopyDescriptorsSimple( 1, - m_resourceHeap.getCpuHandle( - m_descriptorSet.m_resourceTable + + m_descriptorSet.resourceTable.getCpuHandle( bindingRange.flatIndex + (int32_t)offset.bindingArrayIndex), resourceViewImpl->m_descriptor.cpuHandle, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); @@ -2210,8 +2269,7 @@ public: ID3D12Device* d3dDevice = static_cast<D3D12Device*>(getDevice())->m_device; d3dDevice->CopyDescriptorsSimple( 1, - m_samplerHeap.getCpuHandle( - m_descriptorSet.m_samplerTable + + m_descriptorSet.samplerTable.getCpuHandle( bindingRange.flatIndex + (int32_t)offset.bindingArrayIndex), samplerImpl->m_descriptor.cpuHandle, @@ -2354,12 +2412,9 @@ public: // but does *not* include any descriptors that are managed // as part of sub-objects. 
// - m_resourceHeap = viewHeap; - m_samplerHeap = samplerHeap; - if(auto resourceCount = layout->getResourceSlotCount()) { - m_descriptorSet.m_resourceTable = viewHeap.allocate(resourceCount); + m_descriptorSet.resourceTable.allocate(viewHeap, resourceCount); // We must also ensure that the memory for any resources // referenced by descriptors in this object does not get @@ -2367,10 +2422,9 @@ public: // m_boundResources.setCount(resourceCount); } - if(auto samplerCount = layout->getSamplerSlotCount()) { - m_descriptorSet.m_samplerTable = samplerHeap.allocate(samplerCount); + m_descriptorSet.samplerTable.allocate(samplerHeap, samplerCount); } @@ -2583,7 +2637,7 @@ public: // We always know that the ordinary data buffer will be the first descriptor // in the table of resource views. // - auto descriptorTable = m_descriptorSet.m_resourceTable; + auto descriptorTable = m_descriptorSet.resourceTable; D3D12_CONSTANT_BUFFER_VIEW_DESC viewDesc = {}; viewDesc.BufferLocation = static_cast<BufferResourceImpl*>(m_constantBufferWeakPtr) ->m_resource.getResource() @@ -2591,141 +2645,254 @@ public: m_constantBufferOffset; viewDesc.SizeInBytes = (UINT)alignedConstantBufferSize; encoder->m_device->CreateConstantBufferView( - &viewDesc, m_resourceHeap.getCpuHandle(descriptorTable)); + &viewDesc, descriptorTable.getCpuHandle()); } return SLANG_OK; } public: - RootBindingIndex prepareToBindAsParameterBlock(uint32_t inRootParameterIndex) + + /// Prepare to bind this object as a parameter block. + /// + /// This involves allocating and binding any descriptor tables necessary + /// to store the state of the object. The function returns a descriptor + /// set formed from any table(s) allocated. In addition, the `ioOffset` + /// parameter will be adjusted to be correct for binding values into + /// the resulting descriptor set. 
+ /// + DescriptorSet prepareToBindAsParameterBlock( + BindingContext* context, + BindingOffset& ioOffset, + ShaderObjectLayoutImpl* specializedLayout) { - ShaderObjectLayoutImpl* layout = getLayout(); + auto transientHeap = context->transientHeap; + auto submitter = context->submitter; - auto rootParameterIndex = inRootParameterIndex; + // When writing into the new descriptor set, resource and sampler + // descriptors will need to start at index zero in the respective + // tables. + // + ioOffset.resource = 0; + ioOffset.sampler = 0; - RootBindingIndex rootBindingIndex; + // The index of the next root parameter to bind will be maintained, + // but needs to be incremented by the number of descriptor tables + // we allocate (zero or one resource table and zero or one sampler + // table). + // + auto& rootParamIndex = ioOffset.rootParam; + DescriptorSet descriptorSet; - if(layout->getTotalResourceDescriptorCount()) + if(auto descriptorCount = specializedLayout->getTotalResourceDescriptorCount()) { - rootBindingIndex.resource.descriptorTableIndex = rootParameterIndex++; + // There is a non-zero number of resource descriptors needed, + // so we will allocate a table out of the appropriate heap, + // and store it into the appropriate part of `descriptorSet`. + // + auto descriptorHeap = &transientHeap->m_viewHeap; + auto& table = descriptorSet.resourceTable; + + // Allocate the table. + // + table.allocate(descriptorHeap, descriptorCount); + + // Bind the table to the pipeline, consuming the next available + // root parameter. 
+ // + auto tableRootParamIndex = rootParamIndex++; + submitter->setRootDescriptorTable(tableRootParamIndex, table.getGpuHandle()); } - if(layout->getTotalSamplerDescriptorCount()) + if(auto descriptorCount = specializedLayout->getTotalSamplerDescriptorCount()) { - rootBindingIndex.sampler.descriptorTableIndex = rootParameterIndex++; + // There is a non-zero number of sampler descriptors needed, + // so we will allocate a table out of the appropriate heap, + // and store it into the appropriate part of `descriptorSet`. + // + auto descriptorHeap = &transientHeap->m_samplerHeap; + auto& table = descriptorSet.samplerTable; + + // Allocate the table. + // + table.allocate(descriptorHeap, descriptorCount); + + // Bind the table to the pipeline, consuming the next available + // root parameter. + // + auto tableRootParamIndex = rootParamIndex++; + submitter->setRootDescriptorTable(tableRootParamIndex, table.getGpuHandle()); } - rootBindingIndex.rootParamIndex = rootParameterIndex; - return rootBindingIndex; + return descriptorSet; } - Result bindAsParameterBlock(PipelineCommandEncoder* encoder, RootBindingState& bindingState, uint32_t rootParameterIndex, ShaderObjectLayoutImpl* layout) + /// Bind this object as a `ParameterBlock<X>` + Result bindAsParameterBlock( + BindingContext* context, + BindingOffset const& offset, + ShaderObjectLayoutImpl* specializedLayout) { - auto rootBindingIndex = prepareToBindAsParameterBlock(rootParameterIndex); - SLANG_RETURN_ON_FAIL(bindAsConstantBuffer(encoder, bindingState, rootBindingIndex, layout)); + // The first step to binding an object as a parameter block is to allocate a descriptor + // set (consisting of zero or one resource descriptor table and zero or one sampler + // descriptor table) to represent its values. 
+ // + BindingOffset subOffset = offset; + auto descriptorSet = prepareToBindAsParameterBlock(context, /* inout */ subOffset, specializedLayout); + + // Next we bind the object into that descriptor set as if it were being used + // as a `ConstantBuffer<X>`. + // + SLANG_RETURN_ON_FAIL(bindAsConstantBuffer(context, descriptorSet, subOffset, specializedLayout)); return SLANG_OK; } + /// Bind this object as a `ConstantBuffer<X>` Result bindAsConstantBuffer( - PipelineCommandEncoder* encoder, - RootBindingState& bindingState, - RootBindingIndex const& rootBindingIndex, - ShaderObjectLayoutImpl* specializedLayout) + BindingContext* context, + DescriptorSet const& descriptorSet, + BindingOffset const& offset, + ShaderObjectLayoutImpl* specializedLayout) { - SLANG_RETURN_ON_FAIL(_ensureOrdinaryDataBufferCreatedIfNeeded(encoder, specializedLayout)); - SLANG_RETURN_ON_FAIL(_bindImpl(encoder, bindingState, rootBindingIndex, specializedLayout, 0)); + // If we are to bind as a constant buffer we first need to ensure that + // the ordinary data buffer is created, if this object needs one. + // + SLANG_RETURN_ON_FAIL(_ensureOrdinaryDataBufferCreatedIfNeeded(context->encoder, specializedLayout)); + + // Next, we need to bind all of the resource descriptors for this object + // (including any ordinary data buffer) into the provided `descriptorSet`. + // + auto resourceCount = specializedLayout->getResourceSlotCount(); + if(resourceCount) + { + auto& dstTable = descriptorSet.resourceTable; + auto& srcTable = m_descriptorSet.resourceTable; + + context->device->m_device->CopyDescriptorsSimple( + UINT(resourceCount), + dstTable.getCpuHandle(offset.resource), + srcTable.getCpuHandle(), + D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + } + + // Finally, we delegate to `_bindImpl` to bind samplers and sub-objects, + // since the logic is shared with the `bindAsValue()` case below. 
+ // + SLANG_RETURN_ON_FAIL(_bindImpl(context, descriptorSet, offset, specializedLayout)); return SLANG_OK; } + /// Bind this object as a value (for an interface-type parameter) Result bindAsValue( - PipelineCommandEncoder* encoder, - RootBindingState& bindingState, - RootBindingIndex const& rootBindingIndex, - ShaderObjectLayoutImpl* layout) + BindingContext* context, + DescriptorSet const& descriptorSet, + BindingOffset const& offset, + ShaderObjectLayoutImpl* specializedLayout) { - SLANG_RETURN_ON_FAIL(_bindImpl(encoder, bindingState, rootBindingIndex, layout, layout->getOrdinaryDataBufferCount())); - return SLANG_OK; - } - - Result _bindImpl( - PipelineCommandEncoder* encoder, - RootBindingState& bindingState, - RootBindingIndex const& rootBindingIndex, - ShaderObjectLayoutImpl* layout, - uint32_t skipResourceCount) - { - auto& descSet = m_descriptorSet; - if(auto resourceCount = (layout->getResourceSlotCount() - skipResourceCount)) + // When binding a value for an interface-type field we do *not* want + // to bind a buffer for the ordinary data (if there is any) because + // ordinary data for interface-type fields gets allocated into the + // parent object's ordinary data buffer. + // + // This CPU-memory descriptor table that holds resource descriptors + // will have already been allocated to have space for an ordinary data + // buffer (if needed), so we need to take care to skip over that + // descriptor when copying descriptors from the CPU-memory set + // to the GPU-memory `descriptorSet`. 
+ // + auto skipResourceCount = specializedLayout->getOrdinaryDataBufferCount(); + auto resourceCount = specializedLayout->getResourceSlotCount() - skipResourceCount; + if(resourceCount) { - auto dstIndex = rootBindingIndex.resource; - auto& dstTable = bindingState.descriptorTables[dstIndex.descriptorTableIndex]; - auto& cpuHeap = m_resourceHeap; - auto cpuDescriptorTable = descSet.m_resourceTable; + auto& dstTable = descriptorSet.resourceTable; + auto& srcTable = m_descriptorSet.resourceTable; - bindingState.device->m_device->CopyDescriptorsSimple( + context->device->m_device->CopyDescriptorsSimple( UINT(resourceCount), - dstTable.getCpuHandle(dstIndex.descriptorIndex), - cpuHeap.getCpuHandle(cpuDescriptorTable + skipResourceCount), + dstTable.getCpuHandle(offset.resource), + srcTable.getCpuHandle(skipResourceCount), D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); } - if (auto samplerCount = layout->getSamplerSlotCount()) + + // Finally, we delegate to `_bindImpl` to bind samplers and sub-objects, + // since the logic is shared with the `bindAsConstantBuffer()` case above. + // + // Note: Just like we had to do some subtle handling of the ordinary data buffer + // above, here we need to contend with the fact that the `offset.resource` fields + // computed for sub-object ranges were baked to take the ordinary data buffer + // into account, so that if `skipResourceCount` is non-zero then they are all + // too high by `skipResourceCount`. + // + // We will address the problem here by computing a modified offset that adjusts + // for the ordinary data buffer that we have not bound after all. 
+ BindingOffset subOffset = offset; + subOffset.resource -= skipResourceCount; + SLANG_RETURN_ON_FAIL(_bindImpl(context, descriptorSet, subOffset, specializedLayout)); + return SLANG_OK; + } + + /// Shared logic for `bindAsConstantBuffer()` and `bindAsValue()` + Result _bindImpl( + BindingContext* context, + DescriptorSet const& descriptorSet, + BindingOffset const& offset, + ShaderObjectLayoutImpl* specializedLayout) + { + // We start by binding all the sampler descriptors, if needed. + // + // Note: resource descriptors were handled in either `bindAsConstantBuffer()` + // or `bindAsValue()` before calling into `_bindImpl()`. + // + if (auto samplerCount = specializedLayout->getSamplerSlotCount()) { - auto dstIndex = rootBindingIndex.sampler; - auto& dstTable = bindingState.descriptorTables[dstIndex.descriptorTableIndex]; - auto& cpuHeap = m_samplerHeap; - auto cpuDescriptorTable = (int)descSet.m_samplerTable; + auto& dstTable = descriptorSet.samplerTable; + auto& srcTable = m_descriptorSet.samplerTable; - bindingState.device->m_device->CopyDescriptorsSimple( + context->device->m_device->CopyDescriptorsSimple( UINT(samplerCount), - dstTable.getCpuHandle(dstIndex.descriptorIndex), - cpuHeap.getCpuHandle(cpuDescriptorTable), + dstTable.getCpuHandle(offset.sampler), + srcTable.getCpuHandle(), D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER); } - auto& subObjectRanges = layout->getSubObjectRanges(); + // Next we iterate over the sub-object ranges and bind anything they require. 
+ // + auto& subObjectRanges = specializedLayout->getSubObjectRanges(); auto subObjectRangeCount = subObjectRanges.getCount(); for (Index i = 0; i < subObjectRangeCount; i++) { - auto subObjectRange = layout->getSubObjectRange(i); - auto bindingRange = layout->getBindingRange(subObjectRange.bindingRangeIndex); + auto& subObjectRange = specializedLayout->getSubObjectRange(i); + auto& bindingRange = specializedLayout->getBindingRange(subObjectRange.bindingRangeIndex); + auto baseIndex = bindingRange.flatIndex; auto subObjectLayout = subObjectRange.layout.Ptr(); + + BindingOffset rangeOffset = offset; + rangeOffset += subObjectRange.offset; + + BindingOffset rangeStride = subObjectRange.stride; + switch(bindingRange.bindingType) { case slang::BindingType::ConstantBuffer: { - auto baseIndex = bindingRange.flatIndex; + auto objOffset = rangeOffset; for (uint32_t j = 0; j < bindingRange.count; j++) { auto& object = m_objects[baseIndex + j]; - - RootBindingIndex subIndex = rootBindingIndex; - subIndex.rootParamIndex += subObjectRange.offset.rootParam; - subIndex.resource.descriptorIndex += subObjectRange.offset.resource; - subIndex.sampler.descriptorTableIndex += subObjectRange.offset.sampler; - - object->bindAsConstantBuffer( - encoder, - bindingState, - subIndex, - subObjectLayout); + object->bindAsConstantBuffer(context, descriptorSet, objOffset, subObjectLayout); + objOffset += rangeStride; } } break; case slang::BindingType::ParameterBlock: { - auto baseIndex = bindingRange.flatIndex; + auto objOffset = rangeOffset; for (uint32_t j = 0; j < bindingRange.count; j++) { auto& object = m_objects[baseIndex + j]; - - auto subRootParamIndex = rootBindingIndex.rootParamIndex + subObjectRange.offset.rootParam; - - object->bindAsParameterBlock( - encoder, - bindingState, - subRootParamIndex, - subObjectLayout); + object->bindAsParameterBlock(context, objOffset, subObjectLayout); + objOffset += rangeStride; } } break; @@ -2733,94 +2900,27 @@ public: case 
slang::BindingType::ExistentialValue: if(subObjectLayout) { - auto baseIndex = bindingRange.flatIndex; + auto objOffset = rangeOffset; for (uint32_t j = 0; j < bindingRange.count; j++) { auto& object = m_objects[baseIndex + j]; - - RootBindingIndex subIndex = rootBindingIndex; - subIndex.rootParamIndex += subObjectRange.offset.rootParam; - subIndex.resource.descriptorIndex += subObjectRange.offset.resource; - subIndex.sampler.descriptorTableIndex += subObjectRange.offset.sampler; - - object->bindAsValue( - encoder, - bindingState, - subIndex, - subObjectLayout); + object->bindAsValue(context, descriptorSet, objOffset, subObjectLayout); + objOffset += rangeStride; } } break; } } -// SLANG_RETURN_ON_FAIL(bindChildRootParameters(encoder, bindingState, rootParameterIndex)); - return SLANG_OK; } -#if 0 - Result bindChildRootParameters(PipelineCommandEncoder* encoder, RootBindingState& bindingState, SlangInt rootParameterIndex) - { - ShaderObjectLayoutImpl* layout = getLayout(); - auto& subObjectRanges = layout->getSubObjectRanges(); - auto subObjectRangeCount = subObjectRanges.getCount(); - for (Index i = 0; i < subObjectRangeCount; i++) - { - auto subObjectRange = layout->getSubObjectRange(i); - auto bindingRange = layout->getBindingRange(subObjectRange.bindingRangeIndex); - switch(bindingRange.bindingType) - { - case slang::BindingType::ParameterBlock: - { - auto baseIndex = bindingRange.flatIndex; - auto subRootParameterIndex = rootParameterIndex + subObjectRange.childRootParameterOffset; - for (uint32_t j = 0; j < bindingRange.count; j++) - { - auto& object = m_objects[baseIndex + j]; - - object->bindAsParameterBlock(encoder, bindingState, subRootParameterIndex); - subRootParameterIndex += subObjectRange.layout->getTotalRootParameterCount(); - } - } - break; - case slang::BindingType::ConstantBuffer: - { - // TODO: - } - break; - case slang::BindingType::ExistentialValue: - // If the existential object contains only ordinary data fields, - // the data is already 
written into m_ordinaryDataBuffer during `setObject`, - // so we don't need to do anything here. - // If the existential object has resource fields, this is the time to set - // those fields as in the "pendingLayout" section. - // TODO: implement resource fields binding for inline existential values. - default: - break; - } - } - - return SLANG_OK; - } -#endif - /// Any "ordinary" / uniform data for this object List<char> m_ordinaryData; List<RefPtr<ShaderObjectImpl>> m_objects; - // The resource and sampler heaps used to allocate the descriptor tables. - DescriptorHeapReference m_resourceHeap; - DescriptorHeapReference m_samplerHeap; - - struct DescriptorSet - { - int32_t m_resourceTable = 0; - int32_t m_samplerTable = 0; -// uint32_t m_resourceCount = 0; -// uint32_t m_samplerCount = 0; - }; + /// A CPU-memory descriptor set holding any descriptors used to represent the resources/samplers in this object's state DescriptorSet m_descriptorSet; ShortList<RefPtr<Resource>, 8> m_boundResources; @@ -2913,24 +3013,33 @@ public: } public: - Result bindRootObject(PipelineCommandEncoder* encoder, RootBindingState& bindingState, RootShaderObjectLayoutImpl* layout) + Result bindAsRoot( + BindingContext* context, + RootShaderObjectLayoutImpl* specializedLayout) { - auto rootBindingIndex = prepareToBindAsParameterBlock(0); + // A root shader object always binds as if it were a parameter block, + // insofar as it needs to allocate a descriptor set to hold the bindings + // for its own state and any sub-objects. + // + // Note: We do not directly use `bindAsParameterBlock` here because we also + // need to bind the entry points into the same descriptor set that is + // being used for the root object. 
+ // + BindingOffset rootOffset; + auto descriptorSet = prepareToBindAsParameterBlock(context, /* inout */ rootOffset, specializedLayout); - SLANG_RETURN_ON_FAIL(Super::bindAsConstantBuffer(encoder, bindingState, rootBindingIndex, layout)); + SLANG_RETURN_ON_FAIL(Super::bindAsConstantBuffer(context, descriptorSet, rootOffset, specializedLayout)); auto entryPointCount = m_entryPoints.getCount(); for (Index i = 0; i < entryPointCount; ++i) { auto entryPoint = m_entryPoints[i]; - auto& entryPointInfo = layout->getEntryPoint(i); + auto& entryPointInfo = specializedLayout->getEntryPoint(i); - auto entryPointBindingIndex = rootBindingIndex; - entryPointBindingIndex.rootParamIndex += entryPointInfo.offset.rootParam; - entryPointBindingIndex.resource.descriptorIndex += entryPointInfo.offset.resource; - entryPointBindingIndex.sampler.descriptorIndex += entryPointInfo.offset.sampler; + auto entryPointOffset = rootOffset; + entryPointOffset += entryPointInfo.offset; - SLANG_RETURN_ON_FAIL(entryPoint->bindAsConstantBuffer(encoder, bindingState, entryPointBindingIndex, entryPointInfo.layout)); + SLANG_RETURN_ON_FAIL(entryPoint->bindAsConstantBuffer(context, descriptorSet, entryPointOffset, entryPointInfo.layout)); } return SLANG_OK; @@ -3939,38 +4048,23 @@ Result D3D12Device::PipelineCommandEncoder::_bindRenderState(Submitter* submitte RootShaderObjectLayoutImpl* rootLayoutImpl = static_cast<RootShaderObjectLayoutImpl*>(specializedRootLayout.Ptr()); - ShortList<DescriptorTable> descriptorTables; -// auto descSetInfo = rootLayoutImpl->getDescriptorSetInfo(); - auto heap = m_commandBuffer->m_transientHeap; - for (auto& descSet : rootLayoutImpl->m_gpuDescriptorSetInfos) - { - if (descSet.resourceDescriptorCount) - { - DescriptorTable table; - table.heap = &heap->m_viewHeap; - table.table = heap->m_viewHeap.allocate((int)descSet.resourceDescriptorCount); - descriptorTables.add(table); - } - if (descSet.samplerDescriptorCount) - { - DescriptorTable table; - table.heap = 
&heap->m_samplerHeap; - table.table = heap->m_samplerHeap.allocate((int)descSet.samplerDescriptorCount); - descriptorTables.add(table); - } - } - RootBindingState bindState = {}; - bindState.device = m_renderer; - bindState.transientHeap = m_transientHeap; - auto descTablesView = descriptorTables.getArrayView(); - bindState.descriptorTables = descTablesView.arrayView; - SLANG_RETURN_ON_FAIL(rootObjectImpl->bindRootObject(this, bindState, rootLayoutImpl)); + // We need to set up a context for binding shader objects to the pipeline state. + // This type mostly exists to bundle together a bunch of parameters that would + // otherwise need to be tunneled down through all the shader object binding + // logic. + // + BindingContext context = {}; + context.encoder = this; + context.submitter = submitter; + context.device = m_renderer; + context.transientHeap = m_transientHeap; + + // We kick off binding of shader objects at the root object, and the objects + // themselves will be responsible for allocating, binding, and filling in + // any descriptor tables or other root parameters needed. + // + SLANG_RETURN_ON_FAIL(rootObjectImpl->bindAsRoot(&context, rootLayoutImpl)); - for (Index i = 0; i < descriptorTables.getCount(); i++) - { - submitter->setRootDescriptorTable( - (int)i, descriptorTables[i].heap.getGpuHandle(descriptorTables[i].table)); - } return SLANG_OK; } |
