diff options
| author | lucy96chen <47800040+lucy96chen@users.noreply.github.com> | 2022-05-17 10:56:14 -0700 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2022-05-17 10:56:14 -0700 |
| commit | 5a3aa6159e0ef0241b528812e1d138f0d7055f22 (patch) | |
| tree | 71d286e06030ee73f0b739e071cd58dd05d507d1 /tools/gfx/d3d12/d3d12-shader-object.cpp | |
| parent | 716e75b9ed1acfaee3dc7f3bc347ad17fca65e05 (diff) | |
Split render-d3d12.h/cpp into a set of smaller files (#2231)
* Split render-d3d12 into numerous smaller files to make the code easier to parse
* Added all new D3D12 files created from splitting render-d3d12
* Fixed several uses of attachment still floating around; Changed resource-d3d12 and descriptor-heap-d3d12 to match naming conventions of new d3d12 implementation header files
* Readded files with name changes because changing them from inside VS apparently results in them being treated as new files
* Merged in externals changes from master
* Small cleanup changes
* Rerun CI
Co-authored-by: Theresa Foley <10618364+tangent-vector@users.noreply.github.com>
Diffstat (limited to 'tools/gfx/d3d12/d3d12-shader-object.cpp')
| -rw-r--r-- | tools/gfx/d3d12/d3d12-shader-object.cpp | 1179 |
1 files changed, 1179 insertions, 0 deletions
diff --git a/tools/gfx/d3d12/d3d12-shader-object.cpp b/tools/gfx/d3d12/d3d12-shader-object.cpp new file mode 100644 index 000000000..e5875f55c --- /dev/null +++ b/tools/gfx/d3d12/d3d12-shader-object.cpp @@ -0,0 +1,1179 @@ +// d3d12-shader-object.cpp +#include "d3d12-shader-object.h" + +#include "d3d12-buffer.h" +#include "d3d12-command-encoder.h" +#include "d3d12-device.h" +#include "d3d12-resource-views.h" +#include "d3d12-sampler.h" +#include "d3d12-shader-object-layout.h" +#include "d3d12-transient-heap.h" + +#include "d3d12-helper-functions.h" + +namespace gfx +{ +namespace d3d12 +{ + +using namespace Slang; + +GfxCount ShaderObjectImpl::getEntryPointCount() { return 0; } + +Result ShaderObjectImpl::getEntryPoint(GfxIndex index, IShaderObject** outEntryPoint) +{ + *outEntryPoint = nullptr; + return SLANG_OK; +} + +const void* ShaderObjectImpl::getRawData() { return m_data.getBuffer(); } + +Size ShaderObjectImpl::getSize() { return (Size)m_data.getCount(); } + +// TODO: Change Index to Offset/Size? +Result ShaderObjectImpl::setData(ShaderOffset const& inOffset, void const* data, size_t inSize) +{ + Index offset = inOffset.uniformOffset; + Index size = inSize; + + char* dest = m_data.getBuffer(); + Index availableSize = m_data.getCount(); + + // TODO: We really should bounds-check access rather than silently ignoring sets + // that are too large, but we have several test cases that set more data than + // an object actually stores on several targets... + // + if (offset < 0) + { + size += offset; + offset = 0; + } + if ((offset + size) >= availableSize) + { + size = availableSize - offset; + } + + memcpy(dest + offset, data, size); + + m_isConstantBufferDirty = true; + + m_version++; + + return SLANG_OK; +} + +Result ShaderObjectImpl::setObject(ShaderOffset const& offset, IShaderObject* object) +{ + SLANG_RETURN_ON_FAIL(Super::setObject(offset, object)); + if (m_isMutable) + { + auto subObjectIndex = getSubObjectIndex(offset); + if (subObjectIndex >= m_subObjectVersions.getCount()) + m_subObjectVersions.setCount(subObjectIndex + 1); + m_subObjectVersions[subObjectIndex] = static_cast<ShaderObjectImpl*>(object)->m_version; + m_version++; + } + return SLANG_OK; +} + +Result ShaderObjectImpl::setSampler(ShaderOffset const& offset, ISamplerState* sampler) +{ + if (offset.bindingRangeIndex < 0) + return SLANG_E_INVALID_ARG; + auto layout = getLayout(); + if (offset.bindingRangeIndex >= layout->getBindingRangeCount()) + return SLANG_E_INVALID_ARG; + auto& bindingRange = layout->getBindingRange(offset.bindingRangeIndex); + auto samplerImpl = static_cast<SamplerStateImpl*>(sampler); + ID3D12Device* d3dDevice = static_cast<DeviceImpl*>(getDevice())->m_device; + d3dDevice->CopyDescriptorsSimple( + 1, + m_descriptorSet.samplerTable.getCpuHandle( + bindingRange.baseIndex + (int32_t)offset.bindingArrayIndex), + samplerImpl->m_descriptor.cpuHandle, + D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER); + m_version++; + return SLANG_OK; +} + +Result ShaderObjectImpl::setCombinedTextureSampler( + ShaderOffset const& offset, IResourceView* textureView, ISamplerState* sampler) +{ +#if 0 + if (offset.bindingRangeIndex < 0) + return SLANG_E_INVALID_ARG; + auto layout = getLayout(); + if (offset.bindingRangeIndex >= layout->getBindingRangeCount()) + return SLANG_E_INVALID_ARG; + auto& bindingRange = layout->getBindingRange(offset.bindingRangeIndex); + auto resourceViewImpl = static_cast<ResourceViewImpl*>(textureView); + ID3D12Device* d3dDevice = static_cast<DeviceImpl*>(getDevice())->m_device; + d3dDevice->CopyDescriptorsSimple( + 1, + m_resourceHeap.getCpuHandle( + m_descriptorSet.m_resourceTable + + bindingRange.binding.offsetInDescriptorTable.resource + + (int32_t)offset.bindingArrayIndex), + resourceViewImpl->m_descriptor.cpuHandle, + D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + auto samplerImpl = static_cast<SamplerStateImpl*>(sampler); + d3dDevice->CopyDescriptorsSimple( + 1, + m_samplerHeap.getCpuHandle( + m_descriptorSet.m_samplerTable + + bindingRange.binding.offsetInDescriptorTable.sampler + + (int32_t)offset.bindingArrayIndex), + samplerImpl->m_descriptor.cpuHandle, + D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER); +#endif + m_version++; + return SLANG_OK; +} + +Result ShaderObjectImpl::init( + DeviceImpl* device, + ShaderObjectLayoutImpl* layout, + DescriptorHeapReference viewHeap, + DescriptorHeapReference samplerHeap) +{ + m_device = device; + + m_layout = layout; + + m_cachedTransientHeap = nullptr; + m_cachedTransientHeapVersion = 0; + m_isConstantBufferDirty = true; + + // If the layout tells us that there is any uniform data, + // then we will allocate a CPU memory buffer to hold that data + // while it is being set from the host. + // + // Once the user is done setting the parameters/fields of this + // shader object, we will produce a GPU-memory version of the + // uniform data (which includes values from this object and + // any existential-type sub-objects). + // + size_t uniformSize = layout->getElementTypeLayout()->getSize(); + if (uniformSize) + { + m_data.setCount(uniformSize); + memset(m_data.getBuffer(), 0, uniformSize); + } + m_rootArguments.setCount(layout->getOwnUserRootParameterCount()); + memset( + m_rootArguments.getBuffer(), + 0, + sizeof(D3D12_GPU_VIRTUAL_ADDRESS) * m_rootArguments.getCount()); + // Each shader object will own CPU descriptor heap memory + // for any resource or sampler descriptors it might store + // as part of its value. + // + // This allocate includes a reservation for any constant + // buffer descriptor pertaining to the ordinary data, + // but does *not* include any descriptors that are managed + // as part of sub-objects. + // + if (auto resourceCount = layout->getResourceSlotCount()) + { + m_descriptorSet.resourceTable.allocate(viewHeap, resourceCount); + + // We must also ensure that the memory for any resources + // referenced by descriptors in this object does not get + // freed while the object is still live. + // + m_boundResources.setCount(resourceCount); + } + if (auto samplerCount = layout->getSamplerSlotCount()) + { + m_descriptorSet.samplerTable.allocate(samplerHeap, samplerCount); + } + + // If the layout specifies that we have any sub-objects, then + // we need to size the array to account for them. + // + Index subObjectCount = layout->getSubObjectSlotCount(); + m_objects.setCount(subObjectCount); + + for (auto subObjectRangeInfo : layout->getSubObjectRanges()) + { + auto subObjectLayout = subObjectRangeInfo.layout; + + // In the case where the sub-object range represents an + // existential-type leaf field (e.g., an `IBar`), we + // cannot pre-allocate the object(s) to go into that + // range, since we can't possibly know what to allocate + // at this point. + // + if (!subObjectLayout) + continue; + // + // Otherwise, we will allocate a sub-object to fill + // in each entry in this range, based on the layout + // information we already have. + + auto& bindingRangeInfo = layout->getBindingRange(subObjectRangeInfo.bindingRangeIndex); + for (uint32_t i = 0; i < bindingRangeInfo.count; ++i) + { + RefPtr<ShaderObjectImpl> subObject; + SLANG_RETURN_ON_FAIL( + ShaderObjectImpl::create(device, subObjectLayout, subObject.writeRef())); + m_objects[bindingRangeInfo.subObjectIndex + i] = subObject; + } + } + + return SLANG_OK; +} + +/// Write the uniform/ordinary data of this object into the given `dest` buffer at the given +/// `offset` + +Result ShaderObjectImpl::_writeOrdinaryData( + PipelineCommandEncoder* encoder, + BufferResourceImpl* buffer, + Offset offset, + Size destSize, + ShaderObjectLayoutImpl* specializedLayout) +{ + auto src = m_data.getBuffer(); + auto srcSize = Size(m_data.getCount()); + + SLANG_ASSERT(srcSize <= destSize); + + uploadBufferDataImpl( + encoder->m_device, + encoder->m_d3dCmdList, + encoder->m_transientHeap, + buffer, + offset, + srcSize, + src); + + // In the case where this object has any sub-objects of + // existential/interface type, we need to recurse on those objects + // that need to write their state into an appropriate "pending" allocation. + // + // Note: Any values that could fit into the "payload" included + // in the existential-type field itself will have already been + // written as part of `setObject()`. This loop only needs to handle + // those sub-objects that do not "fit." + // + // An implementers looking at this code might wonder if things could be changed + // so that *all* writes related to sub-objects for interface-type fields could + // be handled in this one location, rather than having some in `setObject()` and + // others handled here. + // + Index subObjectRangeCounter = 0; + for (auto const& subObjectRangeInfo : specializedLayout->getSubObjectRanges()) + { + Index subObjectRangeIndex = subObjectRangeCounter++; + auto const& bindingRangeInfo = + specializedLayout->getBindingRange(subObjectRangeInfo.bindingRangeIndex); + + // We only need to handle sub-object ranges for interface/existential-type fields, + // because fields of constant-buffer or parameter-block type are responsible for + // the ordinary/uniform data of their own existential/interface-type sub-objects. + // + if (bindingRangeInfo.bindingType != slang::BindingType::ExistentialValue) + continue; + + // Each sub-object range represents a single "leaf" field, but might be nested + // under zero or more outer arrays, such that the number of existential values + // in the same range can be one or more. + // + auto count = bindingRangeInfo.count; + + // We are not concerned with the case where the existential value(s) in the range + // git into the payload part of the leaf field. + // + // In the case where the value didn't fit, the Slang layout strategy would have + // considered the requirements of the value as a "pending" allocation, and would + // allocate storage for the ordinary/uniform part of that pending allocation inside + // of the parent object's type layout. + // + // Here we assume that the Slang reflection API can provide us with a single byte + // offset and stride for the location of the pending data allocation in the + // specialized type layout, which will store the values for this sub-object range. + // + // TODO: The reflection API functions we are assuming here haven't been implemented + // yet, so the functions being called here are stubs. + // + // TODO: It might not be that a single sub-object range can reliably map to a single + // contiguous array with a single stride; we need to carefully consider what the + // layout logic does for complex cases with multiple layers of nested arrays and + // structures. + // + Offset subObjectRangePendingDataOffset = subObjectRangeInfo.offset.pendingOrdinaryData; + Size subObjectRangePendingDataStride = subObjectRangeInfo.stride.pendingOrdinaryData; + + // If the range doesn't actually need/use the "pending" allocation at all, then + // we need to detect that case and skip such ranges. + // + // TODO: This should probably be handled on a per-object basis by caching a "does it + // fit?" bit as part of the information for bound sub-objects, given that we already + // compute the "does it fit?" status as part of `setObject()`. + // + if (subObjectRangePendingDataOffset == 0) + continue; + + for (uint32_t i = 0; i < count; ++i) + { + auto subObject = m_objects[bindingRangeInfo.subObjectIndex + i]; + + RefPtr<ShaderObjectLayoutImpl> subObjectLayout; + SLANG_RETURN_ON_FAIL(subObject->getSpecializedLayout(subObjectLayout.writeRef())); + + auto subObjectOffset = + subObjectRangePendingDataOffset + i * subObjectRangePendingDataStride; + + subObject->_writeOrdinaryData( + encoder, + buffer, + offset + subObjectOffset, + destSize - subObjectOffset, + subObjectLayout); + } + } + + return SLANG_OK; +} + +bool ShaderObjectImpl::shouldAllocateConstantBuffer(TransientResourceHeapImpl* transientHeap) +{ + if (m_isConstantBufferDirty || m_cachedTransientHeap != transientHeap || + m_cachedTransientHeapVersion != transientHeap->getVersion()) + { + return true; + } + return false; +} + +/// Ensure that the `m_ordinaryDataBuffer` has been created, if it is needed + +Result ShaderObjectImpl::_ensureOrdinaryDataBufferCreatedIfNeeded( + PipelineCommandEncoder* encoder, ShaderObjectLayoutImpl* specializedLayout) +{ + // If data has been changed since last allocation/filling of constant buffer, + // we will need to allocate a new one. + // + if (!shouldAllocateConstantBuffer(encoder->m_transientHeap)) + { + return SLANG_OK; + } + m_isConstantBufferDirty = false; + m_cachedTransientHeap = encoder->m_transientHeap; + m_cachedTransientHeapVersion = encoder->m_transientHeap->getVersion(); + + // Computing the size of the ordinary data buffer is *not* just as simple + // as using the size of the `m_ordinayData` array that we store. The reason + // for the added complexity is that interface-type fields may lead to the + // storage being specialized such that it needs extra appended data to + // store the concrete values that logically belong in those interface-type + // fields but wouldn't fit in the fixed-size allocation we gave them. + // + m_constantBufferSize = specializedLayout->getTotalOrdinaryDataSize(); + if (m_constantBufferSize == 0) + { + return SLANG_OK; + } + + // Once we have computed how large the buffer should be, we can allocate + // it from the transient resource heap. + // + auto alignedConstantBufferSize = D3DUtil::calcAligned(m_constantBufferSize, 256); + SLANG_RETURN_ON_FAIL(encoder->m_commandBuffer->m_transientHeap->allocateConstantBuffer( + alignedConstantBufferSize, m_constantBufferWeakPtr, m_constantBufferOffset)); + + // Once the buffer is allocated, we can use `_writeOrdinaryData` to fill it in. + // + // Note that `_writeOrdinaryData` is potentially recursive in the case + // where this object contains interface/existential-type fields, so we + // don't need or want to inline it into this call site. + // + SLANG_RETURN_ON_FAIL(_writeOrdinaryData( + encoder, + static_cast<BufferResourceImpl*>(m_constantBufferWeakPtr), + m_constantBufferOffset, + m_constantBufferSize, + specializedLayout)); + + { + // We also create and store a descriptor for our root constant buffer + // into the descriptor table allocation that was reserved for them. + // + // We always know that the ordinary data buffer will be the first descriptor + // in the table of resource views. + // + auto descriptorTable = m_descriptorSet.resourceTable; + D3D12_CONSTANT_BUFFER_VIEW_DESC viewDesc = {}; + viewDesc.BufferLocation = static_cast<BufferResourceImpl*>(m_constantBufferWeakPtr) + ->m_resource.getResource() + ->GetGPUVirtualAddress() + + m_constantBufferOffset; + viewDesc.SizeInBytes = (UINT)alignedConstantBufferSize; + encoder->m_device->CreateConstantBufferView(&viewDesc, descriptorTable.getCpuHandle()); + } + + return SLANG_OK; +} + +void ShaderObjectImpl::updateSubObjectsRecursive() +{ + if (!m_isMutable) + return; + auto& subObjectRanges = getLayout()->getSubObjectRanges(); + for (Slang::Index subObjectRangeIndex = 0; subObjectRangeIndex < subObjectRanges.getCount(); + subObjectRangeIndex++) + { + auto const& subObjectRange = subObjectRanges[subObjectRangeIndex]; + auto const& bindingRange = getLayout()->getBindingRange(subObjectRange.bindingRangeIndex); + Slang::Index count = bindingRange.count; + + for (Slang::Index subObjectIndexInRange = 0; subObjectIndexInRange < count; + subObjectIndexInRange++) + { + Slang::Index objectIndex = bindingRange.subObjectIndex + subObjectIndexInRange; + auto subObject = m_objects[objectIndex].Ptr(); + if (!subObject) + continue; + subObject->updateSubObjectsRecursive(); + if (m_subObjectVersions[objectIndex] != m_objects[objectIndex]->m_version) + { + ShaderOffset offset; + offset.bindingRangeIndex = (GfxIndex)subObjectRange.bindingRangeIndex; + offset.bindingArrayIndex = (GfxIndex)subObjectIndexInRange; + setObject(offset, subObject); + } + } + } +} + +/// Prepare to bind this object as a parameter block. +/// +/// This involves allocating and binding any descriptor tables necessary +/// to to store the state of the object. The function returns a descriptor +/// set formed from any table(s) allocated. In addition, the `ioOffset` +/// parameter will be adjusted to be correct for binding values into +/// the resulting descriptor set. +/// +/// Returns: +/// SLANG_OK when successful, +/// SLANG_E_OUT_OF_MEMORY when descriptor heap is full. +/// + +Result ShaderObjectImpl::prepareToBindAsParameterBlock( + BindingContext* context, + BindingOffset& ioOffset, + ShaderObjectLayoutImpl* specializedLayout, + DescriptorSet& outDescriptorSet) +{ + auto transientHeap = context->transientHeap; + auto submitter = context->submitter; + + // When writing into the new descriptor set, resource and sampler + // descriptors will need to start at index zero in the respective + // tables. + // + ioOffset.resource = 0; + ioOffset.sampler = 0; + + // The index of the next root parameter to bind will be maintained, + // but needs to be incremented by the number of descriptor tables + // we allocate (zero or one resource table and zero or one sampler + // table). + // + auto& rootParamIndex = ioOffset.rootParam; + + if (auto descriptorCount = specializedLayout->getTotalResourceDescriptorCount()) + { + // There is a non-zero number of resource descriptors needed, + // so we will allocate a table out of the appropriate heap, + // and store it into the appropriate part of `descriptorSet`. + // + auto descriptorHeap = &transientHeap->getCurrentViewHeap(); + auto& table = outDescriptorSet.resourceTable; + + // Allocate the table. + // + if (!table.allocate(descriptorHeap, descriptorCount)) + { + context->outOfMemoryHeap = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV; + return SLANG_E_OUT_OF_MEMORY; + } + + // Bind the table to the pipeline, consuming the next available + // root parameter. + // + auto tableRootParamIndex = rootParamIndex++; + submitter->setRootDescriptorTable(tableRootParamIndex, table.getGpuHandle()); + } + if (auto descriptorCount = specializedLayout->getTotalSamplerDescriptorCount()) + { + // There is a non-zero number of sampler descriptors needed, + // so we will allocate a table out of the appropriate heap, + // and store it into the appropriate part of `descriptorSet`. + // + auto descriptorHeap = &transientHeap->getCurrentSamplerHeap(); + auto& table = outDescriptorSet.samplerTable; + + // Allocate the table. + // + if (!table.allocate(descriptorHeap, descriptorCount)) + { + context->outOfMemoryHeap = D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER; + return SLANG_E_OUT_OF_MEMORY; + } + + // Bind the table to the pipeline, consuming the next available + // root parameter. + // + auto tableRootParamIndex = rootParamIndex++; + submitter->setRootDescriptorTable(tableRootParamIndex, table.getGpuHandle()); + } + + return SLANG_OK; +} + +bool ShaderObjectImpl::checkIfCachedDescriptorSetIsValidRecursive(BindingContext* context) +{ + if (shouldAllocateConstantBuffer(context->transientHeap)) + return false; + if (m_isMutable && m_version != m_cachedGPUDescriptorSetVersion) + return false; + if (m_cachedGPUDescriptorSet.resourceTable.getDescriptorCount() != 0 && + m_cachedGPUDescriptorSet.resourceTable.m_heap.ptr.linearHeap->getHeap() != + m_cachedTransientHeap->getCurrentViewHeap().getHeap()) + return false; + if (m_cachedGPUDescriptorSet.samplerTable.getDescriptorCount() != 0 && + m_cachedGPUDescriptorSet.samplerTable.m_heap.ptr.linearHeap->getHeap() != + m_cachedTransientHeap->getCurrentSamplerHeap().getHeap()) + return false; + + auto& subObjectRanges = getLayout()->getSubObjectRanges(); + for (Slang::Index subObjectRangeIndex = 0; subObjectRangeIndex < subObjectRanges.getCount(); + subObjectRangeIndex++) + { + auto const& subObjectRange = subObjectRanges[subObjectRangeIndex]; + auto const& bindingRange = getLayout()->getBindingRange(subObjectRange.bindingRangeIndex); + if (bindingRange.bindingType != slang::BindingType::ParameterBlock) + continue; + Slang::Index count = bindingRange.count; + + for (Slang::Index subObjectIndexInRange = 0; subObjectIndexInRange < count; + subObjectIndexInRange++) + { + Slang::Index objectIndex = bindingRange.subObjectIndex + subObjectIndexInRange; + auto subObject = m_objects[objectIndex].Ptr(); + if (!subObject) + continue; + if (subObject->checkIfCachedDescriptorSetIsValidRecursive(context)) + return false; + } + } + return true; +} + +/// Bind this object as a `ParameterBlock<X>` + +Result ShaderObjectImpl::bindAsParameterBlock( + BindingContext* context, BindingOffset const& offset, ShaderObjectLayoutImpl* specializedLayout) +{ + if (checkIfCachedDescriptorSetIsValidRecursive(context)) + { + // If we already have a valid gpu descriptor table in the current + // heap, bind it. + auto rootParamIndex = offset.rootParam; + if (m_cachedGPUDescriptorSet.resourceTable.getDescriptorCount()) + { + auto tableRootParamIndex = rootParamIndex++; + context->submitter->setRootDescriptorTable( + tableRootParamIndex, m_cachedGPUDescriptorSet.resourceTable.getGpuHandle()); + } + if (m_cachedGPUDescriptorSet.samplerTable.getDescriptorCount()) + { + auto tableRootParamIndex = rootParamIndex++; + context->submitter->setRootDescriptorTable( + tableRootParamIndex, m_cachedGPUDescriptorSet.samplerTable.getGpuHandle()); + } + return SLANG_OK; + } + + // The first step to binding an object as a parameter block is to allocate a descriptor + // set (consisting of zero or one resource descriptor table and zero or one sampler + // descriptor table) to represent its values. + // + BindingOffset subOffset = offset; + SLANG_RETURN_ON_FAIL(prepareToBindAsParameterBlock( + context, /* inout */ subOffset, specializedLayout, m_cachedGPUDescriptorSet)); + + // Next we bind the object into that descriptor set as if it were being used + // as a `ConstantBuffer<X>`. + // + SLANG_RETURN_ON_FAIL( + bindAsConstantBuffer(context, m_cachedGPUDescriptorSet, subOffset, specializedLayout)); + + m_cachedGPUDescriptorSetVersion = m_version; + return SLANG_OK; +} + +/// Bind this object as a `ConstantBuffer<X>` + +Result ShaderObjectImpl::bindAsConstantBuffer( + BindingContext* context, + DescriptorSet const& descriptorSet, + BindingOffset const& offset, + ShaderObjectLayoutImpl* specializedLayout) +{ + // If we are to bind as a constant buffer we first need to ensure that + // the ordinary data buffer is created, if this object needs one. + // + SLANG_RETURN_ON_FAIL( + _ensureOrdinaryDataBufferCreatedIfNeeded(context->encoder, specializedLayout)); + + // Next, we need to bind all of the resource descriptors for this object + // (including any ordinary data buffer) into the provided `descriptorSet`. + // + auto resourceCount = specializedLayout->getResourceSlotCount(); + if (resourceCount) + { + auto& dstTable = descriptorSet.resourceTable; + auto& srcTable = m_descriptorSet.resourceTable; + + context->device->m_device->CopyDescriptorsSimple( + UINT(resourceCount), + dstTable.getCpuHandle(offset.resource), + srcTable.getCpuHandle(), + D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + } + + // Finally, we delegate to `_bindImpl` to bind samplers and sub-objects, + // since the logic is shared with the `bindAsValue()` case below. + // + SLANG_RETURN_ON_FAIL(_bindImpl(context, descriptorSet, offset, specializedLayout)); + return SLANG_OK; +} + +/// Bind this object as a value (for an interface-type parameter) + +Result ShaderObjectImpl::bindAsValue( + BindingContext* context, + DescriptorSet const& descriptorSet, + BindingOffset const& offset, + ShaderObjectLayoutImpl* specializedLayout) +{ + // When binding a value for an interface-type field we do *not* want + // to bind a buffer for the ordinary data (if there is any) because + // ordinary data for interface-type fields gets allocated into the + // parent object's ordinary data buffer. + // + // This CPU-memory descriptor table that holds resource descriptors + // will have already been allocated to have space for an ordinary data + // buffer (if needed), so we need to take care to skip over that + // descriptor when copying descriptors from the CPU-memory set + // to the GPU-memory `descriptorSet`. + // + auto skipResourceCount = specializedLayout->getOrdinaryDataBufferCount(); + auto resourceCount = specializedLayout->getResourceSlotCount() - skipResourceCount; + if (resourceCount) + { + auto& dstTable = descriptorSet.resourceTable; + auto& srcTable = m_descriptorSet.resourceTable; + + context->device->m_device->CopyDescriptorsSimple( + UINT(resourceCount), + dstTable.getCpuHandle(offset.resource), + srcTable.getCpuHandle(skipResourceCount), + D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + } + + // Finally, we delegate to `_bindImpl` to bind samplers and sub-objects, + // since the logic is shared with the `bindAsConstantBuffer()` case above. + // + // Note: Just like we had to do some subtle handling of the ordinary data buffer + // above, here we need to contend with the fact that the `offset.resource` fields + // computed for sub-object ranges were baked to take the ordinary data buffer + // into account, so that if `skipResourceCount` is non-zero then they are all + // too high by `skipResourceCount`. + // + // We will address the problem here by computing a modified offset that adjusts + // for the ordinary data buffer that we have not bound after all. + // + BindingOffset subOffset = offset; + subOffset.resource -= skipResourceCount; + SLANG_RETURN_ON_FAIL(_bindImpl(context, descriptorSet, subOffset, specializedLayout)); + return SLANG_OK; +} + +/// Shared logic for `bindAsConstantBuffer()` and `bindAsValue()` + +Result ShaderObjectImpl::_bindImpl( + BindingContext* context, + DescriptorSet const& descriptorSet, + BindingOffset const& offset, + ShaderObjectLayoutImpl* specializedLayout) +{ + // We start by binding all the sampler decriptors, if needed. + // + // Note: resource descriptors were handled in either `bindAsConstantBuffer()` + // or `bindAsValue()` before calling into `_bindImpl()`. + // + if (auto samplerCount = specializedLayout->getSamplerSlotCount()) + { + auto& dstTable = descriptorSet.samplerTable; + auto& srcTable = m_descriptorSet.samplerTable; + + context->device->m_device->CopyDescriptorsSimple( + UINT(samplerCount), + dstTable.getCpuHandle(offset.sampler), + srcTable.getCpuHandle(), + D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER); + } + + // Next we iterate over the sub-object ranges and bind anything they require. + // + auto& subObjectRanges = specializedLayout->getSubObjectRanges(); + auto subObjectRangeCount = subObjectRanges.getCount(); + for (Index i = 0; i < subObjectRangeCount; i++) + { + auto& subObjectRange = specializedLayout->getSubObjectRange(i); + auto& bindingRange = specializedLayout->getBindingRange(subObjectRange.bindingRangeIndex); + auto subObjectIndex = bindingRange.subObjectIndex; + auto subObjectLayout = subObjectRange.layout.Ptr(); + + BindingOffset rangeOffset = offset; + rangeOffset += subObjectRange.offset; + + BindingOffset rangeStride = subObjectRange.stride; + + switch (bindingRange.bindingType) + { + case slang::BindingType::ConstantBuffer: + { + auto objOffset = rangeOffset; + for (uint32_t j = 0; j < bindingRange.count; j++) + { + auto& object = m_objects[subObjectIndex + j]; + SLANG_RETURN_ON_FAIL(object->bindAsConstantBuffer( + context, descriptorSet, objOffset, subObjectLayout)); + objOffset += rangeStride; + } + } + break; + + case slang::BindingType::ParameterBlock: + { + auto objOffset = rangeOffset; + for (uint32_t j = 0; j < bindingRange.count; j++) + { + auto& object = m_objects[subObjectIndex + j]; + SLANG_RETURN_ON_FAIL( + object->bindAsParameterBlock(context, objOffset, subObjectLayout)); + objOffset += rangeStride; + } + } + break; + + case slang::BindingType::ExistentialValue: + if (subObjectLayout) + { + auto objOffset = rangeOffset; + for (uint32_t j = 0; j < bindingRange.count; j++) + { + auto& object = m_objects[subObjectIndex + j]; + SLANG_RETURN_ON_FAIL( + object->bindAsValue(context, descriptorSet, objOffset, subObjectLayout)); + objOffset += rangeStride; + } + } + break; + } + } + + return SLANG_OK; +} + +Result ShaderObjectImpl::bindRootArguments(BindingContext* context, uint32_t& index) +{ + auto layoutImpl = getLayout(); + for (Index i = 0; i < m_rootArguments.getCount(); i++) + { + switch (layoutImpl->getRootParameterInfo(i).type) + { + case IResourceView::Type::ShaderResource: + case IResourceView::Type::AccelerationStructure: + context->submitter->setRootSRV(index, m_rootArguments[i]); + break; + case IResourceView::Type::UnorderedAccess: + context->submitter->setRootUAV(index, m_rootArguments[i]); + break; + default: + continue; + } + index++; + } + for (auto& subObject : m_objects) + { + if (subObject) + { + SLANG_RETURN_ON_FAIL(subObject->bindRootArguments(context, index)); + } + } + return SLANG_OK; +} + +/// Get the layout of this shader object with specialization arguments considered +/// +/// This operation should only be called after the shader object has been +/// fully filled in and finalized. +/// + +Result ShaderObjectImpl::getSpecializedLayout(ShaderObjectLayoutImpl** outLayout) +{ + if (!m_specializedLayout) + { + SLANG_RETURN_ON_FAIL(_createSpecializedLayout(m_specializedLayout.writeRef())); + } + returnRefPtr(outLayout, m_specializedLayout); + return SLANG_OK; +} + +/// Create the layout for this shader object with specialization arguments considered +/// +/// This operation is virtual so that it can be customized by `RootShaderObject`. +/// + +Result ShaderObjectImpl::_createSpecializedLayout(ShaderObjectLayoutImpl** outLayout) +{ + ExtendedShaderObjectType extendedType; + SLANG_RETURN_ON_FAIL(getSpecializedShaderObjectType(&extendedType)); + + auto renderer = getRenderer(); + RefPtr<ShaderObjectLayoutImpl> layout; + SLANG_RETURN_ON_FAIL(renderer->getShaderObjectLayout( + extendedType.slangType, + m_layout->getContainerType(), + (ShaderObjectLayoutBase**)layout.writeRef())); + + returnRefPtrMove(outLayout, layout); + return SLANG_OK; +} + +Result ShaderObjectImpl::setResource(ShaderOffset const& offset, IResourceView* resourceView) +{ + if (offset.bindingRangeIndex < 0) + return SLANG_E_INVALID_ARG; + auto layout = getLayout(); + if (offset.bindingRangeIndex >= layout->getBindingRangeCount()) + return SLANG_E_INVALID_ARG; + + m_version++; + + ID3D12Device* d3dDevice = static_cast<DeviceImpl*>(getDevice())->m_device; + + auto& bindingRange = layout->getBindingRange(offset.bindingRangeIndex); + + if (bindingRange.isRootParameter && resourceView) + { + auto& rootArg = m_rootArguments[bindingRange.baseIndex]; + switch (resourceView->getViewDesc()->type) + { + case IResourceView::Type::AccelerationStructure: + { + auto resourceViewImpl = static_cast<AccelerationStructureImpl*>(resourceView); + rootArg = resourceViewImpl->getDeviceAddress(); + } + break; + case IResourceView::Type::ShaderResource: + case IResourceView::Type::UnorderedAccess: + { + auto resourceViewImpl = static_cast<ResourceViewImpl*>(resourceView); + if (resourceViewImpl->m_resource->isBuffer()) + { + rootArg = static_cast<BufferResourceImpl*>(resourceViewImpl->m_resource.Ptr()) + ->getDeviceAddress(); + } + else + { + getDebugCallback()->handleMessage( + DebugMessageType::Error, + DebugMessageSource::Layer, + "The shader parameter at the specified offset is a root parameter, and " + "therefore can only be a buffer view."); + return SLANG_FAIL; + } + } + break; + } + return SLANG_OK; + } + + if (resourceView == nullptr) + { + // Create null descriptor for the binding. + auto destDescriptor = m_descriptorSet.resourceTable.getCpuHandle( + bindingRange.baseIndex + (int32_t)offset.bindingArrayIndex); + return createNullDescriptor(d3dDevice, destDescriptor, bindingRange); + } + + ResourceViewInternalImpl* internalResourceView = nullptr; + switch (resourceView->getViewDesc()->type) + { +#if SLANG_GFX_HAS_DXR_SUPPORT + case IResourceView::Type::AccelerationStructure: + { + auto asImpl = static_cast<AccelerationStructureImpl*>(resourceView); + // Hold a reference to the resource to prevent its destruction. + m_boundResources[bindingRange.baseIndex + offset.bindingArrayIndex] = asImpl->m_buffer; + internalResourceView = asImpl; + } + break; +#endif + default: + { + auto resourceViewImpl = static_cast<ResourceViewImpl*>(resourceView); + // Hold a reference to the resource to prevent its destruction. + m_boundResources[bindingRange.baseIndex + offset.bindingArrayIndex] = + resourceViewImpl->m_resource; + internalResourceView = resourceViewImpl; + } + break; + } + + auto descriptorSlotIndex = bindingRange.baseIndex + (int32_t)offset.bindingArrayIndex; + if (internalResourceView->m_descriptor.cpuHandle.ptr) + { + d3dDevice->CopyDescriptorsSimple( + 1, + m_descriptorSet.resourceTable.getCpuHandle( + bindingRange.baseIndex + (int32_t)offset.bindingArrayIndex), + internalResourceView->m_descriptor.cpuHandle, + D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + } + else + { + getDebugCallback()->handleMessage( + DebugMessageType::Error, + DebugMessageSource::Layer, + "IShaderObject::setResource: the resource view cannot be set to this shader parameter. " + "A possible reason is that the view is too large to be supported by D3D12."); + return SLANG_FAIL; + } + return SLANG_OK; +} + +Result ShaderObjectImpl::create( + DeviceImpl* device, ShaderObjectLayoutImpl* layout, ShaderObjectImpl** outShaderObject) +{ + auto object = RefPtr<ShaderObjectImpl>(new ShaderObjectImpl()); + SLANG_RETURN_ON_FAIL( + object->init(device, layout, device->m_cpuViewHeap.Ptr(), device->m_cpuSamplerHeap.Ptr())); + returnRefPtrMove(outShaderObject, object); + return SLANG_OK; +} + +ShaderObjectImpl::~ShaderObjectImpl() { m_descriptorSet.freeIfSupported(); } + +RootShaderObjectLayoutImpl* RootShaderObjectImpl::getLayout() +{ + return static_cast<RootShaderObjectLayoutImpl*>(m_layout.Ptr()); +} + +GfxCount RootShaderObjectImpl::getEntryPointCount() { return (GfxCount)m_entryPoints.getCount(); } + +SlangResult RootShaderObjectImpl::getEntryPoint(GfxIndex index, IShaderObject** outEntryPoint) +{ + returnComPtr(outEntryPoint, m_entryPoints[index]); + return SLANG_OK; +} + +Result RootShaderObjectImpl::collectSpecializationArgs(ExtendedShaderObjectTypeList& args) +{ + SLANG_RETURN_ON_FAIL(ShaderObjectImpl::collectSpecializationArgs(args)); + for (auto& entryPoint : m_entryPoints) + { + SLANG_RETURN_ON_FAIL(entryPoint->collectSpecializationArgs(args)); + } + return SLANG_OK; +} + +Result RootShaderObjectImpl::_createSpecializedLayout(ShaderObjectLayoutImpl** outLayout) +{ + ExtendedShaderObjectTypeList specializationArgs; + SLANG_RETURN_ON_FAIL(collectSpecializationArgs(specializationArgs)); + + // Note: There is an important policy decision being made here that we need + // to approach carefully. + // + // We are doing two different things that affect the layout of a program: + // + // 1. We are *composing* one or more pieces of code (notably the shared global/module + // stuff and the per-entry-point stuff). + // + // 2. We are *specializing* code that includes generic/existential parameters + // to concrete types/values. + // + // We need to decide the relative *order* of these two steps, because of how it impacts + // layout. The layout for `specialize(compose(A,B), X, Y)` is potentially different + // form that of `compose(specialize(A,X), speciealize(B,Y))`, even when both are + // semantically equivalent programs. + // + // Right now we are using the first option: we are first generating a full composition + // of all the code we plan to use (global scope plus all entry points), and then + // specializing it to the concatenated specialization argumenst for all of that. + // + // In some cases, though, this model isn't appropriate. For example, when dealing with + // ray-tracing shaders and local root signatures, we really want the parameters of each + // entry point (actually, each entry-point *group*) to be allocated distinct storage, + // which really means we want to compute something like: + // + // SpecializedGlobals = specialize(compose(ModuleA, ModuleB, ...), X, Y, ...) + // + // SpecializedEP1 = compose(SpecializedGlobals, specialize(EntryPoint1, T, U, ...)) + // SpecializedEP2 = compose(SpecializedGlobals, specialize(EntryPoint2, A, B, ...)) + // + // Note how in this case all entry points agree on the layout for the shared/common + // parmaeters, but their layouts are also independent of one another. + // + // Furthermore, in this example, loading another entry point into the system would not + // rquire re-computing the layouts (or generated kernel code) for any of the entry + // points that had already been loaded (in contrast to a compose-then-specialize + // approach). + // + ComPtr<slang::IComponentType> specializedComponentType; + ComPtr<slang::IBlob> diagnosticBlob; + auto result = getLayout()->getSlangProgram()->specialize( + specializationArgs.components.getArrayView().getBuffer(), + specializationArgs.getCount(), + specializedComponentType.writeRef(), + diagnosticBlob.writeRef()); + + if (diagnosticBlob && diagnosticBlob->getBufferSize()) + { + getDebugCallback()->handleMessage( + SLANG_FAILED(result) ? DebugMessageType::Error : DebugMessageType::Info, + DebugMessageSource::Layer, + (const char*)diagnosticBlob->getBufferPointer()); + } + + if (SLANG_FAILED(result)) + return result; + + ComPtr<ID3DBlob> d3dDiagnosticBlob; + auto slangSpecializedLayout = specializedComponentType->getLayout(); + RefPtr<RootShaderObjectLayoutImpl> specializedLayout; + auto rootLayoutResult = RootShaderObjectLayoutImpl::create( + static_cast<DeviceImpl*>(getRenderer()), + specializedComponentType, + slangSpecializedLayout, + specializedLayout.writeRef(), + d3dDiagnosticBlob.writeRef()); + + if (SLANG_FAILED(rootLayoutResult)) + { + return rootLayoutResult; + } + + // Note: Computing the layout for the specialized program will have also computed + // the layouts for the entry points, and we really need to attach that information + // to them so that they don't go and try to compute their own specializations. + // + // TODO: Well, if we move to the specialization model described above then maybe + // we *will* want entry points to do their own specialization work... + // + auto entryPointCount = m_entryPoints.getCount(); + for (Index i = 0; i < entryPointCount; ++i) + { + auto entryPointInfo = specializedLayout->getEntryPoint(i); + auto entryPointVars = m_entryPoints[i]; + + entryPointVars->m_specializedLayout = entryPointInfo.layout; + } + + returnRefPtrMove(outLayout, specializedLayout); + return SLANG_OK; +} + +Result RootShaderObjectImpl::copyFrom(IShaderObject* object, ITransientResourceHeap* transientHeap) +{ + if (auto srcObj = dynamic_cast<MutableRootShaderObjectImpl*>(object)) + { + *this = *srcObj; + return SLANG_OK; + } + return SLANG_FAIL; +} + +Result RootShaderObjectImpl::bindAsRoot( + BindingContext* context, RootShaderObjectLayoutImpl* specializedLayout) +{ + // Pull updates from sub-objects when this is a mutable root shader object. + updateSubObjectsRecursive(); + + // A root shader object always binds as if it were a parameter block, + // insofar as it needs to allocate a descriptor set to hold the bindings + // for its own state and any sub-objects. + // + // Note: We do not direclty use `bindAsParameterBlock` here because we also + // need to bind the entry points into the same descriptor set that is + // being used for the root object. + + BindingOffset rootOffset; + + // Bind all root parameters first. + Super::bindRootArguments(context, rootOffset.rootParam); + + DescriptorSet descriptorSet; + SLANG_RETURN_ON_FAIL(prepareToBindAsParameterBlock( + context, /* inout */ rootOffset, specializedLayout, descriptorSet)); + + SLANG_RETURN_ON_FAIL( + Super::bindAsConstantBuffer(context, descriptorSet, rootOffset, specializedLayout)); + + auto entryPointCount = m_entryPoints.getCount(); + for (Index i = 0; i < entryPointCount; ++i) + { + auto entryPoint = m_entryPoints[i]; + auto& entryPointInfo = specializedLayout->getEntryPoint(i); + + auto entryPointOffset = rootOffset; + entryPointOffset += entryPointInfo.offset; + + entryPoint->updateSubObjectsRecursive(); + + SLANG_RETURN_ON_FAIL(entryPoint->bindAsConstantBuffer( + context, descriptorSet, entryPointOffset, entryPointInfo.layout)); + } + + return SLANG_OK; +} + +Result RootShaderObjectImpl::resetImpl( + DeviceImpl* device, + RootShaderObjectLayoutImpl* layout, + DescriptorHeapReference viewHeap, + DescriptorHeapReference samplerHeap, + bool isMutable) +{ + SLANG_RETURN_ON_FAIL(Super::init(device, layout, viewHeap, samplerHeap)); + m_isMutable = isMutable; + m_specializedLayout = nullptr; + m_entryPoints.clear(); + for (auto entryPointInfo : layout->getEntryPoints()) + { + RefPtr<ShaderObjectImpl> entryPoint; + SLANG_RETURN_ON_FAIL( + ShaderObjectImpl::create(device, entryPointInfo.layout, entryPoint.writeRef())); + entryPoint->m_isMutable = isMutable; + m_entryPoints.add(entryPoint); + } + return SLANG_OK; +} + +Result RootShaderObjectImpl::reset( + DeviceImpl* device, RootShaderObjectLayoutImpl* layout, TransientResourceHeapImpl* heap) +{ + return resetImpl( + device, layout, &heap->m_stagingCpuViewHeap, &heap->m_stagingCpuSamplerHeap, false); +} + +} // namespace d3d12 +} // namespace gfx |
