diff options
| author | ZanderMajercik <amajercik@nvidia.com> | 2022-01-04 11:05:04 -0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2022-01-04 11:05:04 -0800 |
| commit | 9d6c7763334908c78027199a0cb1ca3b9841ebab (patch) | |
| tree | dc75824b08ca23554a0c8e1f98458c103605ebf9 /tools/gfx/d3d12/render-d3d12.cpp | |
| parent | 1a1b2a0de67dccc1102449b8620830131d569cde (diff) | |
Buffer allocation backend. (#2045)
* removed initialization of upload resource for CPU visible buffers (D3D12, Vulkan)
* reverted slang-gfx.h change
* declared DescBase::hasCpuAccessFlag() const to make backend changes compile under gcc
* commit before checking master branch
* commit before merge
* commit before checking master
* commit before merging upstream
* revert vulkan changes
* commit before testing on master
* commit before merge with master
* reverted bad merge changes
* reverted more bad merge changes in render-d3d12.cpp
* reverted buffer transition in _uploadBufferData
* implemented bufferBarrier() in render-d3d12.cpp
* reverted uneccesary transition in createBuffer
* create staging buffer even when AccessFlag::None passed to D3D11 createBufferResource
* renamed AccessFlags to MemoryType
Co-authored-by: Yong He <yonghe@outlook.com>
Diffstat (limited to 'tools/gfx/d3d12/render-d3d12.cpp')
| -rw-r--r-- | tools/gfx/d3d12/render-d3d12.cpp | 287 |
1 files changed, 183 insertions, 104 deletions
diff --git a/tools/gfx/d3d12/render-d3d12.cpp b/tools/gfx/d3d12/render-d3d12.cpp index aab260cd1..90bfb9bc6 100644 --- a/tools/gfx/d3d12/render-d3d12.cpp +++ b/tools/gfx/d3d12/render-d3d12.cpp @@ -246,8 +246,7 @@ public: } } - D3D12Resource m_resource; ///< The resource typically in gpu memory - D3D12Resource m_uploadResource; ///< If the resource can be written to, and is in gpu memory (ie not Memory backed), will have upload resource + D3D12Resource m_resource; ///< The resource in gpu memory, allocated on the correct heap relative to the cpu access flag D3D12_RESOURCE_STATES m_defaultState; @@ -623,41 +622,6 @@ public: out.Flags = D3D12_RESOURCE_FLAG_NONE; } - static Result _uploadBufferData( - ID3D12GraphicsCommandList* cmdList, - BufferResourceImpl* buffer, - size_t offset, - size_t size, - void* data) - { - D3D12_RANGE readRange = {}; - readRange.Begin = offset; - readRange.End = offset + size; - - void* uploadData; - SLANG_RETURN_ON_FAIL(buffer->m_uploadResource.getResource()->Map( - 0, &readRange, reinterpret_cast<void**>(&uploadData))); - memcpy((uint8_t*)uploadData + offset, data, size); - buffer->m_uploadResource.getResource()->Unmap(0, &readRange); - { - D3D12BarrierSubmitter submitter(cmdList); - submitter.transition( - buffer->m_resource, buffer->m_defaultState, D3D12_RESOURCE_STATE_COPY_DEST); - } - cmdList->CopyBufferRegion( - buffer->m_resource.getResource(), - offset, - buffer->m_uploadResource.getResource(), - offset, - size); - { - D3D12BarrierSubmitter submitter(cmdList); - submitter.transition( - buffer->m_resource, D3D12_RESOURCE_STATE_COPY_DEST, buffer->m_defaultState); - } - return SLANG_OK; - } - class TransientResourceHeapImpl : public TransientResourceHeapBaseImpl<D3D12Device, BufferResourceImpl> { @@ -721,7 +685,7 @@ public: auto d3dDevice = device->m_device; SLANG_RETURN_ON_FAIL(d3dDevice->CreateCommandAllocator( D3D12_COMMAND_LIST_TYPE_DIRECT, IID_PPV_ARGS(m_commandAllocator.writeRef()))); - + SLANG_RETURN_ON_FAIL(m_viewHeap.init( d3dDevice, viewHeapSize, @@ -742,7 +706,7 @@ public: bufferDesc.allowedStates = ResourceStateSet(ResourceState::ConstantBuffer, ResourceState::CopyDestination); bufferDesc.sizeInBytes = desc.constantBufferSize; - bufferDesc.cpuAccessFlags |= AccessFlag::Write; + bufferDesc.cpuAccessFlags |= MemoryType::CpuWrite; SLANG_RETURN_ON_FAIL(device->createBufferResource( bufferDesc, nullptr, @@ -758,6 +722,58 @@ public: virtual SLANG_NO_THROW Result SLANG_MCALL synchronizeAndReset() override; }; + static Result _uploadBufferData( + ID3D12Device* device, + ID3D12GraphicsCommandList* cmdList, + TransientResourceHeapImpl* transientHeap, + BufferResourceImpl* buffer, + size_t offset, + size_t size, + void* data) + { + D3D12_RANGE readRange = {}; + readRange.Begin = offset; + readRange.End = offset + size; + + + IBufferResource* uploadResource; + if (!buffer->getDesc()->hasCpuAccessFlag(MemoryType::CpuWrite)) + { + transientHeap->allocateStagingBuffer(size, uploadResource, ResourceState::CopySource); + } + + D3D12Resource& uploadResourceRef = (buffer->getDesc()->hasCpuAccessFlag(MemoryType::CpuWrite)) ? buffer->m_resource : static_cast<BufferResourceImpl*>(uploadResource)->m_resource; + + void* uploadData; + SLANG_RETURN_ON_FAIL(uploadResourceRef.getResource()->Map( + 0, &readRange, reinterpret_cast<void**>(&uploadData))); + memcpy((uint8_t*)uploadData + offset, data, size); + uploadResourceRef.getResource()->Unmap(0, &readRange); + + if (!buffer->getDesc()->hasCpuAccessFlag(MemoryType::CpuWrite)) { + { + D3D12BarrierSubmitter submitter(cmdList); + submitter.transition( + buffer->m_resource, buffer->m_defaultState, D3D12_RESOURCE_STATE_COPY_DEST); + } + cmdList->CopyBufferRegion( + buffer->m_resource.getResource(), + offset, + uploadResourceRef.getResource(), + offset, + size); + + // Should already be in COPY_DEST if write flag was set. + { + D3D12BarrierSubmitter submitter(cmdList); + submitter.transition( + buffer->m_resource, D3D12_RESOURCE_STATE_COPY_DEST, buffer->m_defaultState); + } + } + + return SLANG_OK; + } + class CommandBufferImpl; class PipelineCommandEncoder @@ -2308,7 +2324,7 @@ public: SLANG_ASSERT(srcSize <= destSize); - _uploadBufferData(encoder->m_d3dCmdList, buffer, offset, srcSize, src); + _uploadBufferData(encoder->m_device, encoder->m_d3dCmdList, encoder->m_transientHeap, buffer, offset, srcSize, src); // In the case where this object has any sub-objects of // existential/interface type, we need to recurse on those objects @@ -3601,7 +3617,9 @@ public: void* data) override { _uploadBufferData( + m_commandBuffer->m_renderer->m_device, m_commandBuffer->m_cmdList, + m_commandBuffer->m_transientHeap, static_cast<BufferResourceImpl*>(dst), offset, size, @@ -3663,6 +3681,33 @@ public: ResourceState src, ResourceState dst) override { + + List<D3D12_RESOURCE_BARRIER> barriers; + barriers.reserve(count); + + for (size_t i = 0; i < count; i++) + { + auto bufferImpl = static_cast<BufferResourceImpl*>(buffers[i]); + + D3D12_RESOURCE_BARRIER barrier = {}; + // If the src == dst, it must be a UAV barrier. + barrier.Type = (src == dst) ? D3D12_RESOURCE_BARRIER_TYPE_UAV : D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; + barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE; + + if (barrier.Type == D3D12_RESOURCE_BARRIER_TYPE_UAV) { + barrier.UAV.pResource = bufferImpl->m_resource; + } + else { + barrier.Transition.pResource = bufferImpl->m_resource; + barrier.Transition.StateBefore = D3DUtil::translateResourceState(src); + barrier.Transition.StateAfter = D3DUtil::translateResourceState(dst); + barrier.Transition.Subresource = 0; + } + + barriers.add(barrier); + } + + m_commandBuffer->m_cmdList4->ResourceBarrier((UINT)count, barriers.getArrayView().getBuffer()); } virtual SLANG_NO_THROW void SLANG_MCALL endEncoding() {} virtual SLANG_NO_THROW void SLANG_MCALL writeTimestamp(IQueryPool* pool, SlangInt index) override @@ -4149,8 +4194,8 @@ public: virtual SLANG_NO_THROW void SLANG_MCALL memoryBarrier( int count, IAccelerationStructure* const* structures, - AccessFlag::Enum sourceAccess, - AccessFlag::Enum destAccess) override; + MemoryType::Enum sourceAccess, + MemoryType::Enum destAccess) override; virtual SLANG_NO_THROW void SLANG_MCALL bindPipeline(IPipelineState* state, IShaderObject** outRootObject) override; virtual SLANG_NO_THROW void SLANG_MCALL dispatchRays( @@ -4455,10 +4500,10 @@ public: const D3D12_RESOURCE_DESC& resourceDesc, const void* srcData, size_t srcDataSize, - D3D12Resource& uploadResource, D3D12_RESOURCE_STATES finalState, D3D12Resource& resourceOut, - bool isShared = false); + bool isShared, + MemoryType::Enum access = MemoryType::GpuOnly); Result captureTextureToSurface( TextureResourceImpl* resource, @@ -4748,57 +4793,83 @@ static void _initSrvDesc( } } -Result D3D12Device::createBuffer(const D3D12_RESOURCE_DESC& resourceDesc, const void* srcData, size_t srcDataSize, D3D12Resource& uploadResource, D3D12_RESOURCE_STATES finalState, D3D12Resource& resourceOut, bool isShared) +Result D3D12Device::createBuffer(const D3D12_RESOURCE_DESC& resourceDesc, const void* srcData, size_t srcDataSize, D3D12_RESOURCE_STATES finalState, D3D12Resource& resourceOut, bool isShared, MemoryType::Enum access) { const size_t bufferSize = size_t(resourceDesc.Width); - { - D3D12_HEAP_PROPERTIES heapProps; - heapProps.Type = D3D12_HEAP_TYPE_DEFAULT; - heapProps.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN; - heapProps.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN; - heapProps.CreationNodeMask = 1; - heapProps.VisibleNodeMask = 1; + D3D12_HEAP_PROPERTIES heapProps; + heapProps.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN; + heapProps.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN; + heapProps.CreationNodeMask = 1; + heapProps.VisibleNodeMask = 1; - D3D12_HEAP_FLAGS flags = D3D12_HEAP_FLAG_NONE; - if (isShared) flags |= D3D12_HEAP_FLAG_SHARED; + D3D12_HEAP_FLAGS flags = D3D12_HEAP_FLAG_NONE; + if (isShared) flags |= D3D12_HEAP_FLAG_SHARED; - const D3D12_RESOURCE_STATES initialState = srcData ? D3D12_RESOURCE_STATE_COPY_DEST : finalState; + D3D12_RESOURCE_DESC desc = resourceDesc; - SLANG_RETURN_ON_FAIL(resourceOut.initCommitted(m_device, heapProps, flags, resourceDesc, initialState, nullptr)); - } + D3D12_RESOURCE_STATES initialState = finalState; - { - D3D12_HEAP_PROPERTIES heapProps; - heapProps.Type = D3D12_HEAP_TYPE_UPLOAD; - heapProps.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN; - heapProps.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN; - heapProps.CreationNodeMask = 1; - heapProps.VisibleNodeMask = 1; + switch (access) { + case MemoryType::CpuRead: + assert(!srcData); - D3D12_RESOURCE_DESC uploadResourceDesc(resourceDesc); - uploadResourceDesc.Flags = D3D12_RESOURCE_FLAG_NONE; + heapProps.Type = D3D12_HEAP_TYPE_READBACK; + desc.Flags = D3D12_RESOURCE_FLAG_NONE; + initialState |= D3D12_RESOURCE_STATE_COPY_DEST; - SLANG_RETURN_ON_FAIL(uploadResource.initCommitted(m_device, heapProps, D3D12_HEAP_FLAG_NONE, uploadResourceDesc, D3D12_RESOURCE_STATE_GENERIC_READ, nullptr)); - } + break; + case MemoryType::CpuWrite: - if (srcData) - { - // Copy data to the intermediate upload heap and then schedule a copy - // from the upload heap to the vertex buffer. - UINT8* dstData; - D3D12_RANGE readRange = {}; // We do not intend to read from this resource on the CPU. + heapProps.Type = D3D12_HEAP_TYPE_UPLOAD; + desc.Flags = D3D12_RESOURCE_FLAG_NONE; + initialState |= D3D12_RESOURCE_STATE_GENERIC_READ; - ID3D12Resource* dxUploadResource = uploadResource.getResource(); + break; + case MemoryType::GpuOnly: + heapProps.Type = D3D12_HEAP_TYPE_DEFAULT; + initialState = (srcData ? D3D12_RESOURCE_STATE_COPY_DEST : finalState); + break; + default: + return SLANG_FAIL; + } - SLANG_RETURN_ON_FAIL(dxUploadResource->Map(0, &readRange, reinterpret_cast<void**>(&dstData))); - ::memcpy(dstData, srcData, srcDataSize); - dxUploadResource->Unmap(0, nullptr); + // Create the resource. + SLANG_RETURN_ON_FAIL(resourceOut.initCommitted(m_device, heapProps, flags, desc, initialState, nullptr)); - auto encodeInfo = encodeResourceCommands(); - encodeInfo.d3dCommandList->CopyBufferRegion(resourceOut, 0, uploadResource, 0, bufferSize); - submitResourceCommandsAndWait(encodeInfo); - } + if (srcData) + { + D3D12Resource uploadResource; + + if (access == MemoryType::GpuOnly) { + // If the buffer is on the default heap, create upload buffer. + D3D12_RESOURCE_DESC uploadDesc(resourceDesc); + uploadDesc.Flags = D3D12_RESOURCE_FLAG_NONE; + heapProps.Type = D3D12_HEAP_TYPE_UPLOAD; + + SLANG_RETURN_ON_FAIL(uploadResource.initCommitted(m_device, heapProps, D3D12_HEAP_FLAG_NONE, uploadDesc, D3D12_RESOURCE_STATE_GENERIC_READ, nullptr)); + } + + // Be careful not to actually copy a resource here. + D3D12Resource& uploadResourceRef = (access == MemoryType::GpuOnly) ? uploadResource : resourceOut; + + // Copy data to the intermediate upload heap and then schedule a copy + // from the upload heap to the vertex buffer. + UINT8* dstData; + D3D12_RANGE readRange = {}; // We do not intend to read from this resource on the CPU. + + ID3D12Resource* dxUploadResource = uploadResourceRef.getResource(); + + SLANG_RETURN_ON_FAIL(dxUploadResource->Map(0, &readRange, reinterpret_cast<void**>(&dstData))); + ::memcpy(dstData, srcData, srcDataSize); + dxUploadResource->Unmap(0, nullptr); + + if (access == MemoryType::GpuOnly) { + auto encodeInfo = encodeResourceCommands(); + encodeInfo.d3dCommandList->CopyBufferRegion(resourceOut, 0, uploadResourceRef, 0, bufferSize); + submitResourceCommandsAndWait(encodeInfo); + } + } return SLANG_OK; } @@ -5711,7 +5782,8 @@ Result D3D12Device::createBufferResource(const IBufferResource::Desc& descIn, co bufferDesc.Flags |= _calcResourceFlags(srcDesc.allowedStates); const D3D12_RESOURCE_STATES initialState = buffer->m_defaultState; - SLANG_RETURN_ON_FAIL(createBuffer(bufferDesc, initData, srcDesc.sizeInBytes, buffer->m_uploadResource, initialState, buffer->m_resource, descIn.isShared)); + SLANG_RETURN_ON_FAIL(createBuffer(bufferDesc, initData, srcDesc.sizeInBytes, initialState, buffer->m_resource, descIn.isShared, + (MemoryType::Enum)descIn.cpuAccessFlags)); returnComPtr(outResource, buffer); return SLANG_OK; @@ -6252,7 +6324,6 @@ Result D3D12Device::readBufferResource( size_t size, ISlangBlob** outBlob) { - auto encodeInfo = encodeResourceCommands(); BufferResourceImpl* buffer = static_cast<BufferResourceImpl*>(bufferIn); @@ -6261,26 +6332,34 @@ Result D3D12Device::readBufferResource( // This will be slow!!! - it blocks CPU on GPU completion D3D12Resource& resource = buffer->m_resource; - // Readback heap - D3D12_HEAP_PROPERTIES heapProps; - heapProps.Type = D3D12_HEAP_TYPE_READBACK; - heapProps.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN; - heapProps.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN; - heapProps.CreationNodeMask = 1; - heapProps.VisibleNodeMask = 1; + D3D12Resource stageBuf; + if (buffer->getDesc()->cpuAccessFlags != (int)MemoryType::CpuRead) { - // Resource to readback to - D3D12_RESOURCE_DESC stagingDesc; - _initBufferResourceDesc(size, stagingDesc); + auto encodeInfo = encodeResourceCommands(); - D3D12Resource stageBuf; - SLANG_RETURN_ON_FAIL(stageBuf.initCommitted(m_device, heapProps, D3D12_HEAP_FLAG_NONE, stagingDesc, D3D12_RESOURCE_STATE_COPY_DEST, nullptr)); - // Do the copy - encodeInfo.d3dCommandList->CopyBufferRegion(stageBuf, 0, resource, offset, size); + // Readback heap + D3D12_HEAP_PROPERTIES heapProps; + heapProps.Type = D3D12_HEAP_TYPE_READBACK; + heapProps.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN; + heapProps.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN; + heapProps.CreationNodeMask = 1; + heapProps.VisibleNodeMask = 1; - // Wait until complete - submitResourceCommandsAndWait(encodeInfo); + // Resource to readback to + D3D12_RESOURCE_DESC stagingDesc; + _initBufferResourceDesc(size, stagingDesc); + + SLANG_RETURN_ON_FAIL(stageBuf.initCommitted(m_device, heapProps, D3D12_HEAP_FLAG_NONE, stagingDesc, D3D12_RESOURCE_STATE_COPY_DEST, nullptr)); + + // Do the copy + encodeInfo.d3dCommandList->CopyBufferRegion(stageBuf, 0, resource, offset, size); + + // Wait until complete + submitResourceCommandsAndWait(encodeInfo); + } + + D3D12Resource& stageBufRef = (buffer->getDesc()->cpuAccessFlags != (int)MemoryType::CpuRead) ? stageBuf : resource; // Map and copy RefPtr<ListBlob> blob = new ListBlob(); @@ -6288,13 +6367,13 @@ Result D3D12Device::readBufferResource( UINT8* data; D3D12_RANGE readRange = { 0, size }; - SLANG_RETURN_ON_FAIL(stageBuf.getResource()->Map(0, &readRange, reinterpret_cast<void**>(&data))); + SLANG_RETURN_ON_FAIL(stageBufRef.getResource()->Map(0, &readRange, reinterpret_cast<void**>(&data))); // Copy to memory buffer blob->m_data.setCount(size); ::memcpy(blob->m_data.getBuffer(), data, size); - stageBuf.getResource()->Unmap(0, nullptr); + stageBufRef.getResource()->Unmap(0, nullptr); } returnComPtr(outBlob, blob); return SLANG_OK; @@ -6962,8 +7041,8 @@ void D3D12Device::CommandBufferImpl::RayTracingCommandEncoderImpl::deserializeAc void D3D12Device::CommandBufferImpl::RayTracingCommandEncoderImpl::memoryBarrier( int count, IAccelerationStructure* const* structures, - AccessFlag::Enum sourceAccess, - AccessFlag::Enum destAccess) + MemoryType::Enum sourceAccess, + MemoryType::Enum destAccess) { ShortList<D3D12_RESOURCE_BARRIER> barriers; barriers.setCount(count); |
