diff options
| author | ZanderMajercik <amajercik@nvidia.com> | 2022-01-04 11:05:04 -0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2022-01-04 11:05:04 -0800 |
| commit | 9d6c7763334908c78027199a0cb1ca3b9841ebab (patch) | |
| tree | dc75824b08ca23554a0c8e1f98458c103605ebf9 /tools | |
| parent | 1a1b2a0de67dccc1102449b8620830131d569cde (diff) | |
Buffer allocation backend. (#2045)
* removed initialization of upload resource for CPU visible buffers (D3D12, Vulkan)
* reverted slang-gfx.h change
* declared DescBase::hasCpuAccessFlag() const to make backend changes compile under gcc
* commit before checking master branch
* commit before merge
* commit before checking master
* commit before merging upstream
* revert vulkan changes
* commit before testing on master
* commit before merge with master
* reverted bad merge changes
* reverted more bad merge changes in render-d3d12.cpp
* reverted buffer transition in _uploadBufferData
* implemented bufferBarrier() in render-d3d12.cpp
* reverted unnecessary transition in createBuffer
* create staging buffer even when AccessFlag::None is passed to D3D11 createBufferResource
* renamed AccessFlags to MemoryType
Co-authored-by: Yong He <yonghe@outlook.com>
Diffstat (limited to 'tools')
20 files changed, 228 insertions, 150 deletions
diff --git a/tools/gfx-unit-test/buffer-barrier-test.cpp b/tools/gfx-unit-test/buffer-barrier-test.cpp index f11dd1e5b..0103fda4d 100644 --- a/tools/gfx-unit-test/buffer-barrier-test.cpp +++ b/tools/gfx-unit-test/buffer-barrier-test.cpp @@ -32,7 +32,7 @@ namespace gfx_test bufferDesc.format = gfx::Format::Unknown; bufferDesc.elementSize = sizeof(float); bufferDesc.defaultState = unorderedAccess ? ResourceState::UnorderedAccess : ResourceState::ShaderResource; - bufferDesc.cpuAccessFlags = AccessFlag::Write | AccessFlag::Read; + bufferDesc.cpuAccessFlags = MemoryType::GpuOnly; bufferDesc.allowedStates = ResourceStateSet( ResourceState::ShaderResource, ResourceState::CopyDestination, diff --git a/tools/gfx-unit-test/compute-smoke.cpp b/tools/gfx-unit-test/compute-smoke.cpp index 4dc3c685c..dbe22f08b 100644 --- a/tools/gfx-unit-test/compute-smoke.cpp +++ b/tools/gfx-unit-test/compute-smoke.cpp @@ -39,7 +39,7 @@ namespace gfx_test ResourceState::CopyDestination, ResourceState::CopySource); bufferDesc.defaultState = ResourceState::UnorderedAccess; - bufferDesc.cpuAccessFlags = AccessFlag::Write | AccessFlag::Read; + bufferDesc.cpuAccessFlags = MemoryType::GpuOnly; ComPtr<IBufferResource> numbersBuffer; GFX_CHECK_CALL_ABORT(device->createBufferResource( diff --git a/tools/gfx-unit-test/create-buffer-from-handle.cpp b/tools/gfx-unit-test/create-buffer-from-handle.cpp index 71b5f5ca2..f7e9356bc 100644 --- a/tools/gfx-unit-test/create-buffer-from-handle.cpp +++ b/tools/gfx-unit-test/create-buffer-from-handle.cpp @@ -39,7 +39,7 @@ namespace gfx_test ResourceState::CopyDestination, ResourceState::CopySource); bufferDesc.defaultState = ResourceState::UnorderedAccess; - bufferDesc.cpuAccessFlags = AccessFlag::Write | AccessFlag::Read; + bufferDesc.cpuAccessFlags = MemoryType::GpuOnly; ComPtr<IBufferResource> originalNumbersBuffer; GFX_CHECK_CALL_ABORT(device->createBufferResource( diff --git a/tools/gfx-unit-test/existing-device-handle-test.cpp 
b/tools/gfx-unit-test/existing-device-handle-test.cpp index 257ac1f37..efb21dd0b 100644 --- a/tools/gfx-unit-test/existing-device-handle-test.cpp +++ b/tools/gfx-unit-test/existing-device-handle-test.cpp @@ -39,7 +39,7 @@ namespace gfx_test ResourceState::CopyDestination, ResourceState::CopySource); bufferDesc.defaultState = ResourceState::UnorderedAccess; - bufferDesc.cpuAccessFlags = AccessFlag::Write | AccessFlag::Read; + bufferDesc.cpuAccessFlags = MemoryType::GpuOnly; ComPtr<IBufferResource> numbersBuffer; GFX_CHECK_CALL_ABORT(device->createBufferResource( diff --git a/tools/gfx-unit-test/format-unit-tests.cpp b/tools/gfx-unit-test/format-unit-tests.cpp index d46a074e5..2cba98178 100644 --- a/tools/gfx-unit-test/format-unit-tests.cpp +++ b/tools/gfx-unit-test/format-unit-tests.cpp @@ -137,7 +137,7 @@ namespace gfx_test ResourceState::CopyDestination, ResourceState::CopySource); bufferDesc.defaultState = ResourceState::UnorderedAccess; - bufferDesc.cpuAccessFlags = AccessFlag::Write | AccessFlag::Read; + bufferDesc.cpuAccessFlags = MemoryType::GpuOnly; ComPtr<IBufferResource> outBuffer; GFX_CHECK_CALL_ABORT(device->createBufferResource( diff --git a/tools/gfx-unit-test/get-buffer-resource-handle-test.cpp b/tools/gfx-unit-test/get-buffer-resource-handle-test.cpp index a5fa5e441..047522688 100644 --- a/tools/gfx-unit-test/get-buffer-resource-handle-test.cpp +++ b/tools/gfx-unit-test/get-buffer-resource-handle-test.cpp @@ -27,7 +27,7 @@ namespace gfx_test ResourceState::CopyDestination, ResourceState::CopySource); bufferDesc.defaultState = ResourceState::UnorderedAccess; - bufferDesc.cpuAccessFlags = AccessFlag::Write | AccessFlag::Read; + bufferDesc.cpuAccessFlags = MemoryType::GpuOnly; ComPtr<IBufferResource> buffer; GFX_CHECK_CALL_ABORT(device->createBufferResource( diff --git a/tools/gfx-unit-test/mutable-shader-object.cpp b/tools/gfx-unit-test/mutable-shader-object.cpp index 8e3838620..0880bc2d2 100644 --- a/tools/gfx-unit-test/mutable-shader-object.cpp +++ 
b/tools/gfx-unit-test/mutable-shader-object.cpp @@ -39,7 +39,7 @@ namespace gfx_test ResourceState::CopyDestination, ResourceState::CopySource); bufferDesc.defaultState = ResourceState::UnorderedAccess; - bufferDesc.cpuAccessFlags = AccessFlag::Write | AccessFlag::Read; + bufferDesc.cpuAccessFlags = MemoryType::GpuOnly; ComPtr<IBufferResource> numbersBuffer; GFX_CHECK_CALL_ABORT(device->createBufferResource( diff --git a/tools/gfx-unit-test/root-mutable-shader-object.cpp b/tools/gfx-unit-test/root-mutable-shader-object.cpp index c20d3aa41..40b57e79d 100644 --- a/tools/gfx-unit-test/root-mutable-shader-object.cpp +++ b/tools/gfx-unit-test/root-mutable-shader-object.cpp @@ -39,7 +39,7 @@ namespace gfx_test ResourceState::CopyDestination, ResourceState::CopySource); bufferDesc.defaultState = ResourceState::UnorderedAccess; - bufferDesc.cpuAccessFlags = AccessFlag::Write | AccessFlag::Read; + bufferDesc.cpuAccessFlags = MemoryType::GpuOnly; ComPtr<IBufferResource> numbersBuffer; GFX_CHECK_CALL_ABORT(device->createBufferResource( diff --git a/tools/gfx-unit-test/shared-buffers-tests.cpp b/tools/gfx-unit-test/shared-buffers-tests.cpp index fe7757083..fc9f88173 100644 --- a/tools/gfx-unit-test/shared-buffers-tests.cpp +++ b/tools/gfx-unit-test/shared-buffers-tests.cpp @@ -25,7 +25,7 @@ namespace gfx_test ResourceState::CopyDestination, ResourceState::CopySource); bufferDesc.defaultState = ResourceState::UnorderedAccess; - bufferDesc.cpuAccessFlags = AccessFlag::Write | AccessFlag::Read; + bufferDesc.cpuAccessFlags = MemoryType::GpuOnly; bufferDesc.isShared = true; ComPtr<IBufferResource> srcBuffer; diff --git a/tools/gfx-unit-test/shared-textures-tests.cpp b/tools/gfx-unit-test/shared-textures-tests.cpp index d32a8493e..b2944b6e7 100644 --- a/tools/gfx-unit-test/shared-textures-tests.cpp +++ b/tools/gfx-unit-test/shared-textures-tests.cpp @@ -114,7 +114,7 @@ namespace gfx_test ResourceState::CopyDestination, ResourceState::CopySource); bufferDesc.defaultState = 
ResourceState::UnorderedAccess; - bufferDesc.cpuAccessFlags = AccessFlag::Write | AccessFlag::Read; + bufferDesc.cpuAccessFlags = MemoryType::GpuOnly; ComPtr<IBufferResource> outBuffer; GFX_CHECK_CALL_ABORT(device->createBufferResource( diff --git a/tools/gfx/d3d11/render-d3d11.cpp b/tools/gfx/d3d11/render-d3d11.cpp index cb6d62830..aa5136873 100644 --- a/tools/gfx/d3d11/render-d3d11.cpp +++ b/tools/gfx/d3d11/render-d3d11.cpp @@ -1514,7 +1514,7 @@ protected: bufferDesc.defaultState = ResourceState::ConstantBuffer; bufferDesc.allowedStates = ResourceStateSet(ResourceState::ConstantBuffer, ResourceState::CopyDestination); - bufferDesc.cpuAccessFlags |= AccessFlag::Write; + bufferDesc.cpuAccessFlags |= MemoryType::CpuWrite; SLANG_RETURN_ON_FAIL( device->createBufferResource(bufferDesc, nullptr, bufferResourcePtr.writeRef())); m_ordinaryDataBuffer = static_cast<BufferResourceImpl*>(bufferResourcePtr.get()); @@ -2554,10 +2554,10 @@ static int _calcResourceAccessFlags(int accessFlags) switch (accessFlags) { case 0: return 0; - case AccessFlag::Read: return D3D11_CPU_ACCESS_READ; - case AccessFlag::Write: return D3D11_CPU_ACCESS_WRITE; - case AccessFlag::Read | - AccessFlag::Write: return D3D11_CPU_ACCESS_READ | D3D11_CPU_ACCESS_WRITE; + case MemoryType::CpuRead: return D3D11_CPU_ACCESS_READ; + case MemoryType::CpuWrite: return D3D11_CPU_ACCESS_WRITE; + case MemoryType::CpuRead | + MemoryType::CpuWrite: return D3D11_CPU_ACCESS_READ | D3D11_CPU_ACCESS_WRITE; default: assert(!"Invalid flags"); return 0; } } @@ -2711,11 +2711,11 @@ Result D3D11Device::createBufferResource(const IBufferResource::Desc& descIn, co bufferDesc.BindFlags = d3dBindFlags; // For read we'll need to do some staging bufferDesc.CPUAccessFlags = - _calcResourceAccessFlags(descIn.cpuAccessFlags & AccessFlag::Write); + _calcResourceAccessFlags(descIn.cpuAccessFlags & MemoryType::CpuWrite); bufferDesc.Usage = D3D11_USAGE_DEFAULT; // If written by CPU, make it dynamic - if ((descIn.cpuAccessFlags & 
AccessFlag::Write) && + if ((descIn.cpuAccessFlags & MemoryType::CpuWrite) && !descIn.allowedStates.contains(ResourceState::UnorderedAccess)) { bufferDesc.Usage = D3D11_USAGE_DYNAMIC; @@ -2746,7 +2746,7 @@ Result D3D11Device::createBufferResource(const IBufferResource::Desc& descIn, co } } - if (srcDesc.cpuAccessFlags & AccessFlag::Write) + if (srcDesc.cpuAccessFlags & MemoryType::CpuWrite) { bufferDesc.CPUAccessFlags |= D3D11_CPU_ACCESS_WRITE; } @@ -2759,8 +2759,7 @@ Result D3D11Device::createBufferResource(const IBufferResource::Desc& descIn, co SLANG_RETURN_ON_FAIL(m_device->CreateBuffer(&bufferDesc, initData ? &subResourceData : nullptr, buffer->m_buffer.writeRef())); buffer->m_d3dUsage = bufferDesc.Usage; - if ((srcDesc.cpuAccessFlags & AccessFlag::Read) || - ((srcDesc.cpuAccessFlags & AccessFlag::Write) && bufferDesc.Usage != D3D11_USAGE_DYNAMIC)) + if (srcDesc.cpuAccessFlags & MemoryType::CpuRead || bufferDesc.Usage != D3D11_USAGE_DYNAMIC) { D3D11_BUFFER_DESC bufDesc = {}; bufDesc.BindFlags = 0; diff --git a/tools/gfx/d3d12/render-d3d12.cpp b/tools/gfx/d3d12/render-d3d12.cpp index aab260cd1..90bfb9bc6 100644 --- a/tools/gfx/d3d12/render-d3d12.cpp +++ b/tools/gfx/d3d12/render-d3d12.cpp @@ -246,8 +246,7 @@ public: } } - D3D12Resource m_resource; ///< The resource typically in gpu memory - D3D12Resource m_uploadResource; ///< If the resource can be written to, and is in gpu memory (ie not Memory backed), will have upload resource + D3D12Resource m_resource; ///< The resource in gpu memory, allocated on the correct heap relative to the cpu access flag D3D12_RESOURCE_STATES m_defaultState; @@ -623,41 +622,6 @@ public: out.Flags = D3D12_RESOURCE_FLAG_NONE; } - static Result _uploadBufferData( - ID3D12GraphicsCommandList* cmdList, - BufferResourceImpl* buffer, - size_t offset, - size_t size, - void* data) - { - D3D12_RANGE readRange = {}; - readRange.Begin = offset; - readRange.End = offset + size; - - void* uploadData; - 
SLANG_RETURN_ON_FAIL(buffer->m_uploadResource.getResource()->Map( - 0, &readRange, reinterpret_cast<void**>(&uploadData))); - memcpy((uint8_t*)uploadData + offset, data, size); - buffer->m_uploadResource.getResource()->Unmap(0, &readRange); - { - D3D12BarrierSubmitter submitter(cmdList); - submitter.transition( - buffer->m_resource, buffer->m_defaultState, D3D12_RESOURCE_STATE_COPY_DEST); - } - cmdList->CopyBufferRegion( - buffer->m_resource.getResource(), - offset, - buffer->m_uploadResource.getResource(), - offset, - size); - { - D3D12BarrierSubmitter submitter(cmdList); - submitter.transition( - buffer->m_resource, D3D12_RESOURCE_STATE_COPY_DEST, buffer->m_defaultState); - } - return SLANG_OK; - } - class TransientResourceHeapImpl : public TransientResourceHeapBaseImpl<D3D12Device, BufferResourceImpl> { @@ -721,7 +685,7 @@ public: auto d3dDevice = device->m_device; SLANG_RETURN_ON_FAIL(d3dDevice->CreateCommandAllocator( D3D12_COMMAND_LIST_TYPE_DIRECT, IID_PPV_ARGS(m_commandAllocator.writeRef()))); - + SLANG_RETURN_ON_FAIL(m_viewHeap.init( d3dDevice, viewHeapSize, @@ -742,7 +706,7 @@ public: bufferDesc.allowedStates = ResourceStateSet(ResourceState::ConstantBuffer, ResourceState::CopyDestination); bufferDesc.sizeInBytes = desc.constantBufferSize; - bufferDesc.cpuAccessFlags |= AccessFlag::Write; + bufferDesc.cpuAccessFlags |= MemoryType::CpuWrite; SLANG_RETURN_ON_FAIL(device->createBufferResource( bufferDesc, nullptr, @@ -758,6 +722,58 @@ public: virtual SLANG_NO_THROW Result SLANG_MCALL synchronizeAndReset() override; }; + static Result _uploadBufferData( + ID3D12Device* device, + ID3D12GraphicsCommandList* cmdList, + TransientResourceHeapImpl* transientHeap, + BufferResourceImpl* buffer, + size_t offset, + size_t size, + void* data) + { + D3D12_RANGE readRange = {}; + readRange.Begin = offset; + readRange.End = offset + size; + + + IBufferResource* uploadResource; + if (!buffer->getDesc()->hasCpuAccessFlag(MemoryType::CpuWrite)) + { + 
transientHeap->allocateStagingBuffer(size, uploadResource, ResourceState::CopySource); + } + + D3D12Resource& uploadResourceRef = (buffer->getDesc()->hasCpuAccessFlag(MemoryType::CpuWrite)) ? buffer->m_resource : static_cast<BufferResourceImpl*>(uploadResource)->m_resource; + + void* uploadData; + SLANG_RETURN_ON_FAIL(uploadResourceRef.getResource()->Map( + 0, &readRange, reinterpret_cast<void**>(&uploadData))); + memcpy((uint8_t*)uploadData + offset, data, size); + uploadResourceRef.getResource()->Unmap(0, &readRange); + + if (!buffer->getDesc()->hasCpuAccessFlag(MemoryType::CpuWrite)) { + { + D3D12BarrierSubmitter submitter(cmdList); + submitter.transition( + buffer->m_resource, buffer->m_defaultState, D3D12_RESOURCE_STATE_COPY_DEST); + } + cmdList->CopyBufferRegion( + buffer->m_resource.getResource(), + offset, + uploadResourceRef.getResource(), + offset, + size); + + // Should already be in COPY_DEST if write flag was set. + { + D3D12BarrierSubmitter submitter(cmdList); + submitter.transition( + buffer->m_resource, D3D12_RESOURCE_STATE_COPY_DEST, buffer->m_defaultState); + } + } + + return SLANG_OK; + } + class CommandBufferImpl; class PipelineCommandEncoder @@ -2308,7 +2324,7 @@ public: SLANG_ASSERT(srcSize <= destSize); - _uploadBufferData(encoder->m_d3dCmdList, buffer, offset, srcSize, src); + _uploadBufferData(encoder->m_device, encoder->m_d3dCmdList, encoder->m_transientHeap, buffer, offset, srcSize, src); // In the case where this object has any sub-objects of // existential/interface type, we need to recurse on those objects @@ -3601,7 +3617,9 @@ public: void* data) override { _uploadBufferData( + m_commandBuffer->m_renderer->m_device, m_commandBuffer->m_cmdList, + m_commandBuffer->m_transientHeap, static_cast<BufferResourceImpl*>(dst), offset, size, @@ -3663,6 +3681,33 @@ public: ResourceState src, ResourceState dst) override { + + List<D3D12_RESOURCE_BARRIER> barriers; + barriers.reserve(count); + + for (size_t i = 0; i < count; i++) + { + auto 
bufferImpl = static_cast<BufferResourceImpl*>(buffers[i]); + + D3D12_RESOURCE_BARRIER barrier = {}; + // If the src == dst, it must be a UAV barrier. + barrier.Type = (src == dst) ? D3D12_RESOURCE_BARRIER_TYPE_UAV : D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; + barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE; + + if (barrier.Type == D3D12_RESOURCE_BARRIER_TYPE_UAV) { + barrier.UAV.pResource = bufferImpl->m_resource; + } + else { + barrier.Transition.pResource = bufferImpl->m_resource; + barrier.Transition.StateBefore = D3DUtil::translateResourceState(src); + barrier.Transition.StateAfter = D3DUtil::translateResourceState(dst); + barrier.Transition.Subresource = 0; + } + + barriers.add(barrier); + } + + m_commandBuffer->m_cmdList4->ResourceBarrier((UINT)count, barriers.getArrayView().getBuffer()); } virtual SLANG_NO_THROW void SLANG_MCALL endEncoding() {} virtual SLANG_NO_THROW void SLANG_MCALL writeTimestamp(IQueryPool* pool, SlangInt index) override @@ -4149,8 +4194,8 @@ public: virtual SLANG_NO_THROW void SLANG_MCALL memoryBarrier( int count, IAccelerationStructure* const* structures, - AccessFlag::Enum sourceAccess, - AccessFlag::Enum destAccess) override; + MemoryType::Enum sourceAccess, + MemoryType::Enum destAccess) override; virtual SLANG_NO_THROW void SLANG_MCALL bindPipeline(IPipelineState* state, IShaderObject** outRootObject) override; virtual SLANG_NO_THROW void SLANG_MCALL dispatchRays( @@ -4455,10 +4500,10 @@ public: const D3D12_RESOURCE_DESC& resourceDesc, const void* srcData, size_t srcDataSize, - D3D12Resource& uploadResource, D3D12_RESOURCE_STATES finalState, D3D12Resource& resourceOut, - bool isShared = false); + bool isShared, + MemoryType::Enum access = MemoryType::GpuOnly); Result captureTextureToSurface( TextureResourceImpl* resource, @@ -4748,57 +4793,83 @@ static void _initSrvDesc( } } -Result D3D12Device::createBuffer(const D3D12_RESOURCE_DESC& resourceDesc, const void* srcData, size_t srcDataSize, D3D12Resource& uploadResource, 
D3D12_RESOURCE_STATES finalState, D3D12Resource& resourceOut, bool isShared) +Result D3D12Device::createBuffer(const D3D12_RESOURCE_DESC& resourceDesc, const void* srcData, size_t srcDataSize, D3D12_RESOURCE_STATES finalState, D3D12Resource& resourceOut, bool isShared, MemoryType::Enum access) { const size_t bufferSize = size_t(resourceDesc.Width); - { - D3D12_HEAP_PROPERTIES heapProps; - heapProps.Type = D3D12_HEAP_TYPE_DEFAULT; - heapProps.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN; - heapProps.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN; - heapProps.CreationNodeMask = 1; - heapProps.VisibleNodeMask = 1; + D3D12_HEAP_PROPERTIES heapProps; + heapProps.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN; + heapProps.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN; + heapProps.CreationNodeMask = 1; + heapProps.VisibleNodeMask = 1; - D3D12_HEAP_FLAGS flags = D3D12_HEAP_FLAG_NONE; - if (isShared) flags |= D3D12_HEAP_FLAG_SHARED; + D3D12_HEAP_FLAGS flags = D3D12_HEAP_FLAG_NONE; + if (isShared) flags |= D3D12_HEAP_FLAG_SHARED; - const D3D12_RESOURCE_STATES initialState = srcData ? 
D3D12_RESOURCE_STATE_COPY_DEST : finalState; + D3D12_RESOURCE_DESC desc = resourceDesc; - SLANG_RETURN_ON_FAIL(resourceOut.initCommitted(m_device, heapProps, flags, resourceDesc, initialState, nullptr)); - } + D3D12_RESOURCE_STATES initialState = finalState; - { - D3D12_HEAP_PROPERTIES heapProps; - heapProps.Type = D3D12_HEAP_TYPE_UPLOAD; - heapProps.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN; - heapProps.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN; - heapProps.CreationNodeMask = 1; - heapProps.VisibleNodeMask = 1; + switch (access) { + case MemoryType::CpuRead: + assert(!srcData); - D3D12_RESOURCE_DESC uploadResourceDesc(resourceDesc); - uploadResourceDesc.Flags = D3D12_RESOURCE_FLAG_NONE; + heapProps.Type = D3D12_HEAP_TYPE_READBACK; + desc.Flags = D3D12_RESOURCE_FLAG_NONE; + initialState |= D3D12_RESOURCE_STATE_COPY_DEST; - SLANG_RETURN_ON_FAIL(uploadResource.initCommitted(m_device, heapProps, D3D12_HEAP_FLAG_NONE, uploadResourceDesc, D3D12_RESOURCE_STATE_GENERIC_READ, nullptr)); - } + break; + case MemoryType::CpuWrite: - if (srcData) - { - // Copy data to the intermediate upload heap and then schedule a copy - // from the upload heap to the vertex buffer. - UINT8* dstData; - D3D12_RANGE readRange = {}; // We do not intend to read from this resource on the CPU. + heapProps.Type = D3D12_HEAP_TYPE_UPLOAD; + desc.Flags = D3D12_RESOURCE_FLAG_NONE; + initialState |= D3D12_RESOURCE_STATE_GENERIC_READ; - ID3D12Resource* dxUploadResource = uploadResource.getResource(); + break; + case MemoryType::GpuOnly: + heapProps.Type = D3D12_HEAP_TYPE_DEFAULT; + initialState = (srcData ? D3D12_RESOURCE_STATE_COPY_DEST : finalState); + break; + default: + return SLANG_FAIL; + } - SLANG_RETURN_ON_FAIL(dxUploadResource->Map(0, &readRange, reinterpret_cast<void**>(&dstData))); - ::memcpy(dstData, srcData, srcDataSize); - dxUploadResource->Unmap(0, nullptr); + // Create the resource. 
+ SLANG_RETURN_ON_FAIL(resourceOut.initCommitted(m_device, heapProps, flags, desc, initialState, nullptr)); - auto encodeInfo = encodeResourceCommands(); - encodeInfo.d3dCommandList->CopyBufferRegion(resourceOut, 0, uploadResource, 0, bufferSize); - submitResourceCommandsAndWait(encodeInfo); - } + if (srcData) + { + D3D12Resource uploadResource; + + if (access == MemoryType::GpuOnly) { + // If the buffer is on the default heap, create upload buffer. + D3D12_RESOURCE_DESC uploadDesc(resourceDesc); + uploadDesc.Flags = D3D12_RESOURCE_FLAG_NONE; + heapProps.Type = D3D12_HEAP_TYPE_UPLOAD; + + SLANG_RETURN_ON_FAIL(uploadResource.initCommitted(m_device, heapProps, D3D12_HEAP_FLAG_NONE, uploadDesc, D3D12_RESOURCE_STATE_GENERIC_READ, nullptr)); + } + + // Be careful not to actually copy a resource here. + D3D12Resource& uploadResourceRef = (access == MemoryType::GpuOnly) ? uploadResource : resourceOut; + + // Copy data to the intermediate upload heap and then schedule a copy + // from the upload heap to the vertex buffer. + UINT8* dstData; + D3D12_RANGE readRange = {}; // We do not intend to read from this resource on the CPU. 
+ + ID3D12Resource* dxUploadResource = uploadResourceRef.getResource(); + + SLANG_RETURN_ON_FAIL(dxUploadResource->Map(0, &readRange, reinterpret_cast<void**>(&dstData))); + ::memcpy(dstData, srcData, srcDataSize); + dxUploadResource->Unmap(0, nullptr); + + if (access == MemoryType::GpuOnly) { + auto encodeInfo = encodeResourceCommands(); + encodeInfo.d3dCommandList->CopyBufferRegion(resourceOut, 0, uploadResourceRef, 0, bufferSize); + submitResourceCommandsAndWait(encodeInfo); + } + } return SLANG_OK; } @@ -5711,7 +5782,8 @@ Result D3D12Device::createBufferResource(const IBufferResource::Desc& descIn, co bufferDesc.Flags |= _calcResourceFlags(srcDesc.allowedStates); const D3D12_RESOURCE_STATES initialState = buffer->m_defaultState; - SLANG_RETURN_ON_FAIL(createBuffer(bufferDesc, initData, srcDesc.sizeInBytes, buffer->m_uploadResource, initialState, buffer->m_resource, descIn.isShared)); + SLANG_RETURN_ON_FAIL(createBuffer(bufferDesc, initData, srcDesc.sizeInBytes, initialState, buffer->m_resource, descIn.isShared, + (MemoryType::Enum)descIn.cpuAccessFlags)); returnComPtr(outResource, buffer); return SLANG_OK; @@ -6252,7 +6324,6 @@ Result D3D12Device::readBufferResource( size_t size, ISlangBlob** outBlob) { - auto encodeInfo = encodeResourceCommands(); BufferResourceImpl* buffer = static_cast<BufferResourceImpl*>(bufferIn); @@ -6261,26 +6332,34 @@ Result D3D12Device::readBufferResource( // This will be slow!!! 
- it blocks CPU on GPU completion D3D12Resource& resource = buffer->m_resource; - // Readback heap - D3D12_HEAP_PROPERTIES heapProps; - heapProps.Type = D3D12_HEAP_TYPE_READBACK; - heapProps.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN; - heapProps.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN; - heapProps.CreationNodeMask = 1; - heapProps.VisibleNodeMask = 1; + D3D12Resource stageBuf; + if (buffer->getDesc()->cpuAccessFlags != (int)MemoryType::CpuRead) { - // Resource to readback to - D3D12_RESOURCE_DESC stagingDesc; - _initBufferResourceDesc(size, stagingDesc); + auto encodeInfo = encodeResourceCommands(); - D3D12Resource stageBuf; - SLANG_RETURN_ON_FAIL(stageBuf.initCommitted(m_device, heapProps, D3D12_HEAP_FLAG_NONE, stagingDesc, D3D12_RESOURCE_STATE_COPY_DEST, nullptr)); - // Do the copy - encodeInfo.d3dCommandList->CopyBufferRegion(stageBuf, 0, resource, offset, size); + // Readback heap + D3D12_HEAP_PROPERTIES heapProps; + heapProps.Type = D3D12_HEAP_TYPE_READBACK; + heapProps.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN; + heapProps.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN; + heapProps.CreationNodeMask = 1; + heapProps.VisibleNodeMask = 1; - // Wait until complete - submitResourceCommandsAndWait(encodeInfo); + // Resource to readback to + D3D12_RESOURCE_DESC stagingDesc; + _initBufferResourceDesc(size, stagingDesc); + + SLANG_RETURN_ON_FAIL(stageBuf.initCommitted(m_device, heapProps, D3D12_HEAP_FLAG_NONE, stagingDesc, D3D12_RESOURCE_STATE_COPY_DEST, nullptr)); + + // Do the copy + encodeInfo.d3dCommandList->CopyBufferRegion(stageBuf, 0, resource, offset, size); + + // Wait until complete + submitResourceCommandsAndWait(encodeInfo); + } + + D3D12Resource& stageBufRef = (buffer->getDesc()->cpuAccessFlags != (int)MemoryType::CpuRead) ? 
stageBuf : resource; // Map and copy RefPtr<ListBlob> blob = new ListBlob(); @@ -6288,13 +6367,13 @@ Result D3D12Device::readBufferResource( UINT8* data; D3D12_RANGE readRange = { 0, size }; - SLANG_RETURN_ON_FAIL(stageBuf.getResource()->Map(0, &readRange, reinterpret_cast<void**>(&data))); + SLANG_RETURN_ON_FAIL(stageBufRef.getResource()->Map(0, &readRange, reinterpret_cast<void**>(&data))); // Copy to memory buffer blob->m_data.setCount(size); ::memcpy(blob->m_data.getBuffer(), data, size); - stageBuf.getResource()->Unmap(0, nullptr); + stageBufRef.getResource()->Unmap(0, nullptr); } returnComPtr(outBlob, blob); return SLANG_OK; @@ -6962,8 +7041,8 @@ void D3D12Device::CommandBufferImpl::RayTracingCommandEncoderImpl::deserializeAc void D3D12Device::CommandBufferImpl::RayTracingCommandEncoderImpl::memoryBarrier( int count, IAccelerationStructure* const* structures, - AccessFlag::Enum sourceAccess, - AccessFlag::Enum destAccess) + MemoryType::Enum sourceAccess, + MemoryType::Enum destAccess) { ShortList<D3D12_RESOURCE_BARRIER> barriers; barriers.setCount(count); diff --git a/tools/gfx/debug-layer.cpp b/tools/gfx/debug-layer.cpp index be76f8ade..97504017e 100644 --- a/tools/gfx/debug-layer.cpp +++ b/tools/gfx/debug-layer.cpp @@ -1428,8 +1428,8 @@ void DebugRayTracingCommandEncoder::deserializeAccelerationStructure( void DebugRayTracingCommandEncoder::memoryBarrier( int count, IAccelerationStructure* const* structures, - AccessFlag::Enum sourceAccess, - AccessFlag::Enum destAccess) + MemoryType::Enum sourceAccess, + MemoryType::Enum destAccess) { SLANG_GFX_API_FUNC; List<IAccelerationStructure*> innerAS; diff --git a/tools/gfx/debug-layer.h b/tools/gfx/debug-layer.h index 881d90341..35410530e 100644 --- a/tools/gfx/debug-layer.h +++ b/tools/gfx/debug-layer.h @@ -490,8 +490,8 @@ public: virtual SLANG_NO_THROW void SLANG_MCALL memoryBarrier( int count, IAccelerationStructure* const* structures, - AccessFlag::Enum sourceAccess, - AccessFlag::Enum destAccess) override; + 
MemoryType::Enum sourceAccess, + MemoryType::Enum destAccess) override; virtual SLANG_NO_THROW void SLANG_MCALL bindPipeline(IPipelineState* state, IShaderObject** outRootObject) override; virtual SLANG_NO_THROW void SLANG_MCALL dispatchRays( diff --git a/tools/gfx/open-gl/render-gl.cpp b/tools/gfx/open-gl/render-gl.cpp index 8e59356da..e7573e232 100644 --- a/tools/gfx/open-gl/render-gl.cpp +++ b/tools/gfx/open-gl/render-gl.cpp @@ -1260,7 +1260,7 @@ public: bufferDesc.defaultState = ResourceState::ConstantBuffer; bufferDesc.allowedStates = ResourceStateSet(ResourceState::ConstantBuffer, ResourceState::CopyDestination); - bufferDesc.cpuAccessFlags |= AccessFlag::Write; + bufferDesc.cpuAccessFlags |= MemoryType::CpuWrite; SLANG_RETURN_ON_FAIL( device->createBufferResource(bufferDesc, nullptr, bufferResourcePtr.writeRef())); m_ordinaryDataBuffer = static_cast<BufferResourceImpl*>(bufferResourcePtr.get()); diff --git a/tools/gfx/simple-transient-resource-heap.h b/tools/gfx/simple-transient-resource-heap.h index c6980f94c..23d9c3b59 100644 --- a/tools/gfx/simple-transient-resource-heap.h +++ b/tools/gfx/simple-transient-resource-heap.h @@ -24,7 +24,7 @@ public: bufferDesc.allowedStates = ResourceStateSet(ResourceState::ConstantBuffer, ResourceState::CopyDestination); bufferDesc.defaultState = ResourceState::ConstantBuffer; bufferDesc.sizeInBytes = desc.constantBufferSize; - bufferDesc.cpuAccessFlags = AccessFlag::Write; + bufferDesc.cpuAccessFlags = MemoryType::CpuWrite; SLANG_RETURN_ON_FAIL( device->createBufferResource(bufferDesc, nullptr, m_constantBuffer.writeRef())); return SLANG_OK; diff --git a/tools/gfx/transient-resource-heap-base.h b/tools/gfx/transient-resource-heap-base.h index c3e312b3c..8f346837a 100644 --- a/tools/gfx/transient-resource-heap-base.h +++ b/tools/gfx/transient-resource-heap-base.h @@ -31,7 +31,7 @@ public: bufferDesc.allowedStates = ResourceStateSet(ResourceState::ConstantBuffer, ResourceState::CopyDestination); bufferDesc.sizeInBytes = 
desc.constantBufferSize; - bufferDesc.cpuAccessFlags = AccessFlag::Write; + bufferDesc.cpuAccessFlags = MemoryType::CpuWrite; SLANG_RETURN_ON_FAIL( m_device->createBufferResource(bufferDesc, nullptr, bufferPtr.writeRef())); m_constantBuffers.add(static_cast<TBufferResource*>(bufferPtr.get())); @@ -56,9 +56,9 @@ public: bufferDesc.allowedStates = ResourceStateSet(ResourceState::CopyDestination, ResourceState::CopySource); if (state == ResourceState::CopySource) - bufferDesc.cpuAccessFlags |= AccessFlag::Write; + bufferDesc.cpuAccessFlags |= MemoryType::CpuWrite; else - bufferDesc.cpuAccessFlags |= AccessFlag::Read; + bufferDesc.cpuAccessFlags |= MemoryType::CpuRead; bufferDesc.sizeInBytes = size; SLANG_RETURN_ON_FAIL( m_device->createBufferResource(bufferDesc, nullptr, bufferPtr.writeRef())); @@ -96,7 +96,7 @@ public: bufferDesc.defaultState = ResourceState::ConstantBuffer; bufferDesc.allowedStates = ResourceStateSet(ResourceState::ConstantBuffer, ResourceState::CopyDestination); - bufferDesc.cpuAccessFlags |= AccessFlag::Write; + bufferDesc.cpuAccessFlags |= MemoryType::CpuWrite; size_t lastConstantBufferSize = 0; if (m_constantBuffers.getCount()) { diff --git a/tools/gfx/vulkan/render-vk.cpp b/tools/gfx/vulkan/render-vk.cpp index 232f18f01..8260ab283 100644 --- a/tools/gfx/vulkan/render-vk.cpp +++ b/tools/gfx/vulkan/render-vk.cpp @@ -4856,13 +4856,13 @@ public: public: void init(CommandBufferImpl* commandBuffer) { m_commandBuffer = commandBuffer; } - inline VkAccessFlags translateAccelerationStructureAccessFlag(AccessFlag::Enum access) + inline VkAccessFlags translateAccelerationStructureAccessFlag(MemoryType::Enum access) { VkAccessFlags result = 0; - if (access & AccessFlag::Read) + if (access & MemoryType::CpuRead) result |= VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR | VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_TRANSFER_READ_BIT; - if (access & AccessFlag::Write) + if (access & MemoryType::CpuWrite) result |= VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR; return 
result; } @@ -4870,8 +4870,8 @@ public: inline void _memoryBarrier( int count, IAccelerationStructure* const* structures, - AccessFlag::Enum srcAccess, - AccessFlag::Enum destAccess) + MemoryType::Enum srcAccess, + MemoryType::Enum destAccess) { ShortList<VkBufferMemoryBarrier> memBarriers; memBarriers.setCount(count); @@ -4998,7 +4998,7 @@ public: if (propertyQueryCount) { - _memoryBarrier(1, &desc.dest, AccessFlag::Write, AccessFlag::Read); + _memoryBarrier(1, &desc.dest, MemoryType::CpuWrite, MemoryType::CpuRead); _queryAccelerationStructureProperties( 1, &desc.dest, propertyQueryCount, queryDescs); } @@ -5071,8 +5071,8 @@ public: virtual SLANG_NO_THROW void SLANG_MCALL memoryBarrier( int count, IAccelerationStructure* const* structures, - AccessFlag::Enum srcAccess, - AccessFlag::Enum destAccess) override + MemoryType::Enum srcAccess, + MemoryType::Enum destAccess) override { _memoryBarrier(count, structures, srcAccess, destAccess); } @@ -6988,7 +6988,7 @@ static VkImageUsageFlags _calcImageUsageFlags( { VkImageUsageFlags usage = _calcImageUsageFlags(states); - if ((cpuAccessFlags & AccessFlag::Write) || initData) + if ((cpuAccessFlags & MemoryType::CpuWrite) || initData) { usage |= VK_IMAGE_USAGE_TRANSFER_DST_BIT; } @@ -7519,7 +7519,7 @@ Result VKDevice::createBufferResource(const IBufferResource::Desc& descIn, const SLANG_RETURN_ON_FAIL(buffer->m_buffer.init(m_api, desc.sizeInBytes, usage, reqMemoryProperties)); } - if ((desc.cpuAccessFlags & AccessFlag::Write) || initData) + if ((desc.cpuAccessFlags & MemoryType::CpuWrite) || initData) { SLANG_RETURN_ON_FAIL(buffer->m_uploadBuffer.init(m_api, bufferSize, VK_BUFFER_USAGE_TRANSFER_SRC_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)); } diff --git a/tools/platform/gui.cpp b/tools/platform/gui.cpp index 53d0a43ba..31671c424 100644 --- a/tools/platform/gui.cpp +++ b/tools/platform/gui.cpp @@ -216,7 +216,7 @@ void GUI::endFrame(ITransientResourceHeap* transientHeap, IFramebuffer* 
framebuf vertexBufferDesc.allowedStates = ResourceStateSet(ResourceState::VertexBuffer, ResourceState::CopyDestination); vertexBufferDesc.sizeInBytes = vertexCount * sizeof(ImDrawVert); - vertexBufferDesc.cpuAccessFlags = AccessFlag::Write; + vertexBufferDesc.cpuAccessFlags = MemoryType::CpuWrite; auto vertexBuffer = device->createBufferResource(vertexBufferDesc); gfx::IBufferResource::Desc indexBufferDesc; @@ -225,7 +225,7 @@ void GUI::endFrame(ITransientResourceHeap* transientHeap, IFramebuffer* framebuf indexBufferDesc.allowedStates = ResourceStateSet(ResourceState::IndexBuffer, ResourceState::CopyDestination); indexBufferDesc.defaultState = ResourceState::IndexBuffer; - indexBufferDesc.cpuAccessFlags = AccessFlag::Write; + indexBufferDesc.cpuAccessFlags = MemoryType::CpuWrite; auto indexBuffer = device->createBufferResource(indexBufferDesc); auto cmdBuf = transientHeap->createCommandBuffer(); auto encoder = cmdBuf->encodeResourceCommands(); @@ -253,7 +253,7 @@ void GUI::endFrame(ITransientResourceHeap* transientHeap, IFramebuffer* framebuf ResourceStateSet(ResourceState::ConstantBuffer, ResourceState::CopyDestination); constantBufferDesc.defaultState = ResourceState::ConstantBuffer; constantBufferDesc.sizeInBytes = sizeof(glm::mat4x4); - constantBufferDesc.cpuAccessFlags = AccessFlag::Write; + constantBufferDesc.cpuAccessFlags = MemoryType::CpuWrite; auto constantBuffer = device->createBufferResource(constantBufferDesc); { diff --git a/tools/render-test/render-test-main.cpp b/tools/render-test/render-test-main.cpp index 16a725f2b..4893ad890 100644 --- a/tools/render-test/render-test-main.cpp +++ b/tools/render-test/render-test-main.cpp @@ -558,7 +558,7 @@ SlangResult RenderTestApp::initialize( IBufferResource::Desc vertexBufferDesc; vertexBufferDesc.type = IResource::Type::Buffer; vertexBufferDesc.sizeInBytes = kVertexCount * sizeof(Vertex); - vertexBufferDesc.cpuAccessFlags = AccessFlag::Write; + vertexBufferDesc.cpuAccessFlags = MemoryType::CpuWrite; 
vertexBufferDesc.defaultState = ResourceState::VertexBuffer; vertexBufferDesc.allowedStates = ResourceStateSet(ResourceState::VertexBuffer); @@ -909,7 +909,7 @@ Result RenderTestApp::writeBindingOutput(const String& fileName) const size_t bufferSize = bufferDesc.sizeInBytes; ComPtr<ISlangBlob> blob; - if(bufferDesc.cpuAccessFlags & AccessFlag::Read) + if(bufferDesc.cpuAccessFlags & MemoryType::CpuRead) { // The buffer is already allocated for CPU access, so we can read it back directly. // @@ -920,7 +920,7 @@ Result RenderTestApp::writeBindingOutput(const String& fileName) // The buffer is not CPU-readable, so we will copy it using a staging buffer. auto stagingBufferDesc = bufferDesc; - stagingBufferDesc.cpuAccessFlags = AccessFlag::Read; + stagingBufferDesc.cpuAccessFlags = MemoryType::CpuRead; stagingBufferDesc.allowedStates = ResourceStateSet(ResourceState::CopyDestination, ResourceState::CopySource); stagingBufferDesc.defaultState = ResourceState::CopyDestination; |
