summaryrefslogtreecommitdiff
path: root/tools/gfx/d3d12/render-d3d12.cpp
diff options
context:
space:
mode:
authorZanderMajercik <amajercik@nvidia.com>2022-01-04 11:05:04 -0800
committerGitHub <noreply@github.com>2022-01-04 11:05:04 -0800
commit9d6c7763334908c78027199a0cb1ca3b9841ebab (patch)
treedc75824b08ca23554a0c8e1f98458c103605ebf9 /tools/gfx/d3d12/render-d3d12.cpp
parent1a1b2a0de67dccc1102449b8620830131d569cde (diff)
Buffer allocation backend. (#2045)
* removed initialization of upload resource for CPU visible buffers (D3D12, Vulkan) * reverted slang-gfx.h change * declared DescBase::hasCpuAccessFlag() const to make backend changes compile under gcc * commit before checking master branch * commit before merge * commit before checking master * commit before merging upstream * revert vulkan changes * commit before testing on master * commit before merge with master * reverted bad merge changes * reverted more bad merge changes in render-d3d12.cpp * reverted buffer transition in _uploadBufferData * implemented bufferBarrier() in render-d3d12.cpp * reverted uneccesary transition in createBuffer * create staging buffer even when AccessFlag::None passed to D3D11 createBufferResource * renamed AccessFlags to MemoryType Co-authored-by: Yong He <yonghe@outlook.com>
Diffstat (limited to 'tools/gfx/d3d12/render-d3d12.cpp')
-rw-r--r--tools/gfx/d3d12/render-d3d12.cpp287
1 files changed, 183 insertions, 104 deletions
diff --git a/tools/gfx/d3d12/render-d3d12.cpp b/tools/gfx/d3d12/render-d3d12.cpp
index aab260cd1..90bfb9bc6 100644
--- a/tools/gfx/d3d12/render-d3d12.cpp
+++ b/tools/gfx/d3d12/render-d3d12.cpp
@@ -246,8 +246,7 @@ public:
}
}
- D3D12Resource m_resource; ///< The resource typically in gpu memory
- D3D12Resource m_uploadResource; ///< If the resource can be written to, and is in gpu memory (ie not Memory backed), will have upload resource
+ D3D12Resource m_resource; ///< The resource in gpu memory, allocated on the correct heap relative to the cpu access flag
D3D12_RESOURCE_STATES m_defaultState;
@@ -623,41 +622,6 @@ public:
out.Flags = D3D12_RESOURCE_FLAG_NONE;
}
- static Result _uploadBufferData(
- ID3D12GraphicsCommandList* cmdList,
- BufferResourceImpl* buffer,
- size_t offset,
- size_t size,
- void* data)
- {
- D3D12_RANGE readRange = {};
- readRange.Begin = offset;
- readRange.End = offset + size;
-
- void* uploadData;
- SLANG_RETURN_ON_FAIL(buffer->m_uploadResource.getResource()->Map(
- 0, &readRange, reinterpret_cast<void**>(&uploadData)));
- memcpy((uint8_t*)uploadData + offset, data, size);
- buffer->m_uploadResource.getResource()->Unmap(0, &readRange);
- {
- D3D12BarrierSubmitter submitter(cmdList);
- submitter.transition(
- buffer->m_resource, buffer->m_defaultState, D3D12_RESOURCE_STATE_COPY_DEST);
- }
- cmdList->CopyBufferRegion(
- buffer->m_resource.getResource(),
- offset,
- buffer->m_uploadResource.getResource(),
- offset,
- size);
- {
- D3D12BarrierSubmitter submitter(cmdList);
- submitter.transition(
- buffer->m_resource, D3D12_RESOURCE_STATE_COPY_DEST, buffer->m_defaultState);
- }
- return SLANG_OK;
- }
-
class TransientResourceHeapImpl
: public TransientResourceHeapBaseImpl<D3D12Device, BufferResourceImpl>
{
@@ -721,7 +685,7 @@ public:
auto d3dDevice = device->m_device;
SLANG_RETURN_ON_FAIL(d3dDevice->CreateCommandAllocator(
D3D12_COMMAND_LIST_TYPE_DIRECT, IID_PPV_ARGS(m_commandAllocator.writeRef())));
-
+
SLANG_RETURN_ON_FAIL(m_viewHeap.init(
d3dDevice,
viewHeapSize,
@@ -742,7 +706,7 @@ public:
bufferDesc.allowedStates =
ResourceStateSet(ResourceState::ConstantBuffer, ResourceState::CopyDestination);
bufferDesc.sizeInBytes = desc.constantBufferSize;
- bufferDesc.cpuAccessFlags |= AccessFlag::Write;
+ bufferDesc.cpuAccessFlags |= MemoryType::CpuWrite;
SLANG_RETURN_ON_FAIL(device->createBufferResource(
bufferDesc,
nullptr,
@@ -758,6 +722,58 @@ public:
virtual SLANG_NO_THROW Result SLANG_MCALL synchronizeAndReset() override;
};
+ static Result _uploadBufferData(
+ ID3D12Device* device,
+ ID3D12GraphicsCommandList* cmdList,
+ TransientResourceHeapImpl* transientHeap,
+ BufferResourceImpl* buffer,
+ size_t offset,
+ size_t size,
+ void* data)
+ {
+ D3D12_RANGE readRange = {};
+ readRange.Begin = offset;
+ readRange.End = offset + size;
+
+
+ IBufferResource* uploadResource;
+ if (!buffer->getDesc()->hasCpuAccessFlag(MemoryType::CpuWrite))
+ {
+ transientHeap->allocateStagingBuffer(size, uploadResource, ResourceState::CopySource);
+ }
+
+ D3D12Resource& uploadResourceRef = (buffer->getDesc()->hasCpuAccessFlag(MemoryType::CpuWrite)) ? buffer->m_resource : static_cast<BufferResourceImpl*>(uploadResource)->m_resource;
+
+ void* uploadData;
+ SLANG_RETURN_ON_FAIL(uploadResourceRef.getResource()->Map(
+ 0, &readRange, reinterpret_cast<void**>(&uploadData)));
+ memcpy((uint8_t*)uploadData + offset, data, size);
+ uploadResourceRef.getResource()->Unmap(0, &readRange);
+
+ if (!buffer->getDesc()->hasCpuAccessFlag(MemoryType::CpuWrite)) {
+ {
+ D3D12BarrierSubmitter submitter(cmdList);
+ submitter.transition(
+ buffer->m_resource, buffer->m_defaultState, D3D12_RESOURCE_STATE_COPY_DEST);
+ }
+ cmdList->CopyBufferRegion(
+ buffer->m_resource.getResource(),
+ offset,
+ uploadResourceRef.getResource(),
+ offset,
+ size);
+
+ // Should already be in COPY_DEST if write flag was set.
+ {
+ D3D12BarrierSubmitter submitter(cmdList);
+ submitter.transition(
+ buffer->m_resource, D3D12_RESOURCE_STATE_COPY_DEST, buffer->m_defaultState);
+ }
+ }
+
+ return SLANG_OK;
+ }
+
class CommandBufferImpl;
class PipelineCommandEncoder
@@ -2308,7 +2324,7 @@ public:
SLANG_ASSERT(srcSize <= destSize);
- _uploadBufferData(encoder->m_d3dCmdList, buffer, offset, srcSize, src);
+ _uploadBufferData(encoder->m_device, encoder->m_d3dCmdList, encoder->m_transientHeap, buffer, offset, srcSize, src);
// In the case where this object has any sub-objects of
// existential/interface type, we need to recurse on those objects
@@ -3601,7 +3617,9 @@ public:
void* data) override
{
_uploadBufferData(
+ m_commandBuffer->m_renderer->m_device,
m_commandBuffer->m_cmdList,
+ m_commandBuffer->m_transientHeap,
static_cast<BufferResourceImpl*>(dst),
offset,
size,
@@ -3663,6 +3681,33 @@ public:
ResourceState src,
ResourceState dst) override
{
+
+ List<D3D12_RESOURCE_BARRIER> barriers;
+ barriers.reserve(count);
+
+ for (size_t i = 0; i < count; i++)
+ {
+ auto bufferImpl = static_cast<BufferResourceImpl*>(buffers[i]);
+
+ D3D12_RESOURCE_BARRIER barrier = {};
+ // If the src == dst, it must be a UAV barrier.
+ barrier.Type = (src == dst) ? D3D12_RESOURCE_BARRIER_TYPE_UAV : D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
+ barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
+
+ if (barrier.Type == D3D12_RESOURCE_BARRIER_TYPE_UAV) {
+ barrier.UAV.pResource = bufferImpl->m_resource;
+ }
+ else {
+ barrier.Transition.pResource = bufferImpl->m_resource;
+ barrier.Transition.StateBefore = D3DUtil::translateResourceState(src);
+ barrier.Transition.StateAfter = D3DUtil::translateResourceState(dst);
+ barrier.Transition.Subresource = 0;
+ }
+
+ barriers.add(barrier);
+ }
+
+ m_commandBuffer->m_cmdList4->ResourceBarrier((UINT)count, barriers.getArrayView().getBuffer());
}
virtual SLANG_NO_THROW void SLANG_MCALL endEncoding() {}
virtual SLANG_NO_THROW void SLANG_MCALL writeTimestamp(IQueryPool* pool, SlangInt index) override
@@ -4149,8 +4194,8 @@ public:
virtual SLANG_NO_THROW void SLANG_MCALL memoryBarrier(
int count,
IAccelerationStructure* const* structures,
- AccessFlag::Enum sourceAccess,
- AccessFlag::Enum destAccess) override;
+ MemoryType::Enum sourceAccess,
+ MemoryType::Enum destAccess) override;
virtual SLANG_NO_THROW void SLANG_MCALL
bindPipeline(IPipelineState* state, IShaderObject** outRootObject) override;
virtual SLANG_NO_THROW void SLANG_MCALL dispatchRays(
@@ -4455,10 +4500,10 @@ public:
const D3D12_RESOURCE_DESC& resourceDesc,
const void* srcData,
size_t srcDataSize,
- D3D12Resource& uploadResource,
D3D12_RESOURCE_STATES finalState,
D3D12Resource& resourceOut,
- bool isShared = false);
+ bool isShared,
+ MemoryType::Enum access = MemoryType::GpuOnly);
Result captureTextureToSurface(
TextureResourceImpl* resource,
@@ -4748,57 +4793,83 @@ static void _initSrvDesc(
}
}
-Result D3D12Device::createBuffer(const D3D12_RESOURCE_DESC& resourceDesc, const void* srcData, size_t srcDataSize, D3D12Resource& uploadResource, D3D12_RESOURCE_STATES finalState, D3D12Resource& resourceOut, bool isShared)
+Result D3D12Device::createBuffer(const D3D12_RESOURCE_DESC& resourceDesc, const void* srcData, size_t srcDataSize, D3D12_RESOURCE_STATES finalState, D3D12Resource& resourceOut, bool isShared, MemoryType::Enum access)
{
const size_t bufferSize = size_t(resourceDesc.Width);
- {
- D3D12_HEAP_PROPERTIES heapProps;
- heapProps.Type = D3D12_HEAP_TYPE_DEFAULT;
- heapProps.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN;
- heapProps.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN;
- heapProps.CreationNodeMask = 1;
- heapProps.VisibleNodeMask = 1;
+ D3D12_HEAP_PROPERTIES heapProps;
+ heapProps.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN;
+ heapProps.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN;
+ heapProps.CreationNodeMask = 1;
+ heapProps.VisibleNodeMask = 1;
- D3D12_HEAP_FLAGS flags = D3D12_HEAP_FLAG_NONE;
- if (isShared) flags |= D3D12_HEAP_FLAG_SHARED;
+ D3D12_HEAP_FLAGS flags = D3D12_HEAP_FLAG_NONE;
+ if (isShared) flags |= D3D12_HEAP_FLAG_SHARED;
- const D3D12_RESOURCE_STATES initialState = srcData ? D3D12_RESOURCE_STATE_COPY_DEST : finalState;
+ D3D12_RESOURCE_DESC desc = resourceDesc;
- SLANG_RETURN_ON_FAIL(resourceOut.initCommitted(m_device, heapProps, flags, resourceDesc, initialState, nullptr));
- }
+ D3D12_RESOURCE_STATES initialState = finalState;
- {
- D3D12_HEAP_PROPERTIES heapProps;
- heapProps.Type = D3D12_HEAP_TYPE_UPLOAD;
- heapProps.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN;
- heapProps.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN;
- heapProps.CreationNodeMask = 1;
- heapProps.VisibleNodeMask = 1;
+ switch (access) {
+ case MemoryType::CpuRead:
+ assert(!srcData);
- D3D12_RESOURCE_DESC uploadResourceDesc(resourceDesc);
- uploadResourceDesc.Flags = D3D12_RESOURCE_FLAG_NONE;
+ heapProps.Type = D3D12_HEAP_TYPE_READBACK;
+ desc.Flags = D3D12_RESOURCE_FLAG_NONE;
+ initialState |= D3D12_RESOURCE_STATE_COPY_DEST;
- SLANG_RETURN_ON_FAIL(uploadResource.initCommitted(m_device, heapProps, D3D12_HEAP_FLAG_NONE, uploadResourceDesc, D3D12_RESOURCE_STATE_GENERIC_READ, nullptr));
- }
+ break;
+ case MemoryType::CpuWrite:
- if (srcData)
- {
- // Copy data to the intermediate upload heap and then schedule a copy
- // from the upload heap to the vertex buffer.
- UINT8* dstData;
- D3D12_RANGE readRange = {}; // We do not intend to read from this resource on the CPU.
+ heapProps.Type = D3D12_HEAP_TYPE_UPLOAD;
+ desc.Flags = D3D12_RESOURCE_FLAG_NONE;
+ initialState |= D3D12_RESOURCE_STATE_GENERIC_READ;
- ID3D12Resource* dxUploadResource = uploadResource.getResource();
+ break;
+ case MemoryType::GpuOnly:
+ heapProps.Type = D3D12_HEAP_TYPE_DEFAULT;
+ initialState = (srcData ? D3D12_RESOURCE_STATE_COPY_DEST : finalState);
+ break;
+ default:
+ return SLANG_FAIL;
+ }
- SLANG_RETURN_ON_FAIL(dxUploadResource->Map(0, &readRange, reinterpret_cast<void**>(&dstData)));
- ::memcpy(dstData, srcData, srcDataSize);
- dxUploadResource->Unmap(0, nullptr);
+ // Create the resource.
+ SLANG_RETURN_ON_FAIL(resourceOut.initCommitted(m_device, heapProps, flags, desc, initialState, nullptr));
- auto encodeInfo = encodeResourceCommands();
- encodeInfo.d3dCommandList->CopyBufferRegion(resourceOut, 0, uploadResource, 0, bufferSize);
- submitResourceCommandsAndWait(encodeInfo);
- }
+ if (srcData)
+ {
+ D3D12Resource uploadResource;
+
+ if (access == MemoryType::GpuOnly) {
+ // If the buffer is on the default heap, create upload buffer.
+ D3D12_RESOURCE_DESC uploadDesc(resourceDesc);
+ uploadDesc.Flags = D3D12_RESOURCE_FLAG_NONE;
+ heapProps.Type = D3D12_HEAP_TYPE_UPLOAD;
+
+ SLANG_RETURN_ON_FAIL(uploadResource.initCommitted(m_device, heapProps, D3D12_HEAP_FLAG_NONE, uploadDesc, D3D12_RESOURCE_STATE_GENERIC_READ, nullptr));
+ }
+
+ // Be careful not to actually copy a resource here.
+ D3D12Resource& uploadResourceRef = (access == MemoryType::GpuOnly) ? uploadResource : resourceOut;
+
+ // Copy data to the intermediate upload heap and then schedule a copy
+ // from the upload heap to the vertex buffer.
+ UINT8* dstData;
+ D3D12_RANGE readRange = {}; // We do not intend to read from this resource on the CPU.
+
+ ID3D12Resource* dxUploadResource = uploadResourceRef.getResource();
+
+ SLANG_RETURN_ON_FAIL(dxUploadResource->Map(0, &readRange, reinterpret_cast<void**>(&dstData)));
+ ::memcpy(dstData, srcData, srcDataSize);
+ dxUploadResource->Unmap(0, nullptr);
+
+ if (access == MemoryType::GpuOnly) {
+ auto encodeInfo = encodeResourceCommands();
+ encodeInfo.d3dCommandList->CopyBufferRegion(resourceOut, 0, uploadResourceRef, 0, bufferSize);
+ submitResourceCommandsAndWait(encodeInfo);
+ }
+ }
return SLANG_OK;
}
@@ -5711,7 +5782,8 @@ Result D3D12Device::createBufferResource(const IBufferResource::Desc& descIn, co
bufferDesc.Flags |= _calcResourceFlags(srcDesc.allowedStates);
const D3D12_RESOURCE_STATES initialState = buffer->m_defaultState;
- SLANG_RETURN_ON_FAIL(createBuffer(bufferDesc, initData, srcDesc.sizeInBytes, buffer->m_uploadResource, initialState, buffer->m_resource, descIn.isShared));
+ SLANG_RETURN_ON_FAIL(createBuffer(bufferDesc, initData, srcDesc.sizeInBytes, initialState, buffer->m_resource, descIn.isShared,
+ (MemoryType::Enum)descIn.cpuAccessFlags));
returnComPtr(outResource, buffer);
return SLANG_OK;
@@ -6252,7 +6324,6 @@ Result D3D12Device::readBufferResource(
size_t size,
ISlangBlob** outBlob)
{
- auto encodeInfo = encodeResourceCommands();
BufferResourceImpl* buffer = static_cast<BufferResourceImpl*>(bufferIn);
@@ -6261,26 +6332,34 @@ Result D3D12Device::readBufferResource(
// This will be slow!!! - it blocks CPU on GPU completion
D3D12Resource& resource = buffer->m_resource;
- // Readback heap
- D3D12_HEAP_PROPERTIES heapProps;
- heapProps.Type = D3D12_HEAP_TYPE_READBACK;
- heapProps.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN;
- heapProps.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN;
- heapProps.CreationNodeMask = 1;
- heapProps.VisibleNodeMask = 1;
+ D3D12Resource stageBuf;
+ if (buffer->getDesc()->cpuAccessFlags != (int)MemoryType::CpuRead) {
- // Resource to readback to
- D3D12_RESOURCE_DESC stagingDesc;
- _initBufferResourceDesc(size, stagingDesc);
+ auto encodeInfo = encodeResourceCommands();
- D3D12Resource stageBuf;
- SLANG_RETURN_ON_FAIL(stageBuf.initCommitted(m_device, heapProps, D3D12_HEAP_FLAG_NONE, stagingDesc, D3D12_RESOURCE_STATE_COPY_DEST, nullptr));
- // Do the copy
- encodeInfo.d3dCommandList->CopyBufferRegion(stageBuf, 0, resource, offset, size);
+ // Readback heap
+ D3D12_HEAP_PROPERTIES heapProps;
+ heapProps.Type = D3D12_HEAP_TYPE_READBACK;
+ heapProps.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN;
+ heapProps.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN;
+ heapProps.CreationNodeMask = 1;
+ heapProps.VisibleNodeMask = 1;
- // Wait until complete
- submitResourceCommandsAndWait(encodeInfo);
+ // Resource to readback to
+ D3D12_RESOURCE_DESC stagingDesc;
+ _initBufferResourceDesc(size, stagingDesc);
+
+ SLANG_RETURN_ON_FAIL(stageBuf.initCommitted(m_device, heapProps, D3D12_HEAP_FLAG_NONE, stagingDesc, D3D12_RESOURCE_STATE_COPY_DEST, nullptr));
+
+ // Do the copy
+ encodeInfo.d3dCommandList->CopyBufferRegion(stageBuf, 0, resource, offset, size);
+
+ // Wait until complete
+ submitResourceCommandsAndWait(encodeInfo);
+ }
+
+ D3D12Resource& stageBufRef = (buffer->getDesc()->cpuAccessFlags != (int)MemoryType::CpuRead) ? stageBuf : resource;
// Map and copy
RefPtr<ListBlob> blob = new ListBlob();
@@ -6288,13 +6367,13 @@ Result D3D12Device::readBufferResource(
UINT8* data;
D3D12_RANGE readRange = { 0, size };
- SLANG_RETURN_ON_FAIL(stageBuf.getResource()->Map(0, &readRange, reinterpret_cast<void**>(&data)));
+ SLANG_RETURN_ON_FAIL(stageBufRef.getResource()->Map(0, &readRange, reinterpret_cast<void**>(&data)));
// Copy to memory buffer
blob->m_data.setCount(size);
::memcpy(blob->m_data.getBuffer(), data, size);
- stageBuf.getResource()->Unmap(0, nullptr);
+ stageBufRef.getResource()->Unmap(0, nullptr);
}
returnComPtr(outBlob, blob);
return SLANG_OK;
@@ -6962,8 +7041,8 @@ void D3D12Device::CommandBufferImpl::RayTracingCommandEncoderImpl::deserializeAc
void D3D12Device::CommandBufferImpl::RayTracingCommandEncoderImpl::memoryBarrier(
int count,
IAccelerationStructure* const* structures,
- AccessFlag::Enum sourceAccess,
- AccessFlag::Enum destAccess)
+ MemoryType::Enum sourceAccess,
+ MemoryType::Enum destAccess)
{
ShortList<D3D12_RESOURCE_BARRIER> barriers;
barriers.setCount(count);