summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Yong He <yonghe@outlook.com>, 2022-02-23 10:30:19 -0800
committer: GitHub <noreply@github.com>, 2022-02-23 10:30:19 -0800
commit: 393d5beb1e0e71e6f2a384c9ab19b717f389a056 (patch)
tree: 1341fcdf592127f5e78054c73e8cb032381a0b56
parent: c4790309ec46ae2f4f7c49eb50699a950ee7a9a4 (diff)
gfx: d3d12 performance optimizations. (#2140)
* gfx: d3d12 performance optimizations.
* Fix.
* Fix unit test bug.
* Add gfx interface for directly allocating GPU descriptor tables.

Co-authored-by: Yong He <yhe@nvidia.com>
-rw-r--r-- slang-gfx.h | 19
-rw-r--r-- tools/gfx-unit-test/nested-parameter-block.cpp | 4
-rw-r--r-- tools/gfx/cuda/render-cuda.cpp | 6
-rw-r--r-- tools/gfx/d3d/d3d-swapchain.h | 35
-rw-r--r-- tools/gfx/d3d12/render-d3d12.cpp | 170
-rw-r--r-- tools/gfx/debug-layer.cpp | 6
-rw-r--r-- tools/gfx/debug-layer.h | 1
-rw-r--r-- tools/gfx/immediate-renderer-base.cpp | 2
-rw-r--r-- tools/gfx/renderer-shared.cpp | 1
-rw-r--r-- tools/gfx/renderer-shared.h | 1
-rw-r--r-- tools/gfx/transient-resource-heap-base.h | 242
-rw-r--r-- tools/gfx/vulkan/render-vk.cpp | 63
12 files changed, 304 insertions, 246 deletions
diff --git a/slang-gfx.h b/slang-gfx.h
index 25785cb40..9971b81fe 100644
--- a/slang-gfx.h
+++ b/slang-gfx.h
@@ -1821,8 +1821,6 @@ public:
virtual SLANG_NO_THROW void SLANG_MCALL close() = 0;
virtual SLANG_NO_THROW Result SLANG_MCALL getNativeHandle(InteropHandle* outHandle) = 0;
-
- virtual SLANG_NO_THROW Result SLANG_MCALL resetDescriptorHeaps() = 0;
};
#define SLANG_UUID_ICommandBuffer \
{ \
@@ -1912,6 +1910,23 @@ public:
0xcd48bd29, 0xee72, 0x41b8, { 0xbc, 0xff, 0xa, 0x2b, 0x3a, 0xaa, 0x6d, 0xeb } \
}
+class ID3D12TransientResourceHeap : public ISlangUnknown
+{
+public:
+ enum class DescriptorType
+ {
+ ResourceView, Sampler
+ };
+ virtual SLANG_NO_THROW Result SLANG_MCALL allocateTransientDescriptorTable(
+ DescriptorType type,
+ uint32_t count,
+ uint64_t& outDescriptorOffset,
+ void** outD3DDescriptorHeapHandle) = 0;
+};
+#define SLANG_UUID_ID3D12TransientResourceHeap \
+ { \
+ 0x9bc6a8bc, 0x5f7a, 0x454a, { 0x93, 0xef, 0x3b, 0x10, 0x5b, 0xb7, 0x63, 0x7e } \
+ }
class ISwapchain : public ISlangUnknown
{
diff --git a/tools/gfx-unit-test/nested-parameter-block.cpp b/tools/gfx-unit-test/nested-parameter-block.cpp
index 774a94c5f..907b4c868 100644
--- a/tools/gfx-unit-test/nested-parameter-block.cpp
+++ b/tools/gfx-unit-test/nested-parameter-block.cpp
@@ -68,7 +68,7 @@ namespace gfx_test
srvDesc.type = IResourceView::Type::ShaderResource;
srvDesc.format = Format::Unknown;
srvDesc.bufferElementSize = sizeof(uint32_t) * 4;
- srvDesc.bufferRange.elementCount = 4;
+ srvDesc.bufferRange.elementCount = 1;
srvDesc.bufferRange.firstElement = 0;
srvs.add(device->createBufferView(srvBuffers[i], nullptr, srvDesc));
}
@@ -78,7 +78,7 @@ namespace gfx_test
resultBufferViewDesc.type = IResourceView::Type::UnorderedAccess;
resultBufferViewDesc.format = Format::Unknown;
resultBufferViewDesc.bufferElementSize = sizeof(uint32_t) * 4;
- resultBufferViewDesc.bufferRange.elementCount = 4;
+ resultBufferViewDesc.bufferRange.elementCount = 1;
resultBufferViewDesc.bufferRange.firstElement = 0;
Slang::ComPtr<IResourceView> resultBufferView;
SLANG_CHECK(SLANG_SUCCEEDED(device->createBufferView(
diff --git a/tools/gfx/cuda/render-cuda.cpp b/tools/gfx/cuda/render-cuda.cpp
index db5661f20..6d1f7f354 100644
--- a/tools/gfx/cuda/render-cuda.cpp
+++ b/tools/gfx/cuda/render-cuda.cpp
@@ -945,12 +945,6 @@ public:
return static_cast<ICommandBuffer*>(this);
return nullptr;
}
-
- virtual SLANG_NO_THROW Result SLANG_MCALL resetDescriptorHeaps() override
- {
- return SLANG_OK;
- }
-
public:
CUDADevice* m_device;
TransientResourceHeapBase* m_transientHeap;
diff --git a/tools/gfx/d3d/d3d-swapchain.h b/tools/gfx/d3d/d3d-swapchain.h
index 1c29b2039..36a35f754 100644
--- a/tools/gfx/d3d/d3d-swapchain.h
+++ b/tools/gfx/d3d/d3d-swapchain.h
@@ -87,20 +87,6 @@ public:
SLANG_RETURN_ON_FAIL(swapChain1->QueryInterface(m_swapChain.writeRef()));
}
- if (!desc.enableVSync)
- {
- m_swapChainWaitableObject = m_swapChain->GetFrameLatencyWaitableObject();
-
- int maxLatency = desc.imageCount - 2;
-
- // Make sure the maximum latency is in the range required by dx runtime
- maxLatency = (maxLatency < 1) ? 1 : maxLatency;
- maxLatency = (maxLatency > DXGI_MAX_SWAP_CHAIN_BUFFERS) ? DXGI_MAX_SWAP_CHAIN_BUFFERS
- : maxLatency;
-
- m_swapChain->SetMaximumFrameLatency(maxLatency);
- }
-
createSwapchainBufferImages();
return SLANG_OK;
}
@@ -113,25 +99,9 @@ public:
}
virtual SLANG_NO_THROW Result SLANG_MCALL present() override
{
- if (m_swapChainWaitableObject)
+ if (SLANG_FAILED(m_swapChain->Present(m_desc.enableVSync ? 1 : 0, 0)))
{
- // check if now is good time to present
- // This doesn't wait - because the wait time is 0. If it returns WAIT_TIMEOUT it
- // means that no frame is waiting to be be displayed so there is no point doing a
- // present.
- const bool shouldPresent =
- (WaitForSingleObjectEx(m_swapChainWaitableObject, 0, TRUE) != WAIT_TIMEOUT);
- if (shouldPresent)
- {
- m_swapChain->Present(0, 0);
- }
- }
- else
- {
- if (SLANG_FAILED(m_swapChain->Present(1, 0)))
- {
- return SLANG_FAIL;
- }
+ return SLANG_FAIL;
}
return SLANG_OK;
}
@@ -171,7 +141,6 @@ public:
virtual IDXGIFactory* getDXGIFactory() = 0;
virtual IUnknown* getOwningDevice() = 0;
ISwapchain::Desc m_desc;
- HANDLE m_swapChainWaitableObject = nullptr;
ComPtr<IDXGISwapChain2> m_swapChain;
Slang::ShortList<Slang::RefPtr<TextureResource>> m_images;
};
diff --git a/tools/gfx/d3d12/render-d3d12.cpp b/tools/gfx/d3d12/render-d3d12.cpp
index aa008ea81..826475f43 100644
--- a/tools/gfx/d3d12/render-d3d12.cpp
+++ b/tools/gfx/d3d12/render-d3d12.cpp
@@ -556,6 +556,7 @@ public:
m_fence->SetEventOnCompletion(m_eventValue, m_waitEvent);
m_commandQueue->Signal(m_fence, m_eventValue);
WaitForSingleObject(m_waitEvent, INFINITE);
+ m_commandAllocator->Reset();
int8_t* mappedData = nullptr;
D3D12_RANGE readRange = { sizeof(uint64_t) * queryIndex, sizeof(uint64_t) * (queryIndex + count) };
@@ -787,6 +788,7 @@ public:
class TransientResourceHeapImpl
: public TransientResourceHeapBaseImpl<D3D12Device, BufferResourceImpl>
+ , public ID3D12TransientResourceHeap
{
private:
typedef TransientResourceHeapBaseImpl<D3D12Device, BufferResourceImpl> Super;
@@ -843,6 +845,39 @@ public:
D3D12LinearExpandingDescriptorHeap m_stagingCpuViewHeap;
D3D12LinearExpandingDescriptorHeap m_stagingCpuSamplerHeap;
+ virtual SLANG_NO_THROW SlangResult SLANG_MCALL
+ queryInterface(SlangUUID const& uuid, void** outObject) override
+ {
+ if (uuid == GfxGUID::IID_ID3D12TransientResourceHeap)
+ {
+ *outObject = static_cast<ID3D12TransientResourceHeap*>(this);
+ addRef();
+ return SLANG_OK;
+ }
+ return Super::queryInterface(uuid, outObject);
+ }
+
+ virtual SLANG_NO_THROW uint32_t SLANG_MCALL addRef() override { return Super::addRef(); }
+ virtual SLANG_NO_THROW uint32_t SLANG_MCALL release() override { return Super::release(); }
+
+ virtual SLANG_NO_THROW Result SLANG_MCALL allocateTransientDescriptorTable(
+ DescriptorType type,
+ uint32_t count,
+ uint64_t& outDescriptorOffset,
+ void** outD3DDescriptorHeapHandle) override
+ {
+ auto& heap = (type == DescriptorType::ResourceView) ? getCurrentViewHeap()
+ : getCurrentSamplerHeap();
+ int allocResult = heap.allocate((int)count);
+ if (allocResult == -1)
+ {
+ return SLANG_E_OUT_OF_MEMORY;
+ }
+ outDescriptorOffset = (uint64_t)allocResult;
+ *outD3DDescriptorHeapHandle = heap.getHeap();
+ return SLANG_OK;
+ }
+
~TransientResourceHeapImpl()
{
synchronizeAndReset();
@@ -881,22 +916,6 @@ public:
allocateNewViewDescriptorHeap(device);
allocateNewSamplerDescriptorHeap(device);
- if (desc.constantBufferSize != 0)
- {
- ComPtr<IBufferResource> bufferResourcePtr;
- IBufferResource::Desc bufferDesc;
- bufferDesc.type = IResource::Type::Buffer;
- bufferDesc.defaultState = ResourceState::ConstantBuffer;
- bufferDesc.allowedStates =
- ResourceStateSet(ResourceState::ConstantBuffer, ResourceState::CopyDestination);
- bufferDesc.sizeInBytes = desc.constantBufferSize;
- bufferDesc.memoryType = MemoryType::Upload;
- SLANG_RETURN_ON_FAIL(device->createBufferResource(
- bufferDesc,
- nullptr,
- bufferResourcePtr.writeRef()));
- m_constantBuffers.add(static_cast<BufferResourceImpl*>(bufferResourcePtr.get()));
- }
return SLANG_OK;
}
@@ -957,15 +976,12 @@ public:
size_t size,
void* data)
{
- D3D12_RANGE readRange = {};
- readRange.Begin = offset;
- readRange.End = offset + size;
-
-
IBufferResource* uploadResource;
+ size_t uploadResourceOffset = 0;
if (buffer->getDesc()->memoryType != MemoryType::Upload)
{
- transientHeap->allocateStagingBuffer(size, uploadResource, ResourceState::General);
+ SLANG_RETURN_ON_FAIL(transientHeap->allocateStagingBuffer(
+ size, uploadResource, uploadResourceOffset, MemoryType::Upload));
}
D3D12Resource& uploadResourceRef =
@@ -973,32 +989,26 @@ public:
? buffer->m_resource
: static_cast<BufferResourceImpl*>(uploadResource)->m_resource;
+ D3D12_RANGE readRange = {};
+ readRange.Begin = 0;
+ readRange.End = 0;
void* uploadData;
SLANG_RETURN_ON_FAIL(uploadResourceRef.getResource()->Map(
0, &readRange, reinterpret_cast<void**>(&uploadData)));
- memcpy((uint8_t*)uploadData + offset, data, size);
- uploadResourceRef.getResource()->Unmap(0, &readRange);
+ memcpy((uint8_t*)uploadData + uploadResourceOffset + offset, data, size);
+ D3D12_RANGE writtenRange = {};
+ writtenRange.Begin = uploadResourceOffset + offset;
+ writtenRange.End = uploadResourceOffset + offset + size;
+ uploadResourceRef.getResource()->Unmap(0, &writtenRange);
if (buffer->getDesc()->memoryType != MemoryType::Upload)
{
- {
- D3D12BarrierSubmitter submitter(cmdList);
- submitter.transition(
- buffer->m_resource, buffer->m_defaultState, D3D12_RESOURCE_STATE_COPY_DEST);
- }
cmdList->CopyBufferRegion(
buffer->m_resource.getResource(),
offset,
uploadResourceRef.getResource(),
- offset,
+ uploadResourceOffset + offset,
size);
-
- // Should already be in COPY_DEST if write flag was set.
- {
- D3D12BarrierSubmitter submitter(cmdList);
- submitter.transition(
- buffer->m_resource, D3D12_RESOURCE_STATE_COPY_DEST, buffer->m_defaultState);
- }
}
return SLANG_OK;
@@ -3725,8 +3735,9 @@ public:
static_cast<TransientResourceHeapImpl*>(transientHeap);
IBufferResource* stagingBuffer = nullptr;
+ size_t stagingBufferOffset = 0;
transientHeapImpl->allocateStagingBuffer(
- tableSize, stagingBuffer, ResourceState::General);
+ tableSize, stagingBuffer, stagingBufferOffset, MemoryType::Upload);
assert(stagingBuffer);
void* stagingPtr = nullptr;
@@ -3749,7 +3760,7 @@ public:
}
};
- uint8_t* stagingBufferPtr = (uint8_t*)stagingPtr;
+ uint8_t* stagingBufferPtr = (uint8_t*)stagingPtr + stagingBufferOffset;
for (uint32_t i = 0; i < m_rayGenShaderCount; i++)
{
copyShaderIdInto(
@@ -3776,7 +3787,7 @@ public:
}
stagingBuffer->unmap(nullptr);
- encoder->copyBuffer(bufferResource, 0, stagingBuffer, 0, tableSize);
+ encoder->copyBuffer(bufferResource, 0, stagingBuffer, stagingBufferOffset, tableSize);
encoder->bufferBarrier(
1,
bufferResource.readRef(),
@@ -3813,12 +3824,6 @@ public:
return SLANG_OK;
}
- virtual SLANG_NO_THROW Result SLANG_MCALL resetDescriptorHeaps() override
- {
- bindDescriptorHeaps();
- return SLANG_OK;
- }
-
public:
ComPtr<ID3D12GraphicsCommandList> m_cmdList;
ComPtr<ID3D12GraphicsCommandList1> m_cmdList1;
@@ -3830,19 +3835,26 @@ public:
D3D12Device* m_renderer;
RootShaderObjectImpl m_rootShaderObject;
RefPtr<MutableRootShaderObjectImpl> m_mutableRootShaderObject;
+ bool m_descriptorHeapsBound = false;
void bindDescriptorHeaps()
{
- ID3D12DescriptorHeap* heaps[] = {
- m_transientHeap->getCurrentViewHeap().getHeap(),
- m_transientHeap->getCurrentSamplerHeap().getHeap(),
- };
- m_cmdList->SetDescriptorHeaps(SLANG_COUNT_OF(heaps), heaps);
+ if (!m_descriptorHeapsBound)
+ {
+ ID3D12DescriptorHeap* heaps[] = {
+ m_transientHeap->getCurrentViewHeap().getHeap(),
+ m_transientHeap->getCurrentSamplerHeap().getHeap(),
+ };
+ m_cmdList->SetDescriptorHeaps(SLANG_COUNT_OF(heaps), heaps);
+ m_descriptorHeapsBound = true;
+ }
}
+ void invalidateDescriptorHeapBinding() { m_descriptorHeapsBound = false; }
+
void reinit()
{
- bindDescriptorHeaps();
+ invalidateDescriptorHeapBinding();
m_rootShaderObject.init(m_renderer);
}
@@ -3936,23 +3948,9 @@ public:
auto arraySize = textureDesc->arraySize;
if (arraySize == 0)
arraySize = 1;
- for (uint32_t planeIndex = 0; planeIndex < planeCount; planeIndex++)
- {
- for (int layer = 0; layer < arraySize; layer++)
- {
- for (int mip = 0; mip < textureDesc->numMipLevels; mip++)
- {
- barrier.Transition.Subresource = D3DUtil::getSubresourceIndex(
- mip,
- layer,
- planeIndex,
- textureImpl->getDesc()->numMipLevels,
- arraySize);
- barriers.add(barrier);
- }
- }
- }
+ barrier.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
}
+ barriers.add(barrier);
}
if (barriers.getCount())
{
@@ -4119,7 +4117,6 @@ public:
D3D12_TEXTURE_COPY_LOCATION srcRegion = {};
srcRegion.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT;
D3D12_PLACED_SUBRESOURCE_FOOTPRINT& footprint = srcRegion.PlacedFootprint;
-
footprint.Offset = 0;
footprint.Footprint.Format = texDesc.Format;
uint32_t mipLevel = D3DUtil::getSubresourceMipLevel(
@@ -4162,9 +4159,10 @@ public:
footprint.Footprint.RowPitch * rowCount * footprint.Footprint.Depth;
IBufferResource* stagingBuffer;
+ size_t stagingBufferOffset = 0;
m_commandBuffer->m_transientHeap->allocateStagingBuffer(
- bufferSize, stagingBuffer, ResourceState::General);
-
+ bufferSize, stagingBuffer, stagingBufferOffset, MemoryType::Upload, true);
+ assert(stagingBufferOffset == 0);
BufferResourceImpl* bufferImpl =
static_cast<BufferResourceImpl*>(stagingBuffer);
uint8_t* bufferData = nullptr;
@@ -4185,9 +4183,7 @@ public:
}
}
bufferImpl->m_resource.getResource()->Unmap(0, nullptr);
-
srcRegion.pResource = bufferImpl->m_resource.getResource();
-
m_commandBuffer->m_cmdList->CopyTextureRegion(
&dstRegion, offset.x, offset.y, offset.z, &srcRegion, nullptr);
}
@@ -4252,8 +4248,6 @@ public:
m_commandBuffer->m_renderer);
gpuHandleIndex =
m_commandBuffer->m_transientHeap->getCurrentViewHeap().allocate(1);
- auto d3dViewHeap =
- m_commandBuffer->m_transientHeap->getCurrentViewHeap().getHeap();
m_commandBuffer->bindDescriptorHeaps();
}
this->m_commandBuffer->m_renderer->m_device->CopyDescriptorsSimple(
@@ -4495,13 +4489,6 @@ public:
{
auto textureImpl = static_cast<TextureResourceImpl*>(texture);
- if (subresourceRange.mipLevelCount == 0)
- subresourceRange.mipLevelCount = textureImpl->getDesc()->numMipLevels;
- if (subresourceRange.layerCount == 0)
- subresourceRange.layerCount = textureImpl->getDesc()->arraySize;
-
- auto d3dFormat = D3DUtil::getMapFormat(textureImpl->getDesc()->format);
-
ShortList<D3D12_RESOURCE_BARRIER> barriers;
D3D12_RESOURCE_BARRIER barrier;
barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
@@ -4509,13 +4496,17 @@ public:
{
barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_UAV;
barrier.UAV.pResource = textureImpl->m_resource.getResource();
+ barriers.add(barrier);
}
else
{
barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
barrier.Transition.StateBefore = D3DUtil::getResourceState(src);
barrier.Transition.StateAfter = D3DUtil::getResourceState(dst);
+ if (barrier.Transition.StateBefore == barrier.Transition.StateAfter)
+ return;
barrier.Transition.pResource = textureImpl->m_resource.getResource();
+ auto d3dFormat = D3DUtil::getMapFormat(textureImpl->getDesc()->format);
auto aspectMask = (int32_t)subresourceRange.aspectMask;
if (subresourceRange.aspectMask == TextureAspect::Default)
aspectMask = (int32_t)TextureAspect::Color;
@@ -5438,10 +5429,11 @@ public:
}
virtual SLANG_NO_THROW Result SLANG_MCALL present() override
{
+ m_fence->SetEventOnCompletion(fenceValue, m_frameEvents[m_swapChain3->GetCurrentBackBufferIndex()]);
SLANG_RETURN_ON_FAIL(D3DSwapchainBase::present());
fenceValue++;
- m_fence->SetEventOnCompletion(fenceValue, m_frameEvents[m_swapChain3->GetCurrentBackBufferIndex()]);
m_queue->Signal(m_fence, fenceValue);
+
return SLANG_OK;
}
};
@@ -5630,6 +5622,7 @@ Result D3D12Device::PipelineCommandEncoder::_bindRenderState(Submitter* submitte
// themselves will be responsible for allocating, binding, and filling in
// any descriptor tables or other root parameters needed.
//
+ m_commandBuffer->bindDescriptorHeaps();
if (rootObjectImpl->bindAsRoot(&context, rootLayoutImpl) == SLANG_E_OUT_OF_MEMORY)
{
if (!m_transientHeap->canResize())
@@ -5639,6 +5632,7 @@ Result D3D12Device::PipelineCommandEncoder::_bindRenderState(Submitter* submitte
// If we run out of heap space while binding, allocate new descriptor heaps and try again.
ID3D12DescriptorHeap* d3dheap = nullptr;
+ m_commandBuffer->invalidateDescriptorHeapBinding();
switch (context.outOfMemoryHeap)
{
case D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV:
@@ -5842,7 +5836,7 @@ static void _initSrvDesc(
Result D3D12Device::createBuffer(const D3D12_RESOURCE_DESC& resourceDesc, const void* srcData, size_t srcDataSize, D3D12_RESOURCE_STATES finalState, D3D12Resource& resourceOut, bool isShared, MemoryType memoryType)
{
- const size_t bufferSize = size_t(resourceDesc.Width);
+ const size_t bufferSize = size_t(resourceDesc.Width);
D3D12_HEAP_PROPERTIES heapProps;
heapProps.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN;
@@ -6982,16 +6976,10 @@ Result D3D12Device::createBufferResource(const IBufferResource::Desc& descIn, co
{
BufferResource::Desc srcDesc = fixupBufferDesc(descIn);
- // Always align up to 256 bytes, since that is required for constant buffers.
- //
- // TODO: only do this for buffers that could potentially be bound as constant buffers...
- //
- const size_t alignedSizeInBytes = D3DUtil::calcAligned(srcDesc.sizeInBytes, 256);
-
RefPtr<BufferResourceImpl> buffer(new BufferResourceImpl(srcDesc));
D3D12_RESOURCE_DESC bufferDesc;
- _initBufferResourceDesc(alignedSizeInBytes, bufferDesc);
+ _initBufferResourceDesc(descIn.sizeInBytes, bufferDesc);
bufferDesc.Flags |= _calcResourceFlags(srcDesc.allowedStates);
diff --git a/tools/gfx/debug-layer.cpp b/tools/gfx/debug-layer.cpp
index 8ecdf405a..9cd8d216c 100644
--- a/tools/gfx/debug-layer.cpp
+++ b/tools/gfx/debug-layer.cpp
@@ -1015,12 +1015,6 @@ Result DebugCommandBuffer::getNativeHandle(InteropHandle* outHandle)
return baseObject->getNativeHandle(outHandle);
}
-Result DebugCommandBuffer::resetDescriptorHeaps()
-{
- SLANG_GFX_API_FUNC;
- return baseObject->resetDescriptorHeaps();
-}
-
void DebugCommandBuffer::checkEncodersClosedBeforeNewEncoder()
{
if (m_renderCommandEncoder.isOpen || m_resourceCommandEncoder.isOpen ||
diff --git a/tools/gfx/debug-layer.h b/tools/gfx/debug-layer.h
index a8cdc1b4f..c141ce24a 100644
--- a/tools/gfx/debug-layer.h
+++ b/tools/gfx/debug-layer.h
@@ -624,7 +624,6 @@ public:
encodeRayTracingCommands(IRayTracingCommandEncoder** outEncoder) override;
virtual SLANG_NO_THROW void SLANG_MCALL close() override;
virtual SLANG_NO_THROW Result SLANG_MCALL getNativeHandle(InteropHandle* outHandle) override;
- virtual SLANG_NO_THROW Result SLANG_MCALL resetDescriptorHeaps() override;
private:
void checkEncodersClosedBeforeNewEncoder();
diff --git a/tools/gfx/immediate-renderer-base.cpp b/tools/gfx/immediate-renderer-base.cpp
index e18727bdf..03751f531 100644
--- a/tools/gfx/immediate-renderer-base.cpp
+++ b/tools/gfx/immediate-renderer-base.cpp
@@ -31,8 +31,6 @@ public:
return nullptr;
}
- virtual SLANG_NO_THROW Result SLANG_MCALL resetDescriptorHeaps() override { return SLANG_OK; }
-
public:
CommandWriter m_writer;
bool m_hasWriteTimestamps = false;
diff --git a/tools/gfx/renderer-shared.cpp b/tools/gfx/renderer-shared.cpp
index 92d263ed6..555186840 100644
--- a/tools/gfx/renderer-shared.cpp
+++ b/tools/gfx/renderer-shared.cpp
@@ -34,6 +34,7 @@ const Slang::Guid GfxGUID::IID_IAccelerationStructure = SLANG_UUID_IAcceleration
const Slang::Guid GfxGUID::IID_IFence = SLANG_UUID_IFence;
const Slang::Guid GfxGUID::IID_IShaderTable = SLANG_UUID_IShaderTable;
const Slang::Guid GfxGUID::IID_IPipelineCreationAPIDispatcher = SLANG_UUID_IPipelineCreationAPIDispatcher;
+const Slang::Guid GfxGUID::IID_ID3D12TransientResourceHeap = SLANG_UUID_ID3D12TransientResourceHeap;
StageType translateStage(SlangStage slangStage)
diff --git a/tools/gfx/renderer-shared.h b/tools/gfx/renderer-shared.h
index a78113552..e82bc83d0 100644
--- a/tools/gfx/renderer-shared.h
+++ b/tools/gfx/renderer-shared.h
@@ -41,6 +41,7 @@ struct GfxGUID
static const Slang::Guid IID_IFence;
static const Slang::Guid IID_IShaderTable;
static const Slang::Guid IID_IPipelineCreationAPIDispatcher;
+ static const Slang::Guid IID_ID3D12TransientResourceHeap;
};
// We use a `BreakableReference` to avoid the cyclic reference situation in gfx implementation.
diff --git a/tools/gfx/transient-resource-heap-base.h b/tools/gfx/transient-resource-heap-base.h
index 2dc16dcd4..1b86b983c 100644
--- a/tools/gfx/transient-resource-heap-base.h
+++ b/tools/gfx/transient-resource-heap-base.h
@@ -3,43 +3,41 @@
namespace gfx
{
-template <typename TDevice, typename TBufferResource>
-class TransientResourceHeapBaseImpl : public TransientResourceHeapBase
+template<typename TDevice, typename TBufferResource>
+class StagingBufferPool
{
public:
- void breakStrongReferenceToDevice() { m_device.breakStrongReference(); }
+ struct StagingBufferPage
+ {
+ Slang::RefPtr<TBufferResource> resource;
+ size_t size;
+ };
-public:
- BreakableReference<TDevice> m_device;
- Slang::List<Slang::RefPtr<TBufferResource>> m_constantBuffers;
- Slang::List<Slang::RefPtr<TBufferResource>> m_stagingBuffers;
+ struct Allocation
+ {
+ TBufferResource* resource;
+ size_t offset;
+ };
- Slang::Index m_constantBufferAllocCounter = 0;
- size_t m_constantBufferOffsetAllocCounter = 0;
- uint32_t m_alignment = 256;
+ TDevice* m_device;
+ MemoryType m_memoryType;
+ uint32_t m_alignment;
+ ResourceStateSet m_allowedStates;
- Result init(const ITransientResourceHeap::Desc& desc, uint32_t alignment, TDevice* device)
- {
- m_device = device;
+ Slang::List<StagingBufferPage> m_pages;
+ Slang::List<Slang::RefPtr<TBufferResource>> m_largeAllocations;
- if (desc.constantBufferSize)
- {
- Slang::ComPtr<IBufferResource> bufferPtr;
- IBufferResource::Desc bufferDesc;
- bufferDesc.type = IResource::Type::Buffer;
- bufferDesc.defaultState = ResourceState::ConstantBuffer;
- bufferDesc.allowedStates =
- ResourceStateSet(ResourceState::ConstantBuffer, ResourceState::CopyDestination);
- bufferDesc.sizeInBytes = desc.constantBufferSize;
- bufferDesc.memoryType = MemoryType::Upload;
- SLANG_RETURN_ON_FAIL(
- m_device->createBufferResource(bufferDesc, nullptr, bufferPtr.writeRef()));
- m_constantBuffers.add(static_cast<TBufferResource*>(bufferPtr.get()));
- }
+ Slang::Index m_pageAllocCounter = 0;
+ size_t m_offsetAllocCounter = 0;
- m_version = getVersionCounter();
- getVersionCounter()++;
- return SLANG_OK;
+ const size_t kStagingBufferDefaultPageSize = 16 * 1024 * 1024;
+
+ void init(TDevice* device, MemoryType memoryType, uint32_t alignment, ResourceStateSet allowedStates)
+ {
+ m_device = device;
+ m_memoryType = memoryType;
+ m_alignment = alignment;
+ m_allowedStates = allowedStates;
}
static size_t alignUp(size_t value, uint32_t alignment)
@@ -47,38 +45,66 @@ public:
return (value + alignment - 1) / alignment * alignment;
}
- Result allocateStagingBuffer(size_t size, IBufferResource*& outBufferWeakPtr, ResourceState state)
+ void reset()
{
+ m_pageAllocCounter = 0;
+ m_offsetAllocCounter = 0;
+ m_largeAllocations.clearAndDeallocate();
+ }
+
+ Result newStagingBufferPage()
+ {
+ StagingBufferPage page;
+ size_t pageSize = kStagingBufferDefaultPageSize;
+
Slang::ComPtr<IBufferResource> bufferPtr;
IBufferResource::Desc bufferDesc;
bufferDesc.type = IResource::Type::Buffer;
- bufferDesc.defaultState = state;
- bufferDesc.allowedStates =
- ResourceStateSet(ResourceState::CopyDestination, ResourceState::CopySource);
- if (state == ResourceState::General)
- bufferDesc.memoryType = MemoryType::Upload;
- else
- bufferDesc.memoryType = MemoryType::ReadBack;
+ bufferDesc.defaultState = ResourceState::General;
+ bufferDesc.allowedStates = m_allowedStates;
+ bufferDesc.memoryType = m_memoryType;
+ bufferDesc.sizeInBytes = pageSize;
+ SLANG_RETURN_ON_FAIL(
+ m_device->createBufferResource(bufferDesc, nullptr, bufferPtr.writeRef()));
+
+ page.resource = static_cast<TBufferResource*>(bufferPtr.get());
+ page.size = pageSize;
+ m_pages.add(page);
+ return SLANG_OK;
+ }
+
+ Result newLargeBuffer(size_t size)
+ {
+ Slang::ComPtr<IBufferResource> bufferPtr;
+ IBufferResource::Desc bufferDesc;
+ bufferDesc.type = IResource::Type::Buffer;
+ bufferDesc.defaultState = ResourceState::General;
+ bufferDesc.allowedStates = m_allowedStates;
+ bufferDesc.memoryType = m_memoryType;
bufferDesc.sizeInBytes = size;
SLANG_RETURN_ON_FAIL(
m_device->createBufferResource(bufferDesc, nullptr, bufferPtr.writeRef()));
- m_stagingBuffers.add(static_cast<TBufferResource*>(bufferPtr.get()));
- outBufferWeakPtr = bufferPtr.get();
+ auto bufferImpl = static_cast<TBufferResource*>(bufferPtr.get());
+ m_largeAllocations.add(bufferImpl);
return SLANG_OK;
}
- Result allocateConstantBuffer(
- size_t size,
- IBufferResource*& outBufferWeakPtr,
- size_t& outOffset)
+ Allocation allocate(size_t size, bool forceLargePage)
{
- size_t bufferAllocOffset = alignUp(m_constantBufferOffsetAllocCounter, m_alignment);
+ if (forceLargePage || size >= (kStagingBufferDefaultPageSize >> 2))
+ {
+ newLargeBuffer(size);
+ Allocation result;
+ result.resource = m_largeAllocations.getLast();
+ result.offset = 0;
+ return result;
+ }
+
+ size_t bufferAllocOffset = alignUp(m_offsetAllocCounter, m_alignment);
Slang::Index bufferId = -1;
- // Find first constant buffer from `m_constantBufferAllocCounter` that has enough space
- // for this allocation.
- for (Slang::Index i = m_constantBufferAllocCounter; i < m_constantBuffers.getCount(); i++)
+ for (Slang::Index i = m_pageAllocCounter; i < m_pages.getCount(); i++)
{
- auto cb = m_constantBuffers[i].Ptr();
+ auto cb = m_pages[i].resource.Ptr();
if (bufferAllocOffset + size <= cb->getDesc()->sizeInBytes)
{
bufferId = i;
@@ -86,45 +112,105 @@ public:
}
bufferAllocOffset = 0;
}
- // If we cannot find an existing constant buffer with sufficient free space,
- // create a new constant buffer.
+ // If we cannot find an existing page with sufficient free space,
+ // create a new page.
if (bufferId == -1)
{
- Slang::ComPtr<IBufferResource> bufferPtr;
- IBufferResource::Desc bufferDesc;
- bufferDesc.type = IResource::Type::Buffer;
- bufferDesc.defaultState = ResourceState::ConstantBuffer;
- bufferDesc.allowedStates =
- ResourceStateSet(ResourceState::ConstantBuffer, ResourceState::CopyDestination);
- bufferDesc.memoryType = MemoryType::Upload;
- size_t lastConstantBufferSize = 0;
- if (m_constantBuffers.getCount())
+ newStagingBufferPage();
+ bufferId = m_pages.getCount() - 1;
+ }
+ // Sub allocate from current page.
+ Allocation result;
+ result.resource = m_pages[bufferId].resource.Ptr();
+ result.offset = bufferAllocOffset;
+ m_pageAllocCounter = bufferId;
+ m_offsetAllocCounter = bufferAllocOffset + size;
+ return result;
+ }
+};
+
+template <typename TDevice, typename TBufferResource>
+class TransientResourceHeapBaseImpl : public TransientResourceHeapBase
+{
+public:
+ void breakStrongReferenceToDevice() { m_device.breakStrongReference(); }
+
+public:
+ BreakableReference<TDevice> m_device;
+ StagingBufferPool<TDevice, TBufferResource> m_constantBufferPool;
+ StagingBufferPool<TDevice, TBufferResource> m_uploadBufferPool;
+ StagingBufferPool<TDevice, TBufferResource> m_readbackBufferPool;
+
+ Result init(const ITransientResourceHeap::Desc& desc, uint32_t alignment, TDevice* device)
+ {
+ m_device = device;
+
+ m_constantBufferPool.init(
+ device,
+ MemoryType::Upload,
+ 256,
+ ResourceStateSet(
+ ResourceState::ConstantBuffer,
+ ResourceState::CopySource,
+ ResourceState::CopyDestination));
+
+ m_uploadBufferPool.init(
+ device,
+ MemoryType::Upload,
+ 256,
+ ResourceStateSet(
+ ResourceState::CopySource,
+ ResourceState::CopyDestination));
+
+ m_readbackBufferPool.init(
+ device,
+ MemoryType::ReadBack,
+ 256,
+ ResourceStateSet(ResourceState::CopySource, ResourceState::CopyDestination));
+
+ m_version = getVersionCounter();
+ getVersionCounter()++;
+ return SLANG_OK;
+ }
+
+ Result allocateStagingBuffer(size_t size, IBufferResource*& outBufferWeakPtr, size_t& offset, MemoryType memoryType, bool forceLargePage = false)
+ {
+ switch (memoryType)
+ {
+ case MemoryType::ReadBack:
{
- lastConstantBufferSize = m_constantBuffers.getLast()->getDesc()->sizeInBytes;
+ auto allocation = m_readbackBufferPool.allocate(size, forceLargePage);
+ outBufferWeakPtr = allocation.resource;
+ offset = allocation.offset;
}
- bufferDesc.sizeInBytes = Slang::Math::Max(
- lastConstantBufferSize * 2, Slang::Math::Max(size, size_t(4 << 20)));
- SLANG_RETURN_ON_FAIL(
- m_device->createBufferResource(bufferDesc, nullptr, bufferPtr.writeRef()));
- bufferId = m_constantBuffers.getCount();
- bufferAllocOffset = 0;
- m_constantBuffers.add(static_cast<TBufferResource*>(bufferPtr.get()));
+ break;
+ default:
+ {
+ auto allocation = m_uploadBufferPool.allocate(size, forceLargePage);
+ outBufferWeakPtr = allocation.resource;
+ offset = allocation.offset;
+ }
+ break;
}
- // Sub allocate from current constant buffer.
- outBufferWeakPtr = m_constantBuffers[bufferId].Ptr();
- outOffset = bufferAllocOffset;
- m_constantBufferAllocCounter = bufferId;
- m_constantBufferOffsetAllocCounter = bufferAllocOffset + size;
+ return SLANG_OK;
+ }
+
+ Result allocateConstantBuffer(
+ size_t size,
+ IBufferResource*& outBufferWeakPtr,
+ size_t& outOffset)
+ {
+ auto allocation = m_constantBufferPool.allocate(size, false);
+ outBufferWeakPtr = allocation.resource;
+ outOffset = allocation.offset;
return SLANG_OK;
}
void reset()
{
- m_constantBufferAllocCounter = 0;
- m_constantBufferOffsetAllocCounter = 0;
- for (auto& stagingBuffer : m_stagingBuffers)
- stagingBuffer = nullptr;
- m_stagingBuffers.clear();
+ m_constantBufferPool.reset();
+ m_uploadBufferPool.reset();
+ m_readbackBufferPool.reset();
m_version = getVersionCounter();
getVersionCounter()++;
}
diff --git a/tools/gfx/vulkan/render-vk.cpp b/tools/gfx/vulkan/render-vk.cpp
index d1713ae01..a1544ebc0 100644
--- a/tools/gfx/vulkan/render-vk.cpp
+++ b/tools/gfx/vulkan/render-vk.cpp
@@ -2482,22 +2482,29 @@ public:
{
auto& api = buffer->m_renderer->m_api;
IBufferResource* stagingBuffer = nullptr;
- transientHeap->allocateStagingBuffer(size, stagingBuffer, ResourceState::CopySource);
+ size_t stagingBufferOffset = 0;
+ transientHeap->allocateStagingBuffer(
+ size, stagingBuffer, stagingBufferOffset, MemoryType::Upload);
BufferResourceImpl* stagingBufferImpl =
static_cast<BufferResourceImpl*>(stagingBuffer);
void* mappedData = nullptr;
SLANG_VK_CHECK(api.vkMapMemory(
- api.m_device, stagingBufferImpl->m_buffer.m_memory, 0, size, 0, &mappedData));
- memcpy(mappedData, data, size);
+ api.m_device,
+ stagingBufferImpl->m_buffer.m_memory,
+ 0,
+ stagingBufferOffset + size,
+ 0,
+ &mappedData));
+ memcpy((char*)mappedData + stagingBufferOffset, data, size);
api.vkUnmapMemory(api.m_device, stagingBufferImpl->m_buffer.m_memory);
// Copy from staging buffer to real buffer
VkBufferCopy copyInfo = {};
copyInfo.size = size;
copyInfo.dstOffset = offset;
- copyInfo.srcOffset = 0;
+ copyInfo.srcOffset = stagingBufferOffset;
api.vkCmdCopyBuffer(
commandBuffer,
stagingBufferImpl->m_buffer.m_buffer,
@@ -3962,8 +3969,9 @@ public:
static_cast<TransientResourceHeapImpl*>(transientHeap);
IBufferResource* stagingBuffer = nullptr;
+ size_t stagingBufferOffset = 0;
transientHeapImpl->allocateStagingBuffer(
- tableSize, stagingBuffer, ResourceState::General);
+ tableSize, stagingBuffer, stagingBufferOffset, MemoryType::Upload);
assert(stagingBuffer);
void* stagingPtr = nullptr;
@@ -3975,7 +3983,7 @@ public:
handles.setCount(totalHandleSize);
auto result = vkApi.vkGetRayTracingShaderGroupHandlesKHR(m_device->m_device, pipelineImpl->m_pipeline, 0, (uint32_t)handleCount, totalHandleSize, handles.getBuffer());
- uint8_t* stagingBufferPtr = (uint8_t*)stagingPtr;
+ uint8_t* stagingBufferPtr = (uint8_t*)stagingPtr + stagingBufferOffset;
auto subTablePtr = stagingBufferPtr;
Int shaderTableEntryCounter = 0;
@@ -4026,7 +4034,7 @@ public:
// TODO: Callable shaders?
stagingBuffer->unmap(nullptr);
- encoder->copyBuffer(bufferResource, 0, stagingBuffer, 0, tableSize);
+ encoder->copyBuffer(bufferResource, 0, stagingBuffer, stagingBufferOffset, tableSize);
encoder->bufferBarrier(
1,
bufferResource.readRef(),
@@ -4053,10 +4061,6 @@ public:
return nullptr;
}
virtual void comFree() override { m_transientHeap.breakStrongReference(); }
- virtual SLANG_NO_THROW Result SLANG_MCALL resetDescriptorHeaps() override
- {
- return SLANG_OK;
- }
public:
VkCommandBuffer m_commandBuffer;
VkCommandBuffer m_preCommandBuffer = VK_NULL_HANDLE;
@@ -4557,8 +4561,9 @@ public:
bufferSize *= subResourceRange.layerCount;
IBufferResource* uploadBuffer = nullptr;
+ size_t uploadBufferOffset = 0;
m_commandBuffer->m_transientHeap->allocateStagingBuffer(
- bufferSize, uploadBuffer, gfx::ResourceState::CopySource);
+ bufferSize, uploadBuffer, uploadBufferOffset, MemoryType::Upload);
// Copy into upload buffer
{
@@ -4566,8 +4571,9 @@ public:
uint8_t* dstData;
uploadBuffer->map(nullptr, (void**)&dstData);
+ dstData += uploadBufferOffset;
uint8_t* dstDataStart;
- dstDataStart = dstData;
+ dstDataStart = dstData ;
size_t dstSubresourceOffset = 0;
for (uint32_t i = 0; i < subResourceRange.layerCount; ++i)
@@ -4612,7 +4618,7 @@ public:
uploadBuffer->unmap(nullptr);
}
{
- size_t srcOffset = 0;
+ size_t srcOffset = uploadBufferOffset;
for (uint32_t i = 0; i < subResourceRange.layerCount; ++i)
{
for (Index j = 0; j < mipSizes.getCount(); ++j)
@@ -6569,15 +6575,18 @@ Result VKDevice::PipelineCommandEncoder::bindRootShaderObjectImpl(
// Once we've filled in all the descriptor sets, we bind them
// to the pipeline at once.
//
- m_device->m_api.vkCmdBindDescriptorSets(
- m_commandBuffer->m_commandBuffer,
- bindPoint,
- specializedLayout->m_pipelineLayout,
- 0,
- (uint32_t) descriptorSetCount,
- descriptorSets,
- 0,
- nullptr);
+ if (descriptorSetCount > 0)
+ {
+ m_device->m_api.vkCmdBindDescriptorSets(
+ m_commandBuffer->m_commandBuffer,
+ bindPoint,
+ specializedLayout->m_pipelineLayout,
+ 0,
+ (uint32_t) descriptorSetCount,
+ descriptorSets,
+ 0,
+ nullptr);
+ }
return SLANG_OK;
}
@@ -7226,9 +7235,13 @@ Result VKDevice::initVulkanInstanceAndDevice(const InteropHandle* handles, bool
#endif
m_features.add("external-memory");
}
- if (extensionNames.Contains(VK_EXT_DEBUG_MARKER_EXTENSION_NAME))
+ if (extensionNames.Contains(VK_EXT_DEBUG_REPORT_EXTENSION_NAME))
{
- deviceExtensions.add(VK_EXT_DEBUG_MARKER_EXTENSION_NAME);
+ deviceExtensions.add(VK_EXT_DEBUG_REPORT_EXTENSION_NAME);
+ if (extensionNames.Contains(VK_EXT_DEBUG_MARKER_EXTENSION_NAME))
+ {
+ deviceExtensions.add(VK_EXT_DEBUG_MARKER_EXTENSION_NAME);
+ }
}
if (extensionNames.Contains(VK_EXT_SHADER_VIEWPORT_INDEX_LAYER_EXTENSION_NAME))
{