diff options
| field | value | date |
|---|---|---|
| author | Yong He <yonghe@outlook.com> | 2022-02-23 10:30:19 -0800 |
| committer | GitHub <noreply@github.com> | 2022-02-23 10:30:19 -0800 |
| commit | 393d5beb1e0e71e6f2a384c9ab19b717f389a056 (patch) | |
| tree | 1341fcdf592127f5e78054c73e8cb032381a0b56 | |
| parent | c4790309ec46ae2f4f7c49eb50699a950ee7a9a4 (diff) | |
gfx: d3d12 performance optimizations. (#2140)
* gfx: d3d12 performance optimizations.
* Fix.
* Fix unit test bug.
* Add gfx interface for directly allocating GPU descriptor tables.
Co-authored-by: Yong He <yhe@nvidia.com>
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | slang-gfx.h | 19 |
| -rw-r--r-- | tools/gfx-unit-test/nested-parameter-block.cpp | 4 |
| -rw-r--r-- | tools/gfx/cuda/render-cuda.cpp | 6 |
| -rw-r--r-- | tools/gfx/d3d/d3d-swapchain.h | 35 |
| -rw-r--r-- | tools/gfx/d3d12/render-d3d12.cpp | 170 |
| -rw-r--r-- | tools/gfx/debug-layer.cpp | 6 |
| -rw-r--r-- | tools/gfx/debug-layer.h | 1 |
| -rw-r--r-- | tools/gfx/immediate-renderer-base.cpp | 2 |
| -rw-r--r-- | tools/gfx/renderer-shared.cpp | 1 |
| -rw-r--r-- | tools/gfx/renderer-shared.h | 1 |
| -rw-r--r-- | tools/gfx/transient-resource-heap-base.h | 242 |
| -rw-r--r-- | tools/gfx/vulkan/render-vk.cpp | 63 |
12 files changed, 304 insertions, 246 deletions
diff --git a/slang-gfx.h b/slang-gfx.h index 25785cb40..9971b81fe 100644 --- a/slang-gfx.h +++ b/slang-gfx.h @@ -1821,8 +1821,6 @@ public: virtual SLANG_NO_THROW void SLANG_MCALL close() = 0; virtual SLANG_NO_THROW Result SLANG_MCALL getNativeHandle(InteropHandle* outHandle) = 0; - - virtual SLANG_NO_THROW Result SLANG_MCALL resetDescriptorHeaps() = 0; }; #define SLANG_UUID_ICommandBuffer \ { \ @@ -1912,6 +1910,23 @@ public: 0xcd48bd29, 0xee72, 0x41b8, { 0xbc, 0xff, 0xa, 0x2b, 0x3a, 0xaa, 0x6d, 0xeb } \ } +class ID3D12TransientResourceHeap : public ISlangUnknown +{ +public: + enum class DescriptorType + { + ResourceView, Sampler + }; + virtual SLANG_NO_THROW Result SLANG_MCALL allocateTransientDescriptorTable( + DescriptorType type, + uint32_t count, + uint64_t& outDescriptorOffset, + void** outD3DDescriptorHeapHandle) = 0; +}; +#define SLANG_UUID_ID3D12TransientResourceHeap \ + { \ + 0x9bc6a8bc, 0x5f7a, 0x454a, { 0x93, 0xef, 0x3b, 0x10, 0x5b, 0xb7, 0x63, 0x7e } \ + } class ISwapchain : public ISlangUnknown { diff --git a/tools/gfx-unit-test/nested-parameter-block.cpp b/tools/gfx-unit-test/nested-parameter-block.cpp index 774a94c5f..907b4c868 100644 --- a/tools/gfx-unit-test/nested-parameter-block.cpp +++ b/tools/gfx-unit-test/nested-parameter-block.cpp @@ -68,7 +68,7 @@ namespace gfx_test srvDesc.type = IResourceView::Type::ShaderResource; srvDesc.format = Format::Unknown; srvDesc.bufferElementSize = sizeof(uint32_t) * 4; - srvDesc.bufferRange.elementCount = 4; + srvDesc.bufferRange.elementCount = 1; srvDesc.bufferRange.firstElement = 0; srvs.add(device->createBufferView(srvBuffers[i], nullptr, srvDesc)); } @@ -78,7 +78,7 @@ namespace gfx_test resultBufferViewDesc.type = IResourceView::Type::UnorderedAccess; resultBufferViewDesc.format = Format::Unknown; resultBufferViewDesc.bufferElementSize = sizeof(uint32_t) * 4; - resultBufferViewDesc.bufferRange.elementCount = 4; + resultBufferViewDesc.bufferRange.elementCount = 1; 
resultBufferViewDesc.bufferRange.firstElement = 0; Slang::ComPtr<IResourceView> resultBufferView; SLANG_CHECK(SLANG_SUCCEEDED(device->createBufferView( diff --git a/tools/gfx/cuda/render-cuda.cpp b/tools/gfx/cuda/render-cuda.cpp index db5661f20..6d1f7f354 100644 --- a/tools/gfx/cuda/render-cuda.cpp +++ b/tools/gfx/cuda/render-cuda.cpp @@ -945,12 +945,6 @@ public: return static_cast<ICommandBuffer*>(this); return nullptr; } - - virtual SLANG_NO_THROW Result SLANG_MCALL resetDescriptorHeaps() override - { - return SLANG_OK; - } - public: CUDADevice* m_device; TransientResourceHeapBase* m_transientHeap; diff --git a/tools/gfx/d3d/d3d-swapchain.h b/tools/gfx/d3d/d3d-swapchain.h index 1c29b2039..36a35f754 100644 --- a/tools/gfx/d3d/d3d-swapchain.h +++ b/tools/gfx/d3d/d3d-swapchain.h @@ -87,20 +87,6 @@ public: SLANG_RETURN_ON_FAIL(swapChain1->QueryInterface(m_swapChain.writeRef())); } - if (!desc.enableVSync) - { - m_swapChainWaitableObject = m_swapChain->GetFrameLatencyWaitableObject(); - - int maxLatency = desc.imageCount - 2; - - // Make sure the maximum latency is in the range required by dx runtime - maxLatency = (maxLatency < 1) ? 1 : maxLatency; - maxLatency = (maxLatency > DXGI_MAX_SWAP_CHAIN_BUFFERS) ? DXGI_MAX_SWAP_CHAIN_BUFFERS - : maxLatency; - - m_swapChain->SetMaximumFrameLatency(maxLatency); - } - createSwapchainBufferImages(); return SLANG_OK; } @@ -113,25 +99,9 @@ public: } virtual SLANG_NO_THROW Result SLANG_MCALL present() override { - if (m_swapChainWaitableObject) + if (SLANG_FAILED(m_swapChain->Present(m_desc.enableVSync ? 1 : 0, 0))) { - // check if now is good time to present - // This doesn't wait - because the wait time is 0. If it returns WAIT_TIMEOUT it - // means that no frame is waiting to be be displayed so there is no point doing a - // present. 
- const bool shouldPresent = - (WaitForSingleObjectEx(m_swapChainWaitableObject, 0, TRUE) != WAIT_TIMEOUT); - if (shouldPresent) - { - m_swapChain->Present(0, 0); - } - } - else - { - if (SLANG_FAILED(m_swapChain->Present(1, 0))) - { - return SLANG_FAIL; - } + return SLANG_FAIL; } return SLANG_OK; } @@ -171,7 +141,6 @@ public: virtual IDXGIFactory* getDXGIFactory() = 0; virtual IUnknown* getOwningDevice() = 0; ISwapchain::Desc m_desc; - HANDLE m_swapChainWaitableObject = nullptr; ComPtr<IDXGISwapChain2> m_swapChain; Slang::ShortList<Slang::RefPtr<TextureResource>> m_images; }; diff --git a/tools/gfx/d3d12/render-d3d12.cpp b/tools/gfx/d3d12/render-d3d12.cpp index aa008ea81..826475f43 100644 --- a/tools/gfx/d3d12/render-d3d12.cpp +++ b/tools/gfx/d3d12/render-d3d12.cpp @@ -556,6 +556,7 @@ public: m_fence->SetEventOnCompletion(m_eventValue, m_waitEvent); m_commandQueue->Signal(m_fence, m_eventValue); WaitForSingleObject(m_waitEvent, INFINITE); + m_commandAllocator->Reset(); int8_t* mappedData = nullptr; D3D12_RANGE readRange = { sizeof(uint64_t) * queryIndex, sizeof(uint64_t) * (queryIndex + count) }; @@ -787,6 +788,7 @@ public: class TransientResourceHeapImpl : public TransientResourceHeapBaseImpl<D3D12Device, BufferResourceImpl> + , public ID3D12TransientResourceHeap { private: typedef TransientResourceHeapBaseImpl<D3D12Device, BufferResourceImpl> Super; @@ -843,6 +845,39 @@ public: D3D12LinearExpandingDescriptorHeap m_stagingCpuViewHeap; D3D12LinearExpandingDescriptorHeap m_stagingCpuSamplerHeap; + virtual SLANG_NO_THROW SlangResult SLANG_MCALL + queryInterface(SlangUUID const& uuid, void** outObject) override + { + if (uuid == GfxGUID::IID_ID3D12TransientResourceHeap) + { + *outObject = static_cast<ID3D12TransientResourceHeap*>(this); + addRef(); + return SLANG_OK; + } + return Super::queryInterface(uuid, outObject); + } + + virtual SLANG_NO_THROW uint32_t SLANG_MCALL addRef() override { return Super::addRef(); } + virtual SLANG_NO_THROW uint32_t SLANG_MCALL 
release() override { return Super::release(); } + + virtual SLANG_NO_THROW Result SLANG_MCALL allocateTransientDescriptorTable( + DescriptorType type, + uint32_t count, + uint64_t& outDescriptorOffset, + void** outD3DDescriptorHeapHandle) override + { + auto& heap = (type == DescriptorType::ResourceView) ? getCurrentViewHeap() + : getCurrentSamplerHeap(); + int allocResult = heap.allocate((int)count); + if (allocResult == -1) + { + return SLANG_E_OUT_OF_MEMORY; + } + outDescriptorOffset = (uint64_t)allocResult; + *outD3DDescriptorHeapHandle = heap.getHeap(); + return SLANG_OK; + } + ~TransientResourceHeapImpl() { synchronizeAndReset(); @@ -881,22 +916,6 @@ public: allocateNewViewDescriptorHeap(device); allocateNewSamplerDescriptorHeap(device); - if (desc.constantBufferSize != 0) - { - ComPtr<IBufferResource> bufferResourcePtr; - IBufferResource::Desc bufferDesc; - bufferDesc.type = IResource::Type::Buffer; - bufferDesc.defaultState = ResourceState::ConstantBuffer; - bufferDesc.allowedStates = - ResourceStateSet(ResourceState::ConstantBuffer, ResourceState::CopyDestination); - bufferDesc.sizeInBytes = desc.constantBufferSize; - bufferDesc.memoryType = MemoryType::Upload; - SLANG_RETURN_ON_FAIL(device->createBufferResource( - bufferDesc, - nullptr, - bufferResourcePtr.writeRef())); - m_constantBuffers.add(static_cast<BufferResourceImpl*>(bufferResourcePtr.get())); - } return SLANG_OK; } @@ -957,15 +976,12 @@ public: size_t size, void* data) { - D3D12_RANGE readRange = {}; - readRange.Begin = offset; - readRange.End = offset + size; - - IBufferResource* uploadResource; + size_t uploadResourceOffset = 0; if (buffer->getDesc()->memoryType != MemoryType::Upload) { - transientHeap->allocateStagingBuffer(size, uploadResource, ResourceState::General); + SLANG_RETURN_ON_FAIL(transientHeap->allocateStagingBuffer( + size, uploadResource, uploadResourceOffset, MemoryType::Upload)); } D3D12Resource& uploadResourceRef = @@ -973,32 +989,26 @@ public: ? 
buffer->m_resource : static_cast<BufferResourceImpl*>(uploadResource)->m_resource; + D3D12_RANGE readRange = {}; + readRange.Begin = 0; + readRange.End = 0; void* uploadData; SLANG_RETURN_ON_FAIL(uploadResourceRef.getResource()->Map( 0, &readRange, reinterpret_cast<void**>(&uploadData))); - memcpy((uint8_t*)uploadData + offset, data, size); - uploadResourceRef.getResource()->Unmap(0, &readRange); + memcpy((uint8_t*)uploadData + uploadResourceOffset + offset, data, size); + D3D12_RANGE writtenRange = {}; + writtenRange.Begin = uploadResourceOffset + offset; + writtenRange.End = uploadResourceOffset + offset + size; + uploadResourceRef.getResource()->Unmap(0, &writtenRange); if (buffer->getDesc()->memoryType != MemoryType::Upload) { - { - D3D12BarrierSubmitter submitter(cmdList); - submitter.transition( - buffer->m_resource, buffer->m_defaultState, D3D12_RESOURCE_STATE_COPY_DEST); - } cmdList->CopyBufferRegion( buffer->m_resource.getResource(), offset, uploadResourceRef.getResource(), - offset, + uploadResourceOffset + offset, size); - - // Should already be in COPY_DEST if write flag was set. 
- { - D3D12BarrierSubmitter submitter(cmdList); - submitter.transition( - buffer->m_resource, D3D12_RESOURCE_STATE_COPY_DEST, buffer->m_defaultState); - } } return SLANG_OK; @@ -3725,8 +3735,9 @@ public: static_cast<TransientResourceHeapImpl*>(transientHeap); IBufferResource* stagingBuffer = nullptr; + size_t stagingBufferOffset = 0; transientHeapImpl->allocateStagingBuffer( - tableSize, stagingBuffer, ResourceState::General); + tableSize, stagingBuffer, stagingBufferOffset, MemoryType::Upload); assert(stagingBuffer); void* stagingPtr = nullptr; @@ -3749,7 +3760,7 @@ public: } }; - uint8_t* stagingBufferPtr = (uint8_t*)stagingPtr; + uint8_t* stagingBufferPtr = (uint8_t*)stagingPtr + stagingBufferOffset; for (uint32_t i = 0; i < m_rayGenShaderCount; i++) { copyShaderIdInto( @@ -3776,7 +3787,7 @@ public: } stagingBuffer->unmap(nullptr); - encoder->copyBuffer(bufferResource, 0, stagingBuffer, 0, tableSize); + encoder->copyBuffer(bufferResource, 0, stagingBuffer, stagingBufferOffset, tableSize); encoder->bufferBarrier( 1, bufferResource.readRef(), @@ -3813,12 +3824,6 @@ public: return SLANG_OK; } - virtual SLANG_NO_THROW Result SLANG_MCALL resetDescriptorHeaps() override - { - bindDescriptorHeaps(); - return SLANG_OK; - } - public: ComPtr<ID3D12GraphicsCommandList> m_cmdList; ComPtr<ID3D12GraphicsCommandList1> m_cmdList1; @@ -3830,19 +3835,26 @@ public: D3D12Device* m_renderer; RootShaderObjectImpl m_rootShaderObject; RefPtr<MutableRootShaderObjectImpl> m_mutableRootShaderObject; + bool m_descriptorHeapsBound = false; void bindDescriptorHeaps() { - ID3D12DescriptorHeap* heaps[] = { - m_transientHeap->getCurrentViewHeap().getHeap(), - m_transientHeap->getCurrentSamplerHeap().getHeap(), - }; - m_cmdList->SetDescriptorHeaps(SLANG_COUNT_OF(heaps), heaps); + if (!m_descriptorHeapsBound) + { + ID3D12DescriptorHeap* heaps[] = { + m_transientHeap->getCurrentViewHeap().getHeap(), + m_transientHeap->getCurrentSamplerHeap().getHeap(), + }; + 
m_cmdList->SetDescriptorHeaps(SLANG_COUNT_OF(heaps), heaps); + m_descriptorHeapsBound = true; + } } + void invalidateDescriptorHeapBinding() { m_descriptorHeapsBound = false; } + void reinit() { - bindDescriptorHeaps(); + invalidateDescriptorHeapBinding(); m_rootShaderObject.init(m_renderer); } @@ -3936,23 +3948,9 @@ public: auto arraySize = textureDesc->arraySize; if (arraySize == 0) arraySize = 1; - for (uint32_t planeIndex = 0; planeIndex < planeCount; planeIndex++) - { - for (int layer = 0; layer < arraySize; layer++) - { - for (int mip = 0; mip < textureDesc->numMipLevels; mip++) - { - barrier.Transition.Subresource = D3DUtil::getSubresourceIndex( - mip, - layer, - planeIndex, - textureImpl->getDesc()->numMipLevels, - arraySize); - barriers.add(barrier); - } - } - } + barrier.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; } + barriers.add(barrier); } if (barriers.getCount()) { @@ -4119,7 +4117,6 @@ public: D3D12_TEXTURE_COPY_LOCATION srcRegion = {}; srcRegion.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; D3D12_PLACED_SUBRESOURCE_FOOTPRINT& footprint = srcRegion.PlacedFootprint; - footprint.Offset = 0; footprint.Footprint.Format = texDesc.Format; uint32_t mipLevel = D3DUtil::getSubresourceMipLevel( @@ -4162,9 +4159,10 @@ public: footprint.Footprint.RowPitch * rowCount * footprint.Footprint.Depth; IBufferResource* stagingBuffer; + size_t stagingBufferOffset = 0; m_commandBuffer->m_transientHeap->allocateStagingBuffer( - bufferSize, stagingBuffer, ResourceState::General); - + bufferSize, stagingBuffer, stagingBufferOffset, MemoryType::Upload, true); + assert(stagingBufferOffset == 0); BufferResourceImpl* bufferImpl = static_cast<BufferResourceImpl*>(stagingBuffer); uint8_t* bufferData = nullptr; @@ -4185,9 +4183,7 @@ public: } } bufferImpl->m_resource.getResource()->Unmap(0, nullptr); - srcRegion.pResource = bufferImpl->m_resource.getResource(); - m_commandBuffer->m_cmdList->CopyTextureRegion( &dstRegion, offset.x, offset.y, offset.z, 
&srcRegion, nullptr); } @@ -4252,8 +4248,6 @@ public: m_commandBuffer->m_renderer); gpuHandleIndex = m_commandBuffer->m_transientHeap->getCurrentViewHeap().allocate(1); - auto d3dViewHeap = - m_commandBuffer->m_transientHeap->getCurrentViewHeap().getHeap(); m_commandBuffer->bindDescriptorHeaps(); } this->m_commandBuffer->m_renderer->m_device->CopyDescriptorsSimple( @@ -4495,13 +4489,6 @@ public: { auto textureImpl = static_cast<TextureResourceImpl*>(texture); - if (subresourceRange.mipLevelCount == 0) - subresourceRange.mipLevelCount = textureImpl->getDesc()->numMipLevels; - if (subresourceRange.layerCount == 0) - subresourceRange.layerCount = textureImpl->getDesc()->arraySize; - - auto d3dFormat = D3DUtil::getMapFormat(textureImpl->getDesc()->format); - ShortList<D3D12_RESOURCE_BARRIER> barriers; D3D12_RESOURCE_BARRIER barrier; barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE; @@ -4509,13 +4496,17 @@ public: { barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_UAV; barrier.UAV.pResource = textureImpl->m_resource.getResource(); + barriers.add(barrier); } else { barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; barrier.Transition.StateBefore = D3DUtil::getResourceState(src); barrier.Transition.StateAfter = D3DUtil::getResourceState(dst); + if (barrier.Transition.StateBefore == barrier.Transition.StateAfter) + return; barrier.Transition.pResource = textureImpl->m_resource.getResource(); + auto d3dFormat = D3DUtil::getMapFormat(textureImpl->getDesc()->format); auto aspectMask = (int32_t)subresourceRange.aspectMask; if (subresourceRange.aspectMask == TextureAspect::Default) aspectMask = (int32_t)TextureAspect::Color; @@ -5438,10 +5429,11 @@ public: } virtual SLANG_NO_THROW Result SLANG_MCALL present() override { + m_fence->SetEventOnCompletion(fenceValue, m_frameEvents[m_swapChain3->GetCurrentBackBufferIndex()]); SLANG_RETURN_ON_FAIL(D3DSwapchainBase::present()); fenceValue++; - m_fence->SetEventOnCompletion(fenceValue, 
m_frameEvents[m_swapChain3->GetCurrentBackBufferIndex()]); m_queue->Signal(m_fence, fenceValue); + return SLANG_OK; } }; @@ -5630,6 +5622,7 @@ Result D3D12Device::PipelineCommandEncoder::_bindRenderState(Submitter* submitte // themselves will be responsible for allocating, binding, and filling in // any descriptor tables or other root parameters needed. // + m_commandBuffer->bindDescriptorHeaps(); if (rootObjectImpl->bindAsRoot(&context, rootLayoutImpl) == SLANG_E_OUT_OF_MEMORY) { if (!m_transientHeap->canResize()) @@ -5639,6 +5632,7 @@ Result D3D12Device::PipelineCommandEncoder::_bindRenderState(Submitter* submitte // If we run out of heap space while binding, allocate new descriptor heaps and try again. ID3D12DescriptorHeap* d3dheap = nullptr; + m_commandBuffer->invalidateDescriptorHeapBinding(); switch (context.outOfMemoryHeap) { case D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV: @@ -5842,7 +5836,7 @@ static void _initSrvDesc( Result D3D12Device::createBuffer(const D3D12_RESOURCE_DESC& resourceDesc, const void* srcData, size_t srcDataSize, D3D12_RESOURCE_STATES finalState, D3D12Resource& resourceOut, bool isShared, MemoryType memoryType) { - const size_t bufferSize = size_t(resourceDesc.Width); + const size_t bufferSize = size_t(resourceDesc.Width); D3D12_HEAP_PROPERTIES heapProps; heapProps.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN; @@ -6982,16 +6976,10 @@ Result D3D12Device::createBufferResource(const IBufferResource::Desc& descIn, co { BufferResource::Desc srcDesc = fixupBufferDesc(descIn); - // Always align up to 256 bytes, since that is required for constant buffers. - // - // TODO: only do this for buffers that could potentially be bound as constant buffers... 
- // - const size_t alignedSizeInBytes = D3DUtil::calcAligned(srcDesc.sizeInBytes, 256); - RefPtr<BufferResourceImpl> buffer(new BufferResourceImpl(srcDesc)); D3D12_RESOURCE_DESC bufferDesc; - _initBufferResourceDesc(alignedSizeInBytes, bufferDesc); + _initBufferResourceDesc(descIn.sizeInBytes, bufferDesc); bufferDesc.Flags |= _calcResourceFlags(srcDesc.allowedStates); diff --git a/tools/gfx/debug-layer.cpp b/tools/gfx/debug-layer.cpp index 8ecdf405a..9cd8d216c 100644 --- a/tools/gfx/debug-layer.cpp +++ b/tools/gfx/debug-layer.cpp @@ -1015,12 +1015,6 @@ Result DebugCommandBuffer::getNativeHandle(InteropHandle* outHandle) return baseObject->getNativeHandle(outHandle); } -Result DebugCommandBuffer::resetDescriptorHeaps() -{ - SLANG_GFX_API_FUNC; - return baseObject->resetDescriptorHeaps(); -} - void DebugCommandBuffer::checkEncodersClosedBeforeNewEncoder() { if (m_renderCommandEncoder.isOpen || m_resourceCommandEncoder.isOpen || diff --git a/tools/gfx/debug-layer.h b/tools/gfx/debug-layer.h index a8cdc1b4f..c141ce24a 100644 --- a/tools/gfx/debug-layer.h +++ b/tools/gfx/debug-layer.h @@ -624,7 +624,6 @@ public: encodeRayTracingCommands(IRayTracingCommandEncoder** outEncoder) override; virtual SLANG_NO_THROW void SLANG_MCALL close() override; virtual SLANG_NO_THROW Result SLANG_MCALL getNativeHandle(InteropHandle* outHandle) override; - virtual SLANG_NO_THROW Result SLANG_MCALL resetDescriptorHeaps() override; private: void checkEncodersClosedBeforeNewEncoder(); diff --git a/tools/gfx/immediate-renderer-base.cpp b/tools/gfx/immediate-renderer-base.cpp index e18727bdf..03751f531 100644 --- a/tools/gfx/immediate-renderer-base.cpp +++ b/tools/gfx/immediate-renderer-base.cpp @@ -31,8 +31,6 @@ public: return nullptr; } - virtual SLANG_NO_THROW Result SLANG_MCALL resetDescriptorHeaps() override { return SLANG_OK; } - public: CommandWriter m_writer; bool m_hasWriteTimestamps = false; diff --git a/tools/gfx/renderer-shared.cpp b/tools/gfx/renderer-shared.cpp index 
92d263ed6..555186840 100644 --- a/tools/gfx/renderer-shared.cpp +++ b/tools/gfx/renderer-shared.cpp @@ -34,6 +34,7 @@ const Slang::Guid GfxGUID::IID_IAccelerationStructure = SLANG_UUID_IAcceleration const Slang::Guid GfxGUID::IID_IFence = SLANG_UUID_IFence; const Slang::Guid GfxGUID::IID_IShaderTable = SLANG_UUID_IShaderTable; const Slang::Guid GfxGUID::IID_IPipelineCreationAPIDispatcher = SLANG_UUID_IPipelineCreationAPIDispatcher; +const Slang::Guid GfxGUID::IID_ID3D12TransientResourceHeap = SLANG_UUID_ID3D12TransientResourceHeap; StageType translateStage(SlangStage slangStage) diff --git a/tools/gfx/renderer-shared.h b/tools/gfx/renderer-shared.h index a78113552..e82bc83d0 100644 --- a/tools/gfx/renderer-shared.h +++ b/tools/gfx/renderer-shared.h @@ -41,6 +41,7 @@ struct GfxGUID static const Slang::Guid IID_IFence; static const Slang::Guid IID_IShaderTable; static const Slang::Guid IID_IPipelineCreationAPIDispatcher; + static const Slang::Guid IID_ID3D12TransientResourceHeap; }; // We use a `BreakableReference` to avoid the cyclic reference situation in gfx implementation. 
diff --git a/tools/gfx/transient-resource-heap-base.h b/tools/gfx/transient-resource-heap-base.h index 2dc16dcd4..1b86b983c 100644 --- a/tools/gfx/transient-resource-heap-base.h +++ b/tools/gfx/transient-resource-heap-base.h @@ -3,43 +3,41 @@ namespace gfx { -template <typename TDevice, typename TBufferResource> -class TransientResourceHeapBaseImpl : public TransientResourceHeapBase +template<typename TDevice, typename TBufferResource> +class StagingBufferPool { public: - void breakStrongReferenceToDevice() { m_device.breakStrongReference(); } + struct StagingBufferPage + { + Slang::RefPtr<TBufferResource> resource; + size_t size; + }; -public: - BreakableReference<TDevice> m_device; - Slang::List<Slang::RefPtr<TBufferResource>> m_constantBuffers; - Slang::List<Slang::RefPtr<TBufferResource>> m_stagingBuffers; + struct Allocation + { + TBufferResource* resource; + size_t offset; + }; - Slang::Index m_constantBufferAllocCounter = 0; - size_t m_constantBufferOffsetAllocCounter = 0; - uint32_t m_alignment = 256; + TDevice* m_device; + MemoryType m_memoryType; + uint32_t m_alignment; + ResourceStateSet m_allowedStates; - Result init(const ITransientResourceHeap::Desc& desc, uint32_t alignment, TDevice* device) - { - m_device = device; + Slang::List<StagingBufferPage> m_pages; + Slang::List<Slang::RefPtr<TBufferResource>> m_largeAllocations; - if (desc.constantBufferSize) - { - Slang::ComPtr<IBufferResource> bufferPtr; - IBufferResource::Desc bufferDesc; - bufferDesc.type = IResource::Type::Buffer; - bufferDesc.defaultState = ResourceState::ConstantBuffer; - bufferDesc.allowedStates = - ResourceStateSet(ResourceState::ConstantBuffer, ResourceState::CopyDestination); - bufferDesc.sizeInBytes = desc.constantBufferSize; - bufferDesc.memoryType = MemoryType::Upload; - SLANG_RETURN_ON_FAIL( - m_device->createBufferResource(bufferDesc, nullptr, bufferPtr.writeRef())); - m_constantBuffers.add(static_cast<TBufferResource*>(bufferPtr.get())); - } + Slang::Index 
m_pageAllocCounter = 0; + size_t m_offsetAllocCounter = 0; - m_version = getVersionCounter(); - getVersionCounter()++; - return SLANG_OK; + const size_t kStagingBufferDefaultPageSize = 16 * 1024 * 1024; + + void init(TDevice* device, MemoryType memoryType, uint32_t alignment, ResourceStateSet allowedStates) + { + m_device = device; + m_memoryType = memoryType; + m_alignment = alignment; + m_allowedStates = allowedStates; } static size_t alignUp(size_t value, uint32_t alignment) @@ -47,38 +45,66 @@ public: return (value + alignment - 1) / alignment * alignment; } - Result allocateStagingBuffer(size_t size, IBufferResource*& outBufferWeakPtr, ResourceState state) + void reset() { + m_pageAllocCounter = 0; + m_offsetAllocCounter = 0; + m_largeAllocations.clearAndDeallocate(); + } + + Result newStagingBufferPage() + { + StagingBufferPage page; + size_t pageSize = kStagingBufferDefaultPageSize; + Slang::ComPtr<IBufferResource> bufferPtr; IBufferResource::Desc bufferDesc; bufferDesc.type = IResource::Type::Buffer; - bufferDesc.defaultState = state; - bufferDesc.allowedStates = - ResourceStateSet(ResourceState::CopyDestination, ResourceState::CopySource); - if (state == ResourceState::General) - bufferDesc.memoryType = MemoryType::Upload; - else - bufferDesc.memoryType = MemoryType::ReadBack; + bufferDesc.defaultState = ResourceState::General; + bufferDesc.allowedStates = m_allowedStates; + bufferDesc.memoryType = m_memoryType; + bufferDesc.sizeInBytes = pageSize; + SLANG_RETURN_ON_FAIL( + m_device->createBufferResource(bufferDesc, nullptr, bufferPtr.writeRef())); + + page.resource = static_cast<TBufferResource*>(bufferPtr.get()); + page.size = pageSize; + m_pages.add(page); + return SLANG_OK; + } + + Result newLargeBuffer(size_t size) + { + Slang::ComPtr<IBufferResource> bufferPtr; + IBufferResource::Desc bufferDesc; + bufferDesc.type = IResource::Type::Buffer; + bufferDesc.defaultState = ResourceState::General; + bufferDesc.allowedStates = m_allowedStates; + 
bufferDesc.memoryType = m_memoryType; bufferDesc.sizeInBytes = size; SLANG_RETURN_ON_FAIL( m_device->createBufferResource(bufferDesc, nullptr, bufferPtr.writeRef())); - m_stagingBuffers.add(static_cast<TBufferResource*>(bufferPtr.get())); - outBufferWeakPtr = bufferPtr.get(); + auto bufferImpl = static_cast<TBufferResource*>(bufferPtr.get()); + m_largeAllocations.add(bufferImpl); return SLANG_OK; } - Result allocateConstantBuffer( - size_t size, - IBufferResource*& outBufferWeakPtr, - size_t& outOffset) + Allocation allocate(size_t size, bool forceLargePage) { - size_t bufferAllocOffset = alignUp(m_constantBufferOffsetAllocCounter, m_alignment); + if (forceLargePage || size >= (kStagingBufferDefaultPageSize >> 2)) + { + newLargeBuffer(size); + Allocation result; + result.resource = m_largeAllocations.getLast(); + result.offset = 0; + return result; + } + + size_t bufferAllocOffset = alignUp(m_offsetAllocCounter, m_alignment); Slang::Index bufferId = -1; - // Find first constant buffer from `m_constantBufferAllocCounter` that has enough space - // for this allocation. - for (Slang::Index i = m_constantBufferAllocCounter; i < m_constantBuffers.getCount(); i++) + for (Slang::Index i = m_pageAllocCounter; i < m_pages.getCount(); i++) { - auto cb = m_constantBuffers[i].Ptr(); + auto cb = m_pages[i].resource.Ptr(); if (bufferAllocOffset + size <= cb->getDesc()->sizeInBytes) { bufferId = i; @@ -86,45 +112,105 @@ public: } bufferAllocOffset = 0; } - // If we cannot find an existing constant buffer with sufficient free space, - // create a new constant buffer. + // If we cannot find an existing page with sufficient free space, + // create a new page. 
if (bufferId == -1) { - Slang::ComPtr<IBufferResource> bufferPtr; - IBufferResource::Desc bufferDesc; - bufferDesc.type = IResource::Type::Buffer; - bufferDesc.defaultState = ResourceState::ConstantBuffer; - bufferDesc.allowedStates = - ResourceStateSet(ResourceState::ConstantBuffer, ResourceState::CopyDestination); - bufferDesc.memoryType = MemoryType::Upload; - size_t lastConstantBufferSize = 0; - if (m_constantBuffers.getCount()) + newStagingBufferPage(); + bufferId = m_pages.getCount() - 1; + } + // Sub allocate from current page. + Allocation result; + result.resource = m_pages[bufferId].resource.Ptr(); + result.offset = bufferAllocOffset; + m_pageAllocCounter = bufferId; + m_offsetAllocCounter = bufferAllocOffset + size; + return result; + } +}; + +template <typename TDevice, typename TBufferResource> +class TransientResourceHeapBaseImpl : public TransientResourceHeapBase +{ +public: + void breakStrongReferenceToDevice() { m_device.breakStrongReference(); } + +public: + BreakableReference<TDevice> m_device; + StagingBufferPool<TDevice, TBufferResource> m_constantBufferPool; + StagingBufferPool<TDevice, TBufferResource> m_uploadBufferPool; + StagingBufferPool<TDevice, TBufferResource> m_readbackBufferPool; + + Result init(const ITransientResourceHeap::Desc& desc, uint32_t alignment, TDevice* device) + { + m_device = device; + + m_constantBufferPool.init( + device, + MemoryType::Upload, + 256, + ResourceStateSet( + ResourceState::ConstantBuffer, + ResourceState::CopySource, + ResourceState::CopyDestination)); + + m_uploadBufferPool.init( + device, + MemoryType::Upload, + 256, + ResourceStateSet( + ResourceState::CopySource, + ResourceState::CopyDestination)); + + m_readbackBufferPool.init( + device, + MemoryType::ReadBack, + 256, + ResourceStateSet(ResourceState::CopySource, ResourceState::CopyDestination)); + + m_version = getVersionCounter(); + getVersionCounter()++; + return SLANG_OK; + } + + Result allocateStagingBuffer(size_t size, IBufferResource*& 
outBufferWeakPtr, size_t& offset, MemoryType memoryType, bool forceLargePage = false) + { + switch (memoryType) + { + case MemoryType::ReadBack: { - lastConstantBufferSize = m_constantBuffers.getLast()->getDesc()->sizeInBytes; + auto allocation = m_readbackBufferPool.allocate(size, forceLargePage); + outBufferWeakPtr = allocation.resource; + offset = allocation.offset; } - bufferDesc.sizeInBytes = Slang::Math::Max( - lastConstantBufferSize * 2, Slang::Math::Max(size, size_t(4 << 20))); - SLANG_RETURN_ON_FAIL( - m_device->createBufferResource(bufferDesc, nullptr, bufferPtr.writeRef())); - bufferId = m_constantBuffers.getCount(); - bufferAllocOffset = 0; - m_constantBuffers.add(static_cast<TBufferResource*>(bufferPtr.get())); + break; + default: + { + auto allocation = m_uploadBufferPool.allocate(size, forceLargePage); + outBufferWeakPtr = allocation.resource; + offset = allocation.offset; + } + break; } - // Sub allocate from current constant buffer. - outBufferWeakPtr = m_constantBuffers[bufferId].Ptr(); - outOffset = bufferAllocOffset; - m_constantBufferAllocCounter = bufferId; - m_constantBufferOffsetAllocCounter = bufferAllocOffset + size; + return SLANG_OK; + } + + Result allocateConstantBuffer( + size_t size, + IBufferResource*& outBufferWeakPtr, + size_t& outOffset) + { + auto allocation = m_constantBufferPool.allocate(size, false); + outBufferWeakPtr = allocation.resource; + outOffset = allocation.offset; return SLANG_OK; } void reset() { - m_constantBufferAllocCounter = 0; - m_constantBufferOffsetAllocCounter = 0; - for (auto& stagingBuffer : m_stagingBuffers) - stagingBuffer = nullptr; - m_stagingBuffers.clear(); + m_constantBufferPool.reset(); + m_uploadBufferPool.reset(); + m_readbackBufferPool.reset(); m_version = getVersionCounter(); getVersionCounter()++; } diff --git a/tools/gfx/vulkan/render-vk.cpp b/tools/gfx/vulkan/render-vk.cpp index d1713ae01..a1544ebc0 100644 --- a/tools/gfx/vulkan/render-vk.cpp +++ b/tools/gfx/vulkan/render-vk.cpp @@ -2482,22 
+2482,29 @@ public: { auto& api = buffer->m_renderer->m_api; IBufferResource* stagingBuffer = nullptr; - transientHeap->allocateStagingBuffer(size, stagingBuffer, ResourceState::CopySource); + size_t stagingBufferOffset = 0; + transientHeap->allocateStagingBuffer( + size, stagingBuffer, stagingBufferOffset, MemoryType::Upload); BufferResourceImpl* stagingBufferImpl = static_cast<BufferResourceImpl*>(stagingBuffer); void* mappedData = nullptr; SLANG_VK_CHECK(api.vkMapMemory( - api.m_device, stagingBufferImpl->m_buffer.m_memory, 0, size, 0, &mappedData)); - memcpy(mappedData, data, size); + api.m_device, + stagingBufferImpl->m_buffer.m_memory, + 0, + stagingBufferOffset + size, + 0, + &mappedData)); + memcpy((char*)mappedData + stagingBufferOffset, data, size); api.vkUnmapMemory(api.m_device, stagingBufferImpl->m_buffer.m_memory); // Copy from staging buffer to real buffer VkBufferCopy copyInfo = {}; copyInfo.size = size; copyInfo.dstOffset = offset; - copyInfo.srcOffset = 0; + copyInfo.srcOffset = stagingBufferOffset; api.vkCmdCopyBuffer( commandBuffer, stagingBufferImpl->m_buffer.m_buffer, @@ -3962,8 +3969,9 @@ public: static_cast<TransientResourceHeapImpl*>(transientHeap); IBufferResource* stagingBuffer = nullptr; + size_t stagingBufferOffset = 0; transientHeapImpl->allocateStagingBuffer( - tableSize, stagingBuffer, ResourceState::General); + tableSize, stagingBuffer, stagingBufferOffset, MemoryType::Upload); assert(stagingBuffer); void* stagingPtr = nullptr; @@ -3975,7 +3983,7 @@ public: handles.setCount(totalHandleSize); auto result = vkApi.vkGetRayTracingShaderGroupHandlesKHR(m_device->m_device, pipelineImpl->m_pipeline, 0, (uint32_t)handleCount, totalHandleSize, handles.getBuffer()); - uint8_t* stagingBufferPtr = (uint8_t*)stagingPtr; + uint8_t* stagingBufferPtr = (uint8_t*)stagingPtr + stagingBufferOffset; auto subTablePtr = stagingBufferPtr; Int shaderTableEntryCounter = 0; @@ -4026,7 +4034,7 @@ public: // TODO: Callable shaders? 
stagingBuffer->unmap(nullptr); - encoder->copyBuffer(bufferResource, 0, stagingBuffer, 0, tableSize); + encoder->copyBuffer(bufferResource, 0, stagingBuffer, stagingBufferOffset, tableSize); encoder->bufferBarrier( 1, bufferResource.readRef(), @@ -4053,10 +4061,6 @@ public: return nullptr; } virtual void comFree() override { m_transientHeap.breakStrongReference(); } - virtual SLANG_NO_THROW Result SLANG_MCALL resetDescriptorHeaps() override - { - return SLANG_OK; - } public: VkCommandBuffer m_commandBuffer; VkCommandBuffer m_preCommandBuffer = VK_NULL_HANDLE; @@ -4557,8 +4561,9 @@ public: bufferSize *= subResourceRange.layerCount; IBufferResource* uploadBuffer = nullptr; + size_t uploadBufferOffset = 0; m_commandBuffer->m_transientHeap->allocateStagingBuffer( - bufferSize, uploadBuffer, gfx::ResourceState::CopySource); + bufferSize, uploadBuffer, uploadBufferOffset, MemoryType::Upload); // Copy into upload buffer { @@ -4566,8 +4571,9 @@ public: uint8_t* dstData; uploadBuffer->map(nullptr, (void**)&dstData); + dstData += uploadBufferOffset; uint8_t* dstDataStart; - dstDataStart = dstData; + dstDataStart = dstData ; size_t dstSubresourceOffset = 0; for (uint32_t i = 0; i < subResourceRange.layerCount; ++i) @@ -4612,7 +4618,7 @@ public: uploadBuffer->unmap(nullptr); } { - size_t srcOffset = 0; + size_t srcOffset = uploadBufferOffset; for (uint32_t i = 0; i < subResourceRange.layerCount; ++i) { for (Index j = 0; j < mipSizes.getCount(); ++j) @@ -6569,15 +6575,18 @@ Result VKDevice::PipelineCommandEncoder::bindRootShaderObjectImpl( // Once we've filled in all the descriptor sets, we bind them // to the pipeline at once. 
// - m_device->m_api.vkCmdBindDescriptorSets( - m_commandBuffer->m_commandBuffer, - bindPoint, - specializedLayout->m_pipelineLayout, - 0, - (uint32_t) descriptorSetCount, - descriptorSets, - 0, - nullptr); + if (descriptorSetCount > 0) + { + m_device->m_api.vkCmdBindDescriptorSets( + m_commandBuffer->m_commandBuffer, + bindPoint, + specializedLayout->m_pipelineLayout, + 0, + (uint32_t) descriptorSetCount, + descriptorSets, + 0, + nullptr); + } return SLANG_OK; } @@ -7226,9 +7235,13 @@ Result VKDevice::initVulkanInstanceAndDevice(const InteropHandle* handles, bool #endif m_features.add("external-memory"); } - if (extensionNames.Contains(VK_EXT_DEBUG_MARKER_EXTENSION_NAME)) + if (extensionNames.Contains(VK_EXT_DEBUG_REPORT_EXTENSION_NAME)) { - deviceExtensions.add(VK_EXT_DEBUG_MARKER_EXTENSION_NAME); + deviceExtensions.add(VK_EXT_DEBUG_REPORT_EXTENSION_NAME); + if (extensionNames.Contains(VK_EXT_DEBUG_MARKER_EXTENSION_NAME)) + { + deviceExtensions.add(VK_EXT_DEBUG_MARKER_EXTENSION_NAME); + } } if (extensionNames.Contains(VK_EXT_SHADER_VIEWPORT_INDEX_LAYER_EXTENSION_NAME)) { |
