diff options
| author | Yong He <yonghe@outlook.com> | 2022-03-11 11:57:53 -0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2022-03-11 11:57:53 -0800 |
| commit | 0890fd8f2e485bfdd93ff6227be08ff4142e55d1 (patch) | |
| tree | be8d2381d0f0e96b267428046dcad815fcafd245 | |
| parent | f6c2a0f81340a0efd10e286a3cbe33e1b564a11b (diff) | |
gfx: Add `ITransientResourceHeap::finish()` to avoid `Signal` after every queue submit. (#2158)
Co-authored-by: Yong He <yhe@nvidia.com>
| -rw-r--r-- | examples/example-base/example-base.cpp | 1 | ||||
| -rw-r--r-- | slang-gfx.h | 9 | ||||
| -rw-r--r-- | tools/gfx/d3d12/render-d3d12.cpp | 43 | ||||
| -rw-r--r-- | tools/gfx/d3d12/render-d3d12.h | 4 | ||||
| -rw-r--r-- | tools/gfx/debug-layer.cpp | 6 | ||||
| -rw-r--r-- | tools/gfx/debug-layer.h | 1 | ||||
| -rw-r--r-- | tools/gfx/renderer-shared.h | 2 | ||||
| -rw-r--r-- | tools/render-test/render-test-main.cpp | 1 |
8 files changed, 47 insertions, 20 deletions
diff --git a/examples/example-base/example-base.cpp b/examples/example-base/example-base.cpp index 68a30fd60..9330dcf04 100644 --- a/examples/example-base/example-base.cpp +++ b/examples/example-base/example-base.cpp @@ -105,6 +105,7 @@ void WindowedAppBase::mainLoop() gTransientHeaps[frameBufferIndex]->synchronizeAndReset(); renderFrame(frameBufferIndex); + gTransientHeaps[frameBufferIndex]->finish(); } void WindowedAppBase::createSwapchainFramebuffers() diff --git a/slang-gfx.h b/slang-gfx.h index e9d33bc98..5350444e9 100644 --- a/slang-gfx.h +++ b/slang-gfx.h @@ -1890,8 +1890,17 @@ public: uint32_t constantBufferDescriptorCount; uint32_t accelerationStructureDescriptorCount; }; + + // Waits until all GPU commands issued before the last call to `finish()` have completed, and resets + // all transient resources held by the heap. + // This method must be called before using the transient heap to issue new GPU commands. + // In most situations this method should be called at the beginning of each frame. virtual SLANG_NO_THROW Result SLANG_MCALL synchronizeAndReset() = 0; + // Must be called when the application is done using this heap to issue commands. In most situations + // this method should be called at the end of each frame. + virtual SLANG_NO_THROW Result SLANG_MCALL finish() = 0; + // Command buffers are one-time use. Once it is submitted to the queue via // `executeCommandBuffers` a command buffer is no longer valid to be used any more. Command // buffers must be closed before submission. 
The current D3D12 implementation has a limitation diff --git a/tools/gfx/d3d12/render-d3d12.cpp b/tools/gfx/d3d12/render-d3d12.cpp index 67c31158b..de3081f40 100644 --- a/tools/gfx/d3d12/render-d3d12.cpp +++ b/tools/gfx/d3d12/render-d3d12.cpp @@ -2378,6 +2378,7 @@ void DeviceImpl::submitResourceCommandsAndWait(const DeviceImpl::ResourceCommand { info.commandBuffer->close(); m_resourceCommandQueue->executeCommandBuffer(info.commandBuffer); + m_resourceCommandTransientHeap->finish(); m_resourceCommandTransientHeap->synchronizeAndReset(); } @@ -2785,18 +2786,9 @@ Result AccelerationStructureImpl::getNativeHandle(InteropHandle* outHandle) Result TransientResourceHeapImpl::synchronizeAndReset() { - Array<HANDLE, 16> waitHandles; - for (auto& waitInfo : m_waitInfos) - { - if (waitInfo.waitValue == 0) - continue; - if (waitInfo.fence) - { - waitInfo.fence->SetEventOnCompletion(waitInfo.waitValue, waitInfo.fenceEvent); - waitHandles.add(waitInfo.fenceEvent); - } - } - WaitForMultipleObjects((DWORD)waitHandles.getCount(), waitHandles.getBuffer(), TRUE, INFINITE); + WaitForMultipleObjects( + (DWORD)m_waitHandles.getCount(), m_waitHandles.getBuffer(), TRUE, INFINITE); + m_waitHandles.clear(); m_currentViewHeapIndex = -1; m_currentSamplerHeapIndex = -1; allocateNewViewDescriptorHeap(m_device); @@ -2809,6 +2801,22 @@ Result TransientResourceHeapImpl::synchronizeAndReset() return SLANG_OK; } +Result TransientResourceHeapImpl::finish() +{ + for (auto& waitInfo : m_waitInfos) + { + if (waitInfo.waitValue == 0) + continue; + if (waitInfo.fence) + { + waitInfo.queue->Signal(waitInfo.fence, waitInfo.waitValue); + waitInfo.fence->SetEventOnCompletion(waitInfo.waitValue, waitInfo.fenceEvent); + m_waitHandles.add(waitInfo.fenceEvent); + } + } + return SLANG_OK; +} + TransientResourceHeapImpl::QueueWaitInfo& TransientResourceHeapImpl::getQueueWaitInfo( uint32_t queueIndex) { @@ -3462,10 +3470,8 @@ void RayTracingCommandEncoderImpl::dispatchRays( auto shaderTableImpl = 
static_cast<ShaderTableImpl*>(shaderTable); - ResourceCommandEncoderImpl resourceCopyEncoder; - resourceCopyEncoder.init(m_commandBuffer); auto shaderTableBuffer = - shaderTableImpl->getOrCreateBuffer(pipelineImpl, m_transientHeap, &resourceCopyEncoder); + shaderTableImpl->getOrCreateBuffer(pipelineImpl, m_transientHeap, static_cast<ResourceCommandEncoderImpl*>(this)); auto shaderTableAddr = shaderTableBuffer->getDeviceAddress(); D3D12_DISPATCH_RAYS_DESC dispatchDesc = {}; @@ -5015,8 +5021,8 @@ void CommandQueueImpl::executeCommandBuffers( auto& waitInfo = transientHeap->getQueueWaitInfo(m_queueIndex); waitInfo.waitValue = m_fenceValue; waitInfo.fence = m_fence; + waitInfo.queue = m_d3dQueue; } - m_d3dQueue->Signal(m_fence, m_fenceValue); } if (fence) @@ -6556,10 +6562,7 @@ void ResourceCommandEncoderImpl::textureBarrier( void ResourceCommandEncoderImpl::bufferBarrier( size_t count, IBufferResource* const* buffers, ResourceState src, ResourceState dst) { - - List<D3D12_RESOURCE_BARRIER> barriers; - barriers.reserve(count); - + ShortList<D3D12_RESOURCE_BARRIER, 16> barriers; for (size_t i = 0; i < count; i++) { auto bufferImpl = static_cast<BufferResourceImpl*>(buffers[i]); diff --git a/tools/gfx/d3d12/render-d3d12.h b/tools/gfx/d3d12/render-d3d12.h index 3ef4a84a7..2cb9a3fd3 100644 --- a/tools/gfx/d3d12/render-d3d12.h +++ b/tools/gfx/d3d12/render-d3d12.h @@ -490,9 +490,11 @@ public: { uint64_t waitValue; HANDLE fenceEvent; + ComPtr<ID3D12CommandQueue> queue; ComPtr<ID3D12Fence> fence = nullptr; }; ShortList<QueueWaitInfo, 4> m_waitInfos; + Array<HANDLE, 16> m_waitHandles; QueueWaitInfo& getQueueWaitInfo(uint32_t queueIndex); // During command submission, we need all the descriptor tables that get @@ -545,6 +547,8 @@ public: createCommandBuffer(ICommandBuffer** outCommandBuffer) override; virtual SLANG_NO_THROW Result SLANG_MCALL synchronizeAndReset() override; + + virtual SLANG_NO_THROW Result SLANG_MCALL finish() override; }; struct Submitter diff --git 
a/tools/gfx/debug-layer.cpp b/tools/gfx/debug-layer.cpp index 341f0034a..414434da6 100644 --- a/tools/gfx/debug-layer.cpp +++ b/tools/gfx/debug-layer.cpp @@ -1619,6 +1619,12 @@ Result DebugTransientResourceHeap::synchronizeAndReset() return baseObject->synchronizeAndReset(); } +Result DebugTransientResourceHeap::finish() +{ + SLANG_GFX_API_FUNC; + return baseObject->finish(); +} + Result DebugTransientResourceHeap::createCommandBuffer(ICommandBuffer** outCommandBuffer) { SLANG_GFX_API_FUNC; diff --git a/tools/gfx/debug-layer.h b/tools/gfx/debug-layer.h index 84ae33c2a..de8ccfe8e 100644 --- a/tools/gfx/debug-layer.h +++ b/tools/gfx/debug-layer.h @@ -724,6 +724,7 @@ public: public: ITransientResourceHeap* getInterface(const Slang::Guid& guid); virtual SLANG_NO_THROW Result SLANG_MCALL synchronizeAndReset() override; + virtual SLANG_NO_THROW Result SLANG_MCALL finish() override; virtual SLANG_NO_THROW Result SLANG_MCALL createCommandBuffer(ICommandBuffer** outCommandBuffer) override; }; diff --git a/tools/gfx/renderer-shared.h b/tools/gfx/renderer-shared.h index 24d51c87a..a9d9e19c4 100644 --- a/tools/gfx/renderer-shared.h +++ b/tools/gfx/renderer-shared.h @@ -1150,6 +1150,8 @@ public: return static_cast<ITransientResourceHeap*>(this); return nullptr; } + + virtual SLANG_NO_THROW Result SLANG_MCALL finish() override { return SLANG_OK; } }; class ShaderTableBase diff --git a/tools/render-test/render-test-main.cpp b/tools/render-test/render-test-main.cpp index 7046e2fda..81e8cd38a 100644 --- a/tools/render-test/render-test-main.cpp +++ b/tools/render-test/render-test-main.cpp @@ -939,6 +939,7 @@ Result RenderTestApp::writeBindingOutput(const String& fileName) commandBuffer->close(); m_queue->executeCommandBuffer(commandBuffer); + m_transientHeap->finish(); m_transientHeap->synchronizeAndReset(); m_device->readBufferResource(stagingBuffer, 0, bufferSize, blob.writeRef()); |
