diff options
Diffstat (limited to 'tools/gfx')
| -rw-r--r-- | tools/gfx/cpu/render-cpu.cpp | 23 | ||||
| -rw-r--r-- | tools/gfx/cuda/render-cuda.cpp | 21 | ||||
| -rw-r--r-- | tools/gfx/d3d12/circular-resource-heap-d3d12.cpp | 222 | ||||
| -rw-r--r-- | tools/gfx/d3d12/circular-resource-heap-d3d12.h | 206 | ||||
| -rw-r--r-- | tools/gfx/d3d12/render-d3d12.cpp | 406 | ||||
| -rw-r--r-- | tools/gfx/d3d12/resource-d3d12.cpp | 57 | ||||
| -rw-r--r-- | tools/gfx/d3d12/resource-d3d12.h | 47 | ||||
| -rw-r--r-- | tools/gfx/immediate-renderer-base.cpp | 24 | ||||
| -rw-r--r-- | tools/gfx/immediate-renderer-base.h | 3 | ||||
| -rw-r--r-- | tools/gfx/renderer-shared.cpp | 1 | ||||
| -rw-r--r-- | tools/gfx/renderer-shared.h | 4 | ||||
| -rw-r--r-- | tools/gfx/simple-transient-resource-heap.h | 52 | ||||
| -rw-r--r-- | tools/gfx/vulkan/render-vk.cpp | 261 |
13 files changed, 510 insertions, 817 deletions
diff --git a/tools/gfx/cpu/render-cpu.cpp b/tools/gfx/cpu/render-cpu.cpp index fa31f7ee1..f0b612e2f 100644 --- a/tools/gfx/cpu/render-cpu.cpp +++ b/tools/gfx/cpu/render-cpu.cpp @@ -9,6 +9,7 @@ #include "../command-writer.h" #include "../renderer-shared.h" +#include "../simple-transient-resource-heap.h" #include "../slang-context.h" #define SLANG_PRELUDE_NAMESPACE slang_prelude @@ -1119,6 +1120,10 @@ private: return nullptr; } public: + void init(CPUDevice* device) + { + SLANG_UNUSED(device); + } virtual SLANG_NO_THROW void SLANG_MCALL encodeRenderCommands( IRenderPassLayout* renderPass, IFramebuffer* framebuffer, @@ -1273,13 +1278,6 @@ private: { return m_desc; } - virtual SLANG_NO_THROW Result SLANG_MCALL - createCommandBuffer(ICommandBuffer** outCommandBuffer) override - { - RefPtr<CommandBufferImpl> result = new CommandBufferImpl(); - *outCommandBuffer = result.detach(); - return SLANG_OK; - } virtual SLANG_NO_THROW void SLANG_MCALL executeCommandBuffers(uint32_t count, ICommandBuffer* const* commandBuffers) override @@ -1400,6 +1398,8 @@ private: } }; + using TransientResourceHeapImpl = SimpleTransientResourceHeap<CPUDevice, CommandBufferImpl>; + public: ~CPUDevice() { @@ -1559,6 +1559,15 @@ public: *outQueue = queue.detach(); return SLANG_OK; } + virtual SLANG_NO_THROW Result SLANG_MCALL createTransientResourceHeap( + const ITransientResourceHeap::Desc& desc, + ITransientResourceHeap** outHeap) override + { + RefPtr<TransientResourceHeapImpl> result = new TransientResourceHeapImpl(); + SLANG_RETURN_ON_FAIL(result->init(this, desc)); + *outHeap = result.detach(); + return SLANG_OK; + } virtual SLANG_NO_THROW Result SLANG_MCALL createSwapchain( const ISwapchain::Desc& desc, WindowHandle window, ISwapchain** outSwapchain) override { diff --git a/tools/gfx/cuda/render-cuda.cpp b/tools/gfx/cuda/render-cuda.cpp index 383ccc924..b29f7f7e4 100644 --- a/tools/gfx/cuda/render-cuda.cpp +++ b/tools/gfx/cuda/render-cuda.cpp @@ -12,6 +12,7 @@ #include "slang-com-helper.h" #include "../command-writer.h" #include "../renderer-shared.h" +#include "../simple-transient-resource-heap.h" #include "../slang-context.h" # ifdef RENDER_TEST_OPTIX @@ -989,6 +990,7 @@ public: return nullptr; } public: + void init(CUDADevice* device) { SLANG_UNUSED(device); } virtual SLANG_NO_THROW void SLANG_MCALL encodeRenderCommands( IRenderPassLayout* renderPass, IFramebuffer* framebuffer, @@ -1147,13 +1149,6 @@ public: { return m_desc; } - virtual SLANG_NO_THROW Result SLANG_MCALL - createCommandBuffer(ICommandBuffer** outCommandBuffer) override - { - RefPtr<CommandBufferImpl> result = new CommandBufferImpl(); - *outCommandBuffer = result.detach(); - return SLANG_OK; - } virtual SLANG_NO_THROW void SLANG_MCALL executeCommandBuffers(uint32_t count, ICommandBuffer* const* commandBuffers) override @@ -1320,6 +1315,8 @@ public: } }; + using TransientResourceHeapImpl = SimpleTransientResourceHeap<CUDADevice, CommandBufferImpl>; + public: ~CUDADevice() { @@ -1921,6 +1918,16 @@ public: } public: + virtual SLANG_NO_THROW Result SLANG_MCALL createTransientResourceHeap( + const ITransientResourceHeap::Desc& desc, + ITransientResourceHeap** outHeap) override + { + RefPtr<TransientResourceHeapImpl> result = new TransientResourceHeapImpl(); + SLANG_RETURN_ON_FAIL(result->init(this, desc)); + *outHeap = result.detach(); + return SLANG_OK; + } + virtual SLANG_NO_THROW Result SLANG_MCALL createCommandQueue(const ICommandQueue::Desc& desc, ICommandQueue** outQueue) override { diff --git a/tools/gfx/d3d12/circular-resource-heap-d3d12.cpp b/tools/gfx/d3d12/circular-resource-heap-d3d12.cpp deleted file mode 100644 index 685dd364f..000000000 --- a/tools/gfx/d3d12/circular-resource-heap-d3d12.cpp +++ /dev/null @@ -1,222 +0,0 @@ -#include "circular-resource-heap-d3d12.h" - -namespace gfx { -using namespace Slang; - -D3D12CircularResourceHeap::D3D12CircularResourceHeap(): - m_fence(nullptr), - m_device(nullptr), - m_blockFreeList(sizeof(Block), SLANG_ALIGN_OF(Block), 16), - m_blocks(nullptr) -{ - m_back.m_block = nullptr; - m_back.m_position = nullptr; - m_front.m_block = nullptr; - m_front.m_position = nullptr; -} - -D3D12CircularResourceHeap::~D3D12CircularResourceHeap() -{ - _freeBlockListResources(m_blocks); -} - -void D3D12CircularResourceHeap::_freeBlockListResources(const Block* start) -{ - if (start) - { - const Block* block = start; - do - { - ID3D12Resource* resource = block->m_resource; - - resource->Unmap(0, nullptr); - resource->Release(); - - // Next in list - block = block->m_next; - - } while (block != start); - } -} - -Result D3D12CircularResourceHeap::init(ID3D12Device* device, const Desc& desc, D3D12CounterFence* fence) -{ - assert(m_blocks == nullptr); - assert(desc.m_blockSize > 0); - - m_fence = fence; - m_desc = desc; - m_device = device; - - return SLANG_OK; -} - -void D3D12CircularResourceHeap::addSync(uint64_t signalValue) -{ - assert(signalValue == m_fence->getCurrentValue()); - PendingEntry entry; - entry.m_completedValue = signalValue; - entry.m_cursor = m_front; - m_pendingQueue.add(entry); -} - -void D3D12CircularResourceHeap::updateCompleted() -{ - const uint64_t completedValue = m_fence->getCompletedValue(); - -#if 0 - while (m_pendingQueue.getCount() != 0) - { - const PendingEntry& entry = m_pendingQueue[0]; - if (entry.m_completedValue <= completedValue) - { - m_back = entry.m_cursor; - m_pendingQueue.removeAt(0); - } - else - { - break; - } - } -#else - // A more efficient implementation is m_pendingQueue is implemented as a vector like type - const Index size = m_pendingQueue.getCount(); - Index end = 0; - while (end < size && m_pendingQueue[end].m_completedValue <= completedValue) - { - end++; - } - - if (end > 0) - { - // Set the back position - m_back = m_pendingQueue[end - 1].m_cursor; - if (end == size) - { - m_pendingQueue.clear(); - } - else - { - m_pendingQueue.removeRange(0, size); - } - } -#endif -} - -D3D12CircularResourceHeap::Block* D3D12CircularResourceHeap::_newBlock() -{ - D3D12_RESOURCE_DESC desc; - - desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; - desc.Alignment = 0; - desc.Width = m_desc.m_blockSize; - desc.Height = 1; - desc.DepthOrArraySize = 1; - desc.MipLevels = 1; - desc.Format = DXGI_FORMAT_UNKNOWN; - desc.SampleDesc.Count = 1; - desc.SampleDesc.Quality = 0; - desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; - desc.Flags = D3D12_RESOURCE_FLAG_NONE; - - ComPtr<ID3D12Resource> resource; - Result res = m_device->CreateCommittedResource(&m_desc.m_heapProperties, m_desc.m_heapFlags, &desc, m_desc.m_initialState, nullptr, IID_PPV_ARGS(resource.writeRef())); - if (SLANG_FAILED(res)) - { - assert(!"Resource allocation failed"); - return nullptr; - } - - uint8_t* data = nullptr; - if (m_desc.m_heapProperties.Type == D3D12_HEAP_TYPE_READBACK) - { - } - else - { - // Map it, and keep it mapped - resource->Map(0, nullptr, (void**)&data); - } - - // We have no blocks -> so lets allocate the first - Block* block = (Block*)m_blockFreeList.allocate(); - block->m_next = nullptr; - - block->m_resource = resource.detach(); - block->m_start = data; - return block; -} - -D3D12CircularResourceHeap::Cursor D3D12CircularResourceHeap::allocate(size_t size, size_t alignment) -{ - const size_t blockSize = getBlockSize(); - - assert(size <= blockSize); - - // If nothing is allocated add the first block - if (m_blocks == nullptr) - { - Block* block = _newBlock(); - if (!block) - { - Cursor cursor = {}; - return cursor; - } - m_blocks = block; - // Make circular - block->m_next = block; - - // Point front and back to same position, as currently it is all free - m_back = { block, block->m_start }; - m_front = m_back; - } - - // If front and back are in the same block then front MUST be ahead of back (as that defined as - // an invariant and is required for block insertion to be possible - Block* block = m_front.m_block; - - // Check the invariant - assert(block != m_back.m_block || m_front.m_position >= m_back.m_position); - - { - uint8_t* cur = (uint8_t*)((size_t(m_front.m_position) + alignment - 1) & ~(alignment - 1)); - // Does the the allocation fit? - if (cur + size <= block->m_start + blockSize) - { - // It fits - // Move the front forward - m_front.m_position = cur + size; - Cursor cursor = { block, cur }; - return cursor; - } - } - - // Okay I can't fit into current block... - - // If the next block contains front, we need to add a block, else we can use that block - if (block->m_next == m_back.m_block) - { - Block* newBlock = _newBlock(); - // Insert into the list - newBlock->m_next = block->m_next; - block->m_next = newBlock; - } - - // Use the block we are going to add to - block = block->m_next; - uint8_t* cur = (uint8_t*)((size_t(block->m_start) + alignment - 1) & ~(alignment - 1)); - // Does the the allocation fit? - if (cur + size > block->m_start + blockSize) - { - assert(!"Couldn't fit into a free block(!) Alignment breaks it?"); - Cursor cursor = {}; - return cursor; - } - // It fits - // Move the front forward - m_front.m_block = block; - m_front.m_position = cur + size; - Cursor cursor = { block, cur }; - return cursor; -} - -} // namespace gfx diff --git a/tools/gfx/d3d12/circular-resource-heap-d3d12.h b/tools/gfx/d3d12/circular-resource-heap-d3d12.h deleted file mode 100644 index 7eacf9572..000000000 --- a/tools/gfx/d3d12/circular-resource-heap-d3d12.h +++ /dev/null @@ -1,206 +0,0 @@ -#pragma once - -#include "slang-com-ptr.h" -#include "core/slang-list.h" -#include "core/slang-free-list.h" - -#include "resource-d3d12.h" - -namespace gfx { - -/*! \brief The D3D12CircularResourceHeap is a heap that is suited for size constrained real-time resources allocation that -is transitory in nature. It is designed to allocate resources which are used and discarded, often used where in -previous versions of DirectX the 'DISCARD' flag was used. - -The idea is to have a heap which chunks of resource can be allocated, and used for GPU execution, -and that the heap is able through the addSync/updateCompleted idiom is able to track when the usage of the resources is -completed allowing them to be reused. The heap is arranged as circularly, with new allocations made from the front, and the back -being updated as the GPU updating the back when it is informed anything using prior parts of the heap have completed. In this -arrangement all the heap between the back and the front can be thought of as in use or potentially in use by the GPU. All the heap -from the front back around to the back, is free and can be allocated from. It is the responsibility of the user of the Heap to make -sure the invariant holds, but in most normal usage it does so simply. - -Another feature of the heap is that it does not require upfront knowledge of how big a heap is needed. The backing resources will be expanded -dynamically with requests as needed. The only requirement is that know single request can be larger than m_blockSize specified in the Desc -used to initialize the heap. This is because all the backing resources are allocated to a single size. This limitation means the D3D12CircularResourceHeap -may not be the best use for example for uploading a texture - because it's design is really around transitory uploads or write backs, and so more suited -to constant buffers, vertex buffer, index buffers and the like. - -To upload a texture at program startup it is most likely better to use a D3D12ResourceScopeManager. - -\code{.cpp} - -typedef D3D12CircularResourceHeap Heap; - -Heap::Cursor cursor = heap.allocateVertexBuffer(sizeof(Vertex) * numVerts); -Memory:copy(cursor.m_position, verts, sizeof(Vertex) * numVerts); - -// Do a command using the GPU handle -m_commandList->... -// Do another command using the GPU handle - -m_commandList->... - -// Execute the command list on the command queue -{ - ID3D12CommandList* lists[] = { m_commandList }; - m_commandQueue->ExecuteCommandLists(SLANG_COUNT_OF(lists), lists); -} - -// Add a sync point -const uint64_t signalValue = m_fence.nextSignal(m_commandQueue); -heap.addSync(signalValue) - -// The cursors cannot be used anymore - -// At some later point call updateCompleted. This will see where the GPU is at, and make resources available that the GPU no longer accesses. -heap.updateCompleted(); - -\endcode - -### Implementation - -Front and back can be in the same block, but ONLY if back is behind front, because we have to always be able to insert -new blocks in front of front. So it must be possible to do an block insertion between the two of them. - -|--B---F-----| |----------| - -When B and F are on top of one another it means there is nothing in the list. NOTE this also means that a move of front can never place it -top of the back. - -https://msdn.microsoft.com/en-us/library/windows/desktop/dn899125%28v=vs.85%29.aspx -https://msdn.microsoft.com/en-us/library/windows/desktop/mt426646%28v=vs.85%29.aspx -*/ - -class D3D12CircularResourceHeap -{ - protected: - struct Block; - public: - typedef D3D12CircularResourceHeap ThisType; - - /// The alignment used for VERTEX_BUFFER allocations - /// Strictly speaking it seems the hardware can handle 4 byte alignment, but since often in use - /// data will be copied from CPU memory to the allocation, using 16 byte alignment is superior as allows - /// significantly faster memcpy. - /// The sample that shows sizeof(float) - 4 bytes is appropriate is at the link below. - /// https://msdn.microsoft.com/en-us/library/windows/desktop/mt426646%28v=vs.85%29.aspx - enum - { - VERTEX_BUFFER_ALIGNMENT = 16, - }; - - struct Desc - { - void init() - { - { - D3D12_HEAP_PROPERTIES& props = m_heapProperties; - - props.Type = D3D12_HEAP_TYPE_UPLOAD; - props.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN; - props.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN; - props.CreationNodeMask = 1; - props.VisibleNodeMask = 1; - } - m_heapFlags = D3D12_HEAP_FLAG_NONE; - m_initialState = D3D12_RESOURCE_STATE_GENERIC_READ; - m_blockSize = 0; - } - - D3D12_HEAP_PROPERTIES m_heapProperties; - D3D12_HEAP_FLAGS m_heapFlags; - D3D12_RESOURCE_STATES m_initialState; - size_t m_blockSize; - }; - - /// Cursor position - struct Cursor - { - /// Get GpuHandle - SLANG_FORCE_INLINE D3D12_GPU_VIRTUAL_ADDRESS getGpuHandle() const { return m_block->m_resource->GetGPUVirtualAddress() + size_t(m_position - m_block->m_start); } - /// Must have a block and position - SLANG_FORCE_INLINE bool isValid() const { return m_block != nullptr; } - /// Calculate the offset into the underlying resource - SLANG_FORCE_INLINE size_t getOffset() const { return size_t(m_position - m_block->m_start); } - /// Get the underlying resource - SLANG_FORCE_INLINE ID3D12Resource* getResource() const { return m_block->m_resource; } - - Block* m_block; ///< The block index - uint8_t* m_position; ///< The current position - }; - - /// Get the desc used to initialize the heap - SLANG_FORCE_INLINE const Desc& getDesc() const { return m_desc; } - - /// Must be called before used - /// Block size must be at least as large as the _largest_ thing allocated - /// Also note depending on alignment of a resource allocation, the block size might also need to take into account the - /// maximum alignment use. It is a REQUIREMENT that a newly allocated resource block is large enough to hold any - /// allocation taking into account the alignment used. - Slang::Result init(ID3D12Device* device, const Desc& desc, D3D12CounterFence* fence); - - /// Get the block size - SLANG_FORCE_INLINE size_t getBlockSize() const { return m_desc.m_blockSize; } - - /// Allocate constant buffer of specified size - Cursor allocate(size_t size, size_t alignment); - - /// Allocate a constant buffer - SLANG_FORCE_INLINE Cursor allocateConstantBuffer(size_t size) { return allocate(size, D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT); } - /// Allocate a vertex buffer - SLANG_FORCE_INLINE Cursor allocateVertexBuffer(size_t size) { return allocate(size, VERTEX_BUFFER_ALIGNMENT); } - - /// Create filled in constant buffer - SLANG_FORCE_INLINE Cursor newConstantBuffer(const void* data, size_t size) { Cursor cursor = allocateConstantBuffer(size); ::memcpy(cursor.m_position, data, size); return cursor; } - /// Create in filled in constant buffer - template <typename T> - SLANG_FORCE_INLINE Cursor newConstantBuffer(const T& in) { return newConstantBuffer(&in, sizeof(T)); } - - /// Look where the GPU has got to and release anything not currently used - void updateCompleted(); - /// Add a sync point - meaning that when this point is hit in the queue - /// all of the resources up to this point will no longer be used. - void addSync(uint64_t signalValue); - - /// Get the gpu address of this cursor - D3D12_GPU_VIRTUAL_ADDRESS getGpuHandle(const Cursor& cursor) const { return cursor.m_block->m_resource->GetGPUVirtualAddress() + size_t(cursor.m_position - cursor.m_block->m_start); } - - /// Ctor - D3D12CircularResourceHeap(); - /// Dtor - ~D3D12CircularResourceHeap(); - - protected: - - struct Block - { - ID3D12Resource* m_resource; ///< The mapped resource - uint8_t* m_start; ///< Once created the resource is mapped to here - Block* m_next; ///< Points to next block in the list - }; - struct PendingEntry - { - uint64_t m_completedValue; ///< The value when this is completed - Cursor m_cursor; ///< the cursor at that point - }; - void _freeBlockListResources(const Block* block); - /// Create a new block (with associated resource), do not add the block list - Block* _newBlock(); - - Block* m_blocks; ///< Circular singly linked list of block. nullptr initially - Slang::FreeList m_blockFreeList; ///< Free list of actual allocations of blocks - Slang::List<PendingEntry> m_pendingQueue; ///< Holds the list of pending positions. When the fence value is greater than the value on the queue entry, the entry is done. - - // Allocation is made from the front, and freed from the back. - Cursor m_back; ///< Current back position. - Cursor m_front; ///< Current front position. - - Desc m_desc; ///< Describes the heap - - D3D12CounterFence* m_fence; ///< The fence to use - ID3D12Device* m_device; ///< The device that resources will be constructed on -}; - -} // namespace gfx - diff --git a/tools/gfx/d3d12/render-d3d12.cpp b/tools/gfx/d3d12/render-d3d12.cpp index e2629dc53..6b818f100 100644 --- a/tools/gfx/d3d12/render-d3d12.cpp +++ b/tools/gfx/d3d12/render-d3d12.cpp @@ -41,7 +41,6 @@ struct ID3D12GraphicsCommandList1 {}; #include "resource-d3d12.h" #include "descriptor-heap-d3d12.h" -#include "circular-resource-heap-d3d12.h" #include "../d3d/d3d-util.h" @@ -72,6 +71,9 @@ public: virtual SLANG_NO_THROW SlangResult SLANG_MCALL initialize(const Desc& desc) override; virtual SLANG_NO_THROW Result SLANG_MCALL createCommandQueue(const ICommandQueue::Desc& desc, ICommandQueue** outQueue) override; + virtual SLANG_NO_THROW Result SLANG_MCALL createTransientResourceHeap( + const ITransientResourceHeap::Desc& desc, + ITransientResourceHeap** outHeap) override; virtual SLANG_NO_THROW Result SLANG_MCALL createSwapchain( const ISwapchain::Desc& desc, WindowHandle window, @@ -211,12 +213,6 @@ public: public: typedef BufferResource Parent; - void bindConstantBufferView(D3D12CircularResourceHeap& circularHeap, int index, Submitter* submitter) const - { - // Set the constant buffer - submitter->setRootConstantBufferView(index, m_resource.getResource()->GetGPUVirtualAddress()); - } - BufferResourceImpl(IResource::Usage initialUsage, const Desc& desc): Parent(desc), m_initialUsage(initialUsage) , m_defaultState(_calcResourceState(initialUsage)) @@ -444,6 +440,23 @@ public: ID3D12GraphicsCommandList* m_commandList; }; + static void _initBufferResourceDesc(size_t bufferSize, D3D12_RESOURCE_DESC& out) + { + out = {}; + + out.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; + out.Alignment = 0; + out.Width = bufferSize; + out.Height = 1; + out.DepthOrArraySize = 1; + out.MipLevels = 1; + out.Format = DXGI_FORMAT_UNKNOWN; + out.SampleDesc.Count = 1; + out.SampleDesc.Quality = 0; + out.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; + out.Flags = D3D12_RESOURCE_FLAG_NONE; + } + static Result _uploadBufferData( ID3D12GraphicsCommandList* cmdList, BufferResourceImpl* buffer, @@ -479,78 +492,111 @@ public: return SLANG_OK; } - // Use a circular buffer of execution frames to manage in-flight GPU command buffers. - // Each call to `executeCommandLists` advances the frame by 1. - // If we run out of avaialble frames, wait for the earliest submitted frame to finish. - struct ExecutionFrameResources + class TransientResourceHeapImpl + : public ITransientResourceHeap + , public RefObject { + public: + SLANG_REF_OBJECT_IUNKNOWN_ALL + ITransientResourceHeap* getInterface(const Guid& guid) + { + if (guid == GfxGUID::IID_ISlangUnknown || guid == GfxGUID::IID_ITransientResourceHeap) + return static_cast<ITransientResourceHeap*>(this); + return nullptr; + } + + public: + D3D12Resource m_constantBuffer; + D3D12Resource m_constantUploadBuffer; + + D3D12Device* m_device; ComPtr<ID3D12CommandAllocator> m_commandAllocator; - List<ComPtr<ID3D12GraphicsCommandList>> m_commandListPool; + List<ComPtr<ID3D12GraphicsCommandList>> m_d3dCommandListPool; + List<ComPtr<ICommandBuffer>> m_commandBufferPool; uint32_t m_commandListAllocId = 0; - HANDLE fenceEvent; + // Wait values for each command queue. + struct QueueWaitInfo + { + uint64_t waitValue; + HANDLE fenceEvent; + }; + ShortList<QueueWaitInfo, 4> m_waitInfos; + QueueWaitInfo& getQueueWaitInfo(uint32_t queueIndex) + { + if (queueIndex < (uint32_t)m_waitInfos.getCount()) + { + return m_waitInfos[queueIndex]; + } + auto oldCount = m_waitInfos.getCount(); + m_waitInfos.setCount(queueIndex + 1); + for (auto i = oldCount; i < m_waitInfos.getCount(); i++) + { + m_waitInfos[i].waitValue = 0; + m_waitInfos[i].fenceEvent = CreateEventEx( + nullptr, + false, + CREATE_EVENT_INITIAL_SET | CREATE_EVENT_MANUAL_RESET, + EVENT_ALL_ACCESS); + } + return m_waitInfos[queueIndex]; + } // During command submission, we need all the descriptor tables that get // used to come from a single heap (for each descriptor heap type). // // We will thus keep a single heap of each type that we hope will hold // all the descriptors that actually get needed in a frame. - // - // TODO: we need an allocation policy to reallocate and resize these - // if/when we run out of space during a frame. D3D12DescriptorHeap m_viewHeap; // Cbv, Srv, Uav D3D12DescriptorHeap m_samplerHeap; // Heap for samplers - ~ExecutionFrameResources() { CloseHandle(fenceEvent); } - Result init(ID3D12Device* device, uint32_t viewHeapSize, uint32_t samplerHeapSize) + ~TransientResourceHeapImpl() { - SLANG_RETURN_ON_FAIL(device->CreateCommandAllocator( + synchronizeAndReset(); + for (auto& waitInfo : m_waitInfos) + CloseHandle(waitInfo.fenceEvent); + } + + Result init( + const ITransientResourceHeap::Desc& desc, + D3D12Device* device, + uint32_t viewHeapSize, + uint32_t samplerHeapSize) + { + m_device = device; + auto d3dDevice = device->m_device; + SLANG_RETURN_ON_FAIL(d3dDevice->CreateCommandAllocator( D3D12_COMMAND_LIST_TYPE_DIRECT, IID_PPV_ARGS(m_commandAllocator.writeRef()))); - fenceEvent = CreateEventEx( - nullptr, - false, - CREATE_EVENT_INITIAL_SET | CREATE_EVENT_MANUAL_RESET, - EVENT_ALL_ACCESS); + SLANG_RETURN_ON_FAIL(m_viewHeap.init( - device, + d3dDevice, viewHeapSize, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE)); SLANG_RETURN_ON_FAIL(m_samplerHeap.init( - device, + d3dDevice, samplerHeapSize, D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER, D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE)); - return SLANG_OK; - } - void reset() - { - WaitForSingleObject(fenceEvent, INFINITE); - m_viewHeap.deallocateAll(); - m_samplerHeap.deallocateAll(); - m_commandListAllocId = 0; - m_commandAllocator->Reset(); - for (auto cmdBuffer : m_commandListPool) - cmdBuffer->Reset(m_commandAllocator, nullptr); - } - ComPtr<ID3D12GraphicsCommandList> createCommandList(ID3D12Device* device) - { - if (m_commandListAllocId == m_commandListPool.getCount()) + + if (desc.constantBufferSize != 0) { - ComPtr<ID3D12GraphicsCommandList> cmdList; - device->CreateCommandList( - 0, - D3D12_COMMAND_LIST_TYPE_DIRECT, - m_commandAllocator, + D3D12_RESOURCE_DESC resourceDesc; + _initBufferResourceDesc(desc.constantBufferSize, resourceDesc); + device->createBuffer( + resourceDesc, nullptr, - IID_PPV_ARGS(cmdList.writeRef())); - - m_commandListPool.add(cmdList); + 0, + m_constantUploadBuffer, + D3D12_RESOURCE_STATE_VERTEX_AND_CONSTANT_BUFFER, + m_constantBuffer); } - assert((Index)m_commandListAllocId < m_commandListPool.getCount()); - auto& result = m_commandListPool[m_commandListAllocId]; - ++m_commandListAllocId; - return result; + return SLANG_OK; } + + virtual SLANG_NO_THROW Result SLANG_MCALL + createCommandBuffer(ICommandBuffer** outCommandBuffer) override; + + virtual SLANG_NO_THROW Result SLANG_MCALL synchronizeAndReset() override; }; class CommandBufferImpl; @@ -561,7 +607,7 @@ public: bool m_isOpen = false; bool m_bindingDirty = true; CommandBufferImpl* m_commandBuffer; - ExecutionFrameResources* m_frame; + TransientResourceHeapImpl* m_transientHeap; D3D12Device* m_renderer; ID3D12Device* m_device; ID3D12GraphicsCommandList* m_d3dCmdList; @@ -591,7 +637,7 @@ public: m_commandBuffer = commandBuffer; m_d3dCmdList = m_commandBuffer->m_cmdList; m_renderer = commandBuffer->m_renderer; - m_frame = commandBuffer->m_frame; + m_transientHeap = commandBuffer->m_transientHeap; } void endEncodingImpl() { m_isOpen = false; } @@ -659,7 +705,7 @@ public: struct RootBindingState { - ExecutionFrameResources* frame; + TransientResourceHeapImpl* transientHeap; D3D12Device* device; ArrayView<DescriptorTable> descriptorTables; BindingOffset offset; @@ -2339,17 +2385,20 @@ public: } public: ComPtr<ID3D12GraphicsCommandList> m_cmdList; - ExecutionFrameResources* m_frame; + TransientResourceHeapImpl* m_transientHeap; D3D12Device* m_renderer; - void init(D3D12Device* renderer, ExecutionFrameResources* frame) + void init( + D3D12Device* renderer, + ID3D12GraphicsCommandList* d3dCommandList, + TransientResourceHeapImpl* transientHeap) { - m_frame = frame; + m_transientHeap = transientHeap; m_renderer = renderer; - m_cmdList = m_frame->createCommandList(renderer->m_device); + m_cmdList = d3dCommandList; ID3D12DescriptorHeap* heaps[] = { - m_frame->m_viewHeap.getHeap(), - m_frame->m_samplerHeap.getHeap(), + m_transientHeap->m_viewHeap.getHeap(), + m_transientHeap->m_samplerHeap.getHeap(), }; m_cmdList->SetDescriptorHeaps(SLANG_COUNT_OF(heaps), heaps); } @@ -2392,7 +2441,7 @@ public: void init( D3D12Device* renderer, - ExecutionFrameResources* frame, + TransientResourceHeapImpl* transientHeap, CommandBufferImpl* cmdBuffer, RenderPassLayoutImpl* renderPass, FramebufferImpl* framebuffer) @@ -2402,7 +2451,7 @@ public: m_device = renderer->m_device; m_renderPass = renderPass; m_framebuffer = framebuffer; - m_frame = frame; + m_transientHeap = transientHeap; m_boundVertexBuffers.clear(); m_boundIndexBuffer = nullptr; m_primitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; @@ -2735,7 +2784,7 @@ public: { m_renderCommandEncoder.init( m_renderer, - m_frame, + m_transientHeap, this, static_cast<RenderPassLayoutImpl*>(renderPass), static_cast<FramebufferImpl*>(framebuffer)); @@ -2769,13 +2818,13 @@ public: } void init( D3D12Device* renderer, - ExecutionFrameResources* frame, + TransientResourceHeapImpl* transientHeap, CommandBufferImpl* cmdBuffer) { PipelineCommandEncoder::init(cmdBuffer); m_preCmdList = nullptr; m_device = renderer->m_device; - m_frame = frame; + m_transientHeap = transientHeap; m_currentPipeline = nullptr; } @@ -2805,7 +2854,7 @@ public: virtual SLANG_NO_THROW void SLANG_MCALL encodeComputeCommands(IComputeCommandEncoder** outEncoder) override { - m_computeCommandEncoder.init(m_renderer, m_frame, this); + m_computeCommandEncoder.init(m_renderer, m_transientHeap, this); *outEncoder = &m_computeCommandEncoder; } @@ -2892,32 +2941,6 @@ public: } public: - struct CommandBufferPool - { - List<RefPtr<CommandBufferImpl>> pool; - uint32_t allocIndex = 0; - RefPtr<CommandBufferImpl> allocCommandBuffer(D3D12Device* renderer, ExecutionFrameResources* frame) - { - if ((Index)allocIndex < pool.getCount()) - { - RefPtr<CommandBufferImpl> result = pool[allocIndex]; - result->init(renderer, frame); - allocIndex++; - return result; - } - RefPtr<CommandBufferImpl> cmdBuffer = new CommandBufferImpl(); - cmdBuffer->init(renderer, frame); - pool.add(cmdBuffer); - return cmdBuffer; - } - void reset() - { - allocIndex = 0; - } - }; - List<CommandBufferPool> m_commandBufferPools; - List<ExecutionFrameResources> m_frames; - uint32_t m_frameIndex = 0; D3D12Device* m_renderer; ComPtr<ID3D12Device> m_device; ComPtr<ID3D12CommandQueue> m_d3dQueue; @@ -2925,20 +2948,13 @@ public: uint64_t m_fenceValue = 0; HANDLE globalWaitHandle; Desc m_desc; - Result init( - D3D12Device* renderer, - uint32_t frameCount, - uint32_t viewHeapSize, - uint32_t samplerHeapSize) + uint32_t m_queueIndex = 0; + + Result init(D3D12Device* device, uint32_t queueIndex) { - m_renderer = renderer; - m_device = renderer->m_device; - m_frames.setCount(frameCount); - m_commandBufferPools.setCount(frameCount); - for (uint32_t i = 0; i < frameCount; i++) - { - SLANG_RETURN_ON_FAIL(m_frames[i].init(m_device, viewHeapSize, samplerHeapSize)); - } + m_queueIndex = queueIndex; + m_renderer = device; + m_device = device->m_device; D3D12_COMMAND_QUEUE_DESC queueDesc = {}; queueDesc.Type = D3D12_COMMAND_LIST_TYPE_DIRECT; SLANG_RETURN_ON_FAIL(m_device->CreateCommandQueue(&queueDesc, IID_PPV_ARGS(m_d3dQueue.writeRef()))); @@ -2955,20 +2971,12 @@ public: { wait(); CloseHandle(globalWaitHandle); + m_renderer->m_queueIndexAllocator.free((int)m_queueIndex, 1); } virtual SLANG_NO_THROW const Desc& SLANG_MCALL getDesc() override { return m_desc; } - virtual SLANG_NO_THROW Result SLANG_MCALL - createCommandBuffer(ICommandBuffer** outCommandBuffer) override - { - RefPtr<CommandBufferImpl> result = - m_commandBufferPools[m_frameIndex].allocCommandBuffer( - m_renderer, &m_frames[m_frameIndex]); - *outCommandBuffer = result.detach(); - return SLANG_OK; - } virtual SLANG_NO_THROW void SLANG_MCALL executeCommandBuffers(uint32_t count, ICommandBuffer* const* commandBuffers) override @@ -2981,21 +2989,21 @@ public: } m_d3dQueue->ExecuteCommandLists((UINT)count, commandLists.getArrayView().getBuffer()); - auto& frame = m_frames[m_frameIndex]; m_fenceValue++; + + for (uint32_t i = 0; i < count; i++) + { + if (i > 0 && commandBuffers[i] == commandBuffers[i - 1]) + continue; + auto cmdImpl = static_cast<CommandBufferImpl*>(commandBuffers[i]); + auto transientHeap = cmdImpl->m_transientHeap; + auto& waitInfo = transientHeap->getQueueWaitInfo(m_queueIndex); + waitInfo.waitValue = m_fenceValue; + ResetEvent(waitInfo.fenceEvent); + m_fence->SetEventOnCompletion(m_fenceValue, waitInfo.fenceEvent); + } m_d3dQueue->Signal(m_fence, m_fenceValue); - ResetEvent(frame.fenceEvent); ResetEvent(globalWaitHandle); - m_fence->SetEventOnCompletion(m_fenceValue, frame.fenceEvent); - swapExecutionFrame(); - } - - void swapExecutionFrame() - { - m_frameIndex = (m_frameIndex + 1) % m_frames.getCount(); - auto& frame = m_frames[m_frameIndex]; - frame.reset(); - m_commandBufferPools[m_frameIndex].reset(); } virtual SLANG_NO_THROW void SLANG_MCALL wait() override @@ -3083,11 +3091,13 @@ public: static PROC loadProc(HMODULE module, char const* name); - Result createCommandQueueImpl( - uint32_t frameCount, - uint32_t viewHeapSize, - uint32_t samplerHeapSize, - CommandQueueImpl** outQueue); + Result createCommandQueueImpl(CommandQueueImpl** outQueue); + + Result createTransientResourceHeapImpl( + size_t constantBufferSize, + uint32_t viewDescriptors, + uint32_t samplerDescriptors, + TransientResourceHeapImpl** outHeap); Result createBuffer( const D3D12_RESOURCE_DESC& resourceDesc, @@ -3118,7 +3128,7 @@ public: ResourceCommandRecordInfo encodeResourceCommands() { ResourceCommandRecordInfo info; - m_resourceCommandQueue->createCommandBuffer(info.commandBuffer.writeRef()); + m_resourceCommandTransientHeap->createCommandBuffer(info.commandBuffer.writeRef()); info.d3dCommandList = static_cast<CommandBufferImpl*>(info.commandBuffer.get())->m_cmdList; return info; } @@ -3126,7 +3136,7 @@ public: { info.commandBuffer->close(); m_resourceCommandQueue->executeCommandBuffer(info.commandBuffer); - m_resourceCommandQueue->wait(); + m_resourceCommandTransientHeap->synchronizeAndReset(); } // D3D12Device members. @@ -3143,7 +3153,10 @@ public: DeviceInfo m_deviceInfo; ID3D12Device* m_device = nullptr; + VirtualObjectPool m_queueIndexAllocator; + RefPtr<CommandQueueImpl> m_resourceCommandQueue; + RefPtr<TransientResourceHeapImpl> m_resourceCommandTransientHeap; D3D12HostVisibleDescriptorAllocator m_rtvAllocator; D3D12HostVisibleDescriptorAllocator m_dsvAllocator; @@ -3162,6 +3175,53 @@ public: bool m_nvapi = false; }; +SLANG_NO_THROW Result SLANG_MCALL D3D12Device::TransientResourceHeapImpl::synchronizeAndReset() +{ + Array<HANDLE, 16> waitHandles; + for (auto& waitInfo : m_waitInfos) + { + if (waitInfo.waitValue != 0) + waitHandles.add(waitInfo.fenceEvent); + } + WaitForMultipleObjects((DWORD)waitHandles.getCount(), waitHandles.getBuffer(), TRUE, INFINITE); + m_viewHeap.deallocateAll(); + m_samplerHeap.deallocateAll(); + m_commandListAllocId = 0; + SLANG_RETURN_ON_FAIL(m_commandAllocator->Reset()); + return SLANG_OK; +} + +Result D3D12Device::TransientResourceHeapImpl::createCommandBuffer(ICommandBuffer** outCmdBuffer) +{ + if ((Index)m_commandListAllocId < m_commandBufferPool.getCount()) + { + auto result = static_cast<D3D12Device::CommandBufferImpl*>( + m_commandBufferPool[m_commandListAllocId].get()); + m_d3dCommandListPool[m_commandListAllocId]->Reset(m_commandAllocator, nullptr); + result->init(m_device, m_d3dCommandListPool[m_commandListAllocId], this); + ++m_commandListAllocId; + result->addRef(); + *outCmdBuffer = result; + return SLANG_OK; + } + ComPtr<ID3D12GraphicsCommandList> cmdList; + m_device->m_device->CreateCommandList( + 0, + D3D12_COMMAND_LIST_TYPE_DIRECT, + m_commandAllocator, + nullptr, + IID_PPV_ARGS(cmdList.writeRef())); + + m_d3dCommandListPool.add(cmdList); + RefPtr<CommandBufferImpl> cmdBuffer = new CommandBufferImpl(); + cmdBuffer->init(m_device, cmdList, this); + ComPtr<ICommandBuffer> cmdBufferPtr; + *cmdBufferPtr.writeRef() = cmdBuffer.detach(); + m_commandBufferPool.add(cmdBufferPtr); + ++m_commandListAllocId; + *outCmdBuffer = cmdBufferPtr.detach(); + return SLANG_OK; +} Result D3D12Device::PipelineCommandEncoder::_bindRenderState(Submitter* submitter) { @@ -3186,21 +3246,23 @@ Result D3D12Device::PipelineCommandEncoder::_bindRenderState(Submitter* submitte if (descSet.resourceDescriptorCount) { DescriptorTable table; - table.heap = &m_frame->m_viewHeap; - table.table = m_frame->m_viewHeap.allocate((int)descSet.resourceDescriptorCount); + table.heap = &m_transientHeap->m_viewHeap; + table.table = + m_transientHeap->m_viewHeap.allocate((int)descSet.resourceDescriptorCount); descriptorTables.add(table); } if (descSet.samplerDescriptorCount) { DescriptorTable table; - table.heap = &m_frame->m_samplerHeap; - table.table = m_frame->m_samplerHeap.allocate((int)descSet.samplerDescriptorCount); + table.heap = &m_transientHeap->m_samplerHeap; + table.table = + m_transientHeap->m_samplerHeap.allocate((int)descSet.samplerDescriptorCount); descriptorTables.add(table); } } RootBindingState bindState = {}; bindState.device = m_renderer; - bindState.frame = m_frame; + bindState.transientHeap = m_transientHeap; auto descTablesView = descriptorTables.getArrayView(); bindState.descriptorTables = descTablesView.arrayView; SLANG_RETURN_ON_FAIL(rootObjectImpl->bindObject(this, &bindState)); @@ -3213,14 +3275,29 @@ Result D3D12Device::PipelineCommandEncoder::_bindRenderState(Submitter* submitte return SLANG_OK; } -Result D3D12Device::createCommandQueueImpl( - uint32_t frameCount, - uint32_t viewHeapSize, - uint32_t samplerHeapSize, - D3D12Device::CommandQueueImpl** outQueue) +Result D3D12Device::createTransientResourceHeapImpl( + size_t constantBufferSize, + uint32_t viewDescriptors, + uint32_t samplerDescriptors, + TransientResourceHeapImpl** outHeap) +{ + RefPtr<TransientResourceHeapImpl> result = new TransientResourceHeapImpl(); + ITransientResourceHeap::Desc desc = {}; + desc.constantBufferSize = constantBufferSize; + SLANG_RETURN_ON_FAIL(result->init(desc, this, viewDescriptors, samplerDescriptors)); + *outHeap = result.detach(); + return SLANG_OK; +} + +Result D3D12Device::createCommandQueueImpl(D3D12Device::CommandQueueImpl** outQueue) { + int queueIndex = m_queueIndexAllocator.alloc(1); + // If we run out of queue index space, then the user is requesting too many queues. + if (queueIndex == -1) + return SLANG_FAIL; + RefPtr<D3D12Device::CommandQueueImpl> queue = new D3D12Device::CommandQueueImpl(); - SLANG_RETURN_ON_FAIL(queue->init(this, frameCount, viewHeapSize, samplerHeapSize)); + SLANG_RETURN_ON_FAIL(queue->init(this, (uint32_t)queueIndex)); *outQueue = queue.detach(); return SLANG_OK; } @@ -3313,23 +3390,6 @@ static void _initSrvDesc(IResource::Type resourceType, const ITextureResource::D } } -static void _initBufferResourceDesc(size_t bufferSize, D3D12_RESOURCE_DESC& out) -{ - out = {}; - - out.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; - out.Alignment = 0; - out.Width = bufferSize; - out.Height = 1; - out.DepthOrArraySize = 1; - out.MipLevels = 1; - out.Format = DXGI_FORMAT_UNKNOWN; - out.SampleDesc.Count = 1; - out.SampleDesc.Quality = 0; - out.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; - out.Flags = D3D12_RESOURCE_FLAG_NONE; -} - Result D3D12Device::createBuffer(const D3D12_RESOURCE_DESC& resourceDesc, const void* srcData, size_t srcDataSize, D3D12Resource& uploadResource, D3D12_RESOURCE_STATES finalState, D3D12Resource& resourceOut) { const size_t bufferSize = size_t(resourceDesc.Width); @@ -3590,6 +3650,10 @@ Result D3D12Device::initialize(const Desc& desc) SLANG_RETURN_ON_FAIL(RendererBase::initialize(desc)); + // Initialize queue index allocator. + // Support max 32 queues. + m_queueIndexAllocator.initPool(32); + // Initialize DeviceInfo { m_info.deviceType = DeviceType::DirectX12; @@ -3743,7 +3807,8 @@ Result D3D12Device::initialize(const Desc& desc) m_desc = desc; // Create a command queue for internal resource transfer operations. - SLANG_RETURN_ON_FAIL(createCommandQueueImpl(1, 32, 4, m_resourceCommandQueue.writeRef())); + SLANG_RETURN_ON_FAIL(createCommandQueueImpl(m_resourceCommandQueue.writeRef())); + SLANG_RETURN_ON_FAIL(createTransientResourceHeapImpl(0, 8, 4, m_resourceCommandTransientHeap.writeRef())); SLANG_RETURN_ON_FAIL(m_cpuViewHeap.init (m_device, 8192, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV)); SLANG_RETURN_ON_FAIL(m_cpuSamplerHeap.init(m_device, 1024, D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER)); @@ -3764,10 +3829,21 @@ Result D3D12Device::initialize(const Desc& desc) return SLANG_OK; } +Result D3D12Device::createTransientResourceHeap( + const ITransientResourceHeap::Desc& desc, + ITransientResourceHeap** outHeap) +{ + RefPtr<TransientResourceHeapImpl> heap; + SLANG_RETURN_ON_FAIL( + createTransientResourceHeapImpl(desc.constantBufferSize, 8192, 1024, heap.writeRef())); + *outHeap = heap.detach(); + return SLANG_OK; +} + Result D3D12Device::createCommandQueue(const ICommandQueue::Desc& desc, ICommandQueue** outQueue) { RefPtr<CommandQueueImpl> queue; - SLANG_RETURN_ON_FAIL(createCommandQueueImpl(8, 4096, 1024, queue.writeRef())); + SLANG_RETURN_ON_FAIL(createCommandQueueImpl(queue.writeRef())); *outQueue = queue.detach(); return SLANG_OK; } diff --git a/tools/gfx/d3d12/resource-d3d12.cpp b/tools/gfx/d3d12/resource-d3d12.cpp index 397eee665..3f91a12be 100644 --- a/tools/gfx/d3d12/resource-d3d12.cpp +++ b/tools/gfx/d3d12/resource-d3d12.cpp @@ -72,63 +72,6 @@ void D3D12ResourceBase::transition( } } -/* !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! D3D12CounterFence !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! */ - -D3D12CounterFence::~D3D12CounterFence() -{ - if (m_event) - { - CloseHandle(m_event); - } -} - -Result D3D12CounterFence::init(ID3D12Device* device, uint64_t initialValue) -{ - m_currentValue = initialValue; - - SLANG_RETURN_ON_FAIL(device->CreateFence(m_currentValue, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(m_fence.writeRef()))); - // Create an event handle to use for frame synchronization. - m_event = ::CreateEvent(nullptr, FALSE, FALSE, nullptr); - if (m_event == nullptr) - { - Result res = HRESULT_FROM_WIN32(GetLastError()); - return SLANG_FAILED(res) ? res : SLANG_FAIL; - } - return SLANG_OK; -} - -UInt64 D3D12CounterFence::nextSignal(ID3D12CommandQueue* commandQueue) -{ - // Increment the fence value. Save on the frame - we'll know that frame is done when the fence value >= - m_currentValue++; - // Schedule a Signal command in the queue. - Result res = commandQueue->Signal(m_fence, m_currentValue); - if (SLANG_FAILED(res)) - { - assert(!"Signal failed"); - } - return m_currentValue; -} - -void D3D12CounterFence::waitUntilCompleted(uint64_t completedValue) -{ - // You can only wait for a value that is less than or equal to the current value - assert(completedValue <= m_currentValue); - - // Wait until the previous frame is finished. - while (m_fence->GetCompletedValue() < completedValue) - { - // Make it signal with the current value - SLANG_ASSERT_VOID_ON_FAIL(m_fence->SetEventOnCompletion(completedValue, m_event)); - WaitForSingleObject(m_event, INFINITE); - } -} - -void D3D12CounterFence::nextSignalAndWait(ID3D12CommandQueue* commandQueue) -{ - waitUntilCompleted(nextSignal(commandQueue)); -} - /* !!!!!!!!!!!!!!!!!!!!!!!!! D3D12Resource !!!!!!!!!!!!!!!!!!!!!!!! */ /* static */void D3D12Resource::setDebugName(ID3D12Resource* resource, const char* name) diff --git a/tools/gfx/d3d12/resource-d3d12.h b/tools/gfx/d3d12/resource-d3d12.h index 8b6c28114..39b6c13db 100644 --- a/tools/gfx/d3d12/resource-d3d12.h +++ b/tools/gfx/d3d12/resource-d3d12.h @@ -46,53 +46,6 @@ protected: D3D12_RESOURCE_BARRIER m_barriers[MAX_BARRIERS]; }; -/*! \brief A class to simplify using Dx12 fences. - -A fence is a mechanism to track GPU work. This is achieved by having a counter that the CPU holds -called the current value. Calling nextSignal will increase the CPU counter, and add a fence -with that value to the commandQueue. When the GPU has completed all the work before the fence it will -update the completed value. This is typically used when -the CPU needs to know the GPU has finished some piece of work has completed. To do this the CPU -can check the completed value, and when it is greater or equal to the value returned by nextSignal the -CPU will know that all the work prior to when the nextSignal was added to the queue will have completed. - -NOTE! This cannot be used across threads, as for amongst other reasons SetEventOnCompletion -only works with a single value. - -Signal on the CommandQueue updates the fence on the GPU side. Signal on the fence object changes -the value on the CPU side (not used here). - -Useful article describing how Dx12 synchronization works: -https://msdn.microsoft.com/en-us/library/windows/desktop/dn899217%28v=vs.85%29.aspx -*/ -class D3D12CounterFence -{ -public: - /// Must be called before used - SlangResult init(ID3D12Device* device, uint64_t initialValue = 0); - /// Increases the counter, signals the queue and waits for the signal to be hit - void nextSignalAndWait(ID3D12CommandQueue* queue); - /// Signals with next counter value. Returns the value the signal was called on - uint64_t nextSignal(ID3D12CommandQueue* commandQueue); - /// Get the current value - SLANG_FORCE_INLINE uint64_t getCurrentValue() const { return m_currentValue; } - /// Get the completed value - SLANG_FORCE_INLINE uint64_t getCompletedValue() const { return m_fence->GetCompletedValue(); } - - /// Waits for the the specified value - void waitUntilCompleted(uint64_t completedValue); - - /// Ctor - D3D12CounterFence() :m_event(nullptr), m_currentValue(0) {} - /// Dtor - ~D3D12CounterFence(); - -protected: - HANDLE m_event; - Slang::ComPtr<ID3D12Fence> m_fence; - UINT64 m_currentValue; -}; - /** The base class for resource types allows for tracking of state. It does not allow for setting of the resource though, such that an interface can return a D3D12ResourceBase, and a client cant manipulate it's state, but it cannot replace/change the actual resource */ struct D3D12ResourceBase diff --git a/tools/gfx/immediate-renderer-base.cpp b/tools/gfx/immediate-renderer-base.cpp index 9402f7834..97bc63634 100644 --- a/tools/gfx/immediate-renderer-base.cpp +++ b/tools/gfx/immediate-renderer-base.cpp @@ -1,5 +1,6 @@ #include "immediate-renderer-base.h" #include "simple-render-pass-layout.h" +#include "simple-transient-resource-heap.h" #include "command-writer.h" #include "core/slang-basic.h" #include "core/slang-blob.h" @@ -402,15 +403,6 @@ public: virtual SLANG_NO_THROW const Desc& SLANG_MCALL getDesc() override { return m_desc; } - virtual SLANG_NO_THROW Result SLANG_MCALL - createCommandBuffer(ICommandBuffer** outCommandBuffer) override - { - RefPtr<CommandBufferImpl> newCmdBuffer = new CommandBufferImpl(); - newCmdBuffer->init(m_renderer); - *outCommandBuffer = newCmdBuffer.detach(); - return SLANG_OK; - } - virtual SLANG_NO_THROW void SLANG_MCALL executeCommandBuffers(uint32_t count, ICommandBuffer* const* commandBuffers) override { @@ -425,12 +417,26 @@ public: m_renderer->waitForGpu(); } }; + +using TransientResourceHeapImpl = + SimpleTransientResourceHeap<ImmediateRendererBase, CommandBufferImpl>; + } ImmediateRendererBase::ImmediateRendererBase() { m_queue = new CommandQueueImpl(this); } +SLANG_NO_THROW Result SLANG_MCALL ImmediateRendererBase::createTransientResourceHeap( + const ITransientResourceHeap::Desc& desc, + ITransientResourceHeap** outHeap) +{ + RefPtr<TransientResourceHeapImpl> result = new TransientResourceHeapImpl(); + SLANG_RETURN_ON_FAIL(result->init(this, desc)); + *outHeap = result.detach(); + return SLANG_OK; +} + SLANG_NO_THROW Result SLANG_MCALL ImmediateRendererBase::createCommandQueue( const ICommandQueue::Desc& desc, ICommandQueue** outQueue) diff --git a/tools/gfx/immediate-renderer-base.h b/tools/gfx/immediate-renderer-base.h index a78671e49..296cd15cb 100644 --- a/tools/gfx/immediate-renderer-base.h +++ b/tools/gfx/immediate-renderer-base.h @@ -64,6 +64,9 @@ public: virtual SLANG_NO_THROW Result SLANG_MCALL createCommandQueue(const ICommandQueue::Desc& desc, ICommandQueue** outQueue) override; + virtual SLANG_NO_THROW Result SLANG_MCALL createTransientResourceHeap( + const ITransientResourceHeap::Desc& desc, + ITransientResourceHeap** outHeap) override; virtual SLANG_NO_THROW Result SLANG_MCALL createRenderPassLayout( const IRenderPassLayout::Desc& desc, IRenderPassLayout** outRenderPassLayout) override; diff --git a/tools/gfx/renderer-shared.cpp b/tools/gfx/renderer-shared.cpp index 2e6105793..1571e9abf 100644 --- a/tools/gfx/renderer-shared.cpp +++ b/tools/gfx/renderer-shared.cpp @@ -11,6 +11,7 @@ const Slang::Guid GfxGUID::IID_ISlangUnknown = SLANG_UUID_ISlangUnknown; const Slang::Guid GfxGUID::IID_IShaderProgram = SLANG_UUID_IShaderProgram; const Slang::Guid GfxGUID::IID_IInputLayout = SLANG_UUID_IInputLayout; const Slang::Guid GfxGUID::IID_IPipelineState = SLANG_UUID_IPipelineState; +const Slang::Guid GfxGUID::IID_ITransientResourceHeap = SLANG_UUID_ITransientResourceHeap; const Slang::Guid GfxGUID::IID_IResourceView = SLANG_UUID_IResourceView; const Slang::Guid GfxGUID::IID_IFramebuffer = SLANG_UUID_IFrameBuffer; const Slang::Guid GfxGUID::IID_IFramebufferLayout = SLANG_UUID_IFramebufferLayout; diff --git a/tools/gfx/renderer-shared.h b/tools/gfx/renderer-shared.h index 41b9a31c9..d1ecebfce 100644 --- a/tools/gfx/renderer-shared.h +++ b/tools/gfx/renderer-shared.h @@ -10,10 +10,8 @@ namespace gfx struct GfxGUID { static const Slang::Guid IID_ISlangUnknown; - static const Slang::Guid IID_IDescriptorSetLayout; - static const Slang::Guid IID_IDescriptorSet; static const Slang::Guid IID_IShaderProgram; - static const Slang::Guid IID_IPipelineLayout; + static const Slang::Guid IID_ITransientResourceHeap; static const Slang::Guid IID_IPipelineState; static const Slang::Guid IID_IResourceView; static const Slang::Guid IID_IFramebuffer; diff --git a/tools/gfx/simple-transient-resource-heap.h b/tools/gfx/simple-transient-resource-heap.h new file mode 100644 index 000000000..5f6c32451 --- /dev/null +++ b/tools/gfx/simple-transient-resource-heap.h @@ -0,0 +1,52 @@ +// simple-render-pass-layout.h +#pragma once + +// Provide a simple no-op implementation for `ITransientResourceHeap` for targets that +// already support version management. + +#include "slang-gfx.h" + +namespace gfx +{ +template<typename TDevice, typename TCommandBuffer> +class SimpleTransientResourceHeap + : public ITransientResourceHeap + , public Slang::RefObject +{ +public: + SLANG_REF_OBJECT_IUNKNOWN_ALL + ITransientResourceHeap* getInterface(const Slang::Guid& guid) + { + if (guid == GfxGUID::IID_ISlangUnknown || guid == GfxGUID::IID_ITransientResourceHeap) + return static_cast<ITransientResourceHeap*>(this); + return nullptr; + } + +public: + TDevice* m_device; + ComPtr<IBufferResource> m_constantBuffer; + +public: + Result init(TDevice* device, const ITransientResourceHeap::Desc& desc) + { + m_device = device; + IBufferResource::Desc bufferDesc = {}; + bufferDesc.setDefaults(IResource::Usage::ConstantBuffer); + bufferDesc.sizeInBytes = desc.constantBufferSize; + bufferDesc.cpuAccessFlags = IResource::AccessFlag::Write; + SLANG_RETURN_ON_FAIL(device->createBufferResource( + IResource::Usage::ConstantBuffer, bufferDesc, nullptr, m_constantBuffer.writeRef())); + return SLANG_OK; + } + virtual SLANG_NO_THROW Result SLANG_MCALL + createCommandBuffer(ICommandBuffer** outCommandBuffer) override + { + Slang::RefPtr<TCommandBuffer> newCmdBuffer = new TCommandBuffer(); + newCmdBuffer->init(m_device); + *outCommandBuffer = newCmdBuffer.detach(); + return SLANG_OK; + } + + virtual SLANG_NO_THROW Result SLANG_MCALL synchronizeAndReset() override { return SLANG_OK; } +}; +} diff --git a/tools/gfx/vulkan/render-vk.cpp b/tools/gfx/vulkan/render-vk.cpp index d4396429b..96a0d1047 100644 --- a/tools/gfx/vulkan/render-vk.cpp +++ b/tools/gfx/vulkan/render-vk.cpp @@ -50,7 +50,10 @@ public: }; // Renderer implementation Result initVulkanInstanceAndDevice(bool useValidationLayer); - virtual SLANG_NO_THROW SlangResult SLANG_MCALL initialize(const Desc& desc) override; + virtual SLANG_NO_THROW Result SLANG_MCALL initialize(const Desc& desc) override; + virtual SLANG_NO_THROW Result SLANG_MCALL createTransientResourceHeap( + const ITransientResourceHeap::Desc& desc, + ITransientResourceHeap** outHeap) override; virtual SLANG_NO_THROW Result SLANG_MCALL createCommandQueue(const ICommandQueue::Desc& desc, ICommandQueue** outQueue) override; virtual SLANG_NO_THROW Result SLANG_MCALL createSwapchain( @@ -795,7 +798,6 @@ public: auto descriptorSetIndex = findOrAddDescriptorSet(typeLayout->getDescriptorSetSpaceOffset(s)); auto& descriptorSetInfo = m_descriptorSetBuildInfos[descriptorSetIndex]; - for (SlangInt r = 0; r < descriptorRangeCount; ++r) { auto slangBindingType = @@ -812,7 +814,6 @@ public: } auto vkDescriptorType = _mapDescriptorType(slangBindingType); - VkDescriptorSetLayoutBinding vkBindingRangeDesc = {}; vkBindingRangeDesc.binding = (uint32_t)typeLayout->getDescriptorSetDescriptorRangeIndexOffset(s, r); @@ -829,14 +830,6 @@ public: } descriptorSetInfo.vkBindings.add(vkBindingRangeDesc); } - VkDescriptorSetLayoutCreateInfo createInfo = {}; - createInfo.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO; - createInfo.pBindings = descriptorSetInfo.vkBindings.getBuffer(); - createInfo.bindingCount = (uint32_t)descriptorSetInfo.vkBindings.getCount(); - VkDescriptorSetLayout vkDescSetLayout; - SLANG_RETURN_ON_FAIL(m_renderer->m_api.vkCreateDescriptorSetLayout( - m_renderer->m_api.m_device, &createInfo, nullptr, &vkDescSetLayout)); - descriptorSetInfo.descriptorSetLayout = vkDescSetLayout; } return SLANG_OK; } @@ -1019,6 +1012,19 @@ public: m_combinedTextureSamplerCount = builder->m_combinedTextureSamplerCount; m_subObjectCount = builder->m_subObjectCount; m_subObjectRanges = builder->m_subObjectRanges; + + // Create VkDescriptorSetLayout for all descriptor sets. + for (auto& descriptorSetInfo : m_descriptorSetInfos) + { + VkDescriptorSetLayoutCreateInfo createInfo = {}; + createInfo.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO; + createInfo.pBindings = descriptorSetInfo.vkBindings.getBuffer(); + createInfo.bindingCount = (uint32_t)descriptorSetInfo.vkBindings.getCount(); + VkDescriptorSetLayout vkDescSetLayout; + SLANG_RETURN_ON_FAIL(renderer->m_api.vkCreateDescriptorSetLayout( + renderer->m_api.m_device, &createInfo, nullptr, &vkDescSetLayout)); + descriptorSetInfo.descriptorSetLayout = vkDescSetLayout; + } return SLANG_OK; } @@ -1145,7 +1151,8 @@ public: auto slangEntryPointLayout = entryPointLayout->getSlangLayout(); _addDescriptorSets( - slangEntryPointLayout->getTypeLayout(), slangEntryPointLayout->getVarLayout()); + _unwrapParameterGroups(slangEntryPointLayout->getTypeLayout()), + slangEntryPointLayout->getVarLayout()); m_entryPoints.add(info); } @@ -1210,7 +1217,7 @@ public: m_program = builder->m_program; m_programLayout = builder->m_programLayout; - m_entryPoints = builder->m_entryPoints; + m_entryPoints = _Move(builder->m_entryPoints); m_renderer = renderer; if (m_program->getSpecializationParamCount() != 0) @@ -2709,8 +2716,10 @@ public: VkCommandBuffer m_commandBuffer; VkCommandBuffer m_preCommandBuffer = VK_NULL_HANDLE; VkCommandPool m_pool; + VkFence m_fence; VKDevice* m_renderer; DescriptorSetAllocator* m_transientDescSetAllocator; + bool m_isPreCommandBufferEmpty = true; // Command buffers are deallocated by its command pool, // so no need to free individually. ~CommandBufferImpl() = default; @@ -2718,11 +2727,13 @@ public: Result init( VKDevice* renderer, VkCommandPool pool, + VkFence fence, DescriptorSetAllocator* transientDescSetAllocator) { m_renderer = renderer; m_transientDescSetAllocator = transientDescSetAllocator; m_pool = pool; + m_fence = fence; auto& api = renderer->m_api; VkCommandBufferAllocateInfo allocInfo = {}; @@ -2733,12 +2744,23 @@ public: SLANG_VK_RETURN_ON_FAIL( api.vkAllocateCommandBuffers(api.m_device, &allocInfo, &m_commandBuffer)); + beginCommandBuffer(); + return SLANG_OK; + } + + void beginCommandBuffer() + { + auto& api = m_renderer->m_api; VkCommandBufferBeginInfo beginInfo = { VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, nullptr, VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT}; api.vkBeginCommandBuffer(m_commandBuffer, &beginInfo); - return SLANG_OK; + if (m_preCommandBuffer) + { + api.vkBeginCommandBuffer(m_preCommandBuffer, &beginInfo); + } + m_isPreCommandBufferEmpty = true; } Result createPreCommandBuffer() @@ -2761,6 +2783,7 @@ public: VkCommandBuffer getPreCommandBuffer() { + m_isPreCommandBufferEmpty = false; if (m_preCommandBuffer) return m_preCommandBuffer; createPreCommandBuffer(); @@ -3191,7 +3214,7 @@ public: virtual SLANG_NO_THROW void SLANG_MCALL close() override { auto& vkAPI = m_renderer->m_api; - if (m_preCommandBuffer != VK_NULL_HANDLE) + if (!m_isPreCommandBufferEmpty) { // `preCmdBuffer` contains buffer transfer commands for shader object // uniform buffers, and we need a memory barrier here to ensure the @@ -3231,56 +3254,34 @@ public: public: Desc m_desc; - uint32_t m_poolIndex; RefPtr<VKDevice> m_renderer; VkQueue m_queue; uint32_t m_queueFamilyIndex; VkSemaphore m_pendingWaitSemaphore = VK_NULL_HANDLE; List<VkCommandBuffer> m_submitCommandBuffers; - static const int kCommandPoolCount = 8; - VkCommandPool m_commandPools[kCommandPoolCount]; - DescriptorSetAllocator m_descSetAllocators[kCommandPoolCount]; - VkFence m_fences[kCommandPoolCount]; - VkSemaphore m_semaphores[kCommandPoolCount]; + static const int kSemaphoreCount = 2; + uint32_t m_currentSemaphoreIndex; + VkSemaphore m_semaphores[kSemaphoreCount]; ~CommandQueueImpl() { m_renderer->m_api.vkQueueWaitIdle(m_queue); m_renderer->m_queueAllocCount--; - for (int i = 0; i < kCommandPoolCount; i++) + for (int i = 0; i < kSemaphoreCount; i++) { - m_renderer->m_api.vkDestroyCommandPool( - m_renderer->m_api.m_device, m_commandPools[i], nullptr); - m_renderer->m_api.vkDestroyFence(m_renderer->m_api.m_device, m_fences[i], nullptr); m_renderer->m_api.vkDestroySemaphore( m_renderer->m_api.m_device, m_semaphores[i], nullptr); - m_descSetAllocators[i].close(); } } void init(VKDevice* renderer, VkQueue queue, uint32_t queueFamilyIndex) { m_renderer = renderer; - m_poolIndex = 0; + m_currentSemaphoreIndex = 0; m_queue = queue; m_queueFamilyIndex = queueFamilyIndex; - for (int i = 0; i < kCommandPoolCount; i++) + for (int i = 0; i < kSemaphoreCount; i++) { - m_descSetAllocators[i].m_api = &m_renderer->m_api; - - VkCommandPoolCreateInfo poolCreateInfo = {}; - poolCreateInfo.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO; - poolCreateInfo.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT; - poolCreateInfo.queueFamilyIndex = queueFamilyIndex; - m_renderer->m_api.vkCreateCommandPool( - m_renderer->m_api.m_device, &poolCreateInfo, nullptr, &m_commandPools[i]); - - VkFenceCreateInfo fenceCreateInfo = {}; - fenceCreateInfo.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO; - fenceCreateInfo.flags = VK_FENCE_CREATE_SIGNALED_BIT; - m_renderer->m_api.vkCreateFence( - m_renderer->m_api.m_device, &fenceCreateInfo, nullptr, &m_fences[i]); - VkSemaphoreCreateInfo semaphoreCreateInfo = {}; semaphoreCreateInfo.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO; semaphoreCreateInfo.flags = 0; @@ -3289,24 +3290,6 @@ public: } } - // Swaps to and resets the next command pool. - // Wait if command lists in the next pool are still in flight. - Result swapPools() - { - auto& vkAPI = m_renderer->m_api; - m_poolIndex++; - m_poolIndex = m_poolIndex % kCommandPoolCount; - - if (vkAPI.vkWaitForFences(vkAPI.m_device, 1, &m_fences[m_poolIndex], 1, UINT64_MAX) != - VK_SUCCESS) - { - return SLANG_FAIL; - } - vkAPI.vkResetCommandPool(vkAPI.m_device, m_commandPools[m_poolIndex], 0); - m_descSetAllocators[m_poolIndex].reset(); - return SLANG_OK; - } - virtual SLANG_NO_THROW void SLANG_MCALL wait() override { auto& vkAPI = m_renderer->m_api; @@ -3318,33 +3301,26 @@ public: return m_desc; } - virtual SLANG_NO_THROW Result SLANG_MCALL - createCommandBuffer(ICommandBuffer** result) override - { - RefPtr<CommandBufferImpl> commandBuffer = new CommandBufferImpl(); - SLANG_RETURN_ON_FAIL(commandBuffer->init( - m_renderer, m_commandPools[m_poolIndex], &m_descSetAllocators[m_poolIndex])); - *result = commandBuffer.detach(); - return SLANG_OK; - } - virtual SLANG_NO_THROW void SLANG_MCALL executeCommandBuffers( uint32_t count, ICommandBuffer* const* commandBuffers) override { + if (count == 0) + return; + auto& vkAPI = m_renderer->m_api; m_submitCommandBuffers.clear(); for (uint32_t i = 0; i < count; i++) { auto cmdBufImpl = static_cast<CommandBufferImpl*>(commandBuffers[i]); - if (cmdBufImpl->m_preCommandBuffer != VK_NULL_HANDLE) + if (!cmdBufImpl->m_isPreCommandBufferEmpty) m_submitCommandBuffers.add(cmdBufImpl->m_preCommandBuffer); auto vkCmdBuf = cmdBufImpl->m_commandBuffer; m_submitCommandBuffers.add(vkCmdBuf); } VkSemaphore waitSemaphore = m_pendingWaitSemaphore; - VkSemaphore signalSemaphore = m_semaphores[m_poolIndex]; + VkSemaphore signalSemaphore = m_semaphores[m_currentSemaphoreIndex]; VkSubmitInfo submitInfo = {}; submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; VkPipelineStageFlags stageFlag = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT; @@ -3358,11 +3334,50 @@ public: } submitInfo.signalSemaphoreCount = 1; submitInfo.pSignalSemaphores = &signalSemaphore; - vkAPI.vkResetFences(vkAPI.m_device, 1, &m_fences[m_poolIndex]); - vkAPI.vkQueueSubmit(m_queue, 1, &submitInfo, m_fences[m_poolIndex]); + + auto fence = static_cast<CommandBufferImpl*>(commandBuffers[0])->m_fence; + vkAPI.vkResetFences(vkAPI.m_device, 1, &fence); + vkAPI.vkQueueSubmit(m_queue, 1, &submitInfo, fence); m_pendingWaitSemaphore = signalSemaphore; - swapPools(); + + m_currentSemaphoreIndex++; + m_currentSemaphoreIndex = m_currentSemaphoreIndex % kSemaphoreCount; + } + }; + + class TransientResourceHeapImpl + : public ITransientResourceHeap + , public RefObject + { + public: + SLANG_REF_OBJECT_IUNKNOWN_ALL + ITransientResourceHeap* getInterface(const Slang::Guid& guid) + { + if (guid == GfxGUID::IID_ISlangUnknown || guid == GfxGUID::IID_ITransientResourceHeap) + return static_cast<ITransientResourceHeap*>(this); + return nullptr; + } + + public: + VkCommandPool m_commandPool; + DescriptorSetAllocator m_descSetAllocator; + VkFence m_fence; + List<RefPtr<CommandBufferImpl>> m_commandBufferPool; + uint32_t m_commandBufferAllocId = 0; + RefPtr<BufferResourceImpl> m_constantBuffer; + RefPtr<VKDevice> m_device; + + Result init(const ITransientResourceHeap::Desc& desc, VKDevice* device); + ~TransientResourceHeapImpl() + { + m_device->m_api.vkDestroyCommandPool(m_device->m_api.m_device, m_commandPool, nullptr); + m_device->m_api.vkDestroyFence(m_device->m_api.m_device, m_fence, nullptr); + m_descSetAllocator.close(); } + public: + virtual SLANG_NO_THROW Result SLANG_MCALL + createCommandBuffer(ICommandBuffer** outCommandBuffer) override; + virtual SLANG_NO_THROW Result SLANG_MCALL synchronizeAndReset() override; }; class SwapchainImpl @@ -3742,6 +3757,15 @@ public: void _transitionImageLayout(VkImage image, VkFormat format, const TextureResource::Desc& desc, VkImageLayout oldLayout, VkImageLayout newLayout); + uint32_t getQueueFamilyIndex(ICommandQueue::QueueType queueType) + { + switch (queueType) + { + case ICommandQueue::QueueType::Graphics: + default: + return m_queueFamilyIndex; + } + } public: // VKDevice members. @@ -4282,6 +4306,71 @@ void VKDevice::waitForGpu() m_deviceQueue.flushAndWait(); } +Result VKDevice::TransientResourceHeapImpl::init( + const ITransientResourceHeap::Desc& desc, + VKDevice* device) +{ + m_device = device; + m_descSetAllocator.m_api = &device->m_api; + + VkCommandPoolCreateInfo poolCreateInfo = {}; + poolCreateInfo.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO; + poolCreateInfo.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT; + poolCreateInfo.queueFamilyIndex = + device->getQueueFamilyIndex(ICommandQueue::QueueType::Graphics); + device->m_api.vkCreateCommandPool( + device->m_api.m_device, &poolCreateInfo, nullptr, &m_commandPool); + + VkFenceCreateInfo fenceCreateInfo = {}; + fenceCreateInfo.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO; + fenceCreateInfo.flags = VK_FENCE_CREATE_SIGNALED_BIT; + device->m_api.vkCreateFence(device->m_api.m_device, &fenceCreateInfo, nullptr, &m_fence); + return SLANG_OK; +} + +Result VKDevice::TransientResourceHeapImpl::createCommandBuffer(ICommandBuffer** outCmdBuffer) +{ + if (m_commandBufferAllocId < (uint32_t)m_commandBufferPool.getCount()) + { + auto result = m_commandBufferPool[m_commandBufferAllocId]; + result->beginCommandBuffer(); + m_commandBufferAllocId++; + *outCmdBuffer = result.detach(); + return SLANG_OK; + } + + RefPtr<CommandBufferImpl> commandBuffer = new CommandBufferImpl(); + SLANG_RETURN_ON_FAIL(commandBuffer->init( + m_device, m_commandPool, m_fence, &m_descSetAllocator)); + m_commandBufferPool.add(commandBuffer); + m_commandBufferAllocId++; + *outCmdBuffer = commandBuffer.detach(); + return SLANG_OK; +} + +Result VKDevice::TransientResourceHeapImpl::synchronizeAndReset() +{ + m_commandBufferAllocId = 0; + auto& api = m_device->m_api; + if (api.vkWaitForFences(api.m_device, 1, &m_fence, 1, UINT64_MAX) != VK_SUCCESS) + { + return SLANG_FAIL; + } + api.vkResetCommandPool(api.m_device, m_commandPool, 0); + m_descSetAllocator.reset(); + return SLANG_OK; +} + +Result VKDevice::createTransientResourceHeap( + const ITransientResourceHeap::Desc& desc, + ITransientResourceHeap** outHeap) +{ + RefPtr<TransientResourceHeapImpl> result = new TransientResourceHeapImpl(); + SLANG_RETURN_ON_FAIL(result->init(desc, this)); + *outHeap = result.detach(); + return SLANG_OK; +} + Result VKDevice::createCommandQueue(const ICommandQueue::Desc& desc, ICommandQueue** outQueue) { // Only support one queue for now. @@ -4430,23 +4519,6 @@ static VkBufferUsageFlagBits _calcBufferUsageFlags(int bindFlags) return VkBufferUsageFlagBits(dstFlags); } -static VkBufferUsageFlags _calcBufferUsageFlags(int bindFlags, int cpuAccessFlags, const void* initData) -{ - VkBufferUsageFlags usage = _calcBufferUsageFlags(bindFlags); - - if (cpuAccessFlags & IResource::AccessFlag::Read) - { - // If it can be read from, set this - usage |= VK_BUFFER_USAGE_TRANSFER_SRC_BIT; - } - if ((cpuAccessFlags & IResource::AccessFlag::Write) || initData) - { - usage |= VK_BUFFER_USAGE_TRANSFER_DST_BIT; - } - - return usage; -} - static VkImageUsageFlagBits _calcImageUsageFlags(IResource::BindFlag::Enum bind) { typedef IResource::BindFlag BindFlag; @@ -4890,7 +4962,8 @@ Result VKDevice::createBufferResource(IResource::Usage initialUsage, const IBuff VkMemoryPropertyFlags reqMemoryProperties = 0; - VkBufferUsageFlags usage = _calcBufferUsageFlags(desc.bindFlags, desc.cpuAccessFlags, initData); + VkBufferUsageFlags usage = _calcBufferUsageFlags(desc.bindFlags) | + VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT; switch (initialUsage) { |
