summaryrefslogtreecommitdiffstats
path: root/tools/gfx
diff options
context:
space:
mode:
Diffstat (limited to 'tools/gfx')
-rw-r--r--tools/gfx/cpu/render-cpu.cpp23
-rw-r--r--tools/gfx/cuda/render-cuda.cpp21
-rw-r--r--tools/gfx/d3d12/circular-resource-heap-d3d12.cpp222
-rw-r--r--tools/gfx/d3d12/circular-resource-heap-d3d12.h206
-rw-r--r--tools/gfx/d3d12/render-d3d12.cpp406
-rw-r--r--tools/gfx/d3d12/resource-d3d12.cpp57
-rw-r--r--tools/gfx/d3d12/resource-d3d12.h47
-rw-r--r--tools/gfx/immediate-renderer-base.cpp24
-rw-r--r--tools/gfx/immediate-renderer-base.h3
-rw-r--r--tools/gfx/renderer-shared.cpp1
-rw-r--r--tools/gfx/renderer-shared.h4
-rw-r--r--tools/gfx/simple-transient-resource-heap.h52
-rw-r--r--tools/gfx/vulkan/render-vk.cpp261
13 files changed, 510 insertions, 817 deletions
diff --git a/tools/gfx/cpu/render-cpu.cpp b/tools/gfx/cpu/render-cpu.cpp
index fa31f7ee1..f0b612e2f 100644
--- a/tools/gfx/cpu/render-cpu.cpp
+++ b/tools/gfx/cpu/render-cpu.cpp
@@ -9,6 +9,7 @@
#include "../command-writer.h"
#include "../renderer-shared.h"
+#include "../simple-transient-resource-heap.h"
#include "../slang-context.h"
#define SLANG_PRELUDE_NAMESPACE slang_prelude
@@ -1119,6 +1120,10 @@ private:
return nullptr;
}
public:
+    /// Initialization hook invoked with the owning device when the command
+    /// buffer is created. The CPU backend keeps no per-device state here, so
+    /// the argument is only marked as used.
+    void init(CPUDevice* device) { SLANG_UNUSED(device); }
virtual SLANG_NO_THROW void SLANG_MCALL encodeRenderCommands(
IRenderPassLayout* renderPass,
IFramebuffer* framebuffer,
@@ -1273,13 +1278,6 @@ private:
{
return m_desc;
}
- virtual SLANG_NO_THROW Result SLANG_MCALL
- createCommandBuffer(ICommandBuffer** outCommandBuffer) override
- {
- RefPtr<CommandBufferImpl> result = new CommandBufferImpl();
- *outCommandBuffer = result.detach();
- return SLANG_OK;
- }
virtual SLANG_NO_THROW void SLANG_MCALL
executeCommandBuffers(uint32_t count, ICommandBuffer* const* commandBuffers) override
@@ -1400,6 +1398,8 @@ private:
}
};
+ using TransientResourceHeapImpl = SimpleTransientResourceHeap<CPUDevice, CommandBufferImpl>;
+
public:
~CPUDevice()
{
@@ -1559,6 +1559,15 @@ public:
*outQueue = queue.detach();
return SLANG_OK;
}
+    /// Creates a transient resource heap for the CPU device.
+    ///
+    /// @param desc    Description forwarded to the heap's `init`.
+    /// @param outHeap Receives the new heap on success; left untouched when
+    ///                initialization fails.
+    /// @return `SLANG_OK` on success, otherwise the failure code from `init`.
+    virtual SLANG_NO_THROW Result SLANG_MCALL createTransientResourceHeap(
+        const ITransientResourceHeap::Desc& desc,
+        ITransientResourceHeap** outHeap) override
+    {
+        // Hold the heap in a RefPtr so it is released if init() fails.
+        RefPtr<TransientResourceHeapImpl> heap(new TransientResourceHeapImpl());
+        SLANG_RETURN_ON_FAIL(heap->init(this, desc));
+
+        // Hand the reference over to the caller.
+        *outHeap = heap.detach();
+        return SLANG_OK;
+    }
virtual SLANG_NO_THROW Result SLANG_MCALL createSwapchain(
const ISwapchain::Desc& desc, WindowHandle window, ISwapchain** outSwapchain) override
{
diff --git a/tools/gfx/cuda/render-cuda.cpp b/tools/gfx/cuda/render-cuda.cpp
index 383ccc924..b29f7f7e4 100644
--- a/tools/gfx/cuda/render-cuda.cpp
+++ b/tools/gfx/cuda/render-cuda.cpp
@@ -12,6 +12,7 @@
#include "slang-com-helper.h"
#include "../command-writer.h"
#include "../renderer-shared.h"
+#include "../simple-transient-resource-heap.h"
#include "../slang-context.h"
# ifdef RENDER_TEST_OPTIX
@@ -989,6 +990,7 @@ public:
return nullptr;
}
public:
+    /// Initialization hook invoked with the owning device when the command
+    /// buffer is created. The CUDA backend keeps no per-device state here, so
+    /// the argument is only marked as used.
+    void init(CUDADevice* device)
+    {
+        SLANG_UNUSED(device);
+    }
virtual SLANG_NO_THROW void SLANG_MCALL encodeRenderCommands(
IRenderPassLayout* renderPass,
IFramebuffer* framebuffer,
@@ -1147,13 +1149,6 @@ public:
{
return m_desc;
}
- virtual SLANG_NO_THROW Result SLANG_MCALL
- createCommandBuffer(ICommandBuffer** outCommandBuffer) override
- {
- RefPtr<CommandBufferImpl> result = new CommandBufferImpl();
- *outCommandBuffer = result.detach();
- return SLANG_OK;
- }
virtual SLANG_NO_THROW void SLANG_MCALL
executeCommandBuffers(uint32_t count, ICommandBuffer* const* commandBuffers) override
@@ -1320,6 +1315,8 @@ public:
}
};
+ using TransientResourceHeapImpl = SimpleTransientResourceHeap<CUDADevice, CommandBufferImpl>;
+
public:
~CUDADevice()
{
@@ -1921,6 +1918,16 @@ public:
}
public:
+    /// Creates a transient resource heap for the CUDA device.
+    ///
+    /// @param desc    Description forwarded to the heap's `init`.
+    /// @param outHeap Receives the new heap on success; left untouched when
+    ///                initialization fails.
+    /// @return `SLANG_OK` on success, otherwise the failure code from `init`.
+    virtual SLANG_NO_THROW Result SLANG_MCALL createTransientResourceHeap(
+        const ITransientResourceHeap::Desc& desc,
+        ITransientResourceHeap** outHeap) override
+    {
+        // Hold the heap in a RefPtr so it is released if init() fails.
+        RefPtr<TransientResourceHeapImpl> heap(new TransientResourceHeapImpl());
+        SLANG_RETURN_ON_FAIL(heap->init(this, desc));
+
+        // Hand the reference over to the caller.
+        *outHeap = heap.detach();
+        return SLANG_OK;
+    }
+
virtual SLANG_NO_THROW Result SLANG_MCALL
createCommandQueue(const ICommandQueue::Desc& desc, ICommandQueue** outQueue) override
{
diff --git a/tools/gfx/d3d12/circular-resource-heap-d3d12.cpp b/tools/gfx/d3d12/circular-resource-heap-d3d12.cpp
deleted file mode 100644
index 685dd364f..000000000
--- a/tools/gfx/d3d12/circular-resource-heap-d3d12.cpp
+++ /dev/null
@@ -1,222 +0,0 @@
-#include "circular-resource-heap-d3d12.h"
-
-namespace gfx {
-using namespace Slang;
-
-D3D12CircularResourceHeap::D3D12CircularResourceHeap():
- m_fence(nullptr),
- m_device(nullptr),
- m_blockFreeList(sizeof(Block), SLANG_ALIGN_OF(Block), 16),
- m_blocks(nullptr)
-{
- m_back.m_block = nullptr;
- m_back.m_position = nullptr;
- m_front.m_block = nullptr;
- m_front.m_position = nullptr;
-}
-
-D3D12CircularResourceHeap::~D3D12CircularResourceHeap()
-{
- _freeBlockListResources(m_blocks);
-}
-
-void D3D12CircularResourceHeap::_freeBlockListResources(const Block* start)
-{
- if (start)
- {
- const Block* block = start;
- do
- {
- ID3D12Resource* resource = block->m_resource;
-
- resource->Unmap(0, nullptr);
- resource->Release();
-
- // Next in list
- block = block->m_next;
-
- } while (block != start);
- }
-}
-
-Result D3D12CircularResourceHeap::init(ID3D12Device* device, const Desc& desc, D3D12CounterFence* fence)
-{
- assert(m_blocks == nullptr);
- assert(desc.m_blockSize > 0);
-
- m_fence = fence;
- m_desc = desc;
- m_device = device;
-
- return SLANG_OK;
-}
-
-void D3D12CircularResourceHeap::addSync(uint64_t signalValue)
-{
- assert(signalValue == m_fence->getCurrentValue());
- PendingEntry entry;
- entry.m_completedValue = signalValue;
- entry.m_cursor = m_front;
- m_pendingQueue.add(entry);
-}
-
-void D3D12CircularResourceHeap::updateCompleted()
-{
- const uint64_t completedValue = m_fence->getCompletedValue();
-
-#if 0
- while (m_pendingQueue.getCount() != 0)
- {
- const PendingEntry& entry = m_pendingQueue[0];
- if (entry.m_completedValue <= completedValue)
- {
- m_back = entry.m_cursor;
- m_pendingQueue.removeAt(0);
- }
- else
- {
- break;
- }
- }
-#else
- // A more efficient implementation is m_pendingQueue is implemented as a vector like type
- const Index size = m_pendingQueue.getCount();
- Index end = 0;
- while (end < size && m_pendingQueue[end].m_completedValue <= completedValue)
- {
- end++;
- }
-
- if (end > 0)
- {
- // Set the back position
- m_back = m_pendingQueue[end - 1].m_cursor;
- if (end == size)
- {
- m_pendingQueue.clear();
- }
- else
- {
- m_pendingQueue.removeRange(0, size);
- }
- }
-#endif
-}
-
-D3D12CircularResourceHeap::Block* D3D12CircularResourceHeap::_newBlock()
-{
- D3D12_RESOURCE_DESC desc;
-
- desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER;
- desc.Alignment = 0;
- desc.Width = m_desc.m_blockSize;
- desc.Height = 1;
- desc.DepthOrArraySize = 1;
- desc.MipLevels = 1;
- desc.Format = DXGI_FORMAT_UNKNOWN;
- desc.SampleDesc.Count = 1;
- desc.SampleDesc.Quality = 0;
- desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR;
- desc.Flags = D3D12_RESOURCE_FLAG_NONE;
-
- ComPtr<ID3D12Resource> resource;
- Result res = m_device->CreateCommittedResource(&m_desc.m_heapProperties, m_desc.m_heapFlags, &desc, m_desc.m_initialState, nullptr, IID_PPV_ARGS(resource.writeRef()));
- if (SLANG_FAILED(res))
- {
- assert(!"Resource allocation failed");
- return nullptr;
- }
-
- uint8_t* data = nullptr;
- if (m_desc.m_heapProperties.Type == D3D12_HEAP_TYPE_READBACK)
- {
- }
- else
- {
- // Map it, and keep it mapped
- resource->Map(0, nullptr, (void**)&data);
- }
-
- // We have no blocks -> so lets allocate the first
- Block* block = (Block*)m_blockFreeList.allocate();
- block->m_next = nullptr;
-
- block->m_resource = resource.detach();
- block->m_start = data;
- return block;
-}
-
-D3D12CircularResourceHeap::Cursor D3D12CircularResourceHeap::allocate(size_t size, size_t alignment)
-{
- const size_t blockSize = getBlockSize();
-
- assert(size <= blockSize);
-
- // If nothing is allocated add the first block
- if (m_blocks == nullptr)
- {
- Block* block = _newBlock();
- if (!block)
- {
- Cursor cursor = {};
- return cursor;
- }
- m_blocks = block;
- // Make circular
- block->m_next = block;
-
- // Point front and back to same position, as currently it is all free
- m_back = { block, block->m_start };
- m_front = m_back;
- }
-
- // If front and back are in the same block then front MUST be ahead of back (as that defined as
- // an invariant and is required for block insertion to be possible
- Block* block = m_front.m_block;
-
- // Check the invariant
- assert(block != m_back.m_block || m_front.m_position >= m_back.m_position);
-
- {
- uint8_t* cur = (uint8_t*)((size_t(m_front.m_position) + alignment - 1) & ~(alignment - 1));
- // Does the the allocation fit?
- if (cur + size <= block->m_start + blockSize)
- {
- // It fits
- // Move the front forward
- m_front.m_position = cur + size;
- Cursor cursor = { block, cur };
- return cursor;
- }
- }
-
- // Okay I can't fit into current block...
-
- // If the next block contains front, we need to add a block, else we can use that block
- if (block->m_next == m_back.m_block)
- {
- Block* newBlock = _newBlock();
- // Insert into the list
- newBlock->m_next = block->m_next;
- block->m_next = newBlock;
- }
-
- // Use the block we are going to add to
- block = block->m_next;
- uint8_t* cur = (uint8_t*)((size_t(block->m_start) + alignment - 1) & ~(alignment - 1));
- // Does the the allocation fit?
- if (cur + size > block->m_start + blockSize)
- {
- assert(!"Couldn't fit into a free block(!) Alignment breaks it?");
- Cursor cursor = {};
- return cursor;
- }
- // It fits
- // Move the front forward
- m_front.m_block = block;
- m_front.m_position = cur + size;
- Cursor cursor = { block, cur };
- return cursor;
-}
-
-} // namespace gfx
diff --git a/tools/gfx/d3d12/circular-resource-heap-d3d12.h b/tools/gfx/d3d12/circular-resource-heap-d3d12.h
deleted file mode 100644
index 7eacf9572..000000000
--- a/tools/gfx/d3d12/circular-resource-heap-d3d12.h
+++ /dev/null
@@ -1,206 +0,0 @@
-#pragma once
-
-#include "slang-com-ptr.h"
-#include "core/slang-list.h"
-#include "core/slang-free-list.h"
-
-#include "resource-d3d12.h"
-
-namespace gfx {
-
-/*! \brief The D3D12CircularResourceHeap is a heap that is suited for size constrained real-time resources allocation that
-is transitory in nature. It is designed to allocate resources which are used and discarded, often used where in
-previous versions of DirectX the 'DISCARD' flag was used.
-
-The idea is to have a heap which chunks of resource can be allocated, and used for GPU execution,
-and that the heap is able through the addSync/updateCompleted idiom is able to track when the usage of the resources is
-completed allowing them to be reused. The heap is arranged as circularly, with new allocations made from the front, and the back
-being updated as the GPU updating the back when it is informed anything using prior parts of the heap have completed. In this
-arrangement all the heap between the back and the front can be thought of as in use or potentially in use by the GPU. All the heap
-from the front back around to the back, is free and can be allocated from. It is the responsibility of the user of the Heap to make
-sure the invariant holds, but in most normal usage it does so simply.
-
-Another feature of the heap is that it does not require upfront knowledge of how big a heap is needed. The backing resources will be expanded
-dynamically with requests as needed. The only requirement is that know single request can be larger than m_blockSize specified in the Desc
-used to initialize the heap. This is because all the backing resources are allocated to a single size. This limitation means the D3D12CircularResourceHeap
-may not be the best use for example for uploading a texture - because it's design is really around transitory uploads or write backs, and so more suited
-to constant buffers, vertex buffer, index buffers and the like.
-
-To upload a texture at program startup it is most likely better to use a D3D12ResourceScopeManager.
-
-\code{.cpp}
-
-typedef D3D12CircularResourceHeap Heap;
-
-Heap::Cursor cursor = heap.allocateVertexBuffer(sizeof(Vertex) * numVerts);
-Memory:copy(cursor.m_position, verts, sizeof(Vertex) * numVerts);
-
-// Do a command using the GPU handle
-m_commandList->...
-// Do another command using the GPU handle
-
-m_commandList->...
-
-// Execute the command list on the command queue
-{
- ID3D12CommandList* lists[] = { m_commandList };
- m_commandQueue->ExecuteCommandLists(SLANG_COUNT_OF(lists), lists);
-}
-
-// Add a sync point
-const uint64_t signalValue = m_fence.nextSignal(m_commandQueue);
-heap.addSync(signalValue)
-
-// The cursors cannot be used anymore
-
-// At some later point call updateCompleted. This will see where the GPU is at, and make resources available that the GPU no longer accesses.
-heap.updateCompleted();
-
-\endcode
-
-### Implementation
-
-Front and back can be in the same block, but ONLY if back is behind front, because we have to always be able to insert
-new blocks in front of front. So it must be possible to do an block insertion between the two of them.
-
-|--B---F-----| |----------|
-
-When B and F are on top of one another it means there is nothing in the list. NOTE this also means that a move of front can never place it
-top of the back.
-
-https://msdn.microsoft.com/en-us/library/windows/desktop/dn899125%28v=vs.85%29.aspx
-https://msdn.microsoft.com/en-us/library/windows/desktop/mt426646%28v=vs.85%29.aspx
-*/
-
-class D3D12CircularResourceHeap
-{
- protected:
- struct Block;
- public:
- typedef D3D12CircularResourceHeap ThisType;
-
- /// The alignment used for VERTEX_BUFFER allocations
- /// Strictly speaking it seems the hardware can handle 4 byte alignment, but since often in use
- /// data will be copied from CPU memory to the allocation, using 16 byte alignment is superior as allows
- /// significantly faster memcpy.
- /// The sample that shows sizeof(float) - 4 bytes is appropriate is at the link below.
- /// https://msdn.microsoft.com/en-us/library/windows/desktop/mt426646%28v=vs.85%29.aspx
- enum
- {
- VERTEX_BUFFER_ALIGNMENT = 16,
- };
-
- struct Desc
- {
- void init()
- {
- {
- D3D12_HEAP_PROPERTIES& props = m_heapProperties;
-
- props.Type = D3D12_HEAP_TYPE_UPLOAD;
- props.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN;
- props.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN;
- props.CreationNodeMask = 1;
- props.VisibleNodeMask = 1;
- }
- m_heapFlags = D3D12_HEAP_FLAG_NONE;
- m_initialState = D3D12_RESOURCE_STATE_GENERIC_READ;
- m_blockSize = 0;
- }
-
- D3D12_HEAP_PROPERTIES m_heapProperties;
- D3D12_HEAP_FLAGS m_heapFlags;
- D3D12_RESOURCE_STATES m_initialState;
- size_t m_blockSize;
- };
-
- /// Cursor position
- struct Cursor
- {
- /// Get GpuHandle
- SLANG_FORCE_INLINE D3D12_GPU_VIRTUAL_ADDRESS getGpuHandle() const { return m_block->m_resource->GetGPUVirtualAddress() + size_t(m_position - m_block->m_start); }
- /// Must have a block and position
- SLANG_FORCE_INLINE bool isValid() const { return m_block != nullptr; }
- /// Calculate the offset into the underlying resource
- SLANG_FORCE_INLINE size_t getOffset() const { return size_t(m_position - m_block->m_start); }
- /// Get the underlying resource
- SLANG_FORCE_INLINE ID3D12Resource* getResource() const { return m_block->m_resource; }
-
- Block* m_block; ///< The block index
- uint8_t* m_position; ///< The current position
- };
-
- /// Get the desc used to initialize the heap
- SLANG_FORCE_INLINE const Desc& getDesc() const { return m_desc; }
-
- /// Must be called before used
- /// Block size must be at least as large as the _largest_ thing allocated
- /// Also note depending on alignment of a resource allocation, the block size might also need to take into account the
- /// maximum alignment use. It is a REQUIREMENT that a newly allocated resource block is large enough to hold any
- /// allocation taking into account the alignment used.
- Slang::Result init(ID3D12Device* device, const Desc& desc, D3D12CounterFence* fence);
-
- /// Get the block size
- SLANG_FORCE_INLINE size_t getBlockSize() const { return m_desc.m_blockSize; }
-
- /// Allocate constant buffer of specified size
- Cursor allocate(size_t size, size_t alignment);
-
- /// Allocate a constant buffer
- SLANG_FORCE_INLINE Cursor allocateConstantBuffer(size_t size) { return allocate(size, D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT); }
- /// Allocate a vertex buffer
- SLANG_FORCE_INLINE Cursor allocateVertexBuffer(size_t size) { return allocate(size, VERTEX_BUFFER_ALIGNMENT); }
-
- /// Create filled in constant buffer
- SLANG_FORCE_INLINE Cursor newConstantBuffer(const void* data, size_t size) { Cursor cursor = allocateConstantBuffer(size); ::memcpy(cursor.m_position, data, size); return cursor; }
- /// Create in filled in constant buffer
- template <typename T>
- SLANG_FORCE_INLINE Cursor newConstantBuffer(const T& in) { return newConstantBuffer(&in, sizeof(T)); }
-
- /// Look where the GPU has got to and release anything not currently used
- void updateCompleted();
- /// Add a sync point - meaning that when this point is hit in the queue
- /// all of the resources up to this point will no longer be used.
- void addSync(uint64_t signalValue);
-
- /// Get the gpu address of this cursor
- D3D12_GPU_VIRTUAL_ADDRESS getGpuHandle(const Cursor& cursor) const { return cursor.m_block->m_resource->GetGPUVirtualAddress() + size_t(cursor.m_position - cursor.m_block->m_start); }
-
- /// Ctor
- D3D12CircularResourceHeap();
- /// Dtor
- ~D3D12CircularResourceHeap();
-
- protected:
-
- struct Block
- {
- ID3D12Resource* m_resource; ///< The mapped resource
- uint8_t* m_start; ///< Once created the resource is mapped to here
- Block* m_next; ///< Points to next block in the list
- };
- struct PendingEntry
- {
- uint64_t m_completedValue; ///< The value when this is completed
- Cursor m_cursor; ///< the cursor at that point
- };
- void _freeBlockListResources(const Block* block);
- /// Create a new block (with associated resource), do not add the block list
- Block* _newBlock();
-
- Block* m_blocks; ///< Circular singly linked list of block. nullptr initially
- Slang::FreeList m_blockFreeList; ///< Free list of actual allocations of blocks
- Slang::List<PendingEntry> m_pendingQueue; ///< Holds the list of pending positions. When the fence value is greater than the value on the queue entry, the entry is done.
-
- // Allocation is made from the front, and freed from the back.
- Cursor m_back; ///< Current back position.
- Cursor m_front; ///< Current front position.
-
- Desc m_desc; ///< Describes the heap
-
- D3D12CounterFence* m_fence; ///< The fence to use
- ID3D12Device* m_device; ///< The device that resources will be constructed on
-};
-
-} // namespace gfx
-
diff --git a/tools/gfx/d3d12/render-d3d12.cpp b/tools/gfx/d3d12/render-d3d12.cpp
index e2629dc53..6b818f100 100644
--- a/tools/gfx/d3d12/render-d3d12.cpp
+++ b/tools/gfx/d3d12/render-d3d12.cpp
@@ -41,7 +41,6 @@ struct ID3D12GraphicsCommandList1 {};
#include "resource-d3d12.h"
#include "descriptor-heap-d3d12.h"
-#include "circular-resource-heap-d3d12.h"
#include "../d3d/d3d-util.h"
@@ -72,6 +71,9 @@ public:
virtual SLANG_NO_THROW SlangResult SLANG_MCALL initialize(const Desc& desc) override;
virtual SLANG_NO_THROW Result SLANG_MCALL
createCommandQueue(const ICommandQueue::Desc& desc, ICommandQueue** outQueue) override;
+ virtual SLANG_NO_THROW Result SLANG_MCALL createTransientResourceHeap(
+ const ITransientResourceHeap::Desc& desc,
+ ITransientResourceHeap** outHeap) override;
virtual SLANG_NO_THROW Result SLANG_MCALL createSwapchain(
const ISwapchain::Desc& desc,
WindowHandle window,
@@ -211,12 +213,6 @@ public:
public:
typedef BufferResource Parent;
- void bindConstantBufferView(D3D12CircularResourceHeap& circularHeap, int index, Submitter* submitter) const
- {
- // Set the constant buffer
- submitter->setRootConstantBufferView(index, m_resource.getResource()->GetGPUVirtualAddress());
- }
-
BufferResourceImpl(IResource::Usage initialUsage, const Desc& desc):
Parent(desc), m_initialUsage(initialUsage)
, m_defaultState(_calcResourceState(initialUsage))
@@ -444,6 +440,23 @@ public:
ID3D12GraphicsCommandList* m_commandList;
};
+    /// Fills `out` with a D3D12 resource description for a plain buffer of
+    /// `bufferSize` bytes: one row, one mip, unknown format, row-major layout,
+    /// no flags. Any previous contents of `out` are discarded.
+    static void _initBufferResourceDesc(size_t bufferSize, D3D12_RESOURCE_DESC& out)
+    {
+        // Build the description in a zero-initialized local, then publish it.
+        D3D12_RESOURCE_DESC bufferDesc = {};
+
+        bufferDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER;
+        bufferDesc.Format = DXGI_FORMAT_UNKNOWN;
+        bufferDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR;
+        bufferDesc.Flags = D3D12_RESOURCE_FLAG_NONE;
+        bufferDesc.Alignment = 0;
+
+        // A buffer is described as a 1D strip: the byte size goes in Width.
+        bufferDesc.Width = bufferSize;
+        bufferDesc.Height = 1;
+        bufferDesc.DepthOrArraySize = 1;
+        bufferDesc.MipLevels = 1;
+
+        bufferDesc.SampleDesc.Count = 1;
+        bufferDesc.SampleDesc.Quality = 0;
+
+        out = bufferDesc;
+    }
+
static Result _uploadBufferData(
ID3D12GraphicsCommandList* cmdList,
BufferResourceImpl* buffer,
@@ -479,78 +492,111 @@ public:
return SLANG_OK;
}
- // Use a circular buffer of execution frames to manage in-flight GPU command buffers.
- // Each call to `executeCommandLists` advances the frame by 1.
- // If we run out of avaialble frames, wait for the earliest submitted frame to finish.
- struct ExecutionFrameResources
+ class TransientResourceHeapImpl
+ : public ITransientResourceHeap
+ , public RefObject
{
+ public:
+ SLANG_REF_OBJECT_IUNKNOWN_ALL
+ ITransientResourceHeap* getInterface(const Guid& guid)
+ {
+ if (guid == GfxGUID::IID_ISlangUnknown || guid == GfxGUID::IID_ITransientResourceHeap)
+ return static_cast<ITransientResourceHeap*>(this);
+ return nullptr;
+ }
+
+ public:
+ D3D12Resource m_constantBuffer;
+ D3D12Resource m_constantUploadBuffer;
+
+ D3D12Device* m_device;
ComPtr<ID3D12CommandAllocator> m_commandAllocator;
- List<ComPtr<ID3D12GraphicsCommandList>> m_commandListPool;
+ List<ComPtr<ID3D12GraphicsCommandList>> m_d3dCommandListPool;
+ List<ComPtr<ICommandBuffer>> m_commandBufferPool;
uint32_t m_commandListAllocId = 0;
- HANDLE fenceEvent;
+ // Wait values for each command queue, one entry per queue index (see getQueueWaitInfo).
+ struct QueueWaitInfo
+ {
+ uint64_t waitValue; // Fence value recorded at submission; initialized to 0, which synchronizeAndReset treats as "nothing to wait for".
+ HANDLE fenceEvent; // Win32 event created in getQueueWaitInfo and closed in the heap's destructor.
+ };
+ ShortList<QueueWaitInfo, 4> m_waitInfos; // Grown on demand by getQueueWaitInfo.
+    /// Returns the wait-tracking entry for the command queue with index
+    /// `queueIndex`, growing `m_waitInfos` if that index has not been seen yet.
+    ///
+    /// Every newly added entry starts with `waitValue == 0` and a freshly
+    /// created manual-reset event that is initially signaled.
+    ///
+    /// @param queueIndex Index of the command queue.
+    /// @return Reference to the entry stored in `m_waitInfos`.
+    QueueWaitInfo& getQueueWaitInfo(uint32_t queueIndex)
+    {
+        if (queueIndex < (uint32_t)m_waitInfos.getCount())
+        {
+            return m_waitInfos[queueIndex];
+        }
+        auto oldCount = m_waitInfos.getCount();
+        m_waitInfos.setCount(queueIndex + 1);
+        for (auto i = oldCount; i < m_waitInfos.getCount(); i++)
+        {
+            m_waitInfos[i].waitValue = 0;
+            // The second parameter of CreateEventEx is the event *name* (a
+            // string pointer), not a boolean. Pass nullptr for an anonymous
+            // event; `false` is not a valid null pointer constant in standard
+            // C++ and only compiled through a permissive-mode extension.
+            m_waitInfos[i].fenceEvent = CreateEventEx(
+                nullptr,
+                nullptr,
+                CREATE_EVENT_INITIAL_SET | CREATE_EVENT_MANUAL_RESET,
+                EVENT_ALL_ACCESS);
+        }
+        return m_waitInfos[queueIndex];
+    }
// During command submission, we need all the descriptor tables that get
// used to come from a single heap (for each descriptor heap type).
//
// We will thus keep a single heap of each type that we hope will hold
// all the descriptors that actually get needed in a frame.
- //
- // TODO: we need an allocation policy to reallocate and resize these
- // if/when we run out of space during a frame.
D3D12DescriptorHeap m_viewHeap; // Cbv, Srv, Uav
D3D12DescriptorHeap m_samplerHeap; // Heap for samplers
- ~ExecutionFrameResources() { CloseHandle(fenceEvent); }
- Result init(ID3D12Device* device, uint32_t viewHeapSize, uint32_t samplerHeapSize)
+ ~TransientResourceHeapImpl()
{
- SLANG_RETURN_ON_FAIL(device->CreateCommandAllocator(
+ synchronizeAndReset();
+ for (auto& waitInfo : m_waitInfos)
+ CloseHandle(waitInfo.fenceEvent);
+ }
+
+ Result init(
+ const ITransientResourceHeap::Desc& desc,
+ D3D12Device* device,
+ uint32_t viewHeapSize,
+ uint32_t samplerHeapSize)
+ {
+ m_device = device;
+ auto d3dDevice = device->m_device;
+ SLANG_RETURN_ON_FAIL(d3dDevice->CreateCommandAllocator(
D3D12_COMMAND_LIST_TYPE_DIRECT, IID_PPV_ARGS(m_commandAllocator.writeRef())));
- fenceEvent = CreateEventEx(
- nullptr,
- false,
- CREATE_EVENT_INITIAL_SET | CREATE_EVENT_MANUAL_RESET,
- EVENT_ALL_ACCESS);
+
SLANG_RETURN_ON_FAIL(m_viewHeap.init(
- device,
+ d3dDevice,
viewHeapSize,
D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV,
D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE));
SLANG_RETURN_ON_FAIL(m_samplerHeap.init(
- device,
+ d3dDevice,
samplerHeapSize,
D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER,
D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE));
- return SLANG_OK;
- }
- void reset()
- {
- WaitForSingleObject(fenceEvent, INFINITE);
- m_viewHeap.deallocateAll();
- m_samplerHeap.deallocateAll();
- m_commandListAllocId = 0;
- m_commandAllocator->Reset();
- for (auto cmdBuffer : m_commandListPool)
- cmdBuffer->Reset(m_commandAllocator, nullptr);
- }
- ComPtr<ID3D12GraphicsCommandList> createCommandList(ID3D12Device* device)
- {
- if (m_commandListAllocId == m_commandListPool.getCount())
+
+ if (desc.constantBufferSize != 0)
{
- ComPtr<ID3D12GraphicsCommandList> cmdList;
- device->CreateCommandList(
- 0,
- D3D12_COMMAND_LIST_TYPE_DIRECT,
- m_commandAllocator,
+ D3D12_RESOURCE_DESC resourceDesc;
+ _initBufferResourceDesc(desc.constantBufferSize, resourceDesc);
+ device->createBuffer(
+ resourceDesc,
nullptr,
- IID_PPV_ARGS(cmdList.writeRef()));
-
- m_commandListPool.add(cmdList);
+ 0,
+ m_constantUploadBuffer,
+ D3D12_RESOURCE_STATE_VERTEX_AND_CONSTANT_BUFFER,
+ m_constantBuffer);
}
- assert((Index)m_commandListAllocId < m_commandListPool.getCount());
- auto& result = m_commandListPool[m_commandListAllocId];
- ++m_commandListAllocId;
- return result;
+ return SLANG_OK;
}
+
+ virtual SLANG_NO_THROW Result SLANG_MCALL
+ createCommandBuffer(ICommandBuffer** outCommandBuffer) override;
+
+ virtual SLANG_NO_THROW Result SLANG_MCALL synchronizeAndReset() override;
};
class CommandBufferImpl;
@@ -561,7 +607,7 @@ public:
bool m_isOpen = false;
bool m_bindingDirty = true;
CommandBufferImpl* m_commandBuffer;
- ExecutionFrameResources* m_frame;
+ TransientResourceHeapImpl* m_transientHeap;
D3D12Device* m_renderer;
ID3D12Device* m_device;
ID3D12GraphicsCommandList* m_d3dCmdList;
@@ -591,7 +637,7 @@ public:
m_commandBuffer = commandBuffer;
m_d3dCmdList = m_commandBuffer->m_cmdList;
m_renderer = commandBuffer->m_renderer;
- m_frame = commandBuffer->m_frame;
+ m_transientHeap = commandBuffer->m_transientHeap;
}
void endEncodingImpl() { m_isOpen = false; }
@@ -659,7 +705,7 @@ public:
struct RootBindingState
{
- ExecutionFrameResources* frame;
+ TransientResourceHeapImpl* transientHeap;
D3D12Device* device;
ArrayView<DescriptorTable> descriptorTables;
BindingOffset offset;
@@ -2339,17 +2385,20 @@ public:
}
public:
ComPtr<ID3D12GraphicsCommandList> m_cmdList;
- ExecutionFrameResources* m_frame;
+ TransientResourceHeapImpl* m_transientHeap;
D3D12Device* m_renderer;
- void init(D3D12Device* renderer, ExecutionFrameResources* frame)
+ void init(
+ D3D12Device* renderer,
+ ID3D12GraphicsCommandList* d3dCommandList,
+ TransientResourceHeapImpl* transientHeap)
{
- m_frame = frame;
+ m_transientHeap = transientHeap;
m_renderer = renderer;
- m_cmdList = m_frame->createCommandList(renderer->m_device);
+ m_cmdList = d3dCommandList;
ID3D12DescriptorHeap* heaps[] = {
- m_frame->m_viewHeap.getHeap(),
- m_frame->m_samplerHeap.getHeap(),
+ m_transientHeap->m_viewHeap.getHeap(),
+ m_transientHeap->m_samplerHeap.getHeap(),
};
m_cmdList->SetDescriptorHeaps(SLANG_COUNT_OF(heaps), heaps);
}
@@ -2392,7 +2441,7 @@ public:
void init(
D3D12Device* renderer,
- ExecutionFrameResources* frame,
+ TransientResourceHeapImpl* transientHeap,
CommandBufferImpl* cmdBuffer,
RenderPassLayoutImpl* renderPass,
FramebufferImpl* framebuffer)
@@ -2402,7 +2451,7 @@ public:
m_device = renderer->m_device;
m_renderPass = renderPass;
m_framebuffer = framebuffer;
- m_frame = frame;
+ m_transientHeap = transientHeap;
m_boundVertexBuffers.clear();
m_boundIndexBuffer = nullptr;
m_primitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE;
@@ -2735,7 +2784,7 @@ public:
{
m_renderCommandEncoder.init(
m_renderer,
- m_frame,
+ m_transientHeap,
this,
static_cast<RenderPassLayoutImpl*>(renderPass),
static_cast<FramebufferImpl*>(framebuffer));
@@ -2769,13 +2818,13 @@ public:
}
void init(
D3D12Device* renderer,
- ExecutionFrameResources* frame,
+ TransientResourceHeapImpl* transientHeap,
CommandBufferImpl* cmdBuffer)
{
PipelineCommandEncoder::init(cmdBuffer);
m_preCmdList = nullptr;
m_device = renderer->m_device;
- m_frame = frame;
+ m_transientHeap = transientHeap;
m_currentPipeline = nullptr;
}
@@ -2805,7 +2854,7 @@ public:
virtual SLANG_NO_THROW void SLANG_MCALL
encodeComputeCommands(IComputeCommandEncoder** outEncoder) override
{
- m_computeCommandEncoder.init(m_renderer, m_frame, this);
+ m_computeCommandEncoder.init(m_renderer, m_transientHeap, this);
*outEncoder = &m_computeCommandEncoder;
}
@@ -2892,32 +2941,6 @@ public:
}
public:
- struct CommandBufferPool
- {
- List<RefPtr<CommandBufferImpl>> pool;
- uint32_t allocIndex = 0;
- RefPtr<CommandBufferImpl> allocCommandBuffer(D3D12Device* renderer, ExecutionFrameResources* frame)
- {
- if ((Index)allocIndex < pool.getCount())
- {
- RefPtr<CommandBufferImpl> result = pool[allocIndex];
- result->init(renderer, frame);
- allocIndex++;
- return result;
- }
- RefPtr<CommandBufferImpl> cmdBuffer = new CommandBufferImpl();
- cmdBuffer->init(renderer, frame);
- pool.add(cmdBuffer);
- return cmdBuffer;
- }
- void reset()
- {
- allocIndex = 0;
- }
- };
- List<CommandBufferPool> m_commandBufferPools;
- List<ExecutionFrameResources> m_frames;
- uint32_t m_frameIndex = 0;
D3D12Device* m_renderer;
ComPtr<ID3D12Device> m_device;
ComPtr<ID3D12CommandQueue> m_d3dQueue;
@@ -2925,20 +2948,13 @@ public:
uint64_t m_fenceValue = 0;
HANDLE globalWaitHandle;
Desc m_desc;
- Result init(
- D3D12Device* renderer,
- uint32_t frameCount,
- uint32_t viewHeapSize,
- uint32_t samplerHeapSize)
+ uint32_t m_queueIndex = 0;
+
+ Result init(D3D12Device* device, uint32_t queueIndex)
{
- m_renderer = renderer;
- m_device = renderer->m_device;
- m_frames.setCount(frameCount);
- m_commandBufferPools.setCount(frameCount);
- for (uint32_t i = 0; i < frameCount; i++)
- {
- SLANG_RETURN_ON_FAIL(m_frames[i].init(m_device, viewHeapSize, samplerHeapSize));
- }
+ m_queueIndex = queueIndex;
+ m_renderer = device;
+ m_device = device->m_device;
D3D12_COMMAND_QUEUE_DESC queueDesc = {};
queueDesc.Type = D3D12_COMMAND_LIST_TYPE_DIRECT;
SLANG_RETURN_ON_FAIL(m_device->CreateCommandQueue(&queueDesc, IID_PPV_ARGS(m_d3dQueue.writeRef())));
@@ -2955,20 +2971,12 @@ public:
{
wait();
CloseHandle(globalWaitHandle);
+ m_renderer->m_queueIndexAllocator.free((int)m_queueIndex, 1);
}
virtual SLANG_NO_THROW const Desc& SLANG_MCALL getDesc() override
{
return m_desc;
}
- virtual SLANG_NO_THROW Result SLANG_MCALL
- createCommandBuffer(ICommandBuffer** outCommandBuffer) override
- {
- RefPtr<CommandBufferImpl> result =
- m_commandBufferPools[m_frameIndex].allocCommandBuffer(
- m_renderer, &m_frames[m_frameIndex]);
- *outCommandBuffer = result.detach();
- return SLANG_OK;
- }
virtual SLANG_NO_THROW void SLANG_MCALL
executeCommandBuffers(uint32_t count, ICommandBuffer* const* commandBuffers) override
@@ -2981,21 +2989,21 @@ public:
}
m_d3dQueue->ExecuteCommandLists((UINT)count, commandLists.getArrayView().getBuffer());
- auto& frame = m_frames[m_frameIndex];
m_fenceValue++;
+
+ for (uint32_t i = 0; i < count; i++)
+ {
+ if (i > 0 && commandBuffers[i] == commandBuffers[i - 1])
+ continue;
+ auto cmdImpl = static_cast<CommandBufferImpl*>(commandBuffers[i]);
+ auto transientHeap = cmdImpl->m_transientHeap;
+ auto& waitInfo = transientHeap->getQueueWaitInfo(m_queueIndex);
+ waitInfo.waitValue = m_fenceValue;
+ ResetEvent(waitInfo.fenceEvent);
+ m_fence->SetEventOnCompletion(m_fenceValue, waitInfo.fenceEvent);
+ }
m_d3dQueue->Signal(m_fence, m_fenceValue);
- ResetEvent(frame.fenceEvent);
ResetEvent(globalWaitHandle);
- m_fence->SetEventOnCompletion(m_fenceValue, frame.fenceEvent);
- swapExecutionFrame();
- }
-
- void swapExecutionFrame()
- {
- m_frameIndex = (m_frameIndex + 1) % m_frames.getCount();
- auto& frame = m_frames[m_frameIndex];
- frame.reset();
- m_commandBufferPools[m_frameIndex].reset();
}
virtual SLANG_NO_THROW void SLANG_MCALL wait() override
@@ -3083,11 +3091,13 @@ public:
static PROC loadProc(HMODULE module, char const* name);
- Result createCommandQueueImpl(
- uint32_t frameCount,
- uint32_t viewHeapSize,
- uint32_t samplerHeapSize,
- CommandQueueImpl** outQueue);
+ Result createCommandQueueImpl(CommandQueueImpl** outQueue);
+
+ Result createTransientResourceHeapImpl(
+ size_t constantBufferSize,
+ uint32_t viewDescriptors,
+ uint32_t samplerDescriptors,
+ TransientResourceHeapImpl** outHeap);
Result createBuffer(
const D3D12_RESOURCE_DESC& resourceDesc,
@@ -3118,7 +3128,7 @@ public:
ResourceCommandRecordInfo encodeResourceCommands()
{
ResourceCommandRecordInfo info;
- m_resourceCommandQueue->createCommandBuffer(info.commandBuffer.writeRef());
+ m_resourceCommandTransientHeap->createCommandBuffer(info.commandBuffer.writeRef());
info.d3dCommandList = static_cast<CommandBufferImpl*>(info.commandBuffer.get())->m_cmdList;
return info;
}
@@ -3126,7 +3136,7 @@ public:
{
info.commandBuffer->close();
m_resourceCommandQueue->executeCommandBuffer(info.commandBuffer);
- m_resourceCommandQueue->wait();
+ m_resourceCommandTransientHeap->synchronizeAndReset();
}
// D3D12Device members.
@@ -3143,7 +3153,10 @@ public:
DeviceInfo m_deviceInfo;
ID3D12Device* m_device = nullptr;
+ VirtualObjectPool m_queueIndexAllocator;
+
RefPtr<CommandQueueImpl> m_resourceCommandQueue;
+ RefPtr<TransientResourceHeapImpl> m_resourceCommandTransientHeap;
D3D12HostVisibleDescriptorAllocator m_rtvAllocator;
D3D12HostVisibleDescriptorAllocator m_dsvAllocator;
@@ -3162,6 +3175,53 @@ public:
bool m_nvapi = false;
};
+/// Waits for every queue that has recorded a pending fence value on this heap, then
+/// recycles the heap's descriptor heaps, command-list cursor and command allocator.
+///
+/// @return SLANG_OK on success, SLANG_FAIL if the wait itself fails, or the failure
+///         code produced by resetting the command allocator.
+SLANG_NO_THROW Result SLANG_MCALL D3D12Device::TransientResourceHeapImpl::synchronizeAndReset()
+{
+    // Collect the completion event of each queue whose wait value has been set.
+    Array<HANDLE, 16> waitHandles;
+    for (auto& waitInfo : m_waitInfos)
+    {
+        if (waitInfo.waitValue != 0)
+            waitHandles.add(waitInfo.fenceEvent);
+    }
+
+    // WaitForMultipleObjects rejects a handle count of zero, so only wait when at
+    // least one queue actually has outstanding work recorded on this heap.
+    if (waitHandles.getCount() != 0)
+    {
+        const DWORD waitResult = WaitForMultipleObjects(
+            (DWORD)waitHandles.getCount(), waitHandles.getBuffer(), TRUE, INFINITE);
+        // If the wait failed we cannot tell whether the GPU is done; do not recycle
+        // storage that may still be in use.
+        if (waitResult == WAIT_FAILED)
+            return SLANG_FAIL;
+    }
+
+    m_viewHeap.deallocateAll();
+    m_samplerHeap.deallocateAll();
+    // Rewind the pool cursor so previously created command lists are handed out again.
+    m_commandListAllocId = 0;
+    SLANG_RETURN_ON_FAIL(m_commandAllocator->Reset());
+    return SLANG_OK;
+}
+
+/// Hands out a command buffer that records through this heap's command allocator.
+///
+/// Command buffers created earlier are kept in `m_commandBufferPool`; while
+/// `m_commandListAllocId` is below the pool size, the pooled entry is reset and
+/// reused. Otherwise a new D3D12 command list is created and appended to the pool.
+///
+/// @param outCmdBuffer Receives the command buffer; the caller owns one reference.
+/// @return SLANG_OK on success, or the failure code from resetting or creating the
+///         underlying D3D12 command list.
+Result D3D12Device::TransientResourceHeapImpl::createCommandBuffer(ICommandBuffer** outCmdBuffer)
+{
+    if ((Index)m_commandListAllocId < m_commandBufferPool.getCount())
+    {
+        auto result = static_cast<D3D12Device::CommandBufferImpl*>(
+            m_commandBufferPool[m_commandListAllocId].get());
+        // A failed Reset leaves the list unusable for recording, so report it
+        // instead of handing the list out.
+        SLANG_RETURN_ON_FAIL(
+            m_d3dCommandListPool[m_commandListAllocId]->Reset(m_commandAllocator, nullptr));
+        result->init(m_device, m_d3dCommandListPool[m_commandListAllocId], this);
+        ++m_commandListAllocId;
+        // The pool keeps its own reference; add one for the caller.
+        result->addRef();
+        *outCmdBuffer = result;
+        return SLANG_OK;
+    }
+
+    // Pool exhausted: create a new command list and check that creation succeeded
+    // before wrapping it.
+    ComPtr<ID3D12GraphicsCommandList> cmdList;
+    SLANG_RETURN_ON_FAIL(m_device->m_device->CreateCommandList(
+        0,
+        D3D12_COMMAND_LIST_TYPE_DIRECT,
+        m_commandAllocator,
+        nullptr,
+        IID_PPV_ARGS(cmdList.writeRef())));
+
+    m_d3dCommandListPool.add(cmdList);
+    RefPtr<CommandBufferImpl> cmdBuffer = new CommandBufferImpl();
+    cmdBuffer->init(m_device, cmdList, this);
+    // Move the reference into a ComPtr so the pool and the caller each hold one.
+    ComPtr<ICommandBuffer> cmdBufferPtr;
+    *cmdBufferPtr.writeRef() = cmdBuffer.detach();
+    m_commandBufferPool.add(cmdBufferPtr);
+    ++m_commandListAllocId;
+    *outCmdBuffer = cmdBufferPtr.detach();
+    return SLANG_OK;
+}
Result D3D12Device::PipelineCommandEncoder::_bindRenderState(Submitter* submitter)
{
@@ -3186,21 +3246,23 @@ Result D3D12Device::PipelineCommandEncoder::_bindRenderState(Submitter* submitte
if (descSet.resourceDescriptorCount)
{
DescriptorTable table;
- table.heap = &m_frame->m_viewHeap;
- table.table = m_frame->m_viewHeap.allocate((int)descSet.resourceDescriptorCount);
+ table.heap = &m_transientHeap->m_viewHeap;
+ table.table =
+ m_transientHeap->m_viewHeap.allocate((int)descSet.resourceDescriptorCount);
descriptorTables.add(table);
}
if (descSet.samplerDescriptorCount)
{
DescriptorTable table;
- table.heap = &m_frame->m_samplerHeap;
- table.table = m_frame->m_samplerHeap.allocate((int)descSet.samplerDescriptorCount);
+ table.heap = &m_transientHeap->m_samplerHeap;
+ table.table =
+ m_transientHeap->m_samplerHeap.allocate((int)descSet.samplerDescriptorCount);
descriptorTables.add(table);
}
}
RootBindingState bindState = {};
bindState.device = m_renderer;
- bindState.frame = m_frame;
+ bindState.transientHeap = m_transientHeap;
auto descTablesView = descriptorTables.getArrayView();
bindState.descriptorTables = descTablesView.arrayView;
SLANG_RETURN_ON_FAIL(rootObjectImpl->bindObject(this, &bindState));
@@ -3213,14 +3275,29 @@ Result D3D12Device::PipelineCommandEncoder::_bindRenderState(Submitter* submitte
return SLANG_OK;
}
-Result D3D12Device::createCommandQueueImpl(
- uint32_t frameCount,
- uint32_t viewHeapSize,
- uint32_t samplerHeapSize,
- D3D12Device::CommandQueueImpl** outQueue)
+/// Creates and initializes a D3D12 transient resource heap.
+///
+/// @param constantBufferSize Stored in the heap descriptor's `constantBufferSize`.
+/// @param viewDescriptors    Forwarded to the heap's `init` as the view descriptor count.
+/// @param samplerDescriptors Forwarded to the heap's `init` as the sampler descriptor count.
+/// @param outHeap            Receives the new heap; the caller owns the reference.
+/// @return SLANG_OK on success, or the failure code from the heap's `init`.
+Result D3D12Device::createTransientResourceHeapImpl(
+    size_t constantBufferSize,
+    uint32_t viewDescriptors,
+    uint32_t samplerDescriptors,
+    TransientResourceHeapImpl** outHeap)
+{
+    // Only the constant buffer size is carried in the descriptor; the descriptor
+    // counts are passed to init() directly.
+    ITransientResourceHeap::Desc heapDesc = {};
+    heapDesc.constantBufferSize = constantBufferSize;
+
+    RefPtr<TransientResourceHeapImpl> heap = new TransientResourceHeapImpl();
+    SLANG_RETURN_ON_FAIL(heap->init(heapDesc, this, viewDescriptors, samplerDescriptors));
+
+    *outHeap = heap.detach();
+    return SLANG_OK;
+}
+
+Result D3D12Device::createCommandQueueImpl(D3D12Device::CommandQueueImpl** outQueue)
{
+ int queueIndex = m_queueIndexAllocator.alloc(1);
+ // If we run out of queue index space, then the user is requesting too many queues.
+ if (queueIndex == -1)
+ return SLANG_FAIL;
+
RefPtr<D3D12Device::CommandQueueImpl> queue = new D3D12Device::CommandQueueImpl();
- SLANG_RETURN_ON_FAIL(queue->init(this, frameCount, viewHeapSize, samplerHeapSize));
+ SLANG_RETURN_ON_FAIL(queue->init(this, (uint32_t)queueIndex));
*outQueue = queue.detach();
return SLANG_OK;
}
@@ -3313,23 +3390,6 @@ static void _initSrvDesc(IResource::Type resourceType, const ITextureResource::D
}
}
-static void _initBufferResourceDesc(size_t bufferSize, D3D12_RESOURCE_DESC& out)
-{
- out = {};
-
- out.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER;
- out.Alignment = 0;
- out.Width = bufferSize;
- out.Height = 1;
- out.DepthOrArraySize = 1;
- out.MipLevels = 1;
- out.Format = DXGI_FORMAT_UNKNOWN;
- out.SampleDesc.Count = 1;
- out.SampleDesc.Quality = 0;
- out.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR;
- out.Flags = D3D12_RESOURCE_FLAG_NONE;
-}
-
Result D3D12Device::createBuffer(const D3D12_RESOURCE_DESC& resourceDesc, const void* srcData, size_t srcDataSize, D3D12Resource& uploadResource, D3D12_RESOURCE_STATES finalState, D3D12Resource& resourceOut)
{
const size_t bufferSize = size_t(resourceDesc.Width);
@@ -3590,6 +3650,10 @@ Result D3D12Device::initialize(const Desc& desc)
SLANG_RETURN_ON_FAIL(RendererBase::initialize(desc));
+ // Initialize queue index allocator.
+ // Support max 32 queues.
+ m_queueIndexAllocator.initPool(32);
+
// Initialize DeviceInfo
{
m_info.deviceType = DeviceType::DirectX12;
@@ -3743,7 +3807,8 @@ Result D3D12Device::initialize(const Desc& desc)
m_desc = desc;
// Create a command queue for internal resource transfer operations.
- SLANG_RETURN_ON_FAIL(createCommandQueueImpl(1, 32, 4, m_resourceCommandQueue.writeRef()));
+ SLANG_RETURN_ON_FAIL(createCommandQueueImpl(m_resourceCommandQueue.writeRef()));
+ SLANG_RETURN_ON_FAIL(createTransientResourceHeapImpl(0, 8, 4, m_resourceCommandTransientHeap.writeRef()));
SLANG_RETURN_ON_FAIL(m_cpuViewHeap.init (m_device, 8192, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV));
SLANG_RETURN_ON_FAIL(m_cpuSamplerHeap.init(m_device, 1024, D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER));
@@ -3764,10 +3829,21 @@ Result D3D12Device::initialize(const Desc& desc)
return SLANG_OK;
}
+/// Public entry point for creating a transient resource heap on the D3D12 device.
+///
+/// @param desc    Heap description; only `constantBufferSize` is read here.
+/// @param outHeap Receives the new heap; the caller owns the reference.
+/// @return SLANG_OK on success, or the failure code from heap creation.
+Result D3D12Device::createTransientResourceHeap(
+    const ITransientResourceHeap::Desc& desc,
+    ITransientResourceHeap** outHeap)
+{
+    // Descriptor capacities used for every heap created through this entry point.
+    const uint32_t kViewDescriptorCount = 8192;
+    const uint32_t kSamplerDescriptorCount = 1024;
+
+    RefPtr<TransientResourceHeapImpl> newHeap;
+    SLANG_RETURN_ON_FAIL(createTransientResourceHeapImpl(
+        desc.constantBufferSize,
+        kViewDescriptorCount,
+        kSamplerDescriptorCount,
+        newHeap.writeRef()));
+    *outHeap = newHeap.detach();
+    return SLANG_OK;
+}
+
Result D3D12Device::createCommandQueue(const ICommandQueue::Desc& desc, ICommandQueue** outQueue)
{
RefPtr<CommandQueueImpl> queue;
- SLANG_RETURN_ON_FAIL(createCommandQueueImpl(8, 4096, 1024, queue.writeRef()));
+ SLANG_RETURN_ON_FAIL(createCommandQueueImpl(queue.writeRef()));
*outQueue = queue.detach();
return SLANG_OK;
}
diff --git a/tools/gfx/d3d12/resource-d3d12.cpp b/tools/gfx/d3d12/resource-d3d12.cpp
index 397eee665..3f91a12be 100644
--- a/tools/gfx/d3d12/resource-d3d12.cpp
+++ b/tools/gfx/d3d12/resource-d3d12.cpp
@@ -72,63 +72,6 @@ void D3D12ResourceBase::transition(
}
}
-/* !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! D3D12CounterFence !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! */
-
-D3D12CounterFence::~D3D12CounterFence()
-{
- if (m_event)
- {
- CloseHandle(m_event);
- }
-}
-
-Result D3D12CounterFence::init(ID3D12Device* device, uint64_t initialValue)
-{
- m_currentValue = initialValue;
-
- SLANG_RETURN_ON_FAIL(device->CreateFence(m_currentValue, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(m_fence.writeRef())));
- // Create an event handle to use for frame synchronization.
- m_event = ::CreateEvent(nullptr, FALSE, FALSE, nullptr);
- if (m_event == nullptr)
- {
- Result res = HRESULT_FROM_WIN32(GetLastError());
- return SLANG_FAILED(res) ? res : SLANG_FAIL;
- }
- return SLANG_OK;
-}
-
-UInt64 D3D12CounterFence::nextSignal(ID3D12CommandQueue* commandQueue)
-{
- // Increment the fence value. Save on the frame - we'll know that frame is done when the fence value >=
- m_currentValue++;
- // Schedule a Signal command in the queue.
- Result res = commandQueue->Signal(m_fence, m_currentValue);
- if (SLANG_FAILED(res))
- {
- assert(!"Signal failed");
- }
- return m_currentValue;
-}
-
-void D3D12CounterFence::waitUntilCompleted(uint64_t completedValue)
-{
- // You can only wait for a value that is less than or equal to the current value
- assert(completedValue <= m_currentValue);
-
- // Wait until the previous frame is finished.
- while (m_fence->GetCompletedValue() < completedValue)
- {
- // Make it signal with the current value
- SLANG_ASSERT_VOID_ON_FAIL(m_fence->SetEventOnCompletion(completedValue, m_event));
- WaitForSingleObject(m_event, INFINITE);
- }
-}
-
-void D3D12CounterFence::nextSignalAndWait(ID3D12CommandQueue* commandQueue)
-{
- waitUntilCompleted(nextSignal(commandQueue));
-}
-
/* !!!!!!!!!!!!!!!!!!!!!!!!! D3D12Resource !!!!!!!!!!!!!!!!!!!!!!!! */
/* static */void D3D12Resource::setDebugName(ID3D12Resource* resource, const char* name)
diff --git a/tools/gfx/d3d12/resource-d3d12.h b/tools/gfx/d3d12/resource-d3d12.h
index 8b6c28114..39b6c13db 100644
--- a/tools/gfx/d3d12/resource-d3d12.h
+++ b/tools/gfx/d3d12/resource-d3d12.h
@@ -46,53 +46,6 @@ protected:
D3D12_RESOURCE_BARRIER m_barriers[MAX_BARRIERS];
};
-/*! \brief A class to simplify using Dx12 fences.
-
-A fence is a mechanism to track GPU work. This is achieved by having a counter that the CPU holds
-called the current value. Calling nextSignal will increase the CPU counter, and add a fence
-with that value to the commandQueue. When the GPU has completed all the work before the fence it will
-update the completed value. This is typically used when
-the CPU needs to know the GPU has finished some piece of work has completed. To do this the CPU
-can check the completed value, and when it is greater or equal to the value returned by nextSignal the
-CPU will know that all the work prior to when the nextSignal was added to the queue will have completed.
-
-NOTE! This cannot be used across threads, as for amongst other reasons SetEventOnCompletion
-only works with a single value.
-
-Signal on the CommandQueue updates the fence on the GPU side. Signal on the fence object changes
-the value on the CPU side (not used here).
-
-Useful article describing how Dx12 synchronization works:
-https://msdn.microsoft.com/en-us/library/windows/desktop/dn899217%28v=vs.85%29.aspx
-*/
-class D3D12CounterFence
-{
-public:
- /// Must be called before used
- SlangResult init(ID3D12Device* device, uint64_t initialValue = 0);
- /// Increases the counter, signals the queue and waits for the signal to be hit
- void nextSignalAndWait(ID3D12CommandQueue* queue);
- /// Signals with next counter value. Returns the value the signal was called on
- uint64_t nextSignal(ID3D12CommandQueue* commandQueue);
- /// Get the current value
- SLANG_FORCE_INLINE uint64_t getCurrentValue() const { return m_currentValue; }
- /// Get the completed value
- SLANG_FORCE_INLINE uint64_t getCompletedValue() const { return m_fence->GetCompletedValue(); }
-
- /// Waits for the the specified value
- void waitUntilCompleted(uint64_t completedValue);
-
- /// Ctor
- D3D12CounterFence() :m_event(nullptr), m_currentValue(0) {}
- /// Dtor
- ~D3D12CounterFence();
-
-protected:
- HANDLE m_event;
- Slang::ComPtr<ID3D12Fence> m_fence;
- UINT64 m_currentValue;
-};
-
/** The base class for resource types allows for tracking of state. It does not allow for setting of the resource though, such that
an interface can return a D3D12ResourceBase, and a client cant manipulate it's state, but it cannot replace/change the actual resource */
struct D3D12ResourceBase
diff --git a/tools/gfx/immediate-renderer-base.cpp b/tools/gfx/immediate-renderer-base.cpp
index 9402f7834..97bc63634 100644
--- a/tools/gfx/immediate-renderer-base.cpp
+++ b/tools/gfx/immediate-renderer-base.cpp
@@ -1,5 +1,6 @@
#include "immediate-renderer-base.h"
#include "simple-render-pass-layout.h"
+#include "simple-transient-resource-heap.h"
#include "command-writer.h"
#include "core/slang-basic.h"
#include "core/slang-blob.h"
@@ -402,15 +403,6 @@ public:
virtual SLANG_NO_THROW const Desc& SLANG_MCALL getDesc() override { return m_desc; }
- virtual SLANG_NO_THROW Result SLANG_MCALL
- createCommandBuffer(ICommandBuffer** outCommandBuffer) override
- {
- RefPtr<CommandBufferImpl> newCmdBuffer = new CommandBufferImpl();
- newCmdBuffer->init(m_renderer);
- *outCommandBuffer = newCmdBuffer.detach();
- return SLANG_OK;
- }
-
virtual SLANG_NO_THROW void SLANG_MCALL
executeCommandBuffers(uint32_t count, ICommandBuffer* const* commandBuffers) override
{
@@ -425,12 +417,26 @@ public:
m_renderer->waitForGpu();
}
};
+
+using TransientResourceHeapImpl =
+ SimpleTransientResourceHeap<ImmediateRendererBase, CommandBufferImpl>;
+
}
ImmediateRendererBase::ImmediateRendererBase() {
m_queue = new CommandQueueImpl(this);
}
+/// Creates a `SimpleTransientResourceHeap`-based heap bound to this renderer.
+///
+/// @param desc    Heap description forwarded unchanged to the heap's `init`.
+/// @param outHeap Receives the new heap; the caller owns the reference.
+/// @return SLANG_OK on success, or the failure code from the heap's `init`.
+SLANG_NO_THROW Result SLANG_MCALL ImmediateRendererBase::createTransientResourceHeap(
+    const ITransientResourceHeap::Desc& desc,
+    ITransientResourceHeap** outHeap)
+{
+    RefPtr<TransientResourceHeapImpl> heap = new TransientResourceHeapImpl();
+    SLANG_RETURN_ON_FAIL(heap->init(this, desc));
+
+    // Transfer the only reference to the caller.
+    *outHeap = heap.detach();
+    return SLANG_OK;
+}
+
SLANG_NO_THROW Result SLANG_MCALL ImmediateRendererBase::createCommandQueue(
const ICommandQueue::Desc& desc,
ICommandQueue** outQueue)
diff --git a/tools/gfx/immediate-renderer-base.h b/tools/gfx/immediate-renderer-base.h
index a78671e49..296cd15cb 100644
--- a/tools/gfx/immediate-renderer-base.h
+++ b/tools/gfx/immediate-renderer-base.h
@@ -64,6 +64,9 @@ public:
virtual SLANG_NO_THROW Result SLANG_MCALL
createCommandQueue(const ICommandQueue::Desc& desc, ICommandQueue** outQueue) override;
+ virtual SLANG_NO_THROW Result SLANG_MCALL createTransientResourceHeap(
+ const ITransientResourceHeap::Desc& desc,
+ ITransientResourceHeap** outHeap) override;
virtual SLANG_NO_THROW Result SLANG_MCALL createRenderPassLayout(
const IRenderPassLayout::Desc& desc,
IRenderPassLayout** outRenderPassLayout) override;
diff --git a/tools/gfx/renderer-shared.cpp b/tools/gfx/renderer-shared.cpp
index 2e6105793..1571e9abf 100644
--- a/tools/gfx/renderer-shared.cpp
+++ b/tools/gfx/renderer-shared.cpp
@@ -11,6 +11,7 @@ const Slang::Guid GfxGUID::IID_ISlangUnknown = SLANG_UUID_ISlangUnknown;
const Slang::Guid GfxGUID::IID_IShaderProgram = SLANG_UUID_IShaderProgram;
const Slang::Guid GfxGUID::IID_IInputLayout = SLANG_UUID_IInputLayout;
const Slang::Guid GfxGUID::IID_IPipelineState = SLANG_UUID_IPipelineState;
+const Slang::Guid GfxGUID::IID_ITransientResourceHeap = SLANG_UUID_ITransientResourceHeap;
const Slang::Guid GfxGUID::IID_IResourceView = SLANG_UUID_IResourceView;
const Slang::Guid GfxGUID::IID_IFramebuffer = SLANG_UUID_IFrameBuffer;
const Slang::Guid GfxGUID::IID_IFramebufferLayout = SLANG_UUID_IFramebufferLayout;
diff --git a/tools/gfx/renderer-shared.h b/tools/gfx/renderer-shared.h
index 41b9a31c9..d1ecebfce 100644
--- a/tools/gfx/renderer-shared.h
+++ b/tools/gfx/renderer-shared.h
@@ -10,10 +10,8 @@ namespace gfx
struct GfxGUID
{
static const Slang::Guid IID_ISlangUnknown;
- static const Slang::Guid IID_IDescriptorSetLayout;
- static const Slang::Guid IID_IDescriptorSet;
static const Slang::Guid IID_IShaderProgram;
- static const Slang::Guid IID_IPipelineLayout;
+ static const Slang::Guid IID_ITransientResourceHeap;
static const Slang::Guid IID_IPipelineState;
static const Slang::Guid IID_IResourceView;
static const Slang::Guid IID_IFramebuffer;
diff --git a/tools/gfx/simple-transient-resource-heap.h b/tools/gfx/simple-transient-resource-heap.h
new file mode 100644
index 000000000..5f6c32451
--- /dev/null
+++ b/tools/gfx/simple-transient-resource-heap.h
@@ -0,0 +1,52 @@
+// simple-transient-resource-heap.h
+#pragma once
+
+// Provide a simple no-op implementation for `ITransientResourceHeap` for targets that
+// already support version management.
+
+#include "slang-gfx.h"
+
+namespace gfx
+{
+/// Minimal `ITransientResourceHeap` implementation.
+///
+/// `TDevice` must provide `createBufferResource`, and `TCommandBuffer` must be
+/// default-constructible with an `init(TDevice*)` method. `synchronizeAndReset()`
+/// does nothing and every `createCommandBuffer()` call allocates a new command buffer.
+template<typename TDevice, typename TCommandBuffer>
+class SimpleTransientResourceHeap
+    : public ITransientResourceHeap
+    , public Slang::RefObject
+{
+public:
+    SLANG_REF_OBJECT_IUNKNOWN_ALL
+    /// Returns this object for the ISlangUnknown / ITransientResourceHeap GUIDs,
+    /// and nullptr for any other GUID.
+    ITransientResourceHeap* getInterface(const Slang::Guid& guid)
+    {
+        const bool isSupported =
+            guid == GfxGUID::IID_ISlangUnknown || guid == GfxGUID::IID_ITransientResourceHeap;
+        return isSupported ? static_cast<ITransientResourceHeap*>(this) : nullptr;
+    }
+
+public:
+    // Device used to initialize command buffers; stored as a raw pointer.
+    TDevice* m_device;
+    // Constant buffer created in init() with desc.constantBufferSize bytes.
+    ComPtr<IBufferResource> m_constantBuffer;
+
+public:
+    /// Remembers `device` and creates the CPU-writable constant buffer.
+    ///
+    /// @return SLANG_OK on success, or the failure code from `createBufferResource`.
+    Result init(TDevice* device, const ITransientResourceHeap::Desc& desc)
+    {
+        m_device = device;
+
+        // Start from the constant-buffer defaults, then override size and CPU access.
+        IBufferResource::Desc constantBufferDesc = {};
+        constantBufferDesc.setDefaults(IResource::Usage::ConstantBuffer);
+        constantBufferDesc.sizeInBytes = desc.constantBufferSize;
+        constantBufferDesc.cpuAccessFlags = IResource::AccessFlag::Write;
+
+        SLANG_RETURN_ON_FAIL(device->createBufferResource(
+            IResource::Usage::ConstantBuffer,
+            constantBufferDesc,
+            nullptr,
+            m_constantBuffer.writeRef()));
+        return SLANG_OK;
+    }
+
+    /// Allocates a new `TCommandBuffer`, initializes it with the stored device and
+    /// hands its reference to the caller.
+    virtual SLANG_NO_THROW Result SLANG_MCALL
+        createCommandBuffer(ICommandBuffer** outCommandBuffer) override
+    {
+        Slang::RefPtr<TCommandBuffer> cmdBuffer = new TCommandBuffer();
+        cmdBuffer->init(m_device);
+        *outCommandBuffer = cmdBuffer.detach();
+        return SLANG_OK;
+    }
+
+    /// Nothing to wait for or recycle in this implementation.
+    virtual SLANG_NO_THROW Result SLANG_MCALL synchronizeAndReset() override { return SLANG_OK; }
+};
+}
diff --git a/tools/gfx/vulkan/render-vk.cpp b/tools/gfx/vulkan/render-vk.cpp
index d4396429b..96a0d1047 100644
--- a/tools/gfx/vulkan/render-vk.cpp
+++ b/tools/gfx/vulkan/render-vk.cpp
@@ -50,7 +50,10 @@ public:
};
// Renderer implementation
Result initVulkanInstanceAndDevice(bool useValidationLayer);
- virtual SLANG_NO_THROW SlangResult SLANG_MCALL initialize(const Desc& desc) override;
+ virtual SLANG_NO_THROW Result SLANG_MCALL initialize(const Desc& desc) override;
+ virtual SLANG_NO_THROW Result SLANG_MCALL createTransientResourceHeap(
+ const ITransientResourceHeap::Desc& desc,
+ ITransientResourceHeap** outHeap) override;
virtual SLANG_NO_THROW Result SLANG_MCALL
createCommandQueue(const ICommandQueue::Desc& desc, ICommandQueue** outQueue) override;
virtual SLANG_NO_THROW Result SLANG_MCALL createSwapchain(
@@ -795,7 +798,6 @@ public:
auto descriptorSetIndex =
findOrAddDescriptorSet(typeLayout->getDescriptorSetSpaceOffset(s));
auto& descriptorSetInfo = m_descriptorSetBuildInfos[descriptorSetIndex];
-
for (SlangInt r = 0; r < descriptorRangeCount; ++r)
{
auto slangBindingType =
@@ -812,7 +814,6 @@ public:
}
auto vkDescriptorType = _mapDescriptorType(slangBindingType);
-
VkDescriptorSetLayoutBinding vkBindingRangeDesc = {};
vkBindingRangeDesc.binding =
(uint32_t)typeLayout->getDescriptorSetDescriptorRangeIndexOffset(s, r);
@@ -829,14 +830,6 @@ public:
}
descriptorSetInfo.vkBindings.add(vkBindingRangeDesc);
}
- VkDescriptorSetLayoutCreateInfo createInfo = {};
- createInfo.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
- createInfo.pBindings = descriptorSetInfo.vkBindings.getBuffer();
- createInfo.bindingCount = (uint32_t)descriptorSetInfo.vkBindings.getCount();
- VkDescriptorSetLayout vkDescSetLayout;
- SLANG_RETURN_ON_FAIL(m_renderer->m_api.vkCreateDescriptorSetLayout(
- m_renderer->m_api.m_device, &createInfo, nullptr, &vkDescSetLayout));
- descriptorSetInfo.descriptorSetLayout = vkDescSetLayout;
}
return SLANG_OK;
}
@@ -1019,6 +1012,19 @@ public:
m_combinedTextureSamplerCount = builder->m_combinedTextureSamplerCount;
m_subObjectCount = builder->m_subObjectCount;
m_subObjectRanges = builder->m_subObjectRanges;
+
+ // Create VkDescriptorSetLayout for all descriptor sets.
+ for (auto& descriptorSetInfo : m_descriptorSetInfos)
+ {
+ VkDescriptorSetLayoutCreateInfo createInfo = {};
+ createInfo.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
+ createInfo.pBindings = descriptorSetInfo.vkBindings.getBuffer();
+ createInfo.bindingCount = (uint32_t)descriptorSetInfo.vkBindings.getCount();
+ VkDescriptorSetLayout vkDescSetLayout;
+ SLANG_RETURN_ON_FAIL(renderer->m_api.vkCreateDescriptorSetLayout(
+ renderer->m_api.m_device, &createInfo, nullptr, &vkDescSetLayout));
+ descriptorSetInfo.descriptorSetLayout = vkDescSetLayout;
+ }
return SLANG_OK;
}
@@ -1145,7 +1151,8 @@ public:
auto slangEntryPointLayout = entryPointLayout->getSlangLayout();
_addDescriptorSets(
- slangEntryPointLayout->getTypeLayout(), slangEntryPointLayout->getVarLayout());
+ _unwrapParameterGroups(slangEntryPointLayout->getTypeLayout()),
+ slangEntryPointLayout->getVarLayout());
m_entryPoints.add(info);
}
@@ -1210,7 +1217,7 @@ public:
m_program = builder->m_program;
m_programLayout = builder->m_programLayout;
- m_entryPoints = builder->m_entryPoints;
+ m_entryPoints = _Move(builder->m_entryPoints);
m_renderer = renderer;
if (m_program->getSpecializationParamCount() != 0)
@@ -2709,8 +2716,10 @@ public:
VkCommandBuffer m_commandBuffer;
VkCommandBuffer m_preCommandBuffer = VK_NULL_HANDLE;
VkCommandPool m_pool;
+ VkFence m_fence;
VKDevice* m_renderer;
DescriptorSetAllocator* m_transientDescSetAllocator;
+ bool m_isPreCommandBufferEmpty = true;
// Command buffers are deallocated by its command pool,
// so no need to free individually.
~CommandBufferImpl() = default;
@@ -2718,11 +2727,13 @@ public:
Result init(
VKDevice* renderer,
VkCommandPool pool,
+ VkFence fence,
DescriptorSetAllocator* transientDescSetAllocator)
{
m_renderer = renderer;
m_transientDescSetAllocator = transientDescSetAllocator;
m_pool = pool;
+ m_fence = fence;
auto& api = renderer->m_api;
VkCommandBufferAllocateInfo allocInfo = {};
@@ -2733,12 +2744,23 @@ public:
SLANG_VK_RETURN_ON_FAIL(
api.vkAllocateCommandBuffers(api.m_device, &allocInfo, &m_commandBuffer));
+ beginCommandBuffer();
+ return SLANG_OK;
+ }
+
+ void beginCommandBuffer()
+ {
+ auto& api = m_renderer->m_api;
VkCommandBufferBeginInfo beginInfo = {
VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
nullptr,
VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT};
api.vkBeginCommandBuffer(m_commandBuffer, &beginInfo);
- return SLANG_OK;
+ if (m_preCommandBuffer)
+ {
+ api.vkBeginCommandBuffer(m_preCommandBuffer, &beginInfo);
+ }
+ m_isPreCommandBufferEmpty = true;
}
Result createPreCommandBuffer()
@@ -2761,6 +2783,7 @@ public:
VkCommandBuffer getPreCommandBuffer()
{
+ m_isPreCommandBufferEmpty = false;
if (m_preCommandBuffer)
return m_preCommandBuffer;
createPreCommandBuffer();
@@ -3191,7 +3214,7 @@ public:
virtual SLANG_NO_THROW void SLANG_MCALL close() override
{
auto& vkAPI = m_renderer->m_api;
- if (m_preCommandBuffer != VK_NULL_HANDLE)
+ if (!m_isPreCommandBufferEmpty)
{
// `preCmdBuffer` contains buffer transfer commands for shader object
// uniform buffers, and we need a memory barrier here to ensure the
@@ -3231,56 +3254,34 @@ public:
public:
Desc m_desc;
- uint32_t m_poolIndex;
RefPtr<VKDevice> m_renderer;
VkQueue m_queue;
uint32_t m_queueFamilyIndex;
VkSemaphore m_pendingWaitSemaphore = VK_NULL_HANDLE;
List<VkCommandBuffer> m_submitCommandBuffers;
- static const int kCommandPoolCount = 8;
- VkCommandPool m_commandPools[kCommandPoolCount];
- DescriptorSetAllocator m_descSetAllocators[kCommandPoolCount];
- VkFence m_fences[kCommandPoolCount];
- VkSemaphore m_semaphores[kCommandPoolCount];
+ static const int kSemaphoreCount = 2;
+ uint32_t m_currentSemaphoreIndex;
+ VkSemaphore m_semaphores[kSemaphoreCount];
~CommandQueueImpl()
{
m_renderer->m_api.vkQueueWaitIdle(m_queue);
m_renderer->m_queueAllocCount--;
- for (int i = 0; i < kCommandPoolCount; i++)
+ for (int i = 0; i < kSemaphoreCount; i++)
{
- m_renderer->m_api.vkDestroyCommandPool(
- m_renderer->m_api.m_device, m_commandPools[i], nullptr);
- m_renderer->m_api.vkDestroyFence(m_renderer->m_api.m_device, m_fences[i], nullptr);
m_renderer->m_api.vkDestroySemaphore(
m_renderer->m_api.m_device, m_semaphores[i], nullptr);
- m_descSetAllocators[i].close();
}
}
void init(VKDevice* renderer, VkQueue queue, uint32_t queueFamilyIndex)
{
m_renderer = renderer;
- m_poolIndex = 0;
+ m_currentSemaphoreIndex = 0;
m_queue = queue;
m_queueFamilyIndex = queueFamilyIndex;
- for (int i = 0; i < kCommandPoolCount; i++)
+ for (int i = 0; i < kSemaphoreCount; i++)
{
- m_descSetAllocators[i].m_api = &m_renderer->m_api;
-
- VkCommandPoolCreateInfo poolCreateInfo = {};
- poolCreateInfo.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
- poolCreateInfo.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT;
- poolCreateInfo.queueFamilyIndex = queueFamilyIndex;
- m_renderer->m_api.vkCreateCommandPool(
- m_renderer->m_api.m_device, &poolCreateInfo, nullptr, &m_commandPools[i]);
-
- VkFenceCreateInfo fenceCreateInfo = {};
- fenceCreateInfo.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
- fenceCreateInfo.flags = VK_FENCE_CREATE_SIGNALED_BIT;
- m_renderer->m_api.vkCreateFence(
- m_renderer->m_api.m_device, &fenceCreateInfo, nullptr, &m_fences[i]);
-
VkSemaphoreCreateInfo semaphoreCreateInfo = {};
semaphoreCreateInfo.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO;
semaphoreCreateInfo.flags = 0;
@@ -3289,24 +3290,6 @@ public:
}
}
- // Swaps to and resets the next command pool.
- // Wait if command lists in the next pool are still in flight.
- Result swapPools()
- {
- auto& vkAPI = m_renderer->m_api;
- m_poolIndex++;
- m_poolIndex = m_poolIndex % kCommandPoolCount;
-
- if (vkAPI.vkWaitForFences(vkAPI.m_device, 1, &m_fences[m_poolIndex], 1, UINT64_MAX) !=
- VK_SUCCESS)
- {
- return SLANG_FAIL;
- }
- vkAPI.vkResetCommandPool(vkAPI.m_device, m_commandPools[m_poolIndex], 0);
- m_descSetAllocators[m_poolIndex].reset();
- return SLANG_OK;
- }
-
virtual SLANG_NO_THROW void SLANG_MCALL wait() override
{
auto& vkAPI = m_renderer->m_api;
@@ -3318,33 +3301,26 @@ public:
return m_desc;
}
- virtual SLANG_NO_THROW Result SLANG_MCALL
- createCommandBuffer(ICommandBuffer** result) override
- {
- RefPtr<CommandBufferImpl> commandBuffer = new CommandBufferImpl();
- SLANG_RETURN_ON_FAIL(commandBuffer->init(
- m_renderer, m_commandPools[m_poolIndex], &m_descSetAllocators[m_poolIndex]));
- *result = commandBuffer.detach();
- return SLANG_OK;
- }
-
virtual SLANG_NO_THROW void SLANG_MCALL
executeCommandBuffers(
uint32_t count,
ICommandBuffer* const* commandBuffers) override
{
+ if (count == 0)
+ return;
+
auto& vkAPI = m_renderer->m_api;
m_submitCommandBuffers.clear();
for (uint32_t i = 0; i < count; i++)
{
auto cmdBufImpl = static_cast<CommandBufferImpl*>(commandBuffers[i]);
- if (cmdBufImpl->m_preCommandBuffer != VK_NULL_HANDLE)
+ if (!cmdBufImpl->m_isPreCommandBufferEmpty)
m_submitCommandBuffers.add(cmdBufImpl->m_preCommandBuffer);
auto vkCmdBuf = cmdBufImpl->m_commandBuffer;
m_submitCommandBuffers.add(vkCmdBuf);
}
VkSemaphore waitSemaphore = m_pendingWaitSemaphore;
- VkSemaphore signalSemaphore = m_semaphores[m_poolIndex];
+ VkSemaphore signalSemaphore = m_semaphores[m_currentSemaphoreIndex];
VkSubmitInfo submitInfo = {};
submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
VkPipelineStageFlags stageFlag = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
@@ -3358,11 +3334,50 @@ public:
}
submitInfo.signalSemaphoreCount = 1;
submitInfo.pSignalSemaphores = &signalSemaphore;
- vkAPI.vkResetFences(vkAPI.m_device, 1, &m_fences[m_poolIndex]);
- vkAPI.vkQueueSubmit(m_queue, 1, &submitInfo, m_fences[m_poolIndex]);
+
+ auto fence = static_cast<CommandBufferImpl*>(commandBuffers[0])->m_fence;
+ vkAPI.vkResetFences(vkAPI.m_device, 1, &fence);
+ vkAPI.vkQueueSubmit(m_queue, 1, &submitInfo, fence);
m_pendingWaitSemaphore = signalSemaphore;
- swapPools();
+
+ m_currentSemaphoreIndex++;
+ m_currentSemaphoreIndex = m_currentSemaphoreIndex % kSemaphoreCount;
+ }
+ };
+
+ class TransientResourceHeapImpl
+ : public ITransientResourceHeap
+ , public RefObject
+ {
+ public:
+ SLANG_REF_OBJECT_IUNKNOWN_ALL
+ ITransientResourceHeap* getInterface(const Slang::Guid& guid)
+ {
+ if (guid == GfxGUID::IID_ISlangUnknown || guid == GfxGUID::IID_ITransientResourceHeap)
+ return static_cast<ITransientResourceHeap*>(this);
+ return nullptr;
+ }
+
+ public:
+ VkCommandPool m_commandPool;
+ DescriptorSetAllocator m_descSetAllocator;
+ VkFence m_fence;
+ List<RefPtr<CommandBufferImpl>> m_commandBufferPool;
+ uint32_t m_commandBufferAllocId = 0;
+ RefPtr<BufferResourceImpl> m_constantBuffer;
+ RefPtr<VKDevice> m_device;
+
+ Result init(const ITransientResourceHeap::Desc& desc, VKDevice* device);
+ ~TransientResourceHeapImpl()
+ {
+ m_device->m_api.vkDestroyCommandPool(m_device->m_api.m_device, m_commandPool, nullptr);
+ m_device->m_api.vkDestroyFence(m_device->m_api.m_device, m_fence, nullptr);
+ m_descSetAllocator.close();
}
+ public:
+ virtual SLANG_NO_THROW Result SLANG_MCALL
+ createCommandBuffer(ICommandBuffer** outCommandBuffer) override;
+ virtual SLANG_NO_THROW Result SLANG_MCALL synchronizeAndReset() override;
};
class SwapchainImpl
@@ -3742,6 +3757,15 @@ public:
void _transitionImageLayout(VkImage image, VkFormat format, const TextureResource::Desc& desc, VkImageLayout oldLayout, VkImageLayout newLayout);
+ uint32_t getQueueFamilyIndex(ICommandQueue::QueueType queueType)
+ {
+ switch (queueType)
+ {
+ case ICommandQueue::QueueType::Graphics:
+ default:
+ return m_queueFamilyIndex;
+ }
+ }
public:
// VKDevice members.
@@ -4282,6 +4306,71 @@ void VKDevice::waitForGpu()
m_deviceQueue.flushAndWait();
}
+/// Creates the Vulkan command pool and fence owned by this transient heap.
+///
+/// @param desc   Heap description. NOTE(review): nothing in `desc` is read here and
+///               `m_constantBuffer` is not created — confirm whether that is intended.
+/// @param device Device whose API table and graphics queue family are used.
+/// @return SLANG_OK on success, or a failure code if the command pool or fence
+///         cannot be created.
+Result VKDevice::TransientResourceHeapImpl::init(
+    const ITransientResourceHeap::Desc& desc,
+    VKDevice* device)
+{
+    m_device = device;
+    m_descSetAllocator.m_api = &device->m_api;
+
+    // Start from null handles so the destructor's vkDestroy* calls are harmless
+    // if creation fails part-way through.
+    m_commandPool = VK_NULL_HANDLE;
+    m_fence = VK_NULL_HANDLE;
+
+    VkCommandPoolCreateInfo poolCreateInfo = {};
+    poolCreateInfo.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
+    poolCreateInfo.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT;
+    poolCreateInfo.queueFamilyIndex =
+        device->getQueueFamilyIndex(ICommandQueue::QueueType::Graphics);
+    SLANG_VK_RETURN_ON_FAIL(device->m_api.vkCreateCommandPool(
+        device->m_api.m_device, &poolCreateInfo, nullptr, &m_commandPool));
+
+    // The fence starts signaled so that a wait issued before any submission
+    // returns immediately.
+    VkFenceCreateInfo fenceCreateInfo = {};
+    fenceCreateInfo.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
+    fenceCreateInfo.flags = VK_FENCE_CREATE_SIGNALED_BIT;
+    SLANG_VK_RETURN_ON_FAIL(device->m_api.vkCreateFence(
+        device->m_api.m_device, &fenceCreateInfo, nullptr, &m_fence));
+    return SLANG_OK;
+}
+
+/// Hands out a command buffer allocated from this heap's command pool.
+///
+/// While `m_commandBufferAllocId` is below the pool size, the pooled command buffer
+/// at that index is re-begun and reused; otherwise a new one is created and pooled.
+///
+/// @param outCmdBuffer Receives the command buffer; the caller owns one reference.
+/// @return SLANG_OK on success, or the failure code from initializing a new buffer.
+Result VKDevice::TransientResourceHeapImpl::createCommandBuffer(ICommandBuffer** outCmdBuffer)
+{
+    const bool poolExhausted =
+        m_commandBufferAllocId >= (uint32_t)m_commandBufferPool.getCount();
+    if (poolExhausted)
+    {
+        // No reusable entry left: allocate a new command buffer and remember it.
+        RefPtr<CommandBufferImpl> freshBuffer = new CommandBufferImpl();
+        SLANG_RETURN_ON_FAIL(
+            freshBuffer->init(m_device, m_commandPool, m_fence, &m_descSetAllocator));
+        m_commandBufferPool.add(freshBuffer);
+        m_commandBufferAllocId++;
+        *outCmdBuffer = freshBuffer.detach();
+        return SLANG_OK;
+    }
+
+    // Reuse the next pooled command buffer and put it back into recording state.
+    RefPtr<CommandBufferImpl> recycledBuffer = m_commandBufferPool[m_commandBufferAllocId];
+    recycledBuffer->beginCommandBuffer();
+    m_commandBufferAllocId++;
+    *outCmdBuffer = recycledBuffer.detach();
+    return SLANG_OK;
+}
+
+/// Waits on this heap's fence, then resets the command pool, the descriptor set
+/// allocator and the command buffer cursor so the heap can be reused.
+///
+/// @return SLANG_OK on success, SLANG_FAIL if waiting on the fence fails, or a
+///         failure code if the command pool cannot be reset.
+Result VKDevice::TransientResourceHeapImpl::synchronizeAndReset()
+{
+    auto& api = m_device->m_api;
+    if (api.vkWaitForFences(api.m_device, 1, &m_fence, VK_TRUE, UINT64_MAX) != VK_SUCCESS)
+    {
+        // Leave all heap state untouched: the submitted work may still be running.
+        return SLANG_FAIL;
+    }
+    SLANG_VK_RETURN_ON_FAIL(api.vkResetCommandPool(api.m_device, m_commandPool, 0));
+    m_descSetAllocator.reset();
+    // Rewind the cursor only after the pool has really been reset, so pooled
+    // command buffers are never re-begun while still pending.
+    m_commandBufferAllocId = 0;
+    return SLANG_OK;
+}
+
+/// Creates a Vulkan transient resource heap bound to this device.
+///
+/// @param desc    Heap description forwarded unchanged to the heap's `init`.
+/// @param outHeap Receives the new heap; the caller owns the reference.
+/// @return SLANG_OK on success, or the failure code from the heap's `init`.
+Result VKDevice::createTransientResourceHeap(
+    const ITransientResourceHeap::Desc& desc,
+    ITransientResourceHeap** outHeap)
+{
+    RefPtr<TransientResourceHeapImpl> heap = new TransientResourceHeapImpl();
+    SLANG_RETURN_ON_FAIL(heap->init(desc, this));
+
+    // Transfer the only reference to the caller.
+    *outHeap = heap.detach();
+    return SLANG_OK;
+}
+
Result VKDevice::createCommandQueue(const ICommandQueue::Desc& desc, ICommandQueue** outQueue)
{
// Only support one queue for now.
@@ -4430,23 +4519,6 @@ static VkBufferUsageFlagBits _calcBufferUsageFlags(int bindFlags)
return VkBufferUsageFlagBits(dstFlags);
}
-static VkBufferUsageFlags _calcBufferUsageFlags(int bindFlags, int cpuAccessFlags, const void* initData)
-{
- VkBufferUsageFlags usage = _calcBufferUsageFlags(bindFlags);
-
- if (cpuAccessFlags & IResource::AccessFlag::Read)
- {
- // If it can be read from, set this
- usage |= VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
- }
- if ((cpuAccessFlags & IResource::AccessFlag::Write) || initData)
- {
- usage |= VK_BUFFER_USAGE_TRANSFER_DST_BIT;
- }
-
- return usage;
-}
-
static VkImageUsageFlagBits _calcImageUsageFlags(IResource::BindFlag::Enum bind)
{
typedef IResource::BindFlag BindFlag;
@@ -4890,7 +4962,8 @@ Result VKDevice::createBufferResource(IResource::Usage initialUsage, const IBuff
VkMemoryPropertyFlags reqMemoryProperties = 0;
- VkBufferUsageFlags usage = _calcBufferUsageFlags(desc.bindFlags, desc.cpuAccessFlags, initData);
+ VkBufferUsageFlags usage = _calcBufferUsageFlags(desc.bindFlags) |
+ VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
switch (initialUsage)
{