diff options
Diffstat (limited to 'tools')
| -rw-r--r-- | tools/gfx/d3d11/render-d3d11.cpp | 14 | ||||
| -rw-r--r-- | tools/gfx/d3d12/render-d3d12.cpp | 11423 | ||||
| -rw-r--r-- | tools/gfx/d3d12/render-d3d12.h | 1792 | ||||
| -rw-r--r-- | tools/gfx/render.cpp | 4 | ||||
| -rw-r--r-- | tools/gfx/renderer-shared.h | 2 | ||||
| -rw-r--r-- | tools/gfx/vulkan/render-vk.cpp | 6 |
6 files changed, 6926 insertions, 6315 deletions
diff --git a/tools/gfx/d3d11/render-d3d11.cpp b/tools/gfx/d3d11/render-d3d11.cpp index d7a918a3b..aecadcf46 100644 --- a/tools/gfx/d3d11/render-d3d11.cpp +++ b/tools/gfx/d3d11/render-d3d11.cpp @@ -2071,7 +2071,7 @@ SlangResult SLANG_MCALL createD3D11Device(const IDevice::Desc* desc, IDevice** o return SLANG_OK; } -static void _initSrvDesc(IResource::Type resourceType, const ITextureResource::Desc& textureDesc, DXGI_FORMAT pixelFormat, D3D11_SHADER_RESOURCE_VIEW_DESC& descOut) +static void initSrvDesc(IResource::Type resourceType, const ITextureResource::Desc& textureDesc, DXGI_FORMAT pixelFormat, D3D11_SHADER_RESOURCE_VIEW_DESC& descOut) { // create SRV descOut = D3D11_SHADER_RESOURCE_VIEW_DESC(); @@ -2170,7 +2170,7 @@ D3D11Device::ScopeNVAPI::~ScopeNVAPI() // !!!!!!!!!!!!!!!!!!!!!!!!!!!! Renderer interface !!!!!!!!!!!!!!!!!!!!!!!!!! -static bool _isSupportedNVAPIOp(IUnknown* dev, uint32_t op) +static bool isSupportedNVAPIOp(IUnknown* dev, uint32_t op) { #ifdef GFX_NVAPI { @@ -2347,11 +2347,11 @@ SlangResult D3D11Device::initialize(const Desc& desc) return SLANG_E_NOT_AVAILABLE; } - if (_isSupportedNVAPIOp(m_device, NV_EXTN_OP_UINT64_ATOMIC )) + if (isSupportedNVAPIOp(m_device, NV_EXTN_OP_UINT64_ATOMIC )) { m_features.add("atomic-int64"); } - if (_isSupportedNVAPIOp(m_device, NV_EXTN_OP_FP32_ATOMIC)) + if (isSupportedNVAPIOp(m_device, NV_EXTN_OP_FP32_ATOMIC)) { m_features.add("atomic-float"); } @@ -2548,7 +2548,7 @@ SlangResult D3D11Device::readTextureResource( } } -static D3D11_BIND_FLAG _calcResourceFlag(ResourceState state) +static D3D11_BIND_FLAG calcResourceFlag(ResourceState state) { switch (state) { @@ -2581,7 +2581,7 @@ static int _calcResourceBindFlags(ResourceStateSet allowedStates) { auto state = (ResourceState)i; if (allowedStates.contains(state)) - dstFlags |= _calcResourceFlag(state); + dstFlags |= calcResourceFlag(state); } return dstFlags; } @@ -3000,7 +3000,7 @@ Result D3D11Device::createTextureView(ITextureResource* texture, IResourceView:: case IResourceView::Type::ShaderResource: { D3D11_SHADER_RESOURCE_VIEW_DESC srvDesc; - _initSrvDesc(resourceImpl->getType(), *resourceImpl->getDesc(), D3DUtil::getMapFormat(desc.format), srvDesc); + initSrvDesc(resourceImpl->getType(), *resourceImpl->getDesc(), D3DUtil::getMapFormat(desc.format), srvDesc); ComPtr<ID3D11ShaderResourceView> srv; SLANG_RETURN_ON_FAIL(m_device->CreateShaderResourceView(resourceImpl->m_resource, &srvDesc, srv.writeRef())); diff --git a/tools/gfx/d3d12/render-d3d12.cpp b/tools/gfx/d3d12/render-d3d12.cpp index 0fc3b2874..1e73caf44 100644 --- a/tools/gfx/d3d12/render-d3d12.cpp +++ b/tools/gfx/d3d12/render-d3d12.cpp @@ -1,5686 +1,214 @@ // render-d3d12.cpp -#define _CRT_SECURE_NO_WARNINGS - #include "render-d3d12.h" -//WORKING:#include "options.h" -#include "../renderer-shared.h" -#include "../transient-resource-heap-base.h" -#include "../simple-render-pass-layout.h" -#include "../d3d/d3d-swapchain.h" -#include "../mutable-shader-object.h" -#include "../command-encoder-com-forward.h" -#include "core/slang-blob.h" -#include "core/slang-basic.h" -#include "core/slang-chunked-list.h" - -// In order to use the Slang API, we need to include its header - -//WORKING:#include <slang.h> - -// We will be rendering with Direct3D 12, so we need to include -// the Windows and D3D12 headers - -#define WIN32_LEAN_AND_MEAN -#define NOMINMAX -#include <Windows.h> -#undef WIN32_LEAN_AND_MEAN -#undef NOMINMAX - -#include <dxgi1_4.h> -#include <d3d12.h> -//#include <d3dcompiler.h> - -#ifndef __ID3D12GraphicsCommandList1_FWD_DEFINED__ -// If can't find a definition of CommandList1, just use an empty definition -struct ID3D12GraphicsCommandList1 {}; -#endif - #ifdef GFX_NVAPI -# include "../nvapi/nvapi-include.h" +# include "../nvapi/nvapi-include.h" #endif -#include "slang-com-ptr.h" -#include "../flag-combiner.h" - -#include "resource-d3d12.h" -#include "descriptor-heap-d3d12.h" - #include "../d3d/d3d-util.h" - +#include "../flag-combiner.h" #include "../nvapi/nvapi-util.h" - -// We will use the C standard library just for printing error messages. +#include "slang-com-ptr.h" #include <stdio.h> -#ifdef _MSC_VER -#include <stddef.h> -#if (_MSC_VER < 1900) -#define snprintf sprintf_s -#endif -#endif -// - #ifdef _DEBUG -#define ENABLE_DEBUG_LAYER 1 +# define ENABLE_DEBUG_LAYER 1 #else -#define ENABLE_DEBUG_LAYER 0 +# define ENABLE_DEBUG_LAYER 0 #endif -namespace gfx { - +namespace gfx +{ using namespace Slang; -// Define function pointer types for PIX library. -typedef HRESULT(WINAPI* PFN_BeginEventOnCommandList)( - ID3D12GraphicsCommandList* commandList, UINT64 color, _In_ PCSTR formatString); -typedef HRESULT(WINAPI* PFN_EndEventOnCommandList)(ID3D12GraphicsCommandList* commandList); - -class D3D12Device : public RendererBase -{ -public: - // Renderer implementation - virtual SLANG_NO_THROW SlangResult SLANG_MCALL initialize(const Desc& desc) override; - virtual SLANG_NO_THROW Result SLANG_MCALL - getFormatSupportedResourceStates(Format format, ResourceStateSet* outStates) override; - - virtual SLANG_NO_THROW Result SLANG_MCALL - createCommandQueue(const ICommandQueue::Desc& desc, ICommandQueue** outQueue) override; - virtual SLANG_NO_THROW Result SLANG_MCALL createTransientResourceHeap( - const ITransientResourceHeap::Desc& desc, - ITransientResourceHeap** outHeap) override; - virtual SLANG_NO_THROW Result SLANG_MCALL createSwapchain( - const ISwapchain::Desc& desc, - WindowHandle window, - ISwapchain** outSwapchain) override; - - virtual SLANG_NO_THROW Result SLANG_MCALL getTextureAllocationInfo( - const ITextureResource::Desc& desc, size_t* outSize, size_t* outAlignment) override; - virtual SLANG_NO_THROW Result SLANG_MCALL getTextureRowAlignment(size_t* outAlignment) override; - virtual SLANG_NO_THROW Result SLANG_MCALL createTextureResource( - const ITextureResource::Desc& desc, - const ITextureResource::SubresourceData* initData, - ITextureResource** outResource) override; - virtual SLANG_NO_THROW Result SLANG_MCALL createTextureFromNativeHandle( - InteropHandle handle, - const ITextureResource::Desc& srcDesc, - ITextureResource** outResource) override; - virtual SLANG_NO_THROW Result SLANG_MCALL createBufferResource( - const IBufferResource::Desc& desc, - const void* initData, - IBufferResource** outResource) override; - virtual SLANG_NO_THROW Result SLANG_MCALL createBufferFromNativeHandle( - InteropHandle handle, - const IBufferResource::Desc& srcDesc, - IBufferResource** outResource) override; - - virtual SLANG_NO_THROW Result SLANG_MCALL - createSamplerState(ISamplerState::Desc const& desc, ISamplerState** outSampler) override; - - virtual SLANG_NO_THROW Result SLANG_MCALL createTextureView( - ITextureResource* texture, - IResourceView::Desc const& desc, - IResourceView** outView) override; - virtual SLANG_NO_THROW Result SLANG_MCALL createBufferView( - IBufferResource* buffer, - IBufferResource* counterBuffer, - IResourceView::Desc const& desc, - IResourceView** outView) override; - - virtual SLANG_NO_THROW Result SLANG_MCALL - createFramebuffer(IFramebuffer::Desc const& desc, IFramebuffer** outFrameBuffer) override; - - virtual SLANG_NO_THROW Result SLANG_MCALL - createFramebufferLayout(IFramebufferLayout::Desc const& desc, IFramebufferLayout** outLayout) override; - - virtual SLANG_NO_THROW Result SLANG_MCALL createRenderPassLayout( - const IRenderPassLayout::Desc& desc, - IRenderPassLayout** outRenderPassLayout) override; - - virtual SLANG_NO_THROW Result SLANG_MCALL createInputLayout( - IInputLayout::Desc const& desc, - IInputLayout** outLayout) override; - - virtual Result createShaderObjectLayout( - slang::TypeLayoutReflection* typeLayout, - ShaderObjectLayoutBase** outLayout) override; - virtual Result createShaderObject(ShaderObjectLayoutBase* layout, IShaderObject** outObject) - override; - virtual Result createMutableShaderObject( - ShaderObjectLayoutBase* layout, IShaderObject** outObject) override; - virtual SLANG_NO_THROW Result SLANG_MCALL - createMutableRootShaderObject(IShaderProgram* program, IShaderObject** outObject) override; - - virtual SLANG_NO_THROW Result SLANG_MCALL - createShaderTable(const IShaderTable::Desc& desc, IShaderTable** outShaderTable) override; - virtual SLANG_NO_THROW Result SLANG_MCALL - createProgram(const IShaderProgram::Desc& desc, IShaderProgram** outProgram, ISlangBlob** outDiagnostics) override; - virtual SLANG_NO_THROW Result SLANG_MCALL createGraphicsPipelineState( - const GraphicsPipelineStateDesc& desc, IPipelineState** outState) override; - virtual SLANG_NO_THROW Result SLANG_MCALL createComputePipelineState( - const ComputePipelineStateDesc& desc, IPipelineState** outState) override; - - virtual SLANG_NO_THROW Result SLANG_MCALL createQueryPool( - const IQueryPool::Desc& desc, IQueryPool** outState) override; - - virtual SLANG_NO_THROW Result SLANG_MCALL - createFence(const IFence::Desc& desc, IFence** outFence) override; - - virtual SLANG_NO_THROW Result SLANG_MCALL waitForFences( - uint32_t fenceCount, - IFence** fences, - uint64_t* fenceValues, - bool waitForAll, - uint64_t timeout) override; - - virtual SLANG_NO_THROW SlangResult SLANG_MCALL readTextureResource( - ITextureResource* resource, - ResourceState state, - ISlangBlob** outBlob, - size_t* outRowPitch, - size_t* outPixelSize) override; - - virtual SLANG_NO_THROW SlangResult SLANG_MCALL readBufferResource( - IBufferResource* resource, - size_t offset, - size_t size, - ISlangBlob** outBlob) override; - - virtual SLANG_NO_THROW const DeviceInfo& SLANG_MCALL getDeviceInfo() const override - { - return m_info; - } - - virtual SLANG_NO_THROW Result SLANG_MCALL getNativeDeviceHandles(InteropHandles* outHandles) override; - - ~D3D12Device(); - -#if SLANG_GFX_HAS_DXR_SUPPORT - virtual SLANG_NO_THROW Result SLANG_MCALL getAccelerationStructurePrebuildInfo( - const IAccelerationStructure::BuildInputs& buildInputs, - IAccelerationStructure::PrebuildInfo* outPrebuildInfo) override; - virtual SLANG_NO_THROW Result SLANG_MCALL createAccelerationStructure( - const IAccelerationStructure::CreateDesc& desc, - IAccelerationStructure** outView) override; - virtual SLANG_NO_THROW Result SLANG_MCALL createRayTracingPipelineState( - const RayTracingPipelineStateDesc& desc, IPipelineState** outState) override; -#endif - -public: - - static const Int kMaxNumRenderFrames = 4; - static const Int kMaxNumRenderTargets = 3; - - static const Int kMaxRTVCount = 8; - static const Int kMaxDescriptorSetCount = 16; - - struct DeviceInfo - { - void clear() - { - m_dxgiFactory.setNull(); - m_device.setNull(); - m_adapter.setNull(); - m_desc = {}; - m_desc1 = {}; - m_isWarp = false; - m_isSoftware = false; - } - - bool m_isWarp; - bool m_isSoftware; - ComPtr<IDXGIFactory> m_dxgiFactory; - ComPtr<ID3D12Device> m_device; - ComPtr<ID3D12Device5> m_device5; - ComPtr<IDXGIAdapter> m_adapter; - DXGI_ADAPTER_DESC m_desc; - DXGI_ADAPTER_DESC1 m_desc1; - }; - - struct Submitter - { - virtual void setRootConstantBufferView(int index, D3D12_GPU_VIRTUAL_ADDRESS gpuBufferLocation) = 0; - virtual void setRootUAV(int index, D3D12_GPU_VIRTUAL_ADDRESS gpuBufferLocation) = 0; - virtual void setRootSRV(int index, D3D12_GPU_VIRTUAL_ADDRESS gpuBufferLocation) = 0; - virtual void setRootDescriptorTable(int index, D3D12_GPU_DESCRIPTOR_HANDLE BaseDescriptor) = 0; - virtual void setRootSignature(ID3D12RootSignature* rootSignature) = 0; - virtual void setRootConstants(Index rootParamIndex, Index dstOffsetIn32BitValues, Index countOf32BitValues, void const* srcData) = 0; - virtual void setPipelineState(PipelineStateBase* pipelineState) = 0; - }; - - class BufferResourceImpl: public gfx::BufferResource - { - public: - typedef BufferResource Parent; - - BufferResourceImpl(const Desc& desc) - : Parent(desc) - , m_defaultState(D3DUtil::getResourceState(desc.defaultState)) - { - } - - ~BufferResourceImpl() - { - if (sharedHandle.handleValue != 0) - { - CloseHandle((HANDLE)sharedHandle.handleValue); - } - } - - D3D12Resource m_resource; ///< The resource in gpu memory, allocated on the correct heap relative to the cpu access flag - - D3D12_RESOURCE_STATES m_defaultState; - - virtual SLANG_NO_THROW DeviceAddress SLANG_MCALL getDeviceAddress() override - { - return (DeviceAddress)m_resource.getResource()->GetGPUVirtualAddress(); - } - - virtual SLANG_NO_THROW Result SLANG_MCALL getNativeResourceHandle(InteropHandle* outHandle) override - { - outHandle->handleValue = (uint64_t)m_resource.getResource(); - outHandle->api = InteropHandleAPI::D3D12; - return SLANG_OK; - } - - virtual SLANG_NO_THROW Result SLANG_MCALL getSharedHandle(InteropHandle* outHandle) override - { - // Check if a shared handle already exists for this resource. - if (sharedHandle.handleValue != 0) - { - *outHandle = sharedHandle; - return SLANG_OK; - } - - // If a shared handle doesn't exist, create one and store it. - ComPtr<ID3D12Device> pDevice; - auto pResource = m_resource.getResource(); - pResource->GetDevice(IID_PPV_ARGS(pDevice.writeRef())); - SLANG_RETURN_ON_FAIL(pDevice->CreateSharedHandle(pResource, NULL, GENERIC_ALL, nullptr, (HANDLE*)&outHandle->handleValue)); - outHandle->api = InteropHandleAPI::D3D12; - sharedHandle = *outHandle; - return SLANG_OK; - } - - virtual SLANG_NO_THROW Result SLANG_MCALL - map(MemoryRange* rangeToRead, void** outPointer) override - { - D3D12_RANGE range = {}; - if (rangeToRead) - { - range.Begin = (SIZE_T)rangeToRead->offset; - range.End = (SIZE_T)(rangeToRead->offset + rangeToRead->size); - } - SLANG_RETURN_ON_FAIL(m_resource.getResource()->Map(0, rangeToRead ? &range : nullptr, outPointer)); - return SLANG_OK; - } - - virtual SLANG_NO_THROW Result SLANG_MCALL unmap(MemoryRange* writtenRange) override - { - D3D12_RANGE range = {}; - if (writtenRange) - { - range.Begin = (SIZE_T)writtenRange->offset; - range.End = (SIZE_T)(writtenRange->offset + writtenRange->size); - } - m_resource.getResource()->Unmap(0, writtenRange ? &range : nullptr); - return SLANG_OK; - } - - virtual SLANG_NO_THROW Result SLANG_MCALL setDebugName(const char* name) override - { - Parent::setDebugName(name); - m_resource.setDebugName(name); - return SLANG_OK; - } - }; - - class TextureResourceImpl: public TextureResource - { - public: - typedef TextureResource Parent; - - TextureResourceImpl(const Desc& desc) - : Parent(desc) - , m_defaultState(D3DUtil::getResourceState(desc.defaultState)) - { - } - - ~TextureResourceImpl() - { - if (sharedHandle.handleValue != 0) - { - CloseHandle((HANDLE)sharedHandle.handleValue); - } - } - - D3D12Resource m_resource; - D3D12_RESOURCE_STATES m_defaultState; - - virtual SLANG_NO_THROW Result SLANG_MCALL getNativeResourceHandle(InteropHandle* outHandle) override - { - outHandle->handleValue = (uint64_t)m_resource.getResource(); - outHandle->api = InteropHandleAPI::D3D12; - return SLANG_OK; - } - - virtual SLANG_NO_THROW Result SLANG_MCALL getSharedHandle(InteropHandle* outHandle) override - { - // Check if a shared handle already exists for this resource. - if (sharedHandle.handleValue != 0) - { - *outHandle = sharedHandle; - return SLANG_OK; - } - - // If a shared handle doesn't exist, create one and store it. - ComPtr<ID3D12Device> pDevice; - auto pResource = m_resource.getResource(); - pResource->GetDevice(IID_PPV_ARGS(pDevice.writeRef())); - SLANG_RETURN_ON_FAIL(pDevice->CreateSharedHandle(pResource, NULL, GENERIC_ALL, nullptr, (HANDLE*)&outHandle->handleValue)); - outHandle->api = InteropHandleAPI::D3D12; - return SLANG_OK; - } - - virtual SLANG_NO_THROW Result SLANG_MCALL setDebugName(const char* name) override - { - Parent::setDebugName(name); - m_resource.setDebugName(name); - return SLANG_OK; - } - }; - - class SamplerStateImpl : public SamplerStateBase - { - public: - D3D12Descriptor m_descriptor; - Slang::RefPtr<D3D12GeneralExpandingDescriptorHeap> m_allocator; - ~SamplerStateImpl() - { - m_allocator->free(m_descriptor); - } - virtual SLANG_NO_THROW Result SLANG_MCALL getNativeHandle(InteropHandle* outHandle) override - { - outHandle->api = InteropHandleAPI::D3D12CpuDescriptorHandle; - outHandle->handleValue = m_descriptor.cpuHandle.ptr; - return SLANG_OK; - } - }; - - class ResourceViewInternalImpl - { - public: - D3D12Descriptor m_descriptor; - RefPtr<D3D12GeneralExpandingDescriptorHeap> m_allocator; - ~ResourceViewInternalImpl() - { - if (m_descriptor.cpuHandle.ptr) - m_allocator->free(m_descriptor); - } - }; - - class ResourceViewImpl - : public ResourceViewBase - , public ResourceViewInternalImpl - { - public: - RefPtr<Resource> m_resource; - virtual SLANG_NO_THROW Result SLANG_MCALL getNativeHandle(InteropHandle* outHandle) override - { - outHandle->api = InteropHandleAPI::D3D12CpuDescriptorHandle; - outHandle->handleValue = m_descriptor.cpuHandle.ptr; - return SLANG_OK; - } - }; - - class FramebufferLayoutImpl : public FramebufferLayoutBase - { - public: - ShortList<IFramebufferLayout::AttachmentLayout> m_renderTargets; - bool m_hasDepthStencil = false; - IFramebufferLayout::AttachmentLayout m_depthStencil; - }; - - class FramebufferImpl : public FramebufferBase - { - public: - ShortList<RefPtr<ResourceViewImpl>> renderTargetViews; - RefPtr<ResourceViewImpl> depthStencilView; - ShortList<D3D12_CPU_DESCRIPTOR_HANDLE> renderTargetDescriptors; - struct Color4f - { - float values[4]; - }; - ShortList<Color4f> renderTargetClearValues; - D3D12_CPU_DESCRIPTOR_HANDLE depthStencilDescriptor; - DepthStencilClearValue depthStencilClearValue; - }; - - class RenderPassLayoutImpl : public SimpleRenderPassLayout - { - public: - RefPtr<FramebufferLayoutImpl> m_framebufferLayout; - void init(const IRenderPassLayout::Desc& desc) - { - SimpleRenderPassLayout::init(desc); - m_framebufferLayout = static_cast<FramebufferLayoutImpl*>(desc.framebufferLayout); - m_hasDepthStencil = m_framebufferLayout->m_hasDepthStencil; - } - }; - - class InputLayoutImpl : public InputLayoutBase - { - public: - List<D3D12_INPUT_ELEMENT_DESC> m_elements; - List<UINT> m_vertexStreamStrides; - List<char> m_text; ///< Holds all strings to keep in scope - }; - - class PipelineStateImpl : public PipelineStateBase - { - public: - PipelineStateImpl(D3D12Device* device) - : m_device(device) - {} - D3D12Device* m_device; - ComPtr<ID3D12PipelineState> m_pipelineState; - void init(const GraphicsPipelineStateDesc& inDesc) - { - PipelineStateDesc pipelineDesc; - pipelineDesc.type = PipelineType::Graphics; - pipelineDesc.graphics = inDesc; - initializeBase(pipelineDesc); - } - void init(const ComputePipelineStateDesc& inDesc) - { - PipelineStateDesc pipelineDesc; - pipelineDesc.type = PipelineType::Compute; - pipelineDesc.compute = inDesc; - initializeBase(pipelineDesc); - } - virtual SLANG_NO_THROW Result SLANG_MCALL getNativeHandle(InteropHandle* outHandle) override - { - SLANG_RETURN_ON_FAIL(ensureAPIPipelineStateCreated()); - outHandle->api = InteropHandleAPI::D3D12; - outHandle->handleValue = reinterpret_cast<uint64_t>(m_pipelineState.get()); - return SLANG_OK; - } - virtual Result ensureAPIPipelineStateCreated() override; - }; - -#if SLANG_GFX_HAS_DXR_SUPPORT - class RayTracingPipelineStateImpl : public PipelineStateBase - { - public: - ComPtr<ID3D12StateObject> m_stateObject; - D3D12Device* m_device; - RayTracingPipelineStateImpl(D3D12Device* device) - : m_device(device) - {} - void init(const RayTracingPipelineStateDesc& inDesc) - { - PipelineStateDesc pipelineDesc; - pipelineDesc.type = PipelineType::RayTracing; - pipelineDesc.rayTracing.set(inDesc); - initializeBase(pipelineDesc); - } - virtual SLANG_NO_THROW Result SLANG_MCALL getNativeHandle(InteropHandle* outHandle) override - { - SLANG_RETURN_ON_FAIL(ensureAPIPipelineStateCreated()); - outHandle->api = InteropHandleAPI::D3D12; - outHandle->handleValue = reinterpret_cast<uint64_t>(m_stateObject.get()); - return SLANG_OK; - } - virtual Result ensureAPIPipelineStateCreated() override; - }; -#endif - - class QueryPoolImpl : public QueryPoolBase - { - public: - Result init(const IQueryPool::Desc& desc, D3D12Device* device); - - virtual SLANG_NO_THROW Result SLANG_MCALL getResult(SlangInt queryIndex, SlangInt count, uint64_t* data) override - { - m_commandList->Reset(m_commandAllocator, nullptr); - m_commandList->ResolveQueryData( - m_queryHeap, - m_queryType, - (UINT)queryIndex, - (UINT)count, - m_readBackBuffer, - sizeof(uint64_t) * queryIndex); - m_commandList->Close(); - ID3D12CommandList* cmdList = m_commandList; - m_commandQueue->ExecuteCommandLists(1, &cmdList); - m_eventValue++; - m_fence->SetEventOnCompletion(m_eventValue, m_waitEvent); - m_commandQueue->Signal(m_fence, m_eventValue); - WaitForSingleObject(m_waitEvent, INFINITE); - m_commandAllocator->Reset(); - - int8_t* mappedData = nullptr; - D3D12_RANGE readRange = { sizeof(uint64_t) * queryIndex, sizeof(uint64_t) * (queryIndex + count) }; - m_readBackBuffer.getResource()->Map(0, &readRange, (void**)&mappedData); - memcpy(data, mappedData + sizeof(uint64_t) * queryIndex, sizeof(uint64_t) * count); - m_readBackBuffer.getResource()->Unmap(0, nullptr); - return SLANG_OK; - } - - void writeTimestamp(ID3D12GraphicsCommandList* cmdList, SlangInt index) - { - cmdList->EndQuery(m_queryHeap, D3D12_QUERY_TYPE_TIMESTAMP, (UINT)index); - } - - public: - D3D12_QUERY_TYPE m_queryType; - ComPtr<ID3D12QueryHeap> m_queryHeap; - D3D12Resource m_readBackBuffer; - ComPtr<ID3D12CommandAllocator> m_commandAllocator; - ComPtr<ID3D12GraphicsCommandList> m_commandList; - ComPtr<ID3D12Fence> m_fence; - ComPtr<ID3D12CommandQueue> m_commandQueue; - HANDLE m_waitEvent; - UINT64 m_eventValue = 0; - }; - - /// Implements the IQueryPool interface with a plain buffer. - /// Used for query types that does not correspond to a D3D query, - /// such as ray-tracing acceleration structure post-build info. - class PlainBufferProxyQueryPoolImpl - : public QueryPoolBase - { - public: - SLANG_COM_OBJECT_IUNKNOWN_ALL - IQueryPool* getInterface(const Guid& guid) - { - if (guid == GfxGUID::IID_ISlangUnknown || guid == GfxGUID::IID_IQueryPool) - return static_cast<IQueryPool*>(this); - return nullptr; - } - public: - Result init(const IQueryPool::Desc& desc, D3D12Device* device, uint32_t stride); - - virtual SLANG_NO_THROW Result SLANG_MCALL reset() override - { - m_resultDirty = true; - auto encodeInfo = m_device->encodeResourceCommands(); - D3D12_RESOURCE_BARRIER barrier = {}; - barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; - barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_SOURCE; - barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_UNORDERED_ACCESS; - barrier.Transition.pResource = m_bufferResource->m_resource.getResource(); - encodeInfo.d3dCommandList->ResourceBarrier(1, &barrier); - m_device->submitResourceCommandsAndWait(encodeInfo); - return SLANG_OK; - } - virtual SLANG_NO_THROW Result SLANG_MCALL - getResult(SlangInt queryIndex, SlangInt count, uint64_t* data) override - { - if (m_resultDirty) - { - auto encodeInfo = m_device->encodeResourceCommands(); - D3D12_RESOURCE_BARRIER barrier = {}; - barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; - barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_UNORDERED_ACCESS; - barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_SOURCE; - barrier.Transition.pResource = m_bufferResource->m_resource.getResource(); - encodeInfo.d3dCommandList->ResourceBarrier(1, &barrier); - - D3D12Resource stageBuf; - - auto size = (size_t)m_count * m_stride; - D3D12_HEAP_PROPERTIES heapProps; - heapProps.Type = D3D12_HEAP_TYPE_READBACK; - heapProps.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN; - heapProps.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN; - heapProps.CreationNodeMask = 1; - heapProps.VisibleNodeMask = 1; - - D3D12_RESOURCE_DESC stagingDesc; - _initBufferResourceDesc(size, stagingDesc); - - SLANG_RETURN_ON_FAIL(stageBuf.initCommitted( - m_device->m_device, - heapProps, - D3D12_HEAP_FLAG_NONE, - stagingDesc, - D3D12_RESOURCE_STATE_COPY_DEST, - nullptr)); - - encodeInfo.d3dCommandList->CopyBufferRegion( - stageBuf, - 0, - m_bufferResource->m_resource.getResource(), - 0, - size); - m_device->submitResourceCommandsAndWait(encodeInfo); - void* ptr = nullptr; - stageBuf.getResource()->Map(0, nullptr, &ptr); - m_result.setCount(m_count * m_stride); - memcpy(m_result.getBuffer(), ptr, m_result.getCount()); - - m_resultDirty = false; - } - - memcpy(data, m_result.getBuffer() + queryIndex * m_stride, count * m_stride); - - return SLANG_OK; - } - public: - QueryType m_queryType; - RefPtr<BufferResourceImpl> m_bufferResource; - RefPtr<D3D12Device> m_device; - List<uint8_t> m_result; - bool m_resultDirty = true; - uint32_t m_stride = 0; - uint32_t m_count = 0; - }; - - struct BoundVertexBuffer - { - RefPtr<BufferResourceImpl> m_buffer; - int m_offset; - }; - - struct GraphicsSubmitter : public Submitter - { - virtual void setRootConstantBufferView(int index, D3D12_GPU_VIRTUAL_ADDRESS gpuBufferLocation) override - { - m_commandList->SetGraphicsRootConstantBufferView(index, gpuBufferLocation); - } - virtual void setRootUAV(int index, D3D12_GPU_VIRTUAL_ADDRESS gpuBufferLocation) override - { - m_commandList->SetGraphicsRootUnorderedAccessView(index, gpuBufferLocation); - } - virtual void setRootSRV(int index, D3D12_GPU_VIRTUAL_ADDRESS gpuBufferLocation) override - { - m_commandList->SetGraphicsRootShaderResourceView(index, gpuBufferLocation); - } - virtual void setRootDescriptorTable(int index, D3D12_GPU_DESCRIPTOR_HANDLE baseDescriptor) override - { - m_commandList->SetGraphicsRootDescriptorTable(index, baseDescriptor); - } - void setRootSignature(ID3D12RootSignature* rootSignature) - { - m_commandList->SetGraphicsRootSignature(rootSignature); - } - void setRootConstants( - Index rootParamIndex, - Index dstOffsetIn32BitValues, - Index countOf32BitValues, - void const* srcData) override - { - m_commandList->SetGraphicsRoot32BitConstants(UINT(rootParamIndex), UINT(countOf32BitValues), srcData, UINT(dstOffsetIn32BitValues)); - } - virtual void setPipelineState(PipelineStateBase* pipeline) override - { - auto pipelineImpl = static_cast<PipelineStateImpl*>(pipeline); - m_commandList->SetPipelineState(pipelineImpl->m_pipelineState.get()); - } - - GraphicsSubmitter(ID3D12GraphicsCommandList* commandList): - m_commandList(commandList) - { - } - - ID3D12GraphicsCommandList* m_commandList; - }; - - struct ComputeSubmitter : public Submitter - { - virtual void setRootConstantBufferView(int index, D3D12_GPU_VIRTUAL_ADDRESS gpuBufferLocation) override - { - m_commandList->SetComputeRootConstantBufferView(index, gpuBufferLocation); - } - virtual void setRootUAV(int index, D3D12_GPU_VIRTUAL_ADDRESS gpuBufferLocation) override - { - m_commandList->SetComputeRootUnorderedAccessView(index, gpuBufferLocation); - } - virtual void setRootSRV(int index, D3D12_GPU_VIRTUAL_ADDRESS gpuBufferLocation) override - { - m_commandList->SetComputeRootShaderResourceView(index, gpuBufferLocation); - } - virtual void setRootDescriptorTable(int index, D3D12_GPU_DESCRIPTOR_HANDLE baseDescriptor) override - { - m_commandList->SetComputeRootDescriptorTable(index, baseDescriptor); - } - void setRootSignature(ID3D12RootSignature* rootSignature) - { - m_commandList->SetComputeRootSignature(rootSignature); - } - void setRootConstants( - Index rootParamIndex, - Index dstOffsetIn32BitValues, - Index countOf32BitValues, - void const* srcData) override - { - m_commandList->SetComputeRoot32BitConstants(UINT(rootParamIndex), UINT(countOf32BitValues), srcData, UINT(dstOffsetIn32BitValues)); - } - virtual void setPipelineState(PipelineStateBase* pipeline) override - { - auto pipelineImpl = static_cast<PipelineStateImpl*>(pipeline); - m_commandList->SetPipelineState(pipelineImpl->m_pipelineState.get()); - } - ComputeSubmitter(ID3D12GraphicsCommandList* commandList) : - m_commandList(commandList) - { - } - - ID3D12GraphicsCommandList* m_commandList; - }; - - static void _initBufferResourceDesc(size_t bufferSize, D3D12_RESOURCE_DESC& out) - { - out = {}; - - out.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; - out.Alignment = 0; - out.Width = bufferSize; - out.Height = 1; - out.DepthOrArraySize = 1; - out.MipLevels = 1; - out.Format = DXGI_FORMAT_UNKNOWN; - out.SampleDesc.Count = 1; - out.SampleDesc.Quality = 0; - out.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; - out.Flags = D3D12_RESOURCE_FLAG_NONE; - } - - class TransientResourceHeapImpl - : public TransientResourceHeapBaseImpl<D3D12Device, BufferResourceImpl> - , public ID3D12TransientResourceHeap - { - private: - typedef TransientResourceHeapBaseImpl<D3D12Device, BufferResourceImpl> Super; - public: - ComPtr<ID3D12CommandAllocator> m_commandAllocator; - List<ComPtr<ID3D12GraphicsCommandList>> m_d3dCommandListPool; - List<RefPtr<RefObject>> m_commandBufferPool; - uint32_t m_commandListAllocId = 0; - // Wait values for each command queue. - struct QueueWaitInfo - { - uint64_t waitValue; - HANDLE fenceEvent; - ComPtr<ID3D12Fence> fence = nullptr; - }; - ShortList<QueueWaitInfo, 4> m_waitInfos; - - QueueWaitInfo& getQueueWaitInfo(uint32_t queueIndex) - { - if (queueIndex < (uint32_t)m_waitInfos.getCount()) - { - return m_waitInfos[queueIndex]; - } - auto oldCount = m_waitInfos.getCount(); - m_waitInfos.setCount(queueIndex + 1); - for (auto i = oldCount; i < m_waitInfos.getCount(); i++) - { - m_waitInfos[i].waitValue = 0; - m_waitInfos[i].fenceEvent = CreateEventEx( - nullptr, - false, - 0, - EVENT_ALL_ACCESS); - } - return m_waitInfos[queueIndex]; - } - // During command submission, we need all the descriptor tables that get - // used to come from a single heap (for each descriptor heap type). - // - // We will thus keep a single heap of each type that we hope will hold - // all the descriptors that actually get needed in a frame. - ShortList<D3D12DescriptorHeap, 4> m_viewHeaps; // Cbv, Srv, Uav - ShortList<D3D12DescriptorHeap, 4> m_samplerHeaps; // Heap for samplers - int32_t m_currentViewHeapIndex = -1; - int32_t m_currentSamplerHeapIndex = -1; - bool m_canResize = false; - - uint32_t m_viewHeapSize; - uint32_t m_samplerHeapSize; - - D3D12DescriptorHeap& getCurrentViewHeap() { return m_viewHeaps[m_currentViewHeapIndex]; } - D3D12DescriptorHeap& getCurrentSamplerHeap() { return m_samplerHeaps[m_currentSamplerHeapIndex]; } - - D3D12LinearExpandingDescriptorHeap m_stagingCpuViewHeap; - D3D12LinearExpandingDescriptorHeap m_stagingCpuSamplerHeap; - - virtual SLANG_NO_THROW SlangResult SLANG_MCALL - queryInterface(SlangUUID const& uuid, void** outObject) override - { - if (uuid == GfxGUID::IID_ID3D12TransientResourceHeap) - { - *outObject = static_cast<ID3D12TransientResourceHeap*>(this); - addRef(); - return SLANG_OK; - } - return Super::queryInterface(uuid, outObject); - } - - virtual SLANG_NO_THROW uint32_t SLANG_MCALL addRef() override { return Super::addRef(); } - virtual SLANG_NO_THROW uint32_t SLANG_MCALL release() override { return Super::release(); } - - virtual SLANG_NO_THROW Result SLANG_MCALL allocateTransientDescriptorTable( - DescriptorType type, - uint32_t count, - uint64_t& outDescriptorOffset, - void** outD3DDescriptorHeapHandle) override - { - auto& heap = (type == DescriptorType::ResourceView) ? getCurrentViewHeap() - : getCurrentSamplerHeap(); - int allocResult = heap.allocate((int)count); - if (allocResult == -1) - { - return SLANG_E_OUT_OF_MEMORY; - } - outDescriptorOffset = (uint64_t)allocResult; - *outD3DDescriptorHeapHandle = heap.getHeap(); - return SLANG_OK; - } - - ~TransientResourceHeapImpl() - { - synchronizeAndReset(); - for (auto& waitInfo : m_waitInfos) - CloseHandle(waitInfo.fenceEvent); - } - - bool canResize() { return m_canResize; } - - Result init( - const ITransientResourceHeap::Desc& desc, - D3D12Device* device, - uint32_t viewHeapSize, - uint32_t samplerHeapSize) - { - Super::init(desc, D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT, device); - m_canResize = (desc.flags & ITransientResourceHeap::Flags::AllowResizing) != 0; - m_viewHeapSize = viewHeapSize; - m_samplerHeapSize = samplerHeapSize; - - m_stagingCpuViewHeap.init( - device->m_device, - 1000000, - D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, - D3D12_DESCRIPTOR_HEAP_FLAG_NONE); - m_stagingCpuSamplerHeap.init( - device->m_device, - 1000000, - D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER, - D3D12_DESCRIPTOR_HEAP_FLAG_NONE); - - auto d3dDevice = device->m_device; - SLANG_RETURN_ON_FAIL(d3dDevice->CreateCommandAllocator( - D3D12_COMMAND_LIST_TYPE_DIRECT, IID_PPV_ARGS(m_commandAllocator.writeRef()))); - - allocateNewViewDescriptorHeap(device); - allocateNewSamplerDescriptorHeap(device); - - return SLANG_OK; - } - - Result allocateNewViewDescriptorHeap(D3D12Device* device) - { - auto nextHeapIndex = m_currentViewHeapIndex + 1; - if (nextHeapIndex < m_viewHeaps.getCount()) - { - m_viewHeaps[nextHeapIndex].deallocateAll(); - m_currentViewHeapIndex = nextHeapIndex; - return SLANG_OK; - } - auto d3dDevice = device->m_device; - D3D12DescriptorHeap viewHeap; - SLANG_RETURN_ON_FAIL(viewHeap.init( - d3dDevice, - m_viewHeapSize, - D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, - D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE)); - m_currentViewHeapIndex = (int32_t)m_viewHeaps.getCount(); - m_viewHeaps.add(_Move(viewHeap)); - return SLANG_OK; - } - - Result allocateNewSamplerDescriptorHeap(D3D12Device* device) - { - auto nextHeapIndex = m_currentSamplerHeapIndex + 1; - if (nextHeapIndex < m_samplerHeaps.getCount()) - { - m_samplerHeaps[nextHeapIndex].deallocateAll(); - m_currentSamplerHeapIndex = nextHeapIndex; - return SLANG_OK; - } - auto d3dDevice = device->m_device; - D3D12DescriptorHeap samplerHeap; - SLANG_RETURN_ON_FAIL(samplerHeap.init( - d3dDevice, - m_samplerHeapSize, - D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER, - D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE)); - m_currentSamplerHeapIndex = (int32_t)m_samplerHeaps.getCount(); - m_samplerHeaps.add(_Move(samplerHeap)); - return SLANG_OK; - } - - virtual SLANG_NO_THROW Result SLANG_MCALL - createCommandBuffer(ICommandBuffer** outCommandBuffer) override; - - virtual SLANG_NO_THROW Result SLANG_MCALL synchronizeAndReset() override; - }; +namespace d3d12 +{ - static Result _uploadBufferData( - ID3D12Device* device, - ID3D12GraphicsCommandList* cmdList, - TransientResourceHeapImpl* transientHeap, - BufferResourceImpl* buffer, - size_t offset, - size_t size, - void* data) +namespace +{ +bool isSupportedNVAPIOp(ID3D12Device* dev, uint32_t op) +{ +#ifdef GFX_NVAPI { - IBufferResource* uploadResource; - size_t uploadResourceOffset = 0; - if (buffer->getDesc()->memoryType != MemoryType::Upload) - { - SLANG_RETURN_ON_FAIL(transientHeap->allocateStagingBuffer( - size, uploadResource, uploadResourceOffset, MemoryType::Upload)); - } - - D3D12Resource& uploadResourceRef = - (buffer->getDesc()->memoryType == MemoryType::Upload) - ? buffer->m_resource - : static_cast<BufferResourceImpl*>(uploadResource)->m_resource; - - D3D12_RANGE readRange = {}; - readRange.Begin = 0; - readRange.End = 0; - void* uploadData; - SLANG_RETURN_ON_FAIL(uploadResourceRef.getResource()->Map( - 0, &readRange, reinterpret_cast<void**>(&uploadData))); - memcpy((uint8_t*)uploadData + uploadResourceOffset + offset, data, size); - D3D12_RANGE writtenRange = {}; - writtenRange.Begin = uploadResourceOffset + offset; - writtenRange.End = uploadResourceOffset + offset + size; - uploadResourceRef.getResource()->Unmap(0, &writtenRange); - - if (buffer->getDesc()->memoryType != MemoryType::Upload) - { - cmdList->CopyBufferRegion( - buffer->m_resource.getResource(), - offset, - uploadResourceRef.getResource(), - uploadResourceOffset + offset, - size); - } - - return SLANG_OK; + bool isSupported; + NvAPI_Status status = + NvAPI_D3D12_IsNvShaderExtnOpCodeSupported(dev, NvU32(op), &isSupported); + return status == NVAPI_OK && isSupported; } - - class CommandBufferImpl; - - class PipelineCommandEncoder - { - public: - bool m_isOpen = false; - bool m_bindingDirty = true; - CommandBufferImpl* m_commandBuffer; - TransientResourceHeapImpl* m_transientHeap; - D3D12Device* m_renderer; - ID3D12Device* m_device; - ID3D12GraphicsCommandList* m_d3dCmdList; - ID3D12GraphicsCommandList* m_preCmdList = nullptr; - - RefPtr<PipelineStateBase> m_currentPipeline; - - static int getBindPointIndex(PipelineType type) - { - switch (type) - { - case PipelineType::Graphics: - return 0; - case PipelineType::Compute: - return 1; - case PipelineType::RayTracing: - return 2; - default: - assert(!"unknown pipeline type."); - return -1; - } - } - - void init(CommandBufferImpl* commandBuffer) - { - m_commandBuffer = commandBuffer; - m_d3dCmdList = m_commandBuffer->m_cmdList; - m_renderer = commandBuffer->m_renderer; - m_transientHeap = commandBuffer->m_transientHeap; - m_device = commandBuffer->m_renderer->m_device; - } - - void endEncodingImpl() { m_isOpen = false; } - - Result bindPipelineImpl(IPipelineState* pipelineState, IShaderObject** outRootObject) - { - m_currentPipeline = static_cast<PipelineStateBase*>(pipelineState); - auto rootObject = &m_commandBuffer->m_rootShaderObject; - m_commandBuffer->m_mutableRootShaderObject = nullptr; - SLANG_RETURN_ON_FAIL(rootObject->reset( - m_renderer, - m_currentPipeline->getProgram<ShaderProgramImpl>()->m_rootObjectLayout, - m_commandBuffer->m_transientHeap)); - *outRootObject = rootObject; - m_bindingDirty = true; - return SLANG_OK; - } - - Result bindPipelineWithRootObjectImpl(IPipelineState* pipelineState, IShaderObject* rootObject) - { - m_currentPipeline = static_cast<PipelineStateBase*>(pipelineState); - m_commandBuffer->m_mutableRootShaderObject = static_cast<MutableRootShaderObjectImpl*>(rootObject); - m_bindingDirty = true; - return SLANG_OK; - } - - /// Specializes the pipeline according to current root-object argument values, - /// applys the root object bindings and binds the pipeline state. - /// The newly specialized pipeline is held alive by the pipeline cache so users of - /// `newPipeline` do not need to maintain its lifespan. - Result _bindRenderState(Submitter* submitter, RefPtr<PipelineStateBase>& newPipeline); - }; - - struct DescriptorTable - { - DescriptorHeapReference m_heap; - uint32_t m_offset = 0; - uint32_t m_count = 0; - - SLANG_FORCE_INLINE uint32_t getDescriptorCount() const { return m_count; } - - /// Get the GPU handle at the specified index - SLANG_FORCE_INLINE D3D12_GPU_DESCRIPTOR_HANDLE getGpuHandle(uint32_t index = 0) const - { - SLANG_ASSERT(index < getDescriptorCount()); - return m_heap.getGpuHandle(m_offset + index); - } - - /// Get the CPU handle at the specified index - SLANG_FORCE_INLINE D3D12_CPU_DESCRIPTOR_HANDLE getCpuHandle(uint32_t index = 0) const - { - SLANG_ASSERT(index < getDescriptorCount()); - return m_heap.getCpuHandle(m_offset + index); - } - - void freeIfSupported() - { - if(m_count) - { - m_heap.freeIfSupported(m_offset, m_count); - m_offset = 0; - m_count = 0; - } - } - - bool allocate(uint32_t count) - { - auto allocatedOffset = m_heap.allocate(count); - if (allocatedOffset == -1) - return false; - m_offset = allocatedOffset; - m_count = count; - return true; - } - - bool allocate(DescriptorHeapReference heap, uint32_t count) - { - auto allocatedOffset = heap.allocate(count); - if (allocatedOffset == -1) - return false; - m_heap = heap; - m_offset = allocatedOffset; - m_count = count; - return true; - } - }; - - /// Contextual data and operations required when binding shader objects to the pipeline state - struct BindingContext - { - PipelineCommandEncoder* encoder; - Submitter* submitter; - TransientResourceHeapImpl* transientHeap; - D3D12Device* device; - D3D12_DESCRIPTOR_HEAP_TYPE outOfMemoryHeap; // The type of descriptor heap that is OOM during binding. - }; - - /// A representation of the offset at which to bind a shader parameter or sub-object - struct BindingOffset - { - // Note: When we actually bind a shader object to the pipeline we do not care about - // HLSL-specific notions like `t` registers and `space`s. Those concepts are all - // mediated by the root signature. - // - // Instead, we need to consider the offsets at which the object will be bound - // into the actual D3D12 API state, which consists of the index of the current - // root parameter to bind from, as well as indices into the current descriptor - // tables (for resource views and samplers). - - uint32_t rootParam = 0; - uint32_t resource = 0; - uint32_t sampler = 0; - - void operator+=(BindingOffset const& offset) - { - rootParam += offset.rootParam; - resource += offset.resource; - sampler += offset.sampler; - } - }; - - /// A reprsentation of an allocated descriptor set, consisting of an option resource table and an optional sampler table - struct DescriptorSet - { - DescriptorTable resourceTable; - DescriptorTable samplerTable; - - void freeIfSupported() - { - resourceTable.freeIfSupported(); - samplerTable .freeIfSupported(); - } - }; - - // Provides information on how binding ranges are stored in descriptor tables for - // a shader object. - // We allocate one CPU descriptor table for each descriptor heap type for the shader - // object. In `ShaderObjectLayoutImpl`, we store the offset into the descriptor tables - // for each binding, so we know where to write the descriptor when the user sets - // a resource or sampler binding. - class ShaderObjectLayoutImpl : public ShaderObjectLayoutBase - { - public: - - /// Information about a single logical binding range - struct BindingRangeInfo - { - // Some of the information we store on binding ranges is redundant with - // the information that Slang's reflection information stores, but having - // it here can make the code more compact and obvious. - - /// The type of binding in this range. - slang::BindingType bindingType; - - /// The shape of the resource - SlangResourceShape resourceShape; - - /// The number of distinct bindings in this range. - uint32_t count; - - /// A "flat" index for this range in whatever array provides backing storage for it - uint32_t baseIndex; - - /// An index into the sub-object array if this binding range is treated - /// as a sub-object. - uint32_t subObjectIndex; - - bool isRootParameter; - }; - - /// Offset information for a sub-object range - struct SubObjectRangeOffset : BindingOffset - { - SubObjectRangeOffset() - {} - - SubObjectRangeOffset(slang::VariableLayoutReflection* varLayout) - { - if(auto pendingLayout = varLayout->getPendingDataLayout()) - { - pendingOrdinaryData = (uint32_t) pendingLayout->getOffset(SLANG_PARAMETER_CATEGORY_UNIFORM); - } - } - - /// The offset for "pending" ordinary data related to this range - uint32_t pendingOrdinaryData = 0; - }; - - /// Stride information for a sub-object range - struct SubObjectRangeStride : BindingOffset - { - SubObjectRangeStride() - {} - - SubObjectRangeStride(slang::TypeLayoutReflection* typeLayout) - { - if(auto pendingLayout = typeLayout->getPendingDataTypeLayout()) - { - pendingOrdinaryData = (uint32_t) pendingLayout->getSize(SLANG_PARAMETER_CATEGORY_UNIFORM); - } - } - - /// The strid for "pending" ordinary data related to this range - uint32_t pendingOrdinaryData = 0; - }; - - /// Information about a sub-objecrt range - struct SubObjectRangeInfo - { - /// The index of the binding range corresponding to this sub-object range - Index bindingRangeIndex = 0; - - /// Layout information for the type of sub-object expected to be bound, if known - RefPtr<ShaderObjectLayoutImpl> layout; - - /// The offset to use when binding the first object in this range - SubObjectRangeOffset offset; - - /// Stride between consecutive objects in this range - SubObjectRangeStride stride; - }; - - struct RootParameterInfo - { - IResourceView::Type type; - }; - - static bool isBindingRangeRootParameter( - SlangSession* globalSession, - const char* rootParameterAttributeName, - slang::TypeLayoutReflection* typeLayout, - Index bindingRangeIndex) - { - bool isRootParameter = false; - if (rootParameterAttributeName) - { - if (auto leafVariable = typeLayout->getBindingRangeLeafVariable(bindingRangeIndex)) - { - if (leafVariable->findUserAttributeByName( - globalSession, rootParameterAttributeName)) - { - isRootParameter = true; - } - } - } - return isRootParameter; - } - - struct Builder - { - public: - Builder(RendererBase* renderer) - : m_renderer(renderer) - {} - - RendererBase* m_renderer; - slang::TypeLayoutReflection* m_elementTypeLayout; - List<BindingRangeInfo> m_bindingRanges; - List<SubObjectRangeInfo> m_subObjectRanges; - List<RootParameterInfo> m_rootParamsInfo; - - /// The number of sub-objects (not just sub-object *ranges*) stored in instances of this layout - uint32_t m_subObjectCount = 0; - - /// Counters for the number of root parameters, resources, and samplers in this object itself - BindingOffset m_ownCounts; - - /// Counters for the number of root parameters, resources, and sampler in this object and transitive sub-objects - BindingOffset m_totalCounts; - - /// The number of root parameter consumed by (transitive) sub-objects - uint32_t m_childRootParameterCount = 0; - - /// The total size in bytes of the ordinary data for this object and transitive sub-object. - uint32_t m_totalOrdinaryDataSize = 0; - - /// The container type of this shader object. When `m_containerType` is - /// `StructuredBuffer` or `UnsizedArray`, this shader object represents a collection - /// instead of a single object. - ShaderObjectContainerType m_containerType = ShaderObjectContainerType::None; - - Result setElementTypeLayout(slang::TypeLayoutReflection* typeLayout) - { - typeLayout = _unwrapParameterGroups(typeLayout, m_containerType); - m_elementTypeLayout = typeLayout; - - // If the type contains any ordinary data, then we must reserve a buffer - // descriptor to hold it when binding as a parameter block. - // - m_totalOrdinaryDataSize = (uint32_t) typeLayout->getSize(); - if (m_totalOrdinaryDataSize != 0) - { - m_ownCounts.resource++; - } - - // We will scan over the reflected Slang binding ranges and add them - // to our array. There are two main things we compute along the way: - // - // * For each binding range we compute a `flatIndex` that can be - // used to identify where the values for the given range begin - // in the flattened arrays (e.g., `m_objects`) and descriptor - // tables that hold the state of a shader object. - // - // * We also update the various counters taht keep track of the number - // of sub-objects, resources, samplers, etc. that are being - // consumed. These counters will contribute to figuring out - // the descriptor table(s) that might be needed to represent - // the object. - // - SlangInt bindingRangeCount = typeLayout->getBindingRangeCount(); - for (SlangInt r = 0; r < bindingRangeCount; ++r) - { - slang::BindingType slangBindingType = typeLayout->getBindingRangeType(r); - uint32_t count = (uint32_t)typeLayout->getBindingRangeBindingCount(r); - slang::TypeLayoutReflection* slangLeafTypeLayout = - typeLayout->getBindingRangeLeafTypeLayout(r); - BindingRangeInfo bindingRangeInfo = {}; - bindingRangeInfo.bindingType = slangBindingType; - bindingRangeInfo.resourceShape = slangLeafTypeLayout->getResourceShape(); - bindingRangeInfo.count = count; - bindingRangeInfo.isRootParameter = isBindingRangeRootParameter( - m_renderer->slangContext.globalSession, - static_cast<D3D12Device*>(m_renderer) - ->m_extendedDesc.rootParameterShaderAttributeName, - typeLayout, - r); - if (bindingRangeInfo.isRootParameter) - { - RootParameterInfo rootInfo = {}; - switch (slangBindingType) - { - case slang::BindingType::RayTracingAccelerationStructure: - rootInfo.type = IResourceView::Type::AccelerationStructure; - break; - case slang::BindingType::RawBuffer: - case slang::BindingType::TypedBuffer: - rootInfo.type = IResourceView::Type::ShaderResource; - break; - case slang::BindingType::MutableRawBuffer: - case slang::BindingType::MutableTypedBuffer: - rootInfo.type = IResourceView::Type::UnorderedAccess; - break; - } - bindingRangeInfo.baseIndex = (uint32_t)m_rootParamsInfo.getCount(); - for (uint32_t i = 0; i < count; i++) - { - m_rootParamsInfo.add(rootInfo); - } - } - else - { - switch (slangBindingType) - { - case slang::BindingType::ConstantBuffer: - case slang::BindingType::ParameterBlock: - case slang::BindingType::ExistentialValue: - bindingRangeInfo.baseIndex = m_subObjectCount; - bindingRangeInfo.subObjectIndex = m_subObjectCount; - m_subObjectCount += count; - break; - case slang::BindingType::RawBuffer: - case slang::BindingType::MutableRawBuffer: - if (slangLeafTypeLayout->getType()->getElementType() != nullptr) - { - // A structured buffer occupies both a resource slot and - // a sub-object slot. - bindingRangeInfo.subObjectIndex = m_subObjectCount; - m_subObjectCount += count; - } - bindingRangeInfo.baseIndex = m_ownCounts.resource; - m_ownCounts.resource += count; - break; - case slang::BindingType::Sampler: - bindingRangeInfo.baseIndex = m_ownCounts.sampler; - m_ownCounts.sampler += count; - break; - - case slang::BindingType::CombinedTextureSampler: - // TODO: support this case... - break; - - case slang::BindingType::VaryingInput: - case slang::BindingType::VaryingOutput: - break; - - default: - bindingRangeInfo.baseIndex = m_ownCounts.resource; - m_ownCounts.resource += count; - break; - } - } - m_bindingRanges.add(bindingRangeInfo); - } - - // At this point we've computed the number of resources/samplers that - // the type needs to represent its *own* state, and stored those counts - // in `m_ownCounts`. Next we need to consider any resources/samplers - // and root parameters needed to represent the state of the transitive - // sub-objects of this objet, so that we can compute the total size - // of the object when bound to the pipeline. - - m_totalCounts = m_ownCounts; - - SlangInt subObjectRangeCount = typeLayout->getSubObjectRangeCount(); - for (SlangInt r = 0; r < subObjectRangeCount; ++r) - { - SlangInt bindingRangeIndex = typeLayout->getSubObjectRangeBindingRangeIndex(r); - auto slangBindingType = typeLayout->getBindingRangeType(bindingRangeIndex); - auto count = (uint32_t) typeLayout->getBindingRangeBindingCount(bindingRangeIndex); - slang::TypeLayoutReflection* slangLeafTypeLayout = - typeLayout->getBindingRangeLeafTypeLayout(bindingRangeIndex); - - // A sub-object range can either represent a sub-object of a known - // type, like a `ConstantBuffer<Foo>` or `ParameterBlock<Foo>` - // (in which case we can pre-compute a layout to use, based on - // the type `Foo`) *or* it can represent a sub-object of some - // existential type (e.g., `IBar`) in which case we cannot - // know the appropraite type/layout of sub-object to allocate. - // - RefPtr<ShaderObjectLayoutImpl> subObjectLayout; - if (slangBindingType == slang::BindingType::ExistentialValue) - { - if(auto pendingTypeLayout = slangLeafTypeLayout->getPendingDataTypeLayout()) - { - createForElementType( - m_renderer, - pendingTypeLayout, - subObjectLayout.writeRef()); - } - } - else - { - createForElementType( - m_renderer, - slangLeafTypeLayout->getElementTypeLayout(), - subObjectLayout.writeRef()); - } - - SubObjectRangeInfo subObjectRange; - subObjectRange.bindingRangeIndex = bindingRangeIndex; - subObjectRange.layout = subObjectLayout; - - // The Slang reflection API stors offset information for sub-object ranges, - // and we care about *some* of that information: in particular, we need - // the offset of sub-objects in terms of uniform/ordinary data for the - // cases where we need to fill in "pending" data in our ordinary buffer. - // - subObjectRange.offset = SubObjectRangeOffset(typeLayout->getSubObjectRangeOffset(r)); - subObjectRange.stride = SubObjectRangeStride(slangLeafTypeLayout); - - // The remaining offset information is computed based on the counters - // we are generating here, which depend only on the in-memory layout - // decisions being made in our implementation. Remember that the - // `register` and `space` values coming from DXBC/DXIL do *not* - // dictate the in-memory layout we use. - // - // Note: One subtle point here is that the `.rootParam` offset we are computing - // here does *not* include any root parameters that would be allocated - // for the parent object type itself (e.g., for descriptor tables - // used if it were bound as a parameter block). The later logic when - // we actually go to bind things will need to apply those offsets. - // - // Note: An even *more* subtle point is that the `.resource` offset - // being computed here *does* include the resource descriptor allocated - // for holding the ordinary data buffer, if any. The implications of - // this for later offset math is subtle. - // - subObjectRange.offset.rootParam = m_childRootParameterCount; - subObjectRange.offset.resource = m_totalCounts.resource; - subObjectRange.offset.sampler = m_totalCounts.sampler; - - // Along with the offset information, we also need to compute the - // "stride" between consecutive sub-objects in the range. The actual - // size/stride of a single object depends on the type of range we - // are dealing with. - // - BindingOffset objectCounts; - switch(slangBindingType) - { - default: - { - // We only treat buffers of interface types as actual sub-object binding range. - auto bindingRangeTypeLayout = - typeLayout->getBindingRangeLeafTypeLayout(bindingRangeIndex); - if (!bindingRangeTypeLayout) - continue; - auto elementType = - typeLayout->getBindingRangeLeafTypeLayout(bindingRangeIndex) - ->getElementTypeLayout(); - if (!elementType) - continue; - if (elementType->getKind() != slang::TypeReflection::Kind::Interface) - { - continue; - } - } - break; - - case slang::BindingType::ConstantBuffer: - { - SLANG_ASSERT(subObjectLayout); - - // The resource and sampler descriptors of a nested - // constant buffer will "leak" into those of the - // parent type, and we need to account for them - // whenever we allocate storage. - // - objectCounts.resource = subObjectLayout->getTotalResourceDescriptorCount(); - objectCounts.sampler = subObjectLayout->getTotalSamplerDescriptorCount(); - objectCounts.rootParam = subObjectRange.layout->getChildRootParameterCount(); - } - break; - - case slang::BindingType::ParameterBlock: - { - SLANG_ASSERT(subObjectLayout); - - // In contrast to a constant buffer, a parameter block can hide - // the resource and sampler descriptor allocation it uses (since they - // are allocated into the tables that make up the parameter block. - // - // The only resource usage that leaks into the surrounding context - // is the number of root parameters consumed. - // - objectCounts.rootParam = subObjectRange.layout->getTotalRootTableParameterCount(); - } - break; - - case slang::BindingType::ExistentialValue: - // An unspecialized existential/interface value cannot consume any resources - // as part of the parent object (it needs to fit inside the fixed-size - // represnetation of existential types). - // - // However, if we are statically specializing to a type that doesn't "fit" - // we may need to account for additional information that needs to be - // allocaated. - // - if(subObjectLayout) - { - // The ordinary data for an existential-type value is allocated into - // the same buffer as the parent object, so we only want to consider - // the resource descriptors *other than* the ordinary data buffer. - // - // Otherwise the logic here is identical to the constant buffer case. - // - objectCounts.resource = subObjectLayout->getTotalResourceDescriptorCountWithoutOrdinaryDataBuffer(); - objectCounts.sampler = subObjectLayout->getTotalSamplerDescriptorCount(); - objectCounts.rootParam = subObjectRange.layout->getChildRootParameterCount(); - - // Note: In the implementation for some other graphics API (e.g., Vulkan) there - // needs to be more work done to handle the fact that "pending" data from - // interface-type sub-objects get allocated to a distinct offset after all the - // "primary" data. We are consciously ignoring that issue here, and the physical - // layout of a shader object into the D3D12 binding state may end up interleaving - // resources/samplers for "primary" and "pending" data. - // - // If this choice ever causes issues, we can revisit the approach here. - - // An interface-type range that includes ordinary data can - // increase the size of the ordinary data buffer we need to - // allocate for the parent object. - // - uint32_t ordinaryDataEnd = subObjectRange.offset.pendingOrdinaryData - + (uint32_t) count * subObjectRange.stride.pendingOrdinaryData; - - if(ordinaryDataEnd > m_totalOrdinaryDataSize) - { - m_totalOrdinaryDataSize = ordinaryDataEnd; - } - } - break; - } - - // Once we've computed the usage for each object in the range, we can - // easily compute the usage for the entire range. - // - auto rangeResourceCount = count * objectCounts.resource; - auto rangeSamplerCount = count * objectCounts.sampler; - auto rangeRootParamCount = count * objectCounts.rootParam; - - m_totalCounts.resource += rangeResourceCount; - m_totalCounts.sampler += rangeSamplerCount; - m_childRootParameterCount += rangeRootParamCount; - - m_subObjectRanges.add(subObjectRange); - } - - // Once we have added up the resource usage from all the sub-objects - // we can look at the total number of resources and samplers that - // need to be bound as part of this objects descriptor tables and - // that will allow us to decide whether we need to allocate a root - // parameter for a resource table or not, ans similarly for a - // sampler table. - // - if(m_totalCounts.resource) m_ownCounts.rootParam++; - if(m_totalCounts.sampler) m_ownCounts.rootParam++; - - m_totalCounts.rootParam = m_ownCounts.rootParam + m_childRootParameterCount; - - return SLANG_OK; - } - - SlangResult build(ShaderObjectLayoutImpl** outLayout) - { - auto layout = RefPtr<ShaderObjectLayoutImpl>(new ShaderObjectLayoutImpl()); - SLANG_RETURN_ON_FAIL(layout->_init(this)); - - returnRefPtrMove(outLayout, layout); - return SLANG_OK; - } - }; - - static Result createForElementType( - RendererBase* renderer, - slang::TypeLayoutReflection* elementType, - ShaderObjectLayoutImpl** outLayout) - { - Builder builder(renderer); - builder.setElementTypeLayout(elementType); - return builder.build(outLayout); - } - - List<BindingRangeInfo> const& getBindingRanges() { return m_bindingRanges; } - - Index getBindingRangeCount() { return m_bindingRanges.getCount(); } - - BindingRangeInfo const& getBindingRange(Index index) { return m_bindingRanges[index]; } - - uint32_t getResourceSlotCount() { return m_ownCounts.resource; } - uint32_t getSamplerSlotCount() { return m_ownCounts.sampler; } - Index getSubObjectSlotCount() { return m_subObjectCount; } - Index getSubObjectCount() { return m_subObjectCount; } - - uint32_t getTotalResourceDescriptorCount() { return m_totalCounts.resource; } - uint32_t getTotalSamplerDescriptorCount() { return m_totalCounts.sampler; } - - uint32_t getOrdinaryDataBufferCount() { return m_totalOrdinaryDataSize ? 1 : 0; } - bool hasOrdinaryDataBuffer() { return m_totalOrdinaryDataSize != 0; } - - uint32_t getTotalResourceDescriptorCountWithoutOrdinaryDataBuffer() { return m_totalCounts.resource - getOrdinaryDataBufferCount(); } - - uint32_t getOwnUserRootParameterCount() { return (uint32_t)m_rootParamsInfo.getCount(); } - uint32_t getTotalRootTableParameterCount() { return m_totalCounts.rootParam; } - uint32_t getChildRootParameterCount() { return m_childRootParameterCount; } - - uint32_t getTotalOrdinaryDataSize() const { return m_totalOrdinaryDataSize; } - - SubObjectRangeInfo const& getSubObjectRange(Index index) - { - return m_subObjectRanges[index]; - } - List<SubObjectRangeInfo> const& getSubObjectRanges() { return m_subObjectRanges; } - - RendererBase* getRenderer() { return m_renderer; } - - slang::TypeReflection* getType() { return m_elementTypeLayout->getType(); } - - const RootParameterInfo& getRootParameterInfo(Index index) - { - return m_rootParamsInfo[index]; - } - - protected: - Result _init(Builder* builder) - { - auto renderer = builder->m_renderer; - - initBase(renderer, builder->m_elementTypeLayout); - - m_containerType = builder->m_containerType; - - m_bindingRanges = _Move(builder->m_bindingRanges); - m_subObjectRanges = _Move(builder->m_subObjectRanges); - m_rootParamsInfo = _Move(builder->m_rootParamsInfo); - - m_ownCounts = builder->m_ownCounts; - m_totalCounts = builder->m_totalCounts; - m_subObjectCount = builder->m_subObjectCount; - m_childRootParameterCount = builder->m_childRootParameterCount; - m_totalOrdinaryDataSize = builder->m_totalOrdinaryDataSize; - - return SLANG_OK; - } - - List<BindingRangeInfo> m_bindingRanges; - List<SubObjectRangeInfo> m_subObjectRanges; - List<RootParameterInfo> m_rootParamsInfo; - - BindingOffset m_ownCounts; - BindingOffset m_totalCounts; - - uint32_t m_subObjectCount = 0; - uint32_t m_childRootParameterCount = 0; - - uint32_t m_totalOrdinaryDataSize = 0; - }; - - class RootShaderObjectLayoutImpl : public ShaderObjectLayoutImpl - { - typedef ShaderObjectLayoutImpl Super; - - public: - struct EntryPointInfo - { - RefPtr<ShaderObjectLayoutImpl> layout; - BindingOffset offset; - }; - - struct Builder : Super::Builder - { - Builder( - RendererBase* renderer, - slang::IComponentType* program, - slang::ProgramLayout* programLayout) - : Super::Builder(renderer) - , m_program(program) - , m_programLayout(programLayout) - {} - - Result build(RootShaderObjectLayoutImpl** outLayout) - { - RefPtr<RootShaderObjectLayoutImpl> layout = new RootShaderObjectLayoutImpl(); - SLANG_RETURN_ON_FAIL(layout->_init(this)); - - returnRefPtrMove(outLayout, layout); - return SLANG_OK; - } - - void addGlobalParams(slang::VariableLayoutReflection* globalsLayout) - { - setElementTypeLayout(globalsLayout->getTypeLayout()); - } - - void addEntryPoint(SlangStage stage, ShaderObjectLayoutImpl* entryPointLayout) - { - EntryPointInfo info; - info.layout = entryPointLayout; - - info.offset.resource = m_totalCounts.resource; - info.offset.sampler = m_totalCounts.sampler; - info.offset.rootParam = m_childRootParameterCount; - - m_totalCounts.resource += entryPointLayout->getTotalResourceDescriptorCount(); - m_totalCounts.sampler += entryPointLayout->getTotalSamplerDescriptorCount(); - - // TODO(tfoley): Check this to make sure it is reasonable... - m_childRootParameterCount += entryPointLayout->getChildRootParameterCount(); - - m_entryPoints.add(info); - } - - slang::IComponentType* m_program; - slang::ProgramLayout* m_programLayout; - List<EntryPointInfo> m_entryPoints; - }; - - EntryPointInfo& getEntryPoint(Index index) { return m_entryPoints[index]; } - - List<EntryPointInfo>& getEntryPoints() { return m_entryPoints; } - - struct DescriptorSetLayout - { - List<D3D12_DESCRIPTOR_RANGE> m_resourceRanges; - List<D3D12_DESCRIPTOR_RANGE> m_samplerRanges; - uint32_t m_resourceCount = 0; - uint32_t m_samplerCount = 0; - }; - - struct RootSignatureDescBuilder - { - D3D12Device* m_device; - - RootSignatureDescBuilder(D3D12Device* device) - : m_device(device) - {} - - // We will use one descriptor set for the global scope and one additional - // descriptor set for each `ParameterBlock` binding range in the shader object - // hierarchy, regardless of the shader's `space` indices. - List<DescriptorSetLayout> m_descriptorSets; - List<D3D12_ROOT_PARAMETER> m_rootParameters; - D3D12_ROOT_SIGNATURE_DESC m_rootSignatureDesc = {}; - - static Result translateDescriptorRangeType( - slang::BindingType c, - D3D12_DESCRIPTOR_RANGE_TYPE* outType) - { - switch (c) - { - case slang::BindingType::ConstantBuffer: - *outType = D3D12_DESCRIPTOR_RANGE_TYPE_CBV; - return SLANG_OK; - case slang::BindingType::RawBuffer: - case slang::BindingType::Texture: - case slang::BindingType::TypedBuffer: - case slang::BindingType::RayTracingAccelerationStructure: - *outType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV; - return SLANG_OK; - case slang::BindingType::MutableRawBuffer: - case slang::BindingType::MutableTexture: - case slang::BindingType::MutableTypedBuffer: - *outType = D3D12_DESCRIPTOR_RANGE_TYPE_UAV; - return SLANG_OK; - case slang::BindingType::Sampler: - *outType = D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER; - return SLANG_OK; - default: - return SLANG_FAIL; - } - } - - /// Stores offset information to apply to the reflected register/space for a descriptor range. - /// - struct BindingRegisterOffset - { - uint32_t spaceOffset = 0; // The `space` index as specified in shader. - - enum { kRangeTypeCount = 4 }; - - /// An offset to apply for each D3D12 register class, as given - /// by a `D3D12_DESCRIPTOR_RANGE_TYPE`. - /// - /// Note that the `D3D12_DESCRIPTOR_RANGE_TYPE` enumeration has - /// values between 0 and 3, inclusive. - /// - uint32_t offsetForRangeType[kRangeTypeCount] = {0, 0, 0, 0}; - - uint32_t& operator[](D3D12_DESCRIPTOR_RANGE_TYPE type) - { - return offsetForRangeType[int(type)]; - } - - uint32_t operator[](D3D12_DESCRIPTOR_RANGE_TYPE type) const - { - return offsetForRangeType[int(type)]; - } - - BindingRegisterOffset() - {} - - BindingRegisterOffset(slang::VariableLayoutReflection* varLayout) - { - if(varLayout) - { - spaceOffset = (UINT) varLayout->getOffset(SLANG_PARAMETER_CATEGORY_REGISTER_SPACE); - offsetForRangeType[D3D12_DESCRIPTOR_RANGE_TYPE_CBV] = (UINT) varLayout->getOffset(SLANG_PARAMETER_CATEGORY_CONSTANT_BUFFER); - offsetForRangeType[D3D12_DESCRIPTOR_RANGE_TYPE_SRV] = (UINT) varLayout->getOffset(SLANG_PARAMETER_CATEGORY_SHADER_RESOURCE); - offsetForRangeType[D3D12_DESCRIPTOR_RANGE_TYPE_UAV] = (UINT) varLayout->getOffset(SLANG_PARAMETER_CATEGORY_UNORDERED_ACCESS); - offsetForRangeType[D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER] = (UINT) varLayout->getOffset(SLANG_PARAMETER_CATEGORY_SAMPLER_STATE); - } - } - - void operator+=(BindingRegisterOffset const& other) - { - spaceOffset += other.spaceOffset; - for(int i = 0; i < kRangeTypeCount; ++i) - { - offsetForRangeType[i] += other.offsetForRangeType[i]; - } - } - - }; - - struct BindingRegisterOffsetPair - { - BindingRegisterOffset primary; - BindingRegisterOffset pending; - - BindingRegisterOffsetPair() - {} - - BindingRegisterOffsetPair(slang::VariableLayoutReflection* varLayout) - : primary(varLayout) - , pending(varLayout->getPendingDataLayout()) - {} - - void operator+=(BindingRegisterOffsetPair const& other) - { - primary += other.primary; - pending += other.pending; - } - }; - /// Add a new descriptor set to the layout being computed. - /// - /// Note that a "descriptor set" in the layout may amount to - /// zero, one, or two different descriptor *tables* in the - /// final D3D12 root signature. Each descriptor set may - /// contain zero or more view ranges (CBV/SRV/UAV) and zero - /// or more sampler ranges. It maps to a view descriptor table - /// if the number of view ranges is non-zero and to a sampler - /// descriptor table if the number of sampler ranges is non-zero. - /// - uint32_t addDescriptorSet() - { - auto result = (uint32_t) m_descriptorSets.getCount(); - m_descriptorSets.add(DescriptorSetLayout{}); - return result; - } - - Result addDescriptorRange( - Index physicalDescriptorSetIndex, - D3D12_DESCRIPTOR_RANGE_TYPE rangeType, - UINT registerIndex, - UINT spaceIndex, - UINT count, - bool isRootParameter) - { - if (isRootParameter) - { - D3D12_ROOT_PARAMETER rootParam = {}; - switch (rangeType) - { - case D3D12_DESCRIPTOR_RANGE_TYPE_SRV: - rootParam.ParameterType = D3D12_ROOT_PARAMETER_TYPE_SRV; - break; - case D3D12_DESCRIPTOR_RANGE_TYPE_UAV: - rootParam.ParameterType = D3D12_ROOT_PARAMETER_TYPE_UAV; - break; - default: - getDebugCallback()->handleMessage( - DebugMessageType::Error, - DebugMessageSource::Layer, - "A shader parameter marked as root parameter is neither SRV nor UAV."); - return SLANG_FAIL; - } - rootParam.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; - rootParam.Descriptor.RegisterSpace = spaceIndex; - rootParam.Descriptor.ShaderRegister = registerIndex; - m_rootParameters.add(rootParam); - return SLANG_OK; - } - - auto& descriptorSet = m_descriptorSets[physicalDescriptorSetIndex]; - - D3D12_DESCRIPTOR_RANGE range = {}; - range.RangeType = rangeType; - range.NumDescriptors = count; - range.BaseShaderRegister = registerIndex; - range.RegisterSpace = spaceIndex; - range.OffsetInDescriptorsFromTableStart = D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND; - - if (range.RangeType == D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER) - { - descriptorSet.m_samplerRanges.add(range); - descriptorSet.m_samplerCount += range.NumDescriptors; - } - else - { - descriptorSet.m_resourceRanges.add(range); - descriptorSet.m_resourceCount += range.NumDescriptors; - } - - return SLANG_OK; - } - /// Add one descriptor range as specified in Slang reflection information to the layout. - /// - /// The layout information is taken from `typeLayout` for the descriptor - /// range with the given `descriptorRangeIndex` within the logical - /// descriptor set (reflected by Slang) with the given `logicalDescriptorSetIndex`. - /// - /// The `physicalDescriptorSetIndex` is the index in the `m_descriptorSets` array of - /// the descriptor set that the range should be added to. - /// - /// The `offset` encodes information about space and/or register offsets that - /// should be applied to descrptor ranges. - /// - /// This operation can fail if the given descriptor range encodes a range that - /// doesn't map to anything directly supported by D3D12. Higher-level routines - /// will often want to ignore such failures. - /// - Result addDescriptorRange( - slang::TypeLayoutReflection* typeLayout, - Index physicalDescriptorSetIndex, - BindingRegisterOffset const& containerOffset, - BindingRegisterOffset const& elementOffset, - Index logicalDescriptorSetIndex, - Index descriptorRangeIndex, - bool isRootParameter) - { - auto bindingType = typeLayout->getDescriptorSetDescriptorRangeType(logicalDescriptorSetIndex, descriptorRangeIndex); - auto count = typeLayout->getDescriptorSetDescriptorRangeDescriptorCount(logicalDescriptorSetIndex, descriptorRangeIndex); - auto index = typeLayout->getDescriptorSetDescriptorRangeIndexOffset(logicalDescriptorSetIndex, descriptorRangeIndex); - auto space = typeLayout->getDescriptorSetSpaceOffset(logicalDescriptorSetIndex); - - D3D12_DESCRIPTOR_RANGE_TYPE rangeType; - SLANG_RETURN_ON_FAIL(translateDescriptorRangeType(bindingType, &rangeType)); - - return addDescriptorRange( - physicalDescriptorSetIndex, - rangeType, - (UINT)index + elementOffset[rangeType], - (UINT)space + containerOffset.spaceOffset, - (UINT)count, - isRootParameter); - } - - /// Add one binding range to the computed layout. - /// - /// The layout information is taken from `typeLayout` for the binding - /// range with the given `bindingRangeIndex`. - /// - /// The `physicalDescriptorSetIndex` is the index in the `m_descriptorSets` array of - /// the descriptor set that the range should be added to. - /// - /// The `offset` encodes information about space and/or register offsets that - /// should be applied to descrptor ranges. - /// - /// Note that a single binding range may encompass zero or more descriptor ranges. - /// - void addBindingRange( - slang::TypeLayoutReflection* typeLayout, - Index physicalDescriptorSetIndex, - BindingRegisterOffset const& containerOffset, - BindingRegisterOffset const& elementOffset, - Index bindingRangeIndex) - { - auto logicalDescriptorSetIndex = typeLayout->getBindingRangeDescriptorSetIndex(bindingRangeIndex); - auto firstDescriptorRangeIndex = typeLayout->getBindingRangeFirstDescriptorRangeIndex(bindingRangeIndex); - Index descriptorRangeCount = typeLayout->getBindingRangeDescriptorRangeCount(bindingRangeIndex); - bool isRootParameter = isBindingRangeRootParameter( - m_device->slangContext.globalSession, - m_device->m_extendedDesc.rootParameterShaderAttributeName, - typeLayout, - bindingRangeIndex); - for( Index i = 0; i < descriptorRangeCount; ++i ) - { - auto descriptorRangeIndex = firstDescriptorRangeIndex + i; - - // Note: we ignore the `Result` returned by `addDescriptorRange()` because we - // want to silently skip any ranges that represent kinds of bindings that - // don't actually exist in D3D12. - // - addDescriptorRange( - typeLayout, - physicalDescriptorSetIndex, - containerOffset, - elementOffset, - logicalDescriptorSetIndex, - descriptorRangeIndex, - isRootParameter); - } - } - - void addAsValue( - slang::VariableLayoutReflection* varLayout, - Index physicalDescriptorSetIndex) - { - BindingRegisterOffsetPair offset(varLayout); - addAsValue(varLayout->getTypeLayout(), physicalDescriptorSetIndex, offset, offset); - } - - - /// Add binding ranges and parameter blocks to the root signature. - /// - /// The layout information is taken from `typeLayout` which should - /// be a layout for either a program or an entry point. - /// - /// The `physicalDescriptorSetIndex` is the index in the `m_descriptorSets` array of - /// the descriptor set that binding ranges not belonging to nested - /// parameter blocks should be added to. - /// - /// The `offset` encodes information about space and/or register offsets that - /// should be applied to descrptor ranges. - /// - void addAsConstantBuffer( - slang::TypeLayoutReflection* typeLayout, - Index physicalDescriptorSetIndex, - BindingRegisterOffsetPair const& containerOffset, - BindingRegisterOffsetPair const& elementOffset) - { - if(typeLayout->getSize(SLANG_PARAMETER_CATEGORY_UNIFORM) != 0) - { - auto descriptorRangeType = D3D12_DESCRIPTOR_RANGE_TYPE_CBV; - auto& offsetForRangeType = containerOffset.primary.offsetForRangeType[descriptorRangeType]; - addDescriptorRange( - physicalDescriptorSetIndex, - descriptorRangeType, - offsetForRangeType, - containerOffset.primary.spaceOffset, - 1, - false); - } - - addAsValue(typeLayout, physicalDescriptorSetIndex, containerOffset, elementOffset); - } - - void addAsValue( - slang::TypeLayoutReflection* typeLayout, - Index physicalDescriptorSetIndex, - BindingRegisterOffsetPair const& containerOffset, - BindingRegisterOffsetPair const& elementOffset) - { - // Our first task is to add the binding ranges for stuff that is - // directly contained in `typeLayout` rather than via sub-objects. - // - // Our goal is to have the descriptors for directly-contained views/samplers - // always be contiguous in CPU and GPU memory, so that we can write - // to them easily with a single operaiton. - // - Index bindingRangeCount = typeLayout->getBindingRangeCount(); - for (Index bindingRangeIndex = 0; bindingRangeIndex < bindingRangeCount; bindingRangeIndex++) - { - // We will look at the type of each binding range and intentionally - // skip those that represent sub-objects. - // - auto bindingType = typeLayout->getBindingRangeType(bindingRangeIndex); - switch(bindingType) - { - case slang::BindingType::ConstantBuffer: - case slang::BindingType::ParameterBlock: - case slang::BindingType::ExistentialValue: - continue; - - default: - break; - } - - // For binding ranges that don't represent sub-objects, we will add - // all of the descriptor ranges they encompass to the root signature. - // - addBindingRange( - typeLayout, - physicalDescriptorSetIndex, - containerOffset.primary, - elementOffset.primary, - bindingRangeIndex); - } - - // Next we need to recursively include everything bound via sub-objects - Index subObjectRangeCount = typeLayout->getSubObjectRangeCount(); - for (Index subObjectRangeIndex = 0; subObjectRangeIndex < subObjectRangeCount; subObjectRangeIndex++) - { - auto bindingRangeIndex = typeLayout->getSubObjectRangeBindingRangeIndex(subObjectRangeIndex); - auto bindingType = typeLayout->getBindingRangeType(bindingRangeIndex); - - auto subObjectTypeLayout = typeLayout->getBindingRangeLeafTypeLayout(bindingRangeIndex); - - BindingRegisterOffsetPair subObjectRangeContainerOffset = containerOffset; - subObjectRangeContainerOffset += BindingRegisterOffsetPair( - typeLayout->getSubObjectRangeOffset(subObjectRangeIndex)); - BindingRegisterOffsetPair subObjectRangeElementOffset = elementOffset; - subObjectRangeElementOffset += BindingRegisterOffsetPair( - typeLayout->getSubObjectRangeOffset(subObjectRangeIndex)); - - switch(bindingType) - { - case slang::BindingType::ConstantBuffer: - { - auto containerVarLayout = subObjectTypeLayout->getContainerVarLayout(); - SLANG_ASSERT(containerVarLayout); - - auto elementVarLayout = subObjectTypeLayout->getElementVarLayout(); - SLANG_ASSERT(elementVarLayout); - - auto elementTypeLayout = elementVarLayout->getTypeLayout(); - SLANG_ASSERT(elementTypeLayout); - - BindingRegisterOffsetPair containerOffset = subObjectRangeContainerOffset; - containerOffset += BindingRegisterOffsetPair(containerVarLayout); - - BindingRegisterOffsetPair elementOffset = subObjectRangeElementOffset; - elementOffset += BindingRegisterOffsetPair(elementVarLayout); - - addAsConstantBuffer(elementTypeLayout, physicalDescriptorSetIndex, containerOffset, elementOffset); - } - break; - - case slang::BindingType::ParameterBlock: - { - auto containerVarLayout = subObjectTypeLayout->getContainerVarLayout(); - SLANG_ASSERT(containerVarLayout); - - auto elementVarLayout = subObjectTypeLayout->getElementVarLayout(); - SLANG_ASSERT(elementVarLayout); - - auto elementTypeLayout = elementVarLayout->getTypeLayout(); - SLANG_ASSERT(elementTypeLayout); - - BindingRegisterOffsetPair subDescriptorSetOffset; - subDescriptorSetOffset.primary.spaceOffset = - subObjectRangeElementOffset.primary.spaceOffset; - subDescriptorSetOffset.pending.spaceOffset = - subObjectRangeElementOffset.pending.spaceOffset; - - auto subPhysicalDescriptorSetIndex = addDescriptorSet(); - - BindingRegisterOffsetPair containerOffset = subDescriptorSetOffset; - containerOffset += BindingRegisterOffsetPair(containerVarLayout); - - BindingRegisterOffsetPair elementOffset = subDescriptorSetOffset; - elementOffset += BindingRegisterOffsetPair(elementVarLayout); - - addAsConstantBuffer(elementTypeLayout, subPhysicalDescriptorSetIndex, containerOffset, elementOffset); - } - break; - - case slang::BindingType::ExistentialValue: - { - // Any nested binding ranges in the sub-object will "leak" into the - // binding ranges for the surrounding context. - // - auto specializedTypeLayout = subObjectTypeLayout->getPendingDataTypeLayout(); - if(specializedTypeLayout) - { - BindingRegisterOffsetPair pendingOffset; - pendingOffset.primary = subObjectRangeElementOffset.pending; - - addAsValue( - specializedTypeLayout, - physicalDescriptorSetIndex, - pendingOffset, - pendingOffset); - } - } - break; - } - } - -// BindingRegisterOffsetPair pendingOffset; -// pendingOffset.primary = offset.pending; -// addPendingResourceBindingRanges(typeLayout, physicalDescriptorSetIndex, pendingOffset); - } - - D3D12_ROOT_SIGNATURE_DESC& build() - { - for (Index i = 0; i < m_descriptorSets.getCount(); i++) - { - auto& descriptorSet = m_descriptorSets[i]; -// D3D12Device::DescriptorSetInfo setInfo; -// setInfo.resourceDescriptorCount = descriptorSet.m_resourceCount; -// setInfo.samplerDescriptorCount = descriptorSet.m_samplerCount; -// outRootDescriptorSetInfos.add(setInfo); - if (descriptorSet.m_resourceRanges.getCount()) - { - D3D12_ROOT_PARAMETER rootParam = {}; - rootParam.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; - rootParam.DescriptorTable.NumDescriptorRanges = - (UINT)descriptorSet.m_resourceRanges.getCount(); - rootParam.DescriptorTable.pDescriptorRanges = - descriptorSet.m_resourceRanges.getBuffer(); - m_rootParameters.add(rootParam); - } - if (descriptorSet.m_samplerRanges.getCount()) - { - D3D12_ROOT_PARAMETER rootParam = {}; - rootParam.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; - rootParam.DescriptorTable.NumDescriptorRanges = - (UINT)descriptorSet.m_samplerRanges.getCount(); - rootParam.DescriptorTable.pDescriptorRanges = - descriptorSet.m_samplerRanges.getBuffer(); - m_rootParameters.add(rootParam); - } - } - - m_rootSignatureDesc.NumParameters = UINT(m_rootParameters.getCount()); - m_rootSignatureDesc.pParameters = m_rootParameters.getBuffer(); - - // TODO: static samplers should be reasonably easy to support... - m_rootSignatureDesc.NumStaticSamplers = 0; - m_rootSignatureDesc.pStaticSamplers = nullptr; - - // TODO: only set this flag if needed (requires creating root - // signature at same time as pipeline state...). - // - m_rootSignatureDesc.Flags = - D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT; - - return m_rootSignatureDesc; - } - }; - - static Result createRootSignatureFromSlang( - D3D12Device* device, - RootShaderObjectLayoutImpl* rootLayout, - slang::IComponentType* program, - ID3D12RootSignature** outRootSignature, - ID3DBlob** outError) - { - // We are going to build up the root signature by adding - // binding/descritpor ranges and nested parameter blocks - // based on the computed layout information for `program`. - // - RootSignatureDescBuilder builder(device); - auto layout = program->getLayout(); - - // The layout information computed by Slang breaks up shader - // parameters into what we can think of as "logical" descriptor - // sets based on whether or not parameters have the same `space`. - // - // We want to basically ignore that decomposition and generate a - // single descriptor set to hold all top-level parameters, and only - // generate distinct descriptor sets when the shader has opted in - // via explicit parameter blocks. - // - // To achieve this goal, we will manually allocate a default descriptor - // set for root parameters in our signature, and then recursively - // add all the binding/descriptor ranges implied by the global-scope - // parameters. - // - auto rootDescriptorSetIndex = builder.addDescriptorSet(); - builder.addAsValue(layout->getGlobalParamsVarLayout(), rootDescriptorSetIndex); - - for (SlangUInt i = 0; i < layout->getEntryPointCount(); i++) - { - // Entry-point parameters should also be added to the default root - // descriptor set. - // - // We add the parameters using the "variable layout" for the entry point - // and not just its type layout, to ensure that any offset information is - // applied correctly to the `register` and `space` information for entry-point - // parameters. - // - // Note: When we start to support DXR we will need to handle entry-point parameters - // differently because they will need to map to local root signatures rather than - // being included in the global root signature as is being done here. - // - auto entryPoint = layout->getEntryPointByIndex(i); - builder.addAsValue(entryPoint->getVarLayout(), rootDescriptorSetIndex); - } - - auto& rootSignatureDesc = builder.build(); - - ComPtr<ID3DBlob> signature; - ComPtr<ID3DBlob> error; - if (SLANG_FAILED(device->m_D3D12SerializeRootSignature( - &rootSignatureDesc, - D3D_ROOT_SIGNATURE_VERSION_1, - signature.writeRef(), - error.writeRef()))) - { - getDebugCallback()->handleMessage(DebugMessageType::Error, DebugMessageSource::Layer, "error: D3D12SerializeRootSignature failed"); - if (error) - { - getDebugCallback()->handleMessage( - DebugMessageType::Error, - DebugMessageSource::Driver, - (const char*)error->GetBufferPointer()); - if (outError) - returnComPtr(outError, error); - } - return SLANG_FAIL; - } - - SLANG_RETURN_ON_FAIL(device->m_device->CreateRootSignature( - 0, - signature->GetBufferPointer(), - signature->GetBufferSize(), - IID_PPV_ARGS(outRootSignature))); - return SLANG_OK; - } - - static Result create( - D3D12Device* device, - slang::IComponentType* program, - slang::ProgramLayout* programLayout, - RootShaderObjectLayoutImpl** outLayout, - ID3DBlob** outError) - { - RootShaderObjectLayoutImpl::Builder builder(device, program, programLayout); - builder.addGlobalParams(programLayout->getGlobalParamsVarLayout()); - - SlangInt entryPointCount = programLayout->getEntryPointCount(); - for (SlangInt e = 0; e < entryPointCount; ++e) - { - auto slangEntryPoint = programLayout->getEntryPointByIndex(e); - RefPtr<ShaderObjectLayoutImpl> entryPointLayout; - SLANG_RETURN_ON_FAIL(ShaderObjectLayoutImpl::createForElementType( - device, slangEntryPoint->getTypeLayout(), entryPointLayout.writeRef())); - builder.addEntryPoint(slangEntryPoint->getStage(), entryPointLayout); - } - - RefPtr<RootShaderObjectLayoutImpl> layout; - SLANG_RETURN_ON_FAIL(builder.build(layout.writeRef())); - - if (program->getSpecializationParamCount() == 0) - { - // For root object, we would like know the union of all binding slots - // including all sub-objects in the shader-object hierarchy, so at - // parameter binding time we can easily know how many GPU descriptor tables - // to create without walking through the shader-object hierarchy again. - // We build out this array along with root signature construction and store - // it in `m_gpuDescriptorSetInfos`. - SLANG_RETURN_ON_FAIL(createRootSignatureFromSlang( - device, - layout, - program, - layout->m_rootSignature.writeRef(), - outError)); - } - - *outLayout = layout.detach(); - - return SLANG_OK; - } - - slang::IComponentType* getSlangProgram() const { return m_program; } - slang::ProgramLayout* getSlangProgramLayout() const { return m_programLayout; } - - protected: - Result _init(Builder* builder) - { - auto renderer = builder->m_renderer; - - SLANG_RETURN_ON_FAIL(Super::_init(builder)); - - m_program = builder->m_program; - m_programLayout = builder->m_programLayout; - m_entryPoints = builder->m_entryPoints; - return SLANG_OK; - } - - ComPtr<slang::IComponentType> m_program; - slang::ProgramLayout* m_programLayout = nullptr; - - List<EntryPointInfo> m_entryPoints; - - public: - ComPtr<ID3D12RootSignature> m_rootSignature; -// List<DescriptorSetInfo> m_gpuDescriptorSetInfos; - }; - - struct ShaderBinary - { - SlangStage stage; - slang::EntryPointReflection* entryPointInfo; - String actualEntryPointNameInAPI; - List<uint8_t> code; - }; - - class ShaderProgramImpl : public ShaderProgramBase - { - public: - RefPtr<RootShaderObjectLayoutImpl> m_rootObjectLayout; - List<ShaderBinary> m_shaders; - - virtual Result createShaderModule( - slang::EntryPointReflection* entryPointInfo, ComPtr<ISlangBlob> kernelCode) override - { - ShaderBinary shaderBin; - shaderBin.stage = entryPointInfo->getStage(); - shaderBin.entryPointInfo = entryPointInfo; - shaderBin.code.addRange( - reinterpret_cast<const uint8_t*>(kernelCode->getBufferPointer()), - (Index)kernelCode->getBufferSize()); - m_shaders.add(_Move(shaderBin)); - return SLANG_OK; - } - }; - - class ShaderObjectImpl - : public ShaderObjectBaseImpl< - ShaderObjectImpl, - ShaderObjectLayoutImpl, - SimpleShaderObjectData> - { - typedef ShaderObjectBaseImpl< - ShaderObjectImpl, - ShaderObjectLayoutImpl, - SimpleShaderObjectData> - Super; - public: - static Result create( - D3D12Device* device, - ShaderObjectLayoutImpl* layout, - ShaderObjectImpl** outShaderObject) - { - auto object = RefPtr<ShaderObjectImpl>(new ShaderObjectImpl()); - SLANG_RETURN_ON_FAIL( - object->init(device, layout, device->m_cpuViewHeap.Ptr(), device->m_cpuSamplerHeap.Ptr())); - returnRefPtrMove(outShaderObject, object); - return SLANG_OK; - } - - ~ShaderObjectImpl() - { - m_descriptorSet.freeIfSupported(); - } - - RendererBase* getDevice() { return m_device.get(); } - - SLANG_NO_THROW UInt SLANG_MCALL getEntryPointCount() SLANG_OVERRIDE { return 0; } - - SLANG_NO_THROW Result SLANG_MCALL getEntryPoint(UInt index, IShaderObject** outEntryPoint) - SLANG_OVERRIDE - { - *outEntryPoint = nullptr; - return SLANG_OK; - } - - virtual SLANG_NO_THROW const void* SLANG_MCALL getRawData() override - { - return m_data.getBuffer(); - } - - virtual SLANG_NO_THROW size_t SLANG_MCALL getSize() override - { - return (size_t)m_data.getCount(); - } - - SLANG_NO_THROW Result SLANG_MCALL - setData(ShaderOffset const& inOffset, void const* data, size_t inSize) SLANG_OVERRIDE - { - Index offset = inOffset.uniformOffset; - Index size = inSize; - - char* dest = m_data.getBuffer(); - Index availableSize = m_data.getCount(); - - // TODO: We really should bounds-check access rather than silently ignoring sets - // that are too large, but we have several test cases that set more data than - // an object actually stores on several targets... - // - if (offset < 0) - { - size += offset; - offset = 0; - } - if ((offset + size) >= availableSize) - { - size = availableSize - offset; - } - - memcpy(dest + offset, data, size); - - m_isConstantBufferDirty = true; - - m_version++; - - return SLANG_OK; - } - - SLANG_NO_THROW Result SLANG_MCALL - setObject(ShaderOffset const& offset, IShaderObject* object) SLANG_OVERRIDE - { - SLANG_RETURN_ON_FAIL(Super::setObject(offset, object)); - if (m_isMutable) - { - auto subObjectIndex = getSubObjectIndex(offset); - if (subObjectIndex >= m_subObjectVersions.getCount()) - m_subObjectVersions.setCount(subObjectIndex + 1); - m_subObjectVersions[subObjectIndex] = - static_cast<ShaderObjectImpl*>(object)->m_version; - m_version++; - } - return SLANG_OK; - } - - SLANG_NO_THROW Result SLANG_MCALL - setResource(ShaderOffset const& offset, IResourceView* resourceView) SLANG_OVERRIDE; - - SLANG_NO_THROW Result SLANG_MCALL - setSampler(ShaderOffset const& offset, ISamplerState* sampler) SLANG_OVERRIDE - { - if (offset.bindingRangeIndex < 0) - return SLANG_E_INVALID_ARG; - auto layout = getLayout(); - if (offset.bindingRangeIndex >= layout->getBindingRangeCount()) - return SLANG_E_INVALID_ARG; - auto& bindingRange = layout->getBindingRange(offset.bindingRangeIndex); - auto samplerImpl = static_cast<SamplerStateImpl*>(sampler); - ID3D12Device* d3dDevice = static_cast<D3D12Device*>(getDevice())->m_device; - d3dDevice->CopyDescriptorsSimple( - 1, - m_descriptorSet.samplerTable.getCpuHandle( - bindingRange.baseIndex + - (int32_t)offset.bindingArrayIndex), - samplerImpl->m_descriptor.cpuHandle, - D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER); - m_version++; - return SLANG_OK; - } - - SLANG_NO_THROW Result SLANG_MCALL setCombinedTextureSampler( - ShaderOffset const& offset, - IResourceView* textureView, - ISamplerState* sampler) SLANG_OVERRIDE - { -#if 0 - if (offset.bindingRangeIndex < 0) - return SLANG_E_INVALID_ARG; - auto layout = getLayout(); - if (offset.bindingRangeIndex >= layout->getBindingRangeCount()) - return SLANG_E_INVALID_ARG; - auto& bindingRange = layout->getBindingRange(offset.bindingRangeIndex); - auto resourceViewImpl = static_cast<ResourceViewImpl*>(textureView); - ID3D12Device* d3dDevice = static_cast<D3D12Device*>(getDevice())->m_device; - d3dDevice->CopyDescriptorsSimple( - 1, - m_resourceHeap.getCpuHandle( - m_descriptorSet.m_resourceTable + - bindingRange.binding.offsetInDescriptorTable.resource + - (int32_t)offset.bindingArrayIndex), - resourceViewImpl->m_descriptor.cpuHandle, - D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); - auto samplerImpl = static_cast<SamplerStateImpl*>(sampler); - d3dDevice->CopyDescriptorsSimple( - 1, - m_samplerHeap.getCpuHandle( - m_descriptorSet.m_samplerTable + - bindingRange.binding.offsetInDescriptorTable.sampler + - (int32_t)offset.bindingArrayIndex), - samplerImpl->m_descriptor.cpuHandle, - D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER); -#endif - m_version++; - return SLANG_OK; - } - protected: - Result init( - D3D12Device* device, - ShaderObjectLayoutImpl* layout, - DescriptorHeapReference viewHeap, - DescriptorHeapReference samplerHeap) - { - m_device = device; - - m_layout = layout; - - m_cachedTransientHeap = nullptr; - m_cachedTransientHeapVersion = 0; - m_isConstantBufferDirty = true; - - // If the layout tells us that there is any uniform data, - // then we will allocate a CPU memory buffer to hold that data - // while it is being set from the host. - // - // Once the user is done setting the parameters/fields of this - // shader object, we will produce a GPU-memory version of the - // uniform data (which includes values from this object and - // any existential-type sub-objects). - // - size_t uniformSize = layout->getElementTypeLayout()->getSize(); - if (uniformSize) - { - m_data.setCount(uniformSize); - memset(m_data.getBuffer(), 0, uniformSize); - } - m_rootArguments.setCount(layout->getOwnUserRootParameterCount()); - memset( - m_rootArguments.getBuffer(), - 0, - sizeof(D3D12_GPU_VIRTUAL_ADDRESS) * m_rootArguments.getCount()); - // Each shader object will own CPU descriptor heap memory - // for any resource or sampler descriptors it might store - // as part of its value. - // - // This allocate includes a reservation for any constant - // buffer descriptor pertaining to the ordinary data, - // but does *not* include any descriptors that are managed - // as part of sub-objects. - // - if (auto resourceCount = layout->getResourceSlotCount()) - { - m_descriptorSet.resourceTable.allocate(viewHeap, resourceCount); - - // We must also ensure that the memory for any resources - // referenced by descriptors in this object does not get - // freed while the object is still live. - // - m_boundResources.setCount(resourceCount); - } - if (auto samplerCount = layout->getSamplerSlotCount()) - { - m_descriptorSet.samplerTable.allocate(samplerHeap, samplerCount); - } - - // If the layout specifies that we have any sub-objects, then - // we need to size the array to account for them. - // - Index subObjectCount = layout->getSubObjectSlotCount(); - m_objects.setCount(subObjectCount); - - for (auto subObjectRangeInfo : layout->getSubObjectRanges()) - { - auto subObjectLayout = subObjectRangeInfo.layout; - - // In the case where the sub-object range represents an - // existential-type leaf field (e.g., an `IBar`), we - // cannot pre-allocate the object(s) to go into that - // range, since we can't possibly know what to allocate - // at this point. - // - if (!subObjectLayout) - continue; - // - // Otherwise, we will allocate a sub-object to fill - // in each entry in this range, based on the layout - // information we already have. - - auto& bindingRangeInfo = - layout->getBindingRange(subObjectRangeInfo.bindingRangeIndex); - for (uint32_t i = 0; i < bindingRangeInfo.count; ++i) - { - RefPtr<ShaderObjectImpl> subObject; - SLANG_RETURN_ON_FAIL( - ShaderObjectImpl::create(device, subObjectLayout, subObject.writeRef())); - m_objects[bindingRangeInfo.subObjectIndex + i] = subObject; - } - } - - return SLANG_OK; - } - - /// Write the uniform/ordinary data of this object into the given `dest` buffer at the given - /// `offset` - Result _writeOrdinaryData( - PipelineCommandEncoder* encoder, - BufferResourceImpl* buffer, - size_t offset, - size_t destSize, - ShaderObjectLayoutImpl* specializedLayout) - { - auto src = m_data.getBuffer(); - auto srcSize = size_t(m_data.getCount()); - - SLANG_ASSERT(srcSize <= destSize); - - _uploadBufferData(encoder->m_device, encoder->m_d3dCmdList, encoder->m_transientHeap, buffer, offset, srcSize, src); - - // In the case where this object has any sub-objects of - // existential/interface type, we need to recurse on those objects - // that need to write their state into an appropriate "pending" allocation. - // - // Note: Any values that could fit into the "payload" included - // in the existential-type field itself will have already been - // written as part of `setObject()`. This loop only needs to handle - // those sub-objects that do not "fit." - // - // An implementers looking at this code might wonder if things could be changed - // so that *all* writes related to sub-objects for interface-type fields could - // be handled in this one location, rather than having some in `setObject()` and - // others handled here. - // - Index subObjectRangeCounter = 0; - for (auto const& subObjectRangeInfo : specializedLayout->getSubObjectRanges()) - { - Index subObjectRangeIndex = subObjectRangeCounter++; - auto const& bindingRangeInfo = - specializedLayout->getBindingRange(subObjectRangeInfo.bindingRangeIndex); - - // We only need to handle sub-object ranges for interface/existential-type fields, - // because fields of constant-buffer or parameter-block type are responsible for - // the ordinary/uniform data of their own existential/interface-type sub-objects. - // - if (bindingRangeInfo.bindingType != slang::BindingType::ExistentialValue) - continue; - - // Each sub-object range represents a single "leaf" field, but might be nested - // under zero or more outer arrays, such that the number of existential values - // in the same range can be one or more. - // - auto count = bindingRangeInfo.count; - - // We are not concerned with the case where the existential value(s) in the range - // git into the payload part of the leaf field. - // - // In the case where the value didn't fit, the Slang layout strategy would have - // considered the requirements of the value as a "pending" allocation, and would - // allocate storage for the ordinary/uniform part of that pending allocation inside - // of the parent object's type layout. - // - // Here we assume that the Slang reflection API can provide us with a single byte - // offset and stride for the location of the pending data allocation in the - // specialized type layout, which will store the values for this sub-object range. - // - // TODO: The reflection API functions we are assuming here haven't been implemented - // yet, so the functions being called here are stubs. - // - // TODO: It might not be that a single sub-object range can reliably map to a single - // contiguous array with a single stride; we need to carefully consider what the - // layout logic does for complex cases with multiple layers of nested arrays and - // structures. - // - size_t subObjectRangePendingDataOffset = subObjectRangeInfo.offset.pendingOrdinaryData; - size_t subObjectRangePendingDataStride = subObjectRangeInfo.stride.pendingOrdinaryData; - - // If the range doesn't actually need/use the "pending" allocation at all, then - // we need to detect that case and skip such ranges. - // - // TODO: This should probably be handled on a per-object basis by caching a "does it - // fit?" bit as part of the information for bound sub-objects, given that we already - // compute the "does it fit?" status as part of `setObject()`. - // - if (subObjectRangePendingDataOffset == 0) - continue; - - for (uint32_t i = 0; i < count; ++i) - { - auto subObject = m_objects[bindingRangeInfo.subObjectIndex + i]; - - RefPtr<ShaderObjectLayoutImpl> subObjectLayout; - SLANG_RETURN_ON_FAIL( - subObject->getSpecializedLayout(subObjectLayout.writeRef())); - - auto subObjectOffset = - subObjectRangePendingDataOffset + i * subObjectRangePendingDataStride; - - subObject->_writeOrdinaryData( - encoder, - buffer, - offset + subObjectOffset, - destSize - subObjectOffset, - subObjectLayout); - } - } - - return SLANG_OK; - } - - bool shouldAllocateConstantBuffer(TransientResourceHeapImpl* transientHeap) - { - if (m_isConstantBufferDirty || m_cachedTransientHeap != transientHeap || - m_cachedTransientHeapVersion != transientHeap->getVersion()) - { - return true; - } - return false; - } - - /// Ensure that the `m_ordinaryDataBuffer` has been created, if it is needed - Result _ensureOrdinaryDataBufferCreatedIfNeeded( - PipelineCommandEncoder* encoder, - ShaderObjectLayoutImpl* specializedLayout) - { - // If data has been changed since last allocation/filling of constant buffer, - // we will need to allocate a new one. - // - if (!shouldAllocateConstantBuffer(encoder->m_transientHeap)) - { - return SLANG_OK; - } - m_isConstantBufferDirty = false; - m_cachedTransientHeap = encoder->m_transientHeap; - m_cachedTransientHeapVersion = encoder->m_transientHeap->getVersion(); - - // Computing the size of the ordinary data buffer is *not* just as simple - // as using the size of the `m_ordinayData` array that we store. The reason - // for the added complexity is that interface-type fields may lead to the - // storage being specialized such that it needs extra appended data to - // store the concrete values that logically belong in those interface-type - // fields but wouldn't fit in the fixed-size allocation we gave them. - // - m_constantBufferSize = specializedLayout->getTotalOrdinaryDataSize(); - if (m_constantBufferSize == 0) - { - return SLANG_OK; - } - - // Once we have computed how large the buffer should be, we can allocate - // it from the transient resource heap. - // - auto alignedConstantBufferSize = D3DUtil::calcAligned(m_constantBufferSize, 256); - SLANG_RETURN_ON_FAIL(encoder->m_commandBuffer->m_transientHeap->allocateConstantBuffer( - alignedConstantBufferSize, m_constantBufferWeakPtr, m_constantBufferOffset)); - - // Once the buffer is allocated, we can use `_writeOrdinaryData` to fill it in. - // - // Note that `_writeOrdinaryData` is potentially recursive in the case - // where this object contains interface/existential-type fields, so we - // don't need or want to inline it into this call site. - // - SLANG_RETURN_ON_FAIL(_writeOrdinaryData( - encoder, - static_cast<BufferResourceImpl*>(m_constantBufferWeakPtr), - m_constantBufferOffset, - m_constantBufferSize, - specializedLayout)); - - { - // We also create and store a descriptor for our root constant buffer - // into the descriptor table allocation that was reserved for them. - // - // We always know that the ordinary data buffer will be the first descriptor - // in the table of resource views. - // - auto descriptorTable = m_descriptorSet.resourceTable; - D3D12_CONSTANT_BUFFER_VIEW_DESC viewDesc = {}; - viewDesc.BufferLocation = static_cast<BufferResourceImpl*>(m_constantBufferWeakPtr) - ->m_resource.getResource() - ->GetGPUVirtualAddress() + - m_constantBufferOffset; - viewDesc.SizeInBytes = (UINT)alignedConstantBufferSize; - encoder->m_device->CreateConstantBufferView( - &viewDesc, descriptorTable.getCpuHandle()); - } - - return SLANG_OK; - } - - public: - void updateSubObjectsRecursive() - { - if (!m_isMutable) - return; - auto& subObjectRanges = getLayout()->getSubObjectRanges(); - for (Slang::Index subObjectRangeIndex = 0; - subObjectRangeIndex < subObjectRanges.getCount(); - subObjectRangeIndex++) - { - auto const& subObjectRange = subObjectRanges[subObjectRangeIndex]; - auto const& bindingRange = - getLayout()->getBindingRange(subObjectRange.bindingRangeIndex); - Slang::Index count = bindingRange.count; - - for (Slang::Index subObjectIndexInRange = 0; subObjectIndexInRange < count; - subObjectIndexInRange++) - { - Slang::Index objectIndex = bindingRange.subObjectIndex + subObjectIndexInRange; - auto subObject = m_objects[objectIndex].Ptr(); - if (!subObject) - continue; - subObject->updateSubObjectsRecursive(); - if (m_subObjectVersions[objectIndex] != m_objects[objectIndex]->m_version) - { - ShaderOffset offset; - offset.bindingRangeIndex = subObjectRange.bindingRangeIndex; - offset.bindingArrayIndex = subObjectIndexInRange; - setObject(offset, subObject); - } - } - } - } - /// Prepare to bind this object as a parameter block. - /// - /// This involves allocating and binding any descriptor tables necessary - /// to to store the state of the object. The function returns a descriptor - /// set formed from any table(s) allocated. In addition, the `ioOffset` - /// parameter will be adjusted to be correct for binding values into - /// the resulting descriptor set. - /// - /// Returns: - /// SLANG_OK when successful, - /// SLANG_E_OUT_OF_MEMORY when descriptor heap is full. - /// - Result prepareToBindAsParameterBlock( - BindingContext* context, - BindingOffset& ioOffset, - ShaderObjectLayoutImpl* specializedLayout, - DescriptorSet& outDescriptorSet) - { - auto transientHeap = context->transientHeap; - auto submitter = context->submitter; - - // When writing into the new descriptor set, resource and sampler - // descriptors will need to start at index zero in the respective - // tables. - // - ioOffset.resource = 0; - ioOffset.sampler = 0; - - // The index of the next root parameter to bind will be maintained, - // but needs to be incremented by the number of descriptor tables - // we allocate (zero or one resource table and zero or one sampler - // table). - // - auto& rootParamIndex = ioOffset.rootParam; - - if (auto descriptorCount = specializedLayout->getTotalResourceDescriptorCount()) - { - // There is a non-zero number of resource descriptors needed, - // so we will allocate a table out of the appropriate heap, - // and store it into the appropriate part of `descriptorSet`. - // - auto descriptorHeap = &transientHeap->getCurrentViewHeap(); - auto& table = outDescriptorSet.resourceTable; - - // Allocate the table. - // - if (!table.allocate(descriptorHeap, descriptorCount)) - { - context->outOfMemoryHeap = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV; - return SLANG_E_OUT_OF_MEMORY; - } - - // Bind the table to the pipeline, consuming the next available - // root parameter. - // - auto tableRootParamIndex = rootParamIndex++; - submitter->setRootDescriptorTable(tableRootParamIndex, table.getGpuHandle()); - } - if (auto descriptorCount = specializedLayout->getTotalSamplerDescriptorCount()) - { - // There is a non-zero number of sampler descriptors needed, - // so we will allocate a table out of the appropriate heap, - // and store it into the appropriate part of `descriptorSet`. - // - auto descriptorHeap = &transientHeap->getCurrentSamplerHeap(); - auto& table = outDescriptorSet.samplerTable; - - // Allocate the table. - // - if (!table.allocate(descriptorHeap, descriptorCount)) - { - context->outOfMemoryHeap = D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER; - return SLANG_E_OUT_OF_MEMORY; - } - - // Bind the table to the pipeline, consuming the next available - // root parameter. - // - auto tableRootParamIndex = rootParamIndex++; - submitter->setRootDescriptorTable(tableRootParamIndex, table.getGpuHandle()); - } - - return SLANG_OK; - } - - bool checkIfCachedDescriptorSetIsValidRecursive(BindingContext* context) - { - if (shouldAllocateConstantBuffer(context->transientHeap)) - return false; - if (m_isMutable && m_version != m_cachedGPUDescriptorSetVersion) - return false; - if (m_cachedGPUDescriptorSet.resourceTable.getDescriptorCount() != 0 && - m_cachedGPUDescriptorSet.resourceTable.m_heap.ptr.linearHeap->getHeap() != - m_cachedTransientHeap->getCurrentViewHeap().getHeap()) - return false; - if (m_cachedGPUDescriptorSet.samplerTable.getDescriptorCount() != 0 && - m_cachedGPUDescriptorSet.samplerTable.m_heap.ptr.linearHeap->getHeap() != - m_cachedTransientHeap->getCurrentSamplerHeap().getHeap()) - return false; - - auto& subObjectRanges = getLayout()->getSubObjectRanges(); - for (Slang::Index subObjectRangeIndex = 0; - subObjectRangeIndex < subObjectRanges.getCount(); - subObjectRangeIndex++) - { - auto const& subObjectRange = subObjectRanges[subObjectRangeIndex]; - auto const& bindingRange = - getLayout()->getBindingRange(subObjectRange.bindingRangeIndex); - if (bindingRange.bindingType != slang::BindingType::ParameterBlock) - continue; - Slang::Index count = bindingRange.count; - - for (Slang::Index subObjectIndexInRange = 0; subObjectIndexInRange < count; - subObjectIndexInRange++) - { - Slang::Index objectIndex = bindingRange.subObjectIndex + subObjectIndexInRange; - auto subObject = m_objects[objectIndex].Ptr(); - if (!subObject) - continue; - if (subObject->checkIfCachedDescriptorSetIsValidRecursive(context)) - return false; - } - } - return true; - } - - /// Bind this object as a `ParameterBlock<X>` - Result bindAsParameterBlock( - BindingContext* context, - BindingOffset const& offset, - ShaderObjectLayoutImpl* specializedLayout) - { - if (checkIfCachedDescriptorSetIsValidRecursive(context)) - { - // If we already have a valid gpu descriptor table in the current - // heap, bind it. - auto rootParamIndex = offset.rootParam; - if (m_cachedGPUDescriptorSet.resourceTable.getDescriptorCount()) - { - auto tableRootParamIndex = rootParamIndex++; - context->submitter->setRootDescriptorTable( - tableRootParamIndex, m_cachedGPUDescriptorSet.resourceTable.getGpuHandle()); - } - if (m_cachedGPUDescriptorSet.samplerTable.getDescriptorCount()) - { - auto tableRootParamIndex = rootParamIndex++; - context->submitter->setRootDescriptorTable( - tableRootParamIndex, m_cachedGPUDescriptorSet.samplerTable.getGpuHandle()); - } - return SLANG_OK; - } - - // The first step to binding an object as a parameter block is to allocate a descriptor - // set (consisting of zero or one resource descriptor table and zero or one sampler - // descriptor table) to represent its values. - // - BindingOffset subOffset = offset; - SLANG_RETURN_ON_FAIL(prepareToBindAsParameterBlock( - context, /* inout */ subOffset, specializedLayout, m_cachedGPUDescriptorSet)); - - // Next we bind the object into that descriptor set as if it were being used - // as a `ConstantBuffer<X>`. - // - SLANG_RETURN_ON_FAIL(bindAsConstantBuffer( - context, m_cachedGPUDescriptorSet, subOffset, specializedLayout)); - - m_cachedGPUDescriptorSetVersion = m_version; - return SLANG_OK; - } - - /// Bind this object as a `ConstantBuffer<X>` - Result bindAsConstantBuffer( - BindingContext* context, - DescriptorSet const& descriptorSet, - BindingOffset const& offset, - ShaderObjectLayoutImpl* specializedLayout) - { - // If we are to bind as a constant buffer we first need to ensure that - // the ordinary data buffer is created, if this object needs one. - // - SLANG_RETURN_ON_FAIL(_ensureOrdinaryDataBufferCreatedIfNeeded(context->encoder, specializedLayout)); - - // Next, we need to bind all of the resource descriptors for this object - // (including any ordinary data buffer) into the provided `descriptorSet`. - // - auto resourceCount = specializedLayout->getResourceSlotCount(); - if(resourceCount) - { - auto& dstTable = descriptorSet.resourceTable; - auto& srcTable = m_descriptorSet.resourceTable; - - context->device->m_device->CopyDescriptorsSimple( - UINT(resourceCount), - dstTable.getCpuHandle(offset.resource), - srcTable.getCpuHandle(), - D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); - } - - // Finally, we delegate to `_bindImpl` to bind samplers and sub-objects, - // since the logic is shared with the `bindAsValue()` case below. - // - SLANG_RETURN_ON_FAIL(_bindImpl(context, descriptorSet, offset, specializedLayout)); - return SLANG_OK; - } - - /// Bind this object as a value (for an interface-type parameter) - Result bindAsValue( - BindingContext* context, - DescriptorSet const& descriptorSet, - BindingOffset const& offset, - ShaderObjectLayoutImpl* specializedLayout) - { - // When binding a value for an interface-type field we do *not* want - // to bind a buffer for the ordinary data (if there is any) because - // ordinary data for interface-type fields gets allocated into the - // parent object's ordinary data buffer. - // - // This CPU-memory descriptor table that holds resource descriptors - // will have already been allocated to have space for an ordinary data - // buffer (if needed), so we need to take care to skip over that - // descriptor when copying descriptors from the CPU-memory set - // to the GPU-memory `descriptorSet`. - // - auto skipResourceCount = specializedLayout->getOrdinaryDataBufferCount(); - auto resourceCount = specializedLayout->getResourceSlotCount() - skipResourceCount; - if(resourceCount) - { - auto& dstTable = descriptorSet.resourceTable; - auto& srcTable = m_descriptorSet.resourceTable; - - context->device->m_device->CopyDescriptorsSimple( - UINT(resourceCount), - dstTable.getCpuHandle(offset.resource), - srcTable.getCpuHandle(skipResourceCount), - D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); - } - - // Finally, we delegate to `_bindImpl` to bind samplers and sub-objects, - // since the logic is shared with the `bindAsConstantBuffer()` case above. - // - // Note: Just like we had to do some subtle handling of the ordinary data buffer - // above, here we need to contend with the fact that the `offset.resource` fields - // computed for sub-object ranges were baked to take the ordinary data buffer - // into account, so that if `skipResourceCount` is non-zero then they are all - // too high by `skipResourceCount`. - // - // We will address the problem here by computing a modified offset that adjusts - // for the ordinary data buffer that we have not bound after all. - // - BindingOffset subOffset = offset; - subOffset.resource -= skipResourceCount; - SLANG_RETURN_ON_FAIL(_bindImpl(context, descriptorSet, subOffset, specializedLayout)); - return SLANG_OK; - } - - /// Shared logic for `bindAsConstantBuffer()` and `bindAsValue()` - Result _bindImpl( - BindingContext* context, - DescriptorSet const& descriptorSet, - BindingOffset const& offset, - ShaderObjectLayoutImpl* specializedLayout) - { - // We start by binding all the sampler decriptors, if needed. - // - // Note: resource descriptors were handled in either `bindAsConstantBuffer()` - // or `bindAsValue()` before calling into `_bindImpl()`. - // - if (auto samplerCount = specializedLayout->getSamplerSlotCount()) - { - auto& dstTable = descriptorSet.samplerTable; - auto& srcTable = m_descriptorSet.samplerTable; - - context->device->m_device->CopyDescriptorsSimple( - UINT(samplerCount), - dstTable.getCpuHandle(offset.sampler), - srcTable.getCpuHandle(), - D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER); - } - - // Next we iterate over the sub-object ranges and bind anything they require. - // - auto& subObjectRanges = specializedLayout->getSubObjectRanges(); - auto subObjectRangeCount = subObjectRanges.getCount(); - for (Index i = 0; i < subObjectRangeCount; i++) - { - auto& subObjectRange = specializedLayout->getSubObjectRange(i); - auto& bindingRange = specializedLayout->getBindingRange(subObjectRange.bindingRangeIndex); - auto subObjectIndex = bindingRange.subObjectIndex; - auto subObjectLayout = subObjectRange.layout.Ptr(); - - BindingOffset rangeOffset = offset; - rangeOffset += subObjectRange.offset; - - BindingOffset rangeStride = subObjectRange.stride; - - switch(bindingRange.bindingType) - { - case slang::BindingType::ConstantBuffer: - { - auto objOffset = rangeOffset; - for (uint32_t j = 0; j < bindingRange.count; j++) - { - auto& object = m_objects[subObjectIndex + j]; - SLANG_RETURN_ON_FAIL(object->bindAsConstantBuffer(context, descriptorSet, objOffset, subObjectLayout)); - objOffset += rangeStride; - } - } - break; - - case slang::BindingType::ParameterBlock: - { - auto objOffset = rangeOffset; - for (uint32_t j = 0; j < bindingRange.count; j++) - { - auto& object = m_objects[subObjectIndex + j]; - SLANG_RETURN_ON_FAIL(object->bindAsParameterBlock(context, objOffset, subObjectLayout)); - objOffset += rangeStride; - } - } - break; - - case slang::BindingType::ExistentialValue: - if(subObjectLayout) - { - auto objOffset = rangeOffset; - for (uint32_t j = 0; j < bindingRange.count; j++) - { - auto& object = m_objects[subObjectIndex + j]; - SLANG_RETURN_ON_FAIL(object->bindAsValue(context, descriptorSet, objOffset, subObjectLayout)); - objOffset += rangeStride; - } - } - break; - } - } - - return SLANG_OK; - } - - Result bindRootArguments(BindingContext* context, uint32_t& index) - { - auto layoutImpl = getLayout(); - for (Index i = 0; i < m_rootArguments.getCount(); i++) - { - switch (layoutImpl->getRootParameterInfo(i).type) - { - case IResourceView::Type::ShaderResource: - case IResourceView::Type::AccelerationStructure: - context->submitter->setRootSRV(index, m_rootArguments[i]); - break; - case IResourceView::Type::UnorderedAccess: - context->submitter->setRootUAV(index, m_rootArguments[i]); - break; - default: - continue; - } - index++; - } - for (auto& subObject : m_objects) - { - if (subObject) - { - SLANG_RETURN_ON_FAIL(subObject->bindRootArguments(context, index)); - } - } - return SLANG_OK; - } - /// A CPU-memory descriptor set holding any descriptors used to represent the resources/samplers in this object's state - DescriptorSet m_descriptorSet; - /// A cached descriptor set on GPU heap. - DescriptorSet m_cachedGPUDescriptorSet; - - ShortList<RefPtr<Resource>, 8> m_boundResources; - List<D3D12_GPU_VIRTUAL_ADDRESS> m_rootArguments; - /// A constant buffer used to stored ordinary data for this object - /// and existential-type sub-objects. - /// - /// Allocated from transient heap on demand with `_createOrdinaryDataBufferIfNeeded()` - IBufferResource* m_constantBufferWeakPtr = nullptr; - size_t m_constantBufferOffset = 0; - size_t m_constantBufferSize = 0; - - /// Dirty bit tracking whether the constant buffer needs to be updated. - bool m_isConstantBufferDirty = true; - /// The transient heap from which the constant buffer and descriptor set is allocated. - TransientResourceHeapImpl* m_cachedTransientHeap; - /// The version of the transient heap when the constant buffer and descriptor set is allocated. - uint64_t m_cachedTransientHeapVersion; - - /// Whether this shader object is allowed to be mutable. - bool m_isMutable = false; - /// The version of a mutable shader object. - uint32_t m_version = 0; - /// The version of this mutable shader object when the gpu descriptor table is cached. - uint32_t m_cachedGPUDescriptorSetVersion = -1; - /// The versions of bound subobjects. - List<uint32_t> m_subObjectVersions; - - /// Get the layout of this shader object with specialization arguments considered - /// - /// This operation should only be called after the shader object has been - /// fully filled in and finalized. - /// - Result getSpecializedLayout(ShaderObjectLayoutImpl** outLayout) - { - if (!m_specializedLayout) - { - SLANG_RETURN_ON_FAIL(_createSpecializedLayout(m_specializedLayout.writeRef())); - } - returnRefPtr(outLayout, m_specializedLayout); - return SLANG_OK; - } - - /// Create the layout for this shader object with specialization arguments considered - /// - /// This operation is virtual so that it can be customized by `RootShaderObject`. - /// - virtual Result _createSpecializedLayout(ShaderObjectLayoutImpl** outLayout) - { - ExtendedShaderObjectType extendedType; - SLANG_RETURN_ON_FAIL(getSpecializedShaderObjectType(&extendedType)); - - auto renderer = getRenderer(); - RefPtr<ShaderObjectLayoutImpl> layout; - SLANG_RETURN_ON_FAIL(renderer->getShaderObjectLayout( - extendedType.slangType, - m_layout->getContainerType(), - (ShaderObjectLayoutBase**)layout.writeRef())); - - returnRefPtrMove(outLayout, layout); - return SLANG_OK; - } - - RefPtr<ShaderObjectLayoutImpl> m_specializedLayout; - }; - - class RootShaderObjectImpl : public ShaderObjectImpl - { - typedef ShaderObjectImpl Super; - - public: - // Override default reference counting behavior to disable lifetime management via ComPtr. - // Root objects are managed by command buffer and does not need to be freed by the user. - SLANG_NO_THROW uint32_t SLANG_MCALL addRef() override { return 1; } - SLANG_NO_THROW uint32_t SLANG_MCALL release() override { return 1; } - public: - RootShaderObjectLayoutImpl* getLayout() - { - return static_cast<RootShaderObjectLayoutImpl*>(m_layout.Ptr()); - } - - UInt SLANG_MCALL getEntryPointCount() SLANG_OVERRIDE - { - return (UInt)m_entryPoints.getCount(); - } - SlangResult SLANG_MCALL getEntryPoint(UInt index, IShaderObject** outEntryPoint) - SLANG_OVERRIDE - { - returnComPtr(outEntryPoint, m_entryPoints[index]); - return SLANG_OK; - } - - virtual Result collectSpecializationArgs(ExtendedShaderObjectTypeList& args) override - { - SLANG_RETURN_ON_FAIL(ShaderObjectImpl::collectSpecializationArgs(args)); - for (auto& entryPoint : m_entryPoints) - { - SLANG_RETURN_ON_FAIL(entryPoint->collectSpecializationArgs(args)); - } - return SLANG_OK; - } - - virtual SLANG_NO_THROW Result SLANG_MCALL - copyFrom(IShaderObject* object, ITransientResourceHeap* transientHeap) override - { - if (auto srcObj = dynamic_cast<MutableRootShaderObjectImpl*>(object)) - { - *this = *srcObj; - return SLANG_OK; - } - return SLANG_FAIL; - } - - public: - Result bindAsRoot( - BindingContext* context, - RootShaderObjectLayoutImpl* specializedLayout) - { - // Pull updates from sub-objects when this is a mutable root shader object. - updateSubObjectsRecursive(); - - // A root shader object always binds as if it were a parameter block, - // insofar as it needs to allocate a descriptor set to hold the bindings - // for its own state and any sub-objects. - // - // Note: We do not direclty use `bindAsParameterBlock` here because we also - // need to bind the entry points into the same descriptor set that is - // being used for the root object. - - BindingOffset rootOffset; - - // Bind all root parameters first. - Super::bindRootArguments(context, rootOffset.rootParam); - - DescriptorSet descriptorSet; - SLANG_RETURN_ON_FAIL(prepareToBindAsParameterBlock( - context, /* inout */ rootOffset, specializedLayout, descriptorSet)); - - SLANG_RETURN_ON_FAIL(Super::bindAsConstantBuffer(context, descriptorSet, rootOffset, specializedLayout)); - - auto entryPointCount = m_entryPoints.getCount(); - for (Index i = 0; i < entryPointCount; ++i) - { - auto entryPoint = m_entryPoints[i]; - auto& entryPointInfo = specializedLayout->getEntryPoint(i); - - auto entryPointOffset = rootOffset; - entryPointOffset += entryPointInfo.offset; - - entryPoint->updateSubObjectsRecursive(); - - SLANG_RETURN_ON_FAIL(entryPoint->bindAsConstantBuffer(context, descriptorSet, entryPointOffset, entryPointInfo.layout)); - } - - return SLANG_OK; - } - - public: - - Result init(D3D12Device* device) - { - return SLANG_OK; - } - - Result resetImpl( - D3D12Device* device, - RootShaderObjectLayoutImpl* layout, - DescriptorHeapReference viewHeap, - DescriptorHeapReference samplerHeap, - bool isMutable) - { - SLANG_RETURN_ON_FAIL(Super::init(device, layout, viewHeap, samplerHeap)); - m_isMutable = isMutable; - m_specializedLayout = nullptr; - m_entryPoints.clear(); - for (auto entryPointInfo : layout->getEntryPoints()) - { - RefPtr<ShaderObjectImpl> entryPoint; - SLANG_RETURN_ON_FAIL( - ShaderObjectImpl::create(device, entryPointInfo.layout, entryPoint.writeRef())); - entryPoint->m_isMutable = isMutable; - m_entryPoints.add(entryPoint); - } - return SLANG_OK; - } - - Result reset( - D3D12Device* device, - RootShaderObjectLayoutImpl* layout, - TransientResourceHeapImpl* heap) - { - return resetImpl( - device, layout, &heap->m_stagingCpuViewHeap, &heap->m_stagingCpuSamplerHeap, false); - } - - protected: - Result _createSpecializedLayout(ShaderObjectLayoutImpl** outLayout) SLANG_OVERRIDE - { - ExtendedShaderObjectTypeList specializationArgs; - SLANG_RETURN_ON_FAIL(collectSpecializationArgs(specializationArgs)); - - // Note: There is an important policy decision being made here that we need - // to approach carefully. - // - // We are doing two different things that affect the layout of a program: - // - // 1. We are *composing* one or more pieces of code (notably the shared global/module - // stuff and the per-entry-point stuff). - // - // 2. We are *specializing* code that includes generic/existential parameters - // to concrete types/values. - // - // We need to decide the relative *order* of these two steps, because of how it impacts - // layout. The layout for `specialize(compose(A,B), X, Y)` is potentially different - // form that of `compose(specialize(A,X), speciealize(B,Y))`, even when both are - // semantically equivalent programs. - // - // Right now we are using the first option: we are first generating a full composition - // of all the code we plan to use (global scope plus all entry points), and then - // specializing it to the concatenated specialization argumenst for all of that. - // - // In some cases, though, this model isn't appropriate. For example, when dealing with - // ray-tracing shaders and local root signatures, we really want the parameters of each - // entry point (actually, each entry-point *group*) to be allocated distinct storage, - // which really means we want to compute something like: - // - // SpecializedGlobals = specialize(compose(ModuleA, ModuleB, ...), X, Y, ...) - // - // SpecializedEP1 = compose(SpecializedGlobals, specialize(EntryPoint1, T, U, ...)) - // SpecializedEP2 = compose(SpecializedGlobals, specialize(EntryPoint2, A, B, ...)) - // - // Note how in this case all entry points agree on the layout for the shared/common - // parmaeters, but their layouts are also independent of one another. - // - // Furthermore, in this example, loading another entry point into the system would not - // rquire re-computing the layouts (or generated kernel code) for any of the entry - // points that had already been loaded (in contrast to a compose-then-specialize - // approach). - // - ComPtr<slang::IComponentType> specializedComponentType; - ComPtr<slang::IBlob> diagnosticBlob; - auto result = getLayout()->getSlangProgram()->specialize( - specializationArgs.components.getArrayView().getBuffer(), - specializationArgs.getCount(), - specializedComponentType.writeRef(), - diagnosticBlob.writeRef()); - - if (diagnosticBlob && diagnosticBlob->getBufferSize()) - { - getDebugCallback()->handleMessage( - SLANG_FAILED(result) ? DebugMessageType::Error : DebugMessageType::Info, - DebugMessageSource::Layer, - (const char*)diagnosticBlob->getBufferPointer()); - } - - if (SLANG_FAILED(result)) - return result; - - ComPtr<ID3DBlob> d3dDiagnosticBlob; - auto slangSpecializedLayout = specializedComponentType->getLayout(); - RefPtr<RootShaderObjectLayoutImpl> specializedLayout; - auto rootLayoutResult = RootShaderObjectLayoutImpl::create( - static_cast<D3D12Device*>(getRenderer()), - specializedComponentType, - slangSpecializedLayout, - specializedLayout.writeRef(), - d3dDiagnosticBlob.writeRef()); - - if (SLANG_FAILED(rootLayoutResult)) - { - return rootLayoutResult; - } - - // Note: Computing the layout for the specialized program will have also computed - // the layouts for the entry points, and we really need to attach that information - // to them so that they don't go and try to compute their own specializations. - // - // TODO: Well, if we move to the specialization model described above then maybe - // we *will* want entry points to do their own specialization work... - // - auto entryPointCount = m_entryPoints.getCount(); - for (Index i = 0; i < entryPointCount; ++i) - { - auto entryPointInfo = specializedLayout->getEntryPoint(i); - auto entryPointVars = m_entryPoints[i]; - - entryPointVars->m_specializedLayout = entryPointInfo.layout; - } - - returnRefPtrMove(outLayout, specializedLayout); - return SLANG_OK; - } - - List<RefPtr<ShaderObjectImpl>> m_entryPoints; - }; - - class MutableRootShaderObjectImpl : public RootShaderObjectImpl - { - public: - // Override default reference counting behavior to disable lifetime management via ComPtr. - // Root objects are managed by command buffer and does not need to be freed by the user. - SLANG_NO_THROW uint32_t SLANG_MCALL addRef() override { return ShaderObjectBase::addRef(); } - SLANG_NO_THROW uint32_t SLANG_MCALL release() override - { - return ShaderObjectBase::release(); - } - }; - - class ShaderTableImpl : public ShaderTableBase - { - public: - uint32_t m_rayGenTableOffset; - uint32_t m_missTableOffset; - uint32_t m_hitGroupTableOffset; - - D3D12Device* m_device; - - virtual RefPtr<BufferResource> createDeviceBuffer( - PipelineStateBase* pipeline, - TransientResourceHeapBase* transientHeap, - IResourceCommandEncoder* encoder) override - { - uint32_t raygenTableSize = m_rayGenShaderCount * D3D12_SHADER_IDENTIFIER_SIZE_IN_BYTES; - uint32_t missTableSize = m_missShaderCount * D3D12_SHADER_IDENTIFIER_SIZE_IN_BYTES; - uint32_t hitgroupTableSize = m_hitGroupCount * D3D12_SHADER_IDENTIFIER_SIZE_IN_BYTES; - m_rayGenTableOffset = 0; - m_missTableOffset = - (uint32_t)D3DUtil::calcAligned(raygenTableSize, D3D12_RAYTRACING_SHADER_TABLE_BYTE_ALIGNMENT); - m_hitGroupTableOffset = (uint32_t)D3DUtil::calcAligned( - m_missTableOffset + missTableSize, D3D12_RAYTRACING_SHADER_TABLE_BYTE_ALIGNMENT); - uint32_t tableSize = m_hitGroupTableOffset + hitgroupTableSize; - - auto pipelineImpl = static_cast<RayTracingPipelineStateImpl*>(pipeline); - ComPtr<IBufferResource> bufferResource; - IBufferResource::Desc bufferDesc = {}; - bufferDesc.memoryType = gfx::MemoryType::DeviceLocal; - bufferDesc.defaultState = ResourceState::General; - bufferDesc.type = IResource::Type::Buffer; - bufferDesc.sizeInBytes = tableSize; - m_device->createBufferResource(bufferDesc, nullptr, bufferResource.writeRef()); - - ComPtr<ID3D12StateObjectProperties> stateObjectProperties; - pipelineImpl->m_stateObject->QueryInterface(stateObjectProperties.writeRef()); - - TransientResourceHeapImpl* transientHeapImpl = - static_cast<TransientResourceHeapImpl*>(transientHeap); - - IBufferResource* stagingBuffer = nullptr; - size_t stagingBufferOffset = 0; - transientHeapImpl->allocateStagingBuffer( - tableSize, stagingBuffer, stagingBufferOffset, MemoryType::Upload); - - assert(stagingBuffer); - void* stagingPtr = nullptr; - stagingBuffer->map(nullptr, &stagingPtr); - - auto copyShaderIdInto = [&](void* dest, String& name, const ShaderRecordOverwrite& overwrite) - { - if (name.getLength()) - { - void* shaderId = stateObjectProperties->GetShaderIdentifier(name.toWString().begin()); - memcpy(dest, shaderId, D3D12_SHADER_IDENTIFIER_SIZE_IN_BYTES); - } - else - { - memset(dest, 0, D3D12_SHADER_IDENTIFIER_SIZE_IN_BYTES); - } - if (overwrite.size) - { - memcpy((uint8_t*)dest + overwrite.offset, overwrite.data, overwrite.size); - } - }; - - uint8_t* stagingBufferPtr = (uint8_t*)stagingPtr + stagingBufferOffset; - for (uint32_t i = 0; i < m_rayGenShaderCount; i++) - { - copyShaderIdInto( - stagingBufferPtr + m_rayGenTableOffset + - D3D12_SHADER_IDENTIFIER_SIZE_IN_BYTES * i, - m_shaderGroupNames[i], - m_recordOverwrites[i]); - } - for (uint32_t i = 0; i < m_missShaderCount; i++) - { - copyShaderIdInto( - stagingBufferPtr + m_missTableOffset + - D3D12_SHADER_IDENTIFIER_SIZE_IN_BYTES * i, - m_shaderGroupNames[m_rayGenShaderCount + i], - m_recordOverwrites[m_rayGenShaderCount + i]); - } - for (uint32_t i = 0; i < m_hitGroupCount; i++) - { - copyShaderIdInto( - stagingBufferPtr + m_hitGroupTableOffset + - D3D12_SHADER_IDENTIFIER_SIZE_IN_BYTES * i, - m_shaderGroupNames[m_rayGenShaderCount + m_missShaderCount + i], - m_recordOverwrites[m_rayGenShaderCount + m_missShaderCount + i]); - } - - stagingBuffer->unmap(nullptr); - encoder->copyBuffer(bufferResource, 0, stagingBuffer, stagingBufferOffset, tableSize); - encoder->bufferBarrier( - 1, - bufferResource.readRef(), - gfx::ResourceState::CopyDestination, - gfx::ResourceState::ShaderResource); - RefPtr<BufferResource> resultPtr = static_cast<BufferResource*>(bufferResource.get()); - return _Move(resultPtr); - } - - }; - - class CommandBufferImpl - : public ICommandBuffer - , public ComObject - { - public: - // There are a pair of cyclic references between a `TransientResourceHeap` and - // a `CommandBuffer` created from the heap. We need to break the cycle upon - // the public reference count of a command buffer dropping to 0. - SLANG_COM_OBJECT_IUNKNOWN_ALL - - ICommandBuffer* getInterface(const Guid& guid) - { - if (guid == GfxGUID::IID_ISlangUnknown || guid == GfxGUID::IID_ICommandBuffer) - return static_cast<ICommandBuffer*>(this); - return nullptr; - } - virtual void comFree() override { m_transientHeap.breakStrongReference(); } - - virtual SLANG_NO_THROW Result SLANG_MCALL getNativeHandle(InteropHandle* handle) override - { - handle->api = InteropHandleAPI::D3D12; - handle->handleValue = (uint64_t)m_cmdList.get(); - return SLANG_OK; - } - - public: - ComPtr<ID3D12GraphicsCommandList> m_cmdList; - ComPtr<ID3D12GraphicsCommandList1> m_cmdList1; - ComPtr<ID3D12GraphicsCommandList4> m_cmdList4; - - BreakableReference<TransientResourceHeapImpl> m_transientHeap; - // Weak reference is fine here since `m_transientHeap` already holds strong reference to - // device. - D3D12Device* m_renderer; - RootShaderObjectImpl m_rootShaderObject; - RefPtr<MutableRootShaderObjectImpl> m_mutableRootShaderObject; - bool m_descriptorHeapsBound = false; - - void bindDescriptorHeaps() - { - if (!m_descriptorHeapsBound) - { - ID3D12DescriptorHeap* heaps[] = { - m_transientHeap->getCurrentViewHeap().getHeap(), - m_transientHeap->getCurrentSamplerHeap().getHeap(), - }; - m_cmdList->SetDescriptorHeaps(SLANG_COUNT_OF(heaps), heaps); - m_descriptorHeapsBound = true; - } - } - - void invalidateDescriptorHeapBinding() { m_descriptorHeapsBound = false; } - - void reinit() - { - invalidateDescriptorHeapBinding(); - m_rootShaderObject.init(m_renderer); - } - - void init( - D3D12Device* renderer, - ID3D12GraphicsCommandList* d3dCommandList, - TransientResourceHeapImpl* transientHeap) - { - m_transientHeap = transientHeap; - m_renderer = renderer; - m_cmdList = d3dCommandList; - - reinit(); - -#if SLANG_GFX_HAS_DXR_SUPPORT - m_cmdList->QueryInterface<ID3D12GraphicsCommandList4>(m_cmdList4.writeRef()); - if (m_cmdList4) - { - m_cmdList1 = m_cmdList4; - return; - } -#endif - m_cmdList->QueryInterface<ID3D12GraphicsCommandList1>(m_cmdList1.writeRef()); - } - - - class ResourceCommandEncoderImpl - : public IResourceCommandEncoder - , public PipelineCommandEncoder - { - public: - virtual SLANG_NO_THROW void SLANG_MCALL copyBuffer( - IBufferResource* dst, - size_t dstOffset, - IBufferResource* src, - size_t srcOffset, - size_t size) override - { - auto dstBuffer = static_cast<BufferResourceImpl*>(dst); - auto srcBuffer = static_cast<BufferResourceImpl*>(src); - - m_commandBuffer->m_cmdList->CopyBufferRegion( - dstBuffer->m_resource.getResource(), - dstOffset, - srcBuffer->m_resource.getResource(), - srcOffset, - size); - } - virtual SLANG_NO_THROW void SLANG_MCALL uploadBufferData( - IBufferResource* dst, size_t offset, size_t size, void* data) override - { - _uploadBufferData( - m_commandBuffer->m_renderer->m_device, - m_commandBuffer->m_cmdList, - m_commandBuffer->m_transientHeap, - static_cast<BufferResourceImpl*>(dst), - offset, - size, - data); - } - virtual SLANG_NO_THROW void SLANG_MCALL textureBarrier( - size_t count, - ITextureResource* const* textures, - ResourceState src, - ResourceState dst) override - { - ShortList<D3D12_RESOURCE_BARRIER> barriers; - - for (size_t i = 0; i < count; i++) - { - auto textureImpl = static_cast<TextureResourceImpl*>(textures[i]); - auto d3dFormat = D3DUtil::getMapFormat(textureImpl->getDesc()->format); - auto textureDesc = textureImpl->getDesc(); - D3D12_RESOURCE_BARRIER barrier; - barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE; - if (src == dst && src == ResourceState::UnorderedAccess) - { - barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_UAV; - barrier.UAV.pResource = textureImpl->m_resource.getResource(); - } - else - { - barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; - barrier.Transition.StateBefore = D3DUtil::getResourceState(src); - barrier.Transition.StateAfter = D3DUtil::getResourceState(dst); - if (barrier.Transition.StateBefore == barrier.Transition.StateAfter) - continue; - barrier.Transition.pResource = textureImpl->m_resource.getResource(); - auto planeCount = D3DUtil::getPlaneSliceCount( - D3DUtil::getMapFormat(textureImpl->getDesc()->format)); - auto arraySize = textureDesc->arraySize; - if (arraySize == 0) - arraySize = 1; - barrier.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; - } - barriers.add(barrier); - } - if (barriers.getCount()) - { - m_commandBuffer->m_cmdList->ResourceBarrier( - (UINT)barriers.getCount(), barriers.getArrayView().getBuffer()); - } - } - virtual SLANG_NO_THROW void SLANG_MCALL bufferBarrier( - size_t count, - IBufferResource* const* buffers, - ResourceState src, - ResourceState dst) override - { - - List<D3D12_RESOURCE_BARRIER> barriers; - barriers.reserve(count); - - for (size_t i = 0; i < count; i++) - { - auto bufferImpl = static_cast<BufferResourceImpl*>(buffers[i]); - - D3D12_RESOURCE_BARRIER barrier = {}; - // If the src == dst, it must be a UAV barrier. - barrier.Type = (src == dst && dst == ResourceState::UnorderedAccess) - ? D3D12_RESOURCE_BARRIER_TYPE_UAV - : D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; - barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE; - - if (barrier.Type == D3D12_RESOURCE_BARRIER_TYPE_UAV) - { - barrier.UAV.pResource = bufferImpl->m_resource; - } - else - { - barrier.Transition.pResource = bufferImpl->m_resource; - barrier.Transition.StateBefore = D3DUtil::getResourceState(src); - barrier.Transition.StateAfter = D3DUtil::getResourceState(dst); - barrier.Transition.Subresource = 0; - if (barrier.Transition.StateAfter == barrier.Transition.StateBefore) - continue; - } - barriers.add(barrier); - } - if (barriers.getCount()) - { - m_commandBuffer->m_cmdList4->ResourceBarrier( - (UINT)barriers.getCount(), barriers.getArrayView().getBuffer()); - } - } - virtual SLANG_NO_THROW void SLANG_MCALL endEncoding() {} - virtual SLANG_NO_THROW void SLANG_MCALL - writeTimestamp(IQueryPool* pool, SlangInt index) override - { - static_cast<QueryPoolImpl*>(pool)->writeTimestamp( - m_commandBuffer->m_cmdList, index); - } - virtual SLANG_NO_THROW void SLANG_MCALL copyTexture( - ITextureResource* dst, - ResourceState dstState, - SubresourceRange dstSubresource, - ITextureResource::Offset3D dstOffset, - ITextureResource* src, - ResourceState srcState, - SubresourceRange srcSubresource, - ITextureResource::Offset3D srcOffset, - ITextureResource::Size extent) override - { - auto dstTexture = static_cast<TextureResourceImpl*>(dst); - auto srcTexture = static_cast<TextureResourceImpl*>(src); - - if (dstSubresource.layerCount == 0 && dstSubresource.mipLevelCount == 0 && - srcSubresource.layerCount == 0 && srcSubresource.mipLevelCount == 0) - { - m_commandBuffer->m_cmdList->CopyResource( - dstTexture->m_resource.getResource(), srcTexture->m_resource.getResource()); - return; - } - - auto d3dFormat = D3DUtil::getMapFormat(dstTexture->getDesc()->format); - auto aspectMask = (int32_t)dstSubresource.aspectMask; - if (dstSubresource.aspectMask == TextureAspect::Default) - aspectMask = (int32_t)TextureAspect::Color; - while (aspectMask) - { - auto aspect = Math::getLowestBit((int32_t)aspectMask); - aspectMask &= ~aspect; - auto planeIndex = D3DUtil::getPlaneSlice(d3dFormat, (TextureAspect)aspect); - for (uint32_t layer = 0; layer < dstSubresource.layerCount; layer++) - { - for (uint32_t mipLevel = 0; mipLevel < dstSubresource.mipLevelCount; - mipLevel++) - { - D3D12_TEXTURE_COPY_LOCATION dstRegion = {}; - - dstRegion.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; - dstRegion.pResource = dstTexture->m_resource.getResource(); - dstRegion.SubresourceIndex = D3DUtil::getSubresourceIndex( - dstSubresource.mipLevel + mipLevel, - dstSubresource.baseArrayLayer + layer, - planeIndex, - dstTexture->getDesc()->numMipLevels, - dstTexture->getDesc()->arraySize); - - D3D12_TEXTURE_COPY_LOCATION srcRegion = {}; - srcRegion.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; - srcRegion.pResource = srcTexture->m_resource.getResource(); - srcRegion.SubresourceIndex = D3DUtil::getSubresourceIndex( - srcSubresource.mipLevel + mipLevel, - srcSubresource.baseArrayLayer + layer, - planeIndex, - srcTexture->getDesc()->numMipLevels, - srcTexture->getDesc()->arraySize); - - D3D12_BOX srcBox = {}; - srcBox.left = srcOffset.x; - srcBox.top = srcOffset.y; - srcBox.front = srcOffset.z; - srcBox.right = srcBox.left + extent.width; - srcBox.bottom = srcBox.top + extent.height; - srcBox.back = srcBox.front + extent.depth; - - m_commandBuffer->m_cmdList->CopyTextureRegion( - &dstRegion, - dstOffset.x, - dstOffset.y, - dstOffset.z, - &srcRegion, - &srcBox); - } - } - } - } - - virtual SLANG_NO_THROW void SLANG_MCALL uploadTextureData( - ITextureResource* dst, - SubresourceRange subResourceRange, - ITextureResource::Offset3D offset, - ITextureResource::Size extent, - ITextureResource::SubresourceData* subResourceData, - size_t subResourceDataCount) override - { - auto dstTexture = static_cast<TextureResourceImpl*>(dst); - auto baseSubresourceIndex = D3DUtil::getSubresourceIndex( - subResourceRange.mipLevel, - subResourceRange.baseArrayLayer, - 0, - dstTexture->getDesc()->numMipLevels, - dstTexture->getDesc()->arraySize); - auto textureSize = dstTexture->getDesc()->size; - FormatInfo formatInfo = {}; - gfxGetFormatInfo(dstTexture->getDesc()->format, &formatInfo); - for (uint32_t i = 0; i < (uint32_t)subResourceDataCount; i++) - { - auto subresourceIndex = baseSubresourceIndex + i; - // Get the footprint - D3D12_RESOURCE_DESC texDesc = dstTexture->m_resource.getResource()->GetDesc(); - - D3D12_TEXTURE_COPY_LOCATION dstRegion = {}; - - dstRegion.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; - dstRegion.SubresourceIndex = subresourceIndex; - dstRegion.pResource = dstTexture->m_resource.getResource(); - - D3D12_TEXTURE_COPY_LOCATION srcRegion = {}; - srcRegion.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; - D3D12_PLACED_SUBRESOURCE_FOOTPRINT& footprint = srcRegion.PlacedFootprint; - footprint.Offset = 0; - footprint.Footprint.Format = texDesc.Format; - uint32_t mipLevel = D3DUtil::getSubresourceMipLevel( - subresourceIndex, dstTexture->getDesc()->numMipLevels); - if (extent.width != ITextureResource::kRemainingTextureSize) - { - footprint.Footprint.Width = extent.width; - } - else - { - footprint.Footprint.Width = - Math::Max(1, (textureSize.width >> mipLevel)) - offset.x; - } - if (extent.height != ITextureResource::kRemainingTextureSize) - { - footprint.Footprint.Height = extent.height; - } - else - { - footprint.Footprint.Height = - Math::Max(1, (textureSize.height >> mipLevel)) - offset.y; - } - if (extent.depth != ITextureResource::kRemainingTextureSize) - { - footprint.Footprint.Depth = extent.depth; - } - else - { - footprint.Footprint.Depth = - Math::Max(1, (textureSize.depth >> mipLevel)) - offset.z; - } - auto rowSize = (footprint.Footprint.Width + formatInfo.blockWidth - 1) / - formatInfo.blockWidth * formatInfo.blockSizeInBytes; - auto rowCount = (footprint.Footprint.Height + formatInfo.blockHeight - 1) / - formatInfo.blockHeight; - footprint.Footprint.RowPitch = (UINT)D3DUtil::calcAligned( - rowSize, (uint32_t)D3D12_TEXTURE_DATA_PITCH_ALIGNMENT); - - auto bufferSize = - footprint.Footprint.RowPitch * rowCount * footprint.Footprint.Depth; - - IBufferResource* stagingBuffer; - size_t stagingBufferOffset = 0; - m_commandBuffer->m_transientHeap->allocateStagingBuffer( - bufferSize, stagingBuffer, stagingBufferOffset, MemoryType::Upload, true); - assert(stagingBufferOffset == 0); - BufferResourceImpl* bufferImpl = - static_cast<BufferResourceImpl*>(stagingBuffer); - uint8_t* bufferData = nullptr; - D3D12_RANGE mapRange = {0, 0}; - bufferImpl->m_resource.getResource()->Map(0, &mapRange, (void**)&bufferData); - for (uint32_t z = 0; z < footprint.Footprint.Depth; z++) - { - auto imageStart = - bufferData + footprint.Footprint.RowPitch * rowCount * (size_t)z; - auto srcData = - (uint8_t*)subResourceData->data + subResourceData->strideZ * z; - for (uint32_t row = 0; row < rowCount; row++) - { - memcpy( - imageStart + row * (size_t)footprint.Footprint.RowPitch, - srcData + subResourceData->strideY * row, - rowSize); - } - } - bufferImpl->m_resource.getResource()->Unmap(0, nullptr); - srcRegion.pResource = bufferImpl->m_resource.getResource(); - m_commandBuffer->m_cmdList->CopyTextureRegion( - &dstRegion, offset.x, offset.y, offset.z, &srcRegion, nullptr); - } - } - - virtual SLANG_NO_THROW void SLANG_MCALL clearResourceView( - IResourceView* view, - ClearValue* clearValue, - ClearResourceViewFlags::Enum flags) override - { - auto viewImpl = static_cast<ResourceViewImpl*>(view); - switch (view->getViewDesc()->type) - { - case IResourceView::Type::RenderTarget: - m_commandBuffer->m_cmdList->ClearRenderTargetView( - viewImpl->m_descriptor.cpuHandle, - clearValue->color.floatValues, - 0, - nullptr); - break; - case IResourceView::Type::DepthStencil: - { - D3D12_CLEAR_FLAGS clearFlags = (D3D12_CLEAR_FLAGS)0; - if (flags & ClearResourceViewFlags::ClearDepth) - { - clearFlags |= D3D12_CLEAR_FLAG_DEPTH; - } - if (flags & ClearResourceViewFlags::ClearStencil) - { - clearFlags |= D3D12_CLEAR_FLAG_STENCIL; - } - m_commandBuffer->m_cmdList->ClearDepthStencilView( - viewImpl->m_descriptor.cpuHandle, - clearFlags, - clearValue->depthStencil.depth, - (UINT8)clearValue->depthStencil.stencil, - 0, - nullptr); - break; - } - case IResourceView::Type::UnorderedAccess: - { - ID3D12Resource* d3dResource = nullptr; - switch (viewImpl->m_resource->getType()) - { - case IResource::Type::Buffer: - d3dResource = - static_cast<BufferResourceImpl*>(viewImpl->m_resource.Ptr()) - ->m_resource.getResource(); - break; - default: - d3dResource = - static_cast<TextureResourceImpl*>(viewImpl->m_resource.Ptr()) - ->m_resource.getResource(); - break; - } - auto gpuHandleIndex = - m_commandBuffer->m_transientHeap->getCurrentViewHeap().allocate(1); - if (gpuHandleIndex == -1) - { - m_commandBuffer->m_transientHeap->allocateNewViewDescriptorHeap( - m_commandBuffer->m_renderer); - gpuHandleIndex = - m_commandBuffer->m_transientHeap->getCurrentViewHeap().allocate(1); - m_commandBuffer->bindDescriptorHeaps(); - } - this->m_commandBuffer->m_renderer->m_device->CopyDescriptorsSimple( - 1, - m_commandBuffer->m_transientHeap->getCurrentViewHeap().getCpuHandle( - gpuHandleIndex), - viewImpl->m_descriptor.cpuHandle, - D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); - - if (flags & ClearResourceViewFlags::FloatClearValues) - { - m_commandBuffer->m_cmdList->ClearUnorderedAccessViewFloat( - m_commandBuffer->m_transientHeap->getCurrentViewHeap().getGpuHandle( - gpuHandleIndex), - viewImpl->m_descriptor.cpuHandle, - d3dResource, - clearValue->color.floatValues, - 0, - nullptr); - } - else - { - m_commandBuffer->m_cmdList->ClearUnorderedAccessViewUint( - m_commandBuffer->m_transientHeap->getCurrentViewHeap().getGpuHandle( - gpuHandleIndex), - viewImpl->m_descriptor.cpuHandle, - d3dResource, - clearValue->color.uintValues, - 0, - nullptr); - } - break; - } - default: - break; - } - } - - virtual SLANG_NO_THROW void SLANG_MCALL resolveResource( - ITextureResource* source, - ResourceState sourceState, - SubresourceRange sourceRange, - ITextureResource* dest, - ResourceState destState, - SubresourceRange destRange) override - { - auto srcTexture = static_cast<TextureResourceImpl*>(source); - auto srcDesc = srcTexture->getDesc(); - auto dstTexture = static_cast<TextureResourceImpl*>(dest); - auto dstDesc = dstTexture->getDesc(); - - for (uint32_t layer = 0; layer < sourceRange.layerCount; ++layer) - { - for (uint32_t mip = 0; mip < sourceRange.mipLevelCount; ++mip) - { - auto srcSubresourceIndex = D3DUtil::getSubresourceIndex( - mip + sourceRange.mipLevel, - layer + sourceRange.baseArrayLayer, - 0, - srcDesc->numMipLevels, - srcDesc->arraySize); - auto dstSubresourceIndex = D3DUtil::getSubresourceIndex( - mip + destRange.mipLevel, - layer + destRange.baseArrayLayer, - 0, - dstDesc->numMipLevels, - dstDesc->arraySize); - - DXGI_FORMAT format = D3DUtil::getMapFormat(srcDesc->format); - - m_commandBuffer->m_cmdList->ResolveSubresource( - dstTexture->m_resource.getResource(), - dstSubresourceIndex, - srcTexture->m_resource.getResource(), - srcSubresourceIndex, - format); - } - } - } - - virtual SLANG_NO_THROW void SLANG_MCALL resolveQuery( - IQueryPool* queryPool, - uint32_t index, - uint32_t count, - IBufferResource* buffer, - uint64_t offset) override - { - auto queryBase = static_cast<QueryPoolBase*>(queryPool); - switch (queryBase->m_desc.type) - { - case QueryType::AccelerationStructureCompactedSize: - case QueryType::AccelerationStructureCurrentSize: - case QueryType::AccelerationStructureSerializedSize: - { - auto queryPoolImpl = static_cast<PlainBufferProxyQueryPoolImpl*>(queryPool); - auto bufferImpl = static_cast<BufferResourceImpl*>(buffer); - auto srcQueryBuffer = - queryPoolImpl->m_bufferResource->m_resource.getResource(); - - D3D12_RESOURCE_BARRIER barrier = {}; - barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; - barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_UNORDERED_ACCESS; - barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_SOURCE; - barrier.Transition.pResource = srcQueryBuffer; - m_commandBuffer->m_cmdList->ResourceBarrier(1, &barrier); - - m_commandBuffer->m_cmdList->CopyBufferRegion( - bufferImpl->m_resource.getResource(), - offset, - srcQueryBuffer, - index * sizeof(uint64_t), - count * sizeof(uint64_t)); - - barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; - barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_SOURCE; - barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_UNORDERED_ACCESS; - barrier.Transition.pResource = srcQueryBuffer; - m_commandBuffer->m_cmdList->ResourceBarrier(1, &barrier); - } - break; - default: - { - auto queryPoolImpl = static_cast<QueryPoolImpl*>(queryPool); - auto bufferImpl = static_cast<BufferResourceImpl*>(buffer); - m_commandBuffer->m_cmdList->ResolveQueryData( - queryPoolImpl->m_queryHeap.get(), - queryPoolImpl->m_queryType, - index, - count, - bufferImpl->m_resource.getResource(), - offset); - } - break; - } - } - - virtual SLANG_NO_THROW void SLANG_MCALL copyTextureToBuffer( - IBufferResource* dst, - size_t dstOffset, - size_t dstSize, - size_t dstRowStride, - ITextureResource* src, - ResourceState srcState, - SubresourceRange srcSubresource, - ITextureResource::Offset3D srcOffset, - ITextureResource::Size extent) override - { - assert(srcSubresource.mipLevelCount <= 1); - - auto srcTexture = static_cast<TextureResourceImpl*>(src); - auto dstBuffer = static_cast<BufferResourceImpl*>(dst); - auto baseSubresourceIndex = D3DUtil::getSubresourceIndex( - srcSubresource.mipLevel, - srcSubresource.baseArrayLayer, - 0, - srcTexture->getDesc()->numMipLevels, - srcTexture->getDesc()->arraySize); - auto textureSize = srcTexture->getDesc()->size; - FormatInfo formatInfo = {}; - gfxGetFormatInfo(srcTexture->getDesc()->format, &formatInfo); - if (srcSubresource.mipLevelCount == 0) - srcSubresource.mipLevelCount = srcTexture->getDesc()->numMipLevels; - if (srcSubresource.layerCount == 0) - srcSubresource.layerCount = srcTexture->getDesc()->arraySize; - - for (uint32_t layer = 0; layer < srcSubresource.layerCount; layer++) - { - // Get the footprint - D3D12_RESOURCE_DESC texDesc = srcTexture->m_resource.getResource()->GetDesc(); - - D3D12_TEXTURE_COPY_LOCATION dstRegion = {}; - dstRegion.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; - dstRegion.pResource = dstBuffer->m_resource.getResource(); - D3D12_PLACED_SUBRESOURCE_FOOTPRINT& footprint = dstRegion.PlacedFootprint; - - D3D12_TEXTURE_COPY_LOCATION srcRegion = {}; - srcRegion.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; - srcRegion.SubresourceIndex = D3DUtil::getSubresourceIndex( - srcSubresource.mipLevel, - layer + srcSubresource.baseArrayLayer, - 0, - srcTexture->getDesc()->numMipLevels, - srcTexture->getDesc()->arraySize); - srcRegion.pResource = srcTexture->m_resource.getResource(); - - footprint.Offset = dstOffset; - footprint.Footprint.Format = texDesc.Format; - uint32_t mipLevel = srcSubresource.mipLevel; - if (extent.width != 0xFFFFFFFF) - { - footprint.Footprint.Width = extent.width; - } - else - { - footprint.Footprint.Width = - Math::Max(1, (textureSize.width >> mipLevel)) - srcOffset.x; - } - if (extent.height != 0xFFFFFFFF) - { - footprint.Footprint.Height = extent.height; - } - else - { - footprint.Footprint.Height = - Math::Max(1, (textureSize.height >> mipLevel)) - srcOffset.y; - } - if (extent.depth != 0xFFFFFFFF) - { - footprint.Footprint.Depth = extent.depth; - } - else - { - footprint.Footprint.Depth = - Math::Max(1, (textureSize.depth >> mipLevel)) - srcOffset.z; - } - - assert(dstRowStride % D3D12_TEXTURE_DATA_PITCH_ALIGNMENT == 0); - footprint.Footprint.RowPitch = dstRowStride; - - auto bufferSize = footprint.Footprint.RowPitch * footprint.Footprint.Height * - footprint.Footprint.Depth; - - D3D12_BOX srcBox = {}; - srcBox.left = srcOffset.x; - srcBox.top = srcOffset.y; - srcBox.front = srcOffset.z; - srcBox.right = srcOffset.x + extent.width; - srcBox.bottom = srcOffset.y + extent.height; - srcBox.back = srcOffset.z + extent.depth; - m_commandBuffer->m_cmdList->CopyTextureRegion( - &dstRegion, 0, 0, 0, &srcRegion, &srcBox); - } - } - - virtual SLANG_NO_THROW void SLANG_MCALL textureSubresourceBarrier( - ITextureResource* texture, - SubresourceRange subresourceRange, - ResourceState src, - ResourceState dst) override - { - auto textureImpl = static_cast<TextureResourceImpl*>(texture); - - ShortList<D3D12_RESOURCE_BARRIER> barriers; - D3D12_RESOURCE_BARRIER barrier; - barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE; - if (src == dst && src == ResourceState::UnorderedAccess) - { - barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_UAV; - barrier.UAV.pResource = textureImpl->m_resource.getResource(); - barriers.add(barrier); - } - else - { - barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; - barrier.Transition.StateBefore = D3DUtil::getResourceState(src); - barrier.Transition.StateAfter = D3DUtil::getResourceState(dst); - if (barrier.Transition.StateBefore == barrier.Transition.StateAfter) - return; - barrier.Transition.pResource = textureImpl->m_resource.getResource(); - auto d3dFormat = D3DUtil::getMapFormat(textureImpl->getDesc()->format); - auto aspectMask = (int32_t)subresourceRange.aspectMask; - if (subresourceRange.aspectMask == TextureAspect::Default) - aspectMask = (int32_t)TextureAspect::Color; - while (aspectMask) - { - auto aspect = Math::getLowestBit((int32_t)aspectMask); - aspectMask &= ~aspect; - auto planeIndex = D3DUtil::getPlaneSlice(d3dFormat, (TextureAspect)aspect); - for (uint32_t layer = 0; layer < subresourceRange.layerCount; layer++) - { - for (uint32_t mip = 0; mip < subresourceRange.mipLevelCount; mip++) - { - barrier.Transition.Subresource = D3DUtil::getSubresourceIndex( - mip + subresourceRange.mipLevel, - layer + subresourceRange.baseArrayLayer, - planeIndex, - textureImpl->getDesc()->numMipLevels, - textureImpl->getDesc()->arraySize); - barriers.add(barrier); - } - } - } - } - m_commandBuffer->m_cmdList->ResourceBarrier( - (UINT)barriers.getCount(), barriers.getArrayView().getBuffer()); - } - - virtual SLANG_NO_THROW void SLANG_MCALL - beginDebugEvent(const char* name, float rgbColor[3]) override - { - auto beginEvent = m_commandBuffer->m_renderer->m_BeginEventOnCommandList; - if (beginEvent) - { - beginEvent( - m_commandBuffer->m_cmdList, - 0xff000000 | (uint8_t(rgbColor[0] * 255.0f) << 16) | - (uint8_t(rgbColor[1] * 255.0f) << 8) | uint8_t(rgbColor[2] * 255.0f), - name); - } - } - virtual SLANG_NO_THROW void SLANG_MCALL endDebugEvent() override - { - auto endEvent = m_commandBuffer->m_renderer->m_EndEventOnCommandList; - if (endEvent) - { - endEvent(m_commandBuffer->m_cmdList); - } - } - }; - - ResourceCommandEncoderImpl m_resourceCommandEncoder; - - virtual SLANG_NO_THROW void SLANG_MCALL - encodeResourceCommands(IResourceCommandEncoder** outEncoder) override - { - m_resourceCommandEncoder.init(this); - *outEncoder = &m_resourceCommandEncoder; - } - - class RenderCommandEncoderImpl - : public IRenderCommandEncoder - , public ResourceCommandEncoderImpl - { - public: - SLANG_GFX_FORWARD_RESOURCE_COMMAND_ENCODER_IMPL(ResourceCommandEncoderImpl) - public: - RefPtr<RenderPassLayoutImpl> m_renderPass; - RefPtr<FramebufferImpl> m_framebuffer; - - List<BoundVertexBuffer> m_boundVertexBuffers; - - RefPtr<BufferResourceImpl> m_boundIndexBuffer; - - D3D12_VIEWPORT m_viewports[kMaxRTVCount]; - D3D12_RECT m_scissorRects[kMaxRTVCount]; - - DXGI_FORMAT m_boundIndexFormat; - UINT m_boundIndexOffset; - - D3D12_PRIMITIVE_TOPOLOGY_TYPE m_primitiveTopologyType; - D3D12_PRIMITIVE_TOPOLOGY m_primitiveTopology; - - void init( - D3D12Device* renderer, - TransientResourceHeapImpl* transientHeap, - CommandBufferImpl* cmdBuffer, - RenderPassLayoutImpl* renderPass, - FramebufferImpl* framebuffer) - { - PipelineCommandEncoder::init(cmdBuffer); - m_preCmdList = nullptr; - m_renderPass = renderPass; - m_framebuffer = framebuffer; - m_transientHeap = transientHeap; - m_boundVertexBuffers.clear(); - m_boundIndexBuffer = nullptr; - m_primitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; - m_primitiveTopology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST; - m_boundIndexFormat = DXGI_FORMAT_UNKNOWN; - m_boundIndexOffset = 0; - m_currentPipeline = nullptr; - - // Set render target states. - if (!framebuffer) - { - return; - } - m_d3dCmdList->OMSetRenderTargets( - (UINT)framebuffer->renderTargetViews.getCount(), - framebuffer->renderTargetDescriptors.getArrayView().getBuffer(), - FALSE, - framebuffer->depthStencilView ? &framebuffer->depthStencilDescriptor : nullptr); - - // Issue clear commands based on render pass set up. - for (Index i = 0; i < framebuffer->renderTargetViews.getCount(); i++) - { - if (i >= renderPass->m_renderTargetAccesses.getCount()) - continue; - - auto& access = renderPass->m_renderTargetAccesses[i]; - - // Transit resource states. - { - D3D12BarrierSubmitter submitter(m_d3dCmdList); - auto resourceViewImpl = framebuffer->renderTargetViews[i].Ptr(); - if (resourceViewImpl) - { - auto textureResource = static_cast<TextureResourceImpl*>( - resourceViewImpl->m_resource.Ptr()); - if (textureResource) - { - D3D12_RESOURCE_STATES initialState; - if (access.initialState == ResourceState::Undefined) - { - initialState = textureResource->m_defaultState; - } - else - { - initialState = D3DUtil::getResourceState(access.initialState); - } - textureResource->m_resource.transition( - initialState, D3D12_RESOURCE_STATE_RENDER_TARGET, submitter); - } - } - } - // Clear. - if (access.loadOp == IRenderPassLayout::AttachmentLoadOp::Clear) - { - m_d3dCmdList->ClearRenderTargetView( - framebuffer->renderTargetDescriptors[i], - framebuffer->renderTargetClearValues[i].values, - 0, - nullptr); - } - } - - if (renderPass->m_hasDepthStencil) - { - // Transit resource states. - { - D3D12BarrierSubmitter submitter(m_d3dCmdList); - auto resourceViewImpl = framebuffer->depthStencilView.Ptr(); - auto textureResource = - static_cast<TextureResourceImpl*>(resourceViewImpl->m_resource.Ptr()); - D3D12_RESOURCE_STATES initialState; - if (renderPass->m_depthStencilAccess.initialState == - ResourceState::Undefined) - { - initialState = textureResource->m_defaultState; - } - else - { - initialState = D3DUtil::getResourceState( - renderPass->m_depthStencilAccess.initialState); - } - textureResource->m_resource.transition( - initialState, - D3D12_RESOURCE_STATE_DEPTH_WRITE, - submitter); - } - // Clear. - uint32_t clearFlags = 0; - if (renderPass->m_depthStencilAccess.loadOp == - IRenderPassLayout::AttachmentLoadOp::Clear) - { - clearFlags |= D3D12_CLEAR_FLAG_DEPTH; - } - if (renderPass->m_depthStencilAccess.stencilLoadOp == - IRenderPassLayout::AttachmentLoadOp::Clear) - { - clearFlags |= D3D12_CLEAR_FLAG_STENCIL; - } - if (clearFlags) - { - m_d3dCmdList->ClearDepthStencilView( - framebuffer->depthStencilDescriptor, - (D3D12_CLEAR_FLAGS)clearFlags, - framebuffer->depthStencilClearValue.depth, - framebuffer->depthStencilClearValue.stencil, - 0, - nullptr); - } - } - } - - virtual SLANG_NO_THROW Result SLANG_MCALL - bindPipeline(IPipelineState* state, IShaderObject** outRootObject) override - { - return bindPipelineImpl(state, outRootObject); - } - - virtual SLANG_NO_THROW Result SLANG_MCALL - bindPipelineWithRootObject(IPipelineState* state, IShaderObject* rootObject) override - { - return bindPipelineWithRootObjectImpl(state, rootObject); - } - - virtual SLANG_NO_THROW void SLANG_MCALL - setViewports(uint32_t count, const Viewport* viewports) override - { - static const int kMaxViewports = - D3D12_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE; - assert(count <= kMaxViewports && count <= kMaxRTVCount); - for (UInt ii = 0; ii < count; ++ii) - { - auto& inViewport = viewports[ii]; - auto& dxViewport = m_viewports[ii]; - - dxViewport.TopLeftX = inViewport.originX; - dxViewport.TopLeftY = inViewport.originY; - dxViewport.Width = inViewport.extentX; - dxViewport.Height = inViewport.extentY; - dxViewport.MinDepth = inViewport.minZ; - dxViewport.MaxDepth = inViewport.maxZ; - } - m_d3dCmdList->RSSetViewports(UINT(count), m_viewports); - } - - virtual SLANG_NO_THROW void SLANG_MCALL - setScissorRects(uint32_t count, const ScissorRect* rects) override - { - static const int kMaxScissorRects = - D3D12_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE; - assert(count <= kMaxScissorRects && count <= kMaxRTVCount); - - for (UInt ii = 0; ii < count; ++ii) - { - auto& inRect = rects[ii]; - auto& dxRect = m_scissorRects[ii]; - - dxRect.left = LONG(inRect.minX); - dxRect.top = LONG(inRect.minY); - dxRect.right = LONG(inRect.maxX); - dxRect.bottom = LONG(inRect.maxY); - } - - m_d3dCmdList->RSSetScissorRects(UINT(count), m_scissorRects); - } - - virtual SLANG_NO_THROW void SLANG_MCALL - setPrimitiveTopology(PrimitiveTopology topology) override - { - m_primitiveTopologyType = D3DUtil::getPrimitiveType(topology); - m_primitiveTopology = D3DUtil::getPrimitiveTopology(topology); - } - - virtual SLANG_NO_THROW void SLANG_MCALL setVertexBuffers( - uint32_t startSlot, - uint32_t slotCount, - IBufferResource* const* buffers, - const uint32_t* offsets) override - { - { - const Index num = startSlot + slotCount; - if (num > m_boundVertexBuffers.getCount()) - { - m_boundVertexBuffers.setCount(num); - } - } - - for (UInt i = 0; i < slotCount; i++) - { - BufferResourceImpl* buffer = static_cast<BufferResourceImpl*>(buffers[i]); - - BoundVertexBuffer& boundBuffer = m_boundVertexBuffers[startSlot + i]; - boundBuffer.m_buffer = buffer; - boundBuffer.m_offset = int(offsets[i]); - } - } - - virtual SLANG_NO_THROW void SLANG_MCALL setIndexBuffer( - IBufferResource* buffer, Format indexFormat, uint32_t offset = 0) override - { - m_boundIndexBuffer = (BufferResourceImpl*)buffer; - m_boundIndexFormat = D3DUtil::getMapFormat(indexFormat); - m_boundIndexOffset = offset; - } - - void prepareDraw() - { - auto pipelineState = m_currentPipeline.Ptr(); - if (!pipelineState || (pipelineState->desc.type != PipelineType::Graphics)) - { - assert(!"No graphics pipeline state set"); - return; - } - - // Submit - setting for graphics - { - GraphicsSubmitter submitter(m_d3dCmdList); - RefPtr<PipelineStateBase> newPipeline; - if(SLANG_FAILED(_bindRenderState(&submitter, newPipeline))) - { - assert(!"Failed to bind render state"); - } - } - - m_d3dCmdList->IASetPrimitiveTopology(m_primitiveTopology); - - // Set up vertex buffer views - { - auto inputLayout = (InputLayoutImpl*)pipelineState->inputLayout.Ptr(); - if (inputLayout) - { - int numVertexViews = 0; - D3D12_VERTEX_BUFFER_VIEW vertexViews[16]; - for (Index i = 0; i < m_boundVertexBuffers.getCount(); i++) - { - const BoundVertexBuffer& boundVertexBuffer = m_boundVertexBuffers[i]; - BufferResourceImpl* buffer = boundVertexBuffer.m_buffer; - if (buffer) - { - D3D12_VERTEX_BUFFER_VIEW& vertexView = - vertexViews[numVertexViews++]; - vertexView.BufferLocation = - buffer->m_resource.getResource()->GetGPUVirtualAddress() + - boundVertexBuffer.m_offset; - vertexView.SizeInBytes = UINT( - buffer->getDesc()->sizeInBytes - boundVertexBuffer.m_offset); - vertexView.StrideInBytes = inputLayout->m_vertexStreamStrides[i]; - } - } - m_d3dCmdList->IASetVertexBuffers(0, numVertexViews, vertexViews); - } - } - // Set up index buffer - if (m_boundIndexBuffer) - { - D3D12_INDEX_BUFFER_VIEW indexBufferView; - indexBufferView.BufferLocation = - m_boundIndexBuffer->m_resource.getResource()->GetGPUVirtualAddress() + - m_boundIndexOffset; - indexBufferView.SizeInBytes = - UINT(m_boundIndexBuffer->getDesc()->sizeInBytes - m_boundIndexOffset); - indexBufferView.Format = m_boundIndexFormat; - - m_d3dCmdList->IASetIndexBuffer(&indexBufferView); - } - } - virtual SLANG_NO_THROW void SLANG_MCALL - draw(uint32_t vertexCount, uint32_t startVertex = 0) override - { - prepareDraw(); - m_d3dCmdList->DrawInstanced(vertexCount, 1, startVertex, 0); - } - virtual SLANG_NO_THROW void SLANG_MCALL drawIndexed( - uint32_t indexCount, uint32_t startIndex = 0, uint32_t baseVertex = 0) override - { - prepareDraw(); - m_d3dCmdList->DrawIndexedInstanced(indexCount, 1, startIndex, baseVertex, 0); - } - virtual SLANG_NO_THROW void SLANG_MCALL endEncoding() override - { - PipelineCommandEncoder::endEncodingImpl(); - if (!m_framebuffer) - return; - // Issue clear commands based on render pass set up. - for (Index i = 0; i < m_renderPass->m_renderTargetAccesses.getCount(); i++) - { - auto& access = m_renderPass->m_renderTargetAccesses[i]; - - // Transit resource states. - { - D3D12BarrierSubmitter submitter(m_d3dCmdList); - auto resourceViewImpl = m_framebuffer->renderTargetViews[i].Ptr(); - if (!resourceViewImpl) - continue; - auto textureResource = - static_cast<TextureResourceImpl*>(resourceViewImpl->m_resource.Ptr()); - if (textureResource) - { - textureResource->m_resource.transition( - D3D12_RESOURCE_STATE_RENDER_TARGET, - D3DUtil::getResourceState(access.finalState), - submitter); - } - } - } - - if (m_renderPass->m_hasDepthStencil) - { - // Transit resource states. - D3D12BarrierSubmitter submitter(m_d3dCmdList); - auto resourceViewImpl = m_framebuffer->depthStencilView.Ptr(); - auto textureResource = - static_cast<TextureResourceImpl*>(resourceViewImpl->m_resource.Ptr()); - textureResource->m_resource.transition( - D3D12_RESOURCE_STATE_DEPTH_WRITE, - D3DUtil::getResourceState( - m_renderPass->m_depthStencilAccess.finalState), - submitter); - } - m_framebuffer = nullptr; - } - - virtual SLANG_NO_THROW void SLANG_MCALL - setStencilReference(uint32_t referenceValue) override - { - m_d3dCmdList->OMSetStencilRef((UINT)referenceValue); - } - - virtual SLANG_NO_THROW void SLANG_MCALL drawIndirect( - uint32_t maxDrawCount, - IBufferResource* argBuffer, - uint64_t argOffset, - IBufferResource* countBuffer, - uint64_t countOffset) override - { - prepareDraw(); - - auto argBufferImpl = static_cast<BufferResourceImpl*>(argBuffer); - auto countBufferImpl = static_cast<BufferResourceImpl*>(countBuffer); - - m_d3dCmdList->ExecuteIndirect( - m_renderer->drawIndirectCmdSignature, - maxDrawCount, - argBufferImpl->m_resource, - argOffset, - countBufferImpl ? countBufferImpl->m_resource.getResource() : nullptr, - countOffset); - } - - virtual SLANG_NO_THROW void SLANG_MCALL drawIndexedIndirect( - uint32_t maxDrawCount, - IBufferResource* argBuffer, - uint64_t argOffset, - IBufferResource* countBuffer, - uint64_t countOffset) override - { - prepareDraw(); - - auto argBufferImpl = static_cast<BufferResourceImpl*>(argBuffer); - auto countBufferImpl = static_cast<BufferResourceImpl*>(countBuffer); - - m_d3dCmdList->ExecuteIndirect( - m_renderer->drawIndexedIndirectCmdSignature, - maxDrawCount, - argBufferImpl->m_resource, - argOffset, - countBufferImpl ? countBufferImpl->m_resource.getResource() : nullptr, - countOffset); - } - - virtual SLANG_NO_THROW Result SLANG_MCALL setSamplePositions( - uint32_t samplesPerPixel, - uint32_t pixelCount, - const SamplePosition* samplePositions) override - { - if (m_commandBuffer->m_cmdList1) - { - m_commandBuffer->m_cmdList1->SetSamplePositions( - samplesPerPixel, pixelCount, (D3D12_SAMPLE_POSITION*)samplePositions); - return SLANG_OK; - } - return SLANG_E_NOT_AVAILABLE; - } - - virtual SLANG_NO_THROW void SLANG_MCALL drawInstanced( - uint32_t vertexCount, - uint32_t instanceCount, - uint32_t startVertex, - uint32_t startInstanceLocation) override - { - prepareDraw(); - m_d3dCmdList->DrawInstanced( - vertexCount, instanceCount, startVertex, startInstanceLocation); - } - - virtual SLANG_NO_THROW void SLANG_MCALL drawIndexedInstanced( - uint32_t indexCount, - uint32_t instanceCount, - uint32_t startIndexLocation, - int32_t baseVertexLocation, - uint32_t startInstanceLocation) override - { - prepareDraw(); - m_d3dCmdList->DrawIndexedInstanced(indexCount, instanceCount, startIndexLocation, baseVertexLocation, startInstanceLocation); - } - }; - - RenderCommandEncoderImpl m_renderCommandEncoder; - virtual SLANG_NO_THROW void SLANG_MCALL encodeRenderCommands( - IRenderPassLayout* renderPass, - IFramebuffer* framebuffer, - IRenderCommandEncoder** outEncoder) override - { - m_renderCommandEncoder.init( - m_renderer, - m_transientHeap, - this, - static_cast<RenderPassLayoutImpl*>(renderPass), - static_cast<FramebufferImpl*>(framebuffer)); - *outEncoder = &m_renderCommandEncoder; - } - - class ComputeCommandEncoderImpl - : public IComputeCommandEncoder - , public ResourceCommandEncoderImpl - { - public: - SLANG_GFX_FORWARD_RESOURCE_COMMAND_ENCODER_IMPL(ResourceCommandEncoderImpl) - public: - virtual SLANG_NO_THROW void SLANG_MCALL endEncoding() override - { - PipelineCommandEncoder::endEncodingImpl(); - } - void init( - D3D12Device* renderer, - TransientResourceHeapImpl* transientHeap, - CommandBufferImpl* cmdBuffer) - { - PipelineCommandEncoder::init(cmdBuffer); - m_preCmdList = nullptr; - m_transientHeap = transientHeap; - m_currentPipeline = nullptr; - } - - virtual SLANG_NO_THROW Result SLANG_MCALL - bindPipeline(IPipelineState* state, IShaderObject** outRootObject) override - { - return bindPipelineImpl(state, outRootObject); - } - - virtual SLANG_NO_THROW Result SLANG_MCALL bindPipelineWithRootObject( - IPipelineState* state, IShaderObject* rootObject) override - { - return bindPipelineWithRootObjectImpl(state, rootObject); - } - - virtual SLANG_NO_THROW void SLANG_MCALL dispatchCompute(int x, int y, int z) override - { - // Submit binding for compute - { - ComputeSubmitter submitter(m_d3dCmdList); - RefPtr<PipelineStateBase> newPipeline; - if (SLANG_FAILED(_bindRenderState(&submitter, newPipeline))) - { - assert(!"Failed to bind render state"); - } - } - m_d3dCmdList->Dispatch(x, y, z); - } - - virtual SLANG_NO_THROW void SLANG_MCALL - dispatchComputeIndirect(IBufferResource* argBuffer, uint64_t offset) override - { - // Submit binding for compute - { - ComputeSubmitter submitter(m_d3dCmdList); - RefPtr<PipelineStateBase> newPipeline; - if (SLANG_FAILED(_bindRenderState(&submitter, newPipeline))) - { - assert(!"Failed to bind render state"); - } - } - auto argBufferImpl = static_cast<BufferResourceImpl*>(argBuffer); - - m_d3dCmdList->ExecuteIndirect( - m_renderer->dispatchIndirectCmdSignature, - 1, - argBufferImpl->m_resource, - offset, - nullptr, - 0); - } - }; - - ComputeCommandEncoderImpl m_computeCommandEncoder; - virtual SLANG_NO_THROW void SLANG_MCALL - encodeComputeCommands(IComputeCommandEncoder** outEncoder) override - { - m_computeCommandEncoder.init(m_renderer, m_transientHeap, this); - *outEncoder = &m_computeCommandEncoder; - } - -#if SLANG_GFX_HAS_DXR_SUPPORT - class RayTracingCommandEncoderImpl - : public IRayTracingCommandEncoder - , public ResourceCommandEncoderImpl - { - public: - SLANG_GFX_FORWARD_RESOURCE_COMMAND_ENCODER_IMPL(ResourceCommandEncoderImpl) - public: - virtual SLANG_NO_THROW void SLANG_MCALL buildAccelerationStructure( - const IAccelerationStructure::BuildDesc& desc, - int propertyQueryCount, - AccelerationStructureQueryDesc* queryDescs) override; - virtual SLANG_NO_THROW void SLANG_MCALL copyAccelerationStructure( - IAccelerationStructure* dest, - IAccelerationStructure* src, - AccelerationStructureCopyMode mode) override; - virtual SLANG_NO_THROW void SLANG_MCALL queryAccelerationStructureProperties( - int accelerationStructureCount, - IAccelerationStructure* const* accelerationStructures, - int queryCount, - AccelerationStructureQueryDesc* queryDescs) override; - virtual SLANG_NO_THROW void SLANG_MCALL serializeAccelerationStructure( - DeviceAddress dest, - IAccelerationStructure* source) override; - virtual SLANG_NO_THROW void SLANG_MCALL deserializeAccelerationStructure( - IAccelerationStructure* dest, - DeviceAddress source) override; - virtual SLANG_NO_THROW void SLANG_MCALL - bindPipeline(IPipelineState* state, IShaderObject** outRootObject) override; - virtual SLANG_NO_THROW Result SLANG_MCALL bindPipelineWithRootObject( - IPipelineState* state, IShaderObject* rootObject) override - { - return bindPipelineWithRootObjectImpl(state, rootObject); - } - virtual SLANG_NO_THROW void SLANG_MCALL dispatchRays( - uint32_t rayGenShaderIndex, - IShaderTable* shaderTable, - int32_t width, - int32_t height, - int32_t depth) override; - virtual SLANG_NO_THROW void SLANG_MCALL endEncoding() {} - }; - RayTracingCommandEncoderImpl m_rayTracingCommandEncoder; - virtual SLANG_NO_THROW void SLANG_MCALL - encodeRayTracingCommands(IRayTracingCommandEncoder** outEncoder) override - { - m_rayTracingCommandEncoder.init(this); - *outEncoder = &m_rayTracingCommandEncoder; - } #else - virtual SLANG_NO_THROW void SLANG_MCALL - encodeRayTracingCommands(IRayTracingCommandEncoder** outEncoder) override - { - *outEncoder = nullptr; - } + return false; #endif +} - virtual SLANG_NO_THROW void SLANG_MCALL close() override { m_cmdList->Close(); } - }; - - class FenceImpl : public FenceBase - { - public: - ComPtr<ID3D12Fence> m_fence; - HANDLE m_waitEvent = 0; - - ~FenceImpl() - { - if (m_waitEvent) - CloseHandle(m_waitEvent); - } - - HANDLE getWaitEvent() - { - if (m_waitEvent) - return m_waitEvent; - m_waitEvent = CreateEventEx( - nullptr, - nullptr, - 0, - EVENT_ALL_ACCESS); - return m_waitEvent; - } - - Result init(D3D12Device* device, const IFence::Desc& desc) - { - SLANG_RETURN_ON_FAIL(device->m_device->CreateFence( - desc.initialValue, - desc.isShared ? D3D12_FENCE_FLAG_SHARED : D3D12_FENCE_FLAG_NONE, - IID_PPV_ARGS(m_fence.writeRef()))); - return SLANG_OK; - } - - virtual SLANG_NO_THROW Result SLANG_MCALL getCurrentValue(uint64_t* outValue) override - { - *outValue = m_fence->GetCompletedValue(); - return SLANG_OK; - } - - virtual SLANG_NO_THROW Result SLANG_MCALL setCurrentValue(uint64_t value) override - { - SLANG_RETURN_ON_FAIL(m_fence->Signal(value)); - return SLANG_OK; - } - - virtual SLANG_NO_THROW Result SLANG_MCALL getSharedHandle(InteropHandle* outHandle) override - { - // Check if a shared handle already exists. - if (sharedHandle.handleValue != 0) - { - *outHandle = sharedHandle; - return SLANG_OK; - } - - ComPtr<ID3D12Device> devicePtr; - m_fence->GetDevice(IID_PPV_ARGS(devicePtr.writeRef())); - SLANG_RETURN_ON_FAIL(devicePtr->CreateSharedHandle(m_fence, NULL, GENERIC_ALL, nullptr, (HANDLE*)&outHandle->handleValue)); - outHandle->api = InteropHandleAPI::D3D12; - sharedHandle = *outHandle; - return SLANG_OK; - } - - virtual SLANG_NO_THROW Result SLANG_MCALL - getNativeHandle(InteropHandle* outNativeHandle) override - { - outNativeHandle->api = gfx::InteropHandleAPI::D3D12; - outNativeHandle->handleValue = (uint64_t)m_fence.get(); - return SLANG_OK; - } - }; - - class CommandQueueImpl - : public ICommandQueue - , public ComObject - { - public: - SLANG_COM_OBJECT_IUNKNOWN_ALL - ICommandQueue* getInterface(const Guid& guid) - { - if (guid == GfxGUID::IID_ISlangUnknown || guid == GfxGUID::IID_ICommandQueue) - return static_cast<ICommandQueue*>(this); - return nullptr; - } - void breakStrongReferenceToDevice() { m_renderer.breakStrongReference(); } - - virtual SLANG_NO_THROW Result SLANG_MCALL getNativeHandle(InteropHandle* handle) override - { - handle->api = InteropHandleAPI::D3D12; - handle->handleValue = (uint64_t)m_d3dQueue.get(); - return SLANG_OK; - } - public: - BreakableReference<D3D12Device> m_renderer; - ComPtr<ID3D12Device> m_device; - ComPtr<ID3D12CommandQueue> m_d3dQueue; - ComPtr<ID3D12Fence> m_fence; - uint64_t m_fenceValue = 0; - HANDLE globalWaitHandle; - Desc m_desc; - uint32_t m_queueIndex = 0; - - Result init(D3D12Device* device, uint32_t queueIndex) - { - m_queueIndex = queueIndex; - m_renderer = device; - m_device = device->m_device; - D3D12_COMMAND_QUEUE_DESC queueDesc = {}; - queueDesc.Type = D3D12_COMMAND_LIST_TYPE_DIRECT; - SLANG_RETURN_ON_FAIL(m_device->CreateCommandQueue(&queueDesc, IID_PPV_ARGS(m_d3dQueue.writeRef()))); - SLANG_RETURN_ON_FAIL( - m_device->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(m_fence.writeRef()))); - globalWaitHandle = CreateEventEx( - nullptr, - nullptr, - CREATE_EVENT_INITIAL_SET | CREATE_EVENT_MANUAL_RESET, - EVENT_ALL_ACCESS); - return SLANG_OK; - } - ~CommandQueueImpl() - { - waitOnHost(); - CloseHandle(globalWaitHandle); - m_renderer->m_queueIndexAllocator.free((int)m_queueIndex, 1); - } - virtual SLANG_NO_THROW const Desc& SLANG_MCALL getDesc() override - { - return m_desc; - } - - virtual SLANG_NO_THROW void SLANG_MCALL - executeCommandBuffers(uint32_t count, ICommandBuffer* const* commandBuffers, IFence* fence, uint64_t valueToSignal) override - { - ShortList<ID3D12CommandList*> commandLists; - for (uint32_t i = 0; i < count; i++) - { - auto cmdImpl = static_cast<CommandBufferImpl*>(commandBuffers[i]); - commandLists.add(cmdImpl->m_cmdList); - } - if (count > 0) - { - m_d3dQueue->ExecuteCommandLists((UINT)count, commandLists.getArrayView().getBuffer()); - - m_fenceValue++; - - for (uint32_t i = 0; i < count; i++) - { - if (i > 0 && commandBuffers[i] == commandBuffers[i - 1]) - continue; - auto cmdImpl = static_cast<CommandBufferImpl*>(commandBuffers[i]); - auto transientHeap = cmdImpl->m_transientHeap; - auto& waitInfo = transientHeap->getQueueWaitInfo(m_queueIndex); - waitInfo.waitValue = m_fenceValue; - waitInfo.fence = m_fence; - } - m_d3dQueue->Signal(m_fence, m_fenceValue); - } - - if (fence) - { - auto fenceImpl = static_cast<FenceImpl*>(fence); - m_d3dQueue->Signal(fenceImpl->m_fence.get(), valueToSignal); - } - } - - virtual SLANG_NO_THROW void SLANG_MCALL waitOnHost() override - { - m_fenceValue++; - m_d3dQueue->Signal(m_fence, m_fenceValue); - ResetEvent(globalWaitHandle); - m_fence->SetEventOnCompletion(m_fenceValue, globalWaitHandle); - WaitForSingleObject(globalWaitHandle, INFINITE); - } - - virtual SLANG_NO_THROW Result SLANG_MCALL waitForFenceValuesOnDevice( - uint32_t fenceCount, IFence** fences, uint64_t* waitValues) override - { - for (uint32_t i = 0; i < fenceCount; ++i) - { - auto fenceImpl = static_cast<FenceImpl*>(fences[i]); - m_d3dQueue->Wait( - fenceImpl->m_fence.get(), - waitValues[i]); - } - return SLANG_OK; - } - }; - - class SwapchainImpl : public D3DSwapchainBase - { - public: - ComPtr<ID3D12CommandQueue> m_queue; - ComPtr<IDXGIFactory> m_dxgiFactory; - ComPtr<IDXGISwapChain3> m_swapChain3; - ComPtr<ID3D12Fence> m_fence; - ShortList<HANDLE, kMaxNumRenderFrames> m_frameEvents; - uint64_t fenceValue = 0; - Result init( - D3D12Device* renderer, - const ISwapchain::Desc& swapchainDesc, - WindowHandle window) - { - m_queue = static_cast<CommandQueueImpl*>(swapchainDesc.queue)->m_d3dQueue; - m_dxgiFactory = renderer->m_deviceInfo.m_dxgiFactory; - SLANG_RETURN_ON_FAIL( - D3DSwapchainBase::init(swapchainDesc, window, DXGI_SWAP_EFFECT_FLIP_DISCARD)); - renderer->m_device->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(m_fence.writeRef())); - - SLANG_RETURN_ON_FAIL(m_swapChain->QueryInterface(m_swapChain3.writeRef())); - for (uint32_t i = 0; i < swapchainDesc.imageCount; i++) - { - m_frameEvents.add(CreateEventEx( - nullptr, - false, - CREATE_EVENT_INITIAL_SET | CREATE_EVENT_MANUAL_RESET, - EVENT_ALL_ACCESS)); - } - return SLANG_OK; - } - - virtual SLANG_NO_THROW Result SLANG_MCALL resize(uint32_t width, uint32_t height) override - { - for (auto evt : m_frameEvents) - SetEvent(evt); - SLANG_RETURN_ON_FAIL(D3DSwapchainBase::resize(width, height)); - return SLANG_OK; - } - - virtual void createSwapchainBufferImages() override - { - m_images.clear(); - - for (uint32_t i = 0; i < m_desc.imageCount; i++) - { - ComPtr<ID3D12Resource> d3dResource; - m_swapChain->GetBuffer(i, IID_PPV_ARGS(d3dResource.writeRef())); - ITextureResource::Desc imageDesc = {}; - imageDesc.allowedStates = ResourceStateSet( - ResourceState::Present, - ResourceState::RenderTarget, - ResourceState::CopyDestination); - imageDesc.type = IResource::Type::Texture2D; - imageDesc.arraySize = 0; - imageDesc.format = m_desc.format; - imageDesc.size.width = m_desc.width; - imageDesc.size.height = m_desc.height; - imageDesc.size.depth = 1; - imageDesc.numMipLevels = 1; - imageDesc.defaultState = ResourceState::Present; - RefPtr<TextureResourceImpl> image = new TextureResourceImpl(imageDesc); - image->m_resource.setResource(d3dResource.get()); - image->m_defaultState = D3D12_RESOURCE_STATE_PRESENT; - m_images.add(image); - } - for (auto evt : m_frameEvents) - SetEvent(evt); - } - virtual IDXGIFactory* getDXGIFactory() override { return m_dxgiFactory; } - virtual IUnknown* getOwningDevice() override { return m_queue; } - virtual SLANG_NO_THROW int SLANG_MCALL acquireNextImage() override - { - auto result = (int)m_swapChain3->GetCurrentBackBufferIndex(); - WaitForSingleObject(m_frameEvents[result], INFINITE); - ResetEvent(m_frameEvents[result]); - return result; - } - virtual SLANG_NO_THROW Result SLANG_MCALL present() override - { - m_fence->SetEventOnCompletion(fenceValue, m_frameEvents[m_swapChain3->GetCurrentBackBufferIndex()]); - SLANG_RETURN_ON_FAIL(D3DSwapchainBase::present()); - fenceValue++; - m_queue->Signal(m_fence, fenceValue); - - return SLANG_OK; - } - virtual SLANG_NO_THROW bool SLANG_MCALL isOccluded() override - { - return (m_swapChain3->Present(0, DXGI_PRESENT_TEST) == DXGI_STATUS_OCCLUDED); - } - virtual SLANG_NO_THROW Result SLANG_MCALL setFullScreenMode(bool mode) override - { - return m_swapChain3->SetFullscreenState(mode, nullptr); - } - }; - - static PROC loadProc(HMODULE module, char const* name); - - Result createCommandQueueImpl(CommandQueueImpl** outQueue); - - Result createTransientResourceHeapImpl( - ITransientResourceHeap::Flags::Enum flags, - size_t constantBufferSize, - uint32_t viewDescriptors, - uint32_t samplerDescriptors, - TransientResourceHeapImpl** outHeap); - - Result createBuffer( - const D3D12_RESOURCE_DESC& resourceDesc, - const void* srcData, - size_t srcDataSize, - D3D12_RESOURCE_STATES finalState, - D3D12Resource& resourceOut, - bool isShared, - MemoryType access = MemoryType::DeviceLocal); - - Result captureTextureToSurface( - TextureResourceImpl* resource, - ResourceState state, - ISlangBlob** blob, - size_t* outRowPitch, - size_t* outPixelSize); - - Result _createDevice( - DeviceCheckFlags deviceCheckFlags, - const UnownedStringSlice& nameMatch, - D3D_FEATURE_LEVEL featureLevel, - DeviceInfo& outDeviceInfo); - - struct ResourceCommandRecordInfo - { - ComPtr<ICommandBuffer> commandBuffer; - ID3D12GraphicsCommandList* d3dCommandList; - }; - ResourceCommandRecordInfo encodeResourceCommands() +D3D12_RESOURCE_FLAGS calcResourceFlag(ResourceState state) +{ + switch (state) { - ResourceCommandRecordInfo info; - m_resourceCommandTransientHeap->createCommandBuffer(info.commandBuffer.writeRef()); - info.d3dCommandList = static_cast<CommandBufferImpl*>(info.commandBuffer.get())->m_cmdList; - return info; + case ResourceState::RenderTarget: + return D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET; + case ResourceState::DepthRead: + case ResourceState::DepthWrite: + return D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL; + case ResourceState::UnorderedAccess: + case ResourceState::AccelerationStructure: + return D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS; + default: + return D3D12_RESOURCE_FLAG_NONE; } - void submitResourceCommandsAndWait(const ResourceCommandRecordInfo& info) +} + +D3D12_RESOURCE_FLAGS calcResourceFlags(ResourceStateSet states) +{ + int dstFlags = 0; + for (uint32_t i = 0; i < (uint32_t)ResourceState::_Count; i++) { - info.commandBuffer->close(); - m_resourceCommandQueue->executeCommandBuffer(info.commandBuffer); - m_resourceCommandTransientHeap->synchronizeAndReset(); + auto state = (ResourceState)i; + if (states.contains(state)) + dstFlags |= calcResourceFlag(state); } + return (D3D12_RESOURCE_FLAGS)dstFlags; +} - // D3D12Device members. - - Desc m_desc; - D3D12DeviceExtendedDesc m_extendedDesc; - - gfx::DeviceInfo m_info; - String m_adapterName; - - bool m_isInitialized = false; - - ComPtr<ID3D12Debug> m_dxDebug; - - DeviceInfo m_deviceInfo; - ID3D12Device* m_device = nullptr; - ID3D12Device5* m_device5 = nullptr; - - VirtualObjectPool m_queueIndexAllocator; - - RefPtr<CommandQueueImpl> m_resourceCommandQueue; - RefPtr<TransientResourceHeapImpl> m_resourceCommandTransientHeap; - - RefPtr<D3D12GeneralExpandingDescriptorHeap> m_rtvAllocator; - RefPtr<D3D12GeneralExpandingDescriptorHeap> m_dsvAllocator; - // Space in the GPU-visible heaps is precious, so we will also keep - // around CPU-visible heaps for storing descriptors in a format - // that is ready for copying into the GPU-visible heaps as needed. - // - RefPtr<D3D12GeneralExpandingDescriptorHeap> m_cpuViewHeap; ///< Cbv, Srv, Uav - RefPtr<D3D12GeneralExpandingDescriptorHeap> m_cpuSamplerHeap; ///< Heap for samplers - - // Dll entry points - PFN_D3D12_GET_DEBUG_INTERFACE m_D3D12GetDebugInterface = nullptr; - PFN_D3D12_CREATE_DEVICE m_D3D12CreateDevice = nullptr; - PFN_D3D12_SERIALIZE_ROOT_SIGNATURE m_D3D12SerializeRootSignature = nullptr; - - PFN_BeginEventOnCommandList m_BeginEventOnCommandList = nullptr; - PFN_EndEventOnCommandList m_EndEventOnCommandList = nullptr; - - bool m_nvapi = false; - - // Command signatures required for indirect draws. These indicate the format of the indirect - // as well as the command type to be used (DrawInstanced and DrawIndexedInstanced, in this case). - ComPtr<ID3D12CommandSignature> drawIndirectCmdSignature; - ComPtr<ID3D12CommandSignature> drawIndexedIndirectCmdSignature; - ComPtr<ID3D12CommandSignature> dispatchIndirectCmdSignature; -}; - -SLANG_NO_THROW Result SLANG_MCALL D3D12Device::TransientResourceHeapImpl::synchronizeAndReset() +D3D12_RESOURCE_DIMENSION calcResourceDimension(IResource::Type type) { - Array<HANDLE, 16> waitHandles; - for (auto& waitInfo : m_waitInfos) + switch (type) { - if (waitInfo.waitValue == 0) - continue; - if (waitInfo.fence) + case IResource::Type::Buffer: + return D3D12_RESOURCE_DIMENSION_BUFFER; + case IResource::Type::Texture1D: + return D3D12_RESOURCE_DIMENSION_TEXTURE1D; + case IResource::Type::TextureCube: + case IResource::Type::Texture2D: { - waitInfo.fence->SetEventOnCompletion(waitInfo.waitValue, waitInfo.fenceEvent); - waitHandles.add(waitInfo.fenceEvent); + return D3D12_RESOURCE_DIMENSION_TEXTURE2D; } + case IResource::Type::Texture3D: + return D3D12_RESOURCE_DIMENSION_TEXTURE3D; + default: + return D3D12_RESOURCE_DIMENSION_UNKNOWN; } - WaitForMultipleObjects((DWORD)waitHandles.getCount(), waitHandles.getBuffer(), TRUE, INFINITE); - m_currentViewHeapIndex = -1; - m_currentSamplerHeapIndex = -1; - allocateNewViewDescriptorHeap(m_device); - allocateNewSamplerDescriptorHeap(m_device); - m_stagingCpuSamplerHeap.freeAll(); - m_stagingCpuViewHeap.freeAll(); - m_commandListAllocId = 0; - SLANG_RETURN_ON_FAIL(m_commandAllocator->Reset()); - Super::reset(); - return SLANG_OK; } -Result D3D12Device::TransientResourceHeapImpl::createCommandBuffer(ICommandBuffer** outCmdBuffer) +DXGI_FORMAT getTypelessFormatFromDepthFormat(Format format) { - if ((Index)m_commandListAllocId < m_commandBufferPool.getCount()) + switch (format) { - auto result = static_cast<D3D12Device::CommandBufferImpl*>( - m_commandBufferPool[m_commandListAllocId].Ptr()); - m_d3dCommandListPool[m_commandListAllocId]->Reset(m_commandAllocator, nullptr); - result->reinit(); - ++m_commandListAllocId; - returnComPtr(outCmdBuffer, result); - return SLANG_OK; + case Format::D16_UNORM: + return DXGI_FORMAT_R16_TYPELESS; + case Format::D32_FLOAT: + return DXGI_FORMAT_R32_TYPELESS; + default: + return D3DUtil::getMapFormat(format); } - ComPtr<ID3D12GraphicsCommandList> cmdList; - m_device->m_device->CreateCommandList( - 0, - D3D12_COMMAND_LIST_TYPE_DIRECT, - m_commandAllocator, - nullptr, - IID_PPV_ARGS(cmdList.writeRef())); - - m_d3dCommandListPool.add(cmdList); - RefPtr<CommandBufferImpl> cmdBuffer = new CommandBufferImpl(); - cmdBuffer->init(m_device, cmdList, this); - m_commandBufferPool.add(cmdBuffer); - ++m_commandListAllocId; - returnComPtr(outCmdBuffer, cmdBuffer); - return SLANG_OK; } -Result D3D12Device::PipelineCommandEncoder::_bindRenderState(Submitter* submitter, RefPtr<PipelineStateBase>& newPipeline) +bool isTypelessDepthFormat(DXGI_FORMAT format) { - RootShaderObjectImpl* rootObjectImpl = m_commandBuffer->m_mutableRootShaderObject - ? m_commandBuffer->m_mutableRootShaderObject.Ptr() - : &m_commandBuffer->m_rootShaderObject; - SLANG_RETURN_ON_FAIL(m_renderer->maybeSpecializePipeline(m_currentPipeline, rootObjectImpl, newPipeline)); - PipelineStateBase* newPipelineImpl = static_cast<PipelineStateBase*>(newPipeline.Ptr()); - auto commandList = m_d3dCmdList; - auto pipelineTypeIndex = (int)newPipelineImpl->desc.type; - auto programImpl = static_cast<ShaderProgramImpl*>(newPipelineImpl->m_program.Ptr()); - newPipelineImpl->ensureAPIPipelineStateCreated(); - submitter->setRootSignature(programImpl->m_rootObjectLayout->m_rootSignature); - submitter->setPipelineState(newPipelineImpl); - RootShaderObjectLayoutImpl* rootLayoutImpl = programImpl->m_rootObjectLayout; + switch (format) + { + case DXGI_FORMAT_R16_TYPELESS: + case DXGI_FORMAT_R32_TYPELESS: + return true; + default: + return false; + } +} - // We need to set up a context for binding shader objects to the pipeline state. - // This type mostly exists to bundle together a bunch of parameters that would - // otherwise need to be tunneled down through all the shader object binding - // logic. - // - BindingContext context = {}; - context.encoder = this; - context.submitter = submitter; - context.device = m_renderer; - context.transientHeap = m_transientHeap; - context.outOfMemoryHeap = (D3D12_DESCRIPTOR_HEAP_TYPE)(-1); - // We kick off binding of shader objects at the root object, and the objects - // themselves will be responsible for allocating, binding, and filling in - // any descriptor tables or other root parameters needed. - // - m_commandBuffer->bindDescriptorHeaps(); - if (rootObjectImpl->bindAsRoot(&context, rootLayoutImpl) == SLANG_E_OUT_OF_MEMORY) +D3D12_FILTER_TYPE translateFilterMode(TextureFilteringMode mode) +{ + switch (mode) { - if (!m_transientHeap->canResize()) - { - return SLANG_E_OUT_OF_MEMORY; - } + default: + return D3D12_FILTER_TYPE(0); - // If we run out of heap space while binding, allocate new descriptor heaps and try again. - ID3D12DescriptorHeap* d3dheap = nullptr; - m_commandBuffer->invalidateDescriptorHeapBinding(); - switch (context.outOfMemoryHeap) - { - case D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV: - SLANG_RETURN_ON_FAIL(m_transientHeap->allocateNewViewDescriptorHeap(m_renderer)); - d3dheap = m_transientHeap->getCurrentViewHeap().getHeap(); - m_commandBuffer->bindDescriptorHeaps(); - break; - case D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER: - SLANG_RETURN_ON_FAIL(m_transientHeap->allocateNewSamplerDescriptorHeap(m_renderer)); - d3dheap = m_transientHeap->getCurrentSamplerHeap().getHeap(); - m_commandBuffer->bindDescriptorHeaps(); - break; - default: - assert(!"shouldn't be here"); - return SLANG_FAIL; - } +#define CASE(SRC, DST) \ + case TextureFilteringMode::SRC: \ + return D3D12_FILTER_TYPE_##DST - // Try again. - SLANG_RETURN_ON_FAIL(rootObjectImpl->bindAsRoot(&context, rootLayoutImpl)); + CASE(Point, POINT); + CASE(Linear, LINEAR); + +#undef CASE } - - return SLANG_OK; } -Result D3D12Device::createTransientResourceHeapImpl( - ITransientResourceHeap::Flags::Enum flags, - size_t constantBufferSize, - uint32_t viewDescriptors, - uint32_t samplerDescriptors, - TransientResourceHeapImpl** outHeap) +D3D12_FILTER_REDUCTION_TYPE translateFilterReduction(TextureReductionOp op) { - RefPtr<TransientResourceHeapImpl> result = new TransientResourceHeapImpl(); - ITransientResourceHeap::Desc desc = {}; - desc.flags = flags; - desc.samplerDescriptorCount = samplerDescriptors; - desc.constantBufferSize = constantBufferSize; - desc.constantBufferDescriptorCount = viewDescriptors; - desc.accelerationStructureDescriptorCount = viewDescriptors; - desc.srvDescriptorCount = viewDescriptors; - desc.uavDescriptorCount = viewDescriptors; - SLANG_RETURN_ON_FAIL(result->init(desc, this, viewDescriptors, samplerDescriptors)); - returnRefPtrMove(outHeap, result); - return SLANG_OK; -} + switch (op) + { + default: + return D3D12_FILTER_REDUCTION_TYPE(0); -Result D3D12Device::createCommandQueueImpl(D3D12Device::CommandQueueImpl** outQueue) -{ - int queueIndex = m_queueIndexAllocator.alloc(1); - // If we run out of queue index space, then the user is requesting too many queues. - if (queueIndex == -1) - return SLANG_FAIL; +#define CASE(SRC, DST) \ + case TextureReductionOp::SRC: \ + return D3D12_FILTER_REDUCTION_TYPE_##DST - RefPtr<D3D12Device::CommandQueueImpl> queue = new D3D12Device::CommandQueueImpl(); - SLANG_RETURN_ON_FAIL(queue->init(this, (uint32_t)queueIndex)); - returnRefPtrMove(outQueue, queue); - return SLANG_OK; + CASE(Average, STANDARD); + CASE(Comparison, COMPARISON); + CASE(Minimum, MINIMUM); + CASE(Maximum, MAXIMUM); + +#undef CASE + } } -SlangResult SLANG_MCALL createD3D12Device(const IDevice::Desc* desc, IDevice** outDevice) +D3D12_TEXTURE_ADDRESS_MODE translateAddressingMode(TextureAddressingMode mode) { - RefPtr<D3D12Device> result = new D3D12Device(); - SLANG_RETURN_ON_FAIL(result->initialize(*desc)); - returnComPtr(outDevice, result); - return SLANG_OK; + switch (mode) + { + default: + return D3D12_TEXTURE_ADDRESS_MODE(0); + +#define CASE(SRC, DST) \ + case TextureAddressingMode::SRC: \ + return D3D12_TEXTURE_ADDRESS_MODE_##DST + + CASE(Wrap, WRAP); + CASE(ClampToEdge, CLAMP); + CASE(ClampToBorder, BORDER); + CASE(MirrorRepeat, MIRROR); + CASE(MirrorOnce, MIRROR_ONCE); + +#undef CASE + } } -/* static */PROC D3D12Device::loadProc(HMODULE module, char const* name) +D3D12_COMPARISON_FUNC translateComparisonFunc(ComparisonFunc func) { - PROC proc = ::GetProcAddress(module, name); - if (!proc) + switch (func) { - fprintf(stderr, "error: failed load symbol '%s'\n", name); - return nullptr; + default: + // TODO: need to report failures + return D3D12_COMPARISON_FUNC_ALWAYS; + +#define CASE(FROM, TO) \ + case ComparisonFunc::FROM: \ + return D3D12_COMPARISON_FUNC_##TO + + CASE(Never, NEVER); + CASE(Less, LESS); + CASE(Equal, EQUAL); + CASE(LessEqual, LESS_EQUAL); + CASE(Greater, GREATER); + CASE(NotEqual, NOT_EQUAL); + CASE(GreaterEqual, GREATER_EQUAL); + CASE(Always, ALWAYS); +#undef CASE } - return proc; } -D3D12Device::~D3D12Device() { m_shaderObjectLayoutCache = decltype(m_shaderObjectLayoutCache)(); } +uint32_t getViewDescriptorCount(const ITransientResourceHeap::Desc& desc) +{ + return Math::Max( + Math::Max( + desc.srvDescriptorCount, + desc.uavDescriptorCount, + desc.accelerationStructureDescriptorCount), + desc.constantBufferDescriptorCount, + 2048u); +} -static void _initSrvDesc( +void initSrvDesc( IResource::Type resourceType, const ITextureResource::Desc& textureDesc, const D3D12_RESOURCE_DESC& desc, @@ -5691,7 +219,9 @@ static void _initSrvDesc( // create SRV descOut = D3D12_SHADER_RESOURCE_VIEW_DESC(); - descOut.Format = (pixelFormat == DXGI_FORMAT_UNKNOWN) ? D3DUtil::calcFormat(D3DUtil::USAGE_SRV, desc.Format) : pixelFormat; + descOut.Format = (pixelFormat == DXGI_FORMAT_UNKNOWN) + ? D3DUtil::calcFormat(D3DUtil::USAGE_SRV, desc.Format) + : pixelFormat; descOut.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; if (desc.DepthOrArraySize == 1) { @@ -5724,7 +254,6 @@ static void _initSrvDesc( default: assert(!"Unknown dimension"); } - } else if (resourceType == IResource::Type::TextureCube) { @@ -5777,8 +306,9 @@ static void _initSrvDesc( break; case D3D12_RESOURCE_DIMENSION_TEXTURE2D: descOut.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2DARRAY; - descOut.Texture2DArray.ArraySize = - subresourceRange.layerCount == 0 ? desc.DepthOrArraySize : subresourceRange.layerCount; + descOut.Texture2DArray.ArraySize = subresourceRange.layerCount == 0 + ? desc.DepthOrArraySize + : subresourceRange.layerCount; descOut.Texture2DArray.FirstArraySlice = subresourceRange.baseArrayLayer; descOut.Texture2DArray.PlaneSlice = D3DUtil::getPlaneSlice(descOut.Format, subresourceRange.aspectMask); @@ -5792,8 +322,8 @@ static void _initSrvDesc( descOut.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE3D; descOut.Texture3D.MostDetailedMip = subresourceRange.mipLevel; descOut.Texture3D.MipLevels = subresourceRange.mipLevelCount == 0 - ? desc.MipLevels - : subresourceRange.mipLevelCount; + ? desc.MipLevels + : subresourceRange.mipLevelCount; break; default: @@ -5802,91 +332,316 @@ static void _initSrvDesc( } } -Result D3D12Device::createBuffer(const D3D12_RESOURCE_DESC& resourceDesc, const void* srcData, size_t srcDataSize, D3D12_RESOURCE_STATES finalState, D3D12Resource& resourceOut, bool isShared, MemoryType memoryType) +Result initTextureResourceDesc( + D3D12_RESOURCE_DESC& resourceDesc, const ITextureResource::Desc& srcDesc) +{ + const DXGI_FORMAT pixelFormat = D3DUtil::getMapFormat(srcDesc.format); + if (pixelFormat == DXGI_FORMAT_UNKNOWN) + { + return SLANG_FAIL; + } + + const int arraySize = calcEffectiveArraySize(srcDesc); + + const D3D12_RESOURCE_DIMENSION dimension = calcResourceDimension(srcDesc.type); + if (dimension == D3D12_RESOURCE_DIMENSION_UNKNOWN) + { + return SLANG_FAIL; + } + + const int numMipMaps = srcDesc.numMipLevels; + resourceDesc.Dimension = dimension; + resourceDesc.Format = pixelFormat; + resourceDesc.Width = srcDesc.size.width; + resourceDesc.Height = srcDesc.size.height; + resourceDesc.DepthOrArraySize = (srcDesc.size.depth > 1) ? srcDesc.size.depth : arraySize; + + resourceDesc.MipLevels = numMipMaps; + resourceDesc.SampleDesc.Count = srcDesc.sampleDesc.numSamples; + resourceDesc.SampleDesc.Quality = srcDesc.sampleDesc.quality; + + resourceDesc.Flags = D3D12_RESOURCE_FLAG_NONE; + resourceDesc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN; + + resourceDesc.Flags |= calcResourceFlags(srcDesc.allowedStates); + + resourceDesc.Alignment = 0; + + if (isDepthFormat(srcDesc.format) && + (srcDesc.allowedStates.contains(ResourceState::ShaderResource) || + srcDesc.allowedStates.contains(ResourceState::UnorderedAccess))) + { + resourceDesc.Format = getTypelessFormatFromDepthFormat(srcDesc.format); + } + + return SLANG_OK; +} + +void initBufferResourceDesc(size_t bufferSize, D3D12_RESOURCE_DESC& out) +{ + out = {}; + + out.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; + out.Alignment = 0; + out.Width = bufferSize; + out.Height = 1; + out.DepthOrArraySize = 1; + out.MipLevels = 1; + out.Format = DXGI_FORMAT_UNKNOWN; + out.SampleDesc.Count = 1; + out.SampleDesc.Quality = 0; + out.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; + out.Flags = D3D12_RESOURCE_FLAG_NONE; +} + +Result uploadBufferDataImpl( + ID3D12Device* device, + ID3D12GraphicsCommandList* cmdList, + TransientResourceHeapImpl* transientHeap, + BufferResourceImpl* buffer, + size_t offset, + size_t size, + void* data) +{ + IBufferResource* uploadResource; + size_t uploadResourceOffset = 0; + if (buffer->getDesc()->memoryType != MemoryType::Upload) + { + SLANG_RETURN_ON_FAIL(transientHeap->allocateStagingBuffer( + size, uploadResource, uploadResourceOffset, MemoryType::Upload)); + } + + D3D12Resource& uploadResourceRef = + (buffer->getDesc()->memoryType == MemoryType::Upload) + ? buffer->m_resource + : static_cast<BufferResourceImpl*>(uploadResource)->m_resource; + + D3D12_RANGE readRange = {}; + readRange.Begin = 0; + readRange.End = 0; + void* uploadData; + SLANG_RETURN_ON_FAIL( + uploadResourceRef.getResource()->Map(0, &readRange, reinterpret_cast<void**>(&uploadData))); + memcpy((uint8_t*)uploadData + uploadResourceOffset + offset, data, size); + D3D12_RANGE writtenRange = {}; + writtenRange.Begin = uploadResourceOffset + offset; + writtenRange.End = uploadResourceOffset + offset + size; + uploadResourceRef.getResource()->Unmap(0, &writtenRange); + + if (buffer->getDesc()->memoryType != MemoryType::Upload) + { + cmdList->CopyBufferRegion( + buffer->m_resource.getResource(), + offset, + uploadResourceRef.getResource(), + uploadResourceOffset + offset, + size); + } + + return SLANG_OK; +} + +Result createNullDescriptor( + ID3D12Device* d3dDevice, + D3D12_CPU_DESCRIPTOR_HANDLE destDescriptor, + const ShaderObjectLayoutImpl::BindingRangeInfo& bindingRange) +{ + switch (bindingRange.bindingType) + { + case slang::BindingType::ConstantBuffer: + { + D3D12_CONSTANT_BUFFER_VIEW_DESC cbvDesc = {}; + cbvDesc.BufferLocation = 0; + cbvDesc.SizeInBytes = 0; + d3dDevice->CreateConstantBufferView(&cbvDesc, destDescriptor); + } + break; + case slang::BindingType::MutableRawBuffer: + { + D3D12_UNORDERED_ACCESS_VIEW_DESC uavDesc = {}; + uavDesc.ViewDimension = D3D12_UAV_DIMENSION_BUFFER; + uavDesc.Buffer.Flags = D3D12_BUFFER_UAV_FLAG_RAW; + uavDesc.Format = DXGI_FORMAT_R32_TYPELESS; + d3dDevice->CreateUnorderedAccessView(nullptr, nullptr, &uavDesc, destDescriptor); + } + break; + case slang::BindingType::MutableTypedBuffer: + { + D3D12_UNORDERED_ACCESS_VIEW_DESC uavDesc = {}; + uavDesc.ViewDimension = D3D12_UAV_DIMENSION_BUFFER; + uavDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; + d3dDevice->CreateUnorderedAccessView(nullptr, nullptr, &uavDesc, destDescriptor); + } + break; + case slang::BindingType::RawBuffer: + { + D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc = {}; + srvDesc.ViewDimension = D3D12_SRV_DIMENSION_BUFFER; + srvDesc.Buffer.Flags = D3D12_BUFFER_SRV_FLAG_RAW; + srvDesc.Format = DXGI_FORMAT_R32_TYPELESS; + srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + d3dDevice->CreateShaderResourceView(nullptr, &srvDesc, destDescriptor); + } + break; + case slang::BindingType::TypedBuffer: + { + D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc = {}; + srvDesc.ViewDimension = D3D12_SRV_DIMENSION_BUFFER; + srvDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; + srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + d3dDevice->CreateShaderResourceView(nullptr, &srvDesc, destDescriptor); + } + break; + case slang::BindingType::Texture: + { + D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc = {}; + srvDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; + srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + switch (bindingRange.resourceShape) + { + case SLANG_TEXTURE_1D: + srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE1D; + break; + case SLANG_TEXTURE_1D_ARRAY: + srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE1DARRAY; + break; + case SLANG_TEXTURE_2D: + srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D; + break; + case SLANG_TEXTURE_2D_ARRAY: + srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2DARRAY; + break; + case SLANG_TEXTURE_3D: + srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE3D; + break; + case SLANG_TEXTURE_CUBE: + srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURECUBE; + break; + case SLANG_TEXTURE_CUBE_ARRAY: + srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURECUBEARRAY; + break; + case SLANG_TEXTURE_2D_MULTISAMPLE: + srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2DMS; + break; + case SLANG_TEXTURE_2D_MULTISAMPLE_ARRAY: + srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2DMSARRAY; + break; + default: + return SLANG_OK; + } + d3dDevice->CreateShaderResourceView(nullptr, &srvDesc, destDescriptor); + } + break; + default: + break; + } + return SLANG_OK; +} +} // namespace + +Result DeviceImpl::createBuffer( + const D3D12_RESOURCE_DESC& resourceDesc, + const void* srcData, + size_t srcDataSize, + D3D12_RESOURCE_STATES finalState, + D3D12Resource& resourceOut, + bool isShared, + MemoryType memoryType) { - const size_t bufferSize = size_t(resourceDesc.Width); + const size_t bufferSize = size_t(resourceDesc.Width); - D3D12_HEAP_PROPERTIES heapProps; - heapProps.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN; - heapProps.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN; - heapProps.CreationNodeMask = 1; - heapProps.VisibleNodeMask = 1; + D3D12_HEAP_PROPERTIES heapProps; + heapProps.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN; + heapProps.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN; + heapProps.CreationNodeMask = 1; + heapProps.VisibleNodeMask = 1; - D3D12_HEAP_FLAGS flags = D3D12_HEAP_FLAG_NONE; - if (isShared) flags |= D3D12_HEAP_FLAG_SHARED; + D3D12_HEAP_FLAGS flags = D3D12_HEAP_FLAG_NONE; + if (isShared) + flags |= D3D12_HEAP_FLAG_SHARED; - D3D12_RESOURCE_DESC desc = resourceDesc; + D3D12_RESOURCE_DESC desc = resourceDesc; - D3D12_RESOURCE_STATES initialState = finalState; + D3D12_RESOURCE_STATES initialState = finalState; - switch (memoryType) - { - case MemoryType::ReadBack: - assert(!srcData); + switch (memoryType) + { + case MemoryType::ReadBack: + assert(!srcData); - heapProps.Type = D3D12_HEAP_TYPE_READBACK; - desc.Flags = D3D12_RESOURCE_FLAG_NONE; - initialState |= D3D12_RESOURCE_STATE_COPY_DEST; + heapProps.Type = D3D12_HEAP_TYPE_READBACK; + desc.Flags = D3D12_RESOURCE_FLAG_NONE; + initialState |= D3D12_RESOURCE_STATE_COPY_DEST; - break; - case MemoryType::Upload: + break; + case MemoryType::Upload: - heapProps.Type = D3D12_HEAP_TYPE_UPLOAD; - desc.Flags = D3D12_RESOURCE_FLAG_NONE; - initialState |= D3D12_RESOURCE_STATE_GENERIC_READ; + heapProps.Type = D3D12_HEAP_TYPE_UPLOAD; + desc.Flags = D3D12_RESOURCE_FLAG_NONE; + initialState |= D3D12_RESOURCE_STATE_GENERIC_READ; - break; - case MemoryType::DeviceLocal: - heapProps.Type = D3D12_HEAP_TYPE_DEFAULT; - initialState = (srcData ? D3D12_RESOURCE_STATE_COPY_DEST : finalState); - break; - default: - return SLANG_FAIL; - } + break; + case MemoryType::DeviceLocal: + heapProps.Type = D3D12_HEAP_TYPE_DEFAULT; + initialState = (srcData ? D3D12_RESOURCE_STATE_COPY_DEST : finalState); + break; + default: + return SLANG_FAIL; + } - // Create the resource. - SLANG_RETURN_ON_FAIL(resourceOut.initCommitted(m_device, heapProps, flags, desc, initialState, nullptr)); + // Create the resource. + SLANG_RETURN_ON_FAIL( + resourceOut.initCommitted(m_device, heapProps, flags, desc, initialState, nullptr)); - if (srcData) - { - D3D12Resource uploadResource; + if (srcData) + { + D3D12Resource uploadResource; - if (memoryType == MemoryType::DeviceLocal) - { - // If the buffer is on the default heap, create upload buffer. - D3D12_RESOURCE_DESC uploadDesc(resourceDesc); - uploadDesc.Flags = D3D12_RESOURCE_FLAG_NONE; - heapProps.Type = D3D12_HEAP_TYPE_UPLOAD; + if (memoryType == MemoryType::DeviceLocal) + { + // If the buffer is on the default heap, create upload buffer. + D3D12_RESOURCE_DESC uploadDesc(resourceDesc); + uploadDesc.Flags = D3D12_RESOURCE_FLAG_NONE; + heapProps.Type = D3D12_HEAP_TYPE_UPLOAD; - SLANG_RETURN_ON_FAIL(uploadResource.initCommitted(m_device, heapProps, D3D12_HEAP_FLAG_NONE, uploadDesc, D3D12_RESOURCE_STATE_GENERIC_READ, nullptr)); - } + SLANG_RETURN_ON_FAIL(uploadResource.initCommitted( + m_device, + heapProps, + D3D12_HEAP_FLAG_NONE, + uploadDesc, + D3D12_RESOURCE_STATE_GENERIC_READ, + nullptr)); + } - // Be careful not to actually copy a resource here. - D3D12Resource& uploadResourceRef = (memoryType == MemoryType::DeviceLocal) ? uploadResource : resourceOut; + // Be careful not to actually copy a resource here. + D3D12Resource& uploadResourceRef = + (memoryType == MemoryType::DeviceLocal) ? uploadResource : resourceOut; - // Copy data to the intermediate upload heap and then schedule a copy - // from the upload heap to the vertex buffer. - UINT8* dstData; - D3D12_RANGE readRange = {}; // We do not intend to read from this resource on the CPU. + // Copy data to the intermediate upload heap and then schedule a copy + // from the upload heap to the vertex buffer. + UINT8* dstData; + D3D12_RANGE readRange = {}; // We do not intend to read from this resource on the CPU. - ID3D12Resource* dxUploadResource = uploadResourceRef.getResource(); + ID3D12Resource* dxUploadResource = uploadResourceRef.getResource(); - SLANG_RETURN_ON_FAIL(dxUploadResource->Map(0, &readRange, reinterpret_cast<void**>(&dstData))); - ::memcpy(dstData, srcData, srcDataSize); - dxUploadResource->Unmap(0, nullptr); + SLANG_RETURN_ON_FAIL( + dxUploadResource->Map(0, &readRange, reinterpret_cast<void**>(&dstData))); + ::memcpy(dstData, srcData, srcDataSize); + dxUploadResource->Unmap(0, nullptr); - if (memoryType == MemoryType::DeviceLocal) - { - auto encodeInfo = encodeResourceCommands(); - encodeInfo.d3dCommandList->CopyBufferRegion(resourceOut, 0, uploadResourceRef, 0, bufferSize); - submitResourceCommandsAndWait(encodeInfo); - } - } + if (memoryType == MemoryType::DeviceLocal) + { + auto encodeInfo = encodeResourceCommands(); + encodeInfo.d3dCommandList->CopyBufferRegion( + resourceOut, 0, uploadResourceRef, 0, bufferSize); + submitResourceCommandsAndWait(encodeInfo); + } + } return SLANG_OK; } -Result D3D12Device::captureTextureToSurface( +Result DeviceImpl::captureTextureToSurface( TextureResourceImpl* resourceImpl, ResourceState state, ISlangBlob** outBlob, @@ -5918,11 +673,11 @@ Result D3D12Device::captureTextureToSurface( *outRowPitch = rowPitch; if (outPixelSize) *outPixelSize = bytesPerPixel; - + D3D12Resource stagingResource; { D3D12_RESOURCE_DESC stagingDesc; - _initBufferResourceDesc(bufferSize, stagingDesc); + initBufferResourceDesc(bufferSize, stagingDesc); D3D12_HEAP_PROPERTIES heapProps; heapProps.Type = D3D12_HEAP_TYPE_READBACK; @@ -5931,7 +686,13 @@ Result D3D12Device::captureTextureToSurface( heapProps.CreationNodeMask = 1; heapProps.VisibleNodeMask = 1; - SLANG_RETURN_ON_FAIL(stagingResource.initCommitted(m_device, heapProps, D3D12_HEAP_FLAG_NONE, stagingDesc, D3D12_RESOURCE_STATE_COPY_DEST, nullptr)); + SLANG_RETURN_ON_FAIL(stagingResource.initCommitted( + m_device, + heapProps, + D3D12_HEAP_FLAG_NONE, + stagingDesc, + D3D12_RESOURCE_STATE_COPY_DEST, + nullptr)); } auto encodeInfo = encodeResourceCommands(); @@ -5987,16 +748,18 @@ Result D3D12Device::captureTextureToSurface( } } -// !!!!!!!!!!!!!!!!!!!!!!!!!!!! Renderer interface !!!!!!!!!!!!!!!!!!!!!!!!!! - -Result D3D12Device::getNativeDeviceHandles(InteropHandles* outHandles) +Result DeviceImpl::getNativeDeviceHandles(InteropHandles* outHandles) { outHandles->handles[0].handleValue = (uint64_t)m_device; outHandles->handles[0].api = InteropHandleAPI::D3D12; return SLANG_OK; } -Result D3D12Device::_createDevice(DeviceCheckFlags deviceCheckFlags, const UnownedStringSlice& nameMatch, D3D_FEATURE_LEVEL featureLevel, DeviceInfo& outDeviceInfo) +Result DeviceImpl::_createDevice( + DeviceCheckFlags deviceCheckFlags, + const UnownedStringSlice& nameMatch, + D3D_FEATURE_LEVEL featureLevel, + D3D12DeviceInfo& outDeviceInfo) { if (m_dxDebug && (deviceCheckFlags & DeviceCheckFlag::UseDebug)) { @@ -6009,7 +772,8 @@ Result D3D12Device::_createDevice(DeviceCheckFlags deviceCheckFlags, const Unown SLANG_RETURN_ON_FAIL(D3DUtil::createFactory(deviceCheckFlags, dxgiFactory)); List<ComPtr<IDXGIAdapter>> dxgiAdapters; - SLANG_RETURN_ON_FAIL(D3DUtil::findAdapters(deviceCheckFlags, nameMatch, dxgiFactory, dxgiAdapters)); + SLANG_RETURN_ON_FAIL( + D3DUtil::findAdapters(deviceCheckFlags, nameMatch, dxgiFactory, dxgiAdapters)); ComPtr<ID3D12Device> device; ComPtr<IDXGIAdapter> adapter; @@ -6017,7 +781,8 @@ Result D3D12Device::_createDevice(DeviceCheckFlags deviceCheckFlags, const Unown for (Index i = 0; i < dxgiAdapters.getCount(); ++i) { IDXGIAdapter* dxgiAdapter = dxgiAdapters[i]; - if (SLANG_SUCCEEDED(m_D3D12CreateDevice(dxgiAdapter, featureLevel, IID_PPV_ARGS(device.writeRef())))) + if (SLANG_SUCCEEDED( + m_D3D12CreateDevice(dxgiAdapter, featureLevel, IID_PPV_ARGS(device.writeRef())))) { adapter = dxgiAdapter; break; @@ -6034,7 +799,7 @@ Result D3D12Device::_createDevice(DeviceCheckFlags deviceCheckFlags, const Unown ComPtr<ID3D12InfoQueue> infoQueue; if (SLANG_SUCCEEDED(device->QueryInterface(infoQueue.writeRef()))) { - // Make break + // Make break infoQueue->SetBreakOnSeverity(D3D12_MESSAGE_SEVERITY_CORRUPTION, true); if (m_extendedDesc.debugBreakOnD3D12Error) { @@ -6049,26 +814,28 @@ Result D3D12Device::_createDevice(DeviceCheckFlags deviceCheckFlags, const Unown f.DenyList.pIDList = hideMessages; infoQueue->AddStorageFilterEntries(&f); - // Apparently there is a problem with sm 6.3 with spurious errors, with debug layer enabled + // Apparently there is a problem with sm 6.3 with spurious errors, with debug layer + // enabled D3D12_FEATURE_DATA_SHADER_MODEL featureShaderModel; featureShaderModel.HighestShaderModel = D3D_SHADER_MODEL(0x63); - SLANG_SUCCEEDED(device->CheckFeatureSupport(D3D12_FEATURE_SHADER_MODEL, &featureShaderModel, sizeof(featureShaderModel))); + SLANG_SUCCEEDED(device->CheckFeatureSupport( + D3D12_FEATURE_SHADER_MODEL, &featureShaderModel, sizeof(featureShaderModel))); if (featureShaderModel.HighestShaderModel >= D3D_SHADER_MODEL(0x63)) { // Filter out any messages that cause issues // TODO: Remove this when the debug layers work properly - D3D12_MESSAGE_ID messageIds[] = - { - // When the debug layer is enabled this error is triggered sometimes after a CopyDescriptorsSimple - // call The failed check validates that the source and destination ranges of the copy do not - // overlap. The check assumes descriptor handles are pointers to memory, but this is not always the - // case and the check fails (even though everything is okay). + D3D12_MESSAGE_ID messageIds[] = { + // When the debug layer is enabled this error is triggered sometimes after a + // CopyDescriptorsSimple call The failed check validates that the source and + // destination ranges of the copy do not overlap. The check assumes descriptor + // handles are pointers to memory, but this is not always the case and the check + // fails (even though everything is okay). D3D12_MESSAGE_ID_COPY_DESCRIPTORS_INVALID_RANGES, }; // We filter INFO messages because they are way too many - D3D12_MESSAGE_SEVERITY severities[] = { D3D12_MESSAGE_SEVERITY_INFO }; + D3D12_MESSAGE_SEVERITY severities[] = {D3D12_MESSAGE_SEVERITY_INFO}; D3D12_INFO_QUEUE_FILTER infoQueueFilter = {}; infoQueueFilter.DenyList.NumSeverities = SLANG_COUNT_OF(severities); @@ -6099,26 +866,15 @@ Result D3D12Device::_createDevice(DeviceCheckFlags deviceCheckFlags, const Unown outDeviceInfo.m_adapter = adapter; outDeviceInfo.m_isWarp = D3DUtil::isWarp(dxgiFactory, adapter); const UINT kMicrosoftVendorId = 5140; - outDeviceInfo.m_isSoftware = outDeviceInfo.m_isWarp || ((outDeviceInfo.m_desc1.Flags & DXGI_ADAPTER_FLAG_SOFTWARE) != 0) - || outDeviceInfo.m_desc.VendorId == kMicrosoftVendorId; + outDeviceInfo.m_isSoftware = + outDeviceInfo.m_isWarp || + ((outDeviceInfo.m_desc1.Flags & DXGI_ADAPTER_FLAG_SOFTWARE) != 0) || + outDeviceInfo.m_desc.VendorId == kMicrosoftVendorId; return SLANG_OK; } -static bool _isSupportedNVAPIOp(ID3D12Device* dev, uint32_t op) -{ -#ifdef GFX_NVAPI - { - bool isSupported; - NvAPI_Status status = NvAPI_D3D12_IsNvShaderExtnOpCodeSupported(dev, NvU32(op), &isSupported); - return status == NVAPI_OK && isSupported; - } -#else - return false; -#endif -} - -Result D3D12Device::initialize(const Desc& desc) +Result DeviceImpl::initialize(const Desc& desc) { SLANG_RETURN_ON_FAIL(RendererBase::initialize(desc)); @@ -6157,7 +913,8 @@ Result D3D12Device::initialize(const Desc& desc) } // Get all the dll entry points - m_D3D12SerializeRootSignature = (PFN_D3D12_SERIALIZE_ROOT_SIGNATURE)loadProc(d3dModule, "D3D12SerializeRootSignature"); + m_D3D12SerializeRootSignature = + (PFN_D3D12_SERIALIZE_ROOT_SIGNATURE)loadProc(d3dModule, "D3D12SerializeRootSignature"); if (!m_D3D12SerializeRootSignature) { return SLANG_FAIL; @@ -6173,12 +930,13 @@ Result D3D12Device::initialize(const Desc& desc) } #if ENABLE_DEBUG_LAYER - m_D3D12GetDebugInterface = (PFN_D3D12_GET_DEBUG_INTERFACE)loadProc(d3dModule, "D3D12GetDebugInterface"); + m_D3D12GetDebugInterface = + (PFN_D3D12_GET_DEBUG_INTERFACE)loadProc(d3dModule, "D3D12GetDebugInterface"); if (m_D3D12GetDebugInterface) { if (SLANG_SUCCEEDED(m_D3D12GetDebugInterface(IID_PPV_ARGS(m_dxDebug.writeRef())))) { -#if 0 +# if 0 // Can enable for extra validation. NOTE! That d3d12 warns if you do.... // D3D12 MESSAGE : Device Debug Layer Startup Options : GPU - Based Validation is enabled(disabled by default). // This results in new validation not possible during API calls on the CPU, by creating patched shaders that have validation @@ -6191,7 +949,7 @@ Result D3D12Device::initialize(const Desc& desc) { debug1->SetEnableGPUBasedValidation(true); } -#endif +# endif } } #endif @@ -6209,18 +967,25 @@ Result D3D12Device::initialize(const Desc& desc) // to override UseDebug of default rather than leave it // up to each back-end to specify. #if ENABLE_DEBUG_LAYER - combiner.add(DeviceCheckFlag::UseDebug, ChangeType::OnOff); ///< First try debug then non debug + combiner.add( + DeviceCheckFlag::UseDebug, ChangeType::OnOff); ///< First try debug then non debug #else - combiner.add(DeviceCheckFlag::UseDebug, ChangeType::Off); ///< Don't bother with debug + combiner.add(DeviceCheckFlag::UseDebug, ChangeType::Off); ///< Don't bother with debug #endif - combiner.add(DeviceCheckFlag::UseHardwareDevice, ChangeType::OnOff); ///< First try hardware, then reference + combiner.add( + DeviceCheckFlag::UseHardwareDevice, + ChangeType::OnOff); ///< First try hardware, then reference const D3D_FEATURE_LEVEL featureLevel = D3D_FEATURE_LEVEL_11_0; const int numCombinations = combiner.getNumCombinations(); for (int i = 0; i < numCombinations; ++i) { - if (SLANG_SUCCEEDED(_createDevice(combiner.getCombination(i), UnownedStringSlice(desc.adapter), featureLevel, m_deviceInfo))) + if (SLANG_SUCCEEDED(_createDevice( + combiner.getCombination(i), + UnownedStringSlice(desc.adapter), + featureLevel, + m_deviceInfo))) { break; } @@ -6262,25 +1027,25 @@ Result D3D12Device::initialize(const Desc& desc) // From DOCS: Applications are expected to bind null UAV to this slot. // NOTE! We don't currently do this, but doesn't seem to be a problem. - const NvAPI_Status status = NvAPI_D3D12_SetNvShaderExtnSlotSpace(m_device, NvU32(desc.nvapiExtnSlot), NvU32(0)); - + const NvAPI_Status status = + NvAPI_D3D12_SetNvShaderExtnSlotSpace(m_device, NvU32(desc.nvapiExtnSlot), NvU32(0)); + if (status != NVAPI_OK) { return SLANG_E_NOT_AVAILABLE; } - if (_isSupportedNVAPIOp(m_device, NV_EXTN_OP_UINT64_ATOMIC)) + if (isSupportedNVAPIOp(m_device, NV_EXTN_OP_UINT64_ATOMIC)) { m_features.add("atomic-int64"); } - if (_isSupportedNVAPIOp(m_device, NV_EXTN_OP_FP32_ATOMIC)) + if (isSupportedNVAPIOp(m_device, NV_EXTN_OP_FP32_ATOMIC)) { m_features.add("atomic-float"); } m_nvapi = true; #endif - } D3D12_FEATURE_DATA_SHADER_MODEL shaderModelData = {}; @@ -6293,9 +1058,9 @@ Result D3D12Device::initialize(const Desc& desc) { // TODO: Currently warp causes a crash when using half, so disable for now - if (SLANG_SUCCEEDED(m_device->CheckFeatureSupport(D3D12_FEATURE_SHADER_MODEL, &shaderModelData, sizeof(shaderModelData))) && - m_deviceInfo.m_isWarp == false && - shaderModelData.HighestShaderModel >= 0x62) + if (SLANG_SUCCEEDED(m_device->CheckFeatureSupport( + D3D12_FEATURE_SHADER_MODEL, &shaderModelData, sizeof(shaderModelData))) && + m_deviceInfo.m_isWarp == false && shaderModelData.HighestShaderModel >= 0x62) { // With sm_6_2 we have half m_features.add("half"); @@ -6303,7 +1068,8 @@ Result D3D12Device::initialize(const Desc& desc) } { D3D12_FEATURE_DATA_D3D12_OPTIONS options; - if (SLANG_SUCCEEDED(m_device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS, &options, sizeof(options)))) + if (SLANG_SUCCEEDED(m_device->CheckFeatureSupport( + D3D12_FEATURE_D3D12_OPTIONS, &options, sizeof(options)))) { // Check double precision support if (options.DoublePrecisionFloatShaderOps) @@ -6394,9 +1160,14 @@ Result D3D12Device::initialize(const Desc& desc) // Retrieve timestamp frequency. m_resourceCommandQueue->m_d3dQueue->GetTimestampFrequency(&m_info.timestampFrequency); - SLANG_RETURN_ON_FAIL(createTransientResourceHeapImpl(ITransientResourceHeap::Flags::AllowResizing, 0, 8, 4, m_resourceCommandTransientHeap.writeRef())); - // `TransientResourceHeap` holds a back reference to `D3D12Device`, make it a weak reference here - // since this object is already owned by `D3D12Device`. + SLANG_RETURN_ON_FAIL(createTransientResourceHeapImpl( + ITransientResourceHeap::Flags::AllowResizing, + 0, + 8, + 4, + m_resourceCommandTransientHeap.writeRef())); + // `TransientResourceHeap` holds a back reference to `D3D12Device`, make it a weak reference + // here since this object is already owned by `D3D12Device`. m_resourceCommandTransientHeap->breakStrongReferenceToDevice(); m_cpuViewHeap = new D3D12GeneralExpandingDescriptorHeap(); @@ -6473,7 +1244,9 @@ Result D3D12Device::initialize(const Desc& desc) int userSpecifiedShaderModel = D3DUtil::getShaderModelFromProfileName(desc.slang.targetProfile); if (userSpecifiedShaderModel > shaderModelData.HighestShaderModel) { - getDebugCallback()->handleMessage(gfx::DebugMessageType::Error, gfx::DebugMessageSource::Layer, + getDebugCallback()->handleMessage( + gfx::DebugMessageType::Error, + gfx::DebugMessageSource::Layer, "The requested shader model is not supported by the system."); return SLANG_E_NOT_AVAILABLE; } @@ -6495,7 +1268,8 @@ Result D3D12Device::initialize(const Desc& desc) desc.pArgumentDescs = &args; desc.NodeMask = 0; - SLANG_RETURN_ON_FAIL(m_device->CreateCommandSignature(&desc, nullptr, IID_PPV_ARGS(drawIndirectCmdSignature.writeRef()))); + SLANG_RETURN_ON_FAIL(m_device->CreateCommandSignature( + &desc, nullptr, IID_PPV_ARGS(drawIndirectCmdSignature.writeRef()))); } // Allocate a D3D12 "command signature" object that matches the behavior @@ -6510,7 +1284,8 @@ Result D3D12Device::initialize(const Desc& desc) desc.pArgumentDescs = &args; desc.NodeMask = 0; - SLANG_RETURN_ON_FAIL(m_device->CreateCommandSignature(&desc, nullptr, IID_PPV_ARGS(drawIndexedIndirectCmdSignature.writeRef()))); + SLANG_RETURN_ON_FAIL(m_device->CreateCommandSignature( + &desc, nullptr, IID_PPV_ARGS(drawIndexedIndirectCmdSignature.writeRef()))); } // Allocate a D3D12 "command signature" object that matches the behavior @@ -6532,23 +1307,8 @@ Result D3D12Device::initialize(const Desc& desc) return SLANG_OK; } -namespace -{ - uint32_t getViewDescriptorCount(const ITransientResourceHeap::Desc& desc) - { - return Math::Max( - Math::Max( - desc.srvDescriptorCount, - desc.uavDescriptorCount, - desc.accelerationStructureDescriptorCount), - desc.constantBufferDescriptorCount, - 2048u); - } -} - -Result D3D12Device::createTransientResourceHeap( - const ITransientResourceHeap::Desc& desc, - ITransientResourceHeap** outHeap) +Result DeviceImpl::createTransientResourceHeap( + const ITransientResourceHeap::Desc& desc, ITransientResourceHeap** outHeap) { RefPtr<TransientResourceHeapImpl> heap; SLANG_RETURN_ON_FAIL(createTransientResourceHeapImpl( @@ -6561,7 +1321,7 @@ Result D3D12Device::createTransientResourceHeap( return SLANG_OK; } -Result D3D12Device::createCommandQueue(const ICommandQueue::Desc& desc, ICommandQueue** outQueue) +Result DeviceImpl::createCommandQueue(const ICommandQueue::Desc& desc, ICommandQueue** outQueue) { RefPtr<CommandQueueImpl> queue; SLANG_RETURN_ON_FAIL(createCommandQueueImpl(queue.writeRef())); @@ -6569,7 +1329,7 @@ Result D3D12Device::createCommandQueue(const ICommandQueue::Desc& desc, ICommand return SLANG_OK; } -SLANG_NO_THROW Result SLANG_MCALL D3D12Device::createSwapchain( +Result DeviceImpl::createSwapchain( const ISwapchain::Desc& desc, WindowHandle window, ISwapchain** outSwapchain) { RefPtr<SwapchainImpl> swapchain = new SwapchainImpl(); @@ -6578,7 +1338,7 @@ SLANG_NO_THROW Result SLANG_MCALL D3D12Device::createSwapchain( return SLANG_OK; } -SlangResult D3D12Device::readTextureResource( +SlangResult DeviceImpl::readTextureResource( ITextureResource* resource, ResourceState state, ISlangBlob** outBlob, @@ -6586,146 +1346,31 @@ SlangResult D3D12Device::readTextureResource( size_t* outPixelSize) { return captureTextureToSurface( - static_cast<TextureResourceImpl*>(resource), - state, - outBlob, - outRowPitch, - outPixelSize); -} - -static D3D12_RESOURCE_FLAGS _calcResourceFlag(ResourceState state) -{ - switch (state) - { - case ResourceState::RenderTarget: - return D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET; - case ResourceState::DepthRead: - case ResourceState::DepthWrite: - return D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL; - case ResourceState::UnorderedAccess: - case ResourceState::AccelerationStructure: - return D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS; - default: - return D3D12_RESOURCE_FLAG_NONE; - } + static_cast<TextureResourceImpl*>(resource), state, outBlob, outRowPitch, outPixelSize); } -static D3D12_RESOURCE_FLAGS _calcResourceFlags(ResourceStateSet states) -{ - int dstFlags = 0; - for (uint32_t i = 0; i < (uint32_t)ResourceState::_Count; i++) - { - auto state = (ResourceState)i; - if (states.contains(state)) - dstFlags |= _calcResourceFlag(state); - } - return (D3D12_RESOURCE_FLAGS)dstFlags; -} - -static D3D12_RESOURCE_DIMENSION _calcResourceDimension(IResource::Type type) -{ - switch (type) - { - case IResource::Type::Buffer: return D3D12_RESOURCE_DIMENSION_BUFFER; - case IResource::Type::Texture1D: return D3D12_RESOURCE_DIMENSION_TEXTURE1D; - case IResource::Type::TextureCube: - case IResource::Type::Texture2D: - { - return D3D12_RESOURCE_DIMENSION_TEXTURE2D; - } - case IResource::Type::Texture3D: return D3D12_RESOURCE_DIMENSION_TEXTURE3D; - default: return D3D12_RESOURCE_DIMENSION_UNKNOWN; - } -} - -DXGI_FORMAT getTypelessFormatFromDepthFormat(Format format) -{ - switch (format) - { - case Format::D16_UNORM: - return DXGI_FORMAT_R16_TYPELESS; - case Format::D32_FLOAT: - return DXGI_FORMAT_R32_TYPELESS; - default: - return D3DUtil::getMapFormat(format); - } -} - -BOOL isTypelessDepthFormat(DXGI_FORMAT format) -{ - switch (format) - { - case DXGI_FORMAT_R16_TYPELESS: - case DXGI_FORMAT_R32_TYPELESS: - return true; - default: - return false; - } -} - -Result setupResourceDesc(D3D12_RESOURCE_DESC& resourceDesc, const ITextureResource::Desc& srcDesc) -{ - const DXGI_FORMAT pixelFormat = D3DUtil::getMapFormat(srcDesc.format); - if (pixelFormat == DXGI_FORMAT_UNKNOWN) - { - return SLANG_FAIL; - } - - const int arraySize = calcEffectiveArraySize(srcDesc); - - const D3D12_RESOURCE_DIMENSION dimension = _calcResourceDimension(srcDesc.type); - if (dimension == D3D12_RESOURCE_DIMENSION_UNKNOWN) - { - return SLANG_FAIL; - } - - const int numMipMaps = srcDesc.numMipLevels; - resourceDesc.Dimension = dimension; - resourceDesc.Format = pixelFormat; - resourceDesc.Width = srcDesc.size.width; - resourceDesc.Height = srcDesc.size.height; - resourceDesc.DepthOrArraySize = (srcDesc.size.depth > 1) ? srcDesc.size.depth : arraySize; - - resourceDesc.MipLevels = numMipMaps; - resourceDesc.SampleDesc.Count = srcDesc.sampleDesc.numSamples; - resourceDesc.SampleDesc.Quality = srcDesc.sampleDesc.quality; - - resourceDesc.Flags = D3D12_RESOURCE_FLAG_NONE; - resourceDesc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN; - - resourceDesc.Flags |= _calcResourceFlags(srcDesc.allowedStates); - - resourceDesc.Alignment = 0; - - if (isDepthFormat(srcDesc.format) && - (srcDesc.allowedStates.contains(ResourceState::ShaderResource) || - srcDesc.allowedStates.contains(ResourceState::UnorderedAccess))) - { - resourceDesc.Format = getTypelessFormatFromDepthFormat(srcDesc.format); - } - - return SLANG_OK; -} - -Result D3D12Device::getTextureAllocationInfo( +Result DeviceImpl::getTextureAllocationInfo( const ITextureResource::Desc& desc, size_t* outSize, size_t* outAlignment) { TextureResource::Desc srcDesc = fixupTextureDesc(desc); D3D12_RESOURCE_DESC resourceDesc = {}; - setupResourceDesc(resourceDesc, srcDesc); + initTextureResourceDesc(resourceDesc, srcDesc); auto allocInfo = m_device->GetResourceAllocationInfo(0, 1, &resourceDesc); *outSize = (size_t)allocInfo.SizeInBytes; *outAlignment = (size_t)allocInfo.Alignment; return SLANG_OK; } -Result D3D12Device::getTextureRowAlignment(size_t* outAlignment) +Result DeviceImpl::getTextureRowAlignment(size_t* outAlignment) { *outAlignment = D3D12_TEXTURE_DATA_PITCH_ALIGNMENT; return SLANG_OK; } -Result D3D12Device::createTextureResource(const ITextureResource::Desc& descIn, const ITextureResource::SubresourceData* initData, ITextureResource** outResource) +Result DeviceImpl::createTextureResource( + const ITextureResource::Desc& descIn, + const ITextureResource::SubresourceData* initData, + ITextureResource** outResource) { // Description of uploading on Dx12 // https://msdn.microsoft.com/en-us/library/windows/desktop/dn899215%28v=vs.85%29.aspx @@ -6733,7 +1378,7 @@ Result D3D12Device::createTextureResource(const ITextureResource::Desc& descIn, TextureResource::Desc srcDesc = fixupTextureDesc(descIn); D3D12_RESOURCE_DESC resourceDesc = {}; - setupResourceDesc(resourceDesc, srcDesc); + initTextureResourceDesc(resourceDesc, srcDesc); const int arraySize = calcEffectiveArraySize(srcDesc); const int numMipMaps = srcDesc.numMipLevels; @@ -6750,7 +1395,8 @@ Result D3D12Device::createTextureResource(const ITextureResource::Desc& descIn, heapProps.VisibleNodeMask = 1; D3D12_HEAP_FLAGS flags = D3D12_HEAP_FLAG_NONE; - if (descIn.isShared) flags |= D3D12_HEAP_FLAG_SHARED; + if (descIn.isShared) + flags |= D3D12_HEAP_FLAG_SHARED; D3D12_CLEAR_VALUE clearValue; D3D12_CLEAR_VALUE* clearValuePtr = &clearValue; @@ -6804,7 +1450,7 @@ Result D3D12Device::createTextureResource(const ITextureResource::Desc& descIn, { // Create the upload texture D3D12Resource uploadTexture; - + { D3D12_HEAP_PROPERTIES heapProps; @@ -6828,7 +1474,13 @@ Result D3D12Device::createTextureResource(const ITextureResource::Desc& descIn, uploadResourceDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; uploadResourceDesc.Alignment = 0; - SLANG_RETURN_ON_FAIL(uploadTexture.initCommitted(m_device, heapProps, D3D12_HEAP_FLAG_NONE, uploadResourceDesc, D3D12_RESOURCE_STATE_GENERIC_READ, nullptr)); + SLANG_RETURN_ON_FAIL(uploadTexture.initCommitted( + m_device, + heapProps, + D3D12_HEAP_FLAG_NONE, + uploadResourceDesc, + D3D12_RESOURCE_STATE_GENERIC_READ, + nullptr)); uploadTexture.setDebugName(L"TextureUpload"); } @@ -6855,19 +1507,21 @@ Result D3D12Device::createTextureResource(const ITextureResource::Desc& descIn, mipSize.height = int(D3DUtil::calcAligned(mipSize.height, 4)); } - assert(footprint.Width == mipSize.width && footprint.Height == mipSize.height && footprint.Depth == mipSize.depth); + assert( + footprint.Width == mipSize.width && footprint.Height == mipSize.height && + footprint.Depth == mipSize.depth); auto mipRowSize = mipRowSizeInBytes[j]; const ptrdiff_t dstMipRowPitch = ptrdiff_t(footprint.RowPitch); const ptrdiff_t srcMipRowPitch = ptrdiff_t(srcSubresource.strideY); - const ptrdiff_t dstMipLayerPitch = ptrdiff_t(footprint.RowPitch*footprint.Height); + const ptrdiff_t dstMipLayerPitch = ptrdiff_t(footprint.RowPitch * footprint.Height); const ptrdiff_t srcMipLayerPitch = ptrdiff_t(srcSubresource.strideZ); // Our outer loop will copy the depth layers one at a time. // - const uint8_t* srcLayer = (const uint8_t*) srcSubresource.data; + const uint8_t* srcLayer = (const uint8_t*)srcSubresource.data; uint8_t* dstLayer = p + layouts[j].Offset; for (int l = 0; l < mipSize.depth; l++) { @@ -6875,7 +1529,9 @@ Result D3D12Device::createTextureResource(const ITextureResource::Desc& descIn, // const uint8_t* srcRow = srcLayer; uint8_t* dstRow = dstLayer; - int j = gfxIsCompressedFormat(descIn.format) ? 4 : 1; // BC compressed formats are organized into 4x4 blocks + int j = gfxIsCompressedFormat(descIn.format) + ? 4 + : 1; // BC compressed formats are organized into 4x4 blocks for (int k = 0; k < mipSize.height; k += j) { ::memcpy(dstRow, srcRow, (size_t)mipRowSize); @@ -6888,7 +1544,7 @@ Result D3D12Device::createTextureResource(const ITextureResource::Desc& descIn, dstLayer += dstMipLayerPitch; } - //assert(srcRow == (const uint8_t*)(srcMip.getBuffer() + srcMip.getCount())); + // assert(srcRow == (const uint8_t*)(srcMip.getBuffer() + srcMip.getCount())); } uploadResource->Unmap(0, nullptr); @@ -6929,7 +1585,8 @@ Result D3D12Device::createTextureResource(const ITextureResource::Desc& descIn, return SLANG_OK; } -Result D3D12Device::createTextureFromNativeHandle(InteropHandle handle, const ITextureResource::Desc& srcDesc, ITextureResource** outResource) +Result DeviceImpl::createTextureFromNativeHandle( + InteropHandle handle, const ITextureResource::Desc& srcDesc, ITextureResource** outResource) { RefPtr<TextureResourceImpl> texture(new TextureResourceImpl(srcDesc)); @@ -6946,16 +1603,17 @@ Result D3D12Device::createTextureFromNativeHandle(InteropHandle handle, const IT return SLANG_OK; } -Result D3D12Device::createBufferResource(const IBufferResource::Desc& descIn, const void* initData, IBufferResource** outResource) +Result DeviceImpl::createBufferResource( + const IBufferResource::Desc& descIn, const void* initData, IBufferResource** outResource) { BufferResource::Desc srcDesc = fixupBufferDesc(descIn); RefPtr<BufferResourceImpl> buffer(new BufferResourceImpl(srcDesc)); D3D12_RESOURCE_DESC bufferDesc; - _initBufferResourceDesc(descIn.sizeInBytes, bufferDesc); + initBufferResourceDesc(descIn.sizeInBytes, bufferDesc); - bufferDesc.Flags |= _calcResourceFlags(srcDesc.allowedStates); + bufferDesc.Flags |= calcResourceFlags(srcDesc.allowedStates); const D3D12_RESOURCE_STATES initialState = buffer->m_defaultState; SLANG_RETURN_ON_FAIL(createBuffer( @@ -6971,7 +1629,8 @@ Result D3D12Device::createBufferResource(const IBufferResource::Desc& descIn, co return SLANG_OK; } -Result D3D12Device::createBufferFromNativeHandle(InteropHandle handle, const IBufferResource::Desc& srcDesc, IBufferResource** outResource) +Result DeviceImpl::createBufferFromNativeHandle( + InteropHandle handle, const IBufferResource::Desc& srcDesc, IBufferResource** outResource) { RefPtr<BufferResourceImpl> buffer(new BufferResourceImpl(srcDesc)); @@ -6988,86 +1647,7 @@ Result D3D12Device::createBufferFromNativeHandle(InteropHandle handle, const IBu return SLANG_OK; } -D3D12_FILTER_TYPE translateFilterMode(TextureFilteringMode mode) -{ - switch (mode) - { - default: - return D3D12_FILTER_TYPE(0); - -#define CASE(SRC, DST) \ - case TextureFilteringMode::SRC: return D3D12_FILTER_TYPE_##DST - - CASE(Point, POINT); - CASE(Linear, LINEAR); - -#undef CASE - } -} - -D3D12_FILTER_REDUCTION_TYPE translateFilterReduction(TextureReductionOp op) -{ - switch (op) - { - default: - return D3D12_FILTER_REDUCTION_TYPE(0); - -#define CASE(SRC, DST) \ - case TextureReductionOp::SRC: return D3D12_FILTER_REDUCTION_TYPE_##DST - - CASE(Average, STANDARD); - CASE(Comparison, COMPARISON); - CASE(Minimum, MINIMUM); - CASE(Maximum, MAXIMUM); - -#undef CASE - } -} - -D3D12_TEXTURE_ADDRESS_MODE translateAddressingMode(TextureAddressingMode mode) -{ - switch (mode) - { - default: - return D3D12_TEXTURE_ADDRESS_MODE(0); - -#define CASE(SRC, DST) \ - case TextureAddressingMode::SRC: return D3D12_TEXTURE_ADDRESS_MODE_##DST - - CASE(Wrap, WRAP); - CASE(ClampToEdge, CLAMP); - CASE(ClampToBorder, BORDER); - CASE(MirrorRepeat, MIRROR); - CASE(MirrorOnce, MIRROR_ONCE); - -#undef CASE - } -} - -static D3D12_COMPARISON_FUNC translateComparisonFunc(ComparisonFunc func) -{ - switch (func) - { - default: - // TODO: need to report failures - return D3D12_COMPARISON_FUNC_ALWAYS; - -#define CASE(FROM, TO) \ - case ComparisonFunc::FROM: return D3D12_COMPARISON_FUNC_##TO - - CASE(Never, NEVER); - CASE(Less, LESS); - CASE(Equal, EQUAL); - CASE(LessEqual, LESS_EQUAL); - CASE(Greater, GREATER); - CASE(NotEqual, NOT_EQUAL); - CASE(GreaterEqual, GREATER_EQUAL); - CASE(Always, ALWAYS); -#undef CASE - } -} - -Result D3D12Device::createSamplerState(ISamplerState::Desc const& desc, ISamplerState** outSampler) +Result DeviceImpl::createSamplerState(ISamplerState::Desc const& desc, ISamplerState** outSampler) { D3D12_FILTER_REDUCTION_TYPE dxReduction = translateFilterReduction(desc.reductionOp); D3D12_FILTER dxFilter; @@ -7114,15 +1694,16 @@ Result D3D12Device::createSamplerState(ISamplerState::Desc const& desc, ISampler return SLANG_OK; } -Result D3D12Device::createTextureView(ITextureResource* texture, IResourceView::Desc const& desc, IResourceView** outView) +Result DeviceImpl::createTextureView( + ITextureResource* texture, IResourceView::Desc const& desc, IResourceView** outView) { - auto resourceImpl = (TextureResourceImpl*) texture; + auto resourceImpl = (TextureResourceImpl*)texture; RefPtr<ResourceViewImpl> viewImpl = new ResourceViewImpl(); viewImpl->m_resource = resourceImpl; viewImpl->m_desc = desc; bool isArray = resourceImpl ? resourceImpl->getDesc()->arraySize != 0 : false; - bool isMultiSample = resourceImpl ? resourceImpl->getDesc()->sampleDesc.numSamples > 1: false; + bool isMultiSample = resourceImpl ? resourceImpl->getDesc()->sampleDesc.numSamples > 1 : false; switch (desc.type) { default: @@ -7138,8 +1719,8 @@ Result D3D12Device::createTextureView(ITextureResource* texture, IResourceView:: switch (desc.renderTarget.shape) { case IResource::Type::Texture1D: - rtvDesc.ViewDimension = isArray ? D3D12_RTV_DIMENSION_TEXTURE1DARRAY - : D3D12_RTV_DIMENSION_TEXTURE1D; + rtvDesc.ViewDimension = + isArray ? D3D12_RTV_DIMENSION_TEXTURE1DARRAY : D3D12_RTV_DIMENSION_TEXTURE1D; rtvDesc.Texture1D.MipSlice = desc.subresourceRange.mipLevel; break; case IResource::Type::Texture2D: @@ -7279,13 +1860,17 @@ Result D3D12Device::createTextureView(ITextureResource* texture, IResourceView:: SLANG_RETURN_ON_FAIL(m_cpuViewHeap->allocate(&viewImpl->m_descriptor)); viewImpl->m_allocator = m_cpuViewHeap; - // Need to construct the D3D12_SHADER_RESOURCE_VIEW_DESC because otherwise TextureCube is not accessed - // appropriately (rather than just passing nullptr to CreateShaderResourceView) - const D3D12_RESOURCE_DESC resourceDesc = resourceImpl->m_resource.getResource()->GetDesc(); - const DXGI_FORMAT pixelFormat = desc.format == Format::Unknown ? resourceDesc.Format : D3DUtil::getMapFormat(desc.format); + // Need to construct the D3D12_SHADER_RESOURCE_VIEW_DESC because otherwise TextureCube + // is not accessed appropriately (rather than just passing nullptr to + // CreateShaderResourceView) + const D3D12_RESOURCE_DESC resourceDesc = + resourceImpl->m_resource.getResource()->GetDesc(); + const DXGI_FORMAT pixelFormat = desc.format == Format::Unknown + ? resourceDesc.Format + : D3DUtil::getMapFormat(desc.format); D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc; - _initSrvDesc( + initSrvDesc( resourceImpl->getType(), *resourceImpl->getDesc(), resourceDesc, @@ -7293,7 +1878,8 @@ Result D3D12Device::createTextureView(ITextureResource* texture, IResourceView:: desc.subresourceRange, srvDesc); - m_device->CreateShaderResourceView(resourceImpl->m_resource, &srvDesc, viewImpl->m_descriptor.cpuHandle); + m_device->CreateShaderResourceView( + resourceImpl->m_resource, &srvDesc, viewImpl->m_descriptor.cpuHandle); } break; } @@ -7302,12 +1888,12 @@ Result D3D12Device::createTextureView(ITextureResource* texture, IResourceView:: return SLANG_OK; } -Result D3D12Device::getFormatSupportedResourceStates(Format format, ResourceStateSet* outStates) +Result DeviceImpl::getFormatSupportedResourceStates(Format format, ResourceStateSet* outStates) { D3D12_FEATURE_DATA_FORMAT_SUPPORT support; support.Format = D3DUtil::getMapFormat(format); - SLANG_RETURN_ON_FAIL(m_device->CheckFeatureSupport( - D3D12_FEATURE_FORMAT_SUPPORT, &support, sizeof(support))); + SLANG_RETURN_ON_FAIL( + m_device->CheckFeatureSupport(D3D12_FEATURE_FORMAT_SUPPORT, &support, sizeof(support))); ResourceStateSet allowedStates; @@ -7349,13 +1935,13 @@ Result D3D12Device::getFormatSupportedResourceStates(Format format, ResourceStat return SLANG_OK; } -Result D3D12Device::createBufferView( +Result DeviceImpl::createBufferView( IBufferResource* buffer, IBufferResource* counterBuffer, IResourceView::Desc const& desc, IResourceView** outView) { - auto resourceImpl = (BufferResourceImpl*) buffer; + auto resourceImpl = (BufferResourceImpl*)buffer; auto resourceDesc = *resourceImpl->getDesc(); RefPtr<ResourceViewImpl> viewImpl = new ResourceViewImpl(); @@ -7383,7 +1969,7 @@ Result D3D12Device::createBufferView( : (UINT)desc.bufferRange.elementCount; viewSize = (uint64_t)desc.bufferElementSize * uavDesc.Buffer.NumElements; } - else if(desc.format == Format::Unknown) + else if (desc.format == Format::Unknown) { uavDesc.Format = DXGI_FORMAT_R32_TYPELESS; uavDesc.Buffer.NumElements = desc.bufferRange.elementCount == 0 @@ -7487,7 +2073,7 @@ Result D3D12Device::createBufferView( return SLANG_OK; } -Result D3D12Device::createFramebuffer(IFramebuffer::Desc const& desc, IFramebuffer** outFb) +Result DeviceImpl::createFramebuffer(IFramebuffer::Desc const& desc, IFramebuffer** outFb) { RefPtr<FramebufferImpl> framebuffer = new FramebufferImpl(); framebuffer->renderTargetViews.setCount(desc.renderTargetCount); @@ -7495,9 +2081,10 @@ Result D3D12Device::createFramebuffer(IFramebuffer::Desc const& desc, IFramebuff framebuffer->renderTargetClearValues.setCount(desc.renderTargetCount); for (uint32_t i = 0; i < desc.renderTargetCount; i++) { - framebuffer->renderTargetViews[i] = static_cast<ResourceViewImpl*>(desc.renderTargetViews[i]); - framebuffer->renderTargetDescriptors[i] = - framebuffer->renderTargetViews[i]->m_descriptor.cpuHandle; + framebuffer->renderTargetViews[i] = + static_cast<ResourceViewImpl*>(desc.renderTargetViews[i]); + framebuffer->renderTargetDescriptors[i] = + framebuffer->renderTargetViews[i]->m_descriptor.cpuHandle; if (static_cast<ResourceViewImpl*>(desc.renderTargetViews[i])->m_resource.Ptr()) { auto clearValue = @@ -7531,7 +2118,7 @@ Result D3D12Device::createFramebuffer(IFramebuffer::Desc const& desc, IFramebuff return SLANG_OK; } -Result D3D12Device::createFramebufferLayout( +Result DeviceImpl::createFramebufferLayout( IFramebufferLayout::Desc const& desc, IFramebufferLayout** outLayout) { RefPtr<FramebufferLayoutImpl> layout = new FramebufferLayoutImpl(); @@ -7540,7 +2127,7 @@ Result D3D12Device::createFramebufferLayout( { layout->m_renderTargets[i] = desc.renderTargets[i]; } - + if (desc.depthStencil) { layout->m_hasDepthStencil = true; @@ -7554,9 +2141,8 @@ Result D3D12Device::createFramebufferLayout( return SLANG_OK; } -Result D3D12Device::createRenderPassLayout( - const IRenderPassLayout::Desc& desc, - IRenderPassLayout** outRenderPassLayout) +Result DeviceImpl::createRenderPassLayout( + const IRenderPassLayout::Desc& desc, IRenderPassLayout** outRenderPassLayout) { RefPtr<RenderPassLayoutImpl> result = new RenderPassLayoutImpl(); result->init(desc); @@ -7564,7 +2150,7 @@ Result D3D12Device::createRenderPassLayout( return SLANG_OK; } -Result D3D12Device::createInputLayout(IInputLayout::Desc const& desc, IInputLayout** outLayout) +Result DeviceImpl::createInputLayout(IInputLayout::Desc const& desc, IInputLayout** outLayout) { RefPtr<InputLayoutImpl> layout(new InputLayoutImpl); @@ -7585,7 +2171,6 @@ Result D3D12Device::createInputLayout(IInputLayout::Desc const& desc, IInputLayo List<D3D12_INPUT_ELEMENT_DESC>& elements = layout->m_elements; elements.setCount(inputElementCount); - for (Int i = 0; i < inputElementCount; ++i) { const InputElementDesc& srcEle = inputElements[i]; @@ -7622,11 +2207,10 @@ Result D3D12Device::createInputLayout(IInputLayout::Desc const& desc, IInputLayo return SLANG_OK; } -Result D3D12Device::readBufferResource( - IBufferResource* bufferIn, - size_t offset, - size_t size, - ISlangBlob** outBlob) +const gfx::DeviceInfo& DeviceImpl::getDeviceInfo() const { return m_info; } + +Result DeviceImpl::readBufferResource( + IBufferResource* bufferIn, size_t offset, size_t size, ISlangBlob** outBlob) { BufferResourceImpl* buffer = static_cast<BufferResourceImpl*>(bufferIn); @@ -7651,9 +2235,15 @@ Result D3D12Device::readBufferResource( // Resource to readback to D3D12_RESOURCE_DESC stagingDesc; - _initBufferResourceDesc(size, stagingDesc); + initBufferResourceDesc(size, stagingDesc); - SLANG_RETURN_ON_FAIL(stageBuf.initCommitted(m_device, heapProps, D3D12_HEAP_FLAG_NONE, stagingDesc, D3D12_RESOURCE_STATE_COPY_DEST, nullptr)); + SLANG_RETURN_ON_FAIL(stageBuf.initCommitted( + m_device, + heapProps, + D3D12_HEAP_FLAG_NONE, + stagingDesc, + D3D12_RESOURCE_STATE_COPY_DEST, + nullptr)); // Do the copy encodeInfo.d3dCommandList->CopyBufferRegion(stageBuf, 0, resource, offset, size); @@ -7669,9 +2259,10 @@ Result D3D12Device::readBufferResource( RefPtr<ListBlob> blob = new ListBlob(); { UINT8* data; - D3D12_RANGE readRange = { 0, size }; + D3D12_RANGE readRange = {0, size}; - SLANG_RETURN_ON_FAIL(stageBufRef.getResource()->Map(0, &readRange, reinterpret_cast<void**>(&data))); + SLANG_RETURN_ON_FAIL( + stageBufRef.getResource()->Map(0, &readRange, reinterpret_cast<void**>(&data))); // Copy to memory buffer blob->m_data.setCount(size); @@ -7683,7 +2274,8 @@ Result D3D12Device::readBufferResource( return SLANG_OK; } -Result D3D12Device::createProgram(const IShaderProgram::Desc& desc, IShaderProgram** outProgram, ISlangBlob** outDiagnosticBlob) +Result DeviceImpl::createProgram( + const IShaderProgram::Desc& desc, IShaderProgram** outProgram, ISlangBlob** outDiagnosticBlob) { RefPtr<ShaderProgramImpl> shaderProgram = new ShaderProgramImpl(); shaderProgram->init(desc); @@ -7708,33 +2300,27 @@ Result D3D12Device::createProgram(const IShaderProgram::Desc& desc, IShaderProgr return SLANG_OK; } -Result D3D12Device::createShaderObjectLayout( - slang::TypeLayoutReflection* typeLayout, - ShaderObjectLayoutBase** outLayout) +Result DeviceImpl::createShaderObjectLayout( + slang::TypeLayoutReflection* typeLayout, ShaderObjectLayoutBase** outLayout) { RefPtr<ShaderObjectLayoutImpl> layout; SLANG_RETURN_ON_FAIL( - ShaderObjectLayoutImpl::createForElementType( - this, typeLayout, layout.writeRef())); + ShaderObjectLayoutImpl::createForElementType(this, typeLayout, layout.writeRef())); returnRefPtrMove(outLayout, layout); return SLANG_OK; } -Result D3D12Device::createShaderObject( - ShaderObjectLayoutBase* layout, - IShaderObject** outObject) +Result DeviceImpl::createShaderObject(ShaderObjectLayoutBase* layout, IShaderObject** outObject) { RefPtr<ShaderObjectImpl> shaderObject; SLANG_RETURN_ON_FAIL(ShaderObjectImpl::create( - this, reinterpret_cast<ShaderObjectLayoutImpl*>(layout), - shaderObject.writeRef())); + this, reinterpret_cast<ShaderObjectLayoutImpl*>(layout), shaderObject.writeRef())); returnComPtr(outObject, shaderObject); return SLANG_OK; } -Result D3D12Device::createMutableShaderObject( - ShaderObjectLayoutBase* layout, - IShaderObject** outObject) +Result DeviceImpl::createMutableShaderObject( + ShaderObjectLayoutBase* layout, IShaderObject** outObject) { auto result = createShaderObject(layout, outObject); SLANG_RETURN_ON_FAIL(result); @@ -7742,17 +2328,18 @@ Result D3D12Device::createMutableShaderObject( return result; } -Result D3D12Device::createMutableRootShaderObject(IShaderProgram* program, IShaderObject** outObject) +Result DeviceImpl::createMutableRootShaderObject(IShaderProgram* program, IShaderObject** outObject) { RefPtr<MutableRootShaderObjectImpl> result = new MutableRootShaderObjectImpl(); result->init(this); auto programImpl = static_cast<ShaderProgramImpl*>(program); - result->resetImpl(this, programImpl->m_rootObjectLayout, m_cpuViewHeap.Ptr(), m_cpuSamplerHeap.Ptr(), true); + result->resetImpl( + this, programImpl->m_rootObjectLayout, m_cpuViewHeap.Ptr(), m_cpuSamplerHeap.Ptr(), true); returnComPtr(outObject, result); return SLANG_OK; } -Result D3D12Device::createShaderTable(const IShaderTable::Desc& desc, IShaderTable** outShaderTable) +Result DeviceImpl::createShaderTable(const IShaderTable::Desc& desc, IShaderTable** outShaderTable) { RefPtr<ShaderTableImpl> result = new ShaderTableImpl(); result->m_device = this; @@ -7761,7 +2348,8 @@ Result D3D12Device::createShaderTable(const IShaderTable::Desc& desc, IShaderTab return SLANG_OK; } -Result D3D12Device::createGraphicsPipelineState(const GraphicsPipelineStateDesc& desc, IPipelineState** outState) +Result DeviceImpl::createGraphicsPipelineState( + const GraphicsPipelineStateDesc& desc, IPipelineState** outState) { RefPtr<PipelineStateImpl> pipelineStateImpl = new PipelineStateImpl(this); pipelineStateImpl->init(desc); @@ -7769,7 +2357,8 @@ Result D3D12Device::createGraphicsPipelineState(const GraphicsPipelineStateDesc& return SLANG_OK; } -Result D3D12Device::createComputePipelineState(const ComputePipelineStateDesc& desc, IPipelineState** outState) +Result DeviceImpl::createComputePipelineState( + const ComputePipelineStateDesc& desc, IPipelineState** outState) { RefPtr<PipelineStateImpl> pipelineStateImpl = new PipelineStateImpl(this); pipelineStateImpl->init(desc); @@ -7777,7 +2366,733 @@ Result D3D12Device::createComputePipelineState(const ComputePipelineStateDesc& d return SLANG_OK; } -Result D3D12Device::QueryPoolImpl::init(const IQueryPool::Desc& desc, D3D12Device* device) +DeviceImpl::ResourceCommandRecordInfo DeviceImpl::encodeResourceCommands() +{ + ResourceCommandRecordInfo info; + m_resourceCommandTransientHeap->createCommandBuffer(info.commandBuffer.writeRef()); + info.d3dCommandList = static_cast<CommandBufferImpl*>(info.commandBuffer.get())->m_cmdList; + return info; +} + +void DeviceImpl::submitResourceCommandsAndWait(const DeviceImpl::ResourceCommandRecordInfo& info) +{ + info.commandBuffer->close(); + m_resourceCommandQueue->executeCommandBuffer(info.commandBuffer); + m_resourceCommandTransientHeap->synchronizeAndReset(); +} + +Result DeviceImpl::createQueryPool(const IQueryPool::Desc& desc, IQueryPool** outState) +{ + switch (desc.type) + { + case QueryType::AccelerationStructureCompactedSize: + case QueryType::AccelerationStructureSerializedSize: + case QueryType::AccelerationStructureCurrentSize: + { + RefPtr<PlainBufferProxyQueryPoolImpl> queryPoolImpl = + new PlainBufferProxyQueryPoolImpl(); + uint32_t stride = 8; + if (desc.type == QueryType::AccelerationStructureSerializedSize) + stride = 16; + SLANG_RETURN_ON_FAIL(queryPoolImpl->init(desc, this, stride)); + returnComPtr(outState, queryPoolImpl); + return SLANG_OK; + } + default: + { + RefPtr<QueryPoolImpl> queryPoolImpl = new QueryPoolImpl(); + SLANG_RETURN_ON_FAIL(queryPoolImpl->init(desc, this)); + returnComPtr(outState, queryPoolImpl); + return SLANG_OK; + } + } +} + +Result DeviceImpl::createFence(const IFence::Desc& desc, IFence** outFence) +{ + RefPtr<FenceImpl> fence = new FenceImpl(); + SLANG_RETURN_ON_FAIL(fence->init(this, desc)); + returnComPtr(outFence, fence); + return SLANG_OK; +} + +Result DeviceImpl::waitForFences( + uint32_t fenceCount, IFence** fences, uint64_t* fenceValues, bool waitForAll, uint64_t timeout) +{ + ShortList<HANDLE> waitHandles; + for (uint32_t i = 0; i < fenceCount; ++i) + { + auto fenceImpl = static_cast<FenceImpl*>(fences[i]); + waitHandles.add(fenceImpl->getWaitEvent()); + SLANG_RETURN_ON_FAIL( + fenceImpl->m_fence->SetEventOnCompletion(fenceValues[i], fenceImpl->getWaitEvent())); + } + auto result = WaitForMultipleObjects( + fenceCount, + waitHandles.getArrayView().getBuffer(), + waitForAll ? TRUE : FALSE, + timeout == kTimeoutInfinite ? INFINITE : (DWORD)(timeout / 1000000)); + if (result == WAIT_TIMEOUT) + return SLANG_E_TIME_OUT; + return result == WAIT_FAILED ? SLANG_FAIL : SLANG_OK; +} + +Result DeviceImpl::getAccelerationStructurePrebuildInfo( + const IAccelerationStructure::BuildInputs& buildInputs, + IAccelerationStructure::PrebuildInfo* outPrebuildInfo) +{ + if (!m_device5) + return SLANG_E_NOT_AVAILABLE; + + D3DAccelerationStructureInputsBuilder inputsBuilder; + SLANG_RETURN_ON_FAIL(inputsBuilder.build(buildInputs, getDebugCallback())); + + D3D12_RAYTRACING_ACCELERATION_STRUCTURE_PREBUILD_INFO prebuildInfo; + m_device5->GetRaytracingAccelerationStructurePrebuildInfo(&inputsBuilder.desc, &prebuildInfo); + + outPrebuildInfo->resultDataMaxSize = prebuildInfo.ResultDataMaxSizeInBytes; + outPrebuildInfo->scratchDataSize = prebuildInfo.ScratchDataSizeInBytes; + outPrebuildInfo->updateScratchDataSize = prebuildInfo.UpdateScratchDataSizeInBytes; + return SLANG_OK; +} + +Result DeviceImpl::createAccelerationStructure( + const IAccelerationStructure::CreateDesc& desc, IAccelerationStructure** outAS) +{ +#if SLANG_GFX_HAS_DXR_SUPPORT + RefPtr<AccelerationStructureImpl> result = new AccelerationStructureImpl(); + result->m_device5 = m_device5; + result->m_buffer = static_cast<BufferResourceImpl*>(desc.buffer); + result->m_size = desc.size; + result->m_offset = desc.offset; + result->m_allocator = m_cpuViewHeap; + result->m_desc.type = IResourceView::Type::AccelerationStructure; + SLANG_RETURN_ON_FAIL(m_cpuViewHeap->allocate(&result->m_descriptor)); + D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc; + srvDesc.Format = DXGI_FORMAT_UNKNOWN; + srvDesc.ViewDimension = D3D12_SRV_DIMENSION_RAYTRACING_ACCELERATION_STRUCTURE; + srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + srvDesc.RaytracingAccelerationStructure.Location = + result->m_buffer->getDeviceAddress() + desc.offset; + m_device->CreateShaderResourceView(nullptr, &srvDesc, result->m_descriptor.cpuHandle); + returnComPtr(outAS, result); + return SLANG_OK; +#else + *outAS = nullptr; + return SLANG_FAIL; +#endif +} + +Result DeviceImpl::createRayTracingPipelineState( + const RayTracingPipelineStateDesc& inDesc, IPipelineState** outState) +{ + if (!m_device5) + { + return SLANG_E_NOT_AVAILABLE; + } + + RefPtr<RayTracingPipelineStateImpl> pipelineStateImpl = new RayTracingPipelineStateImpl(this); + pipelineStateImpl->init(inDesc); + returnComPtr(outState, pipelineStateImpl); + return SLANG_OK; +} + +Result DeviceImpl::createTransientResourceHeapImpl( + ITransientResourceHeap::Flags::Enum flags, + size_t constantBufferSize, + uint32_t viewDescriptors, + uint32_t samplerDescriptors, + TransientResourceHeapImpl** outHeap) +{ + RefPtr<TransientResourceHeapImpl> result = new TransientResourceHeapImpl(); + ITransientResourceHeap::Desc desc = {}; + desc.flags = flags; + desc.samplerDescriptorCount = samplerDescriptors; + desc.constantBufferSize = constantBufferSize; + desc.constantBufferDescriptorCount = viewDescriptors; + desc.accelerationStructureDescriptorCount = viewDescriptors; + desc.srvDescriptorCount = viewDescriptors; + desc.uavDescriptorCount = viewDescriptors; + SLANG_RETURN_ON_FAIL(result->init(desc, this, viewDescriptors, samplerDescriptors)); + returnRefPtrMove(outHeap, result); + return SLANG_OK; +} + +Result DeviceImpl::createCommandQueueImpl(CommandQueueImpl** outQueue) +{ + int queueIndex = m_queueIndexAllocator.alloc(1); + // If we run out of queue index space, then the user is requesting too many queues. + if (queueIndex == -1) + return SLANG_FAIL; + + RefPtr<CommandQueueImpl> queue = new CommandQueueImpl(); + SLANG_RETURN_ON_FAIL(queue->init(this, (uint32_t)queueIndex)); + returnRefPtrMove(outQueue, queue); + return SLANG_OK; +} + +PROC DeviceImpl::loadProc(HMODULE module, char const* name) +{ + PROC proc = ::GetProcAddress(module, name); + if (!proc) + { + fprintf(stderr, "error: failed load symbol '%s'\n", name); + return nullptr; + } + return proc; +} + +DeviceImpl::~DeviceImpl() { m_shaderObjectLayoutCache = decltype(m_shaderObjectLayoutCache)(); } + +struct GraphicsSubmitter : public Submitter +{ + virtual void setRootConstantBufferView( + int index, D3D12_GPU_VIRTUAL_ADDRESS gpuBufferLocation) override + { + m_commandList->SetGraphicsRootConstantBufferView(index, gpuBufferLocation); + } + virtual void setRootUAV(int index, D3D12_GPU_VIRTUAL_ADDRESS gpuBufferLocation) override + { + m_commandList->SetGraphicsRootUnorderedAccessView(index, gpuBufferLocation); + } + virtual void setRootSRV(int index, D3D12_GPU_VIRTUAL_ADDRESS gpuBufferLocation) override + { + m_commandList->SetGraphicsRootShaderResourceView(index, gpuBufferLocation); + } + virtual void setRootDescriptorTable( + int index, D3D12_GPU_DESCRIPTOR_HANDLE baseDescriptor) override + { + m_commandList->SetGraphicsRootDescriptorTable(index, baseDescriptor); + } + void setRootSignature(ID3D12RootSignature* rootSignature) + { + m_commandList->SetGraphicsRootSignature(rootSignature); + } + void setRootConstants( + Index rootParamIndex, + Index dstOffsetIn32BitValues, + Index countOf32BitValues, + void const* srcData) override + { + m_commandList->SetGraphicsRoot32BitConstants( + UINT(rootParamIndex), UINT(countOf32BitValues), srcData, UINT(dstOffsetIn32BitValues)); + } + virtual void setPipelineState(PipelineStateBase* pipeline) override + { + auto pipelineImpl = static_cast<PipelineStateImpl*>(pipeline); + m_commandList->SetPipelineState(pipelineImpl->m_pipelineState.get()); + } + + GraphicsSubmitter(ID3D12GraphicsCommandList* commandList) + : m_commandList(commandList) + {} + + ID3D12GraphicsCommandList* m_commandList; +}; + +struct ComputeSubmitter : public Submitter +{ + virtual void setRootConstantBufferView( + int index, D3D12_GPU_VIRTUAL_ADDRESS gpuBufferLocation) override + { + m_commandList->SetComputeRootConstantBufferView(index, gpuBufferLocation); + } + virtual void setRootUAV(int index, D3D12_GPU_VIRTUAL_ADDRESS gpuBufferLocation) override + { + m_commandList->SetComputeRootUnorderedAccessView(index, gpuBufferLocation); + } + virtual void setRootSRV(int index, D3D12_GPU_VIRTUAL_ADDRESS gpuBufferLocation) override + { + m_commandList->SetComputeRootShaderResourceView(index, gpuBufferLocation); + } + virtual void setRootDescriptorTable( + int index, D3D12_GPU_DESCRIPTOR_HANDLE baseDescriptor) override + { + m_commandList->SetComputeRootDescriptorTable(index, baseDescriptor); + } + void setRootSignature(ID3D12RootSignature* rootSignature) + { + m_commandList->SetComputeRootSignature(rootSignature); + } + void setRootConstants( + Index rootParamIndex, + Index dstOffsetIn32BitValues, + Index countOf32BitValues, + void const* srcData) override + { + m_commandList->SetComputeRoot32BitConstants( + UINT(rootParamIndex), UINT(countOf32BitValues), srcData, UINT(dstOffsetIn32BitValues)); + } + virtual void setPipelineState(PipelineStateBase* pipeline) override + { + auto pipelineImpl = static_cast<PipelineStateImpl*>(pipeline); + m_commandList->SetPipelineState(pipelineImpl->m_pipelineState.get()); + } + ComputeSubmitter(ID3D12GraphicsCommandList* commandList) + : m_commandList(commandList) + {} + + ID3D12GraphicsCommandList* m_commandList; +}; + +BufferResourceImpl::BufferResourceImpl(const Desc& desc) + : Parent(desc) + , m_defaultState(D3DUtil::getResourceState(desc.defaultState)) +{} + +BufferResourceImpl::~BufferResourceImpl() +{ + if (sharedHandle.handleValue != 0) + { + CloseHandle((HANDLE)sharedHandle.handleValue); + } +} + +DeviceAddress BufferResourceImpl::getDeviceAddress() +{ + return (DeviceAddress)m_resource.getResource()->GetGPUVirtualAddress(); +} + +Result BufferResourceImpl::getNativeResourceHandle(InteropHandle* outHandle) +{ + outHandle->handleValue = (uint64_t)m_resource.getResource(); + outHandle->api = InteropHandleAPI::D3D12; + return SLANG_OK; +} + +Result BufferResourceImpl::getSharedHandle(InteropHandle* outHandle) +{ + // Check if a shared handle already exists for this resource. + if (sharedHandle.handleValue != 0) + { + *outHandle = sharedHandle; + return SLANG_OK; + } + + // If a shared handle doesn't exist, create one and store it. + ComPtr<ID3D12Device> pDevice; + auto pResource = m_resource.getResource(); + pResource->GetDevice(IID_PPV_ARGS(pDevice.writeRef())); + SLANG_RETURN_ON_FAIL(pDevice->CreateSharedHandle( + pResource, NULL, GENERIC_ALL, nullptr, (HANDLE*)&outHandle->handleValue)); + outHandle->api = InteropHandleAPI::D3D12; + sharedHandle = *outHandle; + return SLANG_OK; +} + +Result BufferResourceImpl::map(MemoryRange* rangeToRead, void** outPointer) +{ + D3D12_RANGE range = {}; + if (rangeToRead) + { + range.Begin = (SIZE_T)rangeToRead->offset; + range.End = (SIZE_T)(rangeToRead->offset + rangeToRead->size); + } + SLANG_RETURN_ON_FAIL( + m_resource.getResource()->Map(0, rangeToRead ? &range : nullptr, outPointer)); + return SLANG_OK; +} + +Result BufferResourceImpl::unmap(MemoryRange* writtenRange) +{ + D3D12_RANGE range = {}; + if (writtenRange) + { + range.Begin = (SIZE_T)writtenRange->offset; + range.End = (SIZE_T)(writtenRange->offset + writtenRange->size); + } + m_resource.getResource()->Unmap(0, writtenRange ? &range : nullptr); + return SLANG_OK; +} + +Result BufferResourceImpl::setDebugName(const char* name) +{ + Parent::setDebugName(name); + m_resource.setDebugName(name); + return SLANG_OK; +} + +TextureResourceImpl::TextureResourceImpl(const Desc& desc) + : Parent(desc) + , m_defaultState(D3DUtil::getResourceState(desc.defaultState)) +{} + +TextureResourceImpl::~TextureResourceImpl() +{ + if (sharedHandle.handleValue != 0) + { + CloseHandle((HANDLE)sharedHandle.handleValue); + } +} + +Result TextureResourceImpl::getNativeResourceHandle(InteropHandle* outHandle) +{ + outHandle->handleValue = (uint64_t)m_resource.getResource(); + outHandle->api = InteropHandleAPI::D3D12; + return SLANG_OK; +} + +Result TextureResourceImpl::getSharedHandle(InteropHandle* outHandle) +{ + // Check if a shared handle already exists for this resource. + if (sharedHandle.handleValue != 0) + { + *outHandle = sharedHandle; + return SLANG_OK; + } + + // If a shared handle doesn't exist, create one and store it. + ComPtr<ID3D12Device> pDevice; + auto pResource = m_resource.getResource(); + pResource->GetDevice(IID_PPV_ARGS(pDevice.writeRef())); + SLANG_RETURN_ON_FAIL(pDevice->CreateSharedHandle( + pResource, NULL, GENERIC_ALL, nullptr, (HANDLE*)&outHandle->handleValue)); + outHandle->api = InteropHandleAPI::D3D12; + return SLANG_OK; +} + +Result TextureResourceImpl::setDebugName(const char* name) +{ + Parent::setDebugName(name); + m_resource.setDebugName(name); + return SLANG_OK; +} + +SamplerStateImpl::~SamplerStateImpl() { m_allocator->free(m_descriptor); } + +Result SamplerStateImpl::getNativeHandle(InteropHandle* outHandle) +{ + outHandle->api = InteropHandleAPI::D3D12CpuDescriptorHandle; + outHandle->handleValue = m_descriptor.cpuHandle.ptr; + return SLANG_OK; +} + +#if SLANG_GFX_HAS_DXR_SUPPORT + +DeviceAddress AccelerationStructureImpl::getDeviceAddress() +{ + return m_buffer->getDeviceAddress() + m_offset; +} + +Result AccelerationStructureImpl::getNativeHandle(InteropHandle* outHandle) +{ + outHandle->api = InteropHandleAPI::DeviceAddress; + outHandle->handleValue = getDeviceAddress(); + return SLANG_OK; +} + +#endif // SLANG_GFX_HAS_DXR_SUPPORT + +Result TransientResourceHeapImpl::synchronizeAndReset() +{ + Array<HANDLE, 16> waitHandles; + for (auto& waitInfo : m_waitInfos) + { + if (waitInfo.waitValue == 0) + continue; + if (waitInfo.fence) + { + waitInfo.fence->SetEventOnCompletion(waitInfo.waitValue, waitInfo.fenceEvent); + waitHandles.add(waitInfo.fenceEvent); + } + } + WaitForMultipleObjects((DWORD)waitHandles.getCount(), waitHandles.getBuffer(), TRUE, INFINITE); + m_currentViewHeapIndex = -1; + m_currentSamplerHeapIndex = -1; + allocateNewViewDescriptorHeap(m_device); + allocateNewSamplerDescriptorHeap(m_device); + m_stagingCpuSamplerHeap.freeAll(); + m_stagingCpuViewHeap.freeAll(); + m_commandListAllocId = 0; + SLANG_RETURN_ON_FAIL(m_commandAllocator->Reset()); + Super::reset(); + return SLANG_OK; +} + +TransientResourceHeapImpl::QueueWaitInfo& TransientResourceHeapImpl::getQueueWaitInfo( + uint32_t queueIndex) +{ + if (queueIndex < (uint32_t)m_waitInfos.getCount()) + { + return m_waitInfos[queueIndex]; + } + auto oldCount = m_waitInfos.getCount(); + m_waitInfos.setCount(queueIndex + 1); + for (auto i = oldCount; i < m_waitInfos.getCount(); i++) + { + m_waitInfos[i].waitValue = 0; + m_waitInfos[i].fenceEvent = CreateEventEx(nullptr, false, 0, EVENT_ALL_ACCESS); + } + return m_waitInfos[queueIndex]; +} + +D3D12DescriptorHeap& TransientResourceHeapImpl::getCurrentViewHeap() +{ + return m_viewHeaps[m_currentViewHeapIndex]; +} + +D3D12DescriptorHeap& TransientResourceHeapImpl::getCurrentSamplerHeap() +{ + return m_samplerHeaps[m_currentSamplerHeapIndex]; +} + +Result TransientResourceHeapImpl::queryInterface(SlangUUID const& uuid, void** outObject) +{ + if (uuid == GfxGUID::IID_ID3D12TransientResourceHeap) + { + *outObject = static_cast<ID3D12TransientResourceHeap*>(this); + addRef(); + return SLANG_OK; + } + return Super::queryInterface(uuid, outObject); +} + +Result TransientResourceHeapImpl::allocateTransientDescriptorTable( + DescriptorType type, + uint32_t count, + uint64_t& outDescriptorOffset, + void** outD3DDescriptorHeapHandle) +{ + auto& heap = + (type == DescriptorType::ResourceView) ? getCurrentViewHeap() : getCurrentSamplerHeap(); + int allocResult = heap.allocate((int)count); + if (allocResult == -1) + { + return SLANG_E_OUT_OF_MEMORY; + } + outDescriptorOffset = (uint64_t)allocResult; + *outD3DDescriptorHeapHandle = heap.getHeap(); + return SLANG_OK; +} + +TransientResourceHeapImpl::~TransientResourceHeapImpl() +{ + synchronizeAndReset(); + for (auto& waitInfo : m_waitInfos) + CloseHandle(waitInfo.fenceEvent); +} + +Result TransientResourceHeapImpl::init( + const ITransientResourceHeap::Desc& desc, + DeviceImpl* device, + uint32_t viewHeapSize, + uint32_t samplerHeapSize) +{ + Super::init(desc, D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT, device); + m_canResize = (desc.flags & ITransientResourceHeap::Flags::AllowResizing) != 0; + m_viewHeapSize = viewHeapSize; + m_samplerHeapSize = samplerHeapSize; + + m_stagingCpuViewHeap.init( + device->m_device, + 1000000, + D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, + D3D12_DESCRIPTOR_HEAP_FLAG_NONE); + m_stagingCpuSamplerHeap.init( + device->m_device, + 1000000, + D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER, + D3D12_DESCRIPTOR_HEAP_FLAG_NONE); + + auto d3dDevice = device->m_device; + SLANG_RETURN_ON_FAIL(d3dDevice->CreateCommandAllocator( + D3D12_COMMAND_LIST_TYPE_DIRECT, IID_PPV_ARGS(m_commandAllocator.writeRef()))); + + allocateNewViewDescriptorHeap(device); + allocateNewSamplerDescriptorHeap(device); + + return SLANG_OK; +} + +Result TransientResourceHeapImpl::allocateNewViewDescriptorHeap(DeviceImpl* device) +{ + auto nextHeapIndex = m_currentViewHeapIndex + 1; + if (nextHeapIndex < m_viewHeaps.getCount()) + { + m_viewHeaps[nextHeapIndex].deallocateAll(); + m_currentViewHeapIndex = nextHeapIndex; + return SLANG_OK; + } + auto d3dDevice = device->m_device; + D3D12DescriptorHeap viewHeap; + SLANG_RETURN_ON_FAIL(viewHeap.init( + d3dDevice, + m_viewHeapSize, + D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, + D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE)); + m_currentViewHeapIndex = (int32_t)m_viewHeaps.getCount(); + m_viewHeaps.add(_Move(viewHeap)); + return SLANG_OK; +} + +Result TransientResourceHeapImpl::allocateNewSamplerDescriptorHeap(DeviceImpl* device) +{ + auto nextHeapIndex = m_currentSamplerHeapIndex + 1; + if (nextHeapIndex < m_samplerHeaps.getCount()) + { + m_samplerHeaps[nextHeapIndex].deallocateAll(); + m_currentSamplerHeapIndex = nextHeapIndex; + return SLANG_OK; + } + auto d3dDevice = device->m_device; + D3D12DescriptorHeap samplerHeap; + SLANG_RETURN_ON_FAIL(samplerHeap.init( + d3dDevice, + m_samplerHeapSize, + D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER, + D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE)); + m_currentSamplerHeapIndex = (int32_t)m_samplerHeaps.getCount(); + m_samplerHeaps.add(_Move(samplerHeap)); + return SLANG_OK; +} + +Result TransientResourceHeapImpl::createCommandBuffer(ICommandBuffer** outCmdBuffer) +{ + if ((Index)m_commandListAllocId < m_commandBufferPool.getCount()) + { + auto result = + static_cast<CommandBufferImpl*>(m_commandBufferPool[m_commandListAllocId].Ptr()); + m_d3dCommandListPool[m_commandListAllocId]->Reset(m_commandAllocator, nullptr); + result->reinit(); + ++m_commandListAllocId; + returnComPtr(outCmdBuffer, result); + return SLANG_OK; + } + ComPtr<ID3D12GraphicsCommandList> cmdList; + m_device->m_device->CreateCommandList( + 0, + D3D12_COMMAND_LIST_TYPE_DIRECT, + m_commandAllocator, + nullptr, + IID_PPV_ARGS(cmdList.writeRef())); + + m_d3dCommandListPool.add(cmdList); + RefPtr<CommandBufferImpl> cmdBuffer = new CommandBufferImpl(); + cmdBuffer->init(m_device, cmdList, this); + m_commandBufferPool.add(cmdBuffer); + ++m_commandListAllocId; + returnComPtr(outCmdBuffer, cmdBuffer); + return SLANG_OK; +} + +int PipelineCommandEncoder::getBindPointIndex(PipelineType type) +{ + switch (type) + { + case PipelineType::Graphics: + return 0; + case PipelineType::Compute: + return 1; + case PipelineType::RayTracing: + return 2; + default: + assert(!"unknown pipeline type."); + return -1; + } +} + +void PipelineCommandEncoder::init(CommandBufferImpl* commandBuffer) +{ + m_commandBuffer = commandBuffer; + m_d3dCmdList = m_commandBuffer->m_cmdList; + m_renderer = commandBuffer->m_renderer; + m_transientHeap = commandBuffer->m_transientHeap; + m_device = commandBuffer->m_renderer->m_device; +} + +Result PipelineCommandEncoder::bindPipelineImpl( + IPipelineState* pipelineState, IShaderObject** outRootObject) +{ + m_currentPipeline = static_cast<PipelineStateBase*>(pipelineState); + auto rootObject = &m_commandBuffer->m_rootShaderObject; + m_commandBuffer->m_mutableRootShaderObject = nullptr; + SLANG_RETURN_ON_FAIL(rootObject->reset( + m_renderer, + m_currentPipeline->getProgram<ShaderProgramImpl>()->m_rootObjectLayout, + m_commandBuffer->m_transientHeap)); + *outRootObject = rootObject; + m_bindingDirty = true; + return SLANG_OK; +} + +Result PipelineCommandEncoder::bindPipelineWithRootObjectImpl( + IPipelineState* pipelineState, IShaderObject* rootObject) +{ + m_currentPipeline = static_cast<PipelineStateBase*>(pipelineState); + m_commandBuffer->m_mutableRootShaderObject = + static_cast<MutableRootShaderObjectImpl*>(rootObject); + m_bindingDirty = true; + return SLANG_OK; +} + +Result PipelineCommandEncoder::_bindRenderState( + Submitter* submitter, RefPtr<PipelineStateBase>& newPipeline) +{ + RootShaderObjectImpl* rootObjectImpl = m_commandBuffer->m_mutableRootShaderObject + ? m_commandBuffer->m_mutableRootShaderObject.Ptr() + : &m_commandBuffer->m_rootShaderObject; + SLANG_RETURN_ON_FAIL( + m_renderer->maybeSpecializePipeline(m_currentPipeline, rootObjectImpl, newPipeline)); + PipelineStateBase* newPipelineImpl = static_cast<PipelineStateBase*>(newPipeline.Ptr()); + auto commandList = m_d3dCmdList; + auto pipelineTypeIndex = (int)newPipelineImpl->desc.type; + auto programImpl = static_cast<ShaderProgramImpl*>(newPipelineImpl->m_program.Ptr()); + newPipelineImpl->ensureAPIPipelineStateCreated(); + submitter->setRootSignature(programImpl->m_rootObjectLayout->m_rootSignature); + submitter->setPipelineState(newPipelineImpl); + RootShaderObjectLayoutImpl* rootLayoutImpl = programImpl->m_rootObjectLayout; + + // We need to set up a context for binding shader objects to the pipeline state. + // This type mostly exists to bundle together a bunch of parameters that would + // otherwise need to be tunneled down through all the shader object binding + // logic. + // + BindingContext context = {}; + context.encoder = this; + context.submitter = submitter; + context.device = m_renderer; + context.transientHeap = m_transientHeap; + context.outOfMemoryHeap = (D3D12_DESCRIPTOR_HEAP_TYPE)(-1); + // We kick off binding of shader objects at the root object, and the objects + // themselves will be responsible for allocating, binding, and filling in + // any descriptor tables or other root parameters needed. + // + m_commandBuffer->bindDescriptorHeaps(); + if (rootObjectImpl->bindAsRoot(&context, rootLayoutImpl) == SLANG_E_OUT_OF_MEMORY) + { + if (!m_transientHeap->canResize()) + { + return SLANG_E_OUT_OF_MEMORY; + } + + // If we run out of heap space while binding, allocate new descriptor heaps and try again. + ID3D12DescriptorHeap* d3dheap = nullptr; + m_commandBuffer->invalidateDescriptorHeapBinding(); + switch (context.outOfMemoryHeap) + { + case D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV: + SLANG_RETURN_ON_FAIL(m_transientHeap->allocateNewViewDescriptorHeap(m_renderer)); + d3dheap = m_transientHeap->getCurrentViewHeap().getHeap(); + m_commandBuffer->bindDescriptorHeaps(); + break; + case D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER: + SLANG_RETURN_ON_FAIL(m_transientHeap->allocateNewSamplerDescriptorHeap(m_renderer)); + d3dheap = m_transientHeap->getCurrentSamplerHeap().getHeap(); + m_commandBuffer->bindDescriptorHeaps(); + break; + default: + assert(!"shouldn't be here"); + return SLANG_FAIL; + } + + // Try again. + SLANG_RETURN_ON_FAIL(rootObjectImpl->bindAsRoot(&context, rootLayoutImpl)); + } + + return SLANG_OK; +} + +Result QueryPoolImpl::init(const IQueryPool::Desc& desc, DeviceImpl* device) { m_desc = desc; @@ -7797,8 +3112,8 @@ Result D3D12Device::QueryPoolImpl::init(const IQueryPool::Desc& desc, D3D12Devic // Create query heap. auto d3dDevice = device->m_device; - SLANG_RETURN_ON_FAIL(d3dDevice->CreateQueryHeap( - &heapDesc, IID_PPV_ARGS(m_queryHeap.writeRef()))); + SLANG_RETURN_ON_FAIL( + d3dDevice->CreateQueryHeap(&heapDesc, IID_PPV_ARGS(m_queryHeap.writeRef()))); // Create readback buffer. D3D12_HEAP_PROPERTIES heapProps; @@ -7808,7 +3123,7 @@ Result D3D12Device::QueryPoolImpl::init(const IQueryPool::Desc& desc, D3D12Devic heapProps.CreationNodeMask = 1; heapProps.VisibleNodeMask = 1; D3D12_RESOURCE_DESC resourceDesc = {}; - _initBufferResourceDesc(sizeof(uint64_t) * desc.count, resourceDesc); + initBufferResourceDesc(sizeof(uint64_t) * desc.count, resourceDesc); SLANG_RETURN_ON_FAIL(m_readBackBuffer.initCommitted( d3dDevice, heapProps, @@ -7831,26 +3146,60 @@ Result D3D12Device::QueryPoolImpl::init(const IQueryPool::Desc& desc, D3D12Devic m_commandList->Close(); // Create fence. - SLANG_RETURN_ON_FAIL(d3dDevice->CreateFence( - 0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(m_fence.writeRef()))); + SLANG_RETURN_ON_FAIL( + d3dDevice->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(m_fence.writeRef()))); // Get command queue from device. m_commandQueue = device->m_resourceCommandQueue->m_d3dQueue; // Create wait event. - m_waitEvent = CreateEventEx( - nullptr, - false, - 0, - EVENT_ALL_ACCESS); + m_waitEvent = CreateEventEx(nullptr, false, 0, EVENT_ALL_ACCESS); + + return SLANG_OK; +} +Result QueryPoolImpl::getResult(SlangInt queryIndex, SlangInt count, uint64_t* data) +{ + m_commandList->Reset(m_commandAllocator, nullptr); + m_commandList->ResolveQueryData( + m_queryHeap, + m_queryType, + (UINT)queryIndex, + (UINT)count, + m_readBackBuffer, + sizeof(uint64_t) * queryIndex); + m_commandList->Close(); + ID3D12CommandList* cmdList = m_commandList; + m_commandQueue->ExecuteCommandLists(1, &cmdList); + m_eventValue++; + m_fence->SetEventOnCompletion(m_eventValue, m_waitEvent); + m_commandQueue->Signal(m_fence, m_eventValue); + WaitForSingleObject(m_waitEvent, INFINITE); + m_commandAllocator->Reset(); + + int8_t* mappedData = nullptr; + D3D12_RANGE readRange = { + sizeof(uint64_t) * queryIndex, sizeof(uint64_t) * (queryIndex + count)}; + m_readBackBuffer.getResource()->Map(0, &readRange, (void**)&mappedData); + memcpy(data, mappedData + sizeof(uint64_t) * queryIndex, sizeof(uint64_t) * count); + m_readBackBuffer.getResource()->Unmap(0, nullptr); return SLANG_OK; } -Result D3D12Device::PlainBufferProxyQueryPoolImpl::init( - const IQueryPool::Desc& desc, - D3D12Device* device, - uint32_t stride) +void QueryPoolImpl::writeTimestamp(ID3D12GraphicsCommandList* cmdList, SlangInt index) +{ + cmdList->EndQuery(m_queryHeap, D3D12_QUERY_TYPE_TIMESTAMP, (UINT)index); +} + +IQueryPool* PlainBufferProxyQueryPoolImpl::getInterface(const Guid& guid) +{ + if (guid == GfxGUID::IID_ISlangUnknown || guid == GfxGUID::IID_IQueryPool) + return static_cast<IQueryPool*>(this); + return nullptr; +} + +Result PlainBufferProxyQueryPoolImpl::init( + const IQueryPool::Desc& desc, DeviceImpl* device, uint32_t stride) { ComPtr<IBufferResource> bufferResource; IBufferResource::Desc bufferDesc = {}; @@ -7862,7 +3211,7 @@ Result D3D12Device::PlainBufferProxyQueryPoolImpl::init( bufferDesc.allowedStates.add(ResourceState::UnorderedAccess); SLANG_RETURN_ON_FAIL( device->createBufferResource(bufferDesc, nullptr, bufferResource.writeRef())); - m_bufferResource = static_cast<D3D12Device::BufferResourceImpl*>(bufferResource.get()); + m_bufferResource = static_cast<BufferResourceImpl*>(bufferResource.get()); m_queryType = desc.type; m_device = device; m_stride = stride; @@ -7871,126 +3220,66 @@ Result D3D12Device::PlainBufferProxyQueryPoolImpl::init( return SLANG_OK; } -Result D3D12Device::createQueryPool(const IQueryPool::Desc& desc, IQueryPool** outState) +Result PlainBufferProxyQueryPoolImpl::reset() { - switch (desc.type) - { - case QueryType::AccelerationStructureCompactedSize: - case QueryType::AccelerationStructureSerializedSize: - case QueryType::AccelerationStructureCurrentSize: - { - RefPtr<PlainBufferProxyQueryPoolImpl> queryPoolImpl = - new PlainBufferProxyQueryPoolImpl(); - uint32_t stride = 8; - if (desc.type == QueryType::AccelerationStructureSerializedSize) - stride = 16; - SLANG_RETURN_ON_FAIL(queryPoolImpl->init(desc, this, stride)); - returnComPtr(outState, queryPoolImpl); - return SLANG_OK; - } - default: - { - RefPtr<QueryPoolImpl> queryPoolImpl = new QueryPoolImpl(); - SLANG_RETURN_ON_FAIL(queryPoolImpl->init(desc, this)); - returnComPtr(outState, queryPoolImpl); - return SLANG_OK; - } - } -} - -Result D3D12Device::createFence(const IFence::Desc& desc, IFence** outFence) -{ - RefPtr<FenceImpl> fence = new FenceImpl(); - SLANG_RETURN_ON_FAIL(fence->init(this, desc)); - returnComPtr(outFence, fence); + m_resultDirty = true; + auto encodeInfo = m_device->encodeResourceCommands(); + D3D12_RESOURCE_BARRIER barrier = {}; + barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; + barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_SOURCE; + barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_UNORDERED_ACCESS; + barrier.Transition.pResource = m_bufferResource->m_resource.getResource(); + encodeInfo.d3dCommandList->ResourceBarrier(1, &barrier); + m_device->submitResourceCommandsAndWait(encodeInfo); return SLANG_OK; } -Result D3D12Device::waitForFences( - uint32_t fenceCount, IFence** fences, uint64_t* fenceValues, bool waitForAll, uint64_t timeout) +Result PlainBufferProxyQueryPoolImpl::getResult(SlangInt queryIndex, SlangInt count, uint64_t* data) { - ShortList<HANDLE> waitHandles; - for (uint32_t i = 0; i < fenceCount; ++i) + if (m_resultDirty) { - auto fenceImpl = static_cast<FenceImpl*>(fences[i]); - waitHandles.add(fenceImpl->getWaitEvent()); - SLANG_RETURN_ON_FAIL(fenceImpl->m_fence->SetEventOnCompletion(fenceValues[i], fenceImpl->getWaitEvent())); - } - auto result = WaitForMultipleObjects( - fenceCount, - waitHandles.getArrayView().getBuffer(), - waitForAll ? TRUE : FALSE, - timeout == kTimeoutInfinite ? INFINITE : (DWORD)(timeout / 1000000)); - if (result == WAIT_TIMEOUT) - return SLANG_E_TIME_OUT; - return result == WAIT_FAILED ? SLANG_FAIL : SLANG_OK; -} + auto encodeInfo = m_device->encodeResourceCommands(); + D3D12_RESOURCE_BARRIER barrier = {}; + barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; + barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_UNORDERED_ACCESS; + barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_SOURCE; + barrier.Transition.pResource = m_bufferResource->m_resource.getResource(); + encodeInfo.d3dCommandList->ResourceBarrier(1, &barrier); -#if SLANG_GFX_HAS_DXR_SUPPORT + D3D12Resource stageBuf; -class D3D12AccelerationStructureImpl - : public AccelerationStructureBase - , public D3D12Device::ResourceViewInternalImpl -{ -public: - RefPtr<D3D12Device::BufferResourceImpl> m_buffer; - uint64_t m_offset; - uint64_t m_size; - ComPtr<ID3D12Device5> m_device5; + auto size = (size_t)m_count * m_stride; + D3D12_HEAP_PROPERTIES heapProps; + heapProps.Type = D3D12_HEAP_TYPE_READBACK; + heapProps.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN; + heapProps.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN; + heapProps.CreationNodeMask = 1; + heapProps.VisibleNodeMask = 1; -public: - virtual SLANG_NO_THROW DeviceAddress SLANG_MCALL getDeviceAddress() override - { - return m_buffer->getDeviceAddress() + m_offset; - } + D3D12_RESOURCE_DESC stagingDesc; + initBufferResourceDesc(size, stagingDesc); - virtual SLANG_NO_THROW Result SLANG_MCALL getNativeHandle(InteropHandle* outHandle) override - { - outHandle->api = InteropHandleAPI::DeviceAddress; - outHandle->handleValue = getDeviceAddress(); - return SLANG_OK; - } -}; + SLANG_RETURN_ON_FAIL(stageBuf.initCommitted( + m_device->m_device, + heapProps, + D3D12_HEAP_FLAG_NONE, + stagingDesc, + D3D12_RESOURCE_STATE_COPY_DEST, + nullptr)); -Result D3D12Device::getAccelerationStructurePrebuildInfo( - const IAccelerationStructure::BuildInputs& buildInputs, - IAccelerationStructure::PrebuildInfo* outPrebuildInfo) -{ - if (!m_device5) - return SLANG_E_NOT_AVAILABLE; - - D3DAccelerationStructureInputsBuilder inputsBuilder; - SLANG_RETURN_ON_FAIL(inputsBuilder.build(buildInputs, getDebugCallback())); + encodeInfo.d3dCommandList->CopyBufferRegion( + stageBuf, 0, m_bufferResource->m_resource.getResource(), 0, size); + m_device->submitResourceCommandsAndWait(encodeInfo); + void* ptr = nullptr; + stageBuf.getResource()->Map(0, nullptr, &ptr); + m_result.setCount(m_count * m_stride); + memcpy(m_result.getBuffer(), ptr, m_result.getCount()); - D3D12_RAYTRACING_ACCELERATION_STRUCTURE_PREBUILD_INFO prebuildInfo; - m_device5->GetRaytracingAccelerationStructurePrebuildInfo(&inputsBuilder.desc, &prebuildInfo); + m_resultDirty = false; + } - outPrebuildInfo->resultDataMaxSize = prebuildInfo.ResultDataMaxSizeInBytes; - outPrebuildInfo->scratchDataSize = prebuildInfo.ScratchDataSizeInBytes; - outPrebuildInfo->updateScratchDataSize = prebuildInfo.UpdateScratchDataSizeInBytes; - return SLANG_OK; -} + memcpy(data, m_result.getBuffer() + queryIndex * m_stride, count * m_stride); -Result D3D12Device::createAccelerationStructure( - const IAccelerationStructure::CreateDesc& desc, - IAccelerationStructure** outAS) -{ - RefPtr<D3D12AccelerationStructureImpl> result = new D3D12AccelerationStructureImpl(); - result->m_device5 = m_device5; - result->m_buffer = static_cast<BufferResourceImpl*>(desc.buffer); - result->m_size = desc.size; - result->m_offset = desc.offset; - result->m_allocator = m_cpuViewHeap; - result->m_desc.type = IResourceView::Type::AccelerationStructure; - SLANG_RETURN_ON_FAIL(m_cpuViewHeap->allocate(&result->m_descriptor)); - D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc; - srvDesc.Format = DXGI_FORMAT_UNKNOWN; - srvDesc.ViewDimension = D3D12_SRV_DIMENSION_RAYTRACING_ACCELERATION_STRUCTURE; - srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; - srvDesc.RaytracingAccelerationStructure.Location = - result->m_buffer->getDeviceAddress()+ desc.offset; - m_device->CreateShaderResourceView(nullptr, &srvDesc, result->m_descriptor.cpuHandle); - returnComPtr(outAS, result); return SLANG_OK; } @@ -8008,7 +3297,7 @@ void translatePostBuildInfoDescs( postBuildInfoDescs[i].InfoType = D3D12_RAYTRACING_ACCELERATION_STRUCTURE_POSTBUILD_INFO_COMPACTED_SIZE; postBuildInfoDescs[i].DestBuffer = - static_cast<D3D12Device::PlainBufferProxyQueryPoolImpl*>(queryDescs[i].queryPool) + static_cast<PlainBufferProxyQueryPoolImpl*>(queryDescs[i].queryPool) ->m_bufferResource->getDeviceAddress() + sizeof(D3D12_RAYTRACING_ACCELERATION_STRUCTURE_POSTBUILD_INFO_COMPACTED_SIZE_DESC) * queryDescs[i].firstQueryIndex; @@ -8017,7 +3306,7 @@ void translatePostBuildInfoDescs( postBuildInfoDescs[i].InfoType = D3D12_RAYTRACING_ACCELERATION_STRUCTURE_POSTBUILD_INFO_CURRENT_SIZE; postBuildInfoDescs[i].DestBuffer = - static_cast<D3D12Device::PlainBufferProxyQueryPoolImpl*>(queryDescs[i].queryPool) + static_cast<PlainBufferProxyQueryPoolImpl*>(queryDescs[i].queryPool) ->m_bufferResource->getDeviceAddress() + sizeof(D3D12_RAYTRACING_ACCELERATION_STRUCTURE_POSTBUILD_INFO_COMPACTED_SIZE_DESC) * queryDescs[i].firstQueryIndex; @@ -8026,7 +3315,7 @@ void translatePostBuildInfoDescs( postBuildInfoDescs[i].InfoType = D3D12_RAYTRACING_ACCELERATION_STRUCTURE_POSTBUILD_INFO_SERIALIZATION; postBuildInfoDescs[i].DestBuffer = - static_cast<D3D12Device::PlainBufferProxyQueryPoolImpl*>(queryDescs[i].queryPool) + static_cast<PlainBufferProxyQueryPoolImpl*>(queryDescs[i].queryPool) ->m_bufferResource->getDeviceAddress() + sizeof(D3D12_RAYTRACING_ACCELERATION_STRUCTURE_POSTBUILD_INFO_SERIALIZATION_DESC) * queryDescs[i].firstQueryIndex; @@ -8035,7 +3324,9 @@ void translatePostBuildInfoDescs( } } -void D3D12Device::CommandBufferImpl::RayTracingCommandEncoderImpl::buildAccelerationStructure( +#if SLANG_GFX_HAS_DXR_SUPPORT + +void RayTracingCommandEncoderImpl::buildAccelerationStructure( const IAccelerationStructure::BuildDesc& desc, int propertyQueryCount, AccelerationStructureQueryDesc* queryDescs) @@ -8048,16 +3339,16 @@ void D3D12Device::CommandBufferImpl::RayTracingCommandEncoderImpl::buildAccelera "Ray-tracing is not supported on current system."); return; } - D3D12AccelerationStructureImpl* destASImpl = nullptr; + AccelerationStructureImpl* destASImpl = nullptr; if (desc.dest) - destASImpl = static_cast<D3D12AccelerationStructureImpl*>(desc.dest); - D3D12AccelerationStructureImpl* srcASImpl = nullptr; + destASImpl = static_cast<AccelerationStructureImpl*>(desc.dest); + AccelerationStructureImpl* srcASImpl = nullptr; if (desc.source) - srcASImpl = static_cast<D3D12AccelerationStructureImpl*>(desc.source); - + srcASImpl = static_cast<AccelerationStructureImpl*>(desc.source); + D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_DESC buildDesc = {}; buildDesc.DestAccelerationStructureData = destASImpl->getDeviceAddress(); - buildDesc.SourceAccelerationStructureData = srcASImpl?srcASImpl->getDeviceAddress() : 0; + buildDesc.SourceAccelerationStructureData = srcASImpl ? srcASImpl->getDeviceAddress() : 0; buildDesc.ScratchAccelerationStructureData = desc.scratchData; D3DAccelerationStructureInputsBuilder builder; builder.build(desc.inputs, getDebugCallback()); @@ -8069,13 +3360,11 @@ void D3D12Device::CommandBufferImpl::RayTracingCommandEncoderImpl::buildAccelera &buildDesc, (UINT)propertyQueryCount, postBuildInfoDescs.getBuffer()); } -void D3D12Device::CommandBufferImpl::RayTracingCommandEncoderImpl::copyAccelerationStructure( - IAccelerationStructure* dest, - IAccelerationStructure* src, - AccelerationStructureCopyMode mode) +void RayTracingCommandEncoderImpl::copyAccelerationStructure( + IAccelerationStructure* dest, IAccelerationStructure* src, AccelerationStructureCopyMode mode) { - auto destASImpl = static_cast<D3D12AccelerationStructureImpl*>(dest); - auto srcASImpl = static_cast<D3D12AccelerationStructureImpl*>(src); + auto destASImpl = static_cast<AccelerationStructureImpl*>(dest); + auto srcASImpl = static_cast<AccelerationStructureImpl*>(src); D3D12_RAYTRACING_ACCELERATION_STRUCTURE_COPY_MODE copyMode; switch (mode) { @@ -8096,12 +3385,11 @@ void D3D12Device::CommandBufferImpl::RayTracingCommandEncoderImpl::copyAccelerat destASImpl->getDeviceAddress(), srcASImpl->getDeviceAddress(), copyMode); } -void D3D12Device::CommandBufferImpl::RayTracingCommandEncoderImpl:: - queryAccelerationStructureProperties( - int accelerationStructureCount, - IAccelerationStructure* const* accelerationStructures, - int queryCount, - AccelerationStructureQueryDesc* queryDescs) +void RayTracingCommandEncoderImpl::queryAccelerationStructureProperties( + int accelerationStructureCount, + IAccelerationStructure* const* accelerationStructures, + int queryCount, + AccelerationStructureQueryDesc* queryDescs) { List<D3D12_RAYTRACING_ACCELERATION_STRUCTURE_POSTBUILD_INFO_DESC> postBuildInfoDescs; List<DeviceAddress> asAddresses; @@ -8110,40 +3398,36 @@ void D3D12Device::CommandBufferImpl::RayTracingCommandEncoderImpl:: asAddresses[i] = accelerationStructures[i]->getDeviceAddress(); translatePostBuildInfoDescs(queryCount, queryDescs, postBuildInfoDescs); m_commandBuffer->m_cmdList4->EmitRaytracingAccelerationStructurePostbuildInfo( - postBuildInfoDescs.getBuffer(), - (UINT)accelerationStructureCount, - asAddresses.getBuffer()); + postBuildInfoDescs.getBuffer(), (UINT)accelerationStructureCount, asAddresses.getBuffer()); } -void D3D12Device::CommandBufferImpl::RayTracingCommandEncoderImpl::serializeAccelerationStructure( - DeviceAddress dest, - IAccelerationStructure* src) +void RayTracingCommandEncoderImpl::serializeAccelerationStructure( + DeviceAddress dest, IAccelerationStructure* src) { - auto srcASImpl = static_cast<D3D12AccelerationStructureImpl*>(src); + auto srcASImpl = static_cast<AccelerationStructureImpl*>(src); m_commandBuffer->m_cmdList4->CopyRaytracingAccelerationStructure( dest, srcASImpl->getDeviceAddress(), D3D12_RAYTRACING_ACCELERATION_STRUCTURE_COPY_MODE_SERIALIZE); } -void D3D12Device::CommandBufferImpl::RayTracingCommandEncoderImpl::deserializeAccelerationStructure( - IAccelerationStructure* dest, - DeviceAddress source) +void RayTracingCommandEncoderImpl::deserializeAccelerationStructure( + IAccelerationStructure* dest, DeviceAddress source) { - auto destASImpl = static_cast<D3D12AccelerationStructureImpl*>(dest); + auto destASImpl = static_cast<AccelerationStructureImpl*>(dest); m_commandBuffer->m_cmdList4->CopyRaytracingAccelerationStructure( dest->getDeviceAddress(), source, D3D12_RAYTRACING_ACCELERATION_STRUCTURE_COPY_MODE_DESERIALIZE); } -void D3D12Device::CommandBufferImpl::RayTracingCommandEncoderImpl::bindPipeline( +void RayTracingCommandEncoderImpl::bindPipeline( IPipelineState* state, IShaderObject** outRootObject) { bindPipelineImpl(state, outRootObject); } -void D3D12Device::CommandBufferImpl::RayTracingCommandEncoderImpl::dispatchRays( +void RayTracingCommandEncoderImpl::dispatchRays( uint32_t rayGenShaderIndex, IShaderTable* shaderTable, int32_t width, @@ -8157,9 +3441,9 @@ void D3D12Device::CommandBufferImpl::RayTracingCommandEncoderImpl::dispatchRays( { ID3D12GraphicsCommandList4* m_cmdList4; RayTracingSubmitter(ID3D12GraphicsCommandList4* cmdList4) - : ComputeSubmitter(cmdList4), m_cmdList4(cmdList4) - { - } + : ComputeSubmitter(cmdList4) + , m_cmdList4(cmdList4) + {} virtual void setPipelineState(PipelineStateBase* pipeline) override { auto pipelineImpl = static_cast<RayTracingPipelineStateImpl*>(pipeline); @@ -8180,7 +3464,8 @@ void D3D12Device::CommandBufferImpl::RayTracingCommandEncoderImpl::dispatchRays( ResourceCommandEncoderImpl resourceCopyEncoder; resourceCopyEncoder.init(m_commandBuffer); - auto shaderTableBuffer = shaderTableImpl->getOrCreateBuffer(pipelineImpl, m_transientHeap, &resourceCopyEncoder); + auto shaderTableBuffer = + shaderTableImpl->getOrCreateBuffer(pipelineImpl, m_transientHeap, &resourceCopyEncoder); auto shaderTableAddr = shaderTableBuffer->getDeviceAddress(); D3D12_DISPATCH_RAYS_DESC dispatchDesc = {}; @@ -8208,7 +3493,27 @@ void D3D12Device::CommandBufferImpl::RayTracingCommandEncoderImpl::dispatchRays( m_commandBuffer->m_cmdList4->DispatchRays(&dispatchDesc); } -Result D3D12Device::RayTracingPipelineStateImpl::ensureAPIPipelineStateCreated() +RayTracingPipelineStateImpl::RayTracingPipelineStateImpl(DeviceImpl* device) + : m_device(device) +{} + +void RayTracingPipelineStateImpl::init(const RayTracingPipelineStateDesc& inDesc) +{ + PipelineStateDesc pipelineDesc; + pipelineDesc.type = PipelineType::RayTracing; + pipelineDesc.rayTracing.set(inDesc); + initializeBase(pipelineDesc); +} + +Result RayTracingPipelineStateImpl::getNativeHandle(InteropHandle* outHandle) +{ + SLANG_RETURN_ON_FAIL(ensureAPIPipelineStateCreated()); + outHandle->api = InteropHandleAPI::D3D12; + outHandle->handleValue = reinterpret_cast<uint64_t>(m_stateObject.get()); + return SLANG_OK; +} + +Result RayTracingPipelineStateImpl::ensureAPIPipelineStateCreated() { if (m_stateObject) return SLANG_OK; @@ -8345,8 +3650,8 @@ Result D3D12Device::RayTracingPipelineStateImpl::ensureAPIPipelineStateCreated() rtpsoDesc.Type = D3D12_STATE_OBJECT_TYPE_RAYTRACING_PIPELINE; rtpsoDesc.NumSubobjects = (UINT)subObjects.getCount(); rtpsoDesc.pSubobjects = subObjects.getBuffer(); - SLANG_RETURN_ON_FAIL(m_device->m_device5->CreateStateObject( - &rtpsoDesc, IID_PPV_ARGS(m_stateObject.writeRef()))); + SLANG_RETURN_ON_FAIL( + m_device->m_device5->CreateStateObject(&rtpsoDesc, IID_PPV_ARGS(m_stateObject.writeRef()))); if (m_device->m_pipelineCreationAPIDispatcher) { @@ -8356,119 +3661,847 @@ Result D3D12Device::RayTracingPipelineStateImpl::ensureAPIPipelineStateCreated() return SLANG_OK; } -Result D3D12Device::createRayTracingPipelineState(const RayTracingPipelineStateDesc& inDesc, IPipelineState** outState) +#endif + +UInt ShaderObjectImpl::getEntryPointCount() { return 0; } + +Result ShaderObjectImpl::getEntryPoint(UInt index, IShaderObject** outEntryPoint) { - if (!m_device5) + *outEntryPoint = nullptr; + return SLANG_OK; +} + +const void* ShaderObjectImpl::getRawData() { return m_data.getBuffer(); } + +size_t ShaderObjectImpl::getSize() { return (size_t)m_data.getCount(); } + +Result ShaderObjectImpl::setData(ShaderOffset const& inOffset, void const* data, size_t inSize) +{ + Index offset = inOffset.uniformOffset; + Index size = inSize; + + char* dest = m_data.getBuffer(); + Index availableSize = m_data.getCount(); + + // TODO: We really should bounds-check access rather than silently ignoring sets + // that are too large, but we have several test cases that set more data than + // an object actually stores on several targets... + // + if (offset < 0) { - return SLANG_E_NOT_AVAILABLE; + size += offset; + offset = 0; + } + if ((offset + size) >= availableSize) + { + size = availableSize - offset; } - RefPtr<RayTracingPipelineStateImpl> pipelineStateImpl = new RayTracingPipelineStateImpl(this); - pipelineStateImpl->init(inDesc); - returnComPtr(outState, pipelineStateImpl); + memcpy(dest + offset, data, size); + + m_isConstantBufferDirty = true; + + m_version++; + return SLANG_OK; } -#endif // SLANG_GFX_HAS_DXR_SUPPORT +Result ShaderObjectImpl::setObject(ShaderOffset const& offset, IShaderObject* object) +{ + SLANG_RETURN_ON_FAIL(Super::setObject(offset, object)); + if (m_isMutable) + { + auto subObjectIndex = getSubObjectIndex(offset); + if (subObjectIndex >= m_subObjectVersions.getCount()) + m_subObjectVersions.setCount(subObjectIndex + 1); + m_subObjectVersions[subObjectIndex] = static_cast<ShaderObjectImpl*>(object)->m_version; + m_version++; + } + return SLANG_OK; +} -Result createNullDescriptor( - ID3D12Device* d3dDevice, - D3D12_CPU_DESCRIPTOR_HANDLE destDescriptor, - const D3D12Device::ShaderObjectLayoutImpl::BindingRangeInfo& bindingRange) +Result ShaderObjectImpl::setSampler(ShaderOffset const& offset, ISamplerState* sampler) { - switch (bindingRange.bindingType) + if (offset.bindingRangeIndex < 0) + return SLANG_E_INVALID_ARG; + auto layout = getLayout(); + if (offset.bindingRangeIndex >= layout->getBindingRangeCount()) + return SLANG_E_INVALID_ARG; + auto& bindingRange = layout->getBindingRange(offset.bindingRangeIndex); + auto samplerImpl = static_cast<SamplerStateImpl*>(sampler); + ID3D12Device* d3dDevice = static_cast<DeviceImpl*>(getDevice())->m_device; + d3dDevice->CopyDescriptorsSimple( + 1, + m_descriptorSet.samplerTable.getCpuHandle( + bindingRange.baseIndex + (int32_t)offset.bindingArrayIndex), + samplerImpl->m_descriptor.cpuHandle, + D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER); + m_version++; + return SLANG_OK; +} + +Result ShaderObjectImpl::setCombinedTextureSampler( + ShaderOffset const& offset, IResourceView* textureView, ISamplerState* sampler) +{ +#if 0 + if (offset.bindingRangeIndex < 0) + return SLANG_E_INVALID_ARG; + auto layout = getLayout(); + if (offset.bindingRangeIndex >= layout->getBindingRangeCount()) + return SLANG_E_INVALID_ARG; + auto& bindingRange = layout->getBindingRange(offset.bindingRangeIndex); + auto resourceViewImpl = static_cast<ResourceViewImpl*>(textureView); + ID3D12Device* d3dDevice = static_cast<DeviceImpl*>(getDevice())->m_device; + d3dDevice->CopyDescriptorsSimple( + 1, + m_resourceHeap.getCpuHandle( + m_descriptorSet.m_resourceTable + + bindingRange.binding.offsetInDescriptorTable.resource + + (int32_t)offset.bindingArrayIndex), + resourceViewImpl->m_descriptor.cpuHandle, + D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + auto samplerImpl = static_cast<SamplerStateImpl*>(sampler); + d3dDevice->CopyDescriptorsSimple( + 1, + m_samplerHeap.getCpuHandle( + m_descriptorSet.m_samplerTable + + bindingRange.binding.offsetInDescriptorTable.sampler + + (int32_t)offset.bindingArrayIndex), + samplerImpl->m_descriptor.cpuHandle, + D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER); +#endif + m_version++; + return SLANG_OK; +} + +Result ShaderObjectImpl::init( + DeviceImpl* device, + ShaderObjectLayoutImpl* layout, + DescriptorHeapReference viewHeap, + DescriptorHeapReference samplerHeap) +{ + m_device = device; + + m_layout = layout; + + m_cachedTransientHeap = nullptr; + m_cachedTransientHeapVersion = 0; + m_isConstantBufferDirty = true; + + // If the layout tells us that there is any uniform data, + // then we will allocate a CPU memory buffer to hold that data + // while it is being set from the host. + // + // Once the user is done setting the parameters/fields of this + // shader object, we will produce a GPU-memory version of the + // uniform data (which includes values from this object and + // any existential-type sub-objects). + // + size_t uniformSize = layout->getElementTypeLayout()->getSize(); + if (uniformSize) { - case slang::BindingType::ConstantBuffer: + m_data.setCount(uniformSize); + memset(m_data.getBuffer(), 0, uniformSize); + } + m_rootArguments.setCount(layout->getOwnUserRootParameterCount()); + memset( + m_rootArguments.getBuffer(), + 0, + sizeof(D3D12_GPU_VIRTUAL_ADDRESS) * m_rootArguments.getCount()); + // Each shader object will own CPU descriptor heap memory + // for any resource or sampler descriptors it might store + // as part of its value. + // + // This allocate includes a reservation for any constant + // buffer descriptor pertaining to the ordinary data, + // but does *not* include any descriptors that are managed + // as part of sub-objects. + // + if (auto resourceCount = layout->getResourceSlotCount()) + { + m_descriptorSet.resourceTable.allocate(viewHeap, resourceCount); + + // We must also ensure that the memory for any resources + // referenced by descriptors in this object does not get + // freed while the object is still live. + // + m_boundResources.setCount(resourceCount); + } + if (auto samplerCount = layout->getSamplerSlotCount()) + { + m_descriptorSet.samplerTable.allocate(samplerHeap, samplerCount); + } + + // If the layout specifies that we have any sub-objects, then + // we need to size the array to account for them. + // + Index subObjectCount = layout->getSubObjectSlotCount(); + m_objects.setCount(subObjectCount); + + for (auto subObjectRangeInfo : layout->getSubObjectRanges()) + { + auto subObjectLayout = subObjectRangeInfo.layout; + + // In the case where the sub-object range represents an + // existential-type leaf field (e.g., an `IBar`), we + // cannot pre-allocate the object(s) to go into that + // range, since we can't possibly know what to allocate + // at this point. + // + if (!subObjectLayout) + continue; + // + // Otherwise, we will allocate a sub-object to fill + // in each entry in this range, based on the layout + // information we already have. + + auto& bindingRangeInfo = layout->getBindingRange(subObjectRangeInfo.bindingRangeIndex); + for (uint32_t i = 0; i < bindingRangeInfo.count; ++i) { - D3D12_CONSTANT_BUFFER_VIEW_DESC cbvDesc = {}; - cbvDesc.BufferLocation = 0; - cbvDesc.SizeInBytes = 0; - d3dDevice->CreateConstantBufferView(&cbvDesc, destDescriptor); + RefPtr<ShaderObjectImpl> subObject; + SLANG_RETURN_ON_FAIL( + ShaderObjectImpl::create(device, subObjectLayout, subObject.writeRef())); + m_objects[bindingRangeInfo.subObjectIndex + i] = subObject; } - break; - case slang::BindingType::MutableRawBuffer: + } + + return SLANG_OK; +} + +/// Write the uniform/ordinary data of this object into the given `dest` buffer at the given +/// `offset` + +Result ShaderObjectImpl::_writeOrdinaryData( + PipelineCommandEncoder* encoder, + BufferResourceImpl* buffer, + size_t offset, + size_t destSize, + ShaderObjectLayoutImpl* specializedLayout) +{ + auto src = m_data.getBuffer(); + auto srcSize = size_t(m_data.getCount()); + + SLANG_ASSERT(srcSize <= destSize); + + uploadBufferDataImpl( + encoder->m_device, + encoder->m_d3dCmdList, + encoder->m_transientHeap, + buffer, + offset, + srcSize, + src); + + // In the case where this object has any sub-objects of + // existential/interface type, we need to recurse on those objects + // that need to write their state into an appropriate "pending" allocation. + // + // Note: Any values that could fit into the "payload" included + // in the existential-type field itself will have already been + // written as part of `setObject()`. This loop only needs to handle + // those sub-objects that do not "fit." + // + // An implementers looking at this code might wonder if things could be changed + // so that *all* writes related to sub-objects for interface-type fields could + // be handled in this one location, rather than having some in `setObject()` and + // others handled here. + // + Index subObjectRangeCounter = 0; + for (auto const& subObjectRangeInfo : specializedLayout->getSubObjectRanges()) + { + Index subObjectRangeIndex = subObjectRangeCounter++; + auto const& bindingRangeInfo = + specializedLayout->getBindingRange(subObjectRangeInfo.bindingRangeIndex); + + // We only need to handle sub-object ranges for interface/existential-type fields, + // because fields of constant-buffer or parameter-block type are responsible for + // the ordinary/uniform data of their own existential/interface-type sub-objects. + // + if (bindingRangeInfo.bindingType != slang::BindingType::ExistentialValue) + continue; + + // Each sub-object range represents a single "leaf" field, but might be nested + // under zero or more outer arrays, such that the number of existential values + // in the same range can be one or more. + // + auto count = bindingRangeInfo.count; + + // We are not concerned with the case where the existential value(s) in the range + // git into the payload part of the leaf field. + // + // In the case where the value didn't fit, the Slang layout strategy would have + // considered the requirements of the value as a "pending" allocation, and would + // allocate storage for the ordinary/uniform part of that pending allocation inside + // of the parent object's type layout. + // + // Here we assume that the Slang reflection API can provide us with a single byte + // offset and stride for the location of the pending data allocation in the + // specialized type layout, which will store the values for this sub-object range. + // + // TODO: The reflection API functions we are assuming here haven't been implemented + // yet, so the functions being called here are stubs. + // + // TODO: It might not be that a single sub-object range can reliably map to a single + // contiguous array with a single stride; we need to carefully consider what the + // layout logic does for complex cases with multiple layers of nested arrays and + // structures. + // + size_t subObjectRangePendingDataOffset = subObjectRangeInfo.offset.pendingOrdinaryData; + size_t subObjectRangePendingDataStride = subObjectRangeInfo.stride.pendingOrdinaryData; + + // If the range doesn't actually need/use the "pending" allocation at all, then + // we need to detect that case and skip such ranges. + // + // TODO: This should probably be handled on a per-object basis by caching a "does it + // fit?" bit as part of the information for bound sub-objects, given that we already + // compute the "does it fit?" status as part of `setObject()`. + // + if (subObjectRangePendingDataOffset == 0) + continue; + + for (uint32_t i = 0; i < count; ++i) { - D3D12_UNORDERED_ACCESS_VIEW_DESC uavDesc = {}; - uavDesc.ViewDimension = D3D12_UAV_DIMENSION_BUFFER; - uavDesc.Buffer.Flags = D3D12_BUFFER_UAV_FLAG_RAW; - uavDesc.Format = DXGI_FORMAT_R32_TYPELESS; - d3dDevice->CreateUnorderedAccessView(nullptr, nullptr, &uavDesc, destDescriptor); + auto subObject = m_objects[bindingRangeInfo.subObjectIndex + i]; + + RefPtr<ShaderObjectLayoutImpl> subObjectLayout; + SLANG_RETURN_ON_FAIL(subObject->getSpecializedLayout(subObjectLayout.writeRef())); + + auto subObjectOffset = + subObjectRangePendingDataOffset + i * subObjectRangePendingDataStride; + + subObject->_writeOrdinaryData( + encoder, + buffer, + offset + subObjectOffset, + destSize - subObjectOffset, + subObjectLayout); } - break; - case slang::BindingType::MutableTypedBuffer: + } + + return SLANG_OK; +} + +bool ShaderObjectImpl::shouldAllocateConstantBuffer(TransientResourceHeapImpl* transientHeap) +{ + if (m_isConstantBufferDirty || m_cachedTransientHeap != transientHeap || + m_cachedTransientHeapVersion != transientHeap->getVersion()) + { + return true; + } + return false; +} + +/// Ensure that the `m_ordinaryDataBuffer` has been created, if it is needed + +Result ShaderObjectImpl::_ensureOrdinaryDataBufferCreatedIfNeeded( + PipelineCommandEncoder* encoder, ShaderObjectLayoutImpl* specializedLayout) +{ + // If data has been changed since last allocation/filling of constant buffer, + // we will need to allocate a new one. + // + if (!shouldAllocateConstantBuffer(encoder->m_transientHeap)) + { + return SLANG_OK; + } + m_isConstantBufferDirty = false; + m_cachedTransientHeap = encoder->m_transientHeap; + m_cachedTransientHeapVersion = encoder->m_transientHeap->getVersion(); + + // Computing the size of the ordinary data buffer is *not* just as simple + // as using the size of the `m_ordinayData` array that we store. The reason + // for the added complexity is that interface-type fields may lead to the + // storage being specialized such that it needs extra appended data to + // store the concrete values that logically belong in those interface-type + // fields but wouldn't fit in the fixed-size allocation we gave them. + // + m_constantBufferSize = specializedLayout->getTotalOrdinaryDataSize(); + if (m_constantBufferSize == 0) + { + return SLANG_OK; + } + + // Once we have computed how large the buffer should be, we can allocate + // it from the transient resource heap. + // + auto alignedConstantBufferSize = D3DUtil::calcAligned(m_constantBufferSize, 256); + SLANG_RETURN_ON_FAIL(encoder->m_commandBuffer->m_transientHeap->allocateConstantBuffer( + alignedConstantBufferSize, m_constantBufferWeakPtr, m_constantBufferOffset)); + + // Once the buffer is allocated, we can use `_writeOrdinaryData` to fill it in. + // + // Note that `_writeOrdinaryData` is potentially recursive in the case + // where this object contains interface/existential-type fields, so we + // don't need or want to inline it into this call site. + // + SLANG_RETURN_ON_FAIL(_writeOrdinaryData( + encoder, + static_cast<BufferResourceImpl*>(m_constantBufferWeakPtr), + m_constantBufferOffset, + m_constantBufferSize, + specializedLayout)); + + { + // We also create and store a descriptor for our root constant buffer + // into the descriptor table allocation that was reserved for them. + // + // We always know that the ordinary data buffer will be the first descriptor + // in the table of resource views. + // + auto descriptorTable = m_descriptorSet.resourceTable; + D3D12_CONSTANT_BUFFER_VIEW_DESC viewDesc = {}; + viewDesc.BufferLocation = static_cast<BufferResourceImpl*>(m_constantBufferWeakPtr) + ->m_resource.getResource() + ->GetGPUVirtualAddress() + + m_constantBufferOffset; + viewDesc.SizeInBytes = (UINT)alignedConstantBufferSize; + encoder->m_device->CreateConstantBufferView(&viewDesc, descriptorTable.getCpuHandle()); + } + + return SLANG_OK; +} + +void ShaderObjectImpl::updateSubObjectsRecursive() +{ + if (!m_isMutable) + return; + auto& subObjectRanges = getLayout()->getSubObjectRanges(); + for (Slang::Index subObjectRangeIndex = 0; subObjectRangeIndex < subObjectRanges.getCount(); + subObjectRangeIndex++) + { + auto const& subObjectRange = subObjectRanges[subObjectRangeIndex]; + auto const& bindingRange = getLayout()->getBindingRange(subObjectRange.bindingRangeIndex); + Slang::Index count = bindingRange.count; + + for (Slang::Index subObjectIndexInRange = 0; subObjectIndexInRange < count; + subObjectIndexInRange++) { - D3D12_UNORDERED_ACCESS_VIEW_DESC uavDesc = {}; - uavDesc.ViewDimension = D3D12_UAV_DIMENSION_BUFFER; - uavDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; - d3dDevice->CreateUnorderedAccessView(nullptr, nullptr, &uavDesc, destDescriptor); + Slang::Index objectIndex = bindingRange.subObjectIndex + subObjectIndexInRange; + auto subObject = m_objects[objectIndex].Ptr(); + if (!subObject) + continue; + subObject->updateSubObjectsRecursive(); + if (m_subObjectVersions[objectIndex] != m_objects[objectIndex]->m_version) + { + ShaderOffset offset; + offset.bindingRangeIndex = subObjectRange.bindingRangeIndex; + offset.bindingArrayIndex = subObjectIndexInRange; + setObject(offset, subObject); + } } - break; - case slang::BindingType::RawBuffer: + } +} + +/// Prepare to bind this object as a parameter block. +/// +/// This involves allocating and binding any descriptor tables necessary +/// to to store the state of the object. The function returns a descriptor +/// set formed from any table(s) allocated. In addition, the `ioOffset` +/// parameter will be adjusted to be correct for binding values into +/// the resulting descriptor set. +/// +/// Returns: +/// SLANG_OK when successful, +/// SLANG_E_OUT_OF_MEMORY when descriptor heap is full. +/// + +Result ShaderObjectImpl::prepareToBindAsParameterBlock( + BindingContext* context, + BindingOffset& ioOffset, + ShaderObjectLayoutImpl* specializedLayout, + DescriptorSet& outDescriptorSet) +{ + auto transientHeap = context->transientHeap; + auto submitter = context->submitter; + + // When writing into the new descriptor set, resource and sampler + // descriptors will need to start at index zero in the respective + // tables. + // + ioOffset.resource = 0; + ioOffset.sampler = 0; + + // The index of the next root parameter to bind will be maintained, + // but needs to be incremented by the number of descriptor tables + // we allocate (zero or one resource table and zero or one sampler + // table). + // + auto& rootParamIndex = ioOffset.rootParam; + + if (auto descriptorCount = specializedLayout->getTotalResourceDescriptorCount()) + { + // There is a non-zero number of resource descriptors needed, + // so we will allocate a table out of the appropriate heap, + // and store it into the appropriate part of `descriptorSet`. + // + auto descriptorHeap = &transientHeap->getCurrentViewHeap(); + auto& table = outDescriptorSet.resourceTable; + + // Allocate the table. + // + if (!table.allocate(descriptorHeap, descriptorCount)) { - D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc = {}; - srvDesc.ViewDimension = D3D12_SRV_DIMENSION_BUFFER; - srvDesc.Buffer.Flags = D3D12_BUFFER_SRV_FLAG_RAW; - srvDesc.Format = DXGI_FORMAT_R32_TYPELESS; - srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; - d3dDevice->CreateShaderResourceView(nullptr, &srvDesc, destDescriptor); + context->outOfMemoryHeap = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV; + return SLANG_E_OUT_OF_MEMORY; } - break; - case slang::BindingType::TypedBuffer: + + // Bind the table to the pipeline, consuming the next available + // root parameter. + // + auto tableRootParamIndex = rootParamIndex++; + submitter->setRootDescriptorTable(tableRootParamIndex, table.getGpuHandle()); + } + if (auto descriptorCount = specializedLayout->getTotalSamplerDescriptorCount()) + { + // There is a non-zero number of sampler descriptors needed, + // so we will allocate a table out of the appropriate heap, + // and store it into the appropriate part of `descriptorSet`. + // + auto descriptorHeap = &transientHeap->getCurrentSamplerHeap(); + auto& table = outDescriptorSet.samplerTable; + + // Allocate the table. + // + if (!table.allocate(descriptorHeap, descriptorCount)) { - D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc = {}; - srvDesc.ViewDimension = D3D12_SRV_DIMENSION_BUFFER; - srvDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; - srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; - d3dDevice->CreateShaderResourceView(nullptr, &srvDesc, destDescriptor); + context->outOfMemoryHeap = D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER; + return SLANG_E_OUT_OF_MEMORY; } - break; - case slang::BindingType::Texture: + + // Bind the table to the pipeline, consuming the next available + // root parameter. + // + auto tableRootParamIndex = rootParamIndex++; + submitter->setRootDescriptorTable(tableRootParamIndex, table.getGpuHandle()); + } + + return SLANG_OK; +} + +bool ShaderObjectImpl::checkIfCachedDescriptorSetIsValidRecursive(BindingContext* context) +{ + if (shouldAllocateConstantBuffer(context->transientHeap)) + return false; + if (m_isMutable && m_version != m_cachedGPUDescriptorSetVersion) + return false; + if (m_cachedGPUDescriptorSet.resourceTable.getDescriptorCount() != 0 && + m_cachedGPUDescriptorSet.resourceTable.m_heap.ptr.linearHeap->getHeap() != + m_cachedTransientHeap->getCurrentViewHeap().getHeap()) + return false; + if (m_cachedGPUDescriptorSet.samplerTable.getDescriptorCount() != 0 && + m_cachedGPUDescriptorSet.samplerTable.m_heap.ptr.linearHeap->getHeap() != + m_cachedTransientHeap->getCurrentSamplerHeap().getHeap()) + return false; + + auto& subObjectRanges = getLayout()->getSubObjectRanges(); + for (Slang::Index subObjectRangeIndex = 0; subObjectRangeIndex < subObjectRanges.getCount(); + subObjectRangeIndex++) + { + auto const& subObjectRange = subObjectRanges[subObjectRangeIndex]; + auto const& bindingRange = getLayout()->getBindingRange(subObjectRange.bindingRangeIndex); + if (bindingRange.bindingType != slang::BindingType::ParameterBlock) + continue; + Slang::Index count = bindingRange.count; + + for (Slang::Index subObjectIndexInRange = 0; subObjectIndexInRange < count; + subObjectIndexInRange++) { - D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc = {}; - srvDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; - srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; - switch (bindingRange.resourceShape) + Slang::Index objectIndex = bindingRange.subObjectIndex + subObjectIndexInRange; + auto subObject = m_objects[objectIndex].Ptr(); + if (!subObject) + continue; + if (subObject->checkIfCachedDescriptorSetIsValidRecursive(context)) + return false; + } + } + return true; +} + +/// Bind this object as a `ParameterBlock<X>` + +Result ShaderObjectImpl::bindAsParameterBlock( + BindingContext* context, BindingOffset const& offset, ShaderObjectLayoutImpl* specializedLayout) +{ + if (checkIfCachedDescriptorSetIsValidRecursive(context)) + { + // If we already have a valid gpu descriptor table in the current + // heap, bind it. + auto rootParamIndex = offset.rootParam; + if (m_cachedGPUDescriptorSet.resourceTable.getDescriptorCount()) + { + auto tableRootParamIndex = rootParamIndex++; + context->submitter->setRootDescriptorTable( + tableRootParamIndex, m_cachedGPUDescriptorSet.resourceTable.getGpuHandle()); + } + if (m_cachedGPUDescriptorSet.samplerTable.getDescriptorCount()) + { + auto tableRootParamIndex = rootParamIndex++; + context->submitter->setRootDescriptorTable( + tableRootParamIndex, m_cachedGPUDescriptorSet.samplerTable.getGpuHandle()); + } + return SLANG_OK; + } + + // The first step to binding an object as a parameter block is to allocate a descriptor + // set (consisting of zero or one resource descriptor table and zero or one sampler + // descriptor table) to represent its values. + // + BindingOffset subOffset = offset; + SLANG_RETURN_ON_FAIL(prepareToBindAsParameterBlock( + context, /* inout */ subOffset, specializedLayout, m_cachedGPUDescriptorSet)); + + // Next we bind the object into that descriptor set as if it were being used + // as a `ConstantBuffer<X>`. + // + SLANG_RETURN_ON_FAIL( + bindAsConstantBuffer(context, m_cachedGPUDescriptorSet, subOffset, specializedLayout)); + + m_cachedGPUDescriptorSetVersion = m_version; + return SLANG_OK; +} + +/// Bind this object as a `ConstantBuffer<X>` + +Result ShaderObjectImpl::bindAsConstantBuffer( + BindingContext* context, + DescriptorSet const& descriptorSet, + BindingOffset const& offset, + ShaderObjectLayoutImpl* specializedLayout) +{ + // If we are to bind as a constant buffer we first need to ensure that + // the ordinary data buffer is created, if this object needs one. + // + SLANG_RETURN_ON_FAIL( + _ensureOrdinaryDataBufferCreatedIfNeeded(context->encoder, specializedLayout)); + + // Next, we need to bind all of the resource descriptors for this object + // (including any ordinary data buffer) into the provided `descriptorSet`. + // + auto resourceCount = specializedLayout->getResourceSlotCount(); + if (resourceCount) + { + auto& dstTable = descriptorSet.resourceTable; + auto& srcTable = m_descriptorSet.resourceTable; + + context->device->m_device->CopyDescriptorsSimple( + UINT(resourceCount), + dstTable.getCpuHandle(offset.resource), + srcTable.getCpuHandle(), + D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + } + + // Finally, we delegate to `_bindImpl` to bind samplers and sub-objects, + // since the logic is shared with the `bindAsValue()` case below. + // + SLANG_RETURN_ON_FAIL(_bindImpl(context, descriptorSet, offset, specializedLayout)); + return SLANG_OK; +} + +/// Bind this object as a value (for an interface-type parameter) + +Result ShaderObjectImpl::bindAsValue( + BindingContext* context, + DescriptorSet const& descriptorSet, + BindingOffset const& offset, + ShaderObjectLayoutImpl* specializedLayout) +{ + // When binding a value for an interface-type field we do *not* want + // to bind a buffer for the ordinary data (if there is any) because + // ordinary data for interface-type fields gets allocated into the + // parent object's ordinary data buffer. + // + // This CPU-memory descriptor table that holds resource descriptors + // will have already been allocated to have space for an ordinary data + // buffer (if needed), so we need to take care to skip over that + // descriptor when copying descriptors from the CPU-memory set + // to the GPU-memory `descriptorSet`. + // + auto skipResourceCount = specializedLayout->getOrdinaryDataBufferCount(); + auto resourceCount = specializedLayout->getResourceSlotCount() - skipResourceCount; + if (resourceCount) + { + auto& dstTable = descriptorSet.resourceTable; + auto& srcTable = m_descriptorSet.resourceTable; + + context->device->m_device->CopyDescriptorsSimple( + UINT(resourceCount), + dstTable.getCpuHandle(offset.resource), + srcTable.getCpuHandle(skipResourceCount), + D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + } + + // Finally, we delegate to `_bindImpl` to bind samplers and sub-objects, + // since the logic is shared with the `bindAsConstantBuffer()` case above. + // + // Note: Just like we had to do some subtle handling of the ordinary data buffer + // above, here we need to contend with the fact that the `offset.resource` fields + // computed for sub-object ranges were baked to take the ordinary data buffer + // into account, so that if `skipResourceCount` is non-zero then they are all + // too high by `skipResourceCount`. + // + // We will address the problem here by computing a modified offset that adjusts + // for the ordinary data buffer that we have not bound after all. + // + BindingOffset subOffset = offset; + subOffset.resource -= skipResourceCount; + SLANG_RETURN_ON_FAIL(_bindImpl(context, descriptorSet, subOffset, specializedLayout)); + return SLANG_OK; +} + +/// Shared logic for `bindAsConstantBuffer()` and `bindAsValue()` + +Result ShaderObjectImpl::_bindImpl( + BindingContext* context, + DescriptorSet const& descriptorSet, + BindingOffset const& offset, + ShaderObjectLayoutImpl* specializedLayout) +{ + // We start by binding all the sampler decriptors, if needed. + // + // Note: resource descriptors were handled in either `bindAsConstantBuffer()` + // or `bindAsValue()` before calling into `_bindImpl()`. + // + if (auto samplerCount = specializedLayout->getSamplerSlotCount()) + { + auto& dstTable = descriptorSet.samplerTable; + auto& srcTable = m_descriptorSet.samplerTable; + + context->device->m_device->CopyDescriptorsSimple( + UINT(samplerCount), + dstTable.getCpuHandle(offset.sampler), + srcTable.getCpuHandle(), + D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER); + } + + // Next we iterate over the sub-object ranges and bind anything they require. + // + auto& subObjectRanges = specializedLayout->getSubObjectRanges(); + auto subObjectRangeCount = subObjectRanges.getCount(); + for (Index i = 0; i < subObjectRangeCount; i++) + { + auto& subObjectRange = specializedLayout->getSubObjectRange(i); + auto& bindingRange = specializedLayout->getBindingRange(subObjectRange.bindingRangeIndex); + auto subObjectIndex = bindingRange.subObjectIndex; + auto subObjectLayout = subObjectRange.layout.Ptr(); + + BindingOffset rangeOffset = offset; + rangeOffset += subObjectRange.offset; + + BindingOffset rangeStride = subObjectRange.stride; + + switch (bindingRange.bindingType) + { + case slang::BindingType::ConstantBuffer: { - case SLANG_TEXTURE_1D: - srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE1D; - break; - case SLANG_TEXTURE_1D_ARRAY: - srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE1DARRAY; - break; - case SLANG_TEXTURE_2D: - srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D; - break; - case SLANG_TEXTURE_2D_ARRAY: - srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2DARRAY; - break; - case SLANG_TEXTURE_3D: - srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE3D; - break; - case SLANG_TEXTURE_CUBE: - srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURECUBE; - break; - case SLANG_TEXTURE_CUBE_ARRAY: - srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURECUBEARRAY; - break; - case SLANG_TEXTURE_2D_MULTISAMPLE: - srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2DMS; - break; - case SLANG_TEXTURE_2D_MULTISAMPLE_ARRAY: - srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2DMSARRAY; - break; - default: - return SLANG_OK; + auto objOffset = rangeOffset; + for (uint32_t j = 0; j < bindingRange.count; j++) + { + auto& object = m_objects[subObjectIndex + j]; + SLANG_RETURN_ON_FAIL(object->bindAsConstantBuffer( + context, descriptorSet, objOffset, subObjectLayout)); + objOffset += rangeStride; + } } - d3dDevice->CreateShaderResourceView(nullptr, &srvDesc, destDescriptor); + break; + + case slang::BindingType::ParameterBlock: + { + auto objOffset = rangeOffset; + for (uint32_t j = 0; j < bindingRange.count; j++) + { + auto& object = m_objects[subObjectIndex + j]; + SLANG_RETURN_ON_FAIL( + object->bindAsParameterBlock(context, objOffset, subObjectLayout)); + objOffset += rangeStride; + } + } + break; + + case slang::BindingType::ExistentialValue: + if (subObjectLayout) + { + auto objOffset = rangeOffset; + for (uint32_t j = 0; j < bindingRange.count; j++) + { + auto& object = m_objects[subObjectIndex + j]; + SLANG_RETURN_ON_FAIL( + object->bindAsValue(context, descriptorSet, objOffset, subObjectLayout)); + objOffset += rangeStride; + } + } + break; + } + } + + return SLANG_OK; +} + +Result ShaderObjectImpl::bindRootArguments(BindingContext* context, uint32_t& index) +{ + auto layoutImpl = getLayout(); + for (Index i = 0; i < m_rootArguments.getCount(); i++) + { + switch (layoutImpl->getRootParameterInfo(i).type) + { + case IResourceView::Type::ShaderResource: + case IResourceView::Type::AccelerationStructure: + context->submitter->setRootSRV(index, m_rootArguments[i]); + break; + case IResourceView::Type::UnorderedAccess: + context->submitter->setRootUAV(index, m_rootArguments[i]); + break; + default: + continue; + } + index++; + } + for (auto& subObject : m_objects) + { + if (subObject) + { + SLANG_RETURN_ON_FAIL(subObject->bindRootArguments(context, index)); } - break; - default: - break; } return SLANG_OK; } -Result D3D12Device::ShaderObjectImpl::setResource(ShaderOffset const& offset, IResourceView* resourceView) +/// Get the layout of this shader object with specialization arguments considered +/// +/// This operation should only be called after the shader object has been +/// fully filled in and finalized. +/// + +Result ShaderObjectImpl::getSpecializedLayout(ShaderObjectLayoutImpl** outLayout) +{ + if (!m_specializedLayout) + { + SLANG_RETURN_ON_FAIL(_createSpecializedLayout(m_specializedLayout.writeRef())); + } + returnRefPtr(outLayout, m_specializedLayout); + return SLANG_OK; +} + +/// Create the layout for this shader object with specialization arguments considered +/// +/// This operation is virtual so that it can be customized by `RootShaderObject`. +/// + +Result ShaderObjectImpl::_createSpecializedLayout(ShaderObjectLayoutImpl** outLayout) +{ + ExtendedShaderObjectType extendedType; + SLANG_RETURN_ON_FAIL(getSpecializedShaderObjectType(&extendedType)); + + auto renderer = getRenderer(); + RefPtr<ShaderObjectLayoutImpl> layout; + SLANG_RETURN_ON_FAIL(renderer->getShaderObjectLayout( + extendedType.slangType, + m_layout->getContainerType(), + (ShaderObjectLayoutBase**)layout.writeRef())); + + returnRefPtrMove(outLayout, layout); + return SLANG_OK; +} + +Result ShaderObjectImpl::setResource(ShaderOffset const& offset, IResourceView* resourceView) { if (offset.bindingRangeIndex < 0) return SLANG_E_INVALID_ARG; @@ -8478,7 +4511,7 @@ Result D3D12Device::ShaderObjectImpl::setResource(ShaderOffset const& offset, IR m_version++; - ID3D12Device* d3dDevice = static_cast<D3D12Device*>(getDevice())->m_device; + ID3D12Device* d3dDevice = static_cast<DeviceImpl*>(getDevice())->m_device; auto& bindingRange = layout->getBindingRange(offset.bindingRangeIndex); @@ -8489,7 +4522,7 @@ Result D3D12Device::ShaderObjectImpl::setResource(ShaderOffset const& offset, IR { case IResourceView::Type::AccelerationStructure: { - auto resourceViewImpl = static_cast<D3D12AccelerationStructureImpl*>(resourceView); + auto resourceViewImpl = static_cast<AccelerationStructureImpl*>(resourceView); rootArg = resourceViewImpl->getDeviceAddress(); } break; @@ -8531,7 +4564,7 @@ Result D3D12Device::ShaderObjectImpl::setResource(ShaderOffset const& offset, IR #if SLANG_GFX_HAS_DXR_SUPPORT case IResourceView::Type::AccelerationStructure: { - auto asImpl = static_cast<D3D12AccelerationStructureImpl*>(resourceView); + auto asImpl = static_cast<AccelerationStructureImpl*>(resourceView); // Hold a reference to the resource to prevent its destruction. m_boundResources[bindingRange.baseIndex + offset.bindingArrayIndex] = asImpl->m_buffer; internalResourceView = asImpl; @@ -8571,7 +4604,31 @@ Result D3D12Device::ShaderObjectImpl::setResource(ShaderOffset const& offset, IR return SLANG_OK; } -Result D3D12Device::PipelineStateImpl::ensureAPIPipelineStateCreated() +void PipelineStateImpl::init(const GraphicsPipelineStateDesc& inDesc) +{ + PipelineStateDesc pipelineDesc; + pipelineDesc.type = PipelineType::Graphics; + pipelineDesc.graphics = inDesc; + initializeBase(pipelineDesc); +} + +void PipelineStateImpl::init(const ComputePipelineStateDesc& inDesc) +{ + PipelineStateDesc pipelineDesc; + pipelineDesc.type = PipelineType::Compute; + pipelineDesc.compute = inDesc; + initializeBase(pipelineDesc); +} + +Result PipelineStateImpl::getNativeHandle(InteropHandle* outHandle) +{ + SLANG_RETURN_ON_FAIL(ensureAPIPipelineStateCreated()); + outHandle->api = InteropHandleAPI::D3D12; + outHandle->handleValue = reinterpret_cast<uint64_t>(m_pipelineState.get()); + return SLANG_OK; +} + +Result PipelineStateImpl::ensureAPIPipelineStateCreated() { if (m_pipelineState) return SLANG_OK; @@ -8629,7 +4686,8 @@ Result D3D12Device::PipelineStateImpl::ensureAPIPipelineStateCreated() psoDesc.PrimitiveTopologyType = D3DUtil::getPrimitiveType(desc.graphics.primitiveType); { - auto framebufferLayout = static_cast<FramebufferLayoutImpl*>(desc.graphics.framebufferLayout); + auto framebufferLayout = + static_cast<FramebufferLayoutImpl*>(desc.graphics.framebufferLayout); const int numRenderTargets = int(framebufferLayout->m_renderTargets.getCount()); if (framebufferLayout->m_hasDepthStencil) @@ -8724,7 +4782,7 @@ Result D3D12Device::PipelineStateImpl::ensureAPIPipelineStateCreated() ds.DepthEnable = desc.graphics.depthStencil.depthTestEnable; ds.DepthWriteMask = desc.graphics.depthStencil.depthWriteEnable ? D3D12_DEPTH_WRITE_MASK_ALL - : D3D12_DEPTH_WRITE_MASK_ZERO; + : D3D12_DEPTH_WRITE_MASK_ZERO; ds.DepthFunc = D3DUtil::getComparisonFunc(desc.graphics.depthStencil.depthFunc); ds.StencilEnable = desc.graphics.depthStencil.stencilEnable; ds.StencilReadMask = (UINT8)desc.graphics.depthStencil.stencilReadMask; @@ -8740,9 +4798,9 @@ Result D3D12Device::PipelineStateImpl::ensureAPIPipelineStateCreated() SLANG_RETURN_ON_FAIL( m_device->m_pipelineCreationAPIDispatcher->createGraphicsPipelineState( m_device, - programImpl->linkedProgram.get(), - &psoDesc, - (void**)m_pipelineState.writeRef())); + programImpl->linkedProgram.get(), + &psoDesc, + (void**)m_pipelineState.writeRef())); } else { @@ -8821,4 +4879,2769 @@ Result D3D12Device::PipelineStateImpl::ensureAPIPipelineStateCreated() return SLANG_OK; } -} // renderer_test +// Swapchain Implementation + +Result SwapchainImpl::init( + DeviceImpl* renderer, const ISwapchain::Desc& swapchainDesc, WindowHandle window) +{ + m_queue = static_cast<CommandQueueImpl*>(swapchainDesc.queue)->m_d3dQueue; + m_dxgiFactory = renderer->m_deviceInfo.m_dxgiFactory; + SLANG_RETURN_ON_FAIL( + D3DSwapchainBase::init(swapchainDesc, window, DXGI_SWAP_EFFECT_FLIP_DISCARD)); + renderer->m_device->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(m_fence.writeRef())); + + SLANG_RETURN_ON_FAIL(m_swapChain->QueryInterface(m_swapChain3.writeRef())); + for (uint32_t i = 0; i < swapchainDesc.imageCount; i++) + { + m_frameEvents.add(CreateEventEx( + nullptr, + false, + CREATE_EVENT_INITIAL_SET | CREATE_EVENT_MANUAL_RESET, + EVENT_ALL_ACCESS)); + } + return SLANG_OK; +} + +Result SwapchainImpl::resize(uint32_t width, uint32_t height) +{ + for (auto evt : m_frameEvents) + SetEvent(evt); + SLANG_RETURN_ON_FAIL(D3DSwapchainBase::resize(width, height)); + return SLANG_OK; +} + +void SwapchainImpl::createSwapchainBufferImages() +{ + m_images.clear(); + + for (uint32_t i = 0; i < m_desc.imageCount; i++) + { + ComPtr<ID3D12Resource> d3dResource; + m_swapChain->GetBuffer(i, IID_PPV_ARGS(d3dResource.writeRef())); + ITextureResource::Desc imageDesc = {}; + imageDesc.allowedStates = ResourceStateSet( + ResourceState::Present, ResourceState::RenderTarget, ResourceState::CopyDestination); + imageDesc.type = IResource::Type::Texture2D; + imageDesc.arraySize = 0; + imageDesc.format = m_desc.format; + imageDesc.size.width = m_desc.width; + imageDesc.size.height = m_desc.height; + imageDesc.size.depth = 1; + imageDesc.numMipLevels = 1; + imageDesc.defaultState = ResourceState::Present; + RefPtr<TextureResourceImpl> image = new TextureResourceImpl(imageDesc); + image->m_resource.setResource(d3dResource.get()); + image->m_defaultState = D3D12_RESOURCE_STATE_PRESENT; + m_images.add(image); + } + for (auto evt : m_frameEvents) + SetEvent(evt); +} + +int SwapchainImpl::acquireNextImage() +{ + auto result = (int)m_swapChain3->GetCurrentBackBufferIndex(); + WaitForSingleObject(m_frameEvents[result], INFINITE); + ResetEvent(m_frameEvents[result]); + return result; +} + +Result SwapchainImpl::present() +{ + m_fence->SetEventOnCompletion( + fenceValue, m_frameEvents[m_swapChain3->GetCurrentBackBufferIndex()]); + SLANG_RETURN_ON_FAIL(D3DSwapchainBase::present()); + fenceValue++; + m_queue->Signal(m_fence, fenceValue); + return SLANG_OK; +} + +bool SwapchainImpl::isOccluded() +{ + return (m_swapChain3->Present(0, DXGI_PRESENT_TEST) == DXGI_STATUS_OCCLUDED); +} + +Result SwapchainImpl::setFullScreenMode(bool mode) +{ + return m_swapChain3->SetFullscreenState(mode, nullptr); +} + +// CommandQueue implementation. + +Result CommandQueueImpl::init(DeviceImpl* device, uint32_t queueIndex) +{ + m_queueIndex = queueIndex; + m_renderer = device; + m_device = device->m_device; + D3D12_COMMAND_QUEUE_DESC queueDesc = {}; + queueDesc.Type = D3D12_COMMAND_LIST_TYPE_DIRECT; + SLANG_RETURN_ON_FAIL( + m_device->CreateCommandQueue(&queueDesc, IID_PPV_ARGS(m_d3dQueue.writeRef()))); + SLANG_RETURN_ON_FAIL( + m_device->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(m_fence.writeRef()))); + globalWaitHandle = CreateEventEx( + nullptr, nullptr, CREATE_EVENT_INITIAL_SET | CREATE_EVENT_MANUAL_RESET, EVENT_ALL_ACCESS); + return SLANG_OK; +} + +CommandQueueImpl::~CommandQueueImpl() +{ + waitOnHost(); + CloseHandle(globalWaitHandle); + m_renderer->m_queueIndexAllocator.free((int)m_queueIndex, 1); +} + +void CommandQueueImpl::executeCommandBuffers( + uint32_t count, ICommandBuffer* const* commandBuffers, IFence* fence, uint64_t valueToSignal) +{ + ShortList<ID3D12CommandList*> commandLists; + for (uint32_t i = 0; i < count; i++) + { + auto cmdImpl = static_cast<CommandBufferImpl*>(commandBuffers[i]); + commandLists.add(cmdImpl->m_cmdList); + } + if (count > 0) + { + m_d3dQueue->ExecuteCommandLists((UINT)count, commandLists.getArrayView().getBuffer()); + + m_fenceValue++; + + for (uint32_t i = 0; i < count; i++) + { + if (i > 0 && commandBuffers[i] == commandBuffers[i - 1]) + continue; + auto cmdImpl = static_cast<CommandBufferImpl*>(commandBuffers[i]); + auto transientHeap = cmdImpl->m_transientHeap; + auto& waitInfo = transientHeap->getQueueWaitInfo(m_queueIndex); + waitInfo.waitValue = m_fenceValue; + waitInfo.fence = m_fence; + } + m_d3dQueue->Signal(m_fence, m_fenceValue); + } + + if (fence) + { + auto fenceImpl = static_cast<FenceImpl*>(fence); + m_d3dQueue->Signal(fenceImpl->m_fence.get(), valueToSignal); + } +} + +void CommandQueueImpl::waitOnHost() +{ + m_fenceValue++; + m_d3dQueue->Signal(m_fence, m_fenceValue); + ResetEvent(globalWaitHandle); + m_fence->SetEventOnCompletion(m_fenceValue, globalWaitHandle); + WaitForSingleObject(globalWaitHandle, INFINITE); +} + +Result CommandQueueImpl::waitForFenceValuesOnDevice( + uint32_t fenceCount, IFence** fences, uint64_t* waitValues) +{ + for (uint32_t i = 0; i < fenceCount; ++i) + { + auto fenceImpl = static_cast<FenceImpl*>(fences[i]); + m_d3dQueue->Wait(fenceImpl->m_fence.get(), waitValues[i]); + } + return SLANG_OK; +} + +const CommandQueueImpl::Desc& CommandQueueImpl::getDesc() { return m_desc; } + +ICommandQueue* CommandQueueImpl::getInterface(const Guid& guid) +{ + if (guid == GfxGUID::IID_ISlangUnknown || guid == GfxGUID::IID_ICommandQueue) + return static_cast<ICommandQueue*>(this); + return nullptr; +} + +Result CommandQueueImpl::getNativeHandle(InteropHandle* handle) +{ + handle->api = InteropHandleAPI::D3D12; + handle->handleValue = (uint64_t)m_d3dQueue.get(); + return SLANG_OK; +} + +ResourceViewInternalImpl::~ResourceViewInternalImpl() +{ + if (m_descriptor.cpuHandle.ptr) + m_allocator->free(m_descriptor); +} + +Result ResourceViewImpl::getNativeHandle(InteropHandle* outHandle) +{ + outHandle->api = InteropHandleAPI::D3D12CpuDescriptorHandle; + outHandle->handleValue = m_descriptor.cpuHandle.ptr; + return SLANG_OK; +} + +void RenderPassLayoutImpl::init(const IRenderPassLayout::Desc& desc) +{ + SimpleRenderPassLayout::init(desc); + m_framebufferLayout = static_cast<FramebufferLayoutImpl*>(desc.framebufferLayout); + m_hasDepthStencil = m_framebufferLayout->m_hasDepthStencil; +} + +ShaderObjectLayoutImpl::SubObjectRangeOffset::SubObjectRangeOffset( + slang::VariableLayoutReflection* varLayout) +{ + if (auto pendingLayout = varLayout->getPendingDataLayout()) + { + pendingOrdinaryData = (uint32_t)pendingLayout->getOffset(SLANG_PARAMETER_CATEGORY_UNIFORM); + } +} + +ShaderObjectLayoutImpl::SubObjectRangeStride::SubObjectRangeStride( + slang::TypeLayoutReflection* typeLayout) +{ + if (auto pendingLayout = typeLayout->getPendingDataTypeLayout()) + { + pendingOrdinaryData = (uint32_t)pendingLayout->getSize(SLANG_PARAMETER_CATEGORY_UNIFORM); + } +} + +bool ShaderObjectLayoutImpl::isBindingRangeRootParameter( + SlangSession* globalSession, + const char* rootParameterAttributeName, + slang::TypeLayoutReflection* typeLayout, + Index bindingRangeIndex) +{ + bool isRootParameter = false; + if (rootParameterAttributeName) + { + if (auto leafVariable = typeLayout->getBindingRangeLeafVariable(bindingRangeIndex)) + { + if (leafVariable->findUserAttributeByName(globalSession, rootParameterAttributeName)) + { + isRootParameter = true; + } + } + } + return isRootParameter; +} + +Result ShaderObjectLayoutImpl::createForElementType( + RendererBase* renderer, + slang::TypeLayoutReflection* elementType, + ShaderObjectLayoutImpl** outLayout) +{ + Builder builder(renderer); + builder.setElementTypeLayout(elementType); + return builder.build(outLayout); +} + +Result ShaderObjectLayoutImpl::init(Builder* builder) +{ + auto renderer = builder->m_renderer; + + initBase(renderer, builder->m_elementTypeLayout); + + m_containerType = builder->m_containerType; + + m_bindingRanges = _Move(builder->m_bindingRanges); + m_subObjectRanges = _Move(builder->m_subObjectRanges); + m_rootParamsInfo = _Move(builder->m_rootParamsInfo); + + m_ownCounts = builder->m_ownCounts; + m_totalCounts = builder->m_totalCounts; + m_subObjectCount = builder->m_subObjectCount; + m_childRootParameterCount = builder->m_childRootParameterCount; + m_totalOrdinaryDataSize = builder->m_totalOrdinaryDataSize; + + return SLANG_OK; +} + +Result ShaderObjectLayoutImpl::Builder::setElementTypeLayout( + slang::TypeLayoutReflection* typeLayout) +{ + typeLayout = _unwrapParameterGroups(typeLayout, m_containerType); + m_elementTypeLayout = typeLayout; + + // If the type contains any ordinary data, then we must reserve a buffer + // descriptor to hold it when binding as a parameter block. + // + m_totalOrdinaryDataSize = (uint32_t)typeLayout->getSize(); + if (m_totalOrdinaryDataSize != 0) + { + m_ownCounts.resource++; + } + + // We will scan over the reflected Slang binding ranges and add them + // to our array. There are two main things we compute along the way: + // + // * For each binding range we compute a `flatIndex` that can be + // used to identify where the values for the given range begin + // in the flattened arrays (e.g., `m_objects`) and descriptor + // tables that hold the state of a shader object. + // + // * We also update the various counters taht keep track of the number + // of sub-objects, resources, samplers, etc. that are being + // consumed. These counters will contribute to figuring out + // the descriptor table(s) that might be needed to represent + // the object. + // + SlangInt bindingRangeCount = typeLayout->getBindingRangeCount(); + for (SlangInt r = 0; r < bindingRangeCount; ++r) + { + slang::BindingType slangBindingType = typeLayout->getBindingRangeType(r); + uint32_t count = (uint32_t)typeLayout->getBindingRangeBindingCount(r); + slang::TypeLayoutReflection* slangLeafTypeLayout = + typeLayout->getBindingRangeLeafTypeLayout(r); + BindingRangeInfo bindingRangeInfo = {}; + bindingRangeInfo.bindingType = slangBindingType; + bindingRangeInfo.resourceShape = slangLeafTypeLayout->getResourceShape(); + bindingRangeInfo.count = count; + bindingRangeInfo.isRootParameter = isBindingRangeRootParameter( + m_renderer->slangContext.globalSession, + static_cast<DeviceImpl*>(m_renderer)->m_extendedDesc.rootParameterShaderAttributeName, + typeLayout, + r); + if (bindingRangeInfo.isRootParameter) + { + RootParameterInfo rootInfo = {}; + switch (slangBindingType) + { + case slang::BindingType::RayTracingAccelerationStructure: + rootInfo.type = IResourceView::Type::AccelerationStructure; + break; + case slang::BindingType::RawBuffer: + case slang::BindingType::TypedBuffer: + rootInfo.type = IResourceView::Type::ShaderResource; + break; + case slang::BindingType::MutableRawBuffer: + case slang::BindingType::MutableTypedBuffer: + rootInfo.type = IResourceView::Type::UnorderedAccess; + break; + } + bindingRangeInfo.baseIndex = (uint32_t)m_rootParamsInfo.getCount(); + for (uint32_t i = 0; i < count; i++) + { + m_rootParamsInfo.add(rootInfo); + } + } + else + { + switch (slangBindingType) + { + case slang::BindingType::ConstantBuffer: + case slang::BindingType::ParameterBlock: + case slang::BindingType::ExistentialValue: + bindingRangeInfo.baseIndex = m_subObjectCount; + bindingRangeInfo.subObjectIndex = m_subObjectCount; + m_subObjectCount += count; + break; + case slang::BindingType::RawBuffer: + case slang::BindingType::MutableRawBuffer: + if (slangLeafTypeLayout->getType()->getElementType() != nullptr) + { + // A structured buffer occupies both a resource slot and + // a sub-object slot. + bindingRangeInfo.subObjectIndex = m_subObjectCount; + m_subObjectCount += count; + } + bindingRangeInfo.baseIndex = m_ownCounts.resource; + m_ownCounts.resource += count; + break; + case slang::BindingType::Sampler: + bindingRangeInfo.baseIndex = m_ownCounts.sampler; + m_ownCounts.sampler += count; + break; + + case slang::BindingType::CombinedTextureSampler: + // TODO: support this case... + break; + + case slang::BindingType::VaryingInput: + case slang::BindingType::VaryingOutput: + break; + + default: + bindingRangeInfo.baseIndex = m_ownCounts.resource; + m_ownCounts.resource += count; + break; + } + } + m_bindingRanges.add(bindingRangeInfo); + } + + // At this point we've computed the number of resources/samplers that + // the type needs to represent its *own* state, and stored those counts + // in `m_ownCounts`. Next we need to consider any resources/samplers + // and root parameters needed to represent the state of the transitive + // sub-objects of this objet, so that we can compute the total size + // of the object when bound to the pipeline. + + m_totalCounts = m_ownCounts; + + SlangInt subObjectRangeCount = typeLayout->getSubObjectRangeCount(); + for (SlangInt r = 0; r < subObjectRangeCount; ++r) + { + SlangInt bindingRangeIndex = typeLayout->getSubObjectRangeBindingRangeIndex(r); + auto slangBindingType = typeLayout->getBindingRangeType(bindingRangeIndex); + auto count = (uint32_t)typeLayout->getBindingRangeBindingCount(bindingRangeIndex); + slang::TypeLayoutReflection* slangLeafTypeLayout = + typeLayout->getBindingRangeLeafTypeLayout(bindingRangeIndex); + + // A sub-object range can either represent a sub-object of a known + // type, like a `ConstantBuffer<Foo>` or `ParameterBlock<Foo>` + // (in which case we can pre-compute a layout to use, based on + // the type `Foo`) *or* it can represent a sub-object of some + // existential type (e.g., `IBar`) in which case we cannot + // know the appropraite type/layout of sub-object to allocate. + // + RefPtr<ShaderObjectLayoutImpl> subObjectLayout; + if (slangBindingType == slang::BindingType::ExistentialValue) + { + if (auto pendingTypeLayout = slangLeafTypeLayout->getPendingDataTypeLayout()) + { + createForElementType(m_renderer, pendingTypeLayout, subObjectLayout.writeRef()); + } + } + else + { + createForElementType( + m_renderer, + slangLeafTypeLayout->getElementTypeLayout(), + subObjectLayout.writeRef()); + } + + SubObjectRangeInfo subObjectRange; + subObjectRange.bindingRangeIndex = bindingRangeIndex; + subObjectRange.layout = subObjectLayout; + + // The Slang reflection API stors offset information for sub-object ranges, + // and we care about *some* of that information: in particular, we need + // the offset of sub-objects in terms of uniform/ordinary data for the + // cases where we need to fill in "pending" data in our ordinary buffer. + // + subObjectRange.offset = SubObjectRangeOffset(typeLayout->getSubObjectRangeOffset(r)); + subObjectRange.stride = SubObjectRangeStride(slangLeafTypeLayout); + + // The remaining offset information is computed based on the counters + // we are generating here, which depend only on the in-memory layout + // decisions being made in our implementation. Remember that the + // `register` and `space` values coming from DXBC/DXIL do *not* + // dictate the in-memory layout we use. + // + // Note: One subtle point here is that the `.rootParam` offset we are computing + // here does *not* include any root parameters that would be allocated + // for the parent object type itself (e.g., for descriptor tables + // used if it were bound as a parameter block). The later logic when + // we actually go to bind things will need to apply those offsets. + // + // Note: An even *more* subtle point is that the `.resource` offset + // being computed here *does* include the resource descriptor allocated + // for holding the ordinary data buffer, if any. The implications of + // this for later offset math is subtle. + // + subObjectRange.offset.rootParam = m_childRootParameterCount; + subObjectRange.offset.resource = m_totalCounts.resource; + subObjectRange.offset.sampler = m_totalCounts.sampler; + + // Along with the offset information, we also need to compute the + // "stride" between consecutive sub-objects in the range. The actual + // size/stride of a single object depends on the type of range we + // are dealing with. + // + BindingOffset objectCounts; + switch (slangBindingType) + { + default: + { + // We only treat buffers of interface types as actual sub-object binding + // range. + auto bindingRangeTypeLayout = + typeLayout->getBindingRangeLeafTypeLayout(bindingRangeIndex); + if (!bindingRangeTypeLayout) + continue; + auto elementType = typeLayout->getBindingRangeLeafTypeLayout(bindingRangeIndex) + ->getElementTypeLayout(); + if (!elementType) + continue; + if (elementType->getKind() != slang::TypeReflection::Kind::Interface) + { + continue; + } + } + break; + + case slang::BindingType::ConstantBuffer: + { + SLANG_ASSERT(subObjectLayout); + + // The resource and sampler descriptors of a nested + // constant buffer will "leak" into those of the + // parent type, and we need to account for them + // whenever we allocate storage. + // + objectCounts.resource = subObjectLayout->getTotalResourceDescriptorCount(); + objectCounts.sampler = subObjectLayout->getTotalSamplerDescriptorCount(); + objectCounts.rootParam = subObjectRange.layout->getChildRootParameterCount(); + } + break; + + case slang::BindingType::ParameterBlock: + { + SLANG_ASSERT(subObjectLayout); + + // In contrast to a constant buffer, a parameter block can hide + // the resource and sampler descriptor allocation it uses (since they + // are allocated into the tables that make up the parameter block. + // + // The only resource usage that leaks into the surrounding context + // is the number of root parameters consumed. + // + objectCounts.rootParam = subObjectRange.layout->getTotalRootTableParameterCount(); + } + break; + + case slang::BindingType::ExistentialValue: + // An unspecialized existential/interface value cannot consume any resources + // as part of the parent object (it needs to fit inside the fixed-size + // represnetation of existential types). + // + // However, if we are statically specializing to a type that doesn't "fit" + // we may need to account for additional information that needs to be + // allocaated. + // + if (subObjectLayout) + { + // The ordinary data for an existential-type value is allocated into + // the same buffer as the parent object, so we only want to consider + // the resource descriptors *other than* the ordinary data buffer. + // + // Otherwise the logic here is identical to the constant buffer case. + // + objectCounts.resource = + subObjectLayout->getTotalResourceDescriptorCountWithoutOrdinaryDataBuffer(); + objectCounts.sampler = subObjectLayout->getTotalSamplerDescriptorCount(); + objectCounts.rootParam = subObjectRange.layout->getChildRootParameterCount(); + + // Note: In the implementation for some other graphics API (e.g., + // Vulkan) there needs to be more work done to handle the fact that + // "pending" data from interface-type sub-objects get allocated to a + // distinct offset after all the "primary" data. We are consciously + // ignoring that issue here, and the physical layout of a shader object + // into the D3D12 binding state may end up interleaving + // resources/samplers for "primary" and "pending" data. + // + // If this choice ever causes issues, we can revisit the approach here. + + // An interface-type range that includes ordinary data can + // increase the size of the ordinary data buffer we need to + // allocate for the parent object. + // + uint32_t ordinaryDataEnd = + subObjectRange.offset.pendingOrdinaryData + + (uint32_t)count * subObjectRange.stride.pendingOrdinaryData; + + if (ordinaryDataEnd > m_totalOrdinaryDataSize) + { + m_totalOrdinaryDataSize = ordinaryDataEnd; + } + } + break; + } + + // Once we've computed the usage for each object in the range, we can + // easily compute the usage for the entire range. + // + auto rangeResourceCount = count * objectCounts.resource; + auto rangeSamplerCount = count * objectCounts.sampler; + auto rangeRootParamCount = count * objectCounts.rootParam; + + m_totalCounts.resource += rangeResourceCount; + m_totalCounts.sampler += rangeSamplerCount; + m_childRootParameterCount += rangeRootParamCount; + + m_subObjectRanges.add(subObjectRange); + } + + // Once we have added up the resource usage from all the sub-objects + // we can look at the total number of resources and samplers that + // need to be bound as part of this objects descriptor tables and + // that will allow us to decide whether we need to allocate a root + // parameter for a resource table or not, ans similarly for a + // sampler table. + // + if (m_totalCounts.resource) + m_ownCounts.rootParam++; + if (m_totalCounts.sampler) + m_ownCounts.rootParam++; + + m_totalCounts.rootParam = m_ownCounts.rootParam + m_childRootParameterCount; + + return SLANG_OK; +} + +Result ShaderObjectLayoutImpl::Builder::build(ShaderObjectLayoutImpl** outLayout) +{ + auto layout = RefPtr<ShaderObjectLayoutImpl>(new ShaderObjectLayoutImpl()); + SLANG_RETURN_ON_FAIL(layout->init(this)); + + returnRefPtrMove(outLayout, layout); + return SLANG_OK; +} + +Result RootShaderObjectLayoutImpl::Builder::build(RootShaderObjectLayoutImpl** outLayout) +{ + RefPtr<RootShaderObjectLayoutImpl> layout = new RootShaderObjectLayoutImpl(); + SLANG_RETURN_ON_FAIL(layout->init(this)); + + returnRefPtrMove(outLayout, layout); + return SLANG_OK; +} + +void RootShaderObjectLayoutImpl::Builder::addGlobalParams( + slang::VariableLayoutReflection* globalsLayout) +{ + setElementTypeLayout(globalsLayout->getTypeLayout()); +} + +void RootShaderObjectLayoutImpl::Builder::addEntryPoint( + SlangStage stage, ShaderObjectLayoutImpl* entryPointLayout) +{ + EntryPointInfo info; + info.layout = entryPointLayout; + + info.offset.resource = m_totalCounts.resource; + info.offset.sampler = m_totalCounts.sampler; + info.offset.rootParam = m_childRootParameterCount; + + m_totalCounts.resource += entryPointLayout->getTotalResourceDescriptorCount(); + m_totalCounts.sampler += entryPointLayout->getTotalSamplerDescriptorCount(); + + // TODO(tfoley): Check this to make sure it is reasonable... + m_childRootParameterCount += entryPointLayout->getChildRootParameterCount(); + + m_entryPoints.add(info); +} + +Result RootShaderObjectLayoutImpl::RootSignatureDescBuilder::translateDescriptorRangeType( + slang::BindingType c, D3D12_DESCRIPTOR_RANGE_TYPE* outType) +{ + switch (c) + { + case slang::BindingType::ConstantBuffer: + *outType = D3D12_DESCRIPTOR_RANGE_TYPE_CBV; + return SLANG_OK; + case slang::BindingType::RawBuffer: + case slang::BindingType::Texture: + case slang::BindingType::TypedBuffer: + case slang::BindingType::RayTracingAccelerationStructure: + *outType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV; + return SLANG_OK; + case slang::BindingType::MutableRawBuffer: + case slang::BindingType::MutableTexture: + case slang::BindingType::MutableTypedBuffer: + *outType = D3D12_DESCRIPTOR_RANGE_TYPE_UAV; + return SLANG_OK; + case slang::BindingType::Sampler: + *outType = D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER; + return SLANG_OK; + default: + return SLANG_FAIL; + } +} + +/// Add a new descriptor set to the layout being computed. +/// +/// Note that a "descriptor set" in the layout may amount to +/// zero, one, or two different descriptor *tables* in the +/// final D3D12 root signature. Each descriptor set may +/// contain zero or more view ranges (CBV/SRV/UAV) and zero +/// or more sampler ranges. It maps to a view descriptor table +/// if the number of view ranges is non-zero and to a sampler +/// descriptor table if the number of sampler ranges is non-zero. +/// + +uint32_t RootShaderObjectLayoutImpl::RootSignatureDescBuilder::addDescriptorSet() +{ + auto result = (uint32_t)m_descriptorSets.getCount(); + m_descriptorSets.add(DescriptorSetLayout{}); + return result; +} + +Result RootShaderObjectLayoutImpl::RootSignatureDescBuilder::addDescriptorRange( + Index physicalDescriptorSetIndex, + D3D12_DESCRIPTOR_RANGE_TYPE rangeType, + UINT registerIndex, + UINT spaceIndex, + UINT count, + bool isRootParameter) +{ + if (isRootParameter) + { + D3D12_ROOT_PARAMETER rootParam = {}; + switch (rangeType) + { + case D3D12_DESCRIPTOR_RANGE_TYPE_SRV: + rootParam.ParameterType = D3D12_ROOT_PARAMETER_TYPE_SRV; + break; + case D3D12_DESCRIPTOR_RANGE_TYPE_UAV: + rootParam.ParameterType = D3D12_ROOT_PARAMETER_TYPE_UAV; + break; + default: + getDebugCallback()->handleMessage( + DebugMessageType::Error, + DebugMessageSource::Layer, + "A shader parameter marked as root parameter is neither SRV nor UAV."); + return SLANG_FAIL; + } + rootParam.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; + rootParam.Descriptor.RegisterSpace = spaceIndex; + rootParam.Descriptor.ShaderRegister = registerIndex; + m_rootParameters.add(rootParam); + return SLANG_OK; + } + + auto& descriptorSet = m_descriptorSets[physicalDescriptorSetIndex]; + + D3D12_DESCRIPTOR_RANGE range = {}; + range.RangeType = rangeType; + range.NumDescriptors = count; + range.BaseShaderRegister = registerIndex; + range.RegisterSpace = spaceIndex; + range.OffsetInDescriptorsFromTableStart = D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND; + + if (range.RangeType == D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER) + { + descriptorSet.m_samplerRanges.add(range); + descriptorSet.m_samplerCount += range.NumDescriptors; + } + else + { + descriptorSet.m_resourceRanges.add(range); + descriptorSet.m_resourceCount += range.NumDescriptors; + } + + return SLANG_OK; +} + +/// Add one descriptor range as specified in Slang reflection information to the layout. +/// +/// The layout information is taken from `typeLayout` for the descriptor +/// range with the given `descriptorRangeIndex` within the logical +/// descriptor set (reflected by Slang) with the given `logicalDescriptorSetIndex`. +/// +/// The `physicalDescriptorSetIndex` is the index in the `m_descriptorSets` array of +/// the descriptor set that the range should be added to. +/// +/// The `offset` encodes information about space and/or register offsets that +/// should be applied to descrptor ranges. +/// +/// This operation can fail if the given descriptor range encodes a range that +/// doesn't map to anything directly supported by D3D12. Higher-level routines +/// will often want to ignore such failures. +/// + +Result RootShaderObjectLayoutImpl::RootSignatureDescBuilder::addDescriptorRange( + slang::TypeLayoutReflection* typeLayout, + Index physicalDescriptorSetIndex, + BindingRegisterOffset const& containerOffset, + BindingRegisterOffset const& elementOffset, + Index logicalDescriptorSetIndex, + Index descriptorRangeIndex, + bool isRootParameter) +{ + auto bindingType = typeLayout->getDescriptorSetDescriptorRangeType( + logicalDescriptorSetIndex, descriptorRangeIndex); + auto count = typeLayout->getDescriptorSetDescriptorRangeDescriptorCount( + logicalDescriptorSetIndex, descriptorRangeIndex); + auto index = typeLayout->getDescriptorSetDescriptorRangeIndexOffset( + logicalDescriptorSetIndex, descriptorRangeIndex); + auto space = typeLayout->getDescriptorSetSpaceOffset(logicalDescriptorSetIndex); + + D3D12_DESCRIPTOR_RANGE_TYPE rangeType; + SLANG_RETURN_ON_FAIL(translateDescriptorRangeType(bindingType, &rangeType)); + + return addDescriptorRange( + physicalDescriptorSetIndex, + rangeType, + (UINT)index + elementOffset[rangeType], + (UINT)space + containerOffset.spaceOffset, + (UINT)count, + isRootParameter); +} + +/// Add one binding range to the computed layout. +/// +/// The layout information is taken from `typeLayout` for the binding +/// range with the given `bindingRangeIndex`. +/// +/// The `physicalDescriptorSetIndex` is the index in the `m_descriptorSets` array of +/// the descriptor set that the range should be added to. +/// +/// The `offset` encodes information about space and/or register offsets that +/// should be applied to descrptor ranges. +/// +/// Note that a single binding range may encompass zero or more descriptor ranges. +/// + +void RootShaderObjectLayoutImpl::RootSignatureDescBuilder::addBindingRange( + slang::TypeLayoutReflection* typeLayout, + Index physicalDescriptorSetIndex, + BindingRegisterOffset const& containerOffset, + BindingRegisterOffset const& elementOffset, + Index bindingRangeIndex) +{ + auto logicalDescriptorSetIndex = + typeLayout->getBindingRangeDescriptorSetIndex(bindingRangeIndex); + auto firstDescriptorRangeIndex = + typeLayout->getBindingRangeFirstDescriptorRangeIndex(bindingRangeIndex); + Index descriptorRangeCount = typeLayout->getBindingRangeDescriptorRangeCount(bindingRangeIndex); + bool isRootParameter = isBindingRangeRootParameter( + m_device->slangContext.globalSession, + m_device->m_extendedDesc.rootParameterShaderAttributeName, + typeLayout, + bindingRangeIndex); + for (Index i = 0; i < descriptorRangeCount; ++i) + { + auto descriptorRangeIndex = firstDescriptorRangeIndex + i; + + // Note: we ignore the `Result` returned by `addDescriptorRange()` because we + // want to silently skip any ranges that represent kinds of bindings that + // don't actually exist in D3D12. + // + addDescriptorRange( + typeLayout, + physicalDescriptorSetIndex, + containerOffset, + elementOffset, + logicalDescriptorSetIndex, + descriptorRangeIndex, + isRootParameter); + } +} + +void RootShaderObjectLayoutImpl::RootSignatureDescBuilder::addAsValue( + slang::VariableLayoutReflection* varLayout, Index physicalDescriptorSetIndex) +{ + BindingRegisterOffsetPair offset(varLayout); + addAsValue(varLayout->getTypeLayout(), physicalDescriptorSetIndex, offset, offset); +} + +/// Add binding ranges and parameter blocks to the root signature. +/// +/// The layout information is taken from `typeLayout` which should +/// be a layout for either a program or an entry point. +/// +/// The `physicalDescriptorSetIndex` is the index in the `m_descriptorSets` array of +/// the descriptor set that binding ranges not belonging to nested +/// parameter blocks should be added to. +/// +/// The `offset` encodes information about space and/or register offsets that +/// should be applied to descrptor ranges. +/// + +void RootShaderObjectLayoutImpl::RootSignatureDescBuilder::addAsConstantBuffer( + slang::TypeLayoutReflection* typeLayout, + Index physicalDescriptorSetIndex, + BindingRegisterOffsetPair const& containerOffset, + BindingRegisterOffsetPair const& elementOffset) +{ + if (typeLayout->getSize(SLANG_PARAMETER_CATEGORY_UNIFORM) != 0) + { + auto descriptorRangeType = D3D12_DESCRIPTOR_RANGE_TYPE_CBV; + auto& offsetForRangeType = containerOffset.primary.offsetForRangeType[descriptorRangeType]; + addDescriptorRange( + physicalDescriptorSetIndex, + descriptorRangeType, + offsetForRangeType, + containerOffset.primary.spaceOffset, + 1, + false); + } + + addAsValue(typeLayout, physicalDescriptorSetIndex, containerOffset, elementOffset); +} + +void RootShaderObjectLayoutImpl::RootSignatureDescBuilder::addAsValue( + slang::TypeLayoutReflection* typeLayout, + Index physicalDescriptorSetIndex, + BindingRegisterOffsetPair const& containerOffset, + BindingRegisterOffsetPair const& elementOffset) +{ + // Our first task is to add the binding ranges for stuff that is + // directly contained in `typeLayout` rather than via sub-objects. + // + // Our goal is to have the descriptors for directly-contained views/samplers + // always be contiguous in CPU and GPU memory, so that we can write + // to them easily with a single operaiton. + // + Index bindingRangeCount = typeLayout->getBindingRangeCount(); + for (Index bindingRangeIndex = 0; bindingRangeIndex < bindingRangeCount; bindingRangeIndex++) + { + // We will look at the type of each binding range and intentionally + // skip those that represent sub-objects. + // + auto bindingType = typeLayout->getBindingRangeType(bindingRangeIndex); + switch (bindingType) + { + case slang::BindingType::ConstantBuffer: + case slang::BindingType::ParameterBlock: + case slang::BindingType::ExistentialValue: + continue; + + default: + break; + } + + // For binding ranges that don't represent sub-objects, we will add + // all of the descriptor ranges they encompass to the root signature. + // + addBindingRange( + typeLayout, + physicalDescriptorSetIndex, + containerOffset.primary, + elementOffset.primary, + bindingRangeIndex); + } + + // Next we need to recursively include everything bound via sub-objects + Index subObjectRangeCount = typeLayout->getSubObjectRangeCount(); + for (Index subObjectRangeIndex = 0; subObjectRangeIndex < subObjectRangeCount; + subObjectRangeIndex++) + { + auto bindingRangeIndex = + typeLayout->getSubObjectRangeBindingRangeIndex(subObjectRangeIndex); + auto bindingType = typeLayout->getBindingRangeType(bindingRangeIndex); + + auto subObjectTypeLayout = typeLayout->getBindingRangeLeafTypeLayout(bindingRangeIndex); + + BindingRegisterOffsetPair subObjectRangeContainerOffset = containerOffset; + subObjectRangeContainerOffset += + BindingRegisterOffsetPair(typeLayout->getSubObjectRangeOffset(subObjectRangeIndex)); + BindingRegisterOffsetPair subObjectRangeElementOffset = elementOffset; + subObjectRangeElementOffset += + BindingRegisterOffsetPair(typeLayout->getSubObjectRangeOffset(subObjectRangeIndex)); + + switch (bindingType) + { + case slang::BindingType::ConstantBuffer: + { + auto containerVarLayout = subObjectTypeLayout->getContainerVarLayout(); + SLANG_ASSERT(containerVarLayout); + + auto elementVarLayout = subObjectTypeLayout->getElementVarLayout(); + SLANG_ASSERT(elementVarLayout); + + auto elementTypeLayout = elementVarLayout->getTypeLayout(); + SLANG_ASSERT(elementTypeLayout); + + BindingRegisterOffsetPair containerOffset = subObjectRangeContainerOffset; + containerOffset += BindingRegisterOffsetPair(containerVarLayout); + + BindingRegisterOffsetPair elementOffset = subObjectRangeElementOffset; + elementOffset += BindingRegisterOffsetPair(elementVarLayout); + + addAsConstantBuffer( + elementTypeLayout, physicalDescriptorSetIndex, containerOffset, elementOffset); + } + break; + + case slang::BindingType::ParameterBlock: + { + auto containerVarLayout = subObjectTypeLayout->getContainerVarLayout(); + SLANG_ASSERT(containerVarLayout); + + auto elementVarLayout = subObjectTypeLayout->getElementVarLayout(); + SLANG_ASSERT(elementVarLayout); + + auto elementTypeLayout = elementVarLayout->getTypeLayout(); + SLANG_ASSERT(elementTypeLayout); + + BindingRegisterOffsetPair subDescriptorSetOffset; + subDescriptorSetOffset.primary.spaceOffset = + subObjectRangeElementOffset.primary.spaceOffset; + subDescriptorSetOffset.pending.spaceOffset = + subObjectRangeElementOffset.pending.spaceOffset; + + auto subPhysicalDescriptorSetIndex = addDescriptorSet(); + + BindingRegisterOffsetPair containerOffset = subDescriptorSetOffset; + containerOffset += BindingRegisterOffsetPair(containerVarLayout); + + BindingRegisterOffsetPair elementOffset = subDescriptorSetOffset; + elementOffset += BindingRegisterOffsetPair(elementVarLayout); + + addAsConstantBuffer( + elementTypeLayout, + subPhysicalDescriptorSetIndex, + containerOffset, + elementOffset); + } + break; + + case slang::BindingType::ExistentialValue: + { + // Any nested binding ranges in the sub-object will "leak" into the + // binding ranges for the surrounding context. + // + auto specializedTypeLayout = subObjectTypeLayout->getPendingDataTypeLayout(); + if (specializedTypeLayout) + { + BindingRegisterOffsetPair pendingOffset; + pendingOffset.primary = subObjectRangeElementOffset.pending; + + addAsValue( + specializedTypeLayout, + physicalDescriptorSetIndex, + pendingOffset, + pendingOffset); + } + } + break; + } + } +} + +D3D12_ROOT_SIGNATURE_DESC& RootShaderObjectLayoutImpl::RootSignatureDescBuilder::build() +{ + for (Index i = 0; i < m_descriptorSets.getCount(); i++) + { + auto& descriptorSet = m_descriptorSets[i]; + if (descriptorSet.m_resourceRanges.getCount()) + { + D3D12_ROOT_PARAMETER rootParam = {}; + rootParam.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; + rootParam.DescriptorTable.NumDescriptorRanges = + (UINT)descriptorSet.m_resourceRanges.getCount(); + rootParam.DescriptorTable.pDescriptorRanges = + descriptorSet.m_resourceRanges.getBuffer(); + m_rootParameters.add(rootParam); + } + if (descriptorSet.m_samplerRanges.getCount()) + { + D3D12_ROOT_PARAMETER rootParam = {}; + rootParam.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; + rootParam.DescriptorTable.NumDescriptorRanges = + (UINT)descriptorSet.m_samplerRanges.getCount(); + rootParam.DescriptorTable.pDescriptorRanges = descriptorSet.m_samplerRanges.getBuffer(); + m_rootParameters.add(rootParam); + } + } + + m_rootSignatureDesc.NumParameters = UINT(m_rootParameters.getCount()); + m_rootSignatureDesc.pParameters = m_rootParameters.getBuffer(); + + // TODO: static samplers should be reasonably easy to support... + m_rootSignatureDesc.NumStaticSamplers = 0; + m_rootSignatureDesc.pStaticSamplers = nullptr; + + // TODO: only set this flag if needed (requires creating root + // signature at same time as pipeline state...). + // + m_rootSignatureDesc.Flags = D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT; + + return m_rootSignatureDesc; +} + +Result RootShaderObjectLayoutImpl::createRootSignatureFromSlang( + DeviceImpl* device, + RootShaderObjectLayoutImpl* rootLayout, + slang::IComponentType* program, + ID3D12RootSignature** outRootSignature, + ID3DBlob** outError) +{ + // We are going to build up the root signature by adding + // binding/descritpor ranges and nested parameter blocks + // based on the computed layout information for `program`. + // + RootSignatureDescBuilder builder(device); + auto layout = program->getLayout(); + + // The layout information computed by Slang breaks up shader + // parameters into what we can think of as "logical" descriptor + // sets based on whether or not parameters have the same `space`. + // + // We want to basically ignore that decomposition and generate a + // single descriptor set to hold all top-level parameters, and only + // generate distinct descriptor sets when the shader has opted in + // via explicit parameter blocks. + // + // To achieve this goal, we will manually allocate a default descriptor + // set for root parameters in our signature, and then recursively + // add all the binding/descriptor ranges implied by the global-scope + // parameters. + // + auto rootDescriptorSetIndex = builder.addDescriptorSet(); + builder.addAsValue(layout->getGlobalParamsVarLayout(), rootDescriptorSetIndex); + + for (SlangUInt i = 0; i < layout->getEntryPointCount(); i++) + { + // Entry-point parameters should also be added to the default root + // descriptor set. + // + // We add the parameters using the "variable layout" for the entry point + // and not just its type layout, to ensure that any offset information is + // applied correctly to the `register` and `space` information for entry-point + // parameters. + // + // Note: When we start to support DXR we will need to handle entry-point parameters + // differently because they will need to map to local root signatures rather than + // being included in the global root signature as is being done here. + // + auto entryPoint = layout->getEntryPointByIndex(i); + builder.addAsValue(entryPoint->getVarLayout(), rootDescriptorSetIndex); + } + + auto& rootSignatureDesc = builder.build(); + + ComPtr<ID3DBlob> signature; + ComPtr<ID3DBlob> error; + if (SLANG_FAILED(device->m_D3D12SerializeRootSignature( + &rootSignatureDesc, + D3D_ROOT_SIGNATURE_VERSION_1, + signature.writeRef(), + error.writeRef()))) + { + getDebugCallback()->handleMessage( + DebugMessageType::Error, + DebugMessageSource::Layer, + "error: D3D12SerializeRootSignature failed"); + if (error) + { + getDebugCallback()->handleMessage( + DebugMessageType::Error, + DebugMessageSource::Driver, + (const char*)error->GetBufferPointer()); + if (outError) + returnComPtr(outError, error); + } + return SLANG_FAIL; + } + + SLANG_RETURN_ON_FAIL(device->m_device->CreateRootSignature( + 0, + signature->GetBufferPointer(), + signature->GetBufferSize(), + IID_PPV_ARGS(outRootSignature))); + return SLANG_OK; +} + +Result RootShaderObjectLayoutImpl::create( + DeviceImpl* device, + slang::IComponentType* program, + slang::ProgramLayout* programLayout, + RootShaderObjectLayoutImpl** outLayout, + ID3DBlob** outError) +{ + RootShaderObjectLayoutImpl::Builder builder(device, program, programLayout); + builder.addGlobalParams(programLayout->getGlobalParamsVarLayout()); + + SlangInt entryPointCount = programLayout->getEntryPointCount(); + for (SlangInt e = 0; e < entryPointCount; ++e) + { + auto slangEntryPoint = programLayout->getEntryPointByIndex(e); + RefPtr<ShaderObjectLayoutImpl> entryPointLayout; + SLANG_RETURN_ON_FAIL(ShaderObjectLayoutImpl::createForElementType( + device, slangEntryPoint->getTypeLayout(), entryPointLayout.writeRef())); + builder.addEntryPoint(slangEntryPoint->getStage(), entryPointLayout); + } + + RefPtr<RootShaderObjectLayoutImpl> layout; + SLANG_RETURN_ON_FAIL(builder.build(layout.writeRef())); + + if (program->getSpecializationParamCount() == 0) + { + // For root object, we would like know the union of all binding slots + // including all sub-objects in the shader-object hierarchy, so at + // parameter binding time we can easily know how many GPU descriptor tables + // to create without walking through the shader-object hierarchy again. + // We build out this array along with root signature construction and store + // it in `m_gpuDescriptorSetInfos`. + SLANG_RETURN_ON_FAIL(createRootSignatureFromSlang( + device, layout, program, layout->m_rootSignature.writeRef(), outError)); + } + + *outLayout = layout.detach(); + + return SLANG_OK; +} + +Result RootShaderObjectLayoutImpl::init(Builder* builder) +{ + auto renderer = builder->m_renderer; + + SLANG_RETURN_ON_FAIL(Super::init(builder)); + + m_program = builder->m_program; + m_programLayout = builder->m_programLayout; + m_entryPoints = builder->m_entryPoints; + return SLANG_OK; +} + +Result ShaderProgramImpl::createShaderModule( + slang::EntryPointReflection* entryPointInfo, ComPtr<ISlangBlob> kernelCode) +{ + ShaderBinary shaderBin; + shaderBin.stage = entryPointInfo->getStage(); + shaderBin.entryPointInfo = entryPointInfo; + shaderBin.code.addRange( + reinterpret_cast<const uint8_t*>(kernelCode->getBufferPointer()), + (Index)kernelCode->getBufferSize()); + m_shaders.add(_Move(shaderBin)); + return SLANG_OK; +} + +Result ShaderObjectImpl::create( + DeviceImpl* device, ShaderObjectLayoutImpl* layout, ShaderObjectImpl** outShaderObject) +{ + auto object = RefPtr<ShaderObjectImpl>(new ShaderObjectImpl()); + SLANG_RETURN_ON_FAIL( + object->init(device, layout, device->m_cpuViewHeap.Ptr(), device->m_cpuSamplerHeap.Ptr())); + returnRefPtrMove(outShaderObject, object); + return SLANG_OK; +} + +ShaderObjectImpl::~ShaderObjectImpl() { m_descriptorSet.freeIfSupported(); } + +RootShaderObjectLayoutImpl* RootShaderObjectImpl::getLayout() +{ + return static_cast<RootShaderObjectLayoutImpl*>(m_layout.Ptr()); +} + +UInt RootShaderObjectImpl::getEntryPointCount() { return (UInt)m_entryPoints.getCount(); } + +SlangResult RootShaderObjectImpl::getEntryPoint(UInt index, IShaderObject** outEntryPoint) +{ + returnComPtr(outEntryPoint, m_entryPoints[index]); + return SLANG_OK; +} + +Result RootShaderObjectImpl::collectSpecializationArgs(ExtendedShaderObjectTypeList& args) +{ + SLANG_RETURN_ON_FAIL(ShaderObjectImpl::collectSpecializationArgs(args)); + for (auto& entryPoint : m_entryPoints) + { + SLANG_RETURN_ON_FAIL(entryPoint->collectSpecializationArgs(args)); + } + return SLANG_OK; +} + +Result RootShaderObjectImpl::_createSpecializedLayout(ShaderObjectLayoutImpl** outLayout) +{ + ExtendedShaderObjectTypeList specializationArgs; + SLANG_RETURN_ON_FAIL(collectSpecializationArgs(specializationArgs)); + + // Note: There is an important policy decision being made here that we need + // to approach carefully. + // + // We are doing two different things that affect the layout of a program: + // + // 1. We are *composing* one or more pieces of code (notably the shared global/module + // stuff and the per-entry-point stuff). + // + // 2. We are *specializing* code that includes generic/existential parameters + // to concrete types/values. + // + // We need to decide the relative *order* of these two steps, because of how it impacts + // layout. The layout for `specialize(compose(A,B), X, Y)` is potentially different + // form that of `compose(specialize(A,X), speciealize(B,Y))`, even when both are + // semantically equivalent programs. + // + // Right now we are using the first option: we are first generating a full composition + // of all the code we plan to use (global scope plus all entry points), and then + // specializing it to the concatenated specialization argumenst for all of that. + // + // In some cases, though, this model isn't appropriate. For example, when dealing with + // ray-tracing shaders and local root signatures, we really want the parameters of each + // entry point (actually, each entry-point *group*) to be allocated distinct storage, + // which really means we want to compute something like: + // + // SpecializedGlobals = specialize(compose(ModuleA, ModuleB, ...), X, Y, ...) + // + // SpecializedEP1 = compose(SpecializedGlobals, specialize(EntryPoint1, T, U, ...)) + // SpecializedEP2 = compose(SpecializedGlobals, specialize(EntryPoint2, A, B, ...)) + // + // Note how in this case all entry points agree on the layout for the shared/common + // parmaeters, but their layouts are also independent of one another. + // + // Furthermore, in this example, loading another entry point into the system would not + // rquire re-computing the layouts (or generated kernel code) for any of the entry + // points that had already been loaded (in contrast to a compose-then-specialize + // approach). + // + ComPtr<slang::IComponentType> specializedComponentType; + ComPtr<slang::IBlob> diagnosticBlob; + auto result = getLayout()->getSlangProgram()->specialize( + specializationArgs.components.getArrayView().getBuffer(), + specializationArgs.getCount(), + specializedComponentType.writeRef(), + diagnosticBlob.writeRef()); + + if (diagnosticBlob && diagnosticBlob->getBufferSize()) + { + getDebugCallback()->handleMessage( + SLANG_FAILED(result) ? DebugMessageType::Error : DebugMessageType::Info, + DebugMessageSource::Layer, + (const char*)diagnosticBlob->getBufferPointer()); + } + + if (SLANG_FAILED(result)) + return result; + + ComPtr<ID3DBlob> d3dDiagnosticBlob; + auto slangSpecializedLayout = specializedComponentType->getLayout(); + RefPtr<RootShaderObjectLayoutImpl> specializedLayout; + auto rootLayoutResult = RootShaderObjectLayoutImpl::create( + static_cast<DeviceImpl*>(getRenderer()), + specializedComponentType, + slangSpecializedLayout, + specializedLayout.writeRef(), + d3dDiagnosticBlob.writeRef()); + + if (SLANG_FAILED(rootLayoutResult)) + { + return rootLayoutResult; + } + + // Note: Computing the layout for the specialized program will have also computed + // the layouts for the entry points, and we really need to attach that information + // to them so that they don't go and try to compute their own specializations. + // + // TODO: Well, if we move to the specialization model described above then maybe + // we *will* want entry points to do their own specialization work... + // + auto entryPointCount = m_entryPoints.getCount(); + for (Index i = 0; i < entryPointCount; ++i) + { + auto entryPointInfo = specializedLayout->getEntryPoint(i); + auto entryPointVars = m_entryPoints[i]; + + entryPointVars->m_specializedLayout = entryPointInfo.layout; + } + + returnRefPtrMove(outLayout, specializedLayout); + return SLANG_OK; +} + +Result RootShaderObjectImpl::copyFrom(IShaderObject* object, ITransientResourceHeap* transientHeap) +{ + if (auto srcObj = dynamic_cast<MutableRootShaderObjectImpl*>(object)) + { + *this = *srcObj; + return SLANG_OK; + } + return SLANG_FAIL; +} + +Result RootShaderObjectImpl::bindAsRoot( + BindingContext* context, RootShaderObjectLayoutImpl* specializedLayout) +{ + // Pull updates from sub-objects when this is a mutable root shader object. + updateSubObjectsRecursive(); + + // A root shader object always binds as if it were a parameter block, + // insofar as it needs to allocate a descriptor set to hold the bindings + // for its own state and any sub-objects. + // + // Note: We do not direclty use `bindAsParameterBlock` here because we also + // need to bind the entry points into the same descriptor set that is + // being used for the root object. + + BindingOffset rootOffset; + + // Bind all root parameters first. + Super::bindRootArguments(context, rootOffset.rootParam); + + DescriptorSet descriptorSet; + SLANG_RETURN_ON_FAIL(prepareToBindAsParameterBlock( + context, /* inout */ rootOffset, specializedLayout, descriptorSet)); + + SLANG_RETURN_ON_FAIL( + Super::bindAsConstantBuffer(context, descriptorSet, rootOffset, specializedLayout)); + + auto entryPointCount = m_entryPoints.getCount(); + for (Index i = 0; i < entryPointCount; ++i) + { + auto entryPoint = m_entryPoints[i]; + auto& entryPointInfo = specializedLayout->getEntryPoint(i); + + auto entryPointOffset = rootOffset; + entryPointOffset += entryPointInfo.offset; + + entryPoint->updateSubObjectsRecursive(); + + SLANG_RETURN_ON_FAIL(entryPoint->bindAsConstantBuffer( + context, descriptorSet, entryPointOffset, entryPointInfo.layout)); + } + + return SLANG_OK; +} + +Result RootShaderObjectImpl::resetImpl( + DeviceImpl* device, + RootShaderObjectLayoutImpl* layout, + DescriptorHeapReference viewHeap, + DescriptorHeapReference samplerHeap, + bool isMutable) +{ + SLANG_RETURN_ON_FAIL(Super::init(device, layout, viewHeap, samplerHeap)); + m_isMutable = isMutable; + m_specializedLayout = nullptr; + m_entryPoints.clear(); + for (auto entryPointInfo : layout->getEntryPoints()) + { + RefPtr<ShaderObjectImpl> entryPoint; + SLANG_RETURN_ON_FAIL( + ShaderObjectImpl::create(device, entryPointInfo.layout, entryPoint.writeRef())); + entryPoint->m_isMutable = isMutable; + m_entryPoints.add(entryPoint); + } + return SLANG_OK; +} + +Result RootShaderObjectImpl::reset( + DeviceImpl* device, RootShaderObjectLayoutImpl* layout, TransientResourceHeapImpl* heap) +{ + return resetImpl( + device, layout, &heap->m_stagingCpuViewHeap, &heap->m_stagingCpuSamplerHeap, false); +} + +RefPtr<BufferResource> ShaderTableImpl::createDeviceBuffer( + PipelineStateBase* pipeline, + TransientResourceHeapBase* transientHeap, + IResourceCommandEncoder* encoder) +{ + uint32_t raygenTableSize = m_rayGenShaderCount * D3D12_SHADER_IDENTIFIER_SIZE_IN_BYTES; + uint32_t missTableSize = m_missShaderCount * D3D12_SHADER_IDENTIFIER_SIZE_IN_BYTES; + uint32_t hitgroupTableSize = m_hitGroupCount * D3D12_SHADER_IDENTIFIER_SIZE_IN_BYTES; + m_rayGenTableOffset = 0; + m_missTableOffset = (uint32_t)D3DUtil::calcAligned( + raygenTableSize, D3D12_RAYTRACING_SHADER_TABLE_BYTE_ALIGNMENT); + m_hitGroupTableOffset = (uint32_t)D3DUtil::calcAligned( + m_missTableOffset + missTableSize, D3D12_RAYTRACING_SHADER_TABLE_BYTE_ALIGNMENT); + uint32_t tableSize = m_hitGroupTableOffset + hitgroupTableSize; + + auto pipelineImpl = static_cast<RayTracingPipelineStateImpl*>(pipeline); + ComPtr<IBufferResource> bufferResource; + IBufferResource::Desc bufferDesc = {}; + bufferDesc.memoryType = gfx::MemoryType::DeviceLocal; + bufferDesc.defaultState = ResourceState::General; + bufferDesc.type = IResource::Type::Buffer; + bufferDesc.sizeInBytes = tableSize; + m_device->createBufferResource(bufferDesc, nullptr, bufferResource.writeRef()); + + ComPtr<ID3D12StateObjectProperties> stateObjectProperties; + pipelineImpl->m_stateObject->QueryInterface(stateObjectProperties.writeRef()); + + TransientResourceHeapImpl* transientHeapImpl = + static_cast<TransientResourceHeapImpl*>(transientHeap); + + IBufferResource* stagingBuffer = nullptr; + size_t stagingBufferOffset = 0; + transientHeapImpl->allocateStagingBuffer( + tableSize, stagingBuffer, stagingBufferOffset, MemoryType::Upload); + + assert(stagingBuffer); + void* stagingPtr = nullptr; + stagingBuffer->map(nullptr, &stagingPtr); + + auto copyShaderIdInto = [&](void* dest, String& name, const ShaderRecordOverwrite& overwrite) + { + if (name.getLength()) + { + void* shaderId = stateObjectProperties->GetShaderIdentifier(name.toWString().begin()); + memcpy(dest, shaderId, D3D12_SHADER_IDENTIFIER_SIZE_IN_BYTES); + } + else + { + memset(dest, 0, D3D12_SHADER_IDENTIFIER_SIZE_IN_BYTES); + } + if (overwrite.size) + { + memcpy((uint8_t*)dest + overwrite.offset, overwrite.data, overwrite.size); + } + }; + + uint8_t* stagingBufferPtr = (uint8_t*)stagingPtr + stagingBufferOffset; + for (uint32_t i = 0; i < m_rayGenShaderCount; i++) + { + copyShaderIdInto( + stagingBufferPtr + m_rayGenTableOffset + D3D12_SHADER_IDENTIFIER_SIZE_IN_BYTES * i, + m_shaderGroupNames[i], + m_recordOverwrites[i]); + } + for (uint32_t i = 0; i < m_missShaderCount; i++) + { + copyShaderIdInto( + stagingBufferPtr + m_missTableOffset + D3D12_SHADER_IDENTIFIER_SIZE_IN_BYTES * i, + m_shaderGroupNames[m_rayGenShaderCount + i], + m_recordOverwrites[m_rayGenShaderCount + i]); + } + for (uint32_t i = 0; i < m_hitGroupCount; i++) + { + copyShaderIdInto( + stagingBufferPtr + m_hitGroupTableOffset + D3D12_SHADER_IDENTIFIER_SIZE_IN_BYTES * i, + m_shaderGroupNames[m_rayGenShaderCount + m_missShaderCount + i], + m_recordOverwrites[m_rayGenShaderCount + m_missShaderCount + i]); + } + + stagingBuffer->unmap(nullptr); + encoder->copyBuffer(bufferResource, 0, stagingBuffer, stagingBufferOffset, tableSize); + encoder->bufferBarrier( + 1, + bufferResource.readRef(), + gfx::ResourceState::CopyDestination, + gfx::ResourceState::ShaderResource); + RefPtr<BufferResource> resultPtr = static_cast<BufferResource*>(bufferResource.get()); + return _Move(resultPtr); +} + +// There are a pair of cyclic references between a `TransientResourceHeap` and +// a `CommandBuffer` created from the heap. We need to break the cycle upon +// the public reference count of a command buffer dropping to 0. + +ICommandBuffer* CommandBufferImpl::getInterface(const Guid& guid) +{ + if (guid == GfxGUID::IID_ISlangUnknown || guid == GfxGUID::IID_ICommandBuffer) + return static_cast<ICommandBuffer*>(this); + return nullptr; +} + +Result CommandBufferImpl::getNativeHandle(InteropHandle* handle) +{ + handle->api = InteropHandleAPI::D3D12; + handle->handleValue = (uint64_t)m_cmdList.get(); + return SLANG_OK; +} + +void CommandBufferImpl::bindDescriptorHeaps() +{ + if (!m_descriptorHeapsBound) + { + ID3D12DescriptorHeap* heaps[] = { + m_transientHeap->getCurrentViewHeap().getHeap(), + m_transientHeap->getCurrentSamplerHeap().getHeap(), + }; + m_cmdList->SetDescriptorHeaps(SLANG_COUNT_OF(heaps), heaps); + m_descriptorHeapsBound = true; + } +} + +void CommandBufferImpl::reinit() +{ + invalidateDescriptorHeapBinding(); + m_rootShaderObject.init(m_renderer); +} + +void CommandBufferImpl::init( + DeviceImpl* renderer, + ID3D12GraphicsCommandList* d3dCommandList, + TransientResourceHeapImpl* transientHeap) +{ + m_transientHeap = transientHeap; + m_renderer = renderer; + m_cmdList = d3dCommandList; + + reinit(); + +#if SLANG_GFX_HAS_DXR_SUPPORT + m_cmdList->QueryInterface<ID3D12GraphicsCommandList4>(m_cmdList4.writeRef()); + if (m_cmdList4) + { + m_cmdList1 = m_cmdList4; + return; + } +#endif + m_cmdList->QueryInterface<ID3D12GraphicsCommandList1>(m_cmdList1.writeRef()); +} + +void CommandBufferImpl::encodeResourceCommands(IResourceCommandEncoder** outEncoder) +{ + m_resourceCommandEncoder.init(this); + *outEncoder = &m_resourceCommandEncoder; +} + +void CommandBufferImpl::encodeRenderCommands( + IRenderPassLayout* renderPass, IFramebuffer* framebuffer, IRenderCommandEncoder** outEncoder) +{ + m_renderCommandEncoder.init( + m_renderer, + m_transientHeap, + this, + static_cast<RenderPassLayoutImpl*>(renderPass), + static_cast<FramebufferImpl*>(framebuffer)); + *outEncoder = &m_renderCommandEncoder; +} + +void CommandBufferImpl::encodeComputeCommands(IComputeCommandEncoder** outEncoder) +{ + m_computeCommandEncoder.init(m_renderer, m_transientHeap, this); + *outEncoder = &m_computeCommandEncoder; +} + +void CommandBufferImpl::encodeRayTracingCommands(IRayTracingCommandEncoder** outEncoder) +{ +#if SLANG_GFX_HAS_DXR_SUPPORT + m_rayTracingCommandEncoder.init(this); + *outEncoder = &m_rayTracingCommandEncoder; +#else + *outEncoder = nullptr; +#endif +} + +void CommandBufferImpl::close() { m_cmdList->Close(); } + +void ResourceCommandEncoderImpl::copyBuffer( + IBufferResource* dst, size_t dstOffset, IBufferResource* src, size_t srcOffset, size_t size) +{ + auto dstBuffer = static_cast<BufferResourceImpl*>(dst); + auto srcBuffer = static_cast<BufferResourceImpl*>(src); + + m_commandBuffer->m_cmdList->CopyBufferRegion( + dstBuffer->m_resource.getResource(), + dstOffset, + srcBuffer->m_resource.getResource(), + srcOffset, + size); +} + +void ResourceCommandEncoderImpl::uploadBufferData( + IBufferResource* dst, size_t offset, size_t size, void* data) +{ + uploadBufferDataImpl( + m_commandBuffer->m_renderer->m_device, + m_commandBuffer->m_cmdList, + m_commandBuffer->m_transientHeap, + static_cast<BufferResourceImpl*>(dst), + offset, + size, + data); +} + +void ResourceCommandEncoderImpl::textureBarrier( + size_t count, ITextureResource* const* textures, ResourceState src, ResourceState dst) +{ + ShortList<D3D12_RESOURCE_BARRIER> barriers; + + for (size_t i = 0; i < count; i++) + { + auto textureImpl = static_cast<TextureResourceImpl*>(textures[i]); + auto d3dFormat = D3DUtil::getMapFormat(textureImpl->getDesc()->format); + auto textureDesc = textureImpl->getDesc(); + D3D12_RESOURCE_BARRIER barrier; + barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE; + if (src == dst && src == ResourceState::UnorderedAccess) + { + barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_UAV; + barrier.UAV.pResource = textureImpl->m_resource.getResource(); + } + else + { + barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; + barrier.Transition.StateBefore = D3DUtil::getResourceState(src); + barrier.Transition.StateAfter = D3DUtil::getResourceState(dst); + if (barrier.Transition.StateBefore == barrier.Transition.StateAfter) + continue; + barrier.Transition.pResource = textureImpl->m_resource.getResource(); + auto planeCount = + D3DUtil::getPlaneSliceCount(D3DUtil::getMapFormat(textureImpl->getDesc()->format)); + auto arraySize = textureDesc->arraySize; + if (arraySize == 0) + arraySize = 1; + barrier.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; + } + barriers.add(barrier); + } + if (barriers.getCount()) + { + m_commandBuffer->m_cmdList->ResourceBarrier( + (UINT)barriers.getCount(), barriers.getArrayView().getBuffer()); + } +} + +void ResourceCommandEncoderImpl::bufferBarrier( + size_t count, IBufferResource* const* buffers, ResourceState src, ResourceState dst) +{ + + List<D3D12_RESOURCE_BARRIER> barriers; + barriers.reserve(count); + + for (size_t i = 0; i < count; i++) + { + auto bufferImpl = static_cast<BufferResourceImpl*>(buffers[i]); + + D3D12_RESOURCE_BARRIER barrier = {}; + // If the src == dst, it must be a UAV barrier. + barrier.Type = (src == dst && dst == ResourceState::UnorderedAccess) + ? D3D12_RESOURCE_BARRIER_TYPE_UAV + : D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; + barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE; + + if (barrier.Type == D3D12_RESOURCE_BARRIER_TYPE_UAV) + { + barrier.UAV.pResource = bufferImpl->m_resource; + } + else + { + barrier.Transition.pResource = bufferImpl->m_resource; + barrier.Transition.StateBefore = D3DUtil::getResourceState(src); + barrier.Transition.StateAfter = D3DUtil::getResourceState(dst); + barrier.Transition.Subresource = 0; + if (barrier.Transition.StateAfter == barrier.Transition.StateBefore) + continue; + } + barriers.add(barrier); + } + if (barriers.getCount()) + { + m_commandBuffer->m_cmdList4->ResourceBarrier( + (UINT)barriers.getCount(), barriers.getArrayView().getBuffer()); + } +} + +void ResourceCommandEncoderImpl::writeTimestamp(IQueryPool* pool, SlangInt index) +{ + static_cast<QueryPoolImpl*>(pool)->writeTimestamp(m_commandBuffer->m_cmdList, index); +} + +void ResourceCommandEncoderImpl::copyTexture( + ITextureResource* dst, + ResourceState dstState, + SubresourceRange dstSubresource, + ITextureResource::Offset3D dstOffset, + ITextureResource* src, + ResourceState srcState, + SubresourceRange srcSubresource, + ITextureResource::Offset3D srcOffset, + ITextureResource::Size extent) +{ + auto dstTexture = static_cast<TextureResourceImpl*>(dst); + auto srcTexture = static_cast<TextureResourceImpl*>(src); + + if (dstSubresource.layerCount == 0 && dstSubresource.mipLevelCount == 0 && + srcSubresource.layerCount == 0 && srcSubresource.mipLevelCount == 0) + { + m_commandBuffer->m_cmdList->CopyResource( + dstTexture->m_resource.getResource(), srcTexture->m_resource.getResource()); + return; + } + + auto d3dFormat = D3DUtil::getMapFormat(dstTexture->getDesc()->format); + auto aspectMask = (int32_t)dstSubresource.aspectMask; + if (dstSubresource.aspectMask == TextureAspect::Default) + aspectMask = (int32_t)TextureAspect::Color; + while (aspectMask) + { + auto aspect = Math::getLowestBit((int32_t)aspectMask); + aspectMask &= ~aspect; + auto planeIndex = D3DUtil::getPlaneSlice(d3dFormat, (TextureAspect)aspect); + for (uint32_t layer = 0; layer < dstSubresource.layerCount; layer++) + { + for (uint32_t mipLevel = 0; mipLevel < dstSubresource.mipLevelCount; mipLevel++) + { + D3D12_TEXTURE_COPY_LOCATION dstRegion = {}; + + dstRegion.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; + dstRegion.pResource = dstTexture->m_resource.getResource(); + dstRegion.SubresourceIndex = D3DUtil::getSubresourceIndex( + dstSubresource.mipLevel + mipLevel, + dstSubresource.baseArrayLayer + layer, + planeIndex, + dstTexture->getDesc()->numMipLevels, + dstTexture->getDesc()->arraySize); + + D3D12_TEXTURE_COPY_LOCATION srcRegion = {}; + srcRegion.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; + srcRegion.pResource = srcTexture->m_resource.getResource(); + srcRegion.SubresourceIndex = D3DUtil::getSubresourceIndex( + srcSubresource.mipLevel + mipLevel, + srcSubresource.baseArrayLayer + layer, + planeIndex, + srcTexture->getDesc()->numMipLevels, + srcTexture->getDesc()->arraySize); + + D3D12_BOX srcBox = {}; + srcBox.left = srcOffset.x; + srcBox.top = srcOffset.y; + srcBox.front = srcOffset.z; + srcBox.right = srcBox.left + extent.width; + srcBox.bottom = srcBox.top + extent.height; + srcBox.back = srcBox.front + extent.depth; + + m_commandBuffer->m_cmdList->CopyTextureRegion( + &dstRegion, dstOffset.x, dstOffset.y, dstOffset.z, &srcRegion, &srcBox); + } + } + } +} + +void ResourceCommandEncoderImpl::uploadTextureData( + ITextureResource* dst, + SubresourceRange subResourceRange, + ITextureResource::Offset3D offset, + ITextureResource::Size extent, + ITextureResource::SubresourceData* subResourceData, + size_t subResourceDataCount) +{ + auto dstTexture = static_cast<TextureResourceImpl*>(dst); + auto baseSubresourceIndex = D3DUtil::getSubresourceIndex( + subResourceRange.mipLevel, + subResourceRange.baseArrayLayer, + 0, + dstTexture->getDesc()->numMipLevels, + dstTexture->getDesc()->arraySize); + auto textureSize = dstTexture->getDesc()->size; + FormatInfo formatInfo = {}; + gfxGetFormatInfo(dstTexture->getDesc()->format, &formatInfo); + for (uint32_t i = 0; i < (uint32_t)subResourceDataCount; i++) + { + auto subresourceIndex = baseSubresourceIndex + i; + // Get the footprint + D3D12_RESOURCE_DESC texDesc = dstTexture->m_resource.getResource()->GetDesc(); + + D3D12_TEXTURE_COPY_LOCATION dstRegion = {}; + + dstRegion.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; + dstRegion.SubresourceIndex = subresourceIndex; + dstRegion.pResource = dstTexture->m_resource.getResource(); + + D3D12_TEXTURE_COPY_LOCATION srcRegion = {}; + srcRegion.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; + D3D12_PLACED_SUBRESOURCE_FOOTPRINT& footprint = srcRegion.PlacedFootprint; + footprint.Offset = 0; + footprint.Footprint.Format = texDesc.Format; + uint32_t mipLevel = + D3DUtil::getSubresourceMipLevel(subresourceIndex, dstTexture->getDesc()->numMipLevels); + if (extent.width != ITextureResource::kRemainingTextureSize) + { + footprint.Footprint.Width = extent.width; + } + else + { + footprint.Footprint.Width = Math::Max(1, (textureSize.width >> mipLevel)) - offset.x; + } + if (extent.height != ITextureResource::kRemainingTextureSize) + { + footprint.Footprint.Height = extent.height; + } + else + { + footprint.Footprint.Height = Math::Max(1, (textureSize.height >> mipLevel)) - offset.y; + } + if (extent.depth != ITextureResource::kRemainingTextureSize) + { + footprint.Footprint.Depth = extent.depth; + } + else + { + footprint.Footprint.Depth = Math::Max(1, (textureSize.depth >> mipLevel)) - offset.z; + } + auto rowSize = (footprint.Footprint.Width + formatInfo.blockWidth - 1) / + formatInfo.blockWidth * formatInfo.blockSizeInBytes; + auto rowCount = + (footprint.Footprint.Height + formatInfo.blockHeight - 1) / formatInfo.blockHeight; + footprint.Footprint.RowPitch = + (UINT)D3DUtil::calcAligned(rowSize, (uint32_t)D3D12_TEXTURE_DATA_PITCH_ALIGNMENT); + + auto bufferSize = footprint.Footprint.RowPitch * rowCount * footprint.Footprint.Depth; + + IBufferResource* stagingBuffer; + size_t stagingBufferOffset = 0; + m_commandBuffer->m_transientHeap->allocateStagingBuffer( + bufferSize, stagingBuffer, stagingBufferOffset, MemoryType::Upload, true); + assert(stagingBufferOffset == 0); + BufferResourceImpl* bufferImpl = static_cast<BufferResourceImpl*>(stagingBuffer); + uint8_t* bufferData = nullptr; + D3D12_RANGE mapRange = {0, 0}; + bufferImpl->m_resource.getResource()->Map(0, &mapRange, (void**)&bufferData); + for (uint32_t z = 0; z < footprint.Footprint.Depth; z++) + { + auto imageStart = bufferData + footprint.Footprint.RowPitch * rowCount * (size_t)z; + auto srcData = (uint8_t*)subResourceData->data + subResourceData->strideZ * z; + for (uint32_t row = 0; row < rowCount; row++) + { + memcpy( + imageStart + row * (size_t)footprint.Footprint.RowPitch, + srcData + subResourceData->strideY * row, + rowSize); + } + } + bufferImpl->m_resource.getResource()->Unmap(0, nullptr); + srcRegion.pResource = bufferImpl->m_resource.getResource(); + m_commandBuffer->m_cmdList->CopyTextureRegion( + &dstRegion, offset.x, offset.y, offset.z, &srcRegion, nullptr); + } +} + +void ResourceCommandEncoderImpl::clearResourceView( + IResourceView* view, ClearValue* clearValue, ClearResourceViewFlags::Enum flags) +{ + auto viewImpl = static_cast<ResourceViewImpl*>(view); + switch (view->getViewDesc()->type) + { + case IResourceView::Type::RenderTarget: + m_commandBuffer->m_cmdList->ClearRenderTargetView( + viewImpl->m_descriptor.cpuHandle, clearValue->color.floatValues, 0, nullptr); + break; + case IResourceView::Type::DepthStencil: + { + D3D12_CLEAR_FLAGS clearFlags = (D3D12_CLEAR_FLAGS)0; + if (flags & ClearResourceViewFlags::ClearDepth) + { + clearFlags |= D3D12_CLEAR_FLAG_DEPTH; + } + if (flags & ClearResourceViewFlags::ClearStencil) + { + clearFlags |= D3D12_CLEAR_FLAG_STENCIL; + } + m_commandBuffer->m_cmdList->ClearDepthStencilView( + viewImpl->m_descriptor.cpuHandle, + clearFlags, + clearValue->depthStencil.depth, + (UINT8)clearValue->depthStencil.stencil, + 0, + nullptr); + break; + } + case IResourceView::Type::UnorderedAccess: + { + ID3D12Resource* d3dResource = nullptr; + switch (viewImpl->m_resource->getType()) + { + case IResource::Type::Buffer: + d3dResource = static_cast<BufferResourceImpl*>(viewImpl->m_resource.Ptr()) + ->m_resource.getResource(); + break; + default: + d3dResource = static_cast<TextureResourceImpl*>(viewImpl->m_resource.Ptr()) + ->m_resource.getResource(); + break; + } + auto gpuHandleIndex = + m_commandBuffer->m_transientHeap->getCurrentViewHeap().allocate(1); + if (gpuHandleIndex == -1) + { + m_commandBuffer->m_transientHeap->allocateNewViewDescriptorHeap( + m_commandBuffer->m_renderer); + gpuHandleIndex = m_commandBuffer->m_transientHeap->getCurrentViewHeap().allocate(1); + m_commandBuffer->bindDescriptorHeaps(); + } + this->m_commandBuffer->m_renderer->m_device->CopyDescriptorsSimple( + 1, + m_commandBuffer->m_transientHeap->getCurrentViewHeap().getCpuHandle(gpuHandleIndex), + viewImpl->m_descriptor.cpuHandle, + D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + + if (flags & ClearResourceViewFlags::FloatClearValues) + { + m_commandBuffer->m_cmdList->ClearUnorderedAccessViewFloat( + m_commandBuffer->m_transientHeap->getCurrentViewHeap().getGpuHandle( + gpuHandleIndex), + viewImpl->m_descriptor.cpuHandle, + d3dResource, + clearValue->color.floatValues, + 0, + nullptr); + } + else + { + m_commandBuffer->m_cmdList->ClearUnorderedAccessViewUint( + m_commandBuffer->m_transientHeap->getCurrentViewHeap().getGpuHandle( + gpuHandleIndex), + viewImpl->m_descriptor.cpuHandle, + d3dResource, + clearValue->color.uintValues, + 0, + nullptr); + } + break; + } + default: + break; + } +} + +void ResourceCommandEncoderImpl::resolveResource( + ITextureResource* source, + ResourceState sourceState, + SubresourceRange sourceRange, + ITextureResource* dest, + ResourceState destState, + SubresourceRange destRange) +{ + auto srcTexture = static_cast<TextureResourceImpl*>(source); + auto srcDesc = srcTexture->getDesc(); + auto dstTexture = static_cast<TextureResourceImpl*>(dest); + auto dstDesc = dstTexture->getDesc(); + + for (uint32_t layer = 0; layer < sourceRange.layerCount; ++layer) + { + for (uint32_t mip = 0; mip < sourceRange.mipLevelCount; ++mip) + { + auto srcSubresourceIndex = D3DUtil::getSubresourceIndex( + mip + sourceRange.mipLevel, + layer + sourceRange.baseArrayLayer, + 0, + srcDesc->numMipLevels, + srcDesc->arraySize); + auto dstSubresourceIndex = D3DUtil::getSubresourceIndex( + mip + destRange.mipLevel, + layer + destRange.baseArrayLayer, + 0, + dstDesc->numMipLevels, + dstDesc->arraySize); + + DXGI_FORMAT format = D3DUtil::getMapFormat(srcDesc->format); + + m_commandBuffer->m_cmdList->ResolveSubresource( + dstTexture->m_resource.getResource(), + dstSubresourceIndex, + srcTexture->m_resource.getResource(), + srcSubresourceIndex, + format); + } + } +} + +void ResourceCommandEncoderImpl::resolveQuery( + IQueryPool* queryPool, uint32_t index, uint32_t count, IBufferResource* buffer, uint64_t offset) +{ + auto queryBase = static_cast<QueryPoolBase*>(queryPool); + switch (queryBase->m_desc.type) + { + case QueryType::AccelerationStructureCompactedSize: + case QueryType::AccelerationStructureCurrentSize: + case QueryType::AccelerationStructureSerializedSize: + { + auto queryPoolImpl = static_cast<PlainBufferProxyQueryPoolImpl*>(queryPool); + auto bufferImpl = static_cast<BufferResourceImpl*>(buffer); + auto srcQueryBuffer = queryPoolImpl->m_bufferResource->m_resource.getResource(); + + D3D12_RESOURCE_BARRIER barrier = {}; + barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; + barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_UNORDERED_ACCESS; + barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_SOURCE; + barrier.Transition.pResource = srcQueryBuffer; + m_commandBuffer->m_cmdList->ResourceBarrier(1, &barrier); + + m_commandBuffer->m_cmdList->CopyBufferRegion( + bufferImpl->m_resource.getResource(), + offset, + srcQueryBuffer, + index * sizeof(uint64_t), + count * sizeof(uint64_t)); + + barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; + barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_SOURCE; + barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_UNORDERED_ACCESS; + barrier.Transition.pResource = srcQueryBuffer; + m_commandBuffer->m_cmdList->ResourceBarrier(1, &barrier); + } + break; + default: + { + auto queryPoolImpl = static_cast<QueryPoolImpl*>(queryPool); + auto bufferImpl = static_cast<BufferResourceImpl*>(buffer); + m_commandBuffer->m_cmdList->ResolveQueryData( + queryPoolImpl->m_queryHeap.get(), + queryPoolImpl->m_queryType, + index, + count, + bufferImpl->m_resource.getResource(), + offset); + } + break; + } +} + +void ResourceCommandEncoderImpl::copyTextureToBuffer( + IBufferResource* dst, + size_t dstOffset, + size_t dstSize, + size_t dstRowStride, + ITextureResource* src, + ResourceState srcState, + SubresourceRange srcSubresource, + ITextureResource::Offset3D srcOffset, + ITextureResource::Size extent) +{ + assert(srcSubresource.mipLevelCount <= 1); + + auto srcTexture = static_cast<TextureResourceImpl*>(src); + auto dstBuffer = static_cast<BufferResourceImpl*>(dst); + auto baseSubresourceIndex = D3DUtil::getSubresourceIndex( + srcSubresource.mipLevel, + srcSubresource.baseArrayLayer, + 0, + srcTexture->getDesc()->numMipLevels, + srcTexture->getDesc()->arraySize); + auto textureSize = srcTexture->getDesc()->size; + FormatInfo formatInfo = {}; + gfxGetFormatInfo(srcTexture->getDesc()->format, &formatInfo); + if (srcSubresource.mipLevelCount == 0) + srcSubresource.mipLevelCount = srcTexture->getDesc()->numMipLevels; + if (srcSubresource.layerCount == 0) + srcSubresource.layerCount = srcTexture->getDesc()->arraySize; + + for (uint32_t layer = 0; layer < srcSubresource.layerCount; layer++) + { + // Get the footprint + D3D12_RESOURCE_DESC texDesc = srcTexture->m_resource.getResource()->GetDesc(); + + D3D12_TEXTURE_COPY_LOCATION dstRegion = {}; + dstRegion.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; + dstRegion.pResource = dstBuffer->m_resource.getResource(); + D3D12_PLACED_SUBRESOURCE_FOOTPRINT& footprint = dstRegion.PlacedFootprint; + + D3D12_TEXTURE_COPY_LOCATION srcRegion = {}; + srcRegion.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; + srcRegion.SubresourceIndex = D3DUtil::getSubresourceIndex( + srcSubresource.mipLevel, + layer + srcSubresource.baseArrayLayer, + 0, + srcTexture->getDesc()->numMipLevels, + srcTexture->getDesc()->arraySize); + srcRegion.pResource = srcTexture->m_resource.getResource(); + + footprint.Offset = dstOffset; + footprint.Footprint.Format = texDesc.Format; + uint32_t mipLevel = srcSubresource.mipLevel; + if (extent.width != 0xFFFFFFFF) + { + footprint.Footprint.Width = extent.width; + } + else + { + footprint.Footprint.Width = Math::Max(1, (textureSize.width >> mipLevel)) - srcOffset.x; + } + if (extent.height != 0xFFFFFFFF) + { + footprint.Footprint.Height = extent.height; + } + else + { + footprint.Footprint.Height = + Math::Max(1, (textureSize.height >> mipLevel)) - srcOffset.y; + } + if (extent.depth != 0xFFFFFFFF) + { + footprint.Footprint.Depth = extent.depth; + } + else + { + footprint.Footprint.Depth = Math::Max(1, (textureSize.depth >> mipLevel)) - srcOffset.z; + } + + assert(dstRowStride % D3D12_TEXTURE_DATA_PITCH_ALIGNMENT == 0); + footprint.Footprint.RowPitch = dstRowStride; + + auto bufferSize = + footprint.Footprint.RowPitch * footprint.Footprint.Height * footprint.Footprint.Depth; + + D3D12_BOX srcBox = {}; + srcBox.left = srcOffset.x; + srcBox.top = srcOffset.y; + srcBox.front = srcOffset.z; + srcBox.right = srcOffset.x + extent.width; + srcBox.bottom = srcOffset.y + extent.height; + srcBox.back = srcOffset.z + extent.depth; + m_commandBuffer->m_cmdList->CopyTextureRegion(&dstRegion, 0, 0, 0, &srcRegion, &srcBox); + } +} + +void ResourceCommandEncoderImpl::textureSubresourceBarrier( + ITextureResource* texture, + SubresourceRange subresourceRange, + ResourceState src, + ResourceState dst) +{ + auto textureImpl = static_cast<TextureResourceImpl*>(texture); + + ShortList<D3D12_RESOURCE_BARRIER> barriers; + D3D12_RESOURCE_BARRIER barrier; + barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE; + if (src == dst && src == ResourceState::UnorderedAccess) + { + barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_UAV; + barrier.UAV.pResource = textureImpl->m_resource.getResource(); + barriers.add(barrier); + } + else + { + barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; + barrier.Transition.StateBefore = D3DUtil::getResourceState(src); + barrier.Transition.StateAfter = D3DUtil::getResourceState(dst); + if (barrier.Transition.StateBefore == barrier.Transition.StateAfter) + return; + barrier.Transition.pResource = textureImpl->m_resource.getResource(); + auto d3dFormat = D3DUtil::getMapFormat(textureImpl->getDesc()->format); + auto aspectMask = (int32_t)subresourceRange.aspectMask; + if (subresourceRange.aspectMask == TextureAspect::Default) + aspectMask = (int32_t)TextureAspect::Color; + while (aspectMask) + { + auto aspect = Math::getLowestBit((int32_t)aspectMask); + aspectMask &= ~aspect; + auto planeIndex = D3DUtil::getPlaneSlice(d3dFormat, (TextureAspect)aspect); + for (uint32_t layer = 0; layer < subresourceRange.layerCount; layer++) + { + for (uint32_t mip = 0; mip < subresourceRange.mipLevelCount; mip++) + { + barrier.Transition.Subresource = D3DUtil::getSubresourceIndex( + mip + subresourceRange.mipLevel, + layer + subresourceRange.baseArrayLayer, + planeIndex, + textureImpl->getDesc()->numMipLevels, + textureImpl->getDesc()->arraySize); + barriers.add(barrier); + } + } + } + } + m_commandBuffer->m_cmdList->ResourceBarrier( + (UINT)barriers.getCount(), barriers.getArrayView().getBuffer()); +} + +void ResourceCommandEncoderImpl::beginDebugEvent(const char* name, float rgbColor[3]) +{ + auto beginEvent = m_commandBuffer->m_renderer->m_BeginEventOnCommandList; + if (beginEvent) + { + beginEvent( + m_commandBuffer->m_cmdList, + 0xff000000 | (uint8_t(rgbColor[0] * 255.0f) << 16) | + (uint8_t(rgbColor[1] * 255.0f) << 8) | uint8_t(rgbColor[2] * 255.0f), + name); + } +} + +void ResourceCommandEncoderImpl::endDebugEvent() +{ + auto endEvent = m_commandBuffer->m_renderer->m_EndEventOnCommandList; + if (endEvent) + { + endEvent(m_commandBuffer->m_cmdList); + } +} + +void RenderCommandEncoderImpl::init( + DeviceImpl* renderer, + TransientResourceHeapImpl* transientHeap, + CommandBufferImpl* cmdBuffer, + RenderPassLayoutImpl* renderPass, + FramebufferImpl* framebuffer) +{ + PipelineCommandEncoder::init(cmdBuffer); + m_preCmdList = nullptr; + m_renderPass = renderPass; + m_framebuffer = framebuffer; + m_transientHeap = transientHeap; + m_boundVertexBuffers.clear(); + m_boundIndexBuffer = nullptr; + m_primitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; + m_primitiveTopology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST; + m_boundIndexFormat = DXGI_FORMAT_UNKNOWN; + m_boundIndexOffset = 0; + m_currentPipeline = nullptr; + + // Set render target states. + if (!framebuffer) + { + return; + } + m_d3dCmdList->OMSetRenderTargets( + (UINT)framebuffer->renderTargetViews.getCount(), + framebuffer->renderTargetDescriptors.getArrayView().getBuffer(), + FALSE, + framebuffer->depthStencilView ? &framebuffer->depthStencilDescriptor : nullptr); + + // Issue clear commands based on render pass set up. + for (Index i = 0; i < framebuffer->renderTargetViews.getCount(); i++) + { + if (i >= renderPass->m_renderTargetAccesses.getCount()) + continue; + + auto& access = renderPass->m_renderTargetAccesses[i]; + + // Transit resource states. + { + D3D12BarrierSubmitter submitter(m_d3dCmdList); + auto resourceViewImpl = framebuffer->renderTargetViews[i].Ptr(); + if (resourceViewImpl) + { + auto textureResource = + static_cast<TextureResourceImpl*>(resourceViewImpl->m_resource.Ptr()); + if (textureResource) + { + D3D12_RESOURCE_STATES initialState; + if (access.initialState == ResourceState::Undefined) + { + initialState = textureResource->m_defaultState; + } + else + { + initialState = D3DUtil::getResourceState(access.initialState); + } + textureResource->m_resource.transition( + initialState, D3D12_RESOURCE_STATE_RENDER_TARGET, submitter); + } + } + } + // Clear. + if (access.loadOp == IRenderPassLayout::AttachmentLoadOp::Clear) + { + m_d3dCmdList->ClearRenderTargetView( + framebuffer->renderTargetDescriptors[i], + framebuffer->renderTargetClearValues[i].values, + 0, + nullptr); + } + } + + if (renderPass->m_hasDepthStencil) + { + // Transit resource states. + { + D3D12BarrierSubmitter submitter(m_d3dCmdList); + auto resourceViewImpl = framebuffer->depthStencilView.Ptr(); + auto textureResource = + static_cast<TextureResourceImpl*>(resourceViewImpl->m_resource.Ptr()); + D3D12_RESOURCE_STATES initialState; + if (renderPass->m_depthStencilAccess.initialState == ResourceState::Undefined) + { + initialState = textureResource->m_defaultState; + } + else + { + initialState = + D3DUtil::getResourceState(renderPass->m_depthStencilAccess.initialState); + } + textureResource->m_resource.transition( + initialState, D3D12_RESOURCE_STATE_DEPTH_WRITE, submitter); + } + // Clear. + uint32_t clearFlags = 0; + if (renderPass->m_depthStencilAccess.loadOp == IRenderPassLayout::AttachmentLoadOp::Clear) + { + clearFlags |= D3D12_CLEAR_FLAG_DEPTH; + } + if (renderPass->m_depthStencilAccess.stencilLoadOp == + IRenderPassLayout::AttachmentLoadOp::Clear) + { + clearFlags |= D3D12_CLEAR_FLAG_STENCIL; + } + if (clearFlags) + { + m_d3dCmdList->ClearDepthStencilView( + framebuffer->depthStencilDescriptor, + (D3D12_CLEAR_FLAGS)clearFlags, + framebuffer->depthStencilClearValue.depth, + framebuffer->depthStencilClearValue.stencil, + 0, + nullptr); + } + } +} + +Result RenderCommandEncoderImpl::bindPipeline(IPipelineState* state, IShaderObject** outRootObject) +{ + return bindPipelineImpl(state, outRootObject); +} + +Result RenderCommandEncoderImpl::bindPipelineWithRootObject( + IPipelineState* state, IShaderObject* rootObject) +{ + return bindPipelineWithRootObjectImpl(state, rootObject); +} + +void RenderCommandEncoderImpl::setViewports(uint32_t count, const Viewport* viewports) +{ + static const int kMaxViewports = D3D12_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE; + assert(count <= kMaxViewports && count <= kMaxRTVCount); + for (UInt ii = 0; ii < count; ++ii) + { + auto& inViewport = viewports[ii]; + auto& dxViewport = m_viewports[ii]; + + dxViewport.TopLeftX = inViewport.originX; + dxViewport.TopLeftY = inViewport.originY; + dxViewport.Width = inViewport.extentX; + dxViewport.Height = inViewport.extentY; + dxViewport.MinDepth = inViewport.minZ; + dxViewport.MaxDepth = inViewport.maxZ; + } + m_d3dCmdList->RSSetViewports(UINT(count), m_viewports); +} + +void RenderCommandEncoderImpl::setScissorRects(uint32_t count, const ScissorRect* rects) +{ + static const int kMaxScissorRects = D3D12_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE; + assert(count <= kMaxScissorRects && count <= kMaxRTVCount); + + for (UInt ii = 0; ii < count; ++ii) + { + auto& inRect = rects[ii]; + auto& dxRect = m_scissorRects[ii]; + + dxRect.left = LONG(inRect.minX); + dxRect.top = LONG(inRect.minY); + dxRect.right = LONG(inRect.maxX); + dxRect.bottom = LONG(inRect.maxY); + } + + m_d3dCmdList->RSSetScissorRects(UINT(count), m_scissorRects); +} + +void RenderCommandEncoderImpl::setPrimitiveTopology(PrimitiveTopology topology) +{ + m_primitiveTopologyType = D3DUtil::getPrimitiveType(topology); + m_primitiveTopology = D3DUtil::getPrimitiveTopology(topology); +} + +void RenderCommandEncoderImpl::setVertexBuffers( + uint32_t startSlot, + uint32_t slotCount, + IBufferResource* const* buffers, + const uint32_t* offsets) +{ + { + const Index num = startSlot + slotCount; + if (num > m_boundVertexBuffers.getCount()) + { + m_boundVertexBuffers.setCount(num); + } + } + + for (UInt i = 0; i < slotCount; i++) + { + BufferResourceImpl* buffer = static_cast<BufferResourceImpl*>(buffers[i]); + + BoundVertexBuffer& boundBuffer = m_boundVertexBuffers[startSlot + i]; + boundBuffer.m_buffer = buffer; + boundBuffer.m_offset = int(offsets[i]); + } +} + +void RenderCommandEncoderImpl::setIndexBuffer( + IBufferResource* buffer, Format indexFormat, uint32_t offset) +{ + m_boundIndexBuffer = (BufferResourceImpl*)buffer; + m_boundIndexFormat = D3DUtil::getMapFormat(indexFormat); + m_boundIndexOffset = offset; +} + +void RenderCommandEncoderImpl::prepareDraw() +{ + auto pipelineState = m_currentPipeline.Ptr(); + if (!pipelineState || (pipelineState->desc.type != PipelineType::Graphics)) + { + assert(!"No graphics pipeline state set"); + return; + } + + // Submit - setting for graphics + { + GraphicsSubmitter submitter(m_d3dCmdList); + RefPtr<PipelineStateBase> newPipeline; + if (SLANG_FAILED(_bindRenderState(&submitter, newPipeline))) + { + assert(!"Failed to bind render state"); + } + } + + m_d3dCmdList->IASetPrimitiveTopology(m_primitiveTopology); + + // Set up vertex buffer views + { + auto inputLayout = (InputLayoutImpl*)pipelineState->inputLayout.Ptr(); + if (inputLayout) + { + int numVertexViews = 0; + D3D12_VERTEX_BUFFER_VIEW vertexViews[16]; + for (Index i = 0; i < m_boundVertexBuffers.getCount(); i++) + { + const BoundVertexBuffer& boundVertexBuffer = m_boundVertexBuffers[i]; + BufferResourceImpl* buffer = boundVertexBuffer.m_buffer; + if (buffer) + { + D3D12_VERTEX_BUFFER_VIEW& vertexView = vertexViews[numVertexViews++]; + vertexView.BufferLocation = + buffer->m_resource.getResource()->GetGPUVirtualAddress() + + boundVertexBuffer.m_offset; + vertexView.SizeInBytes = + UINT(buffer->getDesc()->sizeInBytes - boundVertexBuffer.m_offset); + vertexView.StrideInBytes = inputLayout->m_vertexStreamStrides[i]; + } + } + m_d3dCmdList->IASetVertexBuffers(0, numVertexViews, vertexViews); + } + } + // Set up index buffer + if (m_boundIndexBuffer) + { + D3D12_INDEX_BUFFER_VIEW indexBufferView; + indexBufferView.BufferLocation = + m_boundIndexBuffer->m_resource.getResource()->GetGPUVirtualAddress() + + m_boundIndexOffset; + indexBufferView.SizeInBytes = + UINT(m_boundIndexBuffer->getDesc()->sizeInBytes - m_boundIndexOffset); + indexBufferView.Format = m_boundIndexFormat; + + m_d3dCmdList->IASetIndexBuffer(&indexBufferView); + } +} + +void RenderCommandEncoderImpl::draw(uint32_t vertexCount, uint32_t startVertex) +{ + prepareDraw(); + m_d3dCmdList->DrawInstanced(vertexCount, 1, startVertex, 0); +} + +void RenderCommandEncoderImpl::drawIndexed( + uint32_t indexCount, uint32_t startIndex, uint32_t baseVertex) +{ + prepareDraw(); + m_d3dCmdList->DrawIndexedInstanced(indexCount, 1, startIndex, baseVertex, 0); +} + +void RenderCommandEncoderImpl::endEncoding() +{ + PipelineCommandEncoder::endEncodingImpl(); + if (!m_framebuffer) + return; + // Issue clear commands based on render pass set up. + for (Index i = 0; i < m_renderPass->m_renderTargetAccesses.getCount(); i++) + { + auto& access = m_renderPass->m_renderTargetAccesses[i]; + + // Transit resource states. + { + D3D12BarrierSubmitter submitter(m_d3dCmdList); + auto resourceViewImpl = m_framebuffer->renderTargetViews[i].Ptr(); + if (!resourceViewImpl) + continue; + auto textureResource = + static_cast<TextureResourceImpl*>(resourceViewImpl->m_resource.Ptr()); + if (textureResource) + { + textureResource->m_resource.transition( + D3D12_RESOURCE_STATE_RENDER_TARGET, + D3DUtil::getResourceState(access.finalState), + submitter); + } + } + } + + if (m_renderPass->m_hasDepthStencil) + { + // Transit resource states. + D3D12BarrierSubmitter submitter(m_d3dCmdList); + auto resourceViewImpl = m_framebuffer->depthStencilView.Ptr(); + auto textureResource = + static_cast<TextureResourceImpl*>(resourceViewImpl->m_resource.Ptr()); + textureResource->m_resource.transition( + D3D12_RESOURCE_STATE_DEPTH_WRITE, + D3DUtil::getResourceState(m_renderPass->m_depthStencilAccess.finalState), + submitter); + } + m_framebuffer = nullptr; +} + +void RenderCommandEncoderImpl::setStencilReference(uint32_t referenceValue) +{ + m_d3dCmdList->OMSetStencilRef((UINT)referenceValue); +} + +void RenderCommandEncoderImpl::drawIndirect( + uint32_t maxDrawCount, + IBufferResource* argBuffer, + uint64_t argOffset, + IBufferResource* countBuffer, + uint64_t countOffset) +{ + prepareDraw(); + + auto argBufferImpl = static_cast<BufferResourceImpl*>(argBuffer); + auto countBufferImpl = static_cast<BufferResourceImpl*>(countBuffer); + + m_d3dCmdList->ExecuteIndirect( + m_renderer->drawIndirectCmdSignature, + maxDrawCount, + argBufferImpl->m_resource, + argOffset, + countBufferImpl ? countBufferImpl->m_resource.getResource() : nullptr, + countOffset); +} + +void RenderCommandEncoderImpl::drawIndexedIndirect( + uint32_t maxDrawCount, + IBufferResource* argBuffer, + uint64_t argOffset, + IBufferResource* countBuffer, + uint64_t countOffset) +{ + prepareDraw(); + + auto argBufferImpl = static_cast<BufferResourceImpl*>(argBuffer); + auto countBufferImpl = static_cast<BufferResourceImpl*>(countBuffer); + + m_d3dCmdList->ExecuteIndirect( + m_renderer->drawIndexedIndirectCmdSignature, + maxDrawCount, + argBufferImpl->m_resource, + argOffset, + countBufferImpl ? countBufferImpl->m_resource.getResource() : nullptr, + countOffset); +} + +Result RenderCommandEncoderImpl::setSamplePositions( + uint32_t samplesPerPixel, uint32_t pixelCount, const SamplePosition* samplePositions) +{ + if (m_commandBuffer->m_cmdList1) + { + m_commandBuffer->m_cmdList1->SetSamplePositions( + samplesPerPixel, pixelCount, (D3D12_SAMPLE_POSITION*)samplePositions); + return SLANG_OK; + } + return SLANG_E_NOT_AVAILABLE; +} + +void RenderCommandEncoderImpl::drawInstanced( + uint32_t vertexCount, + uint32_t instanceCount, + uint32_t startVertex, + uint32_t startInstanceLocation) +{ + prepareDraw(); + m_d3dCmdList->DrawInstanced(vertexCount, instanceCount, startVertex, startInstanceLocation); +} + +void RenderCommandEncoderImpl::drawIndexedInstanced( + uint32_t indexCount, + uint32_t instanceCount, + uint32_t startIndexLocation, + int32_t baseVertexLocation, + uint32_t startInstanceLocation) +{ + prepareDraw(); + m_d3dCmdList->DrawIndexedInstanced( + indexCount, instanceCount, startIndexLocation, baseVertexLocation, startInstanceLocation); +} + +void ComputeCommandEncoderImpl::endEncoding() { PipelineCommandEncoder::endEncodingImpl(); } + +void ComputeCommandEncoderImpl::init( + DeviceImpl* renderer, TransientResourceHeapImpl* transientHeap, CommandBufferImpl* cmdBuffer) +{ + PipelineCommandEncoder::init(cmdBuffer); + m_preCmdList = nullptr; + m_transientHeap = transientHeap; + m_currentPipeline = nullptr; +} + +Result ComputeCommandEncoderImpl::bindPipeline(IPipelineState* state, IShaderObject** outRootObject) +{ + return bindPipelineImpl(state, outRootObject); +} + +Result ComputeCommandEncoderImpl::bindPipelineWithRootObject( + IPipelineState* state, IShaderObject* rootObject) +{ + return bindPipelineWithRootObjectImpl(state, rootObject); +} + +void ComputeCommandEncoderImpl::dispatchCompute(int x, int y, int z) +{ + // Submit binding for compute + { + ComputeSubmitter submitter(m_d3dCmdList); + RefPtr<PipelineStateBase> newPipeline; + if (SLANG_FAILED(_bindRenderState(&submitter, newPipeline))) + { + assert(!"Failed to bind render state"); + } + } + m_d3dCmdList->Dispatch(x, y, z); +} + +void ComputeCommandEncoderImpl::dispatchComputeIndirect(IBufferResource* argBuffer, uint64_t offset) +{ + // Submit binding for compute + { + ComputeSubmitter submitter(m_d3dCmdList); + RefPtr<PipelineStateBase> newPipeline; + if (SLANG_FAILED(_bindRenderState(&submitter, newPipeline))) + { + assert(!"Failed to bind render state"); + } + } + auto argBufferImpl = static_cast<BufferResourceImpl*>(argBuffer); + + m_d3dCmdList->ExecuteIndirect( + m_renderer->dispatchIndirectCmdSignature, 1, argBufferImpl->m_resource, offset, nullptr, 0); +} + +FenceImpl::~FenceImpl() +{ + if (m_waitEvent) + CloseHandle(m_waitEvent); +} + +HANDLE FenceImpl::getWaitEvent() +{ + if (m_waitEvent) + return m_waitEvent; + m_waitEvent = CreateEventEx(nullptr, nullptr, 0, EVENT_ALL_ACCESS); + return m_waitEvent; +} + +Result FenceImpl::init(DeviceImpl* device, const IFence::Desc& desc) +{ + SLANG_RETURN_ON_FAIL(device->m_device->CreateFence( + desc.initialValue, + desc.isShared ? D3D12_FENCE_FLAG_SHARED : D3D12_FENCE_FLAG_NONE, + IID_PPV_ARGS(m_fence.writeRef()))); + return SLANG_OK; +} + +Result FenceImpl::getCurrentValue(uint64_t* outValue) +{ + *outValue = m_fence->GetCompletedValue(); + return SLANG_OK; +} + +Result FenceImpl::setCurrentValue(uint64_t value) +{ + SLANG_RETURN_ON_FAIL(m_fence->Signal(value)); + return SLANG_OK; +} + +Result FenceImpl::getSharedHandle(InteropHandle* outHandle) +{ + // Check if a shared handle already exists. + if (sharedHandle.handleValue != 0) + { + *outHandle = sharedHandle; + return SLANG_OK; + } + + ComPtr<ID3D12Device> devicePtr; + m_fence->GetDevice(IID_PPV_ARGS(devicePtr.writeRef())); + SLANG_RETURN_ON_FAIL(devicePtr->CreateSharedHandle( + m_fence, NULL, GENERIC_ALL, nullptr, (HANDLE*)&outHandle->handleValue)); + outHandle->api = InteropHandleAPI::D3D12; + sharedHandle = *outHandle; + return SLANG_OK; +} + +Result FenceImpl::getNativeHandle(InteropHandle* outNativeHandle) +{ + outNativeHandle->api = gfx::InteropHandleAPI::D3D12; + outNativeHandle->handleValue = (uint64_t)m_fence.get(); + return SLANG_OK; +} + +} // namespace d3d12 + +Result SLANG_MCALL createD3D12Device(const IDevice::Desc* desc, IDevice** outDevice) +{ + RefPtr<d3d12::DeviceImpl> result = new d3d12::DeviceImpl(); + SLANG_RETURN_ON_FAIL(result->initialize(*desc)); + returnComPtr(outDevice, result); + return SLANG_OK; +} +} // namespace gfx diff --git a/tools/gfx/d3d12/render-d3d12.h b/tools/gfx/d3d12/render-d3d12.h index 457fc1db6..3ef4a84a7 100644 --- a/tools/gfx/d3d12/render-d3d12.h +++ b/tools/gfx/d3d12/render-d3d12.h @@ -1,11 +1,1799 @@ // render-d3d12.h #pragma once +#include "../command-encoder-com-forward.h" +#include "../d3d/d3d-swapchain.h" +#include "../mutable-shader-object.h" #include "../renderer-shared.h" +#include "../simple-render-pass-layout.h" +#include "../transient-resource-heap-base.h" +#include "core/slang-basic.h" +#include "core/slang-blob.h" +#include "core/slang-chunked-list.h" +#include "descriptor-heap-d3d12.h" +#include "resource-d3d12.h" + +#define _CRT_SECURE_NO_WARNINGS +#define WIN32_LEAN_AND_MEAN +#define NOMINMAX +#include <Windows.h> +#undef WIN32_LEAN_AND_MEAN +#undef NOMINMAX + +#include <d3d12.h> +#include <dxgi1_4.h> + +#ifndef __ID3D12GraphicsCommandList1_FWD_DEFINED__ +// If can't find a definition of CommandList1, just use an empty definition +struct ID3D12GraphicsCommandList1 +{}; +#endif namespace gfx { +namespace d3d12 +{ + +using namespace Slang; + +// Define function pointer types for PIX library. +typedef HRESULT(WINAPI* PFN_BeginEventOnCommandList)( + ID3D12GraphicsCommandList* commandList, UINT64 color, _In_ PCSTR formatString); +typedef HRESULT(WINAPI* PFN_EndEventOnCommandList)(ID3D12GraphicsCommandList* commandList); + +static const Int kMaxNumRenderFrames = 4; +static const Int kMaxRTVCount = 8; + +struct D3D12DeviceInfo +{ + void clear() + { + m_dxgiFactory.setNull(); + m_device.setNull(); + m_adapter.setNull(); + m_desc = {}; + m_desc1 = {}; + m_isWarp = false; + m_isSoftware = false; + } + + bool m_isWarp; + bool m_isSoftware; + ComPtr<IDXGIFactory> m_dxgiFactory; + ComPtr<ID3D12Device> m_device; + ComPtr<ID3D12Device5> m_device5; + ComPtr<IDXGIAdapter> m_adapter; + DXGI_ADAPTER_DESC m_desc; + DXGI_ADAPTER_DESC1 m_desc1; +}; + +class CommandQueueImpl; +class TransientResourceHeapImpl; +class TextureResourceImpl; +class CommandBufferImpl; + +class DeviceImpl : public RendererBase +{ +public: + Desc m_desc; + D3D12DeviceExtendedDesc m_extendedDesc; + + DeviceInfo m_info; + String m_adapterName; + + bool m_isInitialized = false; + + ComPtr<ID3D12Debug> m_dxDebug; + + D3D12DeviceInfo m_deviceInfo; + ID3D12Device* m_device = nullptr; + ID3D12Device5* m_device5 = nullptr; + + VirtualObjectPool m_queueIndexAllocator; + + RefPtr<CommandQueueImpl> m_resourceCommandQueue; + RefPtr<TransientResourceHeapImpl> m_resourceCommandTransientHeap; + + RefPtr<D3D12GeneralExpandingDescriptorHeap> m_rtvAllocator; + RefPtr<D3D12GeneralExpandingDescriptorHeap> m_dsvAllocator; + + // Space in the GPU-visible heaps is precious, so we will also keep + // around CPU-visible heaps for storing shader-objects' descriptors in a format + // that is ready for copying into the GPU-visible heaps as needed. + // + RefPtr<D3D12GeneralExpandingDescriptorHeap> m_cpuViewHeap; ///< Cbv, Srv, Uav + RefPtr<D3D12GeneralExpandingDescriptorHeap> m_cpuSamplerHeap; ///< Heap for samplers + + // Dll entry points + PFN_D3D12_GET_DEBUG_INTERFACE m_D3D12GetDebugInterface = nullptr; + PFN_D3D12_CREATE_DEVICE m_D3D12CreateDevice = nullptr; + PFN_D3D12_SERIALIZE_ROOT_SIGNATURE m_D3D12SerializeRootSignature = nullptr; + + PFN_BeginEventOnCommandList m_BeginEventOnCommandList = nullptr; + PFN_EndEventOnCommandList m_EndEventOnCommandList = nullptr; + + bool m_nvapi = false; + + // Command signatures required for indirect draws. These indicate the format of the indirect + // as well as the command type to be used (DrawInstanced and DrawIndexedInstanced, in this + // case). + ComPtr<ID3D12CommandSignature> drawIndirectCmdSignature; + ComPtr<ID3D12CommandSignature> drawIndexedIndirectCmdSignature; + ComPtr<ID3D12CommandSignature> dispatchIndirectCmdSignature; + +public: + virtual SLANG_NO_THROW SlangResult SLANG_MCALL initialize(const Desc& desc) override; + virtual SLANG_NO_THROW Result SLANG_MCALL + getFormatSupportedResourceStates(Format format, ResourceStateSet* outStates) override; + + virtual SLANG_NO_THROW Result SLANG_MCALL + createCommandQueue(const ICommandQueue::Desc& desc, ICommandQueue** outQueue) override; + virtual SLANG_NO_THROW Result SLANG_MCALL createTransientResourceHeap( + const ITransientResourceHeap::Desc& desc, ITransientResourceHeap** outHeap) override; + virtual SLANG_NO_THROW Result SLANG_MCALL createSwapchain( + const ISwapchain::Desc& desc, WindowHandle window, ISwapchain** outSwapchain) override; + + virtual SLANG_NO_THROW Result SLANG_MCALL getTextureAllocationInfo( + const ITextureResource::Desc& desc, size_t* outSize, size_t* outAlignment) override; + virtual SLANG_NO_THROW Result SLANG_MCALL getTextureRowAlignment(size_t* outAlignment) override; + virtual SLANG_NO_THROW Result SLANG_MCALL createTextureResource( + const ITextureResource::Desc& desc, + const ITextureResource::SubresourceData* initData, + ITextureResource** outResource) override; + virtual SLANG_NO_THROW Result SLANG_MCALL createTextureFromNativeHandle( + InteropHandle handle, + const ITextureResource::Desc& srcDesc, + ITextureResource** outResource) override; + virtual SLANG_NO_THROW Result SLANG_MCALL createBufferResource( + const IBufferResource::Desc& desc, + const void* initData, + IBufferResource** outResource) override; + virtual SLANG_NO_THROW Result SLANG_MCALL createBufferFromNativeHandle( + InteropHandle handle, + const IBufferResource::Desc& srcDesc, + IBufferResource** outResource) override; + + virtual SLANG_NO_THROW Result SLANG_MCALL + createSamplerState(ISamplerState::Desc const& desc, ISamplerState** outSampler) override; + + virtual SLANG_NO_THROW Result SLANG_MCALL createTextureView( + ITextureResource* texture, + IResourceView::Desc const& desc, + IResourceView** outView) override; + virtual SLANG_NO_THROW Result SLANG_MCALL createBufferView( + IBufferResource* buffer, + IBufferResource* counterBuffer, + IResourceView::Desc const& desc, + IResourceView** outView) override; + + virtual SLANG_NO_THROW Result SLANG_MCALL + createFramebuffer(IFramebuffer::Desc const& desc, IFramebuffer** outFrameBuffer) override; + + virtual SLANG_NO_THROW Result SLANG_MCALL createFramebufferLayout( + IFramebufferLayout::Desc const& desc, IFramebufferLayout** outLayout) override; + + virtual SLANG_NO_THROW Result SLANG_MCALL createRenderPassLayout( + const IRenderPassLayout::Desc& desc, IRenderPassLayout** outRenderPassLayout) override; + + virtual SLANG_NO_THROW Result SLANG_MCALL + createInputLayout(IInputLayout::Desc const& desc, IInputLayout** outLayout) override; + + virtual Result createShaderObjectLayout( + slang::TypeLayoutReflection* typeLayout, ShaderObjectLayoutBase** outLayout) override; + virtual Result createShaderObject( + ShaderObjectLayoutBase* layout, IShaderObject** outObject) override; + virtual Result createMutableShaderObject( + ShaderObjectLayoutBase* layout, IShaderObject** outObject) override; + virtual SLANG_NO_THROW Result SLANG_MCALL + createMutableRootShaderObject(IShaderProgram* program, IShaderObject** outObject) override; + + virtual SLANG_NO_THROW Result SLANG_MCALL + createShaderTable(const IShaderTable::Desc& desc, IShaderTable** outShaderTable) override; + virtual SLANG_NO_THROW Result SLANG_MCALL createProgram( + const IShaderProgram::Desc& desc, + IShaderProgram** outProgram, + ISlangBlob** outDiagnostics) override; + virtual SLANG_NO_THROW Result SLANG_MCALL createGraphicsPipelineState( + const GraphicsPipelineStateDesc& desc, IPipelineState** outState) override; + virtual SLANG_NO_THROW Result SLANG_MCALL createComputePipelineState( + const ComputePipelineStateDesc& desc, IPipelineState** outState) override; + + virtual SLANG_NO_THROW Result SLANG_MCALL + createQueryPool(const IQueryPool::Desc& desc, IQueryPool** outState) override; + + virtual SLANG_NO_THROW Result SLANG_MCALL + createFence(const IFence::Desc& desc, IFence** outFence) override; + + virtual SLANG_NO_THROW Result SLANG_MCALL waitForFences( + uint32_t fenceCount, + IFence** fences, + uint64_t* fenceValues, + bool waitForAll, + uint64_t timeout) override; + + virtual SLANG_NO_THROW SlangResult SLANG_MCALL readTextureResource( + ITextureResource* resource, + ResourceState state, + ISlangBlob** outBlob, + size_t* outRowPitch, + size_t* outPixelSize) override; + + virtual SLANG_NO_THROW SlangResult SLANG_MCALL readBufferResource( + IBufferResource* resource, size_t offset, size_t size, ISlangBlob** outBlob) override; + + virtual SLANG_NO_THROW const gfx::DeviceInfo& SLANG_MCALL getDeviceInfo() const override; + + virtual SLANG_NO_THROW Result SLANG_MCALL + getNativeDeviceHandles(InteropHandles* outHandles) override; + + ~DeviceImpl(); + + virtual SLANG_NO_THROW Result SLANG_MCALL getAccelerationStructurePrebuildInfo( + const IAccelerationStructure::BuildInputs& buildInputs, + IAccelerationStructure::PrebuildInfo* outPrebuildInfo) override; + virtual SLANG_NO_THROW Result SLANG_MCALL createAccelerationStructure( + const IAccelerationStructure::CreateDesc& desc, IAccelerationStructure** outView) override; + virtual SLANG_NO_THROW Result SLANG_MCALL createRayTracingPipelineState( + const RayTracingPipelineStateDesc& desc, IPipelineState** outState) override; + +public: + static PROC loadProc(HMODULE module, char const* name); + + Result createCommandQueueImpl(CommandQueueImpl** outQueue); + + Result createTransientResourceHeapImpl( + ITransientResourceHeap::Flags::Enum flags, + size_t constantBufferSize, + uint32_t viewDescriptors, + uint32_t samplerDescriptors, + TransientResourceHeapImpl** outHeap); + + Result createBuffer( + const D3D12_RESOURCE_DESC& resourceDesc, + const void* srcData, + size_t srcDataSize, + D3D12_RESOURCE_STATES finalState, + D3D12Resource& resourceOut, + bool isShared, + MemoryType access = MemoryType::DeviceLocal); + + Result captureTextureToSurface( + TextureResourceImpl* resource, + ResourceState state, + ISlangBlob** blob, + size_t* outRowPitch, + size_t* outPixelSize); + + Result _createDevice( + DeviceCheckFlags deviceCheckFlags, + const UnownedStringSlice& nameMatch, + D3D_FEATURE_LEVEL featureLevel, + D3D12DeviceInfo& outDeviceInfo); + + struct ResourceCommandRecordInfo + { + ComPtr<ICommandBuffer> commandBuffer; + ID3D12GraphicsCommandList* d3dCommandList; + }; + ResourceCommandRecordInfo encodeResourceCommands(); + void submitResourceCommandsAndWait(const ResourceCommandRecordInfo& info); +}; + +class BufferResourceImpl : public gfx::BufferResource +{ +public: + typedef BufferResource Parent; + + BufferResourceImpl(const Desc& desc); + + ~BufferResourceImpl(); + + D3D12Resource m_resource; ///< The resource in gpu memory, allocated on the correct heap + ///< relative to the cpu access flag + + D3D12_RESOURCE_STATES m_defaultState; + + virtual SLANG_NO_THROW DeviceAddress SLANG_MCALL getDeviceAddress() override; + + virtual SLANG_NO_THROW Result SLANG_MCALL + getNativeResourceHandle(InteropHandle* outHandle) override; + + virtual SLANG_NO_THROW Result SLANG_MCALL getSharedHandle(InteropHandle* outHandle) override; + + virtual SLANG_NO_THROW Result SLANG_MCALL + map(MemoryRange* rangeToRead, void** outPointer) override; + + virtual SLANG_NO_THROW Result SLANG_MCALL unmap(MemoryRange* writtenRange) override; + + virtual SLANG_NO_THROW Result SLANG_MCALL setDebugName(const char* name) override; +}; + +class TextureResourceImpl : public TextureResource +{ +public: + typedef TextureResource Parent; + + TextureResourceImpl(const Desc& desc); + + ~TextureResourceImpl(); + + D3D12Resource m_resource; + D3D12_RESOURCE_STATES m_defaultState; + + virtual SLANG_NO_THROW Result SLANG_MCALL + getNativeResourceHandle(InteropHandle* outHandle) override; + + virtual SLANG_NO_THROW Result SLANG_MCALL getSharedHandle(InteropHandle* outHandle) override; + + virtual SLANG_NO_THROW Result SLANG_MCALL setDebugName(const char* name) override; +}; + +class SamplerStateImpl : public SamplerStateBase +{ +public: + D3D12Descriptor m_descriptor; + RefPtr<D3D12GeneralExpandingDescriptorHeap> m_allocator; + ~SamplerStateImpl(); + virtual SLANG_NO_THROW Result SLANG_MCALL getNativeHandle(InteropHandle* outHandle) override; +}; + +class ResourceViewInternalImpl +{ +public: + D3D12Descriptor m_descriptor; + RefPtr<D3D12GeneralExpandingDescriptorHeap> m_allocator; + ~ResourceViewInternalImpl(); +}; + +class ResourceViewImpl + : public ResourceViewBase + , public ResourceViewInternalImpl +{ +public: + Slang::RefPtr<Resource> m_resource; + virtual SLANG_NO_THROW Result SLANG_MCALL getNativeHandle(InteropHandle* outHandle) override; +}; + +class FramebufferLayoutImpl : public FramebufferLayoutBase +{ +public: + ShortList<IFramebufferLayout::AttachmentLayout> m_renderTargets; + bool m_hasDepthStencil = false; + IFramebufferLayout::AttachmentLayout m_depthStencil; +}; + +class FramebufferImpl : public FramebufferBase +{ +public: + ShortList<RefPtr<ResourceViewImpl>> renderTargetViews; + RefPtr<ResourceViewImpl> depthStencilView; + ShortList<D3D12_CPU_DESCRIPTOR_HANDLE> renderTargetDescriptors; + struct Color4f + { + float values[4]; + }; + ShortList<Color4f> renderTargetClearValues; + D3D12_CPU_DESCRIPTOR_HANDLE depthStencilDescriptor; + DepthStencilClearValue depthStencilClearValue; +}; + +class RenderPassLayoutImpl : public SimpleRenderPassLayout +{ +public: + RefPtr<FramebufferLayoutImpl> m_framebufferLayout; + void init(const IRenderPassLayout::Desc& desc); +}; + +class InputLayoutImpl : public InputLayoutBase +{ +public: + List<D3D12_INPUT_ELEMENT_DESC> m_elements; + List<UINT> m_vertexStreamStrides; + List<char> m_text; ///< Holds all strings to keep in scope +}; + +class PipelineStateImpl : public PipelineStateBase +{ +public: + PipelineStateImpl(DeviceImpl* device) + : m_device(device) + {} + DeviceImpl* m_device; + ComPtr<ID3D12PipelineState> m_pipelineState; + void init(const GraphicsPipelineStateDesc& inDesc); + void init(const ComputePipelineStateDesc& inDesc); + virtual SLANG_NO_THROW Result SLANG_MCALL getNativeHandle(InteropHandle* outHandle) override; + virtual Result ensureAPIPipelineStateCreated() override; +}; + +#if SLANG_GFX_HAS_DXR_SUPPORT +class RayTracingPipelineStateImpl : public PipelineStateBase +{ +public: + ComPtr<ID3D12StateObject> m_stateObject; + DeviceImpl* m_device; + RayTracingPipelineStateImpl(DeviceImpl* device); + void init(const RayTracingPipelineStateDesc& inDesc); + virtual SLANG_NO_THROW Result SLANG_MCALL getNativeHandle(InteropHandle* outHandle) override; + virtual Result ensureAPIPipelineStateCreated() override; +}; +#endif + +class QueryPoolImpl : public QueryPoolBase +{ +public: + Result init(const IQueryPool::Desc& desc, DeviceImpl* device); + + virtual SLANG_NO_THROW Result SLANG_MCALL + getResult(SlangInt queryIndex, SlangInt count, uint64_t* data) override; + + void writeTimestamp(ID3D12GraphicsCommandList* cmdList, SlangInt index); + +public: + D3D12_QUERY_TYPE m_queryType; + ComPtr<ID3D12QueryHeap> m_queryHeap; + D3D12Resource m_readBackBuffer; + ComPtr<ID3D12CommandAllocator> m_commandAllocator; + ComPtr<ID3D12GraphicsCommandList> m_commandList; + ComPtr<ID3D12Fence> m_fence; + ComPtr<ID3D12CommandQueue> m_commandQueue; + HANDLE m_waitEvent; + UINT64 m_eventValue = 0; +}; + +/// Implements the IQueryPool interface with a plain buffer. +/// Used for query types that does not correspond to a D3D query, +/// such as ray-tracing acceleration structure post-build info. +class PlainBufferProxyQueryPoolImpl : public QueryPoolBase +{ +public: + SLANG_COM_OBJECT_IUNKNOWN_ALL + IQueryPool* getInterface(const Guid& guid); + +public: + Result init(const IQueryPool::Desc& desc, DeviceImpl* device, uint32_t stride); + + virtual SLANG_NO_THROW Result SLANG_MCALL reset() override; + virtual SLANG_NO_THROW Result SLANG_MCALL + getResult(SlangInt queryIndex, SlangInt count, uint64_t* data) override; + +public: + QueryType m_queryType; + RefPtr<BufferResourceImpl> m_bufferResource; + RefPtr<DeviceImpl> m_device; + List<uint8_t> m_result; + bool m_resultDirty = true; + uint32_t m_stride = 0; + uint32_t m_count = 0; +}; + +struct BoundVertexBuffer +{ + RefPtr<BufferResourceImpl> m_buffer; + int m_offset; +}; + +class TransientResourceHeapImpl + : public TransientResourceHeapBaseImpl<DeviceImpl, BufferResourceImpl> + , public ID3D12TransientResourceHeap +{ +private: + typedef TransientResourceHeapBaseImpl<DeviceImpl, BufferResourceImpl> Super; + +public: + ComPtr<ID3D12CommandAllocator> m_commandAllocator; + List<ComPtr<ID3D12GraphicsCommandList>> m_d3dCommandListPool; + List<RefPtr<RefObject>> m_commandBufferPool; + uint32_t m_commandListAllocId = 0; + // Wait values for each command queue. + struct QueueWaitInfo + { + uint64_t waitValue; + HANDLE fenceEvent; + ComPtr<ID3D12Fence> fence = nullptr; + }; + ShortList<QueueWaitInfo, 4> m_waitInfos; + + QueueWaitInfo& getQueueWaitInfo(uint32_t queueIndex); + // During command submission, we need all the descriptor tables that get + // used to come from a single heap (for each descriptor heap type). + // + // We will thus keep a single heap of each type that we hope will hold + // all the descriptors that actually get needed in a frame. + ShortList<D3D12DescriptorHeap, 4> m_viewHeaps; // Cbv, Srv, Uav + ShortList<D3D12DescriptorHeap, 4> m_samplerHeaps; // Heap for samplers + int32_t m_currentViewHeapIndex = -1; + int32_t m_currentSamplerHeapIndex = -1; + bool m_canResize = false; + + uint32_t m_viewHeapSize; + uint32_t m_samplerHeapSize; + + D3D12DescriptorHeap& getCurrentViewHeap(); + D3D12DescriptorHeap& getCurrentSamplerHeap(); + + D3D12LinearExpandingDescriptorHeap m_stagingCpuViewHeap; + D3D12LinearExpandingDescriptorHeap m_stagingCpuSamplerHeap; + + virtual SLANG_NO_THROW Result SLANG_MCALL + queryInterface(SlangUUID const& uuid, void** outObject) override; + + virtual SLANG_NO_THROW uint32_t SLANG_MCALL addRef() override { return Super::addRef(); } + virtual SLANG_NO_THROW uint32_t SLANG_MCALL release() override { return Super::release(); } + + virtual SLANG_NO_THROW Result SLANG_MCALL allocateTransientDescriptorTable( + DescriptorType type, + uint32_t count, + uint64_t& outDescriptorOffset, + void** outD3DDescriptorHeapHandle) override; + + ~TransientResourceHeapImpl(); + + bool canResize() { return m_canResize; } + + Result init( + const ITransientResourceHeap::Desc& desc, + DeviceImpl* device, + uint32_t viewHeapSize, + uint32_t samplerHeapSize); + + Result allocateNewViewDescriptorHeap(DeviceImpl* device); + + Result allocateNewSamplerDescriptorHeap(DeviceImpl* device); + + virtual SLANG_NO_THROW Result SLANG_MCALL + createCommandBuffer(ICommandBuffer** outCommandBuffer) override; + + virtual SLANG_NO_THROW Result SLANG_MCALL synchronizeAndReset() override; +}; + +struct Submitter +{ + virtual void setRootConstantBufferView( + int index, D3D12_GPU_VIRTUAL_ADDRESS gpuBufferLocation) = 0; + virtual void setRootUAV(int index, D3D12_GPU_VIRTUAL_ADDRESS gpuBufferLocation) = 0; + virtual void setRootSRV(int index, D3D12_GPU_VIRTUAL_ADDRESS gpuBufferLocation) = 0; + virtual void setRootDescriptorTable(int index, D3D12_GPU_DESCRIPTOR_HANDLE BaseDescriptor) = 0; + virtual void setRootSignature(ID3D12RootSignature* rootSignature) = 0; + virtual void setRootConstants( + Index rootParamIndex, + Index dstOffsetIn32BitValues, + Index countOf32BitValues, + void const* srcData) = 0; + virtual void setPipelineState(PipelineStateBase* pipelineState) = 0; +}; + +class PipelineCommandEncoder +{ +public: + bool m_isOpen = false; + bool m_bindingDirty = true; + CommandBufferImpl* m_commandBuffer; + TransientResourceHeapImpl* m_transientHeap; + DeviceImpl* m_renderer; + ID3D12Device* m_device; + ID3D12GraphicsCommandList* m_d3dCmdList; + ID3D12GraphicsCommandList* m_preCmdList = nullptr; + + RefPtr<PipelineStateBase> m_currentPipeline; + + static int getBindPointIndex(PipelineType type); + + void init(CommandBufferImpl* commandBuffer); + + void endEncodingImpl() { m_isOpen = false; } + + Result bindPipelineImpl(IPipelineState* pipelineState, IShaderObject** outRootObject); + + Result bindPipelineWithRootObjectImpl(IPipelineState* pipelineState, IShaderObject* rootObject); + + /// Specializes the pipeline according to current root-object argument values, + /// applys the root object bindings and binds the pipeline state. + /// The newly specialized pipeline is held alive by the pipeline cache so users of + /// `newPipeline` do not need to maintain its lifespan. + Result _bindRenderState(Submitter* submitter, RefPtr<PipelineStateBase>& newPipeline); +}; + +struct DescriptorTable +{ + DescriptorHeapReference m_heap; + uint32_t m_offset = 0; + uint32_t m_count = 0; + + SLANG_FORCE_INLINE uint32_t getDescriptorCount() const { return m_count; } + + /// Get the GPU handle at the specified index + SLANG_FORCE_INLINE D3D12_GPU_DESCRIPTOR_HANDLE getGpuHandle(uint32_t index = 0) const + { + SLANG_ASSERT(index < getDescriptorCount()); + return m_heap.getGpuHandle(m_offset + index); + } + + /// Get the CPU handle at the specified index + SLANG_FORCE_INLINE D3D12_CPU_DESCRIPTOR_HANDLE getCpuHandle(uint32_t index = 0) const + { + SLANG_ASSERT(index < getDescriptorCount()); + return m_heap.getCpuHandle(m_offset + index); + } + + void freeIfSupported() + { + if (m_count) + { + m_heap.freeIfSupported(m_offset, m_count); + m_offset = 0; + m_count = 0; + } + } + + bool allocate(uint32_t count) + { + auto allocatedOffset = m_heap.allocate(count); + if (allocatedOffset == -1) + return false; + m_offset = allocatedOffset; + m_count = count; + return true; + } + + bool allocate(DescriptorHeapReference heap, uint32_t count) + { + auto allocatedOffset = heap.allocate(count); + if (allocatedOffset == -1) + return false; + m_heap = heap; + m_offset = allocatedOffset; + m_count = count; + return true; + } +}; + +/// Contextual data and operations required when binding shader objects to the pipeline state +struct BindingContext +{ + PipelineCommandEncoder* encoder; + Submitter* submitter; + TransientResourceHeapImpl* transientHeap; + DeviceImpl* device; + D3D12_DESCRIPTOR_HEAP_TYPE + outOfMemoryHeap; // The type of descriptor heap that is OOM during binding. +}; + +/// A representation of the offset at which to bind a shader parameter or sub-object +struct BindingOffset +{ + // Note: When we actually bind a shader object to the pipeline we do not care about + // HLSL-specific notions like `t` registers and `space`s. Those concepts are all + // mediated by the root signature. + // + // Instead, we need to consider the offsets at which the object will be bound + // into the actual D3D12 API state, which consists of the index of the current + // root parameter to bind from, as well as indices into the current descriptor + // tables (for resource views and samplers). + + uint32_t rootParam = 0; + uint32_t resource = 0; + uint32_t sampler = 0; + + void operator+=(BindingOffset const& offset) + { + rootParam += offset.rootParam; + resource += offset.resource; + sampler += offset.sampler; + } +}; + +/// A reprsentation of an allocated descriptor set, consisting of an option resource table and +/// an optional sampler table +struct DescriptorSet +{ + DescriptorTable resourceTable; + DescriptorTable samplerTable; + + void freeIfSupported() + { + resourceTable.freeIfSupported(); + samplerTable.freeIfSupported(); + } +}; + +// Provides information on how binding ranges are stored in descriptor tables for +// a shader object. +// We allocate one CPU descriptor table for each descriptor heap type for the shader +// object. In `ShaderObjectLayoutImpl`, we store the offset into the descriptor tables +// for each binding, so we know where to write the descriptor when the user sets +// a resource or sampler binding. +class ShaderObjectLayoutImpl : public ShaderObjectLayoutBase +{ +public: + /// Information about a single logical binding range + struct BindingRangeInfo + { + // Some of the information we store on binding ranges is redundant with + // the information that Slang's reflection information stores, but having + // it here can make the code more compact and obvious. + + /// The type of binding in this range. + slang::BindingType bindingType; + + /// The shape of the resource + SlangResourceShape resourceShape; + + /// The number of distinct bindings in this range. + uint32_t count; + + /// A "flat" index for this range in whatever array provides backing storage for it + uint32_t baseIndex; + + /// An index into the sub-object array if this binding range is treated + /// as a sub-object. + uint32_t subObjectIndex; + + bool isRootParameter; + }; + + /// Offset information for a sub-object range + struct SubObjectRangeOffset : BindingOffset + { + SubObjectRangeOffset() {} + + SubObjectRangeOffset(slang::VariableLayoutReflection* varLayout); + + /// The offset for "pending" ordinary data related to this range + uint32_t pendingOrdinaryData = 0; + }; + + /// Stride information for a sub-object range + struct SubObjectRangeStride : BindingOffset + { + SubObjectRangeStride() {} + + SubObjectRangeStride(slang::TypeLayoutReflection* typeLayout); + + /// The strid for "pending" ordinary data related to this range + uint32_t pendingOrdinaryData = 0; + }; + + /// Information about a sub-objecrt range + struct SubObjectRangeInfo + { + /// The index of the binding range corresponding to this sub-object range + Index bindingRangeIndex = 0; + + /// Layout information for the type of sub-object expected to be bound, if known + RefPtr<ShaderObjectLayoutImpl> layout; + + /// The offset to use when binding the first object in this range + SubObjectRangeOffset offset; + + /// Stride between consecutive objects in this range + SubObjectRangeStride stride; + }; + + struct RootParameterInfo + { + IResourceView::Type type; + }; + + static bool isBindingRangeRootParameter( + SlangSession* globalSession, + const char* rootParameterAttributeName, + slang::TypeLayoutReflection* typeLayout, + Index bindingRangeIndex); + + struct Builder + { + public: + Builder(RendererBase* renderer) + : m_renderer(renderer) + {} + + RendererBase* m_renderer; + slang::TypeLayoutReflection* m_elementTypeLayout; + List<BindingRangeInfo> m_bindingRanges; + List<SubObjectRangeInfo> m_subObjectRanges; + List<RootParameterInfo> m_rootParamsInfo; + + /// The number of sub-objects (not just sub-object *ranges*) stored in instances of this + /// layout + uint32_t m_subObjectCount = 0; + + /// Counters for the number of root parameters, resources, and samplers in this object + /// itself + BindingOffset m_ownCounts; + + /// Counters for the number of root parameters, resources, and sampler in this object + /// and transitive sub-objects + BindingOffset m_totalCounts; + + /// The number of root parameter consumed by (transitive) sub-objects + uint32_t m_childRootParameterCount = 0; + + /// The total size in bytes of the ordinary data for this object and transitive + /// sub-object. + uint32_t m_totalOrdinaryDataSize = 0; + + /// The container type of this shader object. When `m_containerType` is + /// `StructuredBuffer` or `UnsizedArray`, this shader object represents a collection + /// instead of a single object. + ShaderObjectContainerType m_containerType = ShaderObjectContainerType::None; + + Result setElementTypeLayout(slang::TypeLayoutReflection* typeLayout); + + Result build(ShaderObjectLayoutImpl** outLayout); + }; + + static Result createForElementType( + RendererBase* renderer, + slang::TypeLayoutReflection* elementType, + ShaderObjectLayoutImpl** outLayout); + + List<BindingRangeInfo> const& getBindingRanges() { return m_bindingRanges; } + + Index getBindingRangeCount() { return m_bindingRanges.getCount(); } + + BindingRangeInfo const& getBindingRange(Index index) { return m_bindingRanges[index]; } + + uint32_t getResourceSlotCount() { return m_ownCounts.resource; } + uint32_t getSamplerSlotCount() { return m_ownCounts.sampler; } + Index getSubObjectSlotCount() { return m_subObjectCount; } + Index getSubObjectCount() { return m_subObjectCount; } + + uint32_t getTotalResourceDescriptorCount() { return m_totalCounts.resource; } + uint32_t getTotalSamplerDescriptorCount() { return m_totalCounts.sampler; } + + uint32_t getOrdinaryDataBufferCount() { return m_totalOrdinaryDataSize ? 1 : 0; } + bool hasOrdinaryDataBuffer() { return m_totalOrdinaryDataSize != 0; } + + uint32_t getTotalResourceDescriptorCountWithoutOrdinaryDataBuffer() + { + return m_totalCounts.resource - getOrdinaryDataBufferCount(); + } + + uint32_t getOwnUserRootParameterCount() { return (uint32_t)m_rootParamsInfo.getCount(); } + uint32_t getTotalRootTableParameterCount() { return m_totalCounts.rootParam; } + uint32_t getChildRootParameterCount() { return m_childRootParameterCount; } + + uint32_t getTotalOrdinaryDataSize() const { return m_totalOrdinaryDataSize; } + + SubObjectRangeInfo const& getSubObjectRange(Index index) { return m_subObjectRanges[index]; } + List<SubObjectRangeInfo> const& getSubObjectRanges() { return m_subObjectRanges; } + + RendererBase* getRenderer() { return m_renderer; } + + slang::TypeReflection* getType() { return m_elementTypeLayout->getType(); } + + const RootParameterInfo& getRootParameterInfo(Index index) { return m_rootParamsInfo[index]; } + +protected: + Result init(Builder* builder); + + List<BindingRangeInfo> m_bindingRanges; + List<SubObjectRangeInfo> m_subObjectRanges; + List<RootParameterInfo> m_rootParamsInfo; + + BindingOffset m_ownCounts; + BindingOffset m_totalCounts; + + uint32_t m_subObjectCount = 0; + uint32_t m_childRootParameterCount = 0; + + uint32_t m_totalOrdinaryDataSize = 0; +}; + +class RootShaderObjectLayoutImpl : public ShaderObjectLayoutImpl +{ + typedef ShaderObjectLayoutImpl Super; + +public: + struct EntryPointInfo + { + RefPtr<ShaderObjectLayoutImpl> layout; + BindingOffset offset; + }; + + struct Builder : Super::Builder + { + Builder( + RendererBase* renderer, + slang::IComponentType* program, + slang::ProgramLayout* programLayout) + : Super::Builder(renderer) + , m_program(program) + , m_programLayout(programLayout) + {} + + Result build(RootShaderObjectLayoutImpl** outLayout); + + void addGlobalParams(slang::VariableLayoutReflection* globalsLayout); + + void addEntryPoint(SlangStage stage, ShaderObjectLayoutImpl* entryPointLayout); + + slang::IComponentType* m_program; + slang::ProgramLayout* m_programLayout; + List<EntryPointInfo> m_entryPoints; + }; + + EntryPointInfo& getEntryPoint(Index index) { return m_entryPoints[index]; } + + List<EntryPointInfo>& getEntryPoints() { return m_entryPoints; } + + struct DescriptorSetLayout + { + List<D3D12_DESCRIPTOR_RANGE> m_resourceRanges; + List<D3D12_DESCRIPTOR_RANGE> m_samplerRanges; + uint32_t m_resourceCount = 0; + uint32_t m_samplerCount = 0; + }; + + struct RootSignatureDescBuilder + { + DeviceImpl* m_device; + + RootSignatureDescBuilder(DeviceImpl* device) + : m_device(device) + {} + + // We will use one descriptor set for the global scope and one additional + // descriptor set for each `ParameterBlock` binding range in the shader object + // hierarchy, regardless of the shader's `space` indices. + List<DescriptorSetLayout> m_descriptorSets; + List<D3D12_ROOT_PARAMETER> m_rootParameters; + D3D12_ROOT_SIGNATURE_DESC m_rootSignatureDesc = {}; + + static Result translateDescriptorRangeType( + slang::BindingType c, D3D12_DESCRIPTOR_RANGE_TYPE* outType); + + /// Stores offset information to apply to the reflected register/space for a descriptor + /// range. + /// + struct BindingRegisterOffset + { + uint32_t spaceOffset = 0; // The `space` index as specified in shader. + + enum + { + kRangeTypeCount = 4 + }; + + /// An offset to apply for each D3D12 register class, as given + /// by a `D3D12_DESCRIPTOR_RANGE_TYPE`. + /// + /// Note that the `D3D12_DESCRIPTOR_RANGE_TYPE` enumeration has + /// values between 0 and 3, inclusive. + /// + uint32_t offsetForRangeType[kRangeTypeCount] = {0, 0, 0, 0}; + + uint32_t& operator[](D3D12_DESCRIPTOR_RANGE_TYPE type) + { + return offsetForRangeType[int(type)]; + } + + uint32_t operator[](D3D12_DESCRIPTOR_RANGE_TYPE type) const + { + return offsetForRangeType[int(type)]; + } + + BindingRegisterOffset() {} + + BindingRegisterOffset(slang::VariableLayoutReflection* varLayout) + { + if (varLayout) + { + spaceOffset = + (UINT)varLayout->getOffset(SLANG_PARAMETER_CATEGORY_REGISTER_SPACE); + offsetForRangeType[D3D12_DESCRIPTOR_RANGE_TYPE_CBV] = + (UINT)varLayout->getOffset(SLANG_PARAMETER_CATEGORY_CONSTANT_BUFFER); + offsetForRangeType[D3D12_DESCRIPTOR_RANGE_TYPE_SRV] = + (UINT)varLayout->getOffset(SLANG_PARAMETER_CATEGORY_SHADER_RESOURCE); + offsetForRangeType[D3D12_DESCRIPTOR_RANGE_TYPE_UAV] = + (UINT)varLayout->getOffset(SLANG_PARAMETER_CATEGORY_UNORDERED_ACCESS); + offsetForRangeType[D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER] = + (UINT)varLayout->getOffset(SLANG_PARAMETER_CATEGORY_SAMPLER_STATE); + } + } + + void operator+=(BindingRegisterOffset const& other) + { + spaceOffset += other.spaceOffset; + for (int i = 0; i < kRangeTypeCount; ++i) + { + offsetForRangeType[i] += other.offsetForRangeType[i]; + } + } + }; + + struct BindingRegisterOffsetPair + { + BindingRegisterOffset primary; + BindingRegisterOffset pending; + + BindingRegisterOffsetPair() {} + + BindingRegisterOffsetPair(slang::VariableLayoutReflection* varLayout) + : primary(varLayout) + , pending(varLayout->getPendingDataLayout()) + {} + + void operator+=(BindingRegisterOffsetPair const& other) + { + primary += other.primary; + pending += other.pending; + } + }; + /// Add a new descriptor set to the layout being computed. + /// + /// Note that a "descriptor set" in the layout may amount to + /// zero, one, or two different descriptor *tables* in the + /// final D3D12 root signature. Each descriptor set may + /// contain zero or more view ranges (CBV/SRV/UAV) and zero + /// or more sampler ranges. It maps to a view descriptor table + /// if the number of view ranges is non-zero and to a sampler + /// descriptor table if the number of sampler ranges is non-zero. + /// + uint32_t addDescriptorSet(); + + Result addDescriptorRange( + Index physicalDescriptorSetIndex, + D3D12_DESCRIPTOR_RANGE_TYPE rangeType, + UINT registerIndex, + UINT spaceIndex, + UINT count, + bool isRootParameter); + /// Add one descriptor range as specified in Slang reflection information to the layout. + /// + /// The layout information is taken from `typeLayout` for the descriptor + /// range with the given `descriptorRangeIndex` within the logical + /// descriptor set (reflected by Slang) with the given `logicalDescriptorSetIndex`. + /// + /// The `physicalDescriptorSetIndex` is the index in the `m_descriptorSets` array of + /// the descriptor set that the range should be added to. + /// + /// The `offset` encodes information about space and/or register offsets that + /// should be applied to descrptor ranges. + /// + /// This operation can fail if the given descriptor range encodes a range that + /// doesn't map to anything directly supported by D3D12. Higher-level routines + /// will often want to ignore such failures. + /// + Result addDescriptorRange( + slang::TypeLayoutReflection* typeLayout, + Index physicalDescriptorSetIndex, + BindingRegisterOffset const& containerOffset, + BindingRegisterOffset const& elementOffset, + Index logicalDescriptorSetIndex, + Index descriptorRangeIndex, + bool isRootParameter); + + /// Add one binding range to the computed layout. + /// + /// The layout information is taken from `typeLayout` for the binding + /// range with the given `bindingRangeIndex`. + /// + /// The `physicalDescriptorSetIndex` is the index in the `m_descriptorSets` array of + /// the descriptor set that the range should be added to. + /// + /// The `offset` encodes information about space and/or register offsets that + /// should be applied to descrptor ranges. + /// + /// Note that a single binding range may encompass zero or more descriptor ranges. + /// + void addBindingRange( + slang::TypeLayoutReflection* typeLayout, + Index physicalDescriptorSetIndex, + BindingRegisterOffset const& containerOffset, + BindingRegisterOffset const& elementOffset, + Index bindingRangeIndex); + + void addAsValue( + slang::VariableLayoutReflection* varLayout, Index physicalDescriptorSetIndex); + + /// Add binding ranges and parameter blocks to the root signature. + /// + /// The layout information is taken from `typeLayout` which should + /// be a layout for either a program or an entry point. + /// + /// The `physicalDescriptorSetIndex` is the index in the `m_descriptorSets` array of + /// the descriptor set that binding ranges not belonging to nested + /// parameter blocks should be added to. + /// + /// The `offset` encodes information about space and/or register offsets that + /// should be applied to descrptor ranges. + /// + void addAsConstantBuffer( + slang::TypeLayoutReflection* typeLayout, + Index physicalDescriptorSetIndex, + BindingRegisterOffsetPair const& containerOffset, + BindingRegisterOffsetPair const& elementOffset); + + void addAsValue( + slang::TypeLayoutReflection* typeLayout, + Index physicalDescriptorSetIndex, + BindingRegisterOffsetPair const& containerOffset, + BindingRegisterOffsetPair const& elementOffset); + + D3D12_ROOT_SIGNATURE_DESC& build(); + }; + + static Result createRootSignatureFromSlang( + DeviceImpl* device, + RootShaderObjectLayoutImpl* rootLayout, + slang::IComponentType* program, + ID3D12RootSignature** outRootSignature, + ID3DBlob** outError); + + static Result create( + DeviceImpl* device, + slang::IComponentType* program, + slang::ProgramLayout* programLayout, + RootShaderObjectLayoutImpl** outLayout, + ID3DBlob** outError); + + slang::IComponentType* getSlangProgram() const { return m_program; } + slang::ProgramLayout* getSlangProgramLayout() const { return m_programLayout; } + +protected: + Result init(Builder* builder); + + ComPtr<slang::IComponentType> m_program; + slang::ProgramLayout* m_programLayout = nullptr; + + List<EntryPointInfo> m_entryPoints; + +public: + ComPtr<ID3D12RootSignature> m_rootSignature; +}; + +struct ShaderBinary +{ + SlangStage stage; + slang::EntryPointReflection* entryPointInfo; + String actualEntryPointNameInAPI; + List<uint8_t> code; +}; + +class ShaderProgramImpl : public ShaderProgramBase +{ +public: + RefPtr<RootShaderObjectLayoutImpl> m_rootObjectLayout; + List<ShaderBinary> m_shaders; + + virtual Result createShaderModule( + slang::EntryPointReflection* entryPointInfo, ComPtr<ISlangBlob> kernelCode) override; +}; + +class ShaderObjectImpl + : public ShaderObjectBaseImpl<ShaderObjectImpl, ShaderObjectLayoutImpl, SimpleShaderObjectData> +{ + typedef ShaderObjectBaseImpl<ShaderObjectImpl, ShaderObjectLayoutImpl, SimpleShaderObjectData> + Super; + +public: + static Result create( + DeviceImpl* device, ShaderObjectLayoutImpl* layout, ShaderObjectImpl** outShaderObject); + + ~ShaderObjectImpl(); + + RendererBase* getDevice() { return m_device.get(); } + + virtual SLANG_NO_THROW UInt SLANG_MCALL getEntryPointCount() override; + + virtual SLANG_NO_THROW Result SLANG_MCALL + getEntryPoint(UInt index, IShaderObject** outEntryPoint) override; + + virtual SLANG_NO_THROW const void* SLANG_MCALL getRawData() override; + + virtual SLANG_NO_THROW size_t SLANG_MCALL getSize() override; + + virtual SLANG_NO_THROW Result SLANG_MCALL + setData(ShaderOffset const& inOffset, void const* data, size_t inSize) override; + virtual SLANG_NO_THROW Result SLANG_MCALL + setObject(ShaderOffset const& offset, IShaderObject* object) override; + + virtual SLANG_NO_THROW Result SLANG_MCALL + setResource(ShaderOffset const& offset, IResourceView* resourceView) override; + + virtual SLANG_NO_THROW Result SLANG_MCALL + setSampler(ShaderOffset const& offset, ISamplerState* sampler) override; + + virtual SLANG_NO_THROW Result SLANG_MCALL setCombinedTextureSampler( + ShaderOffset const& offset, IResourceView* textureView, ISamplerState* sampler) override; + +protected: + Result init( + DeviceImpl* device, + ShaderObjectLayoutImpl* layout, + DescriptorHeapReference viewHeap, + DescriptorHeapReference samplerHeap); + + /// Write the uniform/ordinary data of this object into the given `dest` buffer at the given + /// `offset` + Result _writeOrdinaryData( + PipelineCommandEncoder* encoder, + BufferResourceImpl* buffer, + size_t offset, + size_t destSize, + ShaderObjectLayoutImpl* specializedLayout); + + bool shouldAllocateConstantBuffer(TransientResourceHeapImpl* transientHeap); + + /// Ensure that the `m_ordinaryDataBuffer` has been created, if it is needed + Result _ensureOrdinaryDataBufferCreatedIfNeeded( + PipelineCommandEncoder* encoder, ShaderObjectLayoutImpl* specializedLayout); + +public: + void updateSubObjectsRecursive(); + /// Prepare to bind this object as a parameter block. + /// + /// This involves allocating and binding any descriptor tables necessary + /// to to store the state of the object. The function returns a descriptor + /// set formed from any table(s) allocated. In addition, the `ioOffset` + /// parameter will be adjusted to be correct for binding values into + /// the resulting descriptor set. + /// + /// Returns: + /// SLANG_OK when successful, + /// SLANG_E_OUT_OF_MEMORY when descriptor heap is full. + /// + Result prepareToBindAsParameterBlock( + BindingContext* context, + BindingOffset& ioOffset, + ShaderObjectLayoutImpl* specializedLayout, + DescriptorSet& outDescriptorSet); + + bool checkIfCachedDescriptorSetIsValidRecursive(BindingContext* context); + + /// Bind this object as a `ParameterBlock<X>` + Result bindAsParameterBlock( + BindingContext* context, + BindingOffset const& offset, + ShaderObjectLayoutImpl* specializedLayout); + + /// Bind this object as a `ConstantBuffer<X>` + Result bindAsConstantBuffer( + BindingContext* context, + DescriptorSet const& descriptorSet, + BindingOffset const& offset, + ShaderObjectLayoutImpl* specializedLayout); + + /// Bind this object as a value (for an interface-type parameter) + Result bindAsValue( + BindingContext* context, + DescriptorSet const& descriptorSet, + BindingOffset const& offset, + ShaderObjectLayoutImpl* specializedLayout); + + /// Shared logic for `bindAsConstantBuffer()` and `bindAsValue()` + Result _bindImpl( + BindingContext* context, + DescriptorSet const& descriptorSet, + BindingOffset const& offset, + ShaderObjectLayoutImpl* specializedLayout); + + Result bindRootArguments(BindingContext* context, uint32_t& index); + /// A CPU-memory descriptor set holding any descriptors used to represent the + /// resources/samplers in this object's state + DescriptorSet m_descriptorSet; + /// A cached descriptor set on GPU heap. + DescriptorSet m_cachedGPUDescriptorSet; + + ShortList<RefPtr<Resource>, 8> m_boundResources; + List<D3D12_GPU_VIRTUAL_ADDRESS> m_rootArguments; + /// A constant buffer used to stored ordinary data for this object + /// and existential-type sub-objects. + /// + /// Allocated from transient heap on demand with `_createOrdinaryDataBufferIfNeeded()` + IBufferResource* m_constantBufferWeakPtr = nullptr; + size_t m_constantBufferOffset = 0; + size_t m_constantBufferSize = 0; + + /// Dirty bit tracking whether the constant buffer needs to be updated. + bool m_isConstantBufferDirty = true; + /// The transient heap from which the constant buffer and descriptor set is allocated. + TransientResourceHeapImpl* m_cachedTransientHeap; + /// The version of the transient heap when the constant buffer and descriptor set is + /// allocated. + uint64_t m_cachedTransientHeapVersion; + + /// Whether this shader object is allowed to be mutable. + bool m_isMutable = false; + /// The version of a mutable shader object. + uint32_t m_version = 0; + /// The version of this mutable shader object when the gpu descriptor table is cached. + uint32_t m_cachedGPUDescriptorSetVersion = -1; + /// The versions of bound subobjects. + List<uint32_t> m_subObjectVersions; + + /// Get the layout of this shader object with specialization arguments considered + /// + /// This operation should only be called after the shader object has been + /// fully filled in and finalized. + /// + Result getSpecializedLayout(ShaderObjectLayoutImpl** outLayout); + + /// Create the layout for this shader object with specialization arguments considered + /// + /// This operation is virtual so that it can be customized by `RootShaderObject`. + /// + virtual Result _createSpecializedLayout(ShaderObjectLayoutImpl** outLayout); + + RefPtr<ShaderObjectLayoutImpl> m_specializedLayout; +}; + +class RootShaderObjectImpl : public ShaderObjectImpl +{ + typedef ShaderObjectImpl Super; + +public: + // Override default reference counting behavior to disable lifetime management via ComPtr. + // Root objects are managed by command buffer and does not need to be freed by the user. + SLANG_NO_THROW uint32_t SLANG_MCALL addRef() override { return 1; } + SLANG_NO_THROW uint32_t SLANG_MCALL release() override { return 1; } + +public: + RootShaderObjectLayoutImpl* getLayout(); + + virtual SLANG_NO_THROW UInt SLANG_MCALL getEntryPointCount() override; + virtual SLANG_NO_THROW SlangResult SLANG_MCALL + getEntryPoint(UInt index, IShaderObject** outEntryPoint) override; + virtual Result collectSpecializationArgs(ExtendedShaderObjectTypeList& args) override; + virtual SLANG_NO_THROW Result SLANG_MCALL + copyFrom(IShaderObject* object, ITransientResourceHeap* transientHeap) override; + +public: + Result bindAsRoot(BindingContext* context, RootShaderObjectLayoutImpl* specializedLayout); + +public: + Result init(DeviceImpl* device) { return SLANG_OK; } + + Result resetImpl( + DeviceImpl* device, + RootShaderObjectLayoutImpl* layout, + DescriptorHeapReference viewHeap, + DescriptorHeapReference samplerHeap, + bool isMutable); + + Result reset( + DeviceImpl* device, RootShaderObjectLayoutImpl* layout, TransientResourceHeapImpl* heap); + +protected: + virtual Result _createSpecializedLayout(ShaderObjectLayoutImpl** outLayout) override; + + List<RefPtr<ShaderObjectImpl>> m_entryPoints; +}; + +class MutableRootShaderObjectImpl : public RootShaderObjectImpl +{ +public: + // Override default reference counting behavior to disable lifetime management via ComPtr. + // Root objects are managed by command buffer and does not need to be freed by the user. + SLANG_NO_THROW uint32_t SLANG_MCALL addRef() override { return ShaderObjectBase::addRef(); } + SLANG_NO_THROW uint32_t SLANG_MCALL release() override { return ShaderObjectBase::release(); } +}; + +class ShaderTableImpl : public ShaderTableBase +{ +public: + uint32_t m_rayGenTableOffset; + uint32_t m_missTableOffset; + uint32_t m_hitGroupTableOffset; + + DeviceImpl* m_device; + + virtual RefPtr<BufferResource> createDeviceBuffer( + PipelineStateBase* pipeline, + TransientResourceHeapBase* transientHeap, + IResourceCommandEncoder* encoder) override; +}; + +class ResourceCommandEncoderImpl + : public IResourceCommandEncoder + , public PipelineCommandEncoder +{ +public: + virtual SLANG_NO_THROW void SLANG_MCALL copyBuffer( + IBufferResource* dst, + size_t dstOffset, + IBufferResource* src, + size_t srcOffset, + size_t size) override; + virtual SLANG_NO_THROW void SLANG_MCALL + uploadBufferData(IBufferResource* dst, size_t offset, size_t size, void* data) override; + virtual SLANG_NO_THROW void SLANG_MCALL textureBarrier( + size_t count, + ITextureResource* const* textures, + ResourceState src, + ResourceState dst) override; + virtual SLANG_NO_THROW void SLANG_MCALL bufferBarrier( + size_t count, + IBufferResource* const* buffers, + ResourceState src, + ResourceState dst) override; + virtual SLANG_NO_THROW void SLANG_MCALL endEncoding() {} + virtual SLANG_NO_THROW void SLANG_MCALL + writeTimestamp(IQueryPool* pool, SlangInt index) override; + virtual SLANG_NO_THROW void SLANG_MCALL copyTexture( + ITextureResource* dst, + ResourceState dstState, + SubresourceRange dstSubresource, + ITextureResource::Offset3D dstOffset, + ITextureResource* src, + ResourceState srcState, + SubresourceRange srcSubresource, + ITextureResource::Offset3D srcOffset, + ITextureResource::Size extent) override; + + virtual SLANG_NO_THROW void SLANG_MCALL uploadTextureData( + ITextureResource* dst, + SubresourceRange subResourceRange, + ITextureResource::Offset3D offset, + ITextureResource::Size extent, + ITextureResource::SubresourceData* subResourceData, + size_t subResourceDataCount) override; + + virtual SLANG_NO_THROW void SLANG_MCALL clearResourceView( + IResourceView* view, ClearValue* clearValue, ClearResourceViewFlags::Enum flags) override; + + virtual SLANG_NO_THROW void SLANG_MCALL resolveResource( + ITextureResource* source, + ResourceState sourceState, + SubresourceRange sourceRange, + ITextureResource* dest, + ResourceState destState, + SubresourceRange destRange) override; + + virtual SLANG_NO_THROW void SLANG_MCALL resolveQuery( + IQueryPool* queryPool, + uint32_t index, + uint32_t count, + IBufferResource* buffer, + uint64_t offset) override; + + virtual SLANG_NO_THROW void SLANG_MCALL copyTextureToBuffer( + IBufferResource* dst, + size_t dstOffset, + size_t dstSize, + size_t dstRowStride, + ITextureResource* src, + ResourceState srcState, + SubresourceRange srcSubresource, + ITextureResource::Offset3D srcOffset, + ITextureResource::Size extent) override; + + virtual SLANG_NO_THROW void SLANG_MCALL textureSubresourceBarrier( + ITextureResource* texture, + SubresourceRange subresourceRange, + ResourceState src, + ResourceState dst) override; + + virtual SLANG_NO_THROW void SLANG_MCALL + beginDebugEvent(const char* name, float rgbColor[3]) override; + virtual SLANG_NO_THROW void SLANG_MCALL endDebugEvent() override; +}; + +class ComputeCommandEncoderImpl + : public IComputeCommandEncoder + , public ResourceCommandEncoderImpl +{ +public: + SLANG_GFX_FORWARD_RESOURCE_COMMAND_ENCODER_IMPL(ResourceCommandEncoderImpl) +public: + virtual SLANG_NO_THROW void SLANG_MCALL endEncoding() override; + void init( + DeviceImpl* renderer, + TransientResourceHeapImpl* transientHeap, + CommandBufferImpl* cmdBuffer); + + virtual SLANG_NO_THROW Result SLANG_MCALL + bindPipeline(IPipelineState* state, IShaderObject** outRootObject) override; + + virtual SLANG_NO_THROW Result SLANG_MCALL + bindPipelineWithRootObject(IPipelineState* state, IShaderObject* rootObject) override; + + virtual SLANG_NO_THROW void SLANG_MCALL dispatchCompute(int x, int y, int z) override; + + virtual SLANG_NO_THROW void SLANG_MCALL + dispatchComputeIndirect(IBufferResource* argBuffer, uint64_t offset) override; +}; +class RenderCommandEncoderImpl + : public IRenderCommandEncoder + , public ResourceCommandEncoderImpl +{ +public: + SLANG_GFX_FORWARD_RESOURCE_COMMAND_ENCODER_IMPL(ResourceCommandEncoderImpl) +public: + RefPtr<RenderPassLayoutImpl> m_renderPass; + RefPtr<FramebufferImpl> m_framebuffer; + + List<BoundVertexBuffer> m_boundVertexBuffers; + + RefPtr<BufferResourceImpl> m_boundIndexBuffer; + + D3D12_VIEWPORT m_viewports[kMaxRTVCount]; + D3D12_RECT m_scissorRects[kMaxRTVCount]; + + DXGI_FORMAT m_boundIndexFormat; + UINT m_boundIndexOffset; + + D3D12_PRIMITIVE_TOPOLOGY_TYPE m_primitiveTopologyType; + D3D12_PRIMITIVE_TOPOLOGY m_primitiveTopology; + + void init( + DeviceImpl* renderer, + TransientResourceHeapImpl* transientHeap, + CommandBufferImpl* cmdBuffer, + RenderPassLayoutImpl* renderPass, + FramebufferImpl* framebuffer); + + virtual SLANG_NO_THROW Result SLANG_MCALL + bindPipeline(IPipelineState* state, IShaderObject** outRootObject) override; + + virtual SLANG_NO_THROW Result SLANG_MCALL + bindPipelineWithRootObject(IPipelineState* state, IShaderObject* rootObject) override; + + virtual SLANG_NO_THROW void SLANG_MCALL + setViewports(uint32_t count, const Viewport* viewports) override; + + virtual SLANG_NO_THROW void SLANG_MCALL + setScissorRects(uint32_t count, const ScissorRect* rects) override; + + virtual SLANG_NO_THROW void SLANG_MCALL + setPrimitiveTopology(PrimitiveTopology topology) override; + + virtual SLANG_NO_THROW void SLANG_MCALL setVertexBuffers( + uint32_t startSlot, + uint32_t slotCount, + IBufferResource* const* buffers, + const uint32_t* offsets) override; + + virtual SLANG_NO_THROW void SLANG_MCALL + setIndexBuffer(IBufferResource* buffer, Format indexFormat, uint32_t offset = 0) override; + + void prepareDraw(); + virtual SLANG_NO_THROW void SLANG_MCALL + draw(uint32_t vertexCount, uint32_t startVertex = 0) override; + virtual SLANG_NO_THROW void SLANG_MCALL + drawIndexed(uint32_t indexCount, uint32_t startIndex = 0, uint32_t baseVertex = 0) override; + virtual SLANG_NO_THROW void SLANG_MCALL endEncoding() override; + + virtual SLANG_NO_THROW void SLANG_MCALL setStencilReference(uint32_t referenceValue) override; + + virtual SLANG_NO_THROW void SLANG_MCALL drawIndirect( + uint32_t maxDrawCount, + IBufferResource* argBuffer, + uint64_t argOffset, + IBufferResource* countBuffer, + uint64_t countOffset) override; + + virtual SLANG_NO_THROW void SLANG_MCALL drawIndexedIndirect( + uint32_t maxDrawCount, + IBufferResource* argBuffer, + uint64_t argOffset, + IBufferResource* countBuffer, + uint64_t countOffset) override; + + virtual SLANG_NO_THROW Result SLANG_MCALL setSamplePositions( + uint32_t samplesPerPixel, + uint32_t pixelCount, + const SamplePosition* samplePositions) override; + + virtual SLANG_NO_THROW void SLANG_MCALL drawInstanced( + uint32_t vertexCount, + uint32_t instanceCount, + uint32_t startVertex, + uint32_t startInstanceLocation) override; + + virtual SLANG_NO_THROW void SLANG_MCALL drawIndexedInstanced( + uint32_t indexCount, + uint32_t instanceCount, + uint32_t startIndexLocation, + int32_t baseVertexLocation, + uint32_t startInstanceLocation) override; +}; + +#if SLANG_GFX_HAS_DXR_SUPPORT +class RayTracingCommandEncoderImpl + : public IRayTracingCommandEncoder + , public ResourceCommandEncoderImpl +{ +public: + SLANG_GFX_FORWARD_RESOURCE_COMMAND_ENCODER_IMPL(ResourceCommandEncoderImpl) +public: + virtual SLANG_NO_THROW void SLANG_MCALL buildAccelerationStructure( + const IAccelerationStructure::BuildDesc& desc, + int propertyQueryCount, + AccelerationStructureQueryDesc* queryDescs) override; + virtual SLANG_NO_THROW void SLANG_MCALL copyAccelerationStructure( + IAccelerationStructure* dest, + IAccelerationStructure* src, + AccelerationStructureCopyMode mode) override; + virtual SLANG_NO_THROW void SLANG_MCALL queryAccelerationStructureProperties( + int accelerationStructureCount, + IAccelerationStructure* const* accelerationStructures, + int queryCount, + AccelerationStructureQueryDesc* queryDescs) override; + virtual SLANG_NO_THROW void SLANG_MCALL + serializeAccelerationStructure(DeviceAddress dest, IAccelerationStructure* source) override; + virtual SLANG_NO_THROW void SLANG_MCALL deserializeAccelerationStructure( + IAccelerationStructure* dest, DeviceAddress source) override; + virtual SLANG_NO_THROW void SLANG_MCALL + bindPipeline(IPipelineState* state, IShaderObject** outRootObject) override; + virtual SLANG_NO_THROW Result SLANG_MCALL + bindPipelineWithRootObject(IPipelineState* state, IShaderObject* rootObject) override + { + return bindPipelineWithRootObjectImpl(state, rootObject); + } + virtual SLANG_NO_THROW void SLANG_MCALL dispatchRays( + uint32_t rayGenShaderIndex, + IShaderTable* shaderTable, + int32_t width, + int32_t height, + int32_t depth) override; + virtual SLANG_NO_THROW void SLANG_MCALL endEncoding() {} +}; +#endif + +class CommandBufferImpl + : public ICommandBuffer + , public ComObject +{ +public: + // There are a pair of cyclic references between a `TransientResourceHeap` and + // a `CommandBuffer` created from the heap. We need to break the cycle upon + // the public reference count of a command buffer dropping to 0. + SLANG_COM_OBJECT_IUNKNOWN_ALL + + ICommandBuffer* getInterface(const Guid& guid); + virtual void comFree() override { m_transientHeap.breakStrongReference(); } + + virtual SLANG_NO_THROW Result SLANG_MCALL getNativeHandle(InteropHandle* handle) override; + +public: + ComPtr<ID3D12GraphicsCommandList> m_cmdList; + ComPtr<ID3D12GraphicsCommandList1> m_cmdList1; + ComPtr<ID3D12GraphicsCommandList4> m_cmdList4; + + BreakableReference<TransientResourceHeapImpl> m_transientHeap; + // Weak reference is fine here since `m_transientHeap` already holds strong reference to + // device. + DeviceImpl* m_renderer; + RootShaderObjectImpl m_rootShaderObject; + RefPtr<MutableRootShaderObjectImpl> m_mutableRootShaderObject; + bool m_descriptorHeapsBound = false; + + void bindDescriptorHeaps(); + + void invalidateDescriptorHeapBinding() { m_descriptorHeapsBound = false; } + + void reinit(); + + void init( + DeviceImpl* renderer, + ID3D12GraphicsCommandList* d3dCommandList, + TransientResourceHeapImpl* transientHeap); + + ResourceCommandEncoderImpl m_resourceCommandEncoder; + + virtual SLANG_NO_THROW void SLANG_MCALL + encodeResourceCommands(IResourceCommandEncoder** outEncoder) override; + + RenderCommandEncoderImpl m_renderCommandEncoder; + virtual SLANG_NO_THROW void SLANG_MCALL encodeRenderCommands( + IRenderPassLayout* renderPass, + IFramebuffer* framebuffer, + IRenderCommandEncoder** outEncoder) override; + + ComputeCommandEncoderImpl m_computeCommandEncoder; + virtual SLANG_NO_THROW void SLANG_MCALL + encodeComputeCommands(IComputeCommandEncoder** outEncoder) override; + +#if SLANG_GFX_HAS_DXR_SUPPORT + RayTracingCommandEncoderImpl m_rayTracingCommandEncoder; +#endif + virtual SLANG_NO_THROW void SLANG_MCALL + encodeRayTracingCommands(IRayTracingCommandEncoder** outEncoder) override; + virtual SLANG_NO_THROW void SLANG_MCALL close() override; +}; + +class FenceImpl : public FenceBase +{ +public: + ComPtr<ID3D12Fence> m_fence; + HANDLE m_waitEvent = 0; + + ~FenceImpl(); + + HANDLE getWaitEvent(); + + Result init(DeviceImpl* device, const IFence::Desc& desc); + + virtual SLANG_NO_THROW Result SLANG_MCALL getCurrentValue(uint64_t* outValue) override; + + virtual SLANG_NO_THROW Result SLANG_MCALL setCurrentValue(uint64_t value) override; + + virtual SLANG_NO_THROW Result SLANG_MCALL getSharedHandle(InteropHandle* outHandle) override; + + virtual SLANG_NO_THROW Result SLANG_MCALL + getNativeHandle(InteropHandle* outNativeHandle) override; +}; + +class CommandQueueImpl + : public ICommandQueue + , public ComObject +{ +public: + SLANG_COM_OBJECT_IUNKNOWN_ALL + ICommandQueue* getInterface(const Guid& guid); + void breakStrongReferenceToDevice() { m_renderer.breakStrongReference(); } + + virtual SLANG_NO_THROW Result SLANG_MCALL getNativeHandle(InteropHandle* handle) override; + +public: + BreakableReference<DeviceImpl> m_renderer; + ComPtr<ID3D12Device> m_device; + ComPtr<ID3D12CommandQueue> m_d3dQueue; + ComPtr<ID3D12Fence> m_fence; + uint64_t m_fenceValue = 0; + HANDLE globalWaitHandle; + Desc m_desc; + uint32_t m_queueIndex = 0; + + Result init(DeviceImpl* device, uint32_t queueIndex); + ~CommandQueueImpl(); + virtual SLANG_NO_THROW const Desc& SLANG_MCALL getDesc() override; + + virtual SLANG_NO_THROW void SLANG_MCALL executeCommandBuffers( + uint32_t count, + ICommandBuffer* const* commandBuffers, + IFence* fence, + uint64_t valueToSignal) override; + + virtual SLANG_NO_THROW void SLANG_MCALL waitOnHost() override; + + virtual SLANG_NO_THROW Result SLANG_MCALL waitForFenceValuesOnDevice( + uint32_t fenceCount, IFence** fences, uint64_t* waitValues) override; +}; + +class SwapchainImpl : public D3DSwapchainBase +{ +public: + ComPtr<ID3D12CommandQueue> m_queue; + ComPtr<IDXGIFactory> m_dxgiFactory; + ComPtr<IDXGISwapChain3> m_swapChain3; + ComPtr<ID3D12Fence> m_fence; + ShortList<HANDLE, kMaxNumRenderFrames> m_frameEvents; + uint64_t fenceValue = 0; + Result init(DeviceImpl* renderer, const ISwapchain::Desc& swapchainDesc, WindowHandle window); + + virtual SLANG_NO_THROW Result SLANG_MCALL resize(uint32_t width, uint32_t height) override; + + virtual void createSwapchainBufferImages() override; + virtual IDXGIFactory* getDXGIFactory() override { return m_dxgiFactory; } + virtual IUnknown* getOwningDevice() override { return m_queue; } + virtual SLANG_NO_THROW int SLANG_MCALL acquireNextImage() override; + virtual SLANG_NO_THROW Result SLANG_MCALL present() override; + virtual SLANG_NO_THROW bool SLANG_MCALL isOccluded() override; + virtual SLANG_NO_THROW Result SLANG_MCALL setFullScreenMode(bool mode) override; +}; + +#if SLANG_GFX_HAS_DXR_SUPPORT + +class AccelerationStructureImpl + : public AccelerationStructureBase + , public ResourceViewInternalImpl +{ +public: + RefPtr<BufferResourceImpl> m_buffer; + uint64_t m_offset; + uint64_t m_size; + ComPtr<ID3D12Device5> m_device5; + +public: + virtual SLANG_NO_THROW DeviceAddress SLANG_MCALL getDeviceAddress() override; + virtual SLANG_NO_THROW Result SLANG_MCALL getNativeHandle(InteropHandle* outHandle) override; +}; -SlangResult SLANG_MCALL createD3D12Device(const IDevice::Desc* desc, IDevice** outDevice); +#endif -} // gfx +} // namespace d3d12 +} // namespace gfx diff --git a/tools/gfx/render.cpp b/tools/gfx/render.cpp index 2125ed47f..3cb19f205 100644 --- a/tools/gfx/render.cpp +++ b/tools/gfx/render.cpp @@ -1,9 +1,7 @@ // render.cpp #include "renderer-shared.h" #include "../../source/core/slang-math.h" - #include "d3d11/render-d3d11.h" -#include "d3d12/render-d3d12.h" #include "open-gl/render-gl.h" #include "vulkan/render-vk.h" #include "cuda/render-cuda.h" @@ -15,6 +13,8 @@ namespace gfx { using namespace Slang; +Result SLANG_MCALL createD3D12Device(const IDevice::Desc* desc, IDevice** outDevice); + static bool debugLayerEnabled = false; /* !!!!!!!!!!!!!!!!!!!!!!!!!!!!!! Global Renderer Functions !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! */ diff --git a/tools/gfx/renderer-shared.h b/tools/gfx/renderer-shared.h index 5dc4da59a..24d51c87a 100644 --- a/tools/gfx/renderer-shared.h +++ b/tools/gfx/renderer-shared.h @@ -1327,13 +1327,13 @@ protected: virtual SLANG_NO_THROW SlangResult SLANG_MCALL initialize(const Desc& desc); protected: Slang::List<Slang::String> m_features; - Slang::ComPtr<IPipelineCreationAPIDispatcher> m_pipelineCreationAPIDispatcher; public: SlangContext slangContext; ShaderCache shaderCache; Slang::Dictionary<slang::TypeLayoutReflection*, Slang::RefPtr<ShaderObjectLayoutBase>> m_shaderObjectLayoutCache; + Slang::ComPtr<IPipelineCreationAPIDispatcher> m_pipelineCreationAPIDispatcher; }; bool isDepthFormat(Format format); diff --git a/tools/gfx/vulkan/render-vk.cpp b/tools/gfx/vulkan/render-vk.cpp index 5f3c8f8df..6f4bbb2e5 100644 --- a/tools/gfx/vulkan/render-vk.cpp +++ b/tools/gfx/vulkan/render-vk.cpp @@ -2942,7 +2942,7 @@ public: pipeline = VK_NULL_HANDLE; } - static void _uploadBufferData( + static void uploadBufferData( VkCommandBuffer commandBuffer, TransientResourceHeapImpl* transientHeap, BufferResourceImpl* buffer, @@ -2986,7 +2986,7 @@ public: void uploadBufferDataImpl(IBufferResource* buffer, size_t offset, size_t size, void* data) { m_vkPreCommandBuffer = m_commandBuffer->getPreCommandBuffer(); - _uploadBufferData( + uploadBufferData( m_vkPreCommandBuffer, m_commandBuffer->m_transientHeap.get(), static_cast<BufferResourceImpl*>(buffer), @@ -4816,7 +4816,7 @@ public: virtual SLANG_NO_THROW void SLANG_MCALL uploadBufferData( IBufferResource* buffer, size_t offset, size_t size, void* data) override { - PipelineCommandEncoder::_uploadBufferData( + PipelineCommandEncoder::uploadBufferData( m_commandBuffer->m_commandBuffer, m_commandBuffer->m_transientHeap.get(), static_cast<BufferResourceImpl*>(buffer), |
