From 727c7d2b824913b3ae263243421ea79ca4940eb8 Mon Sep 17 00:00:00 2001 From: Yong He Date: Wed, 9 Mar 2022 11:32:23 -0800 Subject: gfx: restructure render-d3d12.cpp (#2154) * Vulkan: deferred shader compilation and pipeline creation. * Fix 32bit build. * gfx: restructure the code in render-d3d12.cpp * Move `Submitter`. * Fix. * merge with master. * Revert dictionary change in previous PR. Co-authored-by: Yong He --- tools/gfx/d3d11/render-d3d11.cpp | 14 +- tools/gfx/d3d12/render-d3d12.cpp | 14183 +++++++++++++++++-------------------- tools/gfx/d3d12/render-d3d12.h | 1792 ++++- tools/gfx/render.cpp | 4 +- tools/gfx/renderer-shared.h | 2 +- tools/gfx/vulkan/render-vk.cpp | 6 +- 6 files changed, 8306 insertions(+), 7695 deletions(-) (limited to 'tools') diff --git a/tools/gfx/d3d11/render-d3d11.cpp b/tools/gfx/d3d11/render-d3d11.cpp index d7a918a3b..aecadcf46 100644 --- a/tools/gfx/d3d11/render-d3d11.cpp +++ b/tools/gfx/d3d11/render-d3d11.cpp @@ -2071,7 +2071,7 @@ SlangResult SLANG_MCALL createD3D11Device(const IDevice::Desc* desc, IDevice** o return SLANG_OK; } -static void _initSrvDesc(IResource::Type resourceType, const ITextureResource::Desc& textureDesc, DXGI_FORMAT pixelFormat, D3D11_SHADER_RESOURCE_VIEW_DESC& descOut) +static void initSrvDesc(IResource::Type resourceType, const ITextureResource::Desc& textureDesc, DXGI_FORMAT pixelFormat, D3D11_SHADER_RESOURCE_VIEW_DESC& descOut) { // create SRV descOut = D3D11_SHADER_RESOURCE_VIEW_DESC(); @@ -2170,7 +2170,7 @@ D3D11Device::ScopeNVAPI::~ScopeNVAPI() // !!!!!!!!!!!!!!!!!!!!!!!!!!!! Renderer interface !!!!!!!!!!!!!!!!!!!!!!!!!! 
-static bool _isSupportedNVAPIOp(IUnknown* dev, uint32_t op) +static bool isSupportedNVAPIOp(IUnknown* dev, uint32_t op) { #ifdef GFX_NVAPI { @@ -2347,11 +2347,11 @@ SlangResult D3D11Device::initialize(const Desc& desc) return SLANG_E_NOT_AVAILABLE; } - if (_isSupportedNVAPIOp(m_device, NV_EXTN_OP_UINT64_ATOMIC )) + if (isSupportedNVAPIOp(m_device, NV_EXTN_OP_UINT64_ATOMIC )) { m_features.add("atomic-int64"); } - if (_isSupportedNVAPIOp(m_device, NV_EXTN_OP_FP32_ATOMIC)) + if (isSupportedNVAPIOp(m_device, NV_EXTN_OP_FP32_ATOMIC)) { m_features.add("atomic-float"); } @@ -2548,7 +2548,7 @@ SlangResult D3D11Device::readTextureResource( } } -static D3D11_BIND_FLAG _calcResourceFlag(ResourceState state) +static D3D11_BIND_FLAG calcResourceFlag(ResourceState state) { switch (state) { @@ -2581,7 +2581,7 @@ static int _calcResourceBindFlags(ResourceStateSet allowedStates) { auto state = (ResourceState)i; if (allowedStates.contains(state)) - dstFlags |= _calcResourceFlag(state); + dstFlags |= calcResourceFlag(state); } return dstFlags; } @@ -3000,7 +3000,7 @@ Result D3D11Device::createTextureView(ITextureResource* texture, IResourceView:: case IResourceView::Type::ShaderResource: { D3D11_SHADER_RESOURCE_VIEW_DESC srvDesc; - _initSrvDesc(resourceImpl->getType(), *resourceImpl->getDesc(), D3DUtil::getMapFormat(desc.format), srvDesc); + initSrvDesc(resourceImpl->getType(), *resourceImpl->getDesc(), D3DUtil::getMapFormat(desc.format), srvDesc); ComPtr srv; SLANG_RETURN_ON_FAIL(m_device->CreateShaderResourceView(resourceImpl->m_resource, &srvDesc, srv.writeRef())); diff --git a/tools/gfx/d3d12/render-d3d12.cpp b/tools/gfx/d3d12/render-d3d12.cpp index 0fc3b2874..1e73caf44 100644 --- a/tools/gfx/d3d12/render-d3d12.cpp +++ b/tools/gfx/d3d12/render-d3d12.cpp @@ -1,5511 +1,2789 @@ // render-d3d12.cpp -#define _CRT_SECURE_NO_WARNINGS - #include "render-d3d12.h" -//WORKING:#include "options.h" -#include "../renderer-shared.h" -#include "../transient-resource-heap-base.h" -#include 
"../simple-render-pass-layout.h" -#include "../d3d/d3d-swapchain.h" -#include "../mutable-shader-object.h" -#include "../command-encoder-com-forward.h" -#include "core/slang-blob.h" -#include "core/slang-basic.h" -#include "core/slang-chunked-list.h" - -// In order to use the Slang API, we need to include its header - -//WORKING:#include - -// We will be rendering with Direct3D 12, so we need to include -// the Windows and D3D12 headers - -#define WIN32_LEAN_AND_MEAN -#define NOMINMAX -#include -#undef WIN32_LEAN_AND_MEAN -#undef NOMINMAX - -#include -#include -//#include - -#ifndef __ID3D12GraphicsCommandList1_FWD_DEFINED__ -// If can't find a definition of CommandList1, just use an empty definition -struct ID3D12GraphicsCommandList1 {}; -#endif - #ifdef GFX_NVAPI -# include "../nvapi/nvapi-include.h" +# include "../nvapi/nvapi-include.h" #endif -#include "slang-com-ptr.h" -#include "../flag-combiner.h" - -#include "resource-d3d12.h" -#include "descriptor-heap-d3d12.h" - #include "../d3d/d3d-util.h" - +#include "../flag-combiner.h" #include "../nvapi/nvapi-util.h" - -// We will use the C standard library just for printing error messages. +#include "slang-com-ptr.h" #include -#ifdef _MSC_VER -#include -#if (_MSC_VER < 1900) -#define snprintf sprintf_s -#endif -#endif -// - #ifdef _DEBUG -#define ENABLE_DEBUG_LAYER 1 +# define ENABLE_DEBUG_LAYER 1 #else -#define ENABLE_DEBUG_LAYER 0 +# define ENABLE_DEBUG_LAYER 0 #endif -namespace gfx { - +namespace gfx +{ using namespace Slang; -// Define function pointer types for PIX library. 
-typedef HRESULT(WINAPI* PFN_BeginEventOnCommandList)( - ID3D12GraphicsCommandList* commandList, UINT64 color, _In_ PCSTR formatString); -typedef HRESULT(WINAPI* PFN_EndEventOnCommandList)(ID3D12GraphicsCommandList* commandList); - -class D3D12Device : public RendererBase -{ -public: - // Renderer implementation - virtual SLANG_NO_THROW SlangResult SLANG_MCALL initialize(const Desc& desc) override; - virtual SLANG_NO_THROW Result SLANG_MCALL - getFormatSupportedResourceStates(Format format, ResourceStateSet* outStates) override; - - virtual SLANG_NO_THROW Result SLANG_MCALL - createCommandQueue(const ICommandQueue::Desc& desc, ICommandQueue** outQueue) override; - virtual SLANG_NO_THROW Result SLANG_MCALL createTransientResourceHeap( - const ITransientResourceHeap::Desc& desc, - ITransientResourceHeap** outHeap) override; - virtual SLANG_NO_THROW Result SLANG_MCALL createSwapchain( - const ISwapchain::Desc& desc, - WindowHandle window, - ISwapchain** outSwapchain) override; - - virtual SLANG_NO_THROW Result SLANG_MCALL getTextureAllocationInfo( - const ITextureResource::Desc& desc, size_t* outSize, size_t* outAlignment) override; - virtual SLANG_NO_THROW Result SLANG_MCALL getTextureRowAlignment(size_t* outAlignment) override; - virtual SLANG_NO_THROW Result SLANG_MCALL createTextureResource( - const ITextureResource::Desc& desc, - const ITextureResource::SubresourceData* initData, - ITextureResource** outResource) override; - virtual SLANG_NO_THROW Result SLANG_MCALL createTextureFromNativeHandle( - InteropHandle handle, - const ITextureResource::Desc& srcDesc, - ITextureResource** outResource) override; - virtual SLANG_NO_THROW Result SLANG_MCALL createBufferResource( - const IBufferResource::Desc& desc, - const void* initData, - IBufferResource** outResource) override; - virtual SLANG_NO_THROW Result SLANG_MCALL createBufferFromNativeHandle( - InteropHandle handle, - const IBufferResource::Desc& srcDesc, - IBufferResource** outResource) override; - - virtual 
SLANG_NO_THROW Result SLANG_MCALL - createSamplerState(ISamplerState::Desc const& desc, ISamplerState** outSampler) override; - - virtual SLANG_NO_THROW Result SLANG_MCALL createTextureView( - ITextureResource* texture, - IResourceView::Desc const& desc, - IResourceView** outView) override; - virtual SLANG_NO_THROW Result SLANG_MCALL createBufferView( - IBufferResource* buffer, - IBufferResource* counterBuffer, - IResourceView::Desc const& desc, - IResourceView** outView) override; - - virtual SLANG_NO_THROW Result SLANG_MCALL - createFramebuffer(IFramebuffer::Desc const& desc, IFramebuffer** outFrameBuffer) override; - - virtual SLANG_NO_THROW Result SLANG_MCALL - createFramebufferLayout(IFramebufferLayout::Desc const& desc, IFramebufferLayout** outLayout) override; - - virtual SLANG_NO_THROW Result SLANG_MCALL createRenderPassLayout( - const IRenderPassLayout::Desc& desc, - IRenderPassLayout** outRenderPassLayout) override; - - virtual SLANG_NO_THROW Result SLANG_MCALL createInputLayout( - IInputLayout::Desc const& desc, - IInputLayout** outLayout) override; - - virtual Result createShaderObjectLayout( - slang::TypeLayoutReflection* typeLayout, - ShaderObjectLayoutBase** outLayout) override; - virtual Result createShaderObject(ShaderObjectLayoutBase* layout, IShaderObject** outObject) - override; - virtual Result createMutableShaderObject( - ShaderObjectLayoutBase* layout, IShaderObject** outObject) override; - virtual SLANG_NO_THROW Result SLANG_MCALL - createMutableRootShaderObject(IShaderProgram* program, IShaderObject** outObject) override; - - virtual SLANG_NO_THROW Result SLANG_MCALL - createShaderTable(const IShaderTable::Desc& desc, IShaderTable** outShaderTable) override; - virtual SLANG_NO_THROW Result SLANG_MCALL - createProgram(const IShaderProgram::Desc& desc, IShaderProgram** outProgram, ISlangBlob** outDiagnostics) override; - virtual SLANG_NO_THROW Result SLANG_MCALL createGraphicsPipelineState( - const GraphicsPipelineStateDesc& desc, 
IPipelineState** outState) override; - virtual SLANG_NO_THROW Result SLANG_MCALL createComputePipelineState( - const ComputePipelineStateDesc& desc, IPipelineState** outState) override; - - virtual SLANG_NO_THROW Result SLANG_MCALL createQueryPool( - const IQueryPool::Desc& desc, IQueryPool** outState) override; - - virtual SLANG_NO_THROW Result SLANG_MCALL - createFence(const IFence::Desc& desc, IFence** outFence) override; - - virtual SLANG_NO_THROW Result SLANG_MCALL waitForFences( - uint32_t fenceCount, - IFence** fences, - uint64_t* fenceValues, - bool waitForAll, - uint64_t timeout) override; - - virtual SLANG_NO_THROW SlangResult SLANG_MCALL readTextureResource( - ITextureResource* resource, - ResourceState state, - ISlangBlob** outBlob, - size_t* outRowPitch, - size_t* outPixelSize) override; - - virtual SLANG_NO_THROW SlangResult SLANG_MCALL readBufferResource( - IBufferResource* resource, - size_t offset, - size_t size, - ISlangBlob** outBlob) override; - - virtual SLANG_NO_THROW const DeviceInfo& SLANG_MCALL getDeviceInfo() const override - { - return m_info; - } - - virtual SLANG_NO_THROW Result SLANG_MCALL getNativeDeviceHandles(InteropHandles* outHandles) override; - - ~D3D12Device(); +namespace d3d12 +{ -#if SLANG_GFX_HAS_DXR_SUPPORT - virtual SLANG_NO_THROW Result SLANG_MCALL getAccelerationStructurePrebuildInfo( - const IAccelerationStructure::BuildInputs& buildInputs, - IAccelerationStructure::PrebuildInfo* outPrebuildInfo) override; - virtual SLANG_NO_THROW Result SLANG_MCALL createAccelerationStructure( - const IAccelerationStructure::CreateDesc& desc, - IAccelerationStructure** outView) override; - virtual SLANG_NO_THROW Result SLANG_MCALL createRayTracingPipelineState( - const RayTracingPipelineStateDesc& desc, IPipelineState** outState) override; +namespace +{ +bool isSupportedNVAPIOp(ID3D12Device* dev, uint32_t op) +{ +#ifdef GFX_NVAPI + { + bool isSupported; + NvAPI_Status status = + NvAPI_D3D12_IsNvShaderExtnOpCodeSupported(dev, NvU32(op), 
&isSupported); + return status == NVAPI_OK && isSupported; + } +#else + return false; #endif +} -public: - - static const Int kMaxNumRenderFrames = 4; - static const Int kMaxNumRenderTargets = 3; +D3D12_RESOURCE_FLAGS calcResourceFlag(ResourceState state) +{ + switch (state) + { + case ResourceState::RenderTarget: + return D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET; + case ResourceState::DepthRead: + case ResourceState::DepthWrite: + return D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL; + case ResourceState::UnorderedAccess: + case ResourceState::AccelerationStructure: + return D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS; + default: + return D3D12_RESOURCE_FLAG_NONE; + } +} - static const Int kMaxRTVCount = 8; - static const Int kMaxDescriptorSetCount = 16; +D3D12_RESOURCE_FLAGS calcResourceFlags(ResourceStateSet states) +{ + int dstFlags = 0; + for (uint32_t i = 0; i < (uint32_t)ResourceState::_Count; i++) + { + auto state = (ResourceState)i; + if (states.contains(state)) + dstFlags |= calcResourceFlag(state); + } + return (D3D12_RESOURCE_FLAGS)dstFlags; +} - struct DeviceInfo +D3D12_RESOURCE_DIMENSION calcResourceDimension(IResource::Type type) +{ + switch (type) { - void clear() + case IResource::Type::Buffer: + return D3D12_RESOURCE_DIMENSION_BUFFER; + case IResource::Type::Texture1D: + return D3D12_RESOURCE_DIMENSION_TEXTURE1D; + case IResource::Type::TextureCube: + case IResource::Type::Texture2D: { - m_dxgiFactory.setNull(); - m_device.setNull(); - m_adapter.setNull(); - m_desc = {}; - m_desc1 = {}; - m_isWarp = false; - m_isSoftware = false; + return D3D12_RESOURCE_DIMENSION_TEXTURE2D; } + case IResource::Type::Texture3D: + return D3D12_RESOURCE_DIMENSION_TEXTURE3D; + default: + return D3D12_RESOURCE_DIMENSION_UNKNOWN; + } +} - bool m_isWarp; - bool m_isSoftware; - ComPtr m_dxgiFactory; - ComPtr m_device; - ComPtr m_device5; - ComPtr m_adapter; - DXGI_ADAPTER_DESC m_desc; - DXGI_ADAPTER_DESC1 m_desc1; - }; +DXGI_FORMAT getTypelessFormatFromDepthFormat(Format format) 
+{ + switch (format) + { + case Format::D16_UNORM: + return DXGI_FORMAT_R16_TYPELESS; + case Format::D32_FLOAT: + return DXGI_FORMAT_R32_TYPELESS; + default: + return D3DUtil::getMapFormat(format); + } +} - struct Submitter +bool isTypelessDepthFormat(DXGI_FORMAT format) +{ + switch (format) { - virtual void setRootConstantBufferView(int index, D3D12_GPU_VIRTUAL_ADDRESS gpuBufferLocation) = 0; - virtual void setRootUAV(int index, D3D12_GPU_VIRTUAL_ADDRESS gpuBufferLocation) = 0; - virtual void setRootSRV(int index, D3D12_GPU_VIRTUAL_ADDRESS gpuBufferLocation) = 0; - virtual void setRootDescriptorTable(int index, D3D12_GPU_DESCRIPTOR_HANDLE BaseDescriptor) = 0; - virtual void setRootSignature(ID3D12RootSignature* rootSignature) = 0; - virtual void setRootConstants(Index rootParamIndex, Index dstOffsetIn32BitValues, Index countOf32BitValues, void const* srcData) = 0; - virtual void setPipelineState(PipelineStateBase* pipelineState) = 0; - }; + case DXGI_FORMAT_R16_TYPELESS: + case DXGI_FORMAT_R32_TYPELESS: + return true; + default: + return false; + } +} - class BufferResourceImpl: public gfx::BufferResource +D3D12_FILTER_TYPE translateFilterMode(TextureFilteringMode mode) +{ + switch (mode) { - public: - typedef BufferResource Parent; + default: + return D3D12_FILTER_TYPE(0); - BufferResourceImpl(const Desc& desc) - : Parent(desc) - , m_defaultState(D3DUtil::getResourceState(desc.defaultState)) - { - } +#define CASE(SRC, DST) \ + case TextureFilteringMode::SRC: \ + return D3D12_FILTER_TYPE_##DST - ~BufferResourceImpl() - { - if (sharedHandle.handleValue != 0) - { - CloseHandle((HANDLE)sharedHandle.handleValue); - } - } + CASE(Point, POINT); + CASE(Linear, LINEAR); - D3D12Resource m_resource; ///< The resource in gpu memory, allocated on the correct heap relative to the cpu access flag +#undef CASE + } +} - D3D12_RESOURCE_STATES m_defaultState; +D3D12_FILTER_REDUCTION_TYPE translateFilterReduction(TextureReductionOp op) +{ + switch (op) + { + default: + return 
D3D12_FILTER_REDUCTION_TYPE(0); - virtual SLANG_NO_THROW DeviceAddress SLANG_MCALL getDeviceAddress() override - { - return (DeviceAddress)m_resource.getResource()->GetGPUVirtualAddress(); - } +#define CASE(SRC, DST) \ + case TextureReductionOp::SRC: \ + return D3D12_FILTER_REDUCTION_TYPE_##DST - virtual SLANG_NO_THROW Result SLANG_MCALL getNativeResourceHandle(InteropHandle* outHandle) override - { - outHandle->handleValue = (uint64_t)m_resource.getResource(); - outHandle->api = InteropHandleAPI::D3D12; - return SLANG_OK; - } + CASE(Average, STANDARD); + CASE(Comparison, COMPARISON); + CASE(Minimum, MINIMUM); + CASE(Maximum, MAXIMUM); - virtual SLANG_NO_THROW Result SLANG_MCALL getSharedHandle(InteropHandle* outHandle) override - { - // Check if a shared handle already exists for this resource. - if (sharedHandle.handleValue != 0) - { - *outHandle = sharedHandle; - return SLANG_OK; - } +#undef CASE + } +} - // If a shared handle doesn't exist, create one and store it. - ComPtr pDevice; - auto pResource = m_resource.getResource(); - pResource->GetDevice(IID_PPV_ARGS(pDevice.writeRef())); - SLANG_RETURN_ON_FAIL(pDevice->CreateSharedHandle(pResource, NULL, GENERIC_ALL, nullptr, (HANDLE*)&outHandle->handleValue)); - outHandle->api = InteropHandleAPI::D3D12; - sharedHandle = *outHandle; - return SLANG_OK; - } +D3D12_TEXTURE_ADDRESS_MODE translateAddressingMode(TextureAddressingMode mode) +{ + switch (mode) + { + default: + return D3D12_TEXTURE_ADDRESS_MODE(0); - virtual SLANG_NO_THROW Result SLANG_MCALL - map(MemoryRange* rangeToRead, void** outPointer) override - { - D3D12_RANGE range = {}; - if (rangeToRead) - { - range.Begin = (SIZE_T)rangeToRead->offset; - range.End = (SIZE_T)(rangeToRead->offset + rangeToRead->size); - } - SLANG_RETURN_ON_FAIL(m_resource.getResource()->Map(0, rangeToRead ? 
&range : nullptr, outPointer)); - return SLANG_OK; - } +#define CASE(SRC, DST) \ + case TextureAddressingMode::SRC: \ + return D3D12_TEXTURE_ADDRESS_MODE_##DST - virtual SLANG_NO_THROW Result SLANG_MCALL unmap(MemoryRange* writtenRange) override - { - D3D12_RANGE range = {}; - if (writtenRange) - { - range.Begin = (SIZE_T)writtenRange->offset; - range.End = (SIZE_T)(writtenRange->offset + writtenRange->size); - } - m_resource.getResource()->Unmap(0, writtenRange ? &range : nullptr); - return SLANG_OK; - } + CASE(Wrap, WRAP); + CASE(ClampToEdge, CLAMP); + CASE(ClampToBorder, BORDER); + CASE(MirrorRepeat, MIRROR); + CASE(MirrorOnce, MIRROR_ONCE); - virtual SLANG_NO_THROW Result SLANG_MCALL setDebugName(const char* name) override - { - Parent::setDebugName(name); - m_resource.setDebugName(name); - return SLANG_OK; - } - }; +#undef CASE + } +} - class TextureResourceImpl: public TextureResource +D3D12_COMPARISON_FUNC translateComparisonFunc(ComparisonFunc func) +{ + switch (func) { - public: - typedef TextureResource Parent; - - TextureResourceImpl(const Desc& desc) - : Parent(desc) - , m_defaultState(D3DUtil::getResourceState(desc.defaultState)) - { - } - - ~TextureResourceImpl() - { - if (sharedHandle.handleValue != 0) - { - CloseHandle((HANDLE)sharedHandle.handleValue); - } - } + default: + // TODO: need to report failures + return D3D12_COMPARISON_FUNC_ALWAYS; - D3D12Resource m_resource; - D3D12_RESOURCE_STATES m_defaultState; +#define CASE(FROM, TO) \ + case ComparisonFunc::FROM: \ + return D3D12_COMPARISON_FUNC_##TO - virtual SLANG_NO_THROW Result SLANG_MCALL getNativeResourceHandle(InteropHandle* outHandle) override - { - outHandle->handleValue = (uint64_t)m_resource.getResource(); - outHandle->api = InteropHandleAPI::D3D12; - return SLANG_OK; - } + CASE(Never, NEVER); + CASE(Less, LESS); + CASE(Equal, EQUAL); + CASE(LessEqual, LESS_EQUAL); + CASE(Greater, GREATER); + CASE(NotEqual, NOT_EQUAL); + CASE(GreaterEqual, GREATER_EQUAL); + CASE(Always, ALWAYS); +#undef 
CASE + } +} - virtual SLANG_NO_THROW Result SLANG_MCALL getSharedHandle(InteropHandle* outHandle) override - { - // Check if a shared handle already exists for this resource. - if (sharedHandle.handleValue != 0) - { - *outHandle = sharedHandle; - return SLANG_OK; - } +uint32_t getViewDescriptorCount(const ITransientResourceHeap::Desc& desc) +{ + return Math::Max( + Math::Max( + desc.srvDescriptorCount, + desc.uavDescriptorCount, + desc.accelerationStructureDescriptorCount), + desc.constantBufferDescriptorCount, + 2048u); +} - // If a shared handle doesn't exist, create one and store it. - ComPtr pDevice; - auto pResource = m_resource.getResource(); - pResource->GetDevice(IID_PPV_ARGS(pDevice.writeRef())); - SLANG_RETURN_ON_FAIL(pDevice->CreateSharedHandle(pResource, NULL, GENERIC_ALL, nullptr, (HANDLE*)&outHandle->handleValue)); - outHandle->api = InteropHandleAPI::D3D12; - return SLANG_OK; - } +void initSrvDesc( + IResource::Type resourceType, + const ITextureResource::Desc& textureDesc, + const D3D12_RESOURCE_DESC& desc, + DXGI_FORMAT pixelFormat, + SubresourceRange subresourceRange, + D3D12_SHADER_RESOURCE_VIEW_DESC& descOut) +{ + // create SRV + descOut = D3D12_SHADER_RESOURCE_VIEW_DESC(); - virtual SLANG_NO_THROW Result SLANG_MCALL setDebugName(const char* name) override + descOut.Format = (pixelFormat == DXGI_FORMAT_UNKNOWN) + ? D3DUtil::calcFormat(D3DUtil::USAGE_SRV, desc.Format) + : pixelFormat; + descOut.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + if (desc.DepthOrArraySize == 1) + { + switch (desc.Dimension) { - Parent::setDebugName(name); - m_resource.setDebugName(name); - return SLANG_OK; + case D3D12_RESOURCE_DIMENSION_TEXTURE1D: + descOut.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE1D; + descOut.Texture1D.MipLevels = subresourceRange.mipLevelCount == 0 + ? 
desc.MipLevels - subresourceRange.mipLevel + : subresourceRange.mipLevelCount; + descOut.Texture1D.MostDetailedMip = subresourceRange.mipLevel; + break; + case D3D12_RESOURCE_DIMENSION_TEXTURE2D: + descOut.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D; + descOut.Texture2D.PlaneSlice = + D3DUtil::getPlaneSlice(descOut.Format, subresourceRange.aspectMask); + descOut.Texture2D.ResourceMinLODClamp = 0.0f; + descOut.Texture2D.MipLevels = subresourceRange.mipLevelCount == 0 + ? desc.MipLevels - subresourceRange.mipLevel + : subresourceRange.mipLevelCount; + descOut.Texture2D.MostDetailedMip = subresourceRange.mipLevel; + break; + case D3D12_RESOURCE_DIMENSION_TEXTURE3D: + descOut.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE3D; + descOut.Texture3D.MipLevels = subresourceRange.mipLevelCount == 0 + ? desc.MipLevels - subresourceRange.mipLevel + : subresourceRange.mipLevelCount; + descOut.Texture3D.MostDetailedMip = subresourceRange.mipLevel; + break; + default: + assert(!"Unknown dimension"); } - }; - - class SamplerStateImpl : public SamplerStateBase + } + else if (resourceType == IResource::Type::TextureCube) { - public: - D3D12Descriptor m_descriptor; - Slang::RefPtr m_allocator; - ~SamplerStateImpl() + if (textureDesc.arraySize > 1) { - m_allocator->free(m_descriptor); + descOut.ViewDimension = D3D12_SRV_DIMENSION_TEXTURECUBEARRAY; + + descOut.TextureCubeArray.NumCubes = subresourceRange.layerCount == 0 + ? textureDesc.arraySize + : subresourceRange.layerCount / 6; + descOut.TextureCubeArray.First2DArrayFace = subresourceRange.baseArrayLayer; + descOut.TextureCubeArray.MipLevels = subresourceRange.mipLevelCount == 0 + ? 
desc.MipLevels - subresourceRange.mipLevel + : subresourceRange.mipLevelCount; + descOut.TextureCubeArray.MostDetailedMip = subresourceRange.mipLevel; + descOut.TextureCubeArray.ResourceMinLODClamp = 0; } - virtual SLANG_NO_THROW Result SLANG_MCALL getNativeHandle(InteropHandle* outHandle) override + else { - outHandle->api = InteropHandleAPI::D3D12CpuDescriptorHandle; - outHandle->handleValue = m_descriptor.cpuHandle.ptr; - return SLANG_OK; - } - }; + descOut.ViewDimension = D3D12_SRV_DIMENSION_TEXTURECUBE; - class ResourceViewInternalImpl - { - public: - D3D12Descriptor m_descriptor; - RefPtr m_allocator; - ~ResourceViewInternalImpl() - { - if (m_descriptor.cpuHandle.ptr) - m_allocator->free(m_descriptor); + descOut.TextureCube.MipLevels = subresourceRange.mipLevelCount == 0 + ? desc.MipLevels - subresourceRange.mipLevel + : subresourceRange.mipLevelCount; + descOut.TextureCube.MostDetailedMip = subresourceRange.mipLevel; + descOut.TextureCube.ResourceMinLODClamp = 0; } - }; - - class ResourceViewImpl - : public ResourceViewBase - , public ResourceViewInternalImpl + } + else { - public: - RefPtr m_resource; - virtual SLANG_NO_THROW Result SLANG_MCALL getNativeHandle(InteropHandle* outHandle) override - { - outHandle->api = InteropHandleAPI::D3D12CpuDescriptorHandle; - outHandle->handleValue = m_descriptor.cpuHandle.ptr; - return SLANG_OK; - } - }; + assert(desc.DepthOrArraySize > 1); - class FramebufferLayoutImpl : public FramebufferLayoutBase - { - public: - ShortList m_renderTargets; - bool m_hasDepthStencil = false; - IFramebufferLayout::AttachmentLayout m_depthStencil; - }; - - class FramebufferImpl : public FramebufferBase - { - public: - ShortList> renderTargetViews; - RefPtr depthStencilView; - ShortList renderTargetDescriptors; - struct Color4f + switch (desc.Dimension) { - float values[4]; - }; - ShortList renderTargetClearValues; - D3D12_CPU_DESCRIPTOR_HANDLE depthStencilDescriptor; - DepthStencilClearValue depthStencilClearValue; - }; + case 
D3D12_RESOURCE_DIMENSION_TEXTURE1D: + descOut.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE1DARRAY; + descOut.Texture1D.MostDetailedMip = subresourceRange.mipLevel; + descOut.Texture1D.MipLevels = subresourceRange.mipLevelCount == 0 + ? desc.MipLevels + : subresourceRange.mipLevelCount; + descOut.Texture1DArray.ArraySize = subresourceRange.layerCount == 0 + ? desc.DepthOrArraySize + : subresourceRange.layerCount; + descOut.Texture1DArray.FirstArraySlice = subresourceRange.baseArrayLayer; + descOut.Texture1DArray.ResourceMinLODClamp = 0; + descOut.Texture1DArray.MostDetailedMip = subresourceRange.mipLevel; + descOut.Texture1DArray.MipLevels = subresourceRange.mipLevelCount == 0 + ? desc.MipLevels - subresourceRange.mipLevel + : subresourceRange.mipLevelCount; + break; + case D3D12_RESOURCE_DIMENSION_TEXTURE2D: + descOut.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2DARRAY; + descOut.Texture2DArray.ArraySize = subresourceRange.layerCount == 0 + ? desc.DepthOrArraySize + : subresourceRange.layerCount; + descOut.Texture2DArray.FirstArraySlice = subresourceRange.baseArrayLayer; + descOut.Texture2DArray.PlaneSlice = + D3DUtil::getPlaneSlice(descOut.Format, subresourceRange.aspectMask); + descOut.Texture2DArray.ResourceMinLODClamp = 0; + descOut.Texture2DArray.MostDetailedMip = subresourceRange.mipLevel; + descOut.Texture2DArray.MipLevels = subresourceRange.mipLevelCount == 0 + ? desc.MipLevels - subresourceRange.mipLevel + : subresourceRange.mipLevelCount; + break; + case D3D12_RESOURCE_DIMENSION_TEXTURE3D: + descOut.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE3D; + descOut.Texture3D.MostDetailedMip = subresourceRange.mipLevel; + descOut.Texture3D.MipLevels = subresourceRange.mipLevelCount == 0 + ? 
desc.MipLevels + : subresourceRange.mipLevelCount; + break; - class RenderPassLayoutImpl : public SimpleRenderPassLayout - { - public: - RefPtr m_framebufferLayout; - void init(const IRenderPassLayout::Desc& desc) - { - SimpleRenderPassLayout::init(desc); - m_framebufferLayout = static_cast(desc.framebufferLayout); - m_hasDepthStencil = m_framebufferLayout->m_hasDepthStencil; + default: + assert(!"Unknown dimension"); } - }; + } +} - class InputLayoutImpl : public InputLayoutBase - { - public: - List m_elements; - List m_vertexStreamStrides; - List m_text; ///< Holds all strings to keep in scope - }; +Result initTextureResourceDesc( + D3D12_RESOURCE_DESC& resourceDesc, const ITextureResource::Desc& srcDesc) +{ + const DXGI_FORMAT pixelFormat = D3DUtil::getMapFormat(srcDesc.format); + if (pixelFormat == DXGI_FORMAT_UNKNOWN) + { + return SLANG_FAIL; + } + + const int arraySize = calcEffectiveArraySize(srcDesc); - class PipelineStateImpl : public PipelineStateBase + const D3D12_RESOURCE_DIMENSION dimension = calcResourceDimension(srcDesc.type); + if (dimension == D3D12_RESOURCE_DIMENSION_UNKNOWN) { - public: - PipelineStateImpl(D3D12Device* device) - : m_device(device) - {} - D3D12Device* m_device; - ComPtr m_pipelineState; - void init(const GraphicsPipelineStateDesc& inDesc) - { - PipelineStateDesc pipelineDesc; - pipelineDesc.type = PipelineType::Graphics; - pipelineDesc.graphics = inDesc; - initializeBase(pipelineDesc); - } - void init(const ComputePipelineStateDesc& inDesc) - { - PipelineStateDesc pipelineDesc; - pipelineDesc.type = PipelineType::Compute; - pipelineDesc.compute = inDesc; - initializeBase(pipelineDesc); - } - virtual SLANG_NO_THROW Result SLANG_MCALL getNativeHandle(InteropHandle* outHandle) override - { - SLANG_RETURN_ON_FAIL(ensureAPIPipelineStateCreated()); - outHandle->api = InteropHandleAPI::D3D12; - outHandle->handleValue = reinterpret_cast(m_pipelineState.get()); - return SLANG_OK; - } - virtual Result ensureAPIPipelineStateCreated() 
override; - }; + return SLANG_FAIL; + } -#if SLANG_GFX_HAS_DXR_SUPPORT - class RayTracingPipelineStateImpl : public PipelineStateBase - { - public: - ComPtr m_stateObject; - D3D12Device* m_device; - RayTracingPipelineStateImpl(D3D12Device* device) - : m_device(device) - {} - void init(const RayTracingPipelineStateDesc& inDesc) - { - PipelineStateDesc pipelineDesc; - pipelineDesc.type = PipelineType::RayTracing; - pipelineDesc.rayTracing.set(inDesc); - initializeBase(pipelineDesc); - } - virtual SLANG_NO_THROW Result SLANG_MCALL getNativeHandle(InteropHandle* outHandle) override - { - SLANG_RETURN_ON_FAIL(ensureAPIPipelineStateCreated()); - outHandle->api = InteropHandleAPI::D3D12; - outHandle->handleValue = reinterpret_cast(m_stateObject.get()); - return SLANG_OK; - } - virtual Result ensureAPIPipelineStateCreated() override; - }; -#endif + const int numMipMaps = srcDesc.numMipLevels; + resourceDesc.Dimension = dimension; + resourceDesc.Format = pixelFormat; + resourceDesc.Width = srcDesc.size.width; + resourceDesc.Height = srcDesc.size.height; + resourceDesc.DepthOrArraySize = (srcDesc.size.depth > 1) ? 
srcDesc.size.depth : arraySize; - class QueryPoolImpl : public QueryPoolBase - { - public: - Result init(const IQueryPool::Desc& desc, D3D12Device* device); - - virtual SLANG_NO_THROW Result SLANG_MCALL getResult(SlangInt queryIndex, SlangInt count, uint64_t* data) override - { - m_commandList->Reset(m_commandAllocator, nullptr); - m_commandList->ResolveQueryData( - m_queryHeap, - m_queryType, - (UINT)queryIndex, - (UINT)count, - m_readBackBuffer, - sizeof(uint64_t) * queryIndex); - m_commandList->Close(); - ID3D12CommandList* cmdList = m_commandList; - m_commandQueue->ExecuteCommandLists(1, &cmdList); - m_eventValue++; - m_fence->SetEventOnCompletion(m_eventValue, m_waitEvent); - m_commandQueue->Signal(m_fence, m_eventValue); - WaitForSingleObject(m_waitEvent, INFINITE); - m_commandAllocator->Reset(); - - int8_t* mappedData = nullptr; - D3D12_RANGE readRange = { sizeof(uint64_t) * queryIndex, sizeof(uint64_t) * (queryIndex + count) }; - m_readBackBuffer.getResource()->Map(0, &readRange, (void**)&mappedData); - memcpy(data, mappedData + sizeof(uint64_t) * queryIndex, sizeof(uint64_t) * count); - m_readBackBuffer.getResource()->Unmap(0, nullptr); - return SLANG_OK; - } + resourceDesc.MipLevels = numMipMaps; + resourceDesc.SampleDesc.Count = srcDesc.sampleDesc.numSamples; + resourceDesc.SampleDesc.Quality = srcDesc.sampleDesc.quality; - void writeTimestamp(ID3D12GraphicsCommandList* cmdList, SlangInt index) - { - cmdList->EndQuery(m_queryHeap, D3D12_QUERY_TYPE_TIMESTAMP, (UINT)index); - } + resourceDesc.Flags = D3D12_RESOURCE_FLAG_NONE; + resourceDesc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN; - public: - D3D12_QUERY_TYPE m_queryType; - ComPtr m_queryHeap; - D3D12Resource m_readBackBuffer; - ComPtr m_commandAllocator; - ComPtr m_commandList; - ComPtr m_fence; - ComPtr m_commandQueue; - HANDLE m_waitEvent; - UINT64 m_eventValue = 0; - }; + resourceDesc.Flags |= calcResourceFlags(srcDesc.allowedStates); - /// Implements the IQueryPool interface with a plain buffer. 
- /// Used for query types that does not correspond to a D3D query, - /// such as ray-tracing acceleration structure post-build info. - class PlainBufferProxyQueryPoolImpl - : public QueryPoolBase - { - public: - SLANG_COM_OBJECT_IUNKNOWN_ALL - IQueryPool* getInterface(const Guid& guid) - { - if (guid == GfxGUID::IID_ISlangUnknown || guid == GfxGUID::IID_IQueryPool) - return static_cast(this); - return nullptr; - } - public: - Result init(const IQueryPool::Desc& desc, D3D12Device* device, uint32_t stride); + resourceDesc.Alignment = 0; - virtual SLANG_NO_THROW Result SLANG_MCALL reset() override - { - m_resultDirty = true; - auto encodeInfo = m_device->encodeResourceCommands(); - D3D12_RESOURCE_BARRIER barrier = {}; - barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; - barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_SOURCE; - barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_UNORDERED_ACCESS; - barrier.Transition.pResource = m_bufferResource->m_resource.getResource(); - encodeInfo.d3dCommandList->ResourceBarrier(1, &barrier); - m_device->submitResourceCommandsAndWait(encodeInfo); - return SLANG_OK; - } - virtual SLANG_NO_THROW Result SLANG_MCALL - getResult(SlangInt queryIndex, SlangInt count, uint64_t* data) override - { - if (m_resultDirty) - { - auto encodeInfo = m_device->encodeResourceCommands(); - D3D12_RESOURCE_BARRIER barrier = {}; - barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; - barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_UNORDERED_ACCESS; - barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_SOURCE; - barrier.Transition.pResource = m_bufferResource->m_resource.getResource(); - encodeInfo.d3dCommandList->ResourceBarrier(1, &barrier); - - D3D12Resource stageBuf; - - auto size = (size_t)m_count * m_stride; - D3D12_HEAP_PROPERTIES heapProps; - heapProps.Type = D3D12_HEAP_TYPE_READBACK; - heapProps.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN; - heapProps.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN; - 
heapProps.CreationNodeMask = 1; - heapProps.VisibleNodeMask = 1; - - D3D12_RESOURCE_DESC stagingDesc; - _initBufferResourceDesc(size, stagingDesc); - - SLANG_RETURN_ON_FAIL(stageBuf.initCommitted( - m_device->m_device, - heapProps, - D3D12_HEAP_FLAG_NONE, - stagingDesc, - D3D12_RESOURCE_STATE_COPY_DEST, - nullptr)); - - encodeInfo.d3dCommandList->CopyBufferRegion( - stageBuf, - 0, - m_bufferResource->m_resource.getResource(), - 0, - size); - m_device->submitResourceCommandsAndWait(encodeInfo); - void* ptr = nullptr; - stageBuf.getResource()->Map(0, nullptr, &ptr); - m_result.setCount(m_count * m_stride); - memcpy(m_result.getBuffer(), ptr, m_result.getCount()); - - m_resultDirty = false; - } + if (isDepthFormat(srcDesc.format) && + (srcDesc.allowedStates.contains(ResourceState::ShaderResource) || + srcDesc.allowedStates.contains(ResourceState::UnorderedAccess))) + { + resourceDesc.Format = getTypelessFormatFromDepthFormat(srcDesc.format); + } - memcpy(data, m_result.getBuffer() + queryIndex * m_stride, count * m_stride); + return SLANG_OK; +} - return SLANG_OK; - } - public: - QueryType m_queryType; - RefPtr m_bufferResource; - RefPtr m_device; - List m_result; - bool m_resultDirty = true; - uint32_t m_stride = 0; - uint32_t m_count = 0; - }; +void initBufferResourceDesc(size_t bufferSize, D3D12_RESOURCE_DESC& out) +{ + out = {}; + + out.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; + out.Alignment = 0; + out.Width = bufferSize; + out.Height = 1; + out.DepthOrArraySize = 1; + out.MipLevels = 1; + out.Format = DXGI_FORMAT_UNKNOWN; + out.SampleDesc.Count = 1; + out.SampleDesc.Quality = 0; + out.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; + out.Flags = D3D12_RESOURCE_FLAG_NONE; +} - struct BoundVertexBuffer +Result uploadBufferDataImpl( + ID3D12Device* device, + ID3D12GraphicsCommandList* cmdList, + TransientResourceHeapImpl* transientHeap, + BufferResourceImpl* buffer, + size_t offset, + size_t size, + void* data) +{ + IBufferResource* uploadResource; + size_t 
uploadResourceOffset = 0; + if (buffer->getDesc()->memoryType != MemoryType::Upload) { - RefPtr m_buffer; - int m_offset; - }; + SLANG_RETURN_ON_FAIL(transientHeap->allocateStagingBuffer( + size, uploadResource, uploadResourceOffset, MemoryType::Upload)); + } - struct GraphicsSubmitter : public Submitter - { - virtual void setRootConstantBufferView(int index, D3D12_GPU_VIRTUAL_ADDRESS gpuBufferLocation) override - { - m_commandList->SetGraphicsRootConstantBufferView(index, gpuBufferLocation); - } - virtual void setRootUAV(int index, D3D12_GPU_VIRTUAL_ADDRESS gpuBufferLocation) override - { - m_commandList->SetGraphicsRootUnorderedAccessView(index, gpuBufferLocation); - } - virtual void setRootSRV(int index, D3D12_GPU_VIRTUAL_ADDRESS gpuBufferLocation) override - { - m_commandList->SetGraphicsRootShaderResourceView(index, gpuBufferLocation); - } - virtual void setRootDescriptorTable(int index, D3D12_GPU_DESCRIPTOR_HANDLE baseDescriptor) override - { - m_commandList->SetGraphicsRootDescriptorTable(index, baseDescriptor); - } - void setRootSignature(ID3D12RootSignature* rootSignature) - { - m_commandList->SetGraphicsRootSignature(rootSignature); - } - void setRootConstants( - Index rootParamIndex, - Index dstOffsetIn32BitValues, - Index countOf32BitValues, - void const* srcData) override - { - m_commandList->SetGraphicsRoot32BitConstants(UINT(rootParamIndex), UINT(countOf32BitValues), srcData, UINT(dstOffsetIn32BitValues)); - } - virtual void setPipelineState(PipelineStateBase* pipeline) override - { - auto pipelineImpl = static_cast(pipeline); - m_commandList->SetPipelineState(pipelineImpl->m_pipelineState.get()); - } + D3D12Resource& uploadResourceRef = + (buffer->getDesc()->memoryType == MemoryType::Upload) + ? 
buffer->m_resource + : static_cast(uploadResource)->m_resource; - GraphicsSubmitter(ID3D12GraphicsCommandList* commandList): - m_commandList(commandList) - { - } + D3D12_RANGE readRange = {}; + readRange.Begin = 0; + readRange.End = 0; + void* uploadData; + SLANG_RETURN_ON_FAIL( + uploadResourceRef.getResource()->Map(0, &readRange, reinterpret_cast(&uploadData))); + memcpy((uint8_t*)uploadData + uploadResourceOffset + offset, data, size); + D3D12_RANGE writtenRange = {}; + writtenRange.Begin = uploadResourceOffset + offset; + writtenRange.End = uploadResourceOffset + offset + size; + uploadResourceRef.getResource()->Unmap(0, &writtenRange); + + if (buffer->getDesc()->memoryType != MemoryType::Upload) + { + cmdList->CopyBufferRegion( + buffer->m_resource.getResource(), + offset, + uploadResourceRef.getResource(), + uploadResourceOffset + offset, + size); + } - ID3D12GraphicsCommandList* m_commandList; - }; + return SLANG_OK; +} - struct ComputeSubmitter : public Submitter +Result createNullDescriptor( + ID3D12Device* d3dDevice, + D3D12_CPU_DESCRIPTOR_HANDLE destDescriptor, + const ShaderObjectLayoutImpl::BindingRangeInfo& bindingRange) +{ + switch (bindingRange.bindingType) { - virtual void setRootConstantBufferView(int index, D3D12_GPU_VIRTUAL_ADDRESS gpuBufferLocation) override - { - m_commandList->SetComputeRootConstantBufferView(index, gpuBufferLocation); - } - virtual void setRootUAV(int index, D3D12_GPU_VIRTUAL_ADDRESS gpuBufferLocation) override - { - m_commandList->SetComputeRootUnorderedAccessView(index, gpuBufferLocation); - } - virtual void setRootSRV(int index, D3D12_GPU_VIRTUAL_ADDRESS gpuBufferLocation) override - { - m_commandList->SetComputeRootShaderResourceView(index, gpuBufferLocation); - } - virtual void setRootDescriptorTable(int index, D3D12_GPU_DESCRIPTOR_HANDLE baseDescriptor) override + case slang::BindingType::ConstantBuffer: { - m_commandList->SetComputeRootDescriptorTable(index, baseDescriptor); + D3D12_CONSTANT_BUFFER_VIEW_DESC cbvDesc = 
{}; + cbvDesc.BufferLocation = 0; + cbvDesc.SizeInBytes = 0; + d3dDevice->CreateConstantBufferView(&cbvDesc, destDescriptor); } - void setRootSignature(ID3D12RootSignature* rootSignature) + break; + case slang::BindingType::MutableRawBuffer: { - m_commandList->SetComputeRootSignature(rootSignature); + D3D12_UNORDERED_ACCESS_VIEW_DESC uavDesc = {}; + uavDesc.ViewDimension = D3D12_UAV_DIMENSION_BUFFER; + uavDesc.Buffer.Flags = D3D12_BUFFER_UAV_FLAG_RAW; + uavDesc.Format = DXGI_FORMAT_R32_TYPELESS; + d3dDevice->CreateUnorderedAccessView(nullptr, nullptr, &uavDesc, destDescriptor); } - void setRootConstants( - Index rootParamIndex, - Index dstOffsetIn32BitValues, - Index countOf32BitValues, - void const* srcData) override + break; + case slang::BindingType::MutableTypedBuffer: { - m_commandList->SetComputeRoot32BitConstants(UINT(rootParamIndex), UINT(countOf32BitValues), srcData, UINT(dstOffsetIn32BitValues)); + D3D12_UNORDERED_ACCESS_VIEW_DESC uavDesc = {}; + uavDesc.ViewDimension = D3D12_UAV_DIMENSION_BUFFER; + uavDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; + d3dDevice->CreateUnorderedAccessView(nullptr, nullptr, &uavDesc, destDescriptor); } - virtual void setPipelineState(PipelineStateBase* pipeline) override + break; + case slang::BindingType::RawBuffer: { - auto pipelineImpl = static_cast(pipeline); - m_commandList->SetPipelineState(pipelineImpl->m_pipelineState.get()); + D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc = {}; + srvDesc.ViewDimension = D3D12_SRV_DIMENSION_BUFFER; + srvDesc.Buffer.Flags = D3D12_BUFFER_SRV_FLAG_RAW; + srvDesc.Format = DXGI_FORMAT_R32_TYPELESS; + srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + d3dDevice->CreateShaderResourceView(nullptr, &srvDesc, destDescriptor); } - ComputeSubmitter(ID3D12GraphicsCommandList* commandList) : - m_commandList(commandList) + break; + case slang::BindingType::TypedBuffer: { + D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc = {}; + srvDesc.ViewDimension = D3D12_SRV_DIMENSION_BUFFER; + 
srvDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; + srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + d3dDevice->CreateShaderResourceView(nullptr, &srvDesc, destDescriptor); } - - ID3D12GraphicsCommandList* m_commandList; - }; - - static void _initBufferResourceDesc(size_t bufferSize, D3D12_RESOURCE_DESC& out) - { - out = {}; - - out.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; - out.Alignment = 0; - out.Width = bufferSize; - out.Height = 1; - out.DepthOrArraySize = 1; - out.MipLevels = 1; - out.Format = DXGI_FORMAT_UNKNOWN; - out.SampleDesc.Count = 1; - out.SampleDesc.Quality = 0; - out.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; - out.Flags = D3D12_RESOURCE_FLAG_NONE; - } - - class TransientResourceHeapImpl - : public TransientResourceHeapBaseImpl - , public ID3D12TransientResourceHeap - { - private: - typedef TransientResourceHeapBaseImpl Super; - public: - ComPtr m_commandAllocator; - List> m_d3dCommandListPool; - List> m_commandBufferPool; - uint32_t m_commandListAllocId = 0; - // Wait values for each command queue. 
- struct QueueWaitInfo - { - uint64_t waitValue; - HANDLE fenceEvent; - ComPtr fence = nullptr; - }; - ShortList m_waitInfos; - - QueueWaitInfo& getQueueWaitInfo(uint32_t queueIndex) + break; + case slang::BindingType::Texture: { - if (queueIndex < (uint32_t)m_waitInfos.getCount()) - { - return m_waitInfos[queueIndex]; - } - auto oldCount = m_waitInfos.getCount(); - m_waitInfos.setCount(queueIndex + 1); - for (auto i = oldCount; i < m_waitInfos.getCount(); i++) + D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc = {}; + srvDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; + srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + switch (bindingRange.resourceShape) { - m_waitInfos[i].waitValue = 0; - m_waitInfos[i].fenceEvent = CreateEventEx( - nullptr, - false, - 0, - EVENT_ALL_ACCESS); + case SLANG_TEXTURE_1D: + srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE1D; + break; + case SLANG_TEXTURE_1D_ARRAY: + srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE1DARRAY; + break; + case SLANG_TEXTURE_2D: + srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D; + break; + case SLANG_TEXTURE_2D_ARRAY: + srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2DARRAY; + break; + case SLANG_TEXTURE_3D: + srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE3D; + break; + case SLANG_TEXTURE_CUBE: + srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURECUBE; + break; + case SLANG_TEXTURE_CUBE_ARRAY: + srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURECUBEARRAY; + break; + case SLANG_TEXTURE_2D_MULTISAMPLE: + srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2DMS; + break; + case SLANG_TEXTURE_2D_MULTISAMPLE_ARRAY: + srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2DMSARRAY; + break; + default: + return SLANG_OK; } - return m_waitInfos[queueIndex]; + d3dDevice->CreateShaderResourceView(nullptr, &srvDesc, destDescriptor); } - // During command submission, we need all the descriptor tables that get - // used to come from a single heap (for each descriptor heap type). 
- // - // We will thus keep a single heap of each type that we hope will hold - // all the descriptors that actually get needed in a frame. - ShortList m_viewHeaps; // Cbv, Srv, Uav - ShortList m_samplerHeaps; // Heap for samplers - int32_t m_currentViewHeapIndex = -1; - int32_t m_currentSamplerHeapIndex = -1; - bool m_canResize = false; + break; + default: + break; + } + return SLANG_OK; +} +} // namespace + +Result DeviceImpl::createBuffer( + const D3D12_RESOURCE_DESC& resourceDesc, + const void* srcData, + size_t srcDataSize, + D3D12_RESOURCE_STATES finalState, + D3D12Resource& resourceOut, + bool isShared, + MemoryType memoryType) +{ + const size_t bufferSize = size_t(resourceDesc.Width); - uint32_t m_viewHeapSize; - uint32_t m_samplerHeapSize; + D3D12_HEAP_PROPERTIES heapProps; + heapProps.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN; + heapProps.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN; + heapProps.CreationNodeMask = 1; + heapProps.VisibleNodeMask = 1; - D3D12DescriptorHeap& getCurrentViewHeap() { return m_viewHeaps[m_currentViewHeapIndex]; } - D3D12DescriptorHeap& getCurrentSamplerHeap() { return m_samplerHeaps[m_currentSamplerHeapIndex]; } + D3D12_HEAP_FLAGS flags = D3D12_HEAP_FLAG_NONE; + if (isShared) + flags |= D3D12_HEAP_FLAG_SHARED; - D3D12LinearExpandingDescriptorHeap m_stagingCpuViewHeap; - D3D12LinearExpandingDescriptorHeap m_stagingCpuSamplerHeap; + D3D12_RESOURCE_DESC desc = resourceDesc; - virtual SLANG_NO_THROW SlangResult SLANG_MCALL - queryInterface(SlangUUID const& uuid, void** outObject) override - { - if (uuid == GfxGUID::IID_ID3D12TransientResourceHeap) - { - *outObject = static_cast(this); - addRef(); - return SLANG_OK; - } - return Super::queryInterface(uuid, outObject); - } + D3D12_RESOURCE_STATES initialState = finalState; - virtual SLANG_NO_THROW uint32_t SLANG_MCALL addRef() override { return Super::addRef(); } - virtual SLANG_NO_THROW uint32_t SLANG_MCALL release() override { return Super::release(); } + switch 
(memoryType) + { + case MemoryType::ReadBack: + assert(!srcData); - virtual SLANG_NO_THROW Result SLANG_MCALL allocateTransientDescriptorTable( - DescriptorType type, - uint32_t count, - uint64_t& outDescriptorOffset, - void** outD3DDescriptorHeapHandle) override - { - auto& heap = (type == DescriptorType::ResourceView) ? getCurrentViewHeap() - : getCurrentSamplerHeap(); - int allocResult = heap.allocate((int)count); - if (allocResult == -1) - { - return SLANG_E_OUT_OF_MEMORY; - } - outDescriptorOffset = (uint64_t)allocResult; - *outD3DDescriptorHeapHandle = heap.getHeap(); - return SLANG_OK; - } - - ~TransientResourceHeapImpl() - { - synchronizeAndReset(); - for (auto& waitInfo : m_waitInfos) - CloseHandle(waitInfo.fenceEvent); - } - - bool canResize() { return m_canResize; } - - Result init( - const ITransientResourceHeap::Desc& desc, - D3D12Device* device, - uint32_t viewHeapSize, - uint32_t samplerHeapSize) - { - Super::init(desc, D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT, device); - m_canResize = (desc.flags & ITransientResourceHeap::Flags::AllowResizing) != 0; - m_viewHeapSize = viewHeapSize; - m_samplerHeapSize = samplerHeapSize; + heapProps.Type = D3D12_HEAP_TYPE_READBACK; + desc.Flags = D3D12_RESOURCE_FLAG_NONE; + initialState |= D3D12_RESOURCE_STATE_COPY_DEST; - m_stagingCpuViewHeap.init( - device->m_device, - 1000000, - D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, - D3D12_DESCRIPTOR_HEAP_FLAG_NONE); - m_stagingCpuSamplerHeap.init( - device->m_device, - 1000000, - D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER, - D3D12_DESCRIPTOR_HEAP_FLAG_NONE); + break; + case MemoryType::Upload: - auto d3dDevice = device->m_device; - SLANG_RETURN_ON_FAIL(d3dDevice->CreateCommandAllocator( - D3D12_COMMAND_LIST_TYPE_DIRECT, IID_PPV_ARGS(m_commandAllocator.writeRef()))); + heapProps.Type = D3D12_HEAP_TYPE_UPLOAD; + desc.Flags = D3D12_RESOURCE_FLAG_NONE; + initialState |= D3D12_RESOURCE_STATE_GENERIC_READ; - allocateNewViewDescriptorHeap(device); - 
allocateNewSamplerDescriptorHeap(device); + break; + case MemoryType::DeviceLocal: + heapProps.Type = D3D12_HEAP_TYPE_DEFAULT; + initialState = (srcData ? D3D12_RESOURCE_STATE_COPY_DEST : finalState); + break; + default: + return SLANG_FAIL; + } - return SLANG_OK; - } + // Create the resource. + SLANG_RETURN_ON_FAIL( + resourceOut.initCommitted(m_device, heapProps, flags, desc, initialState, nullptr)); - Result allocateNewViewDescriptorHeap(D3D12Device* device) - { - auto nextHeapIndex = m_currentViewHeapIndex + 1; - if (nextHeapIndex < m_viewHeaps.getCount()) - { - m_viewHeaps[nextHeapIndex].deallocateAll(); - m_currentViewHeapIndex = nextHeapIndex; - return SLANG_OK; - } - auto d3dDevice = device->m_device; - D3D12DescriptorHeap viewHeap; - SLANG_RETURN_ON_FAIL(viewHeap.init( - d3dDevice, - m_viewHeapSize, - D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, - D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE)); - m_currentViewHeapIndex = (int32_t)m_viewHeaps.getCount(); - m_viewHeaps.add(_Move(viewHeap)); - return SLANG_OK; - } + if (srcData) + { + D3D12Resource uploadResource; - Result allocateNewSamplerDescriptorHeap(D3D12Device* device) + if (memoryType == MemoryType::DeviceLocal) { - auto nextHeapIndex = m_currentSamplerHeapIndex + 1; - if (nextHeapIndex < m_samplerHeaps.getCount()) - { - m_samplerHeaps[nextHeapIndex].deallocateAll(); - m_currentSamplerHeapIndex = nextHeapIndex; - return SLANG_OK; - } - auto d3dDevice = device->m_device; - D3D12DescriptorHeap samplerHeap; - SLANG_RETURN_ON_FAIL(samplerHeap.init( - d3dDevice, - m_samplerHeapSize, - D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER, - D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE)); - m_currentSamplerHeapIndex = (int32_t)m_samplerHeaps.getCount(); - m_samplerHeaps.add(_Move(samplerHeap)); - return SLANG_OK; - } - - virtual SLANG_NO_THROW Result SLANG_MCALL - createCommandBuffer(ICommandBuffer** outCommandBuffer) override; - - virtual SLANG_NO_THROW Result SLANG_MCALL synchronizeAndReset() override; - }; + // If the buffer is on 
the default heap, create upload buffer. + D3D12_RESOURCE_DESC uploadDesc(resourceDesc); + uploadDesc.Flags = D3D12_RESOURCE_FLAG_NONE; + heapProps.Type = D3D12_HEAP_TYPE_UPLOAD; - static Result _uploadBufferData( - ID3D12Device* device, - ID3D12GraphicsCommandList* cmdList, - TransientResourceHeapImpl* transientHeap, - BufferResourceImpl* buffer, - size_t offset, - size_t size, - void* data) - { - IBufferResource* uploadResource; - size_t uploadResourceOffset = 0; - if (buffer->getDesc()->memoryType != MemoryType::Upload) - { - SLANG_RETURN_ON_FAIL(transientHeap->allocateStagingBuffer( - size, uploadResource, uploadResourceOffset, MemoryType::Upload)); + SLANG_RETURN_ON_FAIL(uploadResource.initCommitted( + m_device, + heapProps, + D3D12_HEAP_FLAG_NONE, + uploadDesc, + D3D12_RESOURCE_STATE_GENERIC_READ, + nullptr)); } + // Be careful not to actually copy a resource here. D3D12Resource& uploadResourceRef = - (buffer->getDesc()->memoryType == MemoryType::Upload) - ? buffer->m_resource - : static_cast(uploadResource)->m_resource; - - D3D12_RANGE readRange = {}; - readRange.Begin = 0; - readRange.End = 0; - void* uploadData; - SLANG_RETURN_ON_FAIL(uploadResourceRef.getResource()->Map( - 0, &readRange, reinterpret_cast(&uploadData))); - memcpy((uint8_t*)uploadData + uploadResourceOffset + offset, data, size); - D3D12_RANGE writtenRange = {}; - writtenRange.Begin = uploadResourceOffset + offset; - writtenRange.End = uploadResourceOffset + offset + size; - uploadResourceRef.getResource()->Unmap(0, &writtenRange); - - if (buffer->getDesc()->memoryType != MemoryType::Upload) - { - cmdList->CopyBufferRegion( - buffer->m_resource.getResource(), - offset, - uploadResourceRef.getResource(), - uploadResourceOffset + offset, - size); - } - - return SLANG_OK; - } + (memoryType == MemoryType::DeviceLocal) ? uploadResource : resourceOut; - class CommandBufferImpl; + // Copy data to the intermediate upload heap and then schedule a copy + // from the upload heap to the vertex buffer. 
+ UINT8* dstData; + D3D12_RANGE readRange = {}; // We do not intend to read from this resource on the CPU. - class PipelineCommandEncoder - { - public: - bool m_isOpen = false; - bool m_bindingDirty = true; - CommandBufferImpl* m_commandBuffer; - TransientResourceHeapImpl* m_transientHeap; - D3D12Device* m_renderer; - ID3D12Device* m_device; - ID3D12GraphicsCommandList* m_d3dCmdList; - ID3D12GraphicsCommandList* m_preCmdList = nullptr; - - RefPtr m_currentPipeline; + ID3D12Resource* dxUploadResource = uploadResourceRef.getResource(); - static int getBindPointIndex(PipelineType type) - { - switch (type) - { - case PipelineType::Graphics: - return 0; - case PipelineType::Compute: - return 1; - case PipelineType::RayTracing: - return 2; - default: - assert(!"unknown pipeline type."); - return -1; - } - } + SLANG_RETURN_ON_FAIL( + dxUploadResource->Map(0, &readRange, reinterpret_cast(&dstData))); + ::memcpy(dstData, srcData, srcDataSize); + dxUploadResource->Unmap(0, nullptr); - void init(CommandBufferImpl* commandBuffer) + if (memoryType == MemoryType::DeviceLocal) { - m_commandBuffer = commandBuffer; - m_d3dCmdList = m_commandBuffer->m_cmdList; - m_renderer = commandBuffer->m_renderer; - m_transientHeap = commandBuffer->m_transientHeap; - m_device = commandBuffer->m_renderer->m_device; + auto encodeInfo = encodeResourceCommands(); + encodeInfo.d3dCommandList->CopyBufferRegion( + resourceOut, 0, uploadResourceRef, 0, bufferSize); + submitResourceCommandsAndWait(encodeInfo); } + } - void endEncodingImpl() { m_isOpen = false; } + return SLANG_OK; +} - Result bindPipelineImpl(IPipelineState* pipelineState, IShaderObject** outRootObject) - { - m_currentPipeline = static_cast(pipelineState); - auto rootObject = &m_commandBuffer->m_rootShaderObject; - m_commandBuffer->m_mutableRootShaderObject = nullptr; - SLANG_RETURN_ON_FAIL(rootObject->reset( - m_renderer, - m_currentPipeline->getProgram()->m_rootObjectLayout, - m_commandBuffer->m_transientHeap)); - *outRootObject = 
rootObject; - m_bindingDirty = true; - return SLANG_OK; - } +Result DeviceImpl::captureTextureToSurface( + TextureResourceImpl* resourceImpl, + ResourceState state, + ISlangBlob** outBlob, + size_t* outRowPitch, + size_t* outPixelSize) +{ + auto& resource = resourceImpl->m_resource; - Result bindPipelineWithRootObjectImpl(IPipelineState* pipelineState, IShaderObject* rootObject) - { - m_currentPipeline = static_cast(pipelineState); - m_commandBuffer->m_mutableRootShaderObject = static_cast(rootObject); - m_bindingDirty = true; - return SLANG_OK; - } + const D3D12_RESOURCE_STATES initialState = D3DUtil::getResourceState(state); - /// Specializes the pipeline according to current root-object argument values, - /// applys the root object bindings and binds the pipeline state. - /// The newly specialized pipeline is held alive by the pipeline cache so users of - /// `newPipeline` do not need to maintain its lifespan. - Result _bindRenderState(Submitter* submitter, RefPtr& newPipeline); - }; + const ITextureResource::Desc& gfxDesc = *resourceImpl->getDesc(); + const D3D12_RESOURCE_DESC desc = resource.getResource()->GetDesc(); - struct DescriptorTable + // Don't bother supporting MSAA for right now + if (desc.SampleDesc.Count > 1) { - DescriptorHeapReference m_heap; - uint32_t m_offset = 0; - uint32_t m_count = 0; - - SLANG_FORCE_INLINE uint32_t getDescriptorCount() const { return m_count; } + fprintf(stderr, "ERROR: cannot capture multi-sample texture\n"); + return SLANG_FAIL; + } - /// Get the GPU handle at the specified index - SLANG_FORCE_INLINE D3D12_GPU_DESCRIPTOR_HANDLE getGpuHandle(uint32_t index = 0) const - { - SLANG_ASSERT(index < getDescriptorCount()); - return m_heap.getGpuHandle(m_offset + index); - } + FormatInfo formatInfo; + gfxGetFormatInfo(gfxDesc.format, &formatInfo); + size_t bytesPerPixel = formatInfo.blockSizeInBytes / formatInfo.pixelsPerBlock; + size_t rowPitch = int(desc.Width) * bytesPerPixel; + static const size_t align = 256; // D3D requires 
minimum 256 byte alignment for texture data. + rowPitch = (rowPitch + align - 1) & ~(align - 1); // Bit trick for rounding up + size_t bufferSize = rowPitch * int(desc.Height); + if (outRowPitch) + *outRowPitch = rowPitch; + if (outPixelSize) + *outPixelSize = bytesPerPixel; - /// Get the CPU handle at the specified index - SLANG_FORCE_INLINE D3D12_CPU_DESCRIPTOR_HANDLE getCpuHandle(uint32_t index = 0) const - { - SLANG_ASSERT(index < getDescriptorCount()); - return m_heap.getCpuHandle(m_offset + index); - } + D3D12Resource stagingResource; + { + D3D12_RESOURCE_DESC stagingDesc; + initBufferResourceDesc(bufferSize, stagingDesc); - void freeIfSupported() - { - if(m_count) - { - m_heap.freeIfSupported(m_offset, m_count); - m_offset = 0; - m_count = 0; - } - } + D3D12_HEAP_PROPERTIES heapProps; + heapProps.Type = D3D12_HEAP_TYPE_READBACK; + heapProps.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN; + heapProps.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN; + heapProps.CreationNodeMask = 1; + heapProps.VisibleNodeMask = 1; - bool allocate(uint32_t count) - { - auto allocatedOffset = m_heap.allocate(count); - if (allocatedOffset == -1) - return false; - m_offset = allocatedOffset; - m_count = count; - return true; - } + SLANG_RETURN_ON_FAIL(stagingResource.initCommitted( + m_device, + heapProps, + D3D12_HEAP_FLAG_NONE, + stagingDesc, + D3D12_RESOURCE_STATE_COPY_DEST, + nullptr)); + } - bool allocate(DescriptorHeapReference heap, uint32_t count) - { - auto allocatedOffset = heap.allocate(count); - if (allocatedOffset == -1) - return false; - m_heap = heap; - m_offset = allocatedOffset; - m_count = count; - return true; - } - }; + auto encodeInfo = encodeResourceCommands(); + auto currentState = D3DUtil::getResourceState(state); - /// Contextual data and operations required when binding shader objects to the pipeline state - struct BindingContext { - PipelineCommandEncoder* encoder; - Submitter* submitter; - TransientResourceHeapImpl* transientHeap; - D3D12Device* 
device; - D3D12_DESCRIPTOR_HEAP_TYPE outOfMemoryHeap; // The type of descriptor heap that is OOM during binding. - }; + D3D12BarrierSubmitter submitter(encodeInfo.d3dCommandList); + resource.transition(currentState, D3D12_RESOURCE_STATE_COPY_SOURCE, submitter); + } - /// A representation of the offset at which to bind a shader parameter or sub-object - struct BindingOffset + // Do the copy { - // Note: When we actually bind a shader object to the pipeline we do not care about - // HLSL-specific notions like `t` registers and `space`s. Those concepts are all - // mediated by the root signature. - // - // Instead, we need to consider the offsets at which the object will be bound - // into the actual D3D12 API state, which consists of the index of the current - // root parameter to bind from, as well as indices into the current descriptor - // tables (for resource views and samplers). + D3D12_TEXTURE_COPY_LOCATION srcLoc; + srcLoc.pResource = resource; + srcLoc.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; + srcLoc.SubresourceIndex = 0; - uint32_t rootParam = 0; - uint32_t resource = 0; - uint32_t sampler = 0; + D3D12_TEXTURE_COPY_LOCATION dstLoc; + dstLoc.pResource = stagingResource; + dstLoc.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; + dstLoc.PlacedFootprint.Offset = 0; + dstLoc.PlacedFootprint.Footprint.Format = desc.Format; + dstLoc.PlacedFootprint.Footprint.Width = UINT(desc.Width); + dstLoc.PlacedFootprint.Footprint.Height = UINT(desc.Height); + dstLoc.PlacedFootprint.Footprint.Depth = 1; + dstLoc.PlacedFootprint.Footprint.RowPitch = UINT(rowPitch); - void operator+=(BindingOffset const& offset) - { - rootParam += offset.rootParam; - resource += offset.resource; - sampler += offset.sampler; - } - }; + encodeInfo.d3dCommandList->CopyTextureRegion(&dstLoc, 0, 0, 0, &srcLoc, nullptr); + } - /// A reprsentation of an allocated descriptor set, consisting of an option resource table and an optional sampler table - struct DescriptorSet { - DescriptorTable 
resourceTable; - DescriptorTable samplerTable; + D3D12BarrierSubmitter submitter(encodeInfo.d3dCommandList); + resource.transition(D3D12_RESOURCE_STATE_COPY_SOURCE, currentState, submitter); + } - void freeIfSupported() - { - resourceTable.freeIfSupported(); - samplerTable .freeIfSupported(); - } - }; + // Submit the copy, and wait for copy to complete + submitResourceCommandsAndWait(encodeInfo); - // Provides information on how binding ranges are stored in descriptor tables for - // a shader object. - // We allocate one CPU descriptor table for each descriptor heap type for the shader - // object. In `ShaderObjectLayoutImpl`, we store the offset into the descriptor tables - // for each binding, so we know where to write the descriptor when the user sets - // a resource or sampler binding. - class ShaderObjectLayoutImpl : public ShaderObjectLayoutBase { - public: - - /// Information about a single logical binding range - struct BindingRangeInfo - { - // Some of the information we store on binding ranges is redundant with - // the information that Slang's reflection information stores, but having - // it here can make the code more compact and obvious. + ID3D12Resource* dxResource = stagingResource; - /// The type of binding in this range. - slang::BindingType bindingType; + UINT8* data; + D3D12_RANGE readRange = {0, bufferSize}; - /// The shape of the resource - SlangResourceShape resourceShape; + SLANG_RETURN_ON_FAIL(dxResource->Map(0, &readRange, reinterpret_cast(&data))); - /// The number of distinct bindings in this range. 
- uint32_t count; + RefPtr resultBlob = new Slang::ListBlob(); + resultBlob->m_data.setCount(bufferSize); + memcpy(resultBlob->m_data.getBuffer(), data, bufferSize); + dxResource->Unmap(0, nullptr); + returnComPtr(outBlob, resultBlob); + return SLANG_OK; + } +} - /// A "flat" index for this range in whatever array provides backing storage for it - uint32_t baseIndex; +Result DeviceImpl::getNativeDeviceHandles(InteropHandles* outHandles) +{ + outHandles->handles[0].handleValue = (uint64_t)m_device; + outHandles->handles[0].api = InteropHandleAPI::D3D12; + return SLANG_OK; +} - /// An index into the sub-object array if this binding range is treated - /// as a sub-object. - uint32_t subObjectIndex; +Result DeviceImpl::_createDevice( + DeviceCheckFlags deviceCheckFlags, + const UnownedStringSlice& nameMatch, + D3D_FEATURE_LEVEL featureLevel, + D3D12DeviceInfo& outDeviceInfo) +{ + if (m_dxDebug && (deviceCheckFlags & DeviceCheckFlag::UseDebug)) + { + m_dxDebug->EnableDebugLayer(); + } - bool isRootParameter; - }; + outDeviceInfo.clear(); - /// Offset information for a sub-object range - struct SubObjectRangeOffset : BindingOffset - { - SubObjectRangeOffset() - {} + ComPtr dxgiFactory; + SLANG_RETURN_ON_FAIL(D3DUtil::createFactory(deviceCheckFlags, dxgiFactory)); - SubObjectRangeOffset(slang::VariableLayoutReflection* varLayout) - { - if(auto pendingLayout = varLayout->getPendingDataLayout()) - { - pendingOrdinaryData = (uint32_t) pendingLayout->getOffset(SLANG_PARAMETER_CATEGORY_UNIFORM); - } - } + List> dxgiAdapters; + SLANG_RETURN_ON_FAIL( + D3DUtil::findAdapters(deviceCheckFlags, nameMatch, dxgiFactory, dxgiAdapters)); - /// The offset for "pending" ordinary data related to this range - uint32_t pendingOrdinaryData = 0; - }; + ComPtr device; + ComPtr adapter; - /// Stride information for a sub-object range - struct SubObjectRangeStride : BindingOffset + for (Index i = 0; i < dxgiAdapters.getCount(); ++i) + { + IDXGIAdapter* dxgiAdapter = dxgiAdapters[i]; + if 
(SLANG_SUCCEEDED( + m_D3D12CreateDevice(dxgiAdapter, featureLevel, IID_PPV_ARGS(device.writeRef())))) { - SubObjectRangeStride() - {} - - SubObjectRangeStride(slang::TypeLayoutReflection* typeLayout) - { - if(auto pendingLayout = typeLayout->getPendingDataTypeLayout()) - { - pendingOrdinaryData = (uint32_t) pendingLayout->getSize(SLANG_PARAMETER_CATEGORY_UNIFORM); - } - } + adapter = dxgiAdapter; + break; + } + } - /// The strid for "pending" ordinary data related to this range - uint32_t pendingOrdinaryData = 0; - }; + if (!device) + { + return SLANG_FAIL; + } - /// Information about a sub-objecrt range - struct SubObjectRangeInfo + if (m_dxDebug && (deviceCheckFlags & DeviceCheckFlag::UseDebug)) + { + ComPtr infoQueue; + if (SLANG_SUCCEEDED(device->QueryInterface(infoQueue.writeRef()))) { - /// The index of the binding range corresponding to this sub-object range - Index bindingRangeIndex = 0; + // Make break + infoQueue->SetBreakOnSeverity(D3D12_MESSAGE_SEVERITY_CORRUPTION, true); + if (m_extendedDesc.debugBreakOnD3D12Error) + { + infoQueue->SetBreakOnSeverity(D3D12_MESSAGE_SEVERITY_ERROR, true); + } + D3D12_MESSAGE_ID hideMessages[] = { + D3D12_MESSAGE_ID_CLEARRENDERTARGETVIEW_MISMATCHINGCLEARVALUE, + D3D12_MESSAGE_ID_CLEARDEPTHSTENCILVIEW_MISMATCHINGCLEARVALUE, + }; + D3D12_INFO_QUEUE_FILTER f = {}; + f.DenyList.NumIDs = (UINT)SLANG_COUNT_OF(hideMessages); + f.DenyList.pIDList = hideMessages; + infoQueue->AddStorageFilterEntries(&f); - /// Layout information for the type of sub-object expected to be bound, if known - RefPtr layout; + // Apparently there is a problem with sm 6.3 with spurious errors, with debug layer + // enabled + D3D12_FEATURE_DATA_SHADER_MODEL featureShaderModel; + featureShaderModel.HighestShaderModel = D3D_SHADER_MODEL(0x63); + SLANG_SUCCEEDED(device->CheckFeatureSupport( + D3D12_FEATURE_SHADER_MODEL, &featureShaderModel, sizeof(featureShaderModel))); - /// The offset to use when binding the first object in this range - 
SubObjectRangeOffset offset; + if (featureShaderModel.HighestShaderModel >= D3D_SHADER_MODEL(0x63)) + { + // Filter out any messages that cause issues + // TODO: Remove this when the debug layers work properly + D3D12_MESSAGE_ID messageIds[] = { + // When the debug layer is enabled this error is triggered sometimes after a + // CopyDescriptorsSimple call The failed check validates that the source and + // destination ranges of the copy do not overlap. The check assumes descriptor + // handles are pointers to memory, but this is not always the case and the check + // fails (even though everything is okay). + D3D12_MESSAGE_ID_COPY_DESCRIPTORS_INVALID_RANGES, + }; - /// Stride between consecutive objects in this range - SubObjectRangeStride stride; - }; + // We filter INFO messages because they are way too many + D3D12_MESSAGE_SEVERITY severities[] = {D3D12_MESSAGE_SEVERITY_INFO}; - struct RootParameterInfo - { - IResourceView::Type type; - }; + D3D12_INFO_QUEUE_FILTER infoQueueFilter = {}; + infoQueueFilter.DenyList.NumSeverities = SLANG_COUNT_OF(severities); + infoQueueFilter.DenyList.pSeverityList = severities; + infoQueueFilter.DenyList.NumIDs = SLANG_COUNT_OF(messageIds); + infoQueueFilter.DenyList.pIDList = messageIds; - static bool isBindingRangeRootParameter( - SlangSession* globalSession, - const char* rootParameterAttributeName, - slang::TypeLayoutReflection* typeLayout, - Index bindingRangeIndex) - { - bool isRootParameter = false; - if (rootParameterAttributeName) - { - if (auto leafVariable = typeLayout->getBindingRangeLeafVariable(bindingRangeIndex)) - { - if (leafVariable->findUserAttributeByName( - globalSession, rootParameterAttributeName)) - { - isRootParameter = true; - } - } + infoQueue->PushStorageFilter(&infoQueueFilter); } - return isRootParameter; } + } - struct Builder - { - public: - Builder(RendererBase* renderer) - : m_renderer(renderer) - {} - - RendererBase* m_renderer; - slang::TypeLayoutReflection* m_elementTypeLayout; - List 
m_bindingRanges; - List m_subObjectRanges; - List m_rootParamsInfo; - - /// The number of sub-objects (not just sub-object *ranges*) stored in instances of this layout - uint32_t m_subObjectCount = 0; - - /// Counters for the number of root parameters, resources, and samplers in this object itself - BindingOffset m_ownCounts; - - /// Counters for the number of root parameters, resources, and sampler in this object and transitive sub-objects - BindingOffset m_totalCounts; - - /// The number of root parameter consumed by (transitive) sub-objects - uint32_t m_childRootParameterCount = 0; + // Get the descs + { + adapter->GetDesc(&outDeviceInfo.m_desc); - /// The total size in bytes of the ordinary data for this object and transitive sub-object. - uint32_t m_totalOrdinaryDataSize = 0; + // Look up GetDesc1 info + ComPtr adapter1; + if (SLANG_SUCCEEDED(adapter->QueryInterface(adapter1.writeRef()))) + { + adapter1->GetDesc1(&outDeviceInfo.m_desc1); + } + } - /// The container type of this shader object. When `m_containerType` is - /// `StructuredBuffer` or `UnsizedArray`, this shader object represents a collection - /// instead of a single object. 
- ShaderObjectContainerType m_containerType = ShaderObjectContainerType::None; + // Save other info + outDeviceInfo.m_device = device; + outDeviceInfo.m_dxgiFactory = dxgiFactory; + outDeviceInfo.m_adapter = adapter; + outDeviceInfo.m_isWarp = D3DUtil::isWarp(dxgiFactory, adapter); + const UINT kMicrosoftVendorId = 5140; + outDeviceInfo.m_isSoftware = + outDeviceInfo.m_isWarp || + ((outDeviceInfo.m_desc1.Flags & DXGI_ADAPTER_FLAG_SOFTWARE) != 0) || + outDeviceInfo.m_desc.VendorId == kMicrosoftVendorId; - Result setElementTypeLayout(slang::TypeLayoutReflection* typeLayout) - { - typeLayout = _unwrapParameterGroups(typeLayout, m_containerType); - m_elementTypeLayout = typeLayout; + return SLANG_OK; +} - // If the type contains any ordinary data, then we must reserve a buffer - // descriptor to hold it when binding as a parameter block. - // - m_totalOrdinaryDataSize = (uint32_t) typeLayout->getSize(); - if (m_totalOrdinaryDataSize != 0) - { - m_ownCounts.resource++; - } +Result DeviceImpl::initialize(const Desc& desc) +{ + SLANG_RETURN_ON_FAIL(RendererBase::initialize(desc)); - // We will scan over the reflected Slang binding ranges and add them - // to our array. There are two main things we compute along the way: - // - // * For each binding range we compute a `flatIndex` that can be - // used to identify where the values for the given range begin - // in the flattened arrays (e.g., `m_objects`) and descriptor - // tables that hold the state of a shader object. - // - // * We also update the various counters taht keep track of the number - // of sub-objects, resources, samplers, etc. that are being - // consumed. These counters will contribute to figuring out - // the descriptor table(s) that might be needed to represent - // the object. 
- // - SlangInt bindingRangeCount = typeLayout->getBindingRangeCount(); - for (SlangInt r = 0; r < bindingRangeCount; ++r) - { - slang::BindingType slangBindingType = typeLayout->getBindingRangeType(r); - uint32_t count = (uint32_t)typeLayout->getBindingRangeBindingCount(r); - slang::TypeLayoutReflection* slangLeafTypeLayout = - typeLayout->getBindingRangeLeafTypeLayout(r); - BindingRangeInfo bindingRangeInfo = {}; - bindingRangeInfo.bindingType = slangBindingType; - bindingRangeInfo.resourceShape = slangLeafTypeLayout->getResourceShape(); - bindingRangeInfo.count = count; - bindingRangeInfo.isRootParameter = isBindingRangeRootParameter( - m_renderer->slangContext.globalSession, - static_cast(m_renderer) - ->m_extendedDesc.rootParameterShaderAttributeName, - typeLayout, - r); - if (bindingRangeInfo.isRootParameter) - { - RootParameterInfo rootInfo = {}; - switch (slangBindingType) - { - case slang::BindingType::RayTracingAccelerationStructure: - rootInfo.type = IResourceView::Type::AccelerationStructure; - break; - case slang::BindingType::RawBuffer: - case slang::BindingType::TypedBuffer: - rootInfo.type = IResourceView::Type::ShaderResource; - break; - case slang::BindingType::MutableRawBuffer: - case slang::BindingType::MutableTypedBuffer: - rootInfo.type = IResourceView::Type::UnorderedAccess; - break; - } - bindingRangeInfo.baseIndex = (uint32_t)m_rootParamsInfo.getCount(); - for (uint32_t i = 0; i < count; i++) - { - m_rootParamsInfo.add(rootInfo); - } - } - else - { - switch (slangBindingType) - { - case slang::BindingType::ConstantBuffer: - case slang::BindingType::ParameterBlock: - case slang::BindingType::ExistentialValue: - bindingRangeInfo.baseIndex = m_subObjectCount; - bindingRangeInfo.subObjectIndex = m_subObjectCount; - m_subObjectCount += count; - break; - case slang::BindingType::RawBuffer: - case slang::BindingType::MutableRawBuffer: - if (slangLeafTypeLayout->getType()->getElementType() != nullptr) - { - // A structured buffer occupies both a 
resource slot and - // a sub-object slot. - bindingRangeInfo.subObjectIndex = m_subObjectCount; - m_subObjectCount += count; - } - bindingRangeInfo.baseIndex = m_ownCounts.resource; - m_ownCounts.resource += count; - break; - case slang::BindingType::Sampler: - bindingRangeInfo.baseIndex = m_ownCounts.sampler; - m_ownCounts.sampler += count; - break; - - case slang::BindingType::CombinedTextureSampler: - // TODO: support this case... - break; - - case slang::BindingType::VaryingInput: - case slang::BindingType::VaryingOutput: - break; - - default: - bindingRangeInfo.baseIndex = m_ownCounts.resource; - m_ownCounts.resource += count; - break; - } - } - m_bindingRanges.add(bindingRangeInfo); - } + // Find extended desc. + for (uint32_t i = 0; i < desc.extendedDescCount; i++) + { + StructType stype; + memcpy(&stype, desc.extendedDescs[i], sizeof(stype)); + if (stype == StructType::D3D12ExtendedDesc) + { + memcpy(&m_extendedDesc, desc.extendedDescs[i], sizeof(m_extendedDesc)); + } + } - // At this point we've computed the number of resources/samplers that - // the type needs to represent its *own* state, and stored those counts - // in `m_ownCounts`. Next we need to consider any resources/samplers - // and root parameters needed to represent the state of the transitive - // sub-objects of this objet, so that we can compute the total size - // of the object when bound to the pipeline. + // Initialize queue index allocator. + // Support max 32 queues. 
+ m_queueIndexAllocator.initPool(32); - m_totalCounts = m_ownCounts; + // Initialize DeviceInfo + { + m_info.deviceType = DeviceType::DirectX12; + m_info.bindingStyle = BindingStyle::DirectX; + m_info.projectionStyle = ProjectionStyle::DirectX; + m_info.apiName = "Direct3D 12"; + static const float kIdentity[] = {1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1}; + ::memcpy(m_info.identityProjectionMatrix, kIdentity, sizeof(kIdentity)); + } - SlangInt subObjectRangeCount = typeLayout->getSubObjectRangeCount(); - for (SlangInt r = 0; r < subObjectRangeCount; ++r) - { - SlangInt bindingRangeIndex = typeLayout->getSubObjectRangeBindingRangeIndex(r); - auto slangBindingType = typeLayout->getBindingRangeType(bindingRangeIndex); - auto count = (uint32_t) typeLayout->getBindingRangeBindingCount(bindingRangeIndex); - slang::TypeLayoutReflection* slangLeafTypeLayout = - typeLayout->getBindingRangeLeafTypeLayout(bindingRangeIndex); - - // A sub-object range can either represent a sub-object of a known - // type, like a `ConstantBuffer` or `ParameterBlock` - // (in which case we can pre-compute a layout to use, based on - // the type `Foo`) *or* it can represent a sub-object of some - // existential type (e.g., `IBar`) in which case we cannot - // know the appropraite type/layout of sub-object to allocate. - // - RefPtr subObjectLayout; - if (slangBindingType == slang::BindingType::ExistentialValue) - { - if(auto pendingTypeLayout = slangLeafTypeLayout->getPendingDataTypeLayout()) - { - createForElementType( - m_renderer, - pendingTypeLayout, - subObjectLayout.writeRef()); - } - } - else - { - createForElementType( - m_renderer, - slangLeafTypeLayout->getElementTypeLayout(), - subObjectLayout.writeRef()); - } + // Rather than statically link against D3D, we load it dynamically. 
- SubObjectRangeInfo subObjectRange; - subObjectRange.bindingRangeIndex = bindingRangeIndex; - subObjectRange.layout = subObjectLayout; + HMODULE d3dModule = LoadLibraryA("d3d12.dll"); + if (!d3dModule) + { + fprintf(stderr, "error: failed load 'd3d12.dll'\n"); + return SLANG_FAIL; + } - // The Slang reflection API stors offset information for sub-object ranges, - // and we care about *some* of that information: in particular, we need - // the offset of sub-objects in terms of uniform/ordinary data for the - // cases where we need to fill in "pending" data in our ordinary buffer. - // - subObjectRange.offset = SubObjectRangeOffset(typeLayout->getSubObjectRangeOffset(r)); - subObjectRange.stride = SubObjectRangeStride(slangLeafTypeLayout); - - // The remaining offset information is computed based on the counters - // we are generating here, which depend only on the in-memory layout - // decisions being made in our implementation. Remember that the - // `register` and `space` values coming from DXBC/DXIL do *not* - // dictate the in-memory layout we use. - // - // Note: One subtle point here is that the `.rootParam` offset we are computing - // here does *not* include any root parameters that would be allocated - // for the parent object type itself (e.g., for descriptor tables - // used if it were bound as a parameter block). The later logic when - // we actually go to bind things will need to apply those offsets. - // - // Note: An even *more* subtle point is that the `.resource` offset - // being computed here *does* include the resource descriptor allocated - // for holding the ordinary data buffer, if any. The implications of - // this for later offset math is subtle. 
- // - subObjectRange.offset.rootParam = m_childRootParameterCount; - subObjectRange.offset.resource = m_totalCounts.resource; - subObjectRange.offset.sampler = m_totalCounts.sampler; - - // Along with the offset information, we also need to compute the - // "stride" between consecutive sub-objects in the range. The actual - // size/stride of a single object depends on the type of range we - // are dealing with. - // - BindingOffset objectCounts; - switch(slangBindingType) - { - default: - { - // We only treat buffers of interface types as actual sub-object binding range. - auto bindingRangeTypeLayout = - typeLayout->getBindingRangeLeafTypeLayout(bindingRangeIndex); - if (!bindingRangeTypeLayout) - continue; - auto elementType = - typeLayout->getBindingRangeLeafTypeLayout(bindingRangeIndex) - ->getElementTypeLayout(); - if (!elementType) - continue; - if (elementType->getKind() != slang::TypeReflection::Kind::Interface) - { - continue; - } - } - break; - - case slang::BindingType::ConstantBuffer: - { - SLANG_ASSERT(subObjectLayout); - - // The resource and sampler descriptors of a nested - // constant buffer will "leak" into those of the - // parent type, and we need to account for them - // whenever we allocate storage. - // - objectCounts.resource = subObjectLayout->getTotalResourceDescriptorCount(); - objectCounts.sampler = subObjectLayout->getTotalSamplerDescriptorCount(); - objectCounts.rootParam = subObjectRange.layout->getChildRootParameterCount(); - } - break; - - case slang::BindingType::ParameterBlock: - { - SLANG_ASSERT(subObjectLayout); - - // In contrast to a constant buffer, a parameter block can hide - // the resource and sampler descriptor allocation it uses (since they - // are allocated into the tables that make up the parameter block. - // - // The only resource usage that leaks into the surrounding context - // is the number of root parameters consumed. 
- // - objectCounts.rootParam = subObjectRange.layout->getTotalRootTableParameterCount(); - } - break; - - case slang::BindingType::ExistentialValue: - // An unspecialized existential/interface value cannot consume any resources - // as part of the parent object (it needs to fit inside the fixed-size - // represnetation of existential types). - // - // However, if we are statically specializing to a type that doesn't "fit" - // we may need to account for additional information that needs to be - // allocaated. - // - if(subObjectLayout) - { - // The ordinary data for an existential-type value is allocated into - // the same buffer as the parent object, so we only want to consider - // the resource descriptors *other than* the ordinary data buffer. - // - // Otherwise the logic here is identical to the constant buffer case. - // - objectCounts.resource = subObjectLayout->getTotalResourceDescriptorCountWithoutOrdinaryDataBuffer(); - objectCounts.sampler = subObjectLayout->getTotalSamplerDescriptorCount(); - objectCounts.rootParam = subObjectRange.layout->getChildRootParameterCount(); - - // Note: In the implementation for some other graphics API (e.g., Vulkan) there - // needs to be more work done to handle the fact that "pending" data from - // interface-type sub-objects get allocated to a distinct offset after all the - // "primary" data. We are consciously ignoring that issue here, and the physical - // layout of a shader object into the D3D12 binding state may end up interleaving - // resources/samplers for "primary" and "pending" data. - // - // If this choice ever causes issues, we can revisit the approach here. - - // An interface-type range that includes ordinary data can - // increase the size of the ordinary data buffer we need to - // allocate for the parent object. 
- // - uint32_t ordinaryDataEnd = subObjectRange.offset.pendingOrdinaryData - + (uint32_t) count * subObjectRange.stride.pendingOrdinaryData; - - if(ordinaryDataEnd > m_totalOrdinaryDataSize) - { - m_totalOrdinaryDataSize = ordinaryDataEnd; - } - } - break; - } + // Get all the dll entry points + m_D3D12SerializeRootSignature = + (PFN_D3D12_SERIALIZE_ROOT_SIGNATURE)loadProc(d3dModule, "D3D12SerializeRootSignature"); + if (!m_D3D12SerializeRootSignature) + { + return SLANG_FAIL; + } - // Once we've computed the usage for each object in the range, we can - // easily compute the usage for the entire range. - // - auto rangeResourceCount = count * objectCounts.resource; - auto rangeSamplerCount = count * objectCounts.sampler; - auto rangeRootParamCount = count * objectCounts.rootParam; + HMODULE pixModule = LoadLibraryW(L"WinPixEventRuntime.dll"); + if (pixModule) + { + m_BeginEventOnCommandList = + (PFN_BeginEventOnCommandList)GetProcAddress(pixModule, "PIXBeginEventOnCommandList"); + m_EndEventOnCommandList = + (PFN_EndEventOnCommandList)GetProcAddress(pixModule, "PIXEndEventOnCommandList"); + } - m_totalCounts.resource += rangeResourceCount; - m_totalCounts.sampler += rangeSamplerCount; - m_childRootParameterCount += rangeRootParamCount; +#if ENABLE_DEBUG_LAYER + m_D3D12GetDebugInterface = + (PFN_D3D12_GET_DEBUG_INTERFACE)loadProc(d3dModule, "D3D12GetDebugInterface"); + if (m_D3D12GetDebugInterface) + { + if (SLANG_SUCCEEDED(m_D3D12GetDebugInterface(IID_PPV_ARGS(m_dxDebug.writeRef())))) + { +# if 0 + // Can enable for extra validation. NOTE! That d3d12 warns if you do.... + // D3D12 MESSAGE : Device Debug Layer Startup Options : GPU - Based Validation is enabled(disabled by default). + // This results in new validation not possible during API calls on the CPU, by creating patched shaders that have validation + // added directly to the shader. 
However, it can slow things down a lot, especially for applications with numerous + // PSOs.Time to see the first render frame may take several minutes. + // [INITIALIZATION MESSAGE #1016: CREATEDEVICE_DEBUG_LAYER_STARTUP_OPTIONS] - m_subObjectRanges.add(subObjectRange); - } + ComPtr debug1; + if (SLANG_SUCCEEDED(m_dxDebug->QueryInterface(debug1.writeRef()))) + { + debug1->SetEnableGPUBasedValidation(true); + } +# endif + } + } +#endif - // Once we have added up the resource usage from all the sub-objects - // we can look at the total number of resources and samplers that - // need to be bound as part of this objects descriptor tables and - // that will allow us to decide whether we need to allocate a root - // parameter for a resource table or not, ans similarly for a - // sampler table. - // - if(m_totalCounts.resource) m_ownCounts.rootParam++; - if(m_totalCounts.sampler) m_ownCounts.rootParam++; + m_D3D12CreateDevice = (PFN_D3D12_CREATE_DEVICE)loadProc(d3dModule, "D3D12CreateDevice"); + if (!m_D3D12CreateDevice) + { + return SLANG_FAIL; + } - m_totalCounts.rootParam = m_ownCounts.rootParam + m_childRootParameterCount; + if (desc.existingDeviceHandles.handles[0].handleValue == 0) + { + FlagCombiner combiner; + // TODO: we should probably provide a command-line option + // to override UseDebug of default rather than leave it + // up to each back-end to specify. 
+#if ENABLE_DEBUG_LAYER + combiner.add( + DeviceCheckFlag::UseDebug, ChangeType::OnOff); ///< First try debug then non debug +#else + combiner.add(DeviceCheckFlag::UseDebug, ChangeType::Off); ///< Don't bother with debug +#endif + combiner.add( + DeviceCheckFlag::UseHardwareDevice, + ChangeType::OnOff); ///< First try hardware, then reference - return SLANG_OK; - } + const D3D_FEATURE_LEVEL featureLevel = D3D_FEATURE_LEVEL_11_0; - SlangResult build(ShaderObjectLayoutImpl** outLayout) + const int numCombinations = combiner.getNumCombinations(); + for (int i = 0; i < numCombinations; ++i) + { + if (SLANG_SUCCEEDED(_createDevice( + combiner.getCombination(i), + UnownedStringSlice(desc.adapter), + featureLevel, + m_deviceInfo))) { - auto layout = RefPtr(new ShaderObjectLayoutImpl()); - SLANG_RETURN_ON_FAIL(layout->_init(this)); - - returnRefPtrMove(outLayout, layout); - return SLANG_OK; + break; } - }; + } - static Result createForElementType( - RendererBase* renderer, - slang::TypeLayoutReflection* elementType, - ShaderObjectLayoutImpl** outLayout) + if (!m_deviceInfo.m_adapter) { - Builder builder(renderer); - builder.setElementTypeLayout(elementType); - return builder.build(outLayout); + // Couldn't find an adapter + return SLANG_FAIL; } + } + else + { + // Store the existing device handle in desc in m_deviceInfo + m_deviceInfo.m_device = (ID3D12Device*)desc.existingDeviceHandles.handles[0].handleValue; + } - List const& getBindingRanges() { return m_bindingRanges; } - - Index getBindingRangeCount() { return m_bindingRanges.getCount(); } - - BindingRangeInfo const& getBindingRange(Index index) { return m_bindingRanges[index]; } - - uint32_t getResourceSlotCount() { return m_ownCounts.resource; } - uint32_t getSamplerSlotCount() { return m_ownCounts.sampler; } - Index getSubObjectSlotCount() { return m_subObjectCount; } - Index getSubObjectCount() { return m_subObjectCount; } - - uint32_t getTotalResourceDescriptorCount() { return m_totalCounts.resource; } - uint32_t 
getTotalSamplerDescriptorCount() { return m_totalCounts.sampler; } - - uint32_t getOrdinaryDataBufferCount() { return m_totalOrdinaryDataSize ? 1 : 0; } - bool hasOrdinaryDataBuffer() { return m_totalOrdinaryDataSize != 0; } - - uint32_t getTotalResourceDescriptorCountWithoutOrdinaryDataBuffer() { return m_totalCounts.resource - getOrdinaryDataBufferCount(); } - - uint32_t getOwnUserRootParameterCount() { return (uint32_t)m_rootParamsInfo.getCount(); } - uint32_t getTotalRootTableParameterCount() { return m_totalCounts.rootParam; } - uint32_t getChildRootParameterCount() { return m_childRootParameterCount; } + // Set the device + m_device = m_deviceInfo.m_device; - uint32_t getTotalOrdinaryDataSize() const { return m_totalOrdinaryDataSize; } + if (m_deviceInfo.m_isSoftware) + { + m_features.add("software-device"); + } + else + { + m_features.add("hardware-device"); + } - SubObjectRangeInfo const& getSubObjectRange(Index index) + // NVAPI + if (desc.nvapiExtnSlot >= 0) + { + if (SLANG_FAILED(NVAPIUtil::initialize())) { - return m_subObjectRanges[index]; + return SLANG_E_NOT_AVAILABLE; } - List const& getSubObjectRanges() { return m_subObjectRanges; } - RendererBase* getRenderer() { return m_renderer; } +#ifdef GFX_NVAPI + // From DOCS: Applications are expected to bind null UAV to this slot. + // NOTE! We don't currently do this, but doesn't seem to be a problem. 
- slang::TypeReflection* getType() { return m_elementTypeLayout->getType(); } + const NvAPI_Status status = + NvAPI_D3D12_SetNvShaderExtnSlotSpace(m_device, NvU32(desc.nvapiExtnSlot), NvU32(0)); - const RootParameterInfo& getRootParameterInfo(Index index) + if (status != NVAPI_OK) { - return m_rootParamsInfo[index]; + return SLANG_E_NOT_AVAILABLE; } - protected: - Result _init(Builder* builder) + if (isSupportedNVAPIOp(m_device, NV_EXTN_OP_UINT64_ATOMIC)) { - auto renderer = builder->m_renderer; - - initBase(renderer, builder->m_elementTypeLayout); - - m_containerType = builder->m_containerType; - - m_bindingRanges = _Move(builder->m_bindingRanges); - m_subObjectRanges = _Move(builder->m_subObjectRanges); - m_rootParamsInfo = _Move(builder->m_rootParamsInfo); - - m_ownCounts = builder->m_ownCounts; - m_totalCounts = builder->m_totalCounts; - m_subObjectCount = builder->m_subObjectCount; - m_childRootParameterCount = builder->m_childRootParameterCount; - m_totalOrdinaryDataSize = builder->m_totalOrdinaryDataSize; - - return SLANG_OK; + m_features.add("atomic-int64"); + } + if (isSupportedNVAPIOp(m_device, NV_EXTN_OP_FP32_ATOMIC)) + { + m_features.add("atomic-float"); } - List m_bindingRanges; - List m_subObjectRanges; - List m_rootParamsInfo; - - BindingOffset m_ownCounts; - BindingOffset m_totalCounts; - - uint32_t m_subObjectCount = 0; - uint32_t m_childRootParameterCount = 0; + m_nvapi = true; +#endif + } - uint32_t m_totalOrdinaryDataSize = 0; - }; + D3D12_FEATURE_DATA_SHADER_MODEL shaderModelData = {}; + shaderModelData.HighestShaderModel = D3D_SHADER_MODEL_6_6; - class RootShaderObjectLayoutImpl : public ShaderObjectLayoutImpl + // Find what features are supported { - typedef ShaderObjectLayoutImpl Super; - - public: - struct EntryPointInfo - { - RefPtr layout; - BindingOffset offset; - }; + // Check this is how this is laid out... 
+ SLANG_COMPILE_TIME_ASSERT(D3D_SHADER_MODEL_6_0 == 0x60); - struct Builder : Super::Builder { - Builder( - RendererBase* renderer, - slang::IComponentType* program, - slang::ProgramLayout* programLayout) - : Super::Builder(renderer) - , m_program(program) - , m_programLayout(programLayout) - {} - - Result build(RootShaderObjectLayoutImpl** outLayout) - { - RefPtr layout = new RootShaderObjectLayoutImpl(); - SLANG_RETURN_ON_FAIL(layout->_init(this)); - - returnRefPtrMove(outLayout, layout); - return SLANG_OK; - } - - void addGlobalParams(slang::VariableLayoutReflection* globalsLayout) - { - setElementTypeLayout(globalsLayout->getTypeLayout()); - } - - void addEntryPoint(SlangStage stage, ShaderObjectLayoutImpl* entryPointLayout) + // TODO: Currently warp causes a crash when using half, so disable for now + if (SLANG_SUCCEEDED(m_device->CheckFeatureSupport( + D3D12_FEATURE_SHADER_MODEL, &shaderModelData, sizeof(shaderModelData))) && + m_deviceInfo.m_isWarp == false && shaderModelData.HighestShaderModel >= 0x62) { - EntryPointInfo info; - info.layout = entryPointLayout; - - info.offset.resource = m_totalCounts.resource; - info.offset.sampler = m_totalCounts.sampler; - info.offset.rootParam = m_childRootParameterCount; - - m_totalCounts.resource += entryPointLayout->getTotalResourceDescriptorCount(); - m_totalCounts.sampler += entryPointLayout->getTotalSamplerDescriptorCount(); - - // TODO(tfoley): Check this to make sure it is reasonable... 
- m_childRootParameterCount += entryPointLayout->getChildRootParameterCount(); - - m_entryPoints.add(info); + // With sm_6_2 we have half + m_features.add("half"); } - - slang::IComponentType* m_program; - slang::ProgramLayout* m_programLayout; - List m_entryPoints; - }; - - EntryPointInfo& getEntryPoint(Index index) { return m_entryPoints[index]; } - - List& getEntryPoints() { return m_entryPoints; } - - struct DescriptorSetLayout - { - List m_resourceRanges; - List m_samplerRanges; - uint32_t m_resourceCount = 0; - uint32_t m_samplerCount = 0; - }; - - struct RootSignatureDescBuilder + } { - D3D12Device* m_device; - - RootSignatureDescBuilder(D3D12Device* device) - : m_device(device) - {} - - // We will use one descriptor set for the global scope and one additional - // descriptor set for each `ParameterBlock` binding range in the shader object - // hierarchy, regardless of the shader's `space` indices. - List m_descriptorSets; - List m_rootParameters; - D3D12_ROOT_SIGNATURE_DESC m_rootSignatureDesc = {}; - - static Result translateDescriptorRangeType( - slang::BindingType c, - D3D12_DESCRIPTOR_RANGE_TYPE* outType) - { - switch (c) - { - case slang::BindingType::ConstantBuffer: - *outType = D3D12_DESCRIPTOR_RANGE_TYPE_CBV; - return SLANG_OK; - case slang::BindingType::RawBuffer: - case slang::BindingType::Texture: - case slang::BindingType::TypedBuffer: - case slang::BindingType::RayTracingAccelerationStructure: - *outType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV; - return SLANG_OK; - case slang::BindingType::MutableRawBuffer: - case slang::BindingType::MutableTexture: - case slang::BindingType::MutableTypedBuffer: - *outType = D3D12_DESCRIPTOR_RANGE_TYPE_UAV; - return SLANG_OK; - case slang::BindingType::Sampler: - *outType = D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER; - return SLANG_OK; - default: - return SLANG_FAIL; - } - } - - /// Stores offset information to apply to the reflected register/space for a descriptor range. 
- /// - struct BindingRegisterOffset + D3D12_FEATURE_DATA_D3D12_OPTIONS options; + if (SLANG_SUCCEEDED(m_device->CheckFeatureSupport( + D3D12_FEATURE_D3D12_OPTIONS, &options, sizeof(options)))) { - uint32_t spaceOffset = 0; // The `space` index as specified in shader. - - enum { kRangeTypeCount = 4 }; - - /// An offset to apply for each D3D12 register class, as given - /// by a `D3D12_DESCRIPTOR_RANGE_TYPE`. - /// - /// Note that the `D3D12_DESCRIPTOR_RANGE_TYPE` enumeration has - /// values between 0 and 3, inclusive. - /// - uint32_t offsetForRangeType[kRangeTypeCount] = {0, 0, 0, 0}; + // Check double precision support + if (options.DoublePrecisionFloatShaderOps) + m_features.add("double"); - uint32_t& operator[](D3D12_DESCRIPTOR_RANGE_TYPE type) + // Check conservative-rasterization support + auto conservativeRasterTier = options.ConservativeRasterizationTier; + if (conservativeRasterTier == D3D12_CONSERVATIVE_RASTERIZATION_TIER_3) { - return offsetForRangeType[int(type)]; + m_features.add("conservative-rasterization-3"); + m_features.add("conservative-rasterization-2"); + m_features.add("conservative-rasterization-1"); } - - uint32_t operator[](D3D12_DESCRIPTOR_RANGE_TYPE type) const + else if (conservativeRasterTier == D3D12_CONSERVATIVE_RASTERIZATION_TIER_2) { - return offsetForRangeType[int(type)]; + m_features.add("conservative-rasterization-2"); + m_features.add("conservative-rasterization-1"); } - - BindingRegisterOffset() - {} - - BindingRegisterOffset(slang::VariableLayoutReflection* varLayout) + else if (conservativeRasterTier == D3D12_CONSERVATIVE_RASTERIZATION_TIER_1) { - if(varLayout) - { - spaceOffset = (UINT) varLayout->getOffset(SLANG_PARAMETER_CATEGORY_REGISTER_SPACE); - offsetForRangeType[D3D12_DESCRIPTOR_RANGE_TYPE_CBV] = (UINT) varLayout->getOffset(SLANG_PARAMETER_CATEGORY_CONSTANT_BUFFER); - offsetForRangeType[D3D12_DESCRIPTOR_RANGE_TYPE_SRV] = (UINT) varLayout->getOffset(SLANG_PARAMETER_CATEGORY_SHADER_RESOURCE); - 
offsetForRangeType[D3D12_DESCRIPTOR_RANGE_TYPE_UAV] = (UINT) varLayout->getOffset(SLANG_PARAMETER_CATEGORY_UNORDERED_ACCESS); - offsetForRangeType[D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER] = (UINT) varLayout->getOffset(SLANG_PARAMETER_CATEGORY_SAMPLER_STATE); - } + m_features.add("conservative-rasterization-1"); } - void operator+=(BindingRegisterOffset const& other) + // Check rasterizer ordered views support + if (options.ROVsSupported) { - spaceOffset += other.spaceOffset; - for(int i = 0; i < kRangeTypeCount; ++i) - { - offsetForRangeType[i] += other.offsetForRangeType[i]; - } + m_features.add("rasterizer-ordered-views"); } - - }; - - struct BindingRegisterOffsetPair + } + } + { + D3D12_FEATURE_DATA_D3D12_OPTIONS2 options; + if (SLANG_SUCCEEDED(m_device->CheckFeatureSupport( + D3D12_FEATURE_D3D12_OPTIONS2, &options, sizeof(options)))) { - BindingRegisterOffset primary; - BindingRegisterOffset pending; - - BindingRegisterOffsetPair() - {} - - BindingRegisterOffsetPair(slang::VariableLayoutReflection* varLayout) - : primary(varLayout) - , pending(varLayout->getPendingDataLayout()) - {} - - void operator+=(BindingRegisterOffsetPair const& other) + // Check programmable sample positions support + switch (options.ProgrammableSamplePositionsTier) { - primary += other.primary; - pending += other.pending; + case D3D12_PROGRAMMABLE_SAMPLE_POSITIONS_TIER_2: + m_features.add("programmable-sample-positions-2"); + m_features.add("programmable-sample-positions-1"); + break; + case D3D12_PROGRAMMABLE_SAMPLE_POSITIONS_TIER_1: + m_features.add("programmable-sample-positions-1"); + break; + default: + break; } - }; - /// Add a new descriptor set to the layout being computed. - /// - /// Note that a "descriptor set" in the layout may amount to - /// zero, one, or two different descriptor *tables* in the - /// final D3D12 root signature. Each descriptor set may - /// contain zero or more view ranges (CBV/SRV/UAV) and zero - /// or more sampler ranges. 
It maps to a view descriptor table - /// if the number of view ranges is non-zero and to a sampler - /// descriptor table if the number of sampler ranges is non-zero. - /// - uint32_t addDescriptorSet() - { - auto result = (uint32_t) m_descriptorSets.getCount(); - m_descriptorSets.add(DescriptorSetLayout{}); - return result; } - - Result addDescriptorRange( - Index physicalDescriptorSetIndex, - D3D12_DESCRIPTOR_RANGE_TYPE rangeType, - UINT registerIndex, - UINT spaceIndex, - UINT count, - bool isRootParameter) + } + { + D3D12_FEATURE_DATA_D3D12_OPTIONS3 options; + if (SLANG_SUCCEEDED(m_device->CheckFeatureSupport( + D3D12_FEATURE_D3D12_OPTIONS3, &options, sizeof(options)))) { - if (isRootParameter) - { - D3D12_ROOT_PARAMETER rootParam = {}; - switch (rangeType) - { - case D3D12_DESCRIPTOR_RANGE_TYPE_SRV: - rootParam.ParameterType = D3D12_ROOT_PARAMETER_TYPE_SRV; - break; - case D3D12_DESCRIPTOR_RANGE_TYPE_UAV: - rootParam.ParameterType = D3D12_ROOT_PARAMETER_TYPE_UAV; - break; - default: - getDebugCallback()->handleMessage( - DebugMessageType::Error, - DebugMessageSource::Layer, - "A shader parameter marked as root parameter is neither SRV nor UAV."); - return SLANG_FAIL; - } - rootParam.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; - rootParam.Descriptor.RegisterSpace = spaceIndex; - rootParam.Descriptor.ShaderRegister = registerIndex; - m_rootParameters.add(rootParam); - return SLANG_OK; - } - - auto& descriptorSet = m_descriptorSets[physicalDescriptorSetIndex]; - - D3D12_DESCRIPTOR_RANGE range = {}; - range.RangeType = rangeType; - range.NumDescriptors = count; - range.BaseShaderRegister = registerIndex; - range.RegisterSpace = spaceIndex; - range.OffsetInDescriptorsFromTableStart = D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND; - - if (range.RangeType == D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER) - { - descriptorSet.m_samplerRanges.add(range); - descriptorSet.m_samplerCount += range.NumDescriptors; - } - else + // Check barycentrics support + if 
(options.BarycentricsSupported) { - descriptorSet.m_resourceRanges.add(range); - descriptorSet.m_resourceCount += range.NumDescriptors; + m_features.add("barycentrics"); } - - return SLANG_OK; } - /// Add one descriptor range as specified in Slang reflection information to the layout. - /// - /// The layout information is taken from `typeLayout` for the descriptor - /// range with the given `descriptorRangeIndex` within the logical - /// descriptor set (reflected by Slang) with the given `logicalDescriptorSetIndex`. - /// - /// The `physicalDescriptorSetIndex` is the index in the `m_descriptorSets` array of - /// the descriptor set that the range should be added to. - /// - /// The `offset` encodes information about space and/or register offsets that - /// should be applied to descrptor ranges. - /// - /// This operation can fail if the given descriptor range encodes a range that - /// doesn't map to anything directly supported by D3D12. Higher-level routines - /// will often want to ignore such failures. 
- /// - Result addDescriptorRange( - slang::TypeLayoutReflection* typeLayout, - Index physicalDescriptorSetIndex, - BindingRegisterOffset const& containerOffset, - BindingRegisterOffset const& elementOffset, - Index logicalDescriptorSetIndex, - Index descriptorRangeIndex, - bool isRootParameter) - { - auto bindingType = typeLayout->getDescriptorSetDescriptorRangeType(logicalDescriptorSetIndex, descriptorRangeIndex); - auto count = typeLayout->getDescriptorSetDescriptorRangeDescriptorCount(logicalDescriptorSetIndex, descriptorRangeIndex); - auto index = typeLayout->getDescriptorSetDescriptorRangeIndexOffset(logicalDescriptorSetIndex, descriptorRangeIndex); - auto space = typeLayout->getDescriptorSetSpaceOffset(logicalDescriptorSetIndex); - - D3D12_DESCRIPTOR_RANGE_TYPE rangeType; - SLANG_RETURN_ON_FAIL(translateDescriptorRangeType(bindingType, &rangeType)); - - return addDescriptorRange( - physicalDescriptorSetIndex, - rangeType, - (UINT)index + elementOffset[rangeType], - (UINT)space + containerOffset.spaceOffset, - (UINT)count, - isRootParameter); - } - - /// Add one binding range to the computed layout. - /// - /// The layout information is taken from `typeLayout` for the binding - /// range with the given `bindingRangeIndex`. - /// - /// The `physicalDescriptorSetIndex` is the index in the `m_descriptorSets` array of - /// the descriptor set that the range should be added to. - /// - /// The `offset` encodes information about space and/or register offsets that - /// should be applied to descrptor ranges. - /// - /// Note that a single binding range may encompass zero or more descriptor ranges. 
- /// - void addBindingRange( - slang::TypeLayoutReflection* typeLayout, - Index physicalDescriptorSetIndex, - BindingRegisterOffset const& containerOffset, - BindingRegisterOffset const& elementOffset, - Index bindingRangeIndex) + } + // Check ray tracing support + { + D3D12_FEATURE_DATA_D3D12_OPTIONS5 options; + if (SLANG_SUCCEEDED(m_device->CheckFeatureSupport( + D3D12_FEATURE_D3D12_OPTIONS5, &options, sizeof(options)))) { - auto logicalDescriptorSetIndex = typeLayout->getBindingRangeDescriptorSetIndex(bindingRangeIndex); - auto firstDescriptorRangeIndex = typeLayout->getBindingRangeFirstDescriptorRangeIndex(bindingRangeIndex); - Index descriptorRangeCount = typeLayout->getBindingRangeDescriptorRangeCount(bindingRangeIndex); - bool isRootParameter = isBindingRangeRootParameter( - m_device->slangContext.globalSession, - m_device->m_extendedDesc.rootParameterShaderAttributeName, - typeLayout, - bindingRangeIndex); - for( Index i = 0; i < descriptorRangeCount; ++i ) + if (options.RaytracingTier != D3D12_RAYTRACING_TIER_NOT_SUPPORTED) { - auto descriptorRangeIndex = firstDescriptorRangeIndex + i; - - // Note: we ignore the `Result` returned by `addDescriptorRange()` because we - // want to silently skip any ranges that represent kinds of bindings that - // don't actually exist in D3D12. - // - addDescriptorRange( - typeLayout, - physicalDescriptorSetIndex, - containerOffset, - elementOffset, - logicalDescriptorSetIndex, - descriptorRangeIndex, - isRootParameter); + m_features.add("ray-tracing"); } - } - - void addAsValue( - slang::VariableLayoutReflection* varLayout, - Index physicalDescriptorSetIndex) - { - BindingRegisterOffsetPair offset(varLayout); - addAsValue(varLayout->getTypeLayout(), physicalDescriptorSetIndex, offset, offset); - } - - - /// Add binding ranges and parameter blocks to the root signature. - /// - /// The layout information is taken from `typeLayout` which should - /// be a layout for either a program or an entry point. 
- /// - /// The `physicalDescriptorSetIndex` is the index in the `m_descriptorSets` array of - /// the descriptor set that binding ranges not belonging to nested - /// parameter blocks should be added to. - /// - /// The `offset` encodes information about space and/or register offsets that - /// should be applied to descrptor ranges. - /// - void addAsConstantBuffer( - slang::TypeLayoutReflection* typeLayout, - Index physicalDescriptorSetIndex, - BindingRegisterOffsetPair const& containerOffset, - BindingRegisterOffsetPair const& elementOffset) - { - if(typeLayout->getSize(SLANG_PARAMETER_CATEGORY_UNIFORM) != 0) + if (options.RaytracingTier >= D3D12_RAYTRACING_TIER_1_1) { - auto descriptorRangeType = D3D12_DESCRIPTOR_RANGE_TYPE_CBV; - auto& offsetForRangeType = containerOffset.primary.offsetForRangeType[descriptorRangeType]; - addDescriptorRange( - physicalDescriptorSetIndex, - descriptorRangeType, - offsetForRangeType, - containerOffset.primary.spaceOffset, - 1, - false); + m_features.add("ray-query"); } - - addAsValue(typeLayout, physicalDescriptorSetIndex, containerOffset, elementOffset); } + } + } - void addAsValue( - slang::TypeLayoutReflection* typeLayout, - Index physicalDescriptorSetIndex, - BindingRegisterOffsetPair const& containerOffset, - BindingRegisterOffsetPair const& elementOffset) - { - // Our first task is to add the binding ranges for stuff that is - // directly contained in `typeLayout` rather than via sub-objects. - // - // Our goal is to have the descriptors for directly-contained views/samplers - // always be contiguous in CPU and GPU memory, so that we can write - // to them easily with a single operaiton. - // - Index bindingRangeCount = typeLayout->getBindingRangeCount(); - for (Index bindingRangeIndex = 0; bindingRangeIndex < bindingRangeCount; bindingRangeIndex++) - { - // We will look at the type of each binding range and intentionally - // skip those that represent sub-objects. 
- // - auto bindingType = typeLayout->getBindingRangeType(bindingRangeIndex); - switch(bindingType) - { - case slang::BindingType::ConstantBuffer: - case slang::BindingType::ParameterBlock: - case slang::BindingType::ExistentialValue: - continue; - - default: - break; - } + m_desc = desc; - // For binding ranges that don't represent sub-objects, we will add - // all of the descriptor ranges they encompass to the root signature. - // - addBindingRange( - typeLayout, - physicalDescriptorSetIndex, - containerOffset.primary, - elementOffset.primary, - bindingRangeIndex); - } + // Create a command queue for internal resource transfer operations. + SLANG_RETURN_ON_FAIL(createCommandQueueImpl(m_resourceCommandQueue.writeRef())); + // `CommandQueueImpl` holds a back reference to `D3D12Device`, make it a weak reference here + // since this object is already owned by `D3D12Device`. + m_resourceCommandQueue->breakStrongReferenceToDevice(); + // Retrieve timestamp frequency. + m_resourceCommandQueue->m_d3dQueue->GetTimestampFrequency(&m_info.timestampFrequency); - // Next we need to recursively include everything bound via sub-objects - Index subObjectRangeCount = typeLayout->getSubObjectRangeCount(); - for (Index subObjectRangeIndex = 0; subObjectRangeIndex < subObjectRangeCount; subObjectRangeIndex++) - { - auto bindingRangeIndex = typeLayout->getSubObjectRangeBindingRangeIndex(subObjectRangeIndex); - auto bindingType = typeLayout->getBindingRangeType(bindingRangeIndex); + SLANG_RETURN_ON_FAIL(createTransientResourceHeapImpl( + ITransientResourceHeap::Flags::AllowResizing, + 0, + 8, + 4, + m_resourceCommandTransientHeap.writeRef())); + // `TransientResourceHeap` holds a back reference to `D3D12Device`, make it a weak reference + // here since this object is already owned by `D3D12Device`. 
+ m_resourceCommandTransientHeap->breakStrongReferenceToDevice(); - auto subObjectTypeLayout = typeLayout->getBindingRangeLeafTypeLayout(bindingRangeIndex); + m_cpuViewHeap = new D3D12GeneralExpandingDescriptorHeap(); + SLANG_RETURN_ON_FAIL(m_cpuViewHeap->init( + m_device, + 1024 * 1024, + D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, + D3D12_DESCRIPTOR_HEAP_FLAG_NONE)); + m_cpuSamplerHeap = new D3D12GeneralExpandingDescriptorHeap(); + SLANG_RETURN_ON_FAIL(m_cpuSamplerHeap->init( + m_device, 2048, D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER, D3D12_DESCRIPTOR_HEAP_FLAG_NONE)); - BindingRegisterOffsetPair subObjectRangeContainerOffset = containerOffset; - subObjectRangeContainerOffset += BindingRegisterOffsetPair( - typeLayout->getSubObjectRangeOffset(subObjectRangeIndex)); - BindingRegisterOffsetPair subObjectRangeElementOffset = elementOffset; - subObjectRangeElementOffset += BindingRegisterOffsetPair( - typeLayout->getSubObjectRangeOffset(subObjectRangeIndex)); + m_rtvAllocator = new D3D12GeneralExpandingDescriptorHeap(); + SLANG_RETURN_ON_FAIL(m_rtvAllocator->init( + m_device, 16 * 1024, D3D12_DESCRIPTOR_HEAP_TYPE_RTV, D3D12_DESCRIPTOR_HEAP_FLAG_NONE)); + m_dsvAllocator = new D3D12GeneralExpandingDescriptorHeap(); + SLANG_RETURN_ON_FAIL(m_dsvAllocator->init( + m_device, 1024, D3D12_DESCRIPTOR_HEAP_TYPE_DSV, D3D12_DESCRIPTOR_HEAP_FLAG_NONE)); - switch(bindingType) - { - case slang::BindingType::ConstantBuffer: - { - auto containerVarLayout = subObjectTypeLayout->getContainerVarLayout(); - SLANG_ASSERT(containerVarLayout); - - auto elementVarLayout = subObjectTypeLayout->getElementVarLayout(); - SLANG_ASSERT(elementVarLayout); - - auto elementTypeLayout = elementVarLayout->getTypeLayout(); - SLANG_ASSERT(elementTypeLayout); - - BindingRegisterOffsetPair containerOffset = subObjectRangeContainerOffset; - containerOffset += BindingRegisterOffsetPair(containerVarLayout); - - BindingRegisterOffsetPair elementOffset = subObjectRangeElementOffset; - elementOffset += 
BindingRegisterOffsetPair(elementVarLayout); - - addAsConstantBuffer(elementTypeLayout, physicalDescriptorSetIndex, containerOffset, elementOffset); - } - break; - - case slang::BindingType::ParameterBlock: - { - auto containerVarLayout = subObjectTypeLayout->getContainerVarLayout(); - SLANG_ASSERT(containerVarLayout); - - auto elementVarLayout = subObjectTypeLayout->getElementVarLayout(); - SLANG_ASSERT(elementVarLayout); - - auto elementTypeLayout = elementVarLayout->getTypeLayout(); - SLANG_ASSERT(elementTypeLayout); - - BindingRegisterOffsetPair subDescriptorSetOffset; - subDescriptorSetOffset.primary.spaceOffset = - subObjectRangeElementOffset.primary.spaceOffset; - subDescriptorSetOffset.pending.spaceOffset = - subObjectRangeElementOffset.pending.spaceOffset; - - auto subPhysicalDescriptorSetIndex = addDescriptorSet(); - - BindingRegisterOffsetPair containerOffset = subDescriptorSetOffset; - containerOffset += BindingRegisterOffsetPair(containerVarLayout); - - BindingRegisterOffsetPair elementOffset = subDescriptorSetOffset; - elementOffset += BindingRegisterOffsetPair(elementVarLayout); - - addAsConstantBuffer(elementTypeLayout, subPhysicalDescriptorSetIndex, containerOffset, elementOffset); - } - break; - - case slang::BindingType::ExistentialValue: - { - // Any nested binding ranges in the sub-object will "leak" into the - // binding ranges for the surrounding context. 
- // - auto specializedTypeLayout = subObjectTypeLayout->getPendingDataTypeLayout(); - if(specializedTypeLayout) - { - BindingRegisterOffsetPair pendingOffset; - pendingOffset.primary = subObjectRangeElementOffset.pending; - - addAsValue( - specializedTypeLayout, - physicalDescriptorSetIndex, - pendingOffset, - pendingOffset); - } - } - break; - } - } + ComPtr dxgiDevice; + if (m_deviceInfo.m_adapter) + { + DXGI_ADAPTER_DESC adapterDesc; + m_deviceInfo.m_adapter->GetDesc(&adapterDesc); + m_adapterName = String::fromWString(adapterDesc.Description); + m_info.adapterName = m_adapterName.begin(); + } -// BindingRegisterOffsetPair pendingOffset; -// pendingOffset.primary = offset.pending; -// addPendingResourceBindingRanges(typeLayout, physicalDescriptorSetIndex, pendingOffset); - } + // Initialize DXR interface. +#if SLANG_GFX_HAS_DXR_SUPPORT + m_device->QueryInterface(m_deviceInfo.m_device5.writeRef()); + m_device5 = m_deviceInfo.m_device5.get(); +#endif + // Check shader model version. + SlangCompileTarget compileTarget = SLANG_DXBC; + const char* profileName = "sm_5_1"; + switch (shaderModelData.HighestShaderModel) + { + case D3D_SHADER_MODEL_5_1: + compileTarget = SLANG_DXBC; + profileName = "sm_5_1"; + break; + case D3D_SHADER_MODEL_6_0: + compileTarget = SLANG_DXIL; + profileName = "sm_6_0"; + break; + case D3D_SHADER_MODEL_6_1: + compileTarget = SLANG_DXIL; + profileName = "sm_6_1"; + break; + case D3D_SHADER_MODEL_6_2: + compileTarget = SLANG_DXIL; + profileName = "sm_6_2"; + break; + case D3D_SHADER_MODEL_6_3: + compileTarget = SLANG_DXIL; + profileName = "sm_6_3"; + break; + case D3D_SHADER_MODEL_6_4: + compileTarget = SLANG_DXIL; + profileName = "sm_6_4"; + break; + case D3D_SHADER_MODEL_6_5: + compileTarget = SLANG_DXIL; + profileName = "sm_6_5"; + break; + default: + compileTarget = SLANG_DXIL; + profileName = "sm_6_6"; + break; + } + m_features.add(profileName); + // If user specified a higher shader model than what the system supports, return failure. 
+ int userSpecifiedShaderModel = D3DUtil::getShaderModelFromProfileName(desc.slang.targetProfile); + if (userSpecifiedShaderModel > shaderModelData.HighestShaderModel) + { + getDebugCallback()->handleMessage( + gfx::DebugMessageType::Error, + gfx::DebugMessageSource::Layer, + "The requested shader model is not supported by the system."); + return SLANG_E_NOT_AVAILABLE; + } + SLANG_RETURN_ON_FAIL(slangContext.initialize( + desc.slang, + compileTarget, + profileName, + makeArray(slang::PreprocessorMacroDesc{"__D3D12__", "1"}).getView())); - D3D12_ROOT_SIGNATURE_DESC& build() - { - for (Index i = 0; i < m_descriptorSets.getCount(); i++) - { - auto& descriptorSet = m_descriptorSets[i]; -// D3D12Device::DescriptorSetInfo setInfo; -// setInfo.resourceDescriptorCount = descriptorSet.m_resourceCount; -// setInfo.samplerDescriptorCount = descriptorSet.m_samplerCount; -// outRootDescriptorSetInfos.add(setInfo); - if (descriptorSet.m_resourceRanges.getCount()) - { - D3D12_ROOT_PARAMETER rootParam = {}; - rootParam.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; - rootParam.DescriptorTable.NumDescriptorRanges = - (UINT)descriptorSet.m_resourceRanges.getCount(); - rootParam.DescriptorTable.pDescriptorRanges = - descriptorSet.m_resourceRanges.getBuffer(); - m_rootParameters.add(rootParam); - } - if (descriptorSet.m_samplerRanges.getCount()) - { - D3D12_ROOT_PARAMETER rootParam = {}; - rootParam.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; - rootParam.DescriptorTable.NumDescriptorRanges = - (UINT)descriptorSet.m_samplerRanges.getCount(); - rootParam.DescriptorTable.pDescriptorRanges = - descriptorSet.m_samplerRanges.getBuffer(); - m_rootParameters.add(rootParam); - } - } + // Allocate a D3D12 "command signature" object that matches the behavior + // of a D3D11-style `DrawInstancedIndirect` operation. 
+ { + D3D12_INDIRECT_ARGUMENT_DESC args; + args.Type = D3D12_INDIRECT_ARGUMENT_TYPE_DRAW; - m_rootSignatureDesc.NumParameters = UINT(m_rootParameters.getCount()); - m_rootSignatureDesc.pParameters = m_rootParameters.getBuffer(); + D3D12_COMMAND_SIGNATURE_DESC desc; + desc.ByteStride = sizeof(D3D12_DRAW_ARGUMENTS); + desc.NumArgumentDescs = 1; + desc.pArgumentDescs = &args; + desc.NodeMask = 0; - // TODO: static samplers should be reasonably easy to support... - m_rootSignatureDesc.NumStaticSamplers = 0; - m_rootSignatureDesc.pStaticSamplers = nullptr; + SLANG_RETURN_ON_FAIL(m_device->CreateCommandSignature( + &desc, nullptr, IID_PPV_ARGS(drawIndirectCmdSignature.writeRef()))); + } - // TODO: only set this flag if needed (requires creating root - // signature at same time as pipeline state...). - // - m_rootSignatureDesc.Flags = - D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT; + // Allocate a D3D12 "command signature" object that matches the behavior + // of a D3D11-style `DrawIndexedInstancedIndirect` operation. + { + D3D12_INDIRECT_ARGUMENT_DESC args; + args.Type = D3D12_INDIRECT_ARGUMENT_TYPE_DRAW_INDEXED; - return m_rootSignatureDesc; - } - }; + D3D12_COMMAND_SIGNATURE_DESC desc; + desc.ByteStride = sizeof(D3D12_DRAW_INDEXED_ARGUMENTS); + desc.NumArgumentDescs = 1; + desc.pArgumentDescs = &args; + desc.NodeMask = 0; - static Result createRootSignatureFromSlang( - D3D12Device* device, - RootShaderObjectLayoutImpl* rootLayout, - slang::IComponentType* program, - ID3D12RootSignature** outRootSignature, - ID3DBlob** outError) - { - // We are going to build up the root signature by adding - // binding/descritpor ranges and nested parameter blocks - // based on the computed layout information for `program`. 
- // - RootSignatureDescBuilder builder(device); - auto layout = program->getLayout(); + SLANG_RETURN_ON_FAIL(m_device->CreateCommandSignature( + &desc, nullptr, IID_PPV_ARGS(drawIndexedIndirectCmdSignature.writeRef()))); + } - // The layout information computed by Slang breaks up shader - // parameters into what we can think of as "logical" descriptor - // sets based on whether or not parameters have the same `space`. - // - // We want to basically ignore that decomposition and generate a - // single descriptor set to hold all top-level parameters, and only - // generate distinct descriptor sets when the shader has opted in - // via explicit parameter blocks. - // - // To achieve this goal, we will manually allocate a default descriptor - // set for root parameters in our signature, and then recursively - // add all the binding/descriptor ranges implied by the global-scope - // parameters. - // - auto rootDescriptorSetIndex = builder.addDescriptorSet(); - builder.addAsValue(layout->getGlobalParamsVarLayout(), rootDescriptorSetIndex); + // Allocate a D3D12 "command signature" object that matches the behavior + // of a D3D11-style `Dispatch` operation. + { + D3D12_INDIRECT_ARGUMENT_DESC args; + args.Type = D3D12_INDIRECT_ARGUMENT_TYPE_DISPATCH; - for (SlangUInt i = 0; i < layout->getEntryPointCount(); i++) - { - // Entry-point parameters should also be added to the default root - // descriptor set. - // - // We add the parameters using the "variable layout" for the entry point - // and not just its type layout, to ensure that any offset information is - // applied correctly to the `register` and `space` information for entry-point - // parameters. - // - // Note: When we start to support DXR we will need to handle entry-point parameters - // differently because they will need to map to local root signatures rather than - // being included in the global root signature as is being done here. 
- // - auto entryPoint = layout->getEntryPointByIndex(i); - builder.addAsValue(entryPoint->getVarLayout(), rootDescriptorSetIndex); - } + D3D12_COMMAND_SIGNATURE_DESC desc; + desc.ByteStride = sizeof(D3D12_DISPATCH_ARGUMENTS); + desc.NumArgumentDescs = 1; + desc.pArgumentDescs = &args; + desc.NodeMask = 0; - auto& rootSignatureDesc = builder.build(); + SLANG_RETURN_ON_FAIL(m_device->CreateCommandSignature( + &desc, nullptr, IID_PPV_ARGS(dispatchIndirectCmdSignature.writeRef()))); + } + m_isInitialized = true; + return SLANG_OK; +} - ComPtr signature; - ComPtr error; - if (SLANG_FAILED(device->m_D3D12SerializeRootSignature( - &rootSignatureDesc, - D3D_ROOT_SIGNATURE_VERSION_1, - signature.writeRef(), - error.writeRef()))) - { - getDebugCallback()->handleMessage(DebugMessageType::Error, DebugMessageSource::Layer, "error: D3D12SerializeRootSignature failed"); - if (error) - { - getDebugCallback()->handleMessage( - DebugMessageType::Error, - DebugMessageSource::Driver, - (const char*)error->GetBufferPointer()); - if (outError) - returnComPtr(outError, error); - } - return SLANG_FAIL; - } +Result DeviceImpl::createTransientResourceHeap( + const ITransientResourceHeap::Desc& desc, ITransientResourceHeap** outHeap) +{ + RefPtr heap; + SLANG_RETURN_ON_FAIL(createTransientResourceHeapImpl( + desc.flags, + desc.constantBufferSize, + getViewDescriptorCount(desc), + Math::Max(1024u, desc.samplerDescriptorCount), + heap.writeRef())); + returnComPtr(outHeap, heap); + return SLANG_OK; +} - SLANG_RETURN_ON_FAIL(device->m_device->CreateRootSignature( - 0, - signature->GetBufferPointer(), - signature->GetBufferSize(), - IID_PPV_ARGS(outRootSignature))); - return SLANG_OK; - } +Result DeviceImpl::createCommandQueue(const ICommandQueue::Desc& desc, ICommandQueue** outQueue) +{ + RefPtr queue; + SLANG_RETURN_ON_FAIL(createCommandQueueImpl(queue.writeRef())); + returnComPtr(outQueue, queue); + return SLANG_OK; +} - static Result create( - D3D12Device* device, - slang::IComponentType* 
program, - slang::ProgramLayout* programLayout, - RootShaderObjectLayoutImpl** outLayout, - ID3DBlob** outError) - { - RootShaderObjectLayoutImpl::Builder builder(device, program, programLayout); - builder.addGlobalParams(programLayout->getGlobalParamsVarLayout()); +Result DeviceImpl::createSwapchain( + const ISwapchain::Desc& desc, WindowHandle window, ISwapchain** outSwapchain) +{ + RefPtr swapchain = new SwapchainImpl(); + SLANG_RETURN_ON_FAIL(swapchain->init(this, desc, window)); + returnComPtr(outSwapchain, swapchain); + return SLANG_OK; +} - SlangInt entryPointCount = programLayout->getEntryPointCount(); - for (SlangInt e = 0; e < entryPointCount; ++e) - { - auto slangEntryPoint = programLayout->getEntryPointByIndex(e); - RefPtr entryPointLayout; - SLANG_RETURN_ON_FAIL(ShaderObjectLayoutImpl::createForElementType( - device, slangEntryPoint->getTypeLayout(), entryPointLayout.writeRef())); - builder.addEntryPoint(slangEntryPoint->getStage(), entryPointLayout); - } +SlangResult DeviceImpl::readTextureResource( + ITextureResource* resource, + ResourceState state, + ISlangBlob** outBlob, + size_t* outRowPitch, + size_t* outPixelSize) +{ + return captureTextureToSurface( + static_cast(resource), state, outBlob, outRowPitch, outPixelSize); +} - RefPtr layout; - SLANG_RETURN_ON_FAIL(builder.build(layout.writeRef())); +Result DeviceImpl::getTextureAllocationInfo( + const ITextureResource::Desc& desc, size_t* outSize, size_t* outAlignment) +{ + TextureResource::Desc srcDesc = fixupTextureDesc(desc); + D3D12_RESOURCE_DESC resourceDesc = {}; + initTextureResourceDesc(resourceDesc, srcDesc); + auto allocInfo = m_device->GetResourceAllocationInfo(0, 1, &resourceDesc); + *outSize = (size_t)allocInfo.SizeInBytes; + *outAlignment = (size_t)allocInfo.Alignment; + return SLANG_OK; +} - if (program->getSpecializationParamCount() == 0) - { - // For root object, we would like know the union of all binding slots - // including all sub-objects in the shader-object hierarchy, so at - 
// parameter binding time we can easily know how many GPU descriptor tables - // to create without walking through the shader-object hierarchy again. - // We build out this array along with root signature construction and store - // it in `m_gpuDescriptorSetInfos`. - SLANG_RETURN_ON_FAIL(createRootSignatureFromSlang( - device, - layout, - program, - layout->m_rootSignature.writeRef(), - outError)); - } +Result DeviceImpl::getTextureRowAlignment(size_t* outAlignment) +{ + *outAlignment = D3D12_TEXTURE_DATA_PITCH_ALIGNMENT; + return SLANG_OK; +} - *outLayout = layout.detach(); +Result DeviceImpl::createTextureResource( + const ITextureResource::Desc& descIn, + const ITextureResource::SubresourceData* initData, + ITextureResource** outResource) +{ + // Description of uploading on Dx12 + // https://msdn.microsoft.com/en-us/library/windows/desktop/dn899215%28v=vs.85%29.aspx - return SLANG_OK; - } + TextureResource::Desc srcDesc = fixupTextureDesc(descIn); - slang::IComponentType* getSlangProgram() const { return m_program; } - slang::ProgramLayout* getSlangProgramLayout() const { return m_programLayout; } + D3D12_RESOURCE_DESC resourceDesc = {}; + initTextureResourceDesc(resourceDesc, srcDesc); + const int arraySize = calcEffectiveArraySize(srcDesc); + const int numMipMaps = srcDesc.numMipLevels; - protected: - Result _init(Builder* builder) - { - auto renderer = builder->m_renderer; + RefPtr texture(new TextureResourceImpl(srcDesc)); - SLANG_RETURN_ON_FAIL(Super::_init(builder)); + // Create the target resource + { + D3D12_HEAP_PROPERTIES heapProps; - m_program = builder->m_program; - m_programLayout = builder->m_programLayout; - m_entryPoints = builder->m_entryPoints; - return SLANG_OK; - } + heapProps.Type = D3D12_HEAP_TYPE_DEFAULT; + heapProps.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN; + heapProps.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN; + heapProps.CreationNodeMask = 1; + heapProps.VisibleNodeMask = 1; + + D3D12_HEAP_FLAGS flags = 
D3D12_HEAP_FLAG_NONE; + if (descIn.isShared) + flags |= D3D12_HEAP_FLAG_SHARED; - ComPtr m_program; - slang::ProgramLayout* m_programLayout = nullptr; + D3D12_CLEAR_VALUE clearValue; + D3D12_CLEAR_VALUE* clearValuePtr = &clearValue; + if ((resourceDesc.Flags & (D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET | + D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL)) == 0) + { + clearValuePtr = nullptr; + } + if (isTypelessDepthFormat(resourceDesc.Format)) + { + clearValuePtr = nullptr; + } + clearValue.Format = resourceDesc.Format; + memcpy(clearValue.Color, &descIn.optimalClearValue.color, sizeof(clearValue.Color)); + clearValue.DepthStencil.Depth = descIn.optimalClearValue.depthStencil.depth; + clearValue.DepthStencil.Stencil = descIn.optimalClearValue.depthStencil.stencil; + SLANG_RETURN_ON_FAIL(texture->m_resource.initCommitted( + m_device, + heapProps, + flags, + resourceDesc, + D3D12_RESOURCE_STATE_COPY_DEST, + clearValuePtr)); - List m_entryPoints; + texture->m_resource.setDebugName(L"Texture"); + } - public: - ComPtr m_rootSignature; -// List m_gpuDescriptorSetInfos; - }; + // Calculate the layout + List layouts; + layouts.setCount(numMipMaps); + List mipRowSizeInBytes; + mipRowSizeInBytes.setCount(srcDesc.numMipLevels); + List mipNumRows; + mipNumRows.setCount(numMipMaps); - struct ShaderBinary - { - SlangStage stage; - slang::EntryPointReflection* entryPointInfo; - String actualEntryPointNameInAPI; - List code; - }; + // NOTE! 
This is just the size for one array upload -> not for the whole texture + UInt64 requiredSize = 0; + m_device->GetCopyableFootprints( + &resourceDesc, + 0, + srcDesc.numMipLevels, + 0, + layouts.begin(), + mipNumRows.begin(), + mipRowSizeInBytes.begin(), + &requiredSize); - class ShaderProgramImpl : public ShaderProgramBase + // Sub resource indexing + // https://msdn.microsoft.com/en-us/library/windows/desktop/dn705766(v=vs.85).aspx#subresource_indexing + if (initData) { - public: - RefPtr m_rootObjectLayout; - List m_shaders; + // Create the upload texture + D3D12Resource uploadTexture; - virtual Result createShaderModule( - slang::EntryPointReflection* entryPointInfo, ComPtr kernelCode) override { - ShaderBinary shaderBin; - shaderBin.stage = entryPointInfo->getStage(); - shaderBin.entryPointInfo = entryPointInfo; - shaderBin.code.addRange( - reinterpret_cast(kernelCode->getBufferPointer()), - (Index)kernelCode->getBufferSize()); - m_shaders.add(_Move(shaderBin)); - return SLANG_OK; - } - }; + D3D12_HEAP_PROPERTIES heapProps; - class ShaderObjectImpl - : public ShaderObjectBaseImpl< - ShaderObjectImpl, - ShaderObjectLayoutImpl, - SimpleShaderObjectData> - { - typedef ShaderObjectBaseImpl< - ShaderObjectImpl, - ShaderObjectLayoutImpl, - SimpleShaderObjectData> - Super; - public: - static Result create( - D3D12Device* device, - ShaderObjectLayoutImpl* layout, - ShaderObjectImpl** outShaderObject) - { - auto object = RefPtr(new ShaderObjectImpl()); - SLANG_RETURN_ON_FAIL( - object->init(device, layout, device->m_cpuViewHeap.Ptr(), device->m_cpuSamplerHeap.Ptr())); - returnRefPtrMove(outShaderObject, object); - return SLANG_OK; - } + heapProps.Type = D3D12_HEAP_TYPE_UPLOAD; + heapProps.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN; + heapProps.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN; + heapProps.CreationNodeMask = 1; + heapProps.VisibleNodeMask = 1; - ~ShaderObjectImpl() - { - m_descriptorSet.freeIfSupported(); - } + D3D12_RESOURCE_DESC 
uploadResourceDesc; - RendererBase* getDevice() { return m_device.get(); } + uploadResourceDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; + uploadResourceDesc.Format = DXGI_FORMAT_UNKNOWN; + uploadResourceDesc.Width = requiredSize; + uploadResourceDesc.Height = 1; + uploadResourceDesc.DepthOrArraySize = 1; + uploadResourceDesc.MipLevels = 1; + uploadResourceDesc.SampleDesc.Count = 1; + uploadResourceDesc.SampleDesc.Quality = 0; + uploadResourceDesc.Flags = D3D12_RESOURCE_FLAG_NONE; + uploadResourceDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; + uploadResourceDesc.Alignment = 0; - SLANG_NO_THROW UInt SLANG_MCALL getEntryPointCount() SLANG_OVERRIDE { return 0; } + SLANG_RETURN_ON_FAIL(uploadTexture.initCommitted( + m_device, + heapProps, + D3D12_HEAP_FLAG_NONE, + uploadResourceDesc, + D3D12_RESOURCE_STATE_GENERIC_READ, + nullptr)); - SLANG_NO_THROW Result SLANG_MCALL getEntryPoint(UInt index, IShaderObject** outEntryPoint) - SLANG_OVERRIDE - { - *outEntryPoint = nullptr; - return SLANG_OK; + uploadTexture.setDebugName(L"TextureUpload"); } + // Get the pointer to the upload resource + ID3D12Resource* uploadResource = uploadTexture; - virtual SLANG_NO_THROW const void* SLANG_MCALL getRawData() override + int subResourceIndex = 0; + for (int arrayIndex = 0; arrayIndex < arraySize; arrayIndex++) { - return m_data.getBuffer(); - } + uint8_t* p; + uploadResource->Map(0, nullptr, reinterpret_cast(&p)); - virtual SLANG_NO_THROW size_t SLANG_MCALL getSize() override - { - return (size_t)m_data.getCount(); - } + for (int j = 0; j < numMipMaps; ++j) + { + auto srcSubresource = initData[subResourceIndex + j]; - SLANG_NO_THROW Result SLANG_MCALL - setData(ShaderOffset const& inOffset, void const* data, size_t inSize) SLANG_OVERRIDE - { - Index offset = inOffset.uniformOffset; - Index size = inSize; + const D3D12_PLACED_SUBRESOURCE_FOOTPRINT& layout = layouts[j]; + const D3D12_SUBRESOURCE_FOOTPRINT& footprint = layout.Footprint; - char* dest = m_data.getBuffer(); - Index 
availableSize = m_data.getCount(); + TextureResource::Size mipSize = calcMipSize(srcDesc.size, j); + if (gfxIsCompressedFormat(descIn.format)) + { + mipSize.width = int(D3DUtil::calcAligned(mipSize.width, 4)); + mipSize.height = int(D3DUtil::calcAligned(mipSize.height, 4)); + } - // TODO: We really should bounds-check access rather than silently ignoring sets - // that are too large, but we have several test cases that set more data than - // an object actually stores on several targets... - // - if (offset < 0) - { - size += offset; - offset = 0; - } - if ((offset + size) >= availableSize) - { - size = availableSize - offset; - } + assert( + footprint.Width == mipSize.width && footprint.Height == mipSize.height && + footprint.Depth == mipSize.depth); - memcpy(dest + offset, data, size); + auto mipRowSize = mipRowSizeInBytes[j]; - m_isConstantBufferDirty = true; + const ptrdiff_t dstMipRowPitch = ptrdiff_t(footprint.RowPitch); + const ptrdiff_t srcMipRowPitch = ptrdiff_t(srcSubresource.strideY); - m_version++; + const ptrdiff_t dstMipLayerPitch = ptrdiff_t(footprint.RowPitch * footprint.Height); + const ptrdiff_t srcMipLayerPitch = ptrdiff_t(srcSubresource.strideZ); - return SLANG_OK; - } + // Our outer loop will copy the depth layers one at a time. + // + const uint8_t* srcLayer = (const uint8_t*)srcSubresource.data; + uint8_t* dstLayer = p + layouts[j].Offset; + for (int l = 0; l < mipSize.depth; l++) + { + // Our inner loop will copy the rows one at a time. + // + const uint8_t* srcRow = srcLayer; + uint8_t* dstRow = dstLayer; + int j = gfxIsCompressedFormat(descIn.format) + ? 
4 + : 1; // BC compressed formats are organized into 4x4 blocks + for (int k = 0; k < mipSize.height; k += j) + { + ::memcpy(dstRow, srcRow, (size_t)mipRowSize); - SLANG_NO_THROW Result SLANG_MCALL - setObject(ShaderOffset const& offset, IShaderObject* object) SLANG_OVERRIDE - { - SLANG_RETURN_ON_FAIL(Super::setObject(offset, object)); - if (m_isMutable) - { - auto subObjectIndex = getSubObjectIndex(offset); - if (subObjectIndex >= m_subObjectVersions.getCount()) - m_subObjectVersions.setCount(subObjectIndex + 1); - m_subObjectVersions[subObjectIndex] = - static_cast(object)->m_version; - m_version++; + srcRow += srcMipRowPitch; + dstRow += dstMipRowPitch; + } + + srcLayer += srcMipLayerPitch; + dstLayer += dstMipLayerPitch; + } + + // assert(srcRow == (const uint8_t*)(srcMip.getBuffer() + srcMip.getCount())); } - return SLANG_OK; - } + uploadResource->Unmap(0, nullptr); + + auto encodeInfo = encodeResourceCommands(); + for (int mipIndex = 0; mipIndex < numMipMaps; ++mipIndex) + { + // https://msdn.microsoft.com/en-us/library/windows/desktop/dn903862(v=vs.85).aspx + + D3D12_TEXTURE_COPY_LOCATION src; + src.pResource = uploadTexture; + src.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; + src.PlacedFootprint = layouts[mipIndex]; + + D3D12_TEXTURE_COPY_LOCATION dst; + dst.pResource = texture->m_resource; + dst.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; + dst.SubresourceIndex = subResourceIndex; + encodeInfo.d3dCommandList->CopyTextureRegion(&dst, 0, 0, 0, &src, nullptr); - SLANG_NO_THROW Result SLANG_MCALL - setResource(ShaderOffset const& offset, IResourceView* resourceView) SLANG_OVERRIDE; + subResourceIndex++; + } - SLANG_NO_THROW Result SLANG_MCALL - setSampler(ShaderOffset const& offset, ISamplerState* sampler) SLANG_OVERRIDE + // Block - waiting for copy to complete (so can drop upload texture) + submitResourceCommandsAndWait(encodeInfo); + } + } + { + auto encodeInfo = encodeResourceCommands(); { - if (offset.bindingRangeIndex < 0) - return 
SLANG_E_INVALID_ARG; - auto layout = getLayout(); - if (offset.bindingRangeIndex >= layout->getBindingRangeCount()) - return SLANG_E_INVALID_ARG; - auto& bindingRange = layout->getBindingRange(offset.bindingRangeIndex); - auto samplerImpl = static_cast(sampler); - ID3D12Device* d3dDevice = static_cast(getDevice())->m_device; - d3dDevice->CopyDescriptorsSimple( - 1, - m_descriptorSet.samplerTable.getCpuHandle( - bindingRange.baseIndex + - (int32_t)offset.bindingArrayIndex), - samplerImpl->m_descriptor.cpuHandle, - D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER); - m_version++; - return SLANG_OK; + D3D12BarrierSubmitter submitter(encodeInfo.d3dCommandList); + texture->m_resource.transition( + D3D12_RESOURCE_STATE_COPY_DEST, texture->m_defaultState, submitter); } + submitResourceCommandsAndWait(encodeInfo); + } - SLANG_NO_THROW Result SLANG_MCALL setCombinedTextureSampler( - ShaderOffset const& offset, - IResourceView* textureView, - ISamplerState* sampler) SLANG_OVERRIDE - { -#if 0 - if (offset.bindingRangeIndex < 0) - return SLANG_E_INVALID_ARG; - auto layout = getLayout(); - if (offset.bindingRangeIndex >= layout->getBindingRangeCount()) - return SLANG_E_INVALID_ARG; - auto& bindingRange = layout->getBindingRange(offset.bindingRangeIndex); - auto resourceViewImpl = static_cast(textureView); - ID3D12Device* d3dDevice = static_cast(getDevice())->m_device; - d3dDevice->CopyDescriptorsSimple( - 1, - m_resourceHeap.getCpuHandle( - m_descriptorSet.m_resourceTable + - bindingRange.binding.offsetInDescriptorTable.resource + - (int32_t)offset.bindingArrayIndex), - resourceViewImpl->m_descriptor.cpuHandle, - D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); - auto samplerImpl = static_cast(sampler); - d3dDevice->CopyDescriptorsSimple( - 1, - m_samplerHeap.getCpuHandle( - m_descriptorSet.m_samplerTable + - bindingRange.binding.offsetInDescriptorTable.sampler + - (int32_t)offset.bindingArrayIndex), - samplerImpl->m_descriptor.cpuHandle, - D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER); -#endif - m_version++; 
- return SLANG_OK; - } - protected: - Result init( - D3D12Device* device, - ShaderObjectLayoutImpl* layout, - DescriptorHeapReference viewHeap, - DescriptorHeapReference samplerHeap) - { - m_device = device; + returnComPtr(outResource, texture); + return SLANG_OK; +} - m_layout = layout; +Result DeviceImpl::createTextureFromNativeHandle( + InteropHandle handle, const ITextureResource::Desc& srcDesc, ITextureResource** outResource) +{ + RefPtr texture(new TextureResourceImpl(srcDesc)); - m_cachedTransientHeap = nullptr; - m_cachedTransientHeapVersion = 0; - m_isConstantBufferDirty = true; + if (handle.api == InteropHandleAPI::D3D12) + { + texture->m_resource.setResource((ID3D12Resource*)handle.handleValue); + } + else + { + return SLANG_FAIL; + } - // If the layout tells us that there is any uniform data, - // then we will allocate a CPU memory buffer to hold that data - // while it is being set from the host. - // - // Once the user is done setting the parameters/fields of this - // shader object, we will produce a GPU-memory version of the - // uniform data (which includes values from this object and - // any existential-type sub-objects). - // - size_t uniformSize = layout->getElementTypeLayout()->getSize(); - if (uniformSize) - { - m_data.setCount(uniformSize); - memset(m_data.getBuffer(), 0, uniformSize); - } - m_rootArguments.setCount(layout->getOwnUserRootParameterCount()); - memset( - m_rootArguments.getBuffer(), - 0, - sizeof(D3D12_GPU_VIRTUAL_ADDRESS) * m_rootArguments.getCount()); - // Each shader object will own CPU descriptor heap memory - // for any resource or sampler descriptors it might store - // as part of its value. - // - // This allocate includes a reservation for any constant - // buffer descriptor pertaining to the ordinary data, - // but does *not* include any descriptors that are managed - // as part of sub-objects. 
- // - if (auto resourceCount = layout->getResourceSlotCount()) - { - m_descriptorSet.resourceTable.allocate(viewHeap, resourceCount); + returnComPtr(outResource, texture); + return SLANG_OK; +} - // We must also ensure that the memory for any resources - // referenced by descriptors in this object does not get - // freed while the object is still live. - // - m_boundResources.setCount(resourceCount); - } - if (auto samplerCount = layout->getSamplerSlotCount()) - { - m_descriptorSet.samplerTable.allocate(samplerHeap, samplerCount); - } +Result DeviceImpl::createBufferResource( + const IBufferResource::Desc& descIn, const void* initData, IBufferResource** outResource) +{ + BufferResource::Desc srcDesc = fixupBufferDesc(descIn); - // If the layout specifies that we have any sub-objects, then - // we need to size the array to account for them. - // - Index subObjectCount = layout->getSubObjectSlotCount(); - m_objects.setCount(subObjectCount); + RefPtr buffer(new BufferResourceImpl(srcDesc)); - for (auto subObjectRangeInfo : layout->getSubObjectRanges()) - { - auto subObjectLayout = subObjectRangeInfo.layout; + D3D12_RESOURCE_DESC bufferDesc; + initBufferResourceDesc(descIn.sizeInBytes, bufferDesc); - // In the case where the sub-object range represents an - // existential-type leaf field (e.g., an `IBar`), we - // cannot pre-allocate the object(s) to go into that - // range, since we can't possibly know what to allocate - // at this point. - // - if (!subObjectLayout) - continue; - // - // Otherwise, we will allocate a sub-object to fill - // in each entry in this range, based on the layout - // information we already have. 
+ bufferDesc.Flags |= calcResourceFlags(srcDesc.allowedStates); - auto& bindingRangeInfo = - layout->getBindingRange(subObjectRangeInfo.bindingRangeIndex); - for (uint32_t i = 0; i < bindingRangeInfo.count; ++i) - { - RefPtr subObject; - SLANG_RETURN_ON_FAIL( - ShaderObjectImpl::create(device, subObjectLayout, subObject.writeRef())); - m_objects[bindingRangeInfo.subObjectIndex + i] = subObject; - } - } + const D3D12_RESOURCE_STATES initialState = buffer->m_defaultState; + SLANG_RETURN_ON_FAIL(createBuffer( + bufferDesc, + initData, + srcDesc.sizeInBytes, + initialState, + buffer->m_resource, + descIn.isShared, + descIn.memoryType)); - return SLANG_OK; - } + returnComPtr(outResource, buffer); + return SLANG_OK; +} - /// Write the uniform/ordinary data of this object into the given `dest` buffer at the given - /// `offset` - Result _writeOrdinaryData( - PipelineCommandEncoder* encoder, - BufferResourceImpl* buffer, - size_t offset, - size_t destSize, - ShaderObjectLayoutImpl* specializedLayout) - { - auto src = m_data.getBuffer(); - auto srcSize = size_t(m_data.getCount()); +Result DeviceImpl::createBufferFromNativeHandle( + InteropHandle handle, const IBufferResource::Desc& srcDesc, IBufferResource** outResource) +{ + RefPtr buffer(new BufferResourceImpl(srcDesc)); - SLANG_ASSERT(srcSize <= destSize); + if (handle.api == InteropHandleAPI::D3D12) + { + buffer->m_resource.setResource((ID3D12Resource*)handle.handleValue); + } + else + { + return SLANG_FAIL; + } - _uploadBufferData(encoder->m_device, encoder->m_d3dCmdList, encoder->m_transientHeap, buffer, offset, srcSize, src); + returnComPtr(outResource, buffer); + return SLANG_OK; +} - // In the case where this object has any sub-objects of - // existential/interface type, we need to recurse on those objects - // that need to write their state into an appropriate "pending" allocation. 
- // - // Note: Any values that could fit into the "payload" included - // in the existential-type field itself will have already been - // written as part of `setObject()`. This loop only needs to handle - // those sub-objects that do not "fit." - // - // An implementers looking at this code might wonder if things could be changed - // so that *all* writes related to sub-objects for interface-type fields could - // be handled in this one location, rather than having some in `setObject()` and - // others handled here. - // - Index subObjectRangeCounter = 0; - for (auto const& subObjectRangeInfo : specializedLayout->getSubObjectRanges()) - { - Index subObjectRangeIndex = subObjectRangeCounter++; - auto const& bindingRangeInfo = - specializedLayout->getBindingRange(subObjectRangeInfo.bindingRangeIndex); +Result DeviceImpl::createSamplerState(ISamplerState::Desc const& desc, ISamplerState** outSampler) +{ + D3D12_FILTER_REDUCTION_TYPE dxReduction = translateFilterReduction(desc.reductionOp); + D3D12_FILTER dxFilter; + if (desc.maxAnisotropy > 1) + { + dxFilter = D3D12_ENCODE_ANISOTROPIC_FILTER(dxReduction); + } + else + { + D3D12_FILTER_TYPE dxMin = translateFilterMode(desc.minFilter); + D3D12_FILTER_TYPE dxMag = translateFilterMode(desc.magFilter); + D3D12_FILTER_TYPE dxMip = translateFilterMode(desc.mipFilter); - // We only need to handle sub-object ranges for interface/existential-type fields, - // because fields of constant-buffer or parameter-block type are responsible for - // the ordinary/uniform data of their own existential/interface-type sub-objects. - // - if (bindingRangeInfo.bindingType != slang::BindingType::ExistentialValue) - continue; + dxFilter = D3D12_ENCODE_BASIC_FILTER(dxMin, dxMag, dxMip, dxReduction); + } - // Each sub-object range represents a single "leaf" field, but might be nested - // under zero or more outer arrays, such that the number of existential values - // in the same range can be one or more. 
- // - auto count = bindingRangeInfo.count; + D3D12_SAMPLER_DESC dxDesc = {}; + dxDesc.Filter = dxFilter; + dxDesc.AddressU = translateAddressingMode(desc.addressU); + dxDesc.AddressV = translateAddressingMode(desc.addressV); + dxDesc.AddressW = translateAddressingMode(desc.addressW); + dxDesc.MipLODBias = desc.mipLODBias; + dxDesc.MaxAnisotropy = desc.maxAnisotropy; + dxDesc.ComparisonFunc = translateComparisonFunc(desc.comparisonFunc); + for (int ii = 0; ii < 4; ++ii) + dxDesc.BorderColor[ii] = desc.borderColor[ii]; + dxDesc.MinLOD = desc.minLOD; + dxDesc.MaxLOD = desc.maxLOD; - // We are not concerned with the case where the existential value(s) in the range - // git into the payload part of the leaf field. - // - // In the case where the value didn't fit, the Slang layout strategy would have - // considered the requirements of the value as a "pending" allocation, and would - // allocate storage for the ordinary/uniform part of that pending allocation inside - // of the parent object's type layout. - // - // Here we assume that the Slang reflection API can provide us with a single byte - // offset and stride for the location of the pending data allocation in the - // specialized type layout, which will store the values for this sub-object range. - // - // TODO: The reflection API functions we are assuming here haven't been implemented - // yet, so the functions being called here are stubs. - // - // TODO: It might not be that a single sub-object range can reliably map to a single - // contiguous array with a single stride; we need to carefully consider what the - // layout logic does for complex cases with multiple layers of nested arrays and - // structures. 
- // - size_t subObjectRangePendingDataOffset = subObjectRangeInfo.offset.pendingOrdinaryData; - size_t subObjectRangePendingDataStride = subObjectRangeInfo.stride.pendingOrdinaryData; + auto& samplerHeap = m_cpuSamplerHeap; - // If the range doesn't actually need/use the "pending" allocation at all, then - // we need to detect that case and skip such ranges. - // - // TODO: This should probably be handled on a per-object basis by caching a "does it - // fit?" bit as part of the information for bound sub-objects, given that we already - // compute the "does it fit?" status as part of `setObject()`. - // - if (subObjectRangePendingDataOffset == 0) - continue; + D3D12Descriptor cpuDescriptor; + samplerHeap->allocate(&cpuDescriptor); + m_device->CreateSampler(&dxDesc, cpuDescriptor.cpuHandle); - for (uint32_t i = 0; i < count; ++i) - { - auto subObject = m_objects[bindingRangeInfo.subObjectIndex + i]; + // TODO: We really ought to have a free-list of sampler-heap + // entries that we check before we go to the heap, and then + // when we are done with a sampler we simply add it to the free list. + // + RefPtr samplerImpl = new SamplerStateImpl(); + samplerImpl->m_allocator = samplerHeap; + samplerImpl->m_descriptor = cpuDescriptor; + returnComPtr(outSampler, samplerImpl); + return SLANG_OK; +} - RefPtr subObjectLayout; - SLANG_RETURN_ON_FAIL( - subObject->getSpecializedLayout(subObjectLayout.writeRef())); +Result DeviceImpl::createTextureView( + ITextureResource* texture, IResourceView::Desc const& desc, IResourceView** outView) +{ + auto resourceImpl = (TextureResourceImpl*)texture; - auto subObjectOffset = - subObjectRangePendingDataOffset + i * subObjectRangePendingDataStride; + RefPtr viewImpl = new ResourceViewImpl(); + viewImpl->m_resource = resourceImpl; + viewImpl->m_desc = desc; + bool isArray = resourceImpl ? resourceImpl->getDesc()->arraySize != 0 : false; + bool isMultiSample = resourceImpl ? 
resourceImpl->getDesc()->sampleDesc.numSamples > 1 : false; + switch (desc.type) + { + default: + return SLANG_FAIL; - subObject->_writeOrdinaryData( - encoder, - buffer, - offset + subObjectOffset, - destSize - subObjectOffset, - subObjectLayout); + case IResourceView::Type::RenderTarget: + { + SLANG_RETURN_ON_FAIL(m_rtvAllocator->allocate(&viewImpl->m_descriptor)); + viewImpl->m_allocator = m_rtvAllocator; + D3D12_RENDER_TARGET_VIEW_DESC rtvDesc = {}; + rtvDesc.Format = D3DUtil::getMapFormat(desc.format); + isArray = desc.subresourceRange.layerCount > 1; + switch (desc.renderTarget.shape) + { + case IResource::Type::Texture1D: + rtvDesc.ViewDimension = + isArray ? D3D12_RTV_DIMENSION_TEXTURE1DARRAY : D3D12_RTV_DIMENSION_TEXTURE1D; + rtvDesc.Texture1D.MipSlice = desc.subresourceRange.mipLevel; + break; + case IResource::Type::Texture2D: + if (isMultiSample) + { + rtvDesc.ViewDimension = isArray ? D3D12_RTV_DIMENSION_TEXTURE2DMSARRAY + : D3D12_RTV_DIMENSION_TEXTURE2DMS; + rtvDesc.Texture2DMSArray.ArraySize = desc.subresourceRange.layerCount; + rtvDesc.Texture2DMSArray.FirstArraySlice = desc.subresourceRange.baseArrayLayer; + } + else + { + rtvDesc.ViewDimension = isArray ? D3D12_RTV_DIMENSION_TEXTURE2DARRAY + : D3D12_RTV_DIMENSION_TEXTURE2D; + rtvDesc.Texture2DArray.MipSlice = desc.subresourceRange.mipLevel; + rtvDesc.Texture2DArray.PlaneSlice = + resourceImpl ? 
D3DUtil::getPlaneSlice( + D3DUtil::getMapFormat(resourceImpl->getDesc()->format), + desc.subresourceRange.aspectMask) + : 0; + rtvDesc.Texture2DArray.ArraySize = desc.subresourceRange.layerCount; + rtvDesc.Texture2DArray.FirstArraySlice = desc.subresourceRange.baseArrayLayer; } + break; + case IResource::Type::Texture3D: + rtvDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE3D; + rtvDesc.Texture3D.MipSlice = desc.subresourceRange.mipLevel; + rtvDesc.Texture3D.FirstWSlice = desc.subresourceRange.baseArrayLayer; + rtvDesc.Texture3D.WSize = desc.subresourceRange.layerCount; + break; + case IResource::Type::Buffer: + rtvDesc.ViewDimension = D3D12_RTV_DIMENSION_BUFFER; + break; + default: + return SLANG_FAIL; } - - return SLANG_OK; + m_device->CreateRenderTargetView( + resourceImpl ? resourceImpl->m_resource.getResource() : nullptr, + &rtvDesc, + viewImpl->m_descriptor.cpuHandle); } + break; - bool shouldAllocateConstantBuffer(TransientResourceHeapImpl* transientHeap) + case IResourceView::Type::DepthStencil: { - if (m_isConstantBufferDirty || m_cachedTransientHeap != transientHeap || - m_cachedTransientHeapVersion != transientHeap->getVersion()) + SLANG_RETURN_ON_FAIL(m_dsvAllocator->allocate(&viewImpl->m_descriptor)); + viewImpl->m_allocator = m_dsvAllocator; + D3D12_DEPTH_STENCIL_VIEW_DESC dsvDesc = {}; + dsvDesc.Format = D3DUtil::getMapFormat(desc.format); + isArray = desc.subresourceRange.layerCount > 1; + switch (desc.renderTarget.shape) { - return true; + case IResource::Type::Texture1D: + dsvDesc.ViewDimension = D3D12_DSV_DIMENSION_TEXTURE1D; + dsvDesc.Texture1D.MipSlice = desc.subresourceRange.mipLevel; + break; + case IResource::Type::Texture2D: + if (isMultiSample) + { + dsvDesc.ViewDimension = isArray ? 
D3D12_DSV_DIMENSION_TEXTURE2DMSARRAY + : D3D12_DSV_DIMENSION_TEXTURE2DMS; + dsvDesc.Texture2DMSArray.ArraySize = desc.subresourceRange.layerCount; + dsvDesc.Texture2DMSArray.FirstArraySlice = desc.subresourceRange.baseArrayLayer; + } + else + { + dsvDesc.ViewDimension = isArray ? D3D12_DSV_DIMENSION_TEXTURE2DARRAY + : D3D12_DSV_DIMENSION_TEXTURE2D; + dsvDesc.Texture2DArray.MipSlice = desc.subresourceRange.mipLevel; + dsvDesc.Texture2DArray.ArraySize = desc.subresourceRange.layerCount; + dsvDesc.Texture2DArray.FirstArraySlice = desc.subresourceRange.baseArrayLayer; + } + break; + default: + return SLANG_FAIL; } - return false; + m_device->CreateDepthStencilView( + resourceImpl ? resourceImpl->m_resource.getResource() : nullptr, + &dsvDesc, + viewImpl->m_descriptor.cpuHandle); } + break; - /// Ensure that the `m_ordinaryDataBuffer` has been created, if it is needed - Result _ensureOrdinaryDataBufferCreatedIfNeeded( - PipelineCommandEncoder* encoder, - ShaderObjectLayoutImpl* specializedLayout) + case IResourceView::Type::UnorderedAccess: { - // If data has been changed since last allocation/filling of constant buffer, - // we will need to allocate a new one. - // - if (!shouldAllocateConstantBuffer(encoder->m_transientHeap)) - { - return SLANG_OK; - } - m_isConstantBufferDirty = false; - m_cachedTransientHeap = encoder->m_transientHeap; - m_cachedTransientHeapVersion = encoder->m_transientHeap->getVersion(); - - // Computing the size of the ordinary data buffer is *not* just as simple - // as using the size of the `m_ordinayData` array that we store. The reason - // for the added complexity is that interface-type fields may lead to the - // storage being specialized such that it needs extra appended data to - // store the concrete values that logically belong in those interface-type - // fields but wouldn't fit in the fixed-size allocation we gave them. 
- // - m_constantBufferSize = specializedLayout->getTotalOrdinaryDataSize(); - if (m_constantBufferSize == 0) - { - return SLANG_OK; - } - - // Once we have computed how large the buffer should be, we can allocate - // it from the transient resource heap. - // - auto alignedConstantBufferSize = D3DUtil::calcAligned(m_constantBufferSize, 256); - SLANG_RETURN_ON_FAIL(encoder->m_commandBuffer->m_transientHeap->allocateConstantBuffer( - alignedConstantBufferSize, m_constantBufferWeakPtr, m_constantBufferOffset)); - - // Once the buffer is allocated, we can use `_writeOrdinaryData` to fill it in. - // - // Note that `_writeOrdinaryData` is potentially recursive in the case - // where this object contains interface/existential-type fields, so we - // don't need or want to inline it into this call site. - // - SLANG_RETURN_ON_FAIL(_writeOrdinaryData( - encoder, - static_cast(m_constantBufferWeakPtr), - m_constantBufferOffset, - m_constantBufferSize, - specializedLayout)); + // TODO: need to support the separate "counter resource" for the case + // of append/consume buffers with attached counters. + SLANG_RETURN_ON_FAIL(m_cpuViewHeap->allocate(&viewImpl->m_descriptor)); + viewImpl->m_allocator = m_cpuViewHeap; + D3D12_UNORDERED_ACCESS_VIEW_DESC d3d12desc = {}; + auto& resourceDesc = *resourceImpl->getDesc(); + d3d12desc.Format = gfxIsTypelessFormat(texture->getDesc()->format) + ? D3DUtil::getMapFormat(desc.format) + : D3DUtil::getMapFormat(texture->getDesc()->format); + switch (resourceImpl->getDesc()->type) { - // We also create and store a descriptor for our root constant buffer - // into the descriptor table allocation that was reserved for them. - // - // We always know that the ordinary data buffer will be the first descriptor - // in the table of resource views. 
- // - auto descriptorTable = m_descriptorSet.resourceTable; - D3D12_CONSTANT_BUFFER_VIEW_DESC viewDesc = {}; - viewDesc.BufferLocation = static_cast(m_constantBufferWeakPtr) - ->m_resource.getResource() - ->GetGPUVirtualAddress() + - m_constantBufferOffset; - viewDesc.SizeInBytes = (UINT)alignedConstantBufferSize; - encoder->m_device->CreateConstantBufferView( - &viewDesc, descriptorTable.getCpuHandle()); - } + case IResource::Type::Texture1D: + d3d12desc.ViewDimension = resourceDesc.arraySize == 0 + ? D3D12_UAV_DIMENSION_TEXTURE1D + : D3D12_UAV_DIMENSION_TEXTURE1DARRAY; + d3d12desc.Texture1D.MipSlice = desc.subresourceRange.mipLevel; + d3d12desc.Texture1DArray.ArraySize = desc.subresourceRange.layerCount == 0 + ? resourceDesc.arraySize + : desc.subresourceRange.layerCount; + d3d12desc.Texture1DArray.FirstArraySlice = desc.subresourceRange.baseArrayLayer; - return SLANG_OK; + break; + case IResource::Type::Texture2D: + d3d12desc.ViewDimension = resourceDesc.arraySize == 0 + ? D3D12_UAV_DIMENSION_TEXTURE2D + : D3D12_UAV_DIMENSION_TEXTURE2DARRAY; + d3d12desc.Texture2D.MipSlice = desc.subresourceRange.mipLevel; + d3d12desc.Texture2D.PlaneSlice = + D3DUtil::getPlaneSlice(d3d12desc.Format, desc.subresourceRange.aspectMask); + d3d12desc.Texture2DArray.ArraySize = desc.subresourceRange.layerCount == 0 + ? 
resourceDesc.arraySize + : desc.subresourceRange.layerCount; + d3d12desc.Texture2DArray.FirstArraySlice = desc.subresourceRange.baseArrayLayer; + break; + case IResource::Type::Texture3D: + d3d12desc.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE3D; + d3d12desc.Texture3D.MipSlice = desc.subresourceRange.mipLevel; + d3d12desc.Texture3D.FirstWSlice = desc.subresourceRange.baseArrayLayer; + d3d12desc.Texture3D.WSize = resourceDesc.size.depth; + break; + default: + return SLANG_FAIL; + } + m_device->CreateUnorderedAccessView( + resourceImpl->m_resource, nullptr, &d3d12desc, viewImpl->m_descriptor.cpuHandle); } + break; - public: - void updateSubObjectsRecursive() + case IResourceView::Type::ShaderResource: { - if (!m_isMutable) - return; - auto& subObjectRanges = getLayout()->getSubObjectRanges(); - for (Slang::Index subObjectRangeIndex = 0; - subObjectRangeIndex < subObjectRanges.getCount(); - subObjectRangeIndex++) - { - auto const& subObjectRange = subObjectRanges[subObjectRangeIndex]; - auto const& bindingRange = - getLayout()->getBindingRange(subObjectRange.bindingRangeIndex); - Slang::Index count = bindingRange.count; + SLANG_RETURN_ON_FAIL(m_cpuViewHeap->allocate(&viewImpl->m_descriptor)); + viewImpl->m_allocator = m_cpuViewHeap; - for (Slang::Index subObjectIndexInRange = 0; subObjectIndexInRange < count; - subObjectIndexInRange++) - { - Slang::Index objectIndex = bindingRange.subObjectIndex + subObjectIndexInRange; - auto subObject = m_objects[objectIndex].Ptr(); - if (!subObject) - continue; - subObject->updateSubObjectsRecursive(); - if (m_subObjectVersions[objectIndex] != m_objects[objectIndex]->m_version) - { - ShaderOffset offset; - offset.bindingRangeIndex = subObjectRange.bindingRangeIndex; - offset.bindingArrayIndex = subObjectIndexInRange; - setObject(offset, subObject); - } - } - } - } - /// Prepare to bind this object as a parameter block. 
- /// - /// This involves allocating and binding any descriptor tables necessary - /// to to store the state of the object. The function returns a descriptor - /// set formed from any table(s) allocated. In addition, the `ioOffset` - /// parameter will be adjusted to be correct for binding values into - /// the resulting descriptor set. - /// - /// Returns: - /// SLANG_OK when successful, - /// SLANG_E_OUT_OF_MEMORY when descriptor heap is full. - /// - Result prepareToBindAsParameterBlock( - BindingContext* context, - BindingOffset& ioOffset, - ShaderObjectLayoutImpl* specializedLayout, - DescriptorSet& outDescriptorSet) - { - auto transientHeap = context->transientHeap; - auto submitter = context->submitter; - - // When writing into the new descriptor set, resource and sampler - // descriptors will need to start at index zero in the respective - // tables. - // - ioOffset.resource = 0; - ioOffset.sampler = 0; + // Need to construct the D3D12_SHADER_RESOURCE_VIEW_DESC because otherwise TextureCube + // is not accessed appropriately (rather than just passing nullptr to + // CreateShaderResourceView) + const D3D12_RESOURCE_DESC resourceDesc = + resourceImpl->m_resource.getResource()->GetDesc(); + const DXGI_FORMAT pixelFormat = desc.format == Format::Unknown + ? resourceDesc.Format + : D3DUtil::getMapFormat(desc.format); - // The index of the next root parameter to bind will be maintained, - // but needs to be incremented by the number of descriptor tables - // we allocate (zero or one resource table and zero or one sampler - // table). 
- // - auto& rootParamIndex = ioOffset.rootParam; + D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc; + initSrvDesc( + resourceImpl->getType(), + *resourceImpl->getDesc(), + resourceDesc, + pixelFormat, + desc.subresourceRange, + srvDesc); - if (auto descriptorCount = specializedLayout->getTotalResourceDescriptorCount()) - { - // There is a non-zero number of resource descriptors needed, - // so we will allocate a table out of the appropriate heap, - // and store it into the appropriate part of `descriptorSet`. - // - auto descriptorHeap = &transientHeap->getCurrentViewHeap(); - auto& table = outDescriptorSet.resourceTable; + m_device->CreateShaderResourceView( + resourceImpl->m_resource, &srvDesc, viewImpl->m_descriptor.cpuHandle); + } + break; + } - // Allocate the table. - // - if (!table.allocate(descriptorHeap, descriptorCount)) - { - context->outOfMemoryHeap = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV; - return SLANG_E_OUT_OF_MEMORY; - } + returnComPtr(outView, viewImpl); + return SLANG_OK; +} - // Bind the table to the pipeline, consuming the next available - // root parameter. - // - auto tableRootParamIndex = rootParamIndex++; - submitter->setRootDescriptorTable(tableRootParamIndex, table.getGpuHandle()); - } - if (auto descriptorCount = specializedLayout->getTotalSamplerDescriptorCount()) - { - // There is a non-zero number of sampler descriptors needed, - // so we will allocate a table out of the appropriate heap, - // and store it into the appropriate part of `descriptorSet`. - // - auto descriptorHeap = &transientHeap->getCurrentSamplerHeap(); - auto& table = outDescriptorSet.samplerTable; +Result DeviceImpl::getFormatSupportedResourceStates(Format format, ResourceStateSet* outStates) +{ + D3D12_FEATURE_DATA_FORMAT_SUPPORT support; + support.Format = D3DUtil::getMapFormat(format); + SLANG_RETURN_ON_FAIL( + m_device->CheckFeatureSupport(D3D12_FEATURE_FORMAT_SUPPORT, &support, sizeof(support))); - // Allocate the table. 
- // - if (!table.allocate(descriptorHeap, descriptorCount)) - { - context->outOfMemoryHeap = D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER; - return SLANG_E_OUT_OF_MEMORY; - } + ResourceStateSet allowedStates; - // Bind the table to the pipeline, consuming the next available - // root parameter. - // - auto tableRootParamIndex = rootParamIndex++; - submitter->setRootDescriptorTable(tableRootParamIndex, table.getGpuHandle()); - } + auto dxgi1 = support.Support1; + if (dxgi1 & D3D12_FORMAT_SUPPORT1_BUFFER) + allowedStates.add(ResourceState::ConstantBuffer); + if (dxgi1 & D3D12_FORMAT_SUPPORT1_IA_VERTEX_BUFFER) + allowedStates.add(ResourceState::VertexBuffer); + if (dxgi1 & D3D12_FORMAT_SUPPORT1_IA_INDEX_BUFFER) + allowedStates.add(ResourceState::IndexBuffer); + if (dxgi1 & D3D12_FORMAT_SUPPORT1_SO_BUFFER) + allowedStates.add(ResourceState::StreamOutput); + if (dxgi1 & D3D12_FORMAT_SUPPORT1_TEXTURE1D) + allowedStates.add(ResourceState::ShaderResource); + if (dxgi1 & D3D12_FORMAT_SUPPORT1_TEXTURE2D) + allowedStates.add(ResourceState::ShaderResource); + if (dxgi1 & D3D12_FORMAT_SUPPORT1_TEXTURE3D) + allowedStates.add(ResourceState::ShaderResource); + if (dxgi1 & D3D12_FORMAT_SUPPORT1_TEXTURECUBE) + allowedStates.add(ResourceState::ShaderResource); + if (dxgi1 & D3D12_FORMAT_SUPPORT1_SHADER_LOAD) + allowedStates.add(ResourceState::ShaderResource); + if (dxgi1 & D3D12_FORMAT_SUPPORT1_SHADER_SAMPLE) + allowedStates.add(ResourceState::ShaderResource); + if (dxgi1 & D3D12_FORMAT_SUPPORT1_SHADER_SAMPLE_COMPARISON) + allowedStates.add(ResourceState::ShaderResource); + if (dxgi1 & D3D12_FORMAT_SUPPORT1_SHADER_GATHER) + allowedStates.add(ResourceState::ShaderResource); + if (dxgi1 & D3D12_FORMAT_SUPPORT1_SHADER_GATHER_COMPARISON) + allowedStates.add(ResourceState::ShaderResource); + if (dxgi1 & D3D12_FORMAT_SUPPORT1_RENDER_TARGET) + allowedStates.add(ResourceState::RenderTarget); + if (dxgi1 & D3D12_FORMAT_SUPPORT1_DEPTH_STENCIL) + allowedStates.add(ResourceState::DepthWrite); + if (dxgi1 
& D3D12_FORMAT_SUPPORT1_TYPED_UNORDERED_ACCESS_VIEW) + allowedStates.add(ResourceState::UnorderedAccess); - return SLANG_OK; - } + *outStates = allowedStates; + return SLANG_OK; +} - bool checkIfCachedDescriptorSetIsValidRecursive(BindingContext* context) - { - if (shouldAllocateConstantBuffer(context->transientHeap)) - return false; - if (m_isMutable && m_version != m_cachedGPUDescriptorSetVersion) - return false; - if (m_cachedGPUDescriptorSet.resourceTable.getDescriptorCount() != 0 && - m_cachedGPUDescriptorSet.resourceTable.m_heap.ptr.linearHeap->getHeap() != - m_cachedTransientHeap->getCurrentViewHeap().getHeap()) - return false; - if (m_cachedGPUDescriptorSet.samplerTable.getDescriptorCount() != 0 && - m_cachedGPUDescriptorSet.samplerTable.m_heap.ptr.linearHeap->getHeap() != - m_cachedTransientHeap->getCurrentSamplerHeap().getHeap()) - return false; +Result DeviceImpl::createBufferView( + IBufferResource* buffer, + IBufferResource* counterBuffer, + IResourceView::Desc const& desc, + IResourceView** outView) +{ + auto resourceImpl = (BufferResourceImpl*)buffer; + auto resourceDesc = *resourceImpl->getDesc(); - auto& subObjectRanges = getLayout()->getSubObjectRanges(); - for (Slang::Index subObjectRangeIndex = 0; - subObjectRangeIndex < subObjectRanges.getCount(); - subObjectRangeIndex++) - { - auto const& subObjectRange = subObjectRanges[subObjectRangeIndex]; - auto const& bindingRange = - getLayout()->getBindingRange(subObjectRange.bindingRangeIndex); - if (bindingRange.bindingType != slang::BindingType::ParameterBlock) - continue; - Slang::Index count = bindingRange.count; + RefPtr viewImpl = new ResourceViewImpl(); + viewImpl->m_resource = resourceImpl; + viewImpl->m_desc = desc; - for (Slang::Index subObjectIndexInRange = 0; subObjectIndexInRange < count; - subObjectIndexInRange++) - { - Slang::Index objectIndex = bindingRange.subObjectIndex + subObjectIndexInRange; - auto subObject = m_objects[objectIndex].Ptr(); - if (!subObject) - continue; - if 
(subObject->checkIfCachedDescriptorSetIsValidRecursive(context)) - return false; - } - } - return true; - } + switch (desc.type) + { + default: + return SLANG_FAIL; - /// Bind this object as a `ParameterBlock` - Result bindAsParameterBlock( - BindingContext* context, - BindingOffset const& offset, - ShaderObjectLayoutImpl* specializedLayout) + case IResourceView::Type::UnorderedAccess: { - if (checkIfCachedDescriptorSetIsValidRecursive(context)) + D3D12_UNORDERED_ACCESS_VIEW_DESC uavDesc = {}; + uavDesc.ViewDimension = D3D12_UAV_DIMENSION_BUFFER; + uavDesc.Format = D3DUtil::getMapFormat(desc.format); + uavDesc.Buffer.FirstElement = desc.bufferRange.firstElement; + uint64_t viewSize = 0; + if (desc.bufferElementSize) { - // If we already have a valid gpu descriptor table in the current - // heap, bind it. - auto rootParamIndex = offset.rootParam; - if (m_cachedGPUDescriptorSet.resourceTable.getDescriptorCount()) - { - auto tableRootParamIndex = rootParamIndex++; - context->submitter->setRootDescriptorTable( - tableRootParamIndex, m_cachedGPUDescriptorSet.resourceTable.getGpuHandle()); - } - if (m_cachedGPUDescriptorSet.samplerTable.getDescriptorCount()) - { - auto tableRootParamIndex = rootParamIndex++; - context->submitter->setRootDescriptorTable( - tableRootParamIndex, m_cachedGPUDescriptorSet.samplerTable.getGpuHandle()); - } - return SLANG_OK; + uavDesc.Buffer.StructureByteStride = desc.bufferElementSize; + uavDesc.Buffer.NumElements = + desc.bufferRange.elementCount == 0 + ? UINT(resourceDesc.sizeInBytes / desc.bufferElementSize) + : (UINT)desc.bufferRange.elementCount; + viewSize = (uint64_t)desc.bufferElementSize * uavDesc.Buffer.NumElements; } - - // The first step to binding an object as a parameter block is to allocate a descriptor - // set (consisting of zero or one resource descriptor table and zero or one sampler - // descriptor table) to represent its values. 
- // - BindingOffset subOffset = offset; - SLANG_RETURN_ON_FAIL(prepareToBindAsParameterBlock( - context, /* inout */ subOffset, specializedLayout, m_cachedGPUDescriptorSet)); - - // Next we bind the object into that descriptor set as if it were being used - // as a `ConstantBuffer`. - // - SLANG_RETURN_ON_FAIL(bindAsConstantBuffer( - context, m_cachedGPUDescriptorSet, subOffset, specializedLayout)); - - m_cachedGPUDescriptorSetVersion = m_version; - return SLANG_OK; - } - - /// Bind this object as a `ConstantBuffer` - Result bindAsConstantBuffer( - BindingContext* context, - DescriptorSet const& descriptorSet, - BindingOffset const& offset, - ShaderObjectLayoutImpl* specializedLayout) - { - // If we are to bind as a constant buffer we first need to ensure that - // the ordinary data buffer is created, if this object needs one. - // - SLANG_RETURN_ON_FAIL(_ensureOrdinaryDataBufferCreatedIfNeeded(context->encoder, specializedLayout)); - - // Next, we need to bind all of the resource descriptors for this object - // (including any ordinary data buffer) into the provided `descriptorSet`. - // - auto resourceCount = specializedLayout->getResourceSlotCount(); - if(resourceCount) + else if (desc.format == Format::Unknown) { - auto& dstTable = descriptorSet.resourceTable; - auto& srcTable = m_descriptorSet.resourceTable; - - context->device->m_device->CopyDescriptorsSimple( - UINT(resourceCount), - dstTable.getCpuHandle(offset.resource), - srcTable.getCpuHandle(), - D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + uavDesc.Format = DXGI_FORMAT_R32_TYPELESS; + uavDesc.Buffer.NumElements = desc.bufferRange.elementCount == 0 + ? UINT(resourceDesc.sizeInBytes / 4) + : UINT(desc.bufferRange.elementCount / 4); + uavDesc.Buffer.Flags |= D3D12_BUFFER_UAV_FLAG_RAW; + viewSize = 4ull * uavDesc.Buffer.NumElements; } - - // Finally, we delegate to `_bindImpl` to bind samplers and sub-objects, - // since the logic is shared with the `bindAsValue()` case below. 
- // - SLANG_RETURN_ON_FAIL(_bindImpl(context, descriptorSet, offset, specializedLayout)); - return SLANG_OK; - } - - /// Bind this object as a value (for an interface-type parameter) - Result bindAsValue( - BindingContext* context, - DescriptorSet const& descriptorSet, - BindingOffset const& offset, - ShaderObjectLayoutImpl* specializedLayout) - { - // When binding a value for an interface-type field we do *not* want - // to bind a buffer for the ordinary data (if there is any) because - // ordinary data for interface-type fields gets allocated into the - // parent object's ordinary data buffer. - // - // This CPU-memory descriptor table that holds resource descriptors - // will have already been allocated to have space for an ordinary data - // buffer (if needed), so we need to take care to skip over that - // descriptor when copying descriptors from the CPU-memory set - // to the GPU-memory `descriptorSet`. - // - auto skipResourceCount = specializedLayout->getOrdinaryDataBufferCount(); - auto resourceCount = specializedLayout->getResourceSlotCount() - skipResourceCount; - if(resourceCount) + else { - auto& dstTable = descriptorSet.resourceTable; - auto& srcTable = m_descriptorSet.resourceTable; - - context->device->m_device->CopyDescriptorsSimple( - UINT(resourceCount), - dstTable.getCpuHandle(offset.resource), - srcTable.getCpuHandle(skipResourceCount), - D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + FormatInfo sizeInfo; + gfxGetFormatInfo(desc.format, &sizeInfo); + assert(sizeInfo.pixelsPerBlock == 1); + uavDesc.Buffer.NumElements = + desc.bufferRange.elementCount == 0 + ? UINT(resourceDesc.sizeInBytes / sizeInfo.blockSizeInBytes) + : (UINT)desc.bufferRange.elementCount; + viewSize = (uint64_t)uavDesc.Buffer.NumElements * sizeInfo.blockSizeInBytes; } - // Finally, we delegate to `_bindImpl` to bind samplers and sub-objects, - // since the logic is shared with the `bindAsConstantBuffer()` case above. 
- // - // Note: Just like we had to do some subtle handling of the ordinary data buffer - // above, here we need to contend with the fact that the `offset.resource` fields - // computed for sub-object ranges were baked to take the ordinary data buffer - // into account, so that if `skipResourceCount` is non-zero then they are all - // too high by `skipResourceCount`. - // - // We will address the problem here by computing a modified offset that adjusts - // for the ordinary data buffer that we have not bound after all. - // - BindingOffset subOffset = offset; - subOffset.resource -= skipResourceCount; - SLANG_RETURN_ON_FAIL(_bindImpl(context, descriptorSet, subOffset, specializedLayout)); - return SLANG_OK; - } - - /// Shared logic for `bindAsConstantBuffer()` and `bindAsValue()` - Result _bindImpl( - BindingContext* context, - DescriptorSet const& descriptorSet, - BindingOffset const& offset, - ShaderObjectLayoutImpl* specializedLayout) - { - // We start by binding all the sampler decriptors, if needed. - // - // Note: resource descriptors were handled in either `bindAsConstantBuffer()` - // or `bindAsValue()` before calling into `_bindImpl()`. - // - if (auto samplerCount = specializedLayout->getSamplerSlotCount()) + if (viewSize >= (1ull << 32) - 8) { - auto& dstTable = descriptorSet.samplerTable; - auto& srcTable = m_descriptorSet.samplerTable; - - context->device->m_device->CopyDescriptorsSimple( - UINT(samplerCount), - dstTable.getCpuHandle(offset.sampler), - srcTable.getCpuHandle(), - D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER); + // D3D12 does not support view descriptors that has size near 4GB. + // We will not create actual SRV/UAVs for such large buffers. + // However, a buffer this large can still be bound as root parameter. + // So instead of failing, we quietly ignore descriptor creation. + viewImpl->m_descriptor.cpuHandle.ptr = 0; } - - // Next we iterate over the sub-object ranges and bind anything they require. 
- // - auto& subObjectRanges = specializedLayout->getSubObjectRanges(); - auto subObjectRangeCount = subObjectRanges.getCount(); - for (Index i = 0; i < subObjectRangeCount; i++) + else { - auto& subObjectRange = specializedLayout->getSubObjectRange(i); - auto& bindingRange = specializedLayout->getBindingRange(subObjectRange.bindingRangeIndex); - auto subObjectIndex = bindingRange.subObjectIndex; - auto subObjectLayout = subObjectRange.layout.Ptr(); - - BindingOffset rangeOffset = offset; - rangeOffset += subObjectRange.offset; - - BindingOffset rangeStride = subObjectRange.stride; - - switch(bindingRange.bindingType) - { - case slang::BindingType::ConstantBuffer: - { - auto objOffset = rangeOffset; - for (uint32_t j = 0; j < bindingRange.count; j++) - { - auto& object = m_objects[subObjectIndex + j]; - SLANG_RETURN_ON_FAIL(object->bindAsConstantBuffer(context, descriptorSet, objOffset, subObjectLayout)); - objOffset += rangeStride; - } - } - break; + auto counterResourceImpl = static_cast(counterBuffer); + SLANG_RETURN_ON_FAIL(m_cpuViewHeap->allocate(&viewImpl->m_descriptor)); + viewImpl->m_allocator = m_cpuViewHeap; + m_device->CreateUnorderedAccessView( + resourceImpl->m_resource, + counterResourceImpl ? 
counterResourceImpl->m_resource.getResource() : nullptr, + &uavDesc, + viewImpl->m_descriptor.cpuHandle); + } + } + break; - case slang::BindingType::ParameterBlock: - { - auto objOffset = rangeOffset; - for (uint32_t j = 0; j < bindingRange.count; j++) - { - auto& object = m_objects[subObjectIndex + j]; - SLANG_RETURN_ON_FAIL(object->bindAsParameterBlock(context, objOffset, subObjectLayout)); - objOffset += rangeStride; - } - } - break; - - case slang::BindingType::ExistentialValue: - if(subObjectLayout) - { - auto objOffset = rangeOffset; - for (uint32_t j = 0; j < bindingRange.count; j++) - { - auto& object = m_objects[subObjectIndex + j]; - SLANG_RETURN_ON_FAIL(object->bindAsValue(context, descriptorSet, objOffset, subObjectLayout)); - objOffset += rangeStride; - } - } - break; - } - } - - return SLANG_OK; - } - - Result bindRootArguments(BindingContext* context, uint32_t& index) + case IResourceView::Type::ShaderResource: { - auto layoutImpl = getLayout(); - for (Index i = 0; i < m_rootArguments.getCount(); i++) + D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc = {}; + srvDesc.ViewDimension = D3D12_SRV_DIMENSION_BUFFER; + srvDesc.Format = D3DUtil::getMapFormat(desc.format); + srvDesc.Buffer.StructureByteStride = 0; + srvDesc.Buffer.FirstElement = desc.bufferRange.firstElement; + srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + uint64_t viewSize = 0; + if (desc.bufferElementSize) { - switch (layoutImpl->getRootParameterInfo(i).type) - { - case IResourceView::Type::ShaderResource: - case IResourceView::Type::AccelerationStructure: - context->submitter->setRootSRV(index, m_rootArguments[i]); - break; - case IResourceView::Type::UnorderedAccess: - context->submitter->setRootUAV(index, m_rootArguments[i]); - break; - default: - continue; - } - index++; + srvDesc.Buffer.StructureByteStride = desc.bufferElementSize; + srvDesc.Buffer.NumElements = + desc.bufferRange.elementCount == 0 + ? 
UINT(resourceDesc.sizeInBytes / desc.bufferElementSize) + : (UINT)desc.bufferRange.elementCount; + viewSize = (uint64_t)desc.bufferElementSize * srvDesc.Buffer.NumElements; } - for (auto& subObject : m_objects) + else if (desc.format == Format::Unknown) { - if (subObject) - { - SLANG_RETURN_ON_FAIL(subObject->bindRootArguments(context, index)); - } + srvDesc.Format = DXGI_FORMAT_R32_TYPELESS; + srvDesc.Buffer.NumElements = desc.bufferRange.elementCount == 0 + ? UINT(resourceDesc.sizeInBytes / 4) + : UINT(desc.bufferRange.elementCount / 4); + srvDesc.Buffer.Flags |= D3D12_BUFFER_SRV_FLAG_RAW; + viewSize = 4ull * srvDesc.Buffer.NumElements; } - return SLANG_OK; - } - /// A CPU-memory descriptor set holding any descriptors used to represent the resources/samplers in this object's state - DescriptorSet m_descriptorSet; - /// A cached descriptor set on GPU heap. - DescriptorSet m_cachedGPUDescriptorSet; - - ShortList, 8> m_boundResources; - List m_rootArguments; - /// A constant buffer used to stored ordinary data for this object - /// and existential-type sub-objects. - /// - /// Allocated from transient heap on demand with `_createOrdinaryDataBufferIfNeeded()` - IBufferResource* m_constantBufferWeakPtr = nullptr; - size_t m_constantBufferOffset = 0; - size_t m_constantBufferSize = 0; - - /// Dirty bit tracking whether the constant buffer needs to be updated. - bool m_isConstantBufferDirty = true; - /// The transient heap from which the constant buffer and descriptor set is allocated. - TransientResourceHeapImpl* m_cachedTransientHeap; - /// The version of the transient heap when the constant buffer and descriptor set is allocated. - uint64_t m_cachedTransientHeapVersion; - - /// Whether this shader object is allowed to be mutable. - bool m_isMutable = false; - /// The version of a mutable shader object. - uint32_t m_version = 0; - /// The version of this mutable shader object when the gpu descriptor table is cached. 
- uint32_t m_cachedGPUDescriptorSetVersion = -1; - /// The versions of bound subobjects. - List m_subObjectVersions; - - /// Get the layout of this shader object with specialization arguments considered - /// - /// This operation should only be called after the shader object has been - /// fully filled in and finalized. - /// - Result getSpecializedLayout(ShaderObjectLayoutImpl** outLayout) - { - if (!m_specializedLayout) + else { - SLANG_RETURN_ON_FAIL(_createSpecializedLayout(m_specializedLayout.writeRef())); + FormatInfo sizeInfo; + gfxGetFormatInfo(desc.format, &sizeInfo); + assert(sizeInfo.pixelsPerBlock == 1); + srvDesc.Buffer.NumElements = + desc.bufferRange.elementCount == 0 + ? UINT(resourceDesc.sizeInBytes / sizeInfo.blockSizeInBytes) + : (UINT)desc.bufferRange.elementCount; + viewSize = (uint64_t)srvDesc.Buffer.NumElements * sizeInfo.blockSizeInBytes; + } + if (viewSize >= (1ull << 32) - 8) + { + // D3D12 does not support view descriptors that has size near 4GB. + // We will not create actual SRV/UAVs for such large buffers. + // However, a buffer this large can still be bound as root parameter. + // So instead of failing, we quietly ignore descriptor creation. + viewImpl->m_descriptor.cpuHandle.ptr = 0; + } + else + { + SLANG_RETURN_ON_FAIL(m_cpuViewHeap->allocate(&viewImpl->m_descriptor)); + viewImpl->m_allocator = m_cpuViewHeap; + m_device->CreateShaderResourceView( + resourceImpl->m_resource, &srvDesc, viewImpl->m_descriptor.cpuHandle); } - returnRefPtr(outLayout, m_specializedLayout); - return SLANG_OK; - } - - /// Create the layout for this shader object with specialization arguments considered - /// - /// This operation is virtual so that it can be customized by `RootShaderObject`. 
- /// - virtual Result _createSpecializedLayout(ShaderObjectLayoutImpl** outLayout) - { - ExtendedShaderObjectType extendedType; - SLANG_RETURN_ON_FAIL(getSpecializedShaderObjectType(&extendedType)); - - auto renderer = getRenderer(); - RefPtr layout; - SLANG_RETURN_ON_FAIL(renderer->getShaderObjectLayout( - extendedType.slangType, - m_layout->getContainerType(), - (ShaderObjectLayoutBase**)layout.writeRef())); - - returnRefPtrMove(outLayout, layout); - return SLANG_OK; } + break; + } - RefPtr m_specializedLayout; - }; + returnComPtr(outView, viewImpl); + return SLANG_OK; +} - class RootShaderObjectImpl : public ShaderObjectImpl +Result DeviceImpl::createFramebuffer(IFramebuffer::Desc const& desc, IFramebuffer** outFb) +{ + RefPtr framebuffer = new FramebufferImpl(); + framebuffer->renderTargetViews.setCount(desc.renderTargetCount); + framebuffer->renderTargetDescriptors.setCount(desc.renderTargetCount); + framebuffer->renderTargetClearValues.setCount(desc.renderTargetCount); + for (uint32_t i = 0; i < desc.renderTargetCount; i++) { - typedef ShaderObjectImpl Super; - - public: - // Override default reference counting behavior to disable lifetime management via ComPtr. - // Root objects are managed by command buffer and does not need to be freed by the user. 
- SLANG_NO_THROW uint32_t SLANG_MCALL addRef() override { return 1; } - SLANG_NO_THROW uint32_t SLANG_MCALL release() override { return 1; } - public: - RootShaderObjectLayoutImpl* getLayout() - { - return static_cast(m_layout.Ptr()); - } - - UInt SLANG_MCALL getEntryPointCount() SLANG_OVERRIDE - { - return (UInt)m_entryPoints.getCount(); - } - SlangResult SLANG_MCALL getEntryPoint(UInt index, IShaderObject** outEntryPoint) - SLANG_OVERRIDE - { - returnComPtr(outEntryPoint, m_entryPoints[index]); - return SLANG_OK; - } - - virtual Result collectSpecializationArgs(ExtendedShaderObjectTypeList& args) override + framebuffer->renderTargetViews[i] = + static_cast(desc.renderTargetViews[i]); + framebuffer->renderTargetDescriptors[i] = + framebuffer->renderTargetViews[i]->m_descriptor.cpuHandle; + if (static_cast(desc.renderTargetViews[i])->m_resource.Ptr()) { - SLANG_RETURN_ON_FAIL(ShaderObjectImpl::collectSpecializationArgs(args)); - for (auto& entryPoint : m_entryPoints) - { - SLANG_RETURN_ON_FAIL(entryPoint->collectSpecializationArgs(args)); - } - return SLANG_OK; + auto clearValue = + static_cast( + static_cast(desc.renderTargetViews[i])->m_resource.Ptr()) + ->getDesc() + ->optimalClearValue.color; + memcpy(&framebuffer->renderTargetClearValues[i], &clearValue, sizeof(ColorClearValue)); } - - virtual SLANG_NO_THROW Result SLANG_MCALL - copyFrom(IShaderObject* object, ITransientResourceHeap* transientHeap) override + else { - if (auto srcObj = dynamic_cast(object)) - { - *this = *srcObj; - return SLANG_OK; - } - return SLANG_FAIL; + memset(&framebuffer->renderTargetClearValues[i], 0, sizeof(ColorClearValue)); } + } + framebuffer->depthStencilView = static_cast(desc.depthStencilView); + if (desc.depthStencilView) + { + framebuffer->depthStencilClearValue = + static_cast( + static_cast(desc.depthStencilView)->m_resource.Ptr()) + ->getDesc() + ->optimalClearValue.depthStencil; + framebuffer->depthStencilDescriptor = + 
static_cast(desc.depthStencilView)->m_descriptor.cpuHandle; + } + else + { + framebuffer->depthStencilDescriptor.ptr = 0; + } + returnComPtr(outFb, framebuffer); + return SLANG_OK; +} - public: - Result bindAsRoot( - BindingContext* context, - RootShaderObjectLayoutImpl* specializedLayout) - { - // Pull updates from sub-objects when this is a mutable root shader object. - updateSubObjectsRecursive(); - - // A root shader object always binds as if it were a parameter block, - // insofar as it needs to allocate a descriptor set to hold the bindings - // for its own state and any sub-objects. - // - // Note: We do not direclty use `bindAsParameterBlock` here because we also - // need to bind the entry points into the same descriptor set that is - // being used for the root object. - - BindingOffset rootOffset; - - // Bind all root parameters first. - Super::bindRootArguments(context, rootOffset.rootParam); - - DescriptorSet descriptorSet; - SLANG_RETURN_ON_FAIL(prepareToBindAsParameterBlock( - context, /* inout */ rootOffset, specializedLayout, descriptorSet)); - - SLANG_RETURN_ON_FAIL(Super::bindAsConstantBuffer(context, descriptorSet, rootOffset, specializedLayout)); - - auto entryPointCount = m_entryPoints.getCount(); - for (Index i = 0; i < entryPointCount; ++i) - { - auto entryPoint = m_entryPoints[i]; - auto& entryPointInfo = specializedLayout->getEntryPoint(i); - - auto entryPointOffset = rootOffset; - entryPointOffset += entryPointInfo.offset; +Result DeviceImpl::createFramebufferLayout( + IFramebufferLayout::Desc const& desc, IFramebufferLayout** outLayout) +{ + RefPtr layout = new FramebufferLayoutImpl(); + layout->m_renderTargets.setCount(desc.renderTargetCount); + for (uint32_t i = 0; i < desc.renderTargetCount; i++) + { + layout->m_renderTargets[i] = desc.renderTargets[i]; + } - entryPoint->updateSubObjectsRecursive(); + if (desc.depthStencil) + { + layout->m_hasDepthStencil = true; + layout->m_depthStencil = *desc.depthStencil; + } + else + { + 
layout->m_hasDepthStencil = false; + } + returnComPtr(outLayout, layout); + return SLANG_OK; +} - SLANG_RETURN_ON_FAIL(entryPoint->bindAsConstantBuffer(context, descriptorSet, entryPointOffset, entryPointInfo.layout)); - } +Result DeviceImpl::createRenderPassLayout( + const IRenderPassLayout::Desc& desc, IRenderPassLayout** outRenderPassLayout) +{ + RefPtr result = new RenderPassLayoutImpl(); + result->init(desc); + returnComPtr(outRenderPassLayout, result); + return SLANG_OK; +} - return SLANG_OK; - } +Result DeviceImpl::createInputLayout(IInputLayout::Desc const& desc, IInputLayout** outLayout) +{ + RefPtr layout(new InputLayoutImpl); - public: + // Work out a buffer size to hold all text + size_t textSize = 0; + auto inputElementCount = desc.inputElementCount; + auto inputElements = desc.inputElements; + auto vertexStreamCount = desc.vertexStreamCount; + auto vertexStreams = desc.vertexStreams; + for (int i = 0; i < Int(inputElementCount); ++i) + { + const char* text = inputElements[i].semanticName; + textSize += text ? 
(::strlen(text) + 1) : 0; + } + layout->m_text.setCount(textSize); + char* textPos = layout->m_text.getBuffer(); - Result init(D3D12Device* device) - { - return SLANG_OK; - } + List& elements = layout->m_elements; + elements.setCount(inputElementCount); - Result resetImpl( - D3D12Device* device, - RootShaderObjectLayoutImpl* layout, - DescriptorHeapReference viewHeap, - DescriptorHeapReference samplerHeap, - bool isMutable) - { - SLANG_RETURN_ON_FAIL(Super::init(device, layout, viewHeap, samplerHeap)); - m_isMutable = isMutable; - m_specializedLayout = nullptr; - m_entryPoints.clear(); - for (auto entryPointInfo : layout->getEntryPoints()) - { - RefPtr entryPoint; - SLANG_RETURN_ON_FAIL( - ShaderObjectImpl::create(device, entryPointInfo.layout, entryPoint.writeRef())); - entryPoint->m_isMutable = isMutable; - m_entryPoints.add(entryPoint); - } - return SLANG_OK; - } + for (Int i = 0; i < inputElementCount; ++i) + { + const InputElementDesc& srcEle = inputElements[i]; + const auto& srcStream = vertexStreams[srcEle.bufferSlotIndex]; + D3D12_INPUT_ELEMENT_DESC& dstEle = elements[i]; - Result reset( - D3D12Device* device, - RootShaderObjectLayoutImpl* layout, - TransientResourceHeapImpl* heap) + // Add text to the buffer + const char* semanticName = srcEle.semanticName; + if (semanticName) { - return resetImpl( - device, layout, &heap->m_stagingCpuViewHeap, &heap->m_stagingCpuSamplerHeap, false); + const int len = int(::strlen(semanticName)); + ::memcpy(textPos, semanticName, len + 1); + semanticName = textPos; + textPos += len + 1; } - protected: - Result _createSpecializedLayout(ShaderObjectLayoutImpl** outLayout) SLANG_OVERRIDE - { - ExtendedShaderObjectTypeList specializationArgs; - SLANG_RETURN_ON_FAIL(collectSpecializationArgs(specializationArgs)); - - // Note: There is an important policy decision being made here that we need - // to approach carefully. - // - // We are doing two different things that affect the layout of a program: - // - // 1. 
We are *composing* one or more pieces of code (notably the shared global/module - // stuff and the per-entry-point stuff). - // - // 2. We are *specializing* code that includes generic/existential parameters - // to concrete types/values. - // - // We need to decide the relative *order* of these two steps, because of how it impacts - // layout. The layout for `specialize(compose(A,B), X, Y)` is potentially different - // form that of `compose(specialize(A,X), speciealize(B,Y))`, even when both are - // semantically equivalent programs. - // - // Right now we are using the first option: we are first generating a full composition - // of all the code we plan to use (global scope plus all entry points), and then - // specializing it to the concatenated specialization argumenst for all of that. - // - // In some cases, though, this model isn't appropriate. For example, when dealing with - // ray-tracing shaders and local root signatures, we really want the parameters of each - // entry point (actually, each entry-point *group*) to be allocated distinct storage, - // which really means we want to compute something like: - // - // SpecializedGlobals = specialize(compose(ModuleA, ModuleB, ...), X, Y, ...) - // - // SpecializedEP1 = compose(SpecializedGlobals, specialize(EntryPoint1, T, U, ...)) - // SpecializedEP2 = compose(SpecializedGlobals, specialize(EntryPoint2, A, B, ...)) - // - // Note how in this case all entry points agree on the layout for the shared/common - // parmaeters, but their layouts are also independent of one another. - // - // Furthermore, in this example, loading another entry point into the system would not - // rquire re-computing the layouts (or generated kernel code) for any of the entry - // points that had already been loaded (in contrast to a compose-then-specialize - // approach). 
- // - ComPtr specializedComponentType; - ComPtr diagnosticBlob; - auto result = getLayout()->getSlangProgram()->specialize( - specializationArgs.components.getArrayView().getBuffer(), - specializationArgs.getCount(), - specializedComponentType.writeRef(), - diagnosticBlob.writeRef()); - - if (diagnosticBlob && diagnosticBlob->getBufferSize()) - { - getDebugCallback()->handleMessage( - SLANG_FAILED(result) ? DebugMessageType::Error : DebugMessageType::Info, - DebugMessageSource::Layer, - (const char*)diagnosticBlob->getBufferPointer()); - } + dstEle.SemanticName = semanticName; + dstEle.SemanticIndex = (UINT)srcEle.semanticIndex; + dstEle.Format = D3DUtil::getMapFormat(srcEle.format); + dstEle.InputSlot = (UINT)srcEle.bufferSlotIndex; + dstEle.AlignedByteOffset = (UINT)srcEle.offset; + dstEle.InputSlotClass = D3DUtil::getInputSlotClass(srcStream.slotClass); + dstEle.InstanceDataStepRate = (UINT)srcStream.instanceDataStepRate; + } - if (SLANG_FAILED(result)) - return result; + auto& vertexStreamStrides = layout->m_vertexStreamStrides; + vertexStreamStrides.setCount(vertexStreamCount); + for (Int i = 0; i < vertexStreamCount; ++i) + { + vertexStreamStrides[i] = vertexStreams[i].stride; + } - ComPtr d3dDiagnosticBlob; - auto slangSpecializedLayout = specializedComponentType->getLayout(); - RefPtr specializedLayout; - auto rootLayoutResult = RootShaderObjectLayoutImpl::create( - static_cast(getRenderer()), - specializedComponentType, - slangSpecializedLayout, - specializedLayout.writeRef(), - d3dDiagnosticBlob.writeRef()); + returnComPtr(outLayout, layout); + return SLANG_OK; +} - if (SLANG_FAILED(rootLayoutResult)) - { - return rootLayoutResult; - } +const gfx::DeviceInfo& DeviceImpl::getDeviceInfo() const { return m_info; } - // Note: Computing the layout for the specialized program will have also computed - // the layouts for the entry points, and we really need to attach that information - // to them so that they don't go and try to compute their own 
specializations. - // - // TODO: Well, if we move to the specialization model described above then maybe - // we *will* want entry points to do their own specialization work... - // - auto entryPointCount = m_entryPoints.getCount(); - for (Index i = 0; i < entryPointCount; ++i) - { - auto entryPointInfo = specializedLayout->getEntryPoint(i); - auto entryPointVars = m_entryPoints[i]; +Result DeviceImpl::readBufferResource( + IBufferResource* bufferIn, size_t offset, size_t size, ISlangBlob** outBlob) +{ - entryPointVars->m_specializedLayout = entryPointInfo.layout; - } + BufferResourceImpl* buffer = static_cast(bufferIn); - returnRefPtrMove(outLayout, specializedLayout); - return SLANG_OK; - } + const size_t bufferSize = buffer->getDesc()->sizeInBytes; - List> m_entryPoints; - }; + // This will be slow!!! - it blocks CPU on GPU completion + D3D12Resource& resource = buffer->m_resource; - class MutableRootShaderObjectImpl : public RootShaderObjectImpl + D3D12Resource stageBuf; + if (buffer->getDesc()->memoryType != MemoryType::ReadBack) { - public: - // Override default reference counting behavior to disable lifetime management via ComPtr. - // Root objects are managed by command buffer and does not need to be freed by the user. 
- SLANG_NO_THROW uint32_t SLANG_MCALL addRef() override { return ShaderObjectBase::addRef(); } - SLANG_NO_THROW uint32_t SLANG_MCALL release() override - { - return ShaderObjectBase::release(); - } - }; + auto encodeInfo = encodeResourceCommands(); - class ShaderTableImpl : public ShaderTableBase - { - public: - uint32_t m_rayGenTableOffset; - uint32_t m_missTableOffset; - uint32_t m_hitGroupTableOffset; - - D3D12Device* m_device; - - virtual RefPtr createDeviceBuffer( - PipelineStateBase* pipeline, - TransientResourceHeapBase* transientHeap, - IResourceCommandEncoder* encoder) override - { - uint32_t raygenTableSize = m_rayGenShaderCount * D3D12_SHADER_IDENTIFIER_SIZE_IN_BYTES; - uint32_t missTableSize = m_missShaderCount * D3D12_SHADER_IDENTIFIER_SIZE_IN_BYTES; - uint32_t hitgroupTableSize = m_hitGroupCount * D3D12_SHADER_IDENTIFIER_SIZE_IN_BYTES; - m_rayGenTableOffset = 0; - m_missTableOffset = - (uint32_t)D3DUtil::calcAligned(raygenTableSize, D3D12_RAYTRACING_SHADER_TABLE_BYTE_ALIGNMENT); - m_hitGroupTableOffset = (uint32_t)D3DUtil::calcAligned( - m_missTableOffset + missTableSize, D3D12_RAYTRACING_SHADER_TABLE_BYTE_ALIGNMENT); - uint32_t tableSize = m_hitGroupTableOffset + hitgroupTableSize; - - auto pipelineImpl = static_cast(pipeline); - ComPtr bufferResource; - IBufferResource::Desc bufferDesc = {}; - bufferDesc.memoryType = gfx::MemoryType::DeviceLocal; - bufferDesc.defaultState = ResourceState::General; - bufferDesc.type = IResource::Type::Buffer; - bufferDesc.sizeInBytes = tableSize; - m_device->createBufferResource(bufferDesc, nullptr, bufferResource.writeRef()); - - ComPtr stateObjectProperties; - pipelineImpl->m_stateObject->QueryInterface(stateObjectProperties.writeRef()); - - TransientResourceHeapImpl* transientHeapImpl = - static_cast(transientHeap); - - IBufferResource* stagingBuffer = nullptr; - size_t stagingBufferOffset = 0; - transientHeapImpl->allocateStagingBuffer( - tableSize, stagingBuffer, stagingBufferOffset, MemoryType::Upload); - - 
assert(stagingBuffer); - void* stagingPtr = nullptr; - stagingBuffer->map(nullptr, &stagingPtr); - - auto copyShaderIdInto = [&](void* dest, String& name, const ShaderRecordOverwrite& overwrite) - { - if (name.getLength()) - { - void* shaderId = stateObjectProperties->GetShaderIdentifier(name.toWString().begin()); - memcpy(dest, shaderId, D3D12_SHADER_IDENTIFIER_SIZE_IN_BYTES); - } - else - { - memset(dest, 0, D3D12_SHADER_IDENTIFIER_SIZE_IN_BYTES); - } - if (overwrite.size) - { - memcpy((uint8_t*)dest + overwrite.offset, overwrite.data, overwrite.size); - } - }; + // Readback heap + D3D12_HEAP_PROPERTIES heapProps; + heapProps.Type = D3D12_HEAP_TYPE_READBACK; + heapProps.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN; + heapProps.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN; + heapProps.CreationNodeMask = 1; + heapProps.VisibleNodeMask = 1; - uint8_t* stagingBufferPtr = (uint8_t*)stagingPtr + stagingBufferOffset; - for (uint32_t i = 0; i < m_rayGenShaderCount; i++) - { - copyShaderIdInto( - stagingBufferPtr + m_rayGenTableOffset + - D3D12_SHADER_IDENTIFIER_SIZE_IN_BYTES * i, - m_shaderGroupNames[i], - m_recordOverwrites[i]); - } - for (uint32_t i = 0; i < m_missShaderCount; i++) - { - copyShaderIdInto( - stagingBufferPtr + m_missTableOffset + - D3D12_SHADER_IDENTIFIER_SIZE_IN_BYTES * i, - m_shaderGroupNames[m_rayGenShaderCount + i], - m_recordOverwrites[m_rayGenShaderCount + i]); - } - for (uint32_t i = 0; i < m_hitGroupCount; i++) - { - copyShaderIdInto( - stagingBufferPtr + m_hitGroupTableOffset + - D3D12_SHADER_IDENTIFIER_SIZE_IN_BYTES * i, - m_shaderGroupNames[m_rayGenShaderCount + m_missShaderCount + i], - m_recordOverwrites[m_rayGenShaderCount + m_missShaderCount + i]); - } + // Resource to readback to + D3D12_RESOURCE_DESC stagingDesc; + initBufferResourceDesc(size, stagingDesc); - stagingBuffer->unmap(nullptr); - encoder->copyBuffer(bufferResource, 0, stagingBuffer, stagingBufferOffset, tableSize); - encoder->bufferBarrier( - 1, - 
bufferResource.readRef(), - gfx::ResourceState::CopyDestination, - gfx::ResourceState::ShaderResource); - RefPtr resultPtr = static_cast(bufferResource.get()); - return _Move(resultPtr); - } + SLANG_RETURN_ON_FAIL(stageBuf.initCommitted( + m_device, + heapProps, + D3D12_HEAP_FLAG_NONE, + stagingDesc, + D3D12_RESOURCE_STATE_COPY_DEST, + nullptr)); - }; + // Do the copy + encodeInfo.d3dCommandList->CopyBufferRegion(stageBuf, 0, resource, offset, size); - class CommandBufferImpl - : public ICommandBuffer - , public ComObject - { - public: - // There are a pair of cyclic references between a `TransientResourceHeap` and - // a `CommandBuffer` created from the heap. We need to break the cycle upon - // the public reference count of a command buffer dropping to 0. - SLANG_COM_OBJECT_IUNKNOWN_ALL + // Wait until complete + submitResourceCommandsAndWait(encodeInfo); + } - ICommandBuffer* getInterface(const Guid& guid) - { - if (guid == GfxGUID::IID_ISlangUnknown || guid == GfxGUID::IID_ICommandBuffer) - return static_cast(this); - return nullptr; - } - virtual void comFree() override { m_transientHeap.breakStrongReference(); } + D3D12Resource& stageBufRef = + buffer->getDesc()->memoryType != MemoryType::ReadBack ? stageBuf : resource; - virtual SLANG_NO_THROW Result SLANG_MCALL getNativeHandle(InteropHandle* handle) override - { - handle->api = InteropHandleAPI::D3D12; - handle->handleValue = (uint64_t)m_cmdList.get(); - return SLANG_OK; - } + // Map and copy + RefPtr blob = new ListBlob(); + { + UINT8* data; + D3D12_RANGE readRange = {0, size}; + + SLANG_RETURN_ON_FAIL( + stageBufRef.getResource()->Map(0, &readRange, reinterpret_cast(&data))); - public: - ComPtr m_cmdList; - ComPtr m_cmdList1; - ComPtr m_cmdList4; + // Copy to memory buffer + blob->m_data.setCount(size); + ::memcpy(blob->m_data.getBuffer(), data, size); - BreakableReference m_transientHeap; - // Weak reference is fine here since `m_transientHeap` already holds strong reference to - // device. 
- D3D12Device* m_renderer; - RootShaderObjectImpl m_rootShaderObject; - RefPtr m_mutableRootShaderObject; - bool m_descriptorHeapsBound = false; + stageBufRef.getResource()->Unmap(0, nullptr); + } + returnComPtr(outBlob, blob); + return SLANG_OK; +} - void bindDescriptorHeaps() +Result DeviceImpl::createProgram( + const IShaderProgram::Desc& desc, IShaderProgram** outProgram, ISlangBlob** outDiagnosticBlob) +{ + RefPtr shaderProgram = new ShaderProgramImpl(); + shaderProgram->init(desc); + ComPtr d3dDiagnosticBlob; + auto rootShaderLayoutResult = RootShaderObjectLayoutImpl::create( + this, + shaderProgram->linkedProgram, + shaderProgram->linkedProgram->getLayout(), + shaderProgram->m_rootObjectLayout.writeRef(), + d3dDiagnosticBlob.writeRef()); + if (!SLANG_SUCCEEDED(rootShaderLayoutResult)) + { + if (outDiagnosticBlob && d3dDiagnosticBlob) { - if (!m_descriptorHeapsBound) - { - ID3D12DescriptorHeap* heaps[] = { - m_transientHeap->getCurrentViewHeap().getHeap(), - m_transientHeap->getCurrentSamplerHeap().getHeap(), - }; - m_cmdList->SetDescriptorHeaps(SLANG_COUNT_OF(heaps), heaps); - m_descriptorHeapsBound = true; - } + RefPtr diagnosticBlob = + new StringBlob(String((const char*)d3dDiagnosticBlob->GetBufferPointer())); + returnComPtr(outDiagnosticBlob, diagnosticBlob); } + return rootShaderLayoutResult; + } + returnComPtr(outProgram, shaderProgram); + return SLANG_OK; +} - void invalidateDescriptorHeapBinding() { m_descriptorHeapsBound = false; } +Result DeviceImpl::createShaderObjectLayout( + slang::TypeLayoutReflection* typeLayout, ShaderObjectLayoutBase** outLayout) +{ + RefPtr layout; + SLANG_RETURN_ON_FAIL( + ShaderObjectLayoutImpl::createForElementType(this, typeLayout, layout.writeRef())); + returnRefPtrMove(outLayout, layout); + return SLANG_OK; +} - void reinit() - { - invalidateDescriptorHeapBinding(); - m_rootShaderObject.init(m_renderer); - } +Result DeviceImpl::createShaderObject(ShaderObjectLayoutBase* layout, IShaderObject** outObject) +{ + RefPtr 
shaderObject; + SLANG_RETURN_ON_FAIL(ShaderObjectImpl::create( + this, reinterpret_cast(layout), shaderObject.writeRef())); + returnComPtr(outObject, shaderObject); + return SLANG_OK; +} - void init( - D3D12Device* renderer, - ID3D12GraphicsCommandList* d3dCommandList, - TransientResourceHeapImpl* transientHeap) - { - m_transientHeap = transientHeap; - m_renderer = renderer; - m_cmdList = d3dCommandList; +Result DeviceImpl::createMutableShaderObject( + ShaderObjectLayoutBase* layout, IShaderObject** outObject) +{ + auto result = createShaderObject(layout, outObject); + SLANG_RETURN_ON_FAIL(result); + static_cast(*outObject)->m_isMutable = true; + return result; +} - reinit(); +Result DeviceImpl::createMutableRootShaderObject(IShaderProgram* program, IShaderObject** outObject) +{ + RefPtr result = new MutableRootShaderObjectImpl(); + result->init(this); + auto programImpl = static_cast(program); + result->resetImpl( + this, programImpl->m_rootObjectLayout, m_cpuViewHeap.Ptr(), m_cpuSamplerHeap.Ptr(), true); + returnComPtr(outObject, result); + return SLANG_OK; +} -#if SLANG_GFX_HAS_DXR_SUPPORT - m_cmdList->QueryInterface(m_cmdList4.writeRef()); - if (m_cmdList4) - { - m_cmdList1 = m_cmdList4; - return; - } -#endif - m_cmdList->QueryInterface(m_cmdList1.writeRef()); - } +Result DeviceImpl::createShaderTable(const IShaderTable::Desc& desc, IShaderTable** outShaderTable) +{ + RefPtr result = new ShaderTableImpl(); + result->m_device = this; + result->init(desc); + returnComPtr(outShaderTable, result); + return SLANG_OK; +} - - class ResourceCommandEncoderImpl - : public IResourceCommandEncoder - , public PipelineCommandEncoder - { - public: - virtual SLANG_NO_THROW void SLANG_MCALL copyBuffer( - IBufferResource* dst, - size_t dstOffset, - IBufferResource* src, - size_t srcOffset, - size_t size) override - { - auto dstBuffer = static_cast(dst); - auto srcBuffer = static_cast(src); - - m_commandBuffer->m_cmdList->CopyBufferRegion( - dstBuffer->m_resource.getResource(), - 
dstOffset, - srcBuffer->m_resource.getResource(), - srcOffset, - size); - } - virtual SLANG_NO_THROW void SLANG_MCALL uploadBufferData( - IBufferResource* dst, size_t offset, size_t size, void* data) override - { - _uploadBufferData( - m_commandBuffer->m_renderer->m_device, - m_commandBuffer->m_cmdList, - m_commandBuffer->m_transientHeap, - static_cast(dst), - offset, - size, - data); - } - virtual SLANG_NO_THROW void SLANG_MCALL textureBarrier( - size_t count, - ITextureResource* const* textures, - ResourceState src, - ResourceState dst) override - { - ShortList barriers; +Result DeviceImpl::createGraphicsPipelineState( + const GraphicsPipelineStateDesc& desc, IPipelineState** outState) +{ + RefPtr pipelineStateImpl = new PipelineStateImpl(this); + pipelineStateImpl->init(desc); + returnComPtr(outState, pipelineStateImpl); + return SLANG_OK; +} - for (size_t i = 0; i < count; i++) - { - auto textureImpl = static_cast(textures[i]); - auto d3dFormat = D3DUtil::getMapFormat(textureImpl->getDesc()->format); - auto textureDesc = textureImpl->getDesc(); - D3D12_RESOURCE_BARRIER barrier; - barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE; - if (src == dst && src == ResourceState::UnorderedAccess) - { - barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_UAV; - barrier.UAV.pResource = textureImpl->m_resource.getResource(); - } - else - { - barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; - barrier.Transition.StateBefore = D3DUtil::getResourceState(src); - barrier.Transition.StateAfter = D3DUtil::getResourceState(dst); - if (barrier.Transition.StateBefore == barrier.Transition.StateAfter) - continue; - barrier.Transition.pResource = textureImpl->m_resource.getResource(); - auto planeCount = D3DUtil::getPlaneSliceCount( - D3DUtil::getMapFormat(textureImpl->getDesc()->format)); - auto arraySize = textureDesc->arraySize; - if (arraySize == 0) - arraySize = 1; - barrier.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; - } - barriers.add(barrier); - } - if 
(barriers.getCount()) - { - m_commandBuffer->m_cmdList->ResourceBarrier( - (UINT)barriers.getCount(), barriers.getArrayView().getBuffer()); - } - } - virtual SLANG_NO_THROW void SLANG_MCALL bufferBarrier( - size_t count, - IBufferResource* const* buffers, - ResourceState src, - ResourceState dst) override - { +Result DeviceImpl::createComputePipelineState( + const ComputePipelineStateDesc& desc, IPipelineState** outState) +{ + RefPtr pipelineStateImpl = new PipelineStateImpl(this); + pipelineStateImpl->init(desc); + returnComPtr(outState, pipelineStateImpl); + return SLANG_OK; +} - List barriers; - barriers.reserve(count); +DeviceImpl::ResourceCommandRecordInfo DeviceImpl::encodeResourceCommands() +{ + ResourceCommandRecordInfo info; + m_resourceCommandTransientHeap->createCommandBuffer(info.commandBuffer.writeRef()); + info.d3dCommandList = static_cast(info.commandBuffer.get())->m_cmdList; + return info; +} - for (size_t i = 0; i < count; i++) - { - auto bufferImpl = static_cast(buffers[i]); +void DeviceImpl::submitResourceCommandsAndWait(const DeviceImpl::ResourceCommandRecordInfo& info) +{ + info.commandBuffer->close(); + m_resourceCommandQueue->executeCommandBuffer(info.commandBuffer); + m_resourceCommandTransientHeap->synchronizeAndReset(); +} - D3D12_RESOURCE_BARRIER barrier = {}; - // If the src == dst, it must be a UAV barrier. - barrier.Type = (src == dst && dst == ResourceState::UnorderedAccess) - ? 
D3D12_RESOURCE_BARRIER_TYPE_UAV - : D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; - barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE; +Result DeviceImpl::createQueryPool(const IQueryPool::Desc& desc, IQueryPool** outState) +{ + switch (desc.type) + { + case QueryType::AccelerationStructureCompactedSize: + case QueryType::AccelerationStructureSerializedSize: + case QueryType::AccelerationStructureCurrentSize: + { + RefPtr queryPoolImpl = + new PlainBufferProxyQueryPoolImpl(); + uint32_t stride = 8; + if (desc.type == QueryType::AccelerationStructureSerializedSize) + stride = 16; + SLANG_RETURN_ON_FAIL(queryPoolImpl->init(desc, this, stride)); + returnComPtr(outState, queryPoolImpl); + return SLANG_OK; + } + default: + { + RefPtr queryPoolImpl = new QueryPoolImpl(); + SLANG_RETURN_ON_FAIL(queryPoolImpl->init(desc, this)); + returnComPtr(outState, queryPoolImpl); + return SLANG_OK; + } + } +} - if (barrier.Type == D3D12_RESOURCE_BARRIER_TYPE_UAV) - { - barrier.UAV.pResource = bufferImpl->m_resource; - } - else - { - barrier.Transition.pResource = bufferImpl->m_resource; - barrier.Transition.StateBefore = D3DUtil::getResourceState(src); - barrier.Transition.StateAfter = D3DUtil::getResourceState(dst); - barrier.Transition.Subresource = 0; - if (barrier.Transition.StateAfter == barrier.Transition.StateBefore) - continue; - } - barriers.add(barrier); - } - if (barriers.getCount()) - { - m_commandBuffer->m_cmdList4->ResourceBarrier( - (UINT)barriers.getCount(), barriers.getArrayView().getBuffer()); - } - } - virtual SLANG_NO_THROW void SLANG_MCALL endEncoding() {} - virtual SLANG_NO_THROW void SLANG_MCALL - writeTimestamp(IQueryPool* pool, SlangInt index) override - { - static_cast(pool)->writeTimestamp( - m_commandBuffer->m_cmdList, index); - } - virtual SLANG_NO_THROW void SLANG_MCALL copyTexture( - ITextureResource* dst, - ResourceState dstState, - SubresourceRange dstSubresource, - ITextureResource::Offset3D dstOffset, - ITextureResource* src, - ResourceState srcState, - 
SubresourceRange srcSubresource, - ITextureResource::Offset3D srcOffset, - ITextureResource::Size extent) override - { - auto dstTexture = static_cast(dst); - auto srcTexture = static_cast(src); +Result DeviceImpl::createFence(const IFence::Desc& desc, IFence** outFence) +{ + RefPtr fence = new FenceImpl(); + SLANG_RETURN_ON_FAIL(fence->init(this, desc)); + returnComPtr(outFence, fence); + return SLANG_OK; +} - if (dstSubresource.layerCount == 0 && dstSubresource.mipLevelCount == 0 && - srcSubresource.layerCount == 0 && srcSubresource.mipLevelCount == 0) - { - m_commandBuffer->m_cmdList->CopyResource( - dstTexture->m_resource.getResource(), srcTexture->m_resource.getResource()); - return; - } +Result DeviceImpl::waitForFences( + uint32_t fenceCount, IFence** fences, uint64_t* fenceValues, bool waitForAll, uint64_t timeout) +{ + ShortList waitHandles; + for (uint32_t i = 0; i < fenceCount; ++i) + { + auto fenceImpl = static_cast(fences[i]); + waitHandles.add(fenceImpl->getWaitEvent()); + SLANG_RETURN_ON_FAIL( + fenceImpl->m_fence->SetEventOnCompletion(fenceValues[i], fenceImpl->getWaitEvent())); + } + auto result = WaitForMultipleObjects( + fenceCount, + waitHandles.getArrayView().getBuffer(), + waitForAll ? TRUE : FALSE, + timeout == kTimeoutInfinite ? INFINITE : (DWORD)(timeout / 1000000)); + if (result == WAIT_TIMEOUT) + return SLANG_E_TIME_OUT; + return result == WAIT_FAILED ? 
SLANG_FAIL : SLANG_OK; +} - auto d3dFormat = D3DUtil::getMapFormat(dstTexture->getDesc()->format); - auto aspectMask = (int32_t)dstSubresource.aspectMask; - if (dstSubresource.aspectMask == TextureAspect::Default) - aspectMask = (int32_t)TextureAspect::Color; - while (aspectMask) - { - auto aspect = Math::getLowestBit((int32_t)aspectMask); - aspectMask &= ~aspect; - auto planeIndex = D3DUtil::getPlaneSlice(d3dFormat, (TextureAspect)aspect); - for (uint32_t layer = 0; layer < dstSubresource.layerCount; layer++) - { - for (uint32_t mipLevel = 0; mipLevel < dstSubresource.mipLevelCount; - mipLevel++) - { - D3D12_TEXTURE_COPY_LOCATION dstRegion = {}; - - dstRegion.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; - dstRegion.pResource = dstTexture->m_resource.getResource(); - dstRegion.SubresourceIndex = D3DUtil::getSubresourceIndex( - dstSubresource.mipLevel + mipLevel, - dstSubresource.baseArrayLayer + layer, - planeIndex, - dstTexture->getDesc()->numMipLevels, - dstTexture->getDesc()->arraySize); - - D3D12_TEXTURE_COPY_LOCATION srcRegion = {}; - srcRegion.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; - srcRegion.pResource = srcTexture->m_resource.getResource(); - srcRegion.SubresourceIndex = D3DUtil::getSubresourceIndex( - srcSubresource.mipLevel + mipLevel, - srcSubresource.baseArrayLayer + layer, - planeIndex, - srcTexture->getDesc()->numMipLevels, - srcTexture->getDesc()->arraySize); - - D3D12_BOX srcBox = {}; - srcBox.left = srcOffset.x; - srcBox.top = srcOffset.y; - srcBox.front = srcOffset.z; - srcBox.right = srcBox.left + extent.width; - srcBox.bottom = srcBox.top + extent.height; - srcBox.back = srcBox.front + extent.depth; - - m_commandBuffer->m_cmdList->CopyTextureRegion( - &dstRegion, - dstOffset.x, - dstOffset.y, - dstOffset.z, - &srcRegion, - &srcBox); - } - } - } - } +Result DeviceImpl::getAccelerationStructurePrebuildInfo( + const IAccelerationStructure::BuildInputs& buildInputs, + IAccelerationStructure::PrebuildInfo* outPrebuildInfo) +{ + if 
(!m_device5) + return SLANG_E_NOT_AVAILABLE; - virtual SLANG_NO_THROW void SLANG_MCALL uploadTextureData( - ITextureResource* dst, - SubresourceRange subResourceRange, - ITextureResource::Offset3D offset, - ITextureResource::Size extent, - ITextureResource::SubresourceData* subResourceData, - size_t subResourceDataCount) override - { - auto dstTexture = static_cast(dst); - auto baseSubresourceIndex = D3DUtil::getSubresourceIndex( - subResourceRange.mipLevel, - subResourceRange.baseArrayLayer, - 0, - dstTexture->getDesc()->numMipLevels, - dstTexture->getDesc()->arraySize); - auto textureSize = dstTexture->getDesc()->size; - FormatInfo formatInfo = {}; - gfxGetFormatInfo(dstTexture->getDesc()->format, &formatInfo); - for (uint32_t i = 0; i < (uint32_t)subResourceDataCount; i++) - { - auto subresourceIndex = baseSubresourceIndex + i; - // Get the footprint - D3D12_RESOURCE_DESC texDesc = dstTexture->m_resource.getResource()->GetDesc(); - - D3D12_TEXTURE_COPY_LOCATION dstRegion = {}; - - dstRegion.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; - dstRegion.SubresourceIndex = subresourceIndex; - dstRegion.pResource = dstTexture->m_resource.getResource(); - - D3D12_TEXTURE_COPY_LOCATION srcRegion = {}; - srcRegion.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; - D3D12_PLACED_SUBRESOURCE_FOOTPRINT& footprint = srcRegion.PlacedFootprint; - footprint.Offset = 0; - footprint.Footprint.Format = texDesc.Format; - uint32_t mipLevel = D3DUtil::getSubresourceMipLevel( - subresourceIndex, dstTexture->getDesc()->numMipLevels); - if (extent.width != ITextureResource::kRemainingTextureSize) - { - footprint.Footprint.Width = extent.width; - } - else - { - footprint.Footprint.Width = - Math::Max(1, (textureSize.width >> mipLevel)) - offset.x; - } - if (extent.height != ITextureResource::kRemainingTextureSize) - { - footprint.Footprint.Height = extent.height; - } - else - { - footprint.Footprint.Height = - Math::Max(1, (textureSize.height >> mipLevel)) - offset.y; - } - if 
(extent.depth != ITextureResource::kRemainingTextureSize) - { - footprint.Footprint.Depth = extent.depth; - } - else - { - footprint.Footprint.Depth = - Math::Max(1, (textureSize.depth >> mipLevel)) - offset.z; - } - auto rowSize = (footprint.Footprint.Width + formatInfo.blockWidth - 1) / - formatInfo.blockWidth * formatInfo.blockSizeInBytes; - auto rowCount = (footprint.Footprint.Height + formatInfo.blockHeight - 1) / - formatInfo.blockHeight; - footprint.Footprint.RowPitch = (UINT)D3DUtil::calcAligned( - rowSize, (uint32_t)D3D12_TEXTURE_DATA_PITCH_ALIGNMENT); - - auto bufferSize = - footprint.Footprint.RowPitch * rowCount * footprint.Footprint.Depth; - - IBufferResource* stagingBuffer; - size_t stagingBufferOffset = 0; - m_commandBuffer->m_transientHeap->allocateStagingBuffer( - bufferSize, stagingBuffer, stagingBufferOffset, MemoryType::Upload, true); - assert(stagingBufferOffset == 0); - BufferResourceImpl* bufferImpl = - static_cast(stagingBuffer); - uint8_t* bufferData = nullptr; - D3D12_RANGE mapRange = {0, 0}; - bufferImpl->m_resource.getResource()->Map(0, &mapRange, (void**)&bufferData); - for (uint32_t z = 0; z < footprint.Footprint.Depth; z++) - { - auto imageStart = - bufferData + footprint.Footprint.RowPitch * rowCount * (size_t)z; - auto srcData = - (uint8_t*)subResourceData->data + subResourceData->strideZ * z; - for (uint32_t row = 0; row < rowCount; row++) - { - memcpy( - imageStart + row * (size_t)footprint.Footprint.RowPitch, - srcData + subResourceData->strideY * row, - rowSize); - } - } - bufferImpl->m_resource.getResource()->Unmap(0, nullptr); - srcRegion.pResource = bufferImpl->m_resource.getResource(); - m_commandBuffer->m_cmdList->CopyTextureRegion( - &dstRegion, offset.x, offset.y, offset.z, &srcRegion, nullptr); - } - } + D3DAccelerationStructureInputsBuilder inputsBuilder; + SLANG_RETURN_ON_FAIL(inputsBuilder.build(buildInputs, getDebugCallback())); - virtual SLANG_NO_THROW void SLANG_MCALL clearResourceView( - IResourceView* view, - 
ClearValue* clearValue, - ClearResourceViewFlags::Enum flags) override - { - auto viewImpl = static_cast(view); - switch (view->getViewDesc()->type) - { - case IResourceView::Type::RenderTarget: - m_commandBuffer->m_cmdList->ClearRenderTargetView( - viewImpl->m_descriptor.cpuHandle, - clearValue->color.floatValues, - 0, - nullptr); - break; - case IResourceView::Type::DepthStencil: - { - D3D12_CLEAR_FLAGS clearFlags = (D3D12_CLEAR_FLAGS)0; - if (flags & ClearResourceViewFlags::ClearDepth) - { - clearFlags |= D3D12_CLEAR_FLAG_DEPTH; - } - if (flags & ClearResourceViewFlags::ClearStencil) - { - clearFlags |= D3D12_CLEAR_FLAG_STENCIL; - } - m_commandBuffer->m_cmdList->ClearDepthStencilView( - viewImpl->m_descriptor.cpuHandle, - clearFlags, - clearValue->depthStencil.depth, - (UINT8)clearValue->depthStencil.stencil, - 0, - nullptr); - break; - } - case IResourceView::Type::UnorderedAccess: - { - ID3D12Resource* d3dResource = nullptr; - switch (viewImpl->m_resource->getType()) - { - case IResource::Type::Buffer: - d3dResource = - static_cast(viewImpl->m_resource.Ptr()) - ->m_resource.getResource(); - break; - default: - d3dResource = - static_cast(viewImpl->m_resource.Ptr()) - ->m_resource.getResource(); - break; - } - auto gpuHandleIndex = - m_commandBuffer->m_transientHeap->getCurrentViewHeap().allocate(1); - if (gpuHandleIndex == -1) - { - m_commandBuffer->m_transientHeap->allocateNewViewDescriptorHeap( - m_commandBuffer->m_renderer); - gpuHandleIndex = - m_commandBuffer->m_transientHeap->getCurrentViewHeap().allocate(1); - m_commandBuffer->bindDescriptorHeaps(); - } - this->m_commandBuffer->m_renderer->m_device->CopyDescriptorsSimple( - 1, - m_commandBuffer->m_transientHeap->getCurrentViewHeap().getCpuHandle( - gpuHandleIndex), - viewImpl->m_descriptor.cpuHandle, - D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); - - if (flags & ClearResourceViewFlags::FloatClearValues) - { - m_commandBuffer->m_cmdList->ClearUnorderedAccessViewFloat( - 
m_commandBuffer->m_transientHeap->getCurrentViewHeap().getGpuHandle( - gpuHandleIndex), - viewImpl->m_descriptor.cpuHandle, - d3dResource, - clearValue->color.floatValues, - 0, - nullptr); - } - else - { - m_commandBuffer->m_cmdList->ClearUnorderedAccessViewUint( - m_commandBuffer->m_transientHeap->getCurrentViewHeap().getGpuHandle( - gpuHandleIndex), - viewImpl->m_descriptor.cpuHandle, - d3dResource, - clearValue->color.uintValues, - 0, - nullptr); - } - break; - } - default: - break; - } - } + D3D12_RAYTRACING_ACCELERATION_STRUCTURE_PREBUILD_INFO prebuildInfo; + m_device5->GetRaytracingAccelerationStructurePrebuildInfo(&inputsBuilder.desc, &prebuildInfo); - virtual SLANG_NO_THROW void SLANG_MCALL resolveResource( - ITextureResource* source, - ResourceState sourceState, - SubresourceRange sourceRange, - ITextureResource* dest, - ResourceState destState, - SubresourceRange destRange) override - { - auto srcTexture = static_cast(source); - auto srcDesc = srcTexture->getDesc(); - auto dstTexture = static_cast(dest); - auto dstDesc = dstTexture->getDesc(); + outPrebuildInfo->resultDataMaxSize = prebuildInfo.ResultDataMaxSizeInBytes; + outPrebuildInfo->scratchDataSize = prebuildInfo.ScratchDataSizeInBytes; + outPrebuildInfo->updateScratchDataSize = prebuildInfo.UpdateScratchDataSizeInBytes; + return SLANG_OK; +} - for (uint32_t layer = 0; layer < sourceRange.layerCount; ++layer) - { - for (uint32_t mip = 0; mip < sourceRange.mipLevelCount; ++mip) - { - auto srcSubresourceIndex = D3DUtil::getSubresourceIndex( - mip + sourceRange.mipLevel, - layer + sourceRange.baseArrayLayer, - 0, - srcDesc->numMipLevels, - srcDesc->arraySize); - auto dstSubresourceIndex = D3DUtil::getSubresourceIndex( - mip + destRange.mipLevel, - layer + destRange.baseArrayLayer, - 0, - dstDesc->numMipLevels, - dstDesc->arraySize); - - DXGI_FORMAT format = D3DUtil::getMapFormat(srcDesc->format); - - m_commandBuffer->m_cmdList->ResolveSubresource( - dstTexture->m_resource.getResource(), - 
dstSubresourceIndex, - srcTexture->m_resource.getResource(), - srcSubresourceIndex, - format); - } - } - } +Result DeviceImpl::createAccelerationStructure( + const IAccelerationStructure::CreateDesc& desc, IAccelerationStructure** outAS) +{ +#if SLANG_GFX_HAS_DXR_SUPPORT + RefPtr result = new AccelerationStructureImpl(); + result->m_device5 = m_device5; + result->m_buffer = static_cast(desc.buffer); + result->m_size = desc.size; + result->m_offset = desc.offset; + result->m_allocator = m_cpuViewHeap; + result->m_desc.type = IResourceView::Type::AccelerationStructure; + SLANG_RETURN_ON_FAIL(m_cpuViewHeap->allocate(&result->m_descriptor)); + D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc; + srvDesc.Format = DXGI_FORMAT_UNKNOWN; + srvDesc.ViewDimension = D3D12_SRV_DIMENSION_RAYTRACING_ACCELERATION_STRUCTURE; + srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + srvDesc.RaytracingAccelerationStructure.Location = + result->m_buffer->getDeviceAddress() + desc.offset; + m_device->CreateShaderResourceView(nullptr, &srvDesc, result->m_descriptor.cpuHandle); + returnComPtr(outAS, result); + return SLANG_OK; +#else + *outAS = nullptr; + return SLANG_FAIL; +#endif +} - virtual SLANG_NO_THROW void SLANG_MCALL resolveQuery( - IQueryPool* queryPool, - uint32_t index, - uint32_t count, - IBufferResource* buffer, - uint64_t offset) override - { - auto queryBase = static_cast(queryPool); - switch (queryBase->m_desc.type) - { - case QueryType::AccelerationStructureCompactedSize: - case QueryType::AccelerationStructureCurrentSize: - case QueryType::AccelerationStructureSerializedSize: - { - auto queryPoolImpl = static_cast(queryPool); - auto bufferImpl = static_cast(buffer); - auto srcQueryBuffer = - queryPoolImpl->m_bufferResource->m_resource.getResource(); - - D3D12_RESOURCE_BARRIER barrier = {}; - barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; - barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_UNORDERED_ACCESS; - barrier.Transition.StateAfter = 
D3D12_RESOURCE_STATE_COPY_SOURCE; - barrier.Transition.pResource = srcQueryBuffer; - m_commandBuffer->m_cmdList->ResourceBarrier(1, &barrier); - - m_commandBuffer->m_cmdList->CopyBufferRegion( - bufferImpl->m_resource.getResource(), - offset, - srcQueryBuffer, - index * sizeof(uint64_t), - count * sizeof(uint64_t)); - - barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; - barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_SOURCE; - barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_UNORDERED_ACCESS; - barrier.Transition.pResource = srcQueryBuffer; - m_commandBuffer->m_cmdList->ResourceBarrier(1, &barrier); - } - break; - default: - { - auto queryPoolImpl = static_cast(queryPool); - auto bufferImpl = static_cast(buffer); - m_commandBuffer->m_cmdList->ResolveQueryData( - queryPoolImpl->m_queryHeap.get(), - queryPoolImpl->m_queryType, - index, - count, - bufferImpl->m_resource.getResource(), - offset); - } - break; - } - } +Result DeviceImpl::createRayTracingPipelineState( + const RayTracingPipelineStateDesc& inDesc, IPipelineState** outState) +{ + if (!m_device5) + { + return SLANG_E_NOT_AVAILABLE; + } - virtual SLANG_NO_THROW void SLANG_MCALL copyTextureToBuffer( - IBufferResource* dst, - size_t dstOffset, - size_t dstSize, - size_t dstRowStride, - ITextureResource* src, - ResourceState srcState, - SubresourceRange srcSubresource, - ITextureResource::Offset3D srcOffset, - ITextureResource::Size extent) override - { - assert(srcSubresource.mipLevelCount <= 1); + RefPtr pipelineStateImpl = new RayTracingPipelineStateImpl(this); + pipelineStateImpl->init(inDesc); + returnComPtr(outState, pipelineStateImpl); + return SLANG_OK; +} - auto srcTexture = static_cast(src); - auto dstBuffer = static_cast(dst); - auto baseSubresourceIndex = D3DUtil::getSubresourceIndex( - srcSubresource.mipLevel, - srcSubresource.baseArrayLayer, - 0, - srcTexture->getDesc()->numMipLevels, - srcTexture->getDesc()->arraySize); - auto textureSize = srcTexture->getDesc()->size; - 
FormatInfo formatInfo = {}; - gfxGetFormatInfo(srcTexture->getDesc()->format, &formatInfo); - if (srcSubresource.mipLevelCount == 0) - srcSubresource.mipLevelCount = srcTexture->getDesc()->numMipLevels; - if (srcSubresource.layerCount == 0) - srcSubresource.layerCount = srcTexture->getDesc()->arraySize; - - for (uint32_t layer = 0; layer < srcSubresource.layerCount; layer++) - { - // Get the footprint - D3D12_RESOURCE_DESC texDesc = srcTexture->m_resource.getResource()->GetDesc(); - - D3D12_TEXTURE_COPY_LOCATION dstRegion = {}; - dstRegion.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; - dstRegion.pResource = dstBuffer->m_resource.getResource(); - D3D12_PLACED_SUBRESOURCE_FOOTPRINT& footprint = dstRegion.PlacedFootprint; - - D3D12_TEXTURE_COPY_LOCATION srcRegion = {}; - srcRegion.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; - srcRegion.SubresourceIndex = D3DUtil::getSubresourceIndex( - srcSubresource.mipLevel, - layer + srcSubresource.baseArrayLayer, - 0, - srcTexture->getDesc()->numMipLevels, - srcTexture->getDesc()->arraySize); - srcRegion.pResource = srcTexture->m_resource.getResource(); - - footprint.Offset = dstOffset; - footprint.Footprint.Format = texDesc.Format; - uint32_t mipLevel = srcSubresource.mipLevel; - if (extent.width != 0xFFFFFFFF) - { - footprint.Footprint.Width = extent.width; - } - else - { - footprint.Footprint.Width = - Math::Max(1, (textureSize.width >> mipLevel)) - srcOffset.x; - } - if (extent.height != 0xFFFFFFFF) - { - footprint.Footprint.Height = extent.height; - } - else - { - footprint.Footprint.Height = - Math::Max(1, (textureSize.height >> mipLevel)) - srcOffset.y; - } - if (extent.depth != 0xFFFFFFFF) - { - footprint.Footprint.Depth = extent.depth; - } - else - { - footprint.Footprint.Depth = - Math::Max(1, (textureSize.depth >> mipLevel)) - srcOffset.z; - } +Result DeviceImpl::createTransientResourceHeapImpl( + ITransientResourceHeap::Flags::Enum flags, + size_t constantBufferSize, + uint32_t viewDescriptors, + uint32_t 
samplerDescriptors, + TransientResourceHeapImpl** outHeap) +{ + RefPtr result = new TransientResourceHeapImpl(); + ITransientResourceHeap::Desc desc = {}; + desc.flags = flags; + desc.samplerDescriptorCount = samplerDescriptors; + desc.constantBufferSize = constantBufferSize; + desc.constantBufferDescriptorCount = viewDescriptors; + desc.accelerationStructureDescriptorCount = viewDescriptors; + desc.srvDescriptorCount = viewDescriptors; + desc.uavDescriptorCount = viewDescriptors; + SLANG_RETURN_ON_FAIL(result->init(desc, this, viewDescriptors, samplerDescriptors)); + returnRefPtrMove(outHeap, result); + return SLANG_OK; +} - assert(dstRowStride % D3D12_TEXTURE_DATA_PITCH_ALIGNMENT == 0); - footprint.Footprint.RowPitch = dstRowStride; - - auto bufferSize = footprint.Footprint.RowPitch * footprint.Footprint.Height * - footprint.Footprint.Depth; - - D3D12_BOX srcBox = {}; - srcBox.left = srcOffset.x; - srcBox.top = srcOffset.y; - srcBox.front = srcOffset.z; - srcBox.right = srcOffset.x + extent.width; - srcBox.bottom = srcOffset.y + extent.height; - srcBox.back = srcOffset.z + extent.depth; - m_commandBuffer->m_cmdList->CopyTextureRegion( - &dstRegion, 0, 0, 0, &srcRegion, &srcBox); - } - } +Result DeviceImpl::createCommandQueueImpl(CommandQueueImpl** outQueue) +{ + int queueIndex = m_queueIndexAllocator.alloc(1); + // If we run out of queue index space, then the user is requesting too many queues. 
+ if (queueIndex == -1) + return SLANG_FAIL; - virtual SLANG_NO_THROW void SLANG_MCALL textureSubresourceBarrier( - ITextureResource* texture, - SubresourceRange subresourceRange, - ResourceState src, - ResourceState dst) override - { - auto textureImpl = static_cast(texture); + RefPtr queue = new CommandQueueImpl(); + SLANG_RETURN_ON_FAIL(queue->init(this, (uint32_t)queueIndex)); + returnRefPtrMove(outQueue, queue); + return SLANG_OK; +} - ShortList barriers; - D3D12_RESOURCE_BARRIER barrier; - barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE; - if (src == dst && src == ResourceState::UnorderedAccess) - { - barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_UAV; - barrier.UAV.pResource = textureImpl->m_resource.getResource(); - barriers.add(barrier); - } - else - { - barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; - barrier.Transition.StateBefore = D3DUtil::getResourceState(src); - barrier.Transition.StateAfter = D3DUtil::getResourceState(dst); - if (barrier.Transition.StateBefore == barrier.Transition.StateAfter) - return; - barrier.Transition.pResource = textureImpl->m_resource.getResource(); - auto d3dFormat = D3DUtil::getMapFormat(textureImpl->getDesc()->format); - auto aspectMask = (int32_t)subresourceRange.aspectMask; - if (subresourceRange.aspectMask == TextureAspect::Default) - aspectMask = (int32_t)TextureAspect::Color; - while (aspectMask) - { - auto aspect = Math::getLowestBit((int32_t)aspectMask); - aspectMask &= ~aspect; - auto planeIndex = D3DUtil::getPlaneSlice(d3dFormat, (TextureAspect)aspect); - for (uint32_t layer = 0; layer < subresourceRange.layerCount; layer++) - { - for (uint32_t mip = 0; mip < subresourceRange.mipLevelCount; mip++) - { - barrier.Transition.Subresource = D3DUtil::getSubresourceIndex( - mip + subresourceRange.mipLevel, - layer + subresourceRange.baseArrayLayer, - planeIndex, - textureImpl->getDesc()->numMipLevels, - textureImpl->getDesc()->arraySize); - barriers.add(barrier); - } - } - } - } - 
m_commandBuffer->m_cmdList->ResourceBarrier( - (UINT)barriers.getCount(), barriers.getArrayView().getBuffer()); - } +PROC DeviceImpl::loadProc(HMODULE module, char const* name) +{ + PROC proc = ::GetProcAddress(module, name); + if (!proc) + { + fprintf(stderr, "error: failed load symbol '%s'\n", name); + return nullptr; + } + return proc; +} - virtual SLANG_NO_THROW void SLANG_MCALL - beginDebugEvent(const char* name, float rgbColor[3]) override - { - auto beginEvent = m_commandBuffer->m_renderer->m_BeginEventOnCommandList; - if (beginEvent) - { - beginEvent( - m_commandBuffer->m_cmdList, - 0xff000000 | (uint8_t(rgbColor[0] * 255.0f) << 16) | - (uint8_t(rgbColor[1] * 255.0f) << 8) | uint8_t(rgbColor[2] * 255.0f), - name); - } - } - virtual SLANG_NO_THROW void SLANG_MCALL endDebugEvent() override - { - auto endEvent = m_commandBuffer->m_renderer->m_EndEventOnCommandList; - if (endEvent) - { - endEvent(m_commandBuffer->m_cmdList); - } - } - }; +DeviceImpl::~DeviceImpl() { m_shaderObjectLayoutCache = decltype(m_shaderObjectLayoutCache)(); } - ResourceCommandEncoderImpl m_resourceCommandEncoder; +struct GraphicsSubmitter : public Submitter +{ + virtual void setRootConstantBufferView( + int index, D3D12_GPU_VIRTUAL_ADDRESS gpuBufferLocation) override + { + m_commandList->SetGraphicsRootConstantBufferView(index, gpuBufferLocation); + } + virtual void setRootUAV(int index, D3D12_GPU_VIRTUAL_ADDRESS gpuBufferLocation) override + { + m_commandList->SetGraphicsRootUnorderedAccessView(index, gpuBufferLocation); + } + virtual void setRootSRV(int index, D3D12_GPU_VIRTUAL_ADDRESS gpuBufferLocation) override + { + m_commandList->SetGraphicsRootShaderResourceView(index, gpuBufferLocation); + } + virtual void setRootDescriptorTable( + int index, D3D12_GPU_DESCRIPTOR_HANDLE baseDescriptor) override + { + m_commandList->SetGraphicsRootDescriptorTable(index, baseDescriptor); + } + void setRootSignature(ID3D12RootSignature* rootSignature) + { + 
m_commandList->SetGraphicsRootSignature(rootSignature); + } + void setRootConstants( + Index rootParamIndex, + Index dstOffsetIn32BitValues, + Index countOf32BitValues, + void const* srcData) override + { + m_commandList->SetGraphicsRoot32BitConstants( + UINT(rootParamIndex), UINT(countOf32BitValues), srcData, UINT(dstOffsetIn32BitValues)); + } + virtual void setPipelineState(PipelineStateBase* pipeline) override + { + auto pipelineImpl = static_cast(pipeline); + m_commandList->SetPipelineState(pipelineImpl->m_pipelineState.get()); + } - virtual SLANG_NO_THROW void SLANG_MCALL - encodeResourceCommands(IResourceCommandEncoder** outEncoder) override - { - m_resourceCommandEncoder.init(this); - *outEncoder = &m_resourceCommandEncoder; - } + GraphicsSubmitter(ID3D12GraphicsCommandList* commandList) + : m_commandList(commandList) + {} - class RenderCommandEncoderImpl - : public IRenderCommandEncoder - , public ResourceCommandEncoderImpl - { - public: - SLANG_GFX_FORWARD_RESOURCE_COMMAND_ENCODER_IMPL(ResourceCommandEncoderImpl) - public: - RefPtr m_renderPass; - RefPtr m_framebuffer; + ID3D12GraphicsCommandList* m_commandList; +}; - List m_boundVertexBuffers; +struct ComputeSubmitter : public Submitter +{ + virtual void setRootConstantBufferView( + int index, D3D12_GPU_VIRTUAL_ADDRESS gpuBufferLocation) override + { + m_commandList->SetComputeRootConstantBufferView(index, gpuBufferLocation); + } + virtual void setRootUAV(int index, D3D12_GPU_VIRTUAL_ADDRESS gpuBufferLocation) override + { + m_commandList->SetComputeRootUnorderedAccessView(index, gpuBufferLocation); + } + virtual void setRootSRV(int index, D3D12_GPU_VIRTUAL_ADDRESS gpuBufferLocation) override + { + m_commandList->SetComputeRootShaderResourceView(index, gpuBufferLocation); + } + virtual void setRootDescriptorTable( + int index, D3D12_GPU_DESCRIPTOR_HANDLE baseDescriptor) override + { + m_commandList->SetComputeRootDescriptorTable(index, baseDescriptor); + } + void setRootSignature(ID3D12RootSignature* 
rootSignature) + { + m_commandList->SetComputeRootSignature(rootSignature); + } + void setRootConstants( + Index rootParamIndex, + Index dstOffsetIn32BitValues, + Index countOf32BitValues, + void const* srcData) override + { + m_commandList->SetComputeRoot32BitConstants( + UINT(rootParamIndex), UINT(countOf32BitValues), srcData, UINT(dstOffsetIn32BitValues)); + } + virtual void setPipelineState(PipelineStateBase* pipeline) override + { + auto pipelineImpl = static_cast(pipeline); + m_commandList->SetPipelineState(pipelineImpl->m_pipelineState.get()); + } + ComputeSubmitter(ID3D12GraphicsCommandList* commandList) + : m_commandList(commandList) + {} - RefPtr m_boundIndexBuffer; + ID3D12GraphicsCommandList* m_commandList; +}; - D3D12_VIEWPORT m_viewports[kMaxRTVCount]; - D3D12_RECT m_scissorRects[kMaxRTVCount]; +BufferResourceImpl::BufferResourceImpl(const Desc& desc) + : Parent(desc) + , m_defaultState(D3DUtil::getResourceState(desc.defaultState)) +{} - DXGI_FORMAT m_boundIndexFormat; - UINT m_boundIndexOffset; +BufferResourceImpl::~BufferResourceImpl() +{ + if (sharedHandle.handleValue != 0) + { + CloseHandle((HANDLE)sharedHandle.handleValue); + } +} - D3D12_PRIMITIVE_TOPOLOGY_TYPE m_primitiveTopologyType; - D3D12_PRIMITIVE_TOPOLOGY m_primitiveTopology; +DeviceAddress BufferResourceImpl::getDeviceAddress() +{ + return (DeviceAddress)m_resource.getResource()->GetGPUVirtualAddress(); +} - void init( - D3D12Device* renderer, - TransientResourceHeapImpl* transientHeap, - CommandBufferImpl* cmdBuffer, - RenderPassLayoutImpl* renderPass, - FramebufferImpl* framebuffer) - { - PipelineCommandEncoder::init(cmdBuffer); - m_preCmdList = nullptr; - m_renderPass = renderPass; - m_framebuffer = framebuffer; - m_transientHeap = transientHeap; - m_boundVertexBuffers.clear(); - m_boundIndexBuffer = nullptr; - m_primitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; - m_primitiveTopology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST; - m_boundIndexFormat = DXGI_FORMAT_UNKNOWN; - 
m_boundIndexOffset = 0; - m_currentPipeline = nullptr; - - // Set render target states. - if (!framebuffer) - { - return; - } - m_d3dCmdList->OMSetRenderTargets( - (UINT)framebuffer->renderTargetViews.getCount(), - framebuffer->renderTargetDescriptors.getArrayView().getBuffer(), - FALSE, - framebuffer->depthStencilView ? &framebuffer->depthStencilDescriptor : nullptr); - - // Issue clear commands based on render pass set up. - for (Index i = 0; i < framebuffer->renderTargetViews.getCount(); i++) - { - if (i >= renderPass->m_renderTargetAccesses.getCount()) - continue; +Result BufferResourceImpl::getNativeResourceHandle(InteropHandle* outHandle) +{ + outHandle->handleValue = (uint64_t)m_resource.getResource(); + outHandle->api = InteropHandleAPI::D3D12; + return SLANG_OK; +} - auto& access = renderPass->m_renderTargetAccesses[i]; +Result BufferResourceImpl::getSharedHandle(InteropHandle* outHandle) +{ + // Check if a shared handle already exists for this resource. + if (sharedHandle.handleValue != 0) + { + *outHandle = sharedHandle; + return SLANG_OK; + } - // Transit resource states. - { - D3D12BarrierSubmitter submitter(m_d3dCmdList); - auto resourceViewImpl = framebuffer->renderTargetViews[i].Ptr(); - if (resourceViewImpl) - { - auto textureResource = static_cast( - resourceViewImpl->m_resource.Ptr()); - if (textureResource) - { - D3D12_RESOURCE_STATES initialState; - if (access.initialState == ResourceState::Undefined) - { - initialState = textureResource->m_defaultState; - } - else - { - initialState = D3DUtil::getResourceState(access.initialState); - } - textureResource->m_resource.transition( - initialState, D3D12_RESOURCE_STATE_RENDER_TARGET, submitter); - } - } - } - // Clear. 
- if (access.loadOp == IRenderPassLayout::AttachmentLoadOp::Clear) - { - m_d3dCmdList->ClearRenderTargetView( - framebuffer->renderTargetDescriptors[i], - framebuffer->renderTargetClearValues[i].values, - 0, - nullptr); - } - } + // If a shared handle doesn't exist, create one and store it. + ComPtr pDevice; + auto pResource = m_resource.getResource(); + pResource->GetDevice(IID_PPV_ARGS(pDevice.writeRef())); + SLANG_RETURN_ON_FAIL(pDevice->CreateSharedHandle( + pResource, NULL, GENERIC_ALL, nullptr, (HANDLE*)&outHandle->handleValue)); + outHandle->api = InteropHandleAPI::D3D12; + sharedHandle = *outHandle; + return SLANG_OK; +} - if (renderPass->m_hasDepthStencil) - { - // Transit resource states. - { - D3D12BarrierSubmitter submitter(m_d3dCmdList); - auto resourceViewImpl = framebuffer->depthStencilView.Ptr(); - auto textureResource = - static_cast(resourceViewImpl->m_resource.Ptr()); - D3D12_RESOURCE_STATES initialState; - if (renderPass->m_depthStencilAccess.initialState == - ResourceState::Undefined) - { - initialState = textureResource->m_defaultState; - } - else - { - initialState = D3DUtil::getResourceState( - renderPass->m_depthStencilAccess.initialState); - } - textureResource->m_resource.transition( - initialState, - D3D12_RESOURCE_STATE_DEPTH_WRITE, - submitter); - } - // Clear. 
- uint32_t clearFlags = 0; - if (renderPass->m_depthStencilAccess.loadOp == - IRenderPassLayout::AttachmentLoadOp::Clear) - { - clearFlags |= D3D12_CLEAR_FLAG_DEPTH; - } - if (renderPass->m_depthStencilAccess.stencilLoadOp == - IRenderPassLayout::AttachmentLoadOp::Clear) - { - clearFlags |= D3D12_CLEAR_FLAG_STENCIL; - } - if (clearFlags) - { - m_d3dCmdList->ClearDepthStencilView( - framebuffer->depthStencilDescriptor, - (D3D12_CLEAR_FLAGS)clearFlags, - framebuffer->depthStencilClearValue.depth, - framebuffer->depthStencilClearValue.stencil, - 0, - nullptr); - } - } - } +Result BufferResourceImpl::map(MemoryRange* rangeToRead, void** outPointer) +{ + D3D12_RANGE range = {}; + if (rangeToRead) + { + range.Begin = (SIZE_T)rangeToRead->offset; + range.End = (SIZE_T)(rangeToRead->offset + rangeToRead->size); + } + SLANG_RETURN_ON_FAIL( + m_resource.getResource()->Map(0, rangeToRead ? &range : nullptr, outPointer)); + return SLANG_OK; +} - virtual SLANG_NO_THROW Result SLANG_MCALL - bindPipeline(IPipelineState* state, IShaderObject** outRootObject) override - { - return bindPipelineImpl(state, outRootObject); - } +Result BufferResourceImpl::unmap(MemoryRange* writtenRange) +{ + D3D12_RANGE range = {}; + if (writtenRange) + { + range.Begin = (SIZE_T)writtenRange->offset; + range.End = (SIZE_T)(writtenRange->offset + writtenRange->size); + } + m_resource.getResource()->Unmap(0, writtenRange ? 
&range : nullptr); + return SLANG_OK; +} - virtual SLANG_NO_THROW Result SLANG_MCALL - bindPipelineWithRootObject(IPipelineState* state, IShaderObject* rootObject) override - { - return bindPipelineWithRootObjectImpl(state, rootObject); - } +Result BufferResourceImpl::setDebugName(const char* name) +{ + Parent::setDebugName(name); + m_resource.setDebugName(name); + return SLANG_OK; +} - virtual SLANG_NO_THROW void SLANG_MCALL - setViewports(uint32_t count, const Viewport* viewports) override - { - static const int kMaxViewports = - D3D12_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE; - assert(count <= kMaxViewports && count <= kMaxRTVCount); - for (UInt ii = 0; ii < count; ++ii) - { - auto& inViewport = viewports[ii]; - auto& dxViewport = m_viewports[ii]; - - dxViewport.TopLeftX = inViewport.originX; - dxViewport.TopLeftY = inViewport.originY; - dxViewport.Width = inViewport.extentX; - dxViewport.Height = inViewport.extentY; - dxViewport.MinDepth = inViewport.minZ; - dxViewport.MaxDepth = inViewport.maxZ; - } - m_d3dCmdList->RSSetViewports(UINT(count), m_viewports); - } +TextureResourceImpl::TextureResourceImpl(const Desc& desc) + : Parent(desc) + , m_defaultState(D3DUtil::getResourceState(desc.defaultState)) +{} - virtual SLANG_NO_THROW void SLANG_MCALL - setScissorRects(uint32_t count, const ScissorRect* rects) override - { - static const int kMaxScissorRects = - D3D12_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE; - assert(count <= kMaxScissorRects && count <= kMaxRTVCount); +TextureResourceImpl::~TextureResourceImpl() +{ + if (sharedHandle.handleValue != 0) + { + CloseHandle((HANDLE)sharedHandle.handleValue); + } +} - for (UInt ii = 0; ii < count; ++ii) - { - auto& inRect = rects[ii]; - auto& dxRect = m_scissorRects[ii]; +Result TextureResourceImpl::getNativeResourceHandle(InteropHandle* outHandle) +{ + outHandle->handleValue = (uint64_t)m_resource.getResource(); + outHandle->api = InteropHandleAPI::D3D12; + return SLANG_OK; +} - dxRect.left = 
LONG(inRect.minX); - dxRect.top = LONG(inRect.minY); - dxRect.right = LONG(inRect.maxX); - dxRect.bottom = LONG(inRect.maxY); - } +// Returns a shared handle for this texture in `outHandle`, creating it on first use and reusing the cached handle on later calls. +Result TextureResourceImpl::getSharedHandle(InteropHandle* outHandle) +{ + // Check if a shared handle already exists for this resource. + if (sharedHandle.handleValue != 0) + { + *outHandle = sharedHandle; + return SLANG_OK; + } - m_d3dCmdList->RSSetScissorRects(UINT(count), m_scissorRects); - } + // If a shared handle doesn't exist, create one and store it. + ComPtr pDevice; + auto pResource = m_resource.getResource(); + pResource->GetDevice(IID_PPV_ARGS(pDevice.writeRef())); + SLANG_RETURN_ON_FAIL(pDevice->CreateSharedHandle( + pResource, NULL, GENERIC_ALL, nullptr, (HANDLE*)&outHandle->handleValue)); + outHandle->api = InteropHandleAPI::D3D12; + // Cache the handle so the early-return path above is taken next time and ~TextureResourceImpl closes it (matches BufferResourceImpl::getSharedHandle). + sharedHandle = *outHandle; + return SLANG_OK; +} - virtual SLANG_NO_THROW void SLANG_MCALL - setPrimitiveTopology(PrimitiveTopology topology) override - { - m_primitiveTopologyType = D3DUtil::getPrimitiveType(topology); - m_primitiveTopology = D3DUtil::getPrimitiveTopology(topology); - } +Result TextureResourceImpl::setDebugName(const char* name) +{ + Parent::setDebugName(name); + m_resource.setDebugName(name); + return SLANG_OK; +} - virtual SLANG_NO_THROW void SLANG_MCALL setVertexBuffers( - uint32_t startSlot, - uint32_t slotCount, - IBufferResource* const* buffers, - const uint32_t* offsets) override - { - { - const Index num = startSlot + slotCount; - if (num > m_boundVertexBuffers.getCount()) - { - m_boundVertexBuffers.setCount(num); - } - } - - for (UInt i = 0; i < slotCount; i++) - { - BufferResourceImpl* buffer = static_cast(buffers[i]); - - BoundVertexBuffer& boundBuffer = m_boundVertexBuffers[startSlot + i]; - boundBuffer.m_buffer = buffer; - boundBuffer.m_offset = int(offsets[i]); - } - } - - virtual SLANG_NO_THROW void SLANG_MCALL setIndexBuffer( - IBufferResource* buffer, Format indexFormat, uint32_t offset = 0) override - { - m_boundIndexBuffer = (BufferResourceImpl*)buffer; - m_boundIndexFormat =
D3DUtil::getMapFormat(indexFormat); - m_boundIndexOffset = offset; - } - - void prepareDraw() - { - auto pipelineState = m_currentPipeline.Ptr(); - if (!pipelineState || (pipelineState->desc.type != PipelineType::Graphics)) - { - assert(!"No graphics pipeline state set"); - return; - } - - // Submit - setting for graphics - { - GraphicsSubmitter submitter(m_d3dCmdList); - RefPtr newPipeline; - if(SLANG_FAILED(_bindRenderState(&submitter, newPipeline))) - { - assert(!"Failed to bind render state"); - } - } - - m_d3dCmdList->IASetPrimitiveTopology(m_primitiveTopology); - - // Set up vertex buffer views - { - auto inputLayout = (InputLayoutImpl*)pipelineState->inputLayout.Ptr(); - if (inputLayout) - { - int numVertexViews = 0; - D3D12_VERTEX_BUFFER_VIEW vertexViews[16]; - for (Index i = 0; i < m_boundVertexBuffers.getCount(); i++) - { - const BoundVertexBuffer& boundVertexBuffer = m_boundVertexBuffers[i]; - BufferResourceImpl* buffer = boundVertexBuffer.m_buffer; - if (buffer) - { - D3D12_VERTEX_BUFFER_VIEW& vertexView = - vertexViews[numVertexViews++]; - vertexView.BufferLocation = - buffer->m_resource.getResource()->GetGPUVirtualAddress() + - boundVertexBuffer.m_offset; - vertexView.SizeInBytes = UINT( - buffer->getDesc()->sizeInBytes - boundVertexBuffer.m_offset); - vertexView.StrideInBytes = inputLayout->m_vertexStreamStrides[i]; - } - } - m_d3dCmdList->IASetVertexBuffers(0, numVertexViews, vertexViews); - } - } - // Set up index buffer - if (m_boundIndexBuffer) - { - D3D12_INDEX_BUFFER_VIEW indexBufferView; - indexBufferView.BufferLocation = - m_boundIndexBuffer->m_resource.getResource()->GetGPUVirtualAddress() + - m_boundIndexOffset; - indexBufferView.SizeInBytes = - UINT(m_boundIndexBuffer->getDesc()->sizeInBytes - m_boundIndexOffset); - indexBufferView.Format = m_boundIndexFormat; - - m_d3dCmdList->IASetIndexBuffer(&indexBufferView); - } - } - virtual SLANG_NO_THROW void SLANG_MCALL - draw(uint32_t vertexCount, uint32_t startVertex = 0) override - { - 
prepareDraw(); - m_d3dCmdList->DrawInstanced(vertexCount, 1, startVertex, 0); - } - virtual SLANG_NO_THROW void SLANG_MCALL drawIndexed( - uint32_t indexCount, uint32_t startIndex = 0, uint32_t baseVertex = 0) override - { - prepareDraw(); - m_d3dCmdList->DrawIndexedInstanced(indexCount, 1, startIndex, baseVertex, 0); - } - virtual SLANG_NO_THROW void SLANG_MCALL endEncoding() override - { - PipelineCommandEncoder::endEncodingImpl(); - if (!m_framebuffer) - return; - // Issue clear commands based on render pass set up. - for (Index i = 0; i < m_renderPass->m_renderTargetAccesses.getCount(); i++) - { - auto& access = m_renderPass->m_renderTargetAccesses[i]; - - // Transit resource states. - { - D3D12BarrierSubmitter submitter(m_d3dCmdList); - auto resourceViewImpl = m_framebuffer->renderTargetViews[i].Ptr(); - if (!resourceViewImpl) - continue; - auto textureResource = - static_cast(resourceViewImpl->m_resource.Ptr()); - if (textureResource) - { - textureResource->m_resource.transition( - D3D12_RESOURCE_STATE_RENDER_TARGET, - D3DUtil::getResourceState(access.finalState), - submitter); - } - } - } - - if (m_renderPass->m_hasDepthStencil) - { - // Transit resource states. 
- D3D12BarrierSubmitter submitter(m_d3dCmdList); - auto resourceViewImpl = m_framebuffer->depthStencilView.Ptr(); - auto textureResource = - static_cast(resourceViewImpl->m_resource.Ptr()); - textureResource->m_resource.transition( - D3D12_RESOURCE_STATE_DEPTH_WRITE, - D3DUtil::getResourceState( - m_renderPass->m_depthStencilAccess.finalState), - submitter); - } - m_framebuffer = nullptr; - } - - virtual SLANG_NO_THROW void SLANG_MCALL - setStencilReference(uint32_t referenceValue) override - { - m_d3dCmdList->OMSetStencilRef((UINT)referenceValue); - } - - virtual SLANG_NO_THROW void SLANG_MCALL drawIndirect( - uint32_t maxDrawCount, - IBufferResource* argBuffer, - uint64_t argOffset, - IBufferResource* countBuffer, - uint64_t countOffset) override - { - prepareDraw(); - - auto argBufferImpl = static_cast(argBuffer); - auto countBufferImpl = static_cast(countBuffer); - - m_d3dCmdList->ExecuteIndirect( - m_renderer->drawIndirectCmdSignature, - maxDrawCount, - argBufferImpl->m_resource, - argOffset, - countBufferImpl ? countBufferImpl->m_resource.getResource() : nullptr, - countOffset); - } - - virtual SLANG_NO_THROW void SLANG_MCALL drawIndexedIndirect( - uint32_t maxDrawCount, - IBufferResource* argBuffer, - uint64_t argOffset, - IBufferResource* countBuffer, - uint64_t countOffset) override - { - prepareDraw(); - - auto argBufferImpl = static_cast(argBuffer); - auto countBufferImpl = static_cast(countBuffer); - - m_d3dCmdList->ExecuteIndirect( - m_renderer->drawIndexedIndirectCmdSignature, - maxDrawCount, - argBufferImpl->m_resource, - argOffset, - countBufferImpl ? 
countBufferImpl->m_resource.getResource() : nullptr, - countOffset); - } - - virtual SLANG_NO_THROW Result SLANG_MCALL setSamplePositions( - uint32_t samplesPerPixel, - uint32_t pixelCount, - const SamplePosition* samplePositions) override - { - if (m_commandBuffer->m_cmdList1) - { - m_commandBuffer->m_cmdList1->SetSamplePositions( - samplesPerPixel, pixelCount, (D3D12_SAMPLE_POSITION*)samplePositions); - return SLANG_OK; - } - return SLANG_E_NOT_AVAILABLE; - } - - virtual SLANG_NO_THROW void SLANG_MCALL drawInstanced( - uint32_t vertexCount, - uint32_t instanceCount, - uint32_t startVertex, - uint32_t startInstanceLocation) override - { - prepareDraw(); - m_d3dCmdList->DrawInstanced( - vertexCount, instanceCount, startVertex, startInstanceLocation); - } - - virtual SLANG_NO_THROW void SLANG_MCALL drawIndexedInstanced( - uint32_t indexCount, - uint32_t instanceCount, - uint32_t startIndexLocation, - int32_t baseVertexLocation, - uint32_t startInstanceLocation) override - { - prepareDraw(); - m_d3dCmdList->DrawIndexedInstanced(indexCount, instanceCount, startIndexLocation, baseVertexLocation, startInstanceLocation); - } - }; - - RenderCommandEncoderImpl m_renderCommandEncoder; - virtual SLANG_NO_THROW void SLANG_MCALL encodeRenderCommands( - IRenderPassLayout* renderPass, - IFramebuffer* framebuffer, - IRenderCommandEncoder** outEncoder) override - { - m_renderCommandEncoder.init( - m_renderer, - m_transientHeap, - this, - static_cast(renderPass), - static_cast(framebuffer)); - *outEncoder = &m_renderCommandEncoder; - } - - class ComputeCommandEncoderImpl - : public IComputeCommandEncoder - , public ResourceCommandEncoderImpl - { - public: - SLANG_GFX_FORWARD_RESOURCE_COMMAND_ENCODER_IMPL(ResourceCommandEncoderImpl) - public: - virtual SLANG_NO_THROW void SLANG_MCALL endEncoding() override - { - PipelineCommandEncoder::endEncodingImpl(); - } - void init( - D3D12Device* renderer, - TransientResourceHeapImpl* transientHeap, - CommandBufferImpl* cmdBuffer) - { - 
PipelineCommandEncoder::init(cmdBuffer); - m_preCmdList = nullptr; - m_transientHeap = transientHeap; - m_currentPipeline = nullptr; - } - - virtual SLANG_NO_THROW Result SLANG_MCALL - bindPipeline(IPipelineState* state, IShaderObject** outRootObject) override - { - return bindPipelineImpl(state, outRootObject); - } - - virtual SLANG_NO_THROW Result SLANG_MCALL bindPipelineWithRootObject( - IPipelineState* state, IShaderObject* rootObject) override - { - return bindPipelineWithRootObjectImpl(state, rootObject); - } - - virtual SLANG_NO_THROW void SLANG_MCALL dispatchCompute(int x, int y, int z) override - { - // Submit binding for compute - { - ComputeSubmitter submitter(m_d3dCmdList); - RefPtr newPipeline; - if (SLANG_FAILED(_bindRenderState(&submitter, newPipeline))) - { - assert(!"Failed to bind render state"); - } - } - m_d3dCmdList->Dispatch(x, y, z); - } - - virtual SLANG_NO_THROW void SLANG_MCALL - dispatchComputeIndirect(IBufferResource* argBuffer, uint64_t offset) override - { - // Submit binding for compute - { - ComputeSubmitter submitter(m_d3dCmdList); - RefPtr newPipeline; - if (SLANG_FAILED(_bindRenderState(&submitter, newPipeline))) - { - assert(!"Failed to bind render state"); - } - } - auto argBufferImpl = static_cast(argBuffer); - - m_d3dCmdList->ExecuteIndirect( - m_renderer->dispatchIndirectCmdSignature, - 1, - argBufferImpl->m_resource, - offset, - nullptr, - 0); - } - }; +SamplerStateImpl::~SamplerStateImpl() { m_allocator->free(m_descriptor); } - ComputeCommandEncoderImpl m_computeCommandEncoder; - virtual SLANG_NO_THROW void SLANG_MCALL - encodeComputeCommands(IComputeCommandEncoder** outEncoder) override - { - m_computeCommandEncoder.init(m_renderer, m_transientHeap, this); - *outEncoder = &m_computeCommandEncoder; - } +Result SamplerStateImpl::getNativeHandle(InteropHandle* outHandle) +{ + outHandle->api = InteropHandleAPI::D3D12CpuDescriptorHandle; + outHandle->handleValue = m_descriptor.cpuHandle.ptr; + return SLANG_OK; +} #if 
SLANG_GFX_HAS_DXR_SUPPORT - class RayTracingCommandEncoderImpl - : public IRayTracingCommandEncoder - , public ResourceCommandEncoderImpl - { - public: - SLANG_GFX_FORWARD_RESOURCE_COMMAND_ENCODER_IMPL(ResourceCommandEncoderImpl) - public: - virtual SLANG_NO_THROW void SLANG_MCALL buildAccelerationStructure( - const IAccelerationStructure::BuildDesc& desc, - int propertyQueryCount, - AccelerationStructureQueryDesc* queryDescs) override; - virtual SLANG_NO_THROW void SLANG_MCALL copyAccelerationStructure( - IAccelerationStructure* dest, - IAccelerationStructure* src, - AccelerationStructureCopyMode mode) override; - virtual SLANG_NO_THROW void SLANG_MCALL queryAccelerationStructureProperties( - int accelerationStructureCount, - IAccelerationStructure* const* accelerationStructures, - int queryCount, - AccelerationStructureQueryDesc* queryDescs) override; - virtual SLANG_NO_THROW void SLANG_MCALL serializeAccelerationStructure( - DeviceAddress dest, - IAccelerationStructure* source) override; - virtual SLANG_NO_THROW void SLANG_MCALL deserializeAccelerationStructure( - IAccelerationStructure* dest, - DeviceAddress source) override; - virtual SLANG_NO_THROW void SLANG_MCALL - bindPipeline(IPipelineState* state, IShaderObject** outRootObject) override; - virtual SLANG_NO_THROW Result SLANG_MCALL bindPipelineWithRootObject( - IPipelineState* state, IShaderObject* rootObject) override - { - return bindPipelineWithRootObjectImpl(state, rootObject); - } - virtual SLANG_NO_THROW void SLANG_MCALL dispatchRays( - uint32_t rayGenShaderIndex, - IShaderTable* shaderTable, - int32_t width, - int32_t height, - int32_t depth) override; - virtual SLANG_NO_THROW void SLANG_MCALL endEncoding() {} - }; - RayTracingCommandEncoderImpl m_rayTracingCommandEncoder; - virtual SLANG_NO_THROW void SLANG_MCALL - encodeRayTracingCommands(IRayTracingCommandEncoder** outEncoder) override - { - m_rayTracingCommandEncoder.init(this); - *outEncoder = &m_rayTracingCommandEncoder; - } -#else - virtual 
SLANG_NO_THROW void SLANG_MCALL - encodeRayTracingCommands(IRayTracingCommandEncoder** outEncoder) override - { - *outEncoder = nullptr; - } -#endif - - virtual SLANG_NO_THROW void SLANG_MCALL close() override { m_cmdList->Close(); } - }; - - class FenceImpl : public FenceBase - { - public: - ComPtr m_fence; - HANDLE m_waitEvent = 0; - - ~FenceImpl() - { - if (m_waitEvent) - CloseHandle(m_waitEvent); - } - - HANDLE getWaitEvent() - { - if (m_waitEvent) - return m_waitEvent; - m_waitEvent = CreateEventEx( - nullptr, - nullptr, - 0, - EVENT_ALL_ACCESS); - return m_waitEvent; - } - - Result init(D3D12Device* device, const IFence::Desc& desc) - { - SLANG_RETURN_ON_FAIL(device->m_device->CreateFence( - desc.initialValue, - desc.isShared ? D3D12_FENCE_FLAG_SHARED : D3D12_FENCE_FLAG_NONE, - IID_PPV_ARGS(m_fence.writeRef()))); - return SLANG_OK; - } - - virtual SLANG_NO_THROW Result SLANG_MCALL getCurrentValue(uint64_t* outValue) override - { - *outValue = m_fence->GetCompletedValue(); - return SLANG_OK; - } - - virtual SLANG_NO_THROW Result SLANG_MCALL setCurrentValue(uint64_t value) override - { - SLANG_RETURN_ON_FAIL(m_fence->Signal(value)); - return SLANG_OK; - } - - virtual SLANG_NO_THROW Result SLANG_MCALL getSharedHandle(InteropHandle* outHandle) override - { - // Check if a shared handle already exists. 
- if (sharedHandle.handleValue != 0) - { - *outHandle = sharedHandle; - return SLANG_OK; - } - - ComPtr devicePtr; - m_fence->GetDevice(IID_PPV_ARGS(devicePtr.writeRef())); - SLANG_RETURN_ON_FAIL(devicePtr->CreateSharedHandle(m_fence, NULL, GENERIC_ALL, nullptr, (HANDLE*)&outHandle->handleValue)); - outHandle->api = InteropHandleAPI::D3D12; - sharedHandle = *outHandle; - return SLANG_OK; - } - - virtual SLANG_NO_THROW Result SLANG_MCALL - getNativeHandle(InteropHandle* outNativeHandle) override - { - outNativeHandle->api = gfx::InteropHandleAPI::D3D12; - outNativeHandle->handleValue = (uint64_t)m_fence.get(); - return SLANG_OK; - } - }; - - class CommandQueueImpl - : public ICommandQueue - , public ComObject - { - public: - SLANG_COM_OBJECT_IUNKNOWN_ALL - ICommandQueue* getInterface(const Guid& guid) - { - if (guid == GfxGUID::IID_ISlangUnknown || guid == GfxGUID::IID_ICommandQueue) - return static_cast(this); - return nullptr; - } - void breakStrongReferenceToDevice() { m_renderer.breakStrongReference(); } - - virtual SLANG_NO_THROW Result SLANG_MCALL getNativeHandle(InteropHandle* handle) override - { - handle->api = InteropHandleAPI::D3D12; - handle->handleValue = (uint64_t)m_d3dQueue.get(); - return SLANG_OK; - } - public: - BreakableReference m_renderer; - ComPtr m_device; - ComPtr m_d3dQueue; - ComPtr m_fence; - uint64_t m_fenceValue = 0; - HANDLE globalWaitHandle; - Desc m_desc; - uint32_t m_queueIndex = 0; - - Result init(D3D12Device* device, uint32_t queueIndex) - { - m_queueIndex = queueIndex; - m_renderer = device; - m_device = device->m_device; - D3D12_COMMAND_QUEUE_DESC queueDesc = {}; - queueDesc.Type = D3D12_COMMAND_LIST_TYPE_DIRECT; - SLANG_RETURN_ON_FAIL(m_device->CreateCommandQueue(&queueDesc, IID_PPV_ARGS(m_d3dQueue.writeRef()))); - SLANG_RETURN_ON_FAIL( - m_device->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(m_fence.writeRef()))); - globalWaitHandle = CreateEventEx( - nullptr, - nullptr, - CREATE_EVENT_INITIAL_SET | 
CREATE_EVENT_MANUAL_RESET, - EVENT_ALL_ACCESS); - return SLANG_OK; - } - ~CommandQueueImpl() - { - waitOnHost(); - CloseHandle(globalWaitHandle); - m_renderer->m_queueIndexAllocator.free((int)m_queueIndex, 1); - } - virtual SLANG_NO_THROW const Desc& SLANG_MCALL getDesc() override - { - return m_desc; - } - - virtual SLANG_NO_THROW void SLANG_MCALL - executeCommandBuffers(uint32_t count, ICommandBuffer* const* commandBuffers, IFence* fence, uint64_t valueToSignal) override - { - ShortList commandLists; - for (uint32_t i = 0; i < count; i++) - { - auto cmdImpl = static_cast(commandBuffers[i]); - commandLists.add(cmdImpl->m_cmdList); - } - if (count > 0) - { - m_d3dQueue->ExecuteCommandLists((UINT)count, commandLists.getArrayView().getBuffer()); - - m_fenceValue++; - - for (uint32_t i = 0; i < count; i++) - { - if (i > 0 && commandBuffers[i] == commandBuffers[i - 1]) - continue; - auto cmdImpl = static_cast(commandBuffers[i]); - auto transientHeap = cmdImpl->m_transientHeap; - auto& waitInfo = transientHeap->getQueueWaitInfo(m_queueIndex); - waitInfo.waitValue = m_fenceValue; - waitInfo.fence = m_fence; - } - m_d3dQueue->Signal(m_fence, m_fenceValue); - } - - if (fence) - { - auto fenceImpl = static_cast(fence); - m_d3dQueue->Signal(fenceImpl->m_fence.get(), valueToSignal); - } - } - - virtual SLANG_NO_THROW void SLANG_MCALL waitOnHost() override - { - m_fenceValue++; - m_d3dQueue->Signal(m_fence, m_fenceValue); - ResetEvent(globalWaitHandle); - m_fence->SetEventOnCompletion(m_fenceValue, globalWaitHandle); - WaitForSingleObject(globalWaitHandle, INFINITE); - } - - virtual SLANG_NO_THROW Result SLANG_MCALL waitForFenceValuesOnDevice( - uint32_t fenceCount, IFence** fences, uint64_t* waitValues) override - { - for (uint32_t i = 0; i < fenceCount; ++i) - { - auto fenceImpl = static_cast(fences[i]); - m_d3dQueue->Wait( - fenceImpl->m_fence.get(), - waitValues[i]); - } - return SLANG_OK; - } - }; - - class SwapchainImpl : public D3DSwapchainBase - { - public: - ComPtr 
m_queue; - ComPtr m_dxgiFactory; - ComPtr m_swapChain3; - ComPtr m_fence; - ShortList m_frameEvents; - uint64_t fenceValue = 0; - Result init( - D3D12Device* renderer, - const ISwapchain::Desc& swapchainDesc, - WindowHandle window) - { - m_queue = static_cast(swapchainDesc.queue)->m_d3dQueue; - m_dxgiFactory = renderer->m_deviceInfo.m_dxgiFactory; - SLANG_RETURN_ON_FAIL( - D3DSwapchainBase::init(swapchainDesc, window, DXGI_SWAP_EFFECT_FLIP_DISCARD)); - renderer->m_device->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(m_fence.writeRef())); - - SLANG_RETURN_ON_FAIL(m_swapChain->QueryInterface(m_swapChain3.writeRef())); - for (uint32_t i = 0; i < swapchainDesc.imageCount; i++) - { - m_frameEvents.add(CreateEventEx( - nullptr, - false, - CREATE_EVENT_INITIAL_SET | CREATE_EVENT_MANUAL_RESET, - EVENT_ALL_ACCESS)); - } - return SLANG_OK; - } - virtual SLANG_NO_THROW Result SLANG_MCALL resize(uint32_t width, uint32_t height) override - { - for (auto evt : m_frameEvents) - SetEvent(evt); - SLANG_RETURN_ON_FAIL(D3DSwapchainBase::resize(width, height)); - return SLANG_OK; - } - - virtual void createSwapchainBufferImages() override - { - m_images.clear(); - - for (uint32_t i = 0; i < m_desc.imageCount; i++) - { - ComPtr d3dResource; - m_swapChain->GetBuffer(i, IID_PPV_ARGS(d3dResource.writeRef())); - ITextureResource::Desc imageDesc = {}; - imageDesc.allowedStates = ResourceStateSet( - ResourceState::Present, - ResourceState::RenderTarget, - ResourceState::CopyDestination); - imageDesc.type = IResource::Type::Texture2D; - imageDesc.arraySize = 0; - imageDesc.format = m_desc.format; - imageDesc.size.width = m_desc.width; - imageDesc.size.height = m_desc.height; - imageDesc.size.depth = 1; - imageDesc.numMipLevels = 1; - imageDesc.defaultState = ResourceState::Present; - RefPtr image = new TextureResourceImpl(imageDesc); - image->m_resource.setResource(d3dResource.get()); - image->m_defaultState = D3D12_RESOURCE_STATE_PRESENT; - m_images.add(image); - } - for (auto evt : 
m_frameEvents) - SetEvent(evt); - } - virtual IDXGIFactory* getDXGIFactory() override { return m_dxgiFactory; } - virtual IUnknown* getOwningDevice() override { return m_queue; } - virtual SLANG_NO_THROW int SLANG_MCALL acquireNextImage() override - { - auto result = (int)m_swapChain3->GetCurrentBackBufferIndex(); - WaitForSingleObject(m_frameEvents[result], INFINITE); - ResetEvent(m_frameEvents[result]); - return result; - } - virtual SLANG_NO_THROW Result SLANG_MCALL present() override - { - m_fence->SetEventOnCompletion(fenceValue, m_frameEvents[m_swapChain3->GetCurrentBackBufferIndex()]); - SLANG_RETURN_ON_FAIL(D3DSwapchainBase::present()); - fenceValue++; - m_queue->Signal(m_fence, fenceValue); - - return SLANG_OK; - } - virtual SLANG_NO_THROW bool SLANG_MCALL isOccluded() override - { - return (m_swapChain3->Present(0, DXGI_PRESENT_TEST) == DXGI_STATUS_OCCLUDED); - } - virtual SLANG_NO_THROW Result SLANG_MCALL setFullScreenMode(bool mode) override - { - return m_swapChain3->SetFullscreenState(mode, nullptr); - } - }; - - static PROC loadProc(HMODULE module, char const* name); - - Result createCommandQueueImpl(CommandQueueImpl** outQueue); - - Result createTransientResourceHeapImpl( - ITransientResourceHeap::Flags::Enum flags, - size_t constantBufferSize, - uint32_t viewDescriptors, - uint32_t samplerDescriptors, - TransientResourceHeapImpl** outHeap); - - Result createBuffer( - const D3D12_RESOURCE_DESC& resourceDesc, - const void* srcData, - size_t srcDataSize, - D3D12_RESOURCE_STATES finalState, - D3D12Resource& resourceOut, - bool isShared, - MemoryType access = MemoryType::DeviceLocal); - - Result captureTextureToSurface( - TextureResourceImpl* resource, - ResourceState state, - ISlangBlob** blob, - size_t* outRowPitch, - size_t* outPixelSize); - - Result _createDevice( - DeviceCheckFlags deviceCheckFlags, - const UnownedStringSlice& nameMatch, - D3D_FEATURE_LEVEL featureLevel, - DeviceInfo& outDeviceInfo); - - struct ResourceCommandRecordInfo - { - 
ComPtr commandBuffer; - ID3D12GraphicsCommandList* d3dCommandList; - }; - ResourceCommandRecordInfo encodeResourceCommands() - { - ResourceCommandRecordInfo info; - m_resourceCommandTransientHeap->createCommandBuffer(info.commandBuffer.writeRef()); - info.d3dCommandList = static_cast(info.commandBuffer.get())->m_cmdList; - return info; - } - void submitResourceCommandsAndWait(const ResourceCommandRecordInfo& info) - { - info.commandBuffer->close(); - m_resourceCommandQueue->executeCommandBuffer(info.commandBuffer); - m_resourceCommandTransientHeap->synchronizeAndReset(); - } - - // D3D12Device members. - - Desc m_desc; - D3D12DeviceExtendedDesc m_extendedDesc; - - gfx::DeviceInfo m_info; - String m_adapterName; - - bool m_isInitialized = false; - - ComPtr m_dxDebug; - - DeviceInfo m_deviceInfo; - ID3D12Device* m_device = nullptr; - ID3D12Device5* m_device5 = nullptr; - - VirtualObjectPool m_queueIndexAllocator; - - RefPtr m_resourceCommandQueue; - RefPtr m_resourceCommandTransientHeap; - - RefPtr m_rtvAllocator; - RefPtr m_dsvAllocator; - // Space in the GPU-visible heaps is precious, so we will also keep - // around CPU-visible heaps for storing descriptors in a format - // that is ready for copying into the GPU-visible heaps as needed. 
- // - RefPtr m_cpuViewHeap; ///< Cbv, Srv, Uav - RefPtr m_cpuSamplerHeap; ///< Heap for samplers - - // Dll entry points - PFN_D3D12_GET_DEBUG_INTERFACE m_D3D12GetDebugInterface = nullptr; - PFN_D3D12_CREATE_DEVICE m_D3D12CreateDevice = nullptr; - PFN_D3D12_SERIALIZE_ROOT_SIGNATURE m_D3D12SerializeRootSignature = nullptr; - - PFN_BeginEventOnCommandList m_BeginEventOnCommandList = nullptr; - PFN_EndEventOnCommandList m_EndEventOnCommandList = nullptr; +DeviceAddress AccelerationStructureImpl::getDeviceAddress() +{ + return m_buffer->getDeviceAddress() + m_offset; +} - bool m_nvapi = false; +Result AccelerationStructureImpl::getNativeHandle(InteropHandle* outHandle) +{ + outHandle->api = InteropHandleAPI::DeviceAddress; + outHandle->handleValue = getDeviceAddress(); + return SLANG_OK; +} - // Command signatures required for indirect draws. These indicate the format of the indirect - // as well as the command type to be used (DrawInstanced and DrawIndexedInstanced, in this case). - ComPtr drawIndirectCmdSignature; - ComPtr drawIndexedIndirectCmdSignature; - ComPtr dispatchIndirectCmdSignature; -}; +#endif // SLANG_GFX_HAS_DXR_SUPPORT -SLANG_NO_THROW Result SLANG_MCALL D3D12Device::TransientResourceHeapImpl::synchronizeAndReset() +Result TransientResourceHeapImpl::synchronizeAndReset() { Array waitHandles; for (auto& waitInfo : m_waitInfos) @@ -5531,41 +2809,230 @@ SLANG_NO_THROW Result SLANG_MCALL D3D12Device::TransientResourceHeapImpl::synchr return SLANG_OK; } -Result D3D12Device::TransientResourceHeapImpl::createCommandBuffer(ICommandBuffer** outCmdBuffer) +TransientResourceHeapImpl::QueueWaitInfo& TransientResourceHeapImpl::getQueueWaitInfo( + uint32_t queueIndex) { - if ((Index)m_commandListAllocId < m_commandBufferPool.getCount()) + if (queueIndex < (uint32_t)m_waitInfos.getCount()) { - auto result = static_cast( - m_commandBufferPool[m_commandListAllocId].Ptr()); - m_d3dCommandListPool[m_commandListAllocId]->Reset(m_commandAllocator, nullptr); - 
result->reinit(); - ++m_commandListAllocId; - returnComPtr(outCmdBuffer, result); - return SLANG_OK; + return m_waitInfos[queueIndex]; } - ComPtr cmdList; - m_device->m_device->CreateCommandList( - 0, - D3D12_COMMAND_LIST_TYPE_DIRECT, - m_commandAllocator, - nullptr, - IID_PPV_ARGS(cmdList.writeRef())); + auto oldCount = m_waitInfos.getCount(); + m_waitInfos.setCount(queueIndex + 1); + for (auto i = oldCount; i < m_waitInfos.getCount(); i++) + { + m_waitInfos[i].waitValue = 0; + m_waitInfos[i].fenceEvent = CreateEventEx(nullptr, false, 0, EVENT_ALL_ACCESS); + } + return m_waitInfos[queueIndex]; +} - m_d3dCommandListPool.add(cmdList); - RefPtr cmdBuffer = new CommandBufferImpl(); - cmdBuffer->init(m_device, cmdList, this); - m_commandBufferPool.add(cmdBuffer); +D3D12DescriptorHeap& TransientResourceHeapImpl::getCurrentViewHeap() +{ + return m_viewHeaps[m_currentViewHeapIndex]; +} + +D3D12DescriptorHeap& TransientResourceHeapImpl::getCurrentSamplerHeap() +{ + return m_samplerHeaps[m_currentSamplerHeapIndex]; +} + +Result TransientResourceHeapImpl::queryInterface(SlangUUID const& uuid, void** outObject) +{ + if (uuid == GfxGUID::IID_ID3D12TransientResourceHeap) + { + *outObject = static_cast(this); + addRef(); + return SLANG_OK; + } + return Super::queryInterface(uuid, outObject); +} + +Result TransientResourceHeapImpl::allocateTransientDescriptorTable( + DescriptorType type, + uint32_t count, + uint64_t& outDescriptorOffset, + void** outD3DDescriptorHeapHandle) +{ + auto& heap = + (type == DescriptorType::ResourceView) ? 
getCurrentViewHeap() : getCurrentSamplerHeap(); + int allocResult = heap.allocate((int)count); + if (allocResult == -1) + { + return SLANG_E_OUT_OF_MEMORY; + } + outDescriptorOffset = (uint64_t)allocResult; + *outD3DDescriptorHeapHandle = heap.getHeap(); + return SLANG_OK; +} + +TransientResourceHeapImpl::~TransientResourceHeapImpl() +{ + synchronizeAndReset(); + for (auto& waitInfo : m_waitInfos) + CloseHandle(waitInfo.fenceEvent); +} + +Result TransientResourceHeapImpl::init( + const ITransientResourceHeap::Desc& desc, + DeviceImpl* device, + uint32_t viewHeapSize, + uint32_t samplerHeapSize) +{ + Super::init(desc, D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT, device); + m_canResize = (desc.flags & ITransientResourceHeap::Flags::AllowResizing) != 0; + m_viewHeapSize = viewHeapSize; + m_samplerHeapSize = samplerHeapSize; + + m_stagingCpuViewHeap.init( + device->m_device, + 1000000, + D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, + D3D12_DESCRIPTOR_HEAP_FLAG_NONE); + m_stagingCpuSamplerHeap.init( + device->m_device, + 1000000, + D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER, + D3D12_DESCRIPTOR_HEAP_FLAG_NONE); + + auto d3dDevice = device->m_device; + SLANG_RETURN_ON_FAIL(d3dDevice->CreateCommandAllocator( + D3D12_COMMAND_LIST_TYPE_DIRECT, IID_PPV_ARGS(m_commandAllocator.writeRef()))); + + allocateNewViewDescriptorHeap(device); + allocateNewSamplerDescriptorHeap(device); + + return SLANG_OK; +} + +Result TransientResourceHeapImpl::allocateNewViewDescriptorHeap(DeviceImpl* device) +{ + auto nextHeapIndex = m_currentViewHeapIndex + 1; + if (nextHeapIndex < m_viewHeaps.getCount()) + { + m_viewHeaps[nextHeapIndex].deallocateAll(); + m_currentViewHeapIndex = nextHeapIndex; + return SLANG_OK; + } + auto d3dDevice = device->m_device; + D3D12DescriptorHeap viewHeap; + SLANG_RETURN_ON_FAIL(viewHeap.init( + d3dDevice, + m_viewHeapSize, + D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, + D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE)); + m_currentViewHeapIndex = (int32_t)m_viewHeaps.getCount(); + 
m_viewHeaps.add(_Move(viewHeap)); + return SLANG_OK; +} + +Result TransientResourceHeapImpl::allocateNewSamplerDescriptorHeap(DeviceImpl* device) +{ + auto nextHeapIndex = m_currentSamplerHeapIndex + 1; + if (nextHeapIndex < m_samplerHeaps.getCount()) + { + m_samplerHeaps[nextHeapIndex].deallocateAll(); + m_currentSamplerHeapIndex = nextHeapIndex; + return SLANG_OK; + } + auto d3dDevice = device->m_device; + D3D12DescriptorHeap samplerHeap; + SLANG_RETURN_ON_FAIL(samplerHeap.init( + d3dDevice, + m_samplerHeapSize, + D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER, + D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE)); + m_currentSamplerHeapIndex = (int32_t)m_samplerHeaps.getCount(); + m_samplerHeaps.add(_Move(samplerHeap)); + return SLANG_OK; +} + +Result TransientResourceHeapImpl::createCommandBuffer(ICommandBuffer** outCmdBuffer) +{ + if ((Index)m_commandListAllocId < m_commandBufferPool.getCount()) + { + auto result = + static_cast(m_commandBufferPool[m_commandListAllocId].Ptr()); + m_d3dCommandListPool[m_commandListAllocId]->Reset(m_commandAllocator, nullptr); + result->reinit(); + ++m_commandListAllocId; + returnComPtr(outCmdBuffer, result); + return SLANG_OK; + } + ComPtr cmdList; + m_device->m_device->CreateCommandList( + 0, + D3D12_COMMAND_LIST_TYPE_DIRECT, + m_commandAllocator, + nullptr, + IID_PPV_ARGS(cmdList.writeRef())); + + m_d3dCommandListPool.add(cmdList); + RefPtr cmdBuffer = new CommandBufferImpl(); + cmdBuffer->init(m_device, cmdList, this); + m_commandBufferPool.add(cmdBuffer); ++m_commandListAllocId; returnComPtr(outCmdBuffer, cmdBuffer); return SLANG_OK; } -Result D3D12Device::PipelineCommandEncoder::_bindRenderState(Submitter* submitter, RefPtr& newPipeline) +int PipelineCommandEncoder::getBindPointIndex(PipelineType type) +{ + switch (type) + { + case PipelineType::Graphics: + return 0; + case PipelineType::Compute: + return 1; + case PipelineType::RayTracing: + return 2; + default: + assert(!"unknown pipeline type."); + return -1; + } +} + +void 
PipelineCommandEncoder::init(CommandBufferImpl* commandBuffer) +{ + m_commandBuffer = commandBuffer; + m_d3dCmdList = m_commandBuffer->m_cmdList; + m_renderer = commandBuffer->m_renderer; + m_transientHeap = commandBuffer->m_transientHeap; + m_device = commandBuffer->m_renderer->m_device; +} + +Result PipelineCommandEncoder::bindPipelineImpl( + IPipelineState* pipelineState, IShaderObject** outRootObject) +{ + m_currentPipeline = static_cast(pipelineState); + auto rootObject = &m_commandBuffer->m_rootShaderObject; + m_commandBuffer->m_mutableRootShaderObject = nullptr; + SLANG_RETURN_ON_FAIL(rootObject->reset( + m_renderer, + m_currentPipeline->getProgram()->m_rootObjectLayout, + m_commandBuffer->m_transientHeap)); + *outRootObject = rootObject; + m_bindingDirty = true; + return SLANG_OK; +} + +Result PipelineCommandEncoder::bindPipelineWithRootObjectImpl( + IPipelineState* pipelineState, IShaderObject* rootObject) +{ + m_currentPipeline = static_cast(pipelineState); + m_commandBuffer->m_mutableRootShaderObject = + static_cast(rootObject); + m_bindingDirty = true; + return SLANG_OK; +} + +Result PipelineCommandEncoder::_bindRenderState( + Submitter* submitter, RefPtr& newPipeline) { RootShaderObjectImpl* rootObjectImpl = m_commandBuffer->m_mutableRootShaderObject ? m_commandBuffer->m_mutableRootShaderObject.Ptr() : &m_commandBuffer->m_rootShaderObject; - SLANG_RETURN_ON_FAIL(m_renderer->maybeSpecializePipeline(m_currentPipeline, rootObjectImpl, newPipeline)); + SLANG_RETURN_ON_FAIL( + m_renderer->maybeSpecializePipeline(m_currentPipeline, rootObjectImpl, newPipeline)); PipelineStateBase* newPipelineImpl = static_cast(newPipeline.Ptr()); auto commandList = m_d3dCmdList; auto pipelineTypeIndex = (int)newPipelineImpl->desc.type; @@ -5621,3204 +3088,4560 @@ Result D3D12Device::PipelineCommandEncoder::_bindRenderState(Submitter* submitte // Try again. 
SLANG_RETURN_ON_FAIL(rootObjectImpl->bindAsRoot(&context, rootLayoutImpl)); } - + return SLANG_OK; } -Result D3D12Device::createTransientResourceHeapImpl( - ITransientResourceHeap::Flags::Enum flags, - size_t constantBufferSize, - uint32_t viewDescriptors, - uint32_t samplerDescriptors, - TransientResourceHeapImpl** outHeap) +Result QueryPoolImpl::init(const IQueryPool::Desc& desc, DeviceImpl* device) { - RefPtr result = new TransientResourceHeapImpl(); - ITransientResourceHeap::Desc desc = {}; - desc.flags = flags; - desc.samplerDescriptorCount = samplerDescriptors; - desc.constantBufferSize = constantBufferSize; - desc.constantBufferDescriptorCount = viewDescriptors; - desc.accelerationStructureDescriptorCount = viewDescriptors; - desc.srvDescriptorCount = viewDescriptors; - desc.uavDescriptorCount = viewDescriptors; - SLANG_RETURN_ON_FAIL(result->init(desc, this, viewDescriptors, samplerDescriptors)); - returnRefPtrMove(outHeap, result); + m_desc = desc; + + // Translate query type. + D3D12_QUERY_HEAP_DESC heapDesc = {}; + heapDesc.Count = (UINT)desc.count; + heapDesc.NodeMask = 1; + switch (desc.type) + { + case QueryType::Timestamp: + heapDesc.Type = D3D12_QUERY_HEAP_TYPE_TIMESTAMP; + m_queryType = D3D12_QUERY_TYPE_TIMESTAMP; + break; + default: + return SLANG_E_INVALID_ARG; + } + + // Create query heap. + auto d3dDevice = device->m_device; + SLANG_RETURN_ON_FAIL( + d3dDevice->CreateQueryHeap(&heapDesc, IID_PPV_ARGS(m_queryHeap.writeRef()))); + + // Create readback buffer. 
+ D3D12_HEAP_PROPERTIES heapProps; + heapProps.Type = D3D12_HEAP_TYPE_READBACK; + heapProps.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN; + heapProps.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN; + heapProps.CreationNodeMask = 1; + heapProps.VisibleNodeMask = 1; + D3D12_RESOURCE_DESC resourceDesc = {}; + initBufferResourceDesc(sizeof(uint64_t) * desc.count, resourceDesc); + SLANG_RETURN_ON_FAIL(m_readBackBuffer.initCommitted( + d3dDevice, + heapProps, + D3D12_HEAP_FLAG_NONE, + resourceDesc, + D3D12_RESOURCE_STATE_COPY_DEST, + nullptr)); + + // Create command allocator. + SLANG_RETURN_ON_FAIL(d3dDevice->CreateCommandAllocator( + D3D12_COMMAND_LIST_TYPE_DIRECT, IID_PPV_ARGS(m_commandAllocator.writeRef()))); + + // Create command list. + SLANG_RETURN_ON_FAIL(d3dDevice->CreateCommandList( + 0, + D3D12_COMMAND_LIST_TYPE_DIRECT, + m_commandAllocator, + nullptr, + IID_PPV_ARGS(m_commandList.writeRef()))); + m_commandList->Close(); + + // Create fence. + SLANG_RETURN_ON_FAIL( + d3dDevice->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(m_fence.writeRef()))); + + // Get command queue from device. + m_commandQueue = device->m_resourceCommandQueue->m_d3dQueue; + + // Create wait event. + m_waitEvent = CreateEventEx(nullptr, false, 0, EVENT_ALL_ACCESS); + return SLANG_OK; } -Result D3D12Device::createCommandQueueImpl(D3D12Device::CommandQueueImpl** outQueue) +Result QueryPoolImpl::getResult(SlangInt queryIndex, SlangInt count, uint64_t* data) { - int queueIndex = m_queueIndexAllocator.alloc(1); - // If we run out of queue index space, then the user is requesting too many queues. 
- if (queueIndex == -1) - return SLANG_FAIL; + m_commandList->Reset(m_commandAllocator, nullptr); + m_commandList->ResolveQueryData( + m_queryHeap, + m_queryType, + (UINT)queryIndex, + (UINT)count, + m_readBackBuffer, + sizeof(uint64_t) * queryIndex); + m_commandList->Close(); + ID3D12CommandList* cmdList = m_commandList; + m_commandQueue->ExecuteCommandLists(1, &cmdList); + m_eventValue++; + m_fence->SetEventOnCompletion(m_eventValue, m_waitEvent); + m_commandQueue->Signal(m_fence, m_eventValue); + WaitForSingleObject(m_waitEvent, INFINITE); + m_commandAllocator->Reset(); + + int8_t* mappedData = nullptr; + D3D12_RANGE readRange = { + sizeof(uint64_t) * queryIndex, sizeof(uint64_t) * (queryIndex + count)}; + m_readBackBuffer.getResource()->Map(0, &readRange, (void**)&mappedData); + memcpy(data, mappedData + sizeof(uint64_t) * queryIndex, sizeof(uint64_t) * count); + m_readBackBuffer.getResource()->Unmap(0, nullptr); + return SLANG_OK; +} - RefPtr queue = new D3D12Device::CommandQueueImpl(); - SLANG_RETURN_ON_FAIL(queue->init(this, (uint32_t)queueIndex)); - returnRefPtrMove(outQueue, queue); +void QueryPoolImpl::writeTimestamp(ID3D12GraphicsCommandList* cmdList, SlangInt index) +{ + cmdList->EndQuery(m_queryHeap, D3D12_QUERY_TYPE_TIMESTAMP, (UINT)index); +} + +IQueryPool* PlainBufferProxyQueryPoolImpl::getInterface(const Guid& guid) +{ + if (guid == GfxGUID::IID_ISlangUnknown || guid == GfxGUID::IID_IQueryPool) + return static_cast(this); + return nullptr; +} + +Result PlainBufferProxyQueryPoolImpl::init( + const IQueryPool::Desc& desc, DeviceImpl* device, uint32_t stride) +{ + ComPtr bufferResource; + IBufferResource::Desc bufferDesc = {}; + bufferDesc.defaultState = ResourceState::CopySource; + bufferDesc.elementSize = 0; + bufferDesc.type = IResource::Type::Buffer; + bufferDesc.sizeInBytes = desc.count * stride; + bufferDesc.format = Format::Unknown; + bufferDesc.allowedStates.add(ResourceState::UnorderedAccess); + SLANG_RETURN_ON_FAIL( + 
device->createBufferResource(bufferDesc, nullptr, bufferResource.writeRef())); + m_bufferResource = static_cast(bufferResource.get()); + m_queryType = desc.type; + m_device = device; + m_stride = stride; + m_count = (uint32_t)desc.count; + m_desc = desc; return SLANG_OK; } -SlangResult SLANG_MCALL createD3D12Device(const IDevice::Desc* desc, IDevice** outDevice) +Result PlainBufferProxyQueryPoolImpl::reset() { - RefPtr result = new D3D12Device(); - SLANG_RETURN_ON_FAIL(result->initialize(*desc)); - returnComPtr(outDevice, result); + m_resultDirty = true; + auto encodeInfo = m_device->encodeResourceCommands(); + D3D12_RESOURCE_BARRIER barrier = {}; + barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; + barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_SOURCE; + barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_UNORDERED_ACCESS; + barrier.Transition.pResource = m_bufferResource->m_resource.getResource(); + encodeInfo.d3dCommandList->ResourceBarrier(1, &barrier); + m_device->submitResourceCommandsAndWait(encodeInfo); return SLANG_OK; } -/* static */PROC D3D12Device::loadProc(HMODULE module, char const* name) +Result PlainBufferProxyQueryPoolImpl::getResult(SlangInt queryIndex, SlangInt count, uint64_t* data) { - PROC proc = ::GetProcAddress(module, name); - if (!proc) + if (m_resultDirty) { - fprintf(stderr, "error: failed load symbol '%s'\n", name); - return nullptr; + auto encodeInfo = m_device->encodeResourceCommands(); + D3D12_RESOURCE_BARRIER barrier = {}; + barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; + barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_UNORDERED_ACCESS; + barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_SOURCE; + barrier.Transition.pResource = m_bufferResource->m_resource.getResource(); + encodeInfo.d3dCommandList->ResourceBarrier(1, &barrier); + + D3D12Resource stageBuf; + + auto size = (size_t)m_count * m_stride; + D3D12_HEAP_PROPERTIES heapProps; + heapProps.Type = D3D12_HEAP_TYPE_READBACK; + 
heapProps.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN; + heapProps.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN; + heapProps.CreationNodeMask = 1; + heapProps.VisibleNodeMask = 1; + + D3D12_RESOURCE_DESC stagingDesc; + initBufferResourceDesc(size, stagingDesc); + + SLANG_RETURN_ON_FAIL(stageBuf.initCommitted( + m_device->m_device, + heapProps, + D3D12_HEAP_FLAG_NONE, + stagingDesc, + D3D12_RESOURCE_STATE_COPY_DEST, + nullptr)); + + encodeInfo.d3dCommandList->CopyBufferRegion( + stageBuf, 0, m_bufferResource->m_resource.getResource(), 0, size); + m_device->submitResourceCommandsAndWait(encodeInfo); + void* ptr = nullptr; + stageBuf.getResource()->Map(0, nullptr, &ptr); + m_result.setCount(m_count * m_stride); + memcpy(m_result.getBuffer(), ptr, m_result.getCount()); + + m_resultDirty = false; } - return proc; -} -D3D12Device::~D3D12Device() { m_shaderObjectLayoutCache = decltype(m_shaderObjectLayoutCache)(); } + memcpy(data, m_result.getBuffer() + queryIndex * m_stride, count * m_stride); -static void _initSrvDesc( - IResource::Type resourceType, - const ITextureResource::Desc& textureDesc, - const D3D12_RESOURCE_DESC& desc, - DXGI_FORMAT pixelFormat, - SubresourceRange subresourceRange, - D3D12_SHADER_RESOURCE_VIEW_DESC& descOut) -{ - // create SRV - descOut = D3D12_SHADER_RESOURCE_VIEW_DESC(); + return SLANG_OK; +} - descOut.Format = (pixelFormat == DXGI_FORMAT_UNKNOWN) ? 
D3DUtil::calcFormat(D3DUtil::USAGE_SRV, desc.Format) : pixelFormat; - descOut.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; - if (desc.DepthOrArraySize == 1) +void translatePostBuildInfoDescs( + int propertyQueryCount, + AccelerationStructureQueryDesc* queryDescs, + List& postBuildInfoDescs) +{ + postBuildInfoDescs.setCount(propertyQueryCount); + for (int i = 0; i < propertyQueryCount; i++) { - switch (desc.Dimension) + switch (queryDescs[i].queryType) { - case D3D12_RESOURCE_DIMENSION_TEXTURE1D: - descOut.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE1D; - descOut.Texture1D.MipLevels = subresourceRange.mipLevelCount == 0 - ? desc.MipLevels - subresourceRange.mipLevel - : subresourceRange.mipLevelCount; - descOut.Texture1D.MostDetailedMip = subresourceRange.mipLevel; + case QueryType::AccelerationStructureCompactedSize: + postBuildInfoDescs[i].InfoType = + D3D12_RAYTRACING_ACCELERATION_STRUCTURE_POSTBUILD_INFO_COMPACTED_SIZE; + postBuildInfoDescs[i].DestBuffer = + static_cast(queryDescs[i].queryPool) + ->m_bufferResource->getDeviceAddress() + + sizeof(D3D12_RAYTRACING_ACCELERATION_STRUCTURE_POSTBUILD_INFO_COMPACTED_SIZE_DESC) * + queryDescs[i].firstQueryIndex; break; - case D3D12_RESOURCE_DIMENSION_TEXTURE2D: - descOut.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D; - descOut.Texture2D.PlaneSlice = - D3DUtil::getPlaneSlice(descOut.Format, subresourceRange.aspectMask); - descOut.Texture2D.ResourceMinLODClamp = 0.0f; - descOut.Texture2D.MipLevels = subresourceRange.mipLevelCount == 0 - ? 
desc.MipLevels - subresourceRange.mipLevel - : subresourceRange.mipLevelCount; - descOut.Texture2D.MostDetailedMip = subresourceRange.mipLevel; + case QueryType::AccelerationStructureCurrentSize: + postBuildInfoDescs[i].InfoType = + D3D12_RAYTRACING_ACCELERATION_STRUCTURE_POSTBUILD_INFO_CURRENT_SIZE; + postBuildInfoDescs[i].DestBuffer = + static_cast(queryDescs[i].queryPool) + ->m_bufferResource->getDeviceAddress() + + sizeof(D3D12_RAYTRACING_ACCELERATION_STRUCTURE_POSTBUILD_INFO_COMPACTED_SIZE_DESC) * + queryDescs[i].firstQueryIndex; break; - case D3D12_RESOURCE_DIMENSION_TEXTURE3D: - descOut.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE3D; - descOut.Texture3D.MipLevels = subresourceRange.mipLevelCount == 0 - ? desc.MipLevels - subresourceRange.mipLevel - : subresourceRange.mipLevelCount; - descOut.Texture3D.MostDetailedMip = subresourceRange.mipLevel; + case QueryType::AccelerationStructureSerializedSize: + postBuildInfoDescs[i].InfoType = + D3D12_RAYTRACING_ACCELERATION_STRUCTURE_POSTBUILD_INFO_SERIALIZATION; + postBuildInfoDescs[i].DestBuffer = + static_cast(queryDescs[i].queryPool) + ->m_bufferResource->getDeviceAddress() + + sizeof(D3D12_RAYTRACING_ACCELERATION_STRUCTURE_POSTBUILD_INFO_SERIALIZATION_DESC) * + queryDescs[i].firstQueryIndex; break; - default: - assert(!"Unknown dimension"); } - } - else if (resourceType == IResource::Type::TextureCube) - { - if (textureDesc.arraySize > 1) - { - descOut.ViewDimension = D3D12_SRV_DIMENSION_TEXTURECUBEARRAY; +} - descOut.TextureCubeArray.NumCubes = subresourceRange.layerCount == 0 - ? textureDesc.arraySize - : subresourceRange.layerCount / 6; - descOut.TextureCubeArray.First2DArrayFace = subresourceRange.baseArrayLayer; - descOut.TextureCubeArray.MipLevels = subresourceRange.mipLevelCount == 0 - ? 
desc.MipLevels - subresourceRange.mipLevel - : subresourceRange.mipLevelCount; - descOut.TextureCubeArray.MostDetailedMip = subresourceRange.mipLevel; - descOut.TextureCubeArray.ResourceMinLODClamp = 0; - } - else - { - descOut.ViewDimension = D3D12_SRV_DIMENSION_TEXTURECUBE; +#if SLANG_GFX_HAS_DXR_SUPPORT - descOut.TextureCube.MipLevels = subresourceRange.mipLevelCount == 0 - ? desc.MipLevels - subresourceRange.mipLevel - : subresourceRange.mipLevelCount; - descOut.TextureCube.MostDetailedMip = subresourceRange.mipLevel; - descOut.TextureCube.ResourceMinLODClamp = 0; - } - } - else +void RayTracingCommandEncoderImpl::buildAccelerationStructure( + const IAccelerationStructure::BuildDesc& desc, + int propertyQueryCount, + AccelerationStructureQueryDesc* queryDescs) +{ + if (!m_commandBuffer->m_cmdList4) { - assert(desc.DepthOrArraySize > 1); + getDebugCallback()->handleMessage( + DebugMessageType::Error, + DebugMessageSource::Layer, + "Ray-tracing is not supported on current system."); + return; + } + AccelerationStructureImpl* destASImpl = nullptr; + if (desc.dest) + destASImpl = static_cast(desc.dest); + AccelerationStructureImpl* srcASImpl = nullptr; + if (desc.source) + srcASImpl = static_cast(desc.source); - switch (desc.Dimension) - { - case D3D12_RESOURCE_DIMENSION_TEXTURE1D: - descOut.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE1DARRAY; - descOut.Texture1D.MostDetailedMip = subresourceRange.mipLevel; - descOut.Texture1D.MipLevels = subresourceRange.mipLevelCount == 0 - ? desc.MipLevels - : subresourceRange.mipLevelCount; - descOut.Texture1DArray.ArraySize = subresourceRange.layerCount == 0 - ? desc.DepthOrArraySize - : subresourceRange.layerCount; - descOut.Texture1DArray.FirstArraySlice = subresourceRange.baseArrayLayer; - descOut.Texture1DArray.ResourceMinLODClamp = 0; - descOut.Texture1DArray.MostDetailedMip = subresourceRange.mipLevel; - descOut.Texture1DArray.MipLevels = subresourceRange.mipLevelCount == 0 - ? 
desc.MipLevels - subresourceRange.mipLevel - : subresourceRange.mipLevelCount; - break; - case D3D12_RESOURCE_DIMENSION_TEXTURE2D: - descOut.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2DARRAY; - descOut.Texture2DArray.ArraySize = - subresourceRange.layerCount == 0 ? desc.DepthOrArraySize : subresourceRange.layerCount; - descOut.Texture2DArray.FirstArraySlice = subresourceRange.baseArrayLayer; - descOut.Texture2DArray.PlaneSlice = - D3DUtil::getPlaneSlice(descOut.Format, subresourceRange.aspectMask); - descOut.Texture2DArray.ResourceMinLODClamp = 0; - descOut.Texture2DArray.MostDetailedMip = subresourceRange.mipLevel; - descOut.Texture2DArray.MipLevels = subresourceRange.mipLevelCount == 0 - ? desc.MipLevels - subresourceRange.mipLevel - : subresourceRange.mipLevelCount; - break; - case D3D12_RESOURCE_DIMENSION_TEXTURE3D: - descOut.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE3D; - descOut.Texture3D.MostDetailedMip = subresourceRange.mipLevel; - descOut.Texture3D.MipLevels = subresourceRange.mipLevelCount == 0 - ? desc.MipLevels - : subresourceRange.mipLevelCount; - break; + D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_DESC buildDesc = {}; + buildDesc.DestAccelerationStructureData = destASImpl->getDeviceAddress(); + buildDesc.SourceAccelerationStructureData = srcASImpl ? 
srcASImpl->getDeviceAddress() : 0; + buildDesc.ScratchAccelerationStructureData = desc.scratchData; + D3DAccelerationStructureInputsBuilder builder; + builder.build(desc.inputs, getDebugCallback()); + buildDesc.Inputs = builder.desc; - default: - assert(!"Unknown dimension"); - } - } + List postBuildInfoDescs; + translatePostBuildInfoDescs(propertyQueryCount, queryDescs, postBuildInfoDescs); + m_commandBuffer->m_cmdList4->BuildRaytracingAccelerationStructure( + &buildDesc, (UINT)propertyQueryCount, postBuildInfoDescs.getBuffer()); } -Result D3D12Device::createBuffer(const D3D12_RESOURCE_DESC& resourceDesc, const void* srcData, size_t srcDataSize, D3D12_RESOURCE_STATES finalState, D3D12Resource& resourceOut, bool isShared, MemoryType memoryType) +void RayTracingCommandEncoderImpl::copyAccelerationStructure( + IAccelerationStructure* dest, IAccelerationStructure* src, AccelerationStructureCopyMode mode) { - const size_t bufferSize = size_t(resourceDesc.Width); - - D3D12_HEAP_PROPERTIES heapProps; - heapProps.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN; - heapProps.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN; - heapProps.CreationNodeMask = 1; - heapProps.VisibleNodeMask = 1; - - D3D12_HEAP_FLAGS flags = D3D12_HEAP_FLAG_NONE; - if (isShared) flags |= D3D12_HEAP_FLAG_SHARED; - - D3D12_RESOURCE_DESC desc = resourceDesc; - - D3D12_RESOURCE_STATES initialState = finalState; + auto destASImpl = static_cast(dest); + auto srcASImpl = static_cast(src); + D3D12_RAYTRACING_ACCELERATION_STRUCTURE_COPY_MODE copyMode; + switch (mode) + { + case AccelerationStructureCopyMode::Clone: + copyMode = D3D12_RAYTRACING_ACCELERATION_STRUCTURE_COPY_MODE_CLONE; + break; + case AccelerationStructureCopyMode::Compact: + copyMode = D3D12_RAYTRACING_ACCELERATION_STRUCTURE_COPY_MODE_COMPACT; + break; + default: + getDebugCallback()->handleMessage( + DebugMessageType::Error, + DebugMessageSource::Layer, + "Unsupported AccelerationStructureCopyMode."); + return; + } + 
m_commandBuffer->m_cmdList4->CopyRaytracingAccelerationStructure( + destASImpl->getDeviceAddress(), srcASImpl->getDeviceAddress(), copyMode); +} - switch (memoryType) - { - case MemoryType::ReadBack: - assert(!srcData); +void RayTracingCommandEncoderImpl::queryAccelerationStructureProperties( + int accelerationStructureCount, + IAccelerationStructure* const* accelerationStructures, + int queryCount, + AccelerationStructureQueryDesc* queryDescs) +{ + List postBuildInfoDescs; + List asAddresses; + asAddresses.setCount(accelerationStructureCount); + for (int i = 0; i < accelerationStructureCount; i++) + asAddresses[i] = accelerationStructures[i]->getDeviceAddress(); + translatePostBuildInfoDescs(queryCount, queryDescs, postBuildInfoDescs); + m_commandBuffer->m_cmdList4->EmitRaytracingAccelerationStructurePostbuildInfo( + postBuildInfoDescs.getBuffer(), (UINT)accelerationStructureCount, asAddresses.getBuffer()); +} - heapProps.Type = D3D12_HEAP_TYPE_READBACK; - desc.Flags = D3D12_RESOURCE_FLAG_NONE; - initialState |= D3D12_RESOURCE_STATE_COPY_DEST; +void RayTracingCommandEncoderImpl::serializeAccelerationStructure( + DeviceAddress dest, IAccelerationStructure* src) +{ + auto srcASImpl = static_cast(src); + m_commandBuffer->m_cmdList4->CopyRaytracingAccelerationStructure( + dest, + srcASImpl->getDeviceAddress(), + D3D12_RAYTRACING_ACCELERATION_STRUCTURE_COPY_MODE_SERIALIZE); +} - break; - case MemoryType::Upload: +void RayTracingCommandEncoderImpl::deserializeAccelerationStructure( + IAccelerationStructure* dest, DeviceAddress source) +{ + auto destASImpl = static_cast(dest); + m_commandBuffer->m_cmdList4->CopyRaytracingAccelerationStructure( + dest->getDeviceAddress(), + source, + D3D12_RAYTRACING_ACCELERATION_STRUCTURE_COPY_MODE_DESERIALIZE); +} - heapProps.Type = D3D12_HEAP_TYPE_UPLOAD; - desc.Flags = D3D12_RESOURCE_FLAG_NONE; - initialState |= D3D12_RESOURCE_STATE_GENERIC_READ; +void RayTracingCommandEncoderImpl::bindPipeline( + IPipelineState* state, 
IShaderObject** outRootObject) +{ + bindPipelineImpl(state, outRootObject); +} - break; - case MemoryType::DeviceLocal: - heapProps.Type = D3D12_HEAP_TYPE_DEFAULT; - initialState = (srcData ? D3D12_RESOURCE_STATE_COPY_DEST : finalState); - break; - default: - return SLANG_FAIL; - } +void RayTracingCommandEncoderImpl::dispatchRays( + uint32_t rayGenShaderIndex, + IShaderTable* shaderTable, + int32_t width, + int32_t height, + int32_t depth) +{ + RefPtr newPipeline; + PipelineStateBase* pipeline = m_currentPipeline.Ptr(); + { + struct RayTracingSubmitter : public ComputeSubmitter + { + ID3D12GraphicsCommandList4* m_cmdList4; + RayTracingSubmitter(ID3D12GraphicsCommandList4* cmdList4) + : ComputeSubmitter(cmdList4) + , m_cmdList4(cmdList4) + {} + virtual void setPipelineState(PipelineStateBase* pipeline) override + { + auto pipelineImpl = static_cast(pipeline); + m_cmdList4->SetPipelineState1(pipelineImpl->m_stateObject.get()); + } + }; + RayTracingSubmitter submitter(m_commandBuffer->m_cmdList4); + if (SLANG_FAILED(_bindRenderState(&submitter, newPipeline))) + { + assert(!"Failed to bind render state"); + } + if (newPipeline) + pipeline = newPipeline.Ptr(); + } + auto pipelineImpl = static_cast(pipeline); - // Create the resource. - SLANG_RETURN_ON_FAIL(resourceOut.initCommitted(m_device, heapProps, flags, desc, initialState, nullptr)); + auto shaderTableImpl = static_cast(shaderTable); - if (srcData) - { - D3D12Resource uploadResource; + ResourceCommandEncoderImpl resourceCopyEncoder; + resourceCopyEncoder.init(m_commandBuffer); + auto shaderTableBuffer = + shaderTableImpl->getOrCreateBuffer(pipelineImpl, m_transientHeap, &resourceCopyEncoder); + auto shaderTableAddr = shaderTableBuffer->getDeviceAddress(); - if (memoryType == MemoryType::DeviceLocal) - { - // If the buffer is on the default heap, create upload buffer. 
- D3D12_RESOURCE_DESC uploadDesc(resourceDesc); - uploadDesc.Flags = D3D12_RESOURCE_FLAG_NONE; - heapProps.Type = D3D12_HEAP_TYPE_UPLOAD; + D3D12_DISPATCH_RAYS_DESC dispatchDesc = {}; - SLANG_RETURN_ON_FAIL(uploadResource.initCommitted(m_device, heapProps, D3D12_HEAP_FLAG_NONE, uploadDesc, D3D12_RESOURCE_STATE_GENERIC_READ, nullptr)); - } + dispatchDesc.RayGenerationShaderRecord.StartAddress = + shaderTableAddr + shaderTableImpl->m_rayGenTableOffset + + rayGenShaderIndex * D3D12_SHADER_IDENTIFIER_SIZE_IN_BYTES; + dispatchDesc.RayGenerationShaderRecord.SizeInBytes = D3D12_SHADER_IDENTIFIER_SIZE_IN_BYTES; - // Be careful not to actually copy a resource here. - D3D12Resource& uploadResourceRef = (memoryType == MemoryType::DeviceLocal) ? uploadResource : resourceOut; + dispatchDesc.MissShaderTable.StartAddress = + shaderTableAddr + shaderTableImpl->m_missTableOffset; + dispatchDesc.MissShaderTable.SizeInBytes = + shaderTableImpl->m_missShaderCount * D3D12_SHADER_IDENTIFIER_SIZE_IN_BYTES; + dispatchDesc.MissShaderTable.StrideInBytes = D3D12_SHADER_IDENTIFIER_SIZE_IN_BYTES; - // Copy data to the intermediate upload heap and then schedule a copy - // from the upload heap to the vertex buffer. - UINT8* dstData; - D3D12_RANGE readRange = {}; // We do not intend to read from this resource on the CPU. 
+ dispatchDesc.HitGroupTable.StartAddress = + shaderTableAddr + shaderTableImpl->m_hitGroupTableOffset; + dispatchDesc.HitGroupTable.SizeInBytes = + shaderTableImpl->m_hitGroupCount * D3D12_SHADER_IDENTIFIER_SIZE_IN_BYTES; + dispatchDesc.HitGroupTable.StrideInBytes = D3D12_SHADER_IDENTIFIER_SIZE_IN_BYTES; - ID3D12Resource* dxUploadResource = uploadResourceRef.getResource(); + dispatchDesc.Width = (UINT)width; + dispatchDesc.Height = (UINT)height; + dispatchDesc.Depth = (UINT)depth; + m_commandBuffer->m_cmdList4->DispatchRays(&dispatchDesc); +} - SLANG_RETURN_ON_FAIL(dxUploadResource->Map(0, &readRange, reinterpret_cast(&dstData))); - ::memcpy(dstData, srcData, srcDataSize); - dxUploadResource->Unmap(0, nullptr); +RayTracingPipelineStateImpl::RayTracingPipelineStateImpl(DeviceImpl* device) + : m_device(device) +{} - if (memoryType == MemoryType::DeviceLocal) - { - auto encodeInfo = encodeResourceCommands(); - encodeInfo.d3dCommandList->CopyBufferRegion(resourceOut, 0, uploadResourceRef, 0, bufferSize); - submitResourceCommandsAndWait(encodeInfo); - } - } +void RayTracingPipelineStateImpl::init(const RayTracingPipelineStateDesc& inDesc) +{ + PipelineStateDesc pipelineDesc; + pipelineDesc.type = PipelineType::RayTracing; + pipelineDesc.rayTracing.set(inDesc); + initializeBase(pipelineDesc); +} +Result RayTracingPipelineStateImpl::getNativeHandle(InteropHandle* outHandle) +{ + SLANG_RETURN_ON_FAIL(ensureAPIPipelineStateCreated()); + outHandle->api = InteropHandleAPI::D3D12; + outHandle->handleValue = reinterpret_cast(m_stateObject.get()); return SLANG_OK; } -Result D3D12Device::captureTextureToSurface( - TextureResourceImpl* resourceImpl, - ResourceState state, - ISlangBlob** outBlob, - size_t* outRowPitch, - size_t* outPixelSize) +Result RayTracingPipelineStateImpl::ensureAPIPipelineStateCreated() { - auto& resource = resourceImpl->m_resource; + if (m_stateObject) + return SLANG_OK; - const D3D12_RESOURCE_STATES initialState = D3DUtil::getResourceState(state); + auto 
program = static_cast(m_program.Ptr()); + auto slangGlobalScope = program->linkedProgram; + auto programLayout = slangGlobalScope->getLayout(); - const ITextureResource::Desc& gfxDesc = *resourceImpl->getDesc(); - const D3D12_RESOURCE_DESC desc = resource.getResource()->GetDesc(); + List subObjects; + ChunkedList dxilLibraries; + ChunkedList hitGroups; + ChunkedList> codeBlobs; + ChunkedList exports; + ChunkedList strPtrs; - // Don't bother supporting MSAA for right now - if (desc.SampleDesc.Count > 1) + ComPtr diagnostics; + ChunkedList stringPool; + auto getWStr = [&](const char* name) { - fprintf(stderr, "ERROR: cannot capture multi-sample texture\n"); - return SLANG_FAIL; - } - - FormatInfo formatInfo; - gfxGetFormatInfo(gfxDesc.format, &formatInfo); - size_t bytesPerPixel = formatInfo.blockSizeInBytes / formatInfo.pixelsPerBlock; - size_t rowPitch = int(desc.Width) * bytesPerPixel; - static const size_t align = 256; // D3D requires minimum 256 byte alignment for texture data. - rowPitch = (rowPitch + align - 1) & ~(align - 1); // Bit trick for rounding up - size_t bufferSize = rowPitch * int(desc.Height); - if (outRowPitch) - *outRowPitch = rowPitch; - if (outPixelSize) - *outPixelSize = bytesPerPixel; - - D3D12Resource stagingResource; + String str = String(name); + auto wstr = str.toWString(); + return stringPool.add(wstr)->begin(); + }; + auto compileShader = [&](slang::EntryPointLayout* entryPointInfo, + slang::IComponentType* component, + SlangInt entryPointIndex) { - D3D12_RESOURCE_DESC stagingDesc; - _initBufferResourceDesc(bufferSize, stagingDesc); - - D3D12_HEAP_PROPERTIES heapProps; - heapProps.Type = D3D12_HEAP_TYPE_READBACK; - heapProps.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN; - heapProps.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN; - heapProps.CreationNodeMask = 1; - heapProps.VisibleNodeMask = 1; + ComPtr codeBlob; + auto compileResult = component->getEntryPointCode( + entryPointIndex, 0, codeBlob.writeRef(), 
diagnostics.writeRef()); + if (diagnostics.get()) + { + getDebugCallback()->handleMessage( + compileResult == SLANG_OK ? DebugMessageType::Warning : DebugMessageType::Error, + DebugMessageSource::Slang, + (char*)diagnostics->getBufferPointer()); + } + SLANG_RETURN_ON_FAIL(compileResult); + codeBlobs.add(codeBlob); + D3D12_DXIL_LIBRARY_DESC library = {}; + library.DXILLibrary.BytecodeLength = codeBlob->getBufferSize(); + library.DXILLibrary.pShaderBytecode = codeBlob->getBufferPointer(); + library.NumExports = 1; + D3D12_EXPORT_DESC exportDesc = {}; + exportDesc.Name = getWStr(entryPointInfo->getNameOverride()); + exportDesc.ExportToRename = getWStr(entryPointInfo->getNameOverride()); + exportDesc.Flags = D3D12_EXPORT_FLAG_NONE; + library.pExports = exports.add(exportDesc); - SLANG_RETURN_ON_FAIL(stagingResource.initCommitted(m_device, heapProps, D3D12_HEAP_FLAG_NONE, stagingDesc, D3D12_RESOURCE_STATE_COPY_DEST, nullptr)); + D3D12_STATE_SUBOBJECT dxilSubObject = {}; + dxilSubObject.Type = D3D12_STATE_SUBOBJECT_TYPE_DXIL_LIBRARY; + dxilSubObject.pDesc = dxilLibraries.add(library); + subObjects.add(dxilSubObject); + return SLANG_OK; + }; + if (program->linkedEntryPoints.getCount() == 0) + { + for (SlangUInt i = 0; i < programLayout->getEntryPointCount(); i++) + { + SLANG_RETURN_ON_FAIL(compileShader( + programLayout->getEntryPointByIndex(i), program->linkedProgram, (SlangInt)i)); + } } - - auto encodeInfo = encodeResourceCommands(); - auto currentState = D3DUtil::getResourceState(state); - + else { - D3D12BarrierSubmitter submitter(encodeInfo.d3dCommandList); - resource.transition(currentState, D3D12_RESOURCE_STATE_COPY_SOURCE, submitter); + for (auto& entryPoint : program->linkedEntryPoints) + { + SLANG_RETURN_ON_FAIL( + compileShader(entryPoint->getLayout()->getEntryPointByIndex(0), entryPoint, 0)); + } } - // Do the copy + for (Index i = 0; i < desc.rayTracing.hitGroupDescs.getCount(); i++) { - D3D12_TEXTURE_COPY_LOCATION srcLoc; - srcLoc.pResource = resource; - 
srcLoc.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; - srcLoc.SubresourceIndex = 0; + auto& hitGroup = desc.rayTracing.hitGroups[i]; + D3D12_HIT_GROUP_DESC hitGroupDesc = {}; + hitGroupDesc.Type = hitGroup.intersectionEntryPoint.getLength() == 0 + ? D3D12_HIT_GROUP_TYPE_TRIANGLES + : D3D12_HIT_GROUP_TYPE_PROCEDURAL_PRIMITIVE; - D3D12_TEXTURE_COPY_LOCATION dstLoc; - dstLoc.pResource = stagingResource; - dstLoc.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; - dstLoc.PlacedFootprint.Offset = 0; - dstLoc.PlacedFootprint.Footprint.Format = desc.Format; - dstLoc.PlacedFootprint.Footprint.Width = UINT(desc.Width); - dstLoc.PlacedFootprint.Footprint.Height = UINT(desc.Height); - dstLoc.PlacedFootprint.Footprint.Depth = 1; - dstLoc.PlacedFootprint.Footprint.RowPitch = UINT(rowPitch); + if (hitGroup.anyHitEntryPoint.getLength()) + { + hitGroupDesc.AnyHitShaderImport = getWStr(hitGroup.anyHitEntryPoint.getBuffer()); + } + if (hitGroup.closestHitEntryPoint.getLength()) + { + hitGroupDesc.ClosestHitShaderImport = + getWStr(hitGroup.closestHitEntryPoint.getBuffer()); + } + if (hitGroup.intersectionEntryPoint.getLength()) + { + hitGroupDesc.IntersectionShaderImport = + getWStr(hitGroup.intersectionEntryPoint.getBuffer()); + } + hitGroupDesc.HitGroupExport = getWStr(hitGroup.hitGroupName.getBuffer()); - encodeInfo.d3dCommandList->CopyTextureRegion(&dstLoc, 0, 0, 0, &srcLoc, nullptr); + D3D12_STATE_SUBOBJECT hitGroupSubObject = {}; + hitGroupSubObject.Type = D3D12_STATE_SUBOBJECT_TYPE_HIT_GROUP; + hitGroupSubObject.pDesc = hitGroups.add(hitGroupDesc); + subObjects.add(hitGroupSubObject); } - { - D3D12BarrierSubmitter submitter(encodeInfo.d3dCommandList); - resource.transition(D3D12_RESOURCE_STATE_COPY_SOURCE, currentState, submitter); - } + D3D12_RAYTRACING_SHADER_CONFIG shaderConfig = {}; + // According to DXR spec, fixed function triangle intersections must use float2 as ray + // attributes that defines the barycentric coordinates at intersection. 
+ shaderConfig.MaxAttributeSizeInBytes = desc.rayTracing.maxAttributeSizeInBytes; + shaderConfig.MaxPayloadSizeInBytes = desc.rayTracing.maxRayPayloadSize; + D3D12_STATE_SUBOBJECT shaderConfigSubObject = {}; + shaderConfigSubObject.Type = D3D12_STATE_SUBOBJECT_TYPE_RAYTRACING_SHADER_CONFIG; + shaderConfigSubObject.pDesc = &shaderConfig; + subObjects.add(shaderConfigSubObject); - // Submit the copy, and wait for copy to complete - submitResourceCommandsAndWait(encodeInfo); + D3D12_GLOBAL_ROOT_SIGNATURE globalSignatureDesc = {}; + globalSignatureDesc.pGlobalRootSignature = program->m_rootObjectLayout->m_rootSignature.get(); + D3D12_STATE_SUBOBJECT globalSignatureSubobject = {}; + globalSignatureSubobject.Type = D3D12_STATE_SUBOBJECT_TYPE_GLOBAL_ROOT_SIGNATURE; + globalSignatureSubobject.pDesc = &globalSignatureDesc; + subObjects.add(globalSignatureSubobject); - { - ID3D12Resource* dxResource = stagingResource; + D3D12_RAYTRACING_PIPELINE_CONFIG pipelineConfig = {}; + pipelineConfig.MaxTraceRecursionDepth = desc.rayTracing.maxRecursion; + D3D12_STATE_SUBOBJECT pipelineConfigSubobject = {}; + pipelineConfigSubobject.Type = D3D12_STATE_SUBOBJECT_TYPE_RAYTRACING_PIPELINE_CONFIG; + pipelineConfigSubobject.pDesc = &pipelineConfig; + subObjects.add(pipelineConfigSubobject); - UINT8* data; - D3D12_RANGE readRange = {0, bufferSize}; + if (m_device->m_pipelineCreationAPIDispatcher) + { + m_device->m_pipelineCreationAPIDispatcher->beforeCreateRayTracingState( + m_device, slangGlobalScope); + } - SLANG_RETURN_ON_FAIL(dxResource->Map(0, &readRange, reinterpret_cast(&data))); + D3D12_STATE_OBJECT_DESC rtpsoDesc = {}; + rtpsoDesc.Type = D3D12_STATE_OBJECT_TYPE_RAYTRACING_PIPELINE; + rtpsoDesc.NumSubobjects = (UINT)subObjects.getCount(); + rtpsoDesc.pSubobjects = subObjects.getBuffer(); + SLANG_RETURN_ON_FAIL( + m_device->m_device5->CreateStateObject(&rtpsoDesc, IID_PPV_ARGS(m_stateObject.writeRef()))); - RefPtr resultBlob = new Slang::ListBlob(); - 
resultBlob->m_data.setCount(bufferSize); - memcpy(resultBlob->m_data.getBuffer(), data, bufferSize); - dxResource->Unmap(0, nullptr); - returnComPtr(outBlob, resultBlob); - return SLANG_OK; + if (m_device->m_pipelineCreationAPIDispatcher) + { + m_device->m_pipelineCreationAPIDispatcher->afterCreateRayTracingState( + m_device, slangGlobalScope); } + return SLANG_OK; } -// !!!!!!!!!!!!!!!!!!!!!!!!!!!! Renderer interface !!!!!!!!!!!!!!!!!!!!!!!!!! +#endif + +UInt ShaderObjectImpl::getEntryPointCount() { return 0; } -Result D3D12Device::getNativeDeviceHandles(InteropHandles* outHandles) +Result ShaderObjectImpl::getEntryPoint(UInt index, IShaderObject** outEntryPoint) { - outHandles->handles[0].handleValue = (uint64_t)m_device; - outHandles->handles[0].api = InteropHandleAPI::D3D12; + *outEntryPoint = nullptr; return SLANG_OK; } -Result D3D12Device::_createDevice(DeviceCheckFlags deviceCheckFlags, const UnownedStringSlice& nameMatch, D3D_FEATURE_LEVEL featureLevel, DeviceInfo& outDeviceInfo) -{ - if (m_dxDebug && (deviceCheckFlags & DeviceCheckFlag::UseDebug)) - { - m_dxDebug->EnableDebugLayer(); - } - - outDeviceInfo.clear(); +const void* ShaderObjectImpl::getRawData() { return m_data.getBuffer(); } - ComPtr dxgiFactory; - SLANG_RETURN_ON_FAIL(D3DUtil::createFactory(deviceCheckFlags, dxgiFactory)); +size_t ShaderObjectImpl::getSize() { return (size_t)m_data.getCount(); } - List> dxgiAdapters; - SLANG_RETURN_ON_FAIL(D3DUtil::findAdapters(deviceCheckFlags, nameMatch, dxgiFactory, dxgiAdapters)); +Result ShaderObjectImpl::setData(ShaderOffset const& inOffset, void const* data, size_t inSize) +{ + Index offset = inOffset.uniformOffset; + Index size = inSize; - ComPtr device; - ComPtr adapter; + char* dest = m_data.getBuffer(); + Index availableSize = m_data.getCount(); - for (Index i = 0; i < dxgiAdapters.getCount(); ++i) + // TODO: We really should bounds-check access rather than silently ignoring sets + // that are too large, but we have several test cases that set more 
data than + // an object actually stores on several targets... + // + if (offset < 0) { - IDXGIAdapter* dxgiAdapter = dxgiAdapters[i]; - if (SLANG_SUCCEEDED(m_D3D12CreateDevice(dxgiAdapter, featureLevel, IID_PPV_ARGS(device.writeRef())))) - { - adapter = dxgiAdapter; - break; - } + size += offset; + offset = 0; } - - if (!device) + if ((offset + size) >= availableSize) { - return SLANG_FAIL; + size = availableSize - offset; } - if (m_dxDebug && (deviceCheckFlags & DeviceCheckFlag::UseDebug)) - { - ComPtr infoQueue; - if (SLANG_SUCCEEDED(device->QueryInterface(infoQueue.writeRef()))) - { - // Make break - infoQueue->SetBreakOnSeverity(D3D12_MESSAGE_SEVERITY_CORRUPTION, true); - if (m_extendedDesc.debugBreakOnD3D12Error) - { - infoQueue->SetBreakOnSeverity(D3D12_MESSAGE_SEVERITY_ERROR, true); - } - D3D12_MESSAGE_ID hideMessages[] = { - D3D12_MESSAGE_ID_CLEARRENDERTARGETVIEW_MISMATCHINGCLEARVALUE, - D3D12_MESSAGE_ID_CLEARDEPTHSTENCILVIEW_MISMATCHINGCLEARVALUE, - }; - D3D12_INFO_QUEUE_FILTER f = {}; - f.DenyList.NumIDs = (UINT)SLANG_COUNT_OF(hideMessages); - f.DenyList.pIDList = hideMessages; - infoQueue->AddStorageFilterEntries(&f); - - // Apparently there is a problem with sm 6.3 with spurious errors, with debug layer enabled - D3D12_FEATURE_DATA_SHADER_MODEL featureShaderModel; - featureShaderModel.HighestShaderModel = D3D_SHADER_MODEL(0x63); - SLANG_SUCCEEDED(device->CheckFeatureSupport(D3D12_FEATURE_SHADER_MODEL, &featureShaderModel, sizeof(featureShaderModel))); - - if (featureShaderModel.HighestShaderModel >= D3D_SHADER_MODEL(0x63)) - { - // Filter out any messages that cause issues - // TODO: Remove this when the debug layers work properly - D3D12_MESSAGE_ID messageIds[] = - { - // When the debug layer is enabled this error is triggered sometimes after a CopyDescriptorsSimple - // call The failed check validates that the source and destination ranges of the copy do not - // overlap. 
The check assumes descriptor handles are pointers to memory, but this is not always the - // case and the check fails (even though everything is okay). - D3D12_MESSAGE_ID_COPY_DESCRIPTORS_INVALID_RANGES, - }; - - // We filter INFO messages because they are way too many - D3D12_MESSAGE_SEVERITY severities[] = { D3D12_MESSAGE_SEVERITY_INFO }; + memcpy(dest + offset, data, size); - D3D12_INFO_QUEUE_FILTER infoQueueFilter = {}; - infoQueueFilter.DenyList.NumSeverities = SLANG_COUNT_OF(severities); - infoQueueFilter.DenyList.pSeverityList = severities; - infoQueueFilter.DenyList.NumIDs = SLANG_COUNT_OF(messageIds); - infoQueueFilter.DenyList.pIDList = messageIds; + m_isConstantBufferDirty = true; - infoQueue->PushStorageFilter(&infoQueueFilter); - } - } - } + m_version++; - // Get the descs - { - adapter->GetDesc(&outDeviceInfo.m_desc); + return SLANG_OK; +} - // Look up GetDesc1 info - ComPtr adapter1; - if (SLANG_SUCCEEDED(adapter->QueryInterface(adapter1.writeRef()))) - { - adapter1->GetDesc1(&outDeviceInfo.m_desc1); - } +Result ShaderObjectImpl::setObject(ShaderOffset const& offset, IShaderObject* object) +{ + SLANG_RETURN_ON_FAIL(Super::setObject(offset, object)); + if (m_isMutable) + { + auto subObjectIndex = getSubObjectIndex(offset); + if (subObjectIndex >= m_subObjectVersions.getCount()) + m_subObjectVersions.setCount(subObjectIndex + 1); + m_subObjectVersions[subObjectIndex] = static_cast(object)->m_version; + m_version++; } - - // Save other info - outDeviceInfo.m_device = device; - outDeviceInfo.m_dxgiFactory = dxgiFactory; - outDeviceInfo.m_adapter = adapter; - outDeviceInfo.m_isWarp = D3DUtil::isWarp(dxgiFactory, adapter); - const UINT kMicrosoftVendorId = 5140; - outDeviceInfo.m_isSoftware = outDeviceInfo.m_isWarp || ((outDeviceInfo.m_desc1.Flags & DXGI_ADAPTER_FLAG_SOFTWARE) != 0) - || outDeviceInfo.m_desc.VendorId == kMicrosoftVendorId; - return SLANG_OK; } -static bool _isSupportedNVAPIOp(ID3D12Device* dev, uint32_t op) +Result 
ShaderObjectImpl::setSampler(ShaderOffset const& offset, ISamplerState* sampler) { -#ifdef GFX_NVAPI - { - bool isSupported; - NvAPI_Status status = NvAPI_D3D12_IsNvShaderExtnOpCodeSupported(dev, NvU32(op), &isSupported); - return status == NVAPI_OK && isSupported; - } -#else - return false; + if (offset.bindingRangeIndex < 0) + return SLANG_E_INVALID_ARG; + auto layout = getLayout(); + if (offset.bindingRangeIndex >= layout->getBindingRangeCount()) + return SLANG_E_INVALID_ARG; + auto& bindingRange = layout->getBindingRange(offset.bindingRangeIndex); + auto samplerImpl = static_cast(sampler); + ID3D12Device* d3dDevice = static_cast(getDevice())->m_device; + d3dDevice->CopyDescriptorsSimple( + 1, + m_descriptorSet.samplerTable.getCpuHandle( + bindingRange.baseIndex + (int32_t)offset.bindingArrayIndex), + samplerImpl->m_descriptor.cpuHandle, + D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER); + m_version++; + return SLANG_OK; +} + +Result ShaderObjectImpl::setCombinedTextureSampler( + ShaderOffset const& offset, IResourceView* textureView, ISamplerState* sampler) +{ +#if 0 + if (offset.bindingRangeIndex < 0) + return SLANG_E_INVALID_ARG; + auto layout = getLayout(); + if (offset.bindingRangeIndex >= layout->getBindingRangeCount()) + return SLANG_E_INVALID_ARG; + auto& bindingRange = layout->getBindingRange(offset.bindingRangeIndex); + auto resourceViewImpl = static_cast(textureView); + ID3D12Device* d3dDevice = static_cast(getDevice())->m_device; + d3dDevice->CopyDescriptorsSimple( + 1, + m_resourceHeap.getCpuHandle( + m_descriptorSet.m_resourceTable + + bindingRange.binding.offsetInDescriptorTable.resource + + (int32_t)offset.bindingArrayIndex), + resourceViewImpl->m_descriptor.cpuHandle, + D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + auto samplerImpl = static_cast(sampler); + d3dDevice->CopyDescriptorsSimple( + 1, + m_samplerHeap.getCpuHandle( + m_descriptorSet.m_samplerTable + + bindingRange.binding.offsetInDescriptorTable.sampler + + (int32_t)offset.bindingArrayIndex), + 
samplerImpl->m_descriptor.cpuHandle, + D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER); #endif + m_version++; + return SLANG_OK; } -Result D3D12Device::initialize(const Desc& desc) +Result ShaderObjectImpl::init( + DeviceImpl* device, + ShaderObjectLayoutImpl* layout, + DescriptorHeapReference viewHeap, + DescriptorHeapReference samplerHeap) { - SLANG_RETURN_ON_FAIL(RendererBase::initialize(desc)); + m_device = device; - // Find extended desc. - for (uint32_t i = 0; i < desc.extendedDescCount; i++) - { - StructType stype; - memcpy(&stype, desc.extendedDescs[i], sizeof(stype)); - if (stype == StructType::D3D12ExtendedDesc) - { - memcpy(&m_extendedDesc, desc.extendedDescs[i], sizeof(m_extendedDesc)); - } - } + m_layout = layout; - // Initialize queue index allocator. - // Support max 32 queues. - m_queueIndexAllocator.initPool(32); + m_cachedTransientHeap = nullptr; + m_cachedTransientHeapVersion = 0; + m_isConstantBufferDirty = true; - // Initialize DeviceInfo + // If the layout tells us that there is any uniform data, + // then we will allocate a CPU memory buffer to hold that data + // while it is being set from the host. + // + // Once the user is done setting the parameters/fields of this + // shader object, we will produce a GPU-memory version of the + // uniform data (which includes values from this object and + // any existential-type sub-objects). + // + size_t uniformSize = layout->getElementTypeLayout()->getSize(); + if (uniformSize) { - m_info.deviceType = DeviceType::DirectX12; - m_info.bindingStyle = BindingStyle::DirectX; - m_info.projectionStyle = ProjectionStyle::DirectX; - m_info.apiName = "Direct3D 12"; - static const float kIdentity[] = {1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1}; - ::memcpy(m_info.identityProjectionMatrix, kIdentity, sizeof(kIdentity)); + m_data.setCount(uniformSize); + memset(m_data.getBuffer(), 0, uniformSize); } - - // Rather than statically link against D3D, we load it dynamically. 
- - HMODULE d3dModule = LoadLibraryA("d3d12.dll"); - if (!d3dModule) + m_rootArguments.setCount(layout->getOwnUserRootParameterCount()); + memset( + m_rootArguments.getBuffer(), + 0, + sizeof(D3D12_GPU_VIRTUAL_ADDRESS) * m_rootArguments.getCount()); + // Each shader object will own CPU descriptor heap memory + // for any resource or sampler descriptors it might store + // as part of its value. + // + // This allocation includes a reservation for any constant + // buffer descriptor pertaining to the ordinary data, + // but does *not* include any descriptors that are managed + // as part of sub-objects. + // + if (auto resourceCount = layout->getResourceSlotCount()) { - fprintf(stderr, "error: failed load 'd3d12.dll'\n"); - return SLANG_FAIL; - } + m_descriptorSet.resourceTable.allocate(viewHeap, resourceCount); - // Get all the dll entry points - m_D3D12SerializeRootSignature = (PFN_D3D12_SERIALIZE_ROOT_SIGNATURE)loadProc(d3dModule, "D3D12SerializeRootSignature"); - if (!m_D3D12SerializeRootSignature) - { - return SLANG_FAIL; + // We must also ensure that the memory for any resources + // referenced by descriptors in this object does not get + // freed while the object is still live. + // + m_boundResources.setCount(resourceCount); } - - HMODULE pixModule = LoadLibraryW(L"WinPixEventRuntime.dll"); - if (pixModule) + if (auto samplerCount = layout->getSamplerSlotCount()) { - m_BeginEventOnCommandList = - (PFN_BeginEventOnCommandList)GetProcAddress(pixModule, "PIXBeginEventOnCommandList"); - m_EndEventOnCommandList = - (PFN_EndEventOnCommandList)GetProcAddress(pixModule, "PIXEndEventOnCommandList"); + m_descriptorSet.samplerTable.allocate(samplerHeap, samplerCount); } -#if ENABLE_DEBUG_LAYER - m_D3D12GetDebugInterface = (PFN_D3D12_GET_DEBUG_INTERFACE)loadProc(d3dModule, "D3D12GetDebugInterface"); - if (m_D3D12GetDebugInterface) + // If the layout specifies that we have any sub-objects, then + // we need to size the array to account for them.
+ // + Index subObjectCount = layout->getSubObjectSlotCount(); + m_objects.setCount(subObjectCount); + + for (auto subObjectRangeInfo : layout->getSubObjectRanges()) { - if (SLANG_SUCCEEDED(m_D3D12GetDebugInterface(IID_PPV_ARGS(m_dxDebug.writeRef())))) - { -#if 0 - // Can enable for extra validation. NOTE! That d3d12 warns if you do.... - // D3D12 MESSAGE : Device Debug Layer Startup Options : GPU - Based Validation is enabled(disabled by default). - // This results in new validation not possible during API calls on the CPU, by creating patched shaders that have validation - // added directly to the shader. However, it can slow things down a lot, especially for applications with numerous - // PSOs.Time to see the first render frame may take several minutes. - // [INITIALIZATION MESSAGE #1016: CREATEDEVICE_DEBUG_LAYER_STARTUP_OPTIONS] + auto subObjectLayout = subObjectRangeInfo.layout; - ComPtr debug1; - if (SLANG_SUCCEEDED(m_dxDebug->QueryInterface(debug1.writeRef()))) - { - debug1->SetEnableGPUBasedValidation(true); - } -#endif + // In the case where the sub-object range represents an + // existential-type leaf field (e.g., an `IBar`), we + // cannot pre-allocate the object(s) to go into that + // range, since we can't possibly know what to allocate + // at this point. + // + if (!subObjectLayout) + continue; + // + // Otherwise, we will allocate a sub-object to fill + // in each entry in this range, based on the layout + // information we already have. 
+ + auto& bindingRangeInfo = layout->getBindingRange(subObjectRangeInfo.bindingRangeIndex); + for (uint32_t i = 0; i < bindingRangeInfo.count; ++i) + { + RefPtr subObject; + SLANG_RETURN_ON_FAIL( + ShaderObjectImpl::create(device, subObjectLayout, subObject.writeRef())); + m_objects[bindingRangeInfo.subObjectIndex + i] = subObject; } } -#endif - m_D3D12CreateDevice = (PFN_D3D12_CREATE_DEVICE)loadProc(d3dModule, "D3D12CreateDevice"); - if (!m_D3D12CreateDevice) - { - return SLANG_FAIL; - } + return SLANG_OK; +} - if (desc.existingDeviceHandles.handles[0].handleValue == 0) +/// Write the uniform/ordinary data of this object into the given `dest` buffer at the given +/// `offset` + +Result ShaderObjectImpl::_writeOrdinaryData( + PipelineCommandEncoder* encoder, + BufferResourceImpl* buffer, + size_t offset, + size_t destSize, + ShaderObjectLayoutImpl* specializedLayout) +{ + auto src = m_data.getBuffer(); + auto srcSize = size_t(m_data.getCount()); + + SLANG_ASSERT(srcSize <= destSize); + + uploadBufferDataImpl( + encoder->m_device, + encoder->m_d3dCmdList, + encoder->m_transientHeap, + buffer, + offset, + srcSize, + src); + + // In the case where this object has any sub-objects of + // existential/interface type, we need to recurse on those objects + // that need to write their state into an appropriate "pending" allocation. + // + // Note: Any values that could fit into the "payload" included + // in the existential-type field itself will have already been + // written as part of `setObject()`. This loop only needs to handle + // those sub-objects that do not "fit." + // + // An implementer looking at this code might wonder if things could be changed + // so that *all* writes related to sub-objects for interface-type fields could + // be handled in this one location, rather than having some in `setObject()` and + // others handled here.
+ // + Index subObjectRangeCounter = 0; + for (auto const& subObjectRangeInfo : specializedLayout->getSubObjectRanges()) { - FlagCombiner combiner; - // TODO: we should probably provide a command-line option - // to override UseDebug of default rather than leave it - // up to each back-end to specify. -#if ENABLE_DEBUG_LAYER - combiner.add(DeviceCheckFlag::UseDebug, ChangeType::OnOff); ///< First try debug then non debug -#else - combiner.add(DeviceCheckFlag::UseDebug, ChangeType::Off); ///< Don't bother with debug -#endif - combiner.add(DeviceCheckFlag::UseHardwareDevice, ChangeType::OnOff); ///< First try hardware, then reference + Index subObjectRangeIndex = subObjectRangeCounter++; + auto const& bindingRangeInfo = + specializedLayout->getBindingRange(subObjectRangeInfo.bindingRangeIndex); - const D3D_FEATURE_LEVEL featureLevel = D3D_FEATURE_LEVEL_11_0; + // We only need to handle sub-object ranges for interface/existential-type fields, + // because fields of constant-buffer or parameter-block type are responsible for + // the ordinary/uniform data of their own existential/interface-type sub-objects. + // + if (bindingRangeInfo.bindingType != slang::BindingType::ExistentialValue) + continue; - const int numCombinations = combiner.getNumCombinations(); - for (int i = 0; i < numCombinations; ++i) - { - if (SLANG_SUCCEEDED(_createDevice(combiner.getCombination(i), UnownedStringSlice(desc.adapter), featureLevel, m_deviceInfo))) - { - break; - } - } + // Each sub-object range represents a single "leaf" field, but might be nested + // under zero or more outer arrays, such that the number of existential values + // in the same range can be one or more. + // + auto count = bindingRangeInfo.count; - if (!m_deviceInfo.m_adapter) + // We are not concerned with the case where the existential value(s) in the range + // git into the payload part of the leaf field. 
+ // + // In the case where the value didn't fit, the Slang layout strategy would have + // considered the requirements of the value as a "pending" allocation, and would + // allocate storage for the ordinary/uniform part of that pending allocation inside + // of the parent object's type layout. + // + // Here we assume that the Slang reflection API can provide us with a single byte + // offset and stride for the location of the pending data allocation in the + // specialized type layout, which will store the values for this sub-object range. + // + // TODO: The reflection API functions we are assuming here haven't been implemented + // yet, so the functions being called here are stubs. + // + // TODO: It might not be that a single sub-object range can reliably map to a single + // contiguous array with a single stride; we need to carefully consider what the + // layout logic does for complex cases with multiple layers of nested arrays and + // structures. + // + size_t subObjectRangePendingDataOffset = subObjectRangeInfo.offset.pendingOrdinaryData; + size_t subObjectRangePendingDataStride = subObjectRangeInfo.stride.pendingOrdinaryData; + + // If the range doesn't actually need/use the "pending" allocation at all, then + // we need to detect that case and skip such ranges. + // + // TODO: This should probably be handled on a per-object basis by caching a "does it + // fit?" bit as part of the information for bound sub-objects, given that we already + // compute the "does it fit?" status as part of `setObject()`. 
+ // + if (subObjectRangePendingDataOffset == 0) + continue; + + for (uint32_t i = 0; i < count; ++i) { - // Couldn't find an adapter - return SLANG_FAIL; + auto subObject = m_objects[bindingRangeInfo.subObjectIndex + i]; + + RefPtr subObjectLayout; + SLANG_RETURN_ON_FAIL(subObject->getSpecializedLayout(subObjectLayout.writeRef())); + + auto subObjectOffset = + subObjectRangePendingDataOffset + i * subObjectRangePendingDataStride; + + subObject->_writeOrdinaryData( + encoder, + buffer, + offset + subObjectOffset, + destSize - subObjectOffset, + subObjectLayout); } } - else + + return SLANG_OK; +} + +bool ShaderObjectImpl::shouldAllocateConstantBuffer(TransientResourceHeapImpl* transientHeap) +{ + if (m_isConstantBufferDirty || m_cachedTransientHeap != transientHeap || + m_cachedTransientHeapVersion != transientHeap->getVersion()) { - // Store the existing device handle in desc in m_deviceInfo - m_deviceInfo.m_device = (ID3D12Device*)desc.existingDeviceHandles.handles[0].handleValue; + return true; } + return false; +} - // Set the device - m_device = m_deviceInfo.m_device; +/// Ensure that the `m_ordinaryDataBuffer` has been created, if it is needed - if (m_deviceInfo.m_isSoftware) +Result ShaderObjectImpl::_ensureOrdinaryDataBufferCreatedIfNeeded( + PipelineCommandEncoder* encoder, ShaderObjectLayoutImpl* specializedLayout) +{ + // If data has been changed since last allocation/filling of constant buffer, + // we will need to allocate a new one. + // + if (!shouldAllocateConstantBuffer(encoder->m_transientHeap)) { - m_features.add("software-device"); + return SLANG_OK; } - else + m_isConstantBufferDirty = false; + m_cachedTransientHeap = encoder->m_transientHeap; + m_cachedTransientHeapVersion = encoder->m_transientHeap->getVersion(); + + // Computing the size of the ordinary data buffer is *not* just as simple + // as using the size of the `m_data` array that we store.
The reason + // for the added complexity is that interface-type fields may lead to the + // storage being specialized such that it needs extra appended data to + // store the concrete values that logically belong in those interface-type + // fields but wouldn't fit in the fixed-size allocation we gave them. + // + m_constantBufferSize = specializedLayout->getTotalOrdinaryDataSize(); + if (m_constantBufferSize == 0) { - m_features.add("hardware-device"); + return SLANG_OK; } - // NVAPI - if (desc.nvapiExtnSlot >= 0) + // Once we have computed how large the buffer should be, we can allocate + // it from the transient resource heap. + // + auto alignedConstantBufferSize = D3DUtil::calcAligned(m_constantBufferSize, 256); + SLANG_RETURN_ON_FAIL(encoder->m_commandBuffer->m_transientHeap->allocateConstantBuffer( + alignedConstantBufferSize, m_constantBufferWeakPtr, m_constantBufferOffset)); + + // Once the buffer is allocated, we can use `_writeOrdinaryData` to fill it in. + // + // Note that `_writeOrdinaryData` is potentially recursive in the case + // where this object contains interface/existential-type fields, so we + // don't need or want to inline it into this call site. + // + SLANG_RETURN_ON_FAIL(_writeOrdinaryData( + encoder, + static_cast(m_constantBufferWeakPtr), + m_constantBufferOffset, + m_constantBufferSize, + specializedLayout)); + { - if (SLANG_FAILED(NVAPIUtil::initialize())) - { - return SLANG_E_NOT_AVAILABLE; - } + // We also create and store a descriptor for our root constant buffer + // into the descriptor table allocation that was reserved for them. + // + // We always know that the ordinary data buffer will be the first descriptor + // in the table of resource views. 
+ // + auto descriptorTable = m_descriptorSet.resourceTable; + D3D12_CONSTANT_BUFFER_VIEW_DESC viewDesc = {}; + viewDesc.BufferLocation = static_cast(m_constantBufferWeakPtr) + ->m_resource.getResource() + ->GetGPUVirtualAddress() + + m_constantBufferOffset; + viewDesc.SizeInBytes = (UINT)alignedConstantBufferSize; + encoder->m_device->CreateConstantBufferView(&viewDesc, descriptorTable.getCpuHandle()); + } -#ifdef GFX_NVAPI - // From DOCS: Applications are expected to bind null UAV to this slot. - // NOTE! We don't currently do this, but doesn't seem to be a problem. + return SLANG_OK; +} - const NvAPI_Status status = NvAPI_D3D12_SetNvShaderExtnSlotSpace(m_device, NvU32(desc.nvapiExtnSlot), NvU32(0)); - - if (status != NVAPI_OK) +void ShaderObjectImpl::updateSubObjectsRecursive() +{ + if (!m_isMutable) + return; + auto& subObjectRanges = getLayout()->getSubObjectRanges(); + for (Slang::Index subObjectRangeIndex = 0; subObjectRangeIndex < subObjectRanges.getCount(); + subObjectRangeIndex++) + { + auto const& subObjectRange = subObjectRanges[subObjectRangeIndex]; + auto const& bindingRange = getLayout()->getBindingRange(subObjectRange.bindingRangeIndex); + Slang::Index count = bindingRange.count; + + for (Slang::Index subObjectIndexInRange = 0; subObjectIndexInRange < count; + subObjectIndexInRange++) { - return SLANG_E_NOT_AVAILABLE; + Slang::Index objectIndex = bindingRange.subObjectIndex + subObjectIndexInRange; + auto subObject = m_objects[objectIndex].Ptr(); + if (!subObject) + continue; + subObject->updateSubObjectsRecursive(); + if (m_subObjectVersions[objectIndex] != m_objects[objectIndex]->m_version) + { + ShaderOffset offset; + offset.bindingRangeIndex = subObjectRange.bindingRangeIndex; + offset.bindingArrayIndex = subObjectIndexInRange; + setObject(offset, subObject); + } } + } +} + +/// Prepare to bind this object as a parameter block. 
+/// +/// This involves allocating and binding any descriptor tables necessary +/// to to store the state of the object. The function returns a descriptor +/// set formed from any table(s) allocated. In addition, the `ioOffset` +/// parameter will be adjusted to be correct for binding values into +/// the resulting descriptor set. +/// +/// Returns: +/// SLANG_OK when successful, +/// SLANG_E_OUT_OF_MEMORY when descriptor heap is full. +/// + +Result ShaderObjectImpl::prepareToBindAsParameterBlock( + BindingContext* context, + BindingOffset& ioOffset, + ShaderObjectLayoutImpl* specializedLayout, + DescriptorSet& outDescriptorSet) +{ + auto transientHeap = context->transientHeap; + auto submitter = context->submitter; + + // When writing into the new descriptor set, resource and sampler + // descriptors will need to start at index zero in the respective + // tables. + // + ioOffset.resource = 0; + ioOffset.sampler = 0; + + // The index of the next root parameter to bind will be maintained, + // but needs to be incremented by the number of descriptor tables + // we allocate (zero or one resource table and zero or one sampler + // table). + // + auto& rootParamIndex = ioOffset.rootParam; + + if (auto descriptorCount = specializedLayout->getTotalResourceDescriptorCount()) + { + // There is a non-zero number of resource descriptors needed, + // so we will allocate a table out of the appropriate heap, + // and store it into the appropriate part of `descriptorSet`. + // + auto descriptorHeap = &transientHeap->getCurrentViewHeap(); + auto& table = outDescriptorSet.resourceTable; - if (_isSupportedNVAPIOp(m_device, NV_EXTN_OP_UINT64_ATOMIC)) + // Allocate the table. 
+ // + if (!table.allocate(descriptorHeap, descriptorCount)) { - m_features.add("atomic-int64"); + context->outOfMemoryHeap = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV; + return SLANG_E_OUT_OF_MEMORY; } - if (_isSupportedNVAPIOp(m_device, NV_EXTN_OP_FP32_ATOMIC)) + + // Bind the table to the pipeline, consuming the next available + // root parameter. + // + auto tableRootParamIndex = rootParamIndex++; + submitter->setRootDescriptorTable(tableRootParamIndex, table.getGpuHandle()); + } + if (auto descriptorCount = specializedLayout->getTotalSamplerDescriptorCount()) + { + // There is a non-zero number of sampler descriptors needed, + // so we will allocate a table out of the appropriate heap, + // and store it into the appropriate part of `descriptorSet`. + // + auto descriptorHeap = &transientHeap->getCurrentSamplerHeap(); + auto& table = outDescriptorSet.samplerTable; + + // Allocate the table. + // + if (!table.allocate(descriptorHeap, descriptorCount)) { - m_features.add("atomic-float"); + context->outOfMemoryHeap = D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER; + return SLANG_E_OUT_OF_MEMORY; } - m_nvapi = true; -#endif - + // Bind the table to the pipeline, consuming the next available + // root parameter. 
+ // + auto tableRootParamIndex = rootParamIndex++; + submitter->setRootDescriptorTable(tableRootParamIndex, table.getGpuHandle()); } - D3D12_FEATURE_DATA_SHADER_MODEL shaderModelData = {}; - shaderModelData.HighestShaderModel = D3D_SHADER_MODEL_6_6; + return SLANG_OK; +} - // Find what features are supported +bool ShaderObjectImpl::checkIfCachedDescriptorSetIsValidRecursive(BindingContext* context) +{ + if (shouldAllocateConstantBuffer(context->transientHeap)) + return false; + if (m_isMutable && m_version != m_cachedGPUDescriptorSetVersion) + return false; + if (m_cachedGPUDescriptorSet.resourceTable.getDescriptorCount() != 0 && + m_cachedGPUDescriptorSet.resourceTable.m_heap.ptr.linearHeap->getHeap() != + m_cachedTransientHeap->getCurrentViewHeap().getHeap()) + return false; + if (m_cachedGPUDescriptorSet.samplerTable.getDescriptorCount() != 0 && + m_cachedGPUDescriptorSet.samplerTable.m_heap.ptr.linearHeap->getHeap() != + m_cachedTransientHeap->getCurrentSamplerHeap().getHeap()) + return false; + + auto& subObjectRanges = getLayout()->getSubObjectRanges(); + for (Slang::Index subObjectRangeIndex = 0; subObjectRangeIndex < subObjectRanges.getCount(); + subObjectRangeIndex++) { - // Check this is how this is laid out... 
- SLANG_COMPILE_TIME_ASSERT(D3D_SHADER_MODEL_6_0 == 0x60); + auto const& subObjectRange = subObjectRanges[subObjectRangeIndex]; + auto const& bindingRange = getLayout()->getBindingRange(subObjectRange.bindingRangeIndex); + if (bindingRange.bindingType != slang::BindingType::ParameterBlock) + continue; + Slang::Index count = bindingRange.count; + for (Slang::Index subObjectIndexInRange = 0; subObjectIndexInRange < count; + subObjectIndexInRange++) { - // TODO: Currently warp causes a crash when using half, so disable for now - if (SLANG_SUCCEEDED(m_device->CheckFeatureSupport(D3D12_FEATURE_SHADER_MODEL, &shaderModelData, sizeof(shaderModelData))) && - m_deviceInfo.m_isWarp == false && - shaderModelData.HighestShaderModel >= 0x62) - { - // With sm_6_2 we have half - m_features.add("half"); - } + Slang::Index objectIndex = bindingRange.subObjectIndex + subObjectIndexInRange; + auto subObject = m_objects[objectIndex].Ptr(); + if (!subObject) + continue; + if (subObject->checkIfCachedDescriptorSetIsValidRecursive(context)) + return false; } - { - D3D12_FEATURE_DATA_D3D12_OPTIONS options; - if (SLANG_SUCCEEDED(m_device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS, &options, sizeof(options)))) - { - // Check double precision support - if (options.DoublePrecisionFloatShaderOps) - m_features.add("double"); + } + return true; +} - // Check conservative-rasterization support - auto conservativeRasterTier = options.ConservativeRasterizationTier; - if (conservativeRasterTier == D3D12_CONSERVATIVE_RASTERIZATION_TIER_3) - { - m_features.add("conservative-rasterization-3"); - m_features.add("conservative-rasterization-2"); - m_features.add("conservative-rasterization-1"); - } - else if (conservativeRasterTier == D3D12_CONSERVATIVE_RASTERIZATION_TIER_2) - { - m_features.add("conservative-rasterization-2"); - m_features.add("conservative-rasterization-1"); - } - else if (conservativeRasterTier == D3D12_CONSERVATIVE_RASTERIZATION_TIER_1) - { - 
m_features.add("conservative-rasterization-1"); - } +/// Bind this object as a `ParameterBlock` - // Check rasterizer ordered views support - if (options.ROVsSupported) - { - m_features.add("rasterizer-ordered-views"); - } - } - } - { - D3D12_FEATURE_DATA_D3D12_OPTIONS2 options; - if (SLANG_SUCCEEDED(m_device->CheckFeatureSupport( - D3D12_FEATURE_D3D12_OPTIONS2, &options, sizeof(options)))) - { - // Check programmable sample positions support - switch (options.ProgrammableSamplePositionsTier) - { - case D3D12_PROGRAMMABLE_SAMPLE_POSITIONS_TIER_2: - m_features.add("programmable-sample-positions-2"); - m_features.add("programmable-sample-positions-1"); - break; - case D3D12_PROGRAMMABLE_SAMPLE_POSITIONS_TIER_1: - m_features.add("programmable-sample-positions-1"); - break; - default: - break; - } - } - } +Result ShaderObjectImpl::bindAsParameterBlock( + BindingContext* context, BindingOffset const& offset, ShaderObjectLayoutImpl* specializedLayout) +{ + if (checkIfCachedDescriptorSetIsValidRecursive(context)) + { + // If we already have a valid gpu descriptor table in the current + // heap, bind it. 
+ auto rootParamIndex = offset.rootParam; + if (m_cachedGPUDescriptorSet.resourceTable.getDescriptorCount()) { - D3D12_FEATURE_DATA_D3D12_OPTIONS3 options; - if (SLANG_SUCCEEDED(m_device->CheckFeatureSupport( - D3D12_FEATURE_D3D12_OPTIONS3, &options, sizeof(options)))) - { - // Check barycentrics support - if (options.BarycentricsSupported) - { - m_features.add("barycentrics"); - } - } + auto tableRootParamIndex = rootParamIndex++; + context->submitter->setRootDescriptorTable( + tableRootParamIndex, m_cachedGPUDescriptorSet.resourceTable.getGpuHandle()); } - // Check ray tracing support + if (m_cachedGPUDescriptorSet.samplerTable.getDescriptorCount()) { - D3D12_FEATURE_DATA_D3D12_OPTIONS5 options; - if (SLANG_SUCCEEDED(m_device->CheckFeatureSupport( - D3D12_FEATURE_D3D12_OPTIONS5, &options, sizeof(options)))) - { - if (options.RaytracingTier != D3D12_RAYTRACING_TIER_NOT_SUPPORTED) - { - m_features.add("ray-tracing"); - } - if (options.RaytracingTier >= D3D12_RAYTRACING_TIER_1_1) - { - m_features.add("ray-query"); - } - } + auto tableRootParamIndex = rootParamIndex++; + context->submitter->setRootDescriptorTable( + tableRootParamIndex, m_cachedGPUDescriptorSet.samplerTable.getGpuHandle()); } + return SLANG_OK; } - m_desc = desc; + // The first step to binding an object as a parameter block is to allocate a descriptor + // set (consisting of zero or one resource descriptor table and zero or one sampler + // descriptor table) to represent its values. + // + BindingOffset subOffset = offset; + SLANG_RETURN_ON_FAIL(prepareToBindAsParameterBlock( + context, /* inout */ subOffset, specializedLayout, m_cachedGPUDescriptorSet)); - // Create a command queue for internal resource transfer operations. - SLANG_RETURN_ON_FAIL(createCommandQueueImpl(m_resourceCommandQueue.writeRef())); - // `CommandQueueImpl` holds a back reference to `D3D12Device`, make it a weak reference here - // since this object is already owned by `D3D12Device`. 
- m_resourceCommandQueue->breakStrongReferenceToDevice(); - // Retrieve timestamp frequency. - m_resourceCommandQueue->m_d3dQueue->GetTimestampFrequency(&m_info.timestampFrequency); + // Next we bind the object into that descriptor set as if it were being used + // as a `ConstantBuffer`. + // + SLANG_RETURN_ON_FAIL( + bindAsConstantBuffer(context, m_cachedGPUDescriptorSet, subOffset, specializedLayout)); - SLANG_RETURN_ON_FAIL(createTransientResourceHeapImpl(ITransientResourceHeap::Flags::AllowResizing, 0, 8, 4, m_resourceCommandTransientHeap.writeRef())); - // `TransientResourceHeap` holds a back reference to `D3D12Device`, make it a weak reference here - // since this object is already owned by `D3D12Device`. - m_resourceCommandTransientHeap->breakStrongReferenceToDevice(); + m_cachedGPUDescriptorSetVersion = m_version; + return SLANG_OK; +} - m_cpuViewHeap = new D3D12GeneralExpandingDescriptorHeap(); - SLANG_RETURN_ON_FAIL(m_cpuViewHeap->init( - m_device, - 1024 * 1024, - D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, - D3D12_DESCRIPTOR_HEAP_FLAG_NONE)); - m_cpuSamplerHeap = new D3D12GeneralExpandingDescriptorHeap(); - SLANG_RETURN_ON_FAIL(m_cpuSamplerHeap->init( - m_device, 2048, D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER, D3D12_DESCRIPTOR_HEAP_FLAG_NONE)); +/// Bind this object as a `ConstantBuffer` - m_rtvAllocator = new D3D12GeneralExpandingDescriptorHeap(); - SLANG_RETURN_ON_FAIL(m_rtvAllocator->init( - m_device, 16 * 1024, D3D12_DESCRIPTOR_HEAP_TYPE_RTV, D3D12_DESCRIPTOR_HEAP_FLAG_NONE)); - m_dsvAllocator = new D3D12GeneralExpandingDescriptorHeap(); - SLANG_RETURN_ON_FAIL(m_dsvAllocator->init( - m_device, 1024, D3D12_DESCRIPTOR_HEAP_TYPE_DSV, D3D12_DESCRIPTOR_HEAP_FLAG_NONE)); +Result ShaderObjectImpl::bindAsConstantBuffer( + BindingContext* context, + DescriptorSet const& descriptorSet, + BindingOffset const& offset, + ShaderObjectLayoutImpl* specializedLayout) +{ + // If we are to bind as a constant buffer we first need to ensure that + // the ordinary data buffer is 
created, if this object needs one. + // + SLANG_RETURN_ON_FAIL( + _ensureOrdinaryDataBufferCreatedIfNeeded(context->encoder, specializedLayout)); - ComPtr dxgiDevice; - if (m_deviceInfo.m_adapter) + // Next, we need to bind all of the resource descriptors for this object + // (including any ordinary data buffer) into the provided `descriptorSet`. + // + auto resourceCount = specializedLayout->getResourceSlotCount(); + if (resourceCount) { - DXGI_ADAPTER_DESC adapterDesc; - m_deviceInfo.m_adapter->GetDesc(&adapterDesc); - m_adapterName = String::fromWString(adapterDesc.Description); - m_info.adapterName = m_adapterName.begin(); - } + auto& dstTable = descriptorSet.resourceTable; + auto& srcTable = m_descriptorSet.resourceTable; - // Initialize DXR interface. -#if SLANG_GFX_HAS_DXR_SUPPORT - m_device->QueryInterface(m_deviceInfo.m_device5.writeRef()); - m_device5 = m_deviceInfo.m_device5.get(); -#endif - // Check shader model version. - SlangCompileTarget compileTarget = SLANG_DXBC; - const char* profileName = "sm_5_1"; - switch (shaderModelData.HighestShaderModel) - { - case D3D_SHADER_MODEL_5_1: - compileTarget = SLANG_DXBC; - profileName = "sm_5_1"; - break; - case D3D_SHADER_MODEL_6_0: - compileTarget = SLANG_DXIL; - profileName = "sm_6_0"; - break; - case D3D_SHADER_MODEL_6_1: - compileTarget = SLANG_DXIL; - profileName = "sm_6_1"; - break; - case D3D_SHADER_MODEL_6_2: - compileTarget = SLANG_DXIL; - profileName = "sm_6_2"; - break; - case D3D_SHADER_MODEL_6_3: - compileTarget = SLANG_DXIL; - profileName = "sm_6_3"; - break; - case D3D_SHADER_MODEL_6_4: - compileTarget = SLANG_DXIL; - profileName = "sm_6_4"; - break; - case D3D_SHADER_MODEL_6_5: - compileTarget = SLANG_DXIL; - profileName = "sm_6_5"; - break; - default: - compileTarget = SLANG_DXIL; - profileName = "sm_6_6"; - break; + context->device->m_device->CopyDescriptorsSimple( + UINT(resourceCount), + dstTable.getCpuHandle(offset.resource), + srcTable.getCpuHandle(), + 
D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); } - m_features.add(profileName); - // If user specified a higher shader model than what the system supports, return failure. - int userSpecifiedShaderModel = D3DUtil::getShaderModelFromProfileName(desc.slang.targetProfile); - if (userSpecifiedShaderModel > shaderModelData.HighestShaderModel) + + // Finally, we delegate to `_bindImpl` to bind samplers and sub-objects, + // since the logic is shared with the `bindAsValue()` case below. + // + SLANG_RETURN_ON_FAIL(_bindImpl(context, descriptorSet, offset, specializedLayout)); + return SLANG_OK; +} + +/// Bind this object as a value (for an interface-type parameter) + +Result ShaderObjectImpl::bindAsValue( + BindingContext* context, + DescriptorSet const& descriptorSet, + BindingOffset const& offset, + ShaderObjectLayoutImpl* specializedLayout) +{ + // When binding a value for an interface-type field we do *not* want + // to bind a buffer for the ordinary data (if there is any) because + // ordinary data for interface-type fields gets allocated into the + // parent object's ordinary data buffer. + // + // This CPU-memory descriptor table that holds resource descriptors + // will have already been allocated to have space for an ordinary data + // buffer (if needed), so we need to take care to skip over that + // descriptor when copying descriptors from the CPU-memory set + // to the GPU-memory `descriptorSet`. 
+ // + auto skipResourceCount = specializedLayout->getOrdinaryDataBufferCount(); + auto resourceCount = specializedLayout->getResourceSlotCount() - skipResourceCount; + if (resourceCount) { - getDebugCallback()->handleMessage(gfx::DebugMessageType::Error, gfx::DebugMessageSource::Layer, - "The requested shader model is not supported by the system."); - return SLANG_E_NOT_AVAILABLE; + auto& dstTable = descriptorSet.resourceTable; + auto& srcTable = m_descriptorSet.resourceTable; + + context->device->m_device->CopyDescriptorsSimple( + UINT(resourceCount), + dstTable.getCpuHandle(offset.resource), + srcTable.getCpuHandle(skipResourceCount), + D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); } - SLANG_RETURN_ON_FAIL(slangContext.initialize( - desc.slang, - compileTarget, - profileName, - makeArray(slang::PreprocessorMacroDesc{"__D3D12__", "1"}).getView())); - // Allocate a D3D12 "command signature" object that matches the behavior - // of a D3D11-style `DrawInstancedIndirect` operation. - { - D3D12_INDIRECT_ARGUMENT_DESC args; - args.Type = D3D12_INDIRECT_ARGUMENT_TYPE_DRAW; + // Finally, we delegate to `_bindImpl` to bind samplers and sub-objects, + // since the logic is shared with the `bindAsConstantBuffer()` case above. + // + // Note: Just like we had to do some subtle handling of the ordinary data buffer + // above, here we need to contend with the fact that the `offset.resource` fields + // computed for sub-object ranges were baked to take the ordinary data buffer + // into account, so that if `skipResourceCount` is non-zero then they are all + // too high by `skipResourceCount`. + // + // We will address the problem here by computing a modified offset that adjusts + // for the ordinary data buffer that we have not bound after all. 
+ // + BindingOffset subOffset = offset; + subOffset.resource -= skipResourceCount; + SLANG_RETURN_ON_FAIL(_bindImpl(context, descriptorSet, subOffset, specializedLayout)); + return SLANG_OK; +} - D3D12_COMMAND_SIGNATURE_DESC desc; - desc.ByteStride = sizeof(D3D12_DRAW_ARGUMENTS); - desc.NumArgumentDescs = 1; - desc.pArgumentDescs = &args; - desc.NodeMask = 0; +/// Shared logic for `bindAsConstantBuffer()` and `bindAsValue()` + +Result ShaderObjectImpl::_bindImpl( + BindingContext* context, + DescriptorSet const& descriptorSet, + BindingOffset const& offset, + ShaderObjectLayoutImpl* specializedLayout) +{ + // We start by binding all the sampler decriptors, if needed. + // + // Note: resource descriptors were handled in either `bindAsConstantBuffer()` + // or `bindAsValue()` before calling into `_bindImpl()`. + // + if (auto samplerCount = specializedLayout->getSamplerSlotCount()) + { + auto& dstTable = descriptorSet.samplerTable; + auto& srcTable = m_descriptorSet.samplerTable; - SLANG_RETURN_ON_FAIL(m_device->CreateCommandSignature(&desc, nullptr, IID_PPV_ARGS(drawIndirectCmdSignature.writeRef()))); + context->device->m_device->CopyDescriptorsSimple( + UINT(samplerCount), + dstTable.getCpuHandle(offset.sampler), + srcTable.getCpuHandle(), + D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER); } - // Allocate a D3D12 "command signature" object that matches the behavior - // of a D3D11-style `DrawIndexedInstancedIndirect` operation. + // Next we iterate over the sub-object ranges and bind anything they require. 
+ // + auto& subObjectRanges = specializedLayout->getSubObjectRanges(); + auto subObjectRangeCount = subObjectRanges.getCount(); + for (Index i = 0; i < subObjectRangeCount; i++) { - D3D12_INDIRECT_ARGUMENT_DESC args; - args.Type = D3D12_INDIRECT_ARGUMENT_TYPE_DRAW_INDEXED; + auto& subObjectRange = specializedLayout->getSubObjectRange(i); + auto& bindingRange = specializedLayout->getBindingRange(subObjectRange.bindingRangeIndex); + auto subObjectIndex = bindingRange.subObjectIndex; + auto subObjectLayout = subObjectRange.layout.Ptr(); - D3D12_COMMAND_SIGNATURE_DESC desc; - desc.ByteStride = sizeof(D3D12_DRAW_INDEXED_ARGUMENTS); - desc.NumArgumentDescs = 1; - desc.pArgumentDescs = &args; - desc.NodeMask = 0; + BindingOffset rangeOffset = offset; + rangeOffset += subObjectRange.offset; - SLANG_RETURN_ON_FAIL(m_device->CreateCommandSignature(&desc, nullptr, IID_PPV_ARGS(drawIndexedIndirectCmdSignature.writeRef()))); - } + BindingOffset rangeStride = subObjectRange.stride; - // Allocate a D3D12 "command signature" object that matches the behavior - // of a D3D11-style `Dispatch` operation. 
- { - D3D12_INDIRECT_ARGUMENT_DESC args; - args.Type = D3D12_INDIRECT_ARGUMENT_TYPE_DISPATCH; + switch (bindingRange.bindingType) + { + case slang::BindingType::ConstantBuffer: + { + auto objOffset = rangeOffset; + for (uint32_t j = 0; j < bindingRange.count; j++) + { + auto& object = m_objects[subObjectIndex + j]; + SLANG_RETURN_ON_FAIL(object->bindAsConstantBuffer( + context, descriptorSet, objOffset, subObjectLayout)); + objOffset += rangeStride; + } + } + break; - D3D12_COMMAND_SIGNATURE_DESC desc; - desc.ByteStride = sizeof(D3D12_DISPATCH_ARGUMENTS); - desc.NumArgumentDescs = 1; - desc.pArgumentDescs = &args; - desc.NodeMask = 0; + case slang::BindingType::ParameterBlock: + { + auto objOffset = rangeOffset; + for (uint32_t j = 0; j < bindingRange.count; j++) + { + auto& object = m_objects[subObjectIndex + j]; + SLANG_RETURN_ON_FAIL( + object->bindAsParameterBlock(context, objOffset, subObjectLayout)); + objOffset += rangeStride; + } + } + break; - SLANG_RETURN_ON_FAIL(m_device->CreateCommandSignature( - &desc, nullptr, IID_PPV_ARGS(dispatchIndirectCmdSignature.writeRef()))); + case slang::BindingType::ExistentialValue: + if (subObjectLayout) + { + auto objOffset = rangeOffset; + for (uint32_t j = 0; j < bindingRange.count; j++) + { + auto& object = m_objects[subObjectIndex + j]; + SLANG_RETURN_ON_FAIL( + object->bindAsValue(context, descriptorSet, objOffset, subObjectLayout)); + objOffset += rangeStride; + } + } + break; + } } - m_isInitialized = true; + return SLANG_OK; } -namespace +Result ShaderObjectImpl::bindRootArguments(BindingContext* context, uint32_t& index) { - uint32_t getViewDescriptorCount(const ITransientResourceHeap::Desc& desc) + auto layoutImpl = getLayout(); + for (Index i = 0; i < m_rootArguments.getCount(); i++) { - return Math::Max( - Math::Max( - desc.srvDescriptorCount, - desc.uavDescriptorCount, - desc.accelerationStructureDescriptorCount), - desc.constantBufferDescriptorCount, - 2048u); + switch 
(layoutImpl->getRootParameterInfo(i).type) + { + case IResourceView::Type::ShaderResource: + case IResourceView::Type::AccelerationStructure: + context->submitter->setRootSRV(index, m_rootArguments[i]); + break; + case IResourceView::Type::UnorderedAccess: + context->submitter->setRootUAV(index, m_rootArguments[i]); + break; + default: + continue; + } + index++; + } + for (auto& subObject : m_objects) + { + if (subObject) + { + SLANG_RETURN_ON_FAIL(subObject->bindRootArguments(context, index)); + } } -} - -Result D3D12Device::createTransientResourceHeap( - const ITransientResourceHeap::Desc& desc, - ITransientResourceHeap** outHeap) -{ - RefPtr heap; - SLANG_RETURN_ON_FAIL(createTransientResourceHeapImpl( - desc.flags, - desc.constantBufferSize, - getViewDescriptorCount(desc), - Math::Max(1024u, desc.samplerDescriptorCount), - heap.writeRef())); - returnComPtr(outHeap, heap); return SLANG_OK; } -Result D3D12Device::createCommandQueue(const ICommandQueue::Desc& desc, ICommandQueue** outQueue) +/// Get the layout of this shader object with specialization arguments considered +/// +/// This operation should only be called after the shader object has been +/// fully filled in and finalized. +/// + +Result ShaderObjectImpl::getSpecializedLayout(ShaderObjectLayoutImpl** outLayout) { - RefPtr queue; - SLANG_RETURN_ON_FAIL(createCommandQueueImpl(queue.writeRef())); - returnComPtr(outQueue, queue); + if (!m_specializedLayout) + { + SLANG_RETURN_ON_FAIL(_createSpecializedLayout(m_specializedLayout.writeRef())); + } + returnRefPtr(outLayout, m_specializedLayout); return SLANG_OK; } -SLANG_NO_THROW Result SLANG_MCALL D3D12Device::createSwapchain( - const ISwapchain::Desc& desc, WindowHandle window, ISwapchain** outSwapchain) +/// Create the layout for this shader object with specialization arguments considered +/// +/// This operation is virtual so that it can be customized by `RootShaderObject`. 
+/// + +Result ShaderObjectImpl::_createSpecializedLayout(ShaderObjectLayoutImpl** outLayout) { - RefPtr swapchain = new SwapchainImpl(); - SLANG_RETURN_ON_FAIL(swapchain->init(this, desc, window)); - returnComPtr(outSwapchain, swapchain); + ExtendedShaderObjectType extendedType; + SLANG_RETURN_ON_FAIL(getSpecializedShaderObjectType(&extendedType)); + + auto renderer = getRenderer(); + RefPtr layout; + SLANG_RETURN_ON_FAIL(renderer->getShaderObjectLayout( + extendedType.slangType, + m_layout->getContainerType(), + (ShaderObjectLayoutBase**)layout.writeRef())); + + returnRefPtrMove(outLayout, layout); return SLANG_OK; } -SlangResult D3D12Device::readTextureResource( - ITextureResource* resource, - ResourceState state, - ISlangBlob** outBlob, - size_t* outRowPitch, - size_t* outPixelSize) +Result ShaderObjectImpl::setResource(ShaderOffset const& offset, IResourceView* resourceView) { - return captureTextureToSurface( - static_cast(resource), - state, - outBlob, - outRowPitch, - outPixelSize); -} + if (offset.bindingRangeIndex < 0) + return SLANG_E_INVALID_ARG; + auto layout = getLayout(); + if (offset.bindingRangeIndex >= layout->getBindingRangeCount()) + return SLANG_E_INVALID_ARG; -static D3D12_RESOURCE_FLAGS _calcResourceFlag(ResourceState state) -{ - switch (state) - { - case ResourceState::RenderTarget: - return D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET; - case ResourceState::DepthRead: - case ResourceState::DepthWrite: - return D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL; - case ResourceState::UnorderedAccess: - case ResourceState::AccelerationStructure: - return D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS; - default: - return D3D12_RESOURCE_FLAG_NONE; - } -} + m_version++; -static D3D12_RESOURCE_FLAGS _calcResourceFlags(ResourceStateSet states) -{ - int dstFlags = 0; - for (uint32_t i = 0; i < (uint32_t)ResourceState::_Count; i++) - { - auto state = (ResourceState)i; - if (states.contains(state)) - dstFlags |= _calcResourceFlag(state); - } - return 
(D3D12_RESOURCE_FLAGS)dstFlags; -} + ID3D12Device* d3dDevice = static_cast(getDevice())->m_device; -static D3D12_RESOURCE_DIMENSION _calcResourceDimension(IResource::Type type) -{ - switch (type) + auto& bindingRange = layout->getBindingRange(offset.bindingRangeIndex); + + if (bindingRange.isRootParameter && resourceView) { - case IResource::Type::Buffer: return D3D12_RESOURCE_DIMENSION_BUFFER; - case IResource::Type::Texture1D: return D3D12_RESOURCE_DIMENSION_TEXTURE1D; - case IResource::Type::TextureCube: - case IResource::Type::Texture2D: + auto& rootArg = m_rootArguments[bindingRange.baseIndex]; + switch (resourceView->getViewDesc()->type) { - return D3D12_RESOURCE_DIMENSION_TEXTURE2D; + case IResourceView::Type::AccelerationStructure: + { + auto resourceViewImpl = static_cast(resourceView); + rootArg = resourceViewImpl->getDeviceAddress(); + } + break; + case IResourceView::Type::ShaderResource: + case IResourceView::Type::UnorderedAccess: + { + auto resourceViewImpl = static_cast(resourceView); + if (resourceViewImpl->m_resource->isBuffer()) + { + rootArg = static_cast(resourceViewImpl->m_resource.Ptr()) + ->getDeviceAddress(); + } + else + { + getDebugCallback()->handleMessage( + DebugMessageType::Error, + DebugMessageSource::Layer, + "The shader parameter at the specified offset is a root parameter, and " + "therefore can only be a buffer view."); + return SLANG_FAIL; + } + } + break; } - case IResource::Type::Texture3D: return D3D12_RESOURCE_DIMENSION_TEXTURE3D; - default: return D3D12_RESOURCE_DIMENSION_UNKNOWN; + return SLANG_OK; } -} -DXGI_FORMAT getTypelessFormatFromDepthFormat(Format format) -{ - switch (format) + if (resourceView == nullptr) { - case Format::D16_UNORM: - return DXGI_FORMAT_R16_TYPELESS; - case Format::D32_FLOAT: - return DXGI_FORMAT_R32_TYPELESS; - default: - return D3DUtil::getMapFormat(format); + // Create null descriptor for the binding. 
+ auto destDescriptor = m_descriptorSet.resourceTable.getCpuHandle( + bindingRange.baseIndex + (int32_t)offset.bindingArrayIndex); + return createNullDescriptor(d3dDevice, destDescriptor, bindingRange); } -} -BOOL isTypelessDepthFormat(DXGI_FORMAT format) -{ - switch (format) + ResourceViewInternalImpl* internalResourceView = nullptr; + switch (resourceView->getViewDesc()->type) { - case DXGI_FORMAT_R16_TYPELESS: - case DXGI_FORMAT_R32_TYPELESS: - return true; +#if SLANG_GFX_HAS_DXR_SUPPORT + case IResourceView::Type::AccelerationStructure: + { + auto asImpl = static_cast(resourceView); + // Hold a reference to the resource to prevent its destruction. + m_boundResources[bindingRange.baseIndex + offset.bindingArrayIndex] = asImpl->m_buffer; + internalResourceView = asImpl; + } + break; +#endif default: - return false; + { + auto resourceViewImpl = static_cast(resourceView); + // Hold a reference to the resource to prevent its destruction. + m_boundResources[bindingRange.baseIndex + offset.bindingArrayIndex] = + resourceViewImpl->m_resource; + internalResourceView = resourceViewImpl; + } + break; } -} -Result setupResourceDesc(D3D12_RESOURCE_DESC& resourceDesc, const ITextureResource::Desc& srcDesc) -{ - const DXGI_FORMAT pixelFormat = D3DUtil::getMapFormat(srcDesc.format); - if (pixelFormat == DXGI_FORMAT_UNKNOWN) + auto descriptorSlotIndex = bindingRange.baseIndex + (int32_t)offset.bindingArrayIndex; + if (internalResourceView->m_descriptor.cpuHandle.ptr) { - return SLANG_FAIL; + d3dDevice->CopyDescriptorsSimple( + 1, + m_descriptorSet.resourceTable.getCpuHandle( + bindingRange.baseIndex + (int32_t)offset.bindingArrayIndex), + internalResourceView->m_descriptor.cpuHandle, + D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); } - - const int arraySize = calcEffectiveArraySize(srcDesc); - - const D3D12_RESOURCE_DIMENSION dimension = _calcResourceDimension(srcDesc.type); - if (dimension == D3D12_RESOURCE_DIMENSION_UNKNOWN) + else { + getDebugCallback()->handleMessage( + 
DebugMessageType::Error, + DebugMessageSource::Layer, + "IShaderObject::setResource: the resource view cannot be set to this shader parameter. " + "A possible reason is that the view is too large to be supported by D3D12."); return SLANG_FAIL; } - - const int numMipMaps = srcDesc.numMipLevels; - resourceDesc.Dimension = dimension; - resourceDesc.Format = pixelFormat; - resourceDesc.Width = srcDesc.size.width; - resourceDesc.Height = srcDesc.size.height; - resourceDesc.DepthOrArraySize = (srcDesc.size.depth > 1) ? srcDesc.size.depth : arraySize; - - resourceDesc.MipLevels = numMipMaps; - resourceDesc.SampleDesc.Count = srcDesc.sampleDesc.numSamples; - resourceDesc.SampleDesc.Quality = srcDesc.sampleDesc.quality; - - resourceDesc.Flags = D3D12_RESOURCE_FLAG_NONE; - resourceDesc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN; - - resourceDesc.Flags |= _calcResourceFlags(srcDesc.allowedStates); - - resourceDesc.Alignment = 0; - - if (isDepthFormat(srcDesc.format) && - (srcDesc.allowedStates.contains(ResourceState::ShaderResource) || - srcDesc.allowedStates.contains(ResourceState::UnorderedAccess))) - { - resourceDesc.Format = getTypelessFormatFromDepthFormat(srcDesc.format); - } - return SLANG_OK; } -Result D3D12Device::getTextureAllocationInfo( - const ITextureResource::Desc& desc, size_t* outSize, size_t* outAlignment) +void PipelineStateImpl::init(const GraphicsPipelineStateDesc& inDesc) { - TextureResource::Desc srcDesc = fixupTextureDesc(desc); - D3D12_RESOURCE_DESC resourceDesc = {}; - setupResourceDesc(resourceDesc, srcDesc); - auto allocInfo = m_device->GetResourceAllocationInfo(0, 1, &resourceDesc); - *outSize = (size_t)allocInfo.SizeInBytes; - *outAlignment = (size_t)allocInfo.Alignment; - return SLANG_OK; + PipelineStateDesc pipelineDesc; + pipelineDesc.type = PipelineType::Graphics; + pipelineDesc.graphics = inDesc; + initializeBase(pipelineDesc); } -Result D3D12Device::getTextureRowAlignment(size_t* outAlignment) +void PipelineStateImpl::init(const 
ComputePipelineStateDesc& inDesc) { - *outAlignment = D3D12_TEXTURE_DATA_PITCH_ALIGNMENT; - return SLANG_OK; + PipelineStateDesc pipelineDesc; + pipelineDesc.type = PipelineType::Compute; + pipelineDesc.compute = inDesc; + initializeBase(pipelineDesc); } -Result D3D12Device::createTextureResource(const ITextureResource::Desc& descIn, const ITextureResource::SubresourceData* initData, ITextureResource** outResource) +Result PipelineStateImpl::getNativeHandle(InteropHandle* outHandle) { - // Description of uploading on Dx12 - // https://msdn.microsoft.com/en-us/library/windows/desktop/dn899215%28v=vs.85%29.aspx - - TextureResource::Desc srcDesc = fixupTextureDesc(descIn); - - D3D12_RESOURCE_DESC resourceDesc = {}; - setupResourceDesc(resourceDesc, srcDesc); - const int arraySize = calcEffectiveArraySize(srcDesc); - const int numMipMaps = srcDesc.numMipLevels; + SLANG_RETURN_ON_FAIL(ensureAPIPipelineStateCreated()); + outHandle->api = InteropHandleAPI::D3D12; + outHandle->handleValue = reinterpret_cast(m_pipelineState.get()); + return SLANG_OK; +} - RefPtr texture(new TextureResourceImpl(srcDesc)); +Result PipelineStateImpl::ensureAPIPipelineStateCreated() +{ + if (m_pipelineState) + return SLANG_OK; - // Create the target resource + auto programImpl = static_cast(m_program.Ptr()); + if (programImpl->m_shaders.getCount() == 0) { - D3D12_HEAP_PROPERTIES heapProps; + SLANG_RETURN_ON_FAIL(programImpl->compileShaders()); + } + if (desc.type == PipelineType::Graphics) + { + // Only actually create a D3D12 pipeline state if the pipeline is fully specialized. 
+ auto inputLayoutImpl = (InputLayoutImpl*)desc.graphics.inputLayout; - heapProps.Type = D3D12_HEAP_TYPE_DEFAULT; - heapProps.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN; - heapProps.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN; - heapProps.CreationNodeMask = 1; - heapProps.VisibleNodeMask = 1; + // Describe and create the graphics pipeline state object (PSO) + D3D12_GRAPHICS_PIPELINE_STATE_DESC psoDesc = {}; - D3D12_HEAP_FLAGS flags = D3D12_HEAP_FLAG_NONE; - if (descIn.isShared) flags |= D3D12_HEAP_FLAG_SHARED; + psoDesc.pRootSignature = programImpl->m_rootObjectLayout->m_rootSignature; - D3D12_CLEAR_VALUE clearValue; - D3D12_CLEAR_VALUE* clearValuePtr = &clearValue; - if ((resourceDesc.Flags & (D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET | - D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL)) == 0) - { - clearValuePtr = nullptr; - } - if (isTypelessDepthFormat(resourceDesc.Format)) + for (auto& shaderBin : programImpl->m_shaders) { - clearValuePtr = nullptr; - } - clearValue.Format = resourceDesc.Format; - memcpy(clearValue.Color, &descIn.optimalClearValue.color, sizeof(clearValue.Color)); - clearValue.DepthStencil.Depth = descIn.optimalClearValue.depthStencil.depth; - clearValue.DepthStencil.Stencil = descIn.optimalClearValue.depthStencil.stencil; - SLANG_RETURN_ON_FAIL(texture->m_resource.initCommitted( - m_device, - heapProps, - flags, - resourceDesc, - D3D12_RESOURCE_STATE_COPY_DEST, - clearValuePtr)); - - texture->m_resource.setDebugName(L"Texture"); - } - - // Calculate the layout - List layouts; - layouts.setCount(numMipMaps); - List mipRowSizeInBytes; - mipRowSizeInBytes.setCount(srcDesc.numMipLevels); - List mipNumRows; - mipNumRows.setCount(numMipMaps); - - // NOTE! 
This is just the size for one array upload -> not for the whole texture - UInt64 requiredSize = 0; - m_device->GetCopyableFootprints( - &resourceDesc, - 0, - srcDesc.numMipLevels, - 0, - layouts.begin(), - mipNumRows.begin(), - mipRowSizeInBytes.begin(), - &requiredSize); + switch (shaderBin.stage) + { + case SLANG_STAGE_VERTEX: + psoDesc.VS = {shaderBin.code.getBuffer(), SIZE_T(shaderBin.code.getCount())}; + break; + case SLANG_STAGE_FRAGMENT: + psoDesc.PS = {shaderBin.code.getBuffer(), SIZE_T(shaderBin.code.getCount())}; + break; + case SLANG_STAGE_DOMAIN: + psoDesc.DS = {shaderBin.code.getBuffer(), SIZE_T(shaderBin.code.getCount())}; + break; + case SLANG_STAGE_HULL: + psoDesc.HS = {shaderBin.code.getBuffer(), SIZE_T(shaderBin.code.getCount())}; + break; + case SLANG_STAGE_GEOMETRY: + psoDesc.GS = {shaderBin.code.getBuffer(), SIZE_T(shaderBin.code.getCount())}; + break; + default: + getDebugCallback()->handleMessage( + DebugMessageType::Error, + DebugMessageSource::Layer, + "Unsupported shader stage."); + return SLANG_E_NOT_AVAILABLE; + } + } - // Sub resource indexing - // https://msdn.microsoft.com/en-us/library/windows/desktop/dn705766(v=vs.85).aspx#subresource_indexing - if (initData) - { - // Create the upload texture - D3D12Resource uploadTexture; - + if (inputLayoutImpl) { - D3D12_HEAP_PROPERTIES heapProps; - - heapProps.Type = D3D12_HEAP_TYPE_UPLOAD; - heapProps.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN; - heapProps.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN; - heapProps.CreationNodeMask = 1; - heapProps.VisibleNodeMask = 1; - - D3D12_RESOURCE_DESC uploadResourceDesc; - - uploadResourceDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; - uploadResourceDesc.Format = DXGI_FORMAT_UNKNOWN; - uploadResourceDesc.Width = requiredSize; - uploadResourceDesc.Height = 1; - uploadResourceDesc.DepthOrArraySize = 1; - uploadResourceDesc.MipLevels = 1; - uploadResourceDesc.SampleDesc.Count = 1; - uploadResourceDesc.SampleDesc.Quality = 0; - 
uploadResourceDesc.Flags = D3D12_RESOURCE_FLAG_NONE; - uploadResourceDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; - uploadResourceDesc.Alignment = 0; - - SLANG_RETURN_ON_FAIL(uploadTexture.initCommitted(m_device, heapProps, D3D12_HEAP_FLAG_NONE, uploadResourceDesc, D3D12_RESOURCE_STATE_GENERIC_READ, nullptr)); - - uploadTexture.setDebugName(L"TextureUpload"); + psoDesc.InputLayout = { + inputLayoutImpl->m_elements.getBuffer(), + UINT(inputLayoutImpl->m_elements.getCount())}; } - // Get the pointer to the upload resource - ID3D12Resource* uploadResource = uploadTexture; - int subResourceIndex = 0; - for (int arrayIndex = 0; arrayIndex < arraySize; arrayIndex++) + psoDesc.PrimitiveTopologyType = D3DUtil::getPrimitiveType(desc.graphics.primitiveType); + { - uint8_t* p; - uploadResource->Map(0, nullptr, reinterpret_cast(&p)); + auto framebufferLayout = + static_cast(desc.graphics.framebufferLayout); + const int numRenderTargets = int(framebufferLayout->m_renderTargets.getCount()); - for (int j = 0; j < numMipMaps; ++j) + if (framebufferLayout->m_hasDepthStencil) { - auto srcSubresource = initData[subResourceIndex + j]; - - const D3D12_PLACED_SUBRESOURCE_FOOTPRINT& layout = layouts[j]; - const D3D12_SUBRESOURCE_FOOTPRINT& footprint = layout.Footprint; - - TextureResource::Size mipSize = calcMipSize(srcDesc.size, j); - if (gfxIsCompressedFormat(descIn.format)) - { - mipSize.width = int(D3DUtil::calcAligned(mipSize.width, 4)); - mipSize.height = int(D3DUtil::calcAligned(mipSize.height, 4)); - } - - assert(footprint.Width == mipSize.width && footprint.Height == mipSize.height && footprint.Depth == mipSize.depth); - - auto mipRowSize = mipRowSizeInBytes[j]; - - const ptrdiff_t dstMipRowPitch = ptrdiff_t(footprint.RowPitch); - const ptrdiff_t srcMipRowPitch = ptrdiff_t(srcSubresource.strideY); - - const ptrdiff_t dstMipLayerPitch = ptrdiff_t(footprint.RowPitch*footprint.Height); - const ptrdiff_t srcMipLayerPitch = ptrdiff_t(srcSubresource.strideZ); - - // Our outer loop 
will copy the depth layers one at a time. - // - const uint8_t* srcLayer = (const uint8_t*) srcSubresource.data; - uint8_t* dstLayer = p + layouts[j].Offset; - for (int l = 0; l < mipSize.depth; l++) + psoDesc.DSVFormat = D3DUtil::getMapFormat(framebufferLayout->m_depthStencil.format); + psoDesc.SampleDesc.Count = framebufferLayout->m_depthStencil.sampleCount; + } + else + { + psoDesc.DSVFormat = DXGI_FORMAT_UNKNOWN; + if (framebufferLayout->m_renderTargets.getCount()) { - // Our inner loop will copy the rows one at a time. - // - const uint8_t* srcRow = srcLayer; - uint8_t* dstRow = dstLayer; - int j = gfxIsCompressedFormat(descIn.format) ? 4 : 1; // BC compressed formats are organized into 4x4 blocks - for (int k = 0; k < mipSize.height; k += j) - { - ::memcpy(dstRow, srcRow, (size_t)mipRowSize); - - srcRow += srcMipRowPitch; - dstRow += dstMipRowPitch; - } - - srcLayer += srcMipLayerPitch; - dstLayer += dstMipLayerPitch; + psoDesc.SampleDesc.Count = framebufferLayout->m_renderTargets[0].sampleCount; } - - //assert(srcRow == (const uint8_t*)(srcMip.getBuffer() + srcMip.getCount())); } - uploadResource->Unmap(0, nullptr); - - auto encodeInfo = encodeResourceCommands(); - for (int mipIndex = 0; mipIndex < numMipMaps; ++mipIndex) + psoDesc.NumRenderTargets = numRenderTargets; + for (Int i = 0; i < numRenderTargets; i++) { - // https://msdn.microsoft.com/en-us/library/windows/desktop/dn903862(v=vs.85).aspx - - D3D12_TEXTURE_COPY_LOCATION src; - src.pResource = uploadTexture; - src.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; - src.PlacedFootprint = layouts[mipIndex]; - - D3D12_TEXTURE_COPY_LOCATION dst; - dst.pResource = texture->m_resource; - dst.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; - dst.SubresourceIndex = subResourceIndex; - encodeInfo.d3dCommandList->CopyTextureRegion(&dst, 0, 0, 0, &src, nullptr); - - subResourceIndex++; + psoDesc.RTVFormats[i] = + D3DUtil::getMapFormat(framebufferLayout->m_renderTargets[i].format); } - // Block - waiting for 
copy to complete (so can drop upload texture) - submitResourceCommandsAndWait(encodeInfo); + psoDesc.SampleDesc.Quality = 0; + psoDesc.SampleMask = UINT_MAX; } - } - { - auto encodeInfo = encodeResourceCommands(); + { - D3D12BarrierSubmitter submitter(encodeInfo.d3dCommandList); - texture->m_resource.transition( - D3D12_RESOURCE_STATE_COPY_DEST, texture->m_defaultState, submitter); + auto& rs = psoDesc.RasterizerState; + rs.FillMode = D3DUtil::getFillMode(desc.graphics.rasterizer.fillMode); + rs.CullMode = D3DUtil::getCullMode(desc.graphics.rasterizer.cullMode); + rs.FrontCounterClockwise = + desc.graphics.rasterizer.frontFace == gfx::FrontFaceMode::CounterClockwise ? TRUE + : FALSE; + rs.DepthBias = desc.graphics.rasterizer.depthBias; + rs.DepthBiasClamp = desc.graphics.rasterizer.depthBiasClamp; + rs.SlopeScaledDepthBias = desc.graphics.rasterizer.slopeScaledDepthBias; + rs.DepthClipEnable = desc.graphics.rasterizer.depthClipEnable ? TRUE : FALSE; + rs.MultisampleEnable = desc.graphics.rasterizer.multisampleEnable ? TRUE : FALSE; + rs.AntialiasedLineEnable = + desc.graphics.rasterizer.antialiasedLineEnable ? TRUE : FALSE; + rs.ForcedSampleCount = desc.graphics.rasterizer.forcedSampleCount; + rs.ConservativeRaster = desc.graphics.rasterizer.enableConservativeRasterization + ? 
D3D12_CONSERVATIVE_RASTERIZATION_MODE_ON + : D3D12_CONSERVATIVE_RASTERIZATION_MODE_OFF; } - submitResourceCommandsAndWait(encodeInfo); - } - - returnComPtr(outResource, texture); - return SLANG_OK; -} - -Result D3D12Device::createTextureFromNativeHandle(InteropHandle handle, const ITextureResource::Desc& srcDesc, ITextureResource** outResource) -{ - RefPtr texture(new TextureResourceImpl(srcDesc)); - - if (handle.api == InteropHandleAPI::D3D12) - { - texture->m_resource.setResource((ID3D12Resource*)handle.handleValue); - } - else - { - return SLANG_FAIL; - } - returnComPtr(outResource, texture); - return SLANG_OK; -} - -Result D3D12Device::createBufferResource(const IBufferResource::Desc& descIn, const void* initData, IBufferResource** outResource) -{ - BufferResource::Desc srcDesc = fixupBufferDesc(descIn); - - RefPtr buffer(new BufferResourceImpl(srcDesc)); - - D3D12_RESOURCE_DESC bufferDesc; - _initBufferResourceDesc(descIn.sizeInBytes, bufferDesc); - - bufferDesc.Flags |= _calcResourceFlags(srcDesc.allowedStates); - - const D3D12_RESOURCE_STATES initialState = buffer->m_defaultState; - SLANG_RETURN_ON_FAIL(createBuffer( - bufferDesc, - initData, - srcDesc.sizeInBytes, - initialState, - buffer->m_resource, - descIn.isShared, - descIn.memoryType)); - - returnComPtr(outResource, buffer); - return SLANG_OK; -} - -Result D3D12Device::createBufferFromNativeHandle(InteropHandle handle, const IBufferResource::Desc& srcDesc, IBufferResource** outResource) -{ - RefPtr buffer(new BufferResourceImpl(srcDesc)); - - if (handle.api == InteropHandleAPI::D3D12) - { - buffer->m_resource.setResource((ID3D12Resource*)handle.handleValue); - } - else - { - return SLANG_FAIL; - } - - returnComPtr(outResource, buffer); - return SLANG_OK; -} - -D3D12_FILTER_TYPE translateFilterMode(TextureFilteringMode mode) -{ - switch (mode) - { - default: - return D3D12_FILTER_TYPE(0); - -#define CASE(SRC, DST) \ - case TextureFilteringMode::SRC: return D3D12_FILTER_TYPE_##DST - - CASE(Point, 
POINT); - CASE(Linear, LINEAR); - -#undef CASE - } -} - -D3D12_FILTER_REDUCTION_TYPE translateFilterReduction(TextureReductionOp op) -{ - switch (op) - { - default: - return D3D12_FILTER_REDUCTION_TYPE(0); - -#define CASE(SRC, DST) \ - case TextureReductionOp::SRC: return D3D12_FILTER_REDUCTION_TYPE_##DST - - CASE(Average, STANDARD); - CASE(Comparison, COMPARISON); - CASE(Minimum, MINIMUM); - CASE(Maximum, MAXIMUM); - -#undef CASE - } -} - -D3D12_TEXTURE_ADDRESS_MODE translateAddressingMode(TextureAddressingMode mode) -{ - switch (mode) - { - default: - return D3D12_TEXTURE_ADDRESS_MODE(0); - -#define CASE(SRC, DST) \ - case TextureAddressingMode::SRC: return D3D12_TEXTURE_ADDRESS_MODE_##DST - - CASE(Wrap, WRAP); - CASE(ClampToEdge, CLAMP); - CASE(ClampToBorder, BORDER); - CASE(MirrorRepeat, MIRROR); - CASE(MirrorOnce, MIRROR_ONCE); - -#undef CASE - } -} + { + D3D12_BLEND_DESC& blend = psoDesc.BlendState; + blend.IndependentBlendEnable = FALSE; + blend.AlphaToCoverageEnable = desc.graphics.blend.alphaToCoverageEnable ? TRUE : FALSE; + blend.RenderTarget[0].RenderTargetWriteMask = (uint8_t)RenderTargetWriteMask::EnableAll; + for (uint32_t i = 0; i < desc.graphics.blend.targetCount; i++) + { + auto& d3dDesc = blend.RenderTarget[i]; + d3dDesc.BlendEnable = desc.graphics.blend.targets[i].enableBlend ? 
TRUE : FALSE; + d3dDesc.BlendOp = D3DUtil::getBlendOp(desc.graphics.blend.targets[i].color.op); + d3dDesc.BlendOpAlpha = D3DUtil::getBlendOp(desc.graphics.blend.targets[i].alpha.op); + d3dDesc.DestBlend = + D3DUtil::getBlendFactor(desc.graphics.blend.targets[i].color.dstFactor); + d3dDesc.DestBlendAlpha = + D3DUtil::getBlendFactor(desc.graphics.blend.targets[i].alpha.dstFactor); + d3dDesc.LogicOp = D3D12_LOGIC_OP_NOOP; + d3dDesc.LogicOpEnable = FALSE; + d3dDesc.RenderTargetWriteMask = desc.graphics.blend.targets[i].writeMask; + d3dDesc.SrcBlend = + D3DUtil::getBlendFactor(desc.graphics.blend.targets[i].color.srcFactor); + d3dDesc.SrcBlendAlpha = + D3DUtil::getBlendFactor(desc.graphics.blend.targets[i].alpha.srcFactor); + } + for (uint32_t i = 1; i < desc.graphics.blend.targetCount; i++) + { + if (memcmp( + &desc.graphics.blend.targets[i], + &desc.graphics.blend.targets[0], + sizeof(desc.graphics.blend.targets[0])) != 0) + { + blend.IndependentBlendEnable = TRUE; + break; + } + } + for (uint32_t i = (uint32_t)desc.graphics.blend.targetCount; + i < D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT; + ++i) + { + blend.RenderTarget[i] = blend.RenderTarget[0]; + } + } -static D3D12_COMPARISON_FUNC translateComparisonFunc(ComparisonFunc func) -{ - switch (func) - { - default: - // TODO: need to report failures - return D3D12_COMPARISON_FUNC_ALWAYS; + { + auto& ds = psoDesc.DepthStencilState; -#define CASE(FROM, TO) \ - case ComparisonFunc::FROM: return D3D12_COMPARISON_FUNC_##TO + ds.DepthEnable = desc.graphics.depthStencil.depthTestEnable; + ds.DepthWriteMask = desc.graphics.depthStencil.depthWriteEnable + ? 
D3D12_DEPTH_WRITE_MASK_ALL + : D3D12_DEPTH_WRITE_MASK_ZERO; + ds.DepthFunc = D3DUtil::getComparisonFunc(desc.graphics.depthStencil.depthFunc); + ds.StencilEnable = desc.graphics.depthStencil.stencilEnable; + ds.StencilReadMask = (UINT8)desc.graphics.depthStencil.stencilReadMask; + ds.StencilWriteMask = (UINT8)desc.graphics.depthStencil.stencilWriteMask; + ds.FrontFace = D3DUtil::translateStencilOpDesc(desc.graphics.depthStencil.frontFace); + ds.BackFace = D3DUtil::translateStencilOpDesc(desc.graphics.depthStencil.backFace); + } - CASE(Never, NEVER); - CASE(Less, LESS); - CASE(Equal, EQUAL); - CASE(LessEqual, LESS_EQUAL); - CASE(Greater, GREATER); - CASE(NotEqual, NOT_EQUAL); - CASE(GreaterEqual, GREATER_EQUAL); - CASE(Always, ALWAYS); -#undef CASE - } -} + psoDesc.PrimitiveTopologyType = D3DUtil::getPrimitiveType(desc.graphics.primitiveType); -Result D3D12Device::createSamplerState(ISamplerState::Desc const& desc, ISamplerState** outSampler) -{ - D3D12_FILTER_REDUCTION_TYPE dxReduction = translateFilterReduction(desc.reductionOp); - D3D12_FILTER dxFilter; - if (desc.maxAnisotropy > 1) - { - dxFilter = D3D12_ENCODE_ANISOTROPIC_FILTER(dxReduction); + if (m_device->m_pipelineCreationAPIDispatcher) + { + SLANG_RETURN_ON_FAIL( + m_device->m_pipelineCreationAPIDispatcher->createGraphicsPipelineState( + m_device, + programImpl->linkedProgram.get(), + &psoDesc, + (void**)m_pipelineState.writeRef())); + } + else + { + SLANG_RETURN_ON_FAIL(m_device->m_device->CreateGraphicsPipelineState( + &psoDesc, IID_PPV_ARGS(m_pipelineState.writeRef()))); + } } else { - D3D12_FILTER_TYPE dxMin = translateFilterMode(desc.minFilter); - D3D12_FILTER_TYPE dxMag = translateFilterMode(desc.magFilter); - D3D12_FILTER_TYPE dxMip = translateFilterMode(desc.mipFilter); - - dxFilter = D3D12_ENCODE_BASIC_FILTER(dxMin, dxMag, dxMip, dxReduction); - } - - D3D12_SAMPLER_DESC dxDesc = {}; - dxDesc.Filter = dxFilter; - dxDesc.AddressU = translateAddressingMode(desc.addressU); - dxDesc.AddressV = 
translateAddressingMode(desc.addressV); - dxDesc.AddressW = translateAddressingMode(desc.addressW); - dxDesc.MipLODBias = desc.mipLODBias; - dxDesc.MaxAnisotropy = desc.maxAnisotropy; - dxDesc.ComparisonFunc = translateComparisonFunc(desc.comparisonFunc); - for (int ii = 0; ii < 4; ++ii) - dxDesc.BorderColor[ii] = desc.borderColor[ii]; - dxDesc.MinLOD = desc.minLOD; - dxDesc.MaxLOD = desc.maxLOD; - auto& samplerHeap = m_cpuSamplerHeap; + // Only actually create a D3D12 pipeline state if the pipeline is fully specialized. + ComPtr pipelineState; + if (!programImpl->isSpecializable()) + { + // Describe and create the compute pipeline state object + D3D12_COMPUTE_PIPELINE_STATE_DESC computeDesc = {}; + computeDesc.pRootSignature = + desc.compute.d3d12RootSignatureOverride + ? static_cast(desc.compute.d3d12RootSignatureOverride) + : programImpl->m_rootObjectLayout->m_rootSignature; + computeDesc.CS = { + programImpl->m_shaders[0].code.getBuffer(), + SIZE_T(programImpl->m_shaders[0].code.getCount())}; - D3D12Descriptor cpuDescriptor; - samplerHeap->allocate(&cpuDescriptor); - m_device->CreateSampler(&dxDesc, cpuDescriptor.cpuHandle); +#ifdef GFX_NVAPI + if (m_nvapi) + { + // Also fill the extension structure. + // Use the same UAV slot index and register space that are declared in the shader. - // TODO: We really ought to have a free-list of sampler-heap - // entries that we check before we go to the heap, and then - // when we are done with a sampler we simply add it to the free list. 
- // - RefPtr samplerImpl = new SamplerStateImpl(); - samplerImpl->m_allocator = samplerHeap; - samplerImpl->m_descriptor = cpuDescriptor; - returnComPtr(outSampler, samplerImpl); - return SLANG_OK; -} + // For simplicities sake we just use u0 + NVAPI_D3D12_PSO_SET_SHADER_EXTENSION_SLOT_DESC extensionDesc; + extensionDesc.baseVersion = NV_PSO_EXTENSION_DESC_VER; + extensionDesc.version = NV_SET_SHADER_EXTENSION_SLOT_DESC_VER; + extensionDesc.uavSlot = 0; + extensionDesc.registerSpace = 0; -Result D3D12Device::createTextureView(ITextureResource* texture, IResourceView::Desc const& desc, IResourceView** outView) -{ - auto resourceImpl = (TextureResourceImpl*) texture; + // Put the pointer to the extension into an array - there can be multiple extensions + // enabled at once. + const NVAPI_D3D12_PSO_EXTENSION_DESC* extensions[] = {&extensionDesc}; - RefPtr viewImpl = new ResourceViewImpl(); - viewImpl->m_resource = resourceImpl; - viewImpl->m_desc = desc; - bool isArray = resourceImpl ? resourceImpl->getDesc()->arraySize != 0 : false; - bool isMultiSample = resourceImpl ? resourceImpl->getDesc()->sampleDesc.numSamples > 1: false; - switch (desc.type) - { - default: - return SLANG_FAIL; + // Now create the PSO. + const NvAPI_Status nvapiStatus = NvAPI_D3D12_CreateComputePipelineState( + m_device->m_device, + &computeDesc, + SLANG_COUNT_OF(extensions), + extensions, + m_pipelineState.writeRef()); - case IResourceView::Type::RenderTarget: - { - SLANG_RETURN_ON_FAIL(m_rtvAllocator->allocate(&viewImpl->m_descriptor)); - viewImpl->m_allocator = m_rtvAllocator; - D3D12_RENDER_TARGET_VIEW_DESC rtvDesc = {}; - rtvDesc.Format = D3DUtil::getMapFormat(desc.format); - isArray = desc.subresourceRange.layerCount > 1; - switch (desc.renderTarget.shape) - { - case IResource::Type::Texture1D: - rtvDesc.ViewDimension = isArray ? 
D3D12_RTV_DIMENSION_TEXTURE1DARRAY - : D3D12_RTV_DIMENSION_TEXTURE1D; - rtvDesc.Texture1D.MipSlice = desc.subresourceRange.mipLevel; - break; - case IResource::Type::Texture2D: - if (isMultiSample) - { - rtvDesc.ViewDimension = isArray ? D3D12_RTV_DIMENSION_TEXTURE2DMSARRAY - : D3D12_RTV_DIMENSION_TEXTURE2DMS; - rtvDesc.Texture2DMSArray.ArraySize = desc.subresourceRange.layerCount; - rtvDesc.Texture2DMSArray.FirstArraySlice = desc.subresourceRange.baseArrayLayer; - } - else + if (nvapiStatus != NVAPI_OK) { - rtvDesc.ViewDimension = isArray ? D3D12_RTV_DIMENSION_TEXTURE2DARRAY - : D3D12_RTV_DIMENSION_TEXTURE2D; - rtvDesc.Texture2DArray.MipSlice = desc.subresourceRange.mipLevel; - rtvDesc.Texture2DArray.PlaneSlice = - resourceImpl ? D3DUtil::getPlaneSlice( - D3DUtil::getMapFormat(resourceImpl->getDesc()->format), - desc.subresourceRange.aspectMask) - : 0; - rtvDesc.Texture2DArray.ArraySize = desc.subresourceRange.layerCount; - rtvDesc.Texture2DArray.FirstArraySlice = desc.subresourceRange.baseArrayLayer; + return SLANG_FAIL; } - break; - case IResource::Type::Texture3D: - rtvDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE3D; - rtvDesc.Texture3D.MipSlice = desc.subresourceRange.mipLevel; - rtvDesc.Texture3D.FirstWSlice = desc.subresourceRange.baseArrayLayer; - rtvDesc.Texture3D.WSize = desc.subresourceRange.layerCount; - break; - case IResource::Type::Buffer: - rtvDesc.ViewDimension = D3D12_RTV_DIMENSION_BUFFER; - break; - default: - return SLANG_FAIL; } - m_device->CreateRenderTargetView( - resourceImpl ? 
resourceImpl->m_resource.getResource() : nullptr, - &rtvDesc, - viewImpl->m_descriptor.cpuHandle); - } - break; - - case IResourceView::Type::DepthStencil: - { - SLANG_RETURN_ON_FAIL(m_dsvAllocator->allocate(&viewImpl->m_descriptor)); - viewImpl->m_allocator = m_dsvAllocator; - D3D12_DEPTH_STENCIL_VIEW_DESC dsvDesc = {}; - dsvDesc.Format = D3DUtil::getMapFormat(desc.format); - isArray = desc.subresourceRange.layerCount > 1; - switch (desc.renderTarget.shape) + else +#endif { - case IResource::Type::Texture1D: - dsvDesc.ViewDimension = D3D12_DSV_DIMENSION_TEXTURE1D; - dsvDesc.Texture1D.MipSlice = desc.subresourceRange.mipLevel; - break; - case IResource::Type::Texture2D: - if (isMultiSample) + if (m_device->m_pipelineCreationAPIDispatcher) { - dsvDesc.ViewDimension = isArray ? D3D12_DSV_DIMENSION_TEXTURE2DMSARRAY - : D3D12_DSV_DIMENSION_TEXTURE2DMS; - dsvDesc.Texture2DMSArray.ArraySize = desc.subresourceRange.layerCount; - dsvDesc.Texture2DMSArray.FirstArraySlice = desc.subresourceRange.baseArrayLayer; + SLANG_RETURN_ON_FAIL( + m_device->m_pipelineCreationAPIDispatcher->createComputePipelineState( + m_device, + programImpl->linkedProgram.get(), + &computeDesc, + (void**)m_pipelineState.writeRef())); } else { - dsvDesc.ViewDimension = isArray ? D3D12_DSV_DIMENSION_TEXTURE2DARRAY - : D3D12_DSV_DIMENSION_TEXTURE2D; - dsvDesc.Texture2DArray.MipSlice = desc.subresourceRange.mipLevel; - dsvDesc.Texture2DArray.ArraySize = desc.subresourceRange.layerCount; - dsvDesc.Texture2DArray.FirstArraySlice = desc.subresourceRange.baseArrayLayer; + SLANG_RETURN_ON_FAIL(m_device->m_device->CreateComputePipelineState( + &computeDesc, IID_PPV_ARGS(m_pipelineState.writeRef()))); } - break; - default: - return SLANG_FAIL; } - m_device->CreateDepthStencilView( - resourceImpl ? 
resourceImpl->m_resource.getResource() : nullptr, - &dsvDesc, - viewImpl->m_descriptor.cpuHandle); } - break; + } + + return SLANG_OK; +} + +// Swapchain Implementation + +Result SwapchainImpl::init( + DeviceImpl* renderer, const ISwapchain::Desc& swapchainDesc, WindowHandle window) +{ + m_queue = static_cast(swapchainDesc.queue)->m_d3dQueue; + m_dxgiFactory = renderer->m_deviceInfo.m_dxgiFactory; + SLANG_RETURN_ON_FAIL( + D3DSwapchainBase::init(swapchainDesc, window, DXGI_SWAP_EFFECT_FLIP_DISCARD)); + renderer->m_device->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(m_fence.writeRef())); + + SLANG_RETURN_ON_FAIL(m_swapChain->QueryInterface(m_swapChain3.writeRef())); + for (uint32_t i = 0; i < swapchainDesc.imageCount; i++) + { + m_frameEvents.add(CreateEventEx( + nullptr, + false, + CREATE_EVENT_INITIAL_SET | CREATE_EVENT_MANUAL_RESET, + EVENT_ALL_ACCESS)); + } + return SLANG_OK; +} + +Result SwapchainImpl::resize(uint32_t width, uint32_t height) +{ + for (auto evt : m_frameEvents) + SetEvent(evt); + SLANG_RETURN_ON_FAIL(D3DSwapchainBase::resize(width, height)); + return SLANG_OK; +} + +void SwapchainImpl::createSwapchainBufferImages() +{ + m_images.clear(); + + for (uint32_t i = 0; i < m_desc.imageCount; i++) + { + ComPtr d3dResource; + m_swapChain->GetBuffer(i, IID_PPV_ARGS(d3dResource.writeRef())); + ITextureResource::Desc imageDesc = {}; + imageDesc.allowedStates = ResourceStateSet( + ResourceState::Present, ResourceState::RenderTarget, ResourceState::CopyDestination); + imageDesc.type = IResource::Type::Texture2D; + imageDesc.arraySize = 0; + imageDesc.format = m_desc.format; + imageDesc.size.width = m_desc.width; + imageDesc.size.height = m_desc.height; + imageDesc.size.depth = 1; + imageDesc.numMipLevels = 1; + imageDesc.defaultState = ResourceState::Present; + RefPtr image = new TextureResourceImpl(imageDesc); + image->m_resource.setResource(d3dResource.get()); + image->m_defaultState = D3D12_RESOURCE_STATE_PRESENT; + m_images.add(image); + } + for 
(auto evt : m_frameEvents) + SetEvent(evt); +} + +int SwapchainImpl::acquireNextImage() +{ + auto result = (int)m_swapChain3->GetCurrentBackBufferIndex(); + WaitForSingleObject(m_frameEvents[result], INFINITE); + ResetEvent(m_frameEvents[result]); + return result; +} - case IResourceView::Type::UnorderedAccess: - { - // TODO: need to support the separate "counter resource" for the case - // of append/consume buffers with attached counters. +Result SwapchainImpl::present() +{ + m_fence->SetEventOnCompletion( + fenceValue, m_frameEvents[m_swapChain3->GetCurrentBackBufferIndex()]); + SLANG_RETURN_ON_FAIL(D3DSwapchainBase::present()); + fenceValue++; + m_queue->Signal(m_fence, fenceValue); + return SLANG_OK; +} - SLANG_RETURN_ON_FAIL(m_cpuViewHeap->allocate(&viewImpl->m_descriptor)); - viewImpl->m_allocator = m_cpuViewHeap; - D3D12_UNORDERED_ACCESS_VIEW_DESC d3d12desc = {}; - auto& resourceDesc = *resourceImpl->getDesc(); - d3d12desc.Format = gfxIsTypelessFormat(texture->getDesc()->format) - ? D3DUtil::getMapFormat(desc.format) - : D3DUtil::getMapFormat(texture->getDesc()->format); - switch (resourceImpl->getDesc()->type) - { - case IResource::Type::Texture1D: - d3d12desc.ViewDimension = resourceDesc.arraySize == 0 - ? D3D12_UAV_DIMENSION_TEXTURE1D - : D3D12_UAV_DIMENSION_TEXTURE1DARRAY; - d3d12desc.Texture1D.MipSlice = desc.subresourceRange.mipLevel; - d3d12desc.Texture1DArray.ArraySize = desc.subresourceRange.layerCount == 0 - ? resourceDesc.arraySize - : desc.subresourceRange.layerCount; - d3d12desc.Texture1DArray.FirstArraySlice = desc.subresourceRange.baseArrayLayer; +bool SwapchainImpl::isOccluded() +{ + return (m_swapChain3->Present(0, DXGI_PRESENT_TEST) == DXGI_STATUS_OCCLUDED); +} - break; - case IResource::Type::Texture2D: - d3d12desc.ViewDimension = resourceDesc.arraySize == 0 - ? 
D3D12_UAV_DIMENSION_TEXTURE2D - : D3D12_UAV_DIMENSION_TEXTURE2DARRAY; - d3d12desc.Texture2D.MipSlice = desc.subresourceRange.mipLevel; - d3d12desc.Texture2D.PlaneSlice = - D3DUtil::getPlaneSlice(d3d12desc.Format, desc.subresourceRange.aspectMask); - d3d12desc.Texture2DArray.ArraySize = desc.subresourceRange.layerCount == 0 - ? resourceDesc.arraySize - : desc.subresourceRange.layerCount; - d3d12desc.Texture2DArray.FirstArraySlice = desc.subresourceRange.baseArrayLayer; - break; - case IResource::Type::Texture3D: - d3d12desc.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE3D; - d3d12desc.Texture3D.MipSlice = desc.subresourceRange.mipLevel; - d3d12desc.Texture3D.FirstWSlice = desc.subresourceRange.baseArrayLayer; - d3d12desc.Texture3D.WSize = resourceDesc.size.depth; - break; - default: - return SLANG_FAIL; - } - m_device->CreateUnorderedAccessView( - resourceImpl->m_resource, nullptr, &d3d12desc, viewImpl->m_descriptor.cpuHandle); - } - break; +Result SwapchainImpl::setFullScreenMode(bool mode) +{ + return m_swapChain3->SetFullscreenState(mode, nullptr); +} - case IResourceView::Type::ShaderResource: - { - SLANG_RETURN_ON_FAIL(m_cpuViewHeap->allocate(&viewImpl->m_descriptor)); - viewImpl->m_allocator = m_cpuViewHeap; +// CommandQueue implementation. 
+ +Result CommandQueueImpl::init(DeviceImpl* device, uint32_t queueIndex) +{ + m_queueIndex = queueIndex; + m_renderer = device; + m_device = device->m_device; + D3D12_COMMAND_QUEUE_DESC queueDesc = {}; + queueDesc.Type = D3D12_COMMAND_LIST_TYPE_DIRECT; + SLANG_RETURN_ON_FAIL( + m_device->CreateCommandQueue(&queueDesc, IID_PPV_ARGS(m_d3dQueue.writeRef()))); + SLANG_RETURN_ON_FAIL( + m_device->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(m_fence.writeRef()))); + globalWaitHandle = CreateEventEx( + nullptr, nullptr, CREATE_EVENT_INITIAL_SET | CREATE_EVENT_MANUAL_RESET, EVENT_ALL_ACCESS); + return SLANG_OK; +} - // Need to construct the D3D12_SHADER_RESOURCE_VIEW_DESC because otherwise TextureCube is not accessed - // appropriately (rather than just passing nullptr to CreateShaderResourceView) - const D3D12_RESOURCE_DESC resourceDesc = resourceImpl->m_resource.getResource()->GetDesc(); - const DXGI_FORMAT pixelFormat = desc.format == Format::Unknown ? resourceDesc.Format : D3DUtil::getMapFormat(desc.format); +CommandQueueImpl::~CommandQueueImpl() +{ + waitOnHost(); + CloseHandle(globalWaitHandle); + m_renderer->m_queueIndexAllocator.free((int)m_queueIndex, 1); +} - D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc; - _initSrvDesc( - resourceImpl->getType(), - *resourceImpl->getDesc(), - resourceDesc, - pixelFormat, - desc.subresourceRange, - srvDesc); +void CommandQueueImpl::executeCommandBuffers( + uint32_t count, ICommandBuffer* const* commandBuffers, IFence* fence, uint64_t valueToSignal) +{ + ShortList commandLists; + for (uint32_t i = 0; i < count; i++) + { + auto cmdImpl = static_cast(commandBuffers[i]); + commandLists.add(cmdImpl->m_cmdList); + } + if (count > 0) + { + m_d3dQueue->ExecuteCommandLists((UINT)count, commandLists.getArrayView().getBuffer()); - m_device->CreateShaderResourceView(resourceImpl->m_resource, &srvDesc, viewImpl->m_descriptor.cpuHandle); + m_fenceValue++; + + for (uint32_t i = 0; i < count; i++) + { + if (i > 0 && commandBuffers[i] == 
commandBuffers[i - 1]) + continue; + auto cmdImpl = static_cast(commandBuffers[i]); + auto transientHeap = cmdImpl->m_transientHeap; + auto& waitInfo = transientHeap->getQueueWaitInfo(m_queueIndex); + waitInfo.waitValue = m_fenceValue; + waitInfo.fence = m_fence; } - break; + m_d3dQueue->Signal(m_fence, m_fenceValue); } - returnComPtr(outView, viewImpl); + if (fence) + { + auto fenceImpl = static_cast(fence); + m_d3dQueue->Signal(fenceImpl->m_fence.get(), valueToSignal); + } +} + +void CommandQueueImpl::waitOnHost() +{ + m_fenceValue++; + m_d3dQueue->Signal(m_fence, m_fenceValue); + ResetEvent(globalWaitHandle); + m_fence->SetEventOnCompletion(m_fenceValue, globalWaitHandle); + WaitForSingleObject(globalWaitHandle, INFINITE); +} + +Result CommandQueueImpl::waitForFenceValuesOnDevice( + uint32_t fenceCount, IFence** fences, uint64_t* waitValues) +{ + for (uint32_t i = 0; i < fenceCount; ++i) + { + auto fenceImpl = static_cast(fences[i]); + m_d3dQueue->Wait(fenceImpl->m_fence.get(), waitValues[i]); + } return SLANG_OK; } -Result D3D12Device::getFormatSupportedResourceStates(Format format, ResourceStateSet* outStates) +const CommandQueueImpl::Desc& CommandQueueImpl::getDesc() { return m_desc; } + +ICommandQueue* CommandQueueImpl::getInterface(const Guid& guid) { - D3D12_FEATURE_DATA_FORMAT_SUPPORT support; - support.Format = D3DUtil::getMapFormat(format); - SLANG_RETURN_ON_FAIL(m_device->CheckFeatureSupport( - D3D12_FEATURE_FORMAT_SUPPORT, &support, sizeof(support))); + if (guid == GfxGUID::IID_ISlangUnknown || guid == GfxGUID::IID_ICommandQueue) + return static_cast(this); + return nullptr; +} - ResourceStateSet allowedStates; +Result CommandQueueImpl::getNativeHandle(InteropHandle* handle) +{ + handle->api = InteropHandleAPI::D3D12; + handle->handleValue = (uint64_t)m_d3dQueue.get(); + return SLANG_OK; +} - auto dxgi1 = support.Support1; - if (dxgi1 & D3D12_FORMAT_SUPPORT1_BUFFER) - allowedStates.add(ResourceState::ConstantBuffer); - if (dxgi1 & 
D3D12_FORMAT_SUPPORT1_IA_VERTEX_BUFFER) - allowedStates.add(ResourceState::VertexBuffer); - if (dxgi1 & D3D12_FORMAT_SUPPORT1_IA_INDEX_BUFFER) - allowedStates.add(ResourceState::IndexBuffer); - if (dxgi1 & D3D12_FORMAT_SUPPORT1_SO_BUFFER) - allowedStates.add(ResourceState::StreamOutput); - if (dxgi1 & D3D12_FORMAT_SUPPORT1_TEXTURE1D) - allowedStates.add(ResourceState::ShaderResource); - if (dxgi1 & D3D12_FORMAT_SUPPORT1_TEXTURE2D) - allowedStates.add(ResourceState::ShaderResource); - if (dxgi1 & D3D12_FORMAT_SUPPORT1_TEXTURE3D) - allowedStates.add(ResourceState::ShaderResource); - if (dxgi1 & D3D12_FORMAT_SUPPORT1_TEXTURECUBE) - allowedStates.add(ResourceState::ShaderResource); - if (dxgi1 & D3D12_FORMAT_SUPPORT1_SHADER_LOAD) - allowedStates.add(ResourceState::ShaderResource); - if (dxgi1 & D3D12_FORMAT_SUPPORT1_SHADER_SAMPLE) - allowedStates.add(ResourceState::ShaderResource); - if (dxgi1 & D3D12_FORMAT_SUPPORT1_SHADER_SAMPLE_COMPARISON) - allowedStates.add(ResourceState::ShaderResource); - if (dxgi1 & D3D12_FORMAT_SUPPORT1_SHADER_GATHER) - allowedStates.add(ResourceState::ShaderResource); - if (dxgi1 & D3D12_FORMAT_SUPPORT1_SHADER_GATHER_COMPARISON) - allowedStates.add(ResourceState::ShaderResource); - if (dxgi1 & D3D12_FORMAT_SUPPORT1_RENDER_TARGET) - allowedStates.add(ResourceState::RenderTarget); - if (dxgi1 & D3D12_FORMAT_SUPPORT1_DEPTH_STENCIL) - allowedStates.add(ResourceState::DepthWrite); - if (dxgi1 & D3D12_FORMAT_SUPPORT1_TYPED_UNORDERED_ACCESS_VIEW) - allowedStates.add(ResourceState::UnorderedAccess); +ResourceViewInternalImpl::~ResourceViewInternalImpl() +{ + if (m_descriptor.cpuHandle.ptr) + m_allocator->free(m_descriptor); +} - *outStates = allowedStates; +Result ResourceViewImpl::getNativeHandle(InteropHandle* outHandle) +{ + outHandle->api = InteropHandleAPI::D3D12CpuDescriptorHandle; + outHandle->handleValue = m_descriptor.cpuHandle.ptr; return SLANG_OK; } -Result D3D12Device::createBufferView( - IBufferResource* buffer, - IBufferResource* 
counterBuffer, - IResourceView::Desc const& desc, - IResourceView** outView) +void RenderPassLayoutImpl::init(const IRenderPassLayout::Desc& desc) { - auto resourceImpl = (BufferResourceImpl*) buffer; - auto resourceDesc = *resourceImpl->getDesc(); + SimpleRenderPassLayout::init(desc); + m_framebufferLayout = static_cast(desc.framebufferLayout); + m_hasDepthStencil = m_framebufferLayout->m_hasDepthStencil; +} - RefPtr viewImpl = new ResourceViewImpl(); - viewImpl->m_resource = resourceImpl; - viewImpl->m_desc = desc; +ShaderObjectLayoutImpl::SubObjectRangeOffset::SubObjectRangeOffset( + slang::VariableLayoutReflection* varLayout) +{ + if (auto pendingLayout = varLayout->getPendingDataLayout()) + { + pendingOrdinaryData = (uint32_t)pendingLayout->getOffset(SLANG_PARAMETER_CATEGORY_UNIFORM); + } +} - switch (desc.type) +ShaderObjectLayoutImpl::SubObjectRangeStride::SubObjectRangeStride( + slang::TypeLayoutReflection* typeLayout) +{ + if (auto pendingLayout = typeLayout->getPendingDataTypeLayout()) { - default: - return SLANG_FAIL; + pendingOrdinaryData = (uint32_t)pendingLayout->getSize(SLANG_PARAMETER_CATEGORY_UNIFORM); + } +} - case IResourceView::Type::UnorderedAccess: +bool ShaderObjectLayoutImpl::isBindingRangeRootParameter( + SlangSession* globalSession, + const char* rootParameterAttributeName, + slang::TypeLayoutReflection* typeLayout, + Index bindingRangeIndex) +{ + bool isRootParameter = false; + if (rootParameterAttributeName) + { + if (auto leafVariable = typeLayout->getBindingRangeLeafVariable(bindingRangeIndex)) { - D3D12_UNORDERED_ACCESS_VIEW_DESC uavDesc = {}; - uavDesc.ViewDimension = D3D12_UAV_DIMENSION_BUFFER; - uavDesc.Format = D3DUtil::getMapFormat(desc.format); - uavDesc.Buffer.FirstElement = desc.bufferRange.firstElement; - uint64_t viewSize = 0; - if (desc.bufferElementSize) - { - uavDesc.Buffer.StructureByteStride = desc.bufferElementSize; - uavDesc.Buffer.NumElements = - desc.bufferRange.elementCount == 0 - ? 
UINT(resourceDesc.sizeInBytes / desc.bufferElementSize) - : (UINT)desc.bufferRange.elementCount; - viewSize = (uint64_t)desc.bufferElementSize * uavDesc.Buffer.NumElements; - } - else if(desc.format == Format::Unknown) - { - uavDesc.Format = DXGI_FORMAT_R32_TYPELESS; - uavDesc.Buffer.NumElements = desc.bufferRange.elementCount == 0 - ? UINT(resourceDesc.sizeInBytes / 4) - : UINT(desc.bufferRange.elementCount / 4); - uavDesc.Buffer.Flags |= D3D12_BUFFER_UAV_FLAG_RAW; - viewSize = 4ull * uavDesc.Buffer.NumElements; - } - else + if (leafVariable->findUserAttributeByName(globalSession, rootParameterAttributeName)) { - FormatInfo sizeInfo; - gfxGetFormatInfo(desc.format, &sizeInfo); - assert(sizeInfo.pixelsPerBlock == 1); - uavDesc.Buffer.NumElements = - desc.bufferRange.elementCount == 0 - ? UINT(resourceDesc.sizeInBytes / sizeInfo.blockSizeInBytes) - : (UINT)desc.bufferRange.elementCount; - viewSize = (uint64_t)uavDesc.Buffer.NumElements * sizeInfo.blockSizeInBytes; + isRootParameter = true; } + } + } + return isRootParameter; +} - if (viewSize >= (1ull << 32) - 8) +Result ShaderObjectLayoutImpl::createForElementType( + RendererBase* renderer, + slang::TypeLayoutReflection* elementType, + ShaderObjectLayoutImpl** outLayout) +{ + Builder builder(renderer); + builder.setElementTypeLayout(elementType); + return builder.build(outLayout); +} + +Result ShaderObjectLayoutImpl::init(Builder* builder) +{ + auto renderer = builder->m_renderer; + + initBase(renderer, builder->m_elementTypeLayout); + + m_containerType = builder->m_containerType; + + m_bindingRanges = _Move(builder->m_bindingRanges); + m_subObjectRanges = _Move(builder->m_subObjectRanges); + m_rootParamsInfo = _Move(builder->m_rootParamsInfo); + + m_ownCounts = builder->m_ownCounts; + m_totalCounts = builder->m_totalCounts; + m_subObjectCount = builder->m_subObjectCount; + m_childRootParameterCount = builder->m_childRootParameterCount; + m_totalOrdinaryDataSize = builder->m_totalOrdinaryDataSize; + + return 
SLANG_OK; +} + +Result ShaderObjectLayoutImpl::Builder::setElementTypeLayout( + slang::TypeLayoutReflection* typeLayout) +{ + typeLayout = _unwrapParameterGroups(typeLayout, m_containerType); + m_elementTypeLayout = typeLayout; + + // If the type contains any ordinary data, then we must reserve a buffer + // descriptor to hold it when binding as a parameter block. + // + m_totalOrdinaryDataSize = (uint32_t)typeLayout->getSize(); + if (m_totalOrdinaryDataSize != 0) + { + m_ownCounts.resource++; + } + + // We will scan over the reflected Slang binding ranges and add them + // to our array. There are two main things we compute along the way: + // + // * For each binding range we compute a `flatIndex` that can be + // used to identify where the values for the given range begin + // in the flattened arrays (e.g., `m_objects`) and descriptor + // tables that hold the state of a shader object. + // + // * We also update the various counters taht keep track of the number + // of sub-objects, resources, samplers, etc. that are being + // consumed. These counters will contribute to figuring out + // the descriptor table(s) that might be needed to represent + // the object. 
+ // + SlangInt bindingRangeCount = typeLayout->getBindingRangeCount(); + for (SlangInt r = 0; r < bindingRangeCount; ++r) + { + slang::BindingType slangBindingType = typeLayout->getBindingRangeType(r); + uint32_t count = (uint32_t)typeLayout->getBindingRangeBindingCount(r); + slang::TypeLayoutReflection* slangLeafTypeLayout = + typeLayout->getBindingRangeLeafTypeLayout(r); + BindingRangeInfo bindingRangeInfo = {}; + bindingRangeInfo.bindingType = slangBindingType; + bindingRangeInfo.resourceShape = slangLeafTypeLayout->getResourceShape(); + bindingRangeInfo.count = count; + bindingRangeInfo.isRootParameter = isBindingRangeRootParameter( + m_renderer->slangContext.globalSession, + static_cast(m_renderer)->m_extendedDesc.rootParameterShaderAttributeName, + typeLayout, + r); + if (bindingRangeInfo.isRootParameter) + { + RootParameterInfo rootInfo = {}; + switch (slangBindingType) + { + case slang::BindingType::RayTracingAccelerationStructure: + rootInfo.type = IResourceView::Type::AccelerationStructure; + break; + case slang::BindingType::RawBuffer: + case slang::BindingType::TypedBuffer: + rootInfo.type = IResourceView::Type::ShaderResource; + break; + case slang::BindingType::MutableRawBuffer: + case slang::BindingType::MutableTypedBuffer: + rootInfo.type = IResourceView::Type::UnorderedAccess; + break; + } + bindingRangeInfo.baseIndex = (uint32_t)m_rootParamsInfo.getCount(); + for (uint32_t i = 0; i < count; i++) { - // D3D12 does not support view descriptors that has size near 4GB. - // We will not create actual SRV/UAVs for such large buffers. - // However, a buffer this large can still be bound as root parameter. - // So instead of failing, we quietly ignore descriptor creation. 
- viewImpl->m_descriptor.cpuHandle.ptr = 0; + m_rootParamsInfo.add(rootInfo); } - else + } + else + { + switch (slangBindingType) { - auto counterResourceImpl = static_cast(counterBuffer); - SLANG_RETURN_ON_FAIL(m_cpuViewHeap->allocate(&viewImpl->m_descriptor)); - viewImpl->m_allocator = m_cpuViewHeap; - m_device->CreateUnorderedAccessView( - resourceImpl->m_resource, - counterResourceImpl ? counterResourceImpl->m_resource.getResource() : nullptr, - &uavDesc, - viewImpl->m_descriptor.cpuHandle); + case slang::BindingType::ConstantBuffer: + case slang::BindingType::ParameterBlock: + case slang::BindingType::ExistentialValue: + bindingRangeInfo.baseIndex = m_subObjectCount; + bindingRangeInfo.subObjectIndex = m_subObjectCount; + m_subObjectCount += count; + break; + case slang::BindingType::RawBuffer: + case slang::BindingType::MutableRawBuffer: + if (slangLeafTypeLayout->getType()->getElementType() != nullptr) + { + // A structured buffer occupies both a resource slot and + // a sub-object slot. + bindingRangeInfo.subObjectIndex = m_subObjectCount; + m_subObjectCount += count; + } + bindingRangeInfo.baseIndex = m_ownCounts.resource; + m_ownCounts.resource += count; + break; + case slang::BindingType::Sampler: + bindingRangeInfo.baseIndex = m_ownCounts.sampler; + m_ownCounts.sampler += count; + break; + + case slang::BindingType::CombinedTextureSampler: + // TODO: support this case... + break; + + case slang::BindingType::VaryingInput: + case slang::BindingType::VaryingOutput: + break; + + default: + bindingRangeInfo.baseIndex = m_ownCounts.resource; + m_ownCounts.resource += count; + break; } } - break; + m_bindingRanges.add(bindingRangeInfo); + } - case IResourceView::Type::ShaderResource: + // At this point we've computed the number of resources/samplers that + // the type needs to represent its *own* state, and stored those counts + // in `m_ownCounts`. 
Next we need to consider any resources/samplers + // and root parameters needed to represent the state of the transitive + // sub-objects of this objet, so that we can compute the total size + // of the object when bound to the pipeline. + + m_totalCounts = m_ownCounts; + + SlangInt subObjectRangeCount = typeLayout->getSubObjectRangeCount(); + for (SlangInt r = 0; r < subObjectRangeCount; ++r) + { + SlangInt bindingRangeIndex = typeLayout->getSubObjectRangeBindingRangeIndex(r); + auto slangBindingType = typeLayout->getBindingRangeType(bindingRangeIndex); + auto count = (uint32_t)typeLayout->getBindingRangeBindingCount(bindingRangeIndex); + slang::TypeLayoutReflection* slangLeafTypeLayout = + typeLayout->getBindingRangeLeafTypeLayout(bindingRangeIndex); + + // A sub-object range can either represent a sub-object of a known + // type, like a `ConstantBuffer` or `ParameterBlock` + // (in which case we can pre-compute a layout to use, based on + // the type `Foo`) *or* it can represent a sub-object of some + // existential type (e.g., `IBar`) in which case we cannot + // know the appropraite type/layout of sub-object to allocate. + // + RefPtr subObjectLayout; + if (slangBindingType == slang::BindingType::ExistentialValue) { - D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc = {}; - srvDesc.ViewDimension = D3D12_SRV_DIMENSION_BUFFER; - srvDesc.Format = D3DUtil::getMapFormat(desc.format); - srvDesc.Buffer.StructureByteStride = 0; - srvDesc.Buffer.FirstElement = desc.bufferRange.firstElement; - srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; - uint64_t viewSize = 0; - if (desc.bufferElementSize) + if (auto pendingTypeLayout = slangLeafTypeLayout->getPendingDataTypeLayout()) { - srvDesc.Buffer.StructureByteStride = desc.bufferElementSize; - srvDesc.Buffer.NumElements = - desc.bufferRange.elementCount == 0 - ? 
UINT(resourceDesc.sizeInBytes / desc.bufferElementSize) - : (UINT)desc.bufferRange.elementCount; - viewSize = (uint64_t)desc.bufferElementSize * srvDesc.Buffer.NumElements; + createForElementType(m_renderer, pendingTypeLayout, subObjectLayout.writeRef()); } - else if (desc.format == Format::Unknown) + } + else + { + createForElementType( + m_renderer, + slangLeafTypeLayout->getElementTypeLayout(), + subObjectLayout.writeRef()); + } + + SubObjectRangeInfo subObjectRange; + subObjectRange.bindingRangeIndex = bindingRangeIndex; + subObjectRange.layout = subObjectLayout; + + // The Slang reflection API stors offset information for sub-object ranges, + // and we care about *some* of that information: in particular, we need + // the offset of sub-objects in terms of uniform/ordinary data for the + // cases where we need to fill in "pending" data in our ordinary buffer. + // + subObjectRange.offset = SubObjectRangeOffset(typeLayout->getSubObjectRangeOffset(r)); + subObjectRange.stride = SubObjectRangeStride(slangLeafTypeLayout); + + // The remaining offset information is computed based on the counters + // we are generating here, which depend only on the in-memory layout + // decisions being made in our implementation. Remember that the + // `register` and `space` values coming from DXBC/DXIL do *not* + // dictate the in-memory layout we use. + // + // Note: One subtle point here is that the `.rootParam` offset we are computing + // here does *not* include any root parameters that would be allocated + // for the parent object type itself (e.g., for descriptor tables + // used if it were bound as a parameter block). The later logic when + // we actually go to bind things will need to apply those offsets. + // + // Note: An even *more* subtle point is that the `.resource` offset + // being computed here *does* include the resource descriptor allocated + // for holding the ordinary data buffer, if any. The implications of + // this for later offset math is subtle. 
+ // + subObjectRange.offset.rootParam = m_childRootParameterCount; + subObjectRange.offset.resource = m_totalCounts.resource; + subObjectRange.offset.sampler = m_totalCounts.sampler; + + // Along with the offset information, we also need to compute the + // "stride" between consecutive sub-objects in the range. The actual + // size/stride of a single object depends on the type of range we + // are dealing with. + // + BindingOffset objectCounts; + switch (slangBindingType) + { + default: { - srvDesc.Format = DXGI_FORMAT_R32_TYPELESS; - srvDesc.Buffer.NumElements = desc.bufferRange.elementCount == 0 - ? UINT(resourceDesc.sizeInBytes / 4) - : UINT(desc.bufferRange.elementCount / 4); - srvDesc.Buffer.Flags |= D3D12_BUFFER_SRV_FLAG_RAW; - viewSize = 4ull * srvDesc.Buffer.NumElements; + // We only treat buffers of interface types as actual sub-object binding + // range. + auto bindingRangeTypeLayout = + typeLayout->getBindingRangeLeafTypeLayout(bindingRangeIndex); + if (!bindingRangeTypeLayout) + continue; + auto elementType = typeLayout->getBindingRangeLeafTypeLayout(bindingRangeIndex) + ->getElementTypeLayout(); + if (!elementType) + continue; + if (elementType->getKind() != slang::TypeReflection::Kind::Interface) + { + continue; + } } - else + break; + + case slang::BindingType::ConstantBuffer: { - FormatInfo sizeInfo; - gfxGetFormatInfo(desc.format, &sizeInfo); - assert(sizeInfo.pixelsPerBlock == 1); - srvDesc.Buffer.NumElements = - desc.bufferRange.elementCount == 0 - ? UINT(resourceDesc.sizeInBytes / sizeInfo.blockSizeInBytes) - : (UINT)desc.bufferRange.elementCount; - viewSize = (uint64_t)srvDesc.Buffer.NumElements * sizeInfo.blockSizeInBytes; + SLANG_ASSERT(subObjectLayout); + + // The resource and sampler descriptors of a nested + // constant buffer will "leak" into those of the + // parent type, and we need to account for them + // whenever we allocate storage. 
+ // + objectCounts.resource = subObjectLayout->getTotalResourceDescriptorCount(); + objectCounts.sampler = subObjectLayout->getTotalSamplerDescriptorCount(); + objectCounts.rootParam = subObjectRange.layout->getChildRootParameterCount(); } - if (viewSize >= (1ull << 32) - 8) + break; + + case slang::BindingType::ParameterBlock: { - // D3D12 does not support view descriptors that has size near 4GB. - // We will not create actual SRV/UAVs for such large buffers. - // However, a buffer this large can still be bound as root parameter. - // So instead of failing, we quietly ignore descriptor creation. - viewImpl->m_descriptor.cpuHandle.ptr = 0; + SLANG_ASSERT(subObjectLayout); + + // In contrast to a constant buffer, a parameter block can hide + // the resource and sampler descriptor allocation it uses (since they + // are allocated into the tables that make up the parameter block. + // + // The only resource usage that leaks into the surrounding context + // is the number of root parameters consumed. + // + objectCounts.rootParam = subObjectRange.layout->getTotalRootTableParameterCount(); } - else + break; + + case slang::BindingType::ExistentialValue: + // An unspecialized existential/interface value cannot consume any resources + // as part of the parent object (it needs to fit inside the fixed-size + // represnetation of existential types). + // + // However, if we are statically specializing to a type that doesn't "fit" + // we may need to account for additional information that needs to be + // allocaated. 
+ // + if (subObjectLayout) { - SLANG_RETURN_ON_FAIL(m_cpuViewHeap->allocate(&viewImpl->m_descriptor)); - viewImpl->m_allocator = m_cpuViewHeap; - m_device->CreateShaderResourceView( - resourceImpl->m_resource, &srvDesc, viewImpl->m_descriptor.cpuHandle); + // The ordinary data for an existential-type value is allocated into + // the same buffer as the parent object, so we only want to consider + // the resource descriptors *other than* the ordinary data buffer. + // + // Otherwise the logic here is identical to the constant buffer case. + // + objectCounts.resource = + subObjectLayout->getTotalResourceDescriptorCountWithoutOrdinaryDataBuffer(); + objectCounts.sampler = subObjectLayout->getTotalSamplerDescriptorCount(); + objectCounts.rootParam = subObjectRange.layout->getChildRootParameterCount(); + + // Note: In the implementation for some other graphics API (e.g., + // Vulkan) there needs to be more work done to handle the fact that + // "pending" data from interface-type sub-objects get allocated to a + // distinct offset after all the "primary" data. We are consciously + // ignoring that issue here, and the physical layout of a shader object + // into the D3D12 binding state may end up interleaving + // resources/samplers for "primary" and "pending" data. + // + // If this choice ever causes issues, we can revisit the approach here. + + // An interface-type range that includes ordinary data can + // increase the size of the ordinary data buffer we need to + // allocate for the parent object. + // + uint32_t ordinaryDataEnd = + subObjectRange.offset.pendingOrdinaryData + + (uint32_t)count * subObjectRange.stride.pendingOrdinaryData; + + if (ordinaryDataEnd > m_totalOrdinaryDataSize) + { + m_totalOrdinaryDataSize = ordinaryDataEnd; + } } + break; } - break; + + // Once we've computed the usage for each object in the range, we can + // easily compute the usage for the entire range. 
+ // + auto rangeResourceCount = count * objectCounts.resource; + auto rangeSamplerCount = count * objectCounts.sampler; + auto rangeRootParamCount = count * objectCounts.rootParam; + + m_totalCounts.resource += rangeResourceCount; + m_totalCounts.sampler += rangeSamplerCount; + m_childRootParameterCount += rangeRootParamCount; + + m_subObjectRanges.add(subObjectRange); } - returnComPtr(outView, viewImpl); + // Once we have added up the resource usage from all the sub-objects + // we can look at the total number of resources and samplers that + // need to be bound as part of this objects descriptor tables and + // that will allow us to decide whether we need to allocate a root + // parameter for a resource table or not, ans similarly for a + // sampler table. + // + if (m_totalCounts.resource) + m_ownCounts.rootParam++; + if (m_totalCounts.sampler) + m_ownCounts.rootParam++; + + m_totalCounts.rootParam = m_ownCounts.rootParam + m_childRootParameterCount; + return SLANG_OK; } -Result D3D12Device::createFramebuffer(IFramebuffer::Desc const& desc, IFramebuffer** outFb) +Result ShaderObjectLayoutImpl::Builder::build(ShaderObjectLayoutImpl** outLayout) { - RefPtr framebuffer = new FramebufferImpl(); - framebuffer->renderTargetViews.setCount(desc.renderTargetCount); - framebuffer->renderTargetDescriptors.setCount(desc.renderTargetCount); - framebuffer->renderTargetClearValues.setCount(desc.renderTargetCount); - for (uint32_t i = 0; i < desc.renderTargetCount; i++) - { - framebuffer->renderTargetViews[i] = static_cast(desc.renderTargetViews[i]); - framebuffer->renderTargetDescriptors[i] = - framebuffer->renderTargetViews[i]->m_descriptor.cpuHandle; - if (static_cast(desc.renderTargetViews[i])->m_resource.Ptr()) - { - auto clearValue = - static_cast( - static_cast(desc.renderTargetViews[i])->m_resource.Ptr()) - ->getDesc() - ->optimalClearValue.color; - memcpy(&framebuffer->renderTargetClearValues[i], &clearValue, sizeof(ColorClearValue)); - } - else - { - 
memset(&framebuffer->renderTargetClearValues[i], 0, sizeof(ColorClearValue)); - } - } - framebuffer->depthStencilView = static_cast(desc.depthStencilView); - if (desc.depthStencilView) - { - framebuffer->depthStencilClearValue = - static_cast( - static_cast(desc.depthStencilView)->m_resource.Ptr()) - ->getDesc() - ->optimalClearValue.depthStencil; - framebuffer->depthStencilDescriptor = - static_cast(desc.depthStencilView)->m_descriptor.cpuHandle; - } - else + auto layout = RefPtr(new ShaderObjectLayoutImpl()); + SLANG_RETURN_ON_FAIL(layout->init(this)); + + returnRefPtrMove(outLayout, layout); + return SLANG_OK; +} + +Result RootShaderObjectLayoutImpl::Builder::build(RootShaderObjectLayoutImpl** outLayout) +{ + RefPtr layout = new RootShaderObjectLayoutImpl(); + SLANG_RETURN_ON_FAIL(layout->init(this)); + + returnRefPtrMove(outLayout, layout); + return SLANG_OK; +} + +void RootShaderObjectLayoutImpl::Builder::addGlobalParams( + slang::VariableLayoutReflection* globalsLayout) +{ + setElementTypeLayout(globalsLayout->getTypeLayout()); +} + +void RootShaderObjectLayoutImpl::Builder::addEntryPoint( + SlangStage stage, ShaderObjectLayoutImpl* entryPointLayout) +{ + EntryPointInfo info; + info.layout = entryPointLayout; + + info.offset.resource = m_totalCounts.resource; + info.offset.sampler = m_totalCounts.sampler; + info.offset.rootParam = m_childRootParameterCount; + + m_totalCounts.resource += entryPointLayout->getTotalResourceDescriptorCount(); + m_totalCounts.sampler += entryPointLayout->getTotalSamplerDescriptorCount(); + + // TODO(tfoley): Check this to make sure it is reasonable... 
+ m_childRootParameterCount += entryPointLayout->getChildRootParameterCount(); + + m_entryPoints.add(info); +} + +Result RootShaderObjectLayoutImpl::RootSignatureDescBuilder::translateDescriptorRangeType( + slang::BindingType c, D3D12_DESCRIPTOR_RANGE_TYPE* outType) +{ + switch (c) { - framebuffer->depthStencilDescriptor.ptr = 0; + case slang::BindingType::ConstantBuffer: + *outType = D3D12_DESCRIPTOR_RANGE_TYPE_CBV; + return SLANG_OK; + case slang::BindingType::RawBuffer: + case slang::BindingType::Texture: + case slang::BindingType::TypedBuffer: + case slang::BindingType::RayTracingAccelerationStructure: + *outType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV; + return SLANG_OK; + case slang::BindingType::MutableRawBuffer: + case slang::BindingType::MutableTexture: + case slang::BindingType::MutableTypedBuffer: + *outType = D3D12_DESCRIPTOR_RANGE_TYPE_UAV; + return SLANG_OK; + case slang::BindingType::Sampler: + *outType = D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER; + return SLANG_OK; + default: + return SLANG_FAIL; } - returnComPtr(outFb, framebuffer); - return SLANG_OK; } -Result D3D12Device::createFramebufferLayout( - IFramebufferLayout::Desc const& desc, IFramebufferLayout** outLayout) +/// Add a new descriptor set to the layout being computed. +/// +/// Note that a "descriptor set" in the layout may amount to +/// zero, one, or two different descriptor *tables* in the +/// final D3D12 root signature. Each descriptor set may +/// contain zero or more view ranges (CBV/SRV/UAV) and zero +/// or more sampler ranges. It maps to a view descriptor table +/// if the number of view ranges is non-zero and to a sampler +/// descriptor table if the number of sampler ranges is non-zero. 
+/// + +uint32_t RootShaderObjectLayoutImpl::RootSignatureDescBuilder::addDescriptorSet() { - RefPtr layout = new FramebufferLayoutImpl(); - layout->m_renderTargets.setCount(desc.renderTargetCount); - for (uint32_t i = 0; i < desc.renderTargetCount; i++) + auto result = (uint32_t)m_descriptorSets.getCount(); + m_descriptorSets.add(DescriptorSetLayout{}); + return result; +} + +Result RootShaderObjectLayoutImpl::RootSignatureDescBuilder::addDescriptorRange( + Index physicalDescriptorSetIndex, + D3D12_DESCRIPTOR_RANGE_TYPE rangeType, + UINT registerIndex, + UINT spaceIndex, + UINT count, + bool isRootParameter) +{ + if (isRootParameter) { - layout->m_renderTargets[i] = desc.renderTargets[i]; + D3D12_ROOT_PARAMETER rootParam = {}; + switch (rangeType) + { + case D3D12_DESCRIPTOR_RANGE_TYPE_SRV: + rootParam.ParameterType = D3D12_ROOT_PARAMETER_TYPE_SRV; + break; + case D3D12_DESCRIPTOR_RANGE_TYPE_UAV: + rootParam.ParameterType = D3D12_ROOT_PARAMETER_TYPE_UAV; + break; + default: + getDebugCallback()->handleMessage( + DebugMessageType::Error, + DebugMessageSource::Layer, + "A shader parameter marked as root parameter is neither SRV nor UAV."); + return SLANG_FAIL; + } + rootParam.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; + rootParam.Descriptor.RegisterSpace = spaceIndex; + rootParam.Descriptor.ShaderRegister = registerIndex; + m_rootParameters.add(rootParam); + return SLANG_OK; } - - if (desc.depthStencil) + + auto& descriptorSet = m_descriptorSets[physicalDescriptorSetIndex]; + + D3D12_DESCRIPTOR_RANGE range = {}; + range.RangeType = rangeType; + range.NumDescriptors = count; + range.BaseShaderRegister = registerIndex; + range.RegisterSpace = spaceIndex; + range.OffsetInDescriptorsFromTableStart = D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND; + + if (range.RangeType == D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER) { - layout->m_hasDepthStencil = true; - layout->m_depthStencil = *desc.depthStencil; + descriptorSet.m_samplerRanges.add(range); + descriptorSet.m_samplerCount += 
range.NumDescriptors; } else { - layout->m_hasDepthStencil = false; + descriptorSet.m_resourceRanges.add(range); + descriptorSet.m_resourceCount += range.NumDescriptors; } - returnComPtr(outLayout, layout); + return SLANG_OK; } -Result D3D12Device::createRenderPassLayout( - const IRenderPassLayout::Desc& desc, - IRenderPassLayout** outRenderPassLayout) +/// Add one descriptor range as specified in Slang reflection information to the layout. +/// +/// The layout information is taken from `typeLayout` for the descriptor +/// range with the given `descriptorRangeIndex` within the logical +/// descriptor set (reflected by Slang) with the given `logicalDescriptorSetIndex`. +/// +/// The `physicalDescriptorSetIndex` is the index in the `m_descriptorSets` array of +/// the descriptor set that the range should be added to. +/// +/// The `offset` encodes information about space and/or register offsets that +/// should be applied to descrptor ranges. +/// +/// This operation can fail if the given descriptor range encodes a range that +/// doesn't map to anything directly supported by D3D12. Higher-level routines +/// will often want to ignore such failures. 
+/// + +Result RootShaderObjectLayoutImpl::RootSignatureDescBuilder::addDescriptorRange( + slang::TypeLayoutReflection* typeLayout, + Index physicalDescriptorSetIndex, + BindingRegisterOffset const& containerOffset, + BindingRegisterOffset const& elementOffset, + Index logicalDescriptorSetIndex, + Index descriptorRangeIndex, + bool isRootParameter) { - RefPtr result = new RenderPassLayoutImpl(); - result->init(desc); - returnComPtr(outRenderPassLayout, result); - return SLANG_OK; + auto bindingType = typeLayout->getDescriptorSetDescriptorRangeType( + logicalDescriptorSetIndex, descriptorRangeIndex); + auto count = typeLayout->getDescriptorSetDescriptorRangeDescriptorCount( + logicalDescriptorSetIndex, descriptorRangeIndex); + auto index = typeLayout->getDescriptorSetDescriptorRangeIndexOffset( + logicalDescriptorSetIndex, descriptorRangeIndex); + auto space = typeLayout->getDescriptorSetSpaceOffset(logicalDescriptorSetIndex); + + D3D12_DESCRIPTOR_RANGE_TYPE rangeType; + SLANG_RETURN_ON_FAIL(translateDescriptorRangeType(bindingType, &rangeType)); + + return addDescriptorRange( + physicalDescriptorSetIndex, + rangeType, + (UINT)index + elementOffset[rangeType], + (UINT)space + containerOffset.spaceOffset, + (UINT)count, + isRootParameter); } -Result D3D12Device::createInputLayout(IInputLayout::Desc const& desc, IInputLayout** outLayout) +/// Add one binding range to the computed layout. +/// +/// The layout information is taken from `typeLayout` for the binding +/// range with the given `bindingRangeIndex`. +/// +/// The `physicalDescriptorSetIndex` is the index in the `m_descriptorSets` array of +/// the descriptor set that the range should be added to. +/// +/// The `offset` encodes information about space and/or register offsets that +/// should be applied to descrptor ranges. +/// +/// Note that a single binding range may encompass zero or more descriptor ranges. 
+/// + +void RootShaderObjectLayoutImpl::RootSignatureDescBuilder::addBindingRange( + slang::TypeLayoutReflection* typeLayout, + Index physicalDescriptorSetIndex, + BindingRegisterOffset const& containerOffset, + BindingRegisterOffset const& elementOffset, + Index bindingRangeIndex) { - RefPtr layout(new InputLayoutImpl); - - // Work out a buffer size to hold all text - size_t textSize = 0; - auto inputElementCount = desc.inputElementCount; - auto inputElements = desc.inputElements; - auto vertexStreamCount = desc.vertexStreamCount; - auto vertexStreams = desc.vertexStreams; - for (int i = 0; i < Int(inputElementCount); ++i) - { - const char* text = inputElements[i].semanticName; - textSize += text ? (::strlen(text) + 1) : 0; + auto logicalDescriptorSetIndex = + typeLayout->getBindingRangeDescriptorSetIndex(bindingRangeIndex); + auto firstDescriptorRangeIndex = + typeLayout->getBindingRangeFirstDescriptorRangeIndex(bindingRangeIndex); + Index descriptorRangeCount = typeLayout->getBindingRangeDescriptorRangeCount(bindingRangeIndex); + bool isRootParameter = isBindingRangeRootParameter( + m_device->slangContext.globalSession, + m_device->m_extendedDesc.rootParameterShaderAttributeName, + typeLayout, + bindingRangeIndex); + for (Index i = 0; i < descriptorRangeCount; ++i) + { + auto descriptorRangeIndex = firstDescriptorRangeIndex + i; + + // Note: we ignore the `Result` returned by `addDescriptorRange()` because we + // want to silently skip any ranges that represent kinds of bindings that + // don't actually exist in D3D12. 
+ // + addDescriptorRange( + typeLayout, + physicalDescriptorSetIndex, + containerOffset, + elementOffset, + logicalDescriptorSetIndex, + descriptorRangeIndex, + isRootParameter); } - layout->m_text.setCount(textSize); - char* textPos = layout->m_text.getBuffer(); +} - List& elements = layout->m_elements; - elements.setCount(inputElementCount); +void RootShaderObjectLayoutImpl::RootSignatureDescBuilder::addAsValue( + slang::VariableLayoutReflection* varLayout, Index physicalDescriptorSetIndex) +{ + BindingRegisterOffsetPair offset(varLayout); + addAsValue(varLayout->getTypeLayout(), physicalDescriptorSetIndex, offset, offset); +} +/// Add binding ranges and parameter blocks to the root signature. +/// +/// The layout information is taken from `typeLayout` which should +/// be a layout for either a program or an entry point. +/// +/// The `physicalDescriptorSetIndex` is the index in the `m_descriptorSets` array of +/// the descriptor set that binding ranges not belonging to nested +/// parameter blocks should be added to. +/// +/// The `offset` encodes information about space and/or register offsets that +/// should be applied to descrptor ranges. 
+/// + +void RootShaderObjectLayoutImpl::RootSignatureDescBuilder::addAsConstantBuffer( + slang::TypeLayoutReflection* typeLayout, + Index physicalDescriptorSetIndex, + BindingRegisterOffsetPair const& containerOffset, + BindingRegisterOffsetPair const& elementOffset) +{ + if (typeLayout->getSize(SLANG_PARAMETER_CATEGORY_UNIFORM) != 0) + { + auto descriptorRangeType = D3D12_DESCRIPTOR_RANGE_TYPE_CBV; + auto& offsetForRangeType = containerOffset.primary.offsetForRangeType[descriptorRangeType]; + addDescriptorRange( + physicalDescriptorSetIndex, + descriptorRangeType, + offsetForRangeType, + containerOffset.primary.spaceOffset, + 1, + false); + } - for (Int i = 0; i < inputElementCount; ++i) - { - const InputElementDesc& srcEle = inputElements[i]; - const auto& srcStream = vertexStreams[srcEle.bufferSlotIndex]; - D3D12_INPUT_ELEMENT_DESC& dstEle = elements[i]; + addAsValue(typeLayout, physicalDescriptorSetIndex, containerOffset, elementOffset); +} - // Add text to the buffer - const char* semanticName = srcEle.semanticName; - if (semanticName) +void RootShaderObjectLayoutImpl::RootSignatureDescBuilder::addAsValue( + slang::TypeLayoutReflection* typeLayout, + Index physicalDescriptorSetIndex, + BindingRegisterOffsetPair const& containerOffset, + BindingRegisterOffsetPair const& elementOffset) +{ + // Our first task is to add the binding ranges for stuff that is + // directly contained in `typeLayout` rather than via sub-objects. + // + // Our goal is to have the descriptors for directly-contained views/samplers + // always be contiguous in CPU and GPU memory, so that we can write + // to them easily with a single operaiton. + // + Index bindingRangeCount = typeLayout->getBindingRangeCount(); + for (Index bindingRangeIndex = 0; bindingRangeIndex < bindingRangeCount; bindingRangeIndex++) + { + // We will look at the type of each binding range and intentionally + // skip those that represent sub-objects. 
+ // + auto bindingType = typeLayout->getBindingRangeType(bindingRangeIndex); + switch (bindingType) { - const int len = int(::strlen(semanticName)); - ::memcpy(textPos, semanticName, len + 1); - semanticName = textPos; - textPos += len + 1; - } + case slang::BindingType::ConstantBuffer: + case slang::BindingType::ParameterBlock: + case slang::BindingType::ExistentialValue: + continue; - dstEle.SemanticName = semanticName; - dstEle.SemanticIndex = (UINT)srcEle.semanticIndex; - dstEle.Format = D3DUtil::getMapFormat(srcEle.format); - dstEle.InputSlot = (UINT)srcEle.bufferSlotIndex; - dstEle.AlignedByteOffset = (UINT)srcEle.offset; - dstEle.InputSlotClass = D3DUtil::getInputSlotClass(srcStream.slotClass); - dstEle.InstanceDataStepRate = (UINT)srcStream.instanceDataStepRate; - } + default: + break; + } - auto& vertexStreamStrides = layout->m_vertexStreamStrides; - vertexStreamStrides.setCount(vertexStreamCount); - for (Int i = 0; i < vertexStreamCount; ++i) - { - vertexStreamStrides[i] = vertexStreams[i].stride; + // For binding ranges that don't represent sub-objects, we will add + // all of the descriptor ranges they encompass to the root signature. 
+ // + addBindingRange( + typeLayout, + physicalDescriptorSetIndex, + containerOffset.primary, + elementOffset.primary, + bindingRangeIndex); } - returnComPtr(outLayout, layout); - return SLANG_OK; -} + // Next we need to recursively include everything bound via sub-objects + Index subObjectRangeCount = typeLayout->getSubObjectRangeCount(); + for (Index subObjectRangeIndex = 0; subObjectRangeIndex < subObjectRangeCount; + subObjectRangeIndex++) + { + auto bindingRangeIndex = + typeLayout->getSubObjectRangeBindingRangeIndex(subObjectRangeIndex); + auto bindingType = typeLayout->getBindingRangeType(bindingRangeIndex); -Result D3D12Device::readBufferResource( - IBufferResource* bufferIn, - size_t offset, - size_t size, - ISlangBlob** outBlob) -{ + auto subObjectTypeLayout = typeLayout->getBindingRangeLeafTypeLayout(bindingRangeIndex); - BufferResourceImpl* buffer = static_cast(bufferIn); + BindingRegisterOffsetPair subObjectRangeContainerOffset = containerOffset; + subObjectRangeContainerOffset += + BindingRegisterOffsetPair(typeLayout->getSubObjectRangeOffset(subObjectRangeIndex)); + BindingRegisterOffsetPair subObjectRangeElementOffset = elementOffset; + subObjectRangeElementOffset += + BindingRegisterOffsetPair(typeLayout->getSubObjectRangeOffset(subObjectRangeIndex)); - const size_t bufferSize = buffer->getDesc()->sizeInBytes; + switch (bindingType) + { + case slang::BindingType::ConstantBuffer: + { + auto containerVarLayout = subObjectTypeLayout->getContainerVarLayout(); + SLANG_ASSERT(containerVarLayout); - // This will be slow!!! 
- it blocks CPU on GPU completion - D3D12Resource& resource = buffer->m_resource; + auto elementVarLayout = subObjectTypeLayout->getElementVarLayout(); + SLANG_ASSERT(elementVarLayout); - D3D12Resource stageBuf; - if (buffer->getDesc()->memoryType != MemoryType::ReadBack) - { - auto encodeInfo = encodeResourceCommands(); + auto elementTypeLayout = elementVarLayout->getTypeLayout(); + SLANG_ASSERT(elementTypeLayout); - // Readback heap - D3D12_HEAP_PROPERTIES heapProps; - heapProps.Type = D3D12_HEAP_TYPE_READBACK; - heapProps.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN; - heapProps.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN; - heapProps.CreationNodeMask = 1; - heapProps.VisibleNodeMask = 1; + BindingRegisterOffsetPair containerOffset = subObjectRangeContainerOffset; + containerOffset += BindingRegisterOffsetPair(containerVarLayout); - // Resource to readback to - D3D12_RESOURCE_DESC stagingDesc; - _initBufferResourceDesc(size, stagingDesc); + BindingRegisterOffsetPair elementOffset = subObjectRangeElementOffset; + elementOffset += BindingRegisterOffsetPair(elementVarLayout); - SLANG_RETURN_ON_FAIL(stageBuf.initCommitted(m_device, heapProps, D3D12_HEAP_FLAG_NONE, stagingDesc, D3D12_RESOURCE_STATE_COPY_DEST, nullptr)); + addAsConstantBuffer( + elementTypeLayout, physicalDescriptorSetIndex, containerOffset, elementOffset); + } + break; - // Do the copy - encodeInfo.d3dCommandList->CopyBufferRegion(stageBuf, 0, resource, offset, size); + case slang::BindingType::ParameterBlock: + { + auto containerVarLayout = subObjectTypeLayout->getContainerVarLayout(); + SLANG_ASSERT(containerVarLayout); - // Wait until complete - submitResourceCommandsAndWait(encodeInfo); - } + auto elementVarLayout = subObjectTypeLayout->getElementVarLayout(); + SLANG_ASSERT(elementVarLayout); - D3D12Resource& stageBufRef = - buffer->getDesc()->memoryType != MemoryType::ReadBack ? 
stageBuf : resource; + auto elementTypeLayout = elementVarLayout->getTypeLayout(); + SLANG_ASSERT(elementTypeLayout); - // Map and copy - RefPtr blob = new ListBlob(); - { - UINT8* data; - D3D12_RANGE readRange = { 0, size }; + BindingRegisterOffsetPair subDescriptorSetOffset; + subDescriptorSetOffset.primary.spaceOffset = + subObjectRangeElementOffset.primary.spaceOffset; + subDescriptorSetOffset.pending.spaceOffset = + subObjectRangeElementOffset.pending.spaceOffset; - SLANG_RETURN_ON_FAIL(stageBufRef.getResource()->Map(0, &readRange, reinterpret_cast(&data))); + auto subPhysicalDescriptorSetIndex = addDescriptorSet(); - // Copy to memory buffer - blob->m_data.setCount(size); - ::memcpy(blob->m_data.getBuffer(), data, size); + BindingRegisterOffsetPair containerOffset = subDescriptorSetOffset; + containerOffset += BindingRegisterOffsetPair(containerVarLayout); - stageBufRef.getResource()->Unmap(0, nullptr); + BindingRegisterOffsetPair elementOffset = subDescriptorSetOffset; + elementOffset += BindingRegisterOffsetPair(elementVarLayout); + + addAsConstantBuffer( + elementTypeLayout, + subPhysicalDescriptorSetIndex, + containerOffset, + elementOffset); + } + break; + + case slang::BindingType::ExistentialValue: + { + // Any nested binding ranges in the sub-object will "leak" into the + // binding ranges for the surrounding context. 
+ // + auto specializedTypeLayout = subObjectTypeLayout->getPendingDataTypeLayout(); + if (specializedTypeLayout) + { + BindingRegisterOffsetPair pendingOffset; + pendingOffset.primary = subObjectRangeElementOffset.pending; + + addAsValue( + specializedTypeLayout, + physicalDescriptorSetIndex, + pendingOffset, + pendingOffset); + } + } + break; + } } - returnComPtr(outBlob, blob); - return SLANG_OK; } -Result D3D12Device::createProgram(const IShaderProgram::Desc& desc, IShaderProgram** outProgram, ISlangBlob** outDiagnosticBlob) +D3D12_ROOT_SIGNATURE_DESC& RootShaderObjectLayoutImpl::RootSignatureDescBuilder::build() { - RefPtr shaderProgram = new ShaderProgramImpl(); - shaderProgram->init(desc); - ComPtr d3dDiagnosticBlob; - auto rootShaderLayoutResult = RootShaderObjectLayoutImpl::create( - this, - shaderProgram->linkedProgram, - shaderProgram->linkedProgram->getLayout(), - shaderProgram->m_rootObjectLayout.writeRef(), - d3dDiagnosticBlob.writeRef()); - if (!SLANG_SUCCEEDED(rootShaderLayoutResult)) + for (Index i = 0; i < m_descriptorSets.getCount(); i++) { - if (outDiagnosticBlob && d3dDiagnosticBlob) + auto& descriptorSet = m_descriptorSets[i]; + if (descriptorSet.m_resourceRanges.getCount()) { - RefPtr diagnosticBlob = - new StringBlob(String((const char*)d3dDiagnosticBlob->GetBufferPointer())); - returnComPtr(outDiagnosticBlob, diagnosticBlob); + D3D12_ROOT_PARAMETER rootParam = {}; + rootParam.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; + rootParam.DescriptorTable.NumDescriptorRanges = + (UINT)descriptorSet.m_resourceRanges.getCount(); + rootParam.DescriptorTable.pDescriptorRanges = + descriptorSet.m_resourceRanges.getBuffer(); + m_rootParameters.add(rootParam); + } + if (descriptorSet.m_samplerRanges.getCount()) + { + D3D12_ROOT_PARAMETER rootParam = {}; + rootParam.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; + rootParam.DescriptorTable.NumDescriptorRanges = + (UINT)descriptorSet.m_samplerRanges.getCount(); + 
rootParam.DescriptorTable.pDescriptorRanges = descriptorSet.m_samplerRanges.getBuffer(); + m_rootParameters.add(rootParam); } - return rootShaderLayoutResult; } - returnComPtr(outProgram, shaderProgram); - return SLANG_OK; -} -Result D3D12Device::createShaderObjectLayout( - slang::TypeLayoutReflection* typeLayout, - ShaderObjectLayoutBase** outLayout) -{ - RefPtr layout; - SLANG_RETURN_ON_FAIL( - ShaderObjectLayoutImpl::createForElementType( - this, typeLayout, layout.writeRef())); - returnRefPtrMove(outLayout, layout); - return SLANG_OK; -} + m_rootSignatureDesc.NumParameters = UINT(m_rootParameters.getCount()); + m_rootSignatureDesc.pParameters = m_rootParameters.getBuffer(); -Result D3D12Device::createShaderObject( - ShaderObjectLayoutBase* layout, - IShaderObject** outObject) -{ - RefPtr shaderObject; - SLANG_RETURN_ON_FAIL(ShaderObjectImpl::create( - this, reinterpret_cast(layout), - shaderObject.writeRef())); - returnComPtr(outObject, shaderObject); - return SLANG_OK; -} + // TODO: static samplers should be reasonably easy to support... + m_rootSignatureDesc.NumStaticSamplers = 0; + m_rootSignatureDesc.pStaticSamplers = nullptr; -Result D3D12Device::createMutableShaderObject( - ShaderObjectLayoutBase* layout, - IShaderObject** outObject) -{ - auto result = createShaderObject(layout, outObject); - SLANG_RETURN_ON_FAIL(result); - static_cast(*outObject)->m_isMutable = true; - return result; -} + // TODO: only set this flag if needed (requires creating root + // signature at same time as pipeline state...). 
+ // + m_rootSignatureDesc.Flags = D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT; -Result D3D12Device::createMutableRootShaderObject(IShaderProgram* program, IShaderObject** outObject) -{ - RefPtr result = new MutableRootShaderObjectImpl(); - result->init(this); - auto programImpl = static_cast(program); - result->resetImpl(this, programImpl->m_rootObjectLayout, m_cpuViewHeap.Ptr(), m_cpuSamplerHeap.Ptr(), true); - returnComPtr(outObject, result); - return SLANG_OK; + return m_rootSignatureDesc; } -Result D3D12Device::createShaderTable(const IShaderTable::Desc& desc, IShaderTable** outShaderTable) +Result RootShaderObjectLayoutImpl::createRootSignatureFromSlang( + DeviceImpl* device, + RootShaderObjectLayoutImpl* rootLayout, + slang::IComponentType* program, + ID3D12RootSignature** outRootSignature, + ID3DBlob** outError) { - RefPtr result = new ShaderTableImpl(); - result->m_device = this; - result->init(desc); - returnComPtr(outShaderTable, result); - return SLANG_OK; -} + // We are going to build up the root signature by adding + // binding/descritpor ranges and nested parameter blocks + // based on the computed layout information for `program`. + // + RootSignatureDescBuilder builder(device); + auto layout = program->getLayout(); -Result D3D12Device::createGraphicsPipelineState(const GraphicsPipelineStateDesc& desc, IPipelineState** outState) -{ - RefPtr pipelineStateImpl = new PipelineStateImpl(this); - pipelineStateImpl->init(desc); - returnComPtr(outState, pipelineStateImpl); - return SLANG_OK; -} + // The layout information computed by Slang breaks up shader + // parameters into what we can think of as "logical" descriptor + // sets based on whether or not parameters have the same `space`. + // + // We want to basically ignore that decomposition and generate a + // single descriptor set to hold all top-level parameters, and only + // generate distinct descriptor sets when the shader has opted in + // via explicit parameter blocks. 
+ // + // To achieve this goal, we will manually allocate a default descriptor + // set for root parameters in our signature, and then recursively + // add all the binding/descriptor ranges implied by the global-scope + // parameters. + // + auto rootDescriptorSetIndex = builder.addDescriptorSet(); + builder.addAsValue(layout->getGlobalParamsVarLayout(), rootDescriptorSetIndex); -Result D3D12Device::createComputePipelineState(const ComputePipelineStateDesc& desc, IPipelineState** outState) -{ - RefPtr pipelineStateImpl = new PipelineStateImpl(this); - pipelineStateImpl->init(desc); - returnComPtr(outState, pipelineStateImpl); + for (SlangUInt i = 0; i < layout->getEntryPointCount(); i++) + { + // Entry-point parameters should also be added to the default root + // descriptor set. + // + // We add the parameters using the "variable layout" for the entry point + // and not just its type layout, to ensure that any offset information is + // applied correctly to the `register` and `space` information for entry-point + // parameters. + // + // Note: When we start to support DXR we will need to handle entry-point parameters + // differently because they will need to map to local root signatures rather than + // being included in the global root signature as is being done here. 
+ // + auto entryPoint = layout->getEntryPointByIndex(i); + builder.addAsValue(entryPoint->getVarLayout(), rootDescriptorSetIndex); + } + + auto& rootSignatureDesc = builder.build(); + + ComPtr signature; + ComPtr error; + if (SLANG_FAILED(device->m_D3D12SerializeRootSignature( + &rootSignatureDesc, + D3D_ROOT_SIGNATURE_VERSION_1, + signature.writeRef(), + error.writeRef()))) + { + getDebugCallback()->handleMessage( + DebugMessageType::Error, + DebugMessageSource::Layer, + "error: D3D12SerializeRootSignature failed"); + if (error) + { + getDebugCallback()->handleMessage( + DebugMessageType::Error, + DebugMessageSource::Driver, + (const char*)error->GetBufferPointer()); + if (outError) + returnComPtr(outError, error); + } + return SLANG_FAIL; + } + + SLANG_RETURN_ON_FAIL(device->m_device->CreateRootSignature( + 0, + signature->GetBufferPointer(), + signature->GetBufferSize(), + IID_PPV_ARGS(outRootSignature))); return SLANG_OK; } -Result D3D12Device::QueryPoolImpl::init(const IQueryPool::Desc& desc, D3D12Device* device) +Result RootShaderObjectLayoutImpl::create( + DeviceImpl* device, + slang::IComponentType* program, + slang::ProgramLayout* programLayout, + RootShaderObjectLayoutImpl** outLayout, + ID3DBlob** outError) { - m_desc = desc; + RootShaderObjectLayoutImpl::Builder builder(device, program, programLayout); + builder.addGlobalParams(programLayout->getGlobalParamsVarLayout()); - // Translate query type. 
- D3D12_QUERY_HEAP_DESC heapDesc = {}; - heapDesc.Count = (UINT)desc.count; - heapDesc.NodeMask = 1; - switch (desc.type) + SlangInt entryPointCount = programLayout->getEntryPointCount(); + for (SlangInt e = 0; e < entryPointCount; ++e) { - case QueryType::Timestamp: - heapDesc.Type = D3D12_QUERY_HEAP_TYPE_TIMESTAMP; - m_queryType = D3D12_QUERY_TYPE_TIMESTAMP; - break; - default: - return SLANG_E_INVALID_ARG; + auto slangEntryPoint = programLayout->getEntryPointByIndex(e); + RefPtr entryPointLayout; + SLANG_RETURN_ON_FAIL(ShaderObjectLayoutImpl::createForElementType( + device, slangEntryPoint->getTypeLayout(), entryPointLayout.writeRef())); + builder.addEntryPoint(slangEntryPoint->getStage(), entryPointLayout); } - // Create query heap. - auto d3dDevice = device->m_device; - SLANG_RETURN_ON_FAIL(d3dDevice->CreateQueryHeap( - &heapDesc, IID_PPV_ARGS(m_queryHeap.writeRef()))); - - // Create readback buffer. - D3D12_HEAP_PROPERTIES heapProps; - heapProps.Type = D3D12_HEAP_TYPE_READBACK; - heapProps.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN; - heapProps.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN; - heapProps.CreationNodeMask = 1; - heapProps.VisibleNodeMask = 1; - D3D12_RESOURCE_DESC resourceDesc = {}; - _initBufferResourceDesc(sizeof(uint64_t) * desc.count, resourceDesc); - SLANG_RETURN_ON_FAIL(m_readBackBuffer.initCommitted( - d3dDevice, - heapProps, - D3D12_HEAP_FLAG_NONE, - resourceDesc, - D3D12_RESOURCE_STATE_COPY_DEST, - nullptr)); + RefPtr layout; + SLANG_RETURN_ON_FAIL(builder.build(layout.writeRef())); - // Create command allocator. 
- SLANG_RETURN_ON_FAIL(d3dDevice->CreateCommandAllocator( - D3D12_COMMAND_LIST_TYPE_DIRECT, IID_PPV_ARGS(m_commandAllocator.writeRef()))); + if (program->getSpecializationParamCount() == 0) + { + // For root object, we would like know the union of all binding slots + // including all sub-objects in the shader-object hierarchy, so at + // parameter binding time we can easily know how many GPU descriptor tables + // to create without walking through the shader-object hierarchy again. + // We build out this array along with root signature construction and store + // it in `m_gpuDescriptorSetInfos`. + SLANG_RETURN_ON_FAIL(createRootSignatureFromSlang( + device, layout, program, layout->m_rootSignature.writeRef(), outError)); + } - // Create command list. - SLANG_RETURN_ON_FAIL(d3dDevice->CreateCommandList( - 0, - D3D12_COMMAND_LIST_TYPE_DIRECT, - m_commandAllocator, - nullptr, - IID_PPV_ARGS(m_commandList.writeRef()))); - m_commandList->Close(); + *outLayout = layout.detach(); - // Create fence. - SLANG_RETURN_ON_FAIL(d3dDevice->CreateFence( - 0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(m_fence.writeRef()))); + return SLANG_OK; +} - // Get command queue from device. - m_commandQueue = device->m_resourceCommandQueue->m_d3dQueue; +Result RootShaderObjectLayoutImpl::init(Builder* builder) +{ + auto renderer = builder->m_renderer; - // Create wait event. 
- m_waitEvent = CreateEventEx( - nullptr, - false, - 0, - EVENT_ALL_ACCESS); + SLANG_RETURN_ON_FAIL(Super::init(builder)); + m_program = builder->m_program; + m_programLayout = builder->m_programLayout; + m_entryPoints = builder->m_entryPoints; return SLANG_OK; } -Result D3D12Device::PlainBufferProxyQueryPoolImpl::init( - const IQueryPool::Desc& desc, - D3D12Device* device, - uint32_t stride) +Result ShaderProgramImpl::createShaderModule( + slang::EntryPointReflection* entryPointInfo, ComPtr kernelCode) { - ComPtr bufferResource; - IBufferResource::Desc bufferDesc = {}; - bufferDesc.defaultState = ResourceState::CopySource; - bufferDesc.elementSize = 0; - bufferDesc.type = IResource::Type::Buffer; - bufferDesc.sizeInBytes = desc.count * stride; - bufferDesc.format = Format::Unknown; - bufferDesc.allowedStates.add(ResourceState::UnorderedAccess); - SLANG_RETURN_ON_FAIL( - device->createBufferResource(bufferDesc, nullptr, bufferResource.writeRef())); - m_bufferResource = static_cast(bufferResource.get()); - m_queryType = desc.type; - m_device = device; - m_stride = stride; - m_count = (uint32_t)desc.count; - m_desc = desc; + ShaderBinary shaderBin; + shaderBin.stage = entryPointInfo->getStage(); + shaderBin.entryPointInfo = entryPointInfo; + shaderBin.code.addRange( + reinterpret_cast(kernelCode->getBufferPointer()), + (Index)kernelCode->getBufferSize()); + m_shaders.add(_Move(shaderBin)); return SLANG_OK; } -Result D3D12Device::createQueryPool(const IQueryPool::Desc& desc, IQueryPool** outState) +Result ShaderObjectImpl::create( + DeviceImpl* device, ShaderObjectLayoutImpl* layout, ShaderObjectImpl** outShaderObject) { - switch (desc.type) - { - case QueryType::AccelerationStructureCompactedSize: - case QueryType::AccelerationStructureSerializedSize: - case QueryType::AccelerationStructureCurrentSize: - { - RefPtr queryPoolImpl = - new PlainBufferProxyQueryPoolImpl(); - uint32_t stride = 8; - if (desc.type == QueryType::AccelerationStructureSerializedSize) - stride 
= 16; - SLANG_RETURN_ON_FAIL(queryPoolImpl->init(desc, this, stride)); - returnComPtr(outState, queryPoolImpl); - return SLANG_OK; - } - default: - { - RefPtr queryPoolImpl = new QueryPoolImpl(); - SLANG_RETURN_ON_FAIL(queryPoolImpl->init(desc, this)); - returnComPtr(outState, queryPoolImpl); - return SLANG_OK; - } - } + auto object = RefPtr(new ShaderObjectImpl()); + SLANG_RETURN_ON_FAIL( + object->init(device, layout, device->m_cpuViewHeap.Ptr(), device->m_cpuSamplerHeap.Ptr())); + returnRefPtrMove(outShaderObject, object); + return SLANG_OK; } -Result D3D12Device::createFence(const IFence::Desc& desc, IFence** outFence) +ShaderObjectImpl::~ShaderObjectImpl() { m_descriptorSet.freeIfSupported(); } + +RootShaderObjectLayoutImpl* RootShaderObjectImpl::getLayout() { - RefPtr fence = new FenceImpl(); - SLANG_RETURN_ON_FAIL(fence->init(this, desc)); - returnComPtr(outFence, fence); - return SLANG_OK; + return static_cast(m_layout.Ptr()); } -Result D3D12Device::waitForFences( - uint32_t fenceCount, IFence** fences, uint64_t* fenceValues, bool waitForAll, uint64_t timeout) -{ - ShortList waitHandles; - for (uint32_t i = 0; i < fenceCount; ++i) - { - auto fenceImpl = static_cast(fences[i]); - waitHandles.add(fenceImpl->getWaitEvent()); - SLANG_RETURN_ON_FAIL(fenceImpl->m_fence->SetEventOnCompletion(fenceValues[i], fenceImpl->getWaitEvent())); - } - auto result = WaitForMultipleObjects( - fenceCount, - waitHandles.getArrayView().getBuffer(), - waitForAll ? TRUE : FALSE, - timeout == kTimeoutInfinite ? INFINITE : (DWORD)(timeout / 1000000)); - if (result == WAIT_TIMEOUT) - return SLANG_E_TIME_OUT; - return result == WAIT_FAILED ? 
SLANG_FAIL : SLANG_OK; +UInt RootShaderObjectImpl::getEntryPointCount() { return (UInt)m_entryPoints.getCount(); } + +SlangResult RootShaderObjectImpl::getEntryPoint(UInt index, IShaderObject** outEntryPoint) +{ + returnComPtr(outEntryPoint, m_entryPoints[index]); + return SLANG_OK; } -#if SLANG_GFX_HAS_DXR_SUPPORT +Result RootShaderObjectImpl::collectSpecializationArgs(ExtendedShaderObjectTypeList& args) +{ + SLANG_RETURN_ON_FAIL(ShaderObjectImpl::collectSpecializationArgs(args)); + for (auto& entryPoint : m_entryPoints) + { + SLANG_RETURN_ON_FAIL(entryPoint->collectSpecializationArgs(args)); + } + return SLANG_OK; +} -class D3D12AccelerationStructureImpl - : public AccelerationStructureBase - , public D3D12Device::ResourceViewInternalImpl +Result RootShaderObjectImpl::_createSpecializedLayout(ShaderObjectLayoutImpl** outLayout) { -public: - RefPtr m_buffer; - uint64_t m_offset; - uint64_t m_size; - ComPtr m_device5; + ExtendedShaderObjectTypeList specializationArgs; + SLANG_RETURN_ON_FAIL(collectSpecializationArgs(specializationArgs)); + + // Note: There is an important policy decision being made here that we need + // to approach carefully. + // + // We are doing two different things that affect the layout of a program: + // + // 1. We are *composing* one or more pieces of code (notably the shared global/module + // stuff and the per-entry-point stuff). + // + // 2. We are *specializing* code that includes generic/existential parameters + // to concrete types/values. + // + // We need to decide the relative *order* of these two steps, because of how it impacts + // layout. The layout for `specialize(compose(A,B), X, Y)` is potentially different + // form that of `compose(specialize(A,X), speciealize(B,Y))`, even when both are + // semantically equivalent programs. 
+ // + // Right now we are using the first option: we are first generating a full composition + // of all the code we plan to use (global scope plus all entry points), and then + // specializing it to the concatenated specialization argumenst for all of that. + // + // In some cases, though, this model isn't appropriate. For example, when dealing with + // ray-tracing shaders and local root signatures, we really want the parameters of each + // entry point (actually, each entry-point *group*) to be allocated distinct storage, + // which really means we want to compute something like: + // + // SpecializedGlobals = specialize(compose(ModuleA, ModuleB, ...), X, Y, ...) + // + // SpecializedEP1 = compose(SpecializedGlobals, specialize(EntryPoint1, T, U, ...)) + // SpecializedEP2 = compose(SpecializedGlobals, specialize(EntryPoint2, A, B, ...)) + // + // Note how in this case all entry points agree on the layout for the shared/common + // parmaeters, but their layouts are also independent of one another. + // + // Furthermore, in this example, loading another entry point into the system would not + // rquire re-computing the layouts (or generated kernel code) for any of the entry + // points that had already been loaded (in contrast to a compose-then-specialize + // approach). + // + ComPtr specializedComponentType; + ComPtr diagnosticBlob; + auto result = getLayout()->getSlangProgram()->specialize( + specializationArgs.components.getArrayView().getBuffer(), + specializationArgs.getCount(), + specializedComponentType.writeRef(), + diagnosticBlob.writeRef()); -public: - virtual SLANG_NO_THROW DeviceAddress SLANG_MCALL getDeviceAddress() override + if (diagnosticBlob && diagnosticBlob->getBufferSize()) { - return m_buffer->getDeviceAddress() + m_offset; + getDebugCallback()->handleMessage( + SLANG_FAILED(result) ? 
DebugMessageType::Error : DebugMessageType::Info, + DebugMessageSource::Layer, + (const char*)diagnosticBlob->getBufferPointer()); } - virtual SLANG_NO_THROW Result SLANG_MCALL getNativeHandle(InteropHandle* outHandle) override + if (SLANG_FAILED(result)) + return result; + + ComPtr d3dDiagnosticBlob; + auto slangSpecializedLayout = specializedComponentType->getLayout(); + RefPtr specializedLayout; + auto rootLayoutResult = RootShaderObjectLayoutImpl::create( + static_cast(getRenderer()), + specializedComponentType, + slangSpecializedLayout, + specializedLayout.writeRef(), + d3dDiagnosticBlob.writeRef()); + + if (SLANG_FAILED(rootLayoutResult)) { - outHandle->api = InteropHandleAPI::DeviceAddress; - outHandle->handleValue = getDeviceAddress(); - return SLANG_OK; + return rootLayoutResult; } -}; - -Result D3D12Device::getAccelerationStructurePrebuildInfo( - const IAccelerationStructure::BuildInputs& buildInputs, - IAccelerationStructure::PrebuildInfo* outPrebuildInfo) -{ - if (!m_device5) - return SLANG_E_NOT_AVAILABLE; - - D3DAccelerationStructureInputsBuilder inputsBuilder; - SLANG_RETURN_ON_FAIL(inputsBuilder.build(buildInputs, getDebugCallback())); - D3D12_RAYTRACING_ACCELERATION_STRUCTURE_PREBUILD_INFO prebuildInfo; - m_device5->GetRaytracingAccelerationStructurePrebuildInfo(&inputsBuilder.desc, &prebuildInfo); + // Note: Computing the layout for the specialized program will have also computed + // the layouts for the entry points, and we really need to attach that information + // to them so that they don't go and try to compute their own specializations. + // + // TODO: Well, if we move to the specialization model described above then maybe + // we *will* want entry points to do their own specialization work... 
+ // + auto entryPointCount = m_entryPoints.getCount(); + for (Index i = 0; i < entryPointCount; ++i) + { + auto entryPointInfo = specializedLayout->getEntryPoint(i); + auto entryPointVars = m_entryPoints[i]; - outPrebuildInfo->resultDataMaxSize = prebuildInfo.ResultDataMaxSizeInBytes; - outPrebuildInfo->scratchDataSize = prebuildInfo.ScratchDataSizeInBytes; - outPrebuildInfo->updateScratchDataSize = prebuildInfo.UpdateScratchDataSizeInBytes; - return SLANG_OK; -} + entryPointVars->m_specializedLayout = entryPointInfo.layout; + } -Result D3D12Device::createAccelerationStructure( - const IAccelerationStructure::CreateDesc& desc, - IAccelerationStructure** outAS) -{ - RefPtr result = new D3D12AccelerationStructureImpl(); - result->m_device5 = m_device5; - result->m_buffer = static_cast(desc.buffer); - result->m_size = desc.size; - result->m_offset = desc.offset; - result->m_allocator = m_cpuViewHeap; - result->m_desc.type = IResourceView::Type::AccelerationStructure; - SLANG_RETURN_ON_FAIL(m_cpuViewHeap->allocate(&result->m_descriptor)); - D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc; - srvDesc.Format = DXGI_FORMAT_UNKNOWN; - srvDesc.ViewDimension = D3D12_SRV_DIMENSION_RAYTRACING_ACCELERATION_STRUCTURE; - srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; - srvDesc.RaytracingAccelerationStructure.Location = - result->m_buffer->getDeviceAddress()+ desc.offset; - m_device->CreateShaderResourceView(nullptr, &srvDesc, result->m_descriptor.cpuHandle); - returnComPtr(outAS, result); + returnRefPtrMove(outLayout, specializedLayout); return SLANG_OK; } -void translatePostBuildInfoDescs( - int propertyQueryCount, - AccelerationStructureQueryDesc* queryDescs, - List& postBuildInfoDescs) +Result RootShaderObjectImpl::copyFrom(IShaderObject* object, ITransientResourceHeap* transientHeap) { - postBuildInfoDescs.setCount(propertyQueryCount); - for (int i = 0; i < propertyQueryCount; i++) + if (auto srcObj = dynamic_cast(object)) { - switch 
(queryDescs[i].queryType) - { - case QueryType::AccelerationStructureCompactedSize: - postBuildInfoDescs[i].InfoType = - D3D12_RAYTRACING_ACCELERATION_STRUCTURE_POSTBUILD_INFO_COMPACTED_SIZE; - postBuildInfoDescs[i].DestBuffer = - static_cast(queryDescs[i].queryPool) - ->m_bufferResource->getDeviceAddress() + - sizeof(D3D12_RAYTRACING_ACCELERATION_STRUCTURE_POSTBUILD_INFO_COMPACTED_SIZE_DESC) * - queryDescs[i].firstQueryIndex; - break; - case QueryType::AccelerationStructureCurrentSize: - postBuildInfoDescs[i].InfoType = - D3D12_RAYTRACING_ACCELERATION_STRUCTURE_POSTBUILD_INFO_CURRENT_SIZE; - postBuildInfoDescs[i].DestBuffer = - static_cast(queryDescs[i].queryPool) - ->m_bufferResource->getDeviceAddress() + - sizeof(D3D12_RAYTRACING_ACCELERATION_STRUCTURE_POSTBUILD_INFO_COMPACTED_SIZE_DESC) * - queryDescs[i].firstQueryIndex; - break; - case QueryType::AccelerationStructureSerializedSize: - postBuildInfoDescs[i].InfoType = - D3D12_RAYTRACING_ACCELERATION_STRUCTURE_POSTBUILD_INFO_SERIALIZATION; - postBuildInfoDescs[i].DestBuffer = - static_cast(queryDescs[i].queryPool) - ->m_bufferResource->getDeviceAddress() + - sizeof(D3D12_RAYTRACING_ACCELERATION_STRUCTURE_POSTBUILD_INFO_SERIALIZATION_DESC) * - queryDescs[i].firstQueryIndex; - break; - } + *this = *srcObj; + return SLANG_OK; } + return SLANG_FAIL; } -void D3D12Device::CommandBufferImpl::RayTracingCommandEncoderImpl::buildAccelerationStructure( - const IAccelerationStructure::BuildDesc& desc, - int propertyQueryCount, - AccelerationStructureQueryDesc* queryDescs) +Result RootShaderObjectImpl::bindAsRoot( + BindingContext* context, RootShaderObjectLayoutImpl* specializedLayout) { - if (!m_commandBuffer->m_cmdList4) + // Pull updates from sub-objects when this is a mutable root shader object. + updateSubObjectsRecursive(); + + // A root shader object always binds as if it were a parameter block, + // insofar as it needs to allocate a descriptor set to hold the bindings + // for its own state and any sub-objects. 
+ // + // Note: We do not direclty use `bindAsParameterBlock` here because we also + // need to bind the entry points into the same descriptor set that is + // being used for the root object. + + BindingOffset rootOffset; + + // Bind all root parameters first. + Super::bindRootArguments(context, rootOffset.rootParam); + + DescriptorSet descriptorSet; + SLANG_RETURN_ON_FAIL(prepareToBindAsParameterBlock( + context, /* inout */ rootOffset, specializedLayout, descriptorSet)); + + SLANG_RETURN_ON_FAIL( + Super::bindAsConstantBuffer(context, descriptorSet, rootOffset, specializedLayout)); + + auto entryPointCount = m_entryPoints.getCount(); + for (Index i = 0; i < entryPointCount; ++i) { - getDebugCallback()->handleMessage( - DebugMessageType::Error, - DebugMessageSource::Layer, - "Ray-tracing is not supported on current system."); - return; + auto entryPoint = m_entryPoints[i]; + auto& entryPointInfo = specializedLayout->getEntryPoint(i); + + auto entryPointOffset = rootOffset; + entryPointOffset += entryPointInfo.offset; + + entryPoint->updateSubObjectsRecursive(); + + SLANG_RETURN_ON_FAIL(entryPoint->bindAsConstantBuffer( + context, descriptorSet, entryPointOffset, entryPointInfo.layout)); } - D3D12AccelerationStructureImpl* destASImpl = nullptr; - if (desc.dest) - destASImpl = static_cast(desc.dest); - D3D12AccelerationStructureImpl* srcASImpl = nullptr; - if (desc.source) - srcASImpl = static_cast(desc.source); - - D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_DESC buildDesc = {}; - buildDesc.DestAccelerationStructureData = destASImpl->getDeviceAddress(); - buildDesc.SourceAccelerationStructureData = srcASImpl?srcASImpl->getDeviceAddress() : 0; - buildDesc.ScratchAccelerationStructureData = desc.scratchData; - D3DAccelerationStructureInputsBuilder builder; - builder.build(desc.inputs, getDebugCallback()); - buildDesc.Inputs = builder.desc; - List postBuildInfoDescs; - translatePostBuildInfoDescs(propertyQueryCount, queryDescs, postBuildInfoDescs); - 
m_commandBuffer->m_cmdList4->BuildRaytracingAccelerationStructure( - &buildDesc, (UINT)propertyQueryCount, postBuildInfoDescs.getBuffer()); + return SLANG_OK; } -void D3D12Device::CommandBufferImpl::RayTracingCommandEncoderImpl::copyAccelerationStructure( - IAccelerationStructure* dest, - IAccelerationStructure* src, - AccelerationStructureCopyMode mode) +Result RootShaderObjectImpl::resetImpl( + DeviceImpl* device, + RootShaderObjectLayoutImpl* layout, + DescriptorHeapReference viewHeap, + DescriptorHeapReference samplerHeap, + bool isMutable) { - auto destASImpl = static_cast(dest); - auto srcASImpl = static_cast(src); - D3D12_RAYTRACING_ACCELERATION_STRUCTURE_COPY_MODE copyMode; - switch (mode) - { - case AccelerationStructureCopyMode::Clone: - copyMode = D3D12_RAYTRACING_ACCELERATION_STRUCTURE_COPY_MODE_CLONE; - break; - case AccelerationStructureCopyMode::Compact: - copyMode = D3D12_RAYTRACING_ACCELERATION_STRUCTURE_COPY_MODE_COMPACT; - break; - default: - getDebugCallback()->handleMessage( - DebugMessageType::Error, - DebugMessageSource::Layer, - "Unsupported AccelerationStructureCopyMode."); - return; + SLANG_RETURN_ON_FAIL(Super::init(device, layout, viewHeap, samplerHeap)); + m_isMutable = isMutable; + m_specializedLayout = nullptr; + m_entryPoints.clear(); + for (auto entryPointInfo : layout->getEntryPoints()) + { + RefPtr entryPoint; + SLANG_RETURN_ON_FAIL( + ShaderObjectImpl::create(device, entryPointInfo.layout, entryPoint.writeRef())); + entryPoint->m_isMutable = isMutable; + m_entryPoints.add(entryPoint); } - m_commandBuffer->m_cmdList4->CopyRaytracingAccelerationStructure( - destASImpl->getDeviceAddress(), srcASImpl->getDeviceAddress(), copyMode); + return SLANG_OK; } -void D3D12Device::CommandBufferImpl::RayTracingCommandEncoderImpl:: - queryAccelerationStructureProperties( - int accelerationStructureCount, - IAccelerationStructure* const* accelerationStructures, - int queryCount, - AccelerationStructureQueryDesc* queryDescs) +Result 
RootShaderObjectImpl::reset( + DeviceImpl* device, RootShaderObjectLayoutImpl* layout, TransientResourceHeapImpl* heap) { - List postBuildInfoDescs; - List asAddresses; - asAddresses.setCount(accelerationStructureCount); - for (int i = 0; i < accelerationStructureCount; i++) - asAddresses[i] = accelerationStructures[i]->getDeviceAddress(); - translatePostBuildInfoDescs(queryCount, queryDescs, postBuildInfoDescs); - m_commandBuffer->m_cmdList4->EmitRaytracingAccelerationStructurePostbuildInfo( - postBuildInfoDescs.getBuffer(), - (UINT)accelerationStructureCount, - asAddresses.getBuffer()); + return resetImpl( + device, layout, &heap->m_stagingCpuViewHeap, &heap->m_stagingCpuSamplerHeap, false); } -void D3D12Device::CommandBufferImpl::RayTracingCommandEncoderImpl::serializeAccelerationStructure( - DeviceAddress dest, - IAccelerationStructure* src) +RefPtr ShaderTableImpl::createDeviceBuffer( + PipelineStateBase* pipeline, + TransientResourceHeapBase* transientHeap, + IResourceCommandEncoder* encoder) { - auto srcASImpl = static_cast(src); - m_commandBuffer->m_cmdList4->CopyRaytracingAccelerationStructure( - dest, - srcASImpl->getDeviceAddress(), - D3D12_RAYTRACING_ACCELERATION_STRUCTURE_COPY_MODE_SERIALIZE); + uint32_t raygenTableSize = m_rayGenShaderCount * D3D12_SHADER_IDENTIFIER_SIZE_IN_BYTES; + uint32_t missTableSize = m_missShaderCount * D3D12_SHADER_IDENTIFIER_SIZE_IN_BYTES; + uint32_t hitgroupTableSize = m_hitGroupCount * D3D12_SHADER_IDENTIFIER_SIZE_IN_BYTES; + m_rayGenTableOffset = 0; + m_missTableOffset = (uint32_t)D3DUtil::calcAligned( + raygenTableSize, D3D12_RAYTRACING_SHADER_TABLE_BYTE_ALIGNMENT); + m_hitGroupTableOffset = (uint32_t)D3DUtil::calcAligned( + m_missTableOffset + missTableSize, D3D12_RAYTRACING_SHADER_TABLE_BYTE_ALIGNMENT); + uint32_t tableSize = m_hitGroupTableOffset + hitgroupTableSize; + + auto pipelineImpl = static_cast(pipeline); + ComPtr bufferResource; + IBufferResource::Desc bufferDesc = {}; + bufferDesc.memoryType = 
gfx::MemoryType::DeviceLocal; + bufferDesc.defaultState = ResourceState::General; + bufferDesc.type = IResource::Type::Buffer; + bufferDesc.sizeInBytes = tableSize; + m_device->createBufferResource(bufferDesc, nullptr, bufferResource.writeRef()); + + ComPtr stateObjectProperties; + pipelineImpl->m_stateObject->QueryInterface(stateObjectProperties.writeRef()); + + TransientResourceHeapImpl* transientHeapImpl = + static_cast(transientHeap); + + IBufferResource* stagingBuffer = nullptr; + size_t stagingBufferOffset = 0; + transientHeapImpl->allocateStagingBuffer( + tableSize, stagingBuffer, stagingBufferOffset, MemoryType::Upload); + + assert(stagingBuffer); + void* stagingPtr = nullptr; + stagingBuffer->map(nullptr, &stagingPtr); + + auto copyShaderIdInto = [&](void* dest, String& name, const ShaderRecordOverwrite& overwrite) + { + if (name.getLength()) + { + void* shaderId = stateObjectProperties->GetShaderIdentifier(name.toWString().begin()); + memcpy(dest, shaderId, D3D12_SHADER_IDENTIFIER_SIZE_IN_BYTES); + } + else + { + memset(dest, 0, D3D12_SHADER_IDENTIFIER_SIZE_IN_BYTES); + } + if (overwrite.size) + { + memcpy((uint8_t*)dest + overwrite.offset, overwrite.data, overwrite.size); + } + }; + + uint8_t* stagingBufferPtr = (uint8_t*)stagingPtr + stagingBufferOffset; + for (uint32_t i = 0; i < m_rayGenShaderCount; i++) + { + copyShaderIdInto( + stagingBufferPtr + m_rayGenTableOffset + D3D12_SHADER_IDENTIFIER_SIZE_IN_BYTES * i, + m_shaderGroupNames[i], + m_recordOverwrites[i]); + } + for (uint32_t i = 0; i < m_missShaderCount; i++) + { + copyShaderIdInto( + stagingBufferPtr + m_missTableOffset + D3D12_SHADER_IDENTIFIER_SIZE_IN_BYTES * i, + m_shaderGroupNames[m_rayGenShaderCount + i], + m_recordOverwrites[m_rayGenShaderCount + i]); + } + for (uint32_t i = 0; i < m_hitGroupCount; i++) + { + copyShaderIdInto( + stagingBufferPtr + m_hitGroupTableOffset + D3D12_SHADER_IDENTIFIER_SIZE_IN_BYTES * i, + m_shaderGroupNames[m_rayGenShaderCount + m_missShaderCount + i], + 
m_recordOverwrites[m_rayGenShaderCount + m_missShaderCount + i]); + } + + stagingBuffer->unmap(nullptr); + encoder->copyBuffer(bufferResource, 0, stagingBuffer, stagingBufferOffset, tableSize); + encoder->bufferBarrier( + 1, + bufferResource.readRef(), + gfx::ResourceState::CopyDestination, + gfx::ResourceState::ShaderResource); + RefPtr resultPtr = static_cast(bufferResource.get()); + return _Move(resultPtr); } -void D3D12Device::CommandBufferImpl::RayTracingCommandEncoderImpl::deserializeAccelerationStructure( - IAccelerationStructure* dest, - DeviceAddress source) +// There are a pair of cyclic references between a `TransientResourceHeap` and +// a `CommandBuffer` created from the heap. We need to break the cycle upon +// the public reference count of a command buffer dropping to 0. + +ICommandBuffer* CommandBufferImpl::getInterface(const Guid& guid) { - auto destASImpl = static_cast(dest); - m_commandBuffer->m_cmdList4->CopyRaytracingAccelerationStructure( - dest->getDeviceAddress(), - source, - D3D12_RAYTRACING_ACCELERATION_STRUCTURE_COPY_MODE_DESERIALIZE); + if (guid == GfxGUID::IID_ISlangUnknown || guid == GfxGUID::IID_ICommandBuffer) + return static_cast(this); + return nullptr; } -void D3D12Device::CommandBufferImpl::RayTracingCommandEncoderImpl::bindPipeline( - IPipelineState* state, IShaderObject** outRootObject) +Result CommandBufferImpl::getNativeHandle(InteropHandle* handle) { - bindPipelineImpl(state, outRootObject); + handle->api = InteropHandleAPI::D3D12; + handle->handleValue = (uint64_t)m_cmdList.get(); + return SLANG_OK; } -void D3D12Device::CommandBufferImpl::RayTracingCommandEncoderImpl::dispatchRays( - uint32_t rayGenShaderIndex, - IShaderTable* shaderTable, - int32_t width, - int32_t height, - int32_t depth) +void CommandBufferImpl::bindDescriptorHeaps() { - RefPtr newPipeline; - PipelineStateBase* pipeline = m_currentPipeline.Ptr(); + if (!m_descriptorHeapsBound) { - struct RayTracingSubmitter : public ComputeSubmitter - { - 
ID3D12GraphicsCommandList4* m_cmdList4; - RayTracingSubmitter(ID3D12GraphicsCommandList4* cmdList4) - : ComputeSubmitter(cmdList4), m_cmdList4(cmdList4) - { - } - virtual void setPipelineState(PipelineStateBase* pipeline) override - { - auto pipelineImpl = static_cast(pipeline); - m_cmdList4->SetPipelineState1(pipelineImpl->m_stateObject.get()); - } + ID3D12DescriptorHeap* heaps[] = { + m_transientHeap->getCurrentViewHeap().getHeap(), + m_transientHeap->getCurrentSamplerHeap().getHeap(), }; - RayTracingSubmitter submitter(m_commandBuffer->m_cmdList4); - if (SLANG_FAILED(_bindRenderState(&submitter, newPipeline))) - { - assert(!"Failed to bind render state"); - } - if (newPipeline) - pipeline = newPipeline.Ptr(); + m_cmdList->SetDescriptorHeaps(SLANG_COUNT_OF(heaps), heaps); + m_descriptorHeapsBound = true; } - auto pipelineImpl = static_cast(pipeline); +} - auto shaderTableImpl = static_cast(shaderTable); +void CommandBufferImpl::reinit() +{ + invalidateDescriptorHeapBinding(); + m_rootShaderObject.init(m_renderer); +} - ResourceCommandEncoderImpl resourceCopyEncoder; - resourceCopyEncoder.init(m_commandBuffer); - auto shaderTableBuffer = shaderTableImpl->getOrCreateBuffer(pipelineImpl, m_transientHeap, &resourceCopyEncoder); - auto shaderTableAddr = shaderTableBuffer->getDeviceAddress(); +void CommandBufferImpl::init( + DeviceImpl* renderer, + ID3D12GraphicsCommandList* d3dCommandList, + TransientResourceHeapImpl* transientHeap) +{ + m_transientHeap = transientHeap; + m_renderer = renderer; + m_cmdList = d3dCommandList; - D3D12_DISPATCH_RAYS_DESC dispatchDesc = {}; + reinit(); - dispatchDesc.RayGenerationShaderRecord.StartAddress = - shaderTableAddr + shaderTableImpl->m_rayGenTableOffset + - rayGenShaderIndex * D3D12_SHADER_IDENTIFIER_SIZE_IN_BYTES; - dispatchDesc.RayGenerationShaderRecord.SizeInBytes = D3D12_SHADER_IDENTIFIER_SIZE_IN_BYTES; +#if SLANG_GFX_HAS_DXR_SUPPORT + m_cmdList->QueryInterface(m_cmdList4.writeRef()); + if (m_cmdList4) + { + m_cmdList1 = 
m_cmdList4; + return; + } +#endif + m_cmdList->QueryInterface(m_cmdList1.writeRef()); +} - dispatchDesc.MissShaderTable.StartAddress = - shaderTableAddr + shaderTableImpl->m_missTableOffset; - dispatchDesc.MissShaderTable.SizeInBytes = - shaderTableImpl->m_missShaderCount * D3D12_SHADER_IDENTIFIER_SIZE_IN_BYTES; - dispatchDesc.MissShaderTable.StrideInBytes = D3D12_SHADER_IDENTIFIER_SIZE_IN_BYTES; +void CommandBufferImpl::encodeResourceCommands(IResourceCommandEncoder** outEncoder) +{ + m_resourceCommandEncoder.init(this); + *outEncoder = &m_resourceCommandEncoder; +} - dispatchDesc.HitGroupTable.StartAddress = - shaderTableAddr + shaderTableImpl->m_hitGroupTableOffset; - dispatchDesc.HitGroupTable.SizeInBytes = - shaderTableImpl->m_hitGroupCount * D3D12_SHADER_IDENTIFIER_SIZE_IN_BYTES; - dispatchDesc.HitGroupTable.StrideInBytes = D3D12_SHADER_IDENTIFIER_SIZE_IN_BYTES; +void CommandBufferImpl::encodeRenderCommands( + IRenderPassLayout* renderPass, IFramebuffer* framebuffer, IRenderCommandEncoder** outEncoder) +{ + m_renderCommandEncoder.init( + m_renderer, + m_transientHeap, + this, + static_cast(renderPass), + static_cast(framebuffer)); + *outEncoder = &m_renderCommandEncoder; +} + +void CommandBufferImpl::encodeComputeCommands(IComputeCommandEncoder** outEncoder) +{ + m_computeCommandEncoder.init(m_renderer, m_transientHeap, this); + *outEncoder = &m_computeCommandEncoder; +} - dispatchDesc.Width = (UINT)width; - dispatchDesc.Height = (UINT)height; - dispatchDesc.Depth = (UINT)depth; - m_commandBuffer->m_cmdList4->DispatchRays(&dispatchDesc); +void CommandBufferImpl::encodeRayTracingCommands(IRayTracingCommandEncoder** outEncoder) +{ +#if SLANG_GFX_HAS_DXR_SUPPORT + m_rayTracingCommandEncoder.init(this); + *outEncoder = &m_rayTracingCommandEncoder; +#else + *outEncoder = nullptr; +#endif } -Result D3D12Device::RayTracingPipelineStateImpl::ensureAPIPipelineStateCreated() +void CommandBufferImpl::close() { m_cmdList->Close(); } + +void 
ResourceCommandEncoderImpl::copyBuffer( + IBufferResource* dst, size_t dstOffset, IBufferResource* src, size_t srcOffset, size_t size) { - if (m_stateObject) - return SLANG_OK; + auto dstBuffer = static_cast(dst); + auto srcBuffer = static_cast(src); + + m_commandBuffer->m_cmdList->CopyBufferRegion( + dstBuffer->m_resource.getResource(), + dstOffset, + srcBuffer->m_resource.getResource(), + srcOffset, + size); +} - auto program = static_cast(m_program.Ptr()); - auto slangGlobalScope = program->linkedProgram; - auto programLayout = slangGlobalScope->getLayout(); +void ResourceCommandEncoderImpl::uploadBufferData( + IBufferResource* dst, size_t offset, size_t size, void* data) +{ + uploadBufferDataImpl( + m_commandBuffer->m_renderer->m_device, + m_commandBuffer->m_cmdList, + m_commandBuffer->m_transientHeap, + static_cast(dst), + offset, + size, + data); +} - List subObjects; - ChunkedList dxilLibraries; - ChunkedList hitGroups; - ChunkedList> codeBlobs; - ChunkedList exports; - ChunkedList strPtrs; +void ResourceCommandEncoderImpl::textureBarrier( + size_t count, ITextureResource* const* textures, ResourceState src, ResourceState dst) +{ + ShortList barriers; - ComPtr diagnostics; - ChunkedList stringPool; - auto getWStr = [&](const char* name) - { - String str = String(name); - auto wstr = str.toWString(); - return stringPool.add(wstr)->begin(); - }; - auto compileShader = [&](slang::EntryPointLayout* entryPointInfo, - slang::IComponentType* component, - SlangInt entryPointIndex) + for (size_t i = 0; i < count; i++) { - ComPtr codeBlob; - auto compileResult = component->getEntryPointCode( - entryPointIndex, 0, codeBlob.writeRef(), diagnostics.writeRef()); - if (diagnostics.get()) + auto textureImpl = static_cast(textures[i]); + auto d3dFormat = D3DUtil::getMapFormat(textureImpl->getDesc()->format); + auto textureDesc = textureImpl->getDesc(); + D3D12_RESOURCE_BARRIER barrier; + barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE; + if (src == dst && src == 
ResourceState::UnorderedAccess) { - getDebugCallback()->handleMessage( - compileResult == SLANG_OK ? DebugMessageType::Warning : DebugMessageType::Error, - DebugMessageSource::Slang, - (char*)diagnostics->getBufferPointer()); + barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_UAV; + barrier.UAV.pResource = textureImpl->m_resource.getResource(); } - SLANG_RETURN_ON_FAIL(compileResult); - codeBlobs.add(codeBlob); - D3D12_DXIL_LIBRARY_DESC library = {}; - library.DXILLibrary.BytecodeLength = codeBlob->getBufferSize(); - library.DXILLibrary.pShaderBytecode = codeBlob->getBufferPointer(); - library.NumExports = 1; - D3D12_EXPORT_DESC exportDesc = {}; - exportDesc.Name = getWStr(entryPointInfo->getNameOverride()); - exportDesc.ExportToRename = getWStr(entryPointInfo->getNameOverride()); - exportDesc.Flags = D3D12_EXPORT_FLAG_NONE; - library.pExports = exports.add(exportDesc); + else + { + barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; + barrier.Transition.StateBefore = D3DUtil::getResourceState(src); + barrier.Transition.StateAfter = D3DUtil::getResourceState(dst); + if (barrier.Transition.StateBefore == barrier.Transition.StateAfter) + continue; + barrier.Transition.pResource = textureImpl->m_resource.getResource(); + auto planeCount = + D3DUtil::getPlaneSliceCount(D3DUtil::getMapFormat(textureImpl->getDesc()->format)); + auto arraySize = textureDesc->arraySize; + if (arraySize == 0) + arraySize = 1; + barrier.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; + } + barriers.add(barrier); + } + if (barriers.getCount()) + { + m_commandBuffer->m_cmdList->ResourceBarrier( + (UINT)barriers.getCount(), barriers.getArrayView().getBuffer()); + } +} - D3D12_STATE_SUBOBJECT dxilSubObject = {}; - dxilSubObject.Type = D3D12_STATE_SUBOBJECT_TYPE_DXIL_LIBRARY; - dxilSubObject.pDesc = dxilLibraries.add(library); - subObjects.add(dxilSubObject); - return SLANG_OK; - }; - if (program->linkedEntryPoints.getCount() == 0) +void ResourceCommandEncoderImpl::bufferBarrier( + 
size_t count, IBufferResource* const* buffers, ResourceState src, ResourceState dst) +{ + + List barriers; + barriers.reserve(count); + + for (size_t i = 0; i < count; i++) { - for (SlangUInt i = 0; i < programLayout->getEntryPointCount(); i++) + auto bufferImpl = static_cast(buffers[i]); + + D3D12_RESOURCE_BARRIER barrier = {}; + // If the src == dst, it must be a UAV barrier. + barrier.Type = (src == dst && dst == ResourceState::UnorderedAccess) + ? D3D12_RESOURCE_BARRIER_TYPE_UAV + : D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; + barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE; + + if (barrier.Type == D3D12_RESOURCE_BARRIER_TYPE_UAV) { - SLANG_RETURN_ON_FAIL(compileShader( - programLayout->getEntryPointByIndex(i), program->linkedProgram, (SlangInt)i)); + barrier.UAV.pResource = bufferImpl->m_resource; } - } - else - { - for (auto& entryPoint : program->linkedEntryPoints) + else { - SLANG_RETURN_ON_FAIL( - compileShader(entryPoint->getLayout()->getEntryPointByIndex(0), entryPoint, 0)); + barrier.Transition.pResource = bufferImpl->m_resource; + barrier.Transition.StateBefore = D3DUtil::getResourceState(src); + barrier.Transition.StateAfter = D3DUtil::getResourceState(dst); + barrier.Transition.Subresource = 0; + if (barrier.Transition.StateAfter == barrier.Transition.StateBefore) + continue; } + barriers.add(barrier); + } + if (barriers.getCount()) + { + m_commandBuffer->m_cmdList4->ResourceBarrier( + (UINT)barriers.getCount(), barriers.getArrayView().getBuffer()); } +} - for (Index i = 0; i < desc.rayTracing.hitGroupDescs.getCount(); i++) +void ResourceCommandEncoderImpl::writeTimestamp(IQueryPool* pool, SlangInt index) +{ + static_cast(pool)->writeTimestamp(m_commandBuffer->m_cmdList, index); +} + +void ResourceCommandEncoderImpl::copyTexture( + ITextureResource* dst, + ResourceState dstState, + SubresourceRange dstSubresource, + ITextureResource::Offset3D dstOffset, + ITextureResource* src, + ResourceState srcState, + SubresourceRange srcSubresource, + 
ITextureResource::Offset3D srcOffset, + ITextureResource::Size extent) +{ + auto dstTexture = static_cast(dst); + auto srcTexture = static_cast(src); + + if (dstSubresource.layerCount == 0 && dstSubresource.mipLevelCount == 0 && + srcSubresource.layerCount == 0 && srcSubresource.mipLevelCount == 0) { - auto& hitGroup = desc.rayTracing.hitGroups[i]; - D3D12_HIT_GROUP_DESC hitGroupDesc = {}; - hitGroupDesc.Type = hitGroup.intersectionEntryPoint.getLength() == 0 - ? D3D12_HIT_GROUP_TYPE_TRIANGLES - : D3D12_HIT_GROUP_TYPE_PROCEDURAL_PRIMITIVE; + m_commandBuffer->m_cmdList->CopyResource( + dstTexture->m_resource.getResource(), srcTexture->m_resource.getResource()); + return; + } - if (hitGroup.anyHitEntryPoint.getLength()) + auto d3dFormat = D3DUtil::getMapFormat(dstTexture->getDesc()->format); + auto aspectMask = (int32_t)dstSubresource.aspectMask; + if (dstSubresource.aspectMask == TextureAspect::Default) + aspectMask = (int32_t)TextureAspect::Color; + while (aspectMask) + { + auto aspect = Math::getLowestBit((int32_t)aspectMask); + aspectMask &= ~aspect; + auto planeIndex = D3DUtil::getPlaneSlice(d3dFormat, (TextureAspect)aspect); + for (uint32_t layer = 0; layer < dstSubresource.layerCount; layer++) { - hitGroupDesc.AnyHitShaderImport = getWStr(hitGroup.anyHitEntryPoint.getBuffer()); + for (uint32_t mipLevel = 0; mipLevel < dstSubresource.mipLevelCount; mipLevel++) + { + D3D12_TEXTURE_COPY_LOCATION dstRegion = {}; + + dstRegion.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; + dstRegion.pResource = dstTexture->m_resource.getResource(); + dstRegion.SubresourceIndex = D3DUtil::getSubresourceIndex( + dstSubresource.mipLevel + mipLevel, + dstSubresource.baseArrayLayer + layer, + planeIndex, + dstTexture->getDesc()->numMipLevels, + dstTexture->getDesc()->arraySize); + + D3D12_TEXTURE_COPY_LOCATION srcRegion = {}; + srcRegion.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; + srcRegion.pResource = srcTexture->m_resource.getResource(); + srcRegion.SubresourceIndex = 
D3DUtil::getSubresourceIndex( + srcSubresource.mipLevel + mipLevel, + srcSubresource.baseArrayLayer + layer, + planeIndex, + srcTexture->getDesc()->numMipLevels, + srcTexture->getDesc()->arraySize); + + D3D12_BOX srcBox = {}; + srcBox.left = srcOffset.x; + srcBox.top = srcOffset.y; + srcBox.front = srcOffset.z; + srcBox.right = srcBox.left + extent.width; + srcBox.bottom = srcBox.top + extent.height; + srcBox.back = srcBox.front + extent.depth; + + m_commandBuffer->m_cmdList->CopyTextureRegion( + &dstRegion, dstOffset.x, dstOffset.y, dstOffset.z, &srcRegion, &srcBox); + } } - if (hitGroup.closestHitEntryPoint.getLength()) + } +} + +void ResourceCommandEncoderImpl::uploadTextureData( + ITextureResource* dst, + SubresourceRange subResourceRange, + ITextureResource::Offset3D offset, + ITextureResource::Size extent, + ITextureResource::SubresourceData* subResourceData, + size_t subResourceDataCount) +{ + auto dstTexture = static_cast(dst); + auto baseSubresourceIndex = D3DUtil::getSubresourceIndex( + subResourceRange.mipLevel, + subResourceRange.baseArrayLayer, + 0, + dstTexture->getDesc()->numMipLevels, + dstTexture->getDesc()->arraySize); + auto textureSize = dstTexture->getDesc()->size; + FormatInfo formatInfo = {}; + gfxGetFormatInfo(dstTexture->getDesc()->format, &formatInfo); + for (uint32_t i = 0; i < (uint32_t)subResourceDataCount; i++) + { + auto subresourceIndex = baseSubresourceIndex + i; + // Get the footprint + D3D12_RESOURCE_DESC texDesc = dstTexture->m_resource.getResource()->GetDesc(); + + D3D12_TEXTURE_COPY_LOCATION dstRegion = {}; + + dstRegion.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; + dstRegion.SubresourceIndex = subresourceIndex; + dstRegion.pResource = dstTexture->m_resource.getResource(); + + D3D12_TEXTURE_COPY_LOCATION srcRegion = {}; + srcRegion.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; + D3D12_PLACED_SUBRESOURCE_FOOTPRINT& footprint = srcRegion.PlacedFootprint; + footprint.Offset = 0; + footprint.Footprint.Format = 
texDesc.Format; + uint32_t mipLevel = + D3DUtil::getSubresourceMipLevel(subresourceIndex, dstTexture->getDesc()->numMipLevels); + if (extent.width != ITextureResource::kRemainingTextureSize) + { + footprint.Footprint.Width = extent.width; + } + else { - hitGroupDesc.ClosestHitShaderImport = - getWStr(hitGroup.closestHitEntryPoint.getBuffer()); + footprint.Footprint.Width = Math::Max(1, (textureSize.width >> mipLevel)) - offset.x; } - if (hitGroup.intersectionEntryPoint.getLength()) + if (extent.height != ITextureResource::kRemainingTextureSize) { - hitGroupDesc.IntersectionShaderImport = - getWStr(hitGroup.intersectionEntryPoint.getBuffer()); + footprint.Footprint.Height = extent.height; } - hitGroupDesc.HitGroupExport = getWStr(hitGroup.hitGroupName.getBuffer()); - - D3D12_STATE_SUBOBJECT hitGroupSubObject = {}; - hitGroupSubObject.Type = D3D12_STATE_SUBOBJECT_TYPE_HIT_GROUP; - hitGroupSubObject.pDesc = hitGroups.add(hitGroupDesc); - subObjects.add(hitGroupSubObject); + else + { + footprint.Footprint.Height = Math::Max(1, (textureSize.height >> mipLevel)) - offset.y; + } + if (extent.depth != ITextureResource::kRemainingTextureSize) + { + footprint.Footprint.Depth = extent.depth; + } + else + { + footprint.Footprint.Depth = Math::Max(1, (textureSize.depth >> mipLevel)) - offset.z; + } + auto rowSize = (footprint.Footprint.Width + formatInfo.blockWidth - 1) / + formatInfo.blockWidth * formatInfo.blockSizeInBytes; + auto rowCount = + (footprint.Footprint.Height + formatInfo.blockHeight - 1) / formatInfo.blockHeight; + footprint.Footprint.RowPitch = + (UINT)D3DUtil::calcAligned(rowSize, (uint32_t)D3D12_TEXTURE_DATA_PITCH_ALIGNMENT); + + auto bufferSize = footprint.Footprint.RowPitch * rowCount * footprint.Footprint.Depth; + + IBufferResource* stagingBuffer; + size_t stagingBufferOffset = 0; + m_commandBuffer->m_transientHeap->allocateStagingBuffer( + bufferSize, stagingBuffer, stagingBufferOffset, MemoryType::Upload, true); + assert(stagingBufferOffset == 0); + 
BufferResourceImpl* bufferImpl = static_cast(stagingBuffer); + uint8_t* bufferData = nullptr; + D3D12_RANGE mapRange = {0, 0}; + bufferImpl->m_resource.getResource()->Map(0, &mapRange, (void**)&bufferData); + for (uint32_t z = 0; z < footprint.Footprint.Depth; z++) + { + auto imageStart = bufferData + footprint.Footprint.RowPitch * rowCount * (size_t)z; + auto srcData = (uint8_t*)subResourceData->data + subResourceData->strideZ * z; + for (uint32_t row = 0; row < rowCount; row++) + { + memcpy( + imageStart + row * (size_t)footprint.Footprint.RowPitch, + srcData + subResourceData->strideY * row, + rowSize); + } + } + bufferImpl->m_resource.getResource()->Unmap(0, nullptr); + srcRegion.pResource = bufferImpl->m_resource.getResource(); + m_commandBuffer->m_cmdList->CopyTextureRegion( + &dstRegion, offset.x, offset.y, offset.z, &srcRegion, nullptr); } +} - D3D12_RAYTRACING_SHADER_CONFIG shaderConfig = {}; - // According to DXR spec, fixed function triangle intersections must use float2 as ray - // attributes that defines the barycentric coordinates at intersection. 
- shaderConfig.MaxAttributeSizeInBytes = desc.rayTracing.maxAttributeSizeInBytes; - shaderConfig.MaxPayloadSizeInBytes = desc.rayTracing.maxRayPayloadSize; - D3D12_STATE_SUBOBJECT shaderConfigSubObject = {}; - shaderConfigSubObject.Type = D3D12_STATE_SUBOBJECT_TYPE_RAYTRACING_SHADER_CONFIG; - shaderConfigSubObject.pDesc = &shaderConfig; - subObjects.add(shaderConfigSubObject); +void ResourceCommandEncoderImpl::clearResourceView( + IResourceView* view, ClearValue* clearValue, ClearResourceViewFlags::Enum flags) +{ + auto viewImpl = static_cast(view); + switch (view->getViewDesc()->type) + { + case IResourceView::Type::RenderTarget: + m_commandBuffer->m_cmdList->ClearRenderTargetView( + viewImpl->m_descriptor.cpuHandle, clearValue->color.floatValues, 0, nullptr); + break; + case IResourceView::Type::DepthStencil: + { + D3D12_CLEAR_FLAGS clearFlags = (D3D12_CLEAR_FLAGS)0; + if (flags & ClearResourceViewFlags::ClearDepth) + { + clearFlags |= D3D12_CLEAR_FLAG_DEPTH; + } + if (flags & ClearResourceViewFlags::ClearStencil) + { + clearFlags |= D3D12_CLEAR_FLAG_STENCIL; + } + m_commandBuffer->m_cmdList->ClearDepthStencilView( + viewImpl->m_descriptor.cpuHandle, + clearFlags, + clearValue->depthStencil.depth, + (UINT8)clearValue->depthStencil.stencil, + 0, + nullptr); + break; + } + case IResourceView::Type::UnorderedAccess: + { + ID3D12Resource* d3dResource = nullptr; + switch (viewImpl->m_resource->getType()) + { + case IResource::Type::Buffer: + d3dResource = static_cast(viewImpl->m_resource.Ptr()) + ->m_resource.getResource(); + break; + default: + d3dResource = static_cast(viewImpl->m_resource.Ptr()) + ->m_resource.getResource(); + break; + } + auto gpuHandleIndex = + m_commandBuffer->m_transientHeap->getCurrentViewHeap().allocate(1); + if (gpuHandleIndex == -1) + { + m_commandBuffer->m_transientHeap->allocateNewViewDescriptorHeap( + m_commandBuffer->m_renderer); + gpuHandleIndex = m_commandBuffer->m_transientHeap->getCurrentViewHeap().allocate(1); + 
m_commandBuffer->bindDescriptorHeaps(); + } + this->m_commandBuffer->m_renderer->m_device->CopyDescriptorsSimple( + 1, + m_commandBuffer->m_transientHeap->getCurrentViewHeap().getCpuHandle(gpuHandleIndex), + viewImpl->m_descriptor.cpuHandle, + D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); - D3D12_GLOBAL_ROOT_SIGNATURE globalSignatureDesc = {}; - globalSignatureDesc.pGlobalRootSignature = program->m_rootObjectLayout->m_rootSignature.get(); - D3D12_STATE_SUBOBJECT globalSignatureSubobject = {}; - globalSignatureSubobject.Type = D3D12_STATE_SUBOBJECT_TYPE_GLOBAL_ROOT_SIGNATURE; - globalSignatureSubobject.pDesc = &globalSignatureDesc; - subObjects.add(globalSignatureSubobject); + if (flags & ClearResourceViewFlags::FloatClearValues) + { + m_commandBuffer->m_cmdList->ClearUnorderedAccessViewFloat( + m_commandBuffer->m_transientHeap->getCurrentViewHeap().getGpuHandle( + gpuHandleIndex), + viewImpl->m_descriptor.cpuHandle, + d3dResource, + clearValue->color.floatValues, + 0, + nullptr); + } + else + { + m_commandBuffer->m_cmdList->ClearUnorderedAccessViewUint( + m_commandBuffer->m_transientHeap->getCurrentViewHeap().getGpuHandle( + gpuHandleIndex), + viewImpl->m_descriptor.cpuHandle, + d3dResource, + clearValue->color.uintValues, + 0, + nullptr); + } + break; + } + default: + break; + } +} - D3D12_RAYTRACING_PIPELINE_CONFIG pipelineConfig = {}; - pipelineConfig.MaxTraceRecursionDepth = desc.rayTracing.maxRecursion; - D3D12_STATE_SUBOBJECT pipelineConfigSubobject = {}; - pipelineConfigSubobject.Type = D3D12_STATE_SUBOBJECT_TYPE_RAYTRACING_PIPELINE_CONFIG; - pipelineConfigSubobject.pDesc = &pipelineConfig; - subObjects.add(pipelineConfigSubobject); +void ResourceCommandEncoderImpl::resolveResource( + ITextureResource* source, + ResourceState sourceState, + SubresourceRange sourceRange, + ITextureResource* dest, + ResourceState destState, + SubresourceRange destRange) +{ + auto srcTexture = static_cast(source); + auto srcDesc = srcTexture->getDesc(); + auto dstTexture = 
static_cast(dest); + auto dstDesc = dstTexture->getDesc(); - if (m_device->m_pipelineCreationAPIDispatcher) + for (uint32_t layer = 0; layer < sourceRange.layerCount; ++layer) { - m_device->m_pipelineCreationAPIDispatcher->beforeCreateRayTracingState( - m_device, slangGlobalScope); - } + for (uint32_t mip = 0; mip < sourceRange.mipLevelCount; ++mip) + { + auto srcSubresourceIndex = D3DUtil::getSubresourceIndex( + mip + sourceRange.mipLevel, + layer + sourceRange.baseArrayLayer, + 0, + srcDesc->numMipLevels, + srcDesc->arraySize); + auto dstSubresourceIndex = D3DUtil::getSubresourceIndex( + mip + destRange.mipLevel, + layer + destRange.baseArrayLayer, + 0, + dstDesc->numMipLevels, + dstDesc->arraySize); - D3D12_STATE_OBJECT_DESC rtpsoDesc = {}; - rtpsoDesc.Type = D3D12_STATE_OBJECT_TYPE_RAYTRACING_PIPELINE; - rtpsoDesc.NumSubobjects = (UINT)subObjects.getCount(); - rtpsoDesc.pSubobjects = subObjects.getBuffer(); - SLANG_RETURN_ON_FAIL(m_device->m_device5->CreateStateObject( - &rtpsoDesc, IID_PPV_ARGS(m_stateObject.writeRef()))); + DXGI_FORMAT format = D3DUtil::getMapFormat(srcDesc->format); - if (m_device->m_pipelineCreationAPIDispatcher) - { - m_device->m_pipelineCreationAPIDispatcher->afterCreateRayTracingState( - m_device, slangGlobalScope); + m_commandBuffer->m_cmdList->ResolveSubresource( + dstTexture->m_resource.getResource(), + dstSubresourceIndex, + srcTexture->m_resource.getResource(), + srcSubresourceIndex, + format); + } } - return SLANG_OK; } -Result D3D12Device::createRayTracingPipelineState(const RayTracingPipelineStateDesc& inDesc, IPipelineState** outState) +void ResourceCommandEncoderImpl::resolveQuery( + IQueryPool* queryPool, uint32_t index, uint32_t count, IBufferResource* buffer, uint64_t offset) { - if (!m_device5) + auto queryBase = static_cast(queryPool); + switch (queryBase->m_desc.type) { - return SLANG_E_NOT_AVAILABLE; - } + case QueryType::AccelerationStructureCompactedSize: + case QueryType::AccelerationStructureCurrentSize: + case 
QueryType::AccelerationStructureSerializedSize: + { + auto queryPoolImpl = static_cast(queryPool); + auto bufferImpl = static_cast(buffer); + auto srcQueryBuffer = queryPoolImpl->m_bufferResource->m_resource.getResource(); - RefPtr pipelineStateImpl = new RayTracingPipelineStateImpl(this); - pipelineStateImpl->init(inDesc); - returnComPtr(outState, pipelineStateImpl); - return SLANG_OK; -} + D3D12_RESOURCE_BARRIER barrier = {}; + barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; + barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_UNORDERED_ACCESS; + barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_SOURCE; + barrier.Transition.pResource = srcQueryBuffer; + m_commandBuffer->m_cmdList->ResourceBarrier(1, &barrier); -#endif // SLANG_GFX_HAS_DXR_SUPPORT + m_commandBuffer->m_cmdList->CopyBufferRegion( + bufferImpl->m_resource.getResource(), + offset, + srcQueryBuffer, + index * sizeof(uint64_t), + count * sizeof(uint64_t)); -Result createNullDescriptor( - ID3D12Device* d3dDevice, - D3D12_CPU_DESCRIPTOR_HANDLE destDescriptor, - const D3D12Device::ShaderObjectLayoutImpl::BindingRangeInfo& bindingRange) -{ - switch (bindingRange.bindingType) - { - case slang::BindingType::ConstantBuffer: + barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; + barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_SOURCE; + barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_UNORDERED_ACCESS; + barrier.Transition.pResource = srcQueryBuffer; + m_commandBuffer->m_cmdList->ResourceBarrier(1, &barrier); + } + break; + default: { - D3D12_CONSTANT_BUFFER_VIEW_DESC cbvDesc = {}; - cbvDesc.BufferLocation = 0; - cbvDesc.SizeInBytes = 0; - d3dDevice->CreateConstantBufferView(&cbvDesc, destDescriptor); + auto queryPoolImpl = static_cast(queryPool); + auto bufferImpl = static_cast(buffer); + m_commandBuffer->m_cmdList->ResolveQueryData( + queryPoolImpl->m_queryHeap.get(), + queryPoolImpl->m_queryType, + index, + count, + bufferImpl->m_resource.getResource(), + offset); + } + 
break; + } +} + +void ResourceCommandEncoderImpl::copyTextureToBuffer( + IBufferResource* dst, + size_t dstOffset, + size_t dstSize, + size_t dstRowStride, + ITextureResource* src, + ResourceState srcState, + SubresourceRange srcSubresource, + ITextureResource::Offset3D srcOffset, + ITextureResource::Size extent) +{ + assert(srcSubresource.mipLevelCount <= 1); + + auto srcTexture = static_cast(src); + auto dstBuffer = static_cast(dst); + auto baseSubresourceIndex = D3DUtil::getSubresourceIndex( + srcSubresource.mipLevel, + srcSubresource.baseArrayLayer, + 0, + srcTexture->getDesc()->numMipLevels, + srcTexture->getDesc()->arraySize); + auto textureSize = srcTexture->getDesc()->size; + FormatInfo formatInfo = {}; + gfxGetFormatInfo(srcTexture->getDesc()->format, &formatInfo); + if (srcSubresource.mipLevelCount == 0) + srcSubresource.mipLevelCount = srcTexture->getDesc()->numMipLevels; + if (srcSubresource.layerCount == 0) + srcSubresource.layerCount = srcTexture->getDesc()->arraySize; + + for (uint32_t layer = 0; layer < srcSubresource.layerCount; layer++) + { + // Get the footprint + D3D12_RESOURCE_DESC texDesc = srcTexture->m_resource.getResource()->GetDesc(); + + D3D12_TEXTURE_COPY_LOCATION dstRegion = {}; + dstRegion.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; + dstRegion.pResource = dstBuffer->m_resource.getResource(); + D3D12_PLACED_SUBRESOURCE_FOOTPRINT& footprint = dstRegion.PlacedFootprint; + + D3D12_TEXTURE_COPY_LOCATION srcRegion = {}; + srcRegion.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; + srcRegion.SubresourceIndex = D3DUtil::getSubresourceIndex( + srcSubresource.mipLevel, + layer + srcSubresource.baseArrayLayer, + 0, + srcTexture->getDesc()->numMipLevels, + srcTexture->getDesc()->arraySize); + srcRegion.pResource = srcTexture->m_resource.getResource(); + + footprint.Offset = dstOffset; + footprint.Footprint.Format = texDesc.Format; + uint32_t mipLevel = srcSubresource.mipLevel; + if (extent.width != 0xFFFFFFFF) + { + 
footprint.Footprint.Width = extent.width; } - break; - case slang::BindingType::MutableRawBuffer: + else { - D3D12_UNORDERED_ACCESS_VIEW_DESC uavDesc = {}; - uavDesc.ViewDimension = D3D12_UAV_DIMENSION_BUFFER; - uavDesc.Buffer.Flags = D3D12_BUFFER_UAV_FLAG_RAW; - uavDesc.Format = DXGI_FORMAT_R32_TYPELESS; - d3dDevice->CreateUnorderedAccessView(nullptr, nullptr, &uavDesc, destDescriptor); + footprint.Footprint.Width = Math::Max(1, (textureSize.width >> mipLevel)) - srcOffset.x; } - break; - case slang::BindingType::MutableTypedBuffer: + if (extent.height != 0xFFFFFFFF) { - D3D12_UNORDERED_ACCESS_VIEW_DESC uavDesc = {}; - uavDesc.ViewDimension = D3D12_UAV_DIMENSION_BUFFER; - uavDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; - d3dDevice->CreateUnorderedAccessView(nullptr, nullptr, &uavDesc, destDescriptor); + footprint.Footprint.Height = extent.height; } - break; - case slang::BindingType::RawBuffer: + else { - D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc = {}; - srvDesc.ViewDimension = D3D12_SRV_DIMENSION_BUFFER; - srvDesc.Buffer.Flags = D3D12_BUFFER_SRV_FLAG_RAW; - srvDesc.Format = DXGI_FORMAT_R32_TYPELESS; - srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; - d3dDevice->CreateShaderResourceView(nullptr, &srvDesc, destDescriptor); + footprint.Footprint.Height = + Math::Max(1, (textureSize.height >> mipLevel)) - srcOffset.y; } - break; - case slang::BindingType::TypedBuffer: + if (extent.depth != 0xFFFFFFFF) { - D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc = {}; - srvDesc.ViewDimension = D3D12_SRV_DIMENSION_BUFFER; - srvDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; - srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; - d3dDevice->CreateShaderResourceView(nullptr, &srvDesc, destDescriptor); + footprint.Footprint.Depth = extent.depth; } - break; - case slang::BindingType::Texture: + else { - D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc = {}; - srvDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; - srvDesc.Shader4ComponentMapping = 
D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; - switch (bindingRange.resourceShape) - { - case SLANG_TEXTURE_1D: - srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE1D; - break; - case SLANG_TEXTURE_1D_ARRAY: - srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE1DARRAY; - break; - case SLANG_TEXTURE_2D: - srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D; - break; - case SLANG_TEXTURE_2D_ARRAY: - srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2DARRAY; - break; - case SLANG_TEXTURE_3D: - srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE3D; - break; - case SLANG_TEXTURE_CUBE: - srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURECUBE; - break; - case SLANG_TEXTURE_CUBE_ARRAY: - srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURECUBEARRAY; - break; - case SLANG_TEXTURE_2D_MULTISAMPLE: - srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2DMS; - break; - case SLANG_TEXTURE_2D_MULTISAMPLE_ARRAY: - srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2DMSARRAY; - break; - default: - return SLANG_OK; - } - d3dDevice->CreateShaderResourceView(nullptr, &srvDesc, destDescriptor); + footprint.Footprint.Depth = Math::Max(1, (textureSize.depth >> mipLevel)) - srcOffset.z; } - break; - default: - break; - } - return SLANG_OK; -} -Result D3D12Device::ShaderObjectImpl::setResource(ShaderOffset const& offset, IResourceView* resourceView) -{ - if (offset.bindingRangeIndex < 0) - return SLANG_E_INVALID_ARG; - auto layout = getLayout(); - if (offset.bindingRangeIndex >= layout->getBindingRangeCount()) - return SLANG_E_INVALID_ARG; + assert(dstRowStride % D3D12_TEXTURE_DATA_PITCH_ALIGNMENT == 0); + footprint.Footprint.RowPitch = dstRowStride; - m_version++; + auto bufferSize = + footprint.Footprint.RowPitch * footprint.Footprint.Height * footprint.Footprint.Depth; - ID3D12Device* d3dDevice = static_cast(getDevice())->m_device; + D3D12_BOX srcBox = {}; + srcBox.left = srcOffset.x; + srcBox.top = srcOffset.y; + srcBox.front = srcOffset.z; + srcBox.right = srcOffset.x + extent.width; 
+ srcBox.bottom = srcOffset.y + extent.height; + srcBox.back = srcOffset.z + extent.depth; + m_commandBuffer->m_cmdList->CopyTextureRegion(&dstRegion, 0, 0, 0, &srcRegion, &srcBox); + } +} - auto& bindingRange = layout->getBindingRange(offset.bindingRangeIndex); +void ResourceCommandEncoderImpl::textureSubresourceBarrier( + ITextureResource* texture, + SubresourceRange subresourceRange, + ResourceState src, + ResourceState dst) +{ + auto textureImpl = static_cast(texture); - if (bindingRange.isRootParameter && resourceView) + ShortList barriers; + D3D12_RESOURCE_BARRIER barrier; + barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE; + if (src == dst && src == ResourceState::UnorderedAccess) { - auto& rootArg = m_rootArguments[bindingRange.baseIndex]; - switch (resourceView->getViewDesc()->type) - { - case IResourceView::Type::AccelerationStructure: - { - auto resourceViewImpl = static_cast(resourceView); - rootArg = resourceViewImpl->getDeviceAddress(); - } - break; - case IResourceView::Type::ShaderResource: - case IResourceView::Type::UnorderedAccess: - { - auto resourceViewImpl = static_cast(resourceView); - if (resourceViewImpl->m_resource->isBuffer()) - { - rootArg = static_cast(resourceViewImpl->m_resource.Ptr()) - ->getDeviceAddress(); - } - else + barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_UAV; + barrier.UAV.pResource = textureImpl->m_resource.getResource(); + barriers.add(barrier); + } + else + { + barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; + barrier.Transition.StateBefore = D3DUtil::getResourceState(src); + barrier.Transition.StateAfter = D3DUtil::getResourceState(dst); + if (barrier.Transition.StateBefore == barrier.Transition.StateAfter) + return; + barrier.Transition.pResource = textureImpl->m_resource.getResource(); + auto d3dFormat = D3DUtil::getMapFormat(textureImpl->getDesc()->format); + auto aspectMask = (int32_t)subresourceRange.aspectMask; + if (subresourceRange.aspectMask == TextureAspect::Default) + aspectMask = 
(int32_t)TextureAspect::Color; + while (aspectMask) + { + auto aspect = Math::getLowestBit((int32_t)aspectMask); + aspectMask &= ~aspect; + auto planeIndex = D3DUtil::getPlaneSlice(d3dFormat, (TextureAspect)aspect); + for (uint32_t layer = 0; layer < subresourceRange.layerCount; layer++) + { + for (uint32_t mip = 0; mip < subresourceRange.mipLevelCount; mip++) { - getDebugCallback()->handleMessage( - DebugMessageType::Error, - DebugMessageSource::Layer, - "The shader parameter at the specified offset is a root parameter, and " - "therefore can only be a buffer view."); - return SLANG_FAIL; + barrier.Transition.Subresource = D3DUtil::getSubresourceIndex( + mip + subresourceRange.mipLevel, + layer + subresourceRange.baseArrayLayer, + planeIndex, + textureImpl->getDesc()->numMipLevels, + textureImpl->getDesc()->arraySize); + barriers.add(barrier); } } - break; } - return SLANG_OK; } + m_commandBuffer->m_cmdList->ResourceBarrier( + (UINT)barriers.getCount(), barriers.getArrayView().getBuffer()); +} + +void ResourceCommandEncoderImpl::beginDebugEvent(const char* name, float rgbColor[3]) +{ + auto beginEvent = m_commandBuffer->m_renderer->m_BeginEventOnCommandList; + if (beginEvent) + { + beginEvent( + m_commandBuffer->m_cmdList, + 0xff000000 | (uint8_t(rgbColor[0] * 255.0f) << 16) | + (uint8_t(rgbColor[1] * 255.0f) << 8) | uint8_t(rgbColor[2] * 255.0f), + name); + } +} - if (resourceView == nullptr) +void ResourceCommandEncoderImpl::endDebugEvent() +{ + auto endEvent = m_commandBuffer->m_renderer->m_EndEventOnCommandList; + if (endEvent) { - // Create null descriptor for the binding. 
- auto destDescriptor = m_descriptorSet.resourceTable.getCpuHandle( - bindingRange.baseIndex + (int32_t)offset.bindingArrayIndex); - return createNullDescriptor(d3dDevice, destDescriptor, bindingRange); + endEvent(m_commandBuffer->m_cmdList); } +} - ResourceViewInternalImpl* internalResourceView = nullptr; - switch (resourceView->getViewDesc()->type) +void RenderCommandEncoderImpl::init( + DeviceImpl* renderer, + TransientResourceHeapImpl* transientHeap, + CommandBufferImpl* cmdBuffer, + RenderPassLayoutImpl* renderPass, + FramebufferImpl* framebuffer) +{ + PipelineCommandEncoder::init(cmdBuffer); + m_preCmdList = nullptr; + m_renderPass = renderPass; + m_framebuffer = framebuffer; + m_transientHeap = transientHeap; + m_boundVertexBuffers.clear(); + m_boundIndexBuffer = nullptr; + m_primitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; + m_primitiveTopology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST; + m_boundIndexFormat = DXGI_FORMAT_UNKNOWN; + m_boundIndexOffset = 0; + m_currentPipeline = nullptr; + + // Set render target states. + if (!framebuffer) { -#if SLANG_GFX_HAS_DXR_SUPPORT - case IResourceView::Type::AccelerationStructure: + return; + } + m_d3dCmdList->OMSetRenderTargets( + (UINT)framebuffer->renderTargetViews.getCount(), + framebuffer->renderTargetDescriptors.getArrayView().getBuffer(), + FALSE, + framebuffer->depthStencilView ? &framebuffer->depthStencilDescriptor : nullptr); + + // Issue clear commands based on render pass set up. + for (Index i = 0; i < framebuffer->renderTargetViews.getCount(); i++) + { + if (i >= renderPass->m_renderTargetAccesses.getCount()) + continue; + + auto& access = renderPass->m_renderTargetAccesses[i]; + + // Transit resource states. { - auto asImpl = static_cast(resourceView); - // Hold a reference to the resource to prevent its destruction. 
- m_boundResources[bindingRange.baseIndex + offset.bindingArrayIndex] = asImpl->m_buffer; - internalResourceView = asImpl; + D3D12BarrierSubmitter submitter(m_d3dCmdList); + auto resourceViewImpl = framebuffer->renderTargetViews[i].Ptr(); + if (resourceViewImpl) + { + auto textureResource = + static_cast(resourceViewImpl->m_resource.Ptr()); + if (textureResource) + { + D3D12_RESOURCE_STATES initialState; + if (access.initialState == ResourceState::Undefined) + { + initialState = textureResource->m_defaultState; + } + else + { + initialState = D3DUtil::getResourceState(access.initialState); + } + textureResource->m_resource.transition( + initialState, D3D12_RESOURCE_STATE_RENDER_TARGET, submitter); + } + } } - break; -#endif - default: + // Clear. + if (access.loadOp == IRenderPassLayout::AttachmentLoadOp::Clear) { - auto resourceViewImpl = static_cast(resourceView); - // Hold a reference to the resource to prevent its destruction. - m_boundResources[bindingRange.baseIndex + offset.bindingArrayIndex] = - resourceViewImpl->m_resource; - internalResourceView = resourceViewImpl; + m_d3dCmdList->ClearRenderTargetView( + framebuffer->renderTargetDescriptors[i], + framebuffer->renderTargetClearValues[i].values, + 0, + nullptr); } - break; } - auto descriptorSlotIndex = bindingRange.baseIndex + (int32_t)offset.bindingArrayIndex; - if (internalResourceView->m_descriptor.cpuHandle.ptr) + if (renderPass->m_hasDepthStencil) { - d3dDevice->CopyDescriptorsSimple( - 1, - m_descriptorSet.resourceTable.getCpuHandle( - bindingRange.baseIndex + (int32_t)offset.bindingArrayIndex), - internalResourceView->m_descriptor.cpuHandle, - D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + // Transit resource states. 
+ { + D3D12BarrierSubmitter submitter(m_d3dCmdList); + auto resourceViewImpl = framebuffer->depthStencilView.Ptr(); + auto textureResource = + static_cast(resourceViewImpl->m_resource.Ptr()); + D3D12_RESOURCE_STATES initialState; + if (renderPass->m_depthStencilAccess.initialState == ResourceState::Undefined) + { + initialState = textureResource->m_defaultState; + } + else + { + initialState = + D3DUtil::getResourceState(renderPass->m_depthStencilAccess.initialState); + } + textureResource->m_resource.transition( + initialState, D3D12_RESOURCE_STATE_DEPTH_WRITE, submitter); + } + // Clear. + uint32_t clearFlags = 0; + if (renderPass->m_depthStencilAccess.loadOp == IRenderPassLayout::AttachmentLoadOp::Clear) + { + clearFlags |= D3D12_CLEAR_FLAG_DEPTH; + } + if (renderPass->m_depthStencilAccess.stencilLoadOp == + IRenderPassLayout::AttachmentLoadOp::Clear) + { + clearFlags |= D3D12_CLEAR_FLAG_STENCIL; + } + if (clearFlags) + { + m_d3dCmdList->ClearDepthStencilView( + framebuffer->depthStencilDescriptor, + (D3D12_CLEAR_FLAGS)clearFlags, + framebuffer->depthStencilClearValue.depth, + framebuffer->depthStencilClearValue.stencil, + 0, + nullptr); + } } - else - { - getDebugCallback()->handleMessage( - DebugMessageType::Error, - DebugMessageSource::Layer, - "IShaderObject::setResource: the resource view cannot be set to this shader parameter. 
" - "A possible reason is that the view is too large to be supported by D3D12."); - return SLANG_FAIL; +} + +Result RenderCommandEncoderImpl::bindPipeline(IPipelineState* state, IShaderObject** outRootObject) +{ + return bindPipelineImpl(state, outRootObject); +} + +Result RenderCommandEncoderImpl::bindPipelineWithRootObject( + IPipelineState* state, IShaderObject* rootObject) +{ + return bindPipelineWithRootObjectImpl(state, rootObject); +} + +void RenderCommandEncoderImpl::setViewports(uint32_t count, const Viewport* viewports) +{ + static const int kMaxViewports = D3D12_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE; + assert(count <= kMaxViewports && count <= kMaxRTVCount); + for (UInt ii = 0; ii < count; ++ii) + { + auto& inViewport = viewports[ii]; + auto& dxViewport = m_viewports[ii]; + + dxViewport.TopLeftX = inViewport.originX; + dxViewport.TopLeftY = inViewport.originY; + dxViewport.Width = inViewport.extentX; + dxViewport.Height = inViewport.extentY; + dxViewport.MinDepth = inViewport.minZ; + dxViewport.MaxDepth = inViewport.maxZ; } - return SLANG_OK; + m_d3dCmdList->RSSetViewports(UINT(count), m_viewports); } -Result D3D12Device::PipelineStateImpl::ensureAPIPipelineStateCreated() +void RenderCommandEncoderImpl::setScissorRects(uint32_t count, const ScissorRect* rects) { - if (m_pipelineState) - return SLANG_OK; + static const int kMaxScissorRects = D3D12_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE; + assert(count <= kMaxScissorRects && count <= kMaxRTVCount); - auto programImpl = static_cast(m_program.Ptr()); - if (programImpl->m_shaders.getCount() == 0) + for (UInt ii = 0; ii < count; ++ii) { - SLANG_RETURN_ON_FAIL(programImpl->compileShaders()); + auto& inRect = rects[ii]; + auto& dxRect = m_scissorRects[ii]; + + dxRect.left = LONG(inRect.minX); + dxRect.top = LONG(inRect.minY); + dxRect.right = LONG(inRect.maxX); + dxRect.bottom = LONG(inRect.maxY); } - if (desc.type == PipelineType::Graphics) - { - // Only actually create a D3D12 pipeline 
state if the pipeline is fully specialized. - auto inputLayoutImpl = (InputLayoutImpl*)desc.graphics.inputLayout; - // Describe and create the graphics pipeline state object (PSO) - D3D12_GRAPHICS_PIPELINE_STATE_DESC psoDesc = {}; + m_d3dCmdList->RSSetScissorRects(UINT(count), m_scissorRects); +} - psoDesc.pRootSignature = programImpl->m_rootObjectLayout->m_rootSignature; +void RenderCommandEncoderImpl::setPrimitiveTopology(PrimitiveTopology topology) +{ + m_primitiveTopologyType = D3DUtil::getPrimitiveType(topology); + m_primitiveTopology = D3DUtil::getPrimitiveTopology(topology); +} - for (auto& shaderBin : programImpl->m_shaders) +void RenderCommandEncoderImpl::setVertexBuffers( + uint32_t startSlot, + uint32_t slotCount, + IBufferResource* const* buffers, + const uint32_t* offsets) +{ + { + const Index num = startSlot + slotCount; + if (num > m_boundVertexBuffers.getCount()) { - switch (shaderBin.stage) - { - case SLANG_STAGE_VERTEX: - psoDesc.VS = {shaderBin.code.getBuffer(), SIZE_T(shaderBin.code.getCount())}; - break; - case SLANG_STAGE_FRAGMENT: - psoDesc.PS = {shaderBin.code.getBuffer(), SIZE_T(shaderBin.code.getCount())}; - break; - case SLANG_STAGE_DOMAIN: - psoDesc.DS = {shaderBin.code.getBuffer(), SIZE_T(shaderBin.code.getCount())}; - break; - case SLANG_STAGE_HULL: - psoDesc.HS = {shaderBin.code.getBuffer(), SIZE_T(shaderBin.code.getCount())}; - break; - case SLANG_STAGE_GEOMETRY: - psoDesc.GS = {shaderBin.code.getBuffer(), SIZE_T(shaderBin.code.getCount())}; - break; - default: - getDebugCallback()->handleMessage( - DebugMessageType::Error, - DebugMessageSource::Layer, - "Unsupported shader stage."); - return SLANG_E_NOT_AVAILABLE; - } + m_boundVertexBuffers.setCount(num); } + } + + for (UInt i = 0; i < slotCount; i++) + { + BufferResourceImpl* buffer = static_cast(buffers[i]); + + BoundVertexBuffer& boundBuffer = m_boundVertexBuffers[startSlot + i]; + boundBuffer.m_buffer = buffer; + boundBuffer.m_offset = int(offsets[i]); + } +} + +void 
RenderCommandEncoderImpl::setIndexBuffer( + IBufferResource* buffer, Format indexFormat, uint32_t offset) +{ + m_boundIndexBuffer = (BufferResourceImpl*)buffer; + m_boundIndexFormat = D3DUtil::getMapFormat(indexFormat); + m_boundIndexOffset = offset; +} - if (inputLayoutImpl) +void RenderCommandEncoderImpl::prepareDraw() +{ + auto pipelineState = m_currentPipeline.Ptr(); + if (!pipelineState || (pipelineState->desc.type != PipelineType::Graphics)) + { + assert(!"No graphics pipeline state set"); + return; + } + + // Submit - setting for graphics + { + GraphicsSubmitter submitter(m_d3dCmdList); + RefPtr newPipeline; + if (SLANG_FAILED(_bindRenderState(&submitter, newPipeline))) { - psoDesc.InputLayout = { - inputLayoutImpl->m_elements.getBuffer(), - UINT(inputLayoutImpl->m_elements.getCount())}; + assert(!"Failed to bind render state"); } + } - psoDesc.PrimitiveTopologyType = D3DUtil::getPrimitiveType(desc.graphics.primitiveType); + m_d3dCmdList->IASetPrimitiveTopology(m_primitiveTopology); + // Set up vertex buffer views + { + auto inputLayout = (InputLayoutImpl*)pipelineState->inputLayout.Ptr(); + if (inputLayout) { - auto framebufferLayout = static_cast(desc.graphics.framebufferLayout); - const int numRenderTargets = int(framebufferLayout->m_renderTargets.getCount()); - - if (framebufferLayout->m_hasDepthStencil) - { - psoDesc.DSVFormat = D3DUtil::getMapFormat(framebufferLayout->m_depthStencil.format); - psoDesc.SampleDesc.Count = framebufferLayout->m_depthStencil.sampleCount; - } - else + int numVertexViews = 0; + D3D12_VERTEX_BUFFER_VIEW vertexViews[16]; + for (Index i = 0; i < m_boundVertexBuffers.getCount(); i++) { - psoDesc.DSVFormat = DXGI_FORMAT_UNKNOWN; - if (framebufferLayout->m_renderTargets.getCount()) + const BoundVertexBuffer& boundVertexBuffer = m_boundVertexBuffers[i]; + BufferResourceImpl* buffer = boundVertexBuffer.m_buffer; + if (buffer) { - psoDesc.SampleDesc.Count = framebufferLayout->m_renderTargets[0].sampleCount; + D3D12_VERTEX_BUFFER_VIEW& 
vertexView = vertexViews[numVertexViews++]; + vertexView.BufferLocation = + buffer->m_resource.getResource()->GetGPUVirtualAddress() + + boundVertexBuffer.m_offset; + vertexView.SizeInBytes = + UINT(buffer->getDesc()->sizeInBytes - boundVertexBuffer.m_offset); + vertexView.StrideInBytes = inputLayout->m_vertexStreamStrides[i]; } } - psoDesc.NumRenderTargets = numRenderTargets; - for (Int i = 0; i < numRenderTargets; i++) - { - psoDesc.RTVFormats[i] = - D3DUtil::getMapFormat(framebufferLayout->m_renderTargets[i].format); - } - - psoDesc.SampleDesc.Quality = 0; - psoDesc.SampleMask = UINT_MAX; + m_d3dCmdList->IASetVertexBuffers(0, numVertexViews, vertexViews); } + } + // Set up index buffer + if (m_boundIndexBuffer) + { + D3D12_INDEX_BUFFER_VIEW indexBufferView; + indexBufferView.BufferLocation = + m_boundIndexBuffer->m_resource.getResource()->GetGPUVirtualAddress() + + m_boundIndexOffset; + indexBufferView.SizeInBytes = + UINT(m_boundIndexBuffer->getDesc()->sizeInBytes - m_boundIndexOffset); + indexBufferView.Format = m_boundIndexFormat; + + m_d3dCmdList->IASetIndexBuffer(&indexBufferView); + } +} - { - auto& rs = psoDesc.RasterizerState; - rs.FillMode = D3DUtil::getFillMode(desc.graphics.rasterizer.fillMode); - rs.CullMode = D3DUtil::getCullMode(desc.graphics.rasterizer.cullMode); - rs.FrontCounterClockwise = - desc.graphics.rasterizer.frontFace == gfx::FrontFaceMode::CounterClockwise ? TRUE - : FALSE; - rs.DepthBias = desc.graphics.rasterizer.depthBias; - rs.DepthBiasClamp = desc.graphics.rasterizer.depthBiasClamp; - rs.SlopeScaledDepthBias = desc.graphics.rasterizer.slopeScaledDepthBias; - rs.DepthClipEnable = desc.graphics.rasterizer.depthClipEnable ? TRUE : FALSE; - rs.MultisampleEnable = desc.graphics.rasterizer.multisampleEnable ? TRUE : FALSE; - rs.AntialiasedLineEnable = - desc.graphics.rasterizer.antialiasedLineEnable ? 
TRUE : FALSE; - rs.ForcedSampleCount = desc.graphics.rasterizer.forcedSampleCount; - rs.ConservativeRaster = desc.graphics.rasterizer.enableConservativeRasterization - ? D3D12_CONSERVATIVE_RASTERIZATION_MODE_ON - : D3D12_CONSERVATIVE_RASTERIZATION_MODE_OFF; - } +void RenderCommandEncoderImpl::draw(uint32_t vertexCount, uint32_t startVertex) +{ + prepareDraw(); + m_d3dCmdList->DrawInstanced(vertexCount, 1, startVertex, 0); +} + +void RenderCommandEncoderImpl::drawIndexed( + uint32_t indexCount, uint32_t startIndex, uint32_t baseVertex) +{ + prepareDraw(); + m_d3dCmdList->DrawIndexedInstanced(indexCount, 1, startIndex, baseVertex, 0); +} +void RenderCommandEncoderImpl::endEncoding() +{ + PipelineCommandEncoder::endEncodingImpl(); + if (!m_framebuffer) + return; + // Issue clear commands based on render pass set up. + for (Index i = 0; i < m_renderPass->m_renderTargetAccesses.getCount(); i++) + { + auto& access = m_renderPass->m_renderTargetAccesses[i]; + + // Transit resource states. { - D3D12_BLEND_DESC& blend = psoDesc.BlendState; - blend.IndependentBlendEnable = FALSE; - blend.AlphaToCoverageEnable = desc.graphics.blend.alphaToCoverageEnable ? TRUE : FALSE; - blend.RenderTarget[0].RenderTargetWriteMask = (uint8_t)RenderTargetWriteMask::EnableAll; - for (uint32_t i = 0; i < desc.graphics.blend.targetCount; i++) - { - auto& d3dDesc = blend.RenderTarget[i]; - d3dDesc.BlendEnable = desc.graphics.blend.targets[i].enableBlend ? 
TRUE : FALSE; - d3dDesc.BlendOp = D3DUtil::getBlendOp(desc.graphics.blend.targets[i].color.op); - d3dDesc.BlendOpAlpha = D3DUtil::getBlendOp(desc.graphics.blend.targets[i].alpha.op); - d3dDesc.DestBlend = - D3DUtil::getBlendFactor(desc.graphics.blend.targets[i].color.dstFactor); - d3dDesc.DestBlendAlpha = - D3DUtil::getBlendFactor(desc.graphics.blend.targets[i].alpha.dstFactor); - d3dDesc.LogicOp = D3D12_LOGIC_OP_NOOP; - d3dDesc.LogicOpEnable = FALSE; - d3dDesc.RenderTargetWriteMask = desc.graphics.blend.targets[i].writeMask; - d3dDesc.SrcBlend = - D3DUtil::getBlendFactor(desc.graphics.blend.targets[i].color.srcFactor); - d3dDesc.SrcBlendAlpha = - D3DUtil::getBlendFactor(desc.graphics.blend.targets[i].alpha.srcFactor); - } - for (uint32_t i = 1; i < desc.graphics.blend.targetCount; i++) - { - if (memcmp( - &desc.graphics.blend.targets[i], - &desc.graphics.blend.targets[0], - sizeof(desc.graphics.blend.targets[0])) != 0) - { - blend.IndependentBlendEnable = TRUE; - break; - } - } - for (uint32_t i = (uint32_t)desc.graphics.blend.targetCount; - i < D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT; - ++i) + D3D12BarrierSubmitter submitter(m_d3dCmdList); + auto resourceViewImpl = m_framebuffer->renderTargetViews[i].Ptr(); + if (!resourceViewImpl) + continue; + auto textureResource = + static_cast(resourceViewImpl->m_resource.Ptr()); + if (textureResource) { - blend.RenderTarget[i] = blend.RenderTarget[0]; + textureResource->m_resource.transition( + D3D12_RESOURCE_STATE_RENDER_TARGET, + D3DUtil::getResourceState(access.finalState), + submitter); } } + } - { - auto& ds = psoDesc.DepthStencilState; + if (m_renderPass->m_hasDepthStencil) + { + // Transit resource states. 
+ D3D12BarrierSubmitter submitter(m_d3dCmdList); + auto resourceViewImpl = m_framebuffer->depthStencilView.Ptr(); + auto textureResource = + static_cast(resourceViewImpl->m_resource.Ptr()); + textureResource->m_resource.transition( + D3D12_RESOURCE_STATE_DEPTH_WRITE, + D3DUtil::getResourceState(m_renderPass->m_depthStencilAccess.finalState), + submitter); + } + m_framebuffer = nullptr; +} - ds.DepthEnable = desc.graphics.depthStencil.depthTestEnable; - ds.DepthWriteMask = desc.graphics.depthStencil.depthWriteEnable - ? D3D12_DEPTH_WRITE_MASK_ALL - : D3D12_DEPTH_WRITE_MASK_ZERO; - ds.DepthFunc = D3DUtil::getComparisonFunc(desc.graphics.depthStencil.depthFunc); - ds.StencilEnable = desc.graphics.depthStencil.stencilEnable; - ds.StencilReadMask = (UINT8)desc.graphics.depthStencil.stencilReadMask; - ds.StencilWriteMask = (UINT8)desc.graphics.depthStencil.stencilWriteMask; - ds.FrontFace = D3DUtil::translateStencilOpDesc(desc.graphics.depthStencil.frontFace); - ds.BackFace = D3DUtil::translateStencilOpDesc(desc.graphics.depthStencil.backFace); - } +void RenderCommandEncoderImpl::setStencilReference(uint32_t referenceValue) +{ + m_d3dCmdList->OMSetStencilRef((UINT)referenceValue); +} - psoDesc.PrimitiveTopologyType = D3DUtil::getPrimitiveType(desc.graphics.primitiveType); +void RenderCommandEncoderImpl::drawIndirect( + uint32_t maxDrawCount, + IBufferResource* argBuffer, + uint64_t argOffset, + IBufferResource* countBuffer, + uint64_t countOffset) +{ + prepareDraw(); + + auto argBufferImpl = static_cast(argBuffer); + auto countBufferImpl = static_cast(countBuffer); + + m_d3dCmdList->ExecuteIndirect( + m_renderer->drawIndirectCmdSignature, + maxDrawCount, + argBufferImpl->m_resource, + argOffset, + countBufferImpl ? 
countBufferImpl->m_resource.getResource() : nullptr, + countOffset); +} - if (m_device->m_pipelineCreationAPIDispatcher) +void RenderCommandEncoderImpl::drawIndexedIndirect( + uint32_t maxDrawCount, + IBufferResource* argBuffer, + uint64_t argOffset, + IBufferResource* countBuffer, + uint64_t countOffset) +{ + prepareDraw(); + + auto argBufferImpl = static_cast(argBuffer); + auto countBufferImpl = static_cast(countBuffer); + + m_d3dCmdList->ExecuteIndirect( + m_renderer->drawIndexedIndirectCmdSignature, + maxDrawCount, + argBufferImpl->m_resource, + argOffset, + countBufferImpl ? countBufferImpl->m_resource.getResource() : nullptr, + countOffset); +} + +Result RenderCommandEncoderImpl::setSamplePositions( + uint32_t samplesPerPixel, uint32_t pixelCount, const SamplePosition* samplePositions) +{ + if (m_commandBuffer->m_cmdList1) + { + m_commandBuffer->m_cmdList1->SetSamplePositions( + samplesPerPixel, pixelCount, (D3D12_SAMPLE_POSITION*)samplePositions); + return SLANG_OK; + } + return SLANG_E_NOT_AVAILABLE; +} + +void RenderCommandEncoderImpl::drawInstanced( + uint32_t vertexCount, + uint32_t instanceCount, + uint32_t startVertex, + uint32_t startInstanceLocation) +{ + prepareDraw(); + m_d3dCmdList->DrawInstanced(vertexCount, instanceCount, startVertex, startInstanceLocation); +} + +void RenderCommandEncoderImpl::drawIndexedInstanced( + uint32_t indexCount, + uint32_t instanceCount, + uint32_t startIndexLocation, + int32_t baseVertexLocation, + uint32_t startInstanceLocation) +{ + prepareDraw(); + m_d3dCmdList->DrawIndexedInstanced( + indexCount, instanceCount, startIndexLocation, baseVertexLocation, startInstanceLocation); +} + +void ComputeCommandEncoderImpl::endEncoding() { PipelineCommandEncoder::endEncodingImpl(); } + +void ComputeCommandEncoderImpl::init( + DeviceImpl* renderer, TransientResourceHeapImpl* transientHeap, CommandBufferImpl* cmdBuffer) +{ + PipelineCommandEncoder::init(cmdBuffer); + m_preCmdList = nullptr; + m_transientHeap = transientHeap; + 
m_currentPipeline = nullptr; +} + +Result ComputeCommandEncoderImpl::bindPipeline(IPipelineState* state, IShaderObject** outRootObject) +{ + return bindPipelineImpl(state, outRootObject); +} + +Result ComputeCommandEncoderImpl::bindPipelineWithRootObject( + IPipelineState* state, IShaderObject* rootObject) +{ + return bindPipelineWithRootObjectImpl(state, rootObject); +} + +void ComputeCommandEncoderImpl::dispatchCompute(int x, int y, int z) +{ + // Submit binding for compute + { + ComputeSubmitter submitter(m_d3dCmdList); + RefPtr newPipeline; + if (SLANG_FAILED(_bindRenderState(&submitter, newPipeline))) { - SLANG_RETURN_ON_FAIL( - m_device->m_pipelineCreationAPIDispatcher->createGraphicsPipelineState( - m_device, - programImpl->linkedProgram.get(), - &psoDesc, - (void**)m_pipelineState.writeRef())); + assert(!"Failed to bind render state"); } - else + } + m_d3dCmdList->Dispatch(x, y, z); +} + +void ComputeCommandEncoderImpl::dispatchComputeIndirect(IBufferResource* argBuffer, uint64_t offset) +{ + // Submit binding for compute + { + ComputeSubmitter submitter(m_d3dCmdList); + RefPtr newPipeline; + if (SLANG_FAILED(_bindRenderState(&submitter, newPipeline))) { - SLANG_RETURN_ON_FAIL(m_device->m_device->CreateGraphicsPipelineState( - &psoDesc, IID_PPV_ARGS(m_pipelineState.writeRef()))); + assert(!"Failed to bind render state"); } } - else - { + auto argBufferImpl = static_cast(argBuffer); - // Only actually create a D3D12 pipeline state if the pipeline is fully specialized. - ComPtr pipelineState; - if (!programImpl->isSpecializable()) - { - // Describe and create the compute pipeline state object - D3D12_COMPUTE_PIPELINE_STATE_DESC computeDesc = {}; - computeDesc.pRootSignature = - desc.compute.d3d12RootSignatureOverride - ? 
static_cast(desc.compute.d3d12RootSignatureOverride) - : programImpl->m_rootObjectLayout->m_rootSignature; - computeDesc.CS = { - programImpl->m_shaders[0].code.getBuffer(), - SIZE_T(programImpl->m_shaders[0].code.getCount())}; + m_d3dCmdList->ExecuteIndirect( + m_renderer->dispatchIndirectCmdSignature, 1, argBufferImpl->m_resource, offset, nullptr, 0); +} -#ifdef GFX_NVAPI - if (m_nvapi) - { - // Also fill the extension structure. - // Use the same UAV slot index and register space that are declared in the shader. +FenceImpl::~FenceImpl() +{ + if (m_waitEvent) + CloseHandle(m_waitEvent); +} - // For simplicities sake we just use u0 - NVAPI_D3D12_PSO_SET_SHADER_EXTENSION_SLOT_DESC extensionDesc; - extensionDesc.baseVersion = NV_PSO_EXTENSION_DESC_VER; - extensionDesc.version = NV_SET_SHADER_EXTENSION_SLOT_DESC_VER; - extensionDesc.uavSlot = 0; - extensionDesc.registerSpace = 0; +HANDLE FenceImpl::getWaitEvent() +{ + if (m_waitEvent) + return m_waitEvent; + m_waitEvent = CreateEventEx(nullptr, nullptr, 0, EVENT_ALL_ACCESS); + return m_waitEvent; +} - // Put the pointer to the extension into an array - there can be multiple extensions - // enabled at once. - const NVAPI_D3D12_PSO_EXTENSION_DESC* extensions[] = {&extensionDesc}; +Result FenceImpl::init(DeviceImpl* device, const IFence::Desc& desc) +{ + SLANG_RETURN_ON_FAIL(device->m_device->CreateFence( + desc.initialValue, + desc.isShared ? D3D12_FENCE_FLAG_SHARED : D3D12_FENCE_FLAG_NONE, + IID_PPV_ARGS(m_fence.writeRef()))); + return SLANG_OK; +} - // Now create the PSO. 
- const NvAPI_Status nvapiStatus = NvAPI_D3D12_CreateComputePipelineState( - m_device->m_device, - &computeDesc, - SLANG_COUNT_OF(extensions), - extensions, - m_pipelineState.writeRef()); +Result FenceImpl::getCurrentValue(uint64_t* outValue) +{ + *outValue = m_fence->GetCompletedValue(); + return SLANG_OK; +} - if (nvapiStatus != NVAPI_OK) - { - return SLANG_FAIL; - } - } - else -#endif - { - if (m_device->m_pipelineCreationAPIDispatcher) - { - SLANG_RETURN_ON_FAIL( - m_device->m_pipelineCreationAPIDispatcher->createComputePipelineState( - m_device, - programImpl->linkedProgram.get(), - &computeDesc, - (void**)m_pipelineState.writeRef())); - } - else - { - SLANG_RETURN_ON_FAIL(m_device->m_device->CreateComputePipelineState( - &computeDesc, IID_PPV_ARGS(m_pipelineState.writeRef()))); - } - } - } +Result FenceImpl::setCurrentValue(uint64_t value) +{ + SLANG_RETURN_ON_FAIL(m_fence->Signal(value)); + return SLANG_OK; +} + +Result FenceImpl::getSharedHandle(InteropHandle* outHandle) +{ + // Check if a shared handle already exists. 
+ if (sharedHandle.handleValue != 0) + { + *outHandle = sharedHandle; + return SLANG_OK; } + ComPtr devicePtr; + m_fence->GetDevice(IID_PPV_ARGS(devicePtr.writeRef())); + SLANG_RETURN_ON_FAIL(devicePtr->CreateSharedHandle( + m_fence, NULL, GENERIC_ALL, nullptr, (HANDLE*)&outHandle->handleValue)); + outHandle->api = InteropHandleAPI::D3D12; + sharedHandle = *outHandle; + return SLANG_OK; +} + +Result FenceImpl::getNativeHandle(InteropHandle* outNativeHandle) +{ + outNativeHandle->api = gfx::InteropHandleAPI::D3D12; + outNativeHandle->handleValue = (uint64_t)m_fence.get(); return SLANG_OK; } -} // renderer_test +} // namespace d3d12 + +Result SLANG_MCALL createD3D12Device(const IDevice::Desc* desc, IDevice** outDevice) +{ + RefPtr result = new d3d12::DeviceImpl(); + SLANG_RETURN_ON_FAIL(result->initialize(*desc)); + returnComPtr(outDevice, result); + return SLANG_OK; +} +} // namespace gfx diff --git a/tools/gfx/d3d12/render-d3d12.h b/tools/gfx/d3d12/render-d3d12.h index 457fc1db6..3ef4a84a7 100644 --- a/tools/gfx/d3d12/render-d3d12.h +++ b/tools/gfx/d3d12/render-d3d12.h @@ -1,11 +1,1799 @@ // render-d3d12.h #pragma once +#include "../command-encoder-com-forward.h" +#include "../d3d/d3d-swapchain.h" +#include "../mutable-shader-object.h" #include "../renderer-shared.h" +#include "../simple-render-pass-layout.h" +#include "../transient-resource-heap-base.h" +#include "core/slang-basic.h" +#include "core/slang-blob.h" +#include "core/slang-chunked-list.h" +#include "descriptor-heap-d3d12.h" +#include "resource-d3d12.h" + +#define _CRT_SECURE_NO_WARNINGS +#define WIN32_LEAN_AND_MEAN +#define NOMINMAX +#include +#undef WIN32_LEAN_AND_MEAN +#undef NOMINMAX + +#include +#include + +#ifndef __ID3D12GraphicsCommandList1_FWD_DEFINED__ +// If can't find a definition of CommandList1, just use an empty definition +struct ID3D12GraphicsCommandList1 +{}; +#endif namespace gfx { +namespace d3d12 +{ + +using namespace Slang; + +// Define function pointer types for PIX library. 
+typedef HRESULT(WINAPI* PFN_BeginEventOnCommandList)( + ID3D12GraphicsCommandList* commandList, UINT64 color, _In_ PCSTR formatString); +typedef HRESULT(WINAPI* PFN_EndEventOnCommandList)(ID3D12GraphicsCommandList* commandList); + +static const Int kMaxNumRenderFrames = 4; +static const Int kMaxRTVCount = 8; + +struct D3D12DeviceInfo +{ + void clear() + { + m_dxgiFactory.setNull(); + m_device.setNull(); + m_adapter.setNull(); + m_desc = {}; + m_desc1 = {}; + m_isWarp = false; + m_isSoftware = false; + } + + bool m_isWarp; + bool m_isSoftware; + ComPtr m_dxgiFactory; + ComPtr m_device; + ComPtr m_device5; + ComPtr m_adapter; + DXGI_ADAPTER_DESC m_desc; + DXGI_ADAPTER_DESC1 m_desc1; +}; + +class CommandQueueImpl; +class TransientResourceHeapImpl; +class TextureResourceImpl; +class CommandBufferImpl; + +class DeviceImpl : public RendererBase +{ +public: + Desc m_desc; + D3D12DeviceExtendedDesc m_extendedDesc; + + DeviceInfo m_info; + String m_adapterName; + + bool m_isInitialized = false; + + ComPtr m_dxDebug; + + D3D12DeviceInfo m_deviceInfo; + ID3D12Device* m_device = nullptr; + ID3D12Device5* m_device5 = nullptr; + + VirtualObjectPool m_queueIndexAllocator; + + RefPtr m_resourceCommandQueue; + RefPtr m_resourceCommandTransientHeap; + + RefPtr m_rtvAllocator; + RefPtr m_dsvAllocator; + + // Space in the GPU-visible heaps is precious, so we will also keep + // around CPU-visible heaps for storing shader-objects' descriptors in a format + // that is ready for copying into the GPU-visible heaps as needed. 
+ // + RefPtr m_cpuViewHeap; ///< Cbv, Srv, Uav + RefPtr m_cpuSamplerHeap; ///< Heap for samplers + + // Dll entry points + PFN_D3D12_GET_DEBUG_INTERFACE m_D3D12GetDebugInterface = nullptr; + PFN_D3D12_CREATE_DEVICE m_D3D12CreateDevice = nullptr; + PFN_D3D12_SERIALIZE_ROOT_SIGNATURE m_D3D12SerializeRootSignature = nullptr; + + PFN_BeginEventOnCommandList m_BeginEventOnCommandList = nullptr; + PFN_EndEventOnCommandList m_EndEventOnCommandList = nullptr; + + bool m_nvapi = false; + + // Command signatures required for indirect draws. These indicate the format of the indirect + // as well as the command type to be used (DrawInstanced and DrawIndexedInstanced, in this + // case). + ComPtr drawIndirectCmdSignature; + ComPtr drawIndexedIndirectCmdSignature; + ComPtr dispatchIndirectCmdSignature; + +public: + virtual SLANG_NO_THROW SlangResult SLANG_MCALL initialize(const Desc& desc) override; + virtual SLANG_NO_THROW Result SLANG_MCALL + getFormatSupportedResourceStates(Format format, ResourceStateSet* outStates) override; + + virtual SLANG_NO_THROW Result SLANG_MCALL + createCommandQueue(const ICommandQueue::Desc& desc, ICommandQueue** outQueue) override; + virtual SLANG_NO_THROW Result SLANG_MCALL createTransientResourceHeap( + const ITransientResourceHeap::Desc& desc, ITransientResourceHeap** outHeap) override; + virtual SLANG_NO_THROW Result SLANG_MCALL createSwapchain( + const ISwapchain::Desc& desc, WindowHandle window, ISwapchain** outSwapchain) override; + + virtual SLANG_NO_THROW Result SLANG_MCALL getTextureAllocationInfo( + const ITextureResource::Desc& desc, size_t* outSize, size_t* outAlignment) override; + virtual SLANG_NO_THROW Result SLANG_MCALL getTextureRowAlignment(size_t* outAlignment) override; + virtual SLANG_NO_THROW Result SLANG_MCALL createTextureResource( + const ITextureResource::Desc& desc, + const ITextureResource::SubresourceData* initData, + ITextureResource** outResource) override; + virtual SLANG_NO_THROW Result SLANG_MCALL 
createTextureFromNativeHandle( + InteropHandle handle, + const ITextureResource::Desc& srcDesc, + ITextureResource** outResource) override; + virtual SLANG_NO_THROW Result SLANG_MCALL createBufferResource( + const IBufferResource::Desc& desc, + const void* initData, + IBufferResource** outResource) override; + virtual SLANG_NO_THROW Result SLANG_MCALL createBufferFromNativeHandle( + InteropHandle handle, + const IBufferResource::Desc& srcDesc, + IBufferResource** outResource) override; + + virtual SLANG_NO_THROW Result SLANG_MCALL + createSamplerState(ISamplerState::Desc const& desc, ISamplerState** outSampler) override; + + virtual SLANG_NO_THROW Result SLANG_MCALL createTextureView( + ITextureResource* texture, + IResourceView::Desc const& desc, + IResourceView** outView) override; + virtual SLANG_NO_THROW Result SLANG_MCALL createBufferView( + IBufferResource* buffer, + IBufferResource* counterBuffer, + IResourceView::Desc const& desc, + IResourceView** outView) override; + + virtual SLANG_NO_THROW Result SLANG_MCALL + createFramebuffer(IFramebuffer::Desc const& desc, IFramebuffer** outFrameBuffer) override; + + virtual SLANG_NO_THROW Result SLANG_MCALL createFramebufferLayout( + IFramebufferLayout::Desc const& desc, IFramebufferLayout** outLayout) override; + + virtual SLANG_NO_THROW Result SLANG_MCALL createRenderPassLayout( + const IRenderPassLayout::Desc& desc, IRenderPassLayout** outRenderPassLayout) override; + + virtual SLANG_NO_THROW Result SLANG_MCALL + createInputLayout(IInputLayout::Desc const& desc, IInputLayout** outLayout) override; + + virtual Result createShaderObjectLayout( + slang::TypeLayoutReflection* typeLayout, ShaderObjectLayoutBase** outLayout) override; + virtual Result createShaderObject( + ShaderObjectLayoutBase* layout, IShaderObject** outObject) override; + virtual Result createMutableShaderObject( + ShaderObjectLayoutBase* layout, IShaderObject** outObject) override; + virtual SLANG_NO_THROW Result SLANG_MCALL + 
createMutableRootShaderObject(IShaderProgram* program, IShaderObject** outObject) override; + + virtual SLANG_NO_THROW Result SLANG_MCALL + createShaderTable(const IShaderTable::Desc& desc, IShaderTable** outShaderTable) override; + virtual SLANG_NO_THROW Result SLANG_MCALL createProgram( + const IShaderProgram::Desc& desc, + IShaderProgram** outProgram, + ISlangBlob** outDiagnostics) override; + virtual SLANG_NO_THROW Result SLANG_MCALL createGraphicsPipelineState( + const GraphicsPipelineStateDesc& desc, IPipelineState** outState) override; + virtual SLANG_NO_THROW Result SLANG_MCALL createComputePipelineState( + const ComputePipelineStateDesc& desc, IPipelineState** outState) override; + + virtual SLANG_NO_THROW Result SLANG_MCALL + createQueryPool(const IQueryPool::Desc& desc, IQueryPool** outState) override; + + virtual SLANG_NO_THROW Result SLANG_MCALL + createFence(const IFence::Desc& desc, IFence** outFence) override; + + virtual SLANG_NO_THROW Result SLANG_MCALL waitForFences( + uint32_t fenceCount, + IFence** fences, + uint64_t* fenceValues, + bool waitForAll, + uint64_t timeout) override; + + virtual SLANG_NO_THROW SlangResult SLANG_MCALL readTextureResource( + ITextureResource* resource, + ResourceState state, + ISlangBlob** outBlob, + size_t* outRowPitch, + size_t* outPixelSize) override; + + virtual SLANG_NO_THROW SlangResult SLANG_MCALL readBufferResource( + IBufferResource* resource, size_t offset, size_t size, ISlangBlob** outBlob) override; + + virtual SLANG_NO_THROW const gfx::DeviceInfo& SLANG_MCALL getDeviceInfo() const override; + + virtual SLANG_NO_THROW Result SLANG_MCALL + getNativeDeviceHandles(InteropHandles* outHandles) override; + + ~DeviceImpl(); + + virtual SLANG_NO_THROW Result SLANG_MCALL getAccelerationStructurePrebuildInfo( + const IAccelerationStructure::BuildInputs& buildInputs, + IAccelerationStructure::PrebuildInfo* outPrebuildInfo) override; + virtual SLANG_NO_THROW Result SLANG_MCALL createAccelerationStructure( + const 
IAccelerationStructure::CreateDesc& desc, IAccelerationStructure** outView) override; + virtual SLANG_NO_THROW Result SLANG_MCALL createRayTracingPipelineState( + const RayTracingPipelineStateDesc& desc, IPipelineState** outState) override; + +public: + static PROC loadProc(HMODULE module, char const* name); + + Result createCommandQueueImpl(CommandQueueImpl** outQueue); + + Result createTransientResourceHeapImpl( + ITransientResourceHeap::Flags::Enum flags, + size_t constantBufferSize, + uint32_t viewDescriptors, + uint32_t samplerDescriptors, + TransientResourceHeapImpl** outHeap); + + Result createBuffer( + const D3D12_RESOURCE_DESC& resourceDesc, + const void* srcData, + size_t srcDataSize, + D3D12_RESOURCE_STATES finalState, + D3D12Resource& resourceOut, + bool isShared, + MemoryType access = MemoryType::DeviceLocal); + + Result captureTextureToSurface( + TextureResourceImpl* resource, + ResourceState state, + ISlangBlob** blob, + size_t* outRowPitch, + size_t* outPixelSize); + + Result _createDevice( + DeviceCheckFlags deviceCheckFlags, + const UnownedStringSlice& nameMatch, + D3D_FEATURE_LEVEL featureLevel, + D3D12DeviceInfo& outDeviceInfo); + + struct ResourceCommandRecordInfo + { + ComPtr commandBuffer; + ID3D12GraphicsCommandList* d3dCommandList; + }; + ResourceCommandRecordInfo encodeResourceCommands(); + void submitResourceCommandsAndWait(const ResourceCommandRecordInfo& info); +}; + +class BufferResourceImpl : public gfx::BufferResource +{ +public: + typedef BufferResource Parent; + + BufferResourceImpl(const Desc& desc); + + ~BufferResourceImpl(); + + D3D12Resource m_resource; ///< The resource in gpu memory, allocated on the correct heap + ///< relative to the cpu access flag + + D3D12_RESOURCE_STATES m_defaultState; + + virtual SLANG_NO_THROW DeviceAddress SLANG_MCALL getDeviceAddress() override; + + virtual SLANG_NO_THROW Result SLANG_MCALL + getNativeResourceHandle(InteropHandle* outHandle) override; + + virtual SLANG_NO_THROW Result SLANG_MCALL 
getSharedHandle(InteropHandle* outHandle) override; + + virtual SLANG_NO_THROW Result SLANG_MCALL + map(MemoryRange* rangeToRead, void** outPointer) override; + + virtual SLANG_NO_THROW Result SLANG_MCALL unmap(MemoryRange* writtenRange) override; + + virtual SLANG_NO_THROW Result SLANG_MCALL setDebugName(const char* name) override; +}; + +class TextureResourceImpl : public TextureResource +{ +public: + typedef TextureResource Parent; + + TextureResourceImpl(const Desc& desc); + + ~TextureResourceImpl(); + + D3D12Resource m_resource; + D3D12_RESOURCE_STATES m_defaultState; + + virtual SLANG_NO_THROW Result SLANG_MCALL + getNativeResourceHandle(InteropHandle* outHandle) override; + + virtual SLANG_NO_THROW Result SLANG_MCALL getSharedHandle(InteropHandle* outHandle) override; + + virtual SLANG_NO_THROW Result SLANG_MCALL setDebugName(const char* name) override; +}; + +class SamplerStateImpl : public SamplerStateBase +{ +public: + D3D12Descriptor m_descriptor; + RefPtr m_allocator; + ~SamplerStateImpl(); + virtual SLANG_NO_THROW Result SLANG_MCALL getNativeHandle(InteropHandle* outHandle) override; +}; + +class ResourceViewInternalImpl +{ +public: + D3D12Descriptor m_descriptor; + RefPtr m_allocator; + ~ResourceViewInternalImpl(); +}; + +class ResourceViewImpl + : public ResourceViewBase + , public ResourceViewInternalImpl +{ +public: + Slang::RefPtr m_resource; + virtual SLANG_NO_THROW Result SLANG_MCALL getNativeHandle(InteropHandle* outHandle) override; +}; + +class FramebufferLayoutImpl : public FramebufferLayoutBase +{ +public: + ShortList m_renderTargets; + bool m_hasDepthStencil = false; + IFramebufferLayout::AttachmentLayout m_depthStencil; +}; + +class FramebufferImpl : public FramebufferBase +{ +public: + ShortList> renderTargetViews; + RefPtr depthStencilView; + ShortList renderTargetDescriptors; + struct Color4f + { + float values[4]; + }; + ShortList renderTargetClearValues; + D3D12_CPU_DESCRIPTOR_HANDLE depthStencilDescriptor; + DepthStencilClearValue 
depthStencilClearValue; +}; + +class RenderPassLayoutImpl : public SimpleRenderPassLayout +{ +public: + RefPtr m_framebufferLayout; + void init(const IRenderPassLayout::Desc& desc); +}; + +class InputLayoutImpl : public InputLayoutBase +{ +public: + List m_elements; + List m_vertexStreamStrides; + List m_text; ///< Holds all strings to keep in scope +}; + +class PipelineStateImpl : public PipelineStateBase +{ +public: + PipelineStateImpl(DeviceImpl* device) + : m_device(device) + {} + DeviceImpl* m_device; + ComPtr m_pipelineState; + void init(const GraphicsPipelineStateDesc& inDesc); + void init(const ComputePipelineStateDesc& inDesc); + virtual SLANG_NO_THROW Result SLANG_MCALL getNativeHandle(InteropHandle* outHandle) override; + virtual Result ensureAPIPipelineStateCreated() override; +}; + +#if SLANG_GFX_HAS_DXR_SUPPORT +class RayTracingPipelineStateImpl : public PipelineStateBase +{ +public: + ComPtr m_stateObject; + DeviceImpl* m_device; + RayTracingPipelineStateImpl(DeviceImpl* device); + void init(const RayTracingPipelineStateDesc& inDesc); + virtual SLANG_NO_THROW Result SLANG_MCALL getNativeHandle(InteropHandle* outHandle) override; + virtual Result ensureAPIPipelineStateCreated() override; +}; +#endif + +class QueryPoolImpl : public QueryPoolBase +{ +public: + Result init(const IQueryPool::Desc& desc, DeviceImpl* device); + + virtual SLANG_NO_THROW Result SLANG_MCALL + getResult(SlangInt queryIndex, SlangInt count, uint64_t* data) override; + + void writeTimestamp(ID3D12GraphicsCommandList* cmdList, SlangInt index); + +public: + D3D12_QUERY_TYPE m_queryType; + ComPtr m_queryHeap; + D3D12Resource m_readBackBuffer; + ComPtr m_commandAllocator; + ComPtr m_commandList; + ComPtr m_fence; + ComPtr m_commandQueue; + HANDLE m_waitEvent; + UINT64 m_eventValue = 0; +}; + +/// Implements the IQueryPool interface with a plain buffer. +/// Used for query types that does not correspond to a D3D query, +/// such as ray-tracing acceleration structure post-build info. 
+class PlainBufferProxyQueryPoolImpl : public QueryPoolBase +{ +public: + SLANG_COM_OBJECT_IUNKNOWN_ALL + IQueryPool* getInterface(const Guid& guid); + +public: + Result init(const IQueryPool::Desc& desc, DeviceImpl* device, uint32_t stride); + + virtual SLANG_NO_THROW Result SLANG_MCALL reset() override; + virtual SLANG_NO_THROW Result SLANG_MCALL + getResult(SlangInt queryIndex, SlangInt count, uint64_t* data) override; + +public: + QueryType m_queryType; + RefPtr m_bufferResource; + RefPtr m_device; + List m_result; + bool m_resultDirty = true; + uint32_t m_stride = 0; + uint32_t m_count = 0; +}; + +struct BoundVertexBuffer +{ + RefPtr m_buffer; + int m_offset; +}; + +class TransientResourceHeapImpl + : public TransientResourceHeapBaseImpl + , public ID3D12TransientResourceHeap +{ +private: + typedef TransientResourceHeapBaseImpl Super; + +public: + ComPtr m_commandAllocator; + List> m_d3dCommandListPool; + List> m_commandBufferPool; + uint32_t m_commandListAllocId = 0; + // Wait values for each command queue. + struct QueueWaitInfo + { + uint64_t waitValue; + HANDLE fenceEvent; + ComPtr fence = nullptr; + }; + ShortList m_waitInfos; + + QueueWaitInfo& getQueueWaitInfo(uint32_t queueIndex); + // During command submission, we need all the descriptor tables that get + // used to come from a single heap (for each descriptor heap type). + // + // We will thus keep a single heap of each type that we hope will hold + // all the descriptors that actually get needed in a frame. 
+ ShortList m_viewHeaps; // Cbv, Srv, Uav + ShortList m_samplerHeaps; // Heap for samplers + int32_t m_currentViewHeapIndex = -1; + int32_t m_currentSamplerHeapIndex = -1; + bool m_canResize = false; + + uint32_t m_viewHeapSize; + uint32_t m_samplerHeapSize; + + D3D12DescriptorHeap& getCurrentViewHeap(); + D3D12DescriptorHeap& getCurrentSamplerHeap(); + + D3D12LinearExpandingDescriptorHeap m_stagingCpuViewHeap; + D3D12LinearExpandingDescriptorHeap m_stagingCpuSamplerHeap; + + virtual SLANG_NO_THROW Result SLANG_MCALL + queryInterface(SlangUUID const& uuid, void** outObject) override; + + virtual SLANG_NO_THROW uint32_t SLANG_MCALL addRef() override { return Super::addRef(); } + virtual SLANG_NO_THROW uint32_t SLANG_MCALL release() override { return Super::release(); } + + virtual SLANG_NO_THROW Result SLANG_MCALL allocateTransientDescriptorTable( + DescriptorType type, + uint32_t count, + uint64_t& outDescriptorOffset, + void** outD3DDescriptorHeapHandle) override; + + ~TransientResourceHeapImpl(); + + bool canResize() { return m_canResize; } + + Result init( + const ITransientResourceHeap::Desc& desc, + DeviceImpl* device, + uint32_t viewHeapSize, + uint32_t samplerHeapSize); + + Result allocateNewViewDescriptorHeap(DeviceImpl* device); + + Result allocateNewSamplerDescriptorHeap(DeviceImpl* device); + + virtual SLANG_NO_THROW Result SLANG_MCALL + createCommandBuffer(ICommandBuffer** outCommandBuffer) override; + + virtual SLANG_NO_THROW Result SLANG_MCALL synchronizeAndReset() override; +}; + +struct Submitter +{ + virtual void setRootConstantBufferView( + int index, D3D12_GPU_VIRTUAL_ADDRESS gpuBufferLocation) = 0; + virtual void setRootUAV(int index, D3D12_GPU_VIRTUAL_ADDRESS gpuBufferLocation) = 0; + virtual void setRootSRV(int index, D3D12_GPU_VIRTUAL_ADDRESS gpuBufferLocation) = 0; + virtual void setRootDescriptorTable(int index, D3D12_GPU_DESCRIPTOR_HANDLE BaseDescriptor) = 0; + virtual void setRootSignature(ID3D12RootSignature* rootSignature) = 0; + 
virtual void setRootConstants( + Index rootParamIndex, + Index dstOffsetIn32BitValues, + Index countOf32BitValues, + void const* srcData) = 0; + virtual void setPipelineState(PipelineStateBase* pipelineState) = 0; +}; + +class PipelineCommandEncoder +{ +public: + bool m_isOpen = false; + bool m_bindingDirty = true; + CommandBufferImpl* m_commandBuffer; + TransientResourceHeapImpl* m_transientHeap; + DeviceImpl* m_renderer; + ID3D12Device* m_device; + ID3D12GraphicsCommandList* m_d3dCmdList; + ID3D12GraphicsCommandList* m_preCmdList = nullptr; + + RefPtr m_currentPipeline; + + static int getBindPointIndex(PipelineType type); + + void init(CommandBufferImpl* commandBuffer); + + void endEncodingImpl() { m_isOpen = false; } + + Result bindPipelineImpl(IPipelineState* pipelineState, IShaderObject** outRootObject); + + Result bindPipelineWithRootObjectImpl(IPipelineState* pipelineState, IShaderObject* rootObject); + + /// Specializes the pipeline according to current root-object argument values, + /// applys the root object bindings and binds the pipeline state. + /// The newly specialized pipeline is held alive by the pipeline cache so users of + /// `newPipeline` do not need to maintain its lifespan. 
+ Result _bindRenderState(Submitter* submitter, RefPtr& newPipeline); +}; + +struct DescriptorTable +{ + DescriptorHeapReference m_heap; + uint32_t m_offset = 0; + uint32_t m_count = 0; + + SLANG_FORCE_INLINE uint32_t getDescriptorCount() const { return m_count; } + + /// Get the GPU handle at the specified index + SLANG_FORCE_INLINE D3D12_GPU_DESCRIPTOR_HANDLE getGpuHandle(uint32_t index = 0) const + { + SLANG_ASSERT(index < getDescriptorCount()); + return m_heap.getGpuHandle(m_offset + index); + } + + /// Get the CPU handle at the specified index + SLANG_FORCE_INLINE D3D12_CPU_DESCRIPTOR_HANDLE getCpuHandle(uint32_t index = 0) const + { + SLANG_ASSERT(index < getDescriptorCount()); + return m_heap.getCpuHandle(m_offset + index); + } + + void freeIfSupported() + { + if (m_count) + { + m_heap.freeIfSupported(m_offset, m_count); + m_offset = 0; + m_count = 0; + } + } + + bool allocate(uint32_t count) + { + auto allocatedOffset = m_heap.allocate(count); + if (allocatedOffset == -1) + return false; + m_offset = allocatedOffset; + m_count = count; + return true; + } + + bool allocate(DescriptorHeapReference heap, uint32_t count) + { + auto allocatedOffset = heap.allocate(count); + if (allocatedOffset == -1) + return false; + m_heap = heap; + m_offset = allocatedOffset; + m_count = count; + return true; + } +}; + +/// Contextual data and operations required when binding shader objects to the pipeline state +struct BindingContext +{ + PipelineCommandEncoder* encoder; + Submitter* submitter; + TransientResourceHeapImpl* transientHeap; + DeviceImpl* device; + D3D12_DESCRIPTOR_HEAP_TYPE + outOfMemoryHeap; // The type of descriptor heap that is OOM during binding. +}; + +/// A representation of the offset at which to bind a shader parameter or sub-object +struct BindingOffset +{ + // Note: When we actually bind a shader object to the pipeline we do not care about + // HLSL-specific notions like `t` registers and `space`s. 
Those concepts are all + // mediated by the root signature. + // + // Instead, we need to consider the offsets at which the object will be bound + // into the actual D3D12 API state, which consists of the index of the current + // root parameter to bind from, as well as indices into the current descriptor + // tables (for resource views and samplers). + + uint32_t rootParam = 0; + uint32_t resource = 0; + uint32_t sampler = 0; + + void operator+=(BindingOffset const& offset) + { + rootParam += offset.rootParam; + resource += offset.resource; + sampler += offset.sampler; + } +}; + +/// A reprsentation of an allocated descriptor set, consisting of an option resource table and +/// an optional sampler table +struct DescriptorSet +{ + DescriptorTable resourceTable; + DescriptorTable samplerTable; + + void freeIfSupported() + { + resourceTable.freeIfSupported(); + samplerTable.freeIfSupported(); + } +}; + +// Provides information on how binding ranges are stored in descriptor tables for +// a shader object. +// We allocate one CPU descriptor table for each descriptor heap type for the shader +// object. In `ShaderObjectLayoutImpl`, we store the offset into the descriptor tables +// for each binding, so we know where to write the descriptor when the user sets +// a resource or sampler binding. +class ShaderObjectLayoutImpl : public ShaderObjectLayoutBase +{ +public: + /// Information about a single logical binding range + struct BindingRangeInfo + { + // Some of the information we store on binding ranges is redundant with + // the information that Slang's reflection information stores, but having + // it here can make the code more compact and obvious. + + /// The type of binding in this range. + slang::BindingType bindingType; + + /// The shape of the resource + SlangResourceShape resourceShape; + + /// The number of distinct bindings in this range. 
+ uint32_t count; + + /// A "flat" index for this range in whatever array provides backing storage for it + uint32_t baseIndex; + + /// An index into the sub-object array if this binding range is treated + /// as a sub-object. + uint32_t subObjectIndex; + + bool isRootParameter; + }; + + /// Offset information for a sub-object range + struct SubObjectRangeOffset : BindingOffset + { + SubObjectRangeOffset() {} + + SubObjectRangeOffset(slang::VariableLayoutReflection* varLayout); + + /// The offset for "pending" ordinary data related to this range + uint32_t pendingOrdinaryData = 0; + }; + + /// Stride information for a sub-object range + struct SubObjectRangeStride : BindingOffset + { + SubObjectRangeStride() {} + + SubObjectRangeStride(slang::TypeLayoutReflection* typeLayout); + + /// The strid for "pending" ordinary data related to this range + uint32_t pendingOrdinaryData = 0; + }; + + /// Information about a sub-objecrt range + struct SubObjectRangeInfo + { + /// The index of the binding range corresponding to this sub-object range + Index bindingRangeIndex = 0; + + /// Layout information for the type of sub-object expected to be bound, if known + RefPtr layout; + + /// The offset to use when binding the first object in this range + SubObjectRangeOffset offset; + + /// Stride between consecutive objects in this range + SubObjectRangeStride stride; + }; + + struct RootParameterInfo + { + IResourceView::Type type; + }; + + static bool isBindingRangeRootParameter( + SlangSession* globalSession, + const char* rootParameterAttributeName, + slang::TypeLayoutReflection* typeLayout, + Index bindingRangeIndex); + + struct Builder + { + public: + Builder(RendererBase* renderer) + : m_renderer(renderer) + {} + + RendererBase* m_renderer; + slang::TypeLayoutReflection* m_elementTypeLayout; + List m_bindingRanges; + List m_subObjectRanges; + List m_rootParamsInfo; + + /// The number of sub-objects (not just sub-object *ranges*) stored in instances of this + /// layout + 
uint32_t m_subObjectCount = 0; + + /// Counters for the number of root parameters, resources, and samplers in this object + /// itself + BindingOffset m_ownCounts; + + /// Counters for the number of root parameters, resources, and sampler in this object + /// and transitive sub-objects + BindingOffset m_totalCounts; + + /// The number of root parameter consumed by (transitive) sub-objects + uint32_t m_childRootParameterCount = 0; + + /// The total size in bytes of the ordinary data for this object and transitive + /// sub-object. + uint32_t m_totalOrdinaryDataSize = 0; + + /// The container type of this shader object. When `m_containerType` is + /// `StructuredBuffer` or `UnsizedArray`, this shader object represents a collection + /// instead of a single object. + ShaderObjectContainerType m_containerType = ShaderObjectContainerType::None; + + Result setElementTypeLayout(slang::TypeLayoutReflection* typeLayout); + + Result build(ShaderObjectLayoutImpl** outLayout); + }; + + static Result createForElementType( + RendererBase* renderer, + slang::TypeLayoutReflection* elementType, + ShaderObjectLayoutImpl** outLayout); + + List const& getBindingRanges() { return m_bindingRanges; } + + Index getBindingRangeCount() { return m_bindingRanges.getCount(); } + + BindingRangeInfo const& getBindingRange(Index index) { return m_bindingRanges[index]; } + + uint32_t getResourceSlotCount() { return m_ownCounts.resource; } + uint32_t getSamplerSlotCount() { return m_ownCounts.sampler; } + Index getSubObjectSlotCount() { return m_subObjectCount; } + Index getSubObjectCount() { return m_subObjectCount; } + + uint32_t getTotalResourceDescriptorCount() { return m_totalCounts.resource; } + uint32_t getTotalSamplerDescriptorCount() { return m_totalCounts.sampler; } + + uint32_t getOrdinaryDataBufferCount() { return m_totalOrdinaryDataSize ? 
1 : 0; } + bool hasOrdinaryDataBuffer() { return m_totalOrdinaryDataSize != 0; } + + uint32_t getTotalResourceDescriptorCountWithoutOrdinaryDataBuffer() + { + return m_totalCounts.resource - getOrdinaryDataBufferCount(); + } + + uint32_t getOwnUserRootParameterCount() { return (uint32_t)m_rootParamsInfo.getCount(); } + uint32_t getTotalRootTableParameterCount() { return m_totalCounts.rootParam; } + uint32_t getChildRootParameterCount() { return m_childRootParameterCount; } + + uint32_t getTotalOrdinaryDataSize() const { return m_totalOrdinaryDataSize; } + + SubObjectRangeInfo const& getSubObjectRange(Index index) { return m_subObjectRanges[index]; } + List const& getSubObjectRanges() { return m_subObjectRanges; } + + RendererBase* getRenderer() { return m_renderer; } + + slang::TypeReflection* getType() { return m_elementTypeLayout->getType(); } + + const RootParameterInfo& getRootParameterInfo(Index index) { return m_rootParamsInfo[index]; } + +protected: + Result init(Builder* builder); + + List m_bindingRanges; + List m_subObjectRanges; + List m_rootParamsInfo; + + BindingOffset m_ownCounts; + BindingOffset m_totalCounts; + + uint32_t m_subObjectCount = 0; + uint32_t m_childRootParameterCount = 0; + + uint32_t m_totalOrdinaryDataSize = 0; +}; + +class RootShaderObjectLayoutImpl : public ShaderObjectLayoutImpl +{ + typedef ShaderObjectLayoutImpl Super; + +public: + struct EntryPointInfo + { + RefPtr layout; + BindingOffset offset; + }; + + struct Builder : Super::Builder + { + Builder( + RendererBase* renderer, + slang::IComponentType* program, + slang::ProgramLayout* programLayout) + : Super::Builder(renderer) + , m_program(program) + , m_programLayout(programLayout) + {} + + Result build(RootShaderObjectLayoutImpl** outLayout); + + void addGlobalParams(slang::VariableLayoutReflection* globalsLayout); + + void addEntryPoint(SlangStage stage, ShaderObjectLayoutImpl* entryPointLayout); + + slang::IComponentType* m_program; + slang::ProgramLayout* m_programLayout; 
+ List m_entryPoints; + }; + + EntryPointInfo& getEntryPoint(Index index) { return m_entryPoints[index]; } + + List& getEntryPoints() { return m_entryPoints; } + + struct DescriptorSetLayout + { + List m_resourceRanges; + List m_samplerRanges; + uint32_t m_resourceCount = 0; + uint32_t m_samplerCount = 0; + }; + + struct RootSignatureDescBuilder + { + DeviceImpl* m_device; + + RootSignatureDescBuilder(DeviceImpl* device) + : m_device(device) + {} + + // We will use one descriptor set for the global scope and one additional + // descriptor set for each `ParameterBlock` binding range in the shader object + // hierarchy, regardless of the shader's `space` indices. + List m_descriptorSets; + List m_rootParameters; + D3D12_ROOT_SIGNATURE_DESC m_rootSignatureDesc = {}; + + static Result translateDescriptorRangeType( + slang::BindingType c, D3D12_DESCRIPTOR_RANGE_TYPE* outType); + + /// Stores offset information to apply to the reflected register/space for a descriptor + /// range. + /// + struct BindingRegisterOffset + { + uint32_t spaceOffset = 0; // The `space` index as specified in shader. + + enum + { + kRangeTypeCount = 4 + }; + + /// An offset to apply for each D3D12 register class, as given + /// by a `D3D12_DESCRIPTOR_RANGE_TYPE`. + /// + /// Note that the `D3D12_DESCRIPTOR_RANGE_TYPE` enumeration has + /// values between 0 and 3, inclusive. 
+ /// + uint32_t offsetForRangeType[kRangeTypeCount] = {0, 0, 0, 0}; + + uint32_t& operator[](D3D12_DESCRIPTOR_RANGE_TYPE type) + { + return offsetForRangeType[int(type)]; + } + + uint32_t operator[](D3D12_DESCRIPTOR_RANGE_TYPE type) const + { + return offsetForRangeType[int(type)]; + } + + BindingRegisterOffset() {} + + BindingRegisterOffset(slang::VariableLayoutReflection* varLayout) + { + if (varLayout) + { + spaceOffset = + (UINT)varLayout->getOffset(SLANG_PARAMETER_CATEGORY_REGISTER_SPACE); + offsetForRangeType[D3D12_DESCRIPTOR_RANGE_TYPE_CBV] = + (UINT)varLayout->getOffset(SLANG_PARAMETER_CATEGORY_CONSTANT_BUFFER); + offsetForRangeType[D3D12_DESCRIPTOR_RANGE_TYPE_SRV] = + (UINT)varLayout->getOffset(SLANG_PARAMETER_CATEGORY_SHADER_RESOURCE); + offsetForRangeType[D3D12_DESCRIPTOR_RANGE_TYPE_UAV] = + (UINT)varLayout->getOffset(SLANG_PARAMETER_CATEGORY_UNORDERED_ACCESS); + offsetForRangeType[D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER] = + (UINT)varLayout->getOffset(SLANG_PARAMETER_CATEGORY_SAMPLER_STATE); + } + } + + void operator+=(BindingRegisterOffset const& other) + { + spaceOffset += other.spaceOffset; + for (int i = 0; i < kRangeTypeCount; ++i) + { + offsetForRangeType[i] += other.offsetForRangeType[i]; + } + } + }; + + struct BindingRegisterOffsetPair + { + BindingRegisterOffset primary; + BindingRegisterOffset pending; + + BindingRegisterOffsetPair() {} + + BindingRegisterOffsetPair(slang::VariableLayoutReflection* varLayout) + : primary(varLayout) + , pending(varLayout->getPendingDataLayout()) + {} + + void operator+=(BindingRegisterOffsetPair const& other) + { + primary += other.primary; + pending += other.pending; + } + }; + /// Add a new descriptor set to the layout being computed. + /// + /// Note that a "descriptor set" in the layout may amount to + /// zero, one, or two different descriptor *tables* in the + /// final D3D12 root signature. Each descriptor set may + /// contain zero or more view ranges (CBV/SRV/UAV) and zero + /// or more sampler ranges. 
It maps to a view descriptor table + /// if the number of view ranges is non-zero and to a sampler + /// descriptor table if the number of sampler ranges is non-zero. + /// + uint32_t addDescriptorSet(); + + Result addDescriptorRange( + Index physicalDescriptorSetIndex, + D3D12_DESCRIPTOR_RANGE_TYPE rangeType, + UINT registerIndex, + UINT spaceIndex, + UINT count, + bool isRootParameter); + /// Add one descriptor range as specified in Slang reflection information to the layout. + /// + /// The layout information is taken from `typeLayout` for the descriptor + /// range with the given `descriptorRangeIndex` within the logical + /// descriptor set (reflected by Slang) with the given `logicalDescriptorSetIndex`. + /// + /// The `physicalDescriptorSetIndex` is the index in the `m_descriptorSets` array of + /// the descriptor set that the range should be added to. + /// + /// The `offset` encodes information about space and/or register offsets that + /// should be applied to descrptor ranges. + /// + /// This operation can fail if the given descriptor range encodes a range that + /// doesn't map to anything directly supported by D3D12. Higher-level routines + /// will often want to ignore such failures. + /// + Result addDescriptorRange( + slang::TypeLayoutReflection* typeLayout, + Index physicalDescriptorSetIndex, + BindingRegisterOffset const& containerOffset, + BindingRegisterOffset const& elementOffset, + Index logicalDescriptorSetIndex, + Index descriptorRangeIndex, + bool isRootParameter); + + /// Add one binding range to the computed layout. + /// + /// The layout information is taken from `typeLayout` for the binding + /// range with the given `bindingRangeIndex`. + /// + /// The `physicalDescriptorSetIndex` is the index in the `m_descriptorSets` array of + /// the descriptor set that the range should be added to. + /// + /// The `offset` encodes information about space and/or register offsets that + /// should be applied to descrptor ranges. 
+ /// + /// Note that a single binding range may encompass zero or more descriptor ranges. + /// + void addBindingRange( + slang::TypeLayoutReflection* typeLayout, + Index physicalDescriptorSetIndex, + BindingRegisterOffset const& containerOffset, + BindingRegisterOffset const& elementOffset, + Index bindingRangeIndex); + + void addAsValue( + slang::VariableLayoutReflection* varLayout, Index physicalDescriptorSetIndex); + + /// Add binding ranges and parameter blocks to the root signature. + /// + /// The layout information is taken from `typeLayout` which should + /// be a layout for either a program or an entry point. + /// + /// The `physicalDescriptorSetIndex` is the index in the `m_descriptorSets` array of + /// the descriptor set that binding ranges not belonging to nested + /// parameter blocks should be added to. + /// + /// The `offset` encodes information about space and/or register offsets that + /// should be applied to descrptor ranges. + /// + void addAsConstantBuffer( + slang::TypeLayoutReflection* typeLayout, + Index physicalDescriptorSetIndex, + BindingRegisterOffsetPair const& containerOffset, + BindingRegisterOffsetPair const& elementOffset); + + void addAsValue( + slang::TypeLayoutReflection* typeLayout, + Index physicalDescriptorSetIndex, + BindingRegisterOffsetPair const& containerOffset, + BindingRegisterOffsetPair const& elementOffset); + + D3D12_ROOT_SIGNATURE_DESC& build(); + }; + + static Result createRootSignatureFromSlang( + DeviceImpl* device, + RootShaderObjectLayoutImpl* rootLayout, + slang::IComponentType* program, + ID3D12RootSignature** outRootSignature, + ID3DBlob** outError); + + static Result create( + DeviceImpl* device, + slang::IComponentType* program, + slang::ProgramLayout* programLayout, + RootShaderObjectLayoutImpl** outLayout, + ID3DBlob** outError); + + slang::IComponentType* getSlangProgram() const { return m_program; } + slang::ProgramLayout* getSlangProgramLayout() const { return m_programLayout; } + +protected: + 
Result init(Builder* builder); + + ComPtr m_program; + slang::ProgramLayout* m_programLayout = nullptr; + + List m_entryPoints; + +public: + ComPtr m_rootSignature; +}; + +struct ShaderBinary +{ + SlangStage stage; + slang::EntryPointReflection* entryPointInfo; + String actualEntryPointNameInAPI; + List code; +}; + +class ShaderProgramImpl : public ShaderProgramBase +{ +public: + RefPtr m_rootObjectLayout; + List m_shaders; + + virtual Result createShaderModule( + slang::EntryPointReflection* entryPointInfo, ComPtr kernelCode) override; +}; + +class ShaderObjectImpl + : public ShaderObjectBaseImpl +{ + typedef ShaderObjectBaseImpl + Super; + +public: + static Result create( + DeviceImpl* device, ShaderObjectLayoutImpl* layout, ShaderObjectImpl** outShaderObject); + + ~ShaderObjectImpl(); + + RendererBase* getDevice() { return m_device.get(); } + + virtual SLANG_NO_THROW UInt SLANG_MCALL getEntryPointCount() override; + + virtual SLANG_NO_THROW Result SLANG_MCALL + getEntryPoint(UInt index, IShaderObject** outEntryPoint) override; + + virtual SLANG_NO_THROW const void* SLANG_MCALL getRawData() override; + + virtual SLANG_NO_THROW size_t SLANG_MCALL getSize() override; + + virtual SLANG_NO_THROW Result SLANG_MCALL + setData(ShaderOffset const& inOffset, void const* data, size_t inSize) override; + virtual SLANG_NO_THROW Result SLANG_MCALL + setObject(ShaderOffset const& offset, IShaderObject* object) override; + + virtual SLANG_NO_THROW Result SLANG_MCALL + setResource(ShaderOffset const& offset, IResourceView* resourceView) override; + + virtual SLANG_NO_THROW Result SLANG_MCALL + setSampler(ShaderOffset const& offset, ISamplerState* sampler) override; + + virtual SLANG_NO_THROW Result SLANG_MCALL setCombinedTextureSampler( + ShaderOffset const& offset, IResourceView* textureView, ISamplerState* sampler) override; + +protected: + Result init( + DeviceImpl* device, + ShaderObjectLayoutImpl* layout, + DescriptorHeapReference viewHeap, + DescriptorHeapReference 
samplerHeap); + + /// Write the uniform/ordinary data of this object into the given `dest` buffer at the given + /// `offset` + Result _writeOrdinaryData( + PipelineCommandEncoder* encoder, + BufferResourceImpl* buffer, + size_t offset, + size_t destSize, + ShaderObjectLayoutImpl* specializedLayout); + + bool shouldAllocateConstantBuffer(TransientResourceHeapImpl* transientHeap); + + /// Ensure that the `m_ordinaryDataBuffer` has been created, if it is needed + Result _ensureOrdinaryDataBufferCreatedIfNeeded( + PipelineCommandEncoder* encoder, ShaderObjectLayoutImpl* specializedLayout); + +public: + void updateSubObjectsRecursive(); + /// Prepare to bind this object as a parameter block. + /// + /// This involves allocating and binding any descriptor tables necessary + /// to to store the state of the object. The function returns a descriptor + /// set formed from any table(s) allocated. In addition, the `ioOffset` + /// parameter will be adjusted to be correct for binding values into + /// the resulting descriptor set. + /// + /// Returns: + /// SLANG_OK when successful, + /// SLANG_E_OUT_OF_MEMORY when descriptor heap is full. 
+ /// + Result prepareToBindAsParameterBlock( + BindingContext* context, + BindingOffset& ioOffset, + ShaderObjectLayoutImpl* specializedLayout, + DescriptorSet& outDescriptorSet); + + bool checkIfCachedDescriptorSetIsValidRecursive(BindingContext* context); + + /// Bind this object as a `ParameterBlock` + Result bindAsParameterBlock( + BindingContext* context, + BindingOffset const& offset, + ShaderObjectLayoutImpl* specializedLayout); + + /// Bind this object as a `ConstantBuffer` + Result bindAsConstantBuffer( + BindingContext* context, + DescriptorSet const& descriptorSet, + BindingOffset const& offset, + ShaderObjectLayoutImpl* specializedLayout); + + /// Bind this object as a value (for an interface-type parameter) + Result bindAsValue( + BindingContext* context, + DescriptorSet const& descriptorSet, + BindingOffset const& offset, + ShaderObjectLayoutImpl* specializedLayout); + + /// Shared logic for `bindAsConstantBuffer()` and `bindAsValue()` + Result _bindImpl( + BindingContext* context, + DescriptorSet const& descriptorSet, + BindingOffset const& offset, + ShaderObjectLayoutImpl* specializedLayout); + + Result bindRootArguments(BindingContext* context, uint32_t& index); + /// A CPU-memory descriptor set holding any descriptors used to represent the + /// resources/samplers in this object's state + DescriptorSet m_descriptorSet; + /// A cached descriptor set on GPU heap. + DescriptorSet m_cachedGPUDescriptorSet; + + ShortList, 8> m_boundResources; + List m_rootArguments; + /// A constant buffer used to stored ordinary data for this object + /// and existential-type sub-objects. + /// + /// Allocated from transient heap on demand with `_createOrdinaryDataBufferIfNeeded()` + IBufferResource* m_constantBufferWeakPtr = nullptr; + size_t m_constantBufferOffset = 0; + size_t m_constantBufferSize = 0; + + /// Dirty bit tracking whether the constant buffer needs to be updated. 
+ bool m_isConstantBufferDirty = true; + /// The transient heap from which the constant buffer and descriptor set is allocated. + TransientResourceHeapImpl* m_cachedTransientHeap; + /// The version of the transient heap when the constant buffer and descriptor set is + /// allocated. + uint64_t m_cachedTransientHeapVersion; + + /// Whether this shader object is allowed to be mutable. + bool m_isMutable = false; + /// The version of a mutable shader object. + uint32_t m_version = 0; + /// The version of this mutable shader object when the gpu descriptor table is cached. + uint32_t m_cachedGPUDescriptorSetVersion = -1; + /// The versions of bound subobjects. + List m_subObjectVersions; + + /// Get the layout of this shader object with specialization arguments considered + /// + /// This operation should only be called after the shader object has been + /// fully filled in and finalized. + /// + Result getSpecializedLayout(ShaderObjectLayoutImpl** outLayout); + + /// Create the layout for this shader object with specialization arguments considered + /// + /// This operation is virtual so that it can be customized by `RootShaderObject`. + /// + virtual Result _createSpecializedLayout(ShaderObjectLayoutImpl** outLayout); + + RefPtr m_specializedLayout; +}; + +class RootShaderObjectImpl : public ShaderObjectImpl +{ + typedef ShaderObjectImpl Super; + +public: + // Override default reference counting behavior to disable lifetime management via ComPtr. + // Root objects are managed by command buffer and does not need to be freed by the user. 
+ SLANG_NO_THROW uint32_t SLANG_MCALL addRef() override { return 1; } + SLANG_NO_THROW uint32_t SLANG_MCALL release() override { return 1; } + +public: + RootShaderObjectLayoutImpl* getLayout(); + + virtual SLANG_NO_THROW UInt SLANG_MCALL getEntryPointCount() override; + virtual SLANG_NO_THROW SlangResult SLANG_MCALL + getEntryPoint(UInt index, IShaderObject** outEntryPoint) override; + virtual Result collectSpecializationArgs(ExtendedShaderObjectTypeList& args) override; + virtual SLANG_NO_THROW Result SLANG_MCALL + copyFrom(IShaderObject* object, ITransientResourceHeap* transientHeap) override; + +public: + Result bindAsRoot(BindingContext* context, RootShaderObjectLayoutImpl* specializedLayout); + +public: + Result init(DeviceImpl* device) { return SLANG_OK; } + + Result resetImpl( + DeviceImpl* device, + RootShaderObjectLayoutImpl* layout, + DescriptorHeapReference viewHeap, + DescriptorHeapReference samplerHeap, + bool isMutable); + + Result reset( + DeviceImpl* device, RootShaderObjectLayoutImpl* layout, TransientResourceHeapImpl* heap); + +protected: + virtual Result _createSpecializedLayout(ShaderObjectLayoutImpl** outLayout) override; + + List> m_entryPoints; +}; + +class MutableRootShaderObjectImpl : public RootShaderObjectImpl +{ +public: + // Override default reference counting behavior to disable lifetime management via ComPtr. + // Root objects are managed by command buffer and does not need to be freed by the user. 
+ SLANG_NO_THROW uint32_t SLANG_MCALL addRef() override { return ShaderObjectBase::addRef(); } + SLANG_NO_THROW uint32_t SLANG_MCALL release() override { return ShaderObjectBase::release(); } +}; + +class ShaderTableImpl : public ShaderTableBase +{ +public: + uint32_t m_rayGenTableOffset; + uint32_t m_missTableOffset; + uint32_t m_hitGroupTableOffset; + + DeviceImpl* m_device; + + virtual RefPtr createDeviceBuffer( + PipelineStateBase* pipeline, + TransientResourceHeapBase* transientHeap, + IResourceCommandEncoder* encoder) override; +}; + +class ResourceCommandEncoderImpl + : public IResourceCommandEncoder + , public PipelineCommandEncoder +{ +public: + virtual SLANG_NO_THROW void SLANG_MCALL copyBuffer( + IBufferResource* dst, + size_t dstOffset, + IBufferResource* src, + size_t srcOffset, + size_t size) override; + virtual SLANG_NO_THROW void SLANG_MCALL + uploadBufferData(IBufferResource* dst, size_t offset, size_t size, void* data) override; + virtual SLANG_NO_THROW void SLANG_MCALL textureBarrier( + size_t count, + ITextureResource* const* textures, + ResourceState src, + ResourceState dst) override; + virtual SLANG_NO_THROW void SLANG_MCALL bufferBarrier( + size_t count, + IBufferResource* const* buffers, + ResourceState src, + ResourceState dst) override; + virtual SLANG_NO_THROW void SLANG_MCALL endEncoding() {} + virtual SLANG_NO_THROW void SLANG_MCALL + writeTimestamp(IQueryPool* pool, SlangInt index) override; + virtual SLANG_NO_THROW void SLANG_MCALL copyTexture( + ITextureResource* dst, + ResourceState dstState, + SubresourceRange dstSubresource, + ITextureResource::Offset3D dstOffset, + ITextureResource* src, + ResourceState srcState, + SubresourceRange srcSubresource, + ITextureResource::Offset3D srcOffset, + ITextureResource::Size extent) override; + + virtual SLANG_NO_THROW void SLANG_MCALL uploadTextureData( + ITextureResource* dst, + SubresourceRange subResourceRange, + ITextureResource::Offset3D offset, + ITextureResource::Size extent, + 
ITextureResource::SubresourceData* subResourceData, + size_t subResourceDataCount) override; + + virtual SLANG_NO_THROW void SLANG_MCALL clearResourceView( + IResourceView* view, ClearValue* clearValue, ClearResourceViewFlags::Enum flags) override; + + virtual SLANG_NO_THROW void SLANG_MCALL resolveResource( + ITextureResource* source, + ResourceState sourceState, + SubresourceRange sourceRange, + ITextureResource* dest, + ResourceState destState, + SubresourceRange destRange) override; + + virtual SLANG_NO_THROW void SLANG_MCALL resolveQuery( + IQueryPool* queryPool, + uint32_t index, + uint32_t count, + IBufferResource* buffer, + uint64_t offset) override; + + virtual SLANG_NO_THROW void SLANG_MCALL copyTextureToBuffer( + IBufferResource* dst, + size_t dstOffset, + size_t dstSize, + size_t dstRowStride, + ITextureResource* src, + ResourceState srcState, + SubresourceRange srcSubresource, + ITextureResource::Offset3D srcOffset, + ITextureResource::Size extent) override; + + virtual SLANG_NO_THROW void SLANG_MCALL textureSubresourceBarrier( + ITextureResource* texture, + SubresourceRange subresourceRange, + ResourceState src, + ResourceState dst) override; + + virtual SLANG_NO_THROW void SLANG_MCALL + beginDebugEvent(const char* name, float rgbColor[3]) override; + virtual SLANG_NO_THROW void SLANG_MCALL endDebugEvent() override; +}; + +class ComputeCommandEncoderImpl + : public IComputeCommandEncoder + , public ResourceCommandEncoderImpl +{ +public: + SLANG_GFX_FORWARD_RESOURCE_COMMAND_ENCODER_IMPL(ResourceCommandEncoderImpl) +public: + virtual SLANG_NO_THROW void SLANG_MCALL endEncoding() override; + void init( + DeviceImpl* renderer, + TransientResourceHeapImpl* transientHeap, + CommandBufferImpl* cmdBuffer); + + virtual SLANG_NO_THROW Result SLANG_MCALL + bindPipeline(IPipelineState* state, IShaderObject** outRootObject) override; + + virtual SLANG_NO_THROW Result SLANG_MCALL + bindPipelineWithRootObject(IPipelineState* state, IShaderObject* rootObject) 
override; + + virtual SLANG_NO_THROW void SLANG_MCALL dispatchCompute(int x, int y, int z) override; + + virtual SLANG_NO_THROW void SLANG_MCALL + dispatchComputeIndirect(IBufferResource* argBuffer, uint64_t offset) override; +}; +class RenderCommandEncoderImpl + : public IRenderCommandEncoder + , public ResourceCommandEncoderImpl +{ +public: + SLANG_GFX_FORWARD_RESOURCE_COMMAND_ENCODER_IMPL(ResourceCommandEncoderImpl) +public: + RefPtr m_renderPass; + RefPtr m_framebuffer; + + List m_boundVertexBuffers; + + RefPtr m_boundIndexBuffer; + + D3D12_VIEWPORT m_viewports[kMaxRTVCount]; + D3D12_RECT m_scissorRects[kMaxRTVCount]; + + DXGI_FORMAT m_boundIndexFormat; + UINT m_boundIndexOffset; + + D3D12_PRIMITIVE_TOPOLOGY_TYPE m_primitiveTopologyType; + D3D12_PRIMITIVE_TOPOLOGY m_primitiveTopology; + + void init( + DeviceImpl* renderer, + TransientResourceHeapImpl* transientHeap, + CommandBufferImpl* cmdBuffer, + RenderPassLayoutImpl* renderPass, + FramebufferImpl* framebuffer); + + virtual SLANG_NO_THROW Result SLANG_MCALL + bindPipeline(IPipelineState* state, IShaderObject** outRootObject) override; + + virtual SLANG_NO_THROW Result SLANG_MCALL + bindPipelineWithRootObject(IPipelineState* state, IShaderObject* rootObject) override; + + virtual SLANG_NO_THROW void SLANG_MCALL + setViewports(uint32_t count, const Viewport* viewports) override; + + virtual SLANG_NO_THROW void SLANG_MCALL + setScissorRects(uint32_t count, const ScissorRect* rects) override; + + virtual SLANG_NO_THROW void SLANG_MCALL + setPrimitiveTopology(PrimitiveTopology topology) override; + + virtual SLANG_NO_THROW void SLANG_MCALL setVertexBuffers( + uint32_t startSlot, + uint32_t slotCount, + IBufferResource* const* buffers, + const uint32_t* offsets) override; + + virtual SLANG_NO_THROW void SLANG_MCALL + setIndexBuffer(IBufferResource* buffer, Format indexFormat, uint32_t offset = 0) override; + + void prepareDraw(); + virtual SLANG_NO_THROW void SLANG_MCALL + draw(uint32_t vertexCount, uint32_t 
startVertex = 0) override; + virtual SLANG_NO_THROW void SLANG_MCALL + drawIndexed(uint32_t indexCount, uint32_t startIndex = 0, uint32_t baseVertex = 0) override; + virtual SLANG_NO_THROW void SLANG_MCALL endEncoding() override; + + virtual SLANG_NO_THROW void SLANG_MCALL setStencilReference(uint32_t referenceValue) override; + + virtual SLANG_NO_THROW void SLANG_MCALL drawIndirect( + uint32_t maxDrawCount, + IBufferResource* argBuffer, + uint64_t argOffset, + IBufferResource* countBuffer, + uint64_t countOffset) override; + + virtual SLANG_NO_THROW void SLANG_MCALL drawIndexedIndirect( + uint32_t maxDrawCount, + IBufferResource* argBuffer, + uint64_t argOffset, + IBufferResource* countBuffer, + uint64_t countOffset) override; + + virtual SLANG_NO_THROW Result SLANG_MCALL setSamplePositions( + uint32_t samplesPerPixel, + uint32_t pixelCount, + const SamplePosition* samplePositions) override; + + virtual SLANG_NO_THROW void SLANG_MCALL drawInstanced( + uint32_t vertexCount, + uint32_t instanceCount, + uint32_t startVertex, + uint32_t startInstanceLocation) override; + + virtual SLANG_NO_THROW void SLANG_MCALL drawIndexedInstanced( + uint32_t indexCount, + uint32_t instanceCount, + uint32_t startIndexLocation, + int32_t baseVertexLocation, + uint32_t startInstanceLocation) override; +}; + +#if SLANG_GFX_HAS_DXR_SUPPORT +class RayTracingCommandEncoderImpl + : public IRayTracingCommandEncoder + , public ResourceCommandEncoderImpl +{ +public: + SLANG_GFX_FORWARD_RESOURCE_COMMAND_ENCODER_IMPL(ResourceCommandEncoderImpl) +public: + virtual SLANG_NO_THROW void SLANG_MCALL buildAccelerationStructure( + const IAccelerationStructure::BuildDesc& desc, + int propertyQueryCount, + AccelerationStructureQueryDesc* queryDescs) override; + virtual SLANG_NO_THROW void SLANG_MCALL copyAccelerationStructure( + IAccelerationStructure* dest, + IAccelerationStructure* src, + AccelerationStructureCopyMode mode) override; + virtual SLANG_NO_THROW void SLANG_MCALL 
queryAccelerationStructureProperties( + int accelerationStructureCount, + IAccelerationStructure* const* accelerationStructures, + int queryCount, + AccelerationStructureQueryDesc* queryDescs) override; + virtual SLANG_NO_THROW void SLANG_MCALL + serializeAccelerationStructure(DeviceAddress dest, IAccelerationStructure* source) override; + virtual SLANG_NO_THROW void SLANG_MCALL deserializeAccelerationStructure( + IAccelerationStructure* dest, DeviceAddress source) override; + virtual SLANG_NO_THROW void SLANG_MCALL + bindPipeline(IPipelineState* state, IShaderObject** outRootObject) override; + virtual SLANG_NO_THROW Result SLANG_MCALL + bindPipelineWithRootObject(IPipelineState* state, IShaderObject* rootObject) override + { + return bindPipelineWithRootObjectImpl(state, rootObject); + } + virtual SLANG_NO_THROW void SLANG_MCALL dispatchRays( + uint32_t rayGenShaderIndex, + IShaderTable* shaderTable, + int32_t width, + int32_t height, + int32_t depth) override; + virtual SLANG_NO_THROW void SLANG_MCALL endEncoding() {} +}; +#endif + +class CommandBufferImpl + : public ICommandBuffer + , public ComObject +{ +public: + // There are a pair of cyclic references between a `TransientResourceHeap` and + // a `CommandBuffer` created from the heap. We need to break the cycle upon + // the public reference count of a command buffer dropping to 0. + SLANG_COM_OBJECT_IUNKNOWN_ALL + + ICommandBuffer* getInterface(const Guid& guid); + virtual void comFree() override { m_transientHeap.breakStrongReference(); } + + virtual SLANG_NO_THROW Result SLANG_MCALL getNativeHandle(InteropHandle* handle) override; + +public: + ComPtr m_cmdList; + ComPtr m_cmdList1; + ComPtr m_cmdList4; + + BreakableReference m_transientHeap; + // Weak reference is fine here since `m_transientHeap` already holds strong reference to + // device. 
+ DeviceImpl* m_renderer; + RootShaderObjectImpl m_rootShaderObject; + RefPtr m_mutableRootShaderObject; + bool m_descriptorHeapsBound = false; + + void bindDescriptorHeaps(); + + void invalidateDescriptorHeapBinding() { m_descriptorHeapsBound = false; } + + void reinit(); + + void init( + DeviceImpl* renderer, + ID3D12GraphicsCommandList* d3dCommandList, + TransientResourceHeapImpl* transientHeap); + + ResourceCommandEncoderImpl m_resourceCommandEncoder; + + virtual SLANG_NO_THROW void SLANG_MCALL + encodeResourceCommands(IResourceCommandEncoder** outEncoder) override; + + RenderCommandEncoderImpl m_renderCommandEncoder; + virtual SLANG_NO_THROW void SLANG_MCALL encodeRenderCommands( + IRenderPassLayout* renderPass, + IFramebuffer* framebuffer, + IRenderCommandEncoder** outEncoder) override; + + ComputeCommandEncoderImpl m_computeCommandEncoder; + virtual SLANG_NO_THROW void SLANG_MCALL + encodeComputeCommands(IComputeCommandEncoder** outEncoder) override; + +#if SLANG_GFX_HAS_DXR_SUPPORT + RayTracingCommandEncoderImpl m_rayTracingCommandEncoder; +#endif + virtual SLANG_NO_THROW void SLANG_MCALL + encodeRayTracingCommands(IRayTracingCommandEncoder** outEncoder) override; + virtual SLANG_NO_THROW void SLANG_MCALL close() override; +}; + +class FenceImpl : public FenceBase +{ +public: + ComPtr m_fence; + HANDLE m_waitEvent = 0; + + ~FenceImpl(); + + HANDLE getWaitEvent(); + + Result init(DeviceImpl* device, const IFence::Desc& desc); + + virtual SLANG_NO_THROW Result SLANG_MCALL getCurrentValue(uint64_t* outValue) override; + + virtual SLANG_NO_THROW Result SLANG_MCALL setCurrentValue(uint64_t value) override; + + virtual SLANG_NO_THROW Result SLANG_MCALL getSharedHandle(InteropHandle* outHandle) override; + + virtual SLANG_NO_THROW Result SLANG_MCALL + getNativeHandle(InteropHandle* outNativeHandle) override; +}; + +class CommandQueueImpl + : public ICommandQueue + , public ComObject +{ +public: + SLANG_COM_OBJECT_IUNKNOWN_ALL + ICommandQueue* getInterface(const 
Guid& guid); + void breakStrongReferenceToDevice() { m_renderer.breakStrongReference(); } + + virtual SLANG_NO_THROW Result SLANG_MCALL getNativeHandle(InteropHandle* handle) override; + +public: + BreakableReference m_renderer; + ComPtr m_device; + ComPtr m_d3dQueue; + ComPtr m_fence; + uint64_t m_fenceValue = 0; + HANDLE globalWaitHandle; + Desc m_desc; + uint32_t m_queueIndex = 0; + + Result init(DeviceImpl* device, uint32_t queueIndex); + ~CommandQueueImpl(); + virtual SLANG_NO_THROW const Desc& SLANG_MCALL getDesc() override; + + virtual SLANG_NO_THROW void SLANG_MCALL executeCommandBuffers( + uint32_t count, + ICommandBuffer* const* commandBuffers, + IFence* fence, + uint64_t valueToSignal) override; + + virtual SLANG_NO_THROW void SLANG_MCALL waitOnHost() override; + + virtual SLANG_NO_THROW Result SLANG_MCALL waitForFenceValuesOnDevice( + uint32_t fenceCount, IFence** fences, uint64_t* waitValues) override; +}; + +class SwapchainImpl : public D3DSwapchainBase +{ +public: + ComPtr m_queue; + ComPtr m_dxgiFactory; + ComPtr m_swapChain3; + ComPtr m_fence; + ShortList m_frameEvents; + uint64_t fenceValue = 0; + Result init(DeviceImpl* renderer, const ISwapchain::Desc& swapchainDesc, WindowHandle window); + + virtual SLANG_NO_THROW Result SLANG_MCALL resize(uint32_t width, uint32_t height) override; + + virtual void createSwapchainBufferImages() override; + virtual IDXGIFactory* getDXGIFactory() override { return m_dxgiFactory; } + virtual IUnknown* getOwningDevice() override { return m_queue; } + virtual SLANG_NO_THROW int SLANG_MCALL acquireNextImage() override; + virtual SLANG_NO_THROW Result SLANG_MCALL present() override; + virtual SLANG_NO_THROW bool SLANG_MCALL isOccluded() override; + virtual SLANG_NO_THROW Result SLANG_MCALL setFullScreenMode(bool mode) override; +}; + +#if SLANG_GFX_HAS_DXR_SUPPORT + +class AccelerationStructureImpl + : public AccelerationStructureBase + , public ResourceViewInternalImpl +{ +public: + RefPtr m_buffer; + uint64_t 
m_offset; + uint64_t m_size; + ComPtr m_device5; + +public: + virtual SLANG_NO_THROW DeviceAddress SLANG_MCALL getDeviceAddress() override; + virtual SLANG_NO_THROW Result SLANG_MCALL getNativeHandle(InteropHandle* outHandle) override; +}; -SlangResult SLANG_MCALL createD3D12Device(const IDevice::Desc* desc, IDevice** outDevice); +#endif -} // gfx +} // namespace d3d12 +} // namespace gfx diff --git a/tools/gfx/render.cpp b/tools/gfx/render.cpp index 2125ed47f..3cb19f205 100644 --- a/tools/gfx/render.cpp +++ b/tools/gfx/render.cpp @@ -1,9 +1,7 @@ // render.cpp #include "renderer-shared.h" #include "../../source/core/slang-math.h" - #include "d3d11/render-d3d11.h" -#include "d3d12/render-d3d12.h" #include "open-gl/render-gl.h" #include "vulkan/render-vk.h" #include "cuda/render-cuda.h" @@ -15,6 +13,8 @@ namespace gfx { using namespace Slang; +Result SLANG_MCALL createD3D12Device(const IDevice::Desc* desc, IDevice** outDevice); + static bool debugLayerEnabled = false; /* !!!!!!!!!!!!!!!!!!!!!!!!!!!!!! Global Renderer Functions !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
*/ diff --git a/tools/gfx/renderer-shared.h b/tools/gfx/renderer-shared.h index 5dc4da59a..24d51c87a 100644 --- a/tools/gfx/renderer-shared.h +++ b/tools/gfx/renderer-shared.h @@ -1327,13 +1327,13 @@ protected: virtual SLANG_NO_THROW SlangResult SLANG_MCALL initialize(const Desc& desc); protected: Slang::List m_features; - Slang::ComPtr m_pipelineCreationAPIDispatcher; public: SlangContext slangContext; ShaderCache shaderCache; Slang::Dictionary> m_shaderObjectLayoutCache; + Slang::ComPtr m_pipelineCreationAPIDispatcher; }; bool isDepthFormat(Format format); diff --git a/tools/gfx/vulkan/render-vk.cpp b/tools/gfx/vulkan/render-vk.cpp index 5f3c8f8df..6f4bbb2e5 100644 --- a/tools/gfx/vulkan/render-vk.cpp +++ b/tools/gfx/vulkan/render-vk.cpp @@ -2942,7 +2942,7 @@ public: pipeline = VK_NULL_HANDLE; } - static void _uploadBufferData( + static void uploadBufferData( VkCommandBuffer commandBuffer, TransientResourceHeapImpl* transientHeap, BufferResourceImpl* buffer, @@ -2986,7 +2986,7 @@ public: void uploadBufferDataImpl(IBufferResource* buffer, size_t offset, size_t size, void* data) { m_vkPreCommandBuffer = m_commandBuffer->getPreCommandBuffer(); - _uploadBufferData( + uploadBufferData( m_vkPreCommandBuffer, m_commandBuffer->m_transientHeap.get(), static_cast(buffer), @@ -4816,7 +4816,7 @@ public: virtual SLANG_NO_THROW void SLANG_MCALL uploadBufferData( IBufferResource* buffer, size_t offset, size_t size, void* data) override { - PipelineCommandEncoder::_uploadBufferData( + PipelineCommandEncoder::uploadBufferData( m_commandBuffer->m_commandBuffer, m_commandBuffer->m_transientHeap.get(), static_cast(buffer), -- cgit v1.2.3