From d0f7b7f0ed1d0d1388ce944cd1ad906bbd9afb35 Mon Sep 17 00:00:00 2001 From: Yong He Date: Mon, 22 Mar 2021 16:33:51 -0700 Subject: `gfx` D3D12 shader objects rewrite. (#1763) --- examples/hello-world/main.cpp | 1 - examples/shader-object/main.cpp | 1 - examples/shader-toy/main.cpp | 1 - tools/gfx/d3d12/render-d3d12.cpp | 6426 +++++++++++++++++++++----------------- 4 files changed, 3568 insertions(+), 2861 deletions(-) diff --git a/examples/hello-world/main.cpp b/examples/hello-world/main.cpp index c4bce7f79..b3bee9c07 100644 --- a/examples/hello-world/main.cpp +++ b/examples/hello-world/main.cpp @@ -244,7 +244,6 @@ Slang::Result initialize() // platforms/APIs. // IDevice::Desc deviceDesc = {}; - deviceDesc.deviceType = DeviceType::DirectX11; gfx::Result res = gfxCreateDevice(&deviceDesc, gDevice.writeRef()); if(SLANG_FAILED(res)) return res; diff --git a/examples/shader-object/main.cpp b/examples/shader-object/main.cpp index 796429c16..aaafc010a 100644 --- a/examples/shader-object/main.cpp +++ b/examples/shader-object/main.cpp @@ -136,7 +136,6 @@ int main() // interacting with the graphics API. Slang::ComPtr device; IDevice::Desc deviceDesc = {}; - deviceDesc.deviceType = DeviceType::DirectX11; SLANG_RETURN_ON_FAIL(gfxCreateDevice(&deviceDesc, device.writeRef())); // Now we can load the shader code. 
diff --git a/examples/shader-toy/main.cpp b/examples/shader-toy/main.cpp index 8e377b42f..c4424b294 100644 --- a/examples/shader-toy/main.cpp +++ b/examples/shader-toy/main.cpp @@ -315,7 +315,6 @@ Result initialize() gWindow->events.sizeChanged = Slang::Action<>(this, &ShaderToyApp::windowSizeChanged); IDevice::Desc deviceDesc; - deviceDesc.deviceType = DeviceType::Vulkan; Result res = gfxCreateDevice(&deviceDesc, gDevice.writeRef()); if(SLANG_FAILED(res)) return res; diff --git a/tools/gfx/d3d12/render-d3d12.cpp b/tools/gfx/d3d12/render-d3d12.cpp index 52e9b33a6..e0bfea838 100644 --- a/tools/gfx/d3d12/render-d3d12.cpp +++ b/tools/gfx/d3d12/render-d3d12.cpp @@ -5,7 +5,6 @@ //WORKING:#include "options.h" #include "../renderer-shared.h" -#include "../render-graphics-common.h" #include "../simple-render-pass-layout.h" #include "../d3d/d3d-swapchain.h" #include "core/slang-blob.h" @@ -66,7 +65,7 @@ using namespace Slang; static D3D12_RESOURCE_STATES _calcResourceState(IResource::Usage usage); -class D3D12Device : public GraphicsAPIRenderer +class D3D12Device : public RendererBase { public: // Renderer implementation @@ -114,13 +113,39 @@ public: IInputLayout** outLayout) override; virtual SLANG_NO_THROW Result SLANG_MCALL createDescriptorSetLayout( - const IDescriptorSetLayout::Desc& desc, IDescriptorSetLayout** outLayout) override; + const IDescriptorSetLayout::Desc& desc, + IDescriptorSetLayout** outLayout) override + { + SLANG_UNUSED(desc); + SLANG_UNUSED(outLayout); + return SLANG_FAIL; + } virtual SLANG_NO_THROW Result SLANG_MCALL createPipelineLayout( - const IPipelineLayout::Desc& desc, IPipelineLayout** outLayout) override; + const IPipelineLayout::Desc& desc, + IPipelineLayout** outLayout) override + { + SLANG_UNUSED(desc); + SLANG_UNUSED(outLayout); + return SLANG_FAIL; + } virtual SLANG_NO_THROW Result SLANG_MCALL createDescriptorSet( IDescriptorSetLayout* layout, IDescriptorSet::Flag::Enum flag, - IDescriptorSet** outDescriptorSet) override; + 
IDescriptorSet** outDescriptorSet) override + { + SLANG_UNUSED(layout); + SLANG_UNUSED(flag); + SLANG_UNUSED(outDescriptorSet); + return SLANG_FAIL; + } + + virtual Result createShaderObjectLayout( + slang::TypeLayoutReflection* typeLayout, + ShaderObjectLayoutBase** outLayout) override; + virtual Result createShaderObject(ShaderObjectLayoutBase* layout, IShaderObject** outObject) + override; + virtual SLANG_NO_THROW Result SLANG_MCALL + createRootShaderObject(IShaderProgram* program, IShaderObject** outObject) override; virtual SLANG_NO_THROW Result SLANG_MCALL createProgram(const IShaderProgram::Desc& desc, IShaderProgram** outProgram) override; @@ -149,7 +174,7 @@ public: ~D3D12Device(); -protected: +public: static const Int kMaxNumRenderFrames = 4; static const Int kMaxNumRenderTargets = 3; @@ -208,15 +233,6 @@ protected: D3D12DescriptorHeap m_samplerHeap; ///< Heap for samplers }; - class ShaderProgramImpl : public GraphicsCommonShaderProgram - { - public: - PipelineType m_pipelineType; - List m_vertexShader; - List m_pixelShader; - List m_computeShader; - }; - class BufferResourceImpl: public gfx::BufferResource { public: @@ -271,7 +287,7 @@ protected: D3D12Device* m_renderer; ~SamplerStateImpl() { - m_renderer->m_samplerAllocator.free(m_descriptor); + m_renderer->m_cpuSamplerHeap.free(m_descriptor); } }; @@ -366,195 +382,6 @@ protected: List m_text; ///< Holds all strings to keep in scope }; - class DescriptorSetLayoutImpl : public IDescriptorSetLayout, public RefObject - { - public: - SLANG_REF_OBJECT_IUNKNOWN_ALL - IDescriptorSetLayout* getInterface(const Guid& guid) - { - if (guid == GfxGUID::IID_ISlangUnknown || guid == GfxGUID::IID_IDescriptorSetLayout) - return static_cast(this); - return nullptr; - } - public: - // A "descriptor set" at the level of the `Renderer` API - // is similar to a D3D12 "descriptor table," but the match - // isn't perfect for a few reasons: - // - // * Our descriptor sets can contain both resources and - // samplers, while 
D3D12 descriptor tables are always - // resource-only or sampler-only. - // - // * Our descriptor sets can include root constant ranges, - // while under D3D12 a root constant range is thought - // of as belonging to the root signature directly. - // - // We navigate this mismatch in our implementation with - // the idea that a single `Renderer`-level descriptor set - // maps to zero or more D3D12 root parameters, which can - // include: - // - // * Zero or one root parameter that is used to bind a - // descriptor table of resources. - // - // * Zero or one root parameter that is used to bind a - // descriptor table of samplers. - // - // * Zero or more root parameters that represent ranges - // of root constants. - // - // Binding a descriptor set will band all of its associated - // root parameters. - // - // (Note: this representation could in theory be extended - // to also support root resources that are not table-bound) - // - // Each descriptor slot range in the original `Desc` maps - // to a single `RangeInfo` stored here, which captures - // derived information used when binding values into - // a descriptor table. - // - struct RangeInfo - { - /// The type of descriptor slot in the original `Desc` - DescriptorSlotType type; - - /// The number of slots in this range - Int count; - - /// The start index of this range in the appropriate type-specific array. - /// - /// E.g., for a sampler slot range, this would be the start index - /// for the range in the descriptor table used to store all the samplers. - Int arrayIndex; - }; - List m_ranges; - - // We need to track additional information about - // root cosntant ranges that isn't captured in - // `RangeInfo`, so we store an additional array - // that just captures the root constant ranges. 
- // - struct RootConstantRangeInfo - { - /// The D3D12 "root parameter index" for this range - Int rootParamIndex; - - /// The size in bytes of this range - Int size; - - /// The byte offset of this range's data in the backing storage for a descriptor set - Int offset; - }; - List m_rootConstantRanges; - - /// The total size (in bytes) of root constant data across all contained ranged. - Int m_rootConstantDataSize = 0; - - /// The D3D12-format descriptions of the descriptor ranges in this set - List m_dxRanges; - - /// The D3D12-format description of the root parameters introduced by this set - List m_dxRootParameters; - - /// How many resource slots (total) were introduced by ranges? - Int m_resourceCount; - - /// How many sampler slots (total) were introduce by ranges? - Int m_samplerCount; - }; - - class PipelineLayoutImpl : public IPipelineLayout, public RefObject - { - public: - SLANG_REF_OBJECT_IUNKNOWN_ALL - IPipelineLayout* getInterface(const Guid& guid) - { - if (guid == GfxGUID::IID_ISlangUnknown || guid == GfxGUID::IID_IPipelineLayout) - { - return static_cast(this); - } - return nullptr; - } - public: - ComPtr m_rootSignature; - UInt m_descriptorSetCount; - }; - - class DescriptorSetImpl : public IDescriptorSet, public RefObject - { - public: - SLANG_REF_OBJECT_IUNKNOWN_ALL - IDescriptorSet* getInterface(const Guid& guid) - { - if (guid == GfxGUID::IID_ISlangUnknown || guid == GfxGUID::IID_IDescriptorSet) - return static_cast(this); - return nullptr; - } - public: - virtual SLANG_NO_THROW void SLANG_MCALL - setConstantBuffer(UInt range, UInt index, IBufferResource* buffer) override; - virtual SLANG_NO_THROW void SLANG_MCALL - setResource(UInt range, UInt index, IResourceView* view) override; - virtual SLANG_NO_THROW void SLANG_MCALL - setSampler(UInt range, UInt index, ISamplerState* sampler) override; - virtual SLANG_NO_THROW void SLANG_MCALL setCombinedTextureSampler( - UInt range, - UInt index, - IResourceView* textureView, - ISamplerState* sampler) 
override; - virtual SLANG_NO_THROW void SLANG_MCALL - setRootConstants( - UInt range, - UInt offset, - UInt size, - void const* data) override; - - D3D12Device* m_renderer = nullptr; ///< Weak pointer - must be because if set on Renderer, will have a circular reference - RefPtr m_layout; - - D3D12HostVisibleDescriptorAllocator* m_resourceHeap = nullptr; - D3D12HostVisibleDescriptorAllocator* m_samplerHeap = nullptr; - - Int m_resourceTable = 0; - Int m_samplerTable = 0; - - // The following arrays are used to retain the relevant - // objects so that they will not be released while this - // descriptor-set is still alive. - // - // For the `m_resourceObjects` array, the values are either - // the relevant `ResourceViewImpl` for SRV/UAV slots, or - // a `BufferResourceImpl` for a CBV slot. - // - List> m_resourceObjects; - List> m_samplerObjects; - - /// Backing storage for root constant ranges in this descriptor set. - List m_rootConstantData; - - ~DescriptorSetImpl() - { - if (m_layout->m_resourceCount) - m_resourceHeap->free((int)m_resourceTable, (int)m_layout->m_resourceCount); - if (m_layout->m_samplerCount) - m_samplerHeap->free((int)m_samplerTable, (int)m_layout->m_samplerCount); - } - }; - - D3D12HostVisibleDescriptorAllocator m_rtvAllocator; - D3D12HostVisibleDescriptorAllocator m_dsvAllocator; - - D3D12HostVisibleDescriptorAllocator m_viewAllocator; - D3D12HostVisibleDescriptorAllocator m_samplerAllocator; - - // Space in the GPU-visible heaps is precious, so we will also keep - // around CPU-visible heaps for storing descriptors in a format - // that is ready for copying into the GPU-visible heaps as needed. 
- // - D3D12HostVisibleDescriptorAllocator m_cpuViewHeap; ///< Cbv, Srv, Uav - D3D12HostVisibleDescriptorAllocator m_cpuSamplerHeap; ///< Heap for samplers - class PipelineStateImpl : public PipelineStateBase { public: @@ -743,6 +570,7 @@ protected: m_commandAllocator, nullptr, IID_PPV_ARGS(cmdList.writeRef())); + m_commandListPool.add(cmdList); } assert((Index)m_commandListAllocId < m_commandListPool.getCount()); @@ -752,3283 +580,4160 @@ protected: } }; - class CommandBufferImpl - : public ICommandBuffer - , public RefObject + class CommandBufferImpl; + + class PipelineCommandEncoder { public: - SLANG_REF_OBJECT_IUNKNOWN_ALL - ICommandBuffer* getInterface(const Guid& guid) - { - if (guid == GfxGUID::IID_ISlangUnknown || guid == GfxGUID::IID_ICommandBuffer) - return static_cast(this); - return nullptr; - } - public: - ComPtr m_cmdList; + bool m_isOpen = false; + bool m_bindingDirty = true; + CommandBufferImpl* m_commandBuffer; ExecutionFrameResources* m_frame; D3D12Device* m_renderer; - void init(D3D12Device* renderer, ExecutionFrameResources* frame) - { - m_frame = frame; - m_renderer = renderer; - m_cmdList = m_frame->createCommandList(renderer->m_device); - } - class PipelineCommandEncoder : public GraphicsComputeCommandEncoderBase + ID3D12Device* m_device; + ID3D12GraphicsCommandList* m_d3dCmdList; + ID3D12GraphicsCommandList* m_preCmdList = nullptr; + + RefPtr m_currentPipeline; + RefPtr m_rootShaderObject; + + static int getBindPointIndex(PipelineType type) { - public: - bool m_isOpen = false; - CommandBufferImpl* m_commandBuffer; - ExecutionFrameResources* m_frame; - ID3D12Device* m_device; - ID3D12GraphicsCommandList* m_d3dCmdList; - ID3D12GraphicsCommandList* m_preCmdList = nullptr; - - ID3D12PipelineState* m_boundPipelines[3] = {}; - RefPtr m_boundDescriptorSets[int(PipelineType::CountOf)] - [kMaxDescriptorSetCount]; - static int getBindPointIndex(PipelineType type) + switch (type) { - switch (type) - { - case PipelineType::Graphics: - return 0; - case 
PipelineType::Compute: - return 1; - case PipelineType::RayTracing: - return 2; - default: - assert(!"unknown pipeline type."); - return -1; - } + case PipelineType::Graphics: + return 0; + case PipelineType::Compute: + return 1; + case PipelineType::RayTracing: + return 2; + default: + assert(!"unknown pipeline type."); + return -1; } + } - RefPtr m_currentPipelineLayout; + void init(CommandBufferImpl* commandBuffer) + { + m_commandBuffer = commandBuffer; + m_d3dCmdList = m_commandBuffer->m_cmdList; + m_renderer = commandBuffer->m_renderer; + m_frame = commandBuffer->m_frame; + } + + void endEncodingImpl() { m_isOpen = false; } + + void bindRootShaderObjectImpl(IShaderObject* object) + { + m_rootShaderObject = static_cast(object); + m_bindingDirty = true; + } + + void setPipelineStateImpl(IPipelineState* pipelineState) + { + m_currentPipeline = static_cast(pipelineState); + m_bindingDirty = true; + } + + Result _bindRenderState(Submitter* submitter); + }; + + struct DescriptorHeapReference + { + bool isCpuHeap; + union Reference + { + D3D12DescriptorHeap* gpuHeap; + D3D12HostVisibleDescriptorAllocator* cpuHeap; + } ptr; + DescriptorHeapReference& operator=(D3D12DescriptorHeap* gpuHeap) + { + ptr.gpuHeap = gpuHeap; + isCpuHeap = false; + return *this; + } + DescriptorHeapReference& operator=(D3D12HostVisibleDescriptorAllocator* cpuHeap) + { + ptr.cpuHeap = cpuHeap; + isCpuHeap = true; + return *this; + } + SLANG_FORCE_INLINE D3D12_CPU_DESCRIPTOR_HANDLE getCpuHandle(int index) const + { + if (isCpuHeap) + return ptr.cpuHeap->getCpuHandle(index); + else + return ptr.gpuHeap->getCpuHandle(index); + } + SLANG_FORCE_INLINE D3D12_GPU_DESCRIPTOR_HANDLE getGpuHandle(int index) const + { + SLANG_ASSERT(!isCpuHeap); + return ptr.gpuHeap->getGpuHandle(index); + } + }; + + struct DescriptorTable + { + DescriptorHeapReference heap; + uint32_t table; + }; + + struct BindingOffset + { + int32_t resource; + int32_t sampler; + }; + + struct RootBindingState + { + 
+ ExecutionFrameResources* frame; + D3D12Device* device; + ArrayView descriptorTables; + BindingOffset offset; + uint32_t rootParamIndex; // The root parameter index of this object. + uint32_t futureRootParamOffset; // The starting offset of additional sub-object descriptor tables. + }; + + struct DescriptorSetInfo + { + uint32_t resourceDescriptorCount = 0; + uint32_t samplerDescriptorCount = 0; + }; - void init(CommandBufferImpl* commandBuffer) + struct BindingLocation + { + int32_t index; + BindingOffset offsetInDescriptorTable; + }; + + // Provides information on how binding ranges are stored in descriptor tables for + // a shader object. + // We allocate one CPU descriptor table for each descriptor heap type for the shader + // object. In `ShaderObjectLayoutImpl`, we store the offset into the descriptor tables + // for each binding, so we know where to write the descriptor when the user sets + // a resource or sampler binding. + class ShaderObjectLayoutImpl : public ShaderObjectLayoutBase + { + public: + struct BindingRangeInfo + { + slang::BindingType bindingType; + uint32_t count; + uint32_t spaceIndex; + uint32_t flatResourceOffset; // Offset in flattened array of resource binding slots. + BindingLocation binding; + + // Returns true if this binding range consumes a specialization argument slot. + bool isSpecializationArg() const { - m_commandBuffer = commandBuffer; - m_rendererBase = static_cast(commandBuffer->m_renderer); - m_d3dCmdList = m_commandBuffer->m_cmdList; + return bindingType == slang::BindingType::ExistentialValue; } + }; + struct SubObjectRangeInfo + { + RefPtr layout; + Index bindingRangeIndex; + slang::BindingType bindingType; + + // The offset for the constant buffer descriptor if this + // sub-object is referenced as a `ConstantBuffer`. + // For a `ParameterBlock` binding range, this is always 0 since + // parameter blocks start in a fresh descriptor table. 
+ BindingOffset descriptorOffset; + }; - void endEncodingImpl() + struct Builder + { + public: + Builder(RendererBase* renderer) + : m_renderer(renderer) + {} + + RendererBase* m_renderer; + slang::TypeLayoutReflection* m_elementTypeLayout; + List m_bindingRanges; + List m_subObjectRanges; + DescriptorSetInfo m_descriptorSetInfo; + uint32_t m_subObjectCount = 0; + uint32_t m_flatResourceCount = 0; + + void addBindingRangesOfType(slang::TypeLayoutReflection* typeLayout) { - m_isOpen = false; - for (int i = 0; i < int(PipelineType::CountOf); i++) + SlangInt bindingRangeCount = typeLayout->getBindingRangeCount(); + + // Reserve CBV slot for the implicit constant buffer if the type contains + // ordinary uniform data fields. + if (typeLayout->getSize(slang::ParameterCategory::Uniform) != 0) + { + m_descriptorSetInfo.resourceDescriptorCount = 1; + } + + for (SlangInt r = 0; r < bindingRangeCount; ++r) { - for (auto& descSet : m_boundDescriptorSets[i]) + slang::BindingType slangBindingType = typeLayout->getBindingRangeType(r); + uint32_t count = (uint32_t)typeLayout->getBindingRangeBindingCount(r); + slang::TypeLayoutReflection* slangLeafTypeLayout = + typeLayout->getBindingRangeLeafTypeLayout(r); + BindingRangeInfo bindingRangeInfo = {}; + bindingRangeInfo.bindingType = slangBindingType; + bindingRangeInfo.count = count; + bindingRangeInfo.flatResourceOffset = m_flatResourceCount; + bindingRangeInfo.spaceIndex = + (uint32_t)typeLayout->getBindingRangeDescriptorSetIndex(r); + + switch (slangBindingType) { - descSet = nullptr; + case slang::BindingType::ConstantBuffer: + case slang::BindingType::ParameterBlock: + case slang::BindingType::ExistentialValue: + bindingRangeInfo.binding.index = m_subObjectCount; + m_subObjectCount += count; + break; + + case slang::BindingType::Sampler: + bindingRangeInfo.binding.offsetInDescriptorTable.sampler = + m_descriptorSetInfo.samplerDescriptorCount; + m_descriptorSetInfo.samplerDescriptorCount += count; + break; + + case 
slang::BindingType::CombinedTextureSampler: + bindingRangeInfo.binding.offsetInDescriptorTable.sampler = + m_descriptorSetInfo.samplerDescriptorCount; + bindingRangeInfo.binding.offsetInDescriptorTable.resource = + m_descriptorSetInfo.resourceDescriptorCount; + m_descriptorSetInfo.samplerDescriptorCount += count; + m_descriptorSetInfo.resourceDescriptorCount += count; + m_flatResourceCount += count; + break; + + case slang::BindingType::MutableRawBuffer: + case slang::BindingType::MutableTexture: + case slang::BindingType::MutableTypedBuffer: + bindingRangeInfo.binding.offsetInDescriptorTable.resource = + m_descriptorSetInfo.resourceDescriptorCount; + m_descriptorSetInfo.resourceDescriptorCount += count; + m_flatResourceCount += count; + break; + + case slang::BindingType::VaryingInput: + case slang::BindingType::VaryingOutput: + break; + + default: + bindingRangeInfo.binding.offsetInDescriptorTable.resource = + m_descriptorSetInfo.resourceDescriptorCount; + m_descriptorSetInfo.resourceDescriptorCount += count; + m_flatResourceCount += count; + break; } + m_bindingRanges.add(bindingRangeInfo); } } - virtual SLANG_NO_THROW void SLANG_MCALL setDescriptorSetImpl( - PipelineType pipelineType, - IPipelineLayout* layout, - UInt index, - IDescriptorSet* descriptorSet) override + Result setElementTypeLayout(slang::TypeLayoutReflection* typeLayout) { - // In D3D12, unlike Vulkan, binding a root signature invalidates *all* descriptor - // table - // bindings (rather than preserving those that are part of the longest common prefix - // between the old and new layout). - // - // In order to accomodate having descriptor-set bindings that persist across changes - // in pipeline state (which may also change pipeline layout), we will shadow the - // descriptor-set bindings and only flush them on-demand at draw tiume once the - // final pipline layout is known. 
- // + typeLayout = _unwrapParameterGroups(typeLayout); - auto descriptorSetImpl = (DescriptorSetImpl*)descriptorSet; - m_boundDescriptorSets[int(pipelineType)][index] = descriptorSetImpl; - } + m_elementTypeLayout = typeLayout; - virtual SLANG_NO_THROW void SLANG_MCALL uploadBufferDataImpl( - IBufferResource* buffer, - size_t offset, - size_t size, - void* data) override - { - _uploadBufferData( - m_commandBuffer->m_cmdList, - static_cast(buffer), - offset, - size, - data); + // Compute the binding ranges that are used to store + // the logical contents of the object in memory. + + addBindingRangesOfType(typeLayout); + + SlangInt subObjectRangeCount = typeLayout->getSubObjectRangeCount(); + for (SlangInt r = 0; r < subObjectRangeCount; ++r) + { + SlangInt bindingRangeIndex = typeLayout->getSubObjectRangeBindingRangeIndex(r); + auto slangBindingType = typeLayout->getBindingRangeType(bindingRangeIndex); + slang::TypeLayoutReflection* slangLeafTypeLayout = + typeLayout->getBindingRangeLeafTypeLayout(bindingRangeIndex); + + // A sub-object range can either represent a sub-object of a known + // type, like a `ConstantBuffer<Foo>` or `ParameterBlock<Foo>` + // (in which case we can pre-compute a layout to use, based on + // the type `Foo`) *or* it can represent a sub-object of some + // existential type (e.g., `IBar`) in which case we cannot + // know the appropriate type/layout of sub-object to allocate. 
+ // + RefPtr subObjectLayout; + if (slangBindingType != slang::BindingType::ExistentialValue) + { + createForElementType( + m_renderer, + slangLeafTypeLayout->getElementTypeLayout(), + subObjectLayout.writeRef()); + } + + SubObjectRangeInfo subObjectRange; + subObjectRange.bindingRangeIndex = bindingRangeIndex; + subObjectRange.layout = subObjectLayout; + subObjectRange.bindingType = slangBindingType; + subObjectRange.descriptorOffset.resource = + m_descriptorSetInfo.resourceDescriptorCount; + subObjectRange.descriptorOffset.sampler = + m_descriptorSetInfo.samplerDescriptorCount; + m_subObjectRanges.add(subObjectRange); + } + + return SLANG_OK; } - void setPipelineStateImpl(IPipelineState* state) + SlangResult build(ShaderObjectLayoutImpl** outLayout) { - m_currentPipeline = static_cast(state); - } + auto layout = RefPtr(new ShaderObjectLayoutImpl()); + SLANG_RETURN_ON_FAIL(layout->_init(this)); - Result _bindRenderState( - PipelineStateImpl* pipelineStateImpl, - Submitter* submitter); + *outLayout = layout.detach(); + return SLANG_OK; + } }; - class RenderCommandEncoderImpl - : public IRenderCommandEncoder - , public PipelineCommandEncoder + static Result createForElementType( + RendererBase* renderer, + slang::TypeLayoutReflection* elementType, + ShaderObjectLayoutImpl** outLayout) { - public: - virtual SLANG_NO_THROW SlangResult SLANG_MCALL - queryInterface(SlangUUID const& uuid, void** outObject) override - { - if (uuid == GfxGUID::IID_ISlangUnknown || - uuid == GfxGUID::IID_IRenderCommandEncoder) - { - *outObject = static_cast(this); - return SLANG_OK; - } - *outObject = nullptr; - return SLANG_E_NO_INTERFACE; - } - virtual SLANG_NO_THROW uint32_t SLANG_MCALL addRef() { return 1; } - virtual SLANG_NO_THROW uint32_t SLANG_MCALL release() { return 1; } - public: - RefPtr m_renderPass; - RefPtr m_framebuffer; + Builder builder(renderer); + builder.setElementTypeLayout(elementType); + return builder.build(outLayout); + } - List m_boundVertexBuffers; + List const& 
getBindingRanges() { return m_bindingRanges; } - RefPtr m_boundIndexBuffer; + Index getBindingRangeCount() { return m_bindingRanges.getCount(); } - D3D12_VIEWPORT m_viewports[kMaxRTVCount]; - D3D12_RECT m_scissorRects[kMaxRTVCount]; + BindingRangeInfo const& getBindingRange(Index index) { return m_bindingRanges[index]; } - DXGI_FORMAT m_boundIndexFormat; - UINT m_boundIndexOffset; + DescriptorSetInfo getDescriptorSetInfo() { return m_descriptorSetInfo; } - D3D12_PRIMITIVE_TOPOLOGY_TYPE m_primitiveTopologyType; - D3D12_PRIMITIVE_TOPOLOGY m_primitiveTopology; + slang::TypeLayoutReflection* getElementTypeLayout() { return m_elementTypeLayout; } - void init( - D3D12Device* renderer, - ExecutionFrameResources* frame, - CommandBufferImpl* cmdBuffer, - RenderPassLayoutImpl* renderPass, - FramebufferImpl* framebuffer) - { - m_commandBuffer = cmdBuffer; - m_d3dCmdList = cmdBuffer->m_cmdList; - m_preCmdList = nullptr; - m_device = renderer->m_device; - m_rendererBase = renderer; - m_renderPass = renderPass; - m_framebuffer = framebuffer; - m_frame = frame; - m_boundVertexBuffers.clear(); - m_boundIndexBuffer = nullptr; - m_primitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; - m_primitiveTopology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST; - m_boundIndexFormat = DXGI_FORMAT_UNKNOWN; - m_boundIndexOffset = 0; - for (auto& boundPipeline : m_boundPipelines) - boundPipeline = nullptr; + uint32_t getResourceCount() { return m_resourceSlotCount; } - // Set render target states. - m_d3dCmdList->OMSetRenderTargets( - (UINT)framebuffer->renderTargetViews.getCount(), - framebuffer->renderTargetDescriptors.getArrayView().getBuffer(), - FALSE, - framebuffer->depthStencilView ? &framebuffer->depthStencilDescriptor : nullptr); + Index getSubObjectCount() { return m_subObjectCount; } - // Issue clear commands based on render pass set up. 
- for (Index i = 0; i < renderPass->m_renderTargetAccesses.getCount(); i++) - { - auto& access = renderPass->m_renderTargetAccesses[i]; + SubObjectRangeInfo const& getSubObjectRange(Index index) + { + return m_subObjectRanges[index]; + } + List const& getSubObjectRanges() { return m_subObjectRanges; } - // Transit resource states. - { - D3D12BarrierSubmitter submitter(m_d3dCmdList); - auto resourceViewImpl = - static_cast(framebuffer->renderTargetViews[i].get()); - auto textureResource = - static_cast(resourceViewImpl->m_resource.Ptr()); - D3D12_RESOURCE_STATES initialState; - if (access.initialState == ResourceState::Undefined) - { - initialState = textureResource->m_defaultState; - } - else - { - initialState = D3DUtil::translateResourceState(access.initialState); - } - textureResource->m_resource.transition( - initialState, - D3D12_RESOURCE_STATE_RENDER_TARGET, - submitter); - } - // Clear. - if (access.loadOp == IRenderPassLayout::AttachmentLoadOp::Clear) - { - m_d3dCmdList->ClearRenderTargetView( - framebuffer->renderTargetDescriptors[i], - framebuffer->renderTargetClearValues[i].values, - 0, - nullptr); - } - } + RendererBase* getRenderer() { return m_renderer; } - if (renderPass->m_hasDepthStencil) - { - // Transit resource states. - { - D3D12BarrierSubmitter submitter(m_d3dCmdList); - auto resourceViewImpl = - static_cast(framebuffer->depthStencilView.get()); - auto textureResource = - static_cast(resourceViewImpl->m_resource.Ptr()); - D3D12_RESOURCE_STATES initialState; - if (renderPass->m_depthStencilAccess.initialState == - ResourceState::Undefined) - { - initialState = textureResource->m_defaultState; - } - else - { - initialState = D3DUtil::translateResourceState( - renderPass->m_depthStencilAccess.initialState); - } - textureResource->m_resource.transition( - initialState, - D3D12_RESOURCE_STATE_DEPTH_WRITE, - submitter); - } - // Clear. 
- uint32_t clearFlags = 0; - if (renderPass->m_depthStencilAccess.loadOp == - IRenderPassLayout::AttachmentLoadOp::Clear) - { - clearFlags |= D3D12_CLEAR_FLAG_DEPTH; - } - if (renderPass->m_depthStencilAccess.stencilLoadOp == - IRenderPassLayout::AttachmentLoadOp::Clear) - { - clearFlags |= D3D12_CLEAR_FLAG_STENCIL; - } - if (clearFlags) - { - m_d3dCmdList->ClearDepthStencilView( - framebuffer->depthStencilDescriptor, - (D3D12_CLEAR_FLAGS)clearFlags, - framebuffer->depthStencilClearValue.depth, - framebuffer->depthStencilClearValue.stencil, - 0, - nullptr); - } - } - } - - virtual SLANG_NO_THROW void SLANG_MCALL setPipelineState(IPipelineState* state) override + slang::TypeReflection* getType() { return m_elementTypeLayout->getType(); } + + protected: + Result _init(Builder* builder) + { + auto renderer = builder->m_renderer; + + initBase(renderer, builder->m_elementTypeLayout); + + m_descriptorSetInfo = builder->m_descriptorSetInfo; + m_bindingRanges = _Move(builder->m_bindingRanges); + m_subObjectCount = builder->m_subObjectCount; + m_subObjectRanges = builder->m_subObjectRanges; + m_resourceSlotCount = builder->m_flatResourceCount; + return SLANG_OK; + } + + List m_bindingRanges; + DescriptorSetInfo m_descriptorSetInfo; + Index m_subObjectCount = 0; + List m_subObjectRanges; + uint32_t m_resourceSlotCount; + }; + + class RootShaderObjectLayoutImpl : public ShaderObjectLayoutImpl + { + typedef ShaderObjectLayoutImpl Super; + + public: + struct EntryPointInfo + { + RefPtr layout; + }; + + struct Builder : Super::Builder + { + Builder( + RendererBase* renderer, + slang::IComponentType* program, + slang::ProgramLayout* programLayout) + : Super::Builder(renderer) + , m_program(program) + , m_programLayout(programLayout) + {} + + Result build(RootShaderObjectLayoutImpl** outLayout) { - setPipelineStateImpl(state); + RefPtr layout = new RootShaderObjectLayoutImpl(); + SLANG_RETURN_ON_FAIL(layout->_init(this)); + + *outLayout = layout.detach(); + return SLANG_OK; } - 
virtual SLANG_NO_THROW void SLANG_MCALL - bindRootShaderObject(IShaderObject* object) override + + void addGlobalParams(slang::VariableLayoutReflection* globalsLayout) { - bindRootShaderObjectImpl(PipelineType::Graphics, object); + setElementTypeLayout(globalsLayout->getTypeLayout()); } - virtual SLANG_NO_THROW void SLANG_MCALL setDescriptorSet( - IPipelineLayout* layout, - UInt index, - IDescriptorSet* descriptorSet) override + void addEntryPoint(SlangStage stage, ShaderObjectLayoutImpl* entryPointLayout) { - setDescriptorSetImpl(PipelineType::Graphics, layout, index, descriptorSet); + EntryPointInfo info; + info.layout = entryPointLayout; + m_entryPoints.add(info); } - virtual SLANG_NO_THROW void SLANG_MCALL - setViewports(uint32_t count, const Viewport* viewports) override - { - static const int kMaxViewports = - D3D12_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE; - assert(count <= kMaxViewports && count <= kMaxRTVCount); - for (UInt ii = 0; ii < count; ++ii) - { - auto& inViewport = viewports[ii]; - auto& dxViewport = m_viewports[ii]; + slang::IComponentType* m_program; + slang::ProgramLayout* m_programLayout; + List m_entryPoints; + }; - dxViewport.TopLeftX = inViewport.originX; - dxViewport.TopLeftY = inViewport.originY; - dxViewport.Width = inViewport.extentX; - dxViewport.Height = inViewport.extentY; - dxViewport.MinDepth = inViewport.minZ; - dxViewport.MaxDepth = inViewport.maxZ; - } - m_d3dCmdList->RSSetViewports(UINT(count), m_viewports); - } + EntryPointInfo& getEntryPoint(Index index) { return m_entryPoints[index]; } - virtual SLANG_NO_THROW void SLANG_MCALL - setScissorRects(uint32_t count, const ScissorRect* rects) override - { - static const int kMaxScissorRects = - D3D12_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE; - assert(count <= kMaxScissorRects && count <= kMaxRTVCount); + List& getEntryPoints() { return m_entryPoints; } - for (UInt ii = 0; ii < count; ++ii) - { - auto& inRect = rects[ii]; - auto& dxRect = m_scissorRects[ii]; + 
struct DescriptorSetLayout + { + List m_resourceRanges; + List m_samplerRanges; + uint32_t m_resourceCount = 0; + uint32_t m_samplerCount = 0; + }; - dxRect.left = LONG(inRect.minX); - dxRect.top = LONG(inRect.minY); - dxRect.right = LONG(inRect.maxX); - dxRect.bottom = LONG(inRect.maxY); + struct RootSignatureDescBuilder + { + // We will use one descriptor set for the global scope and one additional + // descriptor set for each `ParameterBlock` binding range in the shader object + // hierarchy, regardless of the shader's `space` indices. + List m_descriptorSets; + List m_rootParameters; + D3D12_ROOT_SIGNATURE_DESC m_rootSignatureDesc = {}; + + static Result translateDescriptorRangeType( + slang::BindingType c, + D3D12_DESCRIPTOR_RANGE_TYPE* outType) + { + switch (c) + { + case slang::BindingType::ConstantBuffer: + *outType = D3D12_DESCRIPTOR_RANGE_TYPE_CBV; + return SLANG_OK; + case slang::BindingType::RawBuffer: + case slang::BindingType::Texture: + case slang::BindingType::TypedBuffer: + *outType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV; + return SLANG_OK; + case slang::BindingType::MutableRawBuffer: + case slang::BindingType::MutableTexture: + case slang::BindingType::MutableTypedBuffer: + *outType = D3D12_DESCRIPTOR_RANGE_TYPE_UAV; + return SLANG_OK; + case slang::BindingType::Sampler: + *outType = D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER; + return SLANG_OK; + default: + return SLANG_FAIL; } - - m_d3dCmdList->RSSetScissorRects(UINT(count), m_scissorRects); } - virtual SLANG_NO_THROW void SLANG_MCALL - setPrimitiveTopology(PrimitiveTopology topology) override + struct BindingRegisterOffset { - switch (topology) + // The index to the physical descriptor set that stores the binding. + uint32_t descriptorSetIndex; + + uint32_t spaceOffset; // The `space` index as specified in shader. 
+ uint32_t textureOffset; // `t` registers + uint32_t samplerOffset; // `s` registers + uint32_t constantBufferOffset; // `b` registers + uint32_t uavOffset; // `u` registers + void set(D3D12_DESCRIPTOR_RANGE_TYPE type, uint32_t value) { - case PrimitiveTopology::TriangleList: + switch (type) { - m_primitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; - m_primitiveTopology = D3DUtil::getPrimitiveTopology(topology); + case D3D12_DESCRIPTOR_RANGE_TYPE_CBV: + constantBufferOffset = value; + return; + case D3D12_DESCRIPTOR_RANGE_TYPE_UAV: + uavOffset = value; + return; + case D3D12_DESCRIPTOR_RANGE_TYPE_SRV: + textureOffset = value; + return; + case D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER: + samplerOffset = value; + return; + default: break; } - default: + } + uint32_t get(D3D12_DESCRIPTOR_RANGE_TYPE type) + { + switch (type) { - assert(!"Unhandled type"); + case D3D12_DESCRIPTOR_RANGE_TYPE_CBV: + return constantBufferOffset; + case D3D12_DESCRIPTOR_RANGE_TYPE_UAV: + return uavOffset; + case D3D12_DESCRIPTOR_RANGE_TYPE_SRV: + return textureOffset; + case D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER: + return samplerOffset; + default: + return 0; } } + }; + + void addDescriptorRange( + slang::TypeLayoutReflection* typeLayout, + D3D12_DESCRIPTOR_RANGE_TYPE rangeType, + Index bindingRangeIndex, + BindingRegisterOffset* offset, + BindingRegisterOffset* newOffset) + { + D3D12_DESCRIPTOR_RANGE range = {}; + range.RangeType = rangeType; + auto descriptorRangeIndex = + typeLayout->getBindingRangeFirstDescriptorRangeIndex(bindingRangeIndex); + auto relativeSpaceIndex = + (uint32_t)typeLayout->getBindingRangeDescriptorSetIndex(bindingRangeIndex); + auto space = offset->spaceOffset + relativeSpaceIndex; + // Update descriptor range descs in current descriptor set. 
+ auto& descriptorSet = m_descriptorSets[offset->descriptorSetIndex]; + range.NumDescriptors = + (UINT)typeLayout->getDescriptorSetDescriptorRangeDescriptorCount( + relativeSpaceIndex, descriptorRangeIndex); + range.BaseShaderRegister = + (UINT)typeLayout->getDescriptorSetDescriptorRangeIndexOffset( + relativeSpaceIndex, descriptorRangeIndex) + + offset->get(range.RangeType); + newOffset->set( + range.RangeType, + Math::Max(range.BaseShaderRegister + 1, newOffset->get(range.RangeType))); + range.OffsetInDescriptorsFromTableStart = D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND; + + range.RegisterSpace = space; + if (range.RangeType == D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER) + { + descriptorSet.m_samplerRanges.add(range); + descriptorSet.m_samplerCount += range.NumDescriptors; + } + else + { + descriptorSet.m_resourceRanges.add(range); + descriptorSet.m_resourceCount += range.NumDescriptors; + } } - virtual SLANG_NO_THROW void SLANG_MCALL setVertexBuffers( - UInt startSlot, - UInt slotCount, - IBufferResource* const* buffers, - const UInt* strides, - const UInt* offsets) override + void addObject(slang::TypeLayoutReflection* typeLayout, BindingRegisterOffset* offset) { + typeLayout = _unwrapParameterGroups(typeLayout); + SlangInt bindingRangeCount = typeLayout->getBindingRangeCount(); + // `register` and `space` index offset of future sub-objects. + BindingRegisterOffset subObjectOffset = *offset; + for (SlangInt i = 0; i < bindingRangeCount; i++) { - const Index num = startSlot + slotCount; - if (num > m_boundVertexBuffers.getCount()) + auto bindingType = typeLayout->getBindingRangeType(i); + D3D12_DESCRIPTOR_RANGE_TYPE rangeType; + if (translateDescriptorRangeType(bindingType, &rangeType) != SLANG_OK) { - m_boundVertexBuffers.setCount(num); + // Ignore all descriptor ranges that does not map directly into a + // d3d descriptor. 
+ continue; } + // The CBV descriptor range, along with any additional descriptor ranges associated + // with the constant buffer binding range, will be appended to the end of this object's + // descriptor table, so we skip them now. + if (bindingType == slang::BindingType::ConstantBuffer) + continue; + addDescriptorRange(typeLayout, rangeType, i, offset, &subObjectOffset); } - - for (UInt i = 0; i < slotCount; i++) + auto subObjectCount = typeLayout->getSubObjectRangeCount(); + for (SlangInt i = 0; i < subObjectCount; i++) { - BufferResourceImpl* buffer = static_cast(buffers[i]); - if (buffer) + auto rangeIndex = typeLayout->getSubObjectRangeBindingRangeIndex(i); + switch (typeLayout->getBindingRangeType(rangeIndex)) { - assert(buffer->m_initialUsage == IResource::Usage::VertexBuffer); + case slang::BindingType::ConstantBuffer: + { + auto subObjectType = typeLayout->getBindingRangeLeafTypeLayout(rangeIndex); + auto subObjectElementType = _unwrapParameterGroups(subObjectType); + if (subObjectElementType->getSize(SLANG_PARAMETER_CATEGORY_UNIFORM) != 0) + { + addDescriptorRange( + typeLayout, + D3D12_DESCRIPTOR_RANGE_TYPE_CBV, + rangeIndex, + offset, + &subObjectOffset); + } + addObject(subObjectType, &subObjectOffset); + } + break; + case slang::BindingType::ParameterBlock: + { + BindingRegisterOffset newOffset = {}; + newOffset.descriptorSetIndex = (uint32_t)m_descriptorSets.getCount(); + m_descriptorSets.add(DescriptorSetLayout{}); + newOffset.spaceOffset = + offset->spaceOffset + + (uint32_t)typeLayout->getBindingRangeDescriptorSetIndex(rangeIndex); + auto subObjectType = + typeLayout->getBindingRangeLeafTypeLayout(rangeIndex); + addObject(subObjectType, &newOffset); + } + break; } - - BoundVertexBuffer& boundBuffer = m_boundVertexBuffers[startSlot + i]; - boundBuffer.m_buffer = buffer; - boundBuffer.m_stride = int(strides[i]); - boundBuffer.m_offset = int(offsets[i]); } + *offset = subObjectOffset; } - virtual SLANG_NO_THROW void SLANG_MCALL setIndexBuffer( - 
IBufferResource* buffer, - Format indexFormat, - UInt offset = 0) override + static BindingRegisterOffset getOffsetFromVarLayout( + slang::VariableLayoutReflection* varLayout) { - m_boundIndexBuffer = (BufferResourceImpl*)buffer; - m_boundIndexFormat = D3DUtil::getMapFormat(indexFormat); - m_boundIndexOffset = UINT(offset); + BindingRegisterOffset offset; + offset.descriptorSetIndex = 0; + offset.spaceOffset = + (uint32_t)varLayout->getOffset(SLANG_PARAMETER_CATEGORY_REGISTER_SPACE); + offset.samplerOffset = + (uint32_t)varLayout->getOffset(SLANG_PARAMETER_CATEGORY_SAMPLER_STATE); + offset.textureOffset = + (uint32_t)varLayout->getOffset(SLANG_PARAMETER_CATEGORY_SHADER_RESOURCE); + offset.constantBufferOffset = + (uint32_t)varLayout->getOffset(SLANG_PARAMETER_CATEGORY_CONSTANT_BUFFER); + offset.uavOffset = + (uint32_t)varLayout->getOffset(SLANG_PARAMETER_CATEGORY_UNORDERED_ACCESS); + return offset; } - void prepareDraw() + void addObject( + slang::TypeLayoutReflection* typeLayout, + slang::VariableLayoutReflection* varLayout) { - auto pipelineState = m_currentPipeline.Ptr(); - if (!pipelineState || (pipelineState->desc.type != PipelineType::Graphics)) - { - assert(!"No graphics pipeline state set"); - return; - } + auto offset = getOffsetFromVarLayout(varLayout); + addObject(typeLayout, &offset); + } - // Submit - setting for graphics + void addEntryPoint(slang::EntryPointReflection* entryPoint) + { + BindingRegisterOffset offset = getOffsetFromVarLayout(entryPoint->getVarLayout()); + if (entryPoint->hasDefaultConstantBuffer()) { - GraphicsSubmitter submitter(m_d3dCmdList); - _bindRenderState(static_cast(pipelineState), &submitter); + addDescriptorRange( + entryPoint->getTypeLayout(), + D3D12_DESCRIPTOR_RANGE_TYPE_CBV, + 0, + &offset, + &offset); } + addObject(entryPoint->getTypeLayout(), &offset); + } - m_d3dCmdList->IASetPrimitiveTopology(m_primitiveTopology); - - // Set up vertex buffer views + D3D12_ROOT_SIGNATURE_DESC& build( + List& outRootDescriptorSetInfos) 
+ { + for (Index i = 0; i < m_descriptorSets.getCount(); i++) { - int numVertexViews = 0; - D3D12_VERTEX_BUFFER_VIEW vertexViews[16]; - for (Index i = 0; i < m_boundVertexBuffers.getCount(); i++) + auto& descriptorSet = m_descriptorSets[i]; + D3D12Device::DescriptorSetInfo setInfo; + setInfo.resourceDescriptorCount = descriptorSet.m_resourceCount; + setInfo.samplerDescriptorCount = descriptorSet.m_samplerCount; + outRootDescriptorSetInfos.add(setInfo); + if (descriptorSet.m_resourceRanges.getCount()) { - const BoundVertexBuffer& boundVertexBuffer = m_boundVertexBuffers[i]; - BufferResourceImpl* buffer = boundVertexBuffer.m_buffer; - if (buffer) - { - D3D12_VERTEX_BUFFER_VIEW& vertexView = vertexViews[numVertexViews++]; - vertexView.BufferLocation = - buffer->m_resource.getResource()->GetGPUVirtualAddress() + - boundVertexBuffer.m_offset; - vertexView.SizeInBytes = - UINT(buffer->getDesc()->sizeInBytes - boundVertexBuffer.m_offset); - vertexView.StrideInBytes = UINT(boundVertexBuffer.m_stride); - } + D3D12_ROOT_PARAMETER rootParam = {}; + rootParam.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; + rootParam.DescriptorTable.NumDescriptorRanges = + (UINT)descriptorSet.m_resourceRanges.getCount(); + rootParam.DescriptorTable.pDescriptorRanges = + descriptorSet.m_resourceRanges.getBuffer(); + m_rootParameters.add(rootParam); + } + if (descriptorSet.m_samplerRanges.getCount()) + { + D3D12_ROOT_PARAMETER rootParam = {}; + rootParam.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; + rootParam.DescriptorTable.NumDescriptorRanges = + (UINT)descriptorSet.m_samplerRanges.getCount(); + rootParam.DescriptorTable.pDescriptorRanges = + descriptorSet.m_samplerRanges.getBuffer(); + m_rootParameters.add(rootParam); } - m_d3dCmdList->IASetVertexBuffers(0, numVertexViews, vertexViews); } - // Set up index buffer - if (m_boundIndexBuffer) - { - D3D12_INDEX_BUFFER_VIEW indexBufferView; - indexBufferView.BufferLocation = - 
m_boundIndexBuffer->m_resource.getResource()->GetGPUVirtualAddress() + - m_boundIndexOffset; - indexBufferView.SizeInBytes = - UINT(m_boundIndexBuffer->getDesc()->sizeInBytes - m_boundIndexOffset); - indexBufferView.Format = m_boundIndexFormat; - m_d3dCmdList->IASetIndexBuffer(&indexBufferView); - } - } - virtual SLANG_NO_THROW void SLANG_MCALL - draw(UInt vertexCount, UInt startVertex = 0) override - { - prepareDraw(); - m_d3dCmdList->DrawInstanced(UINT(vertexCount), 1, UINT(startVertex), 0); - } - virtual SLANG_NO_THROW void SLANG_MCALL - drawIndexed(UInt indexCount, UInt startIndex = 0, UInt baseVertex = 0) override - { - prepareDraw(); - m_d3dCmdList->DrawIndexedInstanced( - (UINT)indexCount, 1, (UINT)startIndex, (UINT)baseVertex, 0); - } - virtual SLANG_NO_THROW void SLANG_MCALL endEncoding() override - { - PipelineCommandEncoder::endEncodingImpl(); - // Issue clear commands based on render pass set up. - for (Index i = 0; i < m_renderPass->m_renderTargetAccesses.getCount(); i++) - { - auto& access = m_renderPass->m_renderTargetAccesses[i]; + m_rootSignatureDesc.NumParameters = UINT(m_rootParameters.getCount()); + m_rootSignatureDesc.pParameters = m_rootParameters.getBuffer(); - // Transit resource states. - { - D3D12BarrierSubmitter submitter(m_d3dCmdList); - auto resourceViewImpl = static_cast( - m_framebuffer->renderTargetViews[i].get()); - auto textureResource = - static_cast(resourceViewImpl->m_resource.Ptr()); - textureResource->m_resource.transition( - D3D12_RESOURCE_STATE_RENDER_TARGET, - D3DUtil::translateResourceState(access.finalState), - submitter); - } - } + // TODO: static samplers should be reasonably easy to support... + m_rootSignatureDesc.NumStaticSamplers = 0; + m_rootSignatureDesc.pStaticSamplers = nullptr; - if (m_renderPass->m_hasDepthStencil) - { - // Transit resource states. 
- D3D12BarrierSubmitter submitter(m_d3dCmdList); - auto resourceViewImpl = - static_cast(m_framebuffer->depthStencilView.get()); - auto textureResource = - static_cast(resourceViewImpl->m_resource.Ptr()); - textureResource->m_resource.transition( - D3D12_RESOURCE_STATE_DEPTH_WRITE, - D3DUtil::translateResourceState( - m_renderPass->m_depthStencilAccess.finalState), - submitter); - } - m_framebuffer = nullptr; - } + // TODO: only set this flag if needed (requires creating root + // signature at same time as pipeline state...). + // + m_rootSignatureDesc.Flags = + D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT; - virtual SLANG_NO_THROW void SLANG_MCALL - setStencilReference(uint32_t referenceValue) override - { - m_d3dCmdList->OMSetStencilRef((UINT)referenceValue); + return m_rootSignatureDesc; } }; - RenderCommandEncoderImpl m_renderCommandEncoder; - virtual SLANG_NO_THROW void SLANG_MCALL encodeRenderCommands( - IRenderPassLayout* renderPass, - IFramebuffer* framebuffer, - IRenderCommandEncoder** outEncoder) override + static Result createRootSignatureFromSlang( + D3D12Device* device, + slang::IComponentType* program, + ID3D12RootSignature** outRootSignature, + List& outRootDescriptorSetInfos) { - m_renderCommandEncoder.init( - m_renderer, - m_frame, - this, - static_cast(renderPass), - static_cast(framebuffer)); - *outEncoder = &m_renderCommandEncoder; - } + RootSignatureDescBuilder builder; + builder.m_descriptorSets.add(DescriptorSetLayout{}); - class ComputeCommandEncoderImpl - : public IComputeCommandEncoder - , public PipelineCommandEncoder - { - public: - virtual SLANG_NO_THROW SlangResult SLANG_MCALL - queryInterface(SlangUUID const& uuid, void** outObject) override - { - if (uuid == GfxGUID::IID_ISlangUnknown || - uuid == GfxGUID::IID_IComputeCommandEncoder) - { - *outObject = static_cast(this); - return SLANG_OK; - } - *outObject = nullptr; - return SLANG_E_NO_INTERFACE; - } - virtual SLANG_NO_THROW uint32_t SLANG_MCALL addRef() { return 1; 
} - virtual SLANG_NO_THROW uint32_t SLANG_MCALL release() { return 1; } + auto layout = program->getLayout(); + auto globalParamLayout = layout->getGlobalParamsTypeLayout(); + auto globalVarLayout = layout->getGlobalParamsVarLayout(); - public: - virtual SLANG_NO_THROW void SLANG_MCALL endEncoding() override - { - PipelineCommandEncoder::endEncodingImpl(); - } - void init( - D3D12Device* renderer, - ExecutionFrameResources* frame, - CommandBufferImpl* cmdBuffer) - { - m_rendererBase = renderer; - m_commandBuffer = cmdBuffer; - m_d3dCmdList = cmdBuffer->m_cmdList; - m_preCmdList = nullptr; - m_device = renderer->m_device; - m_frame = frame; - for (auto& boundPipeline : m_boundPipelines) - boundPipeline = nullptr; - } + builder.addObject(globalParamLayout, globalVarLayout); - virtual SLANG_NO_THROW void SLANG_MCALL setPipelineState(IPipelineState* state) override - { - setPipelineStateImpl(state); - } - virtual SLANG_NO_THROW void SLANG_MCALL - bindRootShaderObject(IShaderObject* object) override + for (SlangUInt i = 0; i < layout->getEntryPointCount(); i++) { - bindRootShaderObjectImpl(PipelineType::Compute, object); + auto entryPoint = layout->getEntryPointByIndex(i); + builder.addEntryPoint(entryPoint); } - virtual SLANG_NO_THROW void SLANG_MCALL setDescriptorSet( - IPipelineLayout* layout, - UInt index, - IDescriptorSet* descriptorSet) override - { - setDescriptorSetImpl(PipelineType::Compute, layout, index, descriptorSet); - } + auto& rootSignatureDesc = builder.build(outRootDescriptorSetInfos); - virtual SLANG_NO_THROW void SLANG_MCALL dispatchCompute(int x, int y, int z) override + ComPtr signature; + ComPtr error; + if (SLANG_FAILED(device->m_D3D12SerializeRootSignature( + &rootSignatureDesc, + D3D_ROOT_SIGNATURE_VERSION_1, + signature.writeRef(), + error.writeRef()))) { - auto pipelineStateImpl = static_cast(m_currentPipeline.Ptr()); - - // Submit binding for compute + fprintf(stderr, "error: D3D12SerializeRootSignature failed"); + if (error) { - 
ComputeSubmitter submitter(m_d3dCmdList); - _bindRenderState(pipelineStateImpl, &submitter); + fprintf(stderr, ": %s\n", (const char*)error->GetBufferPointer()); } - - m_d3dCmdList->Dispatch(x, y, z); + return SLANG_FAIL; } - }; - ComputeCommandEncoderImpl m_computeCommandEncoder; - virtual SLANG_NO_THROW void SLANG_MCALL - encodeComputeCommands(IComputeCommandEncoder** outEncoder) override - { - m_computeCommandEncoder.init(m_renderer, m_frame, this); - *outEncoder = &m_computeCommandEncoder; + SLANG_RETURN_ON_FAIL(device->m_device->CreateRootSignature( + 0, + signature->GetBufferPointer(), + signature->GetBufferSize(), + IID_PPV_ARGS(outRootSignature))); + return SLANG_OK; } - class ResourceCommandEncoderImpl : public IResourceCommandEncoder + static Result create( + D3D12Device* device, + slang::IComponentType* program, + slang::ProgramLayout* programLayout, + RootShaderObjectLayoutImpl** outLayout) { - public: - virtual SLANG_NO_THROW SlangResult SLANG_MCALL - queryInterface(SlangUUID const& uuid, void** outObject) override - { - if (uuid == GfxGUID::IID_ISlangUnknown || - uuid == GfxGUID::IID_IResourceCommandEncoder) - { - *outObject = static_cast(this); - return SLANG_OK; - } - *outObject = nullptr; - return SLANG_E_NO_INTERFACE; - } - virtual SLANG_NO_THROW uint32_t SLANG_MCALL addRef() { return 1; } - virtual SLANG_NO_THROW uint32_t SLANG_MCALL release() { return 1; } + RootShaderObjectLayoutImpl::Builder builder(device, program, programLayout); + builder.addGlobalParams(programLayout->getGlobalParamsVarLayout()); - public: - CommandBufferImpl* m_commandBuffer; - void init(D3D12Device* renderer, CommandBufferImpl* commandBuffer) - { - m_commandBuffer = commandBuffer; - } - virtual SLANG_NO_THROW void SLANG_MCALL copyBuffer( - IBufferResource* dst, - size_t dstOffset, - IBufferResource* src, - size_t srcOffset, - size_t size) override + SlangInt entryPointCount = programLayout->getEntryPointCount(); + for (SlangInt e = 0; e < entryPointCount; ++e) { - 
SLANG_UNUSED(dst); - SLANG_UNUSED(srcOffset); - SLANG_UNUSED(src); - SLANG_UNUSED(dstOffset); - SLANG_UNUSED(size); + auto slangEntryPoint = programLayout->getEntryPointByIndex(e); + RefPtr entryPointLayout; + SLANG_RETURN_ON_FAIL(ShaderObjectLayoutImpl::createForElementType( + device, slangEntryPoint->getTypeLayout(), entryPointLayout.writeRef())); + builder.addEntryPoint(slangEntryPoint->getStage(), entryPointLayout); } - virtual SLANG_NO_THROW void SLANG_MCALL uploadBufferData( - IBufferResource* dst, - size_t offset, - size_t size, - void* data) override + + SLANG_RETURN_ON_FAIL(builder.build(outLayout)); + + if (program->getSpecializationParamCount() == 0) { - _uploadBufferData( - m_commandBuffer->m_cmdList, - static_cast(dst), - offset, - size, - data); + // For root object, we would like know the union of all binding slots + // including all sub-objects in the shader-object hierarchy, so at + // parameter binding time we can easily know how many GPU descriptor tables + // to create without walking throught the shader-object hierarchy again. + // We build out this array along with root signature construction. 
+ List outRootDescriptorSetInfos; + SLANG_RETURN_ON_FAIL(createRootSignatureFromSlang( + device, + program, + (*outLayout)->m_rootSignature.writeRef(), + (*outLayout)->m_gpuDescriptorSetInfos)); } - virtual SLANG_NO_THROW void SLANG_MCALL endEncoding() {} - }; + return SLANG_OK; + } - ResourceCommandEncoderImpl m_resourceCommandEncoder; + slang::IComponentType* getSlangProgram() const { return m_program; } + slang::ProgramLayout* getSlangProgramLayout() const { return m_programLayout; } - virtual SLANG_NO_THROW void SLANG_MCALL - encodeResourceCommands(IResourceCommandEncoder** outEncoder) override + protected: + Result _init(Builder* builder) { - m_resourceCommandEncoder.init(m_renderer, this); - *outEncoder = &m_resourceCommandEncoder; + auto renderer = builder->m_renderer; + + SLANG_RETURN_ON_FAIL(Super::_init(builder)); + + m_program = builder->m_program; + m_programLayout = builder->m_programLayout; + m_entryPoints = builder->m_entryPoints; + return SLANG_OK; } - virtual SLANG_NO_THROW void SLANG_MCALL close() override { m_cmdList->Close(); } + ComPtr m_program; + slang::ProgramLayout* m_programLayout = nullptr; + + List m_entryPoints; + + public: + ComPtr m_rootSignature; + List m_gpuDescriptorSetInfos; }; - class CommandQueueImpl - : public ICommandQueue - , public RefObject + class ShaderProgramImpl : public ShaderProgramBase { public: - SLANG_REF_OBJECT_IUNKNOWN_ALL - ICommandQueue* getInterface(const Guid& guid) + PipelineType m_pipelineType; + List m_vertexShader; + List m_pixelShader; + List m_computeShader; + RefPtr m_rootObjectLayout; + }; + + class ShaderObjectImpl : public ShaderObjectBase + { + public: + static Result create( + D3D12Device* device, + ShaderObjectLayoutImpl* layout, + ShaderObjectImpl** outShaderObject) { - if (guid == GfxGUID::IID_ISlangUnknown || guid == GfxGUID::IID_ICommandQueue) - return static_cast(this); - return nullptr; + auto object = ComPtr(new ShaderObjectImpl()); + SLANG_RETURN_ON_FAIL(object->init(device, layout)); + + 
*outShaderObject = object.detach(); + return SLANG_OK; } - public: - struct CommandBufferPool + ~ShaderObjectImpl() { - List> pool; - uint32_t allocIndex = 0; - RefPtr allocCommandBuffer(D3D12Device* renderer, ExecutionFrameResources* frame) + auto layoutImpl = static_cast(m_layout.Ptr()); + if (m_descriptorSet.m_resourceCount) { - if ((Index)allocIndex < pool.getCount()) - { - RefPtr result = pool[allocIndex]; - result->init(renderer, frame); - allocIndex++; - return result; - } - RefPtr cmdBuffer = new CommandBufferImpl(); - cmdBuffer->init(renderer, frame); - pool.add(cmdBuffer); - return cmdBuffer; + m_resourceHeap->free( + m_descriptorSet.m_resourceTable, m_descriptorSet.m_resourceCount); } - void reset() + if (m_descriptorSet.m_samplerCount) { - allocIndex = 0; + m_samplerHeap->free(m_descriptorSet.m_samplerTable, m_descriptorSet.m_samplerCount); } - }; - List m_commandBufferPools; - List m_frames; - uint32_t m_frameIndex = 0; - D3D12Device* m_renderer; - ComPtr m_device; - ComPtr m_d3dQueue; - ComPtr m_fence; - uint64_t m_fenceValue = 0; - HANDLE globalWaitHandle; - Desc m_desc; - Result init( - D3D12Device* renderer, - uint32_t frameCount, - uint32_t viewHeapSize, - uint32_t samplerHeapSize) - { - m_renderer = renderer; - m_device = renderer->m_device; - m_frames.setCount(frameCount); - m_commandBufferPools.setCount(frameCount); - for (uint32_t i = 0; i < frameCount; i++) - { - SLANG_RETURN_ON_FAIL(m_frames[i].init(m_device, viewHeapSize, samplerHeapSize)); - } - D3D12_COMMAND_QUEUE_DESC queueDesc = {}; - queueDesc.Type = D3D12_COMMAND_LIST_TYPE_DIRECT; - SLANG_RETURN_ON_FAIL(m_device->CreateCommandQueue(&queueDesc, IID_PPV_ARGS(m_d3dQueue.writeRef()))); - SLANG_RETURN_ON_FAIL( - m_device->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(m_fence.writeRef()))); - globalWaitHandle = CreateEventEx( - nullptr, - nullptr, - CREATE_EVENT_INITIAL_SET | CREATE_EVENT_MANUAL_RESET, - EVENT_ALL_ACCESS); - return SLANG_OK; } - ~CommandQueueImpl() + + RendererBase* 
getDevice() { return m_layout->getDevice(); } + + SLANG_NO_THROW UInt SLANG_MCALL getEntryPointCount() SLANG_OVERRIDE { return 0; } + + SLANG_NO_THROW Result SLANG_MCALL getEntryPoint(UInt index, IShaderObject** outEntryPoint) + SLANG_OVERRIDE { - wait(); - CloseHandle(globalWaitHandle); + *outEntryPoint = nullptr; + return SLANG_OK; } - virtual SLANG_NO_THROW const Desc& SLANG_MCALL getDesc() override + + ShaderObjectLayoutImpl* getLayout() { - return m_desc; + return static_cast(m_layout.Ptr()); } - virtual SLANG_NO_THROW Result SLANG_MCALL - createCommandBuffer(ICommandBuffer** outCommandBuffer) override + + SLANG_NO_THROW slang::TypeLayoutReflection* SLANG_MCALL getElementTypeLayout() + SLANG_OVERRIDE { - RefPtr result = - m_commandBufferPools[m_frameIndex].allocCommandBuffer( - m_renderer, &m_frames[m_frameIndex]); - *outCommandBuffer = result.detach(); - return SLANG_OK; + return m_layout->getElementTypeLayout(); } - - virtual SLANG_NO_THROW void SLANG_MCALL - executeCommandBuffers(uint32_t count, ICommandBuffer* const* commandBuffers) override + + SLANG_NO_THROW Result SLANG_MCALL + setData(ShaderOffset const& inOffset, void const* data, size_t inSize) SLANG_OVERRIDE { - ShortList commandLists; - for (uint32_t i = 0; i < count; i++) + Index offset = inOffset.uniformOffset; + Index size = inSize; + + char* dest = m_ordinaryData.getBuffer(); + Index availableSize = m_ordinaryData.getCount(); + + // TODO: We really should bounds-check access rather than silently ignoring sets + // that are too large, but we have several test cases that set more data than + // an object actually stores on several targets... 
+ // + if (offset < 0) { - auto cmdImpl = static_cast(commandBuffers[i]); - commandLists.add(cmdImpl->m_cmdList); + size += offset; + offset = 0; + } + if ((offset + size) >= availableSize) + { + size = availableSize - offset; } - m_d3dQueue->ExecuteCommandLists((UINT)count, commandLists.getArrayView().getBuffer()); - auto& frame = m_frames[m_frameIndex]; - m_fenceValue++; - m_d3dQueue->Signal(m_fence, m_fenceValue); - ResetEvent(frame.fenceEvent); - ResetEvent(globalWaitHandle); - m_fence->SetEventOnCompletion(m_fenceValue, frame.fenceEvent); - swapExecutionFrame(); - } + memcpy(dest + offset, data, size); - void swapExecutionFrame() - { - m_frameIndex = (m_frameIndex + 1) % m_frames.getCount(); - auto& frame = m_frames[m_frameIndex]; - frame.reset(); - m_commandBufferPools[m_frameIndex].reset(); + return SLANG_OK; } - virtual SLANG_NO_THROW void SLANG_MCALL wait() override + virtual SLANG_NO_THROW Result SLANG_MCALL + setObject(ShaderOffset const& offset, IShaderObject* object) SLANG_OVERRIDE { - m_fenceValue++; - m_d3dQueue->Signal(m_fence, m_fenceValue); - ResetEvent(globalWaitHandle); - m_fence->SetEventOnCompletion(m_fenceValue, globalWaitHandle); - WaitForSingleObject(globalWaitHandle, INFINITE); - } - }; + if (offset.bindingRangeIndex < 0) + return SLANG_E_INVALID_ARG; + auto layout = getLayout(); + if (offset.bindingRangeIndex >= layout->getBindingRangeCount()) + return SLANG_E_INVALID_ARG; - class SwapchainImpl : public D3DSwapchainBase - { - public: - ComPtr m_queue; - ComPtr m_dxgiFactory; - ComPtr m_swapChain3; - ComPtr m_fence; - ShortList m_frameEvents; - uint64_t fenceValue = 0; - Result init( - D3D12Device* renderer, - const ISwapchain::Desc& swapchainDesc, - WindowHandle window) - { - m_queue = static_cast(swapchainDesc.queue)->m_d3dQueue; - m_dxgiFactory = renderer->m_deviceInfo.m_dxgiFactory; - SLANG_RETURN_ON_FAIL( - D3DSwapchainBase::init(swapchainDesc, window, DXGI_SWAP_EFFECT_FLIP_DISCARD)); - renderer->m_device->CreateFence(0, 
D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(m_fence.writeRef())); + auto subObject = static_cast(object); - SLANG_RETURN_ON_FAIL(m_swapChain->QueryInterface(m_swapChain3.writeRef())); - for (uint32_t i = 0; i < swapchainDesc.imageCount; i++) + auto bindingRangeIndex = offset.bindingRangeIndex; + auto& bindingRange = layout->getBindingRange(bindingRangeIndex); + + m_objects[bindingRange.binding.index + offset.bindingArrayIndex] = subObject; + + // If the range being assigned into represents an interface/existential-type leaf field, + // then we need to consider how the `object` being assigned here affects specialization. + // We may also need to assign some data from the sub-object into the ordinary data + // buffer for the parent object. + // + if (bindingRange.bindingType == slang::BindingType::ExistentialValue) { - m_frameEvents.add(CreateEventEx( - nullptr, - false, - CREATE_EVENT_INITIAL_SET | CREATE_EVENT_MANUAL_RESET, - EVENT_ALL_ACCESS)); + // A leaf field of interface type is laid out inside of the parent object + // as a tuple of `(RTTI, WitnessTable, Payload)`. The layout of these fields + // is a contract between the compiler and any runtime system, so we will + // need to rely on details of the binary layout. + + // We start by querying the layout/type of the concrete value that the application + // is trying to store into the field, and also the layout/type of the leaf + // existential-type field itself. + // + auto concreteTypeLayout = subObject->getElementTypeLayout(); + auto concreteType = concreteTypeLayout->getType(); + // + auto existentialTypeLayout = + layout->getElementTypeLayout()->getBindingRangeLeafTypeLayout( + bindingRangeIndex); + auto existentialType = existentialTypeLayout->getType(); + + // The first field of the tuple (offset zero) is the run-time type information + // (RTTI) ID for the concrete type being stored into the field. 
+ // + // TODO: We need to be able to gather the RTTI type ID from `object` and then + // use `setData(offset, &TypeID, sizeof(TypeID))`. + + // The second field of the tuple (offset 8) is the ID of the "witness" for the + // conformance of the concrete type to the interface used by this field. + // + auto witnessTableOffset = offset; + witnessTableOffset.uniformOffset += 8; + // + // Conformances of a type to an interface are computed and then stored by the + // Slang runtime, so we can look up the ID for this particular conformance (which + // will create it on demand). + // + ComPtr slangSession; + SLANG_RETURN_ON_FAIL(getRenderer()->getSlangSession(slangSession.writeRef())); + // + // Note: If the type doesn't actually conform to the required interface for + // this sub-object range, then this is the point where we will detect that + // fact and error out. + // + uint32_t conformanceID = 0xFFFFFFFF; + SLANG_RETURN_ON_FAIL(slangSession->getTypeConformanceWitnessSequentialID( + concreteType, existentialType, &conformanceID)); + // + // Once we have the conformance ID, then we can write it into the object + // at the required offset. + // + SLANG_RETURN_ON_FAIL( + setData(witnessTableOffset, &conformanceID, sizeof(conformanceID))); + + // The third field of the tuple (offset 16) is the "payload" that is supposed to + // hold the data for a value of the given concrete type. + // + auto payloadOffset = offset; + payloadOffset.uniformOffset += 16; + + // There are two cases we need to consider here for how the payload might be used: + // + // * If the concrete type of the value being bound is one that can "fit" into the + // available payload space, then it should be stored in the payload. + // + // * If the concrete type of the value cannot fit in the payload space, then it + // will need to be stored somewhere else. 
+ // + if (_doesValueFitInExistentialPayload(concreteTypeLayout, existentialTypeLayout)) + { + // If the value can fit in the payload area, then we will go ahead and copy + // its bytes into that area. + // + setData( + payloadOffset, + subObject->m_ordinaryData.getBuffer(), + subObject->m_ordinaryData.getCount()); + } + else + { + // If the value does *not *fit in the payload area, then there is nothing + // we can do at this point (beyond saving a reference to the sub-object, which + // was handled above). + // + // Once all the sub-objects have been set into the parent object, we can + // compute a specialized layout for it, and that specialized layout can tell + // us where the data for these sub-objects has been laid out. + return SLANG_E_NOT_IMPLEMENTED; + } } return SLANG_OK; } - virtual void createSwapchainBufferImages() override + virtual SLANG_NO_THROW Result SLANG_MCALL + getObject(ShaderOffset const& offset, IShaderObject** outObject) SLANG_OVERRIDE { - m_images.clear(); - - for (uint32_t i = 0; i < m_desc.imageCount; i++) - { - ComPtr d3dResource; - m_swapChain->GetBuffer(i, IID_PPV_ARGS(d3dResource.writeRef())); - ITextureResource::Desc imageDesc = {}; - imageDesc.setDefaults(IResource::Usage::RenderTarget); - imageDesc.init2D( - IResource::Type::Texture2D, m_desc.format, m_desc.width, m_desc.height, 0); - RefPtr image = new TextureResourceImpl(imageDesc); - image->m_resource.setResource(d3dResource.get()); - image->m_defaultState = D3D12_RESOURCE_STATE_PRESENT; - ComPtr imageResourcePtr; - imageResourcePtr = image.Ptr(); - m_images.add(imageResourcePtr); - } + SLANG_ASSERT(outObject); + if (offset.bindingRangeIndex < 0) + return SLANG_E_INVALID_ARG; + auto layout = getLayout(); + if (offset.bindingRangeIndex >= layout->getBindingRangeCount()) + return SLANG_E_INVALID_ARG; + auto& bindingRange = layout->getBindingRange(offset.bindingRangeIndex); + + auto object = m_objects[bindingRange.binding.index + offset.bindingArrayIndex].Ptr(); + 
object->addRef(); + *outObject = object; + return SLANG_OK; } - virtual IDXGIFactory* getDXGIFactory() override { return m_dxgiFactory; } - virtual IUnknown* getOwningDevice() override { return m_queue; } - virtual SLANG_NO_THROW int SLANG_MCALL acquireNextImage() override + + SLANG_NO_THROW Result SLANG_MCALL + setResource(ShaderOffset const& offset, IResourceView* resourceView) SLANG_OVERRIDE { - auto result = (int)m_swapChain3->GetCurrentBackBufferIndex(); - WaitForSingleObject(m_frameEvents[result], INFINITE); - ResetEvent(m_frameEvents[result]); - return result; + if (offset.bindingRangeIndex < 0) + return SLANG_E_INVALID_ARG; + auto layout = getLayout(); + if (offset.bindingRangeIndex >= layout->getBindingRangeCount()) + return SLANG_E_INVALID_ARG; + + auto resourceViewImpl = static_cast(resourceView); + + auto& bindingRange = layout->getBindingRange(offset.bindingRangeIndex); + auto descriptorSlotIndex = bindingRange.binding.offsetInDescriptorTable.resource + + (int32_t)offset.bindingArrayIndex; + // Hold a reference to the resource to prevent its destruction. 
+ m_boundResources[bindingRange.flatResourceOffset + offset.bindingArrayIndex] = + resourceViewImpl->m_resource; + ID3D12Device* d3dDevice = static_cast(getDevice())->m_device; + d3dDevice->CopyDescriptorsSimple( + 1, + m_resourceHeap->getCpuHandle( + m_descriptorSet.m_resourceTable + + bindingRange.binding.offsetInDescriptorTable.resource + + (int32_t)offset.bindingArrayIndex), + resourceViewImpl->m_descriptor.cpuHandle, + D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + return SLANG_OK; } - virtual SLANG_NO_THROW Result SLANG_MCALL present() override + + SLANG_NO_THROW Result SLANG_MCALL + setSampler(ShaderOffset const& offset, ISamplerState* sampler) SLANG_OVERRIDE { - SLANG_RETURN_ON_FAIL(D3DSwapchainBase::present()); - fenceValue++; - m_fence->SetEventOnCompletion(fenceValue, m_frameEvents[m_swapChain3->GetCurrentBackBufferIndex()]); - m_queue->Signal(m_fence, fenceValue); + if (offset.bindingRangeIndex < 0) + return SLANG_E_INVALID_ARG; + auto layout = getLayout(); + if (offset.bindingRangeIndex >= layout->getBindingRangeCount()) + return SLANG_E_INVALID_ARG; + auto& bindingRange = layout->getBindingRange(offset.bindingRangeIndex); + auto samplerImpl = static_cast(sampler); + ID3D12Device* d3dDevice = static_cast(getDevice())->m_device; + d3dDevice->CopyDescriptorsSimple( + 1, + m_samplerHeap->getCpuHandle( + m_descriptorSet.m_samplerTable + + bindingRange.binding.offsetInDescriptorTable.sampler + + (int32_t)offset.bindingArrayIndex), + samplerImpl->m_descriptor.cpuHandle, + D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER); return SLANG_OK; } - }; - static PROC loadProc(HMODULE module, char const* name); + SLANG_NO_THROW Result SLANG_MCALL setCombinedTextureSampler( + ShaderOffset const& offset, + IResourceView* textureView, + ISamplerState* sampler) SLANG_OVERRIDE + { + if (offset.bindingRangeIndex < 0) + return SLANG_E_INVALID_ARG; + auto layout = getLayout(); + if (offset.bindingRangeIndex >= layout->getBindingRangeCount()) + return SLANG_E_INVALID_ARG; + auto& bindingRange 
= layout->getBindingRange(offset.bindingRangeIndex); + auto resourceViewImpl = static_cast(textureView); + ID3D12Device* d3dDevice = static_cast(getDevice())->m_device; + d3dDevice->CopyDescriptorsSimple( + 1, + m_resourceHeap->getCpuHandle( + m_descriptorSet.m_resourceTable + + bindingRange.binding.offsetInDescriptorTable.resource + + (int32_t)offset.bindingArrayIndex), + resourceViewImpl->m_descriptor.cpuHandle, + D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + auto samplerImpl = static_cast(sampler); + d3dDevice->CopyDescriptorsSimple( + 1, + m_samplerHeap->getCpuHandle( + m_descriptorSet.m_samplerTable + + bindingRange.binding.offsetInDescriptorTable.sampler + + (int32_t)offset.bindingArrayIndex), + samplerImpl->m_descriptor.cpuHandle, + D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER); + return SLANG_OK; + } - Result createCommandQueueImpl( - uint32_t frameCount, - uint32_t viewHeapSize, - uint32_t samplerHeapSize, - CommandQueueImpl** outQueue); + public: + // Appends all types that are used to specialize the element type of this shader object in + // `args` list. + virtual Result collectSpecializationArgs(ExtendedShaderObjectTypeList& args) override + { + auto& subObjectRanges = getLayout()->getSubObjectRanges(); + // The following logic is built on the assumption that all fields that involve + // existential types (and therefore require specialization) will result in a sub-object + // range in the type layout. This allows us to simply scan the sub-object ranges to find + // out all specialization arguments. 
+ Index subObjectRangeCount = subObjectRanges.getCount(); + for (Index subObjectRangeIndex = 0; subObjectRangeIndex < subObjectRangeCount; + subObjectRangeIndex++) + { + auto const& subObjectRange = subObjectRanges[subObjectRangeIndex]; + auto const& bindingRange = + getLayout()->getBindingRange(subObjectRange.bindingRangeIndex); - Result createBuffer( - const D3D12_RESOURCE_DESC& resourceDesc, - const void* srcData, - size_t srcDataSize, - D3D12Resource& uploadResource, - D3D12_RESOURCE_STATES finalState, - D3D12Resource& resourceOut); + Index count = bindingRange.count; + SLANG_ASSERT(count == 1); - Result captureTextureToSurface( - D3D12Resource& resource, - ResourceState state, - ISlangBlob** blob, - size_t* outRowPitch, - size_t* outPixelSize); + Index subObjectIndexInRange = 0; + auto subObject = m_objects[bindingRange.binding.index + subObjectIndexInRange]; - Result _createDevice( - DeviceCheckFlags deviceCheckFlags, - const UnownedStringSlice& nameMatch, - D3D_FEATURE_LEVEL featureLevel, - DeviceInfo& outDeviceInfo); + switch (bindingRange.bindingType) + { + case slang::BindingType::ExistentialValue: + { + // A binding type of `ExistentialValue` means the sub-object represents an + // interface-typed field. In this case the specialization argument for this + // field is the actual specialized type of the bound shader object. If the + // shader object's type is an ordinary type without existential fields, then + // the type argument will simply be the ordinary type. But if the sub + // object's type is itself a specialized type, we need to make sure to use + // that type as the specialization argument. 
+ + ExtendedShaderObjectType specializedSubObjType; + SLANG_RETURN_ON_FAIL( + subObject->getSpecializedShaderObjectType(&specializedSubObjType)); + args.add(specializedSubObjType); + break; + } + case slang::BindingType::ParameterBlock: + case slang::BindingType::ConstantBuffer: + // Currently we only handle the case where the field's type is + // `ParameterBlock` or `ConstantBuffer`, where + // `SomeStruct` is a struct type (not directly an interface type). In this case, + // we just recursively collect the specialization arguments from the bound sub + // object. + SLANG_RETURN_ON_FAIL(subObject->collectSpecializationArgs(args)); + // TODO: we need to handle the case where the field is of the form + // `ParameterBlock`. We should treat this case the same way as the + // `ExistentialValue` case here, but currently we lack a mechanism to + // distinguish the two scenarios. + break; + } + // TODO: need to handle another case where specialization happens on resources + // fields e.g. `StructuredBuffer`. + } + return SLANG_OK; + } - - struct ResourceCommandRecordInfo - { - ComPtr commandBuffer; - ID3D12GraphicsCommandList* d3dCommandList; - }; - ResourceCommandRecordInfo encodeResourceCommands() - { - ResourceCommandRecordInfo info; - m_resourceCommandQueue->createCommandBuffer(info.commandBuffer.writeRef()); - info.d3dCommandList = static_cast(info.commandBuffer.get())->m_cmdList; - return info; - } - void submitResourceCommandsAndWait(const ResourceCommandRecordInfo& info) - { - info.commandBuffer->close(); - m_resourceCommandQueue->executeCommandBuffer(info.commandBuffer); - m_resourceCommandQueue->wait(); - } + protected: + Result init(D3D12Device* device, ShaderObjectLayoutImpl* layout) + { + m_layout = layout; - // D3D12Device members. + // If the layout tells us that there is any uniform data, + // then we will allocate a CPU memory buffer to hold that data + // while it is being set from the host. 
+ // + // Once the user is done setting the parameters/fields of this + // shader object, we will produce a GPU-memory version of the + // uniform data (which includes values from this object and + // any existential-type sub-objects). + // + size_t uniformSize = layout->getElementTypeLayout()->getSize(); + if (uniformSize) + { + m_ordinaryData.setCount(uniformSize); + memset(m_ordinaryData.getBuffer(), 0, uniformSize); + } - Desc m_desc; + // Allocate descriptor tables for this shader object. + m_resourceHeap = &device->m_cpuViewHeap; + m_samplerHeap = &device->m_cpuSamplerHeap; + auto descSetInfo = layout->getDescriptorSetInfo(); + m_descriptorSet.m_resourceCount = descSetInfo.resourceDescriptorCount; + if (descSetInfo.resourceDescriptorCount) + { + m_descriptorSet.m_resourceTable = + m_resourceHeap->allocate(descSetInfo.resourceDescriptorCount); + } + m_descriptorSet.m_samplerCount = descSetInfo.samplerDescriptorCount; + if (descSetInfo.samplerDescriptorCount) + { + m_descriptorSet.m_samplerTable = + m_samplerHeap->allocate(descSetInfo.samplerDescriptorCount); + } - gfx::DeviceInfo m_info; - String m_adapterName; + m_boundResources.setCount(layout->getResourceCount()); - bool m_isInitialized = false; + // If the layout specifies that we have any sub-objects, then + // we need to size the array to account for them. + // + Index subObjectCount = layout->getSubObjectCount(); + m_objects.setCount(subObjectCount); - ComPtr m_dxDebug; + for (auto subObjectRangeInfo : layout->getSubObjectRanges()) + { + auto subObjectLayout = subObjectRangeInfo.layout; - DeviceInfo m_deviceInfo; - ID3D12Device* m_device = nullptr; + // In the case where the sub-object range represents an + // existential-type leaf field (e.g., an `IBar`), we + // cannot pre-allocate the object(s) to go into that + // range, since we can't possibly know what to allocate + // at this point. 
+ // + if (!subObjectLayout) + continue; + // + // Otherwise, we will allocate a sub-object to fill + // in each entry in this range, based on the layout + // information we already have. - RefPtr m_resourceCommandQueue; + auto& bindingRangeInfo = + layout->getBindingRange(subObjectRangeInfo.bindingRangeIndex); + for (uint32_t i = 0; i < bindingRangeInfo.count; ++i) + { + RefPtr subObject; + SLANG_RETURN_ON_FAIL( + ShaderObjectImpl::create(device, subObjectLayout, subObject.writeRef())); + m_objects[bindingRangeInfo.binding.index + i] = subObject; + } + } - // Dll entry points - PFN_D3D12_GET_DEBUG_INTERFACE m_D3D12GetDebugInterface = nullptr; - PFN_D3D12_CREATE_DEVICE m_D3D12CreateDevice = nullptr; - PFN_D3D12_SERIALIZE_ROOT_SIGNATURE m_D3D12SerializeRootSignature = nullptr; + return SLANG_OK; + } - bool m_nvapi = false; -}; + /// Write the uniform/ordinary data of this object into the given `dest` buffer at the given + /// `offset` + Result _writeOrdinaryData( + PipelineCommandEncoder* encoder, + BufferResourceImpl* buffer, + size_t offset, + size_t destSize, + ShaderObjectLayoutImpl* specializedLayout) + { + auto src = m_ordinaryData.getBuffer(); + auto srcSize = size_t(m_ordinaryData.getCount()); + SLANG_ASSERT(srcSize <= destSize); -Result D3D12Device::CommandBufferImpl::PipelineCommandEncoder::_bindRenderState( - PipelineStateImpl* pipelineStateImpl, - Submitter* submitter) -{ - auto commandList = m_commandBuffer->m_cmdList; - // TODO: we should only set some of this state as needed... + _uploadBufferData(encoder->m_d3dCmdList, buffer, offset, srcSize, src); - auto pipelineTypeIndex = (int)pipelineStateImpl->desc.type; - auto pipelineLayout = static_cast(pipelineStateImpl->m_pipelineLayout.get()); + // In the case where this object has any sub-objects of + // existential/interface type, we need to recurse on those objects + // that need to write their state into an appropriate "pending" allocation. 
+ // + // Note: Any values that could fit into the "payload" included + // in the existential-type field itself will have already been + // written as part of `setObject()`. This loop only needs to handle + // those sub-objects that do not "fit." + // + // An implementer looking at this code might wonder if things could be changed + // so that *all* writes related to sub-objects for interface-type fields could + // be handled in this one location, rather than having some in `setObject()` and + // others handled here. + // + Index subObjectRangeCounter = 0; + for (auto const& subObjectRangeInfo : specializedLayout->getSubObjectRanges()) + { + Index subObjectRangeIndex = subObjectRangeCounter++; + auto const& bindingRangeInfo = + specializedLayout->getBindingRange(subObjectRangeInfo.bindingRangeIndex); - submitter->setRootSignature(pipelineLayout->m_rootSignature); - commandList->SetPipelineState(pipelineStateImpl->m_pipelineState); + // We only need to handle sub-object ranges for interface/existential-type fields, + // because fields of constant-buffer or parameter-block type are responsible for + // the ordinary/uniform data of their own existential/interface-type sub-objects. + // + if (bindingRangeInfo.bindingType != slang::BindingType::ExistentialValue) + continue; - ID3D12DescriptorHeap* heaps[] = { - m_frame->m_viewHeap.getHeap(), - m_frame->m_samplerHeap.getHeap(), - }; - commandList->SetDescriptorHeaps(SLANG_COUNT_OF(heaps), heaps); + // Each sub-object range represents a single "leaf" field, but might be nested + // under zero or more outer arrays, such that the number of existential values + // in the same range can be one or more. + // + auto count = bindingRangeInfo.count; - // We need to copy descriptors over from the descriptor sets - // (where they are stored in CPU-visible heaps) to the GPU-visible - // heaps so that they can be accessed by shader code. 
+ // We are not concerned with the case where the existential value(s) in the range + // fit into the payload part of the leaf field. + // + // In the case where the value didn't fit, the Slang layout strategy would have + // considered the requirements of the value as a "pending" allocation, and would + // allocate storage for the ordinary/uniform part of that pending allocation inside + // of the parent object's type layout. + // + // Here we assume that the Slang reflection API can provide us with a single byte + // offset and stride for the location of the pending data allocation in the + // specialized type layout, which will store the values for this sub-object range. + // + // TODO: The reflection API functions we are assuming here haven't been implemented + // yet, so the functions being called here are stubs. + // + // TODO: It might not be that a single sub-object range can reliably map to a single + // contiguous array with a single stride; we need to carefully consider what the + // layout logic does for complex cases with multiple layers of nested arrays and + // structures. + // + size_t subObjectRangePendingDataOffset = + _getSubObjectRangePendingDataOffset(specializedLayout, subObjectRangeIndex); + size_t subObjectRangePendingDataStride = + _getSubObjectRangePendingDataStride(specializedLayout, subObjectRangeIndex); - Int descriptorSetCount = pipelineLayout->m_descriptorSetCount; - Int rootParameterIndex = 0; - for (Int dd = 0; dd < descriptorSetCount; ++dd) - { - auto descriptorSet = m_boundDescriptorSets[pipelineTypeIndex][dd]; - auto descriptorSetLayout = descriptorSet->m_layout; + // If the range doesn't actually need/use the "pending" allocation at all, then + // we need to detect that case and skip such ranges. + // + // TODO: This should probably be handled on a per-object basis by caching a "does it + // fit?" bit as part of the information for bound sub-objects, given that we already + // compute the "does it fit?" 
status as part of `setObject()`. + // + if (subObjectRangePendingDataOffset == 0) + continue; + + for (uint32_t i = 0; i < count; ++i) + { + auto subObject = m_objects[bindingRangeInfo.binding.index + i]; + + RefPtr subObjectLayout; + SLANG_RETURN_ON_FAIL( + subObject->getSpecializedLayout(subObjectLayout.writeRef())); + + auto subObjectOffset = + subObjectRangePendingDataOffset + i * subObjectRangePendingDataStride; + + subObject->_writeOrdinaryData( + encoder, + buffer, + offset + subObjectOffset, + destSize - subObjectOffset, + subObjectLayout); + } + } - // TODO: require that `descriptorSetLayout` is compatible with - // `pipelineLayout->descriptorSetlayouts[dd]`. + return SLANG_OK; + } + // As discussed in `_writeOrdinaryData()`, these methods are just stubs waiting for + // the "flat" Slang reflection information to provide access to the relevant data. + // + size_t _getSubObjectRangePendingDataOffset( + ShaderObjectLayoutImpl* specializedLayout, + Index subObjectRangeIndex) { - if (auto descriptorCount = descriptorSetLayout->m_resourceCount) - { - auto& gpuHeap = m_frame->m_viewHeap; - auto gpuDescriptorTable = gpuHeap.allocate(int(descriptorCount)); + return 0; + } + size_t _getSubObjectRangePendingDataStride( + ShaderObjectLayoutImpl* specializedLayout, + Index subObjectRangeIndex) + { + return 0; + } - auto& cpuHeap = *descriptorSet->m_resourceHeap; - auto cpuDescriptorTable = descriptorSet->m_resourceTable; + /// Ensure that the `m_ordinaryDataBuffer` has been created, if it is needed + Result _ensureOrdinaryDataBufferCreatedIfNeeded(PipelineCommandEncoder* encoder) + { + // If we have already created a buffer to hold ordinary data, then we should + // simply re-use that buffer rather than re-create it. 
+ // + // TODO: Simply re-using the buffer without any kind of validation checks + // means that we are assuming that users cannot or will not perform any `set` + // operations on a shader object once an operation has requested this buffer + // be created. We need to enforce that rule if we want to rely on it. + // + if (m_ordinaryDataBuffer) + return SLANG_OK; + + // Computing the size of the ordinary data buffer is *not* just as simple + // as using the size of the `m_ordinaryData` array that we store. The reason + // for the added complexity is that interface-type fields may lead to the + // storage being specialized such that it needs extra appended data to + // store the concrete values that logically belong in those interface-type + // fields but wouldn't fit in the fixed-size allocation we gave them. + // + // TODO: We need to actually implement that logic by using reflection + // data computed for the specialized type of this shader object. + // For now we just make the simple assumption described above despite + // knowing that it is false. + // + RefPtr specializedLayout; + SLANG_RETURN_ON_FAIL(getSpecializedLayout(specializedLayout.writeRef())); - m_device->CopyDescriptorsSimple( - UINT(descriptorCount), - gpuHeap.getCpuHandle(gpuDescriptorTable), - cpuHeap.getCpuHandle(int(cpuDescriptorTable)), - D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + auto specializedOrdinaryDataSize = specializedLayout->getElementTypeLayout()->getSize(); + if (specializedOrdinaryDataSize == 0) + return SLANG_OK; - submitter->setRootDescriptorTable( - int(rootParameterIndex++), gpuHeap.getGpuHandle(gpuDescriptorTable)); - } + // Once we have computed how large the buffer should be, we can allocate + // it using the existing public `IDevice` API. 
+ // + + ComPtr bufferResourcePtr; + IBufferResource::Desc bufferDesc; + bufferDesc.init(specializedOrdinaryDataSize); + bufferDesc.cpuAccessFlags |= IResource::AccessFlag::Write; + SLANG_RETURN_ON_FAIL(encoder->m_renderer->createBufferResource( + IResource::Usage::ConstantBuffer, + bufferDesc, + nullptr, + bufferResourcePtr.writeRef())); + m_ordinaryDataBuffer = static_cast(bufferResourcePtr.get()); + + // Once the buffer is allocated, we can use `_writeOrdinaryData` to fill it in. + // + // Note that `_writeOrdinaryData` is potentially recursive in the case + // where this object contains interface/existential-type fields, so we + // don't need or want to inline it into this call site. + // + SLANG_RETURN_ON_FAIL(_writeOrdinaryData( + encoder, m_ordinaryDataBuffer, 0, specializedOrdinaryDataSize, specializedLayout)); + + return SLANG_OK; } + + /// Bind the buffer for ordinary/uniform data, if needed + Result _bindOrdinaryDataBufferIfNeeded(PipelineCommandEncoder* encoder) { - if (auto descriptorCount = descriptorSetLayout->m_samplerCount) + // We start by ensuring that the buffer is created, if it is needed. + // + SLANG_RETURN_ON_FAIL(_ensureOrdinaryDataBufferCreatedIfNeeded(encoder)); + + // If we did indeed need/create a buffer, then we must bind it + // into root binding state. 
+ // + if (m_ordinaryDataBuffer) { - auto& gpuHeap = m_frame->m_samplerHeap; - auto gpuDescriptorTable = gpuHeap.allocate(int(descriptorCount)); + auto descriptorTable = m_descriptorSet.m_resourceTable; + D3D12_CONSTANT_BUFFER_VIEW_DESC viewDesc = {}; + viewDesc.BufferLocation = + m_ordinaryDataBuffer->m_resource.getResource()->GetGPUVirtualAddress(); + viewDesc.SizeInBytes = + (UINT)D3DUtil::calcAligned((UInt)m_ordinaryData.getCount(), 256); + encoder->m_device->CreateConstantBufferView( + &viewDesc, + m_resourceHeap->getCpuHandle(descriptorTable)); + } - auto& cpuHeap = *descriptorSet->m_samplerHeap; - auto cpuDescriptorTable = descriptorSet->m_samplerTable; + return SLANG_OK; + } - m_device->CopyDescriptorsSimple( - UINT(descriptorCount), - gpuHeap.getCpuHandle(gpuDescriptorTable), - cpuHeap.getCpuHandle(int(cpuDescriptorTable)), + public: + virtual Result bindObject(PipelineCommandEncoder* encoder, RootBindingState* bindingState) + { + ShaderObjectLayoutImpl* layout = getLayout(); + SLANG_RETURN_ON_FAIL(_bindOrdinaryDataBufferIfNeeded(encoder)); + uint32_t descTableIndex = bindingState->rootParamIndex; + auto& descSet = m_descriptorSet; + if (descSet.m_resourceCount) + { + auto gpuDescriptorTable = bindingState->descriptorTables[descTableIndex]; + auto& gpuHeap = gpuDescriptorTable.heap; + auto& cpuHeap = *m_resourceHeap; + auto cpuDescriptorTable = descSet.m_resourceTable; + + bindingState->device->m_device->CopyDescriptorsSimple( + UINT(descSet.m_resourceCount), + gpuHeap.getCpuHandle(gpuDescriptorTable.table + bindingState->offset.resource), + cpuHeap.getCpuHandle(cpuDescriptorTable), + D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + bindingState->offset.resource += descSet.m_resourceCount; + descTableIndex++; + } + if (descSet.m_samplerCount) + { + auto gpuDescriptorTable = bindingState->descriptorTables[descTableIndex]; + auto& gpuHeap = gpuDescriptorTable.heap; + auto& cpuHeap = *m_samplerHeap; + auto cpuDescriptorTable = (int)descSet.m_samplerTable; + + 
bindingState->device->m_device->CopyDescriptorsSimple( + UINT(descSet.m_samplerCount), + gpuHeap.getCpuHandle(gpuDescriptorTable.table + bindingState->offset.sampler), + cpuHeap.getCpuHandle(cpuDescriptorTable), D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER); - - submitter->setRootDescriptorTable( - int(rootParameterIndex++), gpuHeap.getGpuHandle(gpuDescriptorTable)); + bindingState->offset.sampler += descSet.m_samplerCount; + descTableIndex++; + } + bindingState->futureRootParamOffset = + Math::Max(descTableIndex, bindingState->futureRootParamOffset); + for (Index i = 0; i < layout->getSubObjectCount(); i++) + { + switch (layout->getSubObjectRange(i).bindingType) + { + case slang::BindingType::ParameterBlock: + { + auto newBindingState = *bindingState; + newBindingState.offset.resource = 0; + newBindingState.offset.sampler = 0; + newBindingState.rootParamIndex = + bindingState->futureRootParamOffset; + newBindingState.futureRootParamOffset = newBindingState.rootParamIndex; + m_objects[i]->bindObject(encoder, &newBindingState); + bindingState->futureRootParamOffset = newBindingState.futureRootParamOffset; + } + break; + case slang::BindingType::ConstantBuffer: + { + m_objects[i]->bindObject(encoder, bindingState); + } + break; + case slang::BindingType::ExistentialValue: + // If the existential object contains only ordinary data fields, + // the data is already written into m_ordinaryDataBuffer during `setObject`, + // so we don't need to do anything here. + // If the existential object has resource fields, this is the time to set + // those fields as in the "pendingLayout" section. + // TODO: implement resource fields binding for inline existential values. 
+ default: + break; + } } + return SLANG_OK; } - if (auto rootConstantRangeCount = descriptorSetLayout->m_rootConstantRanges.getCount()) - { - auto srcData = descriptorSet->m_rootConstantData.getBuffer(); - for (auto& rootConstantRangeInfo : descriptorSetLayout->m_rootConstantRanges) + /// Any "ordinary" / uniform data for this object + List m_ordinaryData; + + List> m_objects; + + D3D12HostVisibleDescriptorAllocator* m_resourceHeap = nullptr; + D3D12HostVisibleDescriptorAllocator* m_samplerHeap = nullptr; + + struct DescriptorSet + { + int32_t m_resourceTable = 0; + int32_t m_samplerTable = 0; + uint32_t m_resourceCount = 0; + uint32_t m_samplerCount = 0; + }; + DescriptorSet m_descriptorSet; + + ShortList, 8> m_boundResources; + + /// A constant buffer used to store ordinary data for this object + /// and existential-type sub-objects. + /// + /// Created on demand with `_ensureOrdinaryDataBufferCreatedIfNeeded()` + RefPtr m_ordinaryDataBuffer; + + /// Get the layout of this shader object with specialization arguments considered + /// + /// This operation should only be called after the shader object has been + /// fully filled in and finalized. + /// + Result getSpecializedLayout(ShaderObjectLayoutImpl** outLayout) + { + if (!m_specializedLayout) { - auto countOf32bitValues = rootConstantRangeInfo.size / sizeof(uint32_t); - submitter->setRootConstants( - rootConstantRangeInfo.rootParamIndex, - 0, - countOf32bitValues, - srcData + rootConstantRangeInfo.offset); + SLANG_RETURN_ON_FAIL(_createSpecializedLayout(m_specializedLayout.writeRef())); } + *outLayout = RefPtr(m_specializedLayout).detach(); + return SLANG_OK; } - } - return SLANG_OK; -} + /// Create the layout for this shader object with specialization arguments considered + /// + /// This operation is virtual so that it can be customized by `RootShaderObject`. 
+ /// + virtual Result _createSpecializedLayout(ShaderObjectLayoutImpl** outLayout) + { + ExtendedShaderObjectType extendedType; + SLANG_RETURN_ON_FAIL(getSpecializedShaderObjectType(&extendedType)); -Result D3D12Device::createCommandQueueImpl( - uint32_t frameCount, - uint32_t viewHeapSize, - uint32_t samplerHeapSize, - D3D12Device::CommandQueueImpl** outQueue) -{ - RefPtr queue = new D3D12Device::CommandQueueImpl(); - SLANG_RETURN_ON_FAIL(queue->init(this, frameCount, viewHeapSize, samplerHeapSize)); - *outQueue = queue.detach(); - return SLANG_OK; -} + auto renderer = getRenderer(); + RefPtr layout; + SLANG_RETURN_ON_FAIL( + renderer->getShaderObjectLayout(extendedType.slangType, layout.writeRef())); -SlangResult SLANG_MCALL createD3D12Device(const IDevice::Desc* desc, IDevice** outDevice) -{ - RefPtr result = new D3D12Device(); - SLANG_RETURN_ON_FAIL(result->initialize(*desc)); - *outDevice = result.detach(); - return SLANG_OK; -} + *outLayout = static_cast(layout.detach()); + return SLANG_OK; + } -/* static */PROC D3D12Device::loadProc(HMODULE module, char const* name) -{ - PROC proc = ::GetProcAddress(module, name); - if (!proc) + RefPtr m_specializedLayout; + }; + + class RootShaderObjectImpl : public ShaderObjectImpl { - fprintf(stderr, "error: failed load symbol '%s'\n", name); - return nullptr; - } - return proc; -} + typedef ShaderObjectImpl Super; -D3D12Device::~D3D12Device() -{ -} + public: + static Result create( + D3D12Device* device, + RootShaderObjectLayoutImpl* layout, + RootShaderObjectImpl** outShaderObject) + { + RefPtr object = new RootShaderObjectImpl(); + SLANG_RETURN_ON_FAIL(object->init(device, layout)); -static void _initSrvDesc(IResource::Type resourceType, const ITextureResource::Desc& textureDesc, const D3D12_RESOURCE_DESC& desc, DXGI_FORMAT pixelFormat, D3D12_SHADER_RESOURCE_VIEW_DESC& descOut) -{ - // create SRV - descOut = D3D12_SHADER_RESOURCE_VIEW_DESC(); + *outShaderObject = object.detach(); + return SLANG_OK; + } - 
descOut.Format = (pixelFormat == DXGI_FORMAT_UNKNOWN) ? D3DUtil::calcFormat(D3DUtil::USAGE_SRV, desc.Format) : pixelFormat; - descOut.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; - if (desc.DepthOrArraySize == 1) - { - switch (desc.Dimension) + RootShaderObjectLayoutImpl* getLayout() { - case D3D12_RESOURCE_DIMENSION_TEXTURE1D: descOut.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE1D; break; - case D3D12_RESOURCE_DIMENSION_TEXTURE2D: descOut.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D; break; - case D3D12_RESOURCE_DIMENSION_TEXTURE3D: descOut.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE3D; break; - default: assert(!"Unknown dimension"); + return static_cast(m_layout.Ptr()); } - descOut.Texture2D.MipLevels = desc.MipLevels; - descOut.Texture2D.MostDetailedMip = 0; - descOut.Texture2D.PlaneSlice = 0; - descOut.Texture2D.ResourceMinLODClamp = 0.0f; - } - else if (resourceType == IResource::Type::TextureCube) - { - if (textureDesc.arraySize > 1) + UInt SLANG_MCALL getEntryPointCount() SLANG_OVERRIDE { - descOut.ViewDimension = D3D12_SRV_DIMENSION_TEXTURECUBEARRAY; - - descOut.TextureCubeArray.NumCubes = textureDesc.arraySize; - descOut.TextureCubeArray.First2DArrayFace = 0; - descOut.TextureCubeArray.MipLevels = desc.MipLevels; - descOut.TextureCubeArray.MostDetailedMip = 0; - descOut.TextureCubeArray.ResourceMinLODClamp = 0; + return (UInt)m_entryPoints.getCount(); } - else + SlangResult SLANG_MCALL getEntryPoint(UInt index, IShaderObject** outEntryPoint) + SLANG_OVERRIDE { - descOut.ViewDimension = D3D12_SRV_DIMENSION_TEXTURECUBE; - - descOut.TextureCube.MipLevels = desc.MipLevels; - descOut.TextureCube.MostDetailedMip = 0; - descOut.TextureCube.ResourceMinLODClamp = 0; + *outEntryPoint = m_entryPoints[index]; + m_entryPoints[index]->addRef(); + return SLANG_OK; } - } - else - { - assert(desc.DepthOrArraySize > 1); - switch (desc.Dimension) + virtual Result collectSpecializationArgs(ExtendedShaderObjectTypeList& args) override { - case 
D3D12_RESOURCE_DIMENSION_TEXTURE1D: descOut.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE1DARRAY; break; - case D3D12_RESOURCE_DIMENSION_TEXTURE2D: descOut.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2DARRAY; break; - case D3D12_RESOURCE_DIMENSION_TEXTURE3D: descOut.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE3D; break; - - default: assert(!"Unknown dimension"); + SLANG_RETURN_ON_FAIL(ShaderObjectImpl::collectSpecializationArgs(args)); + for (auto& entryPoint : m_entryPoints) + { + SLANG_RETURN_ON_FAIL(entryPoint->collectSpecializationArgs(args)); + } + return SLANG_OK; } - descOut.Texture2DArray.ArraySize = desc.DepthOrArraySize; - descOut.Texture2DArray.MostDetailedMip = 0; - descOut.Texture2DArray.MipLevels = desc.MipLevels; - descOut.Texture2DArray.FirstArraySlice = 0; - descOut.Texture2DArray.PlaneSlice = 0; - descOut.Texture2DArray.ResourceMinLODClamp = 0; - } -} - -static void _initBufferResourceDesc(size_t bufferSize, D3D12_RESOURCE_DESC& out) -{ - out = {}; + public: + virtual Result bindObject(PipelineCommandEncoder* encoder, RootBindingState* bindingState) override + { + RootBindingState globalBindingState = *bindingState; + SLANG_RETURN_ON_FAIL(Super::bindObject(encoder, bindingState)); - out.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; - out.Alignment = 0; - out.Width = bufferSize; - out.Height = 1; - out.DepthOrArraySize = 1; - out.MipLevels = 1; - out.Format = DXGI_FORMAT_UNKNOWN; - out.SampleDesc.Count = 1; - out.SampleDesc.Quality = 0; - out.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; - out.Flags = D3D12_RESOURCE_FLAG_NONE; -} + auto entryPointCount = m_entryPoints.getCount(); + for (Index i = 0; i < entryPointCount; ++i) + { + auto entryPoint = m_entryPoints[i]; + auto bindingStateCopy = globalBindingState; + SLANG_RETURN_ON_FAIL(entryPoint->bindObject(encoder, &bindingStateCopy)); + } -Result D3D12Device::createBuffer(const D3D12_RESOURCE_DESC& resourceDesc, const void* srcData, size_t srcDataSize, D3D12Resource& uploadResource, D3D12_RESOURCE_STATES 
finalState, D3D12Resource& resourceOut) -{ - const size_t bufferSize = size_t(resourceDesc.Width); + return SLANG_OK; + } + protected: - { - D3D12_HEAP_PROPERTIES heapProps; - heapProps.Type = D3D12_HEAP_TYPE_DEFAULT; - heapProps.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN; - heapProps.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN; - heapProps.CreationNodeMask = 1; - heapProps.VisibleNodeMask = 1; + Result init(D3D12Device* device, RootShaderObjectLayoutImpl* layout) + { + SLANG_RETURN_ON_FAIL(Super::init(device, layout)); - const D3D12_RESOURCE_STATES initialState = srcData ? D3D12_RESOURCE_STATE_COPY_DEST : finalState; + for (auto entryPointInfo : layout->getEntryPoints()) + { + RefPtr entryPoint; + SLANG_RETURN_ON_FAIL( + ShaderObjectImpl::create(device, entryPointInfo.layout, entryPoint.writeRef())); + m_entryPoints.add(entryPoint); + } - SLANG_RETURN_ON_FAIL(resourceOut.initCommitted(m_device, heapProps, D3D12_HEAP_FLAG_NONE, resourceDesc, initialState, nullptr)); - } + return SLANG_OK; + } - { - D3D12_HEAP_PROPERTIES heapProps; - heapProps.Type = D3D12_HEAP_TYPE_UPLOAD; - heapProps.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN; - heapProps.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN; - heapProps.CreationNodeMask = 1; - heapProps.VisibleNodeMask = 1; + Result _createSpecializedLayout(ShaderObjectLayoutImpl** outLayout) SLANG_OVERRIDE + { + ExtendedShaderObjectTypeList specializationArgs; + SLANG_RETURN_ON_FAIL(collectSpecializationArgs(specializationArgs)); - D3D12_RESOURCE_DESC uploadResourceDesc(resourceDesc); - uploadResourceDesc.Flags = D3D12_RESOURCE_FLAG_NONE; + // Note: There is an important policy decision being made here that we need + // to approach carefully. + // + // We are doing two different things that affect the layout of a program: + // + // 1. We are *composing* one or more pieces of code (notably the shared global/module + // stuff and the per-entry-point stuff). + // + // 2. 
We are *specializing* code that includes generic/existential parameters + // to concrete types/values. + // + // We need to decide the relative *order* of these two steps, because of how it impacts + // layout. The layout for `specialize(compose(A,B), X, Y)` is potentially different + // from that of `compose(specialize(A,X), specialize(B,Y))`, even when both are + // semantically equivalent programs. + // + // Right now we are using the first option: we are first generating a full composition + // of all the code we plan to use (global scope plus all entry points), and then + // specializing it to the concatenated specialization arguments for all of that. + // + // In some cases, though, this model isn't appropriate. For example, when dealing with + // ray-tracing shaders and local root signatures, we really want the parameters of each + // entry point (actually, each entry-point *group*) to be allocated distinct storage, + // which really means we want to compute something like: + // + // SpecializedGlobals = specialize(compose(ModuleA, ModuleB, ...), X, Y, ...) + // + // SpecializedEP1 = compose(SpecializedGlobals, specialize(EntryPoint1, T, U, ...)) + // SpecializedEP2 = compose(SpecializedGlobals, specialize(EntryPoint2, A, B, ...)) + // + // Note how in this case all entry points agree on the layout for the shared/common + // parameters, but their layouts are also independent of one another. + // + // Furthermore, in this example, loading another entry point into the system would not + // require re-computing the layouts (or generated kernel code) for any of the entry + // points that had already been loaded (in contrast to a compose-then-specialize + // approach). 
+ // + ComPtr specializedComponentType; + ComPtr diagnosticBlob; + auto result = getLayout()->getSlangProgram()->specialize( + specializationArgs.components.getArrayView().getBuffer(), + specializationArgs.getCount(), + specializedComponentType.writeRef(), + diagnosticBlob.writeRef()); + + // TODO: print diagnostic message via debug output interface. + + if (result != SLANG_OK) + return result; + + auto slangSpecializedLayout = specializedComponentType->getLayout(); + RefPtr specializedLayout; + RootShaderObjectLayoutImpl::create( + static_cast(getRenderer()), + specializedComponentType, + slangSpecializedLayout, + specializedLayout.writeRef()); + + // Note: Computing the layout for the specialized program will have also computed + // the layouts for the entry points, and we really need to attach that information + // to them so that they don't go and try to compute their own specializations. + // + // TODO: Well, if we move to the specialization model described above then maybe + // we *will* want entry points to do their own specialization work... + // + auto entryPointCount = m_entryPoints.getCount(); + for (Index i = 0; i < entryPointCount; ++i) + { + auto entryPointInfo = specializedLayout->getEntryPoint(i); + auto entryPointVars = m_entryPoints[i]; - SLANG_RETURN_ON_FAIL(uploadResource.initCommitted(m_device, heapProps, D3D12_HEAP_FLAG_NONE, uploadResourceDesc, D3D12_RESOURCE_STATE_GENERIC_READ, nullptr)); - } + entryPointVars->m_specializedLayout = entryPointInfo.layout; + } - if (srcData) - { - // Copy data to the intermediate upload heap and then schedule a copy - // from the upload heap to the vertex buffer. - UINT8* dstData; - D3D12_RANGE readRange = {}; // We do not intend to read from this resource on the CPU. 
+ *outLayout = specializedLayout.detach(); + return SLANG_OK; + } - ID3D12Resource* dxUploadResource = uploadResource.getResource(); + List> m_entryPoints; + }; - SLANG_RETURN_ON_FAIL(dxUploadResource->Map(0, &readRange, reinterpret_cast(&dstData))); - ::memcpy(dstData, srcData, srcDataSize); - dxUploadResource->Unmap(0, nullptr); + class CommandBufferImpl + : public ICommandBuffer + , public RefObject + { + public: + SLANG_REF_OBJECT_IUNKNOWN_ALL + ICommandBuffer* getInterface(const Guid& guid) + { + if (guid == GfxGUID::IID_ISlangUnknown || guid == GfxGUID::IID_ICommandBuffer) + return static_cast(this); + return nullptr; + } + public: + ComPtr m_cmdList; + ExecutionFrameResources* m_frame; + D3D12Device* m_renderer; + void init(D3D12Device* renderer, ExecutionFrameResources* frame) + { + m_frame = frame; + m_renderer = renderer; + m_cmdList = m_frame->createCommandList(renderer->m_device); - auto encodeInfo = encodeResourceCommands(); - encodeInfo.d3dCommandList->CopyBufferRegion(resourceOut, 0, uploadResource, 0, bufferSize); - submitResourceCommandsAndWait(encodeInfo); - } + ID3D12DescriptorHeap* heaps[] = { + m_frame->m_viewHeap.getHeap(), + m_frame->m_samplerHeap.getHeap(), + }; + m_cmdList->SetDescriptorHeaps(SLANG_COUNT_OF(heaps), heaps); + } - return SLANG_OK; -} + class RenderCommandEncoderImpl + : public IRenderCommandEncoder + , public PipelineCommandEncoder + { + public: + virtual SLANG_NO_THROW SlangResult SLANG_MCALL + queryInterface(SlangUUID const& uuid, void** outObject) override + { + if (uuid == GfxGUID::IID_ISlangUnknown || + uuid == GfxGUID::IID_IRenderCommandEncoder) + { + *outObject = static_cast(this); + return SLANG_OK; + } + *outObject = nullptr; + return SLANG_E_NO_INTERFACE; + } + virtual SLANG_NO_THROW uint32_t SLANG_MCALL addRef() { return 1; } + virtual SLANG_NO_THROW uint32_t SLANG_MCALL release() { return 1; } + public: + RefPtr m_renderPass; + RefPtr m_framebuffer; -Result D3D12Device::captureTextureToSurface( - D3D12Resource& 
resource, - ResourceState state, - ISlangBlob** outBlob, - size_t* outRowPitch, - size_t* outPixelSize) -{ - const D3D12_RESOURCE_STATES initialState = D3DUtil::translateResourceState(state); + List m_boundVertexBuffers; - const D3D12_RESOURCE_DESC desc = resource.getResource()->GetDesc(); + RefPtr m_boundIndexBuffer; - // Don't bother supporting MSAA for right now - if (desc.SampleDesc.Count > 1) - { - fprintf(stderr, "ERROR: cannot capture multi-sample texture\n"); - return SLANG_FAIL; - } + D3D12_VIEWPORT m_viewports[kMaxRTVCount]; + D3D12_RECT m_scissorRects[kMaxRTVCount]; - size_t bytesPerPixel = sizeof(uint32_t); - size_t rowPitch = int(desc.Width) * bytesPerPixel; - size_t bufferSize = rowPitch * int(desc.Height); - if (outRowPitch) - *outRowPitch = rowPitch; - if (outPixelSize) - *outPixelSize = bytesPerPixel; - - D3D12Resource stagingResource; - { - D3D12_RESOURCE_DESC stagingDesc; - _initBufferResourceDesc(bufferSize, stagingDesc); + DXGI_FORMAT m_boundIndexFormat; + UINT m_boundIndexOffset; - D3D12_HEAP_PROPERTIES heapProps; - heapProps.Type = D3D12_HEAP_TYPE_READBACK; - heapProps.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN; - heapProps.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN; - heapProps.CreationNodeMask = 1; - heapProps.VisibleNodeMask = 1; + D3D12_PRIMITIVE_TOPOLOGY_TYPE m_primitiveTopologyType; + D3D12_PRIMITIVE_TOPOLOGY m_primitiveTopology; - SLANG_RETURN_ON_FAIL(stagingResource.initCommitted(m_device, heapProps, D3D12_HEAP_FLAG_NONE, stagingDesc, D3D12_RESOURCE_STATE_COPY_DEST, nullptr)); - } + void init( + D3D12Device* renderer, + ExecutionFrameResources* frame, + CommandBufferImpl* cmdBuffer, + RenderPassLayoutImpl* renderPass, + FramebufferImpl* framebuffer) + { + PipelineCommandEncoder::init(cmdBuffer); + m_preCmdList = nullptr; + m_device = renderer->m_device; + m_renderPass = renderPass; + m_framebuffer = framebuffer; + m_frame = frame; + m_boundVertexBuffers.clear(); + m_boundIndexBuffer = nullptr; + m_primitiveTopologyType = 
D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; + m_primitiveTopology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST; + m_boundIndexFormat = DXGI_FORMAT_UNKNOWN; + m_boundIndexOffset = 0; + m_currentPipeline = nullptr; - auto encodeInfo = encodeResourceCommands(); - auto currentState = D3DUtil::translateResourceState(state); + // Set render target states. + m_d3dCmdList->OMSetRenderTargets( + (UINT)framebuffer->renderTargetViews.getCount(), + framebuffer->renderTargetDescriptors.getArrayView().getBuffer(), + FALSE, + framebuffer->depthStencilView ? &framebuffer->depthStencilDescriptor : nullptr); - { - D3D12BarrierSubmitter submitter(encodeInfo.d3dCommandList); - resource.transition(currentState, D3D12_RESOURCE_STATE_COPY_SOURCE, submitter); - } + // Issue clear commands based on render pass set up. + for (Index i = 0; i < renderPass->m_renderTargetAccesses.getCount(); i++) + { + auto& access = renderPass->m_renderTargetAccesses[i]; - // Do the copy - { - D3D12_TEXTURE_COPY_LOCATION srcLoc; - srcLoc.pResource = resource; - srcLoc.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; - srcLoc.SubresourceIndex = 0; + // Transit resource states. + { + D3D12BarrierSubmitter submitter(m_d3dCmdList); + auto resourceViewImpl = + static_cast(framebuffer->renderTargetViews[i].get()); + auto textureResource = + static_cast(resourceViewImpl->m_resource.Ptr()); + D3D12_RESOURCE_STATES initialState; + if (access.initialState == ResourceState::Undefined) + { + initialState = textureResource->m_defaultState; + } + else + { + initialState = D3DUtil::translateResourceState(access.initialState); + } + textureResource->m_resource.transition( + initialState, + D3D12_RESOURCE_STATE_RENDER_TARGET, + submitter); + } + // Clear. 
+ if (access.loadOp == IRenderPassLayout::AttachmentLoadOp::Clear) + { + m_d3dCmdList->ClearRenderTargetView( + framebuffer->renderTargetDescriptors[i], + framebuffer->renderTargetClearValues[i].values, + 0, + nullptr); + } + } - D3D12_TEXTURE_COPY_LOCATION dstLoc; - dstLoc.pResource = stagingResource; - dstLoc.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; - dstLoc.PlacedFootprint.Offset = 0; - dstLoc.PlacedFootprint.Footprint.Format = desc.Format; - dstLoc.PlacedFootprint.Footprint.Width = UINT(desc.Width); - dstLoc.PlacedFootprint.Footprint.Height = UINT(desc.Height); - dstLoc.PlacedFootprint.Footprint.Depth = 1; - dstLoc.PlacedFootprint.Footprint.RowPitch = UINT(rowPitch); + if (renderPass->m_hasDepthStencil) + { + // Transit resource states. + { + D3D12BarrierSubmitter submitter(m_d3dCmdList); + auto resourceViewImpl = + static_cast(framebuffer->depthStencilView.get()); + auto textureResource = + static_cast(resourceViewImpl->m_resource.Ptr()); + D3D12_RESOURCE_STATES initialState; + if (renderPass->m_depthStencilAccess.initialState == + ResourceState::Undefined) + { + initialState = textureResource->m_defaultState; + } + else + { + initialState = D3DUtil::translateResourceState( + renderPass->m_depthStencilAccess.initialState); + } + textureResource->m_resource.transition( + initialState, + D3D12_RESOURCE_STATE_DEPTH_WRITE, + submitter); + } + // Clear. 
+ uint32_t clearFlags = 0; + if (renderPass->m_depthStencilAccess.loadOp == + IRenderPassLayout::AttachmentLoadOp::Clear) + { + clearFlags |= D3D12_CLEAR_FLAG_DEPTH; + } + if (renderPass->m_depthStencilAccess.stencilLoadOp == + IRenderPassLayout::AttachmentLoadOp::Clear) + { + clearFlags |= D3D12_CLEAR_FLAG_STENCIL; + } + if (clearFlags) + { + m_d3dCmdList->ClearDepthStencilView( + framebuffer->depthStencilDescriptor, + (D3D12_CLEAR_FLAGS)clearFlags, + framebuffer->depthStencilClearValue.depth, + framebuffer->depthStencilClearValue.stencil, + 0, + nullptr); + } + } + } - encodeInfo.d3dCommandList->CopyTextureRegion(&dstLoc, 0, 0, 0, &srcLoc, nullptr); - } + virtual SLANG_NO_THROW void SLANG_MCALL setPipelineState(IPipelineState* state) override + { + setPipelineStateImpl(state); + } - { - D3D12BarrierSubmitter submitter(encodeInfo.d3dCommandList); - resource.transition(D3D12_RESOURCE_STATE_COPY_SOURCE, currentState, submitter); - } + virtual SLANG_NO_THROW void SLANG_MCALL + bindRootShaderObject(IShaderObject* object) override + { + bindRootShaderObjectImpl(object); + } - // Submit the copy, and wait for copy to complete - submitResourceCommandsAndWait(encodeInfo); + virtual SLANG_NO_THROW void SLANG_MCALL setDescriptorSet( + IPipelineLayout* layout, + UInt index, + IDescriptorSet* descriptorSet) override + { + SLANG_UNUSED(layout); + SLANG_UNUSED(index); + SLANG_UNUSED(descriptorSet); + } - { - ID3D12Resource* dxResource = stagingResource; + virtual SLANG_NO_THROW void SLANG_MCALL + setViewports(uint32_t count, const Viewport* viewports) override + { + static const int kMaxViewports = + D3D12_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE; + assert(count <= kMaxViewports && count <= kMaxRTVCount); + for (UInt ii = 0; ii < count; ++ii) + { + auto& inViewport = viewports[ii]; + auto& dxViewport = m_viewports[ii]; - UINT8* data; - D3D12_RANGE readRange = {0, bufferSize}; + dxViewport.TopLeftX = inViewport.originX; + dxViewport.TopLeftY = inViewport.originY; + 
dxViewport.Width = inViewport.extentX; + dxViewport.Height = inViewport.extentY; + dxViewport.MinDepth = inViewport.minZ; + dxViewport.MaxDepth = inViewport.maxZ; + } + m_d3dCmdList->RSSetViewports(UINT(count), m_viewports); + } - SLANG_RETURN_ON_FAIL(dxResource->Map(0, &readRange, reinterpret_cast(&data))); + virtual SLANG_NO_THROW void SLANG_MCALL + setScissorRects(uint32_t count, const ScissorRect* rects) override + { + static const int kMaxScissorRects = + D3D12_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE; + assert(count <= kMaxScissorRects && count <= kMaxRTVCount); - RefPtr resultBlob = new Slang::ListBlob(); - resultBlob->m_data.setCount(bufferSize); - memcpy(resultBlob->m_data.getBuffer(), data, bufferSize); - dxResource->Unmap(0, nullptr); - *outBlob = resultBlob.detach(); - return SLANG_OK; - } -} + for (UInt ii = 0; ii < count; ++ii) + { + auto& inRect = rects[ii]; + auto& dxRect = m_scissorRects[ii]; -// !!!!!!!!!!!!!!!!!!!!!!!!!!!! Renderer interface !!!!!!!!!!!!!!!!!!!!!!!!!! 
+ dxRect.left = LONG(inRect.minX); + dxRect.top = LONG(inRect.minY); + dxRect.right = LONG(inRect.maxX); + dxRect.bottom = LONG(inRect.maxY); + } -Result D3D12Device::_createDevice(DeviceCheckFlags deviceCheckFlags, const UnownedStringSlice& nameMatch, D3D_FEATURE_LEVEL featureLevel, DeviceInfo& outDeviceInfo) -{ - outDeviceInfo.clear(); + m_d3dCmdList->RSSetScissorRects(UINT(count), m_scissorRects); + } - ComPtr dxgiFactory; - SLANG_RETURN_ON_FAIL(D3DUtil::createFactory(deviceCheckFlags, dxgiFactory)); + virtual SLANG_NO_THROW void SLANG_MCALL + setPrimitiveTopology(PrimitiveTopology topology) override + { + switch (topology) + { + case PrimitiveTopology::TriangleList: + { + m_primitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; + m_primitiveTopology = D3DUtil::getPrimitiveTopology(topology); + break; + } + default: + { + assert(!"Unhandled type"); + } + } + } - List> dxgiAdapters; - SLANG_RETURN_ON_FAIL(D3DUtil::findAdapters(deviceCheckFlags, nameMatch, dxgiFactory, dxgiAdapters)); + virtual SLANG_NO_THROW void SLANG_MCALL setVertexBuffers( + UInt startSlot, + UInt slotCount, + IBufferResource* const* buffers, + const UInt* strides, + const UInt* offsets) override + { + { + const Index num = startSlot + slotCount; + if (num > m_boundVertexBuffers.getCount()) + { + m_boundVertexBuffers.setCount(num); + } + } - ComPtr device; - ComPtr adapter; + for (UInt i = 0; i < slotCount; i++) + { + BufferResourceImpl* buffer = static_cast(buffers[i]); + if (buffer) + { + assert(buffer->m_initialUsage == IResource::Usage::VertexBuffer); + } - for (Index i = 0; i < dxgiAdapters.getCount(); ++i) - { - IDXGIAdapter* dxgiAdapter = dxgiAdapters[i]; - if (SLANG_SUCCEEDED(m_D3D12CreateDevice(dxgiAdapter, featureLevel, IID_PPV_ARGS(device.writeRef())))) - { - adapter = dxgiAdapter; - break; - } - } + BoundVertexBuffer& boundBuffer = m_boundVertexBuffers[startSlot + i]; + boundBuffer.m_buffer = buffer; + boundBuffer.m_stride = int(strides[i]); + boundBuffer.m_offset = 
int(offsets[i]); + } + } - if (!device) - { - return SLANG_FAIL; - } + virtual SLANG_NO_THROW void SLANG_MCALL setIndexBuffer( + IBufferResource* buffer, + Format indexFormat, + UInt offset = 0) override + { + m_boundIndexBuffer = (BufferResourceImpl*)buffer; + m_boundIndexFormat = D3DUtil::getMapFormat(indexFormat); + m_boundIndexOffset = UINT(offset); + } - if (m_dxDebug && (deviceCheckFlags & DeviceCheckFlag::UseDebug)) - { - m_dxDebug->EnableDebugLayer(); + void prepareDraw() + { + auto pipelineState = m_currentPipeline.Ptr(); + if (!pipelineState || (pipelineState->desc.type != PipelineType::Graphics)) + { + assert(!"No graphics pipeline state set"); + return; + } - ComPtr infoQueue; - if (SLANG_SUCCEEDED(device->QueryInterface(infoQueue.writeRef()))) - { - // Make break - infoQueue->SetBreakOnSeverity(D3D12_MESSAGE_SEVERITY_CORRUPTION, true); - infoQueue->SetBreakOnSeverity(D3D12_MESSAGE_SEVERITY_ERROR, true); - // infoQueue->SetBreakOnSeverity(D3D12_MESSAGE_SEVERITY_WARNING, true); + // Submit - setting for graphics + { + GraphicsSubmitter submitter(m_d3dCmdList); + _bindRenderState(&submitter); + } - // Apparently there is a problem with sm 6.3 with spurious errors, with debug layer enabled - D3D12_FEATURE_DATA_SHADER_MODEL featureShaderModel; - featureShaderModel.HighestShaderModel = D3D_SHADER_MODEL(0x63); - SLANG_SUCCEEDED(device->CheckFeatureSupport(D3D12_FEATURE_SHADER_MODEL, &featureShaderModel, sizeof(featureShaderModel))); + m_d3dCmdList->IASetPrimitiveTopology(m_primitiveTopology); - if (featureShaderModel.HighestShaderModel >= D3D_SHADER_MODEL(0x63)) - { - // Filter out any messages that cause issues - // TODO: Remove this when the debug layers work properly - D3D12_MESSAGE_ID messageIds[] = + // Set up vertex buffer views { - // When the debug layer is enabled this error is triggered sometimes after a CopyDescriptorsSimple - // call The failed check validates that the source and destination ranges of the copy do not - // overlap. 
The check assumes descriptor handles are pointers to memory, but this is not always the - // case and the check fails (even though everything is okay). - D3D12_MESSAGE_ID_COPY_DESCRIPTORS_INVALID_RANGES, - }; - - // We filter INFO messages because they are way too many - D3D12_MESSAGE_SEVERITY severities[] = { D3D12_MESSAGE_SEVERITY_INFO }; + int numVertexViews = 0; + D3D12_VERTEX_BUFFER_VIEW vertexViews[16]; + for (Index i = 0; i < m_boundVertexBuffers.getCount(); i++) + { + const BoundVertexBuffer& boundVertexBuffer = m_boundVertexBuffers[i]; + BufferResourceImpl* buffer = boundVertexBuffer.m_buffer; + if (buffer) + { + D3D12_VERTEX_BUFFER_VIEW& vertexView = vertexViews[numVertexViews++]; + vertexView.BufferLocation = + buffer->m_resource.getResource()->GetGPUVirtualAddress() + + boundVertexBuffer.m_offset; + vertexView.SizeInBytes = + UINT(buffer->getDesc()->sizeInBytes - boundVertexBuffer.m_offset); + vertexView.StrideInBytes = UINT(boundVertexBuffer.m_stride); + } + } + m_d3dCmdList->IASetVertexBuffers(0, numVertexViews, vertexViews); + } + // Set up index buffer + if (m_boundIndexBuffer) + { + D3D12_INDEX_BUFFER_VIEW indexBufferView; + indexBufferView.BufferLocation = + m_boundIndexBuffer->m_resource.getResource()->GetGPUVirtualAddress() + + m_boundIndexOffset; + indexBufferView.SizeInBytes = + UINT(m_boundIndexBuffer->getDesc()->sizeInBytes - m_boundIndexOffset); + indexBufferView.Format = m_boundIndexFormat; - D3D12_INFO_QUEUE_FILTER infoQueueFilter = {}; - infoQueueFilter.DenyList.NumSeverities = SLANG_COUNT_OF(severities); - infoQueueFilter.DenyList.pSeverityList = severities; - infoQueueFilter.DenyList.NumIDs = SLANG_COUNT_OF(messageIds); - infoQueueFilter.DenyList.pIDList = messageIds; + m_d3dCmdList->IASetIndexBuffer(&indexBufferView); + } + } + virtual SLANG_NO_THROW void SLANG_MCALL + draw(UInt vertexCount, UInt startVertex = 0) override + { + prepareDraw(); + m_d3dCmdList->DrawInstanced(UINT(vertexCount), 1, UINT(startVertex), 0); + } + virtual 
SLANG_NO_THROW void SLANG_MCALL + drawIndexed(UInt indexCount, UInt startIndex = 0, UInt baseVertex = 0) override + { + prepareDraw(); + m_d3dCmdList->DrawIndexedInstanced( + (UINT)indexCount, 1, (UINT)startIndex, (UINT)baseVertex, 0); + } + virtual SLANG_NO_THROW void SLANG_MCALL endEncoding() override + { + PipelineCommandEncoder::endEncodingImpl(); + // Issue clear commands based on render pass set up. + for (Index i = 0; i < m_renderPass->m_renderTargetAccesses.getCount(); i++) + { + auto& access = m_renderPass->m_renderTargetAccesses[i]; - infoQueue->PushStorageFilter(&infoQueueFilter); + // Transit resource states. + { + D3D12BarrierSubmitter submitter(m_d3dCmdList); + auto resourceViewImpl = static_cast( + m_framebuffer->renderTargetViews[i].get()); + auto textureResource = + static_cast(resourceViewImpl->m_resource.Ptr()); + textureResource->m_resource.transition( + D3D12_RESOURCE_STATE_RENDER_TARGET, + D3DUtil::translateResourceState(access.finalState), + submitter); + } + } + + if (m_renderPass->m_hasDepthStencil) + { + // Transit resource states. 
+ D3D12BarrierSubmitter submitter(m_d3dCmdList); + auto resourceViewImpl = + static_cast(m_framebuffer->depthStencilView.get()); + auto textureResource = + static_cast(resourceViewImpl->m_resource.Ptr()); + textureResource->m_resource.transition( + D3D12_RESOURCE_STATE_DEPTH_WRITE, + D3DUtil::translateResourceState( + m_renderPass->m_depthStencilAccess.finalState), + submitter); + } + m_framebuffer = nullptr; } - } - } - // Get the descs - { - adapter->GetDesc(&outDeviceInfo.m_desc); + virtual SLANG_NO_THROW void SLANG_MCALL + setStencilReference(uint32_t referenceValue) override + { + m_d3dCmdList->OMSetStencilRef((UINT)referenceValue); + } + }; - // Look up GetDesc1 info - ComPtr adapter1; - if (SLANG_SUCCEEDED(adapter->QueryInterface(adapter1.writeRef()))) + RenderCommandEncoderImpl m_renderCommandEncoder; + virtual SLANG_NO_THROW void SLANG_MCALL encodeRenderCommands( + IRenderPassLayout* renderPass, + IFramebuffer* framebuffer, + IRenderCommandEncoder** outEncoder) override { - adapter1->GetDesc1(&outDeviceInfo.m_desc1); + m_renderCommandEncoder.init( + m_renderer, + m_frame, + this, + static_cast(renderPass), + static_cast(framebuffer)); + *outEncoder = &m_renderCommandEncoder; } - } - - // Save other info - outDeviceInfo.m_device = device; - outDeviceInfo.m_dxgiFactory = dxgiFactory; - outDeviceInfo.m_adapter = adapter; - outDeviceInfo.m_isWarp = D3DUtil::isWarp(dxgiFactory, adapter); - - return SLANG_OK; -} - -static bool _isSupportedNVAPIOp(ID3D12Device* dev, uint32_t op) -{ -#ifdef GFX_NVAPI - { - bool isSupported; - NvAPI_Status status = NvAPI_D3D12_IsNvShaderExtnOpCodeSupported(dev, NvU32(op), &isSupported); - return status == NVAPI_OK && isSupported; - } -#else - return false; -#endif -} -Result D3D12Device::initialize(const Desc& desc) -{ - SLANG_RETURN_ON_FAIL(slangContext.initialize(desc.slang, SLANG_DXBC, "sm_5_1")); + class ComputeCommandEncoderImpl + : public IComputeCommandEncoder + , public PipelineCommandEncoder + { + public: + virtual 
SLANG_NO_THROW SlangResult SLANG_MCALL + queryInterface(SlangUUID const& uuid, void** outObject) override + { + if (uuid == GfxGUID::IID_ISlangUnknown || + uuid == GfxGUID::IID_IComputeCommandEncoder) + { + *outObject = static_cast(this); + return SLANG_OK; + } + *outObject = nullptr; + return SLANG_E_NO_INTERFACE; + } + virtual SLANG_NO_THROW uint32_t SLANG_MCALL addRef() { return 1; } + virtual SLANG_NO_THROW uint32_t SLANG_MCALL release() { return 1; } - SLANG_RETURN_ON_FAIL(GraphicsAPIRenderer::initialize(desc)); + public: + virtual SLANG_NO_THROW void SLANG_MCALL endEncoding() override + { + PipelineCommandEncoder::endEncodingImpl(); + } + void init( + D3D12Device* renderer, + ExecutionFrameResources* frame, + CommandBufferImpl* cmdBuffer) + { + PipelineCommandEncoder::init(cmdBuffer); + m_preCmdList = nullptr; + m_device = renderer->m_device; + m_frame = frame; + m_currentPipeline = nullptr; + } - // Initialize DeviceInfo - { - m_info.deviceType = DeviceType::DirectX12; - m_info.bindingStyle = BindingStyle::DirectX; - m_info.projectionStyle = ProjectionStyle::DirectX; - m_info.apiName = "Direct3D 12"; - static const float kIdentity[] = {1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1}; - ::memcpy(m_info.identityProjectionMatrix, kIdentity, sizeof(kIdentity)); - } + virtual SLANG_NO_THROW void SLANG_MCALL setPipelineState(IPipelineState* state) override + { + setPipelineStateImpl(state); + } - // Rather than statically link against D3D, we load it dynamically. 
+ virtual SLANG_NO_THROW void SLANG_MCALL + bindRootShaderObject(IShaderObject* object) override + { + bindRootShaderObjectImpl(object); + } - HMODULE d3dModule = LoadLibraryA("d3d12.dll"); - if (!d3dModule) - { - fprintf(stderr, "error: failed load 'd3d12.dll'\n"); - return SLANG_FAIL; - } + virtual SLANG_NO_THROW void SLANG_MCALL setDescriptorSet( + IPipelineLayout* layout, + UInt index, + IDescriptorSet* descriptorSet) override + { + SLANG_UNUSED(layout); + SLANG_UNUSED(index); + SLANG_UNUSED(descriptorSet); + } - // Get all the dll entry points - m_D3D12SerializeRootSignature = (PFN_D3D12_SERIALIZE_ROOT_SIGNATURE)loadProc(d3dModule, "D3D12SerializeRootSignature"); - if (!m_D3D12SerializeRootSignature) - { - return SLANG_FAIL; - } + virtual SLANG_NO_THROW void SLANG_MCALL dispatchCompute(int x, int y, int z) override + { + // Submit binding for compute + { + ComputeSubmitter submitter(m_d3dCmdList); + _bindRenderState(&submitter); + } + m_d3dCmdList->Dispatch(x, y, z); + } + }; -#if ENABLE_DEBUG_LAYER - m_D3D12GetDebugInterface = (PFN_D3D12_GET_DEBUG_INTERFACE)loadProc(d3dModule, "D3D12GetDebugInterface"); - if (m_D3D12GetDebugInterface) - { - if (SLANG_SUCCEEDED(m_D3D12GetDebugInterface(IID_PPV_ARGS(m_dxDebug.writeRef())))) + ComputeCommandEncoderImpl m_computeCommandEncoder; + virtual SLANG_NO_THROW void SLANG_MCALL + encodeComputeCommands(IComputeCommandEncoder** outEncoder) override { -#if 0 - // Can enable for extra validation. NOTE! That d3d12 warns if you do.... - // D3D12 MESSAGE : Device Debug Layer Startup Options : GPU - Based Validation is enabled(disabled by default). - // This results in new validation not possible during API calls on the CPU, by creating patched shaders that have validation - // added directly to the shader. However, it can slow things down a lot, especially for applications with numerous - // PSOs.Time to see the first render frame may take several minutes. 
- // [INITIALIZATION MESSAGE #1016: CREATEDEVICE_DEBUG_LAYER_STARTUP_OPTIONS] + m_computeCommandEncoder.init(m_renderer, m_frame, this); + *outEncoder = &m_computeCommandEncoder; + } - ComPtr debug1; - if (SLANG_SUCCEEDED(m_dxDebug->QueryInterface(debug1.writeRef()))) + class ResourceCommandEncoderImpl : public IResourceCommandEncoder + { + public: + virtual SLANG_NO_THROW SlangResult SLANG_MCALL + queryInterface(SlangUUID const& uuid, void** outObject) override { - debug1->SetEnableGPUBasedValidation(true); + if (uuid == GfxGUID::IID_ISlangUnknown || + uuid == GfxGUID::IID_IResourceCommandEncoder) + { + *outObject = static_cast(this); + return SLANG_OK; + } + *outObject = nullptr; + return SLANG_E_NO_INTERFACE; } -#endif - - m_dxDebug->EnableDebugLayer(); - } - } -#endif + virtual SLANG_NO_THROW uint32_t SLANG_MCALL addRef() { return 1; } + virtual SLANG_NO_THROW uint32_t SLANG_MCALL release() { return 1; } - m_D3D12CreateDevice = (PFN_D3D12_CREATE_DEVICE)loadProc(d3dModule, "D3D12CreateDevice"); - if (!m_D3D12CreateDevice) - { - return SLANG_FAIL; - } + public: + CommandBufferImpl* m_commandBuffer; + void init(D3D12Device* renderer, CommandBufferImpl* commandBuffer) + { + m_commandBuffer = commandBuffer; + } + virtual SLANG_NO_THROW void SLANG_MCALL copyBuffer( + IBufferResource* dst, + size_t dstOffset, + IBufferResource* src, + size_t srcOffset, + size_t size) override + { + SLANG_UNUSED(dst); + SLANG_UNUSED(srcOffset); + SLANG_UNUSED(src); + SLANG_UNUSED(dstOffset); + SLANG_UNUSED(size); + } + virtual SLANG_NO_THROW void SLANG_MCALL uploadBufferData( + IBufferResource* dst, + size_t offset, + size_t size, + void* data) override + { + _uploadBufferData( + m_commandBuffer->m_cmdList, + static_cast(dst), + offset, + size, + data); + } + virtual SLANG_NO_THROW void SLANG_MCALL endEncoding() {} + }; - FlagCombiner combiner; - // TODO: we should probably provide a command-line option - // to override UseDebug of default rather than leave it - // up to each back-end 
to specify. -#if ENABLE_DEBUG_LAYER - combiner.add(DeviceCheckFlag::UseDebug, ChangeType::OnOff); ///< First try debug then non debug -#else - combiner.add(DeviceCheckFlag::UseDebug, ChangeType::Off); ///< Don't bother with debug -#endif - combiner.add(DeviceCheckFlag::UseHardwareDevice, ChangeType::OnOff); ///< First try hardware, then reference - - const D3D_FEATURE_LEVEL featureLevel = D3D_FEATURE_LEVEL_11_0; + ResourceCommandEncoderImpl m_resourceCommandEncoder; - const int numCombinations = combiner.getNumCombinations(); - for (int i = 0; i < numCombinations; ++i) - { - if (SLANG_SUCCEEDED(_createDevice(combiner.getCombination(i), UnownedStringSlice(desc.adapter), featureLevel, m_deviceInfo))) + virtual SLANG_NO_THROW void SLANG_MCALL + encodeResourceCommands(IResourceCommandEncoder** outEncoder) override { - break; + m_resourceCommandEncoder.init(m_renderer, this); + *outEncoder = &m_resourceCommandEncoder; } - } - - if (!m_deviceInfo.m_adapter) - { - // Couldn't find an adapter - return SLANG_FAIL; - } - // Set the device - m_device = m_deviceInfo.m_device; + virtual SLANG_NO_THROW void SLANG_MCALL close() override { m_cmdList->Close(); } + }; - // NVAPI - if (desc.nvapiExtnSlot >= 0) + class CommandQueueImpl + : public ICommandQueue + , public RefObject { - if (SLANG_FAILED(NVAPIUtil::initialize())) + public: + SLANG_REF_OBJECT_IUNKNOWN_ALL + ICommandQueue* getInterface(const Guid& guid) { - return SLANG_E_NOT_AVAILABLE; + if (guid == GfxGUID::IID_ISlangUnknown || guid == GfxGUID::IID_ICommandQueue) + return static_cast(this); + return nullptr; } -#ifdef GFX_NVAPI - // From DOCS: Applications are expected to bind null UAV to this slot. - // NOTE! We don't currently do this, but doesn't seem to be a problem. 
- - const NvAPI_Status status = NvAPI_D3D12_SetNvShaderExtnSlotSpace(m_device, NvU32(desc.nvapiExtnSlot), NvU32(0)); - - if (status != NVAPI_OK) + public: + struct CommandBufferPool { - return SLANG_E_NOT_AVAILABLE; - } - - if (_isSupportedNVAPIOp(m_device, NV_EXTN_OP_UINT64_ATOMIC)) + List> pool; + uint32_t allocIndex = 0; + RefPtr allocCommandBuffer(D3D12Device* renderer, ExecutionFrameResources* frame) + { + if ((Index)allocIndex < pool.getCount()) + { + RefPtr result = pool[allocIndex]; + result->init(renderer, frame); + allocIndex++; + return result; + } + RefPtr cmdBuffer = new CommandBufferImpl(); + cmdBuffer->init(renderer, frame); + pool.add(cmdBuffer); + return cmdBuffer; + } + void reset() + { + allocIndex = 0; + } + }; + List m_commandBufferPools; + List m_frames; + uint32_t m_frameIndex = 0; + D3D12Device* m_renderer; + ComPtr m_device; + ComPtr m_d3dQueue; + ComPtr m_fence; + uint64_t m_fenceValue = 0; + HANDLE globalWaitHandle; + Desc m_desc; + Result init( + D3D12Device* renderer, + uint32_t frameCount, + uint32_t viewHeapSize, + uint32_t samplerHeapSize) { - m_features.add("atomic-int64"); + m_renderer = renderer; + m_device = renderer->m_device; + m_frames.setCount(frameCount); + m_commandBufferPools.setCount(frameCount); + for (uint32_t i = 0; i < frameCount; i++) + { + SLANG_RETURN_ON_FAIL(m_frames[i].init(m_device, viewHeapSize, samplerHeapSize)); + } + D3D12_COMMAND_QUEUE_DESC queueDesc = {}; + queueDesc.Type = D3D12_COMMAND_LIST_TYPE_DIRECT; + SLANG_RETURN_ON_FAIL(m_device->CreateCommandQueue(&queueDesc, IID_PPV_ARGS(m_d3dQueue.writeRef()))); + SLANG_RETURN_ON_FAIL( + m_device->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(m_fence.writeRef()))); + globalWaitHandle = CreateEventEx( + nullptr, + nullptr, + CREATE_EVENT_INITIAL_SET | CREATE_EVENT_MANUAL_RESET, + EVENT_ALL_ACCESS); + return SLANG_OK; } - if (_isSupportedNVAPIOp(m_device, NV_EXTN_OP_FP32_ATOMIC)) + ~CommandQueueImpl() { - m_features.add("atomic-float"); + wait(); + 
CloseHandle(globalWaitHandle); } - - m_nvapi = true; -#endif - - } - - // Find what features are supported - { - // Check this is how this is laid out... - SLANG_COMPILE_TIME_ASSERT(D3D_SHADER_MODEL_6_0 == 0x60); - + virtual SLANG_NO_THROW const Desc& SLANG_MCALL getDesc() override { - D3D12_FEATURE_DATA_SHADER_MODEL featureShaderModel; - featureShaderModel.HighestShaderModel = D3D_SHADER_MODEL(0x62); - - // TODO: Currently warp causes a crash when using half, so disable for now - if (SLANG_SUCCEEDED(m_device->CheckFeatureSupport(D3D12_FEATURE_SHADER_MODEL, &featureShaderModel, sizeof(featureShaderModel))) && - m_deviceInfo.m_isWarp == false && - featureShaderModel.HighestShaderModel >= 0x62) - { - // With sm_6_2 we have half - m_features.add("half"); - } + return m_desc; } - // Check what min precision support we have + virtual SLANG_NO_THROW Result SLANG_MCALL + createCommandBuffer(ICommandBuffer** outCommandBuffer) override { - D3D12_FEATURE_DATA_D3D12_OPTIONS options; - if (SLANG_SUCCEEDED(m_device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS, &options, sizeof(options)))) - { - auto minPrecisionSupport = options.MinPrecisionSupport; - } + RefPtr result = + m_commandBufferPools[m_frameIndex].allocCommandBuffer( + m_renderer, &m_frames[m_frameIndex]); + *outCommandBuffer = result.detach(); + return SLANG_OK; } - } - - m_desc = desc; - - // Create a command queue for internal resource transfer operations. 
- SLANG_RETURN_ON_FAIL(createCommandQueueImpl(1, 32, 4, m_resourceCommandQueue.writeRef())); - - SLANG_RETURN_ON_FAIL(m_cpuViewHeap.init (m_device, 1024, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV)); - SLANG_RETURN_ON_FAIL(m_cpuSamplerHeap.init(m_device, 64, D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER)); - - SLANG_RETURN_ON_FAIL(m_rtvAllocator.init (m_device, 16, D3D12_DESCRIPTOR_HEAP_TYPE_RTV)); - SLANG_RETURN_ON_FAIL(m_dsvAllocator.init (m_device, 16, D3D12_DESCRIPTOR_HEAP_TYPE_DSV)); - SLANG_RETURN_ON_FAIL(m_viewAllocator.init (m_device, 64, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV)); - SLANG_RETURN_ON_FAIL(m_samplerAllocator.init(m_device, 16, D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER)); - - ComPtr dxgiDevice; - if (m_deviceInfo.m_adapter) - { - DXGI_ADAPTER_DESC adapterDesc; - m_deviceInfo.m_adapter->GetDesc(&adapterDesc); - m_adapterName = String::fromWString(adapterDesc.Description); - m_info.adapterName = m_adapterName.begin(); - } - - m_isInitialized = true; - return SLANG_OK; -} - -Result D3D12Device::createCommandQueue(const ICommandQueue::Desc& desc, ICommandQueue** outQueue) -{ - RefPtr queue; - SLANG_RETURN_ON_FAIL(createCommandQueueImpl(8, 4096, 1024, queue.writeRef())); - *outQueue = queue.detach(); - return SLANG_OK; -} - -SLANG_NO_THROW Result SLANG_MCALL D3D12Device::createSwapchain( - const ISwapchain::Desc& desc, WindowHandle window, ISwapchain** outSwapchain) -{ - RefPtr swapchain = new SwapchainImpl(); - SLANG_RETURN_ON_FAIL(swapchain->init(this, desc, window)); - *outSwapchain = swapchain.detach(); - return SLANG_OK; -} - -SlangResult D3D12Device::readTextureResource( - ITextureResource* resource, - ResourceState state, - ISlangBlob** outBlob, - size_t* outRowPitch, - size_t* outPixelSize) -{ - return captureTextureToSurface( - static_cast(resource)->m_resource, - state, - outBlob, - outRowPitch, - outPixelSize); -} - -static D3D12_RESOURCE_STATES _calcResourceState(IResource::Usage usage) -{ - typedef IResource::Usage Usage; - switch (usage) - { - case 
Usage::VertexBuffer: return D3D12_RESOURCE_STATE_VERTEX_AND_CONSTANT_BUFFER; - case Usage::IndexBuffer: return D3D12_RESOURCE_STATE_INDEX_BUFFER; - case Usage::ConstantBuffer: return D3D12_RESOURCE_STATE_VERTEX_AND_CONSTANT_BUFFER; - case Usage::StreamOutput: return D3D12_RESOURCE_STATE_STREAM_OUT; - case Usage::RenderTarget: return D3D12_RESOURCE_STATE_RENDER_TARGET; - case Usage::DepthWrite: return D3D12_RESOURCE_STATE_DEPTH_WRITE; - case Usage::DepthRead: return D3D12_RESOURCE_STATE_DEPTH_READ; - case Usage::UnorderedAccess: return D3D12_RESOURCE_STATE_UNORDERED_ACCESS; - case Usage::PixelShaderResource: return D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE; - case Usage::NonPixelShaderResource: return D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE; - case Usage::ShaderResource: return D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE | - D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE; - case Usage::GenericRead: return D3D12_RESOURCE_STATE_GENERIC_READ; - default: return D3D12_RESOURCE_STATES(0); - } -} - -static D3D12_RESOURCE_FLAGS _calcResourceFlag(IResource::BindFlag::Enum bindFlag) -{ - typedef IResource::BindFlag BindFlag; - switch (bindFlag) - { - case BindFlag::RenderTarget: return D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET; - case BindFlag::DepthStencil: return D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL; - case BindFlag::UnorderedAccess: return D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS; - default: return D3D12_RESOURCE_FLAG_NONE; - } -} - -static D3D12_RESOURCE_FLAGS _calcResourceBindFlags(IResource::Usage initialUsage, int bindFlags) -{ - int dstFlags = 0; - while (bindFlags) - { - int lsb = bindFlags & -bindFlags; - - dstFlags |= _calcResourceFlag(IResource::BindFlag::Enum(lsb)); - bindFlags &= ~lsb; - } - return D3D12_RESOURCE_FLAGS(dstFlags); -} - -static D3D12_RESOURCE_DIMENSION _calcResourceDimension(IResource::Type type) -{ - switch (type) - { - case IResource::Type::Buffer: return D3D12_RESOURCE_DIMENSION_BUFFER; - case IResource::Type::Texture1D: return 
D3D12_RESOURCE_DIMENSION_TEXTURE1D; - case IResource::Type::TextureCube: - case IResource::Type::Texture2D: + + virtual SLANG_NO_THROW void SLANG_MCALL + executeCommandBuffers(uint32_t count, ICommandBuffer* const* commandBuffers) override { - return D3D12_RESOURCE_DIMENSION_TEXTURE2D; + ShortList commandLists; + for (uint32_t i = 0; i < count; i++) + { + auto cmdImpl = static_cast(commandBuffers[i]); + commandLists.add(cmdImpl->m_cmdList); + } + m_d3dQueue->ExecuteCommandLists((UINT)count, commandLists.getArrayView().getBuffer()); + + auto& frame = m_frames[m_frameIndex]; + m_fenceValue++; + m_d3dQueue->Signal(m_fence, m_fenceValue); + ResetEvent(frame.fenceEvent); + ResetEvent(globalWaitHandle); + m_fence->SetEventOnCompletion(m_fenceValue, frame.fenceEvent); + swapExecutionFrame(); } - case IResource::Type::Texture3D: return D3D12_RESOURCE_DIMENSION_TEXTURE3D; - default: return D3D12_RESOURCE_DIMENSION_UNKNOWN; - } -} -Result D3D12Device::createTextureResource(IResource::Usage initialUsage, const ITextureResource::Desc& descIn, const ITextureResource::SubresourceData* initData, ITextureResource** outResource) -{ - // Description of uploading on Dx12 - // https://msdn.microsoft.com/en-us/library/windows/desktop/dn899215%28v=vs.85%29.aspx + void swapExecutionFrame() + { + m_frameIndex = (m_frameIndex + 1) % m_frames.getCount(); + auto& frame = m_frames[m_frameIndex]; + frame.reset(); + m_commandBufferPools[m_frameIndex].reset(); + } - TextureResource::Desc srcDesc(descIn); - srcDesc.setDefaults(initialUsage); + virtual SLANG_NO_THROW void SLANG_MCALL wait() override + { + m_fenceValue++; + m_d3dQueue->Signal(m_fence, m_fenceValue); + ResetEvent(globalWaitHandle); + m_fence->SetEventOnCompletion(m_fenceValue, globalWaitHandle); + WaitForSingleObject(globalWaitHandle, INFINITE); + } + }; - const DXGI_FORMAT pixelFormat = D3DUtil::getMapFormat(srcDesc.format); - if (pixelFormat == DXGI_FORMAT_UNKNOWN) + class SwapchainImpl : public D3DSwapchainBase { - return 
SLANG_FAIL; - } + public: + ComPtr m_queue; + ComPtr m_dxgiFactory; + ComPtr m_swapChain3; + ComPtr m_fence; + ShortList m_frameEvents; + uint64_t fenceValue = 0; + Result init( + D3D12Device* renderer, + const ISwapchain::Desc& swapchainDesc, + WindowHandle window) + { + m_queue = static_cast(swapchainDesc.queue)->m_d3dQueue; + m_dxgiFactory = renderer->m_deviceInfo.m_dxgiFactory; + SLANG_RETURN_ON_FAIL( + D3DSwapchainBase::init(swapchainDesc, window, DXGI_SWAP_EFFECT_FLIP_DISCARD)); + renderer->m_device->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(m_fence.writeRef())); - const int arraySize = srcDesc.calcEffectiveArraySize(); + SLANG_RETURN_ON_FAIL(m_swapChain->QueryInterface(m_swapChain3.writeRef())); + for (uint32_t i = 0; i < swapchainDesc.imageCount; i++) + { + m_frameEvents.add(CreateEventEx( + nullptr, + false, + CREATE_EVENT_INITIAL_SET | CREATE_EVENT_MANUAL_RESET, + EVENT_ALL_ACCESS)); + } + return SLANG_OK; + } - const D3D12_RESOURCE_DIMENSION dimension = _calcResourceDimension(srcDesc.type); - if (dimension == D3D12_RESOURCE_DIMENSION_UNKNOWN) - { - return SLANG_FAIL; - } + virtual void createSwapchainBufferImages() override + { + m_images.clear(); + + for (uint32_t i = 0; i < m_desc.imageCount; i++) + { + ComPtr d3dResource; + m_swapChain->GetBuffer(i, IID_PPV_ARGS(d3dResource.writeRef())); + ITextureResource::Desc imageDesc = {}; + imageDesc.setDefaults(IResource::Usage::RenderTarget); + imageDesc.init2D( + IResource::Type::Texture2D, m_desc.format, m_desc.width, m_desc.height, 0); + RefPtr image = new TextureResourceImpl(imageDesc); + image->m_resource.setResource(d3dResource.get()); + image->m_defaultState = D3D12_RESOURCE_STATE_PRESENT; + ComPtr imageResourcePtr; + imageResourcePtr = image.Ptr(); + m_images.add(imageResourcePtr); + } + for (auto evt : m_frameEvents) + SetEvent(evt); + } + virtual IDXGIFactory* getDXGIFactory() override { return m_dxgiFactory; } + virtual IUnknown* getOwningDevice() override { return m_queue; } + virtual 
SLANG_NO_THROW int SLANG_MCALL acquireNextImage() override + { + auto result = (int)m_swapChain3->GetCurrentBackBufferIndex(); + WaitForSingleObject(m_frameEvents[result], INFINITE); + ResetEvent(m_frameEvents[result]); + return result; + } + virtual SLANG_NO_THROW Result SLANG_MCALL present() override + { + SLANG_RETURN_ON_FAIL(D3DSwapchainBase::present()); + fenceValue++; + m_fence->SetEventOnCompletion(fenceValue, m_frameEvents[m_swapChain3->GetCurrentBackBufferIndex()]); + m_queue->Signal(m_fence, fenceValue); + return SLANG_OK; + } + }; - const int numMipMaps = srcDesc.numMipLevels; + static PROC loadProc(HMODULE module, char const* name); - // Setup desc - D3D12_RESOURCE_DESC resourceDesc; + Result createCommandQueueImpl( + uint32_t frameCount, + uint32_t viewHeapSize, + uint32_t samplerHeapSize, + CommandQueueImpl** outQueue); - resourceDesc.Dimension = dimension; - resourceDesc.Format = pixelFormat; - resourceDesc.Width = srcDesc.size.width; - resourceDesc.Height = srcDesc.size.height; - resourceDesc.DepthOrArraySize = (srcDesc.size.depth > 1) ? 
srcDesc.size.depth : arraySize; + Result createBuffer( + const D3D12_RESOURCE_DESC& resourceDesc, + const void* srcData, + size_t srcDataSize, + D3D12Resource& uploadResource, + D3D12_RESOURCE_STATES finalState, + D3D12Resource& resourceOut); - resourceDesc.MipLevels = numMipMaps; - resourceDesc.SampleDesc.Count = srcDesc.sampleDesc.numSamples; - resourceDesc.SampleDesc.Quality = srcDesc.sampleDesc.quality; + Result captureTextureToSurface( + D3D12Resource& resource, + ResourceState state, + ISlangBlob** blob, + size_t* outRowPitch, + size_t* outPixelSize); - resourceDesc.Flags = D3D12_RESOURCE_FLAG_NONE; - resourceDesc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN; + Result _createDevice( + DeviceCheckFlags deviceCheckFlags, + const UnownedStringSlice& nameMatch, + D3D_FEATURE_LEVEL featureLevel, + DeviceInfo& outDeviceInfo); - switch (initialUsage) + struct ResourceCommandRecordInfo { - case IResource::Usage::RenderTarget: - resourceDesc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET; - break; - case IResource::Usage::DepthWrite: - resourceDesc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL; - break; - case IResource::Usage::UnorderedAccess: - resourceDesc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS; - break; - default: - break; - } - - resourceDesc.Alignment = 0; - - RefPtr texture(new TextureResourceImpl(srcDesc)); - - // Create the target resource + ComPtr commandBuffer; + ID3D12GraphicsCommandList* d3dCommandList; + }; + ResourceCommandRecordInfo encodeResourceCommands() { - D3D12_HEAP_PROPERTIES heapProps; - - heapProps.Type = D3D12_HEAP_TYPE_DEFAULT; - heapProps.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN; - heapProps.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN; - heapProps.CreationNodeMask = 1; - heapProps.VisibleNodeMask = 1; - - D3D12_CLEAR_VALUE clearValue; - D3D12_CLEAR_VALUE* clearValuePtr = &clearValue; - if ((resourceDesc.Flags & (D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET | - D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL)) == 0) - { - 
clearValuePtr = nullptr; - } - clearValue.Format = pixelFormat; - memcpy(clearValue.Color, &descIn.optimalClearValue.color, sizeof(clearValue.Color)); - clearValue.DepthStencil.Depth = descIn.optimalClearValue.depthStencil.depth; - clearValue.DepthStencil.Stencil = descIn.optimalClearValue.depthStencil.stencil; - SLANG_RETURN_ON_FAIL(texture->m_resource.initCommitted( - m_device, - heapProps, - D3D12_HEAP_FLAG_NONE, - resourceDesc, - D3D12_RESOURCE_STATE_COPY_DEST, - clearValuePtr)); - - texture->m_resource.setDebugName(L"Texture"); + ResourceCommandRecordInfo info; + m_resourceCommandQueue->createCommandBuffer(info.commandBuffer.writeRef()); + info.d3dCommandList = static_cast(info.commandBuffer.get())->m_cmdList; + return info; + } + void submitResourceCommandsAndWait(const ResourceCommandRecordInfo& info) + { + info.commandBuffer->close(); + m_resourceCommandQueue->executeCommandBuffer(info.commandBuffer); + m_resourceCommandQueue->wait(); } - // Calculate the layout - List layouts; - layouts.setCount(numMipMaps); - List mipRowSizeInBytes; - mipRowSizeInBytes.setCount(numMipMaps); - List mipNumRows; - mipNumRows.setCount(numMipMaps); - - // NOTE! 
This is just the size for one array upload -> not for the whole texture - UInt64 requiredSize = 0; - m_device->GetCopyableFootprints(&resourceDesc, 0, numMipMaps, 0, layouts.begin(), mipNumRows.begin(), mipRowSizeInBytes.begin(), &requiredSize); - - // Sub resource indexing - // https://msdn.microsoft.com/en-us/library/windows/desktop/dn705766(v=vs.85).aspx#subresource_indexing - if (initData) - { - // Create the upload texture - D3D12Resource uploadTexture; - - { - D3D12_HEAP_PROPERTIES heapProps; - - heapProps.Type = D3D12_HEAP_TYPE_UPLOAD; - heapProps.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN; - heapProps.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN; - heapProps.CreationNodeMask = 1; - heapProps.VisibleNodeMask = 1; - - D3D12_RESOURCE_DESC uploadResourceDesc; - - uploadResourceDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; - uploadResourceDesc.Format = DXGI_FORMAT_UNKNOWN; - uploadResourceDesc.Width = requiredSize; - uploadResourceDesc.Height = 1; - uploadResourceDesc.DepthOrArraySize = 1; - uploadResourceDesc.MipLevels = 1; - uploadResourceDesc.SampleDesc.Count = 1; - uploadResourceDesc.SampleDesc.Quality = 0; - uploadResourceDesc.Flags = D3D12_RESOURCE_FLAG_NONE; - uploadResourceDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; - uploadResourceDesc.Alignment = 0; - - SLANG_RETURN_ON_FAIL(uploadTexture.initCommitted(m_device, heapProps, D3D12_HEAP_FLAG_NONE, uploadResourceDesc, D3D12_RESOURCE_STATE_GENERIC_READ, nullptr)); + // D3D12Device members. 
- uploadTexture.setDebugName(L"TextureUpload"); - } - // Get the pointer to the upload resource - ID3D12Resource* uploadResource = uploadTexture; + Desc m_desc; - int subResourceIndex = 0; - for (int arrayIndex = 0; arrayIndex < arraySize; arrayIndex++) - { - uint8_t* p; - uploadResource->Map(0, nullptr, reinterpret_cast(&p)); + gfx::DeviceInfo m_info; + String m_adapterName; - for (int j = 0; j < numMipMaps; ++j) - { - auto srcSubresource = initData[j]; + bool m_isInitialized = false; - const D3D12_PLACED_SUBRESOURCE_FOOTPRINT& layout = layouts[j]; - const D3D12_SUBRESOURCE_FOOTPRINT& footprint = layout.Footprint; + ComPtr m_dxDebug; - const TextureResource::Size mipSize = srcDesc.size.calcMipSize(j); + DeviceInfo m_deviceInfo; + ID3D12Device* m_device = nullptr; - assert(footprint.Width == mipSize.width && footprint.Height == mipSize.height && footprint.Depth == mipSize.depth); + RefPtr m_resourceCommandQueue; - auto mipRowSize = mipRowSizeInBytes[j]; + D3D12HostVisibleDescriptorAllocator m_rtvAllocator; + D3D12HostVisibleDescriptorAllocator m_dsvAllocator; + // Space in the GPU-visible heaps is precious, so we will also keep + // around CPU-visible heaps for storing descriptors in a format + // that is ready for copying into the GPU-visible heaps as needed. 
+ // + D3D12HostVisibleDescriptorAllocator m_cpuViewHeap; ///< Cbv, Srv, Uav + D3D12HostVisibleDescriptorAllocator m_cpuSamplerHeap; ///< Heap for samplers - const ptrdiff_t dstMipRowPitch = ptrdiff_t(footprint.RowPitch); - const ptrdiff_t srcMipRowPitch = ptrdiff_t(srcSubresource.strideY); + // Dll entry points + PFN_D3D12_GET_DEBUG_INTERFACE m_D3D12GetDebugInterface = nullptr; + PFN_D3D12_CREATE_DEVICE m_D3D12CreateDevice = nullptr; + PFN_D3D12_SERIALIZE_ROOT_SIGNATURE m_D3D12SerializeRootSignature = nullptr; - const ptrdiff_t dstMipLayerPitch = ptrdiff_t(footprint.RowPitch*footprint.Height); - const ptrdiff_t srcMipLayerPitch = ptrdiff_t(srcSubresource.strideZ); + bool m_nvapi = false; +}; - // Our outer loop will copy the depth layers one at a time. - // - const uint8_t* srcLayer = (const uint8_t*) srcSubresource.data; - uint8_t* dstLayer = p + layouts[j].Offset; - for (int l = 0; l < mipSize.depth; l++) - { - // Our inner loop will copy the rows one at a time. - // - const uint8_t* srcRow = srcLayer; - uint8_t* dstRow = dstLayer; - for (int k = 0; k < mipSize.height; ++k) - { - ::memcpy(dstRow, srcRow, (size_t)mipRowSize); - srcRow += srcMipRowPitch; - dstRow += dstMipRowPitch; - } +Result D3D12Device::PipelineCommandEncoder::_bindRenderState(Submitter* submitter) +{ + RefPtr newPipeline; + m_renderer->maybeSpecializePipeline( + m_currentPipeline, m_rootShaderObject, newPipeline); + RootShaderObjectImpl* rootObjectImpl = + static_cast(m_rootShaderObject.Ptr()); + PipelineStateImpl* newPipelineImpl = static_cast(newPipeline.Ptr()); + auto commandList = m_d3dCmdList; + auto pipelineTypeIndex = (int)newPipelineImpl->desc.type; + auto programImpl = static_cast(newPipelineImpl->m_program.get()); + commandList->SetPipelineState(newPipelineImpl->m_pipelineState); + submitter->setRootSignature(programImpl->m_rootObjectLayout->m_rootSignature); + ShortList descriptorTables; + RefPtr specializedRootLayout; + 
rootObjectImpl->getSpecializedLayout(specializedRootLayout.writeRef()); + RootShaderObjectLayoutImpl* rootLayoutImpl = + static_cast(specializedRootLayout.Ptr()); + for (auto& descSet : rootLayoutImpl->m_gpuDescriptorSetInfos) + { + if (descSet.resourceDescriptorCount) + { + DescriptorTable table; + table.heap = &m_frame->m_viewHeap; + table.table = m_frame->m_viewHeap.allocate((int)descSet.resourceDescriptorCount); + descriptorTables.add(table); + } + if (descSet.samplerDescriptorCount) + { + DescriptorTable table; + table.heap = &m_frame->m_samplerHeap; + table.table = m_frame->m_samplerHeap.allocate((int)descSet.samplerDescriptorCount); + descriptorTables.add(table); + } + } + RootBindingState bindState = {}; + bindState.device = m_renderer; + bindState.frame = m_frame; + auto descTablesView = descriptorTables.getArrayView(); + bindState.descriptorTables = descTablesView.arrayView; + SLANG_RETURN_ON_FAIL(rootObjectImpl->bindObject(this, &bindState)); + + for (Index i = 0; i < descriptorTables.getCount(); i++) + { + submitter->setRootDescriptorTable( + (int)i, descriptorTables[i].heap.getGpuHandle(descriptorTables[i].table)); + } + return SLANG_OK; +} - srcLayer += srcMipLayerPitch; - dstLayer += dstMipLayerPitch; - } +Result D3D12Device::createCommandQueueImpl( + uint32_t frameCount, + uint32_t viewHeapSize, + uint32_t samplerHeapSize, + D3D12Device::CommandQueueImpl** outQueue) +{ + RefPtr queue = new D3D12Device::CommandQueueImpl(); + SLANG_RETURN_ON_FAIL(queue->init(this, frameCount, viewHeapSize, samplerHeapSize)); + *outQueue = queue.detach(); + return SLANG_OK; +} - //assert(srcRow == (const uint8_t*)(srcMip.getBuffer() + srcMip.getCount())); - } - uploadResource->Unmap(0, nullptr); +SlangResult SLANG_MCALL createD3D12Device(const IDevice::Desc* desc, IDevice** outDevice) +{ + RefPtr result = new D3D12Device(); + SLANG_RETURN_ON_FAIL(result->initialize(*desc)); + *outDevice = result.detach(); + return SLANG_OK; +} - auto encodeInfo = 
encodeResourceCommands(); - for (int mipIndex = 0; mipIndex < numMipMaps; ++mipIndex) - { - // https://msdn.microsoft.com/en-us/library/windows/desktop/dn903862(v=vs.85).aspx +/* static */PROC D3D12Device::loadProc(HMODULE module, char const* name) +{ + PROC proc = ::GetProcAddress(module, name); + if (!proc) + { + fprintf(stderr, "error: failed load symbol '%s'\n", name); + return nullptr; + } + return proc; +} - D3D12_TEXTURE_COPY_LOCATION src; - src.pResource = uploadTexture; - src.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; - src.PlacedFootprint = layouts[mipIndex]; +D3D12Device::~D3D12Device() +{ +} - D3D12_TEXTURE_COPY_LOCATION dst; - dst.pResource = texture->m_resource; - dst.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; - dst.SubresourceIndex = subResourceIndex; - encodeInfo.d3dCommandList->CopyTextureRegion(&dst, 0, 0, 0, &src, nullptr); +static void _initSrvDesc(IResource::Type resourceType, const ITextureResource::Desc& textureDesc, const D3D12_RESOURCE_DESC& desc, DXGI_FORMAT pixelFormat, D3D12_SHADER_RESOURCE_VIEW_DESC& descOut) +{ + // create SRV + descOut = D3D12_SHADER_RESOURCE_VIEW_DESC(); - subResourceIndex++; - } + descOut.Format = (pixelFormat == DXGI_FORMAT_UNKNOWN) ? 
D3DUtil::calcFormat(D3DUtil::USAGE_SRV, desc.Format) : pixelFormat; + descOut.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + if (desc.DepthOrArraySize == 1) + { + switch (desc.Dimension) + { + case D3D12_RESOURCE_DIMENSION_TEXTURE1D: descOut.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE1D; break; + case D3D12_RESOURCE_DIMENSION_TEXTURE2D: descOut.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D; break; + case D3D12_RESOURCE_DIMENSION_TEXTURE3D: descOut.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE3D; break; + default: assert(!"Unknown dimension"); + } - // Block - waiting for copy to complete (so can drop upload texture) - submitResourceCommandsAndWait(encodeInfo); + descOut.Texture2D.MipLevels = desc.MipLevels; + descOut.Texture2D.MostDetailedMip = 0; + descOut.Texture2D.PlaneSlice = 0; + descOut.Texture2D.ResourceMinLODClamp = 0.0f; + } + else if (resourceType == IResource::Type::TextureCube) + { + if (textureDesc.arraySize > 1) + { + descOut.ViewDimension = D3D12_SRV_DIMENSION_TEXTURECUBEARRAY; + + descOut.TextureCubeArray.NumCubes = textureDesc.arraySize; + descOut.TextureCubeArray.First2DArrayFace = 0; + descOut.TextureCubeArray.MipLevels = desc.MipLevels; + descOut.TextureCubeArray.MostDetailedMip = 0; + descOut.TextureCubeArray.ResourceMinLODClamp = 0; + } + else + { + descOut.ViewDimension = D3D12_SRV_DIMENSION_TEXTURECUBE; + + descOut.TextureCube.MipLevels = desc.MipLevels; + descOut.TextureCube.MostDetailedMip = 0; + descOut.TextureCube.ResourceMinLODClamp = 0; } } + else { - auto encodeInfo = encodeResourceCommands(); - const D3D12_RESOURCE_STATES finalState = _calcResourceState(initialUsage); + assert(desc.DepthOrArraySize > 1); + + switch (desc.Dimension) { - D3D12BarrierSubmitter submitter(encodeInfo.d3dCommandList); - texture->m_resource.transition(D3D12_RESOURCE_STATE_COPY_DEST, finalState, submitter); + case D3D12_RESOURCE_DIMENSION_TEXTURE1D: descOut.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE1DARRAY; break; + case 
D3D12_RESOURCE_DIMENSION_TEXTURE2D: descOut.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2DARRAY; break; + case D3D12_RESOURCE_DIMENSION_TEXTURE3D: descOut.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE3D; break; + + default: assert(!"Unknown dimension"); } - submitResourceCommandsAndWait(encodeInfo); + + descOut.Texture2DArray.ArraySize = desc.DepthOrArraySize; + descOut.Texture2DArray.MostDetailedMip = 0; + descOut.Texture2DArray.MipLevels = desc.MipLevels; + descOut.Texture2DArray.FirstArraySlice = 0; + descOut.Texture2DArray.PlaneSlice = 0; + descOut.Texture2DArray.ResourceMinLODClamp = 0; } +} - *outResource = texture.detach(); - return SLANG_OK; +static void _initBufferResourceDesc(size_t bufferSize, D3D12_RESOURCE_DESC& out) +{ + out = {}; + + out.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; + out.Alignment = 0; + out.Width = bufferSize; + out.Height = 1; + out.DepthOrArraySize = 1; + out.MipLevels = 1; + out.Format = DXGI_FORMAT_UNKNOWN; + out.SampleDesc.Count = 1; + out.SampleDesc.Quality = 0; + out.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; + out.Flags = D3D12_RESOURCE_FLAG_NONE; } -Result D3D12Device::createBufferResource(IResource::Usage initialUsage, const IBufferResource::Desc& descIn, const void* initData, IBufferResource** outResource) +Result D3D12Device::createBuffer(const D3D12_RESOURCE_DESC& resourceDesc, const void* srcData, size_t srcDataSize, D3D12Resource& uploadResource, D3D12_RESOURCE_STATES finalState, D3D12Resource& resourceOut) { - BufferResource::Desc srcDesc(descIn); - srcDesc.setDefaults(initialUsage); + const size_t bufferSize = size_t(resourceDesc.Width); - // Always align up to 256 bytes, since that is required for constant buffers. - // - // TODO: only do this for buffers that could potentially be bound as constant buffers... 
- // - const size_t alignedSizeInBytes = D3DUtil::calcAligned(srcDesc.sizeInBytes, 256); + { + D3D12_HEAP_PROPERTIES heapProps; + heapProps.Type = D3D12_HEAP_TYPE_DEFAULT; + heapProps.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN; + heapProps.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN; + heapProps.CreationNodeMask = 1; + heapProps.VisibleNodeMask = 1; - RefPtr buffer(new BufferResourceImpl(initialUsage, srcDesc)); + const D3D12_RESOURCE_STATES initialState = srcData ? D3D12_RESOURCE_STATE_COPY_DEST : finalState; - D3D12_RESOURCE_DESC bufferDesc; - _initBufferResourceDesc(alignedSizeInBytes, bufferDesc); + SLANG_RETURN_ON_FAIL(resourceOut.initCommitted(m_device, heapProps, D3D12_HEAP_FLAG_NONE, resourceDesc, initialState, nullptr)); + } + + { + D3D12_HEAP_PROPERTIES heapProps; + heapProps.Type = D3D12_HEAP_TYPE_UPLOAD; + heapProps.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN; + heapProps.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN; + heapProps.CreationNodeMask = 1; + heapProps.VisibleNodeMask = 1; + + D3D12_RESOURCE_DESC uploadResourceDesc(resourceDesc); + uploadResourceDesc.Flags = D3D12_RESOURCE_FLAG_NONE; + + SLANG_RETURN_ON_FAIL(uploadResource.initCommitted(m_device, heapProps, D3D12_HEAP_FLAG_NONE, uploadResourceDesc, D3D12_RESOURCE_STATE_GENERIC_READ, nullptr)); + } + + if (srcData) + { + // Copy data to the intermediate upload heap and then schedule a copy + // from the upload heap to the vertex buffer. + UINT8* dstData; + D3D12_RANGE readRange = {}; // We do not intend to read from this resource on the CPU. 
+ + ID3D12Resource* dxUploadResource = uploadResource.getResource(); - bufferDesc.Flags = _calcResourceBindFlags(initialUsage, srcDesc.bindFlags); + SLANG_RETURN_ON_FAIL(dxUploadResource->Map(0, &readRange, reinterpret_cast(&dstData))); + ::memcpy(dstData, srcData, srcDataSize); + dxUploadResource->Unmap(0, nullptr); - const D3D12_RESOURCE_STATES initialState = _calcResourceState(initialUsage); - SLANG_RETURN_ON_FAIL(createBuffer(bufferDesc, initData, srcDesc.sizeInBytes, buffer->m_uploadResource, initialState, buffer->m_resource)); + auto encodeInfo = encodeResourceCommands(); + encodeInfo.d3dCommandList->CopyBufferRegion(resourceOut, 0, uploadResource, 0, bufferSize); + submitResourceCommandsAndWait(encodeInfo); + } - *outResource = buffer.detach(); return SLANG_OK; } -D3D12_FILTER_TYPE translateFilterMode(TextureFilteringMode mode) +Result D3D12Device::captureTextureToSurface( + D3D12Resource& resource, + ResourceState state, + ISlangBlob** outBlob, + size_t* outRowPitch, + size_t* outPixelSize) { - switch (mode) + const D3D12_RESOURCE_STATES initialState = D3DUtil::translateResourceState(state); + + const D3D12_RESOURCE_DESC desc = resource.getResource()->GetDesc(); + + // Don't bother supporting MSAA for right now + if (desc.SampleDesc.Count > 1) { - default: - return D3D12_FILTER_TYPE(0); + fprintf(stderr, "ERROR: cannot capture multi-sample texture\n"); + return SLANG_FAIL; + } -#define CASE(SRC, DST) \ - case TextureFilteringMode::SRC: return D3D12_FILTER_TYPE_##DST + size_t bytesPerPixel = sizeof(uint32_t); + size_t rowPitch = int(desc.Width) * bytesPerPixel; + size_t bufferSize = rowPitch * int(desc.Height); + if (outRowPitch) + *outRowPitch = rowPitch; + if (outPixelSize) + *outPixelSize = bytesPerPixel; + + D3D12Resource stagingResource; + { + D3D12_RESOURCE_DESC stagingDesc; + _initBufferResourceDesc(bufferSize, stagingDesc); - CASE(Point, POINT); - CASE(Linear, LINEAR); + D3D12_HEAP_PROPERTIES heapProps; + heapProps.Type = D3D12_HEAP_TYPE_READBACK; + 
heapProps.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN; + heapProps.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN; + heapProps.CreationNodeMask = 1; + heapProps.VisibleNodeMask = 1; -#undef CASE + SLANG_RETURN_ON_FAIL(stagingResource.initCommitted(m_device, heapProps, D3D12_HEAP_FLAG_NONE, stagingDesc, D3D12_RESOURCE_STATE_COPY_DEST, nullptr)); } -} -D3D12_FILTER_REDUCTION_TYPE translateFilterReduction(TextureReductionOp op) -{ - switch (op) + auto encodeInfo = encodeResourceCommands(); + auto currentState = D3DUtil::translateResourceState(state); + { - default: - return D3D12_FILTER_REDUCTION_TYPE(0); + D3D12BarrierSubmitter submitter(encodeInfo.d3dCommandList); + resource.transition(currentState, D3D12_RESOURCE_STATE_COPY_SOURCE, submitter); + } -#define CASE(SRC, DST) \ - case TextureReductionOp::SRC: return D3D12_FILTER_REDUCTION_TYPE_##DST + // Do the copy + { + D3D12_TEXTURE_COPY_LOCATION srcLoc; + srcLoc.pResource = resource; + srcLoc.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; + srcLoc.SubresourceIndex = 0; - CASE(Average, STANDARD); - CASE(Comparison, COMPARISON); - CASE(Minimum, MINIMUM); - CASE(Maximum, MAXIMUM); + D3D12_TEXTURE_COPY_LOCATION dstLoc; + dstLoc.pResource = stagingResource; + dstLoc.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; + dstLoc.PlacedFootprint.Offset = 0; + dstLoc.PlacedFootprint.Footprint.Format = desc.Format; + dstLoc.PlacedFootprint.Footprint.Width = UINT(desc.Width); + dstLoc.PlacedFootprint.Footprint.Height = UINT(desc.Height); + dstLoc.PlacedFootprint.Footprint.Depth = 1; + dstLoc.PlacedFootprint.Footprint.RowPitch = UINT(rowPitch); -#undef CASE + encodeInfo.d3dCommandList->CopyTextureRegion(&dstLoc, 0, 0, 0, &srcLoc, nullptr); } -} -D3D12_TEXTURE_ADDRESS_MODE translateAddressingMode(TextureAddressingMode mode) -{ - switch (mode) { - default: - return D3D12_TEXTURE_ADDRESS_MODE(0); + D3D12BarrierSubmitter submitter(encodeInfo.d3dCommandList); + resource.transition(D3D12_RESOURCE_STATE_COPY_SOURCE, 
currentState, submitter); + } -#define CASE(SRC, DST) \ - case TextureAddressingMode::SRC: return D3D12_TEXTURE_ADDRESS_MODE_##DST + // Submit the copy, and wait for copy to complete + submitResourceCommandsAndWait(encodeInfo); - CASE(Wrap, WRAP); - CASE(ClampToEdge, CLAMP); - CASE(ClampToBorder, BORDER); - CASE(MirrorRepeat, MIRROR); - CASE(MirrorOnce, MIRROR_ONCE); + { + ID3D12Resource* dxResource = stagingResource; -#undef CASE + UINT8* data; + D3D12_RANGE readRange = {0, bufferSize}; + + SLANG_RETURN_ON_FAIL(dxResource->Map(0, &readRange, reinterpret_cast(&data))); + + RefPtr resultBlob = new Slang::ListBlob(); + resultBlob->m_data.setCount(bufferSize); + memcpy(resultBlob->m_data.getBuffer(), data, bufferSize); + dxResource->Unmap(0, nullptr); + *outBlob = resultBlob.detach(); + return SLANG_OK; } } -static D3D12_COMPARISON_FUNC translateComparisonFunc(ComparisonFunc func) +// !!!!!!!!!!!!!!!!!!!!!!!!!!!! Renderer interface !!!!!!!!!!!!!!!!!!!!!!!!!! + +Result D3D12Device::_createDevice(DeviceCheckFlags deviceCheckFlags, const UnownedStringSlice& nameMatch, D3D_FEATURE_LEVEL featureLevel, DeviceInfo& outDeviceInfo) { - switch (func) - { - default: - // TODO: need to report failures - return D3D12_COMPARISON_FUNC_ALWAYS; + outDeviceInfo.clear(); -#define CASE(FROM, TO) \ - case ComparisonFunc::FROM: return D3D12_COMPARISON_FUNC_##TO + ComPtr dxgiFactory; + SLANG_RETURN_ON_FAIL(D3DUtil::createFactory(deviceCheckFlags, dxgiFactory)); - CASE(Never, NEVER); - CASE(Less, LESS); - CASE(Equal, EQUAL); - CASE(LessEqual, LESS_EQUAL); - CASE(Greater, GREATER); - CASE(NotEqual, NOT_EQUAL); - CASE(GreaterEqual, GREATER_EQUAL); - CASE(Always, ALWAYS); -#undef CASE + List> dxgiAdapters; + SLANG_RETURN_ON_FAIL(D3DUtil::findAdapters(deviceCheckFlags, nameMatch, dxgiFactory, dxgiAdapters)); + + ComPtr device; + ComPtr adapter; + + for (Index i = 0; i < dxgiAdapters.getCount(); ++i) + { + IDXGIAdapter* dxgiAdapter = dxgiAdapters[i]; + if 
(SLANG_SUCCEEDED(m_D3D12CreateDevice(dxgiAdapter, featureLevel, IID_PPV_ARGS(device.writeRef())))) + { + adapter = dxgiAdapter; + break; + } } -} -Result D3D12Device::createSamplerState(ISamplerState::Desc const& desc, ISamplerState** outSampler) -{ - D3D12_FILTER_REDUCTION_TYPE dxReduction = translateFilterReduction(desc.reductionOp); - D3D12_FILTER dxFilter; - if (desc.maxAnisotropy > 1) + if (!device) { - dxFilter = D3D12_ENCODE_ANISOTROPIC_FILTER(dxReduction); + return SLANG_FAIL; } - else + + if (m_dxDebug && (deviceCheckFlags & DeviceCheckFlag::UseDebug)) { - D3D12_FILTER_TYPE dxMin = translateFilterMode(desc.minFilter); - D3D12_FILTER_TYPE dxMag = translateFilterMode(desc.magFilter); - D3D12_FILTER_TYPE dxMip = translateFilterMode(desc.mipFilter); + m_dxDebug->EnableDebugLayer(); - dxFilter = D3D12_ENCODE_BASIC_FILTER(dxMin, dxMag, dxMip, dxReduction); + ComPtr infoQueue; + if (SLANG_SUCCEEDED(device->QueryInterface(infoQueue.writeRef()))) + { + // Make break + infoQueue->SetBreakOnSeverity(D3D12_MESSAGE_SEVERITY_CORRUPTION, true); + infoQueue->SetBreakOnSeverity(D3D12_MESSAGE_SEVERITY_ERROR, true); + // infoQueue->SetBreakOnSeverity(D3D12_MESSAGE_SEVERITY_WARNING, true); + + // Apparently there is a problem with sm 6.3 with spurious errors, with debug layer enabled + D3D12_FEATURE_DATA_SHADER_MODEL featureShaderModel; + featureShaderModel.HighestShaderModel = D3D_SHADER_MODEL(0x63); + SLANG_SUCCEEDED(device->CheckFeatureSupport(D3D12_FEATURE_SHADER_MODEL, &featureShaderModel, sizeof(featureShaderModel))); + + if (featureShaderModel.HighestShaderModel >= D3D_SHADER_MODEL(0x63)) + { + // Filter out any messages that cause issues + // TODO: Remove this when the debug layers work properly + D3D12_MESSAGE_ID messageIds[] = + { + // When the debug layer is enabled this error is triggered sometimes after a CopyDescriptorsSimple + // call The failed check validates that the source and destination ranges of the copy do not + // overlap. 
The check assumes descriptor handles are pointers to memory, but this is not always the + // case and the check fails (even though everything is okay). + D3D12_MESSAGE_ID_COPY_DESCRIPTORS_INVALID_RANGES, + }; + + // We filter INFO messages because they are way too many + D3D12_MESSAGE_SEVERITY severities[] = { D3D12_MESSAGE_SEVERITY_INFO }; + + D3D12_INFO_QUEUE_FILTER infoQueueFilter = {}; + infoQueueFilter.DenyList.NumSeverities = SLANG_COUNT_OF(severities); + infoQueueFilter.DenyList.pSeverityList = severities; + infoQueueFilter.DenyList.NumIDs = SLANG_COUNT_OF(messageIds); + infoQueueFilter.DenyList.pIDList = messageIds; + + infoQueue->PushStorageFilter(&infoQueueFilter); + } + } } - D3D12_SAMPLER_DESC dxDesc = {}; - dxDesc.Filter = dxFilter; - dxDesc.AddressU = translateAddressingMode(desc.addressU); - dxDesc.AddressV = translateAddressingMode(desc.addressV); - dxDesc.AddressW = translateAddressingMode(desc.addressW); - dxDesc.MipLODBias = desc.mipLODBias; - dxDesc.MaxAnisotropy = desc.maxAnisotropy; - dxDesc.ComparisonFunc = translateComparisonFunc(desc.comparisonFunc); - for (int ii = 0; ii < 4; ++ii) - dxDesc.BorderColor[ii] = desc.borderColor[ii]; - dxDesc.MinLOD = desc.minLOD; - dxDesc.MaxLOD = desc.maxLOD; + // Get the descs + { + adapter->GetDesc(&outDeviceInfo.m_desc); - auto samplerHeap = &m_cpuSamplerHeap; + // Look up GetDesc1 info + ComPtr adapter1; + if (SLANG_SUCCEEDED(adapter->QueryInterface(adapter1.writeRef()))) + { + adapter1->GetDesc1(&outDeviceInfo.m_desc1); + } + } - D3D12HostVisibleDescriptor cpuDescriptor; - samplerHeap->allocate(&cpuDescriptor); - m_device->CreateSampler(&dxDesc, cpuDescriptor.cpuHandle); + // Save other info + outDeviceInfo.m_device = device; + outDeviceInfo.m_dxgiFactory = dxgiFactory; + outDeviceInfo.m_adapter = adapter; + outDeviceInfo.m_isWarp = D3DUtil::isWarp(dxgiFactory, adapter); - // TODO: We really ought to have a free-list of sampler-heap - // entries that we check before we go to the heap, and then - // when 
we are done with a sampler we simply add it to the free list. - // - RefPtr samplerImpl = new SamplerStateImpl(); - samplerImpl->m_renderer = this; - samplerImpl->m_descriptor = cpuDescriptor; - *outSampler = samplerImpl.detach(); return SLANG_OK; } -Result D3D12Device::createTextureView(ITextureResource* texture, IResourceView::Desc const& desc, IResourceView** outView) +static bool _isSupportedNVAPIOp(ID3D12Device* dev, uint32_t op) +{ +#ifdef GFX_NVAPI + { + bool isSupported; + NvAPI_Status status = NvAPI_D3D12_IsNvShaderExtnOpCodeSupported(dev, NvU32(op), &isSupported); + return status == NVAPI_OK && isSupported; + } +#else + return false; +#endif +} + +Result D3D12Device::initialize(const Desc& desc) { - auto resourceImpl = (TextureResourceImpl*) texture; + SLANG_RETURN_ON_FAIL(slangContext.initialize(desc.slang, SLANG_DXBC, "sm_5_1")); - RefPtr viewImpl = new ResourceViewImpl(); - viewImpl->m_resource = resourceImpl; + SLANG_RETURN_ON_FAIL(RendererBase::initialize(desc)); - switch (desc.type) + // Initialize DeviceInfo { - default: + m_info.deviceType = DeviceType::DirectX12; + m_info.bindingStyle = BindingStyle::DirectX; + m_info.projectionStyle = ProjectionStyle::DirectX; + m_info.apiName = "Direct3D 12"; + static const float kIdentity[] = {1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1}; + ::memcpy(m_info.identityProjectionMatrix, kIdentity, sizeof(kIdentity)); + } + + // Rather than statically link against D3D, we load it dynamically. 
+ + HMODULE d3dModule = LoadLibraryA("d3d12.dll"); + if (!d3dModule) + { + fprintf(stderr, "error: failed load 'd3d12.dll'\n"); return SLANG_FAIL; + } - case IResourceView::Type::RenderTarget: + // Get all the dll entry points + m_D3D12SerializeRootSignature = (PFN_D3D12_SERIALIZE_ROOT_SIGNATURE)loadProc(d3dModule, "D3D12SerializeRootSignature"); + if (!m_D3D12SerializeRootSignature) + { + return SLANG_FAIL; + } + +#if ENABLE_DEBUG_LAYER + m_D3D12GetDebugInterface = (PFN_D3D12_GET_DEBUG_INTERFACE)loadProc(d3dModule, "D3D12GetDebugInterface"); + if (m_D3D12GetDebugInterface) + { + if (SLANG_SUCCEEDED(m_D3D12GetDebugInterface(IID_PPV_ARGS(m_dxDebug.writeRef())))) { - SLANG_RETURN_ON_FAIL(m_rtvAllocator.allocate(&viewImpl->m_descriptor)); - viewImpl->m_allocator = &m_rtvAllocator; - D3D12_RENDER_TARGET_VIEW_DESC rtvDesc = {}; - rtvDesc.Format = D3DUtil::getMapFormat(desc.format); - switch (desc.renderTarget.shape) +#if 0 + // Can enable for extra validation. NOTE! That d3d12 warns if you do.... + // D3D12 MESSAGE : Device Debug Layer Startup Options : GPU - Based Validation is enabled(disabled by default). + // This results in new validation not possible during API calls on the CPU, by creating patched shaders that have validation + // added directly to the shader. However, it can slow things down a lot, especially for applications with numerous + // PSOs.Time to see the first render frame may take several minutes. 
+ // [INITIALIZATION MESSAGE #1016: CREATEDEVICE_DEBUG_LAYER_STARTUP_OPTIONS] + + ComPtr debug1; + if (SLANG_SUCCEEDED(m_dxDebug->QueryInterface(debug1.writeRef()))) { - case IResource::Type::Texture1D: - rtvDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE1D; - rtvDesc.Texture1D.MipSlice = desc.renderTarget.mipSlice; - break; - case IResource::Type::Texture2D: - rtvDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D; - rtvDesc.Texture2D.MipSlice = desc.renderTarget.mipSlice; - rtvDesc.Texture2D.PlaneSlice = desc.renderTarget.planeIndex; - break; - case IResource::Type::Texture3D: - rtvDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE3D; - rtvDesc.Texture3D.MipSlice = desc.renderTarget.mipSlice; - rtvDesc.Texture3D.FirstWSlice = desc.renderTarget.arrayIndex; - rtvDesc.Texture3D.WSize = desc.renderTarget.arraySize; - break; - default: - return SLANG_FAIL; + debug1->SetEnableGPUBasedValidation(true); } - m_device->CreateRenderTargetView( - resourceImpl->m_resource, &rtvDesc, viewImpl->m_descriptor.cpuHandle); +#endif + + m_dxDebug->EnableDebugLayer(); } - break; + } +#endif - case IResourceView::Type::DepthStencil: + m_D3D12CreateDevice = (PFN_D3D12_CREATE_DEVICE)loadProc(d3dModule, "D3D12CreateDevice"); + if (!m_D3D12CreateDevice) + { + return SLANG_FAIL; + } + + FlagCombiner combiner; + // TODO: we should probably provide a command-line option + // to override UseDebug of default rather than leave it + // up to each back-end to specify. 
+#if ENABLE_DEBUG_LAYER + combiner.add(DeviceCheckFlag::UseDebug, ChangeType::OnOff); ///< First try debug then non debug +#else + combiner.add(DeviceCheckFlag::UseDebug, ChangeType::Off); ///< Don't bother with debug +#endif + combiner.add(DeviceCheckFlag::UseHardwareDevice, ChangeType::OnOff); ///< First try hardware, then reference + + const D3D_FEATURE_LEVEL featureLevel = D3D_FEATURE_LEVEL_11_0; + + const int numCombinations = combiner.getNumCombinations(); + for (int i = 0; i < numCombinations; ++i) + { + if (SLANG_SUCCEEDED(_createDevice(combiner.getCombination(i), UnownedStringSlice(desc.adapter), featureLevel, m_deviceInfo))) { - SLANG_RETURN_ON_FAIL(m_dsvAllocator.allocate(&viewImpl->m_descriptor)); - viewImpl->m_allocator = &m_dsvAllocator; - D3D12_DEPTH_STENCIL_VIEW_DESC dsvDesc = {}; - dsvDesc.Format = D3DUtil::getMapFormat(desc.format); - switch (desc.renderTarget.shape) - { - case IResource::Type::Texture1D: - dsvDesc.ViewDimension = D3D12_DSV_DIMENSION_TEXTURE1D; - dsvDesc.Texture1D.MipSlice = desc.renderTarget.mipSlice; - break; - case IResource::Type::Texture2D: - dsvDesc.ViewDimension = D3D12_DSV_DIMENSION_TEXTURE2D; - dsvDesc.Texture2D.MipSlice = desc.renderTarget.mipSlice; - break; - default: - return SLANG_FAIL; - } - m_device->CreateDepthStencilView( - resourceImpl->m_resource, &dsvDesc, viewImpl->m_descriptor.cpuHandle); + break; } - break; + } - case IResourceView::Type::UnorderedAccess: - { - // TODO: need to support the separate "counter resource" for the case - // of append/consume buffers with attached counters. 
+ if (!m_deviceInfo.m_adapter) + { + // Couldn't find an adapter + return SLANG_FAIL; + } - SLANG_RETURN_ON_FAIL(m_viewAllocator.allocate(&viewImpl->m_descriptor)); - viewImpl->m_allocator = &m_viewAllocator; - m_device->CreateUnorderedAccessView(resourceImpl->m_resource, nullptr, nullptr, viewImpl->m_descriptor.cpuHandle); - } - break; + // Set the device + m_device = m_deviceInfo.m_device; - case IResourceView::Type::ShaderResource: + // NVAPI + if (desc.nvapiExtnSlot >= 0) + { + if (SLANG_FAILED(NVAPIUtil::initialize())) { - SLANG_RETURN_ON_FAIL(m_viewAllocator.allocate(&viewImpl->m_descriptor)); - viewImpl->m_allocator = &m_viewAllocator; - - // Need to construct the D3D12_SHADER_RESOURCE_VIEW_DESC because otherwise TextureCube is not accessed - // appropriately (rather than just passing nullptr to CreateShaderResourceView) - const D3D12_RESOURCE_DESC resourceDesc = resourceImpl->m_resource.getResource()->GetDesc(); - const DXGI_FORMAT pixelFormat = resourceDesc.Format; + return SLANG_E_NOT_AVAILABLE; + } - D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc; - _initSrvDesc(resourceImpl->getType(), *resourceImpl->getDesc(), resourceDesc, pixelFormat, srvDesc); +#ifdef GFX_NVAPI + // From DOCS: Applications are expected to bind null UAV to this slot. + // NOTE! We don't currently do this, but doesn't seem to be a problem. 
- m_device->CreateShaderResourceView(resourceImpl->m_resource, &srvDesc, viewImpl->m_descriptor.cpuHandle); + const NvAPI_Status status = NvAPI_D3D12_SetNvShaderExtnSlotSpace(m_device, NvU32(desc.nvapiExtnSlot), NvU32(0)); + + if (status != NVAPI_OK) + { + return SLANG_E_NOT_AVAILABLE; } - break; - } - *outView = viewImpl.detach(); - return SLANG_OK; -} + if (_isSupportedNVAPIOp(m_device, NV_EXTN_OP_UINT64_ATOMIC)) + { + m_features.add("atomic-int64"); + } + if (_isSupportedNVAPIOp(m_device, NV_EXTN_OP_FP32_ATOMIC)) + { + m_features.add("atomic-float"); + } -Result D3D12Device::createBufferView(IBufferResource* buffer, IResourceView::Desc const& desc, IResourceView** outView) -{ - auto resourceImpl = (BufferResourceImpl*) buffer; - auto resourceDesc = *resourceImpl->getDesc(); + m_nvapi = true; +#endif - RefPtr viewImpl = new ResourceViewImpl(); - viewImpl->m_resource = resourceImpl; + } - switch (desc.type) + // Find what features are supported { - default: - return SLANG_FAIL; + // Check this is how this is laid out... 
+ SLANG_COMPILE_TIME_ASSERT(D3D_SHADER_MODEL_6_0 == 0x60); - case IResourceView::Type::UnorderedAccess: { - D3D12_UNORDERED_ACCESS_VIEW_DESC uavDesc = {}; - uavDesc.ViewDimension = D3D12_UAV_DIMENSION_BUFFER; - uavDesc.Format = D3DUtil::getMapFormat(desc.format); - uavDesc.Buffer.FirstElement = 0; + D3D12_FEATURE_DATA_SHADER_MODEL featureShaderModel; + featureShaderModel.HighestShaderModel = D3D_SHADER_MODEL(0x62); - if(resourceDesc.elementSize) - { - uavDesc.Buffer.StructureByteStride = resourceDesc.elementSize; - uavDesc.Buffer.NumElements = UINT(resourceDesc.sizeInBytes / resourceDesc.elementSize); - } - else if(desc.format == Format::Unknown) - { - uavDesc.Buffer.Flags |= D3D12_BUFFER_UAV_FLAG_RAW; - uavDesc.Format = DXGI_FORMAT_R32_TYPELESS; - uavDesc.Buffer.NumElements = UINT(resourceDesc.sizeInBytes / 4); - } - else + // TODO: Currently warp causes a crash when using half, so disable for now + if (SLANG_SUCCEEDED(m_device->CheckFeatureSupport(D3D12_FEATURE_SHADER_MODEL, &featureShaderModel, sizeof(featureShaderModel))) && + m_deviceInfo.m_isWarp == false && + featureShaderModel.HighestShaderModel >= 0x62) { - uavDesc.Buffer.NumElements = UINT(resourceDesc.sizeInBytes / gfxGetFormatSize(desc.format)); + // With sm_6_2 we have half + m_features.add("half"); } - - - // TODO: need to support the separate "counter resource" for the case - // of append/consume buffers with attached counters. 
- - SLANG_RETURN_ON_FAIL(m_viewAllocator.allocate(&viewImpl->m_descriptor)); - viewImpl->m_allocator = &m_viewAllocator; - m_device->CreateUnorderedAccessView(resourceImpl->m_resource, nullptr, &uavDesc, viewImpl->m_descriptor.cpuHandle); } - break; - - case IResourceView::Type::ShaderResource: + // Check what min precision support we have { - D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc = {}; - srvDesc.ViewDimension = D3D12_SRV_DIMENSION_BUFFER; - srvDesc.Format = D3DUtil::getMapFormat(desc.format); - srvDesc.Buffer.StructureByteStride = 0; - srvDesc.Buffer.FirstElement = 0; - srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; - if(resourceDesc.elementSize) - { - srvDesc.Buffer.StructureByteStride = resourceDesc.elementSize; - srvDesc.Buffer.NumElements = UINT(resourceDesc.sizeInBytes / resourceDesc.elementSize); - } - else if(desc.format == Format::Unknown) - { - srvDesc.Buffer.Flags |= D3D12_BUFFER_SRV_FLAG_RAW; - srvDesc.Format = DXGI_FORMAT_R32_TYPELESS; - srvDesc.Buffer.NumElements = UINT(resourceDesc.sizeInBytes / 4); - } - else + D3D12_FEATURE_DATA_D3D12_OPTIONS options; + if (SLANG_SUCCEEDED(m_device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS, &options, sizeof(options)))) { - srvDesc.Buffer.NumElements = UINT(resourceDesc.sizeInBytes / gfxGetFormatSize(desc.format)); + auto minPrecisionSupport = options.MinPrecisionSupport; } - - SLANG_RETURN_ON_FAIL(m_viewAllocator.allocate(&viewImpl->m_descriptor)); - viewImpl->m_allocator = &m_viewAllocator; - m_device->CreateShaderResourceView(resourceImpl->m_resource, &srvDesc, viewImpl->m_descriptor.cpuHandle); } - break; } - *outView = viewImpl.detach(); - return SLANG_OK; -} + m_desc = desc; -Result D3D12Device::createFramebuffer(IFramebuffer::Desc const& desc, IFramebuffer** outFb) -{ - RefPtr framebuffer = new FramebufferImpl(); - framebuffer->renderTargetViews.setCount(desc.renderTargetCount); - framebuffer->renderTargetDescriptors.setCount(desc.renderTargetCount); - 
framebuffer->renderTargetClearValues.setCount(desc.renderTargetCount); - for (uint32_t i = 0; i < desc.renderTargetCount; i++) - { - framebuffer->renderTargetViews[i] = desc.renderTargetViews[i]; - framebuffer->renderTargetDescriptors[i] = - static_cast(desc.renderTargetViews[i])->m_descriptor.cpuHandle; - auto clearValue = - static_cast( - static_cast(desc.renderTargetViews[i])->m_resource.Ptr()) - ->getDesc() - ->optimalClearValue.color; - memcpy(&framebuffer->renderTargetClearValues[i], &clearValue, sizeof(ColorClearValue)); - } - framebuffer->depthStencilView = desc.depthStencilView; - if (desc.depthStencilView) - { - framebuffer->depthStencilClearValue = - static_cast( - static_cast(desc.depthStencilView)->m_resource.Ptr()) - ->getDesc() - ->optimalClearValue.depthStencil; - framebuffer->depthStencilDescriptor = - static_cast(desc.depthStencilView)->m_descriptor.cpuHandle; - } - else + // Create a command queue for internal resource transfer operations. + SLANG_RETURN_ON_FAIL(createCommandQueueImpl(1, 32, 4, m_resourceCommandQueue.writeRef())); + + SLANG_RETURN_ON_FAIL(m_cpuViewHeap.init (m_device, 8192, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV)); + SLANG_RETURN_ON_FAIL(m_cpuSamplerHeap.init(m_device, 1024, D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER)); + + SLANG_RETURN_ON_FAIL(m_rtvAllocator.init (m_device, 16, D3D12_DESCRIPTOR_HEAP_TYPE_RTV)); + SLANG_RETURN_ON_FAIL(m_dsvAllocator.init (m_device, 16, D3D12_DESCRIPTOR_HEAP_TYPE_DSV)); + + ComPtr dxgiDevice; + if (m_deviceInfo.m_adapter) { - framebuffer->depthStencilDescriptor.ptr = 0; + DXGI_ADAPTER_DESC adapterDesc; + m_deviceInfo.m_adapter->GetDesc(&adapterDesc); + m_adapterName = String::fromWString(adapterDesc.Description); + m_info.adapterName = m_adapterName.begin(); } - *outFb = framebuffer.detach(); + + m_isInitialized = true; return SLANG_OK; } -Result D3D12Device::createFramebufferLayout( - IFramebufferLayout::Desc const& desc, IFramebufferLayout** outLayout) +Result D3D12Device::createCommandQueue(const 
ICommandQueue::Desc& desc, ICommandQueue** outQueue) { - RefPtr layout = new FramebufferLayoutImpl(); - layout->m_renderTargets.setCount(desc.renderTargetCount); - for (uint32_t i = 0; i < desc.renderTargetCount; i++) - { - layout->m_renderTargets[i] = desc.renderTargets[i]; - } - - if (desc.depthStencil) - { - layout->m_hasDepthStencil = true; - layout->m_depthStencil = *desc.depthStencil; - } - else - { - layout->m_hasDepthStencil = false; - } - *outLayout = layout.detach(); + RefPtr queue; + SLANG_RETURN_ON_FAIL(createCommandQueueImpl(8, 4096, 1024, queue.writeRef())); + *outQueue = queue.detach(); return SLANG_OK; } -Result D3D12Device::createRenderPassLayout( - const IRenderPassLayout::Desc& desc, - IRenderPassLayout** outRenderPassLayout) +SLANG_NO_THROW Result SLANG_MCALL D3D12Device::createSwapchain( + const ISwapchain::Desc& desc, WindowHandle window, ISwapchain** outSwapchain) { - RefPtr result = new RenderPassLayoutImpl(); - result->init(desc); - *outRenderPassLayout = result.detach(); + RefPtr swapchain = new SwapchainImpl(); + SLANG_RETURN_ON_FAIL(swapchain->init(this, desc, window)); + *outSwapchain = swapchain.detach(); return SLANG_OK; } -Result D3D12Device::createInputLayout(const InputElementDesc* inputElements, UInt inputElementCount, IInputLayout** outLayout) +SlangResult D3D12Device::readTextureResource( + ITextureResource* resource, + ResourceState state, + ISlangBlob** outBlob, + size_t* outRowPitch, + size_t* outPixelSize) { - RefPtr layout(new InputLayoutImpl); + return captureTextureToSurface( + static_cast(resource)->m_resource, + state, + outBlob, + outRowPitch, + outPixelSize); +} - // Work out a buffer size to hold all text - size_t textSize = 0; - for (int i = 0; i < Int(inputElementCount); ++i) +static D3D12_RESOURCE_STATES _calcResourceState(IResource::Usage usage) +{ + typedef IResource::Usage Usage; + switch (usage) { - const char* text = inputElements[i].semanticName; - textSize += text ? 
(::strlen(text) + 1) : 0; + case Usage::VertexBuffer: return D3D12_RESOURCE_STATE_VERTEX_AND_CONSTANT_BUFFER; + case Usage::IndexBuffer: return D3D12_RESOURCE_STATE_INDEX_BUFFER; + case Usage::ConstantBuffer: return D3D12_RESOURCE_STATE_VERTEX_AND_CONSTANT_BUFFER; + case Usage::StreamOutput: return D3D12_RESOURCE_STATE_STREAM_OUT; + case Usage::RenderTarget: return D3D12_RESOURCE_STATE_RENDER_TARGET; + case Usage::DepthWrite: return D3D12_RESOURCE_STATE_DEPTH_WRITE; + case Usage::DepthRead: return D3D12_RESOURCE_STATE_DEPTH_READ; + case Usage::UnorderedAccess: return D3D12_RESOURCE_STATE_UNORDERED_ACCESS; + case Usage::PixelShaderResource: return D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE; + case Usage::NonPixelShaderResource: return D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE; + case Usage::ShaderResource: return D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE | + D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE; + case Usage::GenericRead: return D3D12_RESOURCE_STATE_GENERIC_READ; + default: return D3D12_RESOURCE_STATES(0); } - layout->m_text.setCount(textSize); - char* textPos = layout->m_text.getBuffer(); - - // - List& elements = layout->m_elements; - elements.setCount(inputElementCount); +} +static D3D12_RESOURCE_FLAGS _calcResourceFlag(IResource::BindFlag::Enum bindFlag) +{ + typedef IResource::BindFlag BindFlag; + switch (bindFlag) + { + case BindFlag::RenderTarget: return D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET; + case BindFlag::DepthStencil: return D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL; + case BindFlag::UnorderedAccess: return D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS; + default: return D3D12_RESOURCE_FLAG_NONE; + } +} - for (UInt i = 0; i < inputElementCount; ++i) +static D3D12_RESOURCE_FLAGS _calcResourceBindFlags(IResource::Usage initialUsage, int bindFlags) +{ + int dstFlags = 0; + while (bindFlags) { - const InputElementDesc& srcEle = inputElements[i]; - D3D12_INPUT_ELEMENT_DESC& dstEle = elements[i]; + int lsb = bindFlags & -bindFlags; - // Add text 
to the buffer - const char* semanticName = srcEle.semanticName; - if (semanticName) + dstFlags |= _calcResourceFlag(IResource::BindFlag::Enum(lsb)); + bindFlags &= ~lsb; + } + return D3D12_RESOURCE_FLAGS(dstFlags); +} + +static D3D12_RESOURCE_DIMENSION _calcResourceDimension(IResource::Type type) +{ + switch (type) + { + case IResource::Type::Buffer: return D3D12_RESOURCE_DIMENSION_BUFFER; + case IResource::Type::Texture1D: return D3D12_RESOURCE_DIMENSION_TEXTURE1D; + case IResource::Type::TextureCube: + case IResource::Type::Texture2D: { - const int len = int(::strlen(semanticName)); - ::memcpy(textPos, semanticName, len + 1); - semanticName = textPos; - textPos += len + 1; + return D3D12_RESOURCE_DIMENSION_TEXTURE2D; } - - dstEle.SemanticName = semanticName; - dstEle.SemanticIndex = (UINT)srcEle.semanticIndex; - dstEle.Format = D3DUtil::getMapFormat(srcEle.format); - dstEle.InputSlot = 0; - dstEle.AlignedByteOffset = (UINT)srcEle.offset; - dstEle.InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA; - dstEle.InstanceDataStepRate = 0; + case IResource::Type::Texture3D: return D3D12_RESOURCE_DIMENSION_TEXTURE3D; + default: return D3D12_RESOURCE_DIMENSION_UNKNOWN; } - - *outLayout = layout.detach(); - return SLANG_OK; } -Result D3D12Device::readBufferResource( - IBufferResource* bufferIn, - size_t offset, - size_t size, - ISlangBlob** outBlob) +Result D3D12Device::createTextureResource(IResource::Usage initialUsage, const ITextureResource::Desc& descIn, const ITextureResource::SubresourceData* initData, ITextureResource** outResource) { - auto encodeInfo = encodeResourceCommands(); + // Description of uploading on Dx12 + // https://msdn.microsoft.com/en-us/library/windows/desktop/dn899215%28v=vs.85%29.aspx - BufferResourceImpl* buffer = static_cast(bufferIn); + TextureResource::Desc srcDesc(descIn); + srcDesc.setDefaults(initialUsage); - const size_t bufferSize = buffer->getDesc()->sizeInBytes; + const DXGI_FORMAT pixelFormat = 
D3DUtil::getMapFormat(srcDesc.format); + if (pixelFormat == DXGI_FORMAT_UNKNOWN) + { + return SLANG_FAIL; + } - // This will be slow!!! - it blocks CPU on GPU completion - D3D12Resource& resource = buffer->m_resource; + const int arraySize = srcDesc.calcEffectiveArraySize(); - // Readback heap - D3D12_HEAP_PROPERTIES heapProps; - heapProps.Type = D3D12_HEAP_TYPE_READBACK; - heapProps.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN; - heapProps.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN; - heapProps.CreationNodeMask = 1; - heapProps.VisibleNodeMask = 1; + const D3D12_RESOURCE_DIMENSION dimension = _calcResourceDimension(srcDesc.type); + if (dimension == D3D12_RESOURCE_DIMENSION_UNKNOWN) + { + return SLANG_FAIL; + } - // Resource to readback to - D3D12_RESOURCE_DESC stagingDesc; - _initBufferResourceDesc(bufferSize, stagingDesc); + const int numMipMaps = srcDesc.numMipLevels; - D3D12Resource stageBuf; - SLANG_RETURN_ON_FAIL(stageBuf.initCommitted(m_device, heapProps, D3D12_HEAP_FLAG_NONE, stagingDesc, D3D12_RESOURCE_STATE_COPY_DEST, nullptr)); + // Setup desc + D3D12_RESOURCE_DESC resourceDesc; + + resourceDesc.Dimension = dimension; + resourceDesc.Format = pixelFormat; + resourceDesc.Width = srcDesc.size.width; + resourceDesc.Height = srcDesc.size.height; + resourceDesc.DepthOrArraySize = (srcDesc.size.depth > 1) ? 
srcDesc.size.depth : arraySize; + + resourceDesc.MipLevels = numMipMaps; + resourceDesc.SampleDesc.Count = srcDesc.sampleDesc.numSamples; + resourceDesc.SampleDesc.Quality = srcDesc.sampleDesc.quality; + + resourceDesc.Flags = D3D12_RESOURCE_FLAG_NONE; + resourceDesc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN; + + switch (initialUsage) + { + case IResource::Usage::RenderTarget: + resourceDesc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET; + break; + case IResource::Usage::DepthWrite: + resourceDesc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL; + break; + case IResource::Usage::UnorderedAccess: + resourceDesc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS; + break; + default: + break; + } - // Do the copy - encodeInfo.d3dCommandList->CopyBufferRegion(stageBuf, 0, resource, 0, bufferSize); + resourceDesc.Alignment = 0; - // Wait until complete - submitResourceCommandsAndWait(encodeInfo); + RefPtr texture(new TextureResourceImpl(srcDesc)); - // Map and copy - RefPtr blob = new ListBlob(); + // Create the target resource { - UINT8* data; - D3D12_RANGE readRange = { 0, bufferSize }; + D3D12_HEAP_PROPERTIES heapProps; - SLANG_RETURN_ON_FAIL(stageBuf.getResource()->Map(0, &readRange, reinterpret_cast(&data))); + heapProps.Type = D3D12_HEAP_TYPE_DEFAULT; + heapProps.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN; + heapProps.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN; + heapProps.CreationNodeMask = 1; + heapProps.VisibleNodeMask = 1; - // Copy to memory buffer - blob->m_data.setCount(bufferSize); - ::memcpy(blob->m_data.getBuffer(), data, bufferSize); + D3D12_CLEAR_VALUE clearValue; + D3D12_CLEAR_VALUE* clearValuePtr = &clearValue; + if ((resourceDesc.Flags & (D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET | + D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL)) == 0) + { + clearValuePtr = nullptr; + } + clearValue.Format = pixelFormat; + memcpy(clearValue.Color, &descIn.optimalClearValue.color, sizeof(clearValue.Color)); + clearValue.DepthStencil.Depth = 
descIn.optimalClearValue.depthStencil.depth; + clearValue.DepthStencil.Stencil = descIn.optimalClearValue.depthStencil.stencil; + SLANG_RETURN_ON_FAIL(texture->m_resource.initCommitted( + m_device, + heapProps, + D3D12_HEAP_FLAG_NONE, + resourceDesc, + D3D12_RESOURCE_STATE_COPY_DEST, + clearValuePtr)); - stageBuf.getResource()->Unmap(0, nullptr); + texture->m_resource.setDebugName(L"Texture"); } - *outBlob = blob.detach(); - return SLANG_OK; -} - -void D3D12Device::DescriptorSetImpl::setConstantBuffer(UInt range, UInt index, IBufferResource* buffer) -{ - auto dxDevice = m_renderer->m_device; - - auto resourceImpl = (BufferResourceImpl*) buffer; - auto resourceDesc = resourceImpl->getDesc(); - - // Constant buffer view size must be a multiple of 256 bytes, so we round it up here. - const size_t alignedSizeInBytes = D3DUtil::calcAligned(resourceDesc->sizeInBytes, 256); - D3D12_CONSTANT_BUFFER_VIEW_DESC cbvDesc = {}; - cbvDesc.BufferLocation = resourceImpl->m_resource.getResource()->GetGPUVirtualAddress(); - cbvDesc.SizeInBytes = UINT(alignedSizeInBytes); + // Calculate the layout + List layouts; + layouts.setCount(numMipMaps); + List mipRowSizeInBytes; + mipRowSizeInBytes.setCount(numMipMaps); + List mipNumRows; + mipNumRows.setCount(numMipMaps); - auto& rangeInfo = m_layout->m_ranges[range]; + // NOTE! 
This is just the size for one array upload -> not for the whole texture + UInt64 requiredSize = 0; + m_device->GetCopyableFootprints(&resourceDesc, 0, numMipMaps, 0, layouts.begin(), mipNumRows.begin(), mipRowSizeInBytes.begin(), &requiredSize); -#ifdef _DEBUG - switch(rangeInfo.type) + // Sub resource indexing + // https://msdn.microsoft.com/en-us/library/windows/desktop/dn705766(v=vs.85).aspx#subresource_indexing + if (initData) { - default: - assert(!"incorrect slot type"); - break; - - case DescriptorSlotType::UniformBuffer: - case DescriptorSlotType::DynamicUniformBuffer: - break; - } -#endif + // Create the upload texture + D3D12Resource uploadTexture; + + { + D3D12_HEAP_PROPERTIES heapProps; - auto arrayIndex = rangeInfo.arrayIndex + index; - auto descriptorIndex = m_resourceTable + arrayIndex; + heapProps.Type = D3D12_HEAP_TYPE_UPLOAD; + heapProps.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN; + heapProps.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN; + heapProps.CreationNodeMask = 1; + heapProps.VisibleNodeMask = 1; - m_resourceObjects[arrayIndex] = resourceImpl; - dxDevice->CreateConstantBufferView( - &cbvDesc, - m_resourceHeap->getCpuHandle(int(descriptorIndex))); -} + D3D12_RESOURCE_DESC uploadResourceDesc; -void D3D12Device::DescriptorSetImpl::setResource(UInt range, UInt index, IResourceView* view) -{ - auto dxDevice = m_renderer->m_device; + uploadResourceDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; + uploadResourceDesc.Format = DXGI_FORMAT_UNKNOWN; + uploadResourceDesc.Width = requiredSize; + uploadResourceDesc.Height = 1; + uploadResourceDesc.DepthOrArraySize = 1; + uploadResourceDesc.MipLevels = 1; + uploadResourceDesc.SampleDesc.Count = 1; + uploadResourceDesc.SampleDesc.Quality = 0; + uploadResourceDesc.Flags = D3D12_RESOURCE_FLAG_NONE; + uploadResourceDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; + uploadResourceDesc.Alignment = 0; - auto viewImpl = (ResourceViewImpl*) view; + 
SLANG_RETURN_ON_FAIL(uploadTexture.initCommitted(m_device, heapProps, D3D12_HEAP_FLAG_NONE, uploadResourceDesc, D3D12_RESOURCE_STATE_GENERIC_READ, nullptr)); - auto& rangeInfo = m_layout->m_ranges[range]; + uploadTexture.setDebugName(L"TextureUpload"); + } + // Get the pointer to the upload resource + ID3D12Resource* uploadResource = uploadTexture; - // TODO: validation that slot type matches view + int subResourceIndex = 0; + for (int arrayIndex = 0; arrayIndex < arraySize; arrayIndex++) + { + uint8_t* p; + uploadResource->Map(0, nullptr, reinterpret_cast(&p)); - auto arrayIndex = rangeInfo.arrayIndex + index; - auto descriptorIndex = m_resourceTable + arrayIndex; + for (int j = 0; j < numMipMaps; ++j) + { + auto srcSubresource = initData[j]; - m_resourceObjects[arrayIndex] = viewImpl; - if (viewImpl) - { - dxDevice->CopyDescriptorsSimple( - 1, - m_resourceHeap->getCpuHandle(int(descriptorIndex)), - viewImpl->m_descriptor.cpuHandle, - D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); - } -} + const D3D12_PLACED_SUBRESOURCE_FOOTPRINT& layout = layouts[j]; + const D3D12_SUBRESOURCE_FOOTPRINT& footprint = layout.Footprint; -void D3D12Device::DescriptorSetImpl::setSampler(UInt range, UInt index, ISamplerState* sampler) -{ - auto dxDevice = m_renderer->m_device; + const TextureResource::Size mipSize = srcDesc.size.calcMipSize(j); - auto samplerImpl = (SamplerStateImpl*) sampler; + assert(footprint.Width == mipSize.width && footprint.Height == mipSize.height && footprint.Depth == mipSize.depth); - auto& rangeInfo = m_layout->m_ranges[range]; + auto mipRowSize = mipRowSizeInBytes[j]; -#ifdef _DEBUG - switch(rangeInfo.type) - { - default: - assert(!"incorrect slot type"); - break; + const ptrdiff_t dstMipRowPitch = ptrdiff_t(footprint.RowPitch); + const ptrdiff_t srcMipRowPitch = ptrdiff_t(srcSubresource.strideY); - case DescriptorSlotType::Sampler: - break; - } -#endif + const ptrdiff_t dstMipLayerPitch = ptrdiff_t(footprint.RowPitch*footprint.Height); + const ptrdiff_t 
srcMipLayerPitch = ptrdiff_t(srcSubresource.strideZ); - auto arrayIndex = rangeInfo.arrayIndex + index; - auto descriptorIndex = m_samplerTable + arrayIndex; + // Our outer loop will copy the depth layers one at a time. + // + const uint8_t* srcLayer = (const uint8_t*) srcSubresource.data; + uint8_t* dstLayer = p + layouts[j].Offset; + for (int l = 0; l < mipSize.depth; l++) + { + // Our inner loop will copy the rows one at a time. + // + const uint8_t* srcRow = srcLayer; + uint8_t* dstRow = dstLayer; + for (int k = 0; k < mipSize.height; ++k) + { + ::memcpy(dstRow, srcRow, (size_t)mipRowSize); - m_samplerObjects[arrayIndex] = samplerImpl; - if (samplerImpl) - { - dxDevice->CopyDescriptorsSimple( - 1, - m_samplerHeap->getCpuHandle(int(descriptorIndex)), - samplerImpl->m_descriptor.cpuHandle, - D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER); - } -} + srcRow += srcMipRowPitch; + dstRow += dstMipRowPitch; + } -void D3D12Device::DescriptorSetImpl::setCombinedTextureSampler( - UInt range, - UInt index, - IResourceView* textureView, - ISamplerState* sampler) -{ - auto dxDevice = m_renderer->m_device; + srcLayer += srcMipLayerPitch; + dstLayer += dstMipLayerPitch; + } - auto viewImpl = (ResourceViewImpl*)textureView; - auto samplerImpl = (SamplerStateImpl*)sampler; + //assert(srcRow == (const uint8_t*)(srcMip.getBuffer() + srcMip.getCount())); + } + uploadResource->Unmap(0, nullptr); - auto& rangeInfo = m_layout->m_ranges[range]; + auto encodeInfo = encodeResourceCommands(); + for (int mipIndex = 0; mipIndex < numMipMaps; ++mipIndex) + { + // https://msdn.microsoft.com/en-us/library/windows/desktop/dn903862(v=vs.85).aspx -#ifdef _DEBUG - switch (rangeInfo.type) - { - default: - assert(!"incorrect slot type"); - break; + D3D12_TEXTURE_COPY_LOCATION src; + src.pResource = uploadTexture; + src.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; + src.PlacedFootprint = layouts[mipIndex]; - case DescriptorSlotType::CombinedImageSampler: - break; - } -#endif + D3D12_TEXTURE_COPY_LOCATION 
dst; + dst.pResource = texture->m_resource; + dst.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; + dst.SubresourceIndex = subResourceIndex; + encodeInfo.d3dCommandList->CopyTextureRegion(&dst, 0, 0, 0, &src, nullptr); - auto arrayIndex = rangeInfo.arrayIndex + index; - auto resourceDescriptorIndex = m_resourceTable + arrayIndex; - auto samplerDescriptorIndex = m_samplerTable + arrayIndex; + subResourceIndex++; + } - m_resourceObjects[arrayIndex] = viewImpl; - if (viewImpl) - { - dxDevice->CopyDescriptorsSimple( - 1, - m_resourceHeap->getCpuHandle(int(resourceDescriptorIndex)), - viewImpl->m_descriptor.cpuHandle, - D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + // Block - waiting for copy to complete (so can drop upload texture) + submitResourceCommandsAndWait(encodeInfo); + } } - - m_samplerObjects[arrayIndex] = samplerImpl; - if (samplerImpl) { - dxDevice->CopyDescriptorsSimple( - 1, - m_samplerHeap->getCpuHandle(int(samplerDescriptorIndex)), - samplerImpl->m_descriptor.cpuHandle, - D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER); + auto encodeInfo = encodeResourceCommands(); + const D3D12_RESOURCE_STATES finalState = _calcResourceState(initialUsage); + { + D3D12BarrierSubmitter submitter(encodeInfo.d3dCommandList); + texture->m_resource.transition(D3D12_RESOURCE_STATE_COPY_DEST, finalState, submitter); + } + submitResourceCommandsAndWait(encodeInfo); } + + *outResource = texture.detach(); + return SLANG_OK; } -void D3D12Device::DescriptorSetImpl::setRootConstants( - UInt range, - UInt offset, - UInt size, - void const* data) +Result D3D12Device::createBufferResource(IResource::Usage initialUsage, const IBufferResource::Desc& descIn, const void* initData, IBufferResource** outResource) { - // The `range` parameter is the index of the range in - // the original `DescriptorSetLayout::Desc`, which must - // have been a root-constant range for this call to be - // valid. 
+ BufferResource::Desc srcDesc(descIn); + srcDesc.setDefaults(initialUsage); + + // Always align up to 256 bytes, since that is required for constant buffers. // - SLANG_ASSERT(range < UInt(m_layout->m_ranges.getCount())); - auto& rangeInfo = m_layout->m_ranges[range]; - SLANG_ASSERT(rangeInfo.type == DescriptorSlotType::RootConstant); - - // The `arrayIndex` in that descriptor slot range is the "flat" - // index of the root constant range that the user is trying - // to write into. The root constant range represents a range - // of bytes in the `m_rootConstantData` buffer. + // TODO: only do this for buffers that could potentially be bound as constant buffers... // - auto rootConstantIndex = rangeInfo.arrayIndex; - SLANG_ASSERT(rootConstantIndex >= 0); - SLANG_ASSERT(rootConstantIndex < m_layout->m_rootConstantRanges.getCount()); - auto& rootConstantRangeInfo = m_layout->m_rootConstantRanges[rootConstantIndex]; - SLANG_ASSERT(offset + size <= UInt(rootConstantRangeInfo.size)); + const size_t alignedSizeInBytes = D3DUtil::calcAligned(srcDesc.sizeInBytes, 256); + + RefPtr buffer(new BufferResourceImpl(initialUsage, srcDesc)); + + D3D12_RESOURCE_DESC bufferDesc; + _initBufferResourceDesc(alignedSizeInBytes, bufferDesc); + + bufferDesc.Flags = _calcResourceBindFlags(initialUsage, srcDesc.bindFlags); + + const D3D12_RESOURCE_STATES initialState = _calcResourceState(initialUsage); + SLANG_RETURN_ON_FAIL(createBuffer(bufferDesc, initData, srcDesc.sizeInBytes, buffer->m_uploadResource, initialState, buffer->m_resource)); - memcpy((char*)m_rootConstantData.getBuffer() + rootConstantRangeInfo.offset + offset, data, size); + *outResource = buffer.detach(); + return SLANG_OK; } -Result D3D12Device::createProgram(const IShaderProgram::Desc& desc, IShaderProgram** outProgram) +D3D12_FILTER_TYPE translateFilterMode(TextureFilteringMode mode) { - if (desc.slangProgram && desc.slangProgram->getSpecializationParamCount() != 0) - { - // For a specializable program, we don't invoke 
any actual slang compilation yet. - RefPtr shaderProgram = new ShaderProgramImpl(); - initProgramCommon(shaderProgram, desc); - *outProgram = shaderProgram.detach(); - return SLANG_OK; - } - - if( desc.kernelCount == 0 ) + switch (mode) { - return createProgramFromSlang(this, desc, outProgram); - } + default: + return D3D12_FILTER_TYPE(0); - RefPtr program(new ShaderProgramImpl()); - program->m_pipelineType = desc.pipelineType; +#define CASE(SRC, DST) \ + case TextureFilteringMode::SRC: return D3D12_FILTER_TYPE_##DST - if (desc.pipelineType == PipelineType::Compute) - { - auto computeKernel = desc.findKernel(StageType::Compute); - program->m_computeShader.insertRange(0, (const uint8_t*) computeKernel->codeBegin, computeKernel->getCodeSize()); - } - else - { - auto vertexKernel = desc.findKernel(StageType::Vertex); - auto fragmentKernel = desc.findKernel(StageType::Fragment); + CASE(Point, POINT); + CASE(Linear, LINEAR); - program->m_vertexShader.insertRange(0, (const uint8_t*) vertexKernel->codeBegin, vertexKernel->getCodeSize()); - program->m_pixelShader.insertRange(0, (const uint8_t*) fragmentKernel->codeBegin, fragmentKernel->getCodeSize()); +#undef CASE } - initProgramCommon(program, desc); - - *outProgram = program.detach(); - return SLANG_OK; } -Result D3D12Device::createDescriptorSetLayout(const IDescriptorSetLayout::Desc& desc, IDescriptorSetLayout** outLayout) +D3D12_FILTER_REDUCTION_TYPE translateFilterReduction(TextureReductionOp op) { - Int rangeCount = desc.slotRangeCount; + switch (op) + { + default: + return D3D12_FILTER_REDUCTION_TYPE(0); - // For our purposes, there are three main cases of descriptor ranges to consider: - // - // 1. Resources: CBV, SRV, UAV - // - // 2. Samplers - // - // 3. Combined texture/sampler pairs - // - // The combined case presents challenges, because we will implement - // them as both a resource slot and a sampler slot, and for conveience - // in the indexing logic, it would be nice it they "lined up." 
- // - // We will start by counting how many ranges, and how many - // descriptors, of each type we have. - // +#define CASE(SRC, DST) \ + case TextureReductionOp::SRC: return D3D12_FILTER_REDUCTION_TYPE_##DST - Int dedicatedResourceCount = 0; - Int dedicatedSamplerCount = 0; - Int combinedCount = 0; + CASE(Average, STANDARD); + CASE(Comparison, COMPARISON); + CASE(Minimum, MINIMUM); + CASE(Maximum, MAXIMUM); - Int dedicatedResourceRangeCount = 0; - Int dedicatedSamplerRangeCount = 0; - Int combinedRangeCount = 0; +#undef CASE + } +} - for(Int rr = 0; rr < rangeCount; ++rr) +D3D12_TEXTURE_ADDRESS_MODE translateAddressingMode(TextureAddressingMode mode) +{ + switch (mode) { - auto rangeDesc = desc.slotRanges[rr]; - switch(rangeDesc.type) - { - case DescriptorSlotType::Sampler: - dedicatedSamplerCount += rangeDesc.count; - dedicatedSamplerRangeCount++; - break; + default: + return D3D12_TEXTURE_ADDRESS_MODE(0); - case DescriptorSlotType::CombinedImageSampler: - combinedCount += rangeDesc.count; - combinedRangeCount++; - break; +#define CASE(SRC, DST) \ + case TextureAddressingMode::SRC: return D3D12_TEXTURE_ADDRESS_MODE_##DST - case DescriptorSlotType::RootConstant: - // A root constant slot range doesn't contribute - // to the toal number of resources or samplers. - break; + CASE(Wrap, WRAP); + CASE(ClampToEdge, CLAMP); + CASE(ClampToBorder, BORDER); + CASE(MirrorRepeat, MIRROR); + CASE(MirrorOnce, MIRROR_ONCE); - default: - dedicatedResourceCount += rangeDesc.count; - dedicatedResourceRangeCount++; - break; - } +#undef CASE } +} - // Now we know how many ranges we have to allocate space for, - // and also how they need to be arranged. - // - // Each "combined" range will map to two ranges in the D3D - // descriptor tables. 
- - RefPtr descriptorSetLayoutImpl = new DescriptorSetLayoutImpl(); - - // We know the total number of resource and sampler "slots" that an instance - // of this descriptor-set layout would need: - // - descriptorSetLayoutImpl->m_resourceCount = combinedCount + dedicatedResourceCount; - descriptorSetLayoutImpl->m_samplerCount = combinedCount + dedicatedSamplerCount; +static D3D12_COMPARISON_FUNC translateComparisonFunc(ComparisonFunc func) +{ + switch (func) + { + default: + // TODO: need to report failures + return D3D12_COMPARISON_FUNC_ALWAYS; - // We can start by allocating the D3D root parameter info needed for the - // descriptor set, based on the total number or ranges we need, which - // we can compute from the combined and dedicated counts: - // - Int totalResourceRangeCount = combinedRangeCount + dedicatedResourceRangeCount; - Int totalSamplerRangeCount = combinedRangeCount + dedicatedSamplerRangeCount; +#define CASE(FROM, TO) \ + case ComparisonFunc::FROM: return D3D12_COMPARISON_FUNC_##TO - if( totalResourceRangeCount ) - { - D3D12_ROOT_PARAMETER dxRootParameter = {}; - dxRootParameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; - dxRootParameter.DescriptorTable.NumDescriptorRanges = UINT(totalResourceRangeCount); - descriptorSetLayoutImpl->m_dxRootParameters.add(dxRootParameter); + CASE(Never, NEVER); + CASE(Less, LESS); + CASE(Equal, EQUAL); + CASE(LessEqual, LESS_EQUAL); + CASE(Greater, GREATER); + CASE(NotEqual, NOT_EQUAL); + CASE(GreaterEqual, GREATER_EQUAL); + CASE(Always, ALWAYS); +#undef CASE } - if( totalSamplerRangeCount ) +} + +Result D3D12Device::createSamplerState(ISamplerState::Desc const& desc, ISamplerState** outSampler) +{ + D3D12_FILTER_REDUCTION_TYPE dxReduction = translateFilterReduction(desc.reductionOp); + D3D12_FILTER dxFilter; + if (desc.maxAnisotropy > 1) { - D3D12_ROOT_PARAMETER dxRootParameter = {}; - dxRootParameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; - 
dxRootParameter.DescriptorTable.NumDescriptorRanges = UINT(totalSamplerRangeCount); - descriptorSetLayoutImpl->m_dxRootParameters.add(dxRootParameter); + dxFilter = D3D12_ENCODE_ANISOTROPIC_FILTER(dxReduction); } - - // Next we can allocate space for all the D3D register ranges we need, - // again based on totals that we can compute easily: - // - Int totalRangeCount = totalResourceRangeCount + totalSamplerRangeCount; - descriptorSetLayoutImpl->m_dxRanges.setCount(totalRangeCount); - - // Now we will walk through the ranges in the order they were - // specified, so that we can fill in the "range info" required for - // binding parameters into descriptor sets allocated with this layout. - // - // This effectively determines the space required in two arrays - // in each descriptor set: one for resources, and one for samplers. - // A "combined" descriptor requires space in both arrays. The entries - // for "dedicated" samplers/resources always come after those for - // "combined" descriptors in the same array, so that a single index - // can be used for both arrays in the combined case. - // - + else { - Int samplerCounter = 0; - Int resourceCounter = 0; - Int combinedCounter = 0; - for(Int rr = 0; rr < rangeCount; ++rr) - { - auto rangeDesc = desc.slotRanges[rr]; - - DescriptorSetLayoutImpl::RangeInfo rangeInfo; - - rangeInfo.type = rangeDesc.type; - rangeInfo.count = rangeDesc.count; - - switch(rangeDesc.type) - { - default: - // Default case is a dedicated resource, and its index in the - // resource array will come after all the combined entries. - rangeInfo.arrayIndex = combinedCount + resourceCounter; - resourceCounter += rangeInfo.count; - break; - - case DescriptorSlotType::Sampler: - // A dedicated sampler comes after all the entries for - // combined texture/samplers in the sampler array. 
- rangeInfo.arrayIndex = combinedCount + samplerCounter; - samplerCounter += rangeInfo.count; - break; + D3D12_FILTER_TYPE dxMin = translateFilterMode(desc.minFilter); + D3D12_FILTER_TYPE dxMag = translateFilterMode(desc.magFilter); + D3D12_FILTER_TYPE dxMip = translateFilterMode(desc.mipFilter); - case DescriptorSlotType::CombinedImageSampler: - // Combined descriptors take entries at the front of - // the resource and sampler arrays. - rangeInfo.arrayIndex = combinedCounter; - combinedCounter += rangeInfo.count; - break; + dxFilter = D3D12_ENCODE_BASIC_FILTER(dxMin, dxMag, dxMip, dxReduction); + } - case DescriptorSlotType::RootConstant: - { - // A root constant range is a bit different than - // the other cases because it does *not* introduce - // any descriptor rangess into D3D12 descriptor tables, - // while it *does* introduce a distinct root parameter. - // - D3D12_ROOT_PARAMETER dxRootParameter = {}; - dxRootParameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS; - dxRootParameter.Constants.Num32BitValues = UINT(rangeInfo.count) / UINT(sizeof(uint32_t)); + D3D12_SAMPLER_DESC dxDesc = {}; + dxDesc.Filter = dxFilter; + dxDesc.AddressU = translateAddressingMode(desc.addressU); + dxDesc.AddressV = translateAddressingMode(desc.addressV); + dxDesc.AddressW = translateAddressingMode(desc.addressW); + dxDesc.MipLODBias = desc.mipLODBias; + dxDesc.MaxAnisotropy = desc.maxAnisotropy; + dxDesc.ComparisonFunc = translateComparisonFunc(desc.comparisonFunc); + for (int ii = 0; ii < 4; ++ii) + dxDesc.BorderColor[ii] = desc.borderColor[ii]; + dxDesc.MinLOD = desc.minLOD; + dxDesc.MaxLOD = desc.maxLOD; - // When binding the data for the range to the pipeline, - // we will need to know the "root parameter index" in - // order to identify the range to D3D12. 
- // - auto rootParameterIndex = descriptorSetLayoutImpl->m_dxRootParameters.getCount(); - descriptorSetLayoutImpl->m_dxRootParameters.add(dxRootParameter); + auto samplerHeap = &m_cpuSamplerHeap; - // We need to create and store additional tracking data - // to remember this root constant range and how to set it. - // - // The additional data includes the D3D12 root parameter index, - // and the size of the range (in bytes). - // - DescriptorSetLayoutImpl::RootConstantRangeInfo rootConstantRangeInfo; - rootConstantRangeInfo.rootParamIndex = rootParameterIndex; - rootConstantRangeInfo.size = rangeDesc.count; - // - // We also need to compute an offset for the data in the backing - // storage of a particular descriptor set; we also use this as - // a place to update the total size of the root constant data. - // - // Note: We don't deal with alignment issues here. D3D12 requires - // all root-constant data to be in multiples of 4 bytes and to be - // 4-byte aligned, and that should mean that alignment works - // out without extra effort on our part. - // - rootConstantRangeInfo.offset = descriptorSetLayoutImpl->m_rootConstantDataSize; - descriptorSetLayoutImpl->m_rootConstantDataSize += rootConstantRangeInfo.size; + D3D12HostVisibleDescriptor cpuDescriptor; + samplerHeap->allocate(&cpuDescriptor); + m_device->CreateSampler(&dxDesc, cpuDescriptor.cpuHandle); - auto rootConstantIndex = descriptorSetLayoutImpl->m_rootConstantRanges.getCount(); - descriptorSetLayoutImpl->m_rootConstantRanges.add(rootConstantRangeInfo); + // TODO: We really ought to have a free-list of sampler-heap + // entries that we check before we go to the heap, and then + // when we are done with a sampler we simply add it to the free list. 
+ // + RefPtr samplerImpl = new SamplerStateImpl(); + samplerImpl->m_renderer = this; + samplerImpl->m_descriptor = cpuDescriptor; + *outSampler = samplerImpl.detach(); + return SLANG_OK; +} - rangeInfo.arrayIndex = rootConstantIndex; - rangeInfo.count = 1; - } - break; - } +Result D3D12Device::createTextureView(ITextureResource* texture, IResourceView::Desc const& desc, IResourceView** outView) +{ + auto resourceImpl = (TextureResourceImpl*) texture; - descriptorSetLayoutImpl->m_ranges.add(rangeInfo); - } - } + RefPtr viewImpl = new ResourceViewImpl(); + viewImpl->m_resource = resourceImpl; - // Finally, we will go through and fill in ready-to-go D3D - // register range information. + switch (desc.type) { - UInt cbvRegisterCounter = 0; - UInt srvRegisterCounter = 0; - UInt uavRegisterCounter = 0; - UInt samplerRegisterCounter = 0; - - Int resourceRangeCounter = 0; - Int samplerRangeCounter = 0; - Int combinedRangeCounter = 0; + default: + return SLANG_FAIL; - for(Int rr = 0; rr < rangeCount; ++rr) + case IResourceView::Type::RenderTarget: { - auto rangeDesc = desc.slotRanges[rr]; - Int bindingCount = rangeDesc.count; - - // All of these descriptor ranges will be initialized - // with a "space" of zero, with the assumption that - // the actual space number will come from when they are - // used as part of a pipeline layout. - // - Int bindingSpace = 0; - - Int dxRangeIndex = -1; - Int dxPairedSamplerRangeIndex = -1; - switch(rangeDesc.type) + SLANG_RETURN_ON_FAIL(m_rtvAllocator.allocate(&viewImpl->m_descriptor)); + viewImpl->m_allocator = &m_rtvAllocator; + D3D12_RENDER_TARGET_VIEW_DESC rtvDesc = {}; + rtvDesc.Format = D3DUtil::getMapFormat(desc.format); + switch (desc.renderTarget.shape) { - default: - // Default case is a dedicated resource, and its index in the - // resource array will come after all the combined entries. 
- dxRangeIndex = combinedRangeCount + resourceRangeCounter; - resourceRangeCounter++; - break; - - case DescriptorSlotType::Sampler: - // A dedicated sampler comes after all the entries for - // combined texture/samplers in the sampler array. - dxRangeIndex = totalResourceRangeCount + combinedRangeCount + samplerRangeCounter; - samplerRangeCounter++; + case IResource::Type::Texture1D: + rtvDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE1D; + rtvDesc.Texture1D.MipSlice = desc.renderTarget.mipSlice; break; - - case DescriptorSlotType::CombinedImageSampler: - // Combined descriptors take entries at the front of - // the resource and sampler arrays. - dxRangeIndex = combinedRangeCounter; - dxPairedSamplerRangeIndex = totalResourceRangeCount + combinedRangeCounter; - combinedRangeCounter++; + case IResource::Type::Texture2D: + rtvDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D; + rtvDesc.Texture2D.MipSlice = desc.renderTarget.mipSlice; + rtvDesc.Texture2D.PlaneSlice = desc.renderTarget.planeIndex; break; - - - case DescriptorSlotType::RootConstant: - { - // A root constant range consumes a `b` register binding - // under the D3D12 rules, because it is represented as - // a `cbuffer` or `ConstantBuffer` declaration in HLSL. - // - // We need to allocate a register for the root constant - // buffer here to make the bindings line up, but we - // will skip out of the rest of the logic (via a `continue` - // so that this range doesn't turn into a descriptor - // range in one of the D3D12 descriptor tables. 
- // - Int dxRegister = rangeDesc.binding; - if( dxRegister < 0 ) - { - dxRegister = cbvRegisterCounter; - } - cbvRegisterCounter = dxRegister + bindingCount; - - auto rootConstantRangeIndex = descriptorSetLayoutImpl->m_ranges[rr].arrayIndex; - auto rootParamIndex = descriptorSetLayoutImpl->m_rootConstantRanges[rootConstantRangeIndex].rootParamIndex; - - // The root constant range is represented in the D3D12 - // root signature as its own root parameter (not in any - // table), and that root parameter needs to be set up - // to reference the correct binding space and index. - // - auto& dxRootParam = descriptorSetLayoutImpl->m_dxRootParameters[rootParamIndex]; - dxRootParam.Constants.RegisterSpace = UINT(bindingSpace); - dxRootParam.Constants.ShaderRegister = UINT(dxRegister); - continue; - } + case IResource::Type::Texture3D: + rtvDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE3D; + rtvDesc.Texture3D.MipSlice = desc.renderTarget.mipSlice; + rtvDesc.Texture3D.FirstWSlice = desc.renderTarget.arrayIndex; + rtvDesc.Texture3D.WSize = desc.renderTarget.arraySize; break; + default: + return SLANG_FAIL; } + m_device->CreateRenderTargetView( + resourceImpl->m_resource, &rtvDesc, viewImpl->m_descriptor.cpuHandle); + } + break; - D3D12_DESCRIPTOR_RANGE& dxRange = descriptorSetLayoutImpl->m_dxRanges[dxRangeIndex]; - memset(&dxRange, 0, sizeof(dxRange)); - - Int dxRegister = rangeDesc.binding; - - switch(rangeDesc.type) + case IResourceView::Type::DepthStencil: + { + SLANG_RETURN_ON_FAIL(m_dsvAllocator.allocate(&viewImpl->m_descriptor)); + viewImpl->m_allocator = &m_dsvAllocator; + D3D12_DEPTH_STENCIL_VIEW_DESC dsvDesc = {}; + dsvDesc.Format = D3DUtil::getMapFormat(desc.format); + switch (desc.renderTarget.shape) { - default: - // ERROR: unsupported slot type. 
+ case IResource::Type::Texture1D: + dsvDesc.ViewDimension = D3D12_DSV_DIMENSION_TEXTURE1D; + dsvDesc.Texture1D.MipSlice = desc.renderTarget.mipSlice; break; - - case DescriptorSlotType::Sampler: - { - if( dxRegister < 0 ) - { - dxRegister = samplerRegisterCounter; - } - samplerRegisterCounter = dxRegister + bindingCount; - - dxRange.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER; - dxRange.NumDescriptors = UINT(bindingCount); - dxRange.BaseShaderRegister = UINT(dxRegister); - dxRange.RegisterSpace = UINT(bindingSpace); - dxRange.OffsetInDescriptorsFromTableStart = D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND; - } + case IResource::Type::Texture2D: + dsvDesc.ViewDimension = D3D12_DSV_DIMENSION_TEXTURE2D; + dsvDesc.Texture2D.MipSlice = desc.renderTarget.mipSlice; break; + default: + return SLANG_FAIL; + } + m_device->CreateDepthStencilView( + resourceImpl->m_resource, &dsvDesc, viewImpl->m_descriptor.cpuHandle); + } + break; - case DescriptorSlotType::SampledImage: - case DescriptorSlotType::UniformTexelBuffer: - { - if( dxRegister < 0 ) - { - dxRegister = srvRegisterCounter; - } - srvRegisterCounter = dxRegister + bindingCount; + case IResourceView::Type::UnorderedAccess: + { + // TODO: need to support the separate "counter resource" for the case + // of append/consume buffers with attached counters. 
- dxRange.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV; - dxRange.NumDescriptors = UINT(bindingCount); - dxRange.BaseShaderRegister = UINT(dxRegister); - dxRange.RegisterSpace = UINT(bindingSpace); - dxRange.OffsetInDescriptorsFromTableStart = D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND; - } - break; + SLANG_RETURN_ON_FAIL(m_cpuViewHeap.allocate(&viewImpl->m_descriptor)); + viewImpl->m_allocator = &m_cpuViewHeap; + m_device->CreateUnorderedAccessView(resourceImpl->m_resource, nullptr, nullptr, viewImpl->m_descriptor.cpuHandle); + } + break; - case DescriptorSlotType::CombinedImageSampler: - { - // The combined texture/sampler case basically just - // does the work of both the SRV and sampler cases above. - // - // TODO(tfoley): The current API for passing down an - // explicit register/binding can't handle the requirement - // that we specify *two* registers/bindings for the - // combined image/sampler case. - // - // Realistically, the `Renderer` implementation for - // targes that don't support combined texture/sampler - // bindings should just error out when a client attempts - // to create a descriptor set that uses them (rather than - // the current behavior which adds a lot of complexity - // in the name of trying to make them work). 
+ case IResourceView::Type::ShaderResource: + { + SLANG_RETURN_ON_FAIL(m_cpuViewHeap.allocate(&viewImpl->m_descriptor)); + viewImpl->m_allocator = &m_cpuViewHeap; - { - // Here's the SRV logic: - Int srvRegister = dxRegister; - if( srvRegister < 0 ) - { - srvRegister = srvRegisterCounter; - } - srvRegisterCounter = srvRegister + bindingCount; + // Need to construct the D3D12_SHADER_RESOURCE_VIEW_DESC because otherwise TextureCube is not accessed + // appropriately (rather than just passing nullptr to CreateShaderResourceView) + const D3D12_RESOURCE_DESC resourceDesc = resourceImpl->m_resource.getResource()->GetDesc(); + const DXGI_FORMAT pixelFormat = resourceDesc.Format; - dxRange.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV; - dxRange.NumDescriptors = UINT(bindingCount); - dxRange.BaseShaderRegister = UINT(srvRegister); - dxRange.RegisterSpace = UINT(bindingSpace); - dxRange.OffsetInDescriptorsFromTableStart = D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND; - } + D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc; + _initSrvDesc(resourceImpl->getType(), *resourceImpl->getDesc(), resourceDesc, pixelFormat, srvDesc); - { - // And here we do the sampler logic at the "paired" index. 
- D3D12_DESCRIPTOR_RANGE& dxPairedSamplerRange = descriptorSetLayoutImpl->m_dxRanges[dxPairedSamplerRangeIndex]; - memset(&dxPairedSamplerRange, 0, sizeof(dxPairedSamplerRange)); + m_device->CreateShaderResourceView(resourceImpl->m_resource, &srvDesc, viewImpl->m_descriptor.cpuHandle); + } + break; + } - Int samplerRegister = dxRegister; - if( samplerRegister < 0 ) - { - samplerRegister = samplerRegisterCounter; - } - samplerRegisterCounter = samplerRegister + bindingCount; + *outView = viewImpl.detach(); + return SLANG_OK; +} - dxPairedSamplerRange.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER; - dxPairedSamplerRange.NumDescriptors = UINT(bindingCount); - dxPairedSamplerRange.BaseShaderRegister = UINT(samplerRegister); - dxPairedSamplerRange.RegisterSpace = UINT(bindingSpace); - dxPairedSamplerRange.OffsetInDescriptorsFromTableStart = D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND; - } +Result D3D12Device::createBufferView(IBufferResource* buffer, IResourceView::Desc const& desc, IResourceView** outView) +{ + auto resourceImpl = (BufferResourceImpl*) buffer; + auto resourceDesc = *resourceImpl->getDesc(); - } - break; + RefPtr viewImpl = new ResourceViewImpl(); + viewImpl->m_resource = resourceImpl; + switch (desc.type) + { + default: + return SLANG_FAIL; - case DescriptorSlotType::InputAttachment: - case DescriptorSlotType::StorageImage: - case DescriptorSlotType::StorageTexelBuffer: - case DescriptorSlotType::StorageBuffer: - case DescriptorSlotType::DynamicStorageBuffer: - { - if( dxRegister < 0 ) - { - dxRegister = uavRegisterCounter; - } - uavRegisterCounter = dxRegister + bindingCount; + case IResourceView::Type::UnorderedAccess: + { + D3D12_UNORDERED_ACCESS_VIEW_DESC uavDesc = {}; + uavDesc.ViewDimension = D3D12_UAV_DIMENSION_BUFFER; + uavDesc.Format = D3DUtil::getMapFormat(desc.format); + uavDesc.Buffer.FirstElement = 0; - dxRange.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_UAV; - dxRange.NumDescriptors = UINT(bindingCount); - dxRange.BaseShaderRegister = 
UINT(dxRegister); - dxRange.RegisterSpace = UINT(bindingSpace); - dxRange.OffsetInDescriptorsFromTableStart = D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND; - } - break; - case DescriptorSlotType::ReadOnlyStorageBuffer: + if(resourceDesc.elementSize) + { + uavDesc.Buffer.StructureByteStride = resourceDesc.elementSize; + uavDesc.Buffer.NumElements = UINT(resourceDesc.sizeInBytes / resourceDesc.elementSize); + } + else if(desc.format == Format::Unknown) + { + uavDesc.Buffer.Flags |= D3D12_BUFFER_UAV_FLAG_RAW; + uavDesc.Format = DXGI_FORMAT_R32_TYPELESS; + uavDesc.Buffer.NumElements = UINT(resourceDesc.sizeInBytes / 4); + } + else { - if (dxRegister < 0) - { - dxRegister = srvRegisterCounter; - } - srvRegisterCounter = dxRegister + bindingCount; - - dxRange.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV; - dxRange.NumDescriptors = UINT(bindingCount); - dxRange.BaseShaderRegister = UINT(dxRegister); - dxRange.RegisterSpace = UINT(bindingSpace); - dxRange.OffsetInDescriptorsFromTableStart = D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND; + uavDesc.Buffer.NumElements = UINT(resourceDesc.sizeInBytes / gfxGetFormatSize(desc.format)); } - break; - case DescriptorSlotType::UniformBuffer: - case DescriptorSlotType::DynamicUniformBuffer: - { - if( dxRegister < 0 ) - { - dxRegister = cbvRegisterCounter; - } - cbvRegisterCounter = dxRegister + bindingCount; - - dxRange.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_CBV; - dxRange.NumDescriptors = UINT(bindingCount); - dxRange.BaseShaderRegister = UINT(dxRegister); - dxRange.RegisterSpace = UINT(bindingSpace); - dxRange.OffsetInDescriptorsFromTableStart = D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND; - } - break; + // TODO: need to support the separate "counter resource" for the case + // of append/consume buffers with attached counters. 
+ SLANG_RETURN_ON_FAIL(m_cpuViewHeap.allocate(&viewImpl->m_descriptor)); + viewImpl->m_allocator = &m_cpuViewHeap; + m_device->CreateUnorderedAccessView(resourceImpl->m_resource, nullptr, &uavDesc, viewImpl->m_descriptor.cpuHandle); + } + break; + case IResourceView::Type::ShaderResource: + { + D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc = {}; + srvDesc.ViewDimension = D3D12_SRV_DIMENSION_BUFFER; + srvDesc.Format = D3DUtil::getMapFormat(desc.format); + srvDesc.Buffer.StructureByteStride = 0; + srvDesc.Buffer.FirstElement = 0; + srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + if(resourceDesc.elementSize) + { + srvDesc.Buffer.StructureByteStride = resourceDesc.elementSize; + srvDesc.Buffer.NumElements = UINT(resourceDesc.sizeInBytes / resourceDesc.elementSize); + } + else if(desc.format == Format::Unknown) + { + srvDesc.Buffer.Flags |= D3D12_BUFFER_SRV_FLAG_RAW; + srvDesc.Format = DXGI_FORMAT_R32_TYPELESS; + srvDesc.Buffer.NumElements = UINT(resourceDesc.sizeInBytes / 4); + } + else + { + srvDesc.Buffer.NumElements = UINT(resourceDesc.sizeInBytes / gfxGetFormatSize(desc.format)); } + + SLANG_RETURN_ON_FAIL(m_cpuViewHeap.allocate(&viewImpl->m_descriptor)); + viewImpl->m_allocator = &m_cpuViewHeap; + m_device->CreateShaderResourceView(resourceImpl->m_resource, &srvDesc, viewImpl->m_descriptor.cpuHandle); } + break; } - *outLayout = descriptorSetLayoutImpl.detach(); + *outView = viewImpl.detach(); return SLANG_OK; } -Result D3D12Device::createPipelineLayout(const IPipelineLayout::Desc& desc, IPipelineLayout** outLayout) +Result D3D12Device::createFramebuffer(IFramebuffer::Desc const& desc, IFramebuffer** outFb) { - static const UInt kMaxRanges = 16; - static const UInt kMaxRootParameters = 32; + RefPtr framebuffer = new FramebufferImpl(); + framebuffer->renderTargetViews.setCount(desc.renderTargetCount); + framebuffer->renderTargetDescriptors.setCount(desc.renderTargetCount); + 
framebuffer->renderTargetClearValues.setCount(desc.renderTargetCount); + for (uint32_t i = 0; i < desc.renderTargetCount; i++) + { + framebuffer->renderTargetViews[i] = desc.renderTargetViews[i]; + framebuffer->renderTargetDescriptors[i] = + static_cast(desc.renderTargetViews[i])->m_descriptor.cpuHandle; + auto clearValue = + static_cast( + static_cast(desc.renderTargetViews[i])->m_resource.Ptr()) + ->getDesc() + ->optimalClearValue.color; + memcpy(&framebuffer->renderTargetClearValues[i], &clearValue, sizeof(ColorClearValue)); + } + framebuffer->depthStencilView = desc.depthStencilView; + if (desc.depthStencilView) + { + framebuffer->depthStencilClearValue = + static_cast( + static_cast(desc.depthStencilView)->m_resource.Ptr()) + ->getDesc() + ->optimalClearValue.depthStencil; + framebuffer->depthStencilDescriptor = + static_cast(desc.depthStencilView)->m_descriptor.cpuHandle; + } + else + { + framebuffer->depthStencilDescriptor.ptr = 0; + } + *outFb = framebuffer.detach(); + return SLANG_OK; +} - D3D12_DESCRIPTOR_RANGE ranges[kMaxRanges]; - D3D12_ROOT_PARAMETER rootParameters[kMaxRootParameters]; +Result D3D12Device::createFramebufferLayout( + IFramebufferLayout::Desc const& desc, IFramebufferLayout** outLayout) +{ + RefPtr layout = new FramebufferLayoutImpl(); + layout->m_renderTargets.setCount(desc.renderTargetCount); + for (uint32_t i = 0; i < desc.renderTargetCount; i++) + { + layout->m_renderTargets[i] = desc.renderTargets[i]; + } + + if (desc.depthStencil) + { + layout->m_hasDepthStencil = true; + layout->m_depthStencil = *desc.depthStencil; + } + else + { + layout->m_hasDepthStencil = false; + } + *outLayout = layout.detach(); + return SLANG_OK; +} - UInt rangeCount = 0; - UInt rootParameterCount = 0; +Result D3D12Device::createRenderPassLayout( + const IRenderPassLayout::Desc& desc, + IRenderPassLayout** outRenderPassLayout) +{ + RefPtr result = new RenderPassLayoutImpl(); + result->init(desc); + *outRenderPassLayout = result.detach(); + return SLANG_OK; 
+} - auto descriptorSetCount = desc.descriptorSetCount; +Result D3D12Device::createInputLayout(const InputElementDesc* inputElements, UInt inputElementCount, IInputLayout** outLayout) +{ + RefPtr layout(new InputLayoutImpl); - Int spaceCounter = 0; + // Work out a buffer size to hold all text + size_t textSize = 0; + for (int i = 0; i < Int(inputElementCount); ++i) + { + const char* text = inputElements[i].semanticName; + textSize += text ? (::strlen(text) + 1) : 0; + } + layout->m_text.setCount(textSize); + char* textPos = layout->m_text.getBuffer(); - // We are going to make two passes over the descriptor set layouts - // that are being used to build the pipeline layout. In the first - // pass we will collect all the descriptor ranges that have been - // specified, applying an offset to their register spaces as needed. // - for(UInt dd = 0; dd < descriptorSetCount; ++dd) + List& elements = layout->m_elements; + elements.setCount(inputElementCount); + + + for (UInt i = 0; i < inputElementCount; ++i) { - auto& descriptorSetInfo = desc.descriptorSets[dd]; - auto descriptorSetLayout = (DescriptorSetLayoutImpl*) descriptorSetInfo.layout; + const InputElementDesc& srcEle = inputElements[i]; + D3D12_INPUT_ELEMENT_DESC& dstEle = elements[i]; - // For now we assume that the register space used for - // logical descriptor set #N will be space N. - // - // TODO: This might need to be revisited in the future because - // a single logical descriptor set might need to encompass stuff - // that comes from multiple spaces (e.g., if it contains an unbounded - // array). 
- // - Int space = descriptorSetInfo.space; - if( space < 0 ) + // Add text to the buffer + const char* semanticName = srcEle.semanticName; + if (semanticName) { - space = spaceCounter; + const int len = int(::strlen(semanticName)); + ::memcpy(textPos, semanticName, len + 1); + semanticName = textPos; + textPos += len + 1; } - spaceCounter = space+1; - - // Copy descriptor range information from the set layout into our - // temporary copy (this is required because the same set layout - // might be applied to different ranges). - // - // API design note: this copy step could be avoided if the D3D - // API allowed for a "space offset" to be applied as part of - // a descriptor-table root parameter. - // - for(auto setDescriptorRange : descriptorSetLayout->m_dxRanges) - { - auto& range = ranges[rangeCount++]; - range = setDescriptorRange; - range.RegisterSpace = UINT(space); - // HACK: in order to deal with SM5.0 shaders, `u` registers - // in `space0` need to start with a number *after* the number - // of `SV_Target` outputs that will be used. - // - // TODO: This is clearly a mess, and doing this behavior here - // means it *won't* work for SM5.1 where the restriction is - // lifted. The only real alternative is to rely on explicit - // register numbers (e.g., from shader reflection) but that - // goes against the simplicity that this API layer strives for - // (everything so far has been set up to work correctly with - // automatic assignment of bindings). 
- // - if( range.RegisterSpace == 0 - && range.RangeType == D3D12_DESCRIPTOR_RANGE_TYPE_UAV ) - { - range.BaseShaderRegister += UINT(desc.renderTargetCount); - } - } + dstEle.SemanticName = semanticName; + dstEle.SemanticIndex = (UINT)srcEle.semanticIndex; + dstEle.Format = D3DUtil::getMapFormat(srcEle.format); + dstEle.InputSlot = 0; + dstEle.AlignedByteOffset = (UINT)srcEle.offset; + dstEle.InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA; + dstEle.InstanceDataStepRate = 0; } - // In our second pass, we will copy over root parameters, which - // may end up pointing into the list of ranges from the first step. - // - auto rangePtr = &ranges[0]; - for(UInt dd = 0; dd < descriptorSetCount; ++dd) - { - auto& descriptorSetInfo = desc.descriptorSets[dd]; - auto descriptorSetLayout = (DescriptorSetLayoutImpl*) descriptorSetInfo.layout; + *outLayout = layout.detach(); + return SLANG_OK; +} - // For now we assume that the register space used for - // logical descriptor set #N will be space N. - // - // Note: this is the same assumption made in the first - // loop, and any change/fix will need to be made to - // both places consistently. - // - UInt bindingSpace = dd; +Result D3D12Device::readBufferResource( + IBufferResource* bufferIn, + size_t offset, + size_t size, + ISlangBlob** outBlob) +{ + auto encodeInfo = encodeResourceCommands(); - // Copy root parameter information from the set layout to our - // overall pipeline layout. 
- for( auto setRootParameter : descriptorSetLayout->m_dxRootParameters ) - { - auto& rootParameter = rootParameters[rootParameterCount++]; - rootParameter = setRootParameter; + BufferResourceImpl* buffer = static_cast(bufferIn); - switch( rootParameter.ParameterType ) - { - default: - break; + const size_t bufferSize = buffer->getDesc()->sizeInBytes; - case D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE: - // In the case where this parameter is a descriptor table, it - // needs to point into our array of ranges (with offsets applied), - // so we will fix up those pointers here. - // - rootParameter.DescriptorTable.pDescriptorRanges = rangePtr; - rangePtr += rootParameter.DescriptorTable.NumDescriptorRanges; - break; + // This will be slow!!! - it blocks CPU on GPU completion + D3D12Resource& resource = buffer->m_resource; - case D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS: - // In the case where the parameter is a root constant range, - // it needs to reflect the register space for the descriptor - // set, as computed based on sets specified. - // - rootParameter.Constants.RegisterSpace = UINT(bindingSpace); - break; - } - } - } + // Readback heap + D3D12_HEAP_PROPERTIES heapProps; + heapProps.Type = D3D12_HEAP_TYPE_READBACK; + heapProps.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN; + heapProps.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN; + heapProps.CreationNodeMask = 1; + heapProps.VisibleNodeMask = 1; - D3D12_ROOT_SIGNATURE_DESC rootSignatureDesc = {}; - rootSignatureDesc.NumParameters = UINT(rootParameterCount); - rootSignatureDesc.pParameters = rootParameters; + // Resource to readback to + D3D12_RESOURCE_DESC stagingDesc; + _initBufferResourceDesc(bufferSize, stagingDesc); - // TODO: static samplers should be reasonably easy to support... 
- rootSignatureDesc.NumStaticSamplers = 0; - rootSignatureDesc.pStaticSamplers = nullptr; + D3D12Resource stageBuf; + SLANG_RETURN_ON_FAIL(stageBuf.initCommitted(m_device, heapProps, D3D12_HEAP_FLAG_NONE, stagingDesc, D3D12_RESOURCE_STATE_COPY_DEST, nullptr)); - // TODO: only set this flag if needed (requires creating root - // signature at same time as pipeline state...). - // - rootSignatureDesc.Flags = D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT; + // Do the copy + encodeInfo.d3dCommandList->CopyBufferRegion(stageBuf, 0, resource, 0, bufferSize); + + // Wait until complete + submitResourceCommandsAndWait(encodeInfo); - ComPtr signature; - ComPtr error; - if( SLANG_FAILED(m_D3D12SerializeRootSignature(&rootSignatureDesc, D3D_ROOT_SIGNATURE_VERSION_1, signature.writeRef(), error.writeRef())) ) + // Map and copy + RefPtr blob = new ListBlob(); { - fprintf(stderr, "error: D3D12SerializeRootSignature failed"); - if( error ) - { - fprintf(stderr, ": %s\n", (const char*) error->GetBufferPointer()); - } - return SLANG_FAIL; - } + UINT8* data; + D3D12_RANGE readRange = { 0, bufferSize }; - ComPtr rootSignature; - SLANG_RETURN_ON_FAIL(m_device->CreateRootSignature(0, signature->GetBufferPointer(), signature->GetBufferSize(), IID_PPV_ARGS(rootSignature.writeRef()))); + SLANG_RETURN_ON_FAIL(stageBuf.getResource()->Map(0, &readRange, reinterpret_cast(&data))); + // Copy to memory buffer + blob->m_data.setCount(bufferSize); + ::memcpy(blob->m_data.getBuffer(), data, bufferSize); - RefPtr pipelineLayoutImpl = new PipelineLayoutImpl(); - pipelineLayoutImpl->m_rootSignature = rootSignature; - pipelineLayoutImpl->m_descriptorSetCount = descriptorSetCount; - *outLayout = pipelineLayoutImpl.detach(); + stageBuf.getResource()->Unmap(0, nullptr); + } + *outBlob = blob.detach(); return SLANG_OK; } -Result D3D12Device::createDescriptorSet( - IDescriptorSetLayout* layout, - IDescriptorSet::Flag::Enum flag, - IDescriptorSet** outDescriptorSet) +Result 
D3D12Device::createProgram(const IShaderProgram::Desc& desc, IShaderProgram** outProgram) { - auto layoutImpl = (DescriptorSetLayoutImpl*) layout; - - RefPtr descriptorSetImpl = new DescriptorSetImpl(); - descriptorSetImpl->m_renderer = this; - descriptorSetImpl->m_layout = layoutImpl; - - // We allocate CPU-visible descriptor tables to providing the - // backing storage for each descriptor set. GPU-visible storage - // will only be allocated as needed during per-frame logic in - // order to ensure that a descriptor set it available for use - // in rendering. - // - Int resourceCount = layoutImpl->m_resourceCount; - if( resourceCount ) + RefPtr shaderProgram = new ShaderProgramImpl(); + shaderProgram->m_pipelineType = desc.pipelineType; + shaderProgram->slangProgram = desc.slangProgram; + RootShaderObjectLayoutImpl::create( + this, + desc.slangProgram, + desc.slangProgram->getLayout(), + shaderProgram->m_rootObjectLayout.writeRef()); + if (desc.slangProgram->getSpecializationParamCount() != 0) { - auto resourceHeap = &m_cpuViewHeap; - descriptorSetImpl->m_resourceHeap = resourceHeap; - descriptorSetImpl->m_resourceTable = resourceHeap->allocate(int(resourceCount)); - descriptorSetImpl->m_resourceObjects.setCount(resourceCount); + // For a specializable program, we don't invoke any actual slang compilation yet. + *outProgram = shaderProgram.detach(); + return SLANG_OK; } - - Int samplerCount = layoutImpl->m_samplerCount; - if( samplerCount ) + // For a fully specialized program, read and store its kernel code in `shaderProgram`. 
+ auto programReflection = desc.slangProgram->getLayout(); + for (SlangUInt i = 0; i < programReflection->getEntryPointCount(); i++) { - auto samplerHeap = &m_cpuSamplerHeap; - descriptorSetImpl->m_samplerHeap = samplerHeap; - descriptorSetImpl->m_samplerTable = samplerHeap->allocate(int(samplerCount)); - descriptorSetImpl->m_samplerObjects.setCount(samplerCount); + auto entryPointInfo = programReflection->getEntryPointByIndex(i); + auto stage = entryPointInfo->getStage(); + ComPtr kernelCode; + ComPtr diagnostics; + auto compileResult = desc.slangProgram->getEntryPointCode( + (SlangInt)i, 0, kernelCode.writeRef(), diagnostics.writeRef()); + if (diagnostics) + { + // TODO: report compile error. + } + SLANG_RETURN_ON_FAIL(compileResult); + List* shaderCodeDestBuffer = nullptr; + switch (stage) + { + case SLANG_STAGE_COMPUTE: + shaderCodeDestBuffer = &shaderProgram->m_computeShader; + break; + case SLANG_STAGE_VERTEX: + shaderCodeDestBuffer = &shaderProgram->m_vertexShader; + break; + case SLANG_STAGE_FRAGMENT: + shaderCodeDestBuffer = &shaderProgram->m_pixelShader; + break; + default: + SLANG_ASSERT(!"unsupported shader stage."); + return SLANG_FAIL; + } + shaderCodeDestBuffer->addRange( + reinterpret_cast(kernelCode->getBufferPointer()), + (Index)kernelCode->getBufferSize()); } + *outProgram = shaderProgram.detach(); + return SLANG_OK; +} + +Result D3D12Device::createShaderObjectLayout( + slang::TypeLayoutReflection* typeLayout, + ShaderObjectLayoutBase** outLayout) +{ + RefPtr layout; + SLANG_RETURN_ON_FAIL( + ShaderObjectLayoutImpl::createForElementType( + this, typeLayout, layout.writeRef())); + *outLayout = layout.detach(); + return SLANG_OK; +} - descriptorSetImpl->m_rootConstantData.setCount(layoutImpl->m_rootConstantDataSize); +Result D3D12Device::createShaderObject( + ShaderObjectLayoutBase* layout, + IShaderObject** outObject) +{ + RefPtr shaderObject; + SLANG_RETURN_ON_FAIL(ShaderObjectImpl::create( + this, reinterpret_cast(layout), + 
shaderObject.writeRef())); + *outObject = shaderObject.detach(); + return SLANG_OK; +} - *outDescriptorSet = descriptorSetImpl.detach(); +Result SLANG_MCALL + D3D12Device::createRootShaderObject(IShaderProgram* program, IShaderObject** outObject) +{ + auto programImpl = dynamic_cast(program); + RefPtr shaderObject; + SLANG_RETURN_ON_FAIL(RootShaderObjectImpl::create( + this, programImpl->m_rootObjectLayout, shaderObject.writeRef())); + *outObject = shaderObject.detach(); return SLANG_OK; } Result D3D12Device::createGraphicsPipelineState(const GraphicsPipelineStateDesc& inDesc, IPipelineState** outState) { GraphicsPipelineStateDesc desc = inDesc; - preparePipelineDesc(desc); - - auto pipelineLayoutImpl = (PipelineLayoutImpl*) desc.pipelineLayout; auto programImpl = (ShaderProgramImpl*) desc.program; + + if (!programImpl->m_rootObjectLayout->m_rootSignature) + { + RefPtr pipelineStateImpl = new PipelineStateImpl(); + pipelineStateImpl->init(desc); + *outState = pipelineStateImpl.detach(); + return SLANG_OK; + } + + // Only actually create a D3D12 pipeline state if the pipeline is fully specialized. 
auto inputLayoutImpl = (InputLayoutImpl*) desc.inputLayout; // Describe and create the graphics pipeline state object (PSO) D3D12_GRAPHICS_PIPELINE_STATE_DESC psoDesc = {}; - psoDesc.pRootSignature = pipelineLayoutImpl->m_rootSignature; + psoDesc.pRootSignature = programImpl->m_rootObjectLayout->m_rootSignature; psoDesc.VS = { programImpl->m_vertexShader.getBuffer(), SIZE_T(programImpl->m_vertexShader.getCount()) }; psoDesc.PS = { programImpl->m_pixelShader .getBuffer(), SIZE_T(programImpl->m_pixelShader .getCount()) }; @@ -4124,10 +4829,15 @@ Result D3D12Device::createGraphicsPipelineState(const GraphicsPipelineStateDesc& Result D3D12Device::createComputePipelineState(const ComputePipelineStateDesc& inDesc, IPipelineState** outState) { ComputePipelineStateDesc desc = inDesc; - preparePipelineDesc(desc); - auto pipelineLayoutImpl = (PipelineLayoutImpl*) desc.pipelineLayout; auto programImpl = (ShaderProgramImpl*) desc.program; + if (!programImpl->m_rootObjectLayout->m_rootSignature) + { + RefPtr pipelineStateImpl = new PipelineStateImpl(); + pipelineStateImpl->init(desc); + *outState = pipelineStateImpl.detach(); + return SLANG_OK; + } // Only actually create a D3D12 pipeline state if the pipeline is fully specialized. ComPtr pipelineState; @@ -4135,7 +4845,7 @@ Result D3D12Device::createComputePipelineState(const ComputePipelineStateDesc& i { // Describe and create the compute pipeline state object D3D12_COMPUTE_PIPELINE_STATE_DESC computeDesc = {}; - computeDesc.pRootSignature = pipelineLayoutImpl->m_rootSignature; + computeDesc.pRootSignature = programImpl->m_rootObjectLayout->m_rootSignature; computeDesc.CS = { programImpl->m_computeShader.getBuffer(), SIZE_T(programImpl->m_computeShader.getCount())}; -- cgit v1.2.3