diff options
| author | Yong He <yonghe@outlook.com> | 2021-03-04 16:25:58 -0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2021-03-04 16:25:58 -0800 |
| commit | a5ac4999b4dea546a7ef824669ab1809224b6448 (patch) | |
| tree | 15bb22eb98a94f7f81489deef55396461501d3dc /tools | |
| parent | 13ff0bd345990c0fdfb7b52ebd5339cddb04889e (diff) | |
Refactor `gfx` to surface `CommandBuffer` interface. (#1735)
* Refactor `gfx` to surface `CommandBuffer` interface.
* Fixes.
* Fix code review issues, and make Vulkan runnable on devices without `VK_EXT_extended_dynamic_state`.
* Update solution files
* Move out-of-date examples to examples/experimental
Co-authored-by: Yong He <yhe@nvidia.com>
Diffstat (limited to 'tools')
40 files changed, 4975 insertions, 3055 deletions
diff --git a/tools/gfx/command-writer.h b/tools/gfx/command-writer.h new file mode 100644 index 000000000..8aba454f4 --- /dev/null +++ b/tools/gfx/command-writer.h @@ -0,0 +1,271 @@ +#pragma once + +#include "slang-gfx.h" +#include "slang-com-ptr.h" +#include "core/slang-basic.h" + +namespace gfx +{ + +enum class CommandName +{ + SetPipelineState, + SetDescriptorSet, + BindRootShaderObject, + SetFramebuffer, + ClearFrame, + SetViewports, + SetScissorRects, + SetPrimitiveTopology, + SetVertexBuffers, + SetIndexBuffer, + Draw, + DrawIndexed, + SetStencilReference, + DispatchCompute, + UploadBufferData, + CopyBuffer +}; + +const uint8_t kMaxCommandOperands = 5; + +struct Command +{ + CommandName name; + uint32_t operands[kMaxCommandOperands]; + Command() = default; + Command(CommandName inName, uint32_t op) + : name(inName) + { + operands[0] = op; + } + Command(CommandName inName, uint32_t op1, uint32_t op2) + : name(inName) + { + operands[0] = op1; + operands[1] = op2; + } + Command(CommandName inName, uint32_t op1, uint32_t op2, uint32_t op3) + : name(inName) + { + operands[0] = op1; + operands[1] = op2; + operands[2] = op3; + } + Command(CommandName inName, uint32_t op1, uint32_t op2, uint32_t op3, uint32_t op4) + : name(inName) + { + operands[0] = op1; + operands[1] = op2; + operands[2] = op3; + operands[3] = op4; + } + Command( + CommandName inName, + uint32_t op1, + uint32_t op2, + uint32_t op3, + uint32_t op4, + uint32_t op5) + : name(inName) + { + operands[0] = op1; + operands[1] = op2; + operands[2] = op3; + operands[3] = op4; + operands[4] = op5; + } +}; + +class CommandWriter +{ +public: + Slang::List<Command> m_commands; + Slang::List<Slang::ComPtr<ISlangUnknown>> m_objects; + Slang::List<uint8_t> m_data; + +public: + void clear() + { + m_commands.clear(); + for (auto& obj : m_objects) + obj = nullptr; + m_objects.clear(); + m_data.clear(); + } + + // Copies user data into `m_data` buffer and returns the offset to retrieve the data. 
+ uint32_t encodeData(const void* data, size_t size) + { + uint32_t offset = (uint32_t)m_data.getCount(); + m_data.setCount(m_data.getCount() + (Slang::Index)size); + memcpy(m_data.getBuffer() + offset, data, size); + return offset; + } + + uint32_t encodeObject(ISlangUnknown* obj) + { + uint32_t offset = (uint32_t)m_objects.getCount(); + ComPtr<ISlangUnknown> ptr; + ptr = obj; + m_objects.add(ptr); + return offset; + } + + template <typename T> T* getObject(uint32_t offset) + { + return static_cast<T*>(m_objects[offset].get()); + } + + template <typename T> T* getData(uint32_t offset) + { + return reinterpret_cast<T*>(m_data.getBuffer() + offset); + } + + void setPipelineState(IPipelineState* state) + { + auto offset = encodeObject(state); + m_commands.add(Command(CommandName::SetPipelineState, offset)); + } + + void setDescriptorSet( + PipelineType pipelineType, + IPipelineLayout* layout, + UInt index, + IDescriptorSet* descriptorSet) + { + uint32_t pipelineLayoutOffset = encodeObject(layout); + uint32_t descSetOffset = encodeObject(descriptorSet); + m_commands.add(Command( + CommandName::SetDescriptorSet, + (uint32_t)pipelineType, + pipelineLayoutOffset, + (uint32_t)index, + descSetOffset)); + } + + void bindRootShaderObject(PipelineType pipelineType, IShaderObject* object) + { + auto rootOffset = encodeObject(object); + m_commands.add(Command( + CommandName::BindRootShaderObject, + (uint32_t)pipelineType, rootOffset)); + } + + void uploadBufferData(IBufferResource* buffer, size_t offset, size_t size, void* data) + { + auto bufferOffset = encodeObject(buffer); + auto dataOffset = encodeData(data, size); + m_commands.add(Command( + CommandName::UploadBufferData, + bufferOffset, + (uint32_t)offset, + (uint32_t)size, + dataOffset)); + } + + void copyBuffer( + IBufferResource* dst, + size_t dstOffset, + IBufferResource* src, + size_t srcOffset, + size_t size) + { + auto dstBuffer = encodeObject(dst); + auto srcBuffer = encodeObject(src); + m_commands.add(Command( + 
CommandName::CopyBuffer, + dstBuffer, + (uint32_t)dstOffset, + srcBuffer, + (uint32_t)srcOffset, + (uint32_t)size)); + } + + void setFramebuffer(IFramebuffer* frameBuffer) + { + uint32_t framebufferOffset = encodeObject(frameBuffer); + m_commands.add(Command(CommandName::SetFramebuffer, framebufferOffset)); + } + + void clearFrame(uint32_t colorBufferMask, bool clearDepth, bool clearStencil) + { + m_commands.add(Command( + CommandName::ClearFrame, colorBufferMask, clearDepth ? 1 : 0, clearStencil ? 1 : 0)); + } + + void setViewports(UInt count, const Viewport* viewports) + { + auto offset = encodeData(viewports, sizeof(Viewport) * count); + m_commands.add(Command(CommandName::SetViewports, (uint32_t)count, offset)); + } + + void setScissorRects(UInt count, const ScissorRect* scissors) + { + auto offset = encodeData(scissors, sizeof(ScissorRect) * count); + m_commands.add(Command(CommandName::SetScissorRects, (uint32_t)count, offset)); + } + + void setPrimitiveTopology(PrimitiveTopology topology) + { + m_commands.add(Command(CommandName::SetPrimitiveTopology, (uint32_t)topology)); + } + + void setVertexBuffers( + UInt startSlot, + UInt slotCount, + IBufferResource* const* buffers, + const UInt* strides, + const UInt* offsets) + { + uint32_t bufferOffset = 0; + for (UInt i = 0; i < slotCount; i++) + { + auto offset = encodeObject(buffers[i]); + if (i == 0) + bufferOffset = offset; + } + uint32_t stridesOffset = encodeData(strides, sizeof(UInt) * slotCount); + uint32_t offsetsOffset = encodeData(offsets, sizeof(UInt) * slotCount); + m_commands.add(Command( + CommandName::SetVertexBuffers, + (uint32_t)startSlot, + (uint32_t)slotCount, + bufferOffset, + stridesOffset, + offsetsOffset)); + } + + void setIndexBuffer(IBufferResource* buffer, Format indexFormat, UInt offset) + { + auto bufferOffset = encodeObject(buffer); + m_commands.add(Command( + CommandName::SetIndexBuffer, bufferOffset, (uint32_t)indexFormat, (uint32_t)offset)); + } + + void draw(UInt vertexCount, UInt 
startVertex) + { + m_commands.add(Command(CommandName::Draw, (uint32_t)vertexCount, (uint32_t)startVertex)); + } + + void drawIndexed(UInt indexCount, UInt startIndex, UInt baseVertex) + { + m_commands.add(Command( + CommandName::DrawIndexed, + (uint32_t)indexCount, + (uint32_t)startIndex, + (uint32_t)baseVertex)); + } + + void setStencilReference(uint32_t referenceValue) + { + m_commands.add(Command(CommandName::SetStencilReference, referenceValue)); + } + + void dispatchCompute(int x, int y, int z) + { + m_commands.add( + Command(CommandName::DispatchCompute, (uint32_t)x, (uint32_t)y, (uint32_t)z)); + } +}; +} diff --git a/tools/gfx/cuda/render-cuda.cpp b/tools/gfx/cuda/render-cuda.cpp index a32bd2d03..47738068e 100644 --- a/tools/gfx/cuda/render-cuda.cpp +++ b/tools/gfx/cuda/render-cuda.cpp @@ -3,7 +3,9 @@ #include "slang-com-ptr.h" #include "slang-com-helper.h" #include "core/slang-basic.h" +#include "core/slang-blob.h" +#include "../command-writer.h" #include "../renderer-shared.h" #include "../render-graphics-common.h" #include "../slang-context.h" @@ -954,13 +956,366 @@ private: int m_deviceIndex = -1; CUdevice m_device = 0; CUcontext m_context = nullptr; - RefPtr<CUDAPipelineState> currentPipeline = nullptr; - RefPtr<CUDARootShaderObject> currentRootObject = nullptr; - public: + +public: + class CommandQueueImpl; + + class CommandBufferImpl + : public ICommandBuffer + , public CommandWriter + , public RefObject + { + public: + SLANG_REF_OBJECT_IUNKNOWN_ALL + ICommandBuffer* getInterface(const Guid& guid) + { + if (guid == GfxGUID::IID_ISlangUnknown || guid == GfxGUID::IID_ICommandBuffer) + return static_cast<ICommandBuffer*>(this); + return nullptr; + } + public: + virtual SLANG_NO_THROW void SLANG_MCALL encodeRenderCommands( + IRenderPassLayout* renderPass, + IFramebuffer* framebuffer, + IRenderCommandEncoder** outEncoder) override + { + SLANG_UNUSED(renderPass); + SLANG_UNUSED(framebuffer); + *outEncoder = nullptr; + } + + class ComputeCommandEncoderImpl 
+ : public IComputeCommandEncoder + { + public: + virtual SLANG_NO_THROW SlangResult SLANG_MCALL + queryInterface(SlangUUID const& uuid, void** outObject) override + { + if (uuid == GfxGUID::IID_ISlangUnknown || + uuid == GfxGUID::IID_IComputeCommandEncoder) + { + *outObject = static_cast<IComputeCommandEncoder*>(this); + return SLANG_OK; + } + *outObject = nullptr; + return SLANG_E_NO_INTERFACE; + } + virtual SLANG_NO_THROW uint32_t SLANG_MCALL addRef() { return 1; } + virtual SLANG_NO_THROW uint32_t SLANG_MCALL release() { return 1; } + + public: + CommandWriter* m_writer; + + virtual SLANG_NO_THROW void SLANG_MCALL endEncoding() override {} + void init(CommandBufferImpl* cmdBuffer) + { + m_writer = cmdBuffer; + } + + virtual SLANG_NO_THROW void SLANG_MCALL setPipelineState(IPipelineState* state) override + { + m_writer->setPipelineState(state); + } + virtual SLANG_NO_THROW void SLANG_MCALL + bindRootShaderObject(IShaderObject* object) override + { + m_writer->bindRootShaderObject(PipelineType::Compute, object); + } + + virtual SLANG_NO_THROW void SLANG_MCALL setDescriptorSet( + IPipelineLayout* layout, + UInt index, + IDescriptorSet* descriptorSet) override + { + m_writer->setDescriptorSet(PipelineType::Compute, layout, index, descriptorSet); + } + + virtual SLANG_NO_THROW void SLANG_MCALL dispatchCompute(int x, int y, int z) override + { + m_writer->dispatchCompute(x, y, z); + } + }; + + ComputeCommandEncoderImpl m_computeCommandEncoder; + virtual SLANG_NO_THROW void SLANG_MCALL + encodeComputeCommands(IComputeCommandEncoder** outEncoder) override + { + m_computeCommandEncoder.init(this); + *outEncoder = &m_computeCommandEncoder; + } + + class ResourceCommandEncoderImpl + : public IResourceCommandEncoder + { + public: + virtual SLANG_NO_THROW SlangResult SLANG_MCALL + queryInterface(SlangUUID const& uuid, void** outObject) override + { + if (uuid == GfxGUID::IID_ISlangUnknown || + uuid == GfxGUID::IID_IResourceCommandEncoder) + { + *outObject = 
static_cast<IResourceCommandEncoder*>(this); + return SLANG_OK; + } + *outObject = nullptr; + return SLANG_E_NO_INTERFACE; + } + virtual SLANG_NO_THROW uint32_t SLANG_MCALL addRef() { return 1; } + virtual SLANG_NO_THROW uint32_t SLANG_MCALL release() { return 1; } + + public: + CommandWriter* m_writer; + + void init(CommandBufferImpl* cmdBuffer) + { + m_writer = cmdBuffer; + } + + virtual SLANG_NO_THROW void SLANG_MCALL endEncoding() override {} + virtual SLANG_NO_THROW void SLANG_MCALL copyBuffer( + IBufferResource* dst, + size_t dstOffset, + IBufferResource* src, + size_t srcOffset, + size_t size) override + { + m_writer->copyBuffer(dst, dstOffset, src, srcOffset, size); + } + + virtual SLANG_NO_THROW void SLANG_MCALL + uploadBufferData(IBufferResource* dst, size_t offset, size_t size, void* data) + { + m_writer->uploadBufferData(dst, offset, size, data); + } + }; + + ResourceCommandEncoderImpl m_resourceCommandEncoder; + + virtual SLANG_NO_THROW void SLANG_MCALL + encodeResourceCommands(IResourceCommandEncoder** outEncoder) override + { + m_resourceCommandEncoder.init(this); + *outEncoder = &m_resourceCommandEncoder; + } + + virtual SLANG_NO_THROW void SLANG_MCALL close() override {} + }; + + class CommandQueueImpl + : public ICommandQueue + , public RefObject + { + public: + SLANG_REF_OBJECT_IUNKNOWN_ALL + ICommandQueue* getInterface(const Guid& guid) + { + if (guid == GfxGUID::IID_ISlangUnknown || guid == GfxGUID::IID_ICommandQueue) + return static_cast<ICommandQueue*>(this); + return nullptr; + } + + public: + RefPtr<CUDAPipelineState> currentPipeline; + RefPtr<CUDARootShaderObject> currentRootObject; + RefPtr<CUDARenderer> renderer; + CUstream stream; + Desc m_desc; + public: + void init(CUDARenderer* inRenderer) + { + renderer = inRenderer; + m_desc.type = ICommandQueue::QueueType::Graphics; + cuStreamCreate(&stream, 0); + } + ~CommandQueueImpl() + { + cuStreamSynchronize(stream); + cuStreamDestroy(stream); + currentPipeline = nullptr; + currentRootObject 
= nullptr; + } + + public: + virtual SLANG_NO_THROW const Desc& SLANG_MCALL getDesc() override + { + return m_desc; + } + virtual SLANG_NO_THROW Result SLANG_MCALL + createCommandBuffer(ICommandBuffer** outCommandBuffer) override + { + RefPtr<CommandBufferImpl> result = new CommandBufferImpl(); + *outCommandBuffer = result.detach(); + return SLANG_OK; + } + + virtual SLANG_NO_THROW void SLANG_MCALL + executeCommandBuffers(uint32_t count, ICommandBuffer* const* commandBuffers) override + { + for (uint32_t i = 0; i < count; i++) + { + execute(static_cast<CommandBufferImpl*>(commandBuffers[i])); + } + } + + virtual SLANG_NO_THROW void SLANG_MCALL wait() override + { + cuStreamSynchronize(stream); + } + + public: + void setPipelineState(IPipelineState* state) + { + currentPipeline = dynamic_cast<CUDAPipelineState*>(state); + } + + Result bindRootShaderObject(PipelineType pipelineType, IShaderObject* object) + { + currentRootObject = dynamic_cast<CUDARootShaderObject*>(object); + if (currentRootObject) + return SLANG_OK; + return SLANG_E_INVALID_ARG; + } + + void dispatchCompute(int x, int y, int z) + { + // Specialize the compute kernel based on the shader object bindings. + RefPtr<PipelineStateBase> newPipeline; + renderer->maybeSpecializePipeline(currentPipeline, currentRootObject, newPipeline); + currentPipeline = static_cast<CUDAPipelineState*>(newPipeline.Ptr()); + + // Find out thread group size from program reflection. 
+ auto& kernelName = currentPipeline->shaderProgram->kernelName; + auto programLayout = static_cast<CUDAProgramLayout*>(currentRootObject->getLayout()); + int kernelId = programLayout->getKernelIndex(kernelName.getUnownedSlice()); + SLANG_ASSERT(kernelId != -1); + UInt threadGroupSize[3]; + programLayout->getKernelThreadGroupSize(kernelId, threadGroupSize); + + int sharedSizeInBytes; + cuFuncGetAttribute( + &sharedSizeInBytes, + CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES, + currentPipeline->shaderProgram->cudaKernel); + + // Copy global parameter data to the `SLANG_globalParams` symbol. + { + CUdeviceptr globalParamsSymbol = 0; + size_t globalParamsSymbolSize = 0; + cuModuleGetGlobal( + &globalParamsSymbol, + &globalParamsSymbolSize, + currentPipeline->shaderProgram->cudaModule, + "SLANG_globalParams"); + + CUdeviceptr globalParamsCUDAData = + currentRootObject->bufferResource + ? (CUdeviceptr)currentRootObject->bufferResource->getBindlessHandle() + : 0; + cudaMemcpyAsync( + (void*)globalParamsSymbol, + (void*)globalParamsCUDAData, + globalParamsSymbolSize, + cudaMemcpyDeviceToDevice, + 0); + } + // + // The argument data for the entry-point parameters are already + // stored in host memory in a CUDAEntryPointShaderObject, as expected by cuLaunchKernel. + // + auto entryPointBuffer = currentRootObject->entryPointObjects[kernelId]->getBuffer(); + auto entryPointDataSize = + currentRootObject->entryPointObjects[kernelId]->getBufferSize(); + + void* extraOptions[] = { + CU_LAUNCH_PARAM_BUFFER_POINTER, + entryPointBuffer, + CU_LAUNCH_PARAM_BUFFER_SIZE, + &entryPointDataSize, + CU_LAUNCH_PARAM_END, + }; + + // Once we have all the decessary data extracted and/or + // set up, we can launch the kernel and see what happens. 
+ // + auto cudaLaunchResult = cuLaunchKernel( + currentPipeline->shaderProgram->cudaKernel, + x, + y, + z, + int(threadGroupSize[0]), + int(threadGroupSize[1]), + int(threadGroupSize[2]), + sharedSizeInBytes, + stream, + nullptr, + extraOptions); + + SLANG_ASSERT(cudaLaunchResult == CUDA_SUCCESS); + } + + void copyBuffer( + IBufferResource* dst, + size_t dstOffset, + IBufferResource* src, + size_t srcOffset, + size_t size) + { + auto dstImpl = static_cast<MemoryCUDAResource*>(dst); + auto srcImpl = static_cast<MemoryCUDAResource*>(src); + cudaMemcpy( + (uint8_t*)dstImpl->m_cudaMemory + dstOffset, + (uint8_t*)srcImpl->m_cudaMemory + srcOffset, + size, + cudaMemcpyDefault); + } + + void uploadBufferData(IBufferResource* dst, size_t offset, size_t size, void* data) + { + auto dstImpl = static_cast<MemoryCUDAResource*>(dst); + cudaMemcpy((uint8_t*)dstImpl->m_cudaMemory + offset, data, size, cudaMemcpyDefault); + } + + void execute(CommandBufferImpl* commandBuffer) + { + for (auto& cmd : commandBuffer->m_commands) + { + switch (cmd.name) + { + case CommandName::SetPipelineState: + setPipelineState(commandBuffer->getObject<IPipelineState>(cmd.operands[0])); + break; + case CommandName::BindRootShaderObject: + bindRootShaderObject( + (PipelineType)cmd.operands[0], + commandBuffer->getObject<IShaderObject>(cmd.operands[1])); + break; + case CommandName::DispatchCompute: + dispatchCompute( + int(cmd.operands[0]), int(cmd.operands[1]), int(cmd.operands[2])); + break; + case CommandName::CopyBuffer: + copyBuffer( + commandBuffer->getObject<IBufferResource>(cmd.operands[0]), + cmd.operands[1], + commandBuffer->getObject<IBufferResource>(cmd.operands[2]), + cmd.operands[3], + cmd.operands[4]); + break; + case CommandName::UploadBufferData: + uploadBufferData( + commandBuffer->getObject<IBufferResource>(cmd.operands[0]), + cmd.operands[1], + cmd.operands[2], + commandBuffer->getData<uint8_t>(cmd.operands[3])); + break; + } + } + } + }; + +public: ~CUDARenderer() { - 
currentPipeline = nullptr; - currentRootObject = nullptr; if (m_context) { cuCtxDestroy(m_context); @@ -1470,15 +1825,6 @@ private: } virtual SLANG_NO_THROW Result SLANG_MCALL - bindRootShaderObject(PipelineType pipelineType, IShaderObject* object) override - { - currentRootObject = dynamic_cast<CUDARootShaderObject*>(object); - if (currentRootObject) - return SLANG_OK; - return SLANG_E_INVALID_ARG; - } - - virtual SLANG_NO_THROW Result SLANG_MCALL createProgram(const IShaderProgram::Desc& desc, IShaderProgram** outProgram) override { // If this is a specializable program, we just keep a reference to the slang program and @@ -1535,125 +1881,29 @@ private: return Result(); } - virtual SLANG_NO_THROW void* SLANG_MCALL map(IBufferResource* buffer, MapFlavor flavor) override + void* map(IBufferResource* buffer) { return dynamic_cast<MemoryCUDAResource*>(buffer)->m_cudaMemory; } - virtual SLANG_NO_THROW void SLANG_MCALL unmap(IBufferResource* buffer) override + void unmap(IBufferResource* buffer) { SLANG_UNUSED(buffer); } - virtual SLANG_NO_THROW void SLANG_MCALL setPipelineState(IPipelineState* state) override - { - currentPipeline = dynamic_cast<CUDAPipelineState*>(state); - } - - virtual SLANG_NO_THROW void SLANG_MCALL dispatchCompute(int x, int y, int z) override - { - // Specialize the compute kernel based on the shader object bindings. - maybeSpecializePipeline(currentRootObject); - - // Find out thread group size from program reflection. 
- auto& kernelName = currentPipeline->shaderProgram->kernelName; - auto programLayout = static_cast<CUDAProgramLayout*>(currentRootObject->getLayout()); - int kernelId = programLayout->getKernelIndex(kernelName.getUnownedSlice()); - SLANG_ASSERT(kernelId != -1); - UInt threadGroupSize[3]; - programLayout->getKernelThreadGroupSize(kernelId, threadGroupSize); - - int sharedSizeInBytes; - cuFuncGetAttribute( - &sharedSizeInBytes, - CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES, - currentPipeline->shaderProgram->cudaKernel); - - // Copy global parameter data to the `SLANG_globalParams` symbol. - { - CUdeviceptr globalParamsSymbol = 0; - size_t globalParamsSymbolSize = 0; - cuModuleGetGlobal( - &globalParamsSymbol, - &globalParamsSymbolSize, - currentPipeline->shaderProgram->cudaModule, - "SLANG_globalParams"); - - CUdeviceptr globalParamsCUDAData = - currentRootObject->bufferResource - ? (CUdeviceptr)currentRootObject->bufferResource->getBindlessHandle() - : 0; - cudaMemcpyAsync( - (void*)globalParamsSymbol, - (void*)globalParamsCUDAData, - globalParamsSymbolSize, - cudaMemcpyDeviceToDevice, - 0); - } - // - // The argument data for the entry-point parameters are already - // stored in host memory in a CUDAEntryPointShaderObject, as expected by cuLaunchKernel. - // - auto entryPointBuffer = currentRootObject->entryPointObjects[kernelId]->getBuffer(); - auto entryPointDataSize = currentRootObject->entryPointObjects[kernelId]->getBufferSize(); - - void* extraOptions[] = { - CU_LAUNCH_PARAM_BUFFER_POINTER, - entryPointBuffer, - CU_LAUNCH_PARAM_BUFFER_SIZE, - &entryPointDataSize, - CU_LAUNCH_PARAM_END, - }; - - // Once we have all the decessary data extracted and/or - // set up, we can launch the kernel and see what happens. 
- // - auto cudaLaunchResult = cuLaunchKernel( - currentPipeline->shaderProgram->cudaKernel, - x, - y, - z, - int(threadGroupSize[0]), - int(threadGroupSize[1]), - int(threadGroupSize[2]), - sharedSizeInBytes, - 0, - nullptr, - extraOptions); - - SLANG_ASSERT(cudaLaunchResult == CUDA_SUCCESS); - } - - virtual SLANG_NO_THROW void SLANG_MCALL submitGpuWork() override {} - - virtual SLANG_NO_THROW void SLANG_MCALL waitForGpu() override - { - auto result = cudaDeviceSynchronize(); - SLANG_ASSERT(result == CUDA_SUCCESS); - } - virtual SLANG_NO_THROW RendererType SLANG_MCALL getRendererType() const override { return RendererType::CUDA; } - virtual PipelineStateBase* getCurrentPipeline() override - { - return currentPipeline; - } - public: - virtual SLANG_NO_THROW void SLANG_MCALL setClearColor(const float color[4]) override - { - SLANG_UNUSED(color); - } - virtual SLANG_NO_THROW void SLANG_MCALL clearFrame() override {} - virtual SLANG_NO_THROW void SLANG_MCALL beginFrame() override {} - virtual SLANG_NO_THROW void SLANG_MCALL endFrame() override {} - virtual SLANG_NO_THROW void SLANG_MCALL - makeSwapchainImagePresentable(ISwapchain* swapchain) override + virtual SLANG_NO_THROW Result SLANG_MCALL + createCommandQueue(const ICommandQueue::Desc& desc, ICommandQueue** outQueue) override { - SLANG_UNUSED(swapchain); + RefPtr<CommandQueueImpl> queue = new CommandQueueImpl(); + queue->init(this); + *outQueue = queue.detach(); + return SLANG_OK; } virtual SLANG_NO_THROW Result SLANG_MCALL createSwapchain( const ISwapchain::Desc& desc, WindowHandle window, ISwapchain** outSwapchain) override @@ -1677,9 +1927,13 @@ public: SLANG_UNUSED(outFramebuffer); return SLANG_FAIL; } - virtual SLANG_NO_THROW void SLANG_MCALL setFramebuffer(IFramebuffer* frameBuffer) override + virtual SLANG_NO_THROW Result SLANG_MCALL createRenderPassLayout( + const IRenderPassLayout::Desc& desc, + IRenderPassLayout** outRenderPassLayout) override { - SLANG_UNUSED(frameBuffer); + SLANG_UNUSED(desc); + 
SLANG_UNUSED(outRenderPassLayout); + return SLANG_FAIL; } virtual SLANG_NO_THROW Result SLANG_MCALL createSamplerState(ISamplerState::Desc const& desc, ISamplerState** outSampler) override @@ -1699,6 +1953,7 @@ public: SLANG_UNUSED(outLayout); return SLANG_E_NOT_AVAILABLE; } + virtual SLANG_NO_THROW Result SLANG_MCALL createDescriptorSetLayout( const IDescriptorSetLayout::Desc& desc, IDescriptorSetLayout** outLayout) override { @@ -1706,6 +1961,7 @@ public: SLANG_UNUSED(outLayout); return SLANG_E_NOT_AVAILABLE; } + virtual SLANG_NO_THROW Result SLANG_MCALL createPipelineLayout(const IPipelineLayout::Desc& desc, IPipelineLayout** outLayout) override { @@ -1713,6 +1969,7 @@ public: SLANG_UNUSED(outLayout); return SLANG_E_NOT_AVAILABLE; } + virtual SLANG_NO_THROW Result SLANG_MCALL createDescriptorSet(IDescriptorSetLayout* layout, IDescriptorSet::Flag::Enum flags, IDescriptorSet** outDescriptorSet) override { @@ -1721,6 +1978,7 @@ public: SLANG_UNUSED(outDescriptorSet); return SLANG_E_NOT_AVAILABLE; } + virtual SLANG_NO_THROW Result SLANG_MCALL createGraphicsPipelineState( const GraphicsPipelineStateDesc& desc, IPipelineState** outState) override { @@ -1728,8 +1986,13 @@ public: SLANG_UNUSED(outState); return SLANG_E_NOT_AVAILABLE; } + virtual SLANG_NO_THROW SlangResult SLANG_MCALL readTextureResource( - ITextureResource* texture, ISlangBlob** outBlob, size_t* outRowPitch, size_t* outPixelSize) override + ITextureResource* texture, + ResourceState state, + ISlangBlob** outBlob, + size_t* outRowPitch, + size_t* outPixelSize) override { SLANG_UNUSED(texture); SLANG_UNUSED(outBlob); @@ -1738,65 +2001,23 @@ public: return SLANG_E_NOT_AVAILABLE; } - virtual SLANG_NO_THROW void SLANG_MCALL - setPrimitiveTopology(PrimitiveTopology topology) override - { - SLANG_UNUSED(topology); - } - virtual SLANG_NO_THROW void SLANG_MCALL setDescriptorSet( - PipelineType pipelineType, - IPipelineLayout* layout, - UInt index, - IDescriptorSet* descriptorSet) override - { - 
SLANG_UNUSED(pipelineType); - SLANG_UNUSED(layout); - SLANG_UNUSED(index); - SLANG_UNUSED(descriptorSet); - } - virtual SLANG_NO_THROW void SLANG_MCALL setVertexBuffers( - UInt startSlot, - UInt slotCount, - IBufferResource* const* buffers, - const UInt* strides, - const UInt* offsets) override - { - SLANG_UNUSED(startSlot); - SLANG_UNUSED(slotCount); - SLANG_UNUSED(buffers); - SLANG_UNUSED(strides); - SLANG_UNUSED(offsets); - } - virtual SLANG_NO_THROW void SLANG_MCALL - setIndexBuffer(IBufferResource* buffer, Format indexFormat, UInt offset = 0) override - { - SLANG_UNUSED(buffer); - SLANG_UNUSED(indexFormat); - SLANG_UNUSED(offset); - } - virtual SLANG_NO_THROW void SLANG_MCALL - setViewports(UInt count, Viewport const* viewports) override - { - SLANG_UNUSED(count); - SLANG_UNUSED(viewports); - } - virtual SLANG_NO_THROW void SLANG_MCALL - setScissorRects(UInt count, ScissorRect const* rects) override - { - SLANG_UNUSED(count); - SLANG_UNUSED(rects); - } - virtual SLANG_NO_THROW void SLANG_MCALL draw(UInt vertexCount, UInt startVertex) override - { - SLANG_UNUSED(vertexCount); - SLANG_UNUSED(startVertex); - } - virtual SLANG_NO_THROW void SLANG_MCALL - drawIndexed(UInt indexCount, UInt startIndex, UInt baseVertex) override + + virtual SLANG_NO_THROW Result SLANG_MCALL readBufferResource( + IBufferResource* buffer, + size_t offset, + size_t size, + ISlangBlob** outBlob) override { - SLANG_UNUSED(indexCount); - SLANG_UNUSED(startIndex); - SLANG_UNUSED(baseVertex); + auto bufferImpl = static_cast<MemoryCUDAResource*>(buffer); + RefPtr<ListBlob> blob = new ListBlob(); + blob->m_data.setCount((Index)size); + cudaMemcpy( + blob->m_data.getBuffer(), + (uint8_t*)bufferImpl->m_cudaMemory + offset, + size, + cudaMemcpyDefault); + *outBlob = blob.detach(); + return SLANG_OK; } }; diff --git a/tools/gfx/d3d/d3d-util.cpp b/tools/gfx/d3d/d3d-util.cpp index cb96c6211..a9686ab7d 100644 --- a/tools/gfx/d3d/d3d-util.cpp +++ b/tools/gfx/d3d/d3d-util.cpp @@ -1,8 +1,8 @@ // 
d3d-util.cpp #include "d3d-util.h" +#include <d3d12.h> #include <d3dcompiler.h> - #include <dxgi1_4.h> // We will use the C standard library just for printing error messages. @@ -26,6 +26,84 @@ using namespace Slang; return D3D11_PRIMITIVE_TOPOLOGY_UNDEFINED; } +D3D12_PRIMITIVE_TOPOLOGY_TYPE D3DUtil::getPrimitiveType(PrimitiveType type) +{ + switch (type) + { + case PrimitiveType::Point: + return D3D12_PRIMITIVE_TOPOLOGY_TYPE_POINT; + case PrimitiveType::Line: + return D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE; + case PrimitiveType::Triangle: + return D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; + case PrimitiveType::Patch: + return D3D12_PRIMITIVE_TOPOLOGY_TYPE_PATCH; + default: + break; + } + return D3D12_PRIMITIVE_TOPOLOGY_TYPE_UNDEFINED; +} + +D3D12_COMPARISON_FUNC D3DUtil::getComparisonFunc(ComparisonFunc func) +{ + switch (func) + { + case gfx::ComparisonFunc::Never: + return D3D12_COMPARISON_FUNC_NEVER; + case gfx::ComparisonFunc::Less: + return D3D12_COMPARISON_FUNC_LESS; + case gfx::ComparisonFunc::Equal: + return D3D12_COMPARISON_FUNC_EQUAL; + case gfx::ComparisonFunc::LessEqual: + return D3D12_COMPARISON_FUNC_LESS_EQUAL; + case gfx::ComparisonFunc::Greater: + return D3D12_COMPARISON_FUNC_GREATER; + case gfx::ComparisonFunc::NotEqual: + return D3D12_COMPARISON_FUNC_NOT_EQUAL; + case gfx::ComparisonFunc::GreaterEqual: + return D3D12_COMPARISON_FUNC_GREATER_EQUAL; + case gfx::ComparisonFunc::Always: + return D3D12_COMPARISON_FUNC_ALWAYS; + default: + return D3D12_COMPARISON_FUNC_NEVER; + } +} + +static D3D12_STENCIL_OP translateStencilOp(StencilOp op) +{ + switch (op) + { + case gfx::StencilOp::Keep: + return D3D12_STENCIL_OP_KEEP; + case gfx::StencilOp::Zero: + return D3D12_STENCIL_OP_ZERO; + case gfx::StencilOp::Replace: + return D3D12_STENCIL_OP_REPLACE; + case gfx::StencilOp::IncrementSaturate: + return D3D12_STENCIL_OP_INCR_SAT; + case gfx::StencilOp::DecrementSaturate: + return D3D12_STENCIL_OP_DECR_SAT; + case gfx::StencilOp::Invert: + return 
D3D12_STENCIL_OP_INVERT; + case gfx::StencilOp::IncrementWrap: + return D3D12_STENCIL_OP_INCR; + case gfx::StencilOp::DecrementWrap: + return D3D12_STENCIL_OP_DECR; + default: + return D3D12_STENCIL_OP_KEEP; + } +} + +D3D12_DEPTH_STENCILOP_DESC D3DUtil::translateStencilOpDesc(DepthStencilOpDesc desc) +{ + D3D12_DEPTH_STENCILOP_DESC rs; + rs.StencilDepthFailOp = translateStencilOp(desc.stencilDepthFailOp); + rs.StencilFailOp = translateStencilOp(desc.stencilFailOp); + rs.StencilFunc = getComparisonFunc(desc.stencilFunc); + rs.StencilPassOp = translateStencilOp(desc.stencilPassOp); + return rs; +} + /* static */DXGI_FORMAT D3DUtil::getMapFormat(Format format) { switch (format) @@ -47,6 +125,40 @@ using namespace Slang; } } +D3D12_RESOURCE_STATES D3DUtil::translateResourceState(ResourceState state) +{ + switch (state) + { + case gfx::ResourceState::Undefined: + return D3D12_RESOURCE_STATE_COMMON; + case gfx::ResourceState::ShaderResource: + return D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE | + D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE; + case gfx::ResourceState::UnorderedAccess: + return D3D12_RESOURCE_STATE_UNORDERED_ACCESS | D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE | + D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE; + case gfx::ResourceState::RenderTarget: + return D3D12_RESOURCE_STATE_RENDER_TARGET; + case gfx::ResourceState::DepthRead: + return D3D12_RESOURCE_STATE_DEPTH_READ; + case gfx::ResourceState::DepthWrite: + return D3D12_RESOURCE_STATE_DEPTH_WRITE; + case gfx::ResourceState::Present: + return D3D12_RESOURCE_STATE_PRESENT; + case gfx::ResourceState::CopySource: + return D3D12_RESOURCE_STATE_COPY_SOURCE; + case gfx::ResourceState::CopyDestination: + return D3D12_RESOURCE_STATE_COPY_DEST; + case gfx::ResourceState::ResolveSource: + return D3D12_RESOURCE_STATE_RESOLVE_SOURCE; + case gfx::ResourceState::ResolveDestination: + return D3D12_RESOURCE_STATE_RESOLVE_DEST; + default: + return D3D12_RESOURCE_STATE_COMMON; + } +} + + /* static */DXGI_FORMAT 
D3DUtil::calcResourceFormat(UsageType usage, Int usageFlags, DXGI_FORMAT format) { SLANG_UNUSED(usage); diff --git a/tools/gfx/d3d/d3d-util.h b/tools/gfx/d3d/d3d-util.h index 63a897206..4cbdcb61b 100644 --- a/tools/gfx/d3d/d3d-util.h +++ b/tools/gfx/d3d/d3d-util.h @@ -15,6 +15,7 @@ #include <D3Dcommon.h> #include <DXGIFormat.h> #include <dxgi.h> +#include <d3d12.h> namespace gfx { @@ -38,6 +39,12 @@ class D3DUtil /// Get primitive topology as D3D primitive topology static D3D_PRIMITIVE_TOPOLOGY getPrimitiveTopology(PrimitiveTopology prim); + static D3D12_PRIMITIVE_TOPOLOGY_TYPE getPrimitiveType(PrimitiveType type); + + static D3D12_COMPARISON_FUNC getComparisonFunc(ComparisonFunc func); + + static D3D12_DEPTH_STENCILOP_DESC translateStencilOpDesc(DepthStencilOpDesc desc); + /// Calculate size taking into account alignment. Alignment must be a power of 2 static UInt calcAligned(UInt size, UInt alignment) { return (size + alignment - 1) & ~(alignment - 1); } @@ -47,6 +54,8 @@ class D3DUtil /// Given a slang pixel format returns the equivalent DXGI_ pixel format. If the format is not known, will return DXGI_FORMAT_UNKNOWN static DXGI_FORMAT getMapFormat(Format format); + static D3D12_RESOURCE_STATES translateResourceState(ResourceState state); + /// Given the usage, flags, and format will return the most suitable format. 
Will return DXGI_UNKNOWN if combination is not possible static DXGI_FORMAT calcFormat(UsageType usage, DXGI_FORMAT format); /// Calculate appropriate format for creating a buffer for usage and flags diff --git a/tools/gfx/d3d11/render-d3d11.cpp b/tools/gfx/d3d11/render-d3d11.cpp index c64b1c3bd..cf743fd72 100644 --- a/tools/gfx/d3d11/render-d3d11.cpp +++ b/tools/gfx/d3d11/render-d3d11.cpp @@ -6,8 +6,7 @@ #include "core/slang-blob.h" //WORKING: #include "options.h" -#include "../renderer-shared.h" -#include "../render-graphics-common.h" +#include "../immediate-renderer-base.h" #include "../d3d/d3d-util.h" #include "../nvapi/nvapi-util.h" @@ -51,7 +50,7 @@ using namespace Slang; namespace gfx { -class D3D11Renderer : public GraphicsAPIRenderer +class D3D11Renderer : public ImmediateRendererBase { public: enum @@ -64,15 +63,8 @@ public: // Renderer implementation virtual SLANG_NO_THROW SlangResult SLANG_MCALL initialize(const Desc& desc) override; - virtual SLANG_NO_THROW void SLANG_MCALL setClearColor(const float color[4]) override; - virtual SLANG_NO_THROW void SLANG_MCALL clearFrame() override; - virtual SLANG_NO_THROW void SLANG_MCALL beginFrame() override; - virtual SLANG_NO_THROW void SLANG_MCALL endFrame() override; virtual SLANG_NO_THROW void SLANG_MCALL - makeSwapchainImagePresentable(ISwapchain* swapchain) override - { - SLANG_UNUSED(swapchain); - } + clearFrame(uint32_t colorBufferMask, bool clearDepth, bool clearStencil) override; virtual SLANG_NO_THROW Result SLANG_MCALL createSwapchain( const ISwapchain::Desc& desc, WindowHandle window, ISwapchain** outSwapchain) override; virtual SLANG_NO_THROW Result SLANG_MCALL createFramebufferLayout( @@ -80,6 +72,7 @@ public: virtual SLANG_NO_THROW Result SLANG_MCALL createFramebuffer(const IFramebuffer::Desc& desc, IFramebuffer** outFramebuffer) override; virtual SLANG_NO_THROW void SLANG_MCALL setFramebuffer(IFramebuffer* frameBuffer) override; + virtual SLANG_NO_THROW void SLANG_MCALL setStencilReference(uint32_t 
referenceValue) override; virtual SLANG_NO_THROW Result SLANG_MCALL createTextureResource( IResource::Usage initialUsage, @@ -120,11 +113,17 @@ public: virtual SLANG_NO_THROW Result SLANG_MCALL createComputePipelineState( const ComputePipelineStateDesc& desc, IPipelineState** outState) override; + virtual void* map(IBufferResource* buffer, MapFlavor flavor) override; + virtual void unmap(IBufferResource* buffer) override; + virtual SLANG_NO_THROW void SLANG_MCALL copyBuffer( + IBufferResource* dst, + size_t dstOffset, + IBufferResource* src, + size_t srcOffset, + size_t size) override; virtual SLANG_NO_THROW SlangResult SLANG_MCALL readTextureResource( - ITextureResource* texture, ISlangBlob** outBlob, size_t* outRowPitch, size_t* outPixelSize) override; + ITextureResource* texture, ResourceState state, ISlangBlob** outBlob, size_t* outRowPitch, size_t* outPixelSize) override; - virtual SLANG_NO_THROW void* SLANG_MCALL map(IBufferResource* buffer, MapFlavor flavor) override; - virtual SLANG_NO_THROW void SLANG_MCALL unmap(IBufferResource* buffer) override; virtual SLANG_NO_THROW void SLANG_MCALL setPrimitiveTopology(PrimitiveTopology topology) override; @@ -157,11 +156,8 @@ public: { return RendererType::DirectX11; } - virtual PipelineStateBase* getCurrentPipeline() override - { - return m_currentPipelineState; - } - protected: + +protected: class ScopeNVAPI { @@ -436,12 +432,14 @@ public: { public: ComPtr<ID3D11DepthStencilView> m_dsv; + DepthStencilClearValue m_clearValue; }; class RenderTargetViewImpl : public ResourceViewImpl { public: ComPtr<ID3D11RenderTargetView> m_rtv; + float m_clearValue[4]; }; class FramebufferLayoutImpl @@ -630,8 +628,6 @@ public: class PipelineStateImpl : public PipelineStateBase { public: - RefPtr<ShaderProgramImpl> m_program; - RefPtr<PipelineLayoutImpl> m_pipelineLayout; }; @@ -645,7 +641,6 @@ public: ComPtr<ID3D11RasterizerState> m_rasterizerState; ComPtr<ID3D11BlendState> m_blendState; - UINT m_stencilRef; float m_blendColor[4]; 
UINT m_sampleMask; @@ -688,6 +683,9 @@ public: bool m_framebufferBindingDirty = true; bool m_shaderBindingDirty = true; + uint32_t m_stencilRef = 0; + bool m_depthStencilStateDirty = true; + Desc m_desc; float m_clearColor[4] = { 0, 0, 0, 0 }; @@ -910,30 +908,34 @@ SlangResult D3D11Renderer::initialize(const Desc& desc) return SLANG_OK; } -void D3D11Renderer::setClearColor(const float color[4]) -{ - memcpy(m_clearColor, color, sizeof(m_clearColor)); -} - -void D3D11Renderer::clearFrame() +void D3D11Renderer::clearFrame(uint32_t colorBufferMask, bool clearDepth, bool clearStencil) { + uint32_t mask = 1; for (auto rtv : m_currentFramebuffer->renderTargetViews) - m_immediateContext->ClearRenderTargetView(rtv->m_rtv, m_clearColor); + { + if (colorBufferMask & mask) + m_immediateContext->ClearRenderTargetView(rtv->m_rtv, rtv->m_clearValue); + mask <<= 1; + } if (m_currentFramebuffer->depthStencilView) { - m_immediateContext->ClearDepthStencilView( - m_currentFramebuffer->depthStencilView->m_dsv, - D3D11_CLEAR_DEPTH | D3D11_CLEAR_STENCIL, - 1.0f, - 0); + UINT clearFlags = 0; + if (clearDepth) + clearFlags = D3D11_CLEAR_DEPTH; + if (clearStencil) + clearFlags |= D3D11_CLEAR_STENCIL; + if (clearFlags) + { + m_immediateContext->ClearDepthStencilView( + m_currentFramebuffer->depthStencilView->m_dsv, + clearFlags, + m_currentFramebuffer->depthStencilView->m_clearValue.depth, + m_currentFramebuffer->depthStencilView->m_clearValue.stencil); + } } } -void D3D11Renderer::beginFrame() { } - -void D3D11Renderer::endFrame() {} - Result D3D11Renderer::createSwapchain( const ISwapchain::Desc& desc, WindowHandle window, ISwapchain** outSwapchain) { @@ -989,9 +991,21 @@ void D3D11Renderer::setFramebuffer(IFramebuffer* frameBuffer) m_currentFramebuffer = static_cast<FramebufferImpl*>(frameBuffer); } +void D3D11Renderer::setStencilReference(uint32_t referenceValue) +{ + m_stencilRef = referenceValue; + m_depthStencilStateDirty = true; +} + SlangResult D3D11Renderer::readTextureResource( - 
ITextureResource* resource, ISlangBlob** outBlob, size_t* outRowPitch, size_t* outPixelSize) + ITextureResource* resource, + ResourceState state, + ISlangBlob** outBlob, + size_t* outRowPitch, + size_t* outPixelSize) { + SLANG_UNUSED(state); + auto texture = static_cast<TextureResourceImpl*>(resource); // Don't bother supporting MSAA for right now if (texture->getDesc()->sampleDesc.numSamples > 1) @@ -1458,6 +1472,10 @@ Result D3D11Renderer::createTextureView(ITextureResource* texture, IResourceView RefPtr<RenderTargetViewImpl> viewImpl = new RenderTargetViewImpl(); viewImpl->m_type = ResourceViewImpl::Type::RTV; viewImpl->m_rtv = rtv; + memcpy( + viewImpl->m_clearValue, + &resourceImpl->getDesc()->optimalClearValue.color, + sizeof(float) * 4); *outView = viewImpl.detach(); return SLANG_OK; } @@ -1471,6 +1489,7 @@ Result D3D11Renderer::createTextureView(ITextureResource* texture, IResourceView RefPtr<DepthStencilViewImpl> viewImpl = new DepthStencilViewImpl(); viewImpl->m_type = ResourceViewImpl::Type::DSV; viewImpl->m_dsv = dsv; + viewImpl->m_clearValue = resourceImpl->getDesc()->optimalClearValue.depthStencil; *outView = viewImpl.detach(); return SLANG_OK; } @@ -1806,7 +1825,7 @@ void D3D11Renderer::setPipelineState(IPipelineState* state) case PipelineType::Graphics: { auto stateImpl = (GraphicsPipelineStateImpl*) state; - auto programImpl = stateImpl->m_program; + auto programImpl = static_cast<ShaderProgramImpl*>(stateImpl->m_program.get()); // TODO: We could conceivably do some lightweight state // differencing here (e.g., check if `programImpl` is the @@ -1840,16 +1859,17 @@ void D3D11Renderer::setPipelineState(IPipelineState* state) // OM m_immediateContext->OMSetBlendState(stateImpl->m_blendState, stateImpl->m_blendColor, stateImpl->m_sampleMask); - m_immediateContext->OMSetDepthStencilState(stateImpl->m_depthStencilState, stateImpl->m_stencilRef); m_currentPipelineState = stateImpl; + + m_depthStencilStateDirty = true; } break; case PipelineType::Compute: 
{ auto stateImpl = (ComputePipelineStateImpl*) state; - auto programImpl = stateImpl->m_program; + auto programImpl = static_cast<ShaderProgramImpl*>(stateImpl->m_program.get()); // CS @@ -2176,12 +2196,9 @@ Result D3D11Renderer::createGraphicsPipelineState(const GraphicsPipelineStateDes } RefPtr<GraphicsPipelineStateImpl> state = new GraphicsPipelineStateImpl(); - state->m_program = programImpl; - state->m_stencilRef = desc.depthStencil.stencilRef; state->m_depthStencilState = depthStencilState; state->m_rasterizerState = rasterizerState; state->m_blendState = blendState; - state->m_pipelineLayout = static_cast<PipelineLayoutImpl*>(desc.pipelineLayout); state->m_inputLayout = static_cast<InputLayoutImpl*>(desc.inputLayout); state->m_rtvCount = (UINT) static_cast<FramebufferLayoutImpl*>(desc.framebufferLayout) ->m_renderTargets.getCount(); @@ -2200,17 +2217,29 @@ Result D3D11Renderer::createComputePipelineState(const ComputePipelineStateDesc& ComputePipelineStateDesc desc = inDesc; preparePipelineDesc(desc); - auto programImpl = (ShaderProgramImpl*) desc.program; - auto pipelineLayoutImpl = (PipelineLayoutImpl*) desc.pipelineLayout; - RefPtr<ComputePipelineStateImpl> state = new ComputePipelineStateImpl(); - state->m_program = programImpl; - state->m_pipelineLayout = pipelineLayoutImpl; state->init(desc); *outState = state.detach(); return SLANG_OK; } +void D3D11Renderer::copyBuffer( + IBufferResource* dst, + size_t dstOffset, + IBufferResource* src, + size_t srcOffset, + size_t size) +{ + auto dstImpl = static_cast<BufferResourceImpl*>(dst); + auto srcImpl = static_cast<BufferResourceImpl*>(src); + D3D11_BOX srcBox = {}; + srcBox.left = (UINT)srcOffset; + srcBox.right = (UINT)(srcOffset + size); + srcBox.bottom = srcBox.back = 1; + m_immediateContext->CopySubresourceRegion( + dstImpl->m_buffer, 0, (UINT)dstOffset, 0, 0, srcImpl->m_buffer, 0, &srcBox); +} + void D3D11Renderer::dispatchCompute(int x, int y, int z) { _flushComputeState(); @@ -2414,9 +2443,10 @@ void 
D3D11Renderer::_flushGraphicsState() m_shaderBindingDirty = false; auto pipelineState = static_cast<GraphicsPipelineStateImpl*>(m_currentPipelineState.get()); - + auto pipelineLayout = + static_cast<PipelineLayoutImpl*>(pipelineState->m_pipelineLayout.get()); auto rtvCount = (UINT)m_currentFramebuffer->renderTargetViews.getCount(); - auto uavCount = pipelineState->m_pipelineLayout->m_uavCount; + auto uavCount = pipelineLayout->m_uavCount; m_immediateContext->OMSetRenderTargetsAndUnorderedAccessViews( rtvCount, m_currentFramebuffer->d3dRenderTargetViews.getArrayView().getBuffer(), @@ -2426,6 +2456,13 @@ void D3D11Renderer::_flushGraphicsState() m_uavBindings[pipelineType][0].readRef(), nullptr); } + if (m_depthStencilStateDirty) + { + m_depthStencilStateDirty = false; + auto pipelineState = static_cast<GraphicsPipelineStateImpl*>(m_currentPipelineState.get()); + m_immediateContext->OMSetDepthStencilState( + pipelineState->m_depthStencilState, m_stencilRef); + } } void D3D11Renderer::_flushComputeState() @@ -2436,8 +2473,10 @@ void D3D11Renderer::_flushComputeState() m_shaderBindingDirty = false; auto pipelineState = static_cast<ComputePipelineStateImpl*>(m_currentPipelineState.get()); + auto pipelineLayout = + static_cast<PipelineLayoutImpl*>(pipelineState->m_pipelineLayout.get()); - auto uavCount = pipelineState->m_pipelineLayout->m_uavCount; + auto uavCount = pipelineLayout->m_uavCount; m_immediateContext->CSSetUnorderedAccessViews( 0, @@ -2660,4 +2699,5 @@ void D3D11Renderer::setDescriptorSet(PipelineType pipelineType, IPipelineLayout* } } -} // renderer_test +} + diff --git a/tools/gfx/d3d12/render-d3d12.cpp b/tools/gfx/d3d12/render-d3d12.cpp index 4e80ff47a..374a78cdd 100644 --- a/tools/gfx/d3d12/render-d3d12.cpp +++ b/tools/gfx/d3d12/render-d3d12.cpp @@ -6,6 +6,7 @@ //WORKING:#include "options.h" #include "../renderer-shared.h" #include "../render-graphics-common.h" +#include "../simple-render-pass-layout.h" #include "core/slang-blob.h" #include 
"core/slang-basic.h" @@ -62,17 +63,15 @@ struct ID3D12GraphicsCommandList1 {}; namespace gfx { using namespace Slang; +static D3D12_RESOURCE_STATES _calcResourceState(IResource::Usage usage); + class D3D12Renderer : public GraphicsAPIRenderer { public: // Renderer implementation virtual SLANG_NO_THROW SlangResult SLANG_MCALL initialize(const Desc& desc) override; - virtual SLANG_NO_THROW void SLANG_MCALL setClearColor(const float color[4]) override; - virtual SLANG_NO_THROW void SLANG_MCALL clearFrame() override; - virtual SLANG_NO_THROW void SLANG_MCALL beginFrame() override; - virtual SLANG_NO_THROW void SLANG_MCALL endFrame() override; - virtual SLANG_NO_THROW void SLANG_MCALL - makeSwapchainImagePresentable(ISwapchain* swapchain) override; + virtual SLANG_NO_THROW Result SLANG_MCALL + createCommandQueue(const ICommandQueue::Desc& desc, ICommandQueue** outQueue) override; virtual SLANG_NO_THROW Result SLANG_MCALL createSwapchain( const ISwapchain::Desc& desc, WindowHandle window, @@ -104,6 +103,10 @@ public: virtual SLANG_NO_THROW Result SLANG_MCALL createFramebufferLayout(IFramebufferLayout::Desc const& desc, IFramebufferLayout** outLayout) override; + virtual SLANG_NO_THROW Result SLANG_MCALL createRenderPassLayout( + const IRenderPassLayout::Desc& desc, + IRenderPassLayout** outRenderPassLayout) override; + virtual SLANG_NO_THROW Result SLANG_MCALL createInputLayout( const InputElementDesc* inputElements, UInt inputElementCount, @@ -126,49 +129,23 @@ public: const ComputePipelineStateDesc& desc, IPipelineState** outState) override; virtual SLANG_NO_THROW SlangResult SLANG_MCALL readTextureResource( - ITextureResource* resource, ISlangBlob** outBlob, size_t* outRowPitch, size_t* outPixelSize) override; - - virtual SLANG_NO_THROW void* SLANG_MCALL - map(IBufferResource* buffer, MapFlavor flavor) override; - virtual SLANG_NO_THROW void SLANG_MCALL unmap(IBufferResource* buffer) override; - // virtual void setInputLayout(InputLayout* inputLayout) override; - 
virtual SLANG_NO_THROW void SLANG_MCALL - setPrimitiveTopology(PrimitiveTopology topology) override; - - virtual SLANG_NO_THROW void SLANG_MCALL setDescriptorSet( - PipelineType pipelineType, - IPipelineLayout* layout, - UInt index, - IDescriptorSet* descriptorSet) override; - - virtual SLANG_NO_THROW void SLANG_MCALL setVertexBuffers( - UInt startSlot, - UInt slotCount, - IBufferResource* const* buffers, - const UInt* strides, - const UInt* offsets) override; - virtual SLANG_NO_THROW void SLANG_MCALL - setIndexBuffer(IBufferResource* buffer, Format indexFormat, UInt offset) override; - virtual SLANG_NO_THROW void SLANG_MCALL - setViewports(UInt count, Viewport const* viewports) override; - virtual SLANG_NO_THROW void SLANG_MCALL - setScissorRects(UInt count, ScissorRect const* rects) override; - virtual SLANG_NO_THROW void SLANG_MCALL setPipelineState(IPipelineState* state) override; - virtual SLANG_NO_THROW void SLANG_MCALL setFramebuffer(IFramebuffer* frameBuffer) override; - virtual SLANG_NO_THROW void SLANG_MCALL draw(UInt vertexCount, UInt startVertex) override; - virtual SLANG_NO_THROW void SLANG_MCALL - drawIndexed(UInt indexCount, UInt startIndex, UInt baseVertex) override; - virtual SLANG_NO_THROW void SLANG_MCALL dispatchCompute(int x, int y, int z) override; - virtual SLANG_NO_THROW void SLANG_MCALL submitGpuWork() override; - virtual SLANG_NO_THROW void SLANG_MCALL waitForGpu() override; + ITextureResource* resource, + ResourceState state, + ISlangBlob** outBlob, + size_t* outRowPitch, + size_t* outPixelSize) override; + + virtual SLANG_NO_THROW SlangResult SLANG_MCALL readBufferResource( + IBufferResource* resource, + size_t offset, + size_t size, + ISlangBlob** outBlob) override; + virtual SLANG_NO_THROW RendererType SLANG_MCALL getRendererType() const override { return RendererType::DirectX12; } - virtual PipelineStateBase* getCurrentPipeline() override - { - return m_currentPipelineState; - } + ~D3D12Renderer(); protected: @@ -244,77 +221,23 @@ 
protected: public: typedef BufferResource Parent; - enum class BackingStyle - { - Unknown, - ResourceBacked, ///< The contents is only held within the resource - MemoryBacked, ///< The current contents is held in m_memory and copied to GPU every time it's used (typically used for constant buffers) - }; - void bindConstantBufferView(D3D12CircularResourceHeap& circularHeap, int index, Submitter* submitter) const { - switch (m_backingStyle) - { - case BackingStyle::MemoryBacked: - { - const size_t bufferSize = m_memory.getCount(); - D3D12CircularResourceHeap::Cursor cursor = circularHeap.allocateConstantBuffer(bufferSize); - ::memcpy(cursor.m_position, m_memory.getBuffer(), bufferSize); - // Set the constant buffer - submitter->setRootConstantBufferView(index, circularHeap.getGpuHandle(cursor)); - break; - } - case BackingStyle::ResourceBacked: - { - // Set the constant buffer - submitter->setRootConstantBufferView(index, m_resource.getResource()->GetGPUVirtualAddress()); - break; - } - default: break; - } + // Set the constant buffer + submitter->setRootConstantBufferView(index, m_resource.getResource()->GetGPUVirtualAddress()); } BufferResourceImpl(IResource::Usage initialUsage, const Desc& desc): - Parent(desc), - m_mapFlavor(MapFlavor::HostRead), - m_initialUsage(initialUsage) + Parent(desc), m_initialUsage(initialUsage) + , m_defaultState(_calcResourceState(initialUsage)) { } - static BackingStyle _calcResourceBackingStyle(Usage usage) - { - // Note: the D3D12 back-end has support for "versioning" of constant buffers, - // where the same logical `BufferResource` can actually point to different - // backing storage over its lifetime, to emulate the ability to modify the - // buffer contents as in D3D11, etc. 
- // - // The VK back-end doesn't have the same behavior, and it is difficult - // to both support this degree of flexibility *and* efficeintly exploit - // descriptor tables (since any table referencing the buffer would need - // to be updated when a new buffer "version" gets allocated). - // - // I'm choosing to disable this for now, and make all buffers be memory-backed, - // although this creates synchronization issues that we'll have to address - // next. - - return BackingStyle::ResourceBacked; -#if 0 - switch (usage) - { - case Usage::ConstantBuffer: return BackingStyle::MemoryBacked; - default: return BackingStyle::ResourceBacked; - } -#endif - } - - BackingStyle m_backingStyle; ///< How the resource is 'backed' - either as a resource or cpu memory. Cpu memory is typically used for constant buffers. D3D12Resource m_resource; ///< The resource typically in gpu memory D3D12Resource m_uploadResource; ///< If the resource can be written to, and is in gpu memory (ie not Memory backed), will have upload resource Usage m_initialUsage; - - List<uint8_t> m_memory; ///< Cpu memory buffer, used if the m_backingStyle is MemoryBacked - MapFlavor m_mapFlavor; ///< If the resource is mapped holds the current mapping flavor + D3D12_RESOURCE_STATES m_defaultState; }; class TextureResourceImpl: public TextureResource @@ -325,9 +248,11 @@ protected: TextureResourceImpl(const Desc& desc): Parent(desc) { + m_defaultState = _calcResourceState(desc.initialUsage); } D3D12Resource m_resource; + D3D12_RESOURCE_STATES m_defaultState; }; class SamplerStateImpl : public ISamplerState, public RefObject @@ -400,151 +325,24 @@ protected: ShortList<ComPtr<IResourceView>> renderTargetViews; ComPtr<IResourceView> depthStencilView; ShortList<D3D12_CPU_DESCRIPTOR_HANDLE> renderTargetDescriptors; + struct Color4f + { + float values[4]; + }; + ShortList<Color4f> renderTargetClearValues; D3D12_CPU_DESCRIPTOR_HANDLE depthStencilDescriptor; + DepthStencilClearValue depthStencilClearValue; }; - 
class SwapchainImpl - : public ISwapchain - , public RefObject + class RenderPassLayoutImpl : public SimpleRenderPassLayout { public: - SLANG_REF_OBJECT_IUNKNOWN_ALL - ISwapchain* getInterface(const Guid& guid) - { - if (guid == GfxGUID::IID_ISlangUnknown || guid == GfxGUID::IID_ISwapchain) - return static_cast<ISwapchain*>(this); - return nullptr; - } - - public: - Result init(D3D12Renderer* renderer, const ISwapchain::Desc& desc, WindowHandle window) - { - // Return fail on non-supported platforms. - switch (window.type) - { - case WindowHandle::Type::Win32Handle: - break; - default: - return SLANG_FAIL; - } - - m_renderer = renderer; - m_desc = desc; - - // Describe the swap chain. - DXGI_SWAP_CHAIN_DESC swapChainDesc = {}; - swapChainDesc.BufferCount = desc.imageCount; - swapChainDesc.BufferDesc.Width = desc.width; - swapChainDesc.BufferDesc.Height = desc.height; - swapChainDesc.BufferDesc.Format = D3DUtil::getMapFormat(desc.format); - swapChainDesc.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT; - swapChainDesc.SwapEffect = DXGI_SWAP_EFFECT_FLIP_DISCARD; - swapChainDesc.OutputWindow = (HWND)window.handleValues[0]; - swapChainDesc.SampleDesc.Count = 1; - swapChainDesc.Windowed = TRUE; - - if (!desc.enableVSync) - { - swapChainDesc.Flags |= DXGI_SWAP_CHAIN_FLAG_FRAME_LATENCY_WAITABLE_OBJECT; - } - - // Swap chain needs the queue so that it can force a flush on it. - ComPtr<IDXGISwapChain> swapChain; - SLANG_RETURN_ON_FAIL(m_renderer->m_deviceInfo.m_dxgiFactory->CreateSwapChain( - m_renderer->m_commandQueue, &swapChainDesc, swapChain.writeRef())); - SLANG_RETURN_ON_FAIL(swapChain->QueryInterface(m_swapChain.writeRef())); - - if (!desc.enableVSync) - { - m_swapChainWaitableObject = m_swapChain->GetFrameLatencyWaitableObject(); - - int maxLatency = desc.imageCount - 2; - - // Make sure the maximum latency is in the range required by dx12 runtime - maxLatency = (maxLatency < 1) ? 1 : maxLatency; - maxLatency = (maxLatency > DXGI_MAX_SWAP_CHAIN_BUFFERS) - ? 
DXGI_MAX_SWAP_CHAIN_BUFFERS - : maxLatency; - - m_swapChain->SetMaximumFrameLatency(maxLatency); - } - - // This sample does not support fullscreen transitions. - SLANG_RETURN_ON_FAIL(m_renderer->m_deviceInfo.m_dxgiFactory->MakeWindowAssociation( - (HWND)window.handleValues[0], DXGI_MWA_NO_ALT_ENTER)); - - m_renderTargetIndex = m_swapChain->GetCurrentBackBufferIndex(); - - for (uint32_t i = 0; i < desc.imageCount; i++) - { - ComPtr<ID3D12Resource> d3dResource; - m_swapChain->GetBuffer(i, IID_PPV_ARGS(d3dResource.writeRef())); - ITextureResource::Desc imageDesc = {}; - imageDesc.init2D( - IResource::Type::Texture2D, desc.format, desc.width, desc.height, 0); - RefPtr<TextureResourceImpl> image = new TextureResourceImpl(imageDesc); - image->m_resource.setResource(d3dResource.get(), D3D12_RESOURCE_STATE_COMMON); - m_images.add(image); - } - return SLANG_OK; - } - virtual SLANG_NO_THROW const Desc& SLANG_MCALL getDesc() override { return m_desc; } - virtual SLANG_NO_THROW Result getImage(uint32_t index, ITextureResource** outResource) override + RefPtr<FramebufferLayoutImpl> m_framebufferLayout; + void init(const IRenderPassLayout::Desc& desc) { - m_images[index]->addRef(); - *outResource = m_images[index].Ptr(); - return SLANG_OK; + SimpleRenderPassLayout::init(desc); + m_framebufferLayout = static_cast<FramebufferLayoutImpl*>(desc.framebufferLayout); } - void makeBackbufferPresentable() - { - D3D12BarrierSubmitter submitter(m_renderer->m_commandList); - m_images[m_renderTargetIndex]->m_resource.transition( - D3D12_RESOURCE_STATE_PRESENT, submitter); - } - virtual SLANG_NO_THROW Result present() override - { - if (m_swapChainWaitableObject) - { - // check if now is good time to present - // This doesn't wait - because the wait time is 0. If it returns WAIT_TIMEOUT it - // means that no frame is waiting to be be displayed so there is no point doing a - // present. 
- const bool shouldPresent = - (WaitForSingleObjectEx(m_swapChainWaitableObject, 0, TRUE) != WAIT_TIMEOUT); - if (shouldPresent) - { - m_swapChain->Present(0, 0); - } - } - else - { - if (SLANG_FAILED(m_swapChain->Present(1, 0))) - { - return SLANG_FAIL; - } - } - // Update the render target index. - m_renderTargetIndex = m_swapChain->GetCurrentBackBufferIndex(); - return SLANG_OK; - } - - virtual SLANG_NO_THROW uint32_t acquireNextImage() override - { - // `IRenderer::beginFrame()` must be called before `acquireNextImage`. - SLANG_RELEASE_ASSERT(m_renderer->m_commandListOpenCount == 1); - - D3D12BarrierSubmitter submitter(m_renderer->m_commandList); - m_images[m_renderTargetIndex]->m_resource.transition( - D3D12_RESOURCE_STATE_RENDER_TARGET, submitter); - return m_renderTargetIndex; - } - public: - D3D12Renderer* m_renderer = nullptr; - ISwapchain::Desc m_desc; - HANDLE m_swapChainWaitableObject = nullptr; - ComPtr<IDXGISwapChain3> m_swapChain; - uint32_t m_renderTargetIndex; - ShortList<RefPtr<TextureResourceImpl>> m_images; }; class InputLayoutImpl: public IInputLayout, public RefObject @@ -731,10 +529,10 @@ protected: ~DescriptorSetImpl() { - if (m_resourceObjects.getCount()) - m_resourceHeap->free((int)m_resourceTable, (int)m_resourceObjects.getCount()); - if (m_samplerObjects.getCount()) - m_samplerHeap->free((int)m_samplerTable, (int)m_samplerObjects.getCount()); + if (m_layout->m_resourceCount) + m_resourceHeap->free((int)m_resourceTable, (int)m_layout->m_resourceCount); + if (m_layout->m_samplerCount) + m_samplerHeap->free((int)m_samplerTable, (int)m_layout->m_samplerCount); } }; @@ -754,7 +552,6 @@ protected: class PipelineStateImpl : public PipelineStateBase { public: - RefPtr<PipelineLayoutImpl> m_pipelineLayout; ComPtr<ID3D12PipelineState> m_pipelineState; void init(const GraphicsPipelineStateDesc& inDesc) { @@ -841,84 +638,1100 @@ protected: ID3D12GraphicsCommandList* m_commandList; }; - static PROC loadProc(HMODULE module, char const* name); - Result 
createFrameResources(); + /// Blocks until gpu has completed all work - void releaseFrameResources(); + static Result _uploadBufferData( + ID3D12GraphicsCommandList* cmdList, + BufferResourceImpl* buffer, + size_t offset, + size_t size, + void* data) + { + D3D12_RANGE readRange = {}; + readRange.Begin = offset; + readRange.End = offset + size; + + void* uploadData; + SLANG_RETURN_ON_FAIL(buffer->m_uploadResource.getResource()->Map( + 0, &readRange, reinterpret_cast<void**>(&uploadData))); + memcpy(static_cast<uint8_t*>(uploadData) + offset, data, size); // Map() never offsets the returned pointer by the range; the copy below reads from `offset`. + buffer->m_uploadResource.getResource()->Unmap(0, &readRange); + { + D3D12BarrierSubmitter submitter(cmdList); + submitter.transition( + buffer->m_resource, buffer->m_defaultState, D3D12_RESOURCE_STATE_COPY_DEST); + } + cmdList->CopyBufferRegion( + buffer->m_resource.getResource(), + offset, + buffer->m_uploadResource.getResource(), + offset, + size); + { + D3D12BarrierSubmitter submitter(cmdList); + submitter.transition( + buffer->m_resource, D3D12_RESOURCE_STATE_COPY_DEST, buffer->m_defaultState); + } + return SLANG_OK; + } + + // Use a circular buffer of execution frames to manage in-flight GPU command buffers. + // Each call to `executeCommandLists` advances the frame by 1. + // If we run out of available frames, wait for the earliest submitted frame to finish. + struct ExecutionFrameResources + { + ComPtr<ID3D12CommandAllocator> m_commandAllocator; + List<ComPtr<ID3D12GraphicsCommandList>> m_commandListPool; + uint32_t m_commandListAllocId = 0; + HANDLE fenceEvent = nullptr; // Stays null until init() creates the event. - Result createBuffer(const D3D12_RESOURCE_DESC& resourceDesc, const void* srcData, size_t srcDataSize, D3D12Resource& uploadResource, D3D12_RESOURCE_STATES finalState, D3D12Resource& resourceOut); + // During command submission, we need all the descriptor tables that get + // used to come from a single heap (for each descriptor heap type). + // + // We will thus keep a single heap of each type that we hope will hold + // all the descriptors that actually get needed in a frame. 
+ // + // TODO: we need an allocation policy to reallocate and resize these + // if/when we run out of space during a frame. + D3D12DescriptorHeap m_viewHeap; // Cbv, Srv, Uav + D3D12DescriptorHeap m_samplerHeap; // Heap for samplers - void submitGpuWorkAndWait(); - void _resetCommandList(); + ~ExecutionFrameResources() { if (fenceEvent) CloseHandle(fenceEvent); } + Result init(ID3D12Device* device, uint32_t viewHeapSize, uint32_t samplerHeapSize) + { + SLANG_RETURN_ON_FAIL(device->CreateCommandAllocator( + D3D12_COMMAND_LIST_TYPE_DIRECT, IID_PPV_ARGS(m_commandAllocator.writeRef()))); + fenceEvent = CreateEventEx( + nullptr, + nullptr, // lpName: unnamed event + CREATE_EVENT_INITIAL_SET | CREATE_EVENT_MANUAL_RESET, + EVENT_ALL_ACCESS); + SLANG_RETURN_ON_FAIL(m_viewHeap.init( + device, + viewHeapSize, + D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, + D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE)); + SLANG_RETURN_ON_FAIL(m_samplerHeap.init( + device, + samplerHeapSize, + D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER, + D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE)); + return SLANG_OK; + } + void reset() + { + WaitForSingleObject(fenceEvent, INFINITE); + m_viewHeap.deallocateAll(); + m_samplerHeap.deallocateAll(); + m_commandListAllocId = 0; + m_commandAllocator->Reset(); + for (auto& cmdBuffer : m_commandListPool) + cmdBuffer->Reset(m_commandAllocator, nullptr); + } + ComPtr<ID3D12GraphicsCommandList> createCommandList(ID3D12Device* device) + { + if (m_commandListAllocId == m_commandListPool.getCount()) + { + ComPtr<ID3D12GraphicsCommandList> cmdList; + device->CreateCommandList( + 0, + D3D12_COMMAND_LIST_TYPE_DIRECT, + m_commandAllocator, + nullptr, + IID_PPV_ARGS(cmdList.writeRef())); + m_commandListPool.add(cmdList); + } + assert((Index)m_commandListAllocId < m_commandListPool.getCount()); + auto& result = m_commandListPool[m_commandListAllocId]; + ++m_commandListAllocId; + return result; + } + }; - Result captureTextureToSurface( - D3D12Resource& resource, - ISlangBlob** blob, - size_t* outRowPitch, - size_t* outPixelSize); + 
class CommandBufferImpl + : public ICommandBuffer + , public RefObject + { + public: + SLANG_REF_OBJECT_IUNKNOWN_ALL + ICommandBuffer* getInterface(const Guid& guid) + { + if (guid == GfxGUID::IID_ISlangUnknown || guid == GfxGUID::IID_ICommandBuffer) + return static_cast<ICommandBuffer*>(this); + return nullptr; + } + public: + ComPtr<ID3D12GraphicsCommandList> m_cmdList; + ExecutionFrameResources* m_frame; + D3D12Renderer* m_renderer; + void init(D3D12Renderer* renderer, ExecutionFrameResources* frame) + { + m_frame = frame; + m_renderer = renderer; + m_cmdList = m_frame->createCommandList(renderer->m_device); + } + class PipelineCommandEncoder : public GraphicsComputeCommandEncoderBase + { + public: + bool m_isOpen = false; + CommandBufferImpl* m_commandBuffer; + ExecutionFrameResources* m_frame; + ID3D12Device* m_device; + ID3D12GraphicsCommandList* m_d3dCmdList; + ID3D12GraphicsCommandList* m_preCmdList = nullptr; + + ID3D12PipelineState* m_boundPipelines[3] = {}; + RefPtr<DescriptorSetImpl> m_boundDescriptorSets[int(PipelineType::CountOf)] + [kMaxDescriptorSetCount]; + static int getBindPointIndex(PipelineType type) + { + switch (type) + { + case PipelineType::Graphics: + return 0; + case PipelineType::Compute: + return 1; + case PipelineType::RayTracing: + return 2; + default: + assert(!"unknown pipeline type."); + return -1; + } + } - FrameInfo& getFrame() { return m_frameInfos[m_frameIndex]; } - const FrameInfo& getFrame() const { return m_frameInfos[m_frameIndex]; } + RefPtr<PipelineLayoutImpl> m_currentPipelineLayout; - ID3D12GraphicsCommandList* getCommandList() const { return m_commandList; } + void init(CommandBufferImpl* commandBuffer) + { + m_commandBuffer = commandBuffer; + m_rendererBase = static_cast<RendererBase*>(commandBuffer->m_renderer); + m_d3dCmdList = m_commandBuffer->m_cmdList; + } - Result _bindRenderState(PipelineStateImpl* pipelineStateImpl, ID3D12GraphicsCommandList* commandList, Submitter* submitter); + void endEncodingImpl() + { + 
m_isOpen = false; + for (int i = 0; i < int(PipelineType::CountOf); i++) + { + for (auto& descSet : m_boundDescriptorSets[i]) + { + descSet = nullptr; + } + } + } - Result _createDevice(DeviceCheckFlags deviceCheckFlags, const UnownedStringSlice& nameMatch, D3D_FEATURE_LEVEL featureLevel, DeviceInfo& outDeviceInfo); - - int m_commandListOpenCount = 0; ///< If >0 the command list should be open + virtual SLANG_NO_THROW void SLANG_MCALL setDescriptorSetImpl( + PipelineType pipelineType, + IPipelineLayout* layout, + UInt index, + IDescriptorSet* descriptorSet) override + { + // In D3D12, unlike Vulkan, binding a root signature invalidates *all* descriptor + // table + // bindings (rather than preserving those that are part of the longest common prefix + // between the old and new layout). + // + // In order to accomodate having descriptor-set bindings that persist across changes + // in pipeline state (which may also change pipeline layout), we will shadow the + // descriptor-set bindings and only flush them on-demand at draw tiume once the + // final pipline layout is known. 
+ // - List<BoundVertexBuffer> m_boundVertexBuffers; + auto descriptorSetImpl = (DescriptorSetImpl*)descriptorSet; + m_boundDescriptorSets[int(pipelineType)][index] = descriptorSetImpl; + } - RefPtr<BufferResourceImpl> m_boundIndexBuffer; - DXGI_FORMAT m_boundIndexFormat; - UINT m_boundIndexOffset; + virtual SLANG_NO_THROW void SLANG_MCALL uploadBufferDataImpl( + IBufferResource* buffer, + size_t offset, + size_t size, + void* data) override + { + _uploadBufferData( + m_commandBuffer->m_cmdList, + static_cast<BufferResourceImpl*>(buffer), + offset, + size, + data); + } - RefPtr<PipelineStateImpl> m_currentPipelineState; + void setPipelineStateImpl(IPipelineState* state) + { + m_currentPipeline = static_cast<PipelineStateImpl*>(state); + } - RefPtr<DescriptorSetImpl> m_boundDescriptorSets[int(PipelineType::CountOf)][kMaxDescriptorSetCount]; + Result _bindRenderState( + PipelineStateImpl* pipelineStateImpl, + Submitter* submitter); + }; - Desc m_desc; + class RenderCommandEncoderImpl + : public IRenderCommandEncoder + , public PipelineCommandEncoder + { + public: + virtual SLANG_NO_THROW SlangResult SLANG_MCALL + queryInterface(SlangUUID const& uuid, void** outObject) override + { + if (uuid == GfxGUID::IID_ISlangUnknown || + uuid == GfxGUID::IID_IRenderCommandEncoder) + { + *outObject = static_cast<IRenderCommandEncoder*>(this); + return SLANG_OK; + } + *outObject = nullptr; + return SLANG_E_NO_INTERFACE; + } + virtual SLANG_NO_THROW uint32_t SLANG_MCALL addRef() { return 1; } + virtual SLANG_NO_THROW uint32_t SLANG_MCALL release() { return 1; } + public: + RefPtr<RenderPassLayoutImpl> m_renderPass; + RefPtr<FramebufferImpl> m_framebuffer; - bool m_isInitialized = false; + List<BoundVertexBuffer> m_boundVertexBuffers; - D3D12_PRIMITIVE_TOPOLOGY_TYPE m_primitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; - D3D12_PRIMITIVE_TOPOLOGY m_primitiveTopology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST; + RefPtr<BufferResourceImpl> m_boundIndexBuffer; - float 
m_clearColor[4] = { 0, 0, 0, 0 }; + D3D12_VIEWPORT m_viewports[kMaxRTVCount]; + D3D12_RECT m_scissorRects[kMaxRTVCount]; - D3D12_VIEWPORT m_viewports[kMaxRTVCount] = {}; + DXGI_FORMAT m_boundIndexFormat; + UINT m_boundIndexOffset; - ComPtr<ID3D12Debug> m_dxDebug; + D3D12_PRIMITIVE_TOPOLOGY_TYPE m_primitiveTopologyType; + D3D12_PRIMITIVE_TOPOLOGY m_primitiveTopology; - DeviceInfo m_deviceInfo; - ID3D12Device* m_device = nullptr; + void init( + D3D12Renderer* renderer, + ExecutionFrameResources* frame, + CommandBufferImpl* cmdBuffer, + RenderPassLayoutImpl* renderPass, + FramebufferImpl* framebuffer) + { + m_commandBuffer = cmdBuffer; + m_d3dCmdList = cmdBuffer->m_cmdList; + m_preCmdList = nullptr; + m_device = renderer->m_device; + m_rendererBase = renderer; + m_renderPass = renderPass; + m_framebuffer = framebuffer; + m_frame = frame; + m_boundVertexBuffers.clear(); + m_boundIndexBuffer = nullptr; + m_primitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; + m_primitiveTopology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST; + m_boundIndexFormat = DXGI_FORMAT_UNKNOWN; + m_boundIndexOffset = 0; + for (auto& boundPipeline : m_boundPipelines) + boundPipeline = nullptr; + + // Set render target states. + m_d3dCmdList->OMSetRenderTargets( + (UINT)framebuffer->renderTargetViews.getCount(), + framebuffer->renderTargetDescriptors.getArrayView().getBuffer(), + FALSE, + framebuffer->depthStencilView ? &framebuffer->depthStencilDescriptor : nullptr); + + // Issue clear commands based on render pass set up. + for (Index i = 0; i < renderPass->m_renderTargetAccesses.getCount(); i++) + { + auto& access = renderPass->m_renderTargetAccesses[i]; - ComPtr<ID3D12CommandQueue> m_commandQueue; - ComPtr<ID3D12GraphicsCommandList> m_commandList; + // Transit resource states. 
+ { + D3D12BarrierSubmitter submitter(m_d3dCmdList); + auto resourceViewImpl = + static_cast<ResourceViewImpl*>(framebuffer->renderTargetViews[i].get()); + auto textureResource = + static_cast<TextureResourceImpl*>(resourceViewImpl->m_resource.Ptr()); + D3D12_RESOURCE_STATES initialState; + if (access.initialState == ResourceState::Undefined) + { + initialState = textureResource->m_defaultState; + } + else + { + initialState = D3DUtil::translateResourceState(access.initialState); + } + textureResource->m_resource.transition( + initialState, + D3D12_RESOURCE_STATE_RENDER_TARGET, + submitter); + } + // Clear. + if (access.loadOp == IRenderPassLayout::AttachmentLoadOp::Clear) + { + m_d3dCmdList->ClearRenderTargetView( + framebuffer->renderTargetDescriptors[i], + framebuffer->renderTargetClearValues[i].values, + 0, + nullptr); + } + } + + if (renderPass->m_hasDepthStencil) + { + // Transit resource states. + { + D3D12BarrierSubmitter submitter(m_d3dCmdList); + auto resourceViewImpl = + static_cast<ResourceViewImpl*>(framebuffer->depthStencilView.get()); + auto textureResource = + static_cast<TextureResourceImpl*>(resourceViewImpl->m_resource.Ptr()); + D3D12_RESOURCE_STATES initialState; + if (renderPass->m_depthStencilAccess.initialState == + ResourceState::Undefined) + { + initialState = textureResource->m_defaultState; + } + else + { + initialState = D3DUtil::translateResourceState( + renderPass->m_depthStencilAccess.initialState); + } + textureResource->m_resource.transition( + initialState, + D3D12_RESOURCE_STATE_DEPTH_WRITE, + submitter); + } + // Clear. 
+ uint32_t clearFlags = 0; + if (renderPass->m_depthStencilAccess.loadOp == + IRenderPassLayout::AttachmentLoadOp::Clear) + { + clearFlags |= D3D12_CLEAR_FLAG_DEPTH; + } + if (renderPass->m_depthStencilAccess.stencilLoadOp == + IRenderPassLayout::AttachmentLoadOp::Clear) + { + clearFlags |= D3D12_CLEAR_FLAG_STENCIL; + } + if (clearFlags) + { + m_d3dCmdList->ClearDepthStencilView( + framebuffer->depthStencilDescriptor, + (D3D12_CLEAR_FLAGS)clearFlags, + framebuffer->depthStencilClearValue.depth, + framebuffer->depthStencilClearValue.stencil, + 0, + nullptr); + } + } + } + + virtual SLANG_NO_THROW void SLANG_MCALL setPipelineState(IPipelineState* state) override + { + setPipelineStateImpl(state); + } + virtual SLANG_NO_THROW void SLANG_MCALL + bindRootShaderObject(IShaderObject* object) override + { + bindRootShaderObjectImpl(PipelineType::Graphics, object); + } + + virtual SLANG_NO_THROW void SLANG_MCALL setDescriptorSet( + IPipelineLayout* layout, + UInt index, + IDescriptorSet* descriptorSet) override + { + setDescriptorSetImpl(PipelineType::Graphics, layout, index, descriptorSet); + } + + virtual SLANG_NO_THROW void SLANG_MCALL + setViewports(uint32_t count, const Viewport* viewports) override + { + static const int kMaxViewports = + D3D12_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE; + assert(count <= kMaxViewports && count <= kMaxRTVCount); + for (UInt ii = 0; ii < count; ++ii) + { + auto& inViewport = viewports[ii]; + auto& dxViewport = m_viewports[ii]; + + dxViewport.TopLeftX = inViewport.originX; + dxViewport.TopLeftY = inViewport.originY; + dxViewport.Width = inViewport.extentX; + dxViewport.Height = inViewport.extentY; + dxViewport.MinDepth = inViewport.minZ; + dxViewport.MaxDepth = inViewport.maxZ; + } + m_d3dCmdList->RSSetViewports(UINT(count), m_viewports); + } + + virtual SLANG_NO_THROW void SLANG_MCALL + setScissorRects(uint32_t count, const ScissorRect* rects) override + { + static const int kMaxScissorRects = + 
D3D12_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE; + assert(count <= kMaxScissorRects && count <= kMaxRTVCount); + + for (UInt ii = 0; ii < count; ++ii) + { + auto& inRect = rects[ii]; + auto& dxRect = m_scissorRects[ii]; + + dxRect.left = LONG(inRect.minX); + dxRect.top = LONG(inRect.minY); + dxRect.right = LONG(inRect.maxX); + dxRect.bottom = LONG(inRect.maxY); + } + + m_d3dCmdList->RSSetScissorRects(UINT(count), m_scissorRects); + } + + virtual SLANG_NO_THROW void SLANG_MCALL + setPrimitiveTopology(PrimitiveTopology topology) override + { + switch (topology) + { + case PrimitiveTopology::TriangleList: + { + m_primitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; + m_primitiveTopology = D3DUtil::getPrimitiveTopology(topology); + break; + } + default: + { + assert(!"Unhandled type"); + } + } + } + + virtual SLANG_NO_THROW void SLANG_MCALL setVertexBuffers( + UInt startSlot, + UInt slotCount, + IBufferResource* const* buffers, + const UInt* strides, + const UInt* offsets) override + { + { + const Index num = startSlot + slotCount; + if (num > m_boundVertexBuffers.getCount()) + { + m_boundVertexBuffers.setCount(num); + } + } + + for (UInt i = 0; i < slotCount; i++) + { + BufferResourceImpl* buffer = static_cast<BufferResourceImpl*>(buffers[i]); + if (buffer) + { + assert(buffer->m_initialUsage == IResource::Usage::VertexBuffer); + } + + BoundVertexBuffer& boundBuffer = m_boundVertexBuffers[startSlot + i]; + boundBuffer.m_buffer = buffer; + boundBuffer.m_stride = int(strides[i]); + boundBuffer.m_offset = int(offsets[i]); + } + } + + virtual SLANG_NO_THROW void SLANG_MCALL setIndexBuffer( + IBufferResource* buffer, + Format indexFormat, + UInt offset = 0) override + { + m_boundIndexBuffer = (BufferResourceImpl*)buffer; + m_boundIndexFormat = D3DUtil::getMapFormat(indexFormat); + m_boundIndexOffset = UINT(offset); + } + + void prepareDraw() + { + auto pipelineState = m_currentPipeline.Ptr(); + if (!pipelineState || (pipelineState->desc.type != 
PipelineType::Graphics)) + { + assert(!"No graphics pipeline state set"); + return; + } + + // Submit - setting for graphics + { + GraphicsSubmitter submitter(m_d3dCmdList); + _bindRenderState(static_cast<PipelineStateImpl*>(pipelineState), &submitter); + } + + m_d3dCmdList->IASetPrimitiveTopology(m_primitiveTopology); + + // Set up vertex buffer views + { + int numVertexViews = 0; + D3D12_VERTEX_BUFFER_VIEW vertexViews[16]; + for (Index i = 0; i < m_boundVertexBuffers.getCount(); i++) + { + const BoundVertexBuffer& boundVertexBuffer = m_boundVertexBuffers[i]; + BufferResourceImpl* buffer = boundVertexBuffer.m_buffer; + if (buffer) + { + D3D12_VERTEX_BUFFER_VIEW& vertexView = vertexViews[numVertexViews++]; + vertexView.BufferLocation = + buffer->m_resource.getResource()->GetGPUVirtualAddress() + + boundVertexBuffer.m_offset; + vertexView.SizeInBytes = + UINT(buffer->getDesc()->sizeInBytes - boundVertexBuffer.m_offset); + vertexView.StrideInBytes = UINT(boundVertexBuffer.m_stride); + } + } + m_d3dCmdList->IASetVertexBuffers(0, numVertexViews, vertexViews); + } + // Set up index buffer + if (m_boundIndexBuffer) + { + D3D12_INDEX_BUFFER_VIEW indexBufferView; + indexBufferView.BufferLocation = + m_boundIndexBuffer->m_resource.getResource()->GetGPUVirtualAddress() + + m_boundIndexOffset; + indexBufferView.SizeInBytes = + UINT(m_boundIndexBuffer->getDesc()->sizeInBytes - m_boundIndexOffset); + indexBufferView.Format = m_boundIndexFormat; + + m_d3dCmdList->IASetIndexBuffer(&indexBufferView); + } + } + virtual SLANG_NO_THROW void SLANG_MCALL + draw(UInt vertexCount, UInt startVertex = 0) override + { + prepareDraw(); + m_d3dCmdList->DrawInstanced(UINT(vertexCount), 1, UINT(startVertex), 0); + } + virtual SLANG_NO_THROW void SLANG_MCALL + drawIndexed(UInt indexCount, UInt startIndex = 0, UInt baseVertex = 0) override + { + prepareDraw(); + m_d3dCmdList->DrawIndexedInstanced( + (UINT)indexCount, 1, (UINT)startIndex, (UINT)baseVertex, 0); + } + virtual SLANG_NO_THROW void 
SLANG_MCALL endEncoding() override + { + PipelineCommandEncoder::endEncodingImpl(); + // Issue clear commands based on render pass set up. + for (Index i = 0; i < m_renderPass->m_renderTargetAccesses.getCount(); i++) + { + auto& access = m_renderPass->m_renderTargetAccesses[i]; + + // Transit resource states. + { + D3D12BarrierSubmitter submitter(m_d3dCmdList); + auto resourceViewImpl = static_cast<ResourceViewImpl*>( + m_framebuffer->renderTargetViews[i].get()); + auto textureResource = + static_cast<TextureResourceImpl*>(resourceViewImpl->m_resource.Ptr()); + textureResource->m_resource.transition( + D3D12_RESOURCE_STATE_RENDER_TARGET, + D3DUtil::translateResourceState(access.finalState), + submitter); + } + } + + if (m_renderPass->m_hasDepthStencil) + { + // Transit resource states. + D3D12BarrierSubmitter submitter(m_d3dCmdList); + auto resourceViewImpl = + static_cast<ResourceViewImpl*>(m_framebuffer->depthStencilView.get()); + auto textureResource = + static_cast<TextureResourceImpl*>(resourceViewImpl->m_resource.Ptr()); + textureResource->m_resource.transition( + D3D12_RESOURCE_STATE_DEPTH_WRITE, + D3DUtil::translateResourceState( + m_renderPass->m_depthStencilAccess.finalState), + submitter); + } + } + + virtual SLANG_NO_THROW void SLANG_MCALL + setStencilReference(uint32_t referenceValue) override + { + m_d3dCmdList->OMSetStencilRef((UINT)referenceValue); + } + }; + + RenderCommandEncoderImpl m_renderCommandEncoder; + virtual SLANG_NO_THROW void SLANG_MCALL encodeRenderCommands( + IRenderPassLayout* renderPass, + IFramebuffer* framebuffer, + IRenderCommandEncoder** outEncoder) override + { + m_renderCommandEncoder.init( + m_renderer, + m_frame, + this, + static_cast<RenderPassLayoutImpl*>(renderPass), + static_cast<FramebufferImpl*>(framebuffer)); + *outEncoder = &m_renderCommandEncoder; + } + + class ComputeCommandEncoderImpl + : public IComputeCommandEncoder + , public PipelineCommandEncoder + { + public: + virtual SLANG_NO_THROW SlangResult SLANG_MCALL 
+ queryInterface(SlangUUID const& uuid, void** outObject) override + { + if (uuid == GfxGUID::IID_ISlangUnknown || + uuid == GfxGUID::IID_IComputeCommandEncoder) + { + *outObject = static_cast<IComputeCommandEncoder*>(this); + return SLANG_OK; + } + *outObject = nullptr; + return SLANG_E_NO_INTERFACE; + } + virtual SLANG_NO_THROW uint32_t SLANG_MCALL addRef() { return 1; } + virtual SLANG_NO_THROW uint32_t SLANG_MCALL release() { return 1; } + + public: + virtual SLANG_NO_THROW void SLANG_MCALL endEncoding() override + { + PipelineCommandEncoder::endEncodingImpl(); + } + void init( + D3D12Renderer* renderer, + ExecutionFrameResources* frame, + CommandBufferImpl* cmdBuffer) + { + m_rendererBase = renderer; + m_commandBuffer = cmdBuffer; + m_d3dCmdList = cmdBuffer->m_cmdList; + m_preCmdList = nullptr; + m_device = renderer->m_device; + m_frame = frame; + for (auto& boundPipeline : m_boundPipelines) + boundPipeline = nullptr; + } + + virtual SLANG_NO_THROW void SLANG_MCALL setPipelineState(IPipelineState* state) override + { + setPipelineStateImpl(state); + } + virtual SLANG_NO_THROW void SLANG_MCALL + bindRootShaderObject(IShaderObject* object) override + { + bindRootShaderObjectImpl(PipelineType::Compute, object); + } + + virtual SLANG_NO_THROW void SLANG_MCALL setDescriptorSet( + IPipelineLayout* layout, + UInt index, + IDescriptorSet* descriptorSet) override + { + setDescriptorSetImpl(PipelineType::Compute, layout, index, descriptorSet); + } - D3D12_RECT m_scissorRects[kMaxRTVCount] = {}; + virtual SLANG_NO_THROW void SLANG_MCALL dispatchCompute(int x, int y, int z) override + { + auto pipelineStateImpl = static_cast<PipelineStateImpl*>(m_currentPipeline.Ptr()); - UINT m_rtvDescriptorSize = 0; + // Submit binding for compute + { + ComputeSubmitter submitter(m_d3dCmdList); + _bindRenderState(pipelineStateImpl, &submitter); + } - UINT m_dsvDescriptorSize = 0; + m_d3dCmdList->Dispatch(x, y, z); + } + }; - // Synchronization objects. 
- D3D12CounterFence m_fence; + ComputeCommandEncoderImpl m_computeCommandEncoder; + virtual SLANG_NO_THROW void SLANG_MCALL + encodeComputeCommands(IComputeCommandEncoder** outEncoder) override + { + m_computeCommandEncoder.init(m_renderer, m_frame, this); + *outEncoder = &m_computeCommandEncoder; + } - HANDLE m_swapChainWaitableObject; + class ResourceCommandEncoderImpl : public IResourceCommandEncoder + { + public: + virtual SLANG_NO_THROW SlangResult SLANG_MCALL + queryInterface(SlangUUID const& uuid, void** outObject) override + { + if (uuid == GfxGUID::IID_ISlangUnknown || + uuid == GfxGUID::IID_IResourceCommandEncoder) + { + *outObject = static_cast<IResourceCommandEncoder*>(this); + return SLANG_OK; + } + *outObject = nullptr; + return SLANG_E_NO_INTERFACE; + } + virtual SLANG_NO_THROW uint32_t SLANG_MCALL addRef() { return 1; } + virtual SLANG_NO_THROW uint32_t SLANG_MCALL release() { return 1; } - // Frame specific data - int m_numRenderFrames = 0; - UINT m_frameIndex = 0; - FrameInfo m_frameInfos[kMaxNumRenderFrames]; + public: + CommandBufferImpl* m_commandBuffer; + void init(D3D12Renderer* renderer, CommandBufferImpl* commandBuffer) + { + m_commandBuffer = commandBuffer; + } + virtual SLANG_NO_THROW void SLANG_MCALL copyBuffer( + IBufferResource* dst, + size_t dstOffset, + IBufferResource* src, + size_t srcOffset, + size_t size) override + { + SLANG_UNUSED(dst); + SLANG_UNUSED(srcOffset); + SLANG_UNUSED(src); + SLANG_UNUSED(dstOffset); + SLANG_UNUSED(size); + } + virtual SLANG_NO_THROW void SLANG_MCALL uploadBufferData( + IBufferResource* dst, + size_t offset, + size_t size, + void* data) override + { + _uploadBufferData( + m_commandBuffer->m_cmdList, + static_cast<BufferResourceImpl*>(dst), + offset, + size, + data); + } + virtual SLANG_NO_THROW void SLANG_MCALL endEncoding() {} + }; + + ResourceCommandEncoderImpl m_resourceCommandEncoder; + + virtual SLANG_NO_THROW void SLANG_MCALL + encodeResourceCommands(IResourceCommandEncoder** outEncoder) 
override + { + m_resourceCommandEncoder.init(m_renderer, this); + *outEncoder = &m_resourceCommandEncoder; + } + + virtual SLANG_NO_THROW void SLANG_MCALL close() override { m_cmdList->Close(); } + }; + + class CommandQueueImpl + : public ICommandQueue + , public RefObject + { + public: + SLANG_REF_OBJECT_IUNKNOWN_ALL + ICommandQueue* getInterface(const Guid& guid) + { + if (guid == GfxGUID::IID_ISlangUnknown || guid == GfxGUID::IID_ICommandQueue) + return static_cast<ICommandQueue*>(this); + return nullptr; + } + + public: + struct CommandBufferPool + { + List<RefPtr<CommandBufferImpl>> pool; + uint32_t allocIndex = 0; + RefPtr<CommandBufferImpl> allocCommandBuffer(D3D12Renderer* renderer, ExecutionFrameResources* frame) + { + if ((Index)allocIndex < pool.getCount()) + { + RefPtr<CommandBufferImpl> result = pool[allocIndex]; + result->init(renderer, frame); + allocIndex++; + return result; + } + RefPtr<CommandBufferImpl> cmdBuffer = new CommandBufferImpl(); + cmdBuffer->init(renderer, frame); + pool.add(cmdBuffer); + return cmdBuffer; + } + void reset() + { + allocIndex = 0; + } + }; + List<CommandBufferPool> m_commandBufferPools; + List<ExecutionFrameResources> m_frames; + uint32_t m_frameIndex = 0; + D3D12Renderer* m_renderer; + ComPtr<ID3D12Device> m_device; + ComPtr<ID3D12CommandQueue> m_d3dQueue; + ComPtr<ID3D12Fence> m_fence; + uint64_t m_fenceValue = 0; + HANDLE globalWaitHandle; + Desc m_desc; + Result init( + D3D12Renderer* renderer, + uint32_t frameCount, + uint32_t viewHeapSize, + uint32_t samplerHeapSize) + { + m_renderer = renderer; + m_device = renderer->m_device; + m_frames.setCount(frameCount); + m_commandBufferPools.setCount(frameCount); + for (uint32_t i = 0; i < frameCount; i++) + { + SLANG_RETURN_ON_FAIL(m_frames[i].init(m_device, viewHeapSize, samplerHeapSize)); + } + D3D12_COMMAND_QUEUE_DESC queueDesc = {}; + queueDesc.Type = D3D12_COMMAND_LIST_TYPE_DIRECT; + SLANG_RETURN_ON_FAIL(m_device->CreateCommandQueue(&queueDesc, 
IID_PPV_ARGS(m_d3dQueue.writeRef()))); + SLANG_RETURN_ON_FAIL( + m_device->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(m_fence.writeRef()))); + globalWaitHandle = CreateEventEx( + nullptr, + nullptr, + CREATE_EVENT_INITIAL_SET | CREATE_EVENT_MANUAL_RESET, + EVENT_ALL_ACCESS); + return SLANG_OK; + } + ~CommandQueueImpl() + { + wait(); + CloseHandle(globalWaitHandle); + } + virtual SLANG_NO_THROW const Desc& SLANG_MCALL getDesc() override + { + return m_desc; + } + virtual SLANG_NO_THROW Result SLANG_MCALL + createCommandBuffer(ICommandBuffer** outCommandBuffer) override + { + RefPtr<CommandBufferImpl> result = + m_commandBufferPools[m_frameIndex].allocCommandBuffer( + m_renderer, &m_frames[m_frameIndex]); + *outCommandBuffer = result.detach(); + return SLANG_OK; + } + + virtual SLANG_NO_THROW void SLANG_MCALL + executeCommandBuffers(uint32_t count, ICommandBuffer* const* commandBuffers) override + { + ShortList<ID3D12CommandList*> commandLists; + for (uint32_t i = 0; i < count; i++) + { + auto cmdImpl = static_cast<CommandBufferImpl*>(commandBuffers[i]); + commandLists.add(cmdImpl->m_cmdList); + } + m_d3dQueue->ExecuteCommandLists((UINT)count, commandLists.getArrayView().getBuffer()); + + auto& frame = m_frames[m_frameIndex]; + m_fenceValue++; + m_d3dQueue->Signal(m_fence, m_fenceValue); + ResetEvent(frame.fenceEvent); + ResetEvent(globalWaitHandle); + m_fence->SetEventOnCompletion(m_fenceValue, frame.fenceEvent); + swapExecutionFrame(); + } + + void swapExecutionFrame() + { + m_frameIndex = (m_frameIndex + 1) % m_frames.getCount(); + auto& frame = m_frames[m_frameIndex]; + frame.reset(); + m_commandBufferPools[m_frameIndex].reset(); + } + + virtual SLANG_NO_THROW void SLANG_MCALL wait() override + { + m_fenceValue++; + m_d3dQueue->Signal(m_fence, m_fenceValue); + ResetEvent(globalWaitHandle); + m_fence->SetEventOnCompletion(m_fenceValue, globalWaitHandle); + WaitForSingleObject(globalWaitHandle, INFINITE); + } + }; + + class SwapchainImpl + : public 
ISwapchain + , public RefObject + { + public: + SLANG_REF_OBJECT_IUNKNOWN_ALL + ISwapchain* getInterface(const Guid& guid) + { + if (guid == GfxGUID::IID_ISlangUnknown || guid == GfxGUID::IID_ISwapchain) + return static_cast<ISwapchain*>(this); + return nullptr; + } + + public: + Result init(D3D12Renderer* renderer, const ISwapchain::Desc& desc, WindowHandle window) + { + // Return fail on non-supported platforms. + switch (window.type) + { + case WindowHandle::Type::Win32Handle: + break; + default: + return SLANG_FAIL; + } + + m_renderer = renderer; + m_desc = desc; + m_queue = static_cast<CommandQueueImpl*>(desc.queue); + + // Describe the swap chain. + DXGI_SWAP_CHAIN_DESC swapChainDesc = {}; + swapChainDesc.BufferCount = desc.imageCount; + swapChainDesc.BufferDesc.Width = desc.width; + swapChainDesc.BufferDesc.Height = desc.height; + swapChainDesc.BufferDesc.Format = D3DUtil::getMapFormat(desc.format); + swapChainDesc.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT; + swapChainDesc.SwapEffect = DXGI_SWAP_EFFECT_FLIP_DISCARD; + swapChainDesc.OutputWindow = (HWND)window.handleValues[0]; + swapChainDesc.SampleDesc.Count = 1; + swapChainDesc.Windowed = TRUE; + + if (!desc.enableVSync) + { + swapChainDesc.Flags |= DXGI_SWAP_CHAIN_FLAG_FRAME_LATENCY_WAITABLE_OBJECT; + } + + // Swap chain needs the queue so that it can force a flush on it. + ComPtr<IDXGISwapChain> swapChain; + SLANG_RETURN_ON_FAIL(m_renderer->m_deviceInfo.m_dxgiFactory->CreateSwapChain( + m_queue->m_d3dQueue, &swapChainDesc, swapChain.writeRef())); + SLANG_RETURN_ON_FAIL(swapChain->QueryInterface(m_swapChain.writeRef())); + + if (!desc.enableVSync) + { + m_swapChainWaitableObject = m_swapChain->GetFrameLatencyWaitableObject(); + + int maxLatency = desc.imageCount - 2; + + // Make sure the maximum latency is in the range required by dx12 runtime + maxLatency = (maxLatency < 1) ? 1 : maxLatency; + maxLatency = (maxLatency > DXGI_MAX_SWAP_CHAIN_BUFFERS) + ? 
DXGI_MAX_SWAP_CHAIN_BUFFERS + : maxLatency; + + m_swapChain->SetMaximumFrameLatency(maxLatency); + } + + // This sample does not support fullscreen transitions. + SLANG_RETURN_ON_FAIL(m_renderer->m_deviceInfo.m_dxgiFactory->MakeWindowAssociation( + (HWND)window.handleValues[0], DXGI_MWA_NO_ALT_ENTER)); + + m_renderTargetIndex = m_swapChain->GetCurrentBackBufferIndex(); + + for (uint32_t i = 0; i < desc.imageCount; i++) + { + ComPtr<ID3D12Resource> d3dResource; + m_swapChain->GetBuffer(i, IID_PPV_ARGS(d3dResource.writeRef())); + ITextureResource::Desc imageDesc = {}; + imageDesc.setDefaults(IResource::Usage::RenderTarget); + imageDesc.init2D( + IResource::Type::Texture2D, desc.format, desc.width, desc.height, 0); + RefPtr<TextureResourceImpl> image = new TextureResourceImpl(imageDesc); + image->m_resource.setResource(d3dResource.get()); + image->m_defaultState = D3D12_RESOURCE_STATE_PRESENT; + m_images.add(image); + } + return SLANG_OK; + } + virtual SLANG_NO_THROW const Desc& SLANG_MCALL getDesc() override { return m_desc; } + virtual SLANG_NO_THROW Result + getImage(uint32_t index, ITextureResource** outResource) override + { + m_images[index]->addRef(); + *outResource = m_images[index].Ptr(); + return SLANG_OK; + } + virtual SLANG_NO_THROW Result present() override + { + if (m_swapChainWaitableObject) + { + // check if now is good time to present + // This doesn't wait - because the wait time is 0. If it returns WAIT_TIMEOUT it + // means that no frame is waiting to be be displayed so there is no point doing a + // present. + const bool shouldPresent = + (WaitForSingleObjectEx(m_swapChainWaitableObject, 0, TRUE) != WAIT_TIMEOUT); + if (shouldPresent) + { + m_swapChain->Present(0, 0); + } + } + else + { + if (SLANG_FAILED(m_swapChain->Present(1, 0))) + { + return SLANG_FAIL; + } + } + // Update the render target index. 
+ m_renderTargetIndex = m_swapChain->GetCurrentBackBufferIndex(); + return SLANG_OK; + } + + virtual SLANG_NO_THROW uint32_t acquireNextImage() override + { + return m_renderTargetIndex; + } + + public: + D3D12Renderer* m_renderer = nullptr; + ISwapchain::Desc m_desc; + HANDLE m_swapChainWaitableObject = nullptr; + ComPtr<IDXGISwapChain3> m_swapChain; + RefPtr<CommandQueueImpl> m_queue; + uint32_t m_renderTargetIndex; + ShortList<RefPtr<TextureResourceImpl>> m_images; + }; + + static PROC loadProc(HMODULE module, char const* name); + + Result createCommandQueueImpl( + uint32_t frameCount, + uint32_t viewHeapSize, + uint32_t samplerHeapSize, + CommandQueueImpl** outQueue); + + Result createBuffer( + const D3D12_RESOURCE_DESC& resourceDesc, + const void* srcData, + size_t srcDataSize, + D3D12Resource& uploadResource, + D3D12_RESOURCE_STATES finalState, + D3D12Resource& resourceOut); + + Result captureTextureToSurface( + D3D12Resource& resource, + ResourceState state, + ISlangBlob** blob, + size_t* outRowPitch, + size_t* outPixelSize); + + Result _createDevice( + DeviceCheckFlags deviceCheckFlags, + const UnownedStringSlice& nameMatch, + D3D_FEATURE_LEVEL featureLevel, + DeviceInfo& outDeviceInfo); - int m_numRenderTargets = 2; - RefPtr<FramebufferImpl> m_frameBuffer; + struct ResourceCommandRecordInfo + { + ComPtr<ICommandBuffer> commandBuffer; + ID3D12GraphicsCommandList* d3dCommandList; + }; + ResourceCommandRecordInfo encodeResourceCommands() + { + ResourceCommandRecordInfo info; + m_resourceCommandQueue->createCommandBuffer(info.commandBuffer.writeRef()); + info.d3dCommandList = static_cast<CommandBufferImpl*>(info.commandBuffer.get())->m_cmdList; + return info; + } + void submitResourceCommandsAndWait(const ResourceCommandRecordInfo& info) + { + info.commandBuffer->close(); + m_resourceCommandQueue->executeCommandBuffer(info.commandBuffer); + m_resourceCommandQueue->wait(); + } + + Desc m_desc; + + bool m_isInitialized = false; + + ComPtr<ID3D12Debug> m_dxDebug; 
+ + DeviceInfo m_deviceInfo; + ID3D12Device* m_device = nullptr; - int32_t m_depthStencilUsageFlags = 0; ///< D3DUtil::UsageFlag combination for depth stencil - int32_t m_targetUsageFlags = 0; ///< D3DUtil::UsageFlag combination for target + RefPtr<CommandQueueImpl> m_resourceCommandQueue; // Dll entry points PFN_D3D12_GET_DEBUG_INTERFACE m_D3D12GetDebugInterface = nullptr; @@ -928,6 +1741,109 @@ protected: bool m_nvapi = false; }; + +Result D3D12Renderer::CommandBufferImpl::PipelineCommandEncoder::_bindRenderState( + PipelineStateImpl* pipelineStateImpl, + Submitter* submitter) +{ + auto commandList = m_commandBuffer->m_cmdList; + // TODO: we should only set some of this state as needed... + + auto pipelineTypeIndex = (int)pipelineStateImpl->desc.type; + auto pipelineLayout = static_cast<PipelineLayoutImpl*>(pipelineStateImpl->m_pipelineLayout.get()); + + submitter->setRootSignature(pipelineLayout->m_rootSignature); + commandList->SetPipelineState(pipelineStateImpl->m_pipelineState); + + ID3D12DescriptorHeap* heaps[] = { + m_frame->m_viewHeap.getHeap(), + m_frame->m_samplerHeap.getHeap(), + }; + commandList->SetDescriptorHeaps(SLANG_COUNT_OF(heaps), heaps); + + // We need to copy descriptors over from the descriptor sets + // (where they are stored in CPU-visible heaps) to the GPU-visible + // heaps so that they can be accessed by shader code. + + Int descriptorSetCount = pipelineLayout->m_descriptorSetCount; + Int rootParameterIndex = 0; + for (Int dd = 0; dd < descriptorSetCount; ++dd) + { + auto descriptorSet = m_boundDescriptorSets[pipelineTypeIndex][dd]; + auto descriptorSetLayout = descriptorSet->m_layout; + + // TODO: require that `descriptorSetLayout` is compatible with + // `pipelineLayout->descriptorSetlayouts[dd]`. 
+ + { + if (auto descriptorCount = descriptorSetLayout->m_resourceCount) + { + auto& gpuHeap = m_frame->m_viewHeap; + auto gpuDescriptorTable = gpuHeap.allocate(int(descriptorCount)); + + auto& cpuHeap = *descriptorSet->m_resourceHeap; + auto cpuDescriptorTable = descriptorSet->m_resourceTable; + + m_device->CopyDescriptorsSimple( + UINT(descriptorCount), + gpuHeap.getCpuHandle(gpuDescriptorTable), + cpuHeap.getCpuHandle(int(cpuDescriptorTable)), + D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + + submitter->setRootDescriptorTable( + int(rootParameterIndex++), gpuHeap.getGpuHandle(gpuDescriptorTable)); + } + } + { + if (auto descriptorCount = descriptorSetLayout->m_samplerCount) + { + auto& gpuHeap = m_frame->m_samplerHeap; + auto gpuDescriptorTable = gpuHeap.allocate(int(descriptorCount)); + + auto& cpuHeap = *descriptorSet->m_samplerHeap; + auto cpuDescriptorTable = descriptorSet->m_samplerTable; + + m_device->CopyDescriptorsSimple( + UINT(descriptorCount), + gpuHeap.getCpuHandle(gpuDescriptorTable), + cpuHeap.getCpuHandle(int(cpuDescriptorTable)), + D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER); + + submitter->setRootDescriptorTable( + int(rootParameterIndex++), gpuHeap.getGpuHandle(gpuDescriptorTable)); + } + } + if (auto rootConstantRangeCount = descriptorSetLayout->m_rootConstantRanges.getCount()) + { + auto srcData = descriptorSet->m_rootConstantData.getBuffer(); + + for (auto& rootConstantRangeInfo : descriptorSetLayout->m_rootConstantRanges) + { + auto countOf32bitValues = rootConstantRangeInfo.size / sizeof(uint32_t); + submitter->setRootConstants( + rootConstantRangeInfo.rootParamIndex, + 0, + countOf32bitValues, + srcData + rootConstantRangeInfo.offset); + } + } + } + + return SLANG_OK; +} + +Result D3D12Renderer::createCommandQueueImpl( + uint32_t frameCount, + uint32_t viewHeapSize, + uint32_t samplerHeapSize, + D3D12Renderer::CommandQueueImpl** outQueue) +{ + RefPtr<D3D12Renderer::CommandQueueImpl> queue = new D3D12Renderer::CommandQueueImpl(); + 
SLANG_RETURN_ON_FAIL(queue->init(this, frameCount, viewHeapSize, samplerHeapSize)); + *outQueue = queue.detach(); + return SLANG_OK; +} + SlangResult SLANG_MCALL createD3D12Renderer(const IRenderer::Desc* desc, IRenderer** outRenderer) { RefPtr<D3D12Renderer> result = new D3D12Renderer(); @@ -947,29 +1863,8 @@ SlangResult SLANG_MCALL createD3D12Renderer(const IRenderer::Desc* desc, IRender return proc; } -void D3D12Renderer::releaseFrameResources() -{ - for (int i = 0; i < m_numRenderFrames; i++) - { - FrameInfo& info = m_frameInfos[i]; - info.reset(); - info.m_fenceValue = m_fence.getCurrentValue(); - } -} - -void D3D12Renderer::waitForGpu() -{ - m_fence.nextSignalAndWait(m_commandQueue); -} - D3D12Renderer::~D3D12Renderer() { - if (m_isInitialized) - { - // Ensure that the GPU is no longer referencing resources that are about to be - // cleaned up by the destructor. - waitForGpu(); - } } static void _initSrvDesc(IResource::Type resourceType, const ITextureResource::Desc& textureDesc, const D3D12_RESOURCE_DESC& desc, DXGI_FORMAT pixelFormat, D3D12_SHADER_RESOURCE_VIEW_DESC& descOut) @@ -1098,105 +1993,22 @@ Result D3D12Renderer::createBuffer(const D3D12_RESOURCE_DESC& resourceDesc, cons ::memcpy(dstData, srcData, srcDataSize); dxUploadResource->Unmap(0, nullptr); - m_commandList->CopyBufferRegion(resourceOut, 0, uploadResource, 0, bufferSize); - - // Make sure it's in the right state - { - D3D12BarrierSubmitter submitter(m_commandList); - resourceOut.transition(finalState, submitter); - } - - submitGpuWorkAndWait(); + auto encodeInfo = encodeResourceCommands(); + encodeInfo.d3dCommandList->CopyBufferRegion(resourceOut, 0, uploadResource, 0, bufferSize); + submitResourceCommandsAndWait(encodeInfo); } return SLANG_OK; } -void D3D12Renderer::_resetCommandList() -{ - const FrameInfo& frame = getFrame(); - - ID3D12GraphicsCommandList* commandList = getCommandList(); - commandList->Reset(frame.m_commandAllocator, nullptr); -} - -void D3D12Renderer::beginFrame() -{ -} - 
-void D3D12Renderer::makeSwapchainImagePresentable(ISwapchain* swapchain) -{ - static_cast<SwapchainImpl*>(swapchain)->makeBackbufferPresentable(); -} - -void D3D12Renderer::endFrame() -{ - assert(m_commandListOpenCount == 1); - SLANG_ASSERT_VOID_ON_FAIL(m_commandList->Close()); - { - // Execute the command list. - ID3D12CommandList* commandLists[] = { m_commandList }; - m_commandQueue->ExecuteCommandLists(SLANG_COUNT_OF(commandLists), commandLists); - } - - assert(m_commandListOpenCount == 1); - // Must be 0 - m_commandListOpenCount = 0; - - - // Increment the fence value. Save on the frame - we'll know that frame is done when the fence - // value >= - m_frameInfos[m_frameIndex].m_fenceValue = m_fence.nextSignal(m_commandQueue); - - // increment frame index after signal - m_frameIndex = (m_frameIndex + 1) % m_numRenderFrames; - - // On the current frame wait until it is completed - { - FrameInfo& frame = m_frameInfos[m_frameIndex]; - // If the next frame is not ready to be rendered yet, wait until it is ready. - m_fence.waitUntilCompleted(frame.m_fenceValue); - } - - getFrame().m_commandAllocator->Reset(); - - _resetCommandList(); - - m_commandListOpenCount = 1; - - getFrame().m_viewHeap.deallocateAll(); - getFrame().m_samplerHeap.deallocateAll(); -} - -void D3D12Renderer::submitGpuWork() -{ - assert(m_commandListOpenCount); - ID3D12GraphicsCommandList* commandList = getCommandList(); - - SLANG_ASSERT_VOID_ON_FAIL(commandList->Close()); - { - // Execute the command list. 
- ID3D12CommandList* commandLists[] = { commandList }; - m_commandQueue->ExecuteCommandLists(SLANG_COUNT_OF(commandLists), commandLists); - } - - // Reset the render target - _resetCommandList(); -} - -void D3D12Renderer::submitGpuWorkAndWait() -{ - submitGpuWork(); - waitForGpu(); -} - Result D3D12Renderer::captureTextureToSurface( D3D12Resource& resource, + ResourceState state, ISlangBlob** outBlob, size_t* outRowPitch, size_t* outPixelSize) { - const D3D12_RESOURCE_STATES initialState = resource.getState(); + const D3D12_RESOURCE_STATES initialState = D3DUtil::translateResourceState(state); const D3D12_RESOURCE_DESC desc = resource.getResource()->GetDesc(); @@ -1230,9 +2042,12 @@ Result D3D12Renderer::captureTextureToSurface( SLANG_RETURN_ON_FAIL(stagingResource.initCommitted(m_device, heapProps, D3D12_HEAP_FLAG_NONE, stagingDesc, D3D12_RESOURCE_STATE_COPY_DEST, nullptr)); } + auto encodeInfo = encodeResourceCommands(); + auto currentState = D3DUtil::translateResourceState(state); + { - D3D12BarrierSubmitter submitter(m_commandList); - resource.transition(D3D12_RESOURCE_STATE_COPY_SOURCE, submitter); + D3D12BarrierSubmitter submitter(encodeInfo.d3dCommandList); + resource.transition(currentState, D3D12_RESOURCE_STATE_COPY_SOURCE, submitter); } // Do the copy @@ -1252,16 +2067,16 @@ Result D3D12Renderer::captureTextureToSurface( dstLoc.PlacedFootprint.Footprint.Depth = 1; dstLoc.PlacedFootprint.Footprint.RowPitch = UINT(rowPitch); - m_commandList->CopyTextureRegion(&dstLoc, 0, 0, 0, &srcLoc, nullptr); + encodeInfo.d3dCommandList->CopyTextureRegion(&dstLoc, 0, 0, 0, &srcLoc, nullptr); } { - D3D12BarrierSubmitter submitter(m_commandList); - resource.transition(initialState, submitter); + D3D12BarrierSubmitter submitter(encodeInfo.d3dCommandList); + resource.transition(D3D12_RESOURCE_STATE_COPY_SOURCE, currentState, submitter); } // Submit the copy, and wait for copy to complete - submitGpuWorkAndWait(); + submitResourceCommandsAndWait(encodeInfo); { ID3D12Resource* 
dxResource = stagingResource; @@ -1280,88 +2095,6 @@ Result D3D12Renderer::captureTextureToSurface( } } -Result D3D12Renderer::_bindRenderState(PipelineStateImpl* pipelineStateImpl, ID3D12GraphicsCommandList* commandList, Submitter* submitter) -{ - // TODO: we should only set some of this state as needed... - - auto pipelineTypeIndex = (int) pipelineStateImpl->desc.type; - auto pipelineLayout = pipelineStateImpl->m_pipelineLayout; - - submitter->setRootSignature(pipelineLayout->m_rootSignature); - commandList->SetPipelineState(pipelineStateImpl->m_pipelineState); - - ID3D12DescriptorHeap* heaps[] = - { - getFrame().m_viewHeap.getHeap(), - getFrame().m_samplerHeap.getHeap(), - }; - commandList->SetDescriptorHeaps(SLANG_COUNT_OF(heaps), heaps); - - // We need to copy descriptors over from the descriptor sets - // (where they are stored in CPU-visible heaps) to the GPU-visible - // heaps so that they can be accessed by shader code. - - Int descriptorSetCount = pipelineLayout->m_descriptorSetCount; - Int rootParameterIndex = 0; - for(Int dd = 0; dd < descriptorSetCount; ++dd) - { - auto descriptorSet = m_boundDescriptorSets[pipelineTypeIndex][dd]; - auto descriptorSetLayout = descriptorSet->m_layout; - - // TODO: require that `descriptorSetLayout` is compatible with - // `pipelineLayout->descriptorSetlayouts[dd]`. 
- - { - if(auto descriptorCount = descriptorSetLayout->m_resourceCount) - { - auto& gpuHeap = getFrame().m_viewHeap; - auto gpuDescriptorTable = gpuHeap.allocate(int(descriptorCount)); - - auto& cpuHeap = *descriptorSet->m_resourceHeap; - auto cpuDescriptorTable = descriptorSet->m_resourceTable; - - m_device->CopyDescriptorsSimple( - UINT(descriptorCount), - gpuHeap.getCpuHandle(gpuDescriptorTable), - cpuHeap.getCpuHandle(int(cpuDescriptorTable)), - D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); - - submitter->setRootDescriptorTable(int(rootParameterIndex++), gpuHeap.getGpuHandle(gpuDescriptorTable)); - } - } - { - if(auto descriptorCount = descriptorSetLayout->m_samplerCount) - { - auto& gpuHeap = getFrame().m_samplerHeap; - auto gpuDescriptorTable = gpuHeap.allocate(int(descriptorCount)); - - auto& cpuHeap = *descriptorSet->m_samplerHeap; - auto cpuDescriptorTable = descriptorSet->m_samplerTable; - - m_device->CopyDescriptorsSimple( - UINT(descriptorCount), - gpuHeap.getCpuHandle(gpuDescriptorTable), - cpuHeap.getCpuHandle(int(cpuDescriptorTable)), - D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER); - - submitter->setRootDescriptorTable(int(rootParameterIndex++), gpuHeap.getGpuHandle(gpuDescriptorTable)); - } - } - if(auto rootConstantRangeCount = descriptorSetLayout->m_rootConstantRanges.getCount()) - { - auto srcData = descriptorSet->m_rootConstantData.getBuffer(); - - for(auto& rootConstantRangeInfo : descriptorSetLayout->m_rootConstantRanges) - { - auto countOf32bitValues = rootConstantRangeInfo.size / sizeof(uint32_t); - submitter->setRootConstants(rootConstantRangeInfo.rootParamIndex, 0, countOf32bitValues, srcData + rootConstantRangeInfo.offset); - } - } - } - - return SLANG_OK; -} - // !!!!!!!!!!!!!!!!!!!!!!!!!!!! Renderer interface !!!!!!!!!!!!!!!!!!!!!!!!!! 
Result D3D12Renderer::_createDevice(DeviceCheckFlags deviceCheckFlags, const UnownedStringSlice& nameMatch, D3D_FEATURE_LEVEL featureLevel, DeviceInfo& outDeviceInfo) @@ -1616,32 +2349,11 @@ Result D3D12Renderer::initialize(const Desc& desc) } } - m_numRenderFrames = 3; - m_numRenderTargets = 2; - m_desc = desc; - // Describe and create the command queue. - D3D12_COMMAND_QUEUE_DESC queueDesc = {}; - queueDesc.Flags = D3D12_COMMAND_QUEUE_FLAG_NONE; - queueDesc.Type = D3D12_COMMAND_LIST_TYPE_DIRECT; + // Create a command queue for internal resource transfer operations. + SLANG_RETURN_ON_FAIL(createCommandQueueImpl(1, 32, 4, m_resourceCommandQueue.writeRef())); - SLANG_RETURN_ON_FAIL(m_device->CreateCommandQueue(&queueDesc, IID_PPV_ARGS(m_commandQueue.writeRef()))); - - // Create descriptor heaps. - for (int i = 0; i < m_numRenderFrames; i++) - { - SLANG_RETURN_ON_FAIL(m_frameInfos[i].m_viewHeap.init( - m_device, - 256, - D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, - D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE)); - SLANG_RETURN_ON_FAIL(m_frameInfos[i].m_samplerHeap.init( - m_device, - 16, - D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER, - D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE)); - } SLANG_RETURN_ON_FAIL(m_cpuViewHeap.init (m_device, 1024, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV)); SLANG_RETURN_ON_FAIL(m_cpuSamplerHeap.init(m_device, 64, D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER)); @@ -1650,59 +2362,18 @@ Result D3D12Renderer::initialize(const Desc& desc) SLANG_RETURN_ON_FAIL(m_viewAllocator.init (m_device, 64, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV)); SLANG_RETURN_ON_FAIL(m_samplerAllocator.init(m_device, 16, D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER)); - // Setup frame resources - SLANG_RETURN_ON_FAIL(createFrameResources()); - - // Setup fence, and close the command list (as default state without begin/endRender is closed) - { - SLANG_RETURN_ON_FAIL(m_fence.init(m_device)); - // Create the command list. When command lists are created they are open, so close it. 
- FrameInfo& frame = m_frameInfos[m_frameIndex]; - SLANG_RETURN_ON_FAIL(m_device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, frame.m_commandAllocator, nullptr, IID_PPV_ARGS(m_commandList.writeRef()))); - m_commandList->Close(); - } - - _resetCommandList(); - - m_commandListOpenCount = 1; - m_isInitialized = true; return SLANG_OK; } -Result D3D12Renderer::createFrameResources() +Result D3D12Renderer::createCommandQueue(const ICommandQueue::Desc& desc, ICommandQueue** outQueue) { - // Set up frames - for (int i = 0; i < m_numRenderFrames; i++) - { - FrameInfo& frame = m_frameInfos[i]; - SLANG_RETURN_ON_FAIL(m_device->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_DIRECT, IID_PPV_ARGS(frame.m_commandAllocator.writeRef()))); - } - + RefPtr<CommandQueueImpl> queue; + SLANG_RETURN_ON_FAIL(createCommandQueueImpl(8, 4096, 1024, queue.writeRef())); + *outQueue = queue.detach(); return SLANG_OK; } -void D3D12Renderer::setClearColor(const float color[4]) -{ - memcpy(m_clearColor, color, sizeof(m_clearColor)); -} - -void D3D12Renderer::clearFrame() -{ - // Record commands - if (!m_frameBuffer) - return; - for (auto rtv : m_frameBuffer->renderTargetDescriptors) - { - m_commandList->ClearRenderTargetView(rtv, m_clearColor, 0, nullptr); - } - if (m_frameBuffer->depthStencilView) - { - m_commandList->ClearDepthStencilView( - m_frameBuffer->depthStencilDescriptor, D3D12_CLEAR_FLAG_DEPTH, 1.0f, 0, 0, nullptr); - } -} - SLANG_NO_THROW Result SLANG_MCALL D3D12Renderer::createSwapchain( const ISwapchain::Desc& desc, WindowHandle window, ISwapchain** outSwapchain) { @@ -1714,11 +2385,17 @@ SLANG_NO_THROW Result SLANG_MCALL D3D12Renderer::createSwapchain( SlangResult D3D12Renderer::readTextureResource( ITextureResource* resource, + ResourceState state, ISlangBlob** outBlob, size_t* outRowPitch, size_t* outPixelSize) { - return captureTextureToSurface(static_cast<TextureResourceImpl*>(resource)->m_resource, outBlob, outRowPitch, outPixelSize); + return captureTextureToSurface( + 
static_cast<TextureResourceImpl*>(resource)->m_resource, + state, + outBlob, + outRowPitch, + outPixelSize); } static D3D12_RESOURCE_STATES _calcResourceState(IResource::Usage usage) @@ -1736,6 +2413,8 @@ static D3D12_RESOURCE_STATES _calcResourceState(IResource::Usage usage) case Usage::UnorderedAccess: return D3D12_RESOURCE_STATE_UNORDERED_ACCESS; case Usage::PixelShaderResource: return D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE; case Usage::NonPixelShaderResource: return D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE; + case Usage::ShaderResource: return D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE | + D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE; case Usage::GenericRead: return D3D12_RESOURCE_STATE_GENERIC_READ; default: return D3D12_RESOURCE_STATES(0); } @@ -1859,7 +2538,9 @@ Result D3D12Renderer::createTextureResource(IResource::Usage initialUsage, const clearValuePtr = nullptr; } clearValue.Format = pixelFormat; - memcpy(clearValue.Color, descIn.optimalClearValue, sizeof(clearValue.Color)); + memcpy(clearValue.Color, &descIn.optimalClearValue.color, sizeof(clearValue.Color)); + clearValue.DepthStencil.Depth = descIn.optimalClearValue.depthStencil.depth; + clearValue.DepthStencil.Stencil = descIn.optimalClearValue.depthStencil.stencil; SLANG_RETURN_ON_FAIL(texture->m_resource.initCommitted( m_device, heapProps, @@ -1960,6 +2641,7 @@ Result D3D12Renderer::createTextureResource(IResource::Usage initialUsage, const } uploadResource->Unmap(0, nullptr); + auto encodeInfo = encodeResourceCommands(); for (int mipIndex = 0; mipIndex < numMipMaps; ++mipIndex) { // https://msdn.microsoft.com/en-us/library/windows/desktop/dn903862(v=vs.85).aspx @@ -1973,21 +2655,23 @@ Result D3D12Renderer::createTextureResource(IResource::Usage initialUsage, const dst.pResource = texture->m_resource; dst.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; dst.SubresourceIndex = subResourceIndex; - m_commandList->CopyTextureRegion(&dst, 0, 0, 0, &src, nullptr); + 
encodeInfo.d3dCommandList->CopyTextureRegion(&dst, 0, 0, 0, &src, nullptr); subResourceIndex++; } // Block - waiting for copy to complete (so can drop upload texture) - submitGpuWorkAndWait(); + submitResourceCommandsAndWait(encodeInfo); } } { + auto encodeInfo = encodeResourceCommands(); const D3D12_RESOURCE_STATES finalState = _calcResourceState(initialUsage); - D3D12BarrierSubmitter submitter(m_commandList); - texture->m_resource.transition(finalState, submitter); - - submitGpuWorkAndWait(); + { + D3D12BarrierSubmitter submitter(encodeInfo.d3dCommandList); + texture->m_resource.transition(D3D12_RESOURCE_STATE_COPY_DEST, finalState, submitter); + } + submitResourceCommandsAndWait(encodeInfo); } *outResource = texture.detach(); @@ -1996,8 +2680,6 @@ Result D3D12Renderer::createTextureResource(IResource::Usage initialUsage, const Result D3D12Renderer::createBufferResource(IResource::Usage initialUsage, const IBufferResource::Desc& descIn, const void* initData, IBufferResource** outResource) { - typedef BufferResourceImpl::BackingStyle Style; - BufferResource::Desc srcDesc(descIn); srcDesc.setDefaults(initialUsage); @@ -2009,37 +2691,13 @@ Result D3D12Renderer::createBufferResource(IResource::Usage initialUsage, const RefPtr<BufferResourceImpl> buffer(new BufferResourceImpl(initialUsage, srcDesc)); - // Save the style - buffer->m_backingStyle = BufferResourceImpl::_calcResourceBackingStyle(initialUsage); - D3D12_RESOURCE_DESC bufferDesc; _initBufferResourceDesc(alignedSizeInBytes, bufferDesc); bufferDesc.Flags = _calcResourceBindFlags(initialUsage, srcDesc.bindFlags); - switch (buffer->m_backingStyle) - { - case Style::MemoryBacked: - { - // Assume the constant buffer will change every frame. 
We'll just keep a copy of the contents - // in regular memory until it needed - buffer->m_memory.setCount(UInt(alignedSizeInBytes)); - // Initialize - if (initData) - { - ::memcpy(buffer->m_memory.getBuffer(), initData, srcDesc.sizeInBytes); - } - break; - } - case Style::ResourceBacked: - { - const D3D12_RESOURCE_STATES initialState = _calcResourceState(initialUsage); - SLANG_RETURN_ON_FAIL(createBuffer(bufferDesc, initData, srcDesc.sizeInBytes, buffer->m_uploadResource, initialState, buffer->m_resource)); - break; - } - default: - return SLANG_FAIL; - } + const D3D12_RESOURCE_STATES initialState = _calcResourceState(initialUsage); + SLANG_RETURN_ON_FAIL(createBuffer(bufferDesc, initData, srcDesc.sizeInBytes, buffer->m_uploadResource, initialState, buffer->m_resource)); *outResource = buffer.detach(); return SLANG_OK; @@ -2351,15 +3009,27 @@ Result D3D12Renderer::createFramebuffer(IFramebuffer::Desc const& desc, IFramebu RefPtr<FramebufferImpl> framebuffer = new FramebufferImpl(); framebuffer->renderTargetViews.setCount(desc.renderTargetCount); framebuffer->renderTargetDescriptors.setCount(desc.renderTargetCount); + framebuffer->renderTargetClearValues.setCount(desc.renderTargetCount); for (uint32_t i = 0; i < desc.renderTargetCount; i++) { framebuffer->renderTargetViews[i] = desc.renderTargetViews[i]; framebuffer->renderTargetDescriptors[i] = static_cast<ResourceViewImpl*>(desc.renderTargetViews[i])->m_descriptor.cpuHandle; + auto clearValue = + static_cast<TextureResourceImpl*>( + static_cast<ResourceViewImpl*>(desc.renderTargetViews[i])->m_resource.Ptr()) + ->getDesc() + ->optimalClearValue.color; + memcpy(&framebuffer->renderTargetClearValues[i], &clearValue, sizeof(ColorClearValue)); } framebuffer->depthStencilView = desc.depthStencilView; if (desc.depthStencilView) { + framebuffer->depthStencilClearValue = + static_cast<TextureResourceImpl*>( + static_cast<ResourceViewImpl*>(desc.depthStencilView)->m_resource.Ptr()) + ->getDesc() + 
->optimalClearValue.depthStencil; framebuffer->depthStencilDescriptor = static_cast<ResourceViewImpl*>(desc.depthStencilView)->m_descriptor.cpuHandle; } @@ -2394,6 +3064,16 @@ Result D3D12Renderer::createFramebufferLayout( return SLANG_OK; } +Result D3D12Renderer::createRenderPassLayout( + const IRenderPassLayout::Desc& desc, + IRenderPassLayout** outRenderPassLayout) +{ + RefPtr<RenderPassLayoutImpl> result = new RenderPassLayoutImpl(); + result->init(desc); + *outRenderPassLayout = result.detach(); + return SLANG_OK; +} + Result D3D12Renderer::createInputLayout(const InputElementDesc* inputElements, UInt inputElementCount, IInputLayout** outLayout) { RefPtr<InputLayoutImpl> layout(new InputLayoutImpl); @@ -2429,7 +3109,7 @@ Result D3D12Renderer::createInputLayout(const InputElementDesc* inputElements, U } dstEle.SemanticName = semanticName; - dstEle.SemanticIndex = (UINT)srcEle.semanticIndex; + dstEle.SemanticIndex = (UINT)srcEle.semanticIndex; dstEle.Format = D3DUtil::getMapFormat(srcEle.format); dstEle.InputSlot = 0; dstEle.AlignedByteOffset = (UINT)srcEle.offset; @@ -2441,337 +3121,58 @@ Result D3D12Renderer::createInputLayout(const InputElementDesc* inputElements, U return SLANG_OK; } -void* D3D12Renderer::map(IBufferResource* bufferIn, MapFlavor flavor) +Result D3D12Renderer::readBufferResource( + IBufferResource* bufferIn, + size_t offset, + size_t size, + ISlangBlob** outBlob) { - typedef BufferResourceImpl::BackingStyle Style; + auto encodeInfo = encodeResourceCommands(); BufferResourceImpl* buffer = static_cast<BufferResourceImpl*>(bufferIn); - buffer->m_mapFlavor = flavor; const size_t bufferSize = buffer->getDesc()->sizeInBytes; - switch (buffer->m_backingStyle) - { - case Style::ResourceBacked: - { - // We need this in a state so we can upload - switch (flavor) - { - case MapFlavor::HostWrite: - case MapFlavor::WriteDiscard: - { - D3D12BarrierSubmitter submitter(m_commandList); - buffer->m_uploadResource.transition(D3D12_RESOURCE_STATE_GENERIC_READ, 
submitter); - buffer->m_resource.transition(D3D12_RESOURCE_STATE_COPY_DEST, submitter); - - const D3D12_RANGE readRange = {}; - - void* uploadData; - SLANG_RETURN_NULL_ON_FAIL(buffer->m_uploadResource.getResource()->Map(0, &readRange, reinterpret_cast<void**>(&uploadData))); - return uploadData; - - break; - } - case MapFlavor::HostRead: - { - // This will be slow!!! - it blocks CPU on GPU completion - D3D12Resource& resource = buffer->m_resource; - - // Readback heap - D3D12_HEAP_PROPERTIES heapProps; - heapProps.Type = D3D12_HEAP_TYPE_READBACK; - heapProps.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN; - heapProps.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN; - heapProps.CreationNodeMask = 1; - heapProps.VisibleNodeMask = 1; - - // Resource to readback to - D3D12_RESOURCE_DESC stagingDesc; - _initBufferResourceDesc(bufferSize, stagingDesc); - - D3D12Resource stageBuf; - SLANG_RETURN_NULL_ON_FAIL(stageBuf.initCommitted(m_device, heapProps, D3D12_HEAP_FLAG_NONE, stagingDesc, D3D12_RESOURCE_STATE_COPY_DEST, nullptr)); - - const D3D12_RESOURCE_STATES initialState = resource.getState(); - - // Make it a source - { - D3D12BarrierSubmitter submitter(m_commandList); - resource.transition(D3D12_RESOURCE_STATE_COPY_SOURCE, submitter); - } - // Do the copy - m_commandList->CopyBufferRegion(stageBuf, 0, resource, 0, bufferSize); - // Switch it back - { - D3D12BarrierSubmitter submitter(m_commandList); - resource.transition(initialState, submitter); - } - - // Wait until complete - submitGpuWorkAndWait(); - - // Map and copy - { - UINT8* data; - D3D12_RANGE readRange = { 0, bufferSize }; - - SLANG_RETURN_NULL_ON_FAIL(stageBuf.getResource()->Map(0, &readRange, reinterpret_cast<void**>(&data))); - - // Copy to memory buffer - buffer->m_memory.setCount(bufferSize); - ::memcpy(buffer->m_memory.getBuffer(), data, bufferSize); - - stageBuf.getResource()->Unmap(0, nullptr); - } - - return buffer->m_memory.getBuffer(); - } - } - break; - } - case Style::MemoryBacked: - { - 
return buffer->m_memory.getBuffer(); - } - default: return nullptr; - } + // This will be slow!!! - it blocks CPU on GPU completion + D3D12Resource& resource = buffer->m_resource; - return nullptr; -} + // Readback heap + D3D12_HEAP_PROPERTIES heapProps; + heapProps.Type = D3D12_HEAP_TYPE_READBACK; + heapProps.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN; + heapProps.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN; + heapProps.CreationNodeMask = 1; + heapProps.VisibleNodeMask = 1; -void D3D12Renderer::unmap(IBufferResource* bufferIn) -{ - typedef BufferResourceImpl::BackingStyle Style; - BufferResourceImpl* buffer = static_cast<BufferResourceImpl*>(bufferIn); + // Resource to readback to + D3D12_RESOURCE_DESC stagingDesc; + _initBufferResourceDesc(bufferSize, stagingDesc); - switch (buffer->m_backingStyle) - { - case Style::MemoryBacked: - { - // Don't need to do anything, as will be uploaded automatically when used - break; - } - case Style::ResourceBacked: - { - // We need this in a state so we can upload - switch (buffer->m_mapFlavor) - { - case MapFlavor::HostWrite: - case MapFlavor::WriteDiscard: - { - // Unmap - ID3D12Resource* uploadResource = buffer->m_uploadResource; - ID3D12Resource* resource = buffer->m_resource; - - uploadResource->Unmap(0, nullptr); - - const D3D12_RESOURCE_STATES initialState = buffer->m_resource.getState(); - - { - D3D12BarrierSubmitter submitter(m_commandList); - buffer->m_uploadResource.transition(D3D12_RESOURCE_STATE_GENERIC_READ, submitter); - buffer->m_resource.transition(D3D12_RESOURCE_STATE_COPY_DEST, submitter); - } - - m_commandList->CopyBufferRegion(resource, 0, uploadResource, 0, buffer->getDesc()->sizeInBytes); + D3D12Resource stageBuf; + SLANG_RETURN_ON_FAIL(stageBuf.initCommitted(m_device, heapProps, D3D12_HEAP_FLAG_NONE, stagingDesc, D3D12_RESOURCE_STATE_COPY_DEST, nullptr)); - { - D3D12BarrierSubmitter submitter(m_commandList); - buffer->m_resource.transition(initialState, submitter); - } - break; - } - case 
MapFlavor::HostRead: - { - break; - } - } - } - } -} - -#if 0 -void D3D12Renderer::setInputLayout(InputLayout* inputLayout) -{ - m_boundInputLayout = static_cast<InputLayoutImpl*>(inputLayout); -} -#endif - -void D3D12Renderer::setPrimitiveTopology(PrimitiveTopology topology) -{ - switch (topology) - { - case PrimitiveTopology::TriangleList: - { - m_primitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; - m_primitiveTopology = D3DUtil::getPrimitiveTopology(topology); - break; - } - default: - { - assert(!"Unhandled type"); - } - } -} - -void D3D12Renderer::setVertexBuffers(UInt startSlot, UInt slotCount, IBufferResource*const* buffers, const UInt* strides, const UInt* offsets) -{ - { - const Index num = startSlot + slotCount; - if (num > m_boundVertexBuffers.getCount()) - { - m_boundVertexBuffers.setCount(num); - } - } - - for (UInt i = 0; i < slotCount; i++) - { - BufferResourceImpl* buffer = static_cast<BufferResourceImpl*>(buffers[i]); - if (buffer) - { - assert(buffer->m_initialUsage == IResource::Usage::VertexBuffer); - } - - BoundVertexBuffer& boundBuffer = m_boundVertexBuffers[startSlot + i]; - boundBuffer.m_buffer = buffer; - boundBuffer.m_stride = int(strides[i]); - boundBuffer.m_offset = int(offsets[i]); - } -} - -void D3D12Renderer::setIndexBuffer(IBufferResource* buffer, Format indexFormat, UInt offset) -{ - m_boundIndexBuffer = (BufferResourceImpl*) buffer; - m_boundIndexFormat = D3DUtil::getMapFormat(indexFormat); - m_boundIndexOffset = UINT(offset); -} - -void D3D12Renderer::setViewports(UInt count, Viewport const* viewports) -{ - static const int kMaxViewports = D3D12_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE; - assert(count <= kMaxViewports && count <= kMaxRTVCount); - for(UInt ii = 0; ii < count; ++ii) - { - auto& inViewport = viewports[ii]; - auto& dxViewport = m_viewports[ii]; - - dxViewport.TopLeftX = inViewport.originX; - dxViewport.TopLeftY = inViewport.originY; - dxViewport.Width = inViewport.extentX; - dxViewport.Height = 
inViewport.extentY; - dxViewport.MinDepth = inViewport.minZ; - dxViewport.MaxDepth = inViewport.maxZ; - } - m_commandList->RSSetViewports(UINT(count), m_viewports); -} - -void D3D12Renderer::setScissorRects(UInt count, ScissorRect const* rects) -{ - static const int kMaxScissorRects = D3D12_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE; - assert(count <= kMaxScissorRects && count <= kMaxRTVCount); - - for(UInt ii = 0; ii < count; ++ii) - { - auto& inRect = rects[ii]; - auto& dxRect = m_scissorRects[ii]; - - dxRect.left = LONG(inRect.minX); - dxRect.top = LONG(inRect.minY); - dxRect.right = LONG(inRect.maxX); - dxRect.bottom = LONG(inRect.maxY); - } - - m_commandList->RSSetScissorRects(UINT(count), m_scissorRects); -} - -void D3D12Renderer::setPipelineState(IPipelineState* state) -{ - m_currentPipelineState = (PipelineStateImpl*)state; -} - -void D3D12Renderer::setFramebuffer(IFramebuffer* frameBuffer) -{ - ID3D12GraphicsCommandList* commandList = m_commandList; - auto framebufferImpl = static_cast<FramebufferImpl*>(frameBuffer); - commandList->OMSetRenderTargets( - (UINT)framebufferImpl->renderTargetViews.getCount(), - framebufferImpl->renderTargetDescriptors.getArrayView().getBuffer(), - FALSE, - framebufferImpl->depthStencilView ? 
&framebufferImpl->depthStencilDescriptor : nullptr); - m_frameBuffer = framebufferImpl; -} - - -void D3D12Renderer::draw(UInt vertexCount, UInt startVertex) -{ - ID3D12GraphicsCommandList* commandList = m_commandList; - - auto pipelineState = m_currentPipelineState.Ptr(); - if (!pipelineState || (pipelineState->desc.type != PipelineType::Graphics)) - { - assert(!"No graphics pipeline state set"); - return; - } - - // Submit - setting for graphics - { - GraphicsSubmitter submitter(commandList); - _bindRenderState(pipelineState, commandList, &submitter); - } + // Do the copy + encodeInfo.d3dCommandList->CopyBufferRegion(stageBuf, 0, resource, 0, bufferSize); - commandList->IASetPrimitiveTopology(m_primitiveTopology); + // Wait until complete + submitResourceCommandsAndWait(encodeInfo); - // Set up vertex buffer views + // Map and copy + RefPtr<ListBlob> blob = new ListBlob(); { - int numVertexViews = 0; - D3D12_VERTEX_BUFFER_VIEW vertexViews[16]; - for (Index i = 0; i < m_boundVertexBuffers.getCount(); i++) - { - const BoundVertexBuffer& boundVertexBuffer = m_boundVertexBuffers[i]; - BufferResourceImpl* buffer = boundVertexBuffer.m_buffer; - if (buffer) - { - D3D12_VERTEX_BUFFER_VIEW& vertexView = vertexViews[numVertexViews++]; - vertexView.BufferLocation = buffer->m_resource.getResource()->GetGPUVirtualAddress() - + boundVertexBuffer.m_offset; - vertexView.SizeInBytes = UINT(buffer->getDesc()->sizeInBytes - boundVertexBuffer.m_offset); - vertexView.StrideInBytes = UINT(boundVertexBuffer.m_stride); - } - } - commandList->IASetVertexBuffers(0, numVertexViews, vertexViews); - } - - // Set up index buffer - if(m_boundIndexBuffer) - { - D3D12_INDEX_BUFFER_VIEW indexBufferView; - indexBufferView.BufferLocation = m_boundIndexBuffer->m_resource.getResource()->GetGPUVirtualAddress() - + m_boundIndexOffset; - indexBufferView.SizeInBytes = UINT(m_boundIndexBuffer->getDesc()->sizeInBytes - m_boundIndexOffset); - indexBufferView.Format = m_boundIndexFormat; - - 
commandList->IASetIndexBuffer(&indexBufferView); - } - - commandList->DrawInstanced(UINT(vertexCount), 1, UINT(startVertex), 0); -} + UINT8* data; + D3D12_RANGE readRange = { 0, bufferSize }; -void D3D12Renderer::drawIndexed(UInt indexCount, UInt startIndex, UInt baseVertex) -{ -} + SLANG_RETURN_ON_FAIL(stageBuf.getResource()->Map(0, &readRange, reinterpret_cast<void**>(&data))); -void D3D12Renderer::dispatchCompute(int x, int y, int z) -{ - ID3D12GraphicsCommandList* commandList = m_commandList; - auto pipelineStateImpl = m_currentPipelineState; + // Copy to memory buffer + blob->m_data.setCount(bufferSize); + ::memcpy(blob->m_data.getBuffer(), data, bufferSize); - // Submit binding for compute - { - ComputeSubmitter submitter(commandList); - _bindRenderState(pipelineStateImpl, commandList, &submitter); + stageBuf.getResource()->Unmap(0, nullptr); } - - commandList->Dispatch(x, y, z); + *outBlob = blob.detach(); + return SLANG_OK; } void D3D12Renderer::DescriptorSetImpl::setConstantBuffer(UInt range, UInt index, IBufferResource* buffer) @@ -2937,22 +3338,6 @@ void D3D12Renderer::DescriptorSetImpl::setRootConstants( memcpy((char*)m_rootConstantData.getBuffer() + rootConstantRangeInfo.offset + offset, data, size); } -void D3D12Renderer::setDescriptorSet(PipelineType pipelineType, IPipelineLayout* layout, UInt index, IDescriptorSet* descriptorSet) -{ - // In D3D12, unlike Vulkan, binding a root signature invalidates *all* descriptor table - // bindings (rather than preserving those that are part of the longest common prefix - // between the old and new layout). - // - // In order to accomodate having descriptor-set bindings that persist across changes - // in pipeline state (which may also change pipeline layout), we will shadow the - // descriptor-set bindings and only flush them on-demand at draw tiume once the final - // pipline layout is known. 
- // - - auto descriptorSetImpl = (DescriptorSetImpl*) descriptorSet; - m_boundDescriptorSets[int(pipelineType)][index] = descriptorSetImpl; -} - Result D3D12Renderer::createProgram(const IShaderProgram::Desc& desc, IShaderProgram** outProgram) { if (desc.slangProgram && desc.slangProgram->getSpecializationParamCount() != 0) @@ -3656,7 +4041,7 @@ Result D3D12Renderer::createGraphicsPipelineState(const GraphicsPipelineStateDes psoDesc.PS = { programImpl->m_pixelShader .getBuffer(), SIZE_T(programImpl->m_pixelShader .getCount()) }; psoDesc.InputLayout = { inputLayoutImpl->m_elements.getBuffer(), UINT(inputLayoutImpl->m_elements.getCount()) }; - psoDesc.PrimitiveTopologyType = m_primitiveTopologyType; + psoDesc.PrimitiveTopologyType = D3DUtil::getPrimitiveType(desc.primitiveType); { auto framebufferLayout = static_cast<FramebufferLayoutImpl*>(desc.framebufferLayout); @@ -3720,30 +4105,24 @@ Result D3D12Renderer::createGraphicsPipelineState(const GraphicsPipelineStateDes { auto& ds = psoDesc.DepthStencilState; - ds.DepthEnable = FALSE; - ds.DepthWriteMask = D3D12_DEPTH_WRITE_MASK_ALL; - ds.DepthFunc = D3D12_COMPARISON_FUNC_ALWAYS; - //ds.DepthFunc = D3D12_COMPARISON_FUNC_LESS; - ds.StencilEnable = FALSE; - ds.StencilReadMask = D3D12_DEFAULT_STENCIL_READ_MASK; - ds.StencilWriteMask = D3D12_DEFAULT_STENCIL_WRITE_MASK; - const D3D12_DEPTH_STENCILOP_DESC defaultStencilOp = - { - D3D12_STENCIL_OP_KEEP, D3D12_STENCIL_OP_KEEP, D3D12_STENCIL_OP_KEEP, D3D12_COMPARISON_FUNC_ALWAYS - }; - ds.FrontFace = defaultStencilOp; - ds.BackFace = defaultStencilOp; + ds.DepthEnable = inDesc.depthStencil.depthTestEnable; + ds.DepthWriteMask = inDesc.depthStencil.depthWriteEnable ? 
D3D12_DEPTH_WRITE_MASK_ALL + : D3D12_DEPTH_WRITE_MASK_ZERO; + ds.DepthFunc = D3DUtil::getComparisonFunc(inDesc.depthStencil.depthFunc); + ds.StencilEnable = inDesc.depthStencil.stencilEnable; + ds.StencilReadMask = (UINT8)inDesc.depthStencil.stencilReadMask; + ds.StencilWriteMask = (UINT8)inDesc.depthStencil.stencilWriteMask; + ds.FrontFace = D3DUtil::translateStencilOpDesc(inDesc.depthStencil.frontFace); + ds.BackFace = D3DUtil::translateStencilOpDesc(inDesc.depthStencil.backFace); } - psoDesc.PrimitiveTopologyType = m_primitiveTopologyType; + psoDesc.PrimitiveTopologyType = D3DUtil::getPrimitiveType(desc.primitiveType); ComPtr<ID3D12PipelineState> pipelineState; SLANG_RETURN_ON_FAIL(m_device->CreateGraphicsPipelineState(&psoDesc, IID_PPV_ARGS(pipelineState.writeRef()))); RefPtr<PipelineStateImpl> pipelineStateImpl = new PipelineStateImpl(); - pipelineStateImpl->m_pipelineLayout = pipelineLayoutImpl; pipelineStateImpl->m_pipelineState = pipelineState; - pipelineStateImpl->m_program = programImpl; pipelineStateImpl->init(desc); *outState = pipelineStateImpl.detach(); return SLANG_OK; @@ -3806,7 +4185,6 @@ Result D3D12Renderer::createComputePipelineState(const ComputePipelineStateDesc& } } RefPtr<PipelineStateImpl> pipelineStateImpl = new PipelineStateImpl(); - pipelineStateImpl->m_pipelineLayout = pipelineLayoutImpl; pipelineStateImpl->m_pipelineState = pipelineState; pipelineStateImpl->init(desc); *outState = pipelineStateImpl.detach(); diff --git a/tools/gfx/d3d12/resource-d3d12.cpp b/tools/gfx/d3d12/resource-d3d12.cpp index 27de868b6..397eee665 100644 --- a/tools/gfx/d3d12/resource-d3d12.cpp +++ b/tools/gfx/d3d12/resource-d3d12.cpp @@ -60,13 +60,15 @@ void D3D12BarrierSubmitter::transition(ID3D12Resource* resource, D3D12_RESOURCE_ return resource ? 
D3DUtil::calcFormat(usage, resource->GetDesc().Format) : DXGI_FORMAT_UNKNOWN; } -void D3D12ResourceBase::transition(D3D12_RESOURCE_STATES nextState, D3D12BarrierSubmitter& submitter) +void D3D12ResourceBase::transition( + D3D12_RESOURCE_STATES oldState, + D3D12_RESOURCE_STATES nextState, + D3D12BarrierSubmitter& submitter) { // Transition only if there is a resource - if (m_resource) + if (m_resource && oldState != nextState) { - submitter.transition(m_resource, m_state, nextState); - m_state = nextState; + submitter.transition(m_resource, oldState, nextState); } } @@ -155,7 +157,7 @@ void D3D12Resource::setDebugName(const wchar_t* name) } } -void D3D12Resource::setResource(ID3D12Resource* resource, D3D12_RESOURCE_STATES initialState) +void D3D12Resource::setResource(ID3D12Resource* resource) { if (resource != m_resource) { @@ -169,8 +171,6 @@ void D3D12Resource::setResource(ID3D12Resource* resource, D3D12_RESOURCE_STATES } m_resource = resource; } - m_prevState = initialState; - m_state = initialState; } void D3D12Resource::setResourceNull() @@ -187,7 +187,7 @@ Result D3D12Resource::initCommitted(ID3D12Device* device, const D3D12_HEAP_PROPE setResourceNull(); ComPtr<ID3D12Resource> resource; SLANG_RETURN_ON_FAIL(device->CreateCommittedResource(&heapProps, heapFlags, &resourceDesc, initState, clearValue, IID_PPV_ARGS(resource.writeRef()))); - setResource(resource, initState); + setResource(resource); return SLANG_OK; } @@ -205,10 +205,4 @@ void D3D12Resource::swap(ComPtr<ID3D12Resource>& resourceInOut) resourceInOut.attach(tmp); } -void D3D12Resource::setState(D3D12_RESOURCE_STATES state) -{ - m_prevState = state; - m_state = state; -} - } // renderer_test diff --git a/tools/gfx/d3d12/resource-d3d12.h b/tools/gfx/d3d12/resource-d3d12.h index 7a26854ff..8b6c28114 100644 --- a/tools/gfx/d3d12/resource-d3d12.h +++ b/tools/gfx/d3d12/resource-d3d12.h @@ -98,10 +98,10 @@ an interface can return a D3D12ResourceBase, and a client cant manipulate it's s struct 
D3D12ResourceBase { /// Add a transition if necessary to the list - void transition(D3D12_RESOURCE_STATES nextState, D3D12BarrierSubmitter& submitter); - /// Get the current state - SLANG_FORCE_INLINE D3D12_RESOURCE_STATES getState() const { return m_state; } - + void transition( + D3D12_RESOURCE_STATES currentState, + D3D12_RESOURCE_STATES nextState, + D3D12BarrierSubmitter& submitter); /// Get the associated resource SLANG_FORCE_INLINE ID3D12Resource* getResource() const { return m_resource; } @@ -111,20 +111,11 @@ struct D3D12ResourceBase /// Coercible into ID3D12Resource SLANG_FORCE_INLINE operator ID3D12Resource*() const { return m_resource; } - /// restore previous state -#if SLANG_ENABLE_CONSERVATIVE_RESOURCE_BARRIERS - SLANG_FORCE_INLINE Void restore(D3D12BarrierSubmitter& submitter) { transition(m_prevState, submitter); } -#else - SLANG_FORCE_INLINE void restore(D3D12BarrierSubmitter& submitter) { SLANG_UNUSED(submitter) } -#endif - /// Given the usage, flags, and format will return the most suitable format. 
Will return DXGI_UNKNOWN if combination is not possible static DXGI_FORMAT calcFormat(D3DUtil::UsageType usage, ID3D12Resource* resource); /// Ctor SLANG_FORCE_INLINE D3D12ResourceBase() : - m_state(D3D12_RESOURCE_STATE_COMMON), - m_prevState(D3D12_RESOURCE_STATE_COMMON), m_resource(nullptr) {} @@ -133,8 +124,6 @@ protected: ~D3D12ResourceBase() {} ID3D12Resource* m_resource; ///< The resource (ref counted) - D3D12_RESOURCE_STATES m_state; ///< The current tracked expected state, if all associated transitions have completed on ID3D12CommandList - D3D12_RESOURCE_STATES m_prevState; ///< The previous state }; struct D3D12Resource : public D3D12ResourceBase @@ -152,8 +141,8 @@ struct D3D12Resource : public D3D12ResourceBase /// Initialize as committed resource Slang::Result initCommitted(ID3D12Device* device, const D3D12_HEAP_PROPERTIES& heapProps, D3D12_HEAP_FLAGS heapFlags, const D3D12_RESOURCE_DESC& resourceDesc, D3D12_RESOURCE_STATES initState, const D3D12_CLEAR_VALUE * clearValue); - /// Set a resource with an initial state - void setResource(ID3D12Resource* resource, D3D12_RESOURCE_STATES initialState); + /// Set a resource. + void setResource(ID3D12Resource* resource); /// Make the resource null void setResourceNull(); /// Returns the attached resource (with any ref counts) and sets to nullptr on this. @@ -162,10 +151,6 @@ struct D3D12Resource : public D3D12ResourceBase /// Swaps the resource contents with the contents of the smart pointer void swap(Slang::ComPtr<ID3D12Resource>& resourceInOut); - /// Sets the current state of the resource (the current state is taken to be the future state once the command list has executed) - /// NOTE! This must be used with care, otherwise state tracking can be made incorrect. 
- void setState(D3D12_RESOURCE_STATES state); - /// Set the debug name on a resource static void setDebugName(ID3D12Resource* resource, const char* name); diff --git a/tools/gfx/immediate-renderer-base.cpp b/tools/gfx/immediate-renderer-base.cpp new file mode 100644 index 000000000..36d98f957 --- /dev/null +++ b/tools/gfx/immediate-renderer-base.cpp @@ -0,0 +1,551 @@ +#include "immediate-renderer-base.h" +#include "simple-render-pass-layout.h" +#include "command-writer.h" +#include "core/slang-basic.h" +#include "core/slang-blob.h" + +namespace gfx +{ +using Slang::RefPtr; +using Slang::List; +using Slang::ShortList; +using Slang::ListBlob; +using Slang::Index; +using Slang::RefObject; +using Slang::ComPtr; +using Slang::Guid; + +namespace +{ + +class CommandBufferImpl : public ICommandBuffer, public RefObject +{ +public: + SLANG_REF_OBJECT_IUNKNOWN_ALL + ICommandBuffer* getInterface(const Guid& guid) + { + if (guid == GfxGUID::IID_ISlangUnknown || guid == GfxGUID::IID_ICommandBuffer) + return static_cast<ICommandBuffer*>(this); + return nullptr; + } + +public: + CommandWriter m_writer; + ImmediateRendererBase* m_renderer; + + void init(ImmediateRendererBase* renderer) + { + m_renderer = renderer; + } + + void reset() + { m_writer.clear(); + } + + class RenderCommandEncoderImpl + : public IRenderCommandEncoder + { + public: + virtual SLANG_NO_THROW SlangResult SLANG_MCALL + queryInterface(SlangUUID const& uuid, void** outObject) override + { + if (uuid == GfxGUID::IID_ISlangUnknown || uuid == GfxGUID::IID_IRenderCommandEncoder) + { + *outObject = static_cast<IRenderCommandEncoder*>(this); + return SLANG_OK; + } + *outObject = nullptr; + return SLANG_E_NO_INTERFACE; + } + virtual SLANG_NO_THROW uint32_t SLANG_MCALL addRef() { return 1; } + virtual SLANG_NO_THROW uint32_t SLANG_MCALL release() { return 1; } + + public: + CommandWriter* m_writer; + virtual SLANG_NO_THROW void SLANG_MCALL endEncoding() override {} + + void init(CommandBufferImpl* cmdBuffer, 
SimpleRenderPassLayout* renderPass, IFramebuffer* framebuffer) + { + m_writer = &cmdBuffer->m_writer; + + // Encode clear commands. + m_writer->setFramebuffer(framebuffer); + uint32_t clearMask = 0; + for (Index i = 0; i < renderPass->m_renderTargetAccesses.getCount(); i++) + { + auto& access = renderPass->m_renderTargetAccesses[i]; + // Clear. + if (access.loadOp == IRenderPassLayout::AttachmentLoadOp::Clear) + { + clearMask |= (1 << (uint32_t)i); + } + } + bool clearDepth = false; + bool clearStencil = false; + if (renderPass->m_hasDepthStencil) + { + // Clear. + if (renderPass->m_depthStencilAccess.loadOp == + IRenderPassLayout::AttachmentLoadOp::Clear) + { + clearDepth = true; + } + if (renderPass->m_depthStencilAccess.stencilLoadOp == + IRenderPassLayout::AttachmentLoadOp::Clear) + { + clearStencil = true; + } + } + m_writer->clearFrame(clearMask, clearDepth, clearStencil); + } + + virtual SLANG_NO_THROW void SLANG_MCALL setPipelineState(IPipelineState* state) + { + m_writer->setPipelineState(state); + } + + virtual SLANG_NO_THROW void SLANG_MCALL + bindRootShaderObject(IShaderObject* object) + { + m_writer->bindRootShaderObject(PipelineType::Graphics, object); + } + + virtual SLANG_NO_THROW void SLANG_MCALL setDescriptorSet( + IPipelineLayout* layout, + UInt index, + IDescriptorSet* descriptorSet) + { + m_writer->setDescriptorSet(PipelineType::Graphics, layout, index, descriptorSet); + } + + virtual SLANG_NO_THROW void SLANG_MCALL + setViewports(uint32_t count, const Viewport* viewports) + { + m_writer->setViewports(count, viewports); + } + virtual SLANG_NO_THROW void SLANG_MCALL + setScissorRects(uint32_t count, const ScissorRect* scissors) + { + m_writer->setScissorRects(count, scissors); + } + virtual SLANG_NO_THROW void SLANG_MCALL setPrimitiveTopology(PrimitiveTopology topology) + { + m_writer->setPrimitiveTopology(topology); + } + virtual SLANG_NO_THROW void SLANG_MCALL setVertexBuffers( + UInt startSlot, + UInt slotCount, + IBufferResource* const* 
buffers, + const UInt* strides, + const UInt* offsets) + { + m_writer->setVertexBuffers(startSlot, slotCount, buffers, strides, offsets); + } + + virtual SLANG_NO_THROW void SLANG_MCALL + setIndexBuffer(IBufferResource* buffer, Format indexFormat, UInt offset) + { + m_writer->setIndexBuffer(buffer, indexFormat, offset); + } + + virtual SLANG_NO_THROW void SLANG_MCALL draw(UInt vertexCount, UInt startVertex) + { + m_writer->draw(vertexCount, startVertex); + } + + virtual SLANG_NO_THROW void SLANG_MCALL + drawIndexed(UInt indexCount, UInt startIndex, UInt baseVertex) + { + m_writer->drawIndexed(indexCount, startIndex, baseVertex); + } + + virtual SLANG_NO_THROW void SLANG_MCALL setStencilReference(uint32_t referenceValue) + { + m_writer->setStencilReference(referenceValue); + } + }; + + RenderCommandEncoderImpl m_renderCommandEncoder; + virtual SLANG_NO_THROW void SLANG_MCALL encodeRenderCommands( + IRenderPassLayout* renderPass, + IFramebuffer* framebuffer, + IRenderCommandEncoder** outEncoder) override + { + m_renderCommandEncoder.init( + this, + static_cast<SimpleRenderPassLayout*>(renderPass), + framebuffer); + *outEncoder = &m_renderCommandEncoder; + } + + class ComputeCommandEncoderImpl + : public IComputeCommandEncoder + { + public: + virtual SLANG_NO_THROW SlangResult SLANG_MCALL + queryInterface(SlangUUID const& uuid, void** outObject) override + { + if (uuid == GfxGUID::IID_ISlangUnknown || uuid == GfxGUID::IID_IComputeCommandEncoder) + { + *outObject = static_cast<IComputeCommandEncoder*>(this); + return SLANG_OK; + } + *outObject = nullptr; + return SLANG_E_NO_INTERFACE; + } + virtual SLANG_NO_THROW uint32_t SLANG_MCALL addRef() { return 1; } + virtual SLANG_NO_THROW uint32_t SLANG_MCALL release() { return 1; } + + public: + CommandWriter* m_writer; + + virtual SLANG_NO_THROW void SLANG_MCALL endEncoding() override + { + } + + void init(CommandBufferImpl* cmdBuffer) + { + m_writer = &cmdBuffer->m_writer; + } + + virtual SLANG_NO_THROW void SLANG_MCALL 
setPipelineState(IPipelineState* state) override + { + m_writer->setPipelineState(state); + } + virtual SLANG_NO_THROW void SLANG_MCALL + bindRootShaderObject(IShaderObject* object) override + { + m_writer->bindRootShaderObject(PipelineType::Compute, object); + } + + virtual SLANG_NO_THROW void SLANG_MCALL setDescriptorSet( + IPipelineLayout* layout, + UInt index, + IDescriptorSet* descriptorSet) override + { + m_writer->setDescriptorSet(PipelineType::Compute, layout, index, descriptorSet); + } + + virtual SLANG_NO_THROW void SLANG_MCALL dispatchCompute(int x, int y, int z) override + { + m_writer->dispatchCompute(x, y, z); + } + }; + + ComputeCommandEncoderImpl m_computeCommandEncoder; + virtual SLANG_NO_THROW void SLANG_MCALL + encodeComputeCommands(IComputeCommandEncoder** outEncoder) override + { + m_computeCommandEncoder.init(this); + *outEncoder = &m_computeCommandEncoder; + } + + class ResourceCommandEncoderImpl + : public IResourceCommandEncoder + { + public: + virtual SLANG_NO_THROW SlangResult SLANG_MCALL + queryInterface(SlangUUID const& uuid, void** outObject) override + { + if (uuid == GfxGUID::IID_ISlangUnknown || uuid == GfxGUID::IID_IResourceCommandEncoder) + { + *outObject = static_cast<IResourceCommandEncoder*>(this); + return SLANG_OK; + } + *outObject = nullptr; + return SLANG_E_NO_INTERFACE; + } + virtual SLANG_NO_THROW uint32_t SLANG_MCALL addRef() { return 1; } + virtual SLANG_NO_THROW uint32_t SLANG_MCALL release() { return 1; } + + public: + CommandWriter* m_writer; + + void init(CommandBufferImpl* cmdBuffer) + { + m_writer = &cmdBuffer->m_writer; + } + + virtual SLANG_NO_THROW void SLANG_MCALL endEncoding() override {} + virtual SLANG_NO_THROW void SLANG_MCALL copyBuffer( + IBufferResource* dst, + size_t dstOffset, + IBufferResource* src, + size_t srcOffset, + size_t size) override + { + m_writer->copyBuffer(dst, dstOffset, src, srcOffset, size); + } + + virtual SLANG_NO_THROW void SLANG_MCALL + uploadBufferData(IBufferResource* dst, 
size_t offset, size_t size, void* data) + { + m_writer->uploadBufferData(dst, offset, size, data); + } + }; + + ResourceCommandEncoderImpl m_resourceCommandEncoder; + + virtual SLANG_NO_THROW void SLANG_MCALL + encodeResourceCommands(IResourceCommandEncoder** outEncoder) override + { + m_resourceCommandEncoder.init(this); + *outEncoder = &m_resourceCommandEncoder; + } + + virtual SLANG_NO_THROW void SLANG_MCALL close() override { } + + void execute() + { + for (auto& cmd : m_writer.m_commands) + { + auto name = cmd.name; + switch (name) + { + case CommandName::SetPipelineState: + m_renderer->_setPipelineState(m_writer.getObject<IPipelineState>(cmd.operands[0])); + break; + case CommandName::BindRootShaderObject: + m_renderer->bindRootShaderObject( + (PipelineType)cmd.operands[0], + m_writer.getObject<IShaderObject>(cmd.operands[1])); + break; + case CommandName::SetDescriptorSet: + m_renderer->setDescriptorSet( + (gfx::PipelineType)cmd.operands[0], + m_writer.getObject<IPipelineLayout>(cmd.operands[1]), + (UInt)cmd.operands[2], + m_writer.getObject<IDescriptorSet>(cmd.operands[3])); + break; + case CommandName::SetFramebuffer: + m_renderer->setFramebuffer(m_writer.getObject<IFramebuffer>(cmd.operands[0])); + break; + case CommandName::ClearFrame: + m_renderer->clearFrame( + cmd.operands[0], (cmd.operands[1] != 0), (cmd.operands[2] != 0)); + break; + case CommandName::SetViewports: + m_renderer->setViewports( + (UInt)cmd.operands[0], m_writer.getData<Viewport>(cmd.operands[1])); + break; + case CommandName::SetScissorRects: + m_renderer->setScissorRects( + (UInt)cmd.operands[0], m_writer.getData<ScissorRect>(cmd.operands[1])); + break; + case CommandName::SetPrimitiveTopology: + m_renderer->setPrimitiveTopology((PrimitiveTopology)cmd.operands[0]); + break; + case CommandName::SetVertexBuffers: + { + ShortList<IBufferResource*> bufferResources; + for (uint32_t i = 0; i < cmd.operands[1]; i++) + { + bufferResources.add( + 
m_writer.getObject<IBufferResource>(cmd.operands[2] + i)); + } + m_renderer->setVertexBuffers( + (UInt)cmd.operands[0], + (UInt)cmd.operands[1], + bufferResources.getArrayView().getBuffer(), + m_writer.getData<UInt>(cmd.operands[3]), + m_writer.getData<UInt>(cmd.operands[4])); + } + break; + case CommandName::SetIndexBuffer: + m_renderer->setIndexBuffer( + m_writer.getObject<IBufferResource>(cmd.operands[0]), + (Format)cmd.operands[1], + (UInt)cmd.operands[2]); + break; + case CommandName::Draw: + m_renderer->draw((UInt)cmd.operands[0], (UInt)cmd.operands[1]); + break; + case CommandName::DrawIndexed: + m_renderer->drawIndexed( + (UInt)cmd.operands[0], (UInt)cmd.operands[1], (UInt)cmd.operands[2]); + break; + case CommandName::SetStencilReference: + m_renderer->setStencilReference(cmd.operands[0]); + break; + case CommandName::DispatchCompute: + m_renderer->dispatchCompute( + int(cmd.operands[0]), int(cmd.operands[1]), int(cmd.operands[2])); + break; + case CommandName::UploadBufferData: + m_renderer->uploadBufferData( + m_writer.getObject<IBufferResource>(cmd.operands[0]), + cmd.operands[1], + cmd.operands[2], + m_writer.getData<uint8_t>(cmd.operands[3])); + break; + case CommandName::CopyBuffer: + m_renderer->copyBuffer( + m_writer.getObject<IBufferResource>(cmd.operands[0]), + cmd.operands[1], + m_writer.getObject<IBufferResource>(cmd.operands[2]), + cmd.operands[3], + cmd.operands[4]); + break; + default: + assert(!"unknown command"); + break; + } + } + m_writer.clear(); + } +}; + +class CommandQueueImpl + : public ICommandQueue + , public RefObject +{ +public: + SLANG_REF_OBJECT_IUNKNOWN_ALL + ICommandQueue* getInterface(const Guid& guid) + { + if (guid == GfxGUID::IID_ISlangUnknown || guid == GfxGUID::IID_ICommandQueue) + return static_cast<ICommandQueue*>(this); + return nullptr; + } + +public: + ImmediateRendererBase* m_renderer; + ICommandQueue::Desc m_desc; + + CommandQueueImpl(ImmediateRendererBase* renderer) + : m_renderer(renderer) + { + m_desc.type = 
ICommandQueue::QueueType::Graphics; + } + + ~CommandQueueImpl() + { + m_renderer->m_queueCreateCount--; + } + + virtual SLANG_NO_THROW const Desc& SLANG_MCALL getDesc() override { return m_desc; } + + virtual SLANG_NO_THROW Result SLANG_MCALL + createCommandBuffer(ICommandBuffer** outCommandBuffer) override + { + RefPtr<CommandBufferImpl> newCmdBuffer = new CommandBufferImpl(); + newCmdBuffer->init(m_renderer); + *outCommandBuffer = newCmdBuffer.detach(); + return SLANG_OK; + } + + virtual SLANG_NO_THROW void SLANG_MCALL + executeCommandBuffers(uint32_t count, ICommandBuffer* const* commandBuffers) override + { + for (uint32_t i = 0; i < count; i++) + { + static_cast<CommandBufferImpl*>(commandBuffers[i])->execute(); + } + } + + virtual SLANG_NO_THROW void SLANG_MCALL wait() override + { + m_renderer->waitForGpu(); + } +}; +} + + +ImmediateRendererBase::ImmediateRendererBase() { + m_queue = new CommandQueueImpl(this); +} + +void ImmediateRendererBase::bindRootShaderObject(PipelineType pipelineType, IShaderObject* shaderObject) +{ + class ImmediateCommandEncoder : public GraphicsComputeCommandEncoderBase + { + public: + virtual SLANG_NO_THROW void SLANG_MCALL setDescriptorSetImpl( + PipelineType pipelineType, + IPipelineLayout* layout, + UInt index, + IDescriptorSet* descriptorSet) override + { + auto renderer = static_cast<ImmediateRendererBase*>(m_rendererBase); + renderer->setDescriptorSet(pipelineType, layout, index, descriptorSet); + } + + virtual SLANG_NO_THROW void SLANG_MCALL uploadBufferDataImpl( + IBufferResource* buffer, + size_t offset, + size_t size, + void* data) override + { + auto renderer = static_cast<ImmediateRendererBase*>(m_rendererBase); + renderer->uploadBufferData(buffer, offset, size, data); + } + }; + ImmediateCommandEncoder encoder; + encoder.m_rendererBase = this; + encoder.m_currentPipeline = static_cast<PipelineStateBase*>(m_currentPipelineState.get()); + encoder.bindRootShaderObjectImpl(pipelineType, shaderObject); + 
_setPipelineState(encoder.m_currentPipeline);
+}
+
+SLANG_NO_THROW Result SLANG_MCALL ImmediateRendererBase::createCommandQueue(
+    const ICommandQueue::Desc& desc,
+    ICommandQueue** outQueue)
+{
+    SLANG_UNUSED(desc);
+    // Only one queue is supported.
+    if (m_queueCreateCount != 0)
+        return SLANG_FAIL;
+    *outQueue = m_queue.get();
+    m_queue->addRef();
+    // Record that the queue has been handed out. Without this the guard above
+    // can never trigger, and the decrement in ~CommandQueueImpl would wrap the
+    // unsigned counter.
+    m_queueCreateCount++;
+    return SLANG_OK;
+}
+
+SLANG_NO_THROW Result SLANG_MCALL ImmediateRendererBase::createRenderPassLayout(
+    const IRenderPassLayout::Desc& desc,
+    IRenderPassLayout** outRenderPassLayout)
+{
+    RefPtr<SimpleRenderPassLayout> renderPass = new SimpleRenderPassLayout();
+    renderPass->init(desc);
+    *outRenderPassLayout = renderPass.detach();
+    return SLANG_OK;
+}
+
+void ImmediateRendererBase::_setPipelineState(IPipelineState* state)
+{
+    PipelineStateBase* pipelineImpl = static_cast<PipelineStateBase*>(state);
+    if (!pipelineImpl->isSpecializable)
+    {
+        setPipelineState(state);
+    }
+    m_currentPipelineState = state;
+}
+
+void ImmediateRendererBase::uploadBufferData(
+    IBufferResource* dst,
+    size_t offset,
+    size_t size,
+    void* data)
+{
+    auto buffer = map(dst, gfx::MapFlavor::WriteDiscard);
+    // `map` can fail (readBufferResource below checks its result too); never
+    // copy through a null pointer.
+    if (!buffer)
+        return;
+    memcpy((uint8_t*)buffer + offset, data, size);
+    unmap(dst);
+}
+
+SLANG_NO_THROW SlangResult SLANG_MCALL ImmediateRendererBase::readBufferResource(
+    IBufferResource* buffer,
+    size_t offset,
+    size_t size,
+    ISlangBlob** outBlob)
+{
+    RefPtr<ListBlob> blob = new ListBlob();
+    blob->m_data.setCount((Index)size);
+    auto content = (uint8_t*)map(buffer, gfx::MapFlavor::HostRead);
+    if (!content)
+        return SLANG_FAIL;
+    memcpy(blob->m_data.getBuffer(), content + offset, size);
+    unmap(buffer);
+    *outBlob = blob.detach();
+    return SLANG_OK;
+}
+
+}
diff --git a/tools/gfx/immediate-renderer-base.h b/tools/gfx/immediate-renderer-base.h
new file mode 100644
index 000000000..477c25b13
--- /dev/null
+++ b/tools/gfx/immediate-renderer-base.h
@@ -0,0 +1,89 @@
+// immediate-renderer-base.h
+#pragma once
+
+// Provides shared implementation of
public API objects for targets with +// an immediate mode execution context. + +#include "render-graphics-common.h" + +namespace gfx +{ + +enum class MapFlavor +{ + Unknown, ///< Unknown mapping type + HostRead, + HostWrite, + WriteDiscard, +}; + +class ImmediateRendererBase : public GraphicsAPIRenderer +{ +private: + ComPtr<IPipelineState> m_currentPipelineState; + +public: + // Immediate commands to be implemented by each target. + virtual SLANG_NO_THROW void SLANG_MCALL setPipelineState(IPipelineState* state) = 0; + virtual SLANG_NO_THROW void SLANG_MCALL setDescriptorSet( + PipelineType pipelineType, + IPipelineLayout* layout, + UInt index, + IDescriptorSet* descriptorSet) = 0; + virtual SLANG_NO_THROW void SLANG_MCALL setFramebuffer(IFramebuffer* frameBuffer) = 0; + virtual SLANG_NO_THROW void SLANG_MCALL clearFrame(uint32_t colorBufferMask, bool clearDepth, bool clearStencil) = 0; + virtual SLANG_NO_THROW void SLANG_MCALL setViewports(UInt count, const Viewport* viewports) = 0; + virtual SLANG_NO_THROW void SLANG_MCALL + setScissorRects(UInt count, const ScissorRect* scissors) = 0; + virtual SLANG_NO_THROW void SLANG_MCALL setPrimitiveTopology(PrimitiveTopology topology) = 0; + virtual SLANG_NO_THROW void SLANG_MCALL setVertexBuffers( + UInt startSlot, + UInt slotCount, + IBufferResource* const* buffers, + const UInt* strides, + const UInt* offsets) = 0; + virtual SLANG_NO_THROW void SLANG_MCALL + setIndexBuffer(IBufferResource* buffer, Format indexFormat, UInt offset = 0) = 0; + virtual SLANG_NO_THROW void SLANG_MCALL draw(UInt vertexCount, UInt startVertex = 0) = 0; + virtual SLANG_NO_THROW void SLANG_MCALL + drawIndexed(UInt indexCount, UInt startIndex = 0, UInt baseVertex = 0) = 0; + virtual SLANG_NO_THROW void SLANG_MCALL setStencilReference(uint32_t referenceValue) = 0; + virtual SLANG_NO_THROW void SLANG_MCALL dispatchCompute(int x, int y, int z) = 0; + virtual SLANG_NO_THROW void SLANG_MCALL copyBuffer( + IBufferResource* dst, + size_t dstOffset, + 
IBufferResource* src, + size_t srcOffset, + size_t size) = 0; + virtual SLANG_NO_THROW void SLANG_MCALL submitGpuWork() = 0; + virtual SLANG_NO_THROW void SLANG_MCALL waitForGpu() = 0; + virtual void* map(IBufferResource* buffer, MapFlavor flavor) = 0; + virtual void unmap(IBufferResource* buffer) = 0; + void bindRootShaderObject(PipelineType pipelineType, IShaderObject* shaderObject); + +public: + Slang::ComPtr<ICommandQueue> m_queue; + uint32_t m_queueCreateCount = 0; + + ImmediateRendererBase(); + + virtual SLANG_NO_THROW Result SLANG_MCALL + createCommandQueue(const ICommandQueue::Desc& desc, ICommandQueue** outQueue) override; + virtual SLANG_NO_THROW Result SLANG_MCALL createRenderPassLayout( + const IRenderPassLayout::Desc& desc, + IRenderPassLayout** outRenderPassLayout) override; + + void _setPipelineState(IPipelineState* state); + + void uploadBufferData( + IBufferResource* dst, + size_t offset, + size_t size, void* data); + + virtual SLANG_NO_THROW SlangResult SLANG_MCALL readBufferResource( + IBufferResource* buffer, + size_t offset, + size_t size, + ISlangBlob** outBlob) override; +}; +} diff --git a/tools/gfx/open-gl/render-gl.cpp b/tools/gfx/open-gl/render-gl.cpp index 34ea70eef..01f285f65 100644 --- a/tools/gfx/open-gl/render-gl.cpp +++ b/tools/gfx/open-gl/render-gl.cpp @@ -3,8 +3,7 @@ #include "../nvapi/nvapi-util.h" -#include "../renderer-shared.h" -#include "../render-graphics-common.h" +#include "../immediate-renderer-base.h" #include "core/slang-basic.h" #include "core/slang-blob.h" @@ -52,6 +51,7 @@ F(glGenBuffers, PFNGLGENBUFFERSPROC) \ F(glBindBuffer, PFNGLBINDBUFFERPROC) \ F(glBufferData, PFNGLBUFFERDATAPROC) \ + F(glCopyBufferSubData, PFNGLCOPYBUFFERSUBDATAPROC) \ F(glDeleteBuffers, PFNGLDELETEBUFFERSPROC) \ F(glMapBuffer, PFNGLMAPBUFFERPROC) \ F(glUnmapBuffer, PFNGLUNMAPBUFFERPROC) \ @@ -71,6 +71,7 @@ F(glGenFramebuffers, PFNGLGENFRAMEBUFFERSPROC) \ F(glDeleteFramebuffers, PFNGLDELETEFRAMEBUFFERSPROC) \ F(glBindFramebuffer, 
PFNGLBINDFRAMEBUFFERPROC) \ + F(glDrawBuffers, PFNGLDRAWBUFFERSPROC) \ F(glFramebufferTexture2D, PFNGLFRAMEBUFFERTEXTURE2DPROC) \ F(glFramebufferTextureLayer, PFNGLFRAMEBUFFERTEXTURELAYERPROC) \ F(glBlitFramebuffer, PFNGLBLITFRAMEBUFFERPROC) \ @@ -87,20 +88,12 @@ using namespace Slang; namespace gfx { -class GLRenderer : public GraphicsAPIRenderer +class GLRenderer : public ImmediateRendererBase { public: // Renderer implementation virtual SLANG_NO_THROW SlangResult SLANG_MCALL initialize(const Desc& desc) override; - virtual SLANG_NO_THROW void SLANG_MCALL setClearColor(const float color[4]) override; - virtual SLANG_NO_THROW void SLANG_MCALL clearFrame() override; - virtual SLANG_NO_THROW void SLANG_MCALL beginFrame() override; - virtual SLANG_NO_THROW void SLANG_MCALL endFrame() override; - virtual SLANG_NO_THROW void SLANG_MCALL - makeSwapchainImagePresentable(ISwapchain* swapchain) override - { - SLANG_UNUSED(swapchain); - } + virtual SLANG_NO_THROW void SLANG_MCALL clearFrame(uint32_t mask, bool clearDepth, bool clearStencil) override; virtual SLANG_NO_THROW Result SLANG_MCALL createSwapchain( const ISwapchain::Desc& desc, WindowHandle window, ISwapchain** outSwapchain) override; virtual SLANG_NO_THROW Result SLANG_MCALL createFramebufferLayout( @@ -108,6 +101,7 @@ public: virtual SLANG_NO_THROW Result SLANG_MCALL createFramebuffer(const IFramebuffer::Desc& desc, IFramebuffer** outFramebuffer) override; virtual SLANG_NO_THROW void SLANG_MCALL setFramebuffer(IFramebuffer* frameBuffer) override; + virtual SLANG_NO_THROW void SLANG_MCALL setStencilReference(uint32_t referenceValue) override; virtual SLANG_NO_THROW Result SLANG_MCALL createTextureResource( IResource::Usage initialUsage, @@ -146,11 +140,17 @@ public: virtual SLANG_NO_THROW Result SLANG_MCALL createComputePipelineState( const ComputePipelineStateDesc& desc, IPipelineState** outState) override; - virtual SLANG_NO_THROW SlangResult SLANG_MCALL readTextureResource( - ITextureResource* texture, 
ISlangBlob** outBlob, size_t* outRowPitch, size_t* outPixelSize) override; - - virtual SLANG_NO_THROW void* SLANG_MCALL map(IBufferResource* buffer, MapFlavor flavor) override; - virtual SLANG_NO_THROW void SLANG_MCALL unmap(IBufferResource* buffer) override; + virtual SLANG_NO_THROW void SLANG_MCALL copyBuffer( + IBufferResource* dst, + size_t dstOffset, + IBufferResource* src, + size_t srcOffset, + size_t size) override; + virtual SLANG_NO_THROW Result SLANG_MCALL readTextureResource( + ITextureResource* texture, ResourceState state, ISlangBlob** outBlob, size_t* outRowPitch, size_t* outPixelSize) override; + + virtual void* map(IBufferResource* buffer, MapFlavor flavor) override; + virtual void unmap(IBufferResource* buffer) override; virtual SLANG_NO_THROW void SLANG_MCALL setPrimitiveTopology(PrimitiveTopology topology) override; @@ -183,10 +183,6 @@ public: { return RendererType::OpenGl; } - virtual PipelineStateBase* getCurrentPipeline() override - { - return m_currentPipelineState.Ptr(); - } HGLRC createGLContext(HDC hdc); GLRenderer(); ~GLRenderer(); @@ -354,9 +350,14 @@ public: public: GLuint m_framebuffer; + ShortList<GLenum> m_drawBuffers; WeakSink<GLRenderer>* m_renderer; ShortList<RefPtr<TextureViewImpl>> renderTargetViews; RefPtr<TextureViewImpl> depthStencilView; + ShortList<ColorClearValue> m_colorClearValues; + bool m_sameClearValues = true; + DepthStencilClearValue m_depthStencilClearValue; + FramebufferImpl(WeakSink<GLRenderer>* renderer) :m_renderer(renderer) {} ~FramebufferImpl() { @@ -370,11 +371,28 @@ public: auto renderer = m_renderer->get(); renderer->glGenFramebuffers(1, &m_framebuffer); renderer->glBindFramebuffer(GL_FRAMEBUFFER, m_framebuffer); + m_drawBuffers.clear(); + m_colorClearValues.clear(); for (Index i = 0; i < renderTargetViews.getCount(); i++) { auto rtv = renderTargetViews[i].Ptr(); renderer->glFramebufferTexture2D( GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + (uint32_t)i, GL_TEXTURE_2D, rtv->m_textureID, 0); + 
m_drawBuffers.add((GLenum)(GL_COLOR_ATTACHMENT0 + i)); + m_colorClearValues.add(rtv->m_resource->getDesc()->optimalClearValue.color); + } + m_sameClearValues = true; + for (Index i = 1; i < m_colorClearValues.getCount() && m_sameClearValues; i++) + { + for (int j = 0; j < 4; j++) + { + if (m_colorClearValues[i].floatValues[j] != + m_colorClearValues[0].floatValues[j]) + { + m_sameClearValues = false; + break; + } + } } if (depthStencilView) { @@ -384,6 +402,8 @@ public: GL_TEXTURE_2D, depthStencilView->m_textureID, 0); + m_depthStencilClearValue = + depthStencilView->m_resource->getDesc()->optimalClearValue.depthStencil; } auto error = renderer->glCheckFramebufferStatus(GL_FRAMEBUFFER); if (error != GL_FRAMEBUFFER_COMPLETE) @@ -518,7 +538,7 @@ public: { ConstantBuffer, CombinedTextureSampler, - + StorageBuffer, CountOf, }; @@ -595,6 +615,7 @@ public: RefPtr<DescriptorSetLayoutImpl> m_layout; List<RefPtr<BufferResourceImpl>> m_constantBuffers; + List<RefPtr<BufferResourceImpl>> m_storageBuffers; List<RefPtr<TextureViewImpl>> m_textures; List<RefPtr<SamplerStateImpl>> m_samplers; }; @@ -622,8 +643,6 @@ public: class PipelineStateImpl : public PipelineStateBase { public: - RefPtr<ShaderProgramImpl> m_program; - RefPtr<PipelineLayoutImpl> m_pipelineLayout; RefPtr<InputLayoutImpl> m_inputLayout; void init(const GraphicsPipelineStateDesc& inDesc) { @@ -674,7 +693,8 @@ public: HDC m_hdc; HGLRC m_glContext = 0; - float m_clearColor[4] = { 0, 0, 0, 0 }; + uint32_t m_stencilRef = 0; + GLuint m_vao; RefPtr<PipelineStateImpl> m_currentPipelineState; RefPtr<FramebufferImpl> m_currentFramebuffer; @@ -791,7 +811,11 @@ void GLRenderer::flushStateForDraw() if (m_currentFramebuffer) { glBindFramebuffer(GL_FRAMEBUFFER, m_currentFramebuffer->m_framebuffer); + glDrawBuffers( + (GLsizei)m_currentFramebuffer->m_drawBuffers.getCount(), + m_currentFramebuffer->m_drawBuffers.getArrayView().getBuffer()); } + glBindVertexArray(m_vao); auto inputLayout = 
m_currentPipelineState->m_inputLayout.Ptr();
     auto attrCount = Index(inputLayout->m_attributeCount);
@@ -818,7 +842,8 @@ void GLRenderer::flushStateForDraw()
         glDisableVertexAttribArray((GLuint)ii);
     }
     // Next bind the descriptor sets as required by the layout
-    auto pipelineLayout = m_currentPipelineState->m_pipelineLayout;
+    auto pipelineLayout =
+        static_cast<PipelineLayoutImpl*>(m_currentPipelineState->m_pipelineLayout.get());
     auto descriptorSetCount = pipelineLayout->m_sets.getCount();
     for(Index ii = 0; ii < descriptorSetCount; ++ii)
     {
@@ -843,6 +868,19 @@ void GLRenderer::flushStateForDraw()
             }
         }
 
+        {
+            // Then we will bind any storage buffers that were specified, offset by this set's base index.
+
+            auto slotTypeIndex = int(GLDescriptorSlotType::StorageBuffer);
+            auto count = descriptorSetLayout->m_counts[slotTypeIndex];
+            auto baseIndex = descriptorSetInfo.baseArrayIndex[slotTypeIndex];
+
+            for (Int ii = 0; ii < count; ++ii)
+            {
+                auto bufferImpl = descriptorSet->m_storageBuffers[ii];
+                glBindBufferBase(GL_SHADER_STORAGE_BUFFER, GLuint(baseIndex + ii), bufferImpl->m_handle);
+            }
+        }
         {
             // Next we will bind any combined texture/sampler slots.
@@ -1179,21 +1217,71 @@ SLANG_NO_THROW Result SLANG_MCALL GLRenderer::initialize(const Desc& desc) return SLANG_OK; } -SLANG_NO_THROW void SLANG_MCALL GLRenderer::setClearColor(const float color[4]) -{ - glClearColor(color[0], color[1], color[2], color[3]); -} - -SLANG_NO_THROW void SLANG_MCALL GLRenderer::clearFrame() -{ - glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT); -} - -SLANG_NO_THROW void SLANG_MCALL GLRenderer::beginFrame() { } - -SLANG_NO_THROW void SLANG_MCALL GLRenderer::endFrame() +SLANG_NO_THROW void SLANG_MCALL + GLRenderer::clearFrame(uint32_t mask, bool clearDepth, bool clearStencil) { - glFlush(); + uint32_t clearMask = 0; + if (clearDepth) + { + clearMask |= GL_DEPTH_BUFFER_BIT; + glClearDepth(m_currentFramebuffer->m_depthStencilClearValue.depth); + } + if (clearStencil) + { + clearMask |= GL_STENCIL_BUFFER_BIT; + glClearStencil(m_currentFramebuffer->m_depthStencilClearValue.stencil); + } + if (clearMask) + { + // If clear value for all attachments are the same, issue one `glClear` command. + if (m_currentFramebuffer->m_sameClearValues && + m_currentFramebuffer->m_colorClearValues.getCount() > 0) + { + ShortList<GLenum> clearBuffers; + auto clearColor = m_currentFramebuffer->m_colorClearValues[0]; + glClearColor( + clearColor.floatValues[0], + clearColor.floatValues[1], + clearColor.floatValues[2], + clearColor.floatValues[3]); + for (Index i = 0; i < m_currentFramebuffer->m_colorClearValues.getCount(); i++) + { + if (mask & uint32_t(1 << i)) + clearBuffers.add(GLenum(GL_COLOR_ATTACHMENT0 + i)); + } + if (clearBuffers.getCount()) + { + glDrawBuffers((GLsizei)clearBuffers.getCount(), clearBuffers.getArrayView().getBuffer()); + clearMask |= GL_COLOR_BUFFER_BIT; + } + glClear(clearMask); + glDrawBuffers( + (GLsizei)m_currentFramebuffer->m_drawBuffers.getCount(), + m_currentFramebuffer->m_drawBuffers.getArrayView().getBuffer()); + return; + } + // If clear values are different, clear attachments separately. 
+ for (Index i = 0; i < m_currentFramebuffer->m_colorClearValues.getCount(); i++) + { + if (mask & uint32_t(1 << i)) + { + GLenum drawBuffer = GLenum(GL_COLOR_ATTACHMENT0 + i); + glDrawBuffers(1, &drawBuffer); + auto clearColor = m_currentFramebuffer->m_colorClearValues[i]; + glClearColor( + clearColor.floatValues[0], + clearColor.floatValues[1], + clearColor.floatValues[2], + clearColor.floatValues[3]); + glClear(GL_COLOR_BUFFER_BIT); + } + } + // Clear depth/stencil attachments. + glClear(clearMask); + glDrawBuffers( + (GLsizei)m_currentFramebuffer->m_drawBuffers.getCount(), + m_currentFramebuffer->m_drawBuffers.getArrayView().getBuffer()); + } } SLANG_NO_THROW Result SLANG_MCALL GLRenderer::createSwapchain( @@ -1250,9 +1338,30 @@ SLANG_NO_THROW void SLANG_MCALL GLRenderer::setFramebuffer(IFramebuffer* frameBu m_currentFramebuffer = static_cast<FramebufferImpl*>(frameBuffer); } +void GLRenderer::setStencilReference(uint32_t referenceValue) +{ + m_stencilRef = referenceValue; + // TODO: actually set the stencil state. 
+} + +void GLRenderer::copyBuffer( + IBufferResource* dst, + size_t dstOffset, + IBufferResource* src, + size_t srcOffset, + size_t size) +{ + auto dstImpl = static_cast<BufferResourceImpl*>(dst); + auto srcImpl = static_cast<BufferResourceImpl*>(src); + glBindBuffer(GL_COPY_READ_BUFFER, srcImpl->m_handle); + glBindBuffer(GL_COPY_WRITE_BUFFER, dstImpl->m_handle); + glCopyBufferSubData(GL_COPY_READ_BUFFER, GL_COPY_WRITE_BUFFER, srcOffset, dstOffset, size); +} + SLANG_NO_THROW Result SLANG_MCALL GLRenderer::readTextureResource( - ITextureResource* texture, ISlangBlob** outBlob, size_t* outRowPitch, size_t* outPixelSize) + ITextureResource* texture, ResourceState state, ISlangBlob** outBlob, size_t* outRowPitch, size_t* outPixelSize) { + SLANG_UNUSED(state); auto resource = static_cast<TextureResourceImpl*>(texture); auto size = resource->getDesc()->size; size_t requiredSize = size.width * size.height * sizeof(uint32_t); @@ -1585,7 +1694,7 @@ SLANG_NO_THROW Result SLANG_MCALL GLRenderer::createInputLayout( return SLANG_OK; } -SLANG_NO_THROW void* SLANG_MCALL GLRenderer::map(IBufferResource* bufferIn, MapFlavor flavor) +void* GLRenderer::map(IBufferResource* bufferIn, MapFlavor flavor) { BufferResourceImpl* buffer = static_cast<BufferResourceImpl*>(bufferIn); @@ -1608,7 +1717,7 @@ SLANG_NO_THROW void* SLANG_MCALL GLRenderer::map(IBufferResource* bufferIn, MapF return glMapBuffer(buffer->m_target, access); } -SLANG_NO_THROW void SLANG_MCALL GLRenderer::unmap(IBufferResource* bufferIn) +void GLRenderer::unmap(IBufferResource* bufferIn) { BufferResourceImpl* buffer = static_cast<BufferResourceImpl*>(bufferIn); glUnmapBuffer(buffer->m_target); @@ -1695,11 +1804,11 @@ SLANG_NO_THROW void SLANG_MCALL GLRenderer::setScissorRects(UInt count, ScissorR SLANG_NO_THROW void SLANG_MCALL GLRenderer::setPipelineState(IPipelineState* state) { - auto pipelineStateImpl = (PipelineStateImpl*) state; + auto pipelineStateImpl = static_cast<PipelineStateImpl*>(state); m_currentPipelineState 
= pipelineStateImpl; - auto program = pipelineStateImpl->m_program; + auto program = static_cast<ShaderProgramImpl*>(pipelineStateImpl->m_program.get()); GLuint programID = program ? program->m_id : 0; glUseProgram(programID); } @@ -1737,13 +1846,21 @@ SLANG_NO_THROW void SLANG_MCALL GLRenderer::DescriptorSetImpl::setConstantBuffer SLANG_NO_THROW void SLANG_MCALL GLRenderer::DescriptorSetImpl::setResource(UInt range, UInt index, IResourceView* view) { - auto viewImpl = (ResourceViewImpl*) view; + auto viewImpl = static_cast<ResourceViewImpl*>(view); auto layout = m_layout; auto rangeInfo = layout->m_ranges[range]; auto arrayIndex = rangeInfo.arrayIndex + index; - assert(!"unimplemented"); + switch (rangeInfo.type) + { + case GLDescriptorSlotType::StorageBuffer: + m_storageBuffers[arrayIndex] = static_cast<BufferViewImpl*>(viewImpl)->m_resource; + break; + default: + assert(!"unimplemented"); + break; + } } SLANG_NO_THROW void SLANG_MCALL @@ -1808,9 +1925,9 @@ SLANG_NO_THROW Result SLANG_MCALL GLRenderer::createDescriptorSetLayout( assert(!"unsupported"); break; - // TODO: There are many other slot types we could support here, - // in particular including storage buffers. 
- + case DescriptorSlotType::StorageBuffer: + glSlotType = GLDescriptorSlotType::StorageBuffer; + break; case DescriptorSlotType::CombinedImageSampler: glSlotType = GLDescriptorSlotType::CombinedTextureSampler; break; @@ -1888,7 +2005,11 @@ SLANG_NO_THROW Result SLANG_MCALL auto slotCount = layoutImpl->m_counts[slotTypeIndex]; descriptorSetImpl->m_constantBuffers.setCount(slotCount); } - + { + auto slotTypeIndex = int(GLDescriptorSlotType::StorageBuffer); + auto slotCount = layoutImpl->m_counts[slotTypeIndex]; + descriptorSetImpl->m_storageBuffers.setCount(slotCount); + } { auto slotTypeIndex = int(GLDescriptorSlotType::CombinedTextureSampler); auto slotCount = layoutImpl->m_counts[slotTypeIndex]; @@ -1978,12 +2099,9 @@ Result GLRenderer::createGraphicsPipelineState(const GraphicsPipelineStateDesc& preparePipelineDesc(desc); auto programImpl = (ShaderProgramImpl*) desc.program; - auto pipelineLayoutImpl = (PipelineLayoutImpl*) desc.pipelineLayout; auto inputLayoutImpl = (InputLayoutImpl*) desc.inputLayout; RefPtr<PipelineStateImpl> pipelineStateImpl = new PipelineStateImpl(); - pipelineStateImpl->m_program = programImpl; - pipelineStateImpl->m_pipelineLayout = pipelineLayoutImpl; pipelineStateImpl->m_inputLayout = inputLayoutImpl; pipelineStateImpl->init(desc); *outState = pipelineStateImpl.detach(); @@ -2008,3 +2126,4 @@ Result GLRenderer::createComputePipelineState(const ComputePipelineStateDesc& in } // renderer_test + diff --git a/tools/gfx/render-graphics-common.cpp b/tools/gfx/render-graphics-common.cpp index 7bdaddf73..2e460982e 100644 --- a/tools/gfx/render-graphics-common.cpp +++ b/tools/gfx/render-graphics-common.cpp @@ -644,27 +644,29 @@ protected: m_renderTargetCount = fragmentEntryPoint.layout->getVaryingOutputs().getCount(); } - IPipelineLayout::Desc pipelineLayoutDesc; - - // HACK: we set `renderTargetCount` to zero here becasue otherwise the D3D12 - // render back-end will adjust all UAV registers by this value to account - // for the `SV_Target<N>` 
outputs implicitly consuming `u<N>` registers for - // Shader Model 5.0. - // - // When using the shader object path, all registers are being set via Slang - // reflection information, and we do not need/want the automatic adjustment. - // - // TODO: Once we eliminate the non-shader-object path, this whole issue should - // be moot, because the `ProgramLayout` should own/be the pipeline layout anyway. - // - pipelineLayoutDesc.renderTargetCount = 0; + if (m_program->getSpecializationParamCount() == 0) + { + IPipelineLayout::Desc pipelineLayoutDesc; - pipelineLayoutDesc.descriptorSetCount = pipelineDescriptorSets.getCount(); - pipelineLayoutDesc.descriptorSets = pipelineDescriptorSets.getBuffer(); + // HACK: we set `renderTargetCount` to zero here becasue otherwise the D3D12 + // render back-end will adjust all UAV registers by this value to account + // for the `SV_Target<N>` outputs implicitly consuming `u<N>` registers for + // Shader Model 5.0. + // + // When using the shader object path, all registers are being set via Slang + // reflection information, and we do not need/want the automatic adjustment. + // + // TODO: Once we eliminate the non-shader-object path, this whole issue should + // be moot, because the `ProgramLayout` should own/be the pipeline layout anyway. 
+ // + pipelineLayoutDesc.renderTargetCount = 0; - SLANG_RETURN_ON_FAIL( - renderer->createPipelineLayout(pipelineLayoutDesc, m_pipelineLayout.writeRef())); + pipelineLayoutDesc.descriptorSetCount = pipelineDescriptorSets.getCount(); + pipelineLayoutDesc.descriptorSets = pipelineDescriptorSets.getBuffer(); + SLANG_RETURN_ON_FAIL( + renderer->createPipelineLayout(pipelineLayoutDesc, m_pipelineLayout.writeRef())); + } return SLANG_OK; } @@ -1082,7 +1084,8 @@ protected: } Result apply( - IRenderer* renderer, + RendererBase* renderer, + GraphicsComputeCommandEncoderBase* encoder, PipelineType pipelineType, IPipelineLayout* pipelineLayout, Index& ioRootIndex) @@ -1100,11 +1103,11 @@ protected: descriptorSets.add(descriptorSet); } - SLANG_RETURN_ON_FAIL(_bindIntoDescriptorSets(descriptorSets.getBuffer())); + SLANG_RETURN_ON_FAIL(_bindIntoDescriptorSets(encoder, descriptorSets.getBuffer())); for (auto descriptorSet : descriptorSets) { - renderer->setDescriptorSet(pipelineType, pipelineLayout, ioRootIndex++, descriptorSet); + encoder->setDescriptorSetImpl(pipelineType, pipelineLayout, ioRootIndex++, descriptorSet); } return SLANG_OK; @@ -1112,7 +1115,9 @@ protected: /// Write the uniform/ordinary data of this object into the given `dest` buffer at the given `offset` Result _writeOrdinaryData( - char* dest, + GraphicsComputeCommandEncoderBase* encoder, + IBufferResource* buffer, + size_t offset, size_t destSize, GraphicsCommonShaderObjectLayout* specializedLayout) { @@ -1121,7 +1126,7 @@ protected: SLANG_ASSERT(srcSize <= destSize); - memcpy(dest, src, srcSize); + encoder->uploadBufferDataImpl(buffer, offset, srcSize, src); // In the case where this object has any sub-objects of // existential/interface type, we need to recurse on those objects @@ -1197,7 +1202,7 @@ protected: auto subObjectOffset = subObjectRangePendingDataOffset + i*subObjectRangePendingDataStride; - subObject->_writeOrdinaryData(dest + subObjectOffset, destSize - subObjectOffset, subObjectLayout); + 
subObject->_writeOrdinaryData(encoder, buffer, offset + subObjectOffset, destSize - subObjectOffset, subObjectLayout); } } @@ -1211,7 +1216,7 @@ protected: size_t _getSubObjectRangePendingDataStride(GraphicsCommonShaderObjectLayout* specializedLayout, Index subObjectRangeIndex) { return 0; } /// Ensure that the `m_ordinaryDataBuffer` has been created, if it is needed - Result _ensureOrdinaryDataBufferCreatedIfNeeded() + Result _ensureOrdinaryDataBufferCreatedIfNeeded(GraphicsComputeCommandEncoderBase* encoder) { // If we have already created a buffer to hold ordinary data, then we should // simply re-use that buffer rather than re-create it. @@ -1259,15 +1264,17 @@ protected: // where this object contains interface/existential-type fields, so we // don't need or want to inline it into this call site. // - char* dest = (char*)renderer->map(m_ordinaryDataBuffer, MapFlavor::HostWrite); - SLANG_RETURN_ON_FAIL(_writeOrdinaryData(dest, specializedOrdinaryDataSize, specializedLayout)); - renderer->unmap(m_ordinaryDataBuffer); - + SLANG_RETURN_ON_FAIL(_writeOrdinaryData( + encoder, m_ordinaryDataBuffer, 0, specializedOrdinaryDataSize, specializedLayout)); return SLANG_OK; } /// Bind the buffer for ordinary/uniform data, if needed - Result _bindOrdinaryDataBufferIfNeeded(IDescriptorSet* descriptorSet, Index* ioBaseRangeIndex, Index subObjectRangeArrayIndex) + Result _bindOrdinaryDataBufferIfNeeded( + GraphicsComputeCommandEncoderBase* encoder, + IDescriptorSet* descriptorSet, + Index* ioBaseRangeIndex, + Index subObjectRangeArrayIndex) { // We are going to need to tweak the base binding range index // used for descriptor-set writes if and only if we actually @@ -1277,7 +1284,7 @@ protected: // We start by ensuring that the buffer is created, if it is needed. 
// - SLANG_RETURN_ON_FAIL(_ensureOrdinaryDataBufferCreatedIfNeeded()); + SLANG_RETURN_ON_FAIL(_ensureOrdinaryDataBufferCreatedIfNeeded(encoder)); // If we did indeed need/create a buffer, then we must bind it into // the given `descriptorSet` and update the base range index for @@ -1293,11 +1300,15 @@ protected: } Result _bindIntoDescriptorSet( - IDescriptorSet* descriptorSet, Index baseRangeIndex, Index subObjectRangeArrayIndex) + GraphicsComputeCommandEncoderBase* encoder, + IDescriptorSet* descriptorSet, + Index baseRangeIndex, + Index subObjectRangeArrayIndex) { GraphicsCommonShaderObjectLayout* layout = getLayout(); - _bindOrdinaryDataBufferIfNeeded(descriptorSet, &baseRangeIndex, subObjectRangeArrayIndex); + _bindOrdinaryDataBufferIfNeeded( + encoder, descriptorSet, &baseRangeIndex, subObjectRangeArrayIndex); for (auto bindingRangeInfo : layout->getBindingRanges()) { @@ -1373,12 +1384,12 @@ protected: } public: - virtual Result _bindIntoDescriptorSets(ComPtr<IDescriptorSet>* descriptorSets) + virtual Result _bindIntoDescriptorSets(GraphicsComputeCommandEncoderBase* encoder, ComPtr<IDescriptorSet>* descriptorSets) { GraphicsCommonShaderObjectLayout* layout = getLayout(); Index baseRangeIndex = 0; - _bindOrdinaryDataBufferIfNeeded(descriptorSets[0], &baseRangeIndex, 0); + _bindOrdinaryDataBufferIfNeeded(encoder, descriptorSets[0], &baseRangeIndex, 0); // Fill in the descriptor sets based on binding ranges // @@ -1396,7 +1407,7 @@ public: { GraphicsCommonShaderObject* subObject = m_objects[baseIndex + i]; - subObject->_bindIntoDescriptorSet(descriptorSet, rangeIndex, i); + subObject->_bindIntoDescriptorSet(encoder, descriptorSet, rangeIndex, i); } break; @@ -1428,7 +1439,7 @@ public: { GraphicsCommonShaderObject* subObject = m_objects[baseIndex + i]; - subObject->_bindIntoDescriptorSet(descriptorSet, rangeIndex, i); + subObject->_bindIntoDescriptorSet(encoder, descriptorSet, rangeIndex, i); } break; @@ -1541,12 +1552,12 @@ public: GraphicsCommonProgramLayout* 
getLayout() { return static_cast<GraphicsCommonProgramLayout*>(m_layout.Ptr()); } - void apply(IRenderer* renderer, PipelineType pipelineType) + void apply(RendererBase* renderer, GraphicsComputeCommandEncoderBase* encoder, PipelineType pipelineType) { - auto pipelineLayout = getLayout()->getPipelineLayout(); + auto pipelineLayout = encoder->m_currentPipeline->m_pipelineLayout.get(); Index rootIndex = 0; - GraphicsCommonShaderObject::apply(renderer, pipelineType, pipelineLayout, rootIndex); + GraphicsCommonShaderObject::apply(renderer, encoder, pipelineType, pipelineLayout, rootIndex); #if 0 @@ -1587,9 +1598,10 @@ public: } protected: - virtual Result _bindIntoDescriptorSets(ComPtr<IDescriptorSet>* descriptorSets) override + virtual Result _bindIntoDescriptorSets( + GraphicsComputeCommandEncoderBase* encoder, ComPtr<IDescriptorSet>* descriptorSets) override { - SLANG_RETURN_ON_FAIL(Super::_bindIntoDescriptorSets(descriptorSets)); + SLANG_RETURN_ON_FAIL(Super::_bindIntoDescriptorSets(encoder, descriptorSets)); auto entryPointCount = m_entryPoints.getCount(); for (Index i = 0; i < entryPointCount; ++i) @@ -1598,7 +1610,7 @@ protected: auto& entryPointInfo = getLayout()->getEntryPoint(i); SLANG_RETURN_ON_FAIL(entryPoint->_bindIntoDescriptorSet( - descriptorSets[0], entryPointInfo.rangeOffset, 0)); + encoder, descriptorSets[0], entryPointInfo.rangeOffset, 0)); } return SLANG_OK; @@ -1760,17 +1772,20 @@ Result GraphicsAPIRenderer::initProgramCommon( return SLANG_OK; } -Result SLANG_MCALL - GraphicsAPIRenderer::bindRootShaderObject(PipelineType pipelineType, IShaderObject* object) +Result GraphicsComputeCommandEncoderBase::bindRootShaderObjectImpl( + PipelineType pipelineType, + IShaderObject* object) { auto programVars = dynamic_cast<ProgramVars*>(object); if (!programVars) return SLANG_E_INVALID_HANDLE; - SLANG_RETURN_ON_FAIL(maybeSpecializePipeline(programVars)); - + RefPtr<PipelineStateBase> specializedPipeline; + 
SLANG_RETURN_ON_FAIL(m_rendererBase->maybeSpecializePipeline(m_currentPipeline, programVars, specializedPipeline)); + m_currentPipeline = specializedPipeline; + // Apply shader parameter bindings. - programVars->apply(this, pipelineType); + programVars->apply(m_rendererBase, this, pipelineType); return SLANG_OK; } diff --git a/tools/gfx/render-graphics-common.h b/tools/gfx/render-graphics-common.h index e41fc6ee1..96ab8e831 100644 --- a/tools/gfx/render-graphics-common.h +++ b/tools/gfx/render-graphics-common.h @@ -17,6 +17,23 @@ private: Slang::RefPtr<ShaderObjectLayoutBase> m_layout; }; +class GraphicsComputeCommandEncoderBase +{ +public: + RendererBase* m_rendererBase; + Slang::RefPtr<PipelineStateBase> m_currentPipeline; + + virtual SLANG_NO_THROW void SLANG_MCALL setDescriptorSetImpl( + PipelineType pipelineType, + IPipelineLayout* layout, + UInt index, + IDescriptorSet* descriptorSet) = 0; + virtual SLANG_NO_THROW void SLANG_MCALL uploadBufferDataImpl( + IBufferResource* buffer, size_t offset, size_t size, void* data) = 0; + + Result bindRootShaderObjectImpl(PipelineType pipelineType, IShaderObject* object); +}; + class GraphicsAPIRenderer : public RendererBase { public: @@ -29,8 +46,6 @@ public: virtual SLANG_NO_THROW Result SLANG_MCALL createRootShaderObject( IShaderProgram* program, IShaderObject** outObject) SLANG_OVERRIDE; - virtual SLANG_NO_THROW Result SLANG_MCALL - bindRootShaderObject(PipelineType pipelineType, IShaderObject* object) SLANG_OVERRIDE; void preparePipelineDesc(GraphicsPipelineStateDesc& desc); void preparePipelineDesc(ComputePipelineStateDesc& desc); diff --git a/tools/gfx/renderer-shared.cpp b/tools/gfx/renderer-shared.cpp index e423cd7b6..1ad7e2aac 100644 --- a/tools/gfx/renderer-shared.cpp +++ b/tools/gfx/renderer-shared.cpp @@ -27,6 +27,14 @@ const Slang::Guid GfxGUID::IID_ITextureResource = SLANG_UUID_ITextureResource; const Slang::Guid GfxGUID::IID_IRenderer = SLANG_UUID_IRenderer; const Slang::Guid GfxGUID::IID_IShaderObject = 
SLANG_UUID_IShaderObject; +const Slang::Guid GfxGUID::IID_IRenderPassLayout = SLANG_UUID_IRenderPassLayout; +const Slang::Guid GfxGUID::IID_ICommandEncoder = SLANG_UUID_ICommandEncoder; +const Slang::Guid GfxGUID::IID_IRenderCommandEncoder = SLANG_UUID_IRenderCommandEncoder; +const Slang::Guid GfxGUID::IID_IComputeCommandEncoder = SLANG_UUID_IComputeCommandEncoder; +const Slang::Guid GfxGUID::IID_IResourceCommandEncoder = SLANG_UUID_IResourceCommandEncoder; +const Slang::Guid GfxGUID::IID_ICommandBuffer = SLANG_UUID_ICommandBuffer; +const Slang::Guid GfxGUID::IID_ICommandQueue = SLANG_UUID_ICommandQueue; + gfx::StageType translateStage(SlangStage slangStage) { switch (slangStage) @@ -235,6 +243,19 @@ void PipelineStateBase::initializeBase(const PipelineStateDesc& inDesc) auto program = desc.getProgram(); m_program = program; isSpecializable = (program->slangProgram && program->slangProgram->getSpecializationParamCount() != 0); + + switch (desc.type) + { + case PipelineType::Graphics: + m_pipelineLayout = inDesc.graphics.pipelineLayout; + break; + case PipelineType::Compute: + m_pipelineLayout = inDesc.compute.pipelineLayout; + break; + default: + assert(!"unknown pipeline type"); + break; + } } IRenderer* gfx::RendererBase::getInterface(const Guid& guid) @@ -246,7 +267,6 @@ IRenderer* gfx::RendererBase::getInterface(const Guid& guid) SLANG_NO_THROW Result SLANG_MCALL RendererBase::initialize(const Desc& desc) { - shaderCache.init(desc.shaderCacheFileSystem); return SLANG_OK; } @@ -339,71 +359,6 @@ ShaderComponentID ShaderCache::getComponentId(ComponentKey key) return resultId; } -void ShaderCache::init(ISlangFileSystem* cacheFileSystem) -{ - fileSystem = cacheFileSystem; - - ComPtr<ISlangBlob> indexFileBlob; - if (fileSystem && fileSystem->loadFile("index", indexFileBlob.writeRef()) == SLANG_OK) - { - UnownedStringSlice indexText = UnownedStringSlice(static_cast<const char*>(indexFileBlob->getBufferPointer())); - TokenReader reader = TokenReader(indexText); - auto 
componentCountInFileSystem = reader.ReadUInt(); - for (uint32_t i = 0; i < componentCountInFileSystem; i++) - { - OwningComponentKey key; - auto componentId = reader.ReadUInt(); - key.typeName = reader.ReadWord(); - key.specializationArgs.setCount(reader.ReadUInt()); - for (auto& arg : key.specializationArgs) - arg = reader.ReadUInt(); - componentIds[key] = componentId; - } - } -} - -void ShaderCache::writeToFileSystem(ISlangMutableFileSystem* outputFileSystem) -{ - StringBuilder indexBuilder; - indexBuilder << componentIds.Count() << Slang::EndLine; - for (auto id : componentIds) - { - indexBuilder << id.Value << " "; - indexBuilder << id.Key.typeName << " " << id.Key.specializationArgs.getCount(); - for (auto arg : id.Key.specializationArgs) - indexBuilder << " " << arg; - indexBuilder << Slang::EndLine; - } - outputFileSystem->saveFile("index", indexBuilder.getBuffer(), indexBuilder.getLength()); - for (auto& binary : shaderBinaries) - { - ComPtr<ISlangBlob> blob; - binary.Value->writeToBlob(blob.writeRef()); - outputFileSystem->saveFile(String(binary.Key).getBuffer(), blob->getBufferPointer(), blob->getBufferSize()); - } -} - -Slang::RefPtr<ShaderBinary> ShaderCache::tryLoadShaderBinary(ShaderComponentID componentId) -{ - Slang::ComPtr<ISlangBlob> entryBlob; - Slang::RefPtr<ShaderBinary> binary; - if (shaderBinaries.TryGetValue(componentId, binary)) - return binary; - - if (fileSystem && fileSystem->loadFile(String(componentId).getBuffer(), entryBlob.writeRef()) == SLANG_OK) - { - binary = new ShaderBinary(); - binary->loadFromBlob(entryBlob.get()); - return binary; - } - return nullptr; -} - -void ShaderCache::addShaderBinary(ShaderComponentID componentId, ShaderBinary* binary) -{ - shaderBinaries[componentId] = binary; -} - void ShaderCache::addSpecializedPipeline(PipelineKey key, Slang::ComPtr<IPipelineState> specializedPipeline) { specializedPipelines[key] = specializedPipeline; @@ -484,9 +439,13 @@ Result 
ShaderObjectBase::_getSpecializedShaderObjectType(ExtendedShaderObjectTyp return SLANG_OK; } -Result RendererBase::maybeSpecializePipeline(ShaderObjectBase* rootObject) +Result RendererBase::maybeSpecializePipeline( + PipelineStateBase* currentPipeline, + ShaderObjectBase* rootObject, + RefPtr<PipelineStateBase>& outNewPipeline) { - auto currentPipeline = getCurrentPipeline(); + outNewPipeline = static_cast<PipelineStateBase*>(currentPipeline); + auto pipelineType = currentPipeline->desc.type; if (currentPipeline->unspecializedPipelineState) currentPipeline = currentPipeline->unspecializedPipelineState; @@ -502,78 +461,32 @@ Result RendererBase::maybeSpecializePipeline(ShaderObjectBase* rootObject) pipelineKey.specializationArgs.addRange(specializationArgs.componentIDs); pipelineKey.updateHash(); - ComPtr<gfx::IPipelineState> specializedPipelineState = shaderCache.getSpecializedPipelineState(pipelineKey); + ComPtr<IPipelineState> specializedPipelineState = shaderCache.getSpecializedPipelineState(pipelineKey); // Try to find specialized pipeline from shader cache. if (!specializedPipelineState) { auto unspecializedProgram = static_cast<ShaderProgramBase*>(pipelineType == PipelineType::Compute ? 
currentPipeline->desc.compute.program : currentPipeline->desc.graphics.program); - List<RefPtr<ShaderBinary>> entryPointBinaries; auto unspecializedProgramLayout = unspecializedProgram->slangProgram->getLayout(); - for (SlangUInt i = 0; i < unspecializedProgramLayout->getEntryPointCount(); i++) + + ComPtr<slang::IComponentType> specializedComponentType; + ComPtr<slang::IBlob> diagnosticBlob; + auto compileRs = unspecializedProgram->slangProgram->specialize( + specializationArgs.components.getArrayView().getBuffer(), + specializationArgs.getCount(), + specializedComponentType.writeRef(), + diagnosticBlob.writeRef()); + if (compileRs != SLANG_OK) { - auto unspecializedEntryPoint = unspecializedProgramLayout->getEntryPointByIndex(i); - UnownedStringSlice entryPointName = UnownedStringSlice(unspecializedEntryPoint->getName()); - ComponentKey specializedKernelKey; - specializedKernelKey.typeName = entryPointName; - specializedKernelKey.specializationArgs.addRange(specializationArgs.componentIDs); - specializedKernelKey.updateHash(); - // If the pipeline is not created, check if the kernel binaries has been compiled. - auto specializedKernelComponentID = shaderCache.getComponentId(specializedKernelKey); - RefPtr<ShaderBinary> binary = shaderCache.tryLoadShaderBinary(specializedKernelComponentID); - if (!binary) - { - // If the specialized shader binary does not exist in cache, use slang to generate it. - entryPointBinaries.clear(); - ComPtr<slang::IComponentType> specializedComponentType; - ComPtr<slang::IBlob> diagnosticBlob; - auto result = unspecializedProgram->slangProgram->specialize(specializationArgs.components.getArrayView().getBuffer(), - specializationArgs.getCount(), specializedComponentType.writeRef(), diagnosticBlob.writeRef()); - - // TODO: print diagnostic message via debug output interface. - - if (result != SLANG_OK) - return result; - - // Cache specialized binaries. 
- auto programLayout = specializedComponentType->getLayout(); - for (SlangUInt j = 0; j < programLayout->getEntryPointCount(); j++) - { - auto entryPointLayout = programLayout->getEntryPointByIndex(j); - ComPtr<slang::IBlob> entryPointCode; - SLANG_RETURN_ON_FAIL(specializedComponentType->getEntryPointCode(j, 0, entryPointCode.writeRef(), diagnosticBlob.writeRef())); - binary = new ShaderBinary(); - binary->stage = gfx::translateStage(entryPointLayout->getStage()); - binary->entryPointName = entryPointLayout->getName(); - binary->source.addRange((uint8_t*)entryPointCode->getBufferPointer(), entryPointCode->getBufferSize()); - entryPointBinaries.add(binary); - shaderCache.addShaderBinary(specializedKernelComponentID, binary); - } - - // We have already obtained all kernel binaries from this program, so break out of the outer loop since we no longer - // need to examine the rest of the kernels. - break; - } - entryPointBinaries.add(binary); + printf("%s\n", (char*)diagnosticBlob->getBufferPointer()); + return SLANG_FAIL; } // Now create specialized shader program using compiled binaries. 
ComPtr<IShaderProgram> specializedProgram; IShaderProgram::Desc specializedProgramDesc = {}; - specializedProgramDesc.kernelCount = unspecializedProgramLayout->getEntryPointCount(); - ShortList<IShaderProgram::KernelDesc> kernelDescs; - kernelDescs.setCount(entryPointBinaries.getCount()); - for (Slang::Index i = 0; i < entryPointBinaries.getCount(); i++) - { - auto entryPoint = unspecializedProgramLayout->getEntryPointByIndex(i);; - auto& kernelDesc = kernelDescs[i]; - kernelDesc.stage = entryPointBinaries[i]->stage; - kernelDesc.entryPointName = entryPointBinaries[i]->entryPointName.getBuffer(); - kernelDesc.codeBegin = entryPointBinaries[i]->source.begin(); - kernelDesc.codeEnd = entryPointBinaries[i]->source.end(); - } - specializedProgramDesc.kernels = kernelDescs.getArrayView().getBuffer(); + specializedProgramDesc.slangProgram = specializedComponentType; specializedProgramDesc.pipelineType = pipelineType; SLANG_RETURN_ON_FAIL(createProgram(specializedProgramDesc, specializedProgram.writeRef())); @@ -601,7 +514,7 @@ Result RendererBase::maybeSpecializePipeline(ShaderObjectBase* rootObject) specializedPipelineStateBase->unspecializedPipelineState = currentPipeline; shaderCache.addSpecializedPipeline(pipelineKey, specializedPipelineState); } - setPipelineState(specializedPipelineState); + outNewPipeline = static_cast<PipelineStateBase*>(specializedPipelineState.get()); } return SLANG_OK; } diff --git a/tools/gfx/renderer-shared.h b/tools/gfx/renderer-shared.h index f16db900a..5846aad34 100644 --- a/tools/gfx/renderer-shared.h +++ b/tools/gfx/renderer-shared.h @@ -27,6 +27,13 @@ struct GfxGUID static const Slang::Guid IID_IRenderer; static const Slang::Guid IID_IShaderObjectLayout; static const Slang::Guid IID_IShaderObject; + static const Slang::Guid IID_IRenderPassLayout; + static const Slang::Guid IID_ICommandEncoder; + static const Slang::Guid IID_IRenderCommandEncoder; + static const Slang::Guid IID_IComputeCommandEncoder; + static const Slang::Guid 
IID_IResourceCommandEncoder; + static const Slang::Guid IID_ICommandBuffer; + static const Slang::Guid IID_ICommandQueue; }; gfx::StageType translateStage(SlangStage slangStage); @@ -244,6 +251,9 @@ public: // pipeline cannot be used directly and must be specialized first. bool isSpecializable = false; ComPtr<IShaderProgram> m_program; + + ComPtr<IPipelineLayout> m_pipelineLayout; + protected: void initializeBase(const PipelineStateDesc& inDesc); }; @@ -338,8 +348,6 @@ public: ShaderComponentID getComponentId(Slang::UnownedStringSlice name); ShaderComponentID getComponentId(ComponentKey key); - void init(ISlangFileSystem* cacheFileSystem); - void writeToFileSystem(ISlangMutableFileSystem* outputFileSystem); Slang::ComPtr<IPipelineState> getSpecializedPipelineState(PipelineKey programKey) { Slang::ComPtr<IPipelineState> result; @@ -347,15 +355,16 @@ public: return result; return nullptr; } - Slang::RefPtr<ShaderBinary> tryLoadShaderBinary(ShaderComponentID componentId); - void addShaderBinary(ShaderComponentID componentId, ShaderBinary* binary); void addSpecializedPipeline(PipelineKey key, Slang::ComPtr<IPipelineState> specializedPipeline); + void free() + { + specializedPipelines = decltype(specializedPipelines)(); + componentIds = decltype(componentIds)(); + } protected: - Slang::ComPtr<ISlangFileSystem> fileSystem; Slang::OrderedDictionary<OwningComponentKey, ShaderComponentID> componentIds; Slang::OrderedDictionary<PipelineKey, Slang::ComPtr<IPipelineState>> specializedPipelines; - Slang::OrderedDictionary<ShaderComponentID, Slang::RefPtr<ShaderBinary>> shaderBinaries; }; // Renderer implementation shared by all platforms. @@ -378,13 +387,13 @@ public: slang::TypeReflection* type, ShaderObjectLayoutBase** outLayout); -protected: - // Retrieves the currently bound unspecialized pipeline. - // If the bound pipeline is not created from a Slang component, an implementation should return null. 
- virtual PipelineStateBase* getCurrentPipeline() = 0; +public: ExtendedShaderObjectTypeList specializationArgs; // Given current pipeline and root shader object binding, generate and bind a specialized pipeline if necessary. - Result maybeSpecializePipeline(ShaderObjectBase* inRootShaderObject); + Result maybeSpecializePipeline( + PipelineStateBase* currentPipeline, + ShaderObjectBase* rootObject, + Slang::RefPtr<PipelineStateBase>& outNewPipeline); virtual Result createShaderObjectLayout( diff --git a/tools/gfx/simple-render-pass-layout.cpp b/tools/gfx/simple-render-pass-layout.cpp new file mode 100644 index 000000000..8821df87c --- /dev/null +++ b/tools/gfx/simple-render-pass-layout.cpp @@ -0,0 +1,25 @@ +#include "simple-render-pass-layout.h" + +#include "renderer-shared.h" + +namespace gfx +{ + +IRenderPassLayout* SimpleRenderPassLayout::getInterface(const Slang::Guid& guid) +{ + if (guid == GfxGUID::IID_ISlangUnknown || guid == GfxGUID::IID_IRenderPassLayout) + return static_cast<IRenderPassLayout*>(this); + return nullptr; +} + +void SimpleRenderPassLayout::init(const IRenderPassLayout::Desc& desc) +{ + m_renderTargetAccesses.setCount(desc.renderTargetCount); + for (uint32_t i = 0; i < desc.renderTargetCount; i++) + m_renderTargetAccesses[i] = desc.renderTargetAccess[i]; + m_hasDepthStencil = (desc.depthStencilAccess != nullptr); + if (m_hasDepthStencil) + m_depthStencilAccess = *desc.depthStencilAccess; +} + +} // namespace gfx diff --git a/tools/gfx/simple-render-pass-layout.h b/tools/gfx/simple-render-pass-layout.h new file mode 100644 index 000000000..54d1e5649 --- /dev/null +++ b/tools/gfx/simple-render-pass-layout.h @@ -0,0 +1,30 @@ +// simple-render-pass-layout.h +#pragma once + +// Implementation of a dummy render pass layout object that stores and holds its +// desc value. Used by targets that does not expose an API object for the render pass +// concept. 
+ +#include "slang-gfx.h" +#include "slang-com-helper.h" +#include "core/slang-basic.h" + +namespace gfx +{ + +class SimpleRenderPassLayout + : public IRenderPassLayout + , public Slang::RefObject +{ +public: + SLANG_REF_OBJECT_IUNKNOWN_ALL + IRenderPassLayout* getInterface(const Slang::Guid& guid); + +public: + Slang::ShortList<AttachmentAccessDesc> m_renderTargetAccesses; + AttachmentAccessDesc m_depthStencilAccess; + bool m_hasDepthStencil; + void init(const IRenderPassLayout::Desc& desc); +}; + +} diff --git a/tools/gfx/vulkan/render-vk.cpp b/tools/gfx/vulkan/render-vk.cpp index e89b6a765..859519c6d 100644 --- a/tools/gfx/vulkan/render-vk.cpp +++ b/tools/gfx/vulkan/render-vk.cpp @@ -6,6 +6,7 @@ #include "../render-graphics-common.h" #include "core/slang-basic.h" +#include "core/slang-blob.h" #include "vk-api.h" #include "vk-util.h" @@ -44,20 +45,19 @@ public: kMaxDescriptorSets = 8, }; // Renderer implementation + Result initVulkanInstanceAndDevice(bool useValidationLayer); virtual SLANG_NO_THROW SlangResult SLANG_MCALL initialize(const Desc& desc) override; - virtual SLANG_NO_THROW void SLANG_MCALL setClearColor(const float color[4]) override; - virtual SLANG_NO_THROW void SLANG_MCALL clearFrame() override; - virtual SLANG_NO_THROW void SLANG_MCALL beginFrame() override; - virtual SLANG_NO_THROW void SLANG_MCALL endFrame() override; - virtual SLANG_NO_THROW void SLANG_MCALL - makeSwapchainImagePresentable(ISwapchain* swapchain) override; + virtual SLANG_NO_THROW Result SLANG_MCALL + createCommandQueue(const ICommandQueue::Desc& desc, ICommandQueue** outQueue) override; virtual SLANG_NO_THROW Result SLANG_MCALL createSwapchain( const ISwapchain::Desc& desc, WindowHandle window, ISwapchain** outSwapchain) override; virtual SLANG_NO_THROW Result SLANG_MCALL createFramebufferLayout(const IFramebufferLayout::Desc& desc, IFramebufferLayout** outLayout) override; virtual SLANG_NO_THROW Result SLANG_MCALL createFramebuffer(const IFramebuffer::Desc& desc, 
IFramebuffer** outFramebuffer) override; - virtual SLANG_NO_THROW void SLANG_MCALL setFramebuffer(IFramebuffer* frameBuffer) override; + virtual SLANG_NO_THROW Result SLANG_MCALL createRenderPassLayout( + const IRenderPassLayout::Desc& desc, + IRenderPassLayout** outRenderPassLayout) override; virtual SLANG_NO_THROW Result SLANG_MCALL createTextureResource( IResource::Usage initialUsage, const ITextureResource::Desc& desc, @@ -99,54 +99,25 @@ public: IPipelineState** outState) override; virtual SLANG_NO_THROW SlangResult SLANG_MCALL readTextureResource( - ITextureResource* texture, ISlangBlob** outBlob, size_t* outRowPitch, size_t* outPixelSize) override; - - virtual SLANG_NO_THROW void* SLANG_MCALL map(IBufferResource* buffer, MapFlavor flavor) override; - virtual SLANG_NO_THROW void SLANG_MCALL unmap(IBufferResource* buffer) override; - virtual SLANG_NO_THROW void SLANG_MCALL - setPrimitiveTopology(PrimitiveTopology topology) override; - - virtual SLANG_NO_THROW void SLANG_MCALL setDescriptorSet( - PipelineType pipelineType, - IPipelineLayout* layout, - UInt index, - IDescriptorSet* descriptorSet) override; - - virtual SLANG_NO_THROW void SLANG_MCALL setVertexBuffers( - UInt startSlot, - UInt slotCount, - IBufferResource* const* buffers, - const UInt* strides, - const UInt* offsets) override; - virtual SLANG_NO_THROW void SLANG_MCALL - setIndexBuffer(IBufferResource* buffer, Format indexFormat, UInt offset) override; - virtual SLANG_NO_THROW void SLANG_MCALL - setViewports(UInt count, Viewport const* viewports) override; - virtual SLANG_NO_THROW void SLANG_MCALL - setScissorRects(UInt count, ScissorRect const* rects) override; - virtual SLANG_NO_THROW void SLANG_MCALL setPipelineState(IPipelineState* state) override; - virtual SLANG_NO_THROW void SLANG_MCALL draw(UInt vertexCount, UInt startVertex) override; - virtual SLANG_NO_THROW void SLANG_MCALL - drawIndexed(UInt indexCount, UInt startIndex, UInt baseVertex) override; - virtual SLANG_NO_THROW void 
SLANG_MCALL dispatchCompute(int x, int y, int z) override; - virtual SLANG_NO_THROW void SLANG_MCALL submitGpuWork() override; - virtual SLANG_NO_THROW void SLANG_MCALL waitForGpu() override; + ITextureResource* texture, + ResourceState state, + ISlangBlob** outBlob, + size_t* outRowPitch, + size_t* outPixelSize) override; + + virtual SLANG_NO_THROW SlangResult SLANG_MCALL readBufferResource( + IBufferResource* buffer, + size_t offset, + size_t size, + ISlangBlob** outBlob) override; + void waitForGpu(); virtual SLANG_NO_THROW RendererType SLANG_MCALL getRendererType() const override { return RendererType::Vulkan; } - virtual PipelineStateBase* getCurrentPipeline() override - { - return m_currentPipeline.Ptr(); - } /// Dtor ~VKRenderer(); - protected: - - /// Flush state from descriptor set bindings into `commandBuffer` - void _flushBindingState(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipelineBindPoint); - class Buffer { public: @@ -208,9 +179,6 @@ public: VKRenderer* m_renderer; Buffer m_buffer; Buffer m_uploadBuffer; - List<uint8_t> m_readBuffer; ///< Stores the contents when a map read is performed - - MapFlavor m_mapFlavor = MapFlavor::Unknown; ///< If resource is mapped, records what kind of mapping else Unknown (if not mapped) }; class TextureResourceImpl : public TextureResource @@ -339,99 +307,6 @@ public: VkDeviceSize size; }; - class SwapchainImpl - : public ISwapchain - , public RefObject - { - public: - SLANG_REF_OBJECT_IUNKNOWN_ALL - ISwapchain* getInterface(const Guid& guid) - { - if (guid == GfxGUID::IID_ISlangUnknown || guid == GfxGUID::IID_ISwapchain) - return static_cast<ISwapchain*>(this); - return nullptr; - } - - public: - VulkanSwapChain m_swapChain; - ISwapchain::Desc m_desc; - ShortList<RefPtr<TextureResourceImpl>> m_images; - VKRenderer* m_renderer; - uint32_t m_currentImageIndex = 0; - public: - Result init(VKRenderer* renderer, const ISwapchain::Desc& desc, WindowHandle window) - { - m_desc = desc; - m_renderer = renderer; - - 
VulkanSwapChain::Desc swapchainDesc; - VulkanSwapChain::PlatformDesc* platformDesc = nullptr; - swapchainDesc.m_imageCount = desc.imageCount; - swapchainDesc.init(); - swapchainDesc.m_format = desc.format; - swapchainDesc.m_vsync = desc.enableVSync; -#if SLANG_WINDOWS_FAMILY - VulkanSwapChain::WinPlatformDesc winPlatformDesc; - winPlatformDesc.m_hinstance = ::GetModuleHandle(nullptr); - winPlatformDesc.m_hwnd = (HWND)window.handleValues[0]; - platformDesc = &winPlatformDesc; -#endif - - SLANG_RETURN_ON_FAIL(m_swapChain.init(&renderer->m_deviceQueue, swapchainDesc, platformDesc)); - m_desc.format = m_swapChain.getDesc().m_format; - m_desc.width = m_swapChain.getWidth(); - m_desc.height = m_swapChain.getHeight(); - m_desc.imageCount = m_swapChain.getDesc().m_imageCount; - auto& images = m_swapChain.getImages(); - for (uint32_t i = 0; i < desc.imageCount; i++) - { - ITextureResource::Desc imageDesc = {}; - - imageDesc.init2D( - IResource::Type::Texture2D, - m_swapChain.getDesc().m_format, - m_swapChain.getWidth(), - m_swapChain.getHeight(), - 1); - RefPtr<TextureResourceImpl> image = new TextureResourceImpl(imageDesc, gfx::IResource::Usage::RenderTarget, &renderer->m_api); - image->m_image = images[i]; - image->m_imageMemory = 0; - image->m_vkformat = m_swapChain.getVkFormat(); - image->m_isWeakImageReference = true; - m_images.add(image); - } - return SLANG_OK; - } - - virtual SLANG_NO_THROW const Desc& SLANG_MCALL getDesc() - { - return m_desc; - } - virtual SLANG_NO_THROW Result getImage(uint32_t index, ITextureResource** outResource) - { - *outResource = m_images[index]; - m_images[index]->addRef(); - return SLANG_OK; - } - virtual SLANG_NO_THROW Result present() - { - m_swapChain.present(m_desc.enableVSync); - return SLANG_OK; - } - virtual SLANG_NO_THROW uint32_t acquireNextImage() - { - m_currentImageIndex = (uint32_t)m_swapChain.nextFrontImageIndex(); - auto image = m_images[m_currentImageIndex]; - m_renderer->_transitionImageLayout( - image->m_image, - 
image->m_vkformat, - *image->getDesc(), - VK_IMAGE_LAYOUT_UNDEFINED, - VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); - return m_currentImageIndex; - } - }; - class FramebufferLayoutImpl : public IFramebufferLayout , public RefObject @@ -448,7 +323,11 @@ public: public: VkRenderPass m_renderPass; VKRenderer* m_renderer; - + Array<VkAttachmentDescription, kMaxAttachments> m_attachmentDescs; + Array<VkAttachmentReference, kMaxRenderTargets> m_colorReferences; + VkAttachmentReference m_depthReference; + bool m_hasDepthStencilAttachment; + uint32_t m_renderTargetCount; public: ~FramebufferLayoutImpl() { @@ -457,29 +336,31 @@ public: Result init(VKRenderer* renderer, const IFramebufferLayout::Desc& desc) { m_renderer = renderer; + m_renderTargetCount = desc.renderTargetCount; // Create render pass. - int numAttachments = desc.renderTargetCount; - if (desc.depthStencil) + int numAttachments = m_renderTargetCount; + m_hasDepthStencilAttachment = (desc.depthStencil!=nullptr); + if (m_hasDepthStencilAttachment) { numAttachments++; } - bool shouldClear = false; - bool shouldClearDepth = false; - bool shouldClearStencil = false; - // We need extra space if we have depth buffer - Array<VkAttachmentDescription, kMaxAttachments> attachmentDesc; - attachmentDesc.setCount(numAttachments); + m_attachmentDescs.setCount(numAttachments); for (uint32_t i = 0; i < desc.renderTargetCount; ++i) { auto& renderTarget = desc.renderTargets[i]; - VkAttachmentDescription& dst = attachmentDesc[i]; + VkAttachmentDescription& dst = m_attachmentDescs[i]; dst.flags = 0; dst.format = VulkanUtil::getVkFormat(renderTarget.format); dst.samples = (VkSampleCountFlagBits)renderTarget.sampleCount; - dst.loadOp = - shouldClear ? VK_ATTACHMENT_LOAD_OP_CLEAR : VK_ATTACHMENT_LOAD_OP_LOAD; + + // The following load/store/layout settings does not matter. + // In FramebufferLayout we just need a "compatible" render pass that + // can be used to create a framebuffer. 
A framebuffer created + // with this render pass setting can be used with actual render passes + // that has a different loadOp/storeOp/layout setting. + dst.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; dst.storeOp = VK_ATTACHMENT_STORE_OP_STORE; dst.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; dst.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE; @@ -489,32 +370,30 @@ public: if (desc.depthStencil) { - VkAttachmentDescription& dst = attachmentDesc[desc.renderTargetCount]; + VkAttachmentDescription& dst = m_attachmentDescs[desc.renderTargetCount]; dst.flags = 0; dst.format = VulkanUtil::getVkFormat(desc.depthStencil->format); dst.samples = (VkSampleCountFlagBits)desc.depthStencil->sampleCount; - dst.loadOp = - shouldClearDepth ? VK_ATTACHMENT_LOAD_OP_CLEAR : VK_ATTACHMENT_LOAD_OP_LOAD; + dst.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; dst.storeOp = VK_ATTACHMENT_STORE_OP_STORE; - dst.stencilLoadOp = shouldClearStencil ? VK_ATTACHMENT_LOAD_OP_CLEAR - : VK_ATTACHMENT_LOAD_OP_LOAD; + dst.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD; dst.stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE; dst.initialLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; dst.finalLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; } - Array<VkAttachmentReference, kMaxRenderTargets> colorAttachments; - colorAttachments.setCount(desc.renderTargetCount); + Array<VkAttachmentReference, kMaxRenderTargets>& colorReferences = m_colorReferences; + colorReferences.setCount(desc.renderTargetCount); for (uint32_t i = 0; i < desc.renderTargetCount; ++i) { - VkAttachmentReference& dst = colorAttachments[i]; + VkAttachmentReference& dst = colorReferences[i]; dst.attachment = i; dst.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; } - VkAttachmentReference depthAttachment = {}; - depthAttachment.attachment = desc.renderTargetCount; - depthAttachment.layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; + m_depthReference = VkAttachmentReference{}; + m_depthReference.attachment = 
desc.renderTargetCount; + m_depthReference.layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; VkSubpassDescription subpassDesc = {}; subpassDesc.flags = 0; @@ -522,16 +401,126 @@ public: subpassDesc.inputAttachmentCount = 0u; subpassDesc.pInputAttachments = nullptr; subpassDesc.colorAttachmentCount = desc.renderTargetCount; - subpassDesc.pColorAttachments = colorAttachments.getBuffer(); + subpassDesc.pColorAttachments = colorReferences.getBuffer(); subpassDesc.pResolveAttachments = nullptr; - subpassDesc.pDepthStencilAttachment = desc.depthStencil ? &depthAttachment : nullptr; + subpassDesc.pDepthStencilAttachment = + m_hasDepthStencilAttachment ? &m_depthReference : nullptr; subpassDesc.preserveAttachmentCount = 0u; subpassDesc.pPreserveAttachments = nullptr; VkRenderPassCreateInfo renderPassCreateInfo = {}; renderPassCreateInfo.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO; renderPassCreateInfo.attachmentCount = numAttachments; - renderPassCreateInfo.pAttachments = attachmentDesc.getBuffer(); + renderPassCreateInfo.pAttachments = m_attachmentDescs.getBuffer(); + renderPassCreateInfo.subpassCount = 1; + renderPassCreateInfo.pSubpasses = &subpassDesc; + SLANG_VK_RETURN_ON_FAIL(m_renderer->m_api.vkCreateRenderPass( + m_renderer->m_api.m_device, &renderPassCreateInfo, nullptr, &m_renderPass)); + return SLANG_OK; + } + }; + + class RenderPassLayoutImpl + : public IRenderPassLayout + , public RefObject + { + public: + SLANG_REF_OBJECT_IUNKNOWN_ALL + IRenderPassLayout* getInterface(const Guid& guid) + { + if (guid == GfxGUID::IID_ISlangUnknown || guid == GfxGUID::IID_IRenderPassLayout) + return static_cast<IRenderPassLayout*>(this); + return nullptr; + } + + public: + VkRenderPass m_renderPass; + VKRenderer* m_renderer; + + ~RenderPassLayoutImpl() + { + m_renderer->m_api.vkDestroyRenderPass( + m_renderer->m_api.m_device, m_renderPass, nullptr); + } + + static VkAttachmentLoadOp translateLoadOp(IRenderPassLayout::AttachmentLoadOp loadOp) + { + switch (loadOp) 
+ { + case IRenderPassLayout::AttachmentLoadOp::Clear: + return VK_ATTACHMENT_LOAD_OP_CLEAR; + case IRenderPassLayout::AttachmentLoadOp::Load: + return VK_ATTACHMENT_LOAD_OP_LOAD; + default: + return VK_ATTACHMENT_LOAD_OP_DONT_CARE; + } + } + + static VkAttachmentStoreOp translateStoreOp(IRenderPassLayout::AttachmentStoreOp storeOp) + { + switch (storeOp) + { + case IRenderPassLayout::AttachmentStoreOp::Store: + return VK_ATTACHMENT_STORE_OP_STORE; + default: + return VK_ATTACHMENT_STORE_OP_DONT_CARE; + } + } + + Result init(VKRenderer* renderer, const IRenderPassLayout::Desc& desc) + { + m_renderer = renderer; + + // Create render pass using load/storeOp and layouts info from `desc`. + auto framebufferLayout = static_cast<FramebufferLayoutImpl*>(desc.framebufferLayout); + assert(desc.renderTargetCount == framebufferLayout->m_renderTargetCount); + + // We need extra space if we have depth buffer + Array<VkAttachmentDescription, kMaxAttachments> attachmentDescs; + attachmentDescs = framebufferLayout->m_attachmentDescs; + for (uint32_t i = 0; i < desc.renderTargetCount; ++i) + { + VkAttachmentDescription& dst = attachmentDescs[i]; + auto access = desc.renderTargetAccess[i]; + // Fill in loadOp/storeOp and layout from desc. 
+ dst.loadOp = translateLoadOp(access.loadOp); + dst.storeOp = translateStoreOp(access.storeOp); + dst.stencilLoadOp = translateLoadOp(access.stencilLoadOp); + dst.stencilStoreOp = translateStoreOp(access.stencilStoreOp); + dst.initialLayout = VulkanUtil::mapResourceStateToLayout(access.initialState); + dst.finalLayout = VulkanUtil::mapResourceStateToLayout(access.finalState); + } + + if (framebufferLayout->m_hasDepthStencilAttachment) + { + VkAttachmentDescription& dst = attachmentDescs[desc.renderTargetCount]; + auto access = *desc.depthStencilAccess; + dst.loadOp = translateLoadOp(access.loadOp); + dst.storeOp = translateStoreOp(access.storeOp); + dst.stencilLoadOp = translateLoadOp(access.stencilLoadOp); + dst.stencilStoreOp = translateStoreOp(access.stencilStoreOp); + dst.initialLayout = VulkanUtil::mapResourceStateToLayout(access.initialState); + dst.finalLayout = VulkanUtil::mapResourceStateToLayout(access.finalState); + } + + VkSubpassDescription subpassDesc = {}; + subpassDesc.flags = 0; + subpassDesc.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS; + subpassDesc.inputAttachmentCount = 0u; + subpassDesc.pInputAttachments = nullptr; + subpassDesc.colorAttachmentCount = desc.renderTargetCount; + subpassDesc.pColorAttachments = framebufferLayout->m_colorReferences.getBuffer(); + subpassDesc.pResolveAttachments = nullptr; + subpassDesc.pDepthStencilAttachment = framebufferLayout->m_hasDepthStencilAttachment + ? 
&framebufferLayout->m_depthReference + : nullptr; + subpassDesc.preserveAttachmentCount = 0u; + subpassDesc.pPreserveAttachments = nullptr; + + VkRenderPassCreateInfo renderPassCreateInfo = {}; + renderPassCreateInfo.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO; + renderPassCreateInfo.attachmentCount = (uint32_t)attachmentDescs.getCount(); + renderPassCreateInfo.pAttachments = attachmentDescs.getBuffer(); renderPassCreateInfo.subpassCount = 1; renderPassCreateInfo.pSubpasses = &subpassDesc; SLANG_VK_RETURN_ON_FAIL(m_renderer->m_api.vkCreateRenderPass( @@ -560,6 +549,7 @@ public: uint32_t m_width; uint32_t m_height; VKRenderer* m_renderer; + VkClearValue m_clearValues[kMaxAttachments]; RefPtr<FramebufferLayoutImpl> m_layout; public: ~FramebufferImpl() @@ -604,12 +594,20 @@ public: static_cast<TextureResourceViewImpl*>(desc.renderTargetViews[i]); renderTargetViews[i] = resourceView; imageViews[i] = resourceView->m_view; + memcpy( + &m_clearValues[i], + &resourceView->m_texture->getDesc()->optimalClearValue.color, + sizeof(gfx::ColorClearValue)); } if (dsv) { imageViews[desc.renderTargetCount] = dsv->m_view; depthStencilView = dsv; + memcpy( + &m_clearValues[desc.renderTargetCount], + &dsv->m_texture->getDesc()->optimalClearValue.depthStencil, + sizeof(gfx::DepthStencilClearValue)); } @@ -872,15 +870,1033 @@ public: const VulkanApi* m_api; - RefPtr<PipelineLayoutImpl> m_pipelineLayout; - RefPtr<FramebufferLayoutImpl> m_framebufferLayout; - RefPtr<ShaderProgramImpl> m_shaderProgram; - VkPipeline m_pipeline = VK_NULL_HANDLE; }; + class CommandBufferImpl + : public ICommandBuffer + , public RefObject + { + public: + SLANG_REF_OBJECT_IUNKNOWN_ALL + ICommandBuffer* getInterface(const Guid& guid) + { + if (guid == GfxGUID::IID_ISlangUnknown || guid == GfxGUID::IID_ICommandBuffer) + return static_cast<ICommandBuffer*>(this); + return nullptr; + } + + public: + VkCommandBuffer m_commandBuffer; + VkCommandBuffer m_preCommandBuffer = VK_NULL_HANDLE; + VkCommandPool m_pool; 
+ VKRenderer* m_renderer; + DescriptorSetAllocator* m_transientDescSetAllocator; + // Command buffers are deallocated by its command pool, + // so no need to free individually. + ~CommandBufferImpl() = default; + + Result init( + VKRenderer* renderer, + VkCommandPool pool, + DescriptorSetAllocator* transientDescSetAllocator) + { + m_renderer = renderer; + m_transientDescSetAllocator = transientDescSetAllocator; + m_pool = pool; + + auto& api = renderer->m_api; + VkCommandBufferAllocateInfo allocInfo = {}; + allocInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO; + allocInfo.commandPool = pool; + allocInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY; + allocInfo.commandBufferCount = 1; + SLANG_VK_RETURN_ON_FAIL( + api.vkAllocateCommandBuffers(api.m_device, &allocInfo, &m_commandBuffer)); + + VkCommandBufferBeginInfo beginInfo = { + VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, + nullptr, + VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT}; + api.vkBeginCommandBuffer(m_commandBuffer, &beginInfo); + return SLANG_OK; + } + + Result createPreCommandBuffer() + { + VkCommandBufferAllocateInfo allocInfo = {}; + allocInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO; + allocInfo.commandPool = m_pool; + allocInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY; + allocInfo.commandBufferCount = 1; + auto& api = m_renderer->m_api; + SLANG_VK_RETURN_ON_FAIL( + api.vkAllocateCommandBuffers(api.m_device, &allocInfo, &m_preCommandBuffer)); + VkCommandBufferBeginInfo beginInfo = { + VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, + nullptr, + VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT}; + api.vkBeginCommandBuffer(m_preCommandBuffer, &beginInfo); + return SLANG_OK; + } + + VkCommandBuffer getPreCommandBuffer() + { + if (m_preCommandBuffer) + return m_preCommandBuffer; + createPreCommandBuffer(); + return m_preCommandBuffer; + } + + public: + static void _uploadBufferData( + VkCommandBuffer commandBuffer, + BufferResourceImpl* buffer, + size_t offset, + size_t size, + void* data) + { 
+ auto& api = buffer->m_renderer->m_api; + + assert(buffer->m_uploadBuffer.isInitialized()); + + void* mappedData = nullptr; + SLANG_VK_CHECK(api.vkMapMemory( + api.m_device, buffer->m_uploadBuffer.m_memory, offset, size, 0, &mappedData)); + memcpy(mappedData, data, size); + api.vkUnmapMemory(api.m_device, buffer->m_uploadBuffer.m_memory); + + // Copy from staging buffer to real buffer + VkBufferCopy copyInfo = {}; + copyInfo.size = size; + copyInfo.dstOffset = offset; + copyInfo.srcOffset = offset; + api.vkCmdCopyBuffer( + commandBuffer, + buffer->m_uploadBuffer.m_buffer, + buffer->m_buffer.m_buffer, + 1, + &copyInfo); + } + + class PipelineCommandEncoder + : public GraphicsComputeCommandEncoderBase + , public RefObject + { + public: + bool m_isOpen = false; + CommandBufferImpl* m_commandBuffer; + VkCommandBuffer m_vkCommandBuffer; + VkCommandBuffer m_vkPreCommandBuffer = VK_NULL_HANDLE; + VkPipeline m_boundPipelines[3] = {}; + static int getBindPointIndex(VkPipelineBindPoint bindPoint) + { + switch (bindPoint) + { + case VK_PIPELINE_BIND_POINT_GRAPHICS: + return 0; + case VK_PIPELINE_BIND_POINT_COMPUTE: + return 1; + case VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR: + return 2; + default: + assert(!"unknown pipeline type."); + return -1; + } + } + VulkanApi* m_api; + + RefPtr<PipelineLayoutImpl> m_currentPipelineLayout; + + RefPtr<DescriptorSetImpl> m_currentDescriptorSetImpls[kMaxDescriptorSets]; + VkDescriptorSet m_currentDescriptorSets[kMaxDescriptorSets]; + + // Temporary list used by flushBindingState to avoid per-frame allocation. 
+ List<VkCopyDescriptorSet> m_descSetCopies; + + void init(CommandBufferImpl* commandBuffer) + { + m_commandBuffer = commandBuffer; + m_rendererBase = static_cast<RendererBase*>(commandBuffer->m_renderer); + m_vkCommandBuffer = m_commandBuffer->m_commandBuffer; + m_api = &m_commandBuffer->m_renderer->m_api; + } + + void endEncodingImpl() + { + m_isOpen = false; + + // Make m_currentDescriptorSets consistent with m_currentDescriptorSetImpls + // so that we don't mistakenly treat any transient descriptor sets as "copied" + // later. + for (uint32_t i = 0; i < kMaxDescriptorSets; i++) + { + if (m_currentDescriptorSetImpls[i]) + { + m_currentDescriptorSets[i] = + m_currentDescriptorSetImpls[i]->m_descriptorSet.handle; + } + } + for (auto& pipeline : m_boundPipelines) + pipeline = VK_NULL_HANDLE; + } + + virtual SLANG_NO_THROW void SLANG_MCALL setDescriptorSetImpl( + PipelineType pipelineType, + IPipelineLayout* layout, + UInt index, + IDescriptorSet* descriptorSet) override + { + // Ideally this should eventually be as simple as: + // + // m_api.vkCmdBindDescriptorSets( + // commandBuffer, + // translatePipelineBindPoint(pipelineType), + // layout->m_pipelineLayout, + // index, + // 1, + // ((DescriptorSetImpl*) descriptorSet)->m_descriptorSet, + // 0, + // nullptr); + // + // For now we are lazily flushing state right before drawing, so + // we will hang onto the parameters that were passed in and then + // use them later. 
+ // + + auto descriptorSetImpl = (DescriptorSetImpl*)descriptorSet; + m_currentDescriptorSetImpls[index] = descriptorSetImpl; + m_currentDescriptorSets[index] = descriptorSetImpl->m_descriptorSet.handle; + } + + virtual SLANG_NO_THROW void SLANG_MCALL uploadBufferDataImpl( + IBufferResource* buffer, + size_t offset, + size_t size, + void* data) override + { + m_vkPreCommandBuffer = m_commandBuffer->getPreCommandBuffer(); + _uploadBufferData( + m_vkPreCommandBuffer, + static_cast<BufferResourceImpl*>(buffer), + offset, + size, + data); + } + + void setPipelineStateImpl(IPipelineState* state) + { + m_currentPipeline = static_cast<PipelineStateImpl*>(state); + } + + void flushBindingState(VkPipelineBindPoint pipelineBindPoint) + { + auto& api = *m_api; + + auto pipeline = static_cast<PipelineStateImpl*>(m_currentPipeline.Ptr()); + auto& descSetCopies = m_descSetCopies; + descSetCopies.clear(); + // We start by binding the pipeline state. + // + auto pipelineBindPointId = getBindPointIndex(pipelineBindPoint); + if (m_boundPipelines[pipelineBindPointId] != pipeline->m_pipeline) + { + api.vkCmdBindPipeline(m_vkCommandBuffer, pipelineBindPoint, pipeline->m_pipeline); + m_boundPipelines[pipelineBindPointId] = pipeline->m_pipeline; + } + + // Next we bind all the descriptor sets that were set in the `VKRenderer`. + // + auto pipelineLayoutImpl = static_cast<PipelineLayoutImpl*>(pipeline->m_pipelineLayout.get()); + auto vkPipelineLayout = pipelineLayoutImpl->m_pipelineLayout; + auto descriptorSetCount = pipelineLayoutImpl->m_descriptorSetCount; + for (uint32_t i = 0; i < (uint32_t)descriptorSetCount; i++) + { + if (m_currentDescriptorSetImpls[i]->m_isTransient) + { + // A transient descriptor set may go out of life cycle after command list + // recording, therefore we must make a copy of it in the per-frame + // descriptor pool. + + // If we have already created a transient copy for this descriptor set, skip + // the copy. 
+ if (m_currentDescriptorSetImpls[i]->m_descriptorSet.handle != + m_currentDescriptorSets[i]) + continue; + + auto descSet = m_commandBuffer->m_transientDescSetAllocator->allocate( + m_currentDescriptorSetImpls[i]->m_layout->m_descriptorSetLayout); + uint32_t bindingIndex = 0; + for (auto binding : m_currentDescriptorSetImpls[i]->m_layout->m_vkBindings) + { + VkCopyDescriptorSet copy = {}; + copy.sType = VK_STRUCTURE_TYPE_COPY_DESCRIPTOR_SET; + copy.srcSet = m_currentDescriptorSetImpls[i]->m_descriptorSet.handle; + copy.dstSet = descSet.handle; + copy.srcBinding = copy.dstBinding = bindingIndex; + copy.srcArrayElement = copy.dstArrayElement = 0; + copy.descriptorCount = binding.descriptorCount; + descSetCopies.add(copy); + bindingIndex++; + } + m_currentDescriptorSets[i] = descSet.handle; + } + } + if (descSetCopies.getCount()) + { + api.vkUpdateDescriptorSets( + api.m_device, + 0, + nullptr, + (uint32_t)descSetCopies.getCount(), + descSetCopies.getBuffer()); + } + api.vkCmdBindDescriptorSets( + m_vkCommandBuffer, + pipelineBindPoint, + vkPipelineLayout, + 0, + uint32_t(descriptorSetCount), + &m_currentDescriptorSets[0], + 0, + nullptr); + + // For any descriptor sets with root-constant ranges, we need to + // bind the relevant data to the context. + // + for (gfx::UInt ii = 0; ii < descriptorSetCount; ++ii) + { + auto descriptorSet = m_currentDescriptorSetImpls[ii]; + auto descriptorSetLayout = descriptorSet->m_layout; + auto size = descriptorSetLayout->m_rootConstantDataSize; + if (size == 0) + continue; + auto data = descriptorSet->m_rootConstantData.getBuffer(); + + // The absolute offset of the descriptor set's data in + // the push-constant data for the entire pipeline was + // computed and cached in the pipeline layout. 
+ // + uint32_t offset = pipelineLayoutImpl->m_descriptorSetRootConstantOffsets[ii]; + + api.vkCmdPushConstants( + m_vkCommandBuffer, + vkPipelineLayout, + VK_SHADER_STAGE_ALL, + offset, + size, + data); + } + } + }; + class RenderCommandEncoder + : public IRenderCommandEncoder + , public PipelineCommandEncoder + + { + public: + List<VkViewport> m_viewports; + List<VkRect2D> m_scissorRects; + List<BoundVertexBuffer> m_boundVertexBuffers; + BoundVertexBuffer m_boundIndexBuffer; + VkIndexType m_boundIndexFormat; + + public: + SLANG_REF_OBJECT_IUNKNOWN_ALL + IRenderCommandEncoder* getInterface(const Guid& guid) + { + if (guid == GfxGUID::IID_ISlangUnknown || + guid == GfxGUID::IID_IRenderCommandEncoder || + guid == GfxGUID::IID_ICommandEncoder) + return static_cast<IRenderCommandEncoder*>(this); + return nullptr; + } + + void beginPass(IRenderPassLayout* renderPass, IFramebuffer* framebuffer) + { + FramebufferImpl* framebufferImpl = static_cast<FramebufferImpl*>(framebuffer); + RenderPassLayoutImpl* renderPassImpl = + static_cast<RenderPassLayoutImpl*>(renderPass); + VkClearValue clearValues[kMaxAttachments] = {}; + VkRenderPassBeginInfo beginInfo = {}; + beginInfo.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO; + beginInfo.framebuffer = framebufferImpl->m_handle; + beginInfo.renderPass = renderPassImpl->m_renderPass; + uint32_t attachmentCount = (uint32_t)framebufferImpl->renderTargetViews.getCount(); + if (framebufferImpl->depthStencilView) + attachmentCount++; + beginInfo.clearValueCount = attachmentCount; + beginInfo.renderArea.extent.width = framebufferImpl->m_width; + beginInfo.renderArea.extent.height = framebufferImpl->m_height; + beginInfo.pClearValues = framebufferImpl->m_clearValues; + auto& api = *m_api; + api.vkCmdBeginRenderPass(m_vkCommandBuffer, &beginInfo, VK_SUBPASS_CONTENTS_INLINE); + m_isOpen = true; + } + + virtual SLANG_NO_THROW void SLANG_MCALL endEncoding() override + { + auto& api = *m_api; + api.vkCmdEndRenderPass(m_vkCommandBuffer); + 
endEncodingImpl(); + } + + virtual SLANG_NO_THROW void SLANG_MCALL + setPipelineState(IPipelineState* pipelineState) override + { + setPipelineStateImpl(pipelineState); + } + + virtual SLANG_NO_THROW void SLANG_MCALL setDescriptorSet( + IPipelineLayout* layout, + UInt index, + IDescriptorSet* descriptorSet) override + { + setDescriptorSetImpl(PipelineType::Graphics, layout, index, descriptorSet); + } + + virtual SLANG_NO_THROW void SLANG_MCALL + bindRootShaderObject(IShaderObject* object) override + { + bindRootShaderObjectImpl(PipelineType::Graphics, object); + } + + virtual SLANG_NO_THROW void SLANG_MCALL + setViewports(uint32_t count, const Viewport* viewports) override + { + static const int kMaxViewports = 8; // TODO: base on device caps + assert(count <= kMaxViewports); + + m_viewports.setCount(count); + for (UInt ii = 0; ii < count; ++ii) + { + auto& inViewport = viewports[ii]; + auto& vkViewport = m_viewports[ii]; + + vkViewport.x = inViewport.originX; + vkViewport.y = inViewport.originY; + vkViewport.width = inViewport.extentX; + vkViewport.height = inViewport.extentY; + vkViewport.minDepth = inViewport.minZ; + vkViewport.maxDepth = inViewport.maxZ; + } + + auto& api = *m_api; + api.vkCmdSetViewport(m_vkCommandBuffer, 0, uint32_t(count), m_viewports.getBuffer()); + } + + virtual SLANG_NO_THROW void SLANG_MCALL + setScissorRects(uint32_t count, const ScissorRect* rects) override + { + static const int kMaxScissorRects = 8; // TODO: base on device caps + assert(count <= kMaxScissorRects); + + m_scissorRects.setCount(count); + for (UInt ii = 0; ii < count; ++ii) + { + auto& inRect = rects[ii]; + auto& vkRect = m_scissorRects[ii]; + + vkRect.offset.x = int32_t(inRect.minX); + vkRect.offset.y = int32_t(inRect.minY); + vkRect.extent.width = uint32_t(inRect.maxX - inRect.minX); + vkRect.extent.height = uint32_t(inRect.maxY - inRect.minY); + } + + auto& api = *m_api; + api.vkCmdSetScissor( + m_vkCommandBuffer, + 0, + uint32_t(count), + m_scissorRects.getBuffer()); 
+ } + + virtual SLANG_NO_THROW void SLANG_MCALL + setPrimitiveTopology(PrimitiveTopology topology) override + { + auto& api = *m_api; + if (api.vkCmdSetPrimitiveTopologyEXT) + { + api.vkCmdSetPrimitiveTopologyEXT( + m_vkCommandBuffer, + VulkanUtil::getVkPrimitiveTopology(topology)); + } + else + { + switch (topology) + { + case PrimitiveTopology::TriangleList: + break; + default: + // We are using a non-list topology, but we don't have dynamic state + // extension, error out. + assert(!"Non-list topology requires VK_EXT_extended_dynamic_states, which is not present."); + break; + } + } + } + + virtual SLANG_NO_THROW void SLANG_MCALL setVertexBuffers( + UInt startSlot, + UInt slotCount, + IBufferResource* const* buffers, + const UInt* strides, + const UInt* offsets) override + { + { + const Index num = Index(startSlot + slotCount); + if (num > m_boundVertexBuffers.getCount()) + { + m_boundVertexBuffers.setCount(num); + } + } + + for (Index i = 0; i < Index(slotCount); i++) + { + BufferResourceImpl* buffer = static_cast<BufferResourceImpl*>(buffers[i]); + if (buffer) + { + assert(buffer->m_initialUsage == IResource::Usage::VertexBuffer); + } + + BoundVertexBuffer& boundBuffer = m_boundVertexBuffers[startSlot + i]; + boundBuffer.m_buffer = buffer; + boundBuffer.m_stride = int(strides[i]); + boundBuffer.m_offset = int(offsets[i]); + } + } + + virtual SLANG_NO_THROW void SLANG_MCALL setIndexBuffer( + IBufferResource* buffer, + Format indexFormat, + UInt offset = 0) override + { + switch (indexFormat) + { + case Format::R_UInt16: + m_boundIndexFormat = VK_INDEX_TYPE_UINT16; + break; + case Format::R_UInt32: + m_boundIndexFormat = VK_INDEX_TYPE_UINT32; + break; + default: + assert(!"unsupported index format"); + } + m_boundIndexBuffer.m_buffer = static_cast<BufferResourceImpl*>(buffer); + m_boundIndexBuffer.m_stride = 0; + m_boundIndexBuffer.m_offset = int(offset); + } + + void prepareDraw() + { + auto pipeline = static_cast<PipelineStateImpl*>(m_currentPipeline.Ptr()); + 
if (!pipeline || static_cast<ShaderProgramImpl*>(pipeline->m_program.get()) + ->m_pipelineType != PipelineType::Graphics) + { + assert(!"Invalid render pipeline"); + return; + } + flushBindingState(VK_PIPELINE_BIND_POINT_GRAPHICS); + } + + virtual SLANG_NO_THROW void SLANG_MCALL + draw(UInt vertexCount, UInt startVertex = 0) override + { + prepareDraw(); + auto& api = *m_api; + // Bind the vertex buffer + if (m_boundVertexBuffers.getCount() > 0 && m_boundVertexBuffers[0].m_buffer) + { + const BoundVertexBuffer& boundVertexBuffer = m_boundVertexBuffers[0]; + + VkBuffer vertexBuffers[] = {boundVertexBuffer.m_buffer->m_buffer.m_buffer}; + VkDeviceSize offsets[] = {VkDeviceSize(boundVertexBuffer.m_offset)}; + + api.vkCmdBindVertexBuffers(m_vkCommandBuffer, 0, 1, vertexBuffers, offsets); + } + api.vkCmdDraw(m_vkCommandBuffer, static_cast<uint32_t>(vertexCount), 1, 0, 0); + } + virtual SLANG_NO_THROW void SLANG_MCALL + drawIndexed(UInt indexCount, UInt startIndex = 0, UInt baseVertex = 0) override + { + prepareDraw(); + auto& api = *m_api; + api.vkCmdBindIndexBuffer( + m_vkCommandBuffer, + m_boundIndexBuffer.m_buffer->m_buffer.m_buffer, + m_boundIndexBuffer.m_offset, + m_boundIndexFormat); + } + + virtual SLANG_NO_THROW void SLANG_MCALL + setStencilReference(uint32_t referenceValue) override + { + auto& api = *m_api; + api.vkCmdSetStencilReference( + m_vkCommandBuffer, VK_STENCIL_FRONT_AND_BACK, referenceValue); + } + }; + + RefPtr<RenderCommandEncoder> m_renderCommandEncoder; + + virtual SLANG_NO_THROW void SLANG_MCALL encodeRenderCommands( + IRenderPassLayout* renderPass, + IFramebuffer* framebuffer, + IRenderCommandEncoder** outEncoder) override + { + if (!m_renderCommandEncoder) + { + m_renderCommandEncoder = new RenderCommandEncoder(); + m_renderCommandEncoder->init(this); + } + assert(!m_renderCommandEncoder->m_isOpen); + m_renderCommandEncoder->beginPass(renderPass, framebuffer); + *outEncoder = m_renderCommandEncoder.Ptr(); + m_renderCommandEncoder->addRef(); + } 
+ + class ComputeCommandEncoder + : public IComputeCommandEncoder + , public PipelineCommandEncoder + { + public: + SLANG_REF_OBJECT_IUNKNOWN_ALL + IComputeCommandEncoder* getInterface(const Guid& guid) + { + if (guid == GfxGUID::IID_ISlangUnknown || + guid == GfxGUID::IID_IComputeCommandEncoder || + guid == GfxGUID::IID_ICommandEncoder) + return static_cast<IComputeCommandEncoder*>(this); + return nullptr; + } + + public: + virtual SLANG_NO_THROW void SLANG_MCALL endEncoding() override + { + endEncodingImpl(); + } + + virtual SLANG_NO_THROW void SLANG_MCALL + setPipelineState(IPipelineState* pipelineState) override + { + setPipelineStateImpl(pipelineState); + } + + virtual SLANG_NO_THROW void SLANG_MCALL setDescriptorSet( + IPipelineLayout* layout, + UInt index, + IDescriptorSet* descriptorSet) override + { + setDescriptorSetImpl(PipelineType::Compute, layout, index, descriptorSet); + } + + virtual SLANG_NO_THROW void SLANG_MCALL + bindRootShaderObject(IShaderObject* object) override + { + bindRootShaderObjectImpl(PipelineType::Compute, object); + } + + virtual SLANG_NO_THROW void SLANG_MCALL dispatchCompute(int x, int y, int z) override + { + auto pipeline = static_cast<PipelineStateImpl*>(m_currentPipeline.Ptr()); + if (!pipeline || + static_cast<ShaderProgramImpl*>(pipeline->m_program.get())->m_pipelineType != + PipelineType::Compute) + { + assert(!"Invalid compute pipeline"); + return; + } + + // Also create descriptor sets based on the given pipeline layout + flushBindingState(VK_PIPELINE_BIND_POINT_COMPUTE); + m_api->vkCmdDispatch(m_vkCommandBuffer, x, y, z); + } + }; + + RefPtr<ComputeCommandEncoder> m_computeCommandEncoder; + + virtual SLANG_NO_THROW void SLANG_MCALL + encodeComputeCommands(IComputeCommandEncoder** outEncoder) override + { + if (!m_computeCommandEncoder) + { + m_computeCommandEncoder = new ComputeCommandEncoder(); + m_computeCommandEncoder->init(this); + } + assert(!m_computeCommandEncoder->m_isOpen); + *outEncoder = 
m_computeCommandEncoder.Ptr(); + m_computeCommandEncoder->addRef(); + } + + class ResourceCommandEncoder + : public IResourceCommandEncoder + , public RefObject + { + public: + CommandBufferImpl* m_commandBuffer; + public: + SLANG_REF_OBJECT_IUNKNOWN_ALL + IResourceCommandEncoder* getInterface(const Guid& guid) + { + if (guid == GfxGUID::IID_ISlangUnknown || + guid == GfxGUID::IID_IResourceCommandEncoder || + guid == GfxGUID::IID_ICommandEncoder) + return static_cast<IResourceCommandEncoder*>(this); + return nullptr; + } + + public: + virtual SLANG_NO_THROW void SLANG_MCALL copyBuffer( + IBufferResource* dst, + size_t dstOffset, + IBufferResource* src, + size_t srcOffset, + size_t size) + { + SLANG_UNUSED(dst); + SLANG_UNUSED(srcOffset); + SLANG_UNUSED(src); + SLANG_UNUSED(dstOffset); + SLANG_UNUSED(size); + } + virtual SLANG_NO_THROW void SLANG_MCALL + uploadBufferData(IBufferResource* buffer, size_t offset, size_t size, void* data) + { + _uploadBufferData( + m_commandBuffer->m_commandBuffer, + static_cast<BufferResourceImpl*>(buffer), + offset, + size, + data); + } + virtual SLANG_NO_THROW void SLANG_MCALL endEncoding() override + { + // Insert memory barrier to ensure transfers are visible to the GPU. 
+ auto& vkAPI = m_commandBuffer->m_renderer->m_api; + + VkMemoryBarrier memBarrier = {VK_STRUCTURE_TYPE_MEMORY_BARRIER}; + memBarrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + memBarrier.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT; + vkAPI.vkCmdPipelineBarrier( + m_commandBuffer->m_commandBuffer, + VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, + 0, + 1, + &memBarrier, + 0, + nullptr, + 0, + nullptr); + } + + void init(CommandBufferImpl* commandBuffer) + { + m_commandBuffer = commandBuffer; + } + }; + + RefPtr<ResourceCommandEncoder> m_resourceCommandEncoder; + + virtual SLANG_NO_THROW void SLANG_MCALL + encodeResourceCommands(IResourceCommandEncoder** outEncoder) override + { + if (!m_resourceCommandEncoder) + { + m_resourceCommandEncoder = new ResourceCommandEncoder(); + m_resourceCommandEncoder->init(this); + } + *outEncoder = m_resourceCommandEncoder.Ptr(); + m_resourceCommandEncoder->addRef(); + } + + virtual SLANG_NO_THROW void SLANG_MCALL close() override + { + auto& vkAPI = m_renderer->m_api; + if (m_preCommandBuffer != VK_NULL_HANDLE) + { + // `preCmdBuffer` contains buffer transfer commands for shader object + // uniform buffers, and we need a memory barrier here to ensure the + // transfers are visible to shaders. 
+ VkMemoryBarrier memBarrier = {VK_STRUCTURE_TYPE_MEMORY_BARRIER}; + memBarrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + memBarrier.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT; + vkAPI.vkCmdPipelineBarrier( + m_preCommandBuffer, + VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, + 0, + 1, + &memBarrier, + 0, + nullptr, + 0, + nullptr); + vkAPI.vkEndCommandBuffer(m_preCommandBuffer); + } + vkAPI.vkEndCommandBuffer(m_commandBuffer); + } + }; + + class CommandQueueImpl + : public ICommandQueue + , public RefObject + { + public: + SLANG_REF_OBJECT_IUNKNOWN_ALL + ICommandQueue* getInterface(const Guid& guid) + { + if (guid == GfxGUID::IID_ISlangUnknown || guid == GfxGUID::IID_ICommandQueue) + return static_cast<ICommandQueue*>(this); + return nullptr; + } + + public: + Desc m_desc; + uint32_t m_poolIndex; + RefPtr<VKRenderer> m_renderer; + VkQueue m_queue; + uint32_t m_queueFamilyIndex; + VkSemaphore m_pendingWaitSemaphore = VK_NULL_HANDLE; + List<VkCommandBuffer> m_submitCommandBuffers; + static const int kCommandPoolCount = 8; + VkCommandPool m_commandPools[kCommandPoolCount]; + DescriptorSetAllocator m_descSetAllocators[kCommandPoolCount]; + VkFence m_fences[kCommandPoolCount]; + VkSemaphore m_semaphores[kCommandPoolCount]; + ~CommandQueueImpl() + { + m_renderer->m_api.vkQueueWaitIdle(m_queue); + + m_renderer->m_queueAllocCount--; + for (int i = 0; i < kCommandPoolCount; i++) + { + m_renderer->m_api.vkDestroyCommandPool( + m_renderer->m_api.m_device, m_commandPools[i], nullptr); + m_renderer->m_api.vkDestroyFence(m_renderer->m_api.m_device, m_fences[i], nullptr); + m_renderer->m_api.vkDestroySemaphore( + m_renderer->m_api.m_device, m_semaphores[i], nullptr); + m_descSetAllocators[i].close(); + } + } + + void init(VKRenderer* renderer, VkQueue queue, uint32_t queueFamilyIndex) + { + m_renderer = renderer; + m_poolIndex = 0; + m_queue = queue; + m_queueFamilyIndex = queueFamilyIndex; + for (int i = 0; i < kCommandPoolCount; i++) + { + 
m_descSetAllocators[i].m_api = &m_renderer->m_api; + + VkCommandPoolCreateInfo poolCreateInfo = {}; + poolCreateInfo.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO; + poolCreateInfo.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT; + poolCreateInfo.queueFamilyIndex = queueFamilyIndex; + m_renderer->m_api.vkCreateCommandPool( + m_renderer->m_api.m_device, &poolCreateInfo, nullptr, &m_commandPools[i]); + + VkFenceCreateInfo fenceCreateInfo = {}; + fenceCreateInfo.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO; + fenceCreateInfo.flags = VK_FENCE_CREATE_SIGNALED_BIT; + m_renderer->m_api.vkCreateFence( + m_renderer->m_api.m_device, &fenceCreateInfo, nullptr, &m_fences[i]); + + VkSemaphoreCreateInfo semaphoreCreateInfo = {}; + semaphoreCreateInfo.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO; + semaphoreCreateInfo.flags = 0; + m_renderer->m_api.vkCreateSemaphore( + m_renderer->m_api.m_device, &semaphoreCreateInfo, nullptr, &m_semaphores[i]); + } + } + + // Swaps to and resets the next command pool. + // Wait if command lists in the next pool are still in flight. 
+ Result swapPools() + { + auto& vkAPI = m_renderer->m_api; + m_poolIndex++; + m_poolIndex = m_poolIndex % kCommandPoolCount; + + if (vkAPI.vkWaitForFences(vkAPI.m_device, 1, &m_fences[m_poolIndex], 1, UINT64_MAX) != + VK_SUCCESS) + { + return SLANG_FAIL; + } + vkAPI.vkResetCommandPool(vkAPI.m_device, m_commandPools[m_poolIndex], 0); + m_descSetAllocators[m_poolIndex].reset(); + return SLANG_OK; + } + + virtual SLANG_NO_THROW void SLANG_MCALL wait() override + { + auto& vkAPI = m_renderer->m_api; + vkAPI.vkQueueWaitIdle(m_queue); + } + + virtual SLANG_NO_THROW const Desc& SLANG_MCALL getDesc() override + { + return m_desc; + } + + virtual SLANG_NO_THROW Result SLANG_MCALL + createCommandBuffer(ICommandBuffer** result) override + { + RefPtr<CommandBufferImpl> commandBuffer = new CommandBufferImpl(); + SLANG_RETURN_ON_FAIL(commandBuffer->init( + m_renderer, m_commandPools[m_poolIndex], &m_descSetAllocators[m_poolIndex])); + *result = commandBuffer.detach(); + return SLANG_OK; + } + + virtual SLANG_NO_THROW void SLANG_MCALL + executeCommandBuffers( + uint32_t count, + ICommandBuffer* const* commandBuffers) override + { + auto& vkAPI = m_renderer->m_api; + m_submitCommandBuffers.clear(); + for (uint32_t i = 0; i < count; i++) + { + auto cmdBufImpl = static_cast<CommandBufferImpl*>(commandBuffers[i]); + if (cmdBufImpl->m_preCommandBuffer != VK_NULL_HANDLE) + m_submitCommandBuffers.add(cmdBufImpl->m_preCommandBuffer); + auto vkCmdBuf = cmdBufImpl->m_commandBuffer; + m_submitCommandBuffers.add(vkCmdBuf); + } + VkSemaphore waitSemaphore = m_pendingWaitSemaphore; + VkSemaphore signalSemaphore = m_semaphores[m_poolIndex]; + VkSubmitInfo submitInfo = {}; + submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; + VkPipelineStageFlags stageFlag = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT; + submitInfo.pWaitDstStageMask = &stageFlag; + submitInfo.commandBufferCount = (uint32_t)m_submitCommandBuffers.getCount(); + submitInfo.pCommandBuffers = m_submitCommandBuffers.getBuffer(); + if 
(m_pendingWaitSemaphore != VK_NULL_HANDLE) + { + submitInfo.waitSemaphoreCount = 1; + submitInfo.pWaitSemaphores = &waitSemaphore; + } + submitInfo.signalSemaphoreCount = 1; + submitInfo.pSignalSemaphores = &signalSemaphore; + vkAPI.vkResetFences(vkAPI.m_device, 1, &m_fences[m_poolIndex]); + vkAPI.vkQueueSubmit(m_queue, 1, &submitInfo, m_fences[m_poolIndex]); + m_pendingWaitSemaphore = signalSemaphore; + swapPools(); + } + }; + + class SwapchainImpl + : public ISwapchain + , public RefObject + { + public: + SLANG_REF_OBJECT_IUNKNOWN_ALL + ISwapchain* getInterface(const Guid& guid) + { + if (guid == GfxGUID::IID_ISlangUnknown || guid == GfxGUID::IID_ISwapchain) + return static_cast<ISwapchain*>(this); + return nullptr; + } + + public: + VulkanSwapChain m_swapChain; + VkSemaphore m_nextImageSemaphore; // Semaphore to signal after `acquireNextImage`. + ISwapchain::Desc m_desc; + RefPtr<CommandQueueImpl> m_queue; + ShortList<RefPtr<TextureResourceImpl>> m_images; + RefPtr<VKRenderer> m_renderer; + uint32_t m_currentImageIndex = 0; + + public: + ~SwapchainImpl() + { + m_swapChain.destroy(); + m_renderer->m_api.vkDestroySemaphore( + m_renderer->m_api.m_device, m_nextImageSemaphore, nullptr); + } + Result init(VKRenderer* renderer, const ISwapchain::Desc& desc, WindowHandle window) + { + m_desc = desc; + m_renderer = renderer; + m_queue = static_cast<CommandQueueImpl*>(desc.queue); + + VkSemaphoreCreateInfo semaphoreCreateInfo = {}; + semaphoreCreateInfo.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO; + SLANG_VK_RETURN_ON_FAIL(renderer->m_api.vkCreateSemaphore( + renderer->m_api.m_device, &semaphoreCreateInfo, nullptr, &m_nextImageSemaphore)); + + VulkanSwapChain::Desc swapchainDesc; + VulkanSwapChain::PlatformDesc* platformDesc = nullptr; + swapchainDesc.m_imageCount = desc.imageCount; + swapchainDesc.init(); + swapchainDesc.m_format = desc.format; + swapchainDesc.m_vsync = desc.enableVSync; +#if SLANG_WINDOWS_FAMILY + VulkanSwapChain::WinPlatformDesc winPlatformDesc; + 
winPlatformDesc.m_hinstance = ::GetModuleHandle(nullptr); + winPlatformDesc.m_hwnd = (HWND)window.handleValues[0]; + platformDesc = &winPlatformDesc; +#endif + + m_queue = static_cast<CommandQueueImpl*>(desc.queue); + SLANG_RETURN_ON_FAIL(m_swapChain.init( + &renderer->m_api, + m_queue->m_queue, + m_queue->m_queueFamilyIndex, + swapchainDesc, + platformDesc)); + m_desc.format = m_swapChain.getDesc().m_format; + m_desc.width = m_swapChain.getWidth(); + m_desc.height = m_swapChain.getHeight(); + m_desc.imageCount = m_swapChain.getDesc().m_imageCount; + auto& images = m_swapChain.getImages(); + for (uint32_t i = 0; i < desc.imageCount; i++) + { + ITextureResource::Desc imageDesc = {}; + + imageDesc.init2D( + IResource::Type::Texture2D, + m_swapChain.getDesc().m_format, + m_swapChain.getWidth(), + m_swapChain.getHeight(), + 1); + RefPtr<TextureResourceImpl> image = new TextureResourceImpl( + imageDesc, gfx::IResource::Usage::RenderTarget, &renderer->m_api); + image->m_image = images[i]; + image->m_imageMemory = 0; + image->m_vkformat = m_swapChain.getVkFormat(); + image->m_isWeakImageReference = true; + m_images.add(image); + } + return SLANG_OK; + } + + virtual SLANG_NO_THROW const Desc& SLANG_MCALL getDesc() { return m_desc; } + virtual SLANG_NO_THROW Result getImage(uint32_t index, ITextureResource** outResource) + { + *outResource = m_images[index]; + m_images[index]->addRef(); + return SLANG_OK; + } + virtual SLANG_NO_THROW Result present() + { + m_swapChain.present(m_queue->m_pendingWaitSemaphore); + m_queue->m_pendingWaitSemaphore = VK_NULL_HANDLE; + return SLANG_OK; + } + virtual SLANG_NO_THROW uint32_t acquireNextImage() + { + m_currentImageIndex = (uint32_t)m_swapChain.nextFrontImageIndex(m_nextImageSemaphore); + // Make the queue's next submit wait on `m_nextImageSemaphore`. 
+ m_queue->m_pendingWaitSemaphore = m_nextImageSemaphore; + return m_currentImageIndex; + } + }; + VkBool32 handleDebugMessage(VkDebugReportFlagsEXT flags, VkDebugReportObjectTypeEXT objType, uint64_t srcObject, size_t location, int32_t msgCode, const char* pLayerPrefix, const char* pMsg); @@ -895,44 +1911,23 @@ public: static VKAPI_ATTR VkBool32 VKAPI_CALL debugMessageCallback(VkDebugReportFlagsEXT flags, VkDebugReportObjectTypeEXT objType, uint64_t srcObject, size_t location, int32_t msgCode, const char* pLayerPrefix, const char* pMsg, void* pUserData); - void _endRender(); - - Slang::Result _beginPass(); - void _endPass(); void _transitionImageLayout(VkImage image, VkFormat format, const TextureResource::Desc& desc, VkImageLayout oldLayout, VkImageLayout newLayout); VkDebugReportCallbackEXT m_debugReportCallback; - RefPtr<PipelineLayoutImpl> m_currentPipelineLayout; - - RefPtr<DescriptorSetImpl> m_currentDescriptorSetImpls [kMaxDescriptorSets]; - VkDescriptorSet m_currentDescriptorSets [kMaxDescriptorSets]; - - RefPtr<PipelineStateImpl> m_currentPipeline; - - RefPtr<FramebufferImpl> m_currentFramebuffer; - - List<BoundVertexBuffer> m_boundVertexBuffers; - - VkPrimitiveTopology m_primitiveTopology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST; - VkDevice m_device = VK_NULL_HANDLE; VulkanModule m_module; VulkanApi m_api; VulkanDeviceQueue m_deviceQueue; - - float m_clearColor[4] = { 0, 0, 0, 0 }; - List<VkViewport> m_viewports; - List<VkRect2D> m_scissorRects; + uint32_t m_queueFamilyIndex; Desc m_desc; DescriptorSetAllocator descriptorSetAllocator; - // Temporary list used by flushBindingState to avoid per-frame allocation. - List<VkCopyDescriptorSet> m_descSetCopies; + uint32_t m_queueAllocCount; }; /* !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! VkRenderer::Buffer !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! */ @@ -971,77 +1966,6 @@ Result VKRenderer::Buffer::init(const VulkanApi& api, size_t bufferSize, VkBuffe /* !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
VkRenderer !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! */ -Result VKRenderer::_beginPass() -{ - const int numRenderTargets = 1; - - int numAttachments = 0; - - // Start render pass - VkClearValue clearValues[kMaxAttachments]; - clearValues[numAttachments++] = VkClearValue{ m_clearColor[0], m_clearColor[1], m_clearColor[2], m_clearColor[3] }; - - bool hasDepthBuffer = false; - if (hasDepthBuffer) - { - VkClearValue& clearValue = clearValues[numAttachments++]; - - clearValue.depthStencil.depth = 1.0f; - clearValue.depthStencil.stencil = 0; - } - - const int width = m_currentFramebuffer->m_width; - const int height = m_currentFramebuffer->m_height; - - VkCommandBuffer cmdBuffer = m_deviceQueue.getCommandBuffer(); - - VkRenderPassBeginInfo renderPassBegin = {}; - renderPassBegin.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO; - renderPassBegin.renderPass = m_currentFramebuffer->m_layout->m_renderPass; - renderPassBegin.framebuffer = m_currentFramebuffer->m_handle; - renderPassBegin.renderArea.offset.x = 0; - renderPassBegin.renderArea.offset.y = 0; - renderPassBegin.renderArea.extent.width = width; - renderPassBegin.renderArea.extent.height = height; - renderPassBegin.clearValueCount = numAttachments; - renderPassBegin.pClearValues = clearValues; - - m_api.vkCmdBeginRenderPass(cmdBuffer, &renderPassBegin, VK_SUBPASS_CONTENTS_INLINE); - - // Set up scissor and viewport - if (m_scissorRects.getCount()) - { - m_api.vkCmdSetScissor( - cmdBuffer, 0, (uint32_t)m_scissorRects.getCount(), m_scissorRects.getBuffer()); - } - if (m_viewports.getCount()) - { - m_api.vkCmdSetViewport( - cmdBuffer, 0, (uint32_t)m_viewports.getCount(), m_viewports.getBuffer()); - } - - return SLANG_OK; -} - -void VKRenderer::_endPass() -{ - VkCommandBuffer cmdBuffer = m_deviceQueue.getCommandBuffer(); - m_api.vkCmdEndRenderPass(cmdBuffer); -} - -void VKRenderer::_endRender() -{ - m_deviceQueue.flush(); - - // Make m_currentDescriptorSets consistent with m_currentDescriptorSetImpls - // so 
that we don't mistakenly treat any transient descriptor sets as "copied" in the next frame. - for (uint32_t i = 0; i < kMaxDescriptorSets; i++) - { - if (m_currentDescriptorSetImpls[i]) - m_currentDescriptorSets[i] = m_currentDescriptorSetImpls[i]->m_descriptorSet.handle; - } -} - Result SLANG_MCALL createVKRenderer(const IRenderer::Desc* desc, IRenderer** outRenderer) { RefPtr<VKRenderer> result = new VKRenderer(); @@ -1058,19 +1982,9 @@ VKRenderer::~VKRenderer() waitForGpu(); } - m_currentFramebuffer.setNull(); - - m_currentPipeline.setNull(); + shaderCache.free(); // Same as clear but, also dtors all elements, which clear does not - m_boundVertexBuffers = List<BoundVertexBuffer>(); - - m_currentPipelineLayout.setNull(); - for (auto& impl : m_currentDescriptorSetImpls) - { - impl.setNull(); - } - m_deviceQueue.destroy(); descriptorSetAllocator.close(); @@ -1079,6 +1993,8 @@ VKRenderer::~VKRenderer() { m_api.vkDestroyDevice(m_device, nullptr); m_device = VK_NULL_HANDLE; + if (m_api.m_instance != VK_NULL_HANDLE) + m_api.vkDestroyInstance(m_api.m_instance, nullptr); } } @@ -1157,16 +2073,9 @@ VkPipelineShaderStageCreateInfo VKRenderer::compileEntryPoint( // !!!!!!!!!!!!!!!!!!!!!!!!!!!! Renderer interface !!!!!!!!!!!!!!!!!!!!!!!!!! 
-SlangResult VKRenderer::initialize(const Desc& desc) +Result VKRenderer::initVulkanInstanceAndDevice(bool useValidationLayer) { - SLANG_RETURN_ON_FAIL(slangContext.initialize(desc.slang, SLANG_SPIRV, "sm_5_1")); - - SLANG_RETURN_ON_FAIL(GraphicsAPIRenderer::initialize(desc)); - - SLANG_RETURN_ON_FAIL(m_module.init()); - SLANG_RETURN_ON_FAIL(m_api.initGlobalProcs(m_module)); - descriptorSetAllocator.m_api = &m_api; - m_desc = desc; + m_queueAllocCount = 0; VkApplicationInfo applicationInfo = { VK_STRUCTURE_TYPE_APPLICATION_INFO }; applicationInfo.pApplicationName = "slang-render-test"; @@ -1198,69 +2107,71 @@ SlangResult VKRenderer::initialize(const Desc& desc) instanceCreateInfo.enabledExtensionCount = SLANG_COUNT_OF(instanceExtensions); instanceCreateInfo.ppEnabledExtensionNames = &instanceExtensions[0]; -#if ENABLE_VALIDATION_LAYER - // Depending on driver version, validation layer may or may not exist. - // Newer drivers comes with "VK_LAYER_KHRONOS_validation", while older - // drivers provide only the deprecated - // "VK_LAYER_LUNARG_standard_validation" layer. - // We will check what layers are available, and use the newer - // "VK_LAYER_KHRONOS_validation" layer when possible. - uint32_t layerCount; - m_api.vkEnumerateInstanceLayerProperties(&layerCount, nullptr); - - List<VkLayerProperties> availableLayers; - availableLayers.setCount(layerCount); - m_api.vkEnumerateInstanceLayerProperties(&layerCount, availableLayers.getBuffer()); - - const char* layerNames[] = { nullptr }; - for (auto& layer : availableLayers) - { - if (strncmp( + if (useValidationLayer) + { + // Depending on driver version, validation layer may or may not exist. + // Newer drivers comes with "VK_LAYER_KHRONOS_validation", while older + // drivers provide only the deprecated + // "VK_LAYER_LUNARG_standard_validation" layer. + // We will check what layers are available, and use the newer + // "VK_LAYER_KHRONOS_validation" layer when possible. 
+ uint32_t layerCount; + m_api.vkEnumerateInstanceLayerProperties(&layerCount, nullptr); + + List<VkLayerProperties> availableLayers; + availableLayers.setCount(layerCount); + m_api.vkEnumerateInstanceLayerProperties(&layerCount, availableLayers.getBuffer()); + + const char* layerNames[] = { nullptr }; + for (auto& layer : availableLayers) + { + if (strncmp( layer.layerName, "VK_LAYER_KHRONOS_validation", sizeof("VK_LAYER_KHRONOS_validation")) == 0) - { - layerNames[0] = "VK_LAYER_KHRONOS_validation"; - break; + { + layerNames[0] = "VK_LAYER_KHRONOS_validation"; + break; + } } - } - // On older drivers, only "VK_LAYER_LUNARG_standard_validation" exists, - // so we try to use it if we can't find "VK_LAYER_KHRONOS_validation". - if (!layerNames[0]) - { - for (auto& layer : availableLayers) + // On older drivers, only "VK_LAYER_LUNARG_standard_validation" exists, + // so we try to use it if we can't find "VK_LAYER_KHRONOS_validation". + if (!layerNames[0]) { - if (strncmp( + for (auto& layer : availableLayers) + { + if (strncmp( layer.layerName, "VK_LAYER_LUNARG_standard_validation", sizeof("VK_LAYER_LUNARG_standard_validation")) == 0) - { - layerNames[0] = "VK_LAYER_LUNARG_standard_validation"; - break; + { + layerNames[0] = "VK_LAYER_LUNARG_standard_validation"; + break; + } } } + if (layerNames[0]) + { + instanceCreateInfo.enabledLayerCount = SLANG_COUNT_OF(layerNames); + instanceCreateInfo.ppEnabledLayerNames = layerNames; + } } - if (layerNames[0]) - { - instanceCreateInfo.enabledLayerCount = SLANG_COUNT_OF(layerNames); - instanceCreateInfo.ppEnabledLayerNames = layerNames; - } -#endif if (m_api.vkCreateInstance(&instanceCreateInfo, nullptr, &instance) != VK_SUCCESS) return SLANG_FAIL; SLANG_RETURN_ON_FAIL(m_api.initInstanceProcs(instance)); -#if ENABLE_VALIDATION_LAYER - VkDebugReportFlagsEXT debugFlags = VK_DEBUG_REPORT_ERROR_BIT_EXT | VK_DEBUG_REPORT_WARNING_BIT_EXT; + if (useValidationLayer) + { + VkDebugReportFlagsEXT debugFlags = 
VK_DEBUG_REPORT_ERROR_BIT_EXT | VK_DEBUG_REPORT_WARNING_BIT_EXT; - VkDebugReportCallbackCreateInfoEXT debugCreateInfo = { VK_STRUCTURE_TYPE_DEBUG_REPORT_CREATE_INFO_EXT }; - debugCreateInfo.pfnCallback = &debugMessageCallback; - debugCreateInfo.pUserData = this; - debugCreateInfo.flags = debugFlags; + VkDebugReportCallbackCreateInfoEXT debugCreateInfo = { VK_STRUCTURE_TYPE_DEBUG_REPORT_CREATE_INFO_EXT }; + debugCreateInfo.pfnCallback = &debugMessageCallback; + debugCreateInfo.pUserData = this; + debugCreateInfo.flags = debugFlags; - SLANG_VK_RETURN_ON_FAIL(m_api.vkCreateDebugReportCallbackEXT(instance, &debugCreateInfo, nullptr, &m_debugReportCallback)); -#endif + SLANG_VK_RETURN_ON_FAIL(m_api.vkCreateDebugReportCallbackEXT(instance, &debugCreateInfo, nullptr, &m_debugReportCallback)); + } uint32_t numPhysicalDevices = 0; SLANG_VK_RETURN_ON_FAIL(m_api.vkEnumeratePhysicalDevices(instance, &numPhysicalDevices, nullptr)); @@ -1271,11 +2182,11 @@ SlangResult VKRenderer::initialize(const Desc& desc) Index selectedDeviceIndex = 0; - if (desc.adapter) + if (m_desc.adapter) { selectedDeviceIndex = -1; - String lowerAdapter = String(desc.adapter).toLower(); + String lowerAdapter = String(m_desc.adapter).toLower(); for (Index i = 0; i < physicalDevices.getCount(); ++i) { @@ -1306,7 +2217,6 @@ SlangResult VKRenderer::initialize(const Desc& desc) VkDeviceCreateInfo deviceCreateInfo = { VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO }; deviceCreateInfo.queueCreateInfoCount = 1; - deviceCreateInfo.pEnabledFeatures = &m_api.m_deviceFeatures; // Get the device features (doesn't use, but useful when debugging) @@ -1332,17 +2242,27 @@ SlangResult VKRenderer::initialize(const Desc& desc) VkPhysicalDeviceShaderAtomicInt64FeaturesKHR atomicInt64Features = { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_INT64_FEATURES_KHR }; // Atomic Float features VkPhysicalDeviceShaderAtomicFloatFeaturesEXT atomicFloatFeatures = { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_FLOAT_FEATURES_EXT }; - + // 
Timeline Semaphore features + VkPhysicalDeviceTimelineSemaphoreFeatures timelineFeatures = { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES }; + // Extended dynamic state features + VkPhysicalDeviceExtendedDynamicStateFeaturesEXT extendedDynamicStateFeatures = { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_FEATURES_EXT }; // API version check, can't use vkGetPhysicalDeviceProperties2 yet since this device might not support it if (VK_MAKE_VERSION(majorVersion, minorVersion, 0) >= VK_API_VERSION_1_1 && m_api.vkGetPhysicalDeviceProperties2 && m_api.vkGetPhysicalDeviceFeatures2) { - // Get device features VkPhysicalDeviceFeatures2 deviceFeatures2 = {}; deviceFeatures2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2; + // Extended dynamic states + extendedDynamicStateFeatures.pNext = deviceFeatures2.pNext; + deviceFeatures2.pNext = &extendedDynamicStateFeatures; + + // Timeline Semaphore + timelineFeatures.pNext = deviceFeatures2.pNext; + deviceFeatures2.pNext = &timelineFeatures; + // Float16 float16Features.pNext = deviceFeatures2.pNext; deviceFeatures2.pNext = &float16Features; @@ -1393,14 +2313,32 @@ SlangResult VKRenderer::initialize(const Desc& desc) deviceExtensions.add(VK_EXT_SHADER_ATOMIC_FLOAT_EXTENSION_NAME); m_features.add("atomic-float"); } + + if (timelineFeatures.timelineSemaphore) + { + // Link into the creation features + timelineFeatures.pNext = (void*)deviceCreateInfo.pNext; + deviceCreateInfo.pNext = &timelineFeatures; + deviceExtensions.add(VK_KHR_TIMELINE_SEMAPHORE_EXTENSION_NAME); + m_features.add("timeline-semaphore"); + } + + if (extendedDynamicStateFeatures.extendedDynamicState) + { + // Link into the creation features + extendedDynamicStateFeatures.pNext = (void*)deviceCreateInfo.pNext; + deviceCreateInfo.pNext = &extendedDynamicStateFeatures; + deviceExtensions.add(VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME); + m_features.add("extended-dynamic-states"); + } } - int queueFamilyIndex = 
m_api.findQueue(VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT); - assert(queueFamilyIndex >= 0); + m_queueFamilyIndex = m_api.findQueue(VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT); + assert(m_queueFamilyIndex >= 0); float queuePriority = 0.0f; VkDeviceQueueCreateInfo queueCreateInfo = { VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO }; - queueCreateInfo.queueFamilyIndex = queueFamilyIndex; + queueCreateInfo.queueFamilyIndex = m_queueFamilyIndex; queueCreateInfo.queueCount = 1; queueCreateInfo.pQueuePriorities = &queuePriority; @@ -1409,90 +2347,51 @@ SlangResult VKRenderer::initialize(const Desc& desc) deviceCreateInfo.enabledExtensionCount = uint32_t(deviceExtensions.getCount()); deviceCreateInfo.ppEnabledExtensionNames = deviceExtensions.getBuffer(); - SLANG_VK_RETURN_ON_FAIL(m_api.vkCreateDevice(m_api.m_physicalDevice, &deviceCreateInfo, nullptr, &m_device)); + if (m_api.vkCreateDevice(m_api.m_physicalDevice, &deviceCreateInfo, nullptr, &m_device) != VK_SUCCESS) + return SLANG_FAIL; SLANG_RETURN_ON_FAIL(m_api.initDeviceProcs(m_device)); - { - VkQueue queue; - m_api.vkGetDeviceQueue(m_device, queueFamilyIndex, 0, &queue); - SLANG_RETURN_ON_FAIL(m_deviceQueue.init(m_api, queue, queueFamilyIndex)); - } return SLANG_OK; } -void VKRenderer::submitGpuWork() -{ - m_deviceQueue.flush(); -} - -void VKRenderer::waitForGpu() +SlangResult VKRenderer::initialize(const Desc& desc) { - m_deviceQueue.flushAndWait(); -} + m_desc = desc; -void VKRenderer::setClearColor(const float color[4]) -{ - for (int ii = 0; ii < 4; ++ii) - m_clearColor[ii] = color[ii]; -} + SLANG_RETURN_ON_FAIL(GraphicsAPIRenderer::initialize(desc)); -void VKRenderer::clearFrame() -{ - _beginPass(); - ShortList<VkClearAttachment> clears; - for (Index i = 0; i < m_currentFramebuffer->renderTargetViews.getCount(); i++) - { - VkClearAttachment attachment; - attachment.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; - memcpy(attachment.clearValue.color.float32, m_clearColor, sizeof(float) * 4); - attachment.colorAttachment 
= (int)i; - clears.add(attachment); - } - if (m_currentFramebuffer->depthStencilView) + SLANG_RETURN_ON_FAIL(m_module.init()); + SLANG_RETURN_ON_FAIL(m_api.initGlobalProcs(m_module)); + descriptorSetAllocator.m_api = &m_api; + SLANG_RETURN_ON_FAIL(initVulkanInstanceAndDevice(false)); { - VkClearAttachment attachment; - attachment.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; - attachment.clearValue.depthStencil.depth = 1.0f; - attachment.clearValue.depthStencil.stencil = 0; - clears.add(attachment); + VkQueue queue; + m_api.vkGetDeviceQueue(m_device, m_queueFamilyIndex, 0, &queue); + SLANG_RETURN_ON_FAIL(m_deviceQueue.init(m_api, queue, m_queueFamilyIndex)); } - VkClearRect rect = {}; - rect.baseArrayLayer = 0; - rect.layerCount = 1; - rect.rect.extent.width = m_currentFramebuffer->m_width; - rect.rect.extent.height = m_currentFramebuffer->m_height; - m_api.vkCmdClearAttachments( - m_deviceQueue.getCommandBuffer(), - (uint32_t)clears.getCount(), - clears.getArrayView().getBuffer(), - 1, - &rect); - _endPass(); -} -void VKRenderer::beginFrame() -{ - if (m_deviceQueue.isCurrent(VulkanDeviceQueue::EventType::EndFrame)) - m_deviceQueue.makeCompleted(VulkanDeviceQueue::EventType::EndFrame); + SLANG_RETURN_ON_FAIL(slangContext.initialize(desc.slang, SLANG_SPIRV, "sm_5_1")); + return SLANG_OK; } -void VKRenderer::endFrame() +void VKRenderer::waitForGpu() { - _endRender(); + m_deviceQueue.flushAndWait(); } -void VKRenderer::makeSwapchainImagePresentable(ISwapchain* swapchain) +Result VKRenderer::createCommandQueue(const ICommandQueue::Desc& desc, ICommandQueue** outQueue) { - auto swapchainImpl = static_cast<SwapchainImpl*>(swapchain); - auto image = swapchainImpl->m_images[swapchainImpl->m_currentImageIndex]; - _transitionImageLayout( - image->m_image, - image->m_vkformat, - *image->getDesc(), - VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, - VK_IMAGE_LAYOUT_PRESENT_SRC_KHR); - if (!m_deviceQueue.isCurrent(VulkanDeviceQueue::EventType::EndFrame)) - 
m_deviceQueue.makeCurrent(VulkanDeviceQueue::EventType::EndFrame); + // Only support one queue for now. + if (m_queueAllocCount != 0) + return SLANG_FAIL; + auto queueFamilyIndex = m_api.findQueue(VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT); + VkQueue vkQueue; + m_api.vkGetDeviceQueue(m_api.m_device, queueFamilyIndex, 0, &vkQueue); + RefPtr<CommandQueueImpl> result = new CommandQueueImpl(); + result->init(this, vkQueue, queueFamilyIndex); + *outQueue = result.detach(); + m_queueAllocCount++; + return SLANG_OK; } Result VKRenderer::createSwapchain( @@ -1512,6 +2411,16 @@ Result VKRenderer::createFramebufferLayout(const IFramebufferLayout::Desc& desc, return SLANG_OK; } +Result VKRenderer::createRenderPassLayout( + const IRenderPassLayout::Desc& desc, + IRenderPassLayout** outRenderPassLayout) +{ + RefPtr<RenderPassLayoutImpl> result = new RenderPassLayoutImpl(); + SLANG_RETURN_ON_FAIL(result->init(this, desc)); + *outRenderPassLayout = result.detach(); + return SLANG_OK; +} + Result VKRenderer::createFramebuffer(const IFramebuffer::Desc& desc, IFramebuffer** outFramebuffer) { RefPtr<FramebufferImpl> fb = new FramebufferImpl(); @@ -1520,13 +2429,12 @@ Result VKRenderer::createFramebuffer(const IFramebuffer::Desc& desc, IFramebuffe return SLANG_OK; } -void VKRenderer::setFramebuffer(IFramebuffer* framebuffer) -{ - m_currentFramebuffer = static_cast<FramebufferImpl*>(framebuffer); -} - SlangResult VKRenderer::readTextureResource( - ITextureResource* texture, ISlangBlob** outBlob, size_t* outRowPitch, size_t* outPixelSize) + ITextureResource* texture, + ResourceState state, + ISlangBlob** outBlob, + size_t* outRowPitch, + size_t* outPixelSize) { SLANG_UNUSED(texture); SLANG_UNUSED(outBlob); @@ -1535,6 +2443,48 @@ SlangResult VKRenderer::readTextureResource( return SLANG_FAIL; } +SlangResult VKRenderer::readBufferResource( + IBufferResource* inBuffer, + size_t offset, + size_t size, + ISlangBlob** outBlob) +{ + BufferResourceImpl* buffer = 
static_cast<BufferResourceImpl*>(inBuffer); + + RefPtr<ListBlob> blob = new ListBlob(); + blob->m_data.setCount(size); + + // create staging buffer + Buffer staging; + + SLANG_RETURN_ON_FAIL(staging.init( + m_api, + size, + VK_BUFFER_USAGE_TRANSFER_DST_BIT, + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)); + + // Copy from real buffer to staging buffer + VkCommandBuffer commandBuffer = m_deviceQueue.getCommandBuffer(); + + VkBufferCopy copyInfo = {}; + copyInfo.size = size; + copyInfo.srcOffset = offset; + m_api.vkCmdCopyBuffer(commandBuffer, buffer->m_buffer.m_buffer, staging.m_buffer, 1, &copyInfo); + + m_deviceQueue.flushAndWait(); + + // Write out the data from the buffer + void* mappedData = nullptr; + SLANG_RETURN_ON_FAIL( + m_api.vkMapMemory(m_device, staging.m_memory, 0, size, 0, &mappedData)); + + ::memcpy(blob->m_data.getBuffer(), mappedData, size); + m_api.vkUnmapMemory(m_device, staging.m_memory); + + *outBlob = blob.detach(); + return SLANG_OK; +} + static VkBufferUsageFlagBits _calcBufferUsageFlags(IResource::BindFlag::Enum bind) { typedef IResource::BindFlag BindFlag; @@ -1868,6 +2818,7 @@ Result VKRenderer::createTextureResource(IResource::Usage initialUsage, const IT // Bind the memory to the image m_api.vkBindImageMemory(m_device, texture->m_image, texture->m_imageMemory, 0); + Buffer uploadBuffer; if (initData) { List<TextureResource::Size> mipSizes; @@ -1896,7 +2847,6 @@ Result VKRenderer::createTextureResource(IResource::Usage initialUsage, const IT // Calculate the total size taking into account the array bufferSize *= arraySize; - Buffer uploadBuffer; SLANG_RETURN_ON_FAIL(uploadBuffer.init(m_api, bufferSize, VK_BUFFER_USAGE_TRANSFER_SRC_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)); assert(mipSizes.getCount() == numMipMaps); @@ -1977,10 +2927,7 @@ Result VKRenderer::createTextureResource(IResource::Usage initialUsage, const IT } } } - _transitionImageLayout(texture->m_image, 
format, *texture->getDesc(), VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); - - m_deviceQueue.flushAndWait(); } else { @@ -2006,6 +2953,7 @@ Result VKRenderer::createTextureResource(IResource::Usage initialUsage, const IT break; } } + m_deviceQueue.flushAndWait(); *outResource = texture.detach(); return SLANG_OK; } @@ -2056,15 +3004,13 @@ Result VKRenderer::createBufferResource(IResource::Usage initialUsage, const IBu VkBufferCopy copyInfo = {}; copyInfo.size = bufferSize; m_api.vkCmdCopyBuffer(commandBuffer, buffer->m_uploadBuffer.m_buffer, buffer->m_buffer.m_buffer, 1, &copyInfo); - - //flushCommandBuffer(commandBuffer); + m_deviceQueue.flush(); } *outResource = buffer.detach(); return SLANG_OK; } - VkFilter translateFilterMode(TextureFilteringMode mode) { switch (mode) @@ -2170,13 +3116,13 @@ static VkStencilOp translateStencilOp(StencilOp op) static VkStencilOpState translateStencilState(DepthStencilOpDesc desc) { VkStencilOpState rs; - rs.compareMask = desc.stencilCompareMask; + rs.compareMask = 0xFF; rs.compareOp = translateComparisonFunc(desc.stencilFunc); rs.depthFailOp = translateStencilOp(desc.stencilDepthFailOp); rs.failOp = translateStencilOp(desc.stencilFailOp); rs.passOp = translateStencilOp(desc.stencilPassOp); - rs.reference = desc.stencilReference; - rs.writeMask = desc.stencilWriteMask; + rs.reference = 0; + rs.writeMask = 0xFF; return rs; } @@ -2399,311 +3345,6 @@ Result VKRenderer::createInputLayout(const InputElementDesc* elements, UInt numE return SLANG_OK; } -void* VKRenderer::map(IBufferResource* bufferIn, MapFlavor flavor) -{ - BufferResourceImpl* buffer = static_cast<BufferResourceImpl*>(bufferIn); - assert(buffer->m_mapFlavor == MapFlavor::Unknown); - - // Make sure everything has completed before reading... 
- m_deviceQueue.flushAndWait(); - - const size_t bufferSize = buffer->getDesc()->sizeInBytes; - - switch (flavor) - { - case MapFlavor::WriteDiscard: - case MapFlavor::HostWrite: - { - if (!buffer->m_uploadBuffer.isInitialized()) - { - return nullptr; - } - - void* mappedData = nullptr; - SLANG_VK_CHECK(m_api.vkMapMemory(m_device, buffer->m_uploadBuffer.m_memory, 0, bufferSize, 0, &mappedData)); - buffer->m_mapFlavor = flavor; - return mappedData; - } - case MapFlavor::HostRead: - { - // Make sure there is space in the read buffer - buffer->m_readBuffer.setCount(bufferSize); - - // create staging buffer - Buffer staging; - - SLANG_RETURN_NULL_ON_FAIL(staging.init(m_api, bufferSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)); - - // Copy from real buffer to staging buffer - VkCommandBuffer commandBuffer = m_deviceQueue.getCommandBuffer(); - - VkBufferCopy copyInfo = {}; - copyInfo.size = bufferSize; - m_api.vkCmdCopyBuffer(commandBuffer, buffer->m_buffer.m_buffer, staging.m_buffer, 1, &copyInfo); - - m_deviceQueue.flushAndWait(); - - // Write out the data from the buffer - void* mappedData = nullptr; - SLANG_VK_CHECK(m_api.vkMapMemory(m_device, staging.m_memory, 0, bufferSize, 0, &mappedData)); - - ::memcpy(buffer->m_readBuffer.getBuffer(), mappedData, bufferSize); - m_api.vkUnmapMemory(m_device, staging.m_memory); - - buffer->m_mapFlavor = flavor; - - return buffer->m_readBuffer.getBuffer(); - } - default: - return nullptr; - } -} - -void VKRenderer::unmap(IBufferResource* bufferIn) -{ - BufferResourceImpl* buffer = static_cast<BufferResourceImpl*>(bufferIn); - assert(buffer->m_mapFlavor != MapFlavor::Unknown); - - const size_t bufferSize = buffer->getDesc()->sizeInBytes; - - switch (buffer->m_mapFlavor) - { - case MapFlavor::WriteDiscard: - case MapFlavor::HostWrite: - { - m_api.vkUnmapMemory(m_device, buffer->m_uploadBuffer.m_memory); - - // Copy from staging buffer to real buffer - VkCommandBuffer 
commandBuffer = m_deviceQueue.getCommandBuffer(); - - VkBufferCopy copyInfo = {}; - copyInfo.size = bufferSize; - m_api.vkCmdCopyBuffer(commandBuffer, buffer->m_uploadBuffer.m_buffer, buffer->m_buffer.m_buffer, 1, &copyInfo); - - // TODO: is this necessary? - //m_deviceQueue.flushAndWait(); - break; - } - default: break; - } - - // Mark as no longer mapped - buffer->m_mapFlavor = MapFlavor::Unknown; -} - -void VKRenderer::setPrimitiveTopology(PrimitiveTopology topology) -{ - m_primitiveTopology = VulkanUtil::getVkPrimitiveTopology(topology); -} - -void VKRenderer::setVertexBuffers(UInt startSlot, UInt slotCount, IBufferResource*const* buffers, const UInt* strides, const UInt* offsets) -{ - { - const Index num = Index(startSlot + slotCount); - if (num > m_boundVertexBuffers.getCount()) - { - m_boundVertexBuffers.setCount(num); - } - } - - for (Index i = 0; i < Index(slotCount); i++) - { - BufferResourceImpl* buffer = static_cast<BufferResourceImpl*>(buffers[i]); - if (buffer) - { - assert(buffer->m_initialUsage == IResource::Usage::VertexBuffer); - } - - BoundVertexBuffer& boundBuffer = m_boundVertexBuffers[startSlot + i]; - boundBuffer.m_buffer = buffer; - boundBuffer.m_stride = int(strides[i]); - boundBuffer.m_offset = int(offsets[i]); - } -} - -void VKRenderer::setIndexBuffer(IBufferResource* buffer, Format indexFormat, UInt offset) -{ -} - -void VKRenderer::setViewports(UInt count, Viewport const* viewports) -{ - static const int kMaxViewports = 8; // TODO: base on device caps - assert(count <= kMaxViewports); - - m_viewports.setCount(count); - for(UInt ii = 0; ii < count; ++ii) - { - auto& inViewport = viewports[ii]; - auto& vkViewport = m_viewports[ii]; - - vkViewport.x = inViewport.originX; - vkViewport.y = inViewport.originY; - vkViewport.width = inViewport.extentX; - vkViewport.height = inViewport.extentY; - vkViewport.minDepth = inViewport.minZ; - vkViewport.maxDepth = inViewport.maxZ; - } - - VkCommandBuffer commandBuffer = m_deviceQueue.getCommandBuffer(); - 
m_api.vkCmdSetViewport(commandBuffer, 0, uint32_t(count), m_viewports.getBuffer()); -} - -void VKRenderer::setScissorRects(UInt count, ScissorRect const* rects) -{ - static const int kMaxScissorRects = 8; // TODO: base on device caps - assert(count <= kMaxScissorRects); - - m_scissorRects.setCount(count); - for(UInt ii = 0; ii < count; ++ii) - { - auto& inRect = rects[ii]; - auto& vkRect = m_scissorRects[ii]; - - vkRect.offset.x = int32_t(inRect.minX); - vkRect.offset.y = int32_t(inRect.minY); - vkRect.extent.width = uint32_t(inRect.maxX - inRect.minX); - vkRect.extent.height = uint32_t(inRect.maxY - inRect.minY); - - } - - VkCommandBuffer commandBuffer = m_deviceQueue.getCommandBuffer(); - m_api.vkCmdSetScissor(commandBuffer, 0, uint32_t(count), m_scissorRects.getBuffer()); -} - -void VKRenderer::setPipelineState(IPipelineState* state) -{ - m_currentPipeline = static_cast<PipelineStateImpl*>(state); -} - -void VKRenderer::_flushBindingState(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipelineBindPoint) -{ - auto pipeline = m_currentPipeline; - auto& descSetCopies = m_descSetCopies; - descSetCopies.clear(); - // We start by binding the pipeline state. - // - m_api.vkCmdBindPipeline(commandBuffer, pipelineBindPoint, pipeline->m_pipeline); - - // Next we bind all the descriptor sets that were set in the `VKRenderer`. - // - auto pipelineLayoutImpl = pipeline->m_pipelineLayout.Ptr(); - auto vkPipelineLayout = pipelineLayoutImpl->m_pipelineLayout; - auto descriptorSetCount = pipelineLayoutImpl->m_descriptorSetCount; - for (uint32_t i = 0; i < (uint32_t)descriptorSetCount; i++) - { - if (m_currentDescriptorSetImpls[i]->m_isTransient) - { - // A transient descriptor set may go out of life cycle after command list recording, - // therefore we must make a copy of it in the per-frame descriptor pool. - - // If we have already created a transient copy for this descriptor set, skip the copy. 
- if (m_currentDescriptorSetImpls[i]->m_descriptorSet.handle != - m_currentDescriptorSets[i]) - continue; - - auto descSet = m_deviceQueue.allocTransientDescriptorSet( - m_currentDescriptorSetImpls[i]->m_layout->m_descriptorSetLayout); - uint32_t bindingIndex = 0; - for (auto binding : m_currentDescriptorSetImpls[i]->m_layout->m_vkBindings) - { - VkCopyDescriptorSet copy = {}; - copy.sType = VK_STRUCTURE_TYPE_COPY_DESCRIPTOR_SET; - copy.srcSet = m_currentDescriptorSetImpls[i]->m_descriptorSet.handle; - copy.dstSet = descSet.handle; - copy.srcBinding = copy.dstBinding = bindingIndex; - copy.srcArrayElement = copy.dstArrayElement = 0; - copy.descriptorCount = binding.descriptorCount; - descSetCopies.add(copy); - bindingIndex++; - } - m_currentDescriptorSets[i] = descSet.handle; - } - } - if (descSetCopies.getCount()) - { - m_api.vkUpdateDescriptorSets( - m_api.m_device, 0, nullptr, (uint32_t)descSetCopies.getCount(), descSetCopies.getBuffer()); - } - m_api.vkCmdBindDescriptorSets(commandBuffer, pipelineBindPoint, vkPipelineLayout, - 0, uint32_t(descriptorSetCount), - &m_currentDescriptorSets[0], - 0, nullptr); - - // For any descriptor sets with root-constant ranges, we need to - // bind the relevant data to the context. - // - for(gfx::UInt ii = 0; ii < descriptorSetCount; ++ii) - { - auto descriptorSet = m_currentDescriptorSetImpls[ii]; - auto descriptorSetLayout = descriptorSet->m_layout; - auto size = descriptorSetLayout->m_rootConstantDataSize; - if(size == 0) - continue; - auto data = descriptorSet->m_rootConstantData.getBuffer(); - - // The absolute offset of the descriptor set's data in - // the push-constant data for the entire pipeline was - // computed and cached in the pipeline layout. 
- // - uint32_t offset = pipelineLayoutImpl->m_descriptorSetRootConstantOffsets[ii]; - - m_api.vkCmdPushConstants(commandBuffer, vkPipelineLayout, VK_SHADER_STAGE_ALL, offset, size, data); - } -} - -void VKRenderer::draw(UInt vertexCount, UInt startVertex = 0) -{ - auto pipeline = m_currentPipeline; - if (!pipeline || pipeline->m_shaderProgram->m_pipelineType != PipelineType::Graphics) - { - assert(!"Invalid render pipeline"); - return; - } - - SLANG_RETURN_VOID_ON_FAIL(_beginPass()); - - // Also create descriptor sets based on the given pipeline layout - VkCommandBuffer commandBuffer = m_deviceQueue.getCommandBuffer(); - - _flushBindingState(commandBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS); - - // Bind the vertex buffer - if (m_boundVertexBuffers.getCount() > 0 && m_boundVertexBuffers[0].m_buffer) - { - const BoundVertexBuffer& boundVertexBuffer = m_boundVertexBuffers[0]; - - VkBuffer vertexBuffers[] = { boundVertexBuffer.m_buffer->m_buffer.m_buffer }; - VkDeviceSize offsets[] = { VkDeviceSize(boundVertexBuffer.m_offset) }; - - m_api.vkCmdBindVertexBuffers(commandBuffer, 0, 1, vertexBuffers, offsets); - } - - m_api.vkCmdDraw(commandBuffer, static_cast<uint32_t>(vertexCount), 1, 0, 0); - - _endPass(); -} - -void VKRenderer::drawIndexed(UInt indexCount, UInt startIndex, UInt baseVertex) -{ -} - -void VKRenderer::dispatchCompute(int x, int y, int z) -{ - auto pipeline = m_currentPipeline; - if (!pipeline || pipeline->m_shaderProgram->m_pipelineType != PipelineType::Compute) - { - assert(!"Invalid compute pipeline"); - return; - } - - // Also create descriptor sets based on the given pipeline layout - VkCommandBuffer commandBuffer = m_deviceQueue.getCommandBuffer(); - - _flushBindingState(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE); - - m_api.vkCmdDispatch(commandBuffer, x, y, z); -} - static VkImageViewType _calcImageViewType(ITextureResource::Type type, const ITextureResource::Desc& desc) { switch (type) @@ -2914,7 +3555,6 @@ Result 
VKRenderer::createPipelineLayout(const IPipelineLayout::Desc& desc, IPipe VkPipelineLayout pipelineLayout; SLANG_VK_CHECK(m_api.vkCreatePipelineLayout(m_device, &pipelineLayoutInfo, nullptr, &pipelineLayout)); - RefPtr<PipelineLayoutImpl> pipelineLayoutImpl = new PipelineLayoutImpl(m_api); pipelineLayoutImpl->m_pipelineLayout = pipelineLayout; pipelineLayoutImpl->m_descriptorSetCount = descriptorSetCount; @@ -3116,30 +3756,6 @@ void VKRenderer::DescriptorSetImpl::setRootConstants( memcpy(m_rootConstantData.getBuffer() + rootConstantRangeInfo.offset + offset, data, size); } -void VKRenderer::setDescriptorSet(PipelineType pipelineType, IPipelineLayout* layout, UInt index, IDescriptorSet* descriptorSet) -{ - // Ideally this should eventually be as simple as: - // - // m_api.vkCmdBindDescriptorSets( - // commandBuffer, - // translatePipelineBindPoint(pipelineType), - // layout->m_pipelineLayout, - // index, - // 1, - // ((DescriptorSetImpl*) descriptorSet)->m_descriptorSet, - // 0, - // nullptr); - // - // For now we are lazily flushing state right before drawing, so - // we will hang onto the parameters that were passed in and then - // use them later. - // - - auto descriptorSetImpl = (DescriptorSetImpl*)descriptorSet; - m_currentDescriptorSetImpls[index] = descriptorSetImpl; - m_currentDescriptorSets[index] = descriptorSetImpl->m_descriptorSet.handle; -} - Result VKRenderer::createProgram(const IShaderProgram::Desc& desc, IShaderProgram** outProgram) { if (desc.slangProgram && desc.slangProgram->getSpecializationParamCount() != 0) @@ -3224,7 +3840,27 @@ Result VKRenderer::createGraphicsPipelineState(const GraphicsPipelineStateDesc& VkPipelineInputAssemblyStateCreateInfo inputAssembly = {}; inputAssembly.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO; - inputAssembly.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST; + + // Use PRITIMVE_LIST topology for each primitive type here. 
+ // All other forms of primitive toplogies are specified via dynamic state. + switch (inDesc.primitiveType) + { + case PrimitiveType::Point: + inputAssembly.topology = VK_PRIMITIVE_TOPOLOGY_POINT_LIST; + break; + case PrimitiveType::Line: + inputAssembly.topology = VK_PRIMITIVE_TOPOLOGY_LINE_LIST; + break; + case PrimitiveType::Triangle: + inputAssembly.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST; + break; + case PrimitiveType::Patch: + inputAssembly.topology = VK_PRIMITIVE_TOPOLOGY_PATCH_LIST; + break; + default: + assert(!"unknown topology type."); + break; + } inputAssembly.primitiveRestartEnable = VK_FALSE; VkViewport viewport = {}; @@ -3280,8 +3916,9 @@ Result VKRenderer::createGraphicsPipelineState(const GraphicsPipelineStateDesc& VkPipelineDynamicStateCreateInfo dynamicStateInfo = {}; dynamicStateInfo.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO; - dynamicStateInfo.dynamicStateCount = 2; - VkDynamicState dynamicStates[] = { VK_DYNAMIC_STATE_VIEWPORT , VK_DYNAMIC_STATE_SCISSOR}; + dynamicStateInfo.dynamicStateCount = 3; + VkDynamicState dynamicStates[] = { + VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR, VK_DYNAMIC_STATE_STENCIL_REFERENCE}; dynamicStateInfo.pDynamicStates = dynamicStates; VkPipelineDepthStencilStateCreateInfo depthStencilStateInfo = {}; @@ -3289,6 +3926,10 @@ Result VKRenderer::createGraphicsPipelineState(const GraphicsPipelineStateDesc& depthStencilStateInfo.depthTestEnable = inDesc.depthStencil.depthTestEnable ? 
1 : 0; depthStencilStateInfo.back = translateStencilState(inDesc.depthStencil.backFace); depthStencilStateInfo.front = translateStencilState(inDesc.depthStencil.frontFace); + depthStencilStateInfo.back.compareMask = inDesc.depthStencil.stencilReadMask; + depthStencilStateInfo.back.writeMask = inDesc.depthStencil.stencilWriteMask; + depthStencilStateInfo.front.compareMask = inDesc.depthStencil.stencilReadMask; + depthStencilStateInfo.front.writeMask = inDesc.depthStencil.stencilWriteMask; depthStencilStateInfo.depthBoundsTestEnable = 0; depthStencilStateInfo.depthCompareOp = translateComparisonFunc(inDesc.depthStencil.depthFunc); depthStencilStateInfo.depthWriteEnable = inDesc.depthStencil.depthWriteEnable ? 1 : 0; @@ -3317,10 +3958,8 @@ Result VKRenderer::createGraphicsPipelineState(const GraphicsPipelineStateDesc& RefPtr<PipelineStateImpl> pipelineStateImpl = new PipelineStateImpl(m_api); pipelineStateImpl->m_pipeline = pipeline; - pipelineStateImpl->m_pipelineLayout = pipelineLayoutImpl; pipelineStateImpl->m_framebufferLayout = static_cast<FramebufferLayoutImpl*>(desc.framebufferLayout); - pipelineStateImpl->m_shaderProgram = programImpl; pipelineStateImpl->init(desc); *outState = pipelineStateImpl.detach(); return SLANG_OK; @@ -3336,49 +3975,24 @@ Result VKRenderer::createComputePipelineState(const ComputePipelineStateDesc& in auto programImpl = (ShaderProgramImpl*) desc.program; auto pipelineLayoutImpl = (PipelineLayoutImpl*) desc.pipelineLayout; - VkComputePipelineCreateInfo computePipelineInfo = { VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO }; - computePipelineInfo.stage = programImpl->m_compute; - computePipelineInfo.layout = pipelineLayoutImpl->m_pipelineLayout; - VkPipeline pipeline = VK_NULL_HANDLE; - SLANG_VK_CHECK(m_api.vkCreateComputePipelines(m_device, pipelineCache, 1, &computePipelineInfo, nullptr, &pipeline)); + + if (!programImpl->slangProgram || programImpl->slangProgram->getSpecializationParamCount() == 0) + { + VkComputePipelineCreateInfo 
computePipelineInfo = { + VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO}; + computePipelineInfo.stage = programImpl->m_compute; + computePipelineInfo.layout = pipelineLayoutImpl->m_pipelineLayout; + SLANG_VK_CHECK(m_api.vkCreateComputePipelines( + m_device, pipelineCache, 1, &computePipelineInfo, nullptr, &pipeline)); + } RefPtr<PipelineStateImpl> pipelineStateImpl = new PipelineStateImpl(m_api); pipelineStateImpl->m_pipeline = pipeline; pipelineStateImpl->m_pipelineLayout = pipelineLayoutImpl; - pipelineStateImpl->m_shaderProgram = programImpl; pipelineStateImpl->init(desc); *outState = pipelineStateImpl.detach(); return SLANG_OK; } - -#if 0 - else if (m_currentProgram->m_pipelineType == PipelineType::Graphics) - { - // Create the graphics pipeline - - const int width = m_swapChain.getWidth(); - const int height = m_swapChain.getHeight(); - - - - - - // - - - } - else - { - assert(!"Unhandled program type"); - return SLANG_FAIL; - } - - pipelineOut = pipeline; - return SLANG_OK; - - -#endif - } // renderer_test diff --git a/tools/gfx/vulkan/vk-api.cpp b/tools/gfx/vulkan/vk-api.cpp index 9b51b644c..9a4ab130d 100644 --- a/tools/gfx/vulkan/vk-api.cpp +++ b/tools/gfx/vulkan/vk-api.cpp @@ -17,7 +17,7 @@ bool VulkanApi::areDefined(ProcType type) const { case ProcType::Global: return VK_API_CHECK_FUNCTIONS(VK_API_ALL_GLOBAL_PROCS); case ProcType::Instance: return VK_API_CHECK_FUNCTIONS(VK_API_ALL_INSTANCE_PROCS); - case ProcType::Device: return VK_API_CHECK_FUNCTIONS(VK_API_ALL_DEVICE_PROCS); + case ProcType::Device: return VK_API_CHECK_FUNCTIONS(VK_API_DEVICE_PROCS); default: { assert(!"Unhandled type"); diff --git a/tools/gfx/vulkan/vk-api.h b/tools/gfx/vulkan/vk-api.h index 77e6a9a09..c6537ff8f 100644 --- a/tools/gfx/vulkan/vk-api.h +++ b/tools/gfx/vulkan/vk-api.h @@ -9,6 +9,7 @@ namespace gfx { x(vkGetInstanceProcAddr) \ x(vkCreateInstance) \ x(vkEnumerateInstanceLayerProperties) \ + x(vkDestroyInstance) \ /* */ #define VK_API_INSTANCE_PROCS_OPT(x) \ @@ -89,6 +90,7 
@@ namespace gfx { x(vkCmdPipelineBarrier) \ x(vkCmdCopyBufferToImage)\ x(vkCmdPushConstants) \ + x(vkCmdSetStencilReference) \ \ x(vkCreateFence) \ x(vkDestroyFence) \ @@ -143,6 +145,10 @@ namespace gfx { x(vkAcquireNextImageKHR) \ /* */ +#define VK_API_DEVICE_OPT_PROCS(x) \ + x(vkCmdSetPrimitiveTopologyEXT) \ + /* */ + #define VK_API_ALL_GLOBAL_PROCS(x) \ VK_API_GLOBAL_PROCS(x) @@ -152,7 +158,8 @@ namespace gfx { #define VK_API_ALL_DEVICE_PROCS(x) \ VK_API_DEVICE_PROCS(x) \ - VK_API_DEVICE_KHR_PROCS(x) + VK_API_DEVICE_KHR_PROCS(x) \ + VK_API_DEVICE_OPT_PROCS(x) #define VK_API_ALL_PROCS(x) \ VK_API_ALL_GLOBAL_PROCS(x) \ diff --git a/tools/gfx/vulkan/vk-device-queue.cpp b/tools/gfx/vulkan/vk-device-queue.cpp index 149e5dec2..1bcfe28c8 100644 --- a/tools/gfx/vulkan/vk-device-queue.cpp +++ b/tools/gfx/vulkan/vk-device-queue.cpp @@ -27,7 +27,6 @@ void VulkanDeviceQueue::destroy() m_api->vkFreeCommandBuffers(m_api->m_device, m_commandPools[i], 1, &m_commandBuffers[i]); m_api->vkDestroyFence(m_api->m_device, m_fences[i].fence, nullptr); m_api->vkDestroyCommandPool(m_api->m_device, m_commandPools[i], nullptr); - m_descSetAllocator[i].close(); } m_api = nullptr; } @@ -74,8 +73,6 @@ SlangResult VulkanDeviceQueue::init(const VulkanApi& api, VkQueue queue, int que api.vkCreateFence(api.m_device, &fenceCreateInfo, nullptr, &fence.fence); fence.active = false; fence.value = 0; - - m_descSetAllocator[i].m_api = &api; } VkSemaphoreCreateInfo semaphoreCreateInfo = {}; @@ -175,7 +172,6 @@ void VulkanDeviceQueue::flushStepB() // blocking update of fence values _updateFenceAtIndex(m_commandBufferIndex, true); - m_descSetAllocator[m_commandBufferIndex].reset(); m_api->vkResetCommandPool(m_api->m_device, m_commandPool, 0); VkCommandBufferBeginInfo beginInfo = {}; diff --git a/tools/gfx/vulkan/vk-device-queue.h b/tools/gfx/vulkan/vk-device-queue.h index 9869a3caf..7cb000dd5 100644 --- a/tools/gfx/vulkan/vk-device-queue.h +++ b/tools/gfx/vulkan/vk-device-queue.h @@ -45,11 +45,6 @@ 
struct VulkanDeviceQueue /// Get the command buffer VkCommandBuffer getCommandBuffer() const { return m_commandBuffer; } - VulkanDescriptorSet allocTransientDescriptorSet(VkDescriptorSetLayout layout) - { - return m_descSetAllocator[m_commandBufferIndex].allocate(layout); - } - /// Get the queue VkQueue getQueue() const { return m_queue; } @@ -96,8 +91,6 @@ struct VulkanDeviceQueue VkSemaphore m_semaphores[int(EventType::CountOf)]; VkSemaphore m_currentSemaphores[int(EventType::CountOf)]; - DescriptorSetAllocator m_descSetAllocator[kMaxCommandBuffers]; - uint64_t m_lastFenceCompleted = 1; uint64_t m_nextFenceValue = 2; diff --git a/tools/gfx/vulkan/vk-swap-chain.cpp b/tools/gfx/vulkan/vk-swap-chain.cpp index bc6160a02..3a62ccfe2 100644 --- a/tools/gfx/vulkan/vk-swap-chain.cpp +++ b/tools/gfx/vulkan/vk-swap-chain.cpp @@ -26,12 +26,17 @@ static Index _indexOfFormat(List<VkSurfaceFormatKHR>& formatsIn, VkFormat format return -1; } -SlangResult VulkanSwapChain::init(VulkanDeviceQueue* deviceQueue, const Desc& descIn, const PlatformDesc* platformDescIn) +SlangResult VulkanSwapChain::init( + VulkanApi* vkapi, + VkQueue queue, + uint32_t queueFamilyIndex, + const Desc& descIn, + const PlatformDesc* platformDescIn) { assert(platformDescIn); - m_deviceQueue = deviceQueue; - m_api = deviceQueue->getApi(); + m_queue = queue; + m_api = vkapi; // Make sure it's not set initially m_format = VK_FORMAT_UNDEFINED; @@ -61,7 +66,7 @@ SlangResult VulkanSwapChain::init(VulkanDeviceQueue* deviceQueue, const Desc& de #endif VkBool32 supported = false; - m_api->vkGetPhysicalDeviceSurfaceSupportKHR(m_api->m_physicalDevice, deviceQueue->getQueueIndex(), m_surface, &supported); + m_api->vkGetPhysicalDeviceSurfaceSupportKHR(m_api->m_physicalDevice, queueFamilyIndex, m_surface, &supported); uint32_t numSurfaceFormats = 0; List<VkSurfaceFormatKHR> surfaceFormats; @@ -94,12 +99,13 @@ SlangResult VulkanSwapChain::init(VulkanDeviceQueue* deviceQueue, const Desc& de // Save the desc m_desc = desc; 
- SLANG_RETURN_ON_FAIL(_createSwapChain()); if (descIn.m_format == Format::RGBA_Unorm_UInt8 && m_format == VK_FORMAT_B8G8R8A8_UNORM) { m_desc.m_format = Format::BGRA_Unorm_UInt8; } + + SLANG_RETURN_ON_FAIL(_createSwapChain()); return SLANG_OK; } @@ -125,11 +131,6 @@ void VulkanSwapChain::getWindowSize(int* widthOut, int* heightOut) const SlangResult VulkanSwapChain::_createSwapChain() { - if (hasValidSwapChain()) - { - return SLANG_OK; - } - int width, height; getWindowSize(&width, &height); @@ -211,29 +212,15 @@ SlangResult VulkanSwapChain::_createSwapChain() m_api->vkGetSwapchainImagesKHR(m_api->m_device, m_swapChain, &numSwapChainImages, nullptr); m_desc.m_imageCount = numSwapChainImages; { - List<VkImage> images; - images.setCount(numSwapChainImages); - - m_api->vkGetSwapchainImagesKHR(m_api->m_device, m_swapChain, &numSwapChainImages, images.getBuffer()); - m_images.setCount(numSwapChainImages); - for (int i = 0; i < int(numSwapChainImages); ++i) - { - m_images[i] = images[i]; - } + m_api->vkGetSwapchainImagesKHR( + m_api->m_device, m_swapChain, &numSwapChainImages, m_images.getBuffer()); } return SLANG_OK; } void VulkanSwapChain::_destroySwapChain() { - if (!hasValidSwapChain()) - { - return; - } - - m_deviceQueue->waitForIdle(); - if (m_swapChain != VK_NULL_HANDLE) { m_api->vkDestroySwapchainKHR(m_api->m_device, m_swapChain, nullptr); @@ -261,20 +248,11 @@ VulkanSwapChain::~VulkanSwapChain() destroy(); } -int VulkanSwapChain::nextFrontImageIndex() +int VulkanSwapChain::nextFrontImageIndex(VkSemaphore signalSemaphore) { - if (!hasValidSwapChain()) - { - if (SLANG_FAILED(_createSwapChain())) - { - return -1; - } - } - - VkSemaphore beginFrameSemaphore = m_deviceQueue->makeCurrent(VulkanDeviceQueue::EventType::BeginFrame); - uint32_t swapChainIndex = 0; - VkResult result = m_api->vkAcquireNextImageKHR(m_api->m_device, m_swapChain, UINT64_MAX, beginFrameSemaphore, VK_NULL_HANDLE, &swapChainIndex); + VkResult result = m_api->vkAcquireNextImageKHR( + 
m_api->m_device, m_swapChain, UINT64_MAX, signalSemaphore, VK_NULL_HANDLE, &swapChainIndex); if (result != VK_SUCCESS) { @@ -285,18 +263,8 @@ int VulkanSwapChain::nextFrontImageIndex() return swapChainIndex; } -void VulkanSwapChain::present(bool vsync) +void VulkanSwapChain::present(VkSemaphore waitSemaphore) { - if (!hasValidSwapChain()) - { - m_deviceQueue->flush(); - return; - } - - VkSemaphore endFrameSemaphore = m_deviceQueue->getSemaphore(VulkanDeviceQueue::EventType::EndFrame); - - m_deviceQueue->flushStepA(); - uint32_t swapChainIndices[] = { uint32_t(m_currentSwapChainIndex) }; VkPresentInfoKHR presentInfo = {}; @@ -304,21 +272,12 @@ void VulkanSwapChain::present(bool vsync) presentInfo.swapchainCount = 1; presentInfo.pSwapchains = &m_swapChain; presentInfo.pImageIndices = swapChainIndices; - if (endFrameSemaphore != VK_NULL_HANDLE) + if (waitSemaphore != VK_NULL_HANDLE) { presentInfo.waitSemaphoreCount = 1; - presentInfo.pWaitSemaphores = &endFrameSemaphore; - } - VkResult result = m_api->vkQueuePresentKHR(m_deviceQueue->getQueue(), &presentInfo); - - m_deviceQueue->makeCompleted(VulkanDeviceQueue::EventType::EndFrame); - - m_deviceQueue->flushStepB(); - - if (result != VK_SUCCESS) - { - _destroySwapChain(); + presentInfo.pWaitSemaphores = &waitSemaphore; } + m_api->vkQueuePresentKHR(m_queue, &presentInfo); } } // renderer_test diff --git a/tools/gfx/vulkan/vk-swap-chain.h b/tools/gfx/vulkan/vk-swap-chain.h index 0a2a40b4f..0ddc6f7f5 100644 --- a/tools/gfx/vulkan/vk-swap-chain.h +++ b/tools/gfx/vulkan/vk-swap-chain.h @@ -57,7 +57,12 @@ struct VulkanSwapChain }; /// Must be called before the swap chain can be used - SlangResult init(VulkanDeviceQueue* deviceQueue, const Desc& desc, const PlatformDesc* platformDesc); + SlangResult init( + VulkanApi* vkapi, + VkQueue queue, + uint32_t queueFamilyIndex, + const Desc& desc, + const PlatformDesc* platformDesc); /// Returned the desc used to construct the swap chain. 
/// Is invalid if init hasn't returned with successful result. @@ -67,7 +72,7 @@ struct VulkanSwapChain bool hasValidSwapChain() const { return m_images.getCount() > 0; } /// Present to the display - void present(bool vsync); + void present(VkSemaphore waitSemaphore); /// Get the current size of the window (in pixels written to widthOut, heightOut) void getWindowSize(int* widthOut, int* heightOut) const; @@ -84,7 +89,7 @@ struct VulkanSwapChain const Slang::List<VkImage>& getImages() const { return m_images; } /// Get the next front render image index. Returns -1, if image couldn't be found - int nextFrontImageIndex(); + int nextFrontImageIndex(VkSemaphore signalSemaphore); void destroy(); @@ -120,7 +125,7 @@ struct VulkanSwapChain Slang::List<VkImage> m_images; - VulkanDeviceQueue* m_deviceQueue = nullptr; + VkQueue m_queue; const VulkanApi* m_api = nullptr; Desc m_desc; ///< The desc used to init this swap chain diff --git a/tools/gfx/vulkan/vk-util.cpp b/tools/gfx/vulkan/vk-util.cpp index 561b97a53..218801d7a 100644 --- a/tools/gfx/vulkan/vk-util.cpp +++ b/tools/gfx/vulkan/vk-util.cpp @@ -57,4 +57,35 @@ namespace gfx { return VK_PRIMITIVE_TOPOLOGY_MAX_ENUM; } +VkImageLayout VulkanUtil::mapResourceStateToLayout(ResourceState state) +{ + switch (state) + { + case ResourceState::Undefined: + return VK_IMAGE_LAYOUT_UNDEFINED; + case ResourceState::ShaderResource: + return VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + case ResourceState::UnorderedAccess: + return VK_IMAGE_LAYOUT_GENERAL; + case ResourceState::RenderTarget: + return VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; + case ResourceState::DepthRead: + return VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL; + case ResourceState::DepthWrite: + return VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; + case ResourceState::Present: + return VK_IMAGE_LAYOUT_PRESENT_SRC_KHR; + case ResourceState::CopySource: + return VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL; + case ResourceState::CopyDestination: + return 
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; + case ResourceState::ResolveSource: + return VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL; + case ResourceState::ResolveDestination: + return VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; + default: + return VK_IMAGE_LAYOUT_UNDEFINED; + } +} + } // renderer_test diff --git a/tools/gfx/vulkan/vk-util.h b/tools/gfx/vulkan/vk-util.h index 723c3fde5..f374eea8b 100644 --- a/tools/gfx/vulkan/vk-util.h +++ b/tools/gfx/vulkan/vk-util.h @@ -34,6 +34,8 @@ struct VulkanUtil /// Returns VK_PRIMITIVE_TOPOLOGY_MAX_ENUM on failure static VkPrimitiveTopology getVkPrimitiveTopology(PrimitiveTopology topology); + static VkImageLayout mapResourceStateToLayout(ResourceState state); + /// Returns Slang::Result equivalent of a VkResult static Slang::Result toSlangResult(VkResult res); }; diff --git a/tools/graphics-app-framework/gui.cpp b/tools/graphics-app-framework/gui.cpp index 0d416ec85..3d4283131 100644 --- a/tools/graphics-app-framework/gui.cpp +++ b/tools/graphics-app-framework/gui.cpp @@ -36,8 +36,13 @@ void setNativeWindowHook(Window* window, WNDPROC proc); #endif -GUI::GUI(Window* window, IRenderer* inRenderer, IFramebufferLayout* framebufferLayout) +GUI::GUI( + Window* window, + IRenderer* inRenderer, + ICommandQueue* inQueue, + IFramebufferLayout* framebufferLayout) : renderer(inRenderer) + , queue(inQueue) { ImGui::CreateContext(); ImGuiIO& io = ImGui::GetIO(); @@ -236,6 +241,20 @@ GUI::GUI(Window* window, IRenderer* inRenderer, IFramebufferLayout* framebufferL ISamplerState::Desc desc; samplerState = renderer->createSamplerState(desc); } + + { + IRenderPassLayout::Desc desc; + desc.framebufferLayout = framebufferLayout; + IRenderPassLayout::AttachmentAccessDesc colorAccess; + desc.depthStencilAccess = nullptr; + colorAccess.initialState = ResourceState::Present; + colorAccess.finalState = ResourceState::Present; + colorAccess.loadOp = IRenderPassLayout::AttachmentLoadOp::Load; + colorAccess.storeOp = IRenderPassLayout::AttachmentStoreOp::Store; + 
desc.renderTargetAccess = &colorAccess; + desc.renderTargetCount = 1; + renderPass = renderer->createRenderPassLayout(desc); + } } @@ -248,7 +267,7 @@ void GUI::beginFrame() ImGui::NewFrame(); } -void GUI::endFrame() +void GUI::endFrame(IFramebuffer* framebuffer) { ImGui::Render(); @@ -278,22 +297,23 @@ void GUI::endFrame() auto indexBuffer = renderer->createBufferResource( IResource::Usage::IndexBuffer, indexBufferDesc); - + auto cmdBuf = queue->createCommandBuffer(); + auto encoder = cmdBuf->encodeResourceCommands(); { - ImDrawVert* dstVertex = (ImDrawVert*) renderer->map(vertexBuffer, MapFlavor::WriteDiscard); - ImDrawIdx* dstIndex = (ImDrawIdx*) renderer->map(indexBuffer, MapFlavor::WriteDiscard); - for(int ii = 0; ii < commandListCount; ++ii) { const ImDrawList* commandList = draw_data->CmdLists[ii]; - memcpy(dstVertex, commandList->VtxBuffer.Data, commandList->VtxBuffer.Size * sizeof(ImDrawVert)); - memcpy(dstIndex, commandList->IdxBuffer.Data, commandList->IdxBuffer.Size * sizeof(ImDrawIdx)); - dstVertex += commandList->VtxBuffer.Size; - dstIndex += commandList->IdxBuffer.Size; + encoder->uploadBufferData( + vertexBuffer, + commandList->VtxBuffer.Size * ii * sizeof(ImDrawVert), + commandList->VtxBuffer.Size * sizeof(ImDrawVert), + commandList->VtxBuffer.Data); + encoder->uploadBufferData( + indexBuffer, + commandList->IdxBuffer.Size * ii * sizeof(ImDrawIdx), + commandList->IdxBuffer.Size * sizeof(ImDrawIdx), + commandList->IdxBuffer.Data); } - - renderer->unmap(vertexBuffer); - renderer->unmap(indexBuffer); } // Allocate a transient constant buffer for projection matrix @@ -306,8 +326,6 @@ void GUI::endFrame() constantBufferDesc); { - glm::mat4x4* dstMVP = (glm::mat4x4*) renderer->map(constantBuffer, MapFlavor::WriteDiscard); - float L = draw_data->DisplayPos.x; float R = draw_data->DisplayPos.x + draw_data->DisplaySize.x; float T = draw_data->DisplayPos.y; @@ -319,11 +337,11 @@ void GUI::endFrame() { 0.0f, 0.0f, 0.5f, 0.0f }, { (R+L)/(L-R), (T+B)/(B-T), 
0.5f, 1.0f }, }; - memcpy(dstMVP, mvp, sizeof(mvp)); - - renderer->unmap(constantBuffer); + encoder->uploadBufferData(constantBuffer, 0, sizeof(mvp), mvp); } + encoder->endEncoding(); + gfx::Viewport viewport; viewport.originX = 0; viewport.originY = 0; @@ -333,13 +351,15 @@ void GUI::endFrame() viewport.minZ = 0; viewport.maxZ = 1; - renderer->setViewport(viewport); + auto renderEncoder = cmdBuf->encodeRenderCommands(renderPass, framebuffer); + renderEncoder->setViewportAndScissor(viewport); - renderer->setPipelineState(pipelineState); + renderEncoder->setPipelineState(pipelineState); - renderer->setVertexBuffer(0, vertexBuffer, sizeof(ImDrawVert)); - renderer->setIndexBuffer(indexBuffer, sizeof(ImDrawIdx) == 2 ? Format::R_UInt16 : Format::R_UInt32); - renderer->setPrimitiveTopology(PrimitiveTopology::TriangleList); + renderEncoder->setVertexBuffer(0, vertexBuffer, sizeof(ImDrawVert)); + renderEncoder->setIndexBuffer( + indexBuffer, sizeof(ImDrawIdx) == 2 ? Format::R_UInt16 : Format::R_UInt32); + renderEncoder->setPrimitiveTopology(PrimitiveTopology::TriangleList); UInt vertexOffset = 0; UInt indexOffset = 0; @@ -364,7 +384,7 @@ void GUI::endFrame() (Int)(command->ClipRect.z - pos.x), (Int)(command->ClipRect.w - pos.y) }; - renderer->setScissorRect(rect); + renderEncoder->setScissorRects(1, &rect); // TODO: This should be a dynamic/transient descriptor set... 
auto descriptorSet = renderer->createDescriptorSet(descriptorSetLayout, gfx::IDescriptorSet::Flag::Transient); @@ -374,18 +394,20 @@ void GUI::endFrame() descriptorSet->setSampler(2, 0, samplerState); - renderer->setDescriptorSet( - PipelineType::Graphics, + renderEncoder->setDescriptorSet( pipelineLayout, 0, descriptorSet); - renderer->drawIndexed(command->ElemCount, indexOffset, vertexOffset); + renderEncoder->drawIndexed(command->ElemCount, indexOffset, vertexOffset); } indexOffset += command->ElemCount; } vertexOffset += commandList->VtxBuffer.Size; } + renderEncoder->endEncoding(); + cmdBuf->close(); + queue->executeCommandBuffer(cmdBuf); } GUI::~GUI() diff --git a/tools/graphics-app-framework/gui.h b/tools/graphics-app-framework/gui.h index 22b4bf2f5..680cea14b 100644 --- a/tools/graphics-app-framework/gui.h +++ b/tools/graphics-app-framework/gui.h @@ -12,14 +12,16 @@ namespace gfx { struct GUI : Slang::RefObject { - GUI(Window* window, IRenderer* renderer, IFramebufferLayout* framebufferLayout); + GUI(Window* window, IRenderer* renderer, ICommandQueue* queue, IFramebufferLayout* framebufferLayout); ~GUI(); void beginFrame(); - void endFrame(); + void endFrame(IFramebuffer* framebuffer); private: Slang::ComPtr<IRenderer> renderer; + Slang::ComPtr<ICommandQueue> queue; + Slang::ComPtr<IRenderPassLayout> renderPass; Slang::ComPtr<IPipelineState> pipelineState; Slang::ComPtr<IDescriptorSetLayout> descriptorSetLayout; Slang::ComPtr<IPipelineLayout> pipelineLayout; diff --git a/tools/graphics-app-framework/windows/win-window.cpp b/tools/graphics-app-framework/windows/win-window.cpp index 3bbf2575a..a86e360d0 100644 --- a/tools/graphics-app-framework/windows/win-window.cpp +++ b/tools/graphics-app-framework/windows/win-window.cpp @@ -243,7 +243,7 @@ static ATOM createWindowClassAtom() windowClassDesc.cbWndExtra = 0; windowClassDesc.hInstance = (HINSTANCE) GetModuleHandle(0); windowClassDesc.hIcon = 0; - windowClassDesc.hCursor = 0; + windowClassDesc.hCursor = 
LoadCursorW(NULL, IDC_ARROW); windowClassDesc.hbrBackground = 0; windowClassDesc.lpszMenuName = 0; windowClassDesc.lpszClassName = L"SlangGraphicsWindow"; @@ -269,22 +269,32 @@ Window* createWindow(WindowDesc const& desc) OSString windowTitle(desc.title); DWORD windowExtendedStyle = 0; - DWORD windowStyle = 0; + DWORD windowStyle = WS_OVERLAPPED | WS_CAPTION | WS_SYSMENU; HINSTANCE instance = (HINSTANCE) GetModuleHandle(0); + RECT windowRect; + windowRect.left = 0; + windowRect.top = 0; + windowRect.bottom = desc.height; + windowRect.right = desc.width; + AdjustWindowRect(&windowRect, windowStyle, FALSE); + HWND windowHandle = CreateWindowExW( windowExtendedStyle, (LPWSTR) getWindowClassAtom(), windowTitle, windowStyle, - 0, 0, // x, y - desc.width, desc.height, + CW_USEDEFAULT, + 0, // x, y + windowRect.right, + windowRect.bottom, NULL, // parent NULL, // menu instance, window); + if(!windowHandle) { delete window; diff --git a/tools/render-test/png-serialize-util.cpp b/tools/render-test/png-serialize-util.cpp index a7f6aa83a..dc1a9f241 100644 --- a/tools/render-test/png-serialize-util.cpp +++ b/tools/render-test/png-serialize-util.cpp @@ -12,25 +12,14 @@ namespace renderer_test { using namespace Slang; -/* static */Slang::Result PngSerializeUtil::write(const char* filename, const Surface& surface) +/* static */ Slang::Result PngSerializeUtil::write( + const char* filename, + ISlangBlob* pixels, + uint32_t width, + uint32_t height) { - int numComps = 0; - switch (surface.m_format) - { - case Format::RGBA_Unorm_UInt8: - { - numComps = 4; - break; - } - default: break; - } - - if (numComps <= 0) - { - return SLANG_FAIL; - } - - int stbResult = stbi_write_png(filename, surface.m_width, surface.m_height, numComps, surface.m_data, surface.m_rowStrideInBytes); + int stbResult = + stbi_write_png(filename, width, height, 4, pixels->getBufferPointer(), width * 4); return stbResult ? 
SLANG_OK : SLANG_FAIL; } diff --git a/tools/render-test/png-serialize-util.h b/tools/render-test/png-serialize-util.h index 1ec5204f7..80eda3729 100644 --- a/tools/render-test/png-serialize-util.h +++ b/tools/render-test/png-serialize-util.h @@ -1,15 +1,13 @@ // png-serialize-util.h #pragma once -#include "surface.h" +#include "core/slang-blob.h" namespace renderer_test { -using namespace gfx; - struct PngSerializeUtil { - static Slang::Result write(const char* filename, const Surface& surface); + static Slang::Result write(const char* filename, ISlangBlob* pixels, uint32_t width, uint32_t height); }; diff --git a/tools/render-test/render-test-main.cpp b/tools/render-test/render-test-main.cpp index 42c14a557..18670537d 100644 --- a/tools/render-test/render-test-main.cpp +++ b/tools/render-test/render-test-main.cpp @@ -6,7 +6,6 @@ #include "slang-gfx.h" #include "tools/gfx-util/shader-cursor.h" #include "slang-support.h" -#include "surface.h" #include "png-serialize-util.h" #include "shader-renderer-util.h" @@ -26,6 +25,14 @@ #include "cpu-compute-util.h" +#define ENABLE_RENDERDOC_INTEGRATION 0 + +#if ENABLE_RENDERDOC_INTEGRATION +# include "external/renderdoc_app.h" +# define WIN32_LEAN_AND_MEAN +# include <Windows.h> +#endif + #if RENDER_TEST_CUDA # include "cuda/cuda-compute-util.h" #endif @@ -92,12 +99,12 @@ public: IRenderer* renderer, const Options& options, const ShaderCompilerUtil::Input& input) = 0; - void runCompute(); - void renderFrame(); + void runCompute(IComputeCommandEncoder* encoder); + void renderFrame(IRenderCommandEncoder* encoder); void finalize(); - virtual void applyBinding(PipelineType pipelineType) = 0; - virtual void setProjectionMatrix() = 0; + virtual void applyBinding(PipelineType pipelineType, ICommandEncoder* encoder) = 0; + virtual void setProjectionMatrix(IResourceCommandEncoder* encoder) = 0; virtual Result writeBindingOutput(BindRoot* bindRoot, const char* fileName) = 0; Result writeScreen(const char* filename); @@ -109,7 +116,7 @@ 
protected: IRenderer* renderer, Options::ShaderProgramType shaderType, const ShaderCompilerUtil::Input& input); - void _initializeFramebuffer(); + void _initializeRenderPass(); virtual void finalizeImpl(); uint64_t m_startTicks; @@ -118,7 +125,8 @@ protected: uintptr_t m_constantBufferSize; ComPtr<IRenderer> m_renderer; - + ComPtr<ICommandQueue> m_queue; + ComPtr<IRenderPassLayout> m_renderPass; ComPtr<IInputLayout> m_inputLayout; ComPtr<IBufferResource> m_vertexBuffer; ComPtr<IShaderProgram> m_shaderProgram; @@ -137,8 +145,8 @@ protected: class LegacyRenderTestApp : public RenderTestApp { public: - virtual void applyBinding(PipelineType pipelineType) SLANG_OVERRIDE; - virtual void setProjectionMatrix() SLANG_OVERRIDE; + virtual void applyBinding(PipelineType pipelineType, ICommandEncoder* encoder) SLANG_OVERRIDE; + virtual void setProjectionMatrix(IResourceCommandEncoder* encoder) SLANG_OVERRIDE; virtual Result initialize( SlangSession* session, IRenderer* renderer, @@ -148,6 +156,7 @@ public: BindingStateImpl* getBindingState() const { return m_bindingState; } virtual Result writeBindingOutput(BindRoot* bindRoot, const char* fileName) override; + virtual void finalizeImpl() SLANG_OVERRIDE; protected: uintptr_t m_constantBufferSize; @@ -159,8 +168,8 @@ protected: class ShaderObjectRenderTestApp : public RenderTestApp { public: - virtual void applyBinding(PipelineType pipelineType) SLANG_OVERRIDE; - virtual void setProjectionMatrix() SLANG_OVERRIDE; + virtual void applyBinding(PipelineType pipelineType, ICommandEncoder* encoder) SLANG_OVERRIDE; + virtual void setProjectionMatrix(IResourceCommandEncoder* encoder) SLANG_OVERRIDE; virtual Result initialize( SlangSession* session, IRenderer* renderer, @@ -456,14 +465,34 @@ SlangResult _assignVarsFromLayout( return SLANG_OK; } -void LegacyRenderTestApp::applyBinding(PipelineType pipelineType) +void LegacyRenderTestApp::applyBinding(PipelineType pipelineType, ICommandEncoder* encoder) { - 
m_bindingState->apply(m_renderer.get(), pipelineType); + m_bindingState->apply(encoder, pipelineType); } -void ShaderObjectRenderTestApp::applyBinding(PipelineType pipelineType) +void ShaderObjectRenderTestApp::applyBinding(PipelineType pipelineType, ICommandEncoder* encoder) { - m_renderer->bindRootShaderObject(pipelineType, m_programVars); + switch (pipelineType) + { + case PipelineType::Compute: + { + ComPtr<IComputeCommandEncoder> computeEncoder; + encoder->queryInterface( + SLANG_UUID_IComputeCommandEncoder, (void**)computeEncoder.writeRef()); + computeEncoder->bindRootShaderObject(m_programVars); + } + break; + case PipelineType::Graphics: + { + ComPtr<IRenderCommandEncoder> renderEncoder; + encoder->queryInterface( + SLANG_UUID_IRenderCommandEncoder, (void**)renderEncoder.writeRef()); + renderEncoder->bindRootShaderObject(m_programVars); + } + break; + default: + throw "unknown pipeline type"; + } } SlangResult LegacyRenderTestApp::initialize( @@ -478,7 +507,7 @@ SlangResult LegacyRenderTestApp::initialize( SLANG_RETURN_ON_FAIL(_initializeShaders(session, renderer, options.shaderType, input)); - _initializeFramebuffer(); + _initializeRenderPass(); m_numAddedConstantBuffers = 0; @@ -607,7 +636,7 @@ SlangResult ShaderObjectRenderTestApp::initialize( m_renderer = renderer; - _initializeFramebuffer(); + _initializeRenderPass(); { switch(m_options.shaderType) @@ -664,6 +693,13 @@ SlangResult ShaderObjectRenderTestApp::initialize( return m_pipelineState ? SLANG_OK : SLANG_FAIL; } +void LegacyRenderTestApp::finalizeImpl() +{ + m_constantBuffer = nullptr; + m_bindingState = nullptr; + RenderTestApp::finalizeImpl(); +} + void ShaderObjectRenderTestApp::finalizeImpl() { m_programVars = nullptr; @@ -682,8 +718,11 @@ Result RenderTestApp::_initializeShaders( return m_shaderProgram ? 
SLANG_OK : SLANG_FAIL; } -void RenderTestApp::_initializeFramebuffer() +void RenderTestApp::_initializeRenderPass() { + ICommandQueue::Desc queueDesc = {ICommandQueue::QueueType::Graphics}; + m_queue = m_renderer->createCommandQueue(queueDesc); + gfx::ITextureResource::Desc depthBufferDesc; depthBufferDesc.setDefaults(gfx::IResource::Usage::DepthWrite); depthBufferDesc.init2D( @@ -730,29 +769,43 @@ void RenderTestApp::_initializeFramebuffer() framebufferLayoutDesc.renderTargets = &colorAttachment; framebufferLayoutDesc.depthStencil = &depthAttachment; m_renderer->createFramebufferLayout(framebufferLayoutDesc, m_framebufferLayout.writeRef()); + gfx::IFramebuffer::Desc framebufferDesc; framebufferDesc.renderTargetCount = 1; framebufferDesc.depthStencilView = dsv.get(); framebufferDesc.renderTargetViews = rtv.readRef(); framebufferDesc.layout = m_framebufferLayout; m_renderer->createFramebuffer(framebufferDesc, m_framebuffer.writeRef()); + + IRenderPassLayout::Desc renderPassDesc = {}; + renderPassDesc.framebufferLayout = m_framebufferLayout; + renderPassDesc.renderTargetCount = 1; + IRenderPassLayout::AttachmentAccessDesc renderTargetAccess = {}; + IRenderPassLayout::AttachmentAccessDesc depthStencilAccess = {}; + renderTargetAccess.loadOp = IRenderPassLayout::AttachmentLoadOp::Clear; + renderTargetAccess.storeOp = IRenderPassLayout::AttachmentStoreOp::Store; + renderTargetAccess.initialState = ResourceState::Undefined; + renderTargetAccess.finalState = ResourceState::RenderTarget; + depthStencilAccess.loadOp = IRenderPassLayout::AttachmentLoadOp::Clear; + depthStencilAccess.storeOp = IRenderPassLayout::AttachmentStoreOp::Store; + depthStencilAccess.initialState = ResourceState::Undefined; + depthStencilAccess.finalState = ResourceState::DepthWrite; + renderPassDesc.renderTargetAccess = &renderTargetAccess; + renderPassDesc.depthStencilAccess = &depthStencilAccess; + m_renderer->createRenderPassLayout(renderPassDesc, m_renderPass.writeRef()); } -void 
LegacyRenderTestApp::setProjectionMatrix() +void LegacyRenderTestApp::setProjectionMatrix(IResourceCommandEncoder* encoder) { - auto mappedData = m_renderer->map(m_constantBuffer, MapFlavor::WriteDiscard); - if (mappedData) - { - const ProjectionStyle projectionStyle = - gfxGetProjectionStyle(m_renderer->getRendererType()); - gfxGetIdentityProjection(projectionStyle, (float*)mappedData); - - m_renderer->unmap(m_constantBuffer); - } + float matrix[16]; + const ProjectionStyle projectionStyle = gfxGetProjectionStyle(m_renderer->getRendererType()); + gfxGetIdentityProjection(projectionStyle, matrix); + encoder->uploadBufferData(m_constantBuffer, 0, sizeof(float) * 16, matrix); } -void ShaderObjectRenderTestApp::setProjectionMatrix() +void ShaderObjectRenderTestApp::setProjectionMatrix(IResourceCommandEncoder* encoder) { + SLANG_UNUSED(encoder); const ProjectionStyle projectionStyle = gfxGetProjectionStyle(m_renderer->getRendererType()); @@ -764,31 +817,29 @@ void ShaderObjectRenderTestApp::setProjectionMatrix() .setData(projectionMatrix, sizeof(projectionMatrix)); } -void RenderTestApp::renderFrame() +void RenderTestApp::renderFrame(IRenderCommandEncoder* encoder) { - setProjectionMatrix(); - auto pipelineType = PipelineType::Graphics; - m_renderer->setPipelineState(m_pipelineState); + encoder->setPipelineState(m_pipelineState); - m_renderer->setPrimitiveTopology(PrimitiveTopology::TriangleList); - m_renderer->setVertexBuffer(0, m_vertexBuffer, sizeof(Vertex)); + encoder->setPrimitiveTopology(PrimitiveTopology::TriangleList); + encoder->setVertexBuffer(0, m_vertexBuffer, sizeof(Vertex)); - applyBinding(pipelineType); + applyBinding(pipelineType, encoder); - m_renderer->draw(3); + encoder->draw(3); } -void RenderTestApp::runCompute() +void RenderTestApp::runCompute(IComputeCommandEncoder* encoder) { auto pipelineType = PipelineType::Compute; - m_renderer->setPipelineState(m_pipelineState); - applyBinding(pipelineType); - - m_startTicks = ProcessUtil::getClockTick(); - 
- m_renderer->dispatchCompute(m_options.computeDispatchSize[0], m_options.computeDispatchSize[1], m_options.computeDispatchSize[2]); + encoder->setPipelineState(m_pipelineState); + applyBinding(pipelineType, encoder); + encoder->dispatchCompute( + m_options.computeDispatchSize[0], + m_options.computeDispatchSize[1], + m_options.computeDispatchSize[2]); } void RenderTestApp::finalize() @@ -799,7 +850,11 @@ void RenderTestApp::finalize() m_vertexBuffer = nullptr; m_shaderProgram = nullptr; m_pipelineState = nullptr; - + m_renderPass = nullptr; + m_framebuffer = nullptr; + m_framebufferLayout = nullptr; + m_colorBuffer = nullptr; + m_queue = nullptr; m_renderer = nullptr; } @@ -809,10 +864,8 @@ void RenderTestApp::finalizeImpl() Result LegacyRenderTestApp::writeBindingOutput(BindRoot* bindRoot, const char* fileName) { - // Submit the work - m_renderer->submitGpuWork(); // Wait until everything is complete - m_renderer->waitForGpu(); + m_queue->wait(); FILE * f = fopen(fileName, "wb"); if (!f) @@ -832,17 +885,15 @@ Result LegacyRenderTestApp::writeBindingOutput(BindRoot* bindRoot, const char* f { IBufferResource* bufferResource = static_cast<IBufferResource*>(binding.resource.get()); const size_t bufferSize = bufferResource->getDesc()->sizeInBytes; - - unsigned int* ptr = (unsigned int*)m_renderer->map(bufferResource, MapFlavor::HostRead); - if (!ptr) + ComPtr<ISlangBlob> blob; + m_renderer->readBufferResource(bufferResource, 0, bufferSize, blob.writeRef()); + if (!blob) { return SLANG_FAIL; } - const SlangResult res = ShaderInputLayout::writeBinding(bindRoot, m_shaderInputLayout.entries[i], ptr, bufferSize, &writer); - - m_renderer->unmap(bufferResource); - + const SlangResult res = ShaderInputLayout::writeBinding( + bindRoot, m_shaderInputLayout.entries[i], blob->getBufferPointer(), bufferSize, &writer); SLANG_RETURN_ON_FAIL(res); } else @@ -856,10 +907,8 @@ Result LegacyRenderTestApp::writeBindingOutput(BindRoot* bindRoot, const char* f Result 
ShaderObjectRenderTestApp::writeBindingOutput(BindRoot* bindRoot, const char* fileName) { - // Submit the work - m_renderer->submitGpuWork(); // Wait until everything is complete - m_renderer->waitForGpu(); + m_queue->wait(); FILE * f = fopen(fileName, "wb"); if (!f) @@ -879,16 +928,14 @@ Result ShaderObjectRenderTestApp::writeBindingOutput(BindRoot* bindRoot, const c IBufferResource* bufferResource = static_cast<IBufferResource*>(resource.get()); const size_t bufferSize = bufferResource->getDesc()->sizeInBytes; - unsigned int* ptr = (unsigned int*)m_renderer->map(bufferResource, MapFlavor::HostRead); - if (!ptr) + ComPtr<ISlangBlob> blob; + m_renderer->readBufferResource(bufferResource, 0, bufferSize, blob.writeRef()); + if (!blob) { return SLANG_FAIL; } - - const SlangResult res = ShaderInputLayout::writeBinding(bindRoot, inputEntry, ptr, bufferSize, &writer); - - m_renderer->unmap(bufferResource); - + const SlangResult res = + ShaderInputLayout::writeBinding(bindRoot, inputEntry, blob->getBufferPointer(), bufferSize, &writer); SLANG_RETURN_ON_FAIL(res); } else @@ -904,52 +951,48 @@ Result RenderTestApp::writeScreen(const char* filename) { size_t rowPitch, pixelSize; ComPtr<ISlangBlob> blob; - SLANG_RETURN_ON_FAIL(m_renderer->readTextureResource(m_colorBuffer, blob.writeRef(), &rowPitch, &pixelSize)); + SLANG_RETURN_ON_FAIL(m_renderer->readTextureResource( + m_colorBuffer, ResourceState::RenderTarget, blob.writeRef(), &rowPitch, &pixelSize)); auto bufferSize = blob->getBufferSize(); - Surface surface; - size_t width = rowPitch / pixelSize; - size_t height = bufferSize / rowPitch; - surface.setUnowned( - (int)width, - (int)height, - gfx::Format::RGBA_Unorm_UInt8, - (int)rowPitch, - (void*)blob->getBufferPointer()); - return PngSerializeUtil::write(filename, surface); + uint32_t width = static_cast<uint32_t>(rowPitch / pixelSize); + uint32_t height = static_cast<uint32_t>(bufferSize / rowPitch); + return PngSerializeUtil::write(filename, blob, width, height); } 
Result RenderTestApp::update() { - m_renderer->beginFrame(); - - // Whenever we don't have Windows events to process, we render a frame. + auto commandBuffer = m_queue->createCommandBuffer(); if (m_options.shaderType == Options::ShaderProgramType::Compute) { - runCompute(); + auto encoder = commandBuffer->encodeComputeCommands(); + runCompute(encoder); + encoder->endEncoding(); } else { - static const float kClearColor[] = { 0.25, 0.25, 0.25, 1.0 }; - m_renderer->setFramebuffer(m_framebuffer); + auto resEncoder = commandBuffer->encodeResourceCommands(); + setProjectionMatrix(resEncoder); + resEncoder->endEncoding(); + auto encoder = commandBuffer->encodeRenderCommands(m_renderPass, m_framebuffer); gfx::Viewport viewport = {}; viewport.maxZ = 1.0f; viewport.extentX = (float)gWindowWidth; viewport.extentY = (float)gWindowHeight; - m_renderer->setViewportAndScissor(viewport); - - m_renderer->setClearColor(kClearColor); - m_renderer->clearFrame(); - renderFrame(); + encoder->setViewportAndScissor(viewport); + renderFrame(encoder); + encoder->endEncoding(); } + commandBuffer->close(); + + m_startTicks = ProcessUtil::getClockTick(); + m_queue->executeCommandBuffer(commandBuffer); + m_queue->wait(); // If we are in a mode where output is requested, we need to snapshot the back buffer here if (m_options.outputPath || m_options.performanceProfile) { - // Submit the work - m_renderer->submitGpuWork(); // Wait until everything is complete - m_renderer->waitForGpu(); if (m_options.performanceProfile) { @@ -1013,8 +1056,6 @@ Result RenderTestApp::update() } return SLANG_OK; } - - m_renderer->endFrame(); return SLANG_OK; } @@ -1049,11 +1090,38 @@ static SlangResult _setSessionPrelude(const Options& options, const char* exePat } // namespace renderer_test +#if ENABLE_RENDERDOC_INTEGRATION +static RENDERDOC_API_1_1_2* rdoc_api = NULL; +static void initializeRenderDoc() +{ + if (HMODULE mod = GetModuleHandleA("renderdoc.dll")) + { + pRENDERDOC_GetAPI RENDERDOC_GetAPI = + 
(pRENDERDOC_GetAPI)GetProcAddress(mod, "RENDERDOC_GetAPI"); + int ret = RENDERDOC_GetAPI(eRENDERDOC_API_Version_1_1_2, (void**)&rdoc_api); + assert(ret == 1); + } +} +static void renderDocBeginFrame() { if (rdoc_api) rdoc_api->StartFrameCapture(nullptr, nullptr); } +static void renderDocEndFrame() +{ + if (rdoc_api) + rdoc_api->EndFrameCapture(nullptr, nullptr); + _fgetchar(); +} +#else +static void initializeRenderDoc(){} +static void renderDocBeginFrame(){} +static void renderDocEndFrame(){} +#endif + static SlangResult _innerMain(Slang::StdWriters* stdWriters, SlangSession* session, int argcIn, const char*const* argvIn) { using namespace renderer_test; using namespace Slang; + initializeRenderDoc(); + StdWriters::setSingleton(stdWriters); Options options; @@ -1400,8 +1468,10 @@ static SlangResult _innerMain(Slang::StdWriters* stdWriters, SlangSession* sessi app = new ShaderObjectRenderTestApp(); else app = new LegacyRenderTestApp(); + renderDocBeginFrame(); SLANG_RETURN_ON_FAIL(app->initialize(session, renderer, options, input)); app->update(); + renderDocEndFrame(); app->finalize(); return SLANG_OK; } diff --git a/tools/render-test/shader-renderer-util.cpp b/tools/render-test/shader-renderer-util.cpp index 5b3867848..d42c5c7ef 100644 --- a/tools/render-test/shader-renderer-util.cpp +++ b/tools/render-test/shader-renderer-util.cpp @@ -7,13 +7,28 @@ namespace renderer_test { using namespace Slang; using Slang::Result; -void BindingStateImpl::apply(IRenderer* renderer, PipelineType pipelineType) +void BindingStateImpl::apply(ICommandEncoder* encoder, PipelineType pipelineType) { - renderer->setDescriptorSet( - pipelineType, - pipelineLayout, - 0, - descriptorSet); + switch (pipelineType) + { + case PipelineType::Compute: + { + ComPtr<IComputeCommandEncoder> computeEncoder; + encoder->queryInterface(SLANG_UUID_IComputeCommandEncoder, (void**)computeEncoder.writeRef()); + computeEncoder->setDescriptorSet(pipelineLayout, 0, descriptorSet); + } + break; + case 
PipelineType::Graphics: + { + ComPtr<IRenderCommandEncoder> renderEncoder; + encoder->queryInterface( + SLANG_UUID_IRenderCommandEncoder, (void**)renderEncoder.writeRef()); + renderEncoder->setDescriptorSet(pipelineLayout, 0, descriptorSet); + } + break; + default: + throw "unknown pipeline type"; + } } /* static */ Result ShaderRendererUtil::generateTextureResource( diff --git a/tools/render-test/shader-renderer-util.h b/tools/render-test/shader-renderer-util.h index 815bf2248..b79403bb4 100644 --- a/tools/render-test/shader-renderer-util.h +++ b/tools/render-test/shader-renderer-util.h @@ -33,7 +33,7 @@ struct BindingStateImpl : public Slang::RefObject uint16_t size; ///< The amount of register indices }; - void apply(IRenderer* renderer, PipelineType pipelineType); + void apply(ICommandEncoder* encoder, PipelineType pipelineType); struct OutputBinding { @@ -80,15 +80,6 @@ struct ShaderRendererUtil IRenderer* renderer, IBufferResource* addedConstantBuffer, BindingStateImpl** outBindingState); - -private: - /// Create BindingState::Desc from a list of ShaderInputLayout entries - static Slang::Result _createBindingState( - ShaderInputLayoutEntry* srcEntries, - int numEntries, - IRenderer* renderer, - IBufferResource* addedConstantBuffer, - BindingStateImpl** outBindingState); }; } // renderer_test diff --git a/tools/render-test/surface.cpp b/tools/render-test/surface.cpp deleted file mode 100644 index 636881fca..000000000 --- a/tools/render-test/surface.cpp +++ /dev/null @@ -1,223 +0,0 @@ -// surface.cpp -#include "surface.h" - -#include <stdlib.h> -#include <stdio.h> -#include <string.h> - -#include "../../source/core/slang-list.h" - -namespace gfx { -using namespace Slang; - -class MallocSurfaceAllocator: public SurfaceAllocator -{ - public: - - virtual Slang::Result allocate(int width, int height, Format format, int alignment, Surface& surface) override; - virtual void deallocate(Surface& surface) override; -}; - -static MallocSurfaceAllocator 
s_mallocSurfaceAllocator; - -/// Get the malloc allocator -/* static */SurfaceAllocator* SurfaceAllocator::getMallocAllocator() -{ - return &s_mallocSurfaceAllocator; -} - -Slang::Result MallocSurfaceAllocator::allocate(int width, int height, Format format, int alignment, Surface& surface) -{ - assert(surface.m_data == nullptr); - - // Calculate row size - - const int rowSizeInBytes = Surface::calcRowSize(format, width); - const int numRows = Surface::calcNumRows(format, height); - - alignment = (alignment <= 0) ? int(sizeof(void*)) : alignment; - // It must be a power of 2 - assert( ((alignment - 1) & alignment) == 0); - - // Align rowSize - const int alignedRowSizeInBytes = (rowSizeInBytes + alignment - 1) & -alignment; - - size_t totalSize = numRows * alignedRowSizeInBytes; - - uint8_t* data = (uint8_t*)::malloc(totalSize); - if (!data) - { - return SLANG_E_OUT_OF_MEMORY; - } - - surface.m_data = data; - surface.m_width = width; - surface.m_height = height; - surface.m_format = format; - surface.m_numRows = numRows; - surface.m_rowStrideInBytes = alignedRowSizeInBytes; - - surface.m_allocator = this; - return SLANG_OK; -} - -void MallocSurfaceAllocator::deallocate(Surface& surface) -{ - assert(surface.m_data); - // Make sure it's not an inverted, cos otherwise m_data is not the start address - assert(surface.m_rowStrideInBytes > 0); - ::free(surface.m_data); -} - -// !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! Surface !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
- -/* static */int Surface::calcRowSize(Format format, int width) -{ - size_t pixelSize = gfxGetFormatSize(format); - if (pixelSize == 0) - { - return 0; - } - return int(pixelSize * width); -} - -/* static */int Surface::calcNumRows(Format format, int height) -{ - // Don't have any compressed types, so number of rows is same as the height - return height; -} - -void Surface::init() -{ - m_width = 0; - m_height = 0; - m_format = Format::Unknown; - m_data = nullptr; - m_numRows = 0; - m_rowStrideInBytes = 0; - // NOTE! does not clear the allocator. - // If called with an allocation memory will leak! -} - -Surface::~Surface() -{ - if (m_data && m_allocator) - { - m_allocator->deallocate(*this); - } -} - -void Surface::deallocate() -{ - if (m_data && m_allocator) - { - m_allocator->deallocate(*this); - init(); - } -} - -Result Surface::allocate(int width, int height, Format format, int alignment, SurfaceAllocator* allocator) -{ - deallocate(); - allocator = allocator ? allocator : m_allocator; - if (!allocator) - { - // An allocator needs to be set on the surface, or one passed in. 
- return SLANG_FAIL; - } - return allocator->allocate(width, height, format, alignment, *this); -} - -void Surface::setUnowned(int width, int height, Format format, int strideInBytes, void* data) -{ - deallocate(); - - // This is unowned - m_allocator = nullptr; - - m_width = width; - m_height = height; - m_format = format; - m_rowStrideInBytes = strideInBytes; - m_data = (uint8_t*)data; - - m_numRows = Surface::calcNumRows(format, height); - - const int rowSizeInBytes = Surface::calcRowSize(format, width); - assert((strideInBytes > 0 && rowSizeInBytes <= strideInBytes) || (strideInBytes < 0 && rowSizeInBytes <= -strideInBytes)); -} - -void Surface::zeroContents() -{ - const int rowSizeInBytes = Surface::calcRowSize(m_format, m_width); - - const int stride = m_rowStrideInBytes; - uint8_t* dst = m_data; - - for (int i = 0; i < m_numRows; i++, dst += stride) - { - ::memset(dst, 0, rowSizeInBytes); - } -} - -void Surface::flipInplaceVertically() -{ - // Can only flip when m_height matches number of rows - assert(m_numRows == m_height); - - const int rowSizeInBytes = Surface::calcRowSize(m_format, m_width); - if (rowSizeInBytes <= 0 || m_numRows <= 1) - { - return; - } - - uint8_t* top = m_data; - uint8_t* bottom = m_data + (m_numRows - 1) * m_rowStrideInBytes; - - List<uint8_t> bufferList; - bufferList.setCount(rowSizeInBytes); - uint8_t* buffer = bufferList.getBuffer(); - - const int stride = m_rowStrideInBytes; - - const int num = m_height >> 1; - for (int i = 0; i < num; ++i, top += stride, bottom -= stride) - { - ::memcpy(buffer, top, rowSizeInBytes); - ::memcpy(top, bottom, rowSizeInBytes); - ::memcpy(bottom, buffer, rowSizeInBytes); - } -} - -SlangResult Surface::set(int width, int height, Format format, int srcRowStride, const void* data, SurfaceAllocator* allocator) -{ - if (hasContents() && m_width == width && m_height == height && m_format == format) - { - // I can just overwrite the contents that is there - } - else - { - 
SLANG_RETURN_ON_FAIL(allocate(width, height, format, 0, allocator)); - } - - // Okay just need to set the contents - - { - const size_t rowSize = calcRowSize(format, width); - - const uint8_t* srcRow = (const uint8_t*)data; - uint8_t* dstRow = (uint8_t*)m_data; - - for (int i = 0; i < m_numRows; i++) - { - ::memcpy(dstRow, srcRow, rowSize); - - srcRow += srcRowStride; - dstRow += m_rowStrideInBytes; - } - } - - return SLANG_OK; -} - -} // renderer_test diff --git a/tools/render-test/surface.h b/tools/render-test/surface.h deleted file mode 100644 index f6e888745..000000000 --- a/tools/render-test/surface.h +++ /dev/null @@ -1,86 +0,0 @@ -// surface.h -#pragma once - -#include "slang-gfx.h" - -namespace gfx { - -class Surface; - -class SurfaceAllocator -{ - public: - virtual Slang::Result allocate(int width, int height, Format format, int alignment, Surface& surface) = 0; - virtual void deallocate(Surface& surface) = 0; - - /// Get the malloc allocator - static SurfaceAllocator* getMallocAllocator(); -}; - -class Surface -{ - public: - - enum - { - kDefaultAlignment = sizeof(void*) - }; - - /// Allocate - Slang::Result allocate(int width, int height, Format format, int alignment = kDefaultAlignment, SurfaceAllocator* allocator = nullptr); - - /// Deallocate contents - void deallocate(); - /// Initialize contents (zero sized, no data). 
Note that the allocator pointer is left as is - void init(); - - /// Set unowned - void setUnowned(int width, int height, Format format, int strideInBytes, void* data); - - /// Set the contents - the memory will be owned by this surface (ie will be freed by the allocator when goes out of scope or is deallocated) - Slang::Result set(int width, int height, Format format, int strideInBytes, const void* data, SurfaceAllocator* allocator); - - template <typename T> - T* calcNextRow(T* ptr) const { return (T*)calcNextRow((void*)ptr); } - template <typename T> - const T* calcNextRow(const T* ptr) const { return (const T*)calcNextRow((const void*)ptr); } - - void* calcNextRow(void* ptr) const { return (void*)(((uint8_t*)ptr) + m_rowStrideInBytes); } - const void* calcNextRow(const void* ptr) const { return (const void*)(((const uint8_t*)ptr) + m_rowStrideInBytes); } - - /// Writes zero to all of the contents - void zeroContents(); - - /// Flips the contents vertically in place - void flipInplaceVertically(); - - /// True if has some contents - bool hasContents() const { return m_data != nullptr; } - - /// Ctor - Surface() : - m_allocator(nullptr) - { - init(); - } - /// Dtor - ~Surface(); - - /// Get the size of the row in bytes - static int calcRowSize(Format format, int width); - /// Calculates the number of rows - static int calcNumRows(Format format, int height); - - int m_width; - int m_height; - Format m_format; - - uint8_t* m_data; /// The data that makes up the image. If nullptr, has no data. Pointer to first 'row' of the image. 
- - int m_numRows; ///< Total amount of rows (typically same as height, but in compressed formats may be less) - int m_rowStrideInBytes; ///< The number of bytes between rows - - SurfaceAllocator* m_allocator; ///< Can be null if so contents is 'unowned', if set -}; - -} // renderer_test diff --git a/tools/render-test/window.cpp b/tools/render-test/window.cpp deleted file mode 100644 index 0ab4ff412..000000000 --- a/tools/render-test/window.cpp +++ /dev/null @@ -1,21 +0,0 @@ -// window.cpp - -#include "window.h" - -namespace renderer_test { -using namespace Slang; - -#if SLANG_WINDOWS_FAMILY -extern Window* createWinWindow(); -#endif - -/* static */Window* Window::create() -{ -#if SLANG_WINDOWS_FAMILY - return createWinWindow(); -#else - return nullptr; -#endif -} - -} // renderer_test diff --git a/tools/render-test/window.h b/tools/render-test/window.h deleted file mode 100644 index e647a602c..000000000 --- a/tools/render-test/window.h +++ /dev/null @@ -1,45 +0,0 @@ -// window.h -#pragma once - -#include <slang.h> -#include "../../source/core/slang-smart-pointer.h" - -namespace renderer_test { - -class Window; - -class WindowListener : public Slang::RefObject -{ -public: - virtual SlangResult update(Window* window) = 0; -}; - -class Window : public Slang::RefObject -{ -public: - virtual SlangResult initialize(int width, int height) = 0; - - virtual void show() = 0; - virtual void* getHandle() const = 0; - virtual void postQuit() { m_isQuitting = true; } - - /// Run the event loop. 
Events will be sent to the WindowListener - virtual SlangResult runLoop(WindowListener* listener) = 0; - - bool isQuitting() const { return m_isQuitting; } - int getQuitValue() const { return m_quitValue; } - - static Window* create(); - - virtual ~Window() {} - -protected: - Window() {} - - bool m_isQuitting = false; - int m_quitValue = 0; -}; - -Window* createWindow(); - -} // renderer_test diff --git a/tools/render-test/windows/win-window.cpp b/tools/render-test/windows/win-window.cpp deleted file mode 100644 index 7f45f9789..000000000 --- a/tools/render-test/windows/win-window.cpp +++ /dev/null @@ -1,185 +0,0 @@ -// win-window.cpp - -#define _CRT_SECURE_NO_WARNINGS 1 - -#include <slang.h> -#include <slang-com-helper.h> - -#include "../window.h" - -#define WIN32_LEAN_AND_MEAN -#define NOMINMAX -#include <Windows.h> -#undef WIN32_LEAN_AND_MEAN -#undef NOMINMAX - -#ifdef _MSC_VER -#pragma warning(disable: 4996) -#endif - -#include <stdio.h> - -namespace renderer_test { - -class WinWindow : public Window -{ -public: - virtual SlangResult initialize(int width, int height) SLANG_OVERRIDE; - - virtual void show() SLANG_OVERRIDE; - virtual void* getHandle() const SLANG_OVERRIDE { return m_hwnd; } - virtual SlangResult runLoop(WindowListener* listener) SLANG_OVERRIDE; - - virtual ~WinWindow(); - - static LRESULT CALLBACK windowProc(HWND windowHandle, UINT message, WPARAM wParam, LPARAM lParam); - -protected: - - HINSTANCE m_hinst = nullptr; - HWND m_hwnd = nullptr; -}; - -// -// We use a bare-minimum window procedure to get things up and running. 
-// - -/* static */LRESULT CALLBACK WinWindow::windowProc(HWND windowHandle, UINT message, WPARAM wParam, LPARAM lParam) -{ - switch (message) - { - case WM_CLOSE: - PostQuitMessage(0); - return 0; - } - return DefWindowProcW(windowHandle, message, wParam, lParam); -} - -static ATOM _getWindowClassAtom(HINSTANCE hinst) -{ - static ATOM s_windowClassAtom; - - if (s_windowClassAtom) - { - return s_windowClassAtom; - } - WNDCLASSEXW windowClassDesc; - windowClassDesc.cbSize = sizeof(windowClassDesc); - windowClassDesc.style = CS_OWNDC | CS_HREDRAW | CS_VREDRAW; - windowClassDesc.lpfnWndProc = &WinWindow::windowProc; - windowClassDesc.cbClsExtra = 0; - windowClassDesc.cbWndExtra = 0; - windowClassDesc.hInstance = hinst; - windowClassDesc.hIcon = 0; - windowClassDesc.hCursor = 0; - windowClassDesc.hbrBackground = 0; - windowClassDesc.lpszMenuName = 0; - windowClassDesc.lpszClassName = L"SlangRenderTest"; - windowClassDesc.hIconSm = 0; - s_windowClassAtom = RegisterClassExW(&windowClassDesc); - - return s_windowClassAtom; -} - -SlangResult WinWindow::initialize(int widthIn, int heightIn) -{ - // Do initial window-creation stuff here, rather than in the renderer-specific files - - m_hinst = GetModuleHandleA(0); - - // First we register a window class. - ATOM windowClassAtom = _getWindowClassAtom(m_hinst); - if (!windowClassAtom) - { - fprintf(stderr, "error: failed to register window class\n"); - return SLANG_FAIL; - } - - // Next, we create a window using that window class. - - // We will create a borderless window since our screen-capture logic in GL - // seems to get thrown off by having to deal with a window frame. 
- DWORD windowStyle = WS_POPUP; - DWORD windowExtendedStyle = 0; - - RECT windowRect = { 0, 0, widthIn, heightIn }; - AdjustWindowRectEx(&windowRect, windowStyle, /*hasMenu=*/false, windowExtendedStyle); - - { - auto width = windowRect.right - windowRect.left; - auto height = windowRect.bottom - windowRect.top; - - LPWSTR windowName = L"Slang Render Test"; - m_hwnd = CreateWindowExW( - windowExtendedStyle, - (LPWSTR)windowClassAtom, - windowName, - windowStyle, - 0, 0, // x, y - width, height, - NULL, // parent - NULL, // menu - m_hinst, - NULL); - } - if (!m_hwnd) - { - fprintf(stderr, "error: failed to create window\n"); - return SLANG_FAIL; - } - - return SLANG_OK; -} - - -void WinWindow::show() -{ - // Once initialization is all complete, we show the window... - int showCommand = SW_SHOW; - ShowWindow(m_hwnd, showCommand); -} - -SlangResult WinWindow::runLoop(WindowListener* listener) -{ - // ... and enter the event loop: - while (!m_isQuitting) - { - MSG message; - int result = PeekMessageW(&message, NULL, 0, 0, PM_REMOVE); - if (result != 0) - { - if (message.message == WM_QUIT) - { - m_quitValue = (int)message.wParam; - return SLANG_OK; - } - - TranslateMessage(&message); - DispatchMessageW(&message); - } - else - { - if (listener) - { - SLANG_RETURN_ON_FAIL(listener->update(this)); - } - } - } - - return SLANG_OK; -} - -WinWindow::~WinWindow() -{ - if (m_hwnd) - { - DestroyWindow(m_hwnd); - } -} - -Window* createWinWindow() -{ - return new WinWindow; -} - -} // namespace renderer_test diff --git a/tools/slang-test/slang-test-main.cpp b/tools/slang-test/slang-test-main.cpp index d57ea50e4..b49dedd2d 100644 --- a/tools/slang-test/slang-test-main.cpp +++ b/tools/slang-test/slang-test-main.cpp @@ -853,7 +853,7 @@ static RenderApiFlags _getAvailableRenderApiFlags(TestContext* context) if (SLANG_SUCCEEDED(spawnAndWaitSharedLibrary(context, "device-startup", cmdLine, exeRes)) && TestToolUtil::getReturnCodeFromInt(exeRes.resultCode) == ToolReturnCode::Success) { - 
availableRenderApiFlags |= RenderApiFlags(1) << int(apiType); + availableRenderApiFlags |= RenderApiFlags(1) << int(apiType); } } } |
