diff options
| author | Yong He <yonghe@outlook.com> | 2021-04-05 13:31:05 -0700 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2021-04-05 13:31:05 -0700 |
| commit | 086ecf41fa21138899960bb9805bc8ced91690f0 (patch) | |
| tree | c98af81ffc28371a9334e71987a85f9e88bce678 | |
| parent | dd662f5cda97e7a6720ef526509a772a06112d4a (diff) | |
Transient root shader object. (#1782)
| -rw-r--r-- | build/visual-studio/gfx/gfx.vcxproj | 1 | ||||
| -rw-r--r-- | build/visual-studio/gfx/gfx.vcxproj.filters | 3 | ||||
| -rw-r--r-- | examples/gpu-printing/main.cpp | 4 | ||||
| -rw-r--r-- | examples/hello-world/main.cpp | 77 | ||||
| -rw-r--r-- | examples/shader-object/main.cpp | 74 | ||||
| -rw-r--r-- | examples/shader-toy/main.cpp | 12 | ||||
| -rw-r--r-- | slang-gfx.h | 42 | ||||
| -rw-r--r-- | tools/gfx/command-writer.h | 6 | ||||
| -rw-r--r-- | tools/gfx/cpu/render-cpu.cpp | 438 | ||||
| -rw-r--r-- | tools/gfx/cuda/render-cuda.cpp | 25 | ||||
| -rw-r--r-- | tools/gfx/d3d11/render-d3d11.cpp | 62 | ||||
| -rw-r--r-- | tools/gfx/d3d12/descriptor-heap-d3d12.cpp | 3 | ||||
| -rw-r--r-- | tools/gfx/d3d12/descriptor-heap-d3d12.h | 127 | ||||
| -rw-r--r-- | tools/gfx/d3d12/render-d3d12.cpp | 376 | ||||
| -rw-r--r-- | tools/gfx/immediate-renderer-base.cpp | 42 | ||||
| -rw-r--r-- | tools/gfx/immediate-renderer-base.h | 165 | ||||
| -rw-r--r-- | tools/gfx/open-gl/render-gl.cpp | 74 | ||||
| -rw-r--r-- | tools/gfx/renderer-shared.h | 4 | ||||
| -rw-r--r-- | tools/gfx/transient-resource-heap-base.h | 113 | ||||
| -rw-r--r-- | tools/gfx/vulkan/render-vk.cpp | 235 | ||||
| -rw-r--r-- | tools/platform/gui.cpp | 2 | ||||
| -rw-r--r-- | tools/render-test/render-test-main.cpp | 49 |
22 files changed, 965 insertions, 969 deletions
diff --git a/build/visual-studio/gfx/gfx.vcxproj b/build/visual-studio/gfx/gfx.vcxproj index e24e09e07..2bf81058f 100644 --- a/build/visual-studio/gfx/gfx.vcxproj +++ b/build/visual-studio/gfx/gfx.vcxproj @@ -198,6 +198,7 @@ <ClInclude Include="..\..\..\tools\gfx\simple-render-pass-layout.h" /> <ClInclude Include="..\..\..\tools\gfx\simple-transient-resource-heap.h" /> <ClInclude Include="..\..\..\tools\gfx\slang-context.h" /> + <ClInclude Include="..\..\..\tools\gfx\transient-resource-heap-base.h" /> <ClInclude Include="..\..\..\tools\gfx\vulkan\render-vk.h" /> <ClInclude Include="..\..\..\tools\gfx\vulkan\vk-api.h" /> <ClInclude Include="..\..\..\tools\gfx\vulkan\vk-descriptor-allocator.h" /> diff --git a/build/visual-studio/gfx/gfx.vcxproj.filters b/build/visual-studio/gfx/gfx.vcxproj.filters index fa859d6ac..4b79d826d 100644 --- a/build/visual-studio/gfx/gfx.vcxproj.filters +++ b/build/visual-studio/gfx/gfx.vcxproj.filters @@ -66,6 +66,9 @@ <ClInclude Include="..\..\..\tools\gfx\slang-context.h"> <Filter>Header Files</Filter> </ClInclude> + <ClInclude Include="..\..\..\tools\gfx\transient-resource-heap-base.h"> + <Filter>Header Files</Filter> + </ClInclude> <ClInclude Include="..\..\..\tools\gfx\vulkan\render-vk.h"> <Filter>Header Files</Filter> </ClInclude> diff --git a/examples/gpu-printing/main.cpp b/examples/gpu-printing/main.cpp index a0acb8159..265b4c5ff 100644 --- a/examples/gpu-printing/main.cpp +++ b/examples/gpu-printing/main.cpp @@ -121,9 +121,7 @@ Result execute() auto queue = gDevice->createCommandQueue(queueDesc); auto commandBuffer = transientHeap->createCommandBuffer(); auto encoder = commandBuffer->encodeComputeCommands(); - auto rootShaderObject = gDevice->createRootShaderObject(gProgram); - encoder->setPipelineState(gPipelineState); - encoder->bindRootShaderObject(rootShaderObject); + auto rootShaderObject = encoder->bindPipeline(gPipelineState); encoder->dispatchCompute(1, 1, 1); encoder->endEncoding(); commandBuffer->close(); diff --git a/examples/hello-world/main.cpp b/examples/hello-world/main.cpp index 19413948a..6b9104072 100644 --- a/examples/hello-world/main.cpp +++ b/examples/hello-world/main.cpp @@ -206,7 +206,6 @@ gfx::Result loadShaderProgram( // building an example program. // ComPtr<gfx::IPipelineState> gPipelineState; -ComPtr<gfx::IShaderObject> gRootObject; ComPtr<gfx::IBufferResource> gVertexBuffer; // Now that we've covered the function that actually loads and @@ -251,38 +250,6 @@ Slang::Result initialize() ComPtr<IShaderProgram> shaderProgram; SLANG_RETURN_ON_FAIL(loadShaderProgram(gDevice, shaderProgram.writeRef())); - // In order to bind shader parameters to the pipeline, we need - // to know how those parameters were assigned to locations/bindings/registers - // for the target graphics API. - // - // The Slang compiler assigns locations to parameters in a deterministic - // fashion, so it is possible for a programmer to hard-code locations - // into their application code that will match up with their shaders. - // - // Hard-coding of locations can become intractable as an application needs - // to support more different target platforms and graphics APIs, as well - // as more shaders with different specialized variants. - // - // Rather than rely on hard-coded locations, our examples will make use of - // reflection information provided by the Slang compiler (see `programLayout` - // above), and our example graphics API layer will translate that reflection - // information into a layout for a "root shader object." - // - // The root object will store values/bindings for all of the parameters in - // the `shaderProgram`. At a conceptual level we can think of `rootObject` as - // representing the "global scope" of the shader program that was loaded; - // it has entries for each global shader parameter that was declared. - // - // Multiple root objects can be created from the same program, and will have - // separate storage for parameter values. - // - // Readers who are familiar with D3D12 or Vulkan might think of this root - // layout as being similar in spirit to a "root signature" or "pipeline layout." - // - ComPtr<IShaderObject> rootObject; - SLANG_RETURN_ON_FAIL(gDevice->createRootShaderObject(shaderProgram, rootObject.writeRef())); - gRootObject = rootObject; - // Following the D3D12/Vulkan style of API, we need a pipeline state object // (PSO) to encapsulate the configuration of the overall graphics pipeline. // @@ -315,6 +282,38 @@ virtual void renderFrame(int frameBufferIndex) override viewport.extentY = (float)windowHeight; renderEncoder->setViewportAndScissor(viewport); + // In order to bind shader parameters to the pipeline, we need + // to know how those parameters were assigned to locations/bindings/registers + // for the target graphics API. + // + // The Slang compiler assigns locations to parameters in a deterministic + // fashion, so it is possible for a programmer to hard-code locations + // into their application code that will match up with their shaders. + // + // Hard-coding of locations can become intractable as an application needs + // to support more different target platforms and graphics APIs, as well + // as more shaders with different specialized variants. + // + // Rather than rely on hard-coded locations, our examples will make use of + // reflection information provided by the Slang compiler (see `programLayout` + // above), and our example graphics API layer will translate that reflection + // information into a layout for a "root shader object." + // + // The root object will store values/bindings for all of the parameters in + // the `IShaderProgram` used to create the pipeline state. At a conceptual + // level we can think of `rootObject` as representing the "global scope" of + // the shader program that was loaded; it has entries for each global shader + // parameter that was declared. + // + // Readers who are familiar with D3D12 or Vulkan might think of this root + // layout as being similar in spirit to a "root signature" or "pipeline layout." + // + // We start parameter binding by binding the pipeline state in command encoder. + // This method will return a transient root shader object for us to write our + // shader parameters into. + // + auto rootObject = renderEncoder->bindPipeline(gPipelineState); + // We will update the model-view-projection matrix that is passed // into the shader code via the `Uniforms` buffer on a per-frame // basis, even though the data that is loaded does not change @@ -322,8 +321,7 @@ virtual void renderFrame(int frameBufferIndex) override // auto deviceInfo = gDevice->getDeviceInfo(); - // - // We know that `gRootObject` is a root shader object created + // We know that `rootObject` is a root shader object created // from our program, and that it is set up to hold values for // all the parameter of that program. In order to actually // set values, we need to be able to look up the location @@ -341,7 +339,7 @@ virtual void renderFrame(int frameBufferIndex) override // a diretory path of `/` for the root directory in a file // system. // - ShaderCursor rootCursor(gRootObject); + ShaderCursor rootCursor(rootObject); // // Next, we use a convenience overload of `operator[]` to // navigate from the root cursor down to the parameter we @@ -375,13 +373,6 @@ virtual void renderFrame(int frameBufferIndex) override // hard-coded even in cross-platform code. // - // Now we configure our graphics pipeline state by setting the - // PSO, binding our root shader object to it (which references - // the `Uniforms` buffer that will filled in above). - // - renderEncoder->setPipelineState(gPipelineState); - renderEncoder->bindRootShaderObject(gRootObject); - // We also need to set up a few pieces of fixed-function pipeline // state that are not bound by the pipeline state above. // diff --git a/examples/shader-object/main.cpp b/examples/shader-object/main.cpp index 9329a5418..6efe2f97d 100644 --- a/examples/shader-object/main.cpp +++ b/examples/shader-object/main.cpp @@ -136,7 +136,6 @@ int main() // interacting with the graphics API. Slang::ComPtr<gfx::IDevice> device; IDevice::Desc deviceDesc = {}; - deviceDesc.deviceType = DeviceType::Vulkan; SLANG_RETURN_ON_FAIL(gfxCreateDevice(&deviceDesc, device.writeRef())); Slang::ComPtr<gfx::ITransientResourceHeap> transientHeap; @@ -184,46 +183,51 @@ int main() viewDesc.format = gfx::Format::Unknown; SLANG_RETURN_ON_FAIL(device->createBufferView(numbersBuffer, viewDesc, bufferView.writeRef())); - // Now comes the interesting part: binding the shader parameter for the - // compute kernel that we about to launch. We would like to construct - // a shader object that represents a `f(x)=x+1` transformation and apply - // it to the numbers in `numbersBuffer`. - // To start, we create a root shader object that represents the root level - // scope of the shader parameters. - ComPtr<gfx::IShaderObject> rootObject; - SLANG_RETURN_ON_FAIL(device->createRootShaderObject(shaderProgram, rootObject.writeRef())); - // We can set parameters directly with `rootObject`, but that requires us to use - // the Slang reflection API to obtain the proper offsets into the root object for each parameter. - // We implemented these logic in the `ShaderCursor` helper class, which simplifies the user - // code to find shader parameters. Here we demonstrate how to set parameters with `ShaderCursor`. - gfx::ShaderCursor entryPointCursor(rootObject->getEntryPoint(0)); // get a cursor the the first entry-point. - // Bind buffer view to the entry point. - entryPointCursor.getPath("buffer").setResource(bufferView); - - // Next, we create a shader object that represents the transformer we want to use. - // To do so, we first need to lookup for the `AddTransformer` type defined in the shader code. - slang::TypeReflection* addTransformerType = slangReflection->findTypeByName("AddTransformer"); - - // Now we can use this type to create a shader object that can be bound to the root object. - ComPtr<gfx::IShaderObject> transformer; - SLANG_RETURN_ON_FAIL(device->createShaderObject(addTransformerType, transformer.writeRef())); - // Set the `c` field of the `AddTransformer`. - float c = 1.0f; - gfx::ShaderCursor(transformer).getPath("c").setData(&c, sizeof(float)); - - // Now the transformer object is ready, we can bind it to root object. - entryPointCursor.getPath("transformer").setObject(transformer); - - // We have set up all required parameters in entry-point object, now it is time - // to bind the pipeline and root object and launch the kernel. + // We have done all the set up work, now it is time to start recording a command buffer for + // GPU execution. { ICommandQueue::Desc queueDesc = {ICommandQueue::QueueType::Graphics}; auto queue = device->createCommandQueue(queueDesc); auto commandBuffer = transientHeap->createCommandBuffer(); auto encoder = commandBuffer->encodeComputeCommands(); - encoder->setPipelineState(pipelineState); - encoder->bindRootShaderObject(rootObject); + + + // Now comes the interesting part: binding the shader parameter for the + // compute kernel that we about to launch. We would like to construct + // a shader object that represents a `f(x)=x+1` transformation and apply + // it to the numbers in `numbersBuffer`. + + // First, obtain a root shader object from command encoder to start parameter binding. + auto rootObject = encoder->bindPipeline(pipelineState); + + // Next, we create a shader object that represents the transformer we want to use. + // To do so, we first need to lookup for the `AddTransformer` type defined in the shader + // code. + slang::TypeReflection* addTransformerType = + slangReflection->findTypeByName("AddTransformer"); + + // Now we can use this type to create a shader object that can be bound to the root object. + ComPtr<gfx::IShaderObject> transformer; + SLANG_RETURN_ON_FAIL( + device->createShaderObject(addTransformerType, transformer.writeRef())); + // Set the `c` field of the `AddTransformer`. + float c = 1.0f; + gfx::ShaderCursor(transformer).getPath("c").setData(&c, sizeof(float)); + + // We can set parameters directly with `rootObject`, but that requires us to use + // the Slang reflection API to obtain the proper offsets into the root object for each + // parameter. We implemented these logic in the `ShaderCursor` helper class, which + // simplifies the user code to find shader parameters. Here we demonstrate how to set + // parameters with `ShaderCursor`. + gfx::ShaderCursor entryPointCursor( + rootObject->getEntryPoint(0)); // get a cursor the the first entry-point. + // Bind buffer view to the entry point. + entryPointCursor.getPath("buffer").setResource(bufferView); + + // Bind the previously created transformer object to root object. + entryPointCursor.getPath("transformer").setObject(transformer); + encoder->dispatchCompute(1, 1, 1); encoder->endEncoding(); commandBuffer->close(); diff --git a/examples/shader-toy/main.cpp b/examples/shader-toy/main.cpp index a142c3c15..40c97e0f4 100644 --- a/examples/shader-toy/main.cpp +++ b/examples/shader-toy/main.cpp @@ -286,7 +286,6 @@ Result loadShaderProgram(gfx::IDevice* device, ComPtr<gfx::IShaderProgram>& outS } ComPtr<IShaderProgram> gShaderProgram; -ComPtr<gfx::IShaderObject> gRootObject[kSwapchainImageCount]; ComPtr<gfx::IPipelineState> gPipelineState; ComPtr<gfx::IBufferResource> gVertexBuffer; @@ -371,10 +370,7 @@ virtual void renderFrame(int frameIndex) override uniforms.iResolution[1] = float(windowHeight); } - gRootObject[frameIndex] = gDevice->createRootShaderObject(gShaderProgram); - auto constantBuffer = gRootObject[frameIndex]->getObject(ShaderOffset()); - constantBuffer->setData(ShaderOffset(), &uniforms, sizeof(uniforms)); - + // Encode render commands. auto encoder = commandBuffer->encodeRenderCommands(gRenderPass, gFramebuffers[frameIndex]); @@ -383,8 +379,10 @@ virtual void renderFrame(int frameIndex) override viewport.extentX = (float)windowWidth; viewport.extentY = (float)windowHeight; encoder->setViewportAndScissor(viewport); - encoder->setPipelineState(gPipelineState); - encoder->bindRootShaderObject(gRootObject[frameIndex]); + auto rootObject = encoder->bindPipeline(gPipelineState); + auto constantBuffer = rootObject->getObject(ShaderOffset()); + constantBuffer->setData(ShaderOffset(), &uniforms, sizeof(uniforms)); + encoder->setVertexBuffer(0, gVertexBuffer, sizeof(FullScreenTriangle::Vertex)); encoder->setPrimitiveTopology(PrimitiveTopology::TriangleList); encoder->draw(3); diff --git a/slang-gfx.h b/slang-gfx.h index 02b4a9325..c87c0adb6 100644 --- a/slang-gfx.h +++ b/slang-gfx.h @@ -1110,9 +1110,19 @@ public: class IRenderCommandEncoder : public ICommandEncoder { public: - virtual SLANG_NO_THROW void SLANG_MCALL setPipelineState(IPipelineState* state) = 0; - virtual SLANG_NO_THROW void SLANG_MCALL - bindRootShaderObject(IShaderObject* object) = 0; + // Sets the current pipeline state. This method returns a transient shader object for + // writing shader parameters. This shader object will not retain any resources or + // sub-shader-objects bound to it. The user must be responsible for ensuring that any + // resources or shader objects that is set into `outRooShaderObject` stays alive during + // the execution of the command buffer. + virtual SLANG_NO_THROW Result SLANG_MCALL + bindPipeline(IPipelineState* state, IShaderObject** outRootShaderObject) = 0; + inline IShaderObject* bindPipeline(IPipelineState* state) + { + IShaderObject* rootObject = nullptr; + SLANG_RETURN_NULL_ON_FAIL(bindPipeline(state, &rootObject)); + return rootObject; + } virtual SLANG_NO_THROW void SLANG_MCALL setViewports(uint32_t count, const Viewport* viewports) = 0; @@ -1156,10 +1166,19 @@ public: class IComputeCommandEncoder : public ICommandEncoder { public: - virtual SLANG_NO_THROW void SLANG_MCALL - bindRootShaderObject(IShaderObject* object) = 0; - - virtual SLANG_NO_THROW void SLANG_MCALL setPipelineState(IPipelineState* state) = 0; + // Sets the current pipeline state. This method returns a transient shader object for + // writing shader parameters. This shader object will not retain any resources or + // sub-shader-objects bound to it. The user must be responsible for ensuring that any + // resources or shader objects that is set into `outRooShaderObject` stays alive during + // the execution of the command buffer. + virtual SLANG_NO_THROW Result SLANG_MCALL + bindPipeline(IPipelineState* state, IShaderObject** outRootShaderObject) = 0; + inline IShaderObject* bindPipeline(IPipelineState* state) + { + IShaderObject* rootObject = nullptr; + SLANG_RETURN_NULL_ON_FAIL(bindPipeline(state, &rootObject)); + return rootObject; + } virtual SLANG_NO_THROW void SLANG_MCALL dispatchCompute(int x, int y, int z) = 0; }; #define SLANG_UUID_IComputeCommandEncoder \ @@ -1534,15 +1553,6 @@ public: return object; } - virtual SLANG_NO_THROW Result SLANG_MCALL createRootShaderObject(IShaderProgram* program, IShaderObject** outObject) = 0; - - inline ComPtr<IShaderObject> createRootShaderObject(IShaderProgram* program) - { - ComPtr<IShaderObject> object; - SLANG_RETURN_NULL_ON_FAIL(createRootShaderObject(program, object.writeRef())); - return object; - } - virtual SLANG_NO_THROW Result SLANG_MCALL createProgram(const IShaderProgram::Desc& desc, IShaderProgram** outProgram) = 0; inline ComPtr<IShaderProgram> createProgram(const IShaderProgram::Desc& desc) diff --git a/tools/gfx/command-writer.h b/tools/gfx/command-writer.h index 76a72edc3..e93244ed1 100644 --- a/tools/gfx/command-writer.h +++ b/tools/gfx/command-writer.h @@ -127,12 +127,10 @@ public: m_commands.add(Command(CommandName::SetPipelineState, offset)); } - void bindRootShaderObject(PipelineType pipelineType, IShaderObject* object) + void bindRootShaderObject(IShaderObject* object) { auto rootOffset = encodeObject(object); - m_commands.add(Command( - CommandName::BindRootShaderObject, - (uint32_t)pipelineType, rootOffset)); + m_commands.add(Command(CommandName::BindRootShaderObject, rootOffset)); } void uploadBufferData(IBufferResource* buffer, size_t offset, size_t size, void* data) diff --git a/tools/gfx/cpu/render-cpu.cpp b/tools/gfx/cpu/render-cpu.cpp index f0b612e2f..8dbe5460a 100644 --- a/tools/gfx/cpu/render-cpu.cpp +++ b/tools/gfx/cpu/render-cpu.cpp @@ -7,9 +7,7 @@ #include "core/slang-basic.h" #include "core/slang-blob.h" -#include "../command-writer.h" -#include "../renderer-shared.h" -#include "../simple-transient-resource-heap.h" +#include "../immediate-renderer-base.h" #include "../slang-context.h" #define SLANG_PRELUDE_NAMESPACE slang_prelude @@ -1014,7 +1012,6 @@ public: // type, we need to make sure to use that type as the specialization argument. // TODO: need to implement the case where the field is an array of existential values. - SLANG_ASSERT(bindingRange.count == 1); ExtendedShaderObjectType specializedSubObjType; SLANG_RETURN_ON_FAIL(m_objects[subObjIndex]->getSpecializedShaderObjectType(&specializedSubObjType)); args.add(specializedSubObjType); @@ -1097,308 +1094,72 @@ public: } }; -class CPUDevice : public RendererBase +class CPUDevice : public ImmediateComputeDeviceBase { private: RefPtr<CPUPipelineState> m_currentPipeline = nullptr; RefPtr<CPURootShaderObject> m_currentRootObject = nullptr; DeviceInfo m_info; - class CommandQueueImpl; - - class CommandBufferImpl - : public ICommandBuffer - , public CommandWriter - , public RefObject + virtual void setPipelineState(IPipelineState* state) override { - public: - SLANG_REF_OBJECT_IUNKNOWN_ALL - ICommandBuffer* getInterface(const Guid& guid) - { - if (guid == GfxGUID::IID_ISlangUnknown || guid == GfxGUID::IID_ICommandBuffer) - return static_cast<ICommandBuffer*>(this); - return nullptr; - } - public: - void init(CPUDevice* device) - { - SLANG_UNUSED(device); - } - virtual SLANG_NO_THROW void SLANG_MCALL encodeRenderCommands( - IRenderPassLayout* renderPass, - IFramebuffer* framebuffer, - IRenderCommandEncoder** outEncoder) override - { - SLANG_UNUSED(renderPass); - SLANG_UNUSED(framebuffer); - *outEncoder = nullptr; - } - - class ComputeCommandEncoderImpl - : public IComputeCommandEncoder - { - public: - virtual SLANG_NO_THROW SlangResult SLANG_MCALL - queryInterface(SlangUUID const& uuid, void** outObject) override - { - if (uuid == GfxGUID::IID_ISlangUnknown || - uuid == GfxGUID::IID_IComputeCommandEncoder) - { - *outObject = static_cast<IComputeCommandEncoder*>(this); - return SLANG_OK; - } - *outObject = nullptr; - return SLANG_E_NO_INTERFACE; - } - virtual SLANG_NO_THROW uint32_t SLANG_MCALL addRef() override { return 1; } - virtual SLANG_NO_THROW uint32_t SLANG_MCALL release() override { return 1; } - - public: - CommandWriter* m_writer; - - virtual SLANG_NO_THROW void SLANG_MCALL endEncoding() override {} - void init(CommandBufferImpl* cmdBuffer) - { - m_writer = cmdBuffer; - } - - virtual SLANG_NO_THROW void SLANG_MCALL setPipelineState(IPipelineState* state) override - { - m_writer->setPipelineState(state); - } - virtual SLANG_NO_THROW void SLANG_MCALL - bindRootShaderObject(IShaderObject* object) override - { - m_writer->bindRootShaderObject(PipelineType::Compute, object); - } - - virtual SLANG_NO_THROW void SLANG_MCALL dispatchCompute(int x, int y, int z) override - { - m_writer->dispatchCompute(x, y, z); - } - }; - - ComputeCommandEncoderImpl m_computeCommandEncoder; - virtual SLANG_NO_THROW void SLANG_MCALL - encodeComputeCommands(IComputeCommandEncoder** outEncoder) override - { - m_computeCommandEncoder.init(this); - *outEncoder = &m_computeCommandEncoder; - } - - class ResourceCommandEncoderImpl - : public IResourceCommandEncoder - { - public: - virtual SLANG_NO_THROW SlangResult SLANG_MCALL - queryInterface(SlangUUID const& uuid, void** outObject) override - { - if (uuid == GfxGUID::IID_ISlangUnknown || - uuid == GfxGUID::IID_IResourceCommandEncoder) - { - *outObject = static_cast<IResourceCommandEncoder*>(this); - return SLANG_OK; - } - *outObject = nullptr; - return SLANG_E_NO_INTERFACE; - } - virtual SLANG_NO_THROW uint32_t SLANG_MCALL addRef() override { return 1; } - virtual SLANG_NO_THROW uint32_t SLANG_MCALL release() override { return 1; } - - public: - CommandWriter* m_writer; - - void init(CommandBufferImpl* cmdBuffer) - { - m_writer = cmdBuffer; - } - - virtual SLANG_NO_THROW void SLANG_MCALL endEncoding() override {} - virtual SLANG_NO_THROW void SLANG_MCALL copyBuffer( - IBufferResource* dst, - size_t dstOffset, - IBufferResource* src, - size_t srcOffset, - size_t size) override - { - m_writer->copyBuffer(dst, dstOffset, src, srcOffset, size); - } - - virtual SLANG_NO_THROW void SLANG_MCALL - uploadBufferData(IBufferResource* dst, size_t offset, size_t size, void* data) override - { - m_writer->uploadBufferData(dst, offset, size, data); - } - }; - - ResourceCommandEncoderImpl m_resourceCommandEncoder; - - virtual SLANG_NO_THROW void SLANG_MCALL - encodeResourceCommands(IResourceCommandEncoder** outEncoder) override - { - m_resourceCommandEncoder.init(this); - *outEncoder = &m_resourceCommandEncoder; - } - - virtual SLANG_NO_THROW void SLANG_MCALL close() override {} - }; + m_currentPipeline = static_cast<CPUPipelineState*>(state); + } - class CommandQueueImpl - : public ICommandQueue - , public RefObject + virtual void bindRootShaderObject(IShaderObject* object) override { - public: - SLANG_REF_OBJECT_IUNKNOWN_ALL - ICommandQueue* getInterface(const Guid& guid) - { - if (guid == GfxGUID::IID_ISlangUnknown || guid == GfxGUID::IID_ICommandQueue) - return static_cast<ICommandQueue*>(this); - return nullptr; - } - - public: - RefPtr<CPUPipelineState> currentPipeline; - RefPtr<CPURootShaderObject> currentRootObject; - RefPtr<CPUDevice> renderer; - Desc m_desc; - public: - void init(CPUDevice* inRenderer) - { - renderer = inRenderer; - m_desc.type = ICommandQueue::QueueType::Graphics; - } - ~CommandQueueImpl() - { - currentPipeline = nullptr; - currentRootObject = nullptr; - } - - public: - virtual SLANG_NO_THROW const Desc& SLANG_MCALL getDesc() override - { - return m_desc; - } - - virtual SLANG_NO_THROW void SLANG_MCALL - executeCommandBuffers(uint32_t count, ICommandBuffer* const* commandBuffers) override - { - for (uint32_t i = 0; i < count; i++) - { - execute(static_cast<CommandBufferImpl*>(commandBuffers[i])); - } - } - - virtual SLANG_NO_THROW void SLANG_MCALL wait() override - {} - - public: - void setPipelineState(IPipelineState* state) - { - currentPipeline = static_cast<CPUPipelineState*>(state); - } - - Result bindRootShaderObject(PipelineType pipelineType, IShaderObject* object) - { - currentRootObject = static_cast<CPURootShaderObject*>(object); - if (currentRootObject) - return SLANG_OK; - return SLANG_E_INVALID_ARG; - } - - void dispatchCompute(int x, int y, int z) - { - int entryPointIndex = 0; - int targetIndex = 0; - - // Specialize the compute kernel based on the shader object bindings. - RefPtr<PipelineStateBase> newPipeline; - renderer->maybeSpecializePipeline(currentPipeline, currentRootObject, newPipeline); - currentPipeline = static_cast<CPUPipelineState*>(newPipeline.Ptr()); - - auto program = currentPipeline->getProgram(); - auto entryPointLayout = currentRootObject->getLayout()->getEntryPoint(entryPointIndex); - auto entryPointName = entryPointLayout->getEntryPointName(); + m_currentRootObject = static_cast<CPURootShaderObject*>(object); + } - auto entryPointObject = currentRootObject->getEntryPoint(entryPointIndex); + virtual void dispatchCompute(int x, int y, int z) override + { + int entryPointIndex = 0; + int targetIndex = 0; - ComPtr<ISlangSharedLibrary> sharedLibrary; - program->slangProgram->getEntryPointHostCallable(entryPointIndex, targetIndex, sharedLibrary.writeRef()); + // Specialize the compute kernel based on the shader object bindings. + RefPtr<PipelineStateBase> newPipeline; + maybeSpecializePipeline(m_currentPipeline, m_currentRootObject, newPipeline); + m_currentPipeline = static_cast<CPUPipelineState*>(newPipeline.Ptr()); - auto func = (slang_prelude::ComputeFunc) sharedLibrary->findSymbolAddressByName(entryPointName); + auto program = m_currentPipeline->getProgram(); + auto entryPointLayout = + m_currentRootObject->getLayout()->getEntryPoint(entryPointIndex); + auto entryPointName = entryPointLayout->getEntryPointName(); - slang_prelude::ComputeVaryingInput varyingInput; - varyingInput.startGroupID.x = 0; - varyingInput.startGroupID.y = 0; - varyingInput.startGroupID.z = 0; - varyingInput.endGroupID.x = x; - varyingInput.endGroupID.y = y; - varyingInput.endGroupID.z = z; + auto entryPointObject = m_currentRootObject->getEntryPoint(entryPointIndex); - auto globalParamsData = currentRootObject->m_data; - auto entryPointParamsData = entryPointObject->m_data; - func(&varyingInput, entryPointParamsData, globalParamsData); - } + ComPtr<ISlangSharedLibrary> sharedLibrary; + program->slangProgram->getEntryPointHostCallable(entryPointIndex, targetIndex, sharedLibrary.writeRef()); - void copyBuffer( - IBufferResource* dst, - size_t dstOffset, - IBufferResource* src, - size_t srcOffset, - size_t size) - { - auto dstImpl = static_cast<CPUBufferResource*>(dst); - auto srcImpl = static_cast<CPUBufferResource*>(src); - memcpy( - (uint8_t*)dstImpl->m_data + dstOffset, - (uint8_t*)srcImpl->m_data + srcOffset, - size); - } + auto func = (slang_prelude::ComputeFunc) sharedLibrary->findSymbolAddressByName(entryPointName); - void uploadBufferData(IBufferResource* dst, size_t offset, size_t size, void* data) - { - auto dstImpl = static_cast<CPUBufferResource*>(dst); - memcpy((uint8_t*)dstImpl->m_data + offset, data, size); - } + slang_prelude::ComputeVaryingInput varyingInput; + varyingInput.startGroupID.x = 0; + varyingInput.startGroupID.y = 0; + varyingInput.startGroupID.z = 0; + varyingInput.endGroupID.x = x; + varyingInput.endGroupID.y = y; + varyingInput.endGroupID.z = z; - void execute(CommandBufferImpl* commandBuffer) - { - for (auto& cmd : commandBuffer->m_commands) - { - switch (cmd.name) - { - case CommandName::SetPipelineState: - setPipelineState(commandBuffer->getObject<IPipelineState>(cmd.operands[0])); - break; - case CommandName::BindRootShaderObject: - bindRootShaderObject( - (PipelineType)cmd.operands[0], - commandBuffer->getObject<IShaderObject>(cmd.operands[1])); - break; - case CommandName::DispatchCompute: - dispatchCompute( - int(cmd.operands[0]), int(cmd.operands[1]), int(cmd.operands[2])); - break; - case CommandName::CopyBuffer: - copyBuffer( - commandBuffer->getObject<IBufferResource>(cmd.operands[0]), - cmd.operands[1], - commandBuffer->getObject<IBufferResource>(cmd.operands[2]), - cmd.operands[3], - cmd.operands[4]); - break; - case CommandName::UploadBufferData: - uploadBufferData( - commandBuffer->getObject<IBufferResource>(cmd.operands[0]), - cmd.operands[1], - cmd.operands[2], - commandBuffer->getData<uint8_t>(cmd.operands[3])); - break; - } - } - } - }; + auto globalParamsData = m_currentRootObject->m_data; + auto entryPointParamsData = entryPointObject->m_data; + func(&varyingInput, entryPointParamsData, globalParamsData); + } - using TransientResourceHeapImpl = SimpleTransientResourceHeap<CPUDevice, CommandBufferImpl>; + virtual void copyBuffer( + IBufferResource* dst, + size_t dstOffset, + IBufferResource* src, + size_t srcOffset, + size_t size) override + { + auto dstImpl = static_cast<CPUBufferResource*>(dst); + auto srcImpl = static_cast<CPUBufferResource*>(src); + memcpy( + (uint8_t*)dstImpl->m_data + dstOffset, + (uint8_t*)srcImpl->m_data + srcOffset, + size); + } public: ~CPUDevice() @@ -1498,8 +1259,7 @@ public: return SLANG_OK; } - virtual SLANG_NO_THROW Result SLANG_MCALL - createRootShaderObject(IShaderProgram* program, IShaderObject** outObject) override + virtual Result createRootShaderObject(IShaderProgram* program, IShaderObject** outObject) override { auto cpuProgram = static_cast<CPUShaderProgram*>(program); auto cpuProgramLayout = cpuProgram->layout; @@ -1551,53 +1311,7 @@ public: } public: - virtual SLANG_NO_THROW Result SLANG_MCALL - createCommandQueue(const ICommandQueue::Desc& desc, ICommandQueue** outQueue) override - { - RefPtr<CommandQueueImpl> queue = new CommandQueueImpl(); - queue->init(this); - *outQueue = queue.detach(); - return SLANG_OK; - } - virtual SLANG_NO_THROW Result SLANG_MCALL createTransientResourceHeap( - const ITransientResourceHeap::Desc& desc, - ITransientResourceHeap** outHeap) override - { - RefPtr<TransientResourceHeapImpl> result = new TransientResourceHeapImpl(); - SLANG_RETURN_ON_FAIL(result->init(this, desc)); - *outHeap = result.detach(); - return SLANG_OK; - } - virtual SLANG_NO_THROW Result SLANG_MCALL createSwapchain( - const ISwapchain::Desc& desc, WindowHandle window, ISwapchain** outSwapchain) override - { - SLANG_UNUSED(desc); - SLANG_UNUSED(window); - SLANG_UNUSED(outSwapchain); - return SLANG_FAIL; - } - virtual SLANG_NO_THROW Result SLANG_MCALL createFramebufferLayout( - const IFramebufferLayout::Desc& desc, IFramebufferLayout** outLayout) override - { - SLANG_UNUSED(desc); - SLANG_UNUSED(outLayout); - return SLANG_FAIL; - } - virtual SLANG_NO_THROW Result SLANG_MCALL - createFramebuffer(const IFramebuffer::Desc& desc, IFramebuffer** outFramebuffer) override - { - SLANG_UNUSED(desc); - SLANG_UNUSED(outFramebuffer); - return SLANG_FAIL; - } - virtual SLANG_NO_THROW Result SLANG_MCALL createRenderPassLayout( - const IRenderPassLayout::Desc& desc, - IRenderPassLayout** outRenderPassLayout) override - { - SLANG_UNUSED(desc); - SLANG_UNUSED(outRenderPassLayout); - return SLANG_FAIL; - } + virtual SLANG_NO_THROW Result SLANG_MCALL createSamplerState(ISamplerState::Desc const& desc, ISamplerState** outSampler) override { @@ -1605,54 +1319,16 @@ public: *outSampler = nullptr; return SLANG_OK; } - - virtual SLANG_NO_THROW Result SLANG_MCALL createInputLayout( - const InputElementDesc* inputElements, - UInt inputElementCount, - IInputLayout** outLayout) override - { - SLANG_UNUSED(inputElements); - SLANG_UNUSED(inputElementCount); - SLANG_UNUSED(outLayout); - return SLANG_E_NOT_AVAILABLE; - } - virtual SLANG_NO_THROW Result SLANG_MCALL createGraphicsPipelineState( - const GraphicsPipelineStateDesc& desc, IPipelineState** outState) override - { - SLANG_UNUSED(desc); - SLANG_UNUSED(outState); - return SLANG_E_NOT_AVAILABLE; - } - virtual SLANG_NO_THROW SlangResult SLANG_MCALL readTextureResource( - ITextureResource* texture, - ResourceState state, - ISlangBlob** outBlob, - size_t* outRowPitch, - size_t* outPixelSize) override - { - SLANG_UNUSED(texture); - SLANG_UNUSED(outBlob); - SLANG_UNUSED(outRowPitch); - SLANG_UNUSED(outPixelSize); - return SLANG_E_NOT_AVAILABLE; - } - virtual SLANG_NO_THROW Result SLANG_MCALL readBufferResource( - IBufferResource* buffer, - size_t offset, - size_t size, - ISlangBlob** outBlob) override + virtual void submitGpuWork() override {} + virtual void waitForGpu() override {} + virtual void* map(IBufferResource* buffer, MapFlavor flavor) override { + SLANG_UNUSED(flavor); auto bufferImpl = static_cast<CPUBufferResource*>(buffer); - RefPtr<ListBlob> blob = new ListBlob(); - blob->m_data.setCount((Index)size); - memcpy( - blob->m_data.getBuffer(), - (uint8_t*)bufferImpl->m_data + offset, - size); - *outBlob = blob.detach(); - return SLANG_OK; + return bufferImpl->m_data; } + virtual void unmap(IBufferResource* buffer) override { SLANG_UNUSED(buffer); } }; SlangResult CPUShaderObject::init(IDevice* device, CPUShaderObjectLayout* typeLayout) diff --git a/tools/gfx/cuda/render-cuda.cpp b/tools/gfx/cuda/render-cuda.cpp index b29f7f7e4..dbee0c5f2 100644 --- a/tools/gfx/cuda/render-cuda.cpp +++ b/tools/gfx/cuda/render-cuda.cpp @@ -990,7 +990,9 @@ public: return nullptr; } public: - void init(CUDADevice* device) { SLANG_UNUSED(device); } + CUDADevice* m_device; + + void init(CUDADevice* device) { m_device = device; } virtual SLANG_NO_THROW void SLANG_MCALL encodeRenderCommands( IRenderPassLayout* renderPass, IFramebuffer* framebuffer, @@ -1022,25 +1024,29 @@ public: public: CommandWriter* m_writer; - + CommandBufferImpl* m_commandBuffer; + ComPtr<IShaderObject> m_rootObject; virtual SLANG_NO_THROW void SLANG_MCALL endEncoding() override {} void init(CommandBufferImpl* cmdBuffer) { m_writer = cmdBuffer; + m_commandBuffer = cmdBuffer; } - virtual SLANG_NO_THROW void SLANG_MCALL setPipelineState(IPipelineState* state) override + virtual SLANG_NO_THROW Result SLANG_MCALL + bindPipeline(IPipelineState* state, IShaderObject** outRootObject) override { m_writer->setPipelineState(state); - } - virtual SLANG_NO_THROW void SLANG_MCALL - bindRootShaderObject(IShaderObject* object) override - { - m_writer->bindRootShaderObject(PipelineType::Compute, object); + PipelineStateBase* pipelineImpl = static_cast<PipelineStateBase*>(state); + SLANG_RETURN_ON_FAIL(m_commandBuffer->m_device->createRootShaderObject( + pipelineImpl->m_program, outRootObject)); + m_rootObject = *outRootObject; + return SLANG_OK; } virtual SLANG_NO_THROW void SLANG_MCALL dispatchCompute(int x, int y, int z) override { + m_writer->bindRootShaderObject(m_rootObject); m_writer->dispatchCompute(x, y, z); } }; @@ -1831,8 +1837,7 @@ public: return SLANG_OK; } - virtual SLANG_NO_THROW Result SLANG_MCALL - createRootShaderObject(IShaderProgram* program, IShaderObject** outObject) override + Result createRootShaderObject(IShaderProgram* program, IShaderObject** outObject) { auto cudaProgram = dynamic_cast<CUDAShaderProgram*>(program); auto cudaLayout = cudaProgram->layout; diff --git a/tools/gfx/d3d11/render-d3d11.cpp b/tools/gfx/d3d11/render-d3d11.cpp index 08d159415..a099d98c7 100644 --- a/tools/gfx/d3d11/render-d3d11.cpp +++ b/tools/gfx/d3d11/render-d3d11.cpp @@ -63,17 +63,16 @@ public: ~D3D11Device() {} // Renderer implementation - virtual SLANG_NO_THROW SlangResult SLANG_MCALL initialize(const Desc& desc) override; - virtual SLANG_NO_THROW void SLANG_MCALL - clearFrame(uint32_t colorBufferMask, bool clearDepth, bool clearStencil) override; + virtual SLANG_NO_THROW Result SLANG_MCALL initialize(const Desc& desc) override; + virtual void clearFrame(uint32_t colorBufferMask, bool clearDepth, bool clearStencil) override; virtual SLANG_NO_THROW Result SLANG_MCALL createSwapchain( const ISwapchain::Desc& desc, WindowHandle window, ISwapchain** outSwapchain) override; virtual SLANG_NO_THROW Result SLANG_MCALL createFramebufferLayout( const IFramebufferLayout::Desc& desc, IFramebufferLayout** outLayout) override; virtual SLANG_NO_THROW Result SLANG_MCALL createFramebuffer(const IFramebuffer::Desc& desc, IFramebuffer** outFramebuffer) override; - virtual SLANG_NO_THROW void SLANG_MCALL setFramebuffer(IFramebuffer* frameBuffer) override; - virtual SLANG_NO_THROW void SLANG_MCALL setStencilReference(uint32_t referenceValue) override; + virtual void setFramebuffer(IFramebuffer* frameBuffer) override; + virtual void setStencilReference(uint32_t referenceValue) override; virtual SLANG_NO_THROW Result SLANG_MCALL createTextureResource( IResource::Usage initialUsage, @@ -89,9 +88,14 @@ public: createSamplerState(ISamplerState::Desc const& desc, ISamplerState** outSampler) override; virtual SLANG_NO_THROW Result SLANG_MCALL createTextureView( - ITextureResource* texture, IResourceView::Desc const& desc, IResourceView** outView) override; + ITextureResource* texture, + IResourceView::Desc const& desc, + IResourceView** outView) override; + virtual SLANG_NO_THROW Result SLANG_MCALL createBufferView( - IBufferResource* buffer, IResourceView::Desc const& desc, IResourceView** outView) override; + IBufferResource* buffer, + IResourceView::Desc const& desc, + IResourceView** outView) override; virtual SLANG_NO_THROW Result SLANG_MCALL createInputLayout( const InputElementDesc* inputElements, @@ -101,10 +105,11 @@ public: virtual Result createShaderObjectLayout( slang::TypeLayoutReflection* typeLayout, ShaderObjectLayoutBase** outLayout) override; - virtual Result createShaderObject(ShaderObjectLayoutBase* layout, IShaderObject** outObject) override; - virtual SLANG_NO_THROW Result SLANG_MCALL - createRootShaderObject(IShaderProgram* program, IShaderObject** outObject) override; - virtual void bindRootShaderObject(PipelineType pipelineType, IShaderObject* shaderObject) override; + virtual Result createShaderObject(ShaderObjectLayoutBase* layout, IShaderObject** outObject) + override; + virtual Result createRootShaderObject(IShaderProgram* program, IShaderObject** outObject) + override; + virtual void bindRootShaderObject(IShaderObject* shaderObject) override; virtual SLANG_NO_THROW Result SLANG_MCALL createProgram(const IShaderProgram::Desc& desc, IShaderProgram** outProgram) override; @@ -115,7 +120,7 @@ public: virtual void* map(IBufferResource* buffer, MapFlavor flavor) override; virtual void unmap(IBufferResource* buffer) override; - virtual SLANG_NO_THROW void SLANG_MCALL copyBuffer( + virtual void copyBuffer( IBufferResource* dst, size_t dstOffset, IBufferResource* src, @@ -124,28 +129,23 @@ public: virtual SLANG_NO_THROW SlangResult SLANG_MCALL readTextureResource( ITextureResource* texture, ResourceState state, ISlangBlob** outBlob, size_t* outRowPitch, size_t* outPixelSize) override; - virtual SLANG_NO_THROW void SLANG_MCALL - setPrimitiveTopology(PrimitiveTopology topology) override; + virtual void setPrimitiveTopology(PrimitiveTopology topology) override; - virtual SLANG_NO_THROW void SLANG_MCALL setVertexBuffers( + virtual void setVertexBuffers( UInt startSlot, UInt slotCount, IBufferResource* const* buffers, const UInt* strides, const UInt* offsets) override; - virtual SLANG_NO_THROW void SLANG_MCALL - setIndexBuffer(IBufferResource* buffer, Format indexFormat, UInt offset) override; - virtual SLANG_NO_THROW void SLANG_MCALL - setViewports(UInt count, Viewport const* viewports) override; - virtual SLANG_NO_THROW void SLANG_MCALL - setScissorRects(UInt count, ScissorRect const* rects) override; - virtual SLANG_NO_THROW void SLANG_MCALL setPipelineState(IPipelineState* state) override; - virtual SLANG_NO_THROW void SLANG_MCALL draw(UInt vertexCount, UInt startVertex) override; - virtual SLANG_NO_THROW void SLANG_MCALL - drawIndexed(UInt indexCount, UInt startIndex, UInt baseVertex) override; - virtual SLANG_NO_THROW void SLANG_MCALL dispatchCompute(int x, int y, int z) override; - virtual SLANG_NO_THROW void SLANG_MCALL submitGpuWork() override {} - virtual SLANG_NO_THROW void SLANG_MCALL waitForGpu() override {} + virtual void setIndexBuffer(IBufferResource* buffer, Format indexFormat, UInt offset) override; + virtual void setViewports(UInt count, Viewport const* viewports) override; + virtual void setScissorRects(UInt count, ScissorRect const* rects) override; + virtual void setPipelineState(IPipelineState* state) override; + virtual void draw(UInt vertexCount, UInt startVertex) override; + virtual void drawIndexed(UInt indexCount, UInt startIndex, UInt baseVertex) override; + virtual void dispatchCompute(int x, int y, int z) override; + virtual void submitGpuWork() override {} + virtual void waitForGpu() override {} virtual SLANG_NO_THROW const DeviceInfo& SLANG_MCALL getDeviceInfo() const override { return m_info; @@ -2966,19 +2966,19 @@ Result D3D11Device::createRootShaderObject(IShaderProgram* program, IShaderObjec return SLANG_OK; } -void D3D11Device::bindRootShaderObject(PipelineType pipelineType, IShaderObject* shaderObject) +void D3D11Device::bindRootShaderObject(IShaderObject* shaderObject) { RootShaderObjectImpl* rootShaderObjectImpl = static_cast<RootShaderObjectImpl*>(shaderObject); RefPtr<PipelineStateBase> specializedPipeline; maybeSpecializePipeline(m_currentPipelineState, rootShaderObjectImpl, specializedPipeline); setPipelineState(specializedPipeline.Ptr()); - + m_rootBindingState.samplerBindings.clear(); m_rootBindingState.srvBindings.clear(); m_rootBindingState.uavBindings.clear(); m_rootBindingState.constantBuffers.clear(); static_cast<ShaderObjectImpl*>(shaderObject)->bindObject(this, &m_rootBindingState); - switch (pipelineType) + switch (m_currentPipelineState->desc.type) { case PipelineType::Compute: m_immediateContext->CSSetShaderResources(0, (UINT)m_rootBindingState.srvBindings.getCount(), m_rootBindingState.srvBindings.getBuffer()); diff --git a/tools/gfx/d3d12/descriptor-heap-d3d12.cpp b/tools/gfx/d3d12/descriptor-heap-d3d12.cpp index 382fc3219..4533683fd 100644 --- a/tools/gfx/d3d12/descriptor-heap-d3d12.cpp +++ b/tools/gfx/d3d12/descriptor-heap-d3d12.cpp @@ -13,6 +13,8 @@ D3D12DescriptorHeap::D3D12DescriptorHeap(): Result D3D12DescriptorHeap::init(ID3D12Device* device, int size, D3D12_DESCRIPTOR_HEAP_TYPE type, D3D12_DESCRIPTOR_HEAP_FLAGS flags) { + m_device = device; + D3D12_DESCRIPTOR_HEAP_DESC srvHeapDesc = {}; srvHeapDesc.NumDescriptors = size; srvHeapDesc.Flags = flags; @@ -21,7 +23,6 @@ Result D3D12DescriptorHeap::init(ID3D12Device* device, int size, D3D12_DESCRIPTO m_descriptorSize = device->GetDescriptorHandleIncrementSize(type); m_totalSize = size; - return SLANG_OK; } diff --git a/tools/gfx/d3d12/descriptor-heap-d3d12.h b/tools/gfx/d3d12/descriptor-heap-d3d12.h index 20d867155..4b4a22cc3 100644 --- a/tools/gfx/d3d12/descriptor-heap-d3d12.h +++ b/tools/gfx/d3d12/descriptor-heap-d3d12.h @@ -58,17 +58,18 @@ class D3D12DescriptorHeap D3D12DescriptorHeap(); protected: + Slang::ComPtr<ID3D12Device> m_device; Slang::ComPtr<ID3D12DescriptorHeap> m_heap; ///< The underlying heap being allocated from int m_totalSize; ///< Total amount of allocations available on the heap int m_currentIndex; ///< The current descriptor int m_descriptorSize; ///< The size of each descriptor }; -/// A host-visible descriptor, used as "backing storage" for a view. +/// A d3d12 descriptor, used as "backing storage" for a view. /// /// This type is intended to be used to represent descriptors that -/// are allocated and freed through a `HostVisibleDescriptorAllocator`. -struct D3D12HostVisibleDescriptor +/// are allocated and freed through a `D3D12GeneralDescriptorHeap`. +struct D3D12Descriptor { D3D12_CPU_DESCRIPTOR_HANDLE cpuHandle; }; @@ -78,7 +79,7 @@ struct D3D12HostVisibleDescriptor /// Unlike the `D3D12DescriptorHeap` type, this class allows for both /// allocation and freeing of descriptors, by maintaining a free list. /// -class D3D12HostVisibleDescriptorAllocator +class D3D12GeneralDescriptorHeap { ID3D12Device* m_device; int m_chunkSize; @@ -88,16 +89,16 @@ class D3D12HostVisibleDescriptorAllocator Slang::VirtualObjectPool m_allocator; public: - D3D12HostVisibleDescriptorAllocator() + D3D12GeneralDescriptorHeap() {} - Slang::Result init(ID3D12Device* device, int chunkSize, D3D12_DESCRIPTOR_HEAP_TYPE type) + Slang::Result init(ID3D12Device* device, int chunkSize, D3D12_DESCRIPTOR_HEAP_TYPE type, D3D12_DESCRIPTOR_HEAP_FLAGS flag) { m_device = device; m_chunkSize = chunkSize; m_type = type; - SLANG_RETURN_ON_FAIL(m_heap.init(m_device, m_chunkSize, m_type, D3D12_DESCRIPTOR_HEAP_FLAG_NONE)); + SLANG_RETURN_ON_FAIL(m_heap.init(m_device, m_chunkSize, m_type, flag)); m_allocator.initPool(m_chunkSize); return SLANG_OK; } @@ -107,12 +108,17 @@ public: return m_heap.getCpuHandle(index); } + SLANG_FORCE_INLINE D3D12_GPU_DESCRIPTOR_HANDLE getGpuHandle(int index) const + { + return m_heap.getGpuHandle(index); + } + int allocate(int count) { return m_allocator.alloc(count); } - Slang::Result allocate(D3D12HostVisibleDescriptor* outDescriptor) + Slang::Result allocate(D3D12Descriptor* outDescriptor) { // TODO: this allocator would take some work to make thread-safe @@ -123,7 +129,7 @@ public: return SLANG_FAIL; } - D3D12HostVisibleDescriptor descriptor; + D3D12Descriptor descriptor; descriptor.cpuHandle = m_heap.getCpuHandle(index); *outDescriptor = descriptor; @@ -135,7 +141,7 @@ public: m_allocator.free(index, count); } - void free(D3D12HostVisibleDescriptor descriptor) + void free(D3D12Descriptor descriptor) { auto index = (int)(descriptor.cpuHandle.ptr - m_heap.getCpuStart().ptr) / m_heap.getDescriptorSize(); @@ -143,27 +149,110 @@ public: } }; -// --------------------------------------------------------------------------- -int D3D12DescriptorHeap::allocate() +struct DescriptorHeapReference { - assert(m_currentIndex < m_totalSize); - if (m_currentIndex < m_totalSize) + enum class Type + { + Linear, General + }; + union Ptr { - return m_currentIndex++; + D3D12DescriptorHeap* linearHeap; + D3D12GeneralDescriptorHeap* generalHeap; + }; + Type type; + Ptr ptr; + DescriptorHeapReference() = default; + DescriptorHeapReference(D3D12DescriptorHeap* heap) + { + type = Type::Linear; + ptr.linearHeap = heap; } - return -1; -} + DescriptorHeapReference(D3D12GeneralDescriptorHeap* heap) + { + type = Type::General; + ptr.generalHeap = heap; + } + D3D12_CPU_DESCRIPTOR_HANDLE getCpuHandle(int index) const + { + switch (type) + { + case Type::Linear: + return ptr.linearHeap->getCpuHandle(index); + default: + return ptr.generalHeap->getCpuHandle(index); + } + } + D3D12_GPU_DESCRIPTOR_HANDLE getGpuHandle(int index) const + { + switch (type) + { + case Type::Linear: + return ptr.linearHeap->getGpuHandle(index); + default: + return ptr.generalHeap->getGpuHandle(index); + } + } + int allocate(int numDescriptors) + { + switch (type) + { + case Type::Linear: + return ptr.linearHeap->allocate(numDescriptors); + default: + return ptr.generalHeap->allocate(numDescriptors); + } + } + void free(int index, int count) + { + switch (type) + { + case Type::Linear: + SLANG_ASSERT(!"Linear heap does not support free()."); + break; + default: + return ptr.generalHeap->free(index, count); + } + } + void freeIfSupported(int index, int count) + { + switch (type) + { + case Type::Linear: + return; + default: + return ptr.generalHeap->free(index, count); + } + } +}; + +// --------------------------------------------------------------------------- +int D3D12DescriptorHeap::allocate() { return allocate(1); } // --------------------------------------------------------------------------- int D3D12DescriptorHeap::allocate(int numDescriptors) { - assert(m_currentIndex + numDescriptors <= m_totalSize); if (m_currentIndex + numDescriptors <= m_totalSize) { const int index = m_currentIndex; m_currentIndex += numDescriptors; return index; } - return -1; + // We don't have enough heap size, resize the heap. + auto oldHeap = m_heap; + auto oldSize = m_totalSize; + auto currentIndex = m_currentIndex; + auto desc = m_heap->GetDesc(); + this->init(m_device, (int)desc.NumDescriptors * 2, desc.Type, desc.Flags); + m_device->CopyDescriptorsSimple( + (UINT)currentIndex, + m_heap->GetCPUDescriptorHandleForHeapStart(), + oldHeap->GetCPUDescriptorHandleForHeapStart(), + desc.Type); + m_currentIndex = currentIndex; + // Now allocate again. + const int index = m_currentIndex; + m_currentIndex += numDescriptors; + return index; } // --------------------------------------------------------------------------- SLANG_FORCE_INLINE int D3D12DescriptorHeap::placeAt(int index) diff --git a/tools/gfx/d3d12/render-d3d12.cpp b/tools/gfx/d3d12/render-d3d12.cpp index 891077a8b..b8ff47ad3 100644 --- a/tools/gfx/d3d12/render-d3d12.cpp +++ b/tools/gfx/d3d12/render-d3d12.cpp @@ -5,6 +5,7 @@ //WORKING:#include "options.h" #include "../renderer-shared.h" +#include "../transient-resource-heap-base.h" #include "../simple-render-pass-layout.h" #include "../d3d/d3d-swapchain.h" #include "core/slang-blob.h" @@ -119,8 +120,6 @@ public: ShaderObjectLayoutBase** outLayout) override; virtual Result createShaderObject(ShaderObjectLayoutBase* layout, IShaderObject** outObject) override; - virtual SLANG_NO_THROW Result SLANG_MCALL - createRootShaderObject(IShaderProgram* program, IShaderObject** outObject) override; virtual SLANG_NO_THROW Result SLANG_MCALL createProgram(const IShaderProgram::Desc& desc, IShaderProgram** outProgram) override; @@ -185,29 +184,6 @@ public: virtual void setRootConstants(Index rootParamIndex, Index dstOffsetIn32BitValues, Index countOf32BitValues, void const* srcData) = 0; }; - struct FrameInfo - { - FrameInfo() :m_fenceValue(0) {} - void reset() - { - m_commandAllocator.setNull(); - } - ComPtr<ID3D12CommandAllocator> m_commandAllocator; ///< The command allocator for this frame - UINT64 m_fenceValue; ///< The fence value when rendering this Frame is complete - - // During command submission, we need all the descriptor tables that get - // used to come from a single heap (for each descriptor heap type). - // - // We will thus keep a single heap of each type that we hope will hold - // all the descriptors that actually get needed in a frame. - // - // TODO: we need an allocation policy to reallocate and resize these - // if/when we run out of space during a frame. - // - D3D12DescriptorHeap m_viewHeap; ///< Cbv, Srv, Uav - D3D12DescriptorHeap m_samplerHeap; ///< Heap for samplers - }; - class BufferResourceImpl: public gfx::BufferResource { public: @@ -252,7 +228,7 @@ public: return nullptr; } public: - D3D12HostVisibleDescriptor m_descriptor; + D3D12Descriptor m_descriptor; D3D12Device* m_renderer; ~SamplerStateImpl() { @@ -272,8 +248,8 @@ public: } public: RefPtr<Resource> m_resource; - D3D12HostVisibleDescriptor m_descriptor; - D3D12HostVisibleDescriptorAllocator* m_allocator; + D3D12Descriptor m_descriptor; + D3D12GeneralDescriptorHeap* m_allocator; ~ResourceViewImpl() { m_allocator->free(m_descriptor); @@ -471,7 +447,7 @@ public: void* uploadData; SLANG_RETURN_ON_FAIL(buffer->m_uploadResource.getResource()->Map( 0, &readRange, reinterpret_cast<void**>(&uploadData))); - memcpy(uploadData, data, size); + memcpy((uint8_t*)uploadData + offset, data, size); buffer->m_uploadResource.getResource()->Unmap(0, &readRange); { D3D12BarrierSubmitter submitter(cmdList); @@ -493,23 +469,11 @@ public: } class TransientResourceHeapImpl - : public ITransientResourceHeap - , public RefObject + : public TransientResourceHeapBase<D3D12Device, BufferResourceImpl> { + private: + typedef TransientResourceHeapBase<D3D12Device, BufferResourceImpl> Super; public: - SLANG_REF_OBJECT_IUNKNOWN_ALL - ITransientResourceHeap* getInterface(const Guid& guid) - { - if (guid == GfxGUID::IID_ISlangUnknown || guid == GfxGUID::IID_ITransientResourceHeap) - return static_cast<ITransientResourceHeap*>(this); - return nullptr; - } - - public: - D3D12Resource m_constantBuffer; - D3D12Resource m_constantUploadBuffer; - - D3D12Device* m_device; ComPtr<ID3D12CommandAllocator> m_commandAllocator; List<ComPtr<ID3D12GraphicsCommandList>> m_d3dCommandListPool; List<ComPtr<ICommandBuffer>> m_commandBufferPool; @@ -562,7 +526,7 @@ public: uint32_t viewHeapSize, uint32_t samplerHeapSize) { - m_device = device; + Super::init(desc, device); auto d3dDevice = device->m_device; SLANG_RETURN_ON_FAIL(d3dDevice->CreateCommandAllocator( D3D12_COMMAND_LIST_TYPE_DIRECT, IID_PPV_ARGS(m_commandAllocator.writeRef()))); @@ -580,15 +544,16 @@ public: if (desc.constantBufferSize != 0) { - D3D12_RESOURCE_DESC resourceDesc; - _initBufferResourceDesc(desc.constantBufferSize, resourceDesc); - device->createBuffer( - resourceDesc, + ComPtr<IBufferResource> bufferResourcePtr; + IBufferResource::Desc bufferDesc; + bufferDesc.init(desc.constantBufferSize); + bufferDesc.cpuAccessFlags |= IResource::AccessFlag::Write; + SLANG_RETURN_ON_FAIL(device->createBufferResource( + IResource::Usage::ConstantBuffer, + bufferDesc, nullptr, - 0, - m_constantUploadBuffer, - D3D12_RESOURCE_STATE_VERTEX_AND_CONSTANT_BUFFER, - m_constantBuffer); + bufferResourcePtr.writeRef())); + m_constantBuffers.add(static_cast<BufferResourceImpl*>(bufferResourcePtr.get())); } return SLANG_OK; } @@ -614,7 +579,6 @@ public: ID3D12GraphicsCommandList* m_preCmdList = nullptr; RefPtr<PipelineStateImpl> m_currentPipeline; - RefPtr<ShaderObjectBase> m_rootShaderObject; static int getBindPointIndex(PipelineType type) { @@ -642,55 +606,22 @@ public: void endEncodingImpl() { m_isOpen = false; } - void bindRootShaderObjectImpl(IShaderObject* object) - { - m_rootShaderObject = static_cast<RootShaderObjectImpl*>(object); - m_bindingDirty = true; - } - - void setPipelineStateImpl(IPipelineState* pipelineState) + Result bindPipelineImpl(IPipelineState* pipelineState, IShaderObject** outRootObject) { m_currentPipeline = static_cast<PipelineStateImpl*>(pipelineState); + auto rootObject = &m_commandBuffer->m_rootShaderObject; + SLANG_RETURN_ON_FAIL(rootObject->reset( + m_renderer, + m_currentPipeline->getProgram<ShaderProgramImpl>()->m_rootObjectLayout, + m_commandBuffer->m_transientHeap)); + *outRootObject = rootObject; m_bindingDirty = true; + return SLANG_OK; } Result _bindRenderState(Submitter* submitter); }; - struct DescriptorHeapReference - { - bool isCpuHeap; - union Reference - { - D3D12DescriptorHeap* gpuHeap; - D3D12HostVisibleDescriptorAllocator* cpuHeap; - } ptr; - DescriptorHeapReference& operator=(D3D12DescriptorHeap* gpuHeap) - { - ptr.gpuHeap = gpuHeap; - isCpuHeap = false; - return *this; - } - DescriptorHeapReference& operator=(D3D12HostVisibleDescriptorAllocator* cpuHeap) - { - ptr.cpuHeap = cpuHeap; - isCpuHeap = true; - return *this; - } - SLANG_FORCE_INLINE D3D12_CPU_DESCRIPTOR_HANDLE getCpuHandle(int index) const - { - if (isCpuHeap) - return ptr.cpuHeap->getCpuHandle(index); - else - return ptr.gpuHeap->getCpuHandle(index); - } - SLANG_FORCE_INLINE D3D12_GPU_DESCRIPTOR_HANDLE getGpuHandle(int index) const - { - SLANG_ASSERT(!isCpuHeap); - return ptr.gpuHeap->getGpuHandle(index); - } - }; - struct DescriptorTable { DescriptorHeapReference heap; @@ -1500,9 +1431,9 @@ public: // For root object, we would like know the union of all binding slots // including all sub-objects in the shader-object hierarchy, so at // parameter binding time we can easily know how many GPU descriptor tables - // to create without walking throught the shader-object hierarchy again. - // We build out this array along with root signature construction. - List<DescriptorSetInfo> outRootDescriptorSetInfos; + // to create without walking through the shader-object hierarchy again. + // We build out this array along with root signature construction and store + // it in `m_gpuDescriptorSetInfos`. SLANG_RETURN_ON_FAIL(createRootSignatureFromSlang( device, program, @@ -1557,7 +1488,8 @@ public: ShaderObjectImpl** outShaderObject) { auto object = ComPtr<ShaderObjectImpl>(new ShaderObjectImpl()); - SLANG_RETURN_ON_FAIL(object->init(device, layout)); + SLANG_RETURN_ON_FAIL( + object->init(device, layout, &device->m_cpuViewHeap, &device->m_cpuSamplerHeap)); *outShaderObject = object.detach(); return SLANG_OK; @@ -1568,12 +1500,13 @@ public: auto layoutImpl = static_cast<ShaderObjectLayoutImpl*>(m_layout.Ptr()); if (m_descriptorSet.m_resourceCount) { - m_resourceHeap->free( + m_resourceHeap.freeIfSupported( m_descriptorSet.m_resourceTable, m_descriptorSet.m_resourceCount); } if (m_descriptorSet.m_samplerCount) { - m_samplerHeap->free(m_descriptorSet.m_samplerTable, m_descriptorSet.m_samplerCount); + m_samplerHeap.freeIfSupported( + m_descriptorSet.m_samplerTable, m_descriptorSet.m_samplerCount); } } @@ -1776,7 +1709,7 @@ public: ID3D12Device* d3dDevice = static_cast<D3D12Device*>(getDevice())->m_device; d3dDevice->CopyDescriptorsSimple( 1, - m_resourceHeap->getCpuHandle( + m_resourceHeap.getCpuHandle( m_descriptorSet.m_resourceTable + bindingRange.binding.offsetInDescriptorTable.resource + (int32_t)offset.bindingArrayIndex), @@ -1798,7 +1731,7 @@ public: ID3D12Device* d3dDevice = static_cast<D3D12Device*>(getDevice())->m_device; d3dDevice->CopyDescriptorsSimple( 1, - m_samplerHeap->getCpuHandle( + m_samplerHeap.getCpuHandle( m_descriptorSet.m_samplerTable + bindingRange.binding.offsetInDescriptorTable.sampler + (int32_t)offset.bindingArrayIndex), @@ -1822,7 +1755,7 @@ public: ID3D12Device* d3dDevice = static_cast<D3D12Device*>(getDevice())->m_device; d3dDevice->CopyDescriptorsSimple( 1, - m_resourceHeap->getCpuHandle( + m_resourceHeap.getCpuHandle( m_descriptorSet.m_resourceTable + bindingRange.binding.offsetInDescriptorTable.resource + (int32_t)offset.bindingArrayIndex), @@ -1831,7 +1764,7 @@ public: auto samplerImpl = static_cast<SamplerStateImpl*>(sampler); d3dDevice->CopyDescriptorsSimple( 1, - m_samplerHeap->getCpuHandle( + m_samplerHeap.getCpuHandle( m_descriptorSet.m_samplerTable + bindingRange.binding.offsetInDescriptorTable.sampler + (int32_t)offset.bindingArrayIndex), @@ -1903,10 +1836,16 @@ public: } protected: - Result init(D3D12Device* device, ShaderObjectLayoutImpl* layout) + Result init( + D3D12Device* device, + ShaderObjectLayoutImpl* layout, + DescriptorHeapReference viewHeap, + DescriptorHeapReference samplerHeap) { m_layout = layout; + m_upToDateConstantBufferHeapVersion = 0; + // If the layout tells us that there is any uniform data, // then we will allocate a CPU memory buffer to hold that data // while it is being set from the host. @@ -1924,20 +1863,20 @@ public: } // Allocate descriptor tables for this shader object. - m_resourceHeap = &device->m_cpuViewHeap; - m_samplerHeap = &device->m_cpuSamplerHeap; + m_resourceHeap = viewHeap; + m_samplerHeap = samplerHeap; auto descSetInfo = layout->getDescriptorSetInfo(); m_descriptorSet.m_resourceCount = descSetInfo.resourceDescriptorCount; if (descSetInfo.resourceDescriptorCount) { m_descriptorSet.m_resourceTable = - m_resourceHeap->allocate(descSetInfo.resourceDescriptorCount); + viewHeap.allocate(descSetInfo.resourceDescriptorCount); } m_descriptorSet.m_samplerCount = descSetInfo.samplerDescriptorCount; if (descSetInfo.samplerDescriptorCount) { m_descriptorSet.m_samplerTable = - m_samplerHeap->allocate(descSetInfo.samplerDescriptorCount); + samplerHeap.allocate(descSetInfo.samplerDescriptorCount); } m_boundResources.setCount(layout->getResourceCount()); @@ -2114,8 +2053,11 @@ public: // operations on a shader object once an operation has requested this buffer // be created. We need to enforce that rule if we want to rely on it. // - if (m_ordinaryDataBuffer) + if (m_upToDateConstantBufferHeapVersion == + encoder->m_commandBuffer->m_transientHeap->getVersion()) + { return SLANG_OK; + } // Computing the size of the ordinary data buffer is *not* just as simple // as using the size of the `m_ordinayData` array that we store. The reason @@ -2132,24 +2074,20 @@ public: RefPtr<ShaderObjectLayoutImpl> specializedLayout; SLANG_RETURN_ON_FAIL(getSpecializedLayout(specializedLayout.writeRef())); - auto specializedOrdinaryDataSize = specializedLayout->getElementTypeLayout()->getSize(); - if (specializedOrdinaryDataSize == 0) + m_constantBufferSize = specializedLayout->getElementTypeLayout()->getSize(); + if (m_constantBufferSize == 0) + { + m_upToDateConstantBufferHeapVersion = + encoder->m_commandBuffer->m_transientHeap->getVersion(); return SLANG_OK; + } // Once we have computed how large the buffer should be, we can allocate - // it using the existing public `IDevice` API. + // it from the transient resource heap. // - - ComPtr<IBufferResource> bufferResourcePtr; - IBufferResource::Desc bufferDesc; - bufferDesc.init(specializedOrdinaryDataSize); - bufferDesc.cpuAccessFlags |= IResource::AccessFlag::Write; - SLANG_RETURN_ON_FAIL(encoder->m_renderer->createBufferResource( - IResource::Usage::ConstantBuffer, - bufferDesc, - nullptr, - bufferResourcePtr.writeRef())); - m_ordinaryDataBuffer = static_cast<BufferResourceImpl*>(bufferResourcePtr.get()); + auto alignedConstantBufferSize = D3DUtil::calcAligned(m_constantBufferSize, 256); + SLANG_RETURN_ON_FAIL(encoder->m_commandBuffer->m_transientHeap->allocateConstantBuffer( + alignedConstantBufferSize, m_constantBufferWeakPtr, m_constantBufferOffset)); // Once the buffer is allocated, we can use `_writeOrdinaryData` to fill it in. // @@ -2158,7 +2096,16 @@ public: // don't need or want to inline it into this call site. // SLANG_RETURN_ON_FAIL(_writeOrdinaryData( - encoder, m_ordinaryDataBuffer, 0, specializedOrdinaryDataSize, specializedLayout)); + encoder, + static_cast<BufferResourceImpl*>(m_constantBufferWeakPtr), + m_constantBufferOffset, + m_constantBufferSize, + specializedLayout)); + + // Update version tracker so that we don't redundantly alloc and fill in + // constant buffers for the same transient heap. + m_upToDateConstantBufferHeapVersion = + encoder->m_commandBuffer->m_transientHeap->getVersion(); { // We also create and store a descriptor for our root constant buffer @@ -2169,20 +2116,20 @@ public: // auto descriptorTable = m_descriptorSet.m_resourceTable; D3D12_CONSTANT_BUFFER_VIEW_DESC viewDesc = {}; - viewDesc.BufferLocation = - m_ordinaryDataBuffer->m_resource.getResource()->GetGPUVirtualAddress(); - viewDesc.SizeInBytes = - (UINT)D3DUtil::calcAligned((UInt)m_ordinaryData.getCount(), 256); + viewDesc.BufferLocation = static_cast<BufferResourceImpl*>(m_constantBufferWeakPtr) + ->m_resource.getResource() + ->GetGPUVirtualAddress() + + m_constantBufferOffset; + viewDesc.SizeInBytes = (UINT)alignedConstantBufferSize; encoder->m_device->CreateConstantBufferView( - &viewDesc, - m_resourceHeap->getCpuHandle(descriptorTable)); + &viewDesc, m_resourceHeap.getCpuHandle(descriptorTable)); } return SLANG_OK; } public: - virtual Result bindObject(PipelineCommandEncoder* encoder, RootBindingState* bindingState) + Result bindObject(PipelineCommandEncoder* encoder, RootBindingState* bindingState) { ShaderObjectLayoutImpl* layout = getLayout(); SLANG_RETURN_ON_FAIL(_ensureOrdinaryDataBufferCreatedIfNeeded(encoder)); @@ -2192,12 +2139,13 @@ public: { auto gpuDescriptorTable = bindingState->descriptorTables[descTableIndex]; auto& gpuHeap = gpuDescriptorTable.heap; - auto& cpuHeap = *m_resourceHeap; + auto& cpuHeap = m_resourceHeap; auto cpuDescriptorTable = descSet.m_resourceTable; bindingState->device->m_device->CopyDescriptorsSimple( UINT(descSet.m_resourceCount), - gpuHeap.getCpuHandle(gpuDescriptorTable.table + bindingState->offset.resource), + gpuHeap.getCpuHandle( + gpuDescriptorTable.table + bindingState->offset.resource), cpuHeap.getCpuHandle(cpuDescriptorTable), D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); bindingState->offset.resource += descSet.m_resourceCount; @@ -2207,12 +2155,13 @@ public: { auto gpuDescriptorTable = bindingState->descriptorTables[descTableIndex]; auto& gpuHeap = gpuDescriptorTable.heap; - auto& cpuHeap = *m_samplerHeap; + auto& cpuHeap = m_samplerHeap; auto cpuDescriptorTable = (int)descSet.m_samplerTable; bindingState->device->m_device->CopyDescriptorsSimple( UINT(descSet.m_samplerCount), - gpuHeap.getCpuHandle(gpuDescriptorTable.table + bindingState->offset.sampler), + gpuHeap.getCpuHandle( + gpuDescriptorTable.table + bindingState->offset.sampler), cpuHeap.getCpuHandle(cpuDescriptorTable), D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER); bindingState->offset.sampler += descSet.m_samplerCount; @@ -2271,8 +2220,9 @@ public: List<RefPtr<ShaderObjectImpl>> m_objects; - D3D12HostVisibleDescriptorAllocator* m_resourceHeap = nullptr; - D3D12HostVisibleDescriptorAllocator* m_samplerHeap = nullptr; + // The resource and sampler heaps used to allocate the descriptor tables. + DescriptorHeapReference m_resourceHeap; + DescriptorHeapReference m_samplerHeap; struct DescriptorSet { @@ -2288,8 +2238,17 @@ public: /// A constant buffer used to stored ordinary data for this object /// and existential-type sub-objects. /// - /// Created on demand with `_createOrdinaryDataBufferIfNeeded()` - RefPtr<BufferResourceImpl> m_ordinaryDataBuffer; + /// Allocated from transient heap on demand with `_createOrdinaryDataBufferIfNeeded()` + IBufferResource* m_constantBufferWeakPtr = nullptr; + size_t m_constantBufferOffset = 0; + size_t m_constantBufferSize = 0; + + /// The version number of the transient resource heap that contains up-to-date + /// constant buffer content for this shader object. If this is equal to the version number + /// of currently active transient heap, then the current set-up of constant buffer contents + /// as defined by the above `m_constantBuffer*` fields is valid and up-to-date so we can + /// use them directly. + uint64_t m_upToDateConstantBufferHeapVersion; /// Get the layout of this shader object with specialization arguments considered /// @@ -2332,18 +2291,11 @@ public: typedef ShaderObjectImpl Super; public: - static Result create( - D3D12Device* device, - RootShaderObjectLayoutImpl* layout, - RootShaderObjectImpl** outShaderObject) - { - RefPtr<RootShaderObjectImpl> object = new RootShaderObjectImpl(); - SLANG_RETURN_ON_FAIL(object->init(device, layout)); - - *outShaderObject = object.detach(); - return SLANG_OK; - } - + // Override default reference counting behavior to disable lifetime management. + // Root objects are managed by command buffer and does not need to be freed by the user. + SLANG_NO_THROW uint32_t SLANG_MCALL addRef() override { return 1; } + SLANG_NO_THROW uint32_t SLANG_MCALL release() override { return 1; } + public: RootShaderObjectLayoutImpl* getLayout() { return static_cast<RootShaderObjectLayoutImpl*>(m_layout.Ptr()); @@ -2372,7 +2324,7 @@ public: } public: - virtual Result bindObject(PipelineCommandEncoder* encoder, RootBindingState* bindingState) override + Result bindObject(PipelineCommandEncoder* encoder, RootBindingState* bindingState) { SLANG_RETURN_ON_FAIL(Super::bindObject(encoder, bindingState)); @@ -2385,12 +2337,34 @@ public: return SLANG_OK; } - protected: - Result init(D3D12Device* device, RootShaderObjectLayoutImpl* layout) + public: + + Result init(D3D12Device* device) { - SLANG_RETURN_ON_FAIL(Super::init(device, layout)); + SLANG_RETURN_ON_FAIL(m_cpuViewHeap.init( + device->m_device, + 64, + D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, + D3D12_DESCRIPTOR_HEAP_FLAG_NONE)); + SLANG_RETURN_ON_FAIL(m_cpuSamplerHeap.init( + device->m_device, + 8, + D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER, + D3D12_DESCRIPTOR_HEAP_FLAG_NONE)); + return SLANG_OK; + } + Result reset( + D3D12Device* device, + RootShaderObjectLayoutImpl* layout, + TransientResourceHeapImpl* heap) + { + m_cpuViewHeap.deallocateAll(); + m_cpuSamplerHeap.deallocateAll(); + SLANG_RETURN_ON_FAIL(Super::init(device, layout, &m_cpuViewHeap, &m_cpuSamplerHeap)); + m_specializedLayout = nullptr; + m_entryPoints.clear(); for (auto entryPointInfo : layout->getEntryPoints()) { RefPtr<ShaderObjectImpl> entryPoint; @@ -2398,10 +2372,10 @@ public: ShaderObjectImpl::create(device, entryPointInfo.layout, entryPoint.writeRef())); m_entryPoints.add(entryPoint); } - return SLANG_OK; } + protected: Result _createSpecializedLayout(ShaderObjectLayoutImpl** outLayout) SLANG_OVERRIDE { ExtendedShaderObjectTypeList specializationArgs; @@ -2487,6 +2461,12 @@ public: } List<RefPtr<ShaderObjectImpl>> m_entryPoints; + + public: + // Descriptor heaps for the root object. Resets with the life cycle of each root shader + // object use. + D3D12DescriptorHeap m_cpuViewHeap; + D3D12DescriptorHeap m_cpuSamplerHeap; }; class CommandBufferImpl @@ -2505,6 +2485,8 @@ public: ComPtr<ID3D12GraphicsCommandList> m_cmdList; TransientResourceHeapImpl* m_transientHeap; D3D12Device* m_renderer; + RootShaderObjectImpl m_rootShaderObject; + void init( D3D12Device* renderer, ID3D12GraphicsCommandList* d3dCommandList, @@ -2519,6 +2501,7 @@ public: m_transientHeap->m_samplerHeap.getHeap(), }; m_cmdList->SetDescriptorHeaps(SLANG_COUNT_OF(heaps), heaps); + m_rootShaderObject.init(renderer); } class RenderCommandEncoderImpl @@ -2672,15 +2655,10 @@ public: } } - virtual SLANG_NO_THROW void SLANG_MCALL setPipelineState(IPipelineState* state) override + virtual SLANG_NO_THROW Result SLANG_MCALL + bindPipeline(IPipelineState* state, IShaderObject** outRootObject) override { - setPipelineStateImpl(state); - } - - virtual SLANG_NO_THROW void SLANG_MCALL - bindRootShaderObject(IShaderObject* object) override - { - bindRootShaderObjectImpl(object); + return bindPipelineImpl(state, outRootObject); } virtual SLANG_NO_THROW void SLANG_MCALL @@ -2949,15 +2927,10 @@ public: m_currentPipeline = nullptr; } - virtual SLANG_NO_THROW void SLANG_MCALL setPipelineState(IPipelineState* state) override - { - setPipelineStateImpl(state); - } - - virtual SLANG_NO_THROW void SLANG_MCALL - bindRootShaderObject(IShaderObject* object) override + virtual SLANG_NO_THROW Result SLANG_MCALL + bindPipeline(IPipelineState* state, IShaderObject** outRootObject) override { - bindRootShaderObjectImpl(object); + return bindPipelineImpl(state, outRootObject); } virtual SLANG_NO_THROW void SLANG_MCALL dispatchCompute(int x, int y, int z) override @@ -3282,14 +3255,14 @@ public: RefPtr<CommandQueueImpl> m_resourceCommandQueue; RefPtr<TransientResourceHeapImpl> m_resourceCommandTransientHeap; - D3D12HostVisibleDescriptorAllocator m_rtvAllocator; - D3D12HostVisibleDescriptorAllocator m_dsvAllocator; + D3D12GeneralDescriptorHeap m_rtvAllocator; + D3D12GeneralDescriptorHeap m_dsvAllocator; // Space in the GPU-visible heaps is precious, so we will also keep // around CPU-visible heaps for storing descriptors in a format // that is ready for copying into the GPU-visible heaps as needed. // - D3D12HostVisibleDescriptorAllocator m_cpuViewHeap; ///< Cbv, Srv, Uav - D3D12HostVisibleDescriptorAllocator m_cpuSamplerHeap; ///< Heap for samplers + D3D12GeneralDescriptorHeap m_cpuViewHeap; ///< Cbv, Srv, Uav + D3D12GeneralDescriptorHeap m_cpuSamplerHeap; ///< Heap for samplers // Dll entry points PFN_D3D12_GET_DEBUG_INTERFACE m_D3D12GetDebugInterface = nullptr; @@ -3312,6 +3285,7 @@ SLANG_NO_THROW Result SLANG_MCALL D3D12Device::TransientResourceHeapImpl::synchr m_samplerHeap.deallocateAll(); m_commandListAllocId = 0; SLANG_RETURN_ON_FAIL(m_commandAllocator->Reset()); + Super::reset(); return SLANG_OK; } @@ -3350,37 +3324,36 @@ Result D3D12Device::TransientResourceHeapImpl::createCommandBuffer(ICommandBuffe Result D3D12Device::PipelineCommandEncoder::_bindRenderState(Submitter* submitter) { RefPtr<PipelineStateBase> newPipeline; - m_renderer->maybeSpecializePipeline( - m_currentPipeline, m_rootShaderObject, newPipeline); - RootShaderObjectImpl* rootObjectImpl = - static_cast<RootShaderObjectImpl*>(m_rootShaderObject.Ptr()); + RootShaderObjectImpl* rootObjectImpl = &m_commandBuffer->m_rootShaderObject; + m_renderer->maybeSpecializePipeline(m_currentPipeline, rootObjectImpl, newPipeline); PipelineStateImpl* newPipelineImpl = static_cast<PipelineStateImpl*>(newPipeline.Ptr()); auto commandList = m_d3dCmdList; auto pipelineTypeIndex = (int)newPipelineImpl->desc.type; auto programImpl = static_cast<ShaderProgramImpl*>(newPipelineImpl->m_program.get()); commandList->SetPipelineState(newPipelineImpl->m_pipelineState); submitter->setRootSignature(programImpl->m_rootObjectLayout->m_rootSignature); - ShortList<DescriptorTable, kMaxDescriptorSetCount> descriptorTables; RefPtr<ShaderObjectLayoutImpl> specializedRootLayout; SLANG_RETURN_ON_FAIL(rootObjectImpl->getSpecializedLayout(specializedRootLayout.writeRef())); RootShaderObjectLayoutImpl* rootLayoutImpl = static_cast<RootShaderObjectLayoutImpl*>(specializedRootLayout.Ptr()); + + ShortList<DescriptorTable> descriptorTables; + auto descSetInfo = rootLayoutImpl->getDescriptorSetInfo(); + auto heap = m_commandBuffer->m_transientHeap; for (auto& descSet : rootLayoutImpl->m_gpuDescriptorSetInfos) { if (descSet.resourceDescriptorCount) { DescriptorTable table; - table.heap = &m_transientHeap->m_viewHeap; - table.table = - m_transientHeap->m_viewHeap.allocate((int)descSet.resourceDescriptorCount); + table.heap = &heap->m_viewHeap; + table.table = heap->m_viewHeap.allocate((int)descSet.resourceDescriptorCount); descriptorTables.add(table); } if (descSet.samplerDescriptorCount) { DescriptorTable table; - table.heap = &m_transientHeap->m_samplerHeap; - table.table = - m_transientHeap->m_samplerHeap.allocate((int)descSet.samplerDescriptorCount); + table.heap = &heap->m_samplerHeap; + table.table = heap->m_samplerHeap.allocate((int)descSet.samplerDescriptorCount); descriptorTables.add(table); } } @@ -3934,11 +3907,15 @@ Result D3D12Device::initialize(const Desc& desc) SLANG_RETURN_ON_FAIL(createCommandQueueImpl(m_resourceCommandQueue.writeRef())); SLANG_RETURN_ON_FAIL(createTransientResourceHeapImpl(0, 8, 4, m_resourceCommandTransientHeap.writeRef())); - SLANG_RETURN_ON_FAIL(m_cpuViewHeap.init (m_device, 8192, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV)); - SLANG_RETURN_ON_FAIL(m_cpuSamplerHeap.init(m_device, 1024, D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER)); + SLANG_RETURN_ON_FAIL(m_cpuViewHeap.init( + m_device, 8192, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, D3D12_DESCRIPTOR_HEAP_FLAG_NONE)); + SLANG_RETURN_ON_FAIL(m_cpuSamplerHeap.init( + m_device, 1024, D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER, D3D12_DESCRIPTOR_HEAP_FLAG_NONE)); - SLANG_RETURN_ON_FAIL(m_rtvAllocator.init (m_device, 16, D3D12_DESCRIPTOR_HEAP_TYPE_RTV)); - SLANG_RETURN_ON_FAIL(m_dsvAllocator.init (m_device, 16, D3D12_DESCRIPTOR_HEAP_TYPE_DSV)); + SLANG_RETURN_ON_FAIL(m_rtvAllocator.init( + m_device, 16, D3D12_DESCRIPTOR_HEAP_TYPE_RTV, D3D12_DESCRIPTOR_HEAP_FLAG_NONE)); + SLANG_RETURN_ON_FAIL(m_dsvAllocator.init( + m_device, 16, D3D12_DESCRIPTOR_HEAP_TYPE_DSV, D3D12_DESCRIPTOR_HEAP_FLAG_NONE)); ComPtr<IDXGIDevice> dxgiDevice; if (m_deviceInfo.m_adapter) @@ -4425,7 +4402,7 @@ Result D3D12Device::createSamplerState(ISamplerState::Desc const& desc, ISampler auto samplerHeap = &m_cpuSamplerHeap; - D3D12HostVisibleDescriptor cpuDescriptor; + D3D12Descriptor cpuDescriptor; samplerHeap->allocate(&cpuDescriptor); m_device->CreateSampler(&dxDesc, cpuDescriptor.cpuHandle); @@ -4871,17 +4848,6 @@ Result D3D12Device::createShaderObject( return SLANG_OK; } -Result SLANG_MCALL - D3D12Device::createRootShaderObject(IShaderProgram* program, IShaderObject** outObject) -{ - auto programImpl = dynamic_cast<ShaderProgramImpl*>(program); - RefPtr<RootShaderObjectImpl> shaderObject; - SLANG_RETURN_ON_FAIL(RootShaderObjectImpl::create( - this, programImpl->m_rootObjectLayout, shaderObject.writeRef())); - *outObject = shaderObject.detach(); - return SLANG_OK; -} - Result D3D12Device::createGraphicsPipelineState(const GraphicsPipelineStateDesc& inDesc, IPipelineState** outState) { GraphicsPipelineStateDesc desc = inDesc; diff --git a/tools/gfx/immediate-renderer-base.cpp b/tools/gfx/immediate-renderer-base.cpp index 97bc63634..8c2f3b927 100644 --- a/tools/gfx/immediate-renderer-base.cpp +++ b/tools/gfx/immediate-renderer-base.cpp @@ -33,6 +33,7 @@ public: public: CommandWriter m_writer; ImmediateRendererBase* m_renderer; + RefPtr<ShaderObjectBase> m_rootShaderObject; void init(ImmediateRendererBase* renderer) { @@ -40,7 +41,8 @@ public: } void reset() - { m_writer.clear(); + { + m_writer.clear(); } class RenderCommandEncoderImpl @@ -63,11 +65,13 @@ public: public: CommandWriter* m_writer; + CommandBufferImpl* m_commandBuffer; virtual SLANG_NO_THROW void SLANG_MCALL endEncoding() override {} void init(CommandBufferImpl* cmdBuffer, SimpleRenderPassLayout* renderPass, IFramebuffer* framebuffer) { m_writer = &cmdBuffer->m_writer; + m_commandBuffer = cmdBuffer; // Encode clear commands. m_writer->setFramebuffer(framebuffer); @@ -100,15 +104,15 @@ public: m_writer->clearFrame(clearMask, clearDepth, clearStencil); } - virtual SLANG_NO_THROW void SLANG_MCALL setPipelineState(IPipelineState* state) override + virtual SLANG_NO_THROW Result SLANG_MCALL + bindPipeline(IPipelineState* state, IShaderObject** outRootObject) override { m_writer->setPipelineState(state); - } - - virtual SLANG_NO_THROW void SLANG_MCALL - bindRootShaderObject(IShaderObject* object) override - { - m_writer->bindRootShaderObject(PipelineType::Graphics, object); + auto stateImpl = static_cast<PipelineStateBase*>(state); + SLANG_RETURN_ON_FAIL(m_commandBuffer->m_renderer->createRootShaderObject( + stateImpl->m_program, outRootObject)); + *m_commandBuffer->m_rootShaderObject.writeRef() = static_cast<ShaderObjectBase*>(*outRootObject); + return SLANG_OK; } virtual SLANG_NO_THROW void SLANG_MCALL @@ -143,12 +147,14 @@ public: virtual SLANG_NO_THROW void SLANG_MCALL draw(UInt vertexCount, UInt startVertex) override { + m_writer->bindRootShaderObject(m_commandBuffer->m_rootShaderObject); m_writer->draw(vertexCount, startVertex); } virtual SLANG_NO_THROW void SLANG_MCALL drawIndexed(UInt indexCount, UInt startIndex, UInt baseVertex) override { + m_writer->bindRootShaderObject(m_commandBuffer->m_rootShaderObject); m_writer->drawIndexed(indexCount, startIndex, baseVertex); } @@ -191,6 +197,7 @@ public: public: CommandWriter* m_writer; + CommandBufferImpl* m_commandBuffer; virtual SLANG_NO_THROW void SLANG_MCALL endEncoding() override { @@ -199,20 +206,23 @@ public: void init(CommandBufferImpl* cmdBuffer) { m_writer = &cmdBuffer->m_writer; + m_commandBuffer = cmdBuffer; } - virtual SLANG_NO_THROW void SLANG_MCALL setPipelineState(IPipelineState* state) override + virtual SLANG_NO_THROW Result SLANG_MCALL + bindPipeline(IPipelineState* state, IShaderObject** outRootObject) override { m_writer->setPipelineState(state); - } - virtual SLANG_NO_THROW void SLANG_MCALL - bindRootShaderObject(IShaderObject* object) override - { - m_writer->bindRootShaderObject(PipelineType::Compute, object); + auto stateImpl = static_cast<PipelineStateBase*>(state); + SLANG_RETURN_ON_FAIL(m_commandBuffer->m_renderer->createRootShaderObject( + stateImpl->m_program, outRootObject)); + *m_commandBuffer->m_rootShaderObject.writeRef() = static_cast<ShaderObjectBase*>(*outRootObject); + return SLANG_OK; } virtual SLANG_NO_THROW void SLANG_MCALL dispatchCompute(int x, int y, int z) override { + m_writer->bindRootShaderObject(m_commandBuffer->m_rootShaderObject); m_writer->dispatchCompute(x, y, z); } }; @@ -291,9 +301,7 @@ public: m_renderer->setPipelineState(m_writer.getObject<IPipelineState>(cmd.operands[0])); break; case CommandName::BindRootShaderObject: - m_renderer->bindRootShaderObject( - (PipelineType)cmd.operands[0], - m_writer.getObject<IShaderObject>(cmd.operands[1])); + m_renderer->bindRootShaderObject(m_writer.getObject<IShaderObject>(cmd.operands[0])); break; case CommandName::SetFramebuffer: m_renderer->setFramebuffer(m_writer.getObject<IFramebuffer>(cmd.operands[0])); diff --git a/tools/gfx/immediate-renderer-base.h b/tools/gfx/immediate-renderer-base.h index 296cd15cb..5f1770be0 100644 --- a/tools/gfx/immediate-renderer-base.h +++ b/tools/gfx/immediate-renderer-base.h @@ -24,37 +24,35 @@ private: public: // Immediate commands to be implemented by each target. - virtual SLANG_NO_THROW void SLANG_MCALL setPipelineState(IPipelineState* state) = 0; - virtual SLANG_NO_THROW void SLANG_MCALL setFramebuffer(IFramebuffer* frameBuffer) = 0; - virtual SLANG_NO_THROW void SLANG_MCALL clearFrame(uint32_t colorBufferMask, bool clearDepth, bool clearStencil) = 0; - virtual SLANG_NO_THROW void SLANG_MCALL setViewports(UInt count, const Viewport* viewports) = 0; - virtual SLANG_NO_THROW void SLANG_MCALL - setScissorRects(UInt count, const ScissorRect* scissors) = 0; - virtual SLANG_NO_THROW void SLANG_MCALL setPrimitiveTopology(PrimitiveTopology topology) = 0; - virtual SLANG_NO_THROW void SLANG_MCALL setVertexBuffers( + virtual Result createRootShaderObject(IShaderProgram* program, IShaderObject** outObject) = 0; + virtual void bindRootShaderObject(IShaderObject* rootObject) = 0; + virtual void setPipelineState(IPipelineState* state) = 0; + virtual void setFramebuffer(IFramebuffer* frameBuffer) = 0; + virtual void clearFrame(uint32_t colorBufferMask, bool clearDepth, bool clearStencil) = 0; + virtual void setViewports(UInt count, const Viewport* viewports) = 0; + virtual void setScissorRects(UInt count, const ScissorRect* scissors) = 0; + virtual void setPrimitiveTopology(PrimitiveTopology topology) = 0; + virtual void setVertexBuffers( UInt startSlot, UInt slotCount, IBufferResource* const* buffers, const UInt* strides, const UInt* offsets) = 0; - virtual SLANG_NO_THROW void SLANG_MCALL - setIndexBuffer(IBufferResource* buffer, Format indexFormat, UInt offset = 0) = 0; - virtual SLANG_NO_THROW void SLANG_MCALL draw(UInt vertexCount, UInt startVertex = 0) = 0; - virtual SLANG_NO_THROW void SLANG_MCALL - drawIndexed(UInt indexCount, UInt startIndex = 0, UInt baseVertex = 0) = 0; - virtual SLANG_NO_THROW void SLANG_MCALL setStencilReference(uint32_t referenceValue) = 0; - virtual SLANG_NO_THROW void SLANG_MCALL dispatchCompute(int x, int y, int z) = 0; - virtual SLANG_NO_THROW void SLANG_MCALL copyBuffer( + virtual void setIndexBuffer(IBufferResource* buffer, Format indexFormat, UInt offset = 0) = 0; + virtual void draw(UInt vertexCount, UInt startVertex = 0) = 0; + virtual void drawIndexed(UInt indexCount, UInt startIndex = 0, UInt baseVertex = 0) = 0; + virtual void setStencilReference(uint32_t referenceValue) = 0; + virtual void dispatchCompute(int x, int y, int z) = 0; + virtual void copyBuffer( IBufferResource* dst, size_t dstOffset, IBufferResource* src, size_t srcOffset, size_t size) = 0; - virtual SLANG_NO_THROW void SLANG_MCALL submitGpuWork() = 0; - virtual SLANG_NO_THROW void SLANG_MCALL waitForGpu() = 0; + virtual void submitGpuWork() = 0; + virtual void waitForGpu() = 0; virtual void* map(IBufferResource* buffer, MapFlavor flavor) = 0; virtual void unmap(IBufferResource* buffer) = 0; - virtual void bindRootShaderObject(PipelineType pipelineType, IShaderObject* shaderObject) = 0; public: Slang::ComPtr<ICommandQueue> m_queue; @@ -82,4 +80,133 @@ public: size_t size, ISlangBlob** outBlob) override; }; + +class ImmediateComputeDeviceBase : public ImmediateRendererBase +{ +public: + // Provide empty implementation for devices without graphics support. + virtual void setFramebuffer(IFramebuffer* frameBuffer) override { SLANG_UNUSED(frameBuffer); } + virtual void clearFrame(uint32_t colorBufferMask, bool clearDepth, bool clearStencil) override + { + SLANG_UNUSED(colorBufferMask); + SLANG_UNUSED(clearDepth); + SLANG_UNUSED(clearStencil); + } + virtual void setViewports(UInt count, const Viewport* viewports) override + { + SLANG_UNUSED(count); + SLANG_UNUSED(viewports); + } + virtual void setScissorRects(UInt count, const ScissorRect* scissors) override + { + SLANG_UNUSED(count); + SLANG_UNUSED(scissors); + } + virtual void setPrimitiveTopology(PrimitiveTopology topology) override + { + SLANG_UNUSED(topology); + } + virtual void setVertexBuffers( + UInt startSlot, + UInt slotCount, + IBufferResource* const* buffers, + const UInt* strides, + const UInt* offsets) override + { + SLANG_UNUSED(startSlot); + SLANG_UNUSED(slotCount); + SLANG_UNUSED(buffers); + SLANG_UNUSED(strides); + SLANG_UNUSED(offsets); + } + virtual void setIndexBuffer(IBufferResource* buffer, Format indexFormat, UInt offset = 0) + override + { + SLANG_UNUSED(buffer); + SLANG_UNUSED(indexFormat); + SLANG_UNUSED(offset); + } + virtual void draw(UInt vertexCount, UInt startVertex = 0) override + { + SLANG_UNUSED(vertexCount); + SLANG_UNUSED(startVertex); + } + virtual void drawIndexed(UInt indexCount, UInt startIndex = 0, UInt baseVertex = 0) override + { + SLANG_UNUSED(indexCount); + SLANG_UNUSED(startIndex); + SLANG_UNUSED(baseVertex); + } + virtual void setStencilReference(uint32_t referenceValue) override + { + SLANG_UNUSED(referenceValue); + } + + virtual SLANG_NO_THROW Result SLANG_MCALL createSwapchain( + const ISwapchain::Desc& desc, + WindowHandle window, + ISwapchain** outSwapchain) override + { + SLANG_UNUSED(desc); + SLANG_UNUSED(window); + SLANG_UNUSED(outSwapchain); + return SLANG_FAIL; + } + virtual SLANG_NO_THROW Result SLANG_MCALL createFramebufferLayout( + const IFramebufferLayout::Desc& desc, + IFramebufferLayout** outLayout) override + { + SLANG_UNUSED(desc); + SLANG_UNUSED(outLayout); + return SLANG_FAIL; + } + virtual SLANG_NO_THROW Result SLANG_MCALL + createFramebuffer(const IFramebuffer::Desc& desc, IFramebuffer** outFramebuffer) override + { + SLANG_UNUSED(desc); + SLANG_UNUSED(outFramebuffer); + return SLANG_FAIL; + } + virtual SLANG_NO_THROW Result SLANG_MCALL createRenderPassLayout( + const IRenderPassLayout::Desc& desc, + IRenderPassLayout** outRenderPassLayout) override + { + SLANG_UNUSED(desc); + SLANG_UNUSED(outRenderPassLayout); + return SLANG_FAIL; + } + + virtual SLANG_NO_THROW Result SLANG_MCALL createInputLayout( + const InputElementDesc* inputElements, + UInt inputElementCount, + IInputLayout** outLayout) override + { + SLANG_UNUSED(inputElements); + SLANG_UNUSED(inputElementCount); + SLANG_UNUSED(outLayout); + return SLANG_E_NOT_AVAILABLE; + } + virtual SLANG_NO_THROW Result SLANG_MCALL createGraphicsPipelineState( + const GraphicsPipelineStateDesc& desc, + IPipelineState** outState) override + { + SLANG_UNUSED(desc); + SLANG_UNUSED(outState); + return SLANG_E_NOT_AVAILABLE; + } + virtual SLANG_NO_THROW SlangResult SLANG_MCALL readTextureResource( + ITextureResource* texture, + ResourceState state, + ISlangBlob** outBlob, + size_t* outRowPitch, + size_t* outPixelSize) override + { + SLANG_UNUSED(texture); + SLANG_UNUSED(outBlob); + SLANG_UNUSED(outRowPitch); + SLANG_UNUSED(outPixelSize); + + return SLANG_E_NOT_AVAILABLE; + } +}; } diff --git a/tools/gfx/open-gl/render-gl.cpp b/tools/gfx/open-gl/render-gl.cpp index b9be7f1fc..9997fd5c8 100644 --- a/tools/gfx/open-gl/render-gl.cpp +++ b/tools/gfx/open-gl/render-gl.cpp @@ -95,16 +95,17 @@ class GLDevice : public ImmediateRendererBase { public: // Renderer implementation - virtual SLANG_NO_THROW SlangResult SLANG_MCALL initialize(const Desc& desc) override; - virtual SLANG_NO_THROW void SLANG_MCALL clearFrame(uint32_t mask, bool clearDepth, bool clearStencil) override; + virtual SLANG_NO_THROW Result SLANG_MCALL initialize(const Desc& desc) override; + virtual void clearFrame(uint32_t mask, bool clearDepth, bool clearStencil) override; virtual SLANG_NO_THROW Result SLANG_MCALL createSwapchain( const ISwapchain::Desc& desc, WindowHandle window, ISwapchain** outSwapchain) override; virtual SLANG_NO_THROW Result SLANG_MCALL createFramebufferLayout( const IFramebufferLayout::Desc& desc, IFramebufferLayout** outLayout) override; - virtual SLANG_NO_THROW Result SLANG_MCALL - createFramebuffer(const IFramebuffer::Desc& desc, IFramebuffer** outFramebuffer) override; - virtual SLANG_NO_THROW void SLANG_MCALL setFramebuffer(IFramebuffer* frameBuffer) override; - virtual SLANG_NO_THROW void SLANG_MCALL setStencilReference(uint32_t referenceValue) override; + virtual SLANG_NO_THROW Result SLANG_MCALL createFramebuffer( + const IFramebuffer::Desc& desc, + IFramebuffer** outFramebuffer) override; + virtual void setFramebuffer(IFramebuffer* frameBuffer) override; + virtual void setStencilReference(uint32_t referenceValue) override; virtual SLANG_NO_THROW Result SLANG_MCALL createTextureResource( IResource::Usage initialUsage, @@ -133,9 +134,8 @@ public: slang::TypeLayoutReflection* typeLayout, ShaderObjectLayoutBase** outLayout) override; virtual Result createShaderObject(ShaderObjectLayoutBase* layout, IShaderObject** outObject) override; - virtual SLANG_NO_THROW Result SLANG_MCALL - createRootShaderObject(IShaderProgram* program, IShaderObject** outObject) override; - virtual void bindRootShaderObject(PipelineType pipelineType, IShaderObject* shaderObject) override; + virtual Result createRootShaderObject(IShaderProgram* program, IShaderObject** outObject) override; + virtual void bindRootShaderObject(IShaderObject* shaderObject) override; virtual SLANG_NO_THROW Result SLANG_MCALL createProgram(const IShaderProgram::Desc& desc, IShaderProgram** outProgram) override; @@ -144,7 +144,7 @@ public: virtual SLANG_NO_THROW Result SLANG_MCALL createComputePipelineState( const ComputePipelineStateDesc& desc, IPipelineState** outState) override; - virtual SLANG_NO_THROW void SLANG_MCALL copyBuffer( + virtual void copyBuffer( IBufferResource* dst, size_t dstOffset, IBufferResource* src, @@ -155,28 +155,23 @@ public: virtual void* map(IBufferResource* buffer, MapFlavor flavor) override; virtual void unmap(IBufferResource* buffer) override; - virtual SLANG_NO_THROW void SLANG_MCALL - setPrimitiveTopology(PrimitiveTopology topology) override; + virtual void setPrimitiveTopology(PrimitiveTopology topology) override; - virtual SLANG_NO_THROW void SLANG_MCALL setVertexBuffers( + virtual void setVertexBuffers( UInt startSlot, UInt slotCount, IBufferResource* const* buffers, const UInt* strides, const UInt* offsets) override; - virtual SLANG_NO_THROW void SLANG_MCALL - setIndexBuffer(IBufferResource* buffer, Format indexFormat, UInt offset) override; - virtual SLANG_NO_THROW void SLANG_MCALL - setViewports(UInt count, Viewport const* viewports) override; - virtual SLANG_NO_THROW void SLANG_MCALL - setScissorRects(UInt count, ScissorRect const* rects) override; - virtual SLANG_NO_THROW void SLANG_MCALL setPipelineState(IPipelineState* state) override; - virtual SLANG_NO_THROW void SLANG_MCALL draw(UInt vertexCount, UInt startVertex) override; - virtual void SLANG_MCALL - drawIndexed(UInt indexCount, UInt startIndex, UInt baseVertex) override; - virtual SLANG_NO_THROW void SLANG_MCALL dispatchCompute(int x, int y, int z) override; - virtual SLANG_NO_THROW void SLANG_MCALL submitGpuWork() override {} - virtual SLANG_NO_THROW void SLANG_MCALL waitForGpu() override {} + virtual void setIndexBuffer(IBufferResource* buffer, Format indexFormat, UInt offset) override; + virtual void setViewports(UInt count, Viewport const* viewports) override; + virtual void setScissorRects(UInt count, ScissorRect const* rects) override; + virtual void setPipelineState(IPipelineState* state) override; + virtual void draw(UInt vertexCount, UInt startVertex) override; + virtual void drawIndexed(UInt indexCount, UInt startIndex, UInt baseVertex) override; + virtual void dispatchCompute(int x, int y, int z) override; + virtual void submitGpuWork() override {} + virtual void waitForGpu() override {} virtual SLANG_NO_THROW const DeviceInfo& SLANG_MCALL getDeviceInfo() const override { return m_info; @@ -2216,8 +2211,7 @@ SLANG_NO_THROW Result SLANG_MCALL GLDevice::initialize(const Desc& desc) return SLANG_OK; } -SLANG_NO_THROW void SLANG_MCALL - GLDevice::clearFrame(uint32_t mask, bool clearDepth, bool clearStencil) +void GLDevice::clearFrame(uint32_t mask, bool clearDepth, bool clearStencil) { uint32_t clearMask = 0; if (clearDepth) @@ -2332,7 +2326,7 @@ SLANG_NO_THROW Result SLANG_MCALL return SLANG_OK; } -SLANG_NO_THROW void SLANG_MCALL GLDevice::setFramebuffer(IFramebuffer* frameBuffer) +void GLDevice::setFramebuffer(IFramebuffer* frameBuffer) { m_currentFramebuffer = static_cast<FramebufferImpl*>(frameBuffer); } @@ -2745,7 +2739,7 @@ void GLDevice::unmap(IBufferResource* bufferIn) glUnmapBuffer(buffer->m_target); } -SLANG_NO_THROW void SLANG_MCALL GLDevice::setPrimitiveTopology(PrimitiveTopology topology) +void GLDevice::setPrimitiveTopology(PrimitiveTopology topology) { GLenum glTopology = 0; switch (topology) @@ -2759,7 +2753,7 @@ SLANG_NO_THROW void SLANG_MCALL GLDevice::setPrimitiveTopology(PrimitiveTopology m_boundPrimitiveTopology = glTopology; } -SLANG_NO_THROW void SLANG_MCALL GLDevice::setVertexBuffers( +void GLDevice::setVertexBuffers( UInt startSlot, UInt slotCount, IBufferResource* const* buffers, @@ -2779,8 +2773,7 @@ SLANG_NO_THROW void SLANG_MCALL GLDevice::setVertexBuffers( } } -SLANG_NO_THROW void SLANG_MCALL - GLDevice::setIndexBuffer(IBufferResource* buffer, Format indexFormat, UInt offset) +void GLDevice::setIndexBuffer(IBufferResource* buffer, Format indexFormat, UInt offset) { auto bufferImpl = static_cast<BufferResourceImpl*>(buffer); m_boundIndexBuffer = bufferImpl->m_handle; @@ -2788,7 +2781,7 @@ SLANG_NO_THROW void SLANG_MCALL m_boundIndexBufferSize = bufferImpl->m_size; } -SLANG_NO_THROW void SLANG_MCALL GLDevice::setViewports(UInt count, Viewport const* viewports) +void GLDevice::setViewports(UInt count, Viewport const* viewports) { assert(count == 1); auto viewport = viewports[0]; @@ -2800,7 +2793,7 @@ SLANG_NO_THROW void SLANG_MCALL GLDevice::setViewports(UInt count, Viewport cons glDepthRange(viewport.minZ, viewport.maxZ); } -SLANG_NO_THROW void SLANG_MCALL GLDevice::setScissorRects(UInt count, ScissorRect const* rects) +void GLDevice::setScissorRects(UInt count, ScissorRect const* rects) { assert(count <= 1); if( count ) @@ -2828,7 +2821,7 @@ SLANG_NO_THROW void SLANG_MCALL GLDevice::setScissorRects(UInt count, ScissorRec } } -SLANG_NO_THROW void SLANG_MCALL GLDevice::setPipelineState(IPipelineState* state) +void GLDevice::setPipelineState(IPipelineState* state) { auto pipelineStateImpl = static_cast<PipelineStateImpl*>(state); @@ -2839,15 +2832,14 @@ SLANG_NO_THROW void SLANG_MCALL GLDevice::setPipelineState(IPipelineState* state glUseProgram(programID); } -SLANG_NO_THROW void SLANG_MCALL GLDevice::draw(UInt vertexCount, UInt startVertex = 0) +void GLDevice::draw(UInt vertexCount, UInt startVertex = 0) { flushStateForDraw(); glDrawArrays(m_boundPrimitiveTopology, (GLint)startVertex, (GLsizei)vertexCount); } -SLANG_NO_THROW void SLANG_MCALL - GLDevice::drawIndexed(UInt indexCount, UInt startIndex, UInt baseVertex) +void GLDevice::drawIndexed(UInt indexCount, UInt startIndex, UInt baseVertex) { flushStateForDraw(); @@ -2859,7 +2851,7 @@ SLANG_NO_THROW void SLANG_MCALL (GLint)baseVertex); } -SLANG_NO_THROW void SLANG_MCALL GLDevice::dispatchCompute(int x, int y, int z) +void GLDevice::dispatchCompute(int x, int y, int z) { glDispatchCompute(x, y, z); } @@ -3005,7 +2997,7 @@ Result GLDevice::createRootShaderObject(IShaderProgram* program, IShaderObject** return SLANG_OK; } -void GLDevice::bindRootShaderObject(PipelineType pipelineType, IShaderObject* shaderObject) +void GLDevice::bindRootShaderObject(IShaderObject* shaderObject) { RootShaderObjectImpl* rootShaderObjectImpl = static_cast<RootShaderObjectImpl*>(shaderObject); RefPtr<PipelineStateBase> specializedPipeline; diff --git a/tools/gfx/renderer-shared.h b/tools/gfx/renderer-shared.h index d1ecebfce..79c965631 100644 --- a/tools/gfx/renderer-shared.h +++ b/tools/gfx/renderer-shared.h @@ -271,6 +271,10 @@ public: // pipeline cannot be used directly and must be specialized first. bool isSpecializable = false; ComPtr<IShaderProgram> m_program; + template <typename TProgram> TProgram* getProgram() + { + return static_cast<TProgram*>(m_program.get()); + } protected: void initializeBase(const PipelineStateDesc& inDesc); diff --git a/tools/gfx/transient-resource-heap-base.h b/tools/gfx/transient-resource-heap-base.h new file mode 100644 index 000000000..2376ab1ac --- /dev/null +++ b/tools/gfx/transient-resource-heap-base.h @@ -0,0 +1,113 @@ +#include "slang-gfx.h" +#include "source/core/slang-basic.h" + +namespace gfx +{ +template <typename TDevice, typename TBufferResource> +class TransientResourceHeapBase + : public ITransientResourceHeap + , public Slang::RefObject +{ +public: + SLANG_REF_OBJECT_IUNKNOWN_ALL + ITransientResourceHeap* getInterface(const Slang::Guid& guid) + { + if (guid == GfxGUID::IID_ISlangUnknown || guid == GfxGUID::IID_ITransientResourceHeap) + return static_cast<ITransientResourceHeap*>(this); + return nullptr; + } + +public: + TDevice* m_device; + Slang::List<Slang::RefPtr<TBufferResource>> m_constantBuffers; + Slang::Index m_constantBufferAllocCounter = 0; + size_t m_constantBufferOffsetAllocCounter = 0; + uint64_t m_version; + uint64_t getVersion() { return m_version; } + uint64_t& getVersionCounter() + { + static uint64_t version = 1; + return version; + } + + Result init(const ITransientResourceHeap::Desc& desc, TDevice* device) + { + m_device = device; + + if (desc.constantBufferSize) + { + Slang::ComPtr<IBufferResource> bufferPtr; + IBufferResource::Desc bufferDesc; + bufferDesc.type = IResource::Type::Buffer; + bufferDesc.setDefaults(IResource::Usage::ConstantBuffer); + bufferDesc.init(desc.constantBufferSize); + bufferDesc.cpuAccessFlags = IResource::AccessFlag::Write; + SLANG_RETURN_ON_FAIL(m_device->createBufferResource( + IResource::Usage::ConstantBuffer, bufferDesc, nullptr, bufferPtr.writeRef())); + m_constantBuffers.add(static_cast<TBufferResource*>(bufferPtr.get())); + } + + m_version = getVersionCounter(); + getVersionCounter()++; + return SLANG_OK; + } + + Result allocateConstantBuffer( + size_t size, + IBufferResource*& outBufferWeakPtr, + size_t& outOffset) + { + size_t bufferAllocOffset = m_constantBufferOffsetAllocCounter; + Slang::Index bufferId = -1; + // Find first constant buffer from `m_constantBufferAllocCounter` that has enough space + // for this allocation. + for (Slang::Index i = m_constantBufferAllocCounter; i < m_constantBuffers.getCount(); i++) + { + auto cb = m_constantBuffers[i].Ptr(); + if (bufferAllocOffset + size <= cb->getDesc()->sizeInBytes) + { + bufferId = i; + break; + } + bufferAllocOffset = 0; + } + // If we cannot find an existing constant buffer with sufficient free space, + // create a new constant buffer. + if (bufferId == -1) + { + Slang::ComPtr<IBufferResource> bufferPtr; + IBufferResource::Desc bufferDesc; + bufferDesc.type = IResource::Type::Buffer; + bufferDesc.setDefaults(IResource::Usage::ConstantBuffer); + bufferDesc.cpuAccessFlags |= IResource::AccessFlag::Write; + size_t lastConstantBufferSize = 0; + if (m_constantBuffers.getCount()) + { + lastConstantBufferSize = m_constantBuffers.getLast()->getDesc()->sizeInBytes; + } + bufferDesc.init(Slang::Math::Max( + lastConstantBufferSize * 2, Slang::Math::Max(size, size_t(4 << 20)))); + SLANG_RETURN_ON_FAIL(m_device->createBufferResource( + IResource::Usage::ConstantBuffer, bufferDesc, nullptr, bufferPtr.writeRef())); + bufferId = m_constantBuffers.getCount(); + bufferAllocOffset = 0; + m_constantBuffers.add(static_cast<TBufferResource*>(bufferPtr.get())); + } + // Sub allocate from current constant buffer. + outBufferWeakPtr = m_constantBuffers[bufferId].Ptr(); + outOffset = bufferAllocOffset; + m_constantBufferAllocCounter = bufferId; + m_constantBufferOffsetAllocCounter = bufferAllocOffset + size; + return SLANG_OK; + } + + void reset() + { + m_constantBufferAllocCounter = 0; + m_constantBufferOffsetAllocCounter = 0; + m_version = getVersionCounter(); + getVersionCounter()++; + } +}; + +} // namespace gfx diff --git a/tools/gfx/vulkan/render-vk.cpp b/tools/gfx/vulkan/render-vk.cpp index 96a0d1047..32ff0e7a2 100644 --- a/tools/gfx/vulkan/render-vk.cpp +++ b/tools/gfx/vulkan/render-vk.cpp @@ -3,6 +3,7 @@ //WORKING:#include "options.h" #include "../renderer-shared.h" +#include "../transient-resource-heap-base.h" #include "core/slang-basic.h" #include "core/slang-blob.h" @@ -93,8 +94,6 @@ public: ShaderObjectLayoutBase** outLayout) override; virtual Result createShaderObject(ShaderObjectLayoutBase* layout, IShaderObject** outObject) override; - virtual SLANG_NO_THROW Result SLANG_MCALL - createRootShaderObject(IShaderProgram* program, IShaderObject** outObject) override; virtual SLANG_NO_THROW Result SLANG_MCALL createProgram(const IShaderProgram::Desc& desc, IShaderProgram** outProgram) override; @@ -155,7 +154,7 @@ public: }; class InputLayoutImpl : public IInputLayout, public RefObject - { + { public: SLANG_REF_OBJECT_IUNKNOWN_ALL IInputLayout* getInterface(const Guid& guid) @@ -1365,8 +1364,6 @@ public: } VulkanApi* m_api; - RefPtr<ShaderObjectBase> m_rootShaderObject; - void init(CommandBufferImpl* commandBuffer); void endEncodingImpl() @@ -1413,20 +1410,28 @@ public: m_vkPreCommandBuffer, static_cast<BufferResourceImpl*>(buffer), offset, size, data); } - Result bindRootShaderObjectImpl(PipelineType pipelineType, IShaderObject* object); + Result bindRootShaderObjectImpl(VkPipelineBindPoint bindPoint); - void setPipelineStateImpl(IPipelineState* state) + Result setPipelineStateImpl(IPipelineState* state, IShaderObject** outRootObject) { m_currentPipeline = static_cast<PipelineStateImpl*>(state); + SLANG_RETURN_ON_FAIL(m_commandBuffer->m_rootObject.init( + m_commandBuffer->m_renderer, + m_currentPipeline->getProgram<ShaderProgramImpl>()->m_rootObjectLayout)); + *outRootObject = &m_commandBuffer->m_rootObject; + return SLANG_OK; } void flushBindingState(VkPipelineBindPoint pipelineBindPoint) { auto& api = *m_api; + bindRootShaderObjectImpl(pipelineBindPoint); + // Get specialized pipeline state and bind it. // RefPtr<PipelineStateBase> newPipeline; - m_device->maybeSpecializePipeline(m_currentPipeline, m_rootShaderObject, newPipeline); + m_device->maybeSpecializePipeline( + m_currentPipeline, &m_commandBuffer->m_rootObject, newPipeline); PipelineStateImpl* newPipelineImpl = static_cast<PipelineStateImpl*>(newPipeline.Ptr()); auto pipelineBindPointId = getBindPointIndex(pipelineBindPoint); if (m_boundPipelines[pipelineBindPointId] != newPipelineImpl->m_pipeline) @@ -1788,6 +1793,8 @@ public: { m_layout = layout; + m_upToDateConstantBufferHeapVersion = 0; + // If the layout tells us that there is any uniform data, // then we will allocate a CPU memory buffer to hold that data // while it is being set from the host. @@ -2046,7 +2053,9 @@ public: RootBindingState* bindingState, BindingOffset offset, VkDescriptorType descriptorType, - BufferResourceImpl* buffer) + BufferResourceImpl* buffer, + size_t bufferOffset, + size_t bufferSize) { auto descriptorSet = bindingState->descriptorSets[offset.descriptorSetIndexOffset]; VkWriteDescriptorSet write = {}; @@ -2059,11 +2068,21 @@ public: auto& bufferInfo = bindingState->descriptorInfos.reserveRange(1)->bufferInfo; write.pBufferInfo = &bufferInfo; bufferInfo.buffer = buffer->m_buffer.m_buffer; - bufferInfo.offset = 0; - bufferInfo.range = buffer->getDesc()->sizeInBytes; + bufferInfo.offset = bufferOffset; + bufferInfo.range = bufferSize; bindingState->descriptorSetWrites.add(write); } + static void writeBufferDescriptor( + RootBindingState* bindingState, + BindingOffset offset, + VkDescriptorType descriptorType, + BufferResourceImpl* buffer) + { + writeBufferDescriptor( + bindingState, offset, descriptorType, buffer, 0, buffer->getDesc()->sizeInBytes); + } + static void writePlainBufferDescriptor( RootBindingState* bindingState, BindingOffset offset, @@ -2198,8 +2217,11 @@ public: // operations on a shader object once an operation has requested this buffer // be created. We need to enforce that rule if we want to rely on it. // - if (m_ordinaryDataBuffer) + if (m_upToDateConstantBufferHeapVersion == + encoder->m_commandBuffer->m_transientHeap->getVersion()) + { return SLANG_OK; + } // Computing the size of the ordinary data buffer is *not* just as simple // as using the size of the `m_ordinayData` array that we store. The reason @@ -2216,22 +2238,19 @@ public: RefPtr<ShaderObjectLayoutImpl> specializedLayout; SLANG_RETURN_ON_FAIL(_getSpecializedLayout(specializedLayout.writeRef())); - auto specializedOrdinaryDataSize = specializedLayout->getElementTypeLayout()->getSize(); - if (specializedOrdinaryDataSize == 0) + m_constantBufferSize = specializedLayout->getElementTypeLayout()->getSize(); + if (m_constantBufferSize == 0) + { + m_upToDateConstantBufferHeapVersion = + encoder->m_commandBuffer->m_transientHeap->getVersion(); return SLANG_OK; + } // Once we have computed how large the buffer should be, we can allocate - // it using the existing public `IDevice` API. + // it from the transient resource heap. // - IDevice* device = getRenderer(); - IBufferResource::Desc bufferDesc; - bufferDesc.init(specializedOrdinaryDataSize); - bufferDesc.cpuAccessFlags |= IResource::AccessFlag::Write; - SLANG_RETURN_ON_FAIL(device->createBufferResource( - IResource::Usage::ConstantBuffer, - bufferDesc, - nullptr, - m_ordinaryDataBuffer.writeRef())); + SLANG_RETURN_ON_FAIL(encoder->m_commandBuffer->m_transientHeap->allocateConstantBuffer( + m_constantBufferSize, m_constantBuffer, m_constantBufferOffset)); // Once the buffer is allocated, we can use `_writeOrdinaryData` to fill it in. // @@ -2240,7 +2259,16 @@ public: // don't need or want to inline it into this call site. // SLANG_RETURN_ON_FAIL(_writeOrdinaryData( - encoder, m_ordinaryDataBuffer, 0, specializedOrdinaryDataSize, specializedLayout)); + encoder, + m_constantBuffer, + m_constantBufferOffset, + m_constantBufferSize, + specializedLayout)); + + // Update version tracker so that we don't redundantly alloc and fill in + // constant buffers for the same transient heap. + m_upToDateConstantBufferHeapVersion = + encoder->m_commandBuffer->m_transientHeap->getVersion(); return SLANG_OK; } @@ -2264,11 +2292,16 @@ public: // the given `descriptorSet` and update the base range index for // subsequent binding operations to account for it. // - if (m_ordinaryDataBuffer) + if (m_constantBuffer) { - auto bufferImpl = static_cast<BufferResourceImpl*>(m_ordinaryDataBuffer.get()); + auto bufferImpl = static_cast<BufferResourceImpl*>(m_constantBuffer); writeBufferDescriptor( - bindingState, offset, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, bufferImpl); + bindingState, + offset, + VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, + bufferImpl, + m_constantBufferOffset, + m_constantBufferSize); offset.descriptorRangeOffset++; } @@ -2434,11 +2467,15 @@ public: List<RefPtr<ShaderObjectImpl>> m_objects; - /// A constant buffer used to stored ordinary data for this object - /// and existential-type sub-objects. - /// - /// Created on demand with `_createOrdinaryDataBufferIfNeeded()` - ComPtr<IBufferResource> m_ordinaryDataBuffer; + // The version number of the transient resource heap that contains up-to-date + // constant buffer content for this shader object. + uint64_t m_upToDateConstantBufferHeapVersion; + // The transient constant buffer that holds the GPU copy of the constant data, + // weak referenced. + IBufferResource* m_constantBuffer = nullptr; + // The offset into the transient constant buffer where the constant data starts. + size_t m_constantBufferOffset = 0; + size_t m_constantBufferSize = 0; /// Get the layout of this shader object with specialization arguments considered /// @@ -2531,20 +2568,12 @@ public: class RootShaderObjectImpl : public ShaderObjectImpl { typedef ShaderObjectImpl Super; - public: - static Result create( - IDevice* device, - RootShaderObjectLayout* layout, - RootShaderObjectImpl** outShaderObject) - { - RefPtr<RootShaderObjectImpl> object = new RootShaderObjectImpl(); - SLANG_RETURN_ON_FAIL(object->init(device, layout)); - - *outShaderObject = object.detach(); - return SLANG_OK; - } - + // Override default reference counting behavior to disable lifetime management. + // Root objects are managed by command buffer and does not need to be freed by the user. + SLANG_NO_THROW uint32_t SLANG_MCALL addRef() override { return 1; } + SLANG_NO_THROW uint32_t SLANG_MCALL release() override { return 1; } + public: RootShaderObjectLayout* getLayout() { return static_cast<RootShaderObjectLayout*>(m_layout.Ptr()); @@ -2596,11 +2625,12 @@ public: return SLANG_OK; } - protected: + public: Result init(IDevice* device, RootShaderObjectLayout* layout) { SLANG_RETURN_ON_FAIL(Super::init(device, layout)); - + m_specializedLayout = nullptr; + m_entryPoints.clear(); for (auto entryPointInfo : layout->getEntryPoints()) { RefPtr<EntryPointShaderObject> entryPoint; @@ -2612,6 +2642,7 @@ public: return SLANG_OK; } + protected: Result _createSpecializedLayout(ShaderObjectLayoutImpl** outLayout) SLANG_OVERRIDE { ExtendedShaderObjectTypeList specializationArgs; @@ -2699,6 +2730,8 @@ public: List<RefPtr<EntryPointShaderObject>> m_entryPoints; }; + class TransientResourceHeapImpl; + class CommandBufferImpl : public ICommandBuffer , public RefObject @@ -2718,8 +2751,9 @@ public: VkCommandPool m_pool; VkFence m_fence; VKDevice* m_renderer; - DescriptorSetAllocator* m_transientDescSetAllocator; + TransientResourceHeapImpl* m_transientHeap; bool m_isPreCommandBufferEmpty = true; + RootShaderObjectImpl m_rootObject; // Command buffers are deallocated by its command pool, // so no need to free individually. ~CommandBufferImpl() = default; @@ -2728,10 +2762,10 @@ public: VKDevice* renderer, VkCommandPool pool, VkFence fence, - DescriptorSetAllocator* transientDescSetAllocator) + TransientResourceHeapImpl* transientHeap) { m_renderer = renderer; - m_transientDescSetAllocator = transientDescSetAllocator; + m_transientHeap = transientHeap; m_pool = pool; m_fence = fence; @@ -2843,16 +2877,10 @@ public: endEncodingImpl(); } - virtual SLANG_NO_THROW void SLANG_MCALL - setPipelineState(IPipelineState* pipelineState) override - { - setPipelineStateImpl(pipelineState); - } - - virtual SLANG_NO_THROW void SLANG_MCALL - bindRootShaderObject(IShaderObject* object) override + virtual SLANG_NO_THROW Result SLANG_MCALL + bindPipeline(IPipelineState* pipelineState, IShaderObject** outRootObject) override { - bindRootShaderObjectImpl(PipelineType::Graphics, object); + return setPipelineStateImpl(pipelineState, outRootObject); } virtual SLANG_NO_THROW void SLANG_MCALL @@ -3070,16 +3098,10 @@ public: endEncodingImpl(); } - virtual SLANG_NO_THROW void SLANG_MCALL - setPipelineState(IPipelineState* pipelineState) override + virtual SLANG_NO_THROW Result SLANG_MCALL + bindPipeline(IPipelineState* pipelineState, IShaderObject** outRootObject) override { - setPipelineStateImpl(pipelineState); - } - - virtual SLANG_NO_THROW void SLANG_MCALL - bindRootShaderObject(IShaderObject* object) override - { - bindRootShaderObjectImpl(PipelineType::Compute, object); + return setPipelineStateImpl(pipelineState, outRootObject); } virtual SLANG_NO_THROW void SLANG_MCALL dispatchCompute(int x, int y, int z) override @@ -3346,17 +3368,10 @@ public: }; class TransientResourceHeapImpl - : public ITransientResourceHeap - , public RefObject + : public TransientResourceHeapBase<VKDevice, BufferResourceImpl> { - public: - SLANG_REF_OBJECT_IUNKNOWN_ALL - ITransientResourceHeap* getInterface(const Slang::Guid& guid) - { - if (guid == GfxGUID::IID_ISlangUnknown || guid == GfxGUID::IID_ITransientResourceHeap) - return static_cast<ITransientResourceHeap*>(this); - return nullptr; - } + private: + typedef TransientResourceHeapBase<VKDevice, BufferResourceImpl> Super; public: VkCommandPool m_commandPool; @@ -3364,8 +3379,6 @@ public: VkFence m_fence; List<RefPtr<CommandBufferImpl>> m_commandBufferPool; uint32_t m_commandBufferAllocId = 0; - RefPtr<BufferResourceImpl> m_constantBuffer; - RefPtr<VKDevice> m_device; Result init(const ITransientResourceHeap::Desc& desc, VKDevice* device); ~TransientResourceHeapImpl() @@ -3798,12 +3811,10 @@ void VKDevice::PipelineCommandEncoder::init(CommandBufferImpl* commandBuffer) } Result VKDevice::PipelineCommandEncoder::bindRootShaderObjectImpl( - PipelineType pipelineType, - IShaderObject* object) + VkPipelineBindPoint bindPoint) { // Obtain specialized root layout. - auto rootObjectImpl = static_cast<RootShaderObjectImpl*>(object); - m_rootShaderObject = rootObjectImpl; + auto rootObjectImpl = &m_commandBuffer->m_rootObject; auto specializedLayout = rootObjectImpl->getSpecializedLayout(); if (!specializedLayout) @@ -3813,7 +3824,7 @@ Result VKDevice::PipelineCommandEncoder::bindRootShaderObjectImpl( bindState.pushConstantRanges = specializedLayout->m_pushConstantRanges.getView(); bindState.pipelineLayout = specializedLayout->m_pipelineLayout; bindState.device = m_device; - bindState.descriptorSetAllocator = m_commandBuffer->m_transientDescSetAllocator; + bindState.descriptorSetAllocator = &m_commandBuffer->m_transientHeap->m_descSetAllocator; // Write bindings into descriptor sets. This step allocate descriptor sets and collects // all `VkWriteDescriptorSet` operations in `bindState.descriptorSetWrites`. @@ -3831,7 +3842,7 @@ Result VKDevice::PipelineCommandEncoder::bindRootShaderObjectImpl( // Bind descriptor sets. m_device->m_api.vkCmdBindDescriptorSets( m_commandBuffer->m_commandBuffer, - VulkanUtil::getPipelineBindPoint(pipelineType), + bindPoint, specializedLayout->m_pipelineLayout, 0, (uint32_t)bindState.descriptorSets.getCount(), @@ -3991,7 +4002,7 @@ Result VKDevice::initVulkanInstanceAndDevice(bool useValidationLayer) VkApplicationInfo applicationInfo = { VK_STRUCTURE_TYPE_APPLICATION_INFO }; applicationInfo.pApplicationName = "slang-render-test"; applicationInfo.pEngineName = "slang-render-test"; - applicationInfo.apiVersion = VK_API_VERSION_1_0; + applicationInfo.apiVersion = VK_API_VERSION_1_1; applicationInfo.engineVersion = 1; applicationInfo.applicationVersion = 1; const char* instanceExtensions[] = @@ -4067,7 +4078,17 @@ Result VKDevice::initVulkanInstanceAndDevice(bool useValidationLayer) instanceCreateInfo.ppEnabledLayerNames = layerNames; } } - SLANG_RETURN_ON_FAIL(m_api.vkCreateInstance(&instanceCreateInfo, nullptr, &instance)); + uint32_t apiVersionsToTry[] = {VK_API_VERSION_1_2, VK_API_VERSION_1_1, VK_API_VERSION_1_0}; + for (auto apiVersion : apiVersionsToTry) + { + applicationInfo.apiVersion = apiVersion; + if (m_api.vkCreateInstance(&instanceCreateInfo, nullptr, &instance) == VK_SUCCESS) + { + break; + } + } + if (!instance) + return SLANG_FAIL; SLANG_RETURN_ON_FAIL(m_api.initInstanceProcs(instance)); if (useValidationLayer) @@ -4163,6 +4184,10 @@ Result VKDevice::initVulkanInstanceAndDevice(bool useValidationLayer) VkPhysicalDeviceTimelineSemaphoreFeatures timelineFeatures = { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES }; // Extended dynamic state features VkPhysicalDeviceExtendedDynamicStateFeaturesEXT extendedDynamicStateFeatures = { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_FEATURES_EXT }; + // Subgroup extended type features + VkPhysicalDeviceShaderSubgroupExtendedTypesFeatures shaderSubgroupExtendedTypeFeatures = { + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_SUBGROUP_EXTENDED_TYPES_FEATURES}; + // API version check, can't use vkGetPhysicalDeviceProperties2 yet since this device might not support it if (VK_MAKE_VERSION(majorVersion, minorVersion, 0) >= VK_API_VERSION_1_1 && m_api.vkGetPhysicalDeviceProperties2 && @@ -4172,6 +4197,10 @@ Result VKDevice::initVulkanInstanceAndDevice(bool useValidationLayer) VkPhysicalDeviceFeatures2 deviceFeatures2 = {}; deviceFeatures2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2; + // Subgroup features + shaderSubgroupExtendedTypeFeatures.pNext = deviceFeatures2.pNext; + deviceFeatures2.pNext = &shaderSubgroupExtendedTypeFeatures; + // Extended dynamic states extendedDynamicStateFeatures.pNext = deviceFeatures2.pNext; deviceFeatures2.pNext = &extendedDynamicStateFeatures; @@ -4248,6 +4277,14 @@ Result VKDevice::initVulkanInstanceAndDevice(bool useValidationLayer) deviceExtensions.add(VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME); m_features.add("extended-dynamic-states"); } + + if (shaderSubgroupExtendedTypeFeatures.shaderSubgroupExtendedTypes) + { + shaderSubgroupExtendedTypeFeatures.pNext = (void*)deviceCreateInfo.pNext; + deviceCreateInfo.pNext = &shaderSubgroupExtendedTypeFeatures; + deviceExtensions.add(VK_KHR_SHADER_SUBGROUP_EXTENDED_TYPES_EXTENSION_NAME); + m_features.add("shader-subgroup-extended-types"); + } } m_queueFamilyIndex = m_api.findQueue(VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT); @@ -4263,7 +4300,7 @@ Result VKDevice::initVulkanInstanceAndDevice(bool useValidationLayer) deviceCreateInfo.enabledExtensionCount = uint32_t(deviceExtensions.getCount()); deviceCreateInfo.ppEnabledExtensionNames = deviceExtensions.getBuffer(); - + if (m_api.vkCreateDevice(m_api.m_physicalDevice, &deviceCreateInfo, nullptr, &m_device) != VK_SUCCESS) return SLANG_FAIL; SLANG_RETURN_ON_FAIL(m_api.initDeviceProcs(m_device)); @@ -4310,7 +4347,8 @@ Result VKDevice::TransientResourceHeapImpl::init( const ITransientResourceHeap::Desc& desc, VKDevice* device) { - m_device = device; + Super::init(desc, device); + m_descSetAllocator.m_api = &device->m_api; VkCommandPoolCreateInfo poolCreateInfo = {}; @@ -4325,6 +4363,7 @@ Result VKDevice::TransientResourceHeapImpl::init( fenceCreateInfo.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO; fenceCreateInfo.flags = VK_FENCE_CREATE_SIGNALED_BIT; device->m_api.vkCreateFence(device->m_api.m_device, &fenceCreateInfo, nullptr, &m_fence); + return SLANG_OK; } @@ -4341,7 +4380,7 @@ Result VKDevice::TransientResourceHeapImpl::createCommandBuffer(ICommandBuffer** RefPtr<CommandBufferImpl> commandBuffer = new CommandBufferImpl(); SLANG_RETURN_ON_FAIL(commandBuffer->init( - m_device, m_commandPool, m_fence, &m_descSetAllocator)); + m_device, m_commandPool, m_fence, this)); m_commandBufferPool.add(commandBuffer); m_commandBufferAllocId++; *outCmdBuffer = commandBuffer.detach(); @@ -4358,6 +4397,7 @@ Result VKDevice::TransientResourceHeapImpl::synchronizeAndReset() } api.vkResetCommandPool(api.m_device, m_commandPool, 0); m_descSetAllocator.reset(); + Super::reset(); return SLANG_OK; } @@ -5428,17 +5468,6 @@ Result VKDevice::createShaderObject(ShaderObjectLayoutBase* layout, IShaderObjec return SLANG_OK; } -Result SLANG_MCALL - VKDevice::createRootShaderObject(IShaderProgram* program, IShaderObject** outObject) -{ - auto programImpl = dynamic_cast<ShaderProgramImpl*>(program); - RefPtr<RootShaderObjectImpl> shaderObject; - SLANG_RETURN_ON_FAIL(RootShaderObjectImpl::create( - this, programImpl->m_rootObjectLayout, shaderObject.writeRef())); - *outObject = shaderObject.detach(); - return SLANG_OK; -} - Result VKDevice::createGraphicsPipelineState(const GraphicsPipelineStateDesc& inDesc, IPipelineState** outState) { GraphicsPipelineStateDesc desc = inDesc; diff --git a/tools/platform/gui.cpp b/tools/platform/gui.cpp index f78a52ce4..63cf3d51b 100644 --- a/tools/platform/gui.cpp +++ b/tools/platform/gui.cpp @@ -273,7 +273,7 @@ void GUI::endFrame(ITransientResourceHeap* transientHeap, IFramebuffer* framebuf auto renderEncoder = cmdBuf->encodeRenderCommands(renderPass, framebuffer); renderEncoder->setViewportAndScissor(viewport); - renderEncoder->setPipelineState(pipelineState); + renderEncoder->bindPipeline(pipelineState); renderEncoder->setVertexBuffer(0, vertexBuffer, sizeof(ImDrawVert)); renderEncoder->setIndexBuffer( diff --git a/tools/render-test/render-test-main.cpp b/tools/render-test/render-test-main.cpp index db51339f1..6e45e5b24 100644 --- a/tools/render-test/render-test-main.cpp +++ b/tools/render-test/render-test-main.cpp @@ -97,8 +97,8 @@ public: void renderFrame(IRenderCommandEncoder* encoder); void finalize(); - void applyBinding(PipelineType pipelineType, ICommandEncoder* encoder); - void setProjectionMatrix(IResourceCommandEncoder* encoder); + Result applyBinding(PipelineType pipelineType, ICommandEncoder* encoder); + void setProjectionMatrix(IShaderObject* rootObject); Result writeBindingOutput(const char* fileName); Result writeScreen(const char* filename); @@ -135,7 +135,6 @@ protected: Options m_options; - ComPtr<IShaderObject> m_programVars; ShaderOutputPlan m_outputPlan; }; @@ -400,8 +399,11 @@ SlangResult _assignVarsFromLayout( return context.assign(rootCursor, layout.rootVal); } -void RenderTestApp::applyBinding(PipelineType pipelineType, ICommandEncoder* encoder) +Result RenderTestApp::applyBinding(PipelineType pipelineType, ICommandEncoder* encoder) { + auto slangReflection = (slang::ProgramLayout*)spGetReflection( + m_compilationOutput.output.getRequestForReflection()); + switch (pipelineType) { case PipelineType::Compute: @@ -409,7 +411,9 @@ void RenderTestApp::applyBinding(PipelineType pipelineType, ICommandEncoder* enc ComPtr<IComputeCommandEncoder> computeEncoder; encoder->queryInterface( SLANG_UUID_IComputeCommandEncoder, (void**)computeEncoder.writeRef()); - computeEncoder->bindRootShaderObject(m_programVars); + auto rootObject = computeEncoder->bindPipeline(m_pipelineState); + SLANG_RETURN_ON_FAIL(_assignVarsFromLayout( + m_device, rootObject, m_compilationOutput.layout, m_outputPlan, slangReflection)); } break; case PipelineType::Graphics: @@ -417,12 +421,16 @@ void RenderTestApp::applyBinding(PipelineType pipelineType, ICommandEncoder* enc ComPtr<IRenderCommandEncoder> renderEncoder; encoder->queryInterface( SLANG_UUID_IRenderCommandEncoder, (void**)renderEncoder.writeRef()); - renderEncoder->bindRootShaderObject(m_programVars); + auto rootObject = renderEncoder->bindPipeline(m_pipelineState); + SLANG_RETURN_ON_FAIL(_assignVarsFromLayout( + m_device, rootObject, m_compilationOutput.layout, m_outputPlan, slangReflection)); + setProjectionMatrix(rootObject); } break; default: throw "unknown pipeline type"; } + return SLANG_OK; } SlangResult RenderTestApp::initialize( @@ -443,22 +451,6 @@ SlangResult RenderTestApp::initialize( SLANG_RETURN_ON_FAIL( device->createProgram(m_compilationOutput.output.desc, m_shaderProgram.writeRef())); - // If we are doing a non-pass-through compilation, then we will rely on - // Slang's reflection API to tell us what the parameters of the program are. - // - auto slangReflection = (slang::ProgramLayout*) spGetReflection(m_compilationOutput.output.getRequestForReflection()); - - // Once we have determined the layout of all the parameters we need to bind, - // we will create a shader object to use for storing and binding those parameters. - // - m_programVars = device->createRootShaderObject(m_shaderProgram); - - // Now we need to assign from the input parameter data that was parsed into - // the program vars we allocated. - // - SLANG_RETURN_ON_FAIL(_assignVarsFromLayout( - device, m_programVars, m_compilationOutput.layout, m_outputPlan, slangReflection)); - m_device = device; _initializeRenderPass(); @@ -616,11 +608,10 @@ void RenderTestApp::_initializeRenderPass() m_device->createRenderPassLayout(renderPassDesc, m_renderPass.writeRef()); } -void RenderTestApp::setProjectionMatrix(IResourceCommandEncoder* encoder) +void RenderTestApp::setProjectionMatrix(IShaderObject* rootObject) { - SLANG_UNUSED(encoder); auto info = m_device->getDeviceInfo(); - ShaderCursor(m_programVars) + ShaderCursor(rootObject) .getField("Uniforms") .getDereferenced() .setData(info.identityProjectionMatrix, sizeof(float) * 16); @@ -630,8 +621,6 @@ void RenderTestApp::renderFrame(IRenderCommandEncoder* encoder) { auto pipelineType = PipelineType::Graphics; - encoder->setPipelineState(m_pipelineState); - encoder->setPrimitiveTopology(PrimitiveTopology::TriangleList); encoder->setVertexBuffer(0, m_vertexBuffer, sizeof(Vertex)); @@ -643,7 +632,6 @@ void RenderTestApp::renderFrame(IRenderCommandEncoder* encoder) void RenderTestApp::runCompute(IComputeCommandEncoder* encoder) { auto pipelineType = PipelineType::Compute; - encoder->setPipelineState(m_pipelineState); applyBinding(pipelineType, encoder); encoder->dispatchCompute( m_options.computeDispatchSize[0], @@ -653,7 +641,6 @@ void RenderTestApp::runCompute(IComputeCommandEncoder* encoder) void RenderTestApp::finalize() { - m_programVars = nullptr; m_inputLayout = nullptr; m_vertexBuffer = nullptr; m_shaderProgram = nullptr; @@ -764,10 +751,6 @@ Result RenderTestApp::update() } else { - auto resEncoder = commandBuffer->encodeResourceCommands(); - setProjectionMatrix(resEncoder); - resEncoder->endEncoding(); - auto encoder = commandBuffer->encodeRenderCommands(m_renderPass, m_framebuffer); gfx::Viewport viewport = {}; viewport.maxZ = 1.0f; |
