diff options
Diffstat (limited to 'tools/gfx/vulkan/render-vk.cpp')
| -rw-r--r-- | tools/gfx/vulkan/render-vk.cpp | 657 |
1 files changed, 557 insertions, 100 deletions
diff --git a/tools/gfx/vulkan/render-vk.cpp b/tools/gfx/vulkan/render-vk.cpp index 97a527ca6..10cc7aae5 100644 --- a/tools/gfx/vulkan/render-vk.cpp +++ b/tools/gfx/vulkan/render-vk.cpp @@ -120,6 +120,15 @@ public: size_t offset, size_t size, ISlangBlob** outBlob) override; + + virtual SLANG_NO_THROW Result SLANG_MCALL getAccelerationStructurePrebuildInfo( + const IAccelerationStructure::BuildInputs& buildInputs, + IAccelerationStructure::PrebuildInfo* outPrebuildInfo) override; + + virtual SLANG_NO_THROW Result SLANG_MCALL createAccelerationStructure( + const IAccelerationStructure::CreateDesc& desc, + IAccelerationStructure** outView) override; + void waitForGpu(); virtual SLANG_NO_THROW const DeviceInfo& SLANG_MCALL getDeviceInfo() const override { @@ -128,6 +137,40 @@ public: /// Dtor ~VKDevice(); +public: + // Float16 features + VkPhysicalDeviceFloat16Int8FeaturesKHR float16Features = { + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT16_INT8_FEATURES_KHR}; + // 16 bit storage features + VkPhysicalDevice16BitStorageFeatures storage16BitFeatures = { + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES_KHR}; + // AtomicInt64 features + VkPhysicalDeviceShaderAtomicInt64FeaturesKHR atomicInt64Features = { + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_INT64_FEATURES_KHR}; + // Atomic Float features + VkPhysicalDeviceShaderAtomicFloatFeaturesEXT atomicFloatFeatures = { + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_FLOAT_FEATURES_EXT}; + // Timeline Semaphore features + VkPhysicalDeviceTimelineSemaphoreFeatures timelineFeatures = { + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES}; + // Extended dynamic state features + VkPhysicalDeviceExtendedDynamicStateFeaturesEXT extendedDynamicStateFeatures = { + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_FEATURES_EXT}; + // Subgroup extended type features + VkPhysicalDeviceShaderSubgroupExtendedTypesFeatures shaderSubgroupExtendedTypeFeatures = { + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_SUBGROUP_EXTENDED_TYPES_FEATURES}; + // Acceleration structure features + VkPhysicalDeviceAccelerationStructureFeaturesKHR accelerationStructureFeatures = { + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ACCELERATION_STRUCTURE_FEATURES_KHR}; + // Ray query (inline ray-tracing) features + VkPhysicalDeviceRayQueryFeaturesKHR rayQueryFeatures = { + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_QUERY_FEATURES_KHR}; + // Buffer device address features + VkPhysicalDeviceBufferDeviceAddressFeatures bufferDeviceAddressFeatures = { + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BUFFER_DEVICE_ADDRESS_FEATURES}; + +public: + class Buffer { public: @@ -303,6 +346,28 @@ public: VkDeviceSize size; }; + class AccelerationStructureImpl : public AccelerationStructureBase + { + public: + VkAccelerationStructureKHR m_vkHandle = VK_NULL_HANDLE; + RefPtr<BufferResourceImpl> m_buffer; + VkDeviceSize m_offset; + VkDeviceSize m_size; + RefPtr<VKDevice> m_device; + public: + virtual SLANG_NO_THROW DeviceAddress SLANG_MCALL getDeviceAddress() override + { + return m_buffer->getDeviceAddress() + m_offset; + } + ~AccelerationStructureImpl() + { + if (m_device) + { + m_device->m_api.vkDestroyAccelerationStructureKHR(m_device->m_api.m_device, m_vkHandle, nullptr); + } + } + }; + class FramebufferLayoutImpl : public FramebufferLayoutBase { public: @@ -2101,7 +2166,6 @@ public: class PipelineCommandEncoder : public RefObject { public: - bool m_isOpen = false; CommandBufferImpl* m_commandBuffer; VkCommandBuffer m_vkCommandBuffer; VkCommandBuffer m_vkPreCommandBuffer = VK_NULL_HANDLE; @@ -2130,7 +2194,6 @@ public: void endEncodingImpl() { - m_isOpen = false; for (auto& pipeline : m_boundPipelines) pipeline = VK_NULL_HANDLE; } @@ -2668,6 +2731,36 @@ public: } } + static void writeAccelerationStructureDescriptor( + RootBindingContext& context, + BindingOffset const& offset, + VkDescriptorType descriptorType, + ArrayView<RefPtr<ResourceViewImpl>> resourceViews) + { + auto descriptorSet = context.descriptorSets[offset.bindingSet]; + + Index count = resourceViews.getCount(); + for (Index i = 0; i < count; ++i) + { + auto accelerationStructure = static_cast<AccelerationStructureImpl*>( + static_cast<IResourceView*>(resourceViews[i].Ptr())); + + VkWriteDescriptorSetAccelerationStructureKHR writeAS = {}; + writeAS.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR; + writeAS.accelerationStructureCount = 1; + writeAS.pAccelerationStructures = &accelerationStructure->m_vkHandle; + VkWriteDescriptorSet write = {}; + write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + write.descriptorCount = 1; + write.descriptorType = descriptorType; + write.dstArrayElement = uint32_t(i); + write.dstBinding = offset.binding; + write.dstSet = descriptorSet; + write.pNext = &writeAS; + writeDescriptor(context, write); + } + } + static void writeTextureDescriptor( RootBindingContext& context, BindingOffset const& offset, @@ -2881,7 +2974,15 @@ public: VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, m_resourceViews.getArrayView(baseIndex, count)); break; - + case slang::BindingType::RayTracingAccelerationStructure: + rangeOffset.bindingSet += bindingRangeInfo.setOffset; + rangeOffset.binding += bindingRangeInfo.bindingOffset; + writeAccelerationStructureDescriptor( + context, + rangeOffset, + VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, + m_resourceViews.getArrayView(baseIndex, count)); + break; case slang::BindingType::VaryingInput: case slang::BindingType::VaryingOutput: break; @@ -3473,7 +3574,6 @@ public: VkCommandBuffer m_commandBuffer; VkCommandBuffer m_preCommandBuffer = VK_NULL_HANDLE; VkCommandPool m_pool; - VkFence m_fence; VKDevice* m_renderer; BreakableReference<TransientResourceHeapImpl> m_transientHeap; bool m_isPreCommandBufferEmpty = true; @@ -3485,13 +3585,11 @@ public: Result init( VKDevice* renderer, VkCommandPool pool, - VkFence fence, TransientResourceHeapImpl* transientHeap) { m_renderer = renderer; m_transientHeap = transientHeap; m_pool = pool; - m_fence = fence; auto& api = renderer->m_api; VkCommandBufferAllocateInfo allocInfo = {}; @@ -3576,21 +3674,6 @@ public: VkIndexType m_boundIndexFormat; public: - virtual SLANG_NO_THROW SlangResult SLANG_MCALL - queryInterface(SlangUUID const& uuid, void** outObject) override - { - if (uuid == GfxGUID::IID_ISlangUnknown || uuid == GfxGUID::IID_ICommandEncoder || - uuid == GfxGUID::IID_IRenderCommandEncoder) - { - *outObject = static_cast<IRenderCommandEncoder*>(this); - return SLANG_OK; - } - *outObject = nullptr; - return SLANG_E_NO_INTERFACE; - } - virtual SLANG_NO_THROW uint32_t SLANG_MCALL addRef() override { return 1; } - virtual SLANG_NO_THROW uint32_t SLANG_MCALL release() override { return 1; } - void beginPass(IRenderPassLayout* renderPass, IFramebuffer* framebuffer) { FramebufferImpl* framebufferImpl = static_cast<FramebufferImpl*>(framebuffer); @@ -3610,7 +3693,6 @@ public: beginInfo.pClearValues = framebufferImpl->m_clearValues; auto& api = *m_api; api.vkCmdBeginRenderPass(m_vkCommandBuffer, &beginInfo, VK_SUBPASS_CONTENTS_INLINE); - m_isOpen = true; } virtual SLANG_NO_THROW void SLANG_MCALL endEncoding() override @@ -3825,7 +3907,6 @@ public: m_renderCommandEncoder = new RenderCommandEncoder(); m_renderCommandEncoder->init(this); } - assert(!m_renderCommandEncoder->m_isOpen); m_renderCommandEncoder->beginPass(renderPass, framebuffer); *outEncoder = m_renderCommandEncoder.Ptr(); } @@ -3835,21 +3916,6 @@ public: , public PipelineCommandEncoder { public: - virtual SLANG_NO_THROW SlangResult SLANG_MCALL - queryInterface(SlangUUID const& uuid, void** outObject) override - { - if (uuid == GfxGUID::IID_ISlangUnknown || uuid == GfxGUID::IID_ICommandEncoder || - uuid == GfxGUID::IID_IComputeCommandEncoder) - { - *outObject = static_cast<IComputeCommandEncoder*>(this); - return SLANG_OK; - } - *outObject = nullptr; - return SLANG_E_NO_INTERFACE; - } - virtual SLANG_NO_THROW uint32_t SLANG_MCALL addRef() override { return 1; } - virtual SLANG_NO_THROW uint32_t SLANG_MCALL release() override { return 1; } - public: virtual SLANG_NO_THROW void SLANG_MCALL endEncoding() override { endEncodingImpl(); @@ -3893,7 +3959,6 @@ public: m_computeCommandEncoder = new ComputeCommandEncoder(); m_computeCommandEncoder->init(this); } - assert(!m_computeCommandEncoder->m_isOpen); *outEncoder = m_computeCommandEncoder.Ptr(); } @@ -3904,21 +3969,6 @@ public: public: CommandBufferImpl* m_commandBuffer; public: - virtual SLANG_NO_THROW SlangResult SLANG_MCALL - queryInterface(SlangUUID const& uuid, void** outObject) override - { - if (uuid == GfxGUID::IID_ISlangUnknown || uuid == GfxGUID::IID_ICommandEncoder || - uuid == GfxGUID::IID_IResourceCommandEncoder) - { - *outObject = static_cast<IResourceCommandEncoder*>(this); - return SLANG_OK; - } - *outObject = nullptr; - return SLANG_E_NO_INTERFACE; - } - virtual SLANG_NO_THROW uint32_t SLANG_MCALL addRef() override { return 1; } - virtual SLANG_NO_THROW uint32_t SLANG_MCALL release() override { return 1; } - public: virtual SLANG_NO_THROW void SLANG_MCALL copyBuffer( IBufferResource* dst, size_t dstOffset, @@ -4006,6 +4056,264 @@ public: *outEncoder = m_resourceCommandEncoder.Ptr(); } + class RayTracingCommandEncoder + : public IRayTracingCommandEncoder + , public RefObject + { + public: + CommandBufferImpl* m_commandBuffer; + + public: + void init(CommandBufferImpl* commandBuffer) { m_commandBuffer = commandBuffer; } + + inline VkAccessFlags translateAccelerationStructureAccessFlag(AccessFlag::Enum access) + { + VkAccessFlags result = 0; + if (access & AccessFlag::Read) + result |= VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR | + VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_TRANSFER_READ_BIT; + if (access & AccessFlag::Write) + result |= VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR; + return result; + } + + inline void _memoryBarrier( + int count, + IAccelerationStructure* const* structures, + AccessFlag::Enum srcAccess, + AccessFlag::Enum destAccess) + { + ShortList<VkBufferMemoryBarrier> memBarriers; + memBarriers.setCount(count); + for (int i = 0; i < count; i++) + { + memBarriers[i].sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; + memBarriers[i].pNext = nullptr; + memBarriers[i].dstAccessMask = + translateAccelerationStructureAccessFlag(destAccess); + memBarriers[i].srcAccessMask = + translateAccelerationStructureAccessFlag(srcAccess); + memBarriers[i].srcQueueFamilyIndex = + m_commandBuffer->m_renderer->m_queueFamilyIndex; + memBarriers[i].dstQueueFamilyIndex = + m_commandBuffer->m_renderer->m_queueFamilyIndex; + + auto asImpl = static_cast<AccelerationStructureImpl*>(structures[i]); + memBarriers[i].buffer = asImpl->m_buffer->m_buffer.m_buffer; + memBarriers[i].offset = asImpl->m_offset; + memBarriers[i].size = asImpl->m_size; + } + m_commandBuffer->m_renderer->m_api.vkCmdPipelineBarrier( + m_commandBuffer->m_commandBuffer, + VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR | + VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, + VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR | + VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT | VK_PIPELINE_STAGE_TRANSFER_BIT, + 0, + 0, + nullptr, + (uint32_t)memBarriers.getCount(), + memBarriers.getArrayView().getBuffer(), + 0, + nullptr); + } + + inline void _queryAccelerationStructureProperties( + int accelerationStructureCount, + IAccelerationStructure* const* accelerationStructures, + int queryCount, + AccelerationStructureQueryDesc* queryDescs) + { + ShortList<VkAccelerationStructureKHR> vkHandles; + vkHandles.setCount(accelerationStructureCount); + for (int i = 0; i < accelerationStructureCount; i++) + { + vkHandles[i] = + static_cast<AccelerationStructureImpl*>(accelerationStructures[i]) + ->m_vkHandle; + } + auto vkHandlesView = vkHandles.getArrayView(); + for (int i = 0; i < queryCount; i++) + { + VkQueryType queryType; + switch (queryDescs[i].queryType) + { + case QueryType::AccelerationStructureCompactedSize: + queryType = VK_QUERY_TYPE_ACCELERATION_STRUCTURE_COMPACTED_SIZE_KHR; + break; + case QueryType::AccelerationStructureSerializedSize: + queryType = VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR; + break; + default: + getDebugCallback()->handleMessage(DebugMessageType::Error, DebugMessageSource::Layer, + "Invalid query type for use in queryAccelerationStructureProperties."); + return; + } + auto queryPool = static_cast<QueryPoolImpl*>(queryDescs[i].queryPool)->m_pool; + m_commandBuffer->m_renderer->m_api.vkCmdResetQueryPool( + m_commandBuffer->m_commandBuffer, + queryPool, + (uint32_t)queryDescs[i].firstQueryIndex, + 1); + m_commandBuffer->m_renderer->m_api + .vkCmdWriteAccelerationStructuresPropertiesKHR( + m_commandBuffer->m_commandBuffer, + accelerationStructureCount, + vkHandlesView.getBuffer(), + queryType, + queryPool, + queryDescs[i].firstQueryIndex); + } + } + + virtual SLANG_NO_THROW void SLANG_MCALL buildAccelerationStructure( + const IAccelerationStructure::BuildDesc& desc, + int propertyQueryCount, + AccelerationStructureQueryDesc* queryDescs) override + { + AccelerationStructureBuildGeometryInfoBuilder geomInfoBuilder; + if (geomInfoBuilder.build(desc.inputs, getDebugCallback()) != SLANG_OK) + return; + + if (desc.dest) + { + geomInfoBuilder.buildInfo.dstAccelerationStructure = + static_cast<AccelerationStructureImpl*>(desc.dest)->m_vkHandle; + } + if (desc.source) + { + geomInfoBuilder.buildInfo.srcAccelerationStructure = + static_cast<AccelerationStructureImpl*>(desc.source)->m_vkHandle; + } + geomInfoBuilder.buildInfo.scratchData.deviceAddress = desc.scratchData; + + List<VkAccelerationStructureBuildRangeInfoKHR> rangeInfos; + rangeInfos.setCount(geomInfoBuilder.primitiveCounts.getCount()); + for (Index i = 0; i < geomInfoBuilder.primitiveCounts.getCount(); i++) + { + auto& rangeInfo = rangeInfos[i]; + rangeInfo.primitiveCount = geomInfoBuilder.primitiveCounts[i]; + rangeInfo.firstVertex = 0; + rangeInfo.primitiveOffset = 0; + rangeInfo.transformOffset = 0; + } + + auto rangeInfoPtr = rangeInfos.getBuffer(); + m_commandBuffer->m_renderer->m_api.vkCmdBuildAccelerationStructuresKHR( + m_commandBuffer->m_commandBuffer, 1, &geomInfoBuilder.buildInfo, &rangeInfoPtr); + + if (propertyQueryCount) + { + _memoryBarrier(1, &desc.dest, AccessFlag::Write, AccessFlag::Read); + _queryAccelerationStructureProperties( + 1, &desc.dest, propertyQueryCount, queryDescs); + } + } + + virtual SLANG_NO_THROW void SLANG_MCALL copyAccelerationStructure( + IAccelerationStructure* dest, + IAccelerationStructure* src, + AccelerationStructureCopyMode mode) override + { + VkCopyAccelerationStructureInfoKHR copyInfo = { + VK_STRUCTURE_TYPE_COPY_ACCELERATION_STRUCTURE_INFO_KHR}; + copyInfo.src = static_cast<AccelerationStructureImpl*>(src)->m_vkHandle; + copyInfo.dst = static_cast<AccelerationStructureImpl*>(dest)->m_vkHandle; + switch (mode) + { + case AccelerationStructureCopyMode::Clone: + copyInfo.mode = VK_COPY_ACCELERATION_STRUCTURE_MODE_CLONE_KHR; + break; + case AccelerationStructureCopyMode::Compact: + copyInfo.mode = VK_COPY_ACCELERATION_STRUCTURE_MODE_COMPACT_KHR; + break; + default: + getDebugCallback()->handleMessage( + DebugMessageType::Error, + DebugMessageSource::Layer, + "Unsupported AccelerationStructureCopyMode."); + return; + } + m_commandBuffer->m_renderer->m_api.vkCmdCopyAccelerationStructureKHR( + m_commandBuffer->m_commandBuffer, ©Info); + } + + virtual SLANG_NO_THROW void SLANG_MCALL queryAccelerationStructureProperties( + int accelerationStructureCount, + IAccelerationStructure* const* accelerationStructures, + int queryCount, + AccelerationStructureQueryDesc* queryDescs) override + { + _queryAccelerationStructureProperties( + accelerationStructureCount, accelerationStructures, queryCount, queryDescs); + } + + virtual SLANG_NO_THROW void SLANG_MCALL serializeAccelerationStructure( + DeviceAddress dest, + IAccelerationStructure* source) override + { + VkCopyAccelerationStructureToMemoryInfoKHR copyInfo = { + VK_STRUCTURE_TYPE_COPY_ACCELERATION_STRUCTURE_TO_MEMORY_INFO_KHR}; + copyInfo.src = static_cast<AccelerationStructureImpl*>(source)->m_vkHandle; + copyInfo.dst.deviceAddress = dest; + copyInfo.mode = VK_COPY_ACCELERATION_STRUCTURE_MODE_SERIALIZE_KHR; + m_commandBuffer->m_renderer->m_api.vkCmdCopyAccelerationStructureToMemoryKHR( + m_commandBuffer->m_commandBuffer, ©Info); + } + + virtual SLANG_NO_THROW void SLANG_MCALL deserializeAccelerationStructure( + IAccelerationStructure* dest, + DeviceAddress source) override + { + VkCopyMemoryToAccelerationStructureInfoKHR copyInfo = { + VK_STRUCTURE_TYPE_COPY_MEMORY_TO_ACCELERATION_STRUCTURE_INFO_KHR}; + copyInfo.src.deviceAddress = source; + copyInfo.dst = static_cast<AccelerationStructureImpl*>(dest)->m_vkHandle; + copyInfo.mode = VK_COPY_ACCELERATION_STRUCTURE_MODE_DESERIALIZE_KHR; + m_commandBuffer->m_renderer->m_api.vkCmdCopyMemoryToAccelerationStructureKHR( + m_commandBuffer->m_commandBuffer, ©Info); + } + + virtual SLANG_NO_THROW void memoryBarrier( + int count, + IAccelerationStructure* const* structures, + AccessFlag::Enum srcAccess, + AccessFlag::Enum destAccess) override + { + _memoryBarrier(count, structures, srcAccess, destAccess); + } + + virtual SLANG_NO_THROW void SLANG_MCALL endEncoding() override + { + } + + virtual SLANG_NO_THROW void SLANG_MCALL + writeTimestamp(IQueryPool* queryPool, SlangInt index) override + { + _writeTimestamp( + &m_commandBuffer->m_renderer->m_api, + m_commandBuffer->m_commandBuffer, + queryPool, + index); + } + }; + + RefPtr<RayTracingCommandEncoder> m_rayTracingCommandEncoder; + + virtual SLANG_NO_THROW void SLANG_MCALL + encodeRayTracingCommands(IRayTracingCommandEncoder** outEncoder) override + { + if (!m_rayTracingCommandEncoder) + { + if (m_renderer->m_api.vkCmdBuildAccelerationStructuresKHR) + { + m_rayTracingCommandEncoder = new RayTracingCommandEncoder(); + m_rayTracingCommandEncoder->init(this); + } + } + *outEncoder = m_rayTracingCommandEncoder.Ptr(); + } + virtual SLANG_NO_THROW void SLANG_MCALL close() override { auto& vkAPI = m_renderer->m_api; @@ -4052,9 +4360,9 @@ public: RefPtr<VKDevice> m_renderer; VkQueue m_queue; uint32_t m_queueFamilyIndex; - VkSemaphore m_pendingWaitSemaphore = VK_NULL_HANDLE; + VkSemaphore m_pendingWaitSemaphores[2] = {VK_NULL_HANDLE, VK_NULL_HANDLE}; List<VkCommandBuffer> m_submitCommandBuffers; - static const int kSemaphoreCount = 2; + static const int kSemaphoreCount = 32; uint32_t m_currentSemaphoreIndex; VkSemaphore m_semaphores[kSemaphoreCount]; ~CommandQueueImpl() @@ -4114,26 +4422,37 @@ public: auto vkCmdBuf = cmdBufImpl->m_commandBuffer; m_submitCommandBuffers.add(vkCmdBuf); } - VkSemaphore waitSemaphore = m_pendingWaitSemaphore; VkSemaphore signalSemaphore = m_semaphores[m_currentSemaphoreIndex]; VkSubmitInfo submitInfo = {}; submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; - VkPipelineStageFlags stageFlag = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT; - submitInfo.pWaitDstStageMask = &stageFlag; + VkPipelineStageFlags stageFlag[] = { + VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT}; + submitInfo.pWaitDstStageMask = stageFlag; submitInfo.commandBufferCount = (uint32_t)m_submitCommandBuffers.getCount(); submitInfo.pCommandBuffers = m_submitCommandBuffers.getBuffer(); - if (m_pendingWaitSemaphore != VK_NULL_HANDLE) + Array<VkSemaphore, 2> waitSemaphores; + for (auto s : m_pendingWaitSemaphores) + { + if (s != VK_NULL_HANDLE) + { + waitSemaphores.add(s); + } + } + submitInfo.waitSemaphoreCount = (uint32_t)waitSemaphores.getCount(); + if (submitInfo.waitSemaphoreCount) { - submitInfo.waitSemaphoreCount = 1; - submitInfo.pWaitSemaphores = &waitSemaphore; + submitInfo.pWaitSemaphores = waitSemaphores.getBuffer(); } submitInfo.signalSemaphoreCount = 1; submitInfo.pSignalSemaphores = &signalSemaphore; - auto fence = static_cast<CommandBufferImpl*>(commandBuffers[0])->m_fence; + auto commandBufferImpl = static_cast<CommandBufferImpl*>(commandBuffers[0]); + auto fence = commandBufferImpl->m_transientHeap->getCurrentFence(); vkAPI.vkResetFences(vkAPI.m_device, 1, &fence); vkAPI.vkQueueSubmit(m_queue, 1, &submitInfo, fence); - m_pendingWaitSemaphore = signalSemaphore; + m_pendingWaitSemaphores[0] = signalSemaphore; + m_pendingWaitSemaphores[1] = VK_NULL_HANDLE; + commandBufferImpl->m_transientHeap->advanceFence(); m_currentSemaphoreIndex++; m_currentSemaphoreIndex = m_currentSemaphoreIndex % kSemaphoreCount; @@ -4149,16 +4468,37 @@ public: public: VkCommandPool m_commandPool; DescriptorSetAllocator m_descSetAllocator; - VkFence m_fence; + List<VkFence> m_fences; + Index m_fenceIndex = -1; List<RefPtr<CommandBufferImpl>> m_commandBufferPool; uint32_t m_commandBufferAllocId = 0; + VkFence getCurrentFence() + { + return m_fences[m_fenceIndex]; + } + void advanceFence() + { + m_fenceIndex++; + if (m_fenceIndex >= m_fences.getCount()) + { + m_fences.setCount(m_fenceIndex + 1); + VkFenceCreateInfo fenceCreateInfo = {}; + fenceCreateInfo.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO; + fenceCreateInfo.flags = VK_FENCE_CREATE_SIGNALED_BIT; + m_device->m_api.vkCreateFence( + m_device->m_api.m_device, &fenceCreateInfo, nullptr, &m_fences[m_fenceIndex]); + } + } Result init(const ITransientResourceHeap::Desc& desc, VKDevice* device); ~TransientResourceHeapImpl() { m_commandBufferPool = decltype(m_commandBufferPool)(); m_device->m_api.vkDestroyCommandPool(m_device->m_api.m_device, m_commandPool, nullptr); - m_device->m_api.vkDestroyFence(m_device->m_api.m_device, m_fence, nullptr); + for (auto fence : m_fences) + { + m_device->m_api.vkDestroyFence(m_device->m_api.m_device, fence, nullptr); + } m_descSetAllocator.close(); } public: @@ -4191,6 +4531,12 @@ public: case QueryType::Timestamp: createInfo.queryType = VK_QUERY_TYPE_TIMESTAMP; break; + case QueryType::AccelerationStructureCompactedSize: + createInfo.queryType = VK_QUERY_TYPE_ACCELERATION_STRUCTURE_COMPACTED_SIZE_KHR; + break; + case QueryType::AccelerationStructureSerializedSize: + createInfo.queryType = VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR; + break; default: return SLANG_E_INVALID_ARG; } @@ -4554,13 +4900,22 @@ public: presentInfo.swapchainCount = 1; presentInfo.pSwapchains = &m_swapChain; presentInfo.pImageIndices = swapChainIndices; - if (m_queue->m_pendingWaitSemaphore != VK_NULL_HANDLE) + Array<VkSemaphore, 2> waitSemaphores; + for (auto s : m_queue->m_pendingWaitSemaphores) { - presentInfo.waitSemaphoreCount = 1; - presentInfo.pWaitSemaphores = &m_queue->m_pendingWaitSemaphore; + if (s != VK_NULL_HANDLE) + { + waitSemaphores.add(s); + } + } + presentInfo.waitSemaphoreCount = (uint32_t)waitSemaphores.getCount(); + if (presentInfo.waitSemaphoreCount) + { + presentInfo.pWaitSemaphores = waitSemaphores.getBuffer(); } m_api->vkQueuePresentKHR(m_queue->m_queue, &presentInfo); - m_queue->m_pendingWaitSemaphore = VK_NULL_HANDLE; + m_queue->m_pendingWaitSemaphores[0] = VK_NULL_HANDLE; + m_queue->m_pendingWaitSemaphores[1] = VK_NULL_HANDLE; return SLANG_OK; } virtual SLANG_NO_THROW int SLANG_MCALL acquireNextImage() override @@ -4584,7 +4939,7 @@ public: return m_currentImageIndex; } // Make the queue's next submit wait on `m_nextImageSemaphore`. - m_queue->m_pendingWaitSemaphore = m_nextImageSemaphore; + m_queue->m_pendingWaitSemaphores[1] = m_nextImageSemaphore; return m_currentImageIndex; } }; @@ -4730,7 +5085,6 @@ Result VKDevice::Buffer::init(const VulkanApi& api, size_t bufferSize, VkBufferU VkBufferCreateInfo bufferCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO }; bufferCreateInfo.size = bufferSize; bufferCreateInfo.usage = usage; - SLANG_VK_CHECK(api.vkCreateBuffer(api.m_device, &bufferCreateInfo, nullptr, &m_buffer)); VkMemoryRequirements memoryReqs = {}; @@ -4744,7 +5098,14 @@ Result VKDevice::Buffer::init(const VulkanApi& api, size_t bufferSize, VkBufferU VkMemoryAllocateInfo allocateInfo = { VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO }; allocateInfo.allocationSize = memoryReqs.size; allocateInfo.memoryTypeIndex = memoryTypeIndex; - + VkMemoryAllocateFlagsInfo flagInfo = {VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO}; + if (usage & VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT) + { + flagInfo.deviceMask = 1; + flagInfo.flags = VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT; + allocateInfo.pNext = &flagInfo; + } + SLANG_VK_CHECK(api.vkAllocateMemory(api.m_device, &allocateInfo, nullptr, &m_memory)); SLANG_VK_CHECK(api.vkBindBufferMemory(api.m_device, m_buffer, m_memory, 0)); @@ -5042,24 +5403,6 @@ Result VKDevice::initVulkanInstanceAndDevice(bool useValidationLayer) const uint32_t majorVersion = VK_VERSION_MAJOR(basicProps.apiVersion); const uint32_t minorVersion = VK_VERSION_MINOR(basicProps.apiVersion); - // Need in this scope because it will be linked into the device creation (if it is available) - - // Float16 features - VkPhysicalDeviceFloat16Int8FeaturesKHR float16Features = { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT16_INT8_FEATURES_KHR }; - // 16 bit storage features - VkPhysicalDevice16BitStorageFeatures storage16BitFeatures = { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES_KHR }; - // AtomicInt64 features - VkPhysicalDeviceShaderAtomicInt64FeaturesKHR atomicInt64Features = { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_INT64_FEATURES_KHR }; - // Atomic Float features - VkPhysicalDeviceShaderAtomicFloatFeaturesEXT atomicFloatFeatures = { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_FLOAT_FEATURES_EXT }; - // Timeline Semaphore features - VkPhysicalDeviceTimelineSemaphoreFeatures timelineFeatures = { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES }; - // Extended dynamic state features - VkPhysicalDeviceExtendedDynamicStateFeaturesEXT extendedDynamicStateFeatures = { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_FEATURES_EXT }; - // Subgroup extended type features - VkPhysicalDeviceShaderSubgroupExtendedTypesFeatures shaderSubgroupExtendedTypeFeatures = { - VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_SUBGROUP_EXTENDED_TYPES_FEATURES}; - // API version check, can't use vkGetPhysicalDeviceProperties2 yet since this device might not support it if (VK_MAKE_VERSION(majorVersion, minorVersion, 0) >= VK_API_VERSION_1_1 && m_api.vkGetPhysicalDeviceProperties2 && @@ -5069,6 +5412,18 @@ Result VKDevice::initVulkanInstanceAndDevice(bool useValidationLayer) VkPhysicalDeviceFeatures2 deviceFeatures2 = {}; deviceFeatures2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2; + // Buffer device address features + bufferDeviceAddressFeatures.pNext = deviceFeatures2.pNext; + deviceFeatures2.pNext = &bufferDeviceAddressFeatures; + + // Ray query features + rayQueryFeatures.pNext = deviceFeatures2.pNext; + deviceFeatures2.pNext = &rayQueryFeatures; + + // Acceleration structure features + accelerationStructureFeatures.pNext = deviceFeatures2.pNext; + deviceFeatures2.pNext = &accelerationStructureFeatures; + // Subgroup features shaderSubgroupExtendedTypeFeatures.pNext = deviceFeatures2.pNext; deviceFeatures2.pNext = &shaderSubgroupExtendedTypeFeatures; @@ -5174,6 +5529,31 @@ Result VKDevice::initVulkanInstanceAndDevice(bool useValidationLayer) deviceExtensions.add(VK_KHR_SHADER_SUBGROUP_EXTENDED_TYPES_EXTENSION_NAME); m_features.add("shader-subgroup-extended-types"); } + + if (accelerationStructureFeatures.accelerationStructure) + { + accelerationStructureFeatures.pNext = (void*)deviceCreateInfo.pNext; + deviceCreateInfo.pNext = &accelerationStructureFeatures; + deviceExtensions.add(VK_KHR_ACCELERATION_STRUCTURE_EXTENSION_NAME); + deviceExtensions.add(VK_KHR_DEFERRED_HOST_OPERATIONS_EXTENSION_NAME); + m_features.add("acceleration-structure"); + } + + if (rayQueryFeatures.rayQuery) + { + rayQueryFeatures.pNext = (void*)deviceCreateInfo.pNext; + deviceCreateInfo.pNext = &rayQueryFeatures; + deviceExtensions.add(VK_KHR_RAY_QUERY_EXTENSION_NAME); + m_features.add("ray-query"); + } + + if (bufferDeviceAddressFeatures.bufferDeviceAddress) + { + bufferDeviceAddressFeatures.pNext = (void*)deviceCreateInfo.pNext; + deviceCreateInfo.pNext = &bufferDeviceAddressFeatures; + deviceExtensions.add(VK_KHR_BUFFER_DEVICE_ADDRESS_EXTENSION_NAME); + m_features.add("buffer-device-address"); + } } m_queueFamilyIndex = m_api.findQueue(VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT); @@ -5255,11 +5635,7 @@ Result VKDevice::TransientResourceHeapImpl::init( device->m_api.vkCreateCommandPool( device->m_api.m_device, &poolCreateInfo, nullptr, &m_commandPool); - VkFenceCreateInfo fenceCreateInfo = {}; - fenceCreateInfo.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO; - fenceCreateInfo.flags = VK_FENCE_CREATE_SIGNALED_BIT; - device->m_api.vkCreateFence(device->m_api.m_device, &fenceCreateInfo, nullptr, &m_fence); - + advanceFence(); return SLANG_OK; } @@ -5276,7 +5652,7 @@ Result VKDevice::TransientResourceHeapImpl::createCommandBuffer(ICommandBuffer** RefPtr<CommandBufferImpl> commandBuffer = new CommandBufferImpl(); SLANG_RETURN_ON_FAIL(commandBuffer->init( - m_device, m_commandPool, m_fence, this)); + m_device, m_commandPool, this)); m_commandBufferPool.add(commandBuffer); m_commandBufferAllocId++; returnComPtr(outCmdBuffer, commandBuffer); @@ -5287,12 +5663,15 @@ Result VKDevice::TransientResourceHeapImpl::synchronizeAndReset() { m_commandBufferAllocId = 0; auto& api = m_device->m_api; - if (api.vkWaitForFences(api.m_device, 1, &m_fence, 1, UINT64_MAX) != VK_SUCCESS) + if (api.vkWaitForFences( + api.m_device, (uint32_t)m_fences.getCount(), m_fences.getBuffer(), 1, UINT64_MAX) != + VK_SUCCESS) { return SLANG_FAIL; } api.vkResetCommandPool(api.m_device, m_commandPool, 0); m_descSetAllocator.reset(); + m_fenceIndex = 0; Super::reset(); return SLANG_OK; } @@ -5421,6 +5800,69 @@ SlangResult VKDevice::readBufferResource( return SLANG_OK; } +Result VKDevice::getAccelerationStructurePrebuildInfo( + const IAccelerationStructure::BuildInputs& buildInputs, + IAccelerationStructure::PrebuildInfo* outPrebuildInfo) +{ + if (!m_api.vkGetAccelerationStructureBuildSizesKHR) + { + return SLANG_E_NOT_AVAILABLE; + } + VkAccelerationStructureBuildSizesInfoKHR sizeInfo = { + VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR}; + AccelerationStructureBuildGeometryInfoBuilder geomInfoBuilder; + SLANG_RETURN_ON_FAIL(geomInfoBuilder.build(buildInputs, getDebugCallback())); + m_api.vkGetAccelerationStructureBuildSizesKHR( + m_api.m_device, + VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR, + &geomInfoBuilder.buildInfo, + geomInfoBuilder.primitiveCounts.getBuffer(), + &sizeInfo); + outPrebuildInfo->resultDataMaxSize = sizeInfo.accelerationStructureSize; + outPrebuildInfo->scratchDataSize = sizeInfo.buildScratchSize; + outPrebuildInfo->updateScratchDataSize = sizeInfo.updateScratchSize; + return SLANG_OK; +} + +Result VKDevice::createAccelerationStructure( + const IAccelerationStructure::CreateDesc& desc, + IAccelerationStructure** outAS) +{ + if (!m_api.vkCreateAccelerationStructureKHR) + { + return SLANG_E_NOT_AVAILABLE; + } + RefPtr<AccelerationStructureImpl> resultAS = new AccelerationStructureImpl(); + resultAS->m_offset = desc.offset; + resultAS->m_size = desc.size; + resultAS->m_buffer = static_cast<BufferResourceImpl*>(desc.buffer); + resultAS->m_device = this; + VkAccelerationStructureCreateInfoKHR createInfo = {VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_CREATE_INFO_KHR}; + createInfo.buffer = resultAS->m_buffer->m_buffer.m_buffer; + createInfo.offset = desc.offset; + createInfo.size = desc.size; + switch (desc.kind) + { + case IAccelerationStructure::Kind::BottomLevel: + createInfo.type = VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR; + break; + case IAccelerationStructure::Kind::TopLevel: + createInfo.type = VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR; + break; + default: + getDebugCallback()->handleMessage( + DebugMessageType::Error, + DebugMessageSource::Layer, + "invalid value of IAccelerationStructure::Kind encountered in desc.kind"); + return SLANG_E_INVALID_ARG; + } + + SLANG_VK_RETURN_ON_FAIL(m_api.vkCreateAccelerationStructureKHR( + m_api.m_device, &createInfo, nullptr, &resultAS->m_vkHandle)); + returnComPtr(outAS, resultAS); + return SLANG_OK; +} + static VkBufferUsageFlagBits _calcBufferUsageFlags(ResourceState state) { switch (state) @@ -5448,6 +5890,8 @@ static VkBufferUsageFlagBits _calcBufferUsageFlags(ResourceState state) return VK_BUFFER_USAGE_TRANSFER_SRC_BIT; case ResourceState::CopyDestination: return VK_BUFFER_USAGE_TRANSFER_DST_BIT; + case ResourceState::AccelerationStructure: + return VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_STORAGE_BIT_KHR; default: return VkBufferUsageFlagBits(0); } @@ -5510,7 +5954,7 @@ static VkImageUsageFlags _calcImageUsageFlags( { VkImageUsageFlags usage = _calcImageUsageFlags(states); - if ((cpuAccessFlags & IResource::AccessFlag::Write) || initData) + if ((cpuAccessFlags & AccessFlag::Write) || initData) { usage |= VK_IMAGE_USAGE_TRANSFER_DST_BIT; } @@ -5626,6 +6070,15 @@ void VKDevice::_transitionImageLayout(VkImage image, VkFormat format, const Text sourceStage = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT; destinationStage = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; } + else if (oldLayout == VK_IMAGE_LAYOUT_UNDEFINED && + newLayout == VK_IMAGE_LAYOUT_GENERAL) + { + barrier.srcAccessMask = 0; + barrier.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT; + + sourceStage = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; + destinationStage = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; + } else { assert(!"unsupported layout transition!"); @@ -5902,6 +6355,10 @@ Result VKDevice::createBufferResource(const IBufferResource::Desc& descIn, const VkMemoryPropertyFlags reqMemoryProperties = 0; VkBufferUsageFlags usage = _calcBufferUsageFlags(desc.allowedStates); + if (bufferDeviceAddressFeatures.bufferDeviceAddress) + { + usage |= VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT; + } if (initData) { @@ -5916,7 +6373,7 @@ Result VKDevice::createBufferResource(const IBufferResource::Desc& descIn, const RefPtr<BufferResourceImpl> buffer(new BufferResourceImpl(desc, this)); SLANG_RETURN_ON_FAIL(buffer->m_buffer.init(m_api, desc.sizeInBytes, usage, reqMemoryProperties)); - if ((desc.cpuAccessFlags & IResource::AccessFlag::Write) || initData) + if ((desc.cpuAccessFlags & AccessFlag::Write) || initData) { SLANG_RETURN_ON_FAIL(buffer->m_uploadBuffer.init(m_api, bufferSize, VK_BUFFER_USAGE_TRANSFER_SRC_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)); } |
