diff options
| author | Yong He <yonghe@outlook.com> | 2022-09-22 21:54:16 -0700 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2022-09-22 21:54:16 -0700 |
| commit | bd11629739ae2b0619699c765354894ff32dacf1 (patch) | |
| tree | 4f72313bc33ed34dd96d16cec6623ca9b0a8292e /tools | |
| parent | a6fcb3b0ed2b185723afde750bd4491b5b4113eb (diff) | |
GFX: improve d3d12 performance. (#2408)
Diffstat (limited to 'tools')
| -rw-r--r-- | tools/gfx/d3d/d3d-util.cpp | 4 | ||||
| -rw-r--r-- | tools/gfx/d3d11/d3d11-helper-functions.cpp | 2 | ||||
| -rw-r--r-- | tools/gfx/d3d12/d3d12-device.cpp | 44 | ||||
| -rw-r--r-- | tools/gfx/d3d12/d3d12-device.h | 2 | ||||
| -rw-r--r-- | tools/gfx/d3d12/d3d12-pipeline-state.cpp | 23 | ||||
| -rw-r--r-- | tools/gfx/d3d12/d3d12-shader-object-layout.cpp | 19 | ||||
| -rw-r--r-- | tools/gfx/d3d12/d3d12-shader-object-layout.h | 12 | ||||
| -rw-r--r-- | tools/gfx/d3d12/d3d12-shader-table.cpp | 9 | ||||
| -rw-r--r-- | tools/gfx/renderer-shared.cpp | 2 | ||||
| -rw-r--r-- | tools/gfx/vulkan/vk-helper-functions.cpp | 10 | ||||
| -rw-r--r-- | tools/gfx/vulkan/vk-util.cpp | 4 |
11 files changed, 92 insertions, 39 deletions
diff --git a/tools/gfx/d3d/d3d-util.cpp b/tools/gfx/d3d/d3d-util.cpp index 942764d01..e50ddaa80 100644 --- a/tools/gfx/d3d/d3d-util.cpp +++ b/tools/gfx/d3d/d3d-util.cpp @@ -810,6 +810,10 @@ D3D12_RESOURCE_STATES D3DUtil::getResourceState(ResourceState state) case ResourceState::ShaderResource: case ResourceState::AccelerationStructureBuildInput: return D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE | D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE; + case ResourceState::PixelShaderResource: + return D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE; + case ResourceState::NonPixelShaderResource: + return D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE; case ResourceState::UnorderedAccess: return D3D12_RESOURCE_STATE_UNORDERED_ACCESS; case ResourceState::RenderTarget: diff --git a/tools/gfx/d3d11/d3d11-helper-functions.cpp b/tools/gfx/d3d11/d3d11-helper-functions.cpp index c4caab59b..c797979ce 100644 --- a/tools/gfx/d3d11/d3d11-helper-functions.cpp +++ b/tools/gfx/d3d11/d3d11-helper-functions.cpp @@ -43,6 +43,8 @@ namespace d3d11 case ResourceState::UnorderedAccess: return D3D11_BIND_UNORDERED_ACCESS; case ResourceState::ShaderResource: + case ResourceState::PixelShaderResource: + case ResourceState::NonPixelShaderResource: return D3D11_BIND_SHADER_RESOURCE; default: return D3D11_BIND_FLAG(0); diff --git a/tools/gfx/d3d12/d3d12-device.cpp b/tools/gfx/d3d12/d3d12-device.cpp index f5c457809..efc1beb22 100644 --- a/tools/gfx/d3d12/d3d12-device.cpp +++ b/tools/gfx/d3d12/d3d12-device.cpp @@ -31,6 +31,8 @@ namespace d3d12 using namespace Slang; +static const uint32_t D3D_FEATURE_LEVEL_12_2 = 0xc200; + Result DeviceImpl::createBuffer( const D3D12_RESOURCE_DESC& resourceDesc, const void* srcData, @@ -416,6 +418,13 @@ Result DeviceImpl::initialize(const Desc& desc) return SLANG_FAIL; } + m_D3D12SerializeVersionedRootSignature = + (PFN_D3D12_SERIALIZE_VERSIONED_ROOT_SIGNATURE)loadProc(d3dModule, "D3D12SerializeVersionedRootSignature"); + if (!m_D3D12SerializeVersionedRootSignature) + { + return SLANG_FAIL; + } + HMODULE pixModule = LoadLibraryW(L"WinPixEventRuntime.dll"); if (pixModule) { @@ -472,21 +481,36 @@ Result DeviceImpl::initialize(const Desc& desc) DeviceCheckFlag::UseHardwareDevice, ChangeType::OnOff); ///< First try hardware, then reference - const D3D_FEATURE_LEVEL featureLevel = D3D_FEATURE_LEVEL_11_0; - const int numCombinations = combiner.getNumCombinations(); - for (int i = 0; i < numCombinations; ++i) + const D3D_FEATURE_LEVEL featureLevels[] = { - if (SLANG_SUCCEEDED(_createDevice( - combiner.getCombination(i), - UnownedStringSlice(desc.adapter), - featureLevel, - m_deviceInfo))) + (D3D_FEATURE_LEVEL)D3D_FEATURE_LEVEL_12_2, + D3D_FEATURE_LEVEL_12_1, + D3D_FEATURE_LEVEL_12_0, + D3D_FEATURE_LEVEL_11_1, + D3D_FEATURE_LEVEL_11_0, + D3D_FEATURE_LEVEL_10_1, + D3D_FEATURE_LEVEL_10_0, + D3D_FEATURE_LEVEL_9_3, + D3D_FEATURE_LEVEL_9_2, + D3D_FEATURE_LEVEL_9_1 + }; + for (auto featureLevel : featureLevels) + { + const int numCombinations = combiner.getNumCombinations(); + for (int i = 0; i < numCombinations; ++i) { - break; + if (SLANG_SUCCEEDED(_createDevice( + combiner.getCombination(i), + UnownedStringSlice(desc.adapter), + featureLevel, + m_deviceInfo))) + { + goto succ; + } } } - + succ: if (!m_deviceInfo.m_adapter) { // Couldn't find an adapter diff --git a/tools/gfx/d3d12/d3d12-device.h b/tools/gfx/d3d12/d3d12-device.h index 71daf9ffb..e02005ffd 100644 --- a/tools/gfx/d3d12/d3d12-device.h +++ b/tools/gfx/d3d12/d3d12-device.h @@ -78,7 +78,7 @@ public: PFN_D3D12_GET_DEBUG_INTERFACE m_D3D12GetDebugInterface = nullptr; PFN_D3D12_CREATE_DEVICE m_D3D12CreateDevice = nullptr; PFN_D3D12_SERIALIZE_ROOT_SIGNATURE m_D3D12SerializeRootSignature = nullptr; - + PFN_D3D12_SERIALIZE_VERSIONED_ROOT_SIGNATURE m_D3D12SerializeVersionedRootSignature = nullptr; PFN_BeginEventOnCommandList m_BeginEventOnCommandList = nullptr; PFN_EndEventOnCommandList m_EndEventOnCommandList = nullptr; diff --git a/tools/gfx/d3d12/d3d12-pipeline-state.cpp b/tools/gfx/d3d12/d3d12-pipeline-state.cpp index 983348409..ec073bf44 100644 --- a/tools/gfx/d3d12/d3d12-pipeline-state.cpp +++ b/tools/gfx/d3d12/d3d12-pipeline-state.cpp @@ -330,7 +330,6 @@ Result RayTracingPipelineStateImpl::ensureAPIPipelineStateCreated() ChunkedList<ComPtr<ISlangBlob>> codeBlobs; ChunkedList<D3D12_EXPORT_DESC> exports; ChunkedList<const wchar_t*> strPtrs; - ComPtr<ISlangBlob> diagnostics; ChunkedList<OSString> stringPool; auto getWStr = [&](const char* name) @@ -339,6 +338,19 @@ Result RayTracingPipelineStateImpl::ensureAPIPipelineStateCreated() auto wstr = str.toWString(); return stringPool.add(wstr)->begin(); }; + + D3D12_RAYTRACING_PIPELINE_CONFIG1 pipelineConfig = {}; + pipelineConfig.MaxTraceRecursionDepth = desc.rayTracing.maxRecursion; + if (desc.rayTracing.flags & RayTracingPipelineFlags::SkipTriangles) + pipelineConfig.Flags |= D3D12_RAYTRACING_PIPELINE_FLAG_SKIP_TRIANGLES; + if (desc.rayTracing.flags & RayTracingPipelineFlags::SkipProcedurals) + pipelineConfig.Flags |= D3D12_RAYTRACING_PIPELINE_FLAG_SKIP_PROCEDURAL_PRIMITIVES; + + D3D12_STATE_SUBOBJECT pipelineConfigSubobject = {}; + pipelineConfigSubobject.Type = D3D12_STATE_SUBOBJECT_TYPE_RAYTRACING_PIPELINE_CONFIG1; + pipelineConfigSubobject.pDesc = &pipelineConfig; + subObjects.add(pipelineConfigSubobject); + auto compileShader = [&](slang::EntryPointLayout* entryPointInfo, slang::IComponentType* component, SlangInt entryPointIndex) @@ -361,7 +373,7 @@ Result RayTracingPipelineStateImpl::ensureAPIPipelineStateCreated() library.NumExports = 1; D3D12_EXPORT_DESC exportDesc = {}; exportDesc.Name = getWStr(entryPointInfo->getNameOverride()); - exportDesc.ExportToRename = getWStr(entryPointInfo->getNameOverride()); + exportDesc.ExportToRename = nullptr; exportDesc.Flags = D3D12_EXPORT_FLAG_NONE; library.pExports = exports.add(exportDesc); @@ -435,13 +447,6 @@ Result RayTracingPipelineStateImpl::ensureAPIPipelineStateCreated() globalSignatureSubobject.pDesc = &globalSignatureDesc; subObjects.add(globalSignatureSubobject); - D3D12_RAYTRACING_PIPELINE_CONFIG pipelineConfig = {}; - pipelineConfig.MaxTraceRecursionDepth = desc.rayTracing.maxRecursion; - D3D12_STATE_SUBOBJECT pipelineConfigSubobject = {}; - pipelineConfigSubobject.Type = D3D12_STATE_SUBOBJECT_TYPE_RAYTRACING_PIPELINE_CONFIG; - pipelineConfigSubobject.pDesc = &pipelineConfig; - subObjects.add(pipelineConfigSubobject); - if (m_device->m_pipelineCreationAPIDispatcher) { m_device->m_pipelineCreationAPIDispatcher->beforeCreateRayTracingState( diff --git a/tools/gfx/d3d12/d3d12-shader-object-layout.cpp b/tools/gfx/d3d12/d3d12-shader-object-layout.cpp index 3e8598d51..d90b638ba 100644 --- a/tools/gfx/d3d12/d3d12-shader-object-layout.cpp +++ b/tools/gfx/d3d12/d3d12-shader-object-layout.cpp @@ -500,7 +500,7 @@ Result RootShaderObjectLayoutImpl::RootSignatureDescBuilder::addDescriptorRange( { if (isRootParameter) { - D3D12_ROOT_PARAMETER rootParam = {}; + D3D12_ROOT_PARAMETER1 rootParam = {}; switch (rangeType) { case D3D12_DESCRIPTOR_RANGE_TYPE_SRV: @@ -525,7 +525,7 @@ Result RootShaderObjectLayoutImpl::RootSignatureDescBuilder::addDescriptorRange( auto& descriptorSet = m_descriptorSets[physicalDescriptorSetIndex]; - D3D12_DESCRIPTOR_RANGE range = {}; + D3D12_DESCRIPTOR_RANGE1 range = {}; range.RangeType = rangeType; range.NumDescriptors = count; range.BaseShaderRegister = registerIndex; @@ -824,14 +824,14 @@ void RootShaderObjectLayoutImpl::RootSignatureDescBuilder::addAsValue( } } -D3D12_ROOT_SIGNATURE_DESC& RootShaderObjectLayoutImpl::RootSignatureDescBuilder::build() +D3D12_ROOT_SIGNATURE_DESC1& RootShaderObjectLayoutImpl::RootSignatureDescBuilder::build() { for (Index i = 0; i < m_descriptorSets.getCount(); i++) { auto& descriptorSet = m_descriptorSets[i]; if (descriptorSet.m_resourceRanges.getCount()) { - D3D12_ROOT_PARAMETER rootParam = {}; + D3D12_ROOT_PARAMETER1 rootParam = {}; rootParam.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; rootParam.DescriptorTable.NumDescriptorRanges = (UINT)descriptorSet.m_resourceRanges.getCount(); @@ -841,7 +841,7 @@ D3D12_ROOT_SIGNATURE_DESC& RootShaderObjectLayoutImpl::RootSignatureDescBuilder: } if (descriptorSet.m_samplerRanges.getCount()) { - D3D12_ROOT_PARAMETER rootParam = {}; + D3D12_ROOT_PARAMETER1 rootParam = {}; rootParam.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; rootParam.DescriptorTable.NumDescriptorRanges = (UINT)descriptorSet.m_samplerRanges.getCount(); @@ -915,12 +915,13 @@ Result RootShaderObjectLayoutImpl::createRootSignatureFromSlang( } auto& rootSignatureDesc = builder.build(); - + D3D12_VERSIONED_ROOT_SIGNATURE_DESC versionedDesc = {}; + versionedDesc.Version = D3D_ROOT_SIGNATURE_VERSION_1_1; + versionedDesc.Desc_1_1 = rootSignatureDesc; ComPtr<ID3DBlob> signature; ComPtr<ID3DBlob> error; - if (SLANG_FAILED(device->m_D3D12SerializeRootSignature( - &rootSignatureDesc, - D3D_ROOT_SIGNATURE_VERSION_1, + if (SLANG_FAILED(device->m_D3D12SerializeVersionedRootSignature( + &versionedDesc, signature.writeRef(), error.writeRef()))) { diff --git a/tools/gfx/d3d12/d3d12-shader-object-layout.h b/tools/gfx/d3d12/d3d12-shader-object-layout.h index d70b194c8..8b72067f7 100644 --- a/tools/gfx/d3d12/d3d12-shader-object-layout.h +++ b/tools/gfx/d3d12/d3d12-shader-object-layout.h @@ -257,8 +257,8 @@ public: struct DescriptorSetLayout { - List<D3D12_DESCRIPTOR_RANGE> m_resourceRanges; - List<D3D12_DESCRIPTOR_RANGE> m_samplerRanges; + List<D3D12_DESCRIPTOR_RANGE1> m_resourceRanges; + List<D3D12_DESCRIPTOR_RANGE1> m_samplerRanges; uint32_t m_resourceCount = 0; uint32_t m_samplerCount = 0; }; @@ -275,8 +275,10 @@ public: // descriptor set for each `ParameterBlock` binding range in the shader object // hierarchy, regardless of the shader's `space` indices. List<DescriptorSetLayout> m_descriptorSets; - List<D3D12_ROOT_PARAMETER> m_rootParameters; - D3D12_ROOT_SIGNATURE_DESC m_rootSignatureDesc = {}; + List<D3D12_ROOT_PARAMETER1> m_rootParameters; + List<D3D12_ROOT_PARAMETER1> m_rootDescTableParameters; + + D3D12_ROOT_SIGNATURE_DESC1 m_rootSignatureDesc = {}; static Result translateDescriptorRangeType( slang::BindingType c, D3D12_DESCRIPTOR_RANGE_TYPE* outType); @@ -449,7 +451,7 @@ public: BindingRegisterOffsetPair const& containerOffset, BindingRegisterOffsetPair const& elementOffset); - D3D12_ROOT_SIGNATURE_DESC& build(); + D3D12_ROOT_SIGNATURE_DESC1& build(); }; static Result createRootSignatureFromSlang( diff --git a/tools/gfx/d3d12/d3d12-shader-table.cpp b/tools/gfx/d3d12/d3d12-shader-table.cpp index 2773578b8..3e49350ab 100644 --- a/tools/gfx/d3d12/d3d12-shader-table.cpp +++ b/tools/gfx/d3d12/d3d12-shader-table.cpp @@ -31,6 +31,7 @@ RefPtr<BufferResource> ShaderTableImpl::createDeviceBuffer( IBufferResource::Desc bufferDesc = {}; bufferDesc.memoryType = gfx::MemoryType::DeviceLocal; bufferDesc.defaultState = ResourceState::General; + bufferDesc.allowedStates.add(ResourceState::NonPixelShaderResource); bufferDesc.type = IResource::Type::Buffer; bufferDesc.sizeInBytes = tableSize; m_device->createBufferResource(bufferDesc, nullptr, bufferResource.writeRef()); @@ -57,10 +58,6 @@ RefPtr<BufferResource> ShaderTableImpl::createDeviceBuffer( void* shaderId = stateObjectProperties->GetShaderIdentifier(name.toWString().begin()); memcpy(dest, shaderId, D3D12_SHADER_IDENTIFIER_SIZE_IN_BYTES); } - else - { - memset(dest, 0, D3D12_SHADER_IDENTIFIER_SIZE_IN_BYTES); - } if (overwrite.size) { memcpy((uint8_t*)dest + overwrite.offset, overwrite.data, overwrite.size); @@ -68,6 +65,8 @@ RefPtr<BufferResource> ShaderTableImpl::createDeviceBuffer( }; uint8_t* stagingBufferPtr = (uint8_t*)stagingPtr + stagingBufferOffset; + memset(stagingBufferPtr, 0, tableSize); + for (uint32_t i = 0; i < m_rayGenShaderCount; i++) { copyShaderIdInto( @@ -96,7 +95,7 @@ RefPtr<BufferResource> ShaderTableImpl::createDeviceBuffer( 1, bufferResource.readRef(), gfx::ResourceState::CopyDestination, - gfx::ResourceState::ShaderResource); + gfx::ResourceState::NonPixelShaderResource); RefPtr<BufferResource> resultPtr = static_cast<BufferResource*>(bufferResource.get()); return _Move(resultPtr); } diff --git a/tools/gfx/renderer-shared.cpp b/tools/gfx/renderer-shared.cpp index 69046d216..43629a0f2 100644 --- a/tools/gfx/renderer-shared.cpp +++ b/tools/gfx/renderer-shared.cpp @@ -386,6 +386,8 @@ Result RendererBase::getFormatSupportedResourceStates(Format format, ResourceSta outStates->add(ResourceState::ResolveDestination); outStates->add(ResourceState::ResolveSource); outStates->add(ResourceState::ShaderResource); + outStates->add(ResourceState::PixelShaderResource); + outStates->add(ResourceState::NonPixelShaderResource); outStates->add(ResourceState::StreamOutput); outStates->add(ResourceState::Undefined); outStates->add(ResourceState::UnorderedAccess); diff --git a/tools/gfx/vulkan/vk-helper-functions.cpp b/tools/gfx/vulkan/vk-helper-functions.cpp index 033096f8a..aa6c42ec5 100644 --- a/tools/gfx/vulkan/vk-helper-functions.cpp +++ b/tools/gfx/vulkan/vk-helper-functions.cpp @@ -83,6 +83,8 @@ VkImageLayout translateImageLayout(ResourceState state) case ResourceState::DepthWrite: return VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; case ResourceState::ShaderResource: + case ResourceState::NonPixelShaderResource: + case ResourceState::PixelShaderResource: return VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; case ResourceState::ResolveDestination: case ResourceState::CopyDestination: @@ -116,6 +118,8 @@ VkAccessFlagBits calcAccessFlags(ResourceState state) return VkAccessFlagBits( VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_READ_BIT); case ResourceState::ShaderResource: + case ResourceState::NonPixelShaderResource: + case ResourceState::PixelShaderResource: return VK_ACCESS_INPUT_ATTACHMENT_READ_BIT; case ResourceState::UnorderedAccess: return VkAccessFlagBits(VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT); @@ -167,6 +171,8 @@ VkPipelineStageFlagBits calcPipelineStageFlags(ResourceState state, bool src) VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR); case ResourceState::ShaderResource: + case ResourceState::NonPixelShaderResource: + case ResourceState::PixelShaderResource: return VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; case ResourceState::RenderTarget: return VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; @@ -237,6 +243,8 @@ VkBufferUsageFlagBits _calcBufferUsageFlags(ResourceState state) return ( VkBufferUsageFlagBits)(VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT); case ResourceState::ShaderResource: + case ResourceState::NonPixelShaderResource: + case ResourceState::PixelShaderResource: return ( VkBufferUsageFlagBits)(VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT); case ResourceState::CopySource: @@ -277,6 +285,8 @@ VkImageUsageFlagBits _calcImageUsageFlags(ResourceState state) case ResourceState::DepthRead: return VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT; case ResourceState::ShaderResource: + case ResourceState::NonPixelShaderResource: + case ResourceState::PixelShaderResource: return VK_IMAGE_USAGE_SAMPLED_BIT; case ResourceState::UnorderedAccess: return VK_IMAGE_USAGE_STORAGE_BIT; diff --git a/tools/gfx/vulkan/vk-util.cpp b/tools/gfx/vulkan/vk-util.cpp index 251e91cab..930e70439 100644 --- a/tools/gfx/vulkan/vk-util.cpp +++ b/tools/gfx/vulkan/vk-util.cpp @@ -201,6 +201,8 @@ VkImageLayout VulkanUtil::getImageLayoutFromState(ResourceState state) switch (state) { case ResourceState::ShaderResource: + case ResourceState::PixelShaderResource: + case ResourceState::NonPixelShaderResource: return VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; case ResourceState::UnorderedAccess: case ResourceState::General: @@ -555,6 +557,8 @@ VkImageLayout VulkanUtil::mapResourceStateToLayout(ResourceState state) case ResourceState::Undefined: return VK_IMAGE_LAYOUT_UNDEFINED; case ResourceState::ShaderResource: + case ResourceState::PixelShaderResource: + case ResourceState::NonPixelShaderResource: return VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; case ResourceState::UnorderedAccess: return VK_IMAGE_LAYOUT_GENERAL; |
