From f114433debfba67cbe1db239b6e92278d41ed438 Mon Sep 17 00:00:00 2001 From: Yong He Date: Fri, 19 Jul 2024 11:49:42 -0700 Subject: Support parameter block in metal shader objects. (#4671) * Support parameter block in metal shader objects. * Ignore parameter block tests on devices without tier2 argument buffer. * Fix warning. * Fix texture subscript test. --------- Co-authored-by: Yong He --- tools/gfx/metal/metal-command-encoder.cpp | 20 ++- tools/gfx/metal/metal-device.cpp | 19 ++- tools/gfx/metal/metal-device.h | 2 + tools/gfx/metal/metal-shader-object-layout.cpp | 10 ++ tools/gfx/metal/metal-shader-object-layout.h | 3 + tools/gfx/metal/metal-shader-object.cpp | 182 ++++++++++++++++++++++++- tools/gfx/metal/metal-shader-object.h | 21 +++ 7 files changed, 245 insertions(+), 12 deletions(-) (limited to 'tools') diff --git a/tools/gfx/metal/metal-command-encoder.cpp b/tools/gfx/metal/metal-command-encoder.cpp index 2447295c4..442c216aa 100644 --- a/tools/gfx/metal/metal-command-encoder.cpp +++ b/tools/gfx/metal/metal-command-encoder.cpp @@ -478,18 +478,28 @@ Result ComputeCommandEncoder::bindPipelineWithRootObject( Result ComputeCommandEncoder::dispatchCompute(int x, int y, int z) { - auto pipeline = static_cast(m_currentPipeline.Ptr()); - pipeline->ensureAPIPipelineStateCreated(); - MTL::ComputeCommandEncoder* encoder = m_commandBuffer->getMetalComputeCommandEncoder(); - encoder->setComputePipelineState(pipeline->m_computePipelineState.get()); ComputeBindingContext bindingContext; bindingContext.init(m_commandBuffer->m_device, encoder); auto program = static_cast(m_currentPipeline->m_program.get()); m_commandBuffer->m_rootObject.bindAsRoot(&bindingContext, program->m_rootObjectLayout); - encoder->dispatchThreadgroups(MTL::Size(x, y, z), pipeline->m_threadGroupSize); + auto pipeline = static_cast(m_currentPipeline.Ptr()); + RootShaderObjectImpl* rootObjectImpl = &m_commandBuffer->m_rootObject; + RefPtr newPipeline; + 
SLANG_RETURN_ON_FAIL(m_commandBuffer->m_device->maybeSpecializePipeline( + m_currentPipeline, rootObjectImpl, newPipeline)); + PipelineStateImpl* newPipelineImpl = static_cast(newPipeline.Ptr()); + + SLANG_RETURN_ON_FAIL(newPipelineImpl->ensureAPIPipelineStateCreated()); + m_currentPipeline = newPipelineImpl; + + m_currentPipeline->ensureAPIPipelineStateCreated(); + encoder->setComputePipelineState(m_currentPipeline->m_computePipelineState.get()); + + + encoder->dispatchThreadgroups(MTL::Size(x, y, z), m_currentPipeline->m_threadGroupSize); return SLANG_OK; } diff --git a/tools/gfx/metal/metal-device.cpp b/tools/gfx/metal/metal-device.cpp index 4a1c02480..609c1bf27 100644 --- a/tools/gfx/metal/metal-device.cpp +++ b/tools/gfx/metal/metal-device.cpp @@ -70,6 +70,12 @@ SlangResult DeviceImpl::initialize(const Desc& desc) m_device = NS::TransferPtr(MTL::CreateSystemDefaultDevice()); m_commandQueue = NS::TransferPtr(m_device->newCommandQueue(64)); + m_hasArgumentBufferTier2 = m_device->argumentBuffersSupport() >= MTL::ArgumentBuffersTier2; + + if (m_hasArgumentBufferTier2) + { + m_features.add("argument-buffer-tier-2"); + } SLANG_RETURN_ON_FAIL(slangContext.initialize( desc.slang, @@ -415,8 +421,19 @@ Result DeviceImpl::createTextureResource( } if (desc.allowedStates.contains(ResourceState::UnorderedAccess)) { + textureUsage |= MTL::TextureUsageShaderRead; textureUsage |= MTL::TextureUsageShaderWrite; - textureUsage |= MTL::TextureUsageShaderAtomic; + + // Request atomic access if the format allows it. 
+ switch (desc.format) + { + case Format::R32_UINT: + case Format::R32_SINT: + case Format::R32G32_UINT: + case Format::R32G32_SINT: + textureUsage |= MTL::TextureUsageShaderAtomic; + break; + } } textureDesc->setMipmapLevelCount(desc.numMipLevels); diff --git a/tools/gfx/metal/metal-device.h b/tools/gfx/metal/metal-device.h index 4f08b346e..50eb0e88d 100644 --- a/tools/gfx/metal/metal-device.h +++ b/tools/gfx/metal/metal-device.h @@ -137,6 +137,8 @@ public: uint32_t m_queueAllocCount; + bool m_hasArgumentBufferTier2 = false; + // A list to hold objects that may have a strong back reference to the device // instance. Because of the pipeline cache in `RendererBase`, there could be a reference // cycle among `DeviceImpl`->`PipelineStateImpl`->`ShaderProgramImpl`->`DeviceImpl`. diff --git a/tools/gfx/metal/metal-shader-object-layout.cpp b/tools/gfx/metal/metal-shader-object-layout.cpp index abde03b54..94bcb6db6 100644 --- a/tools/gfx/metal/metal-shader-object-layout.cpp +++ b/tools/gfx/metal/metal-shader-object-layout.cpp @@ -219,6 +219,16 @@ SlangResult ShaderObjectLayoutImpl::Builder::build(ShaderObjectLayoutImpl** outL return SLANG_OK; } +slang::TypeLayoutReflection* ShaderObjectLayoutImpl::getParameterBlockTypeLayout() +{ + if (!m_parameterBlockTypeLayout) + { + m_parameterBlockTypeLayout = m_slangSession->getTypeLayout( + m_elementTypeLayout->getType(), 0, slang::LayoutRules::MetalArgumentBufferTier2); + } + return m_parameterBlockTypeLayout; +} + Result ShaderObjectLayoutImpl::createForElementType( RendererBase* renderer, slang::ISession* session, diff --git a/tools/gfx/metal/metal-shader-object-layout.h b/tools/gfx/metal/metal-shader-object-layout.h index 969c78c3a..24a969c89 100644 --- a/tools/gfx/metal/metal-shader-object-layout.h +++ b/tools/gfx/metal/metal-shader-object-layout.h @@ -177,6 +177,7 @@ public: uint32_t getTotalOrdinaryDataSize() const { return m_totalOrdinaryDataSize; } + slang::TypeLayoutReflection* getParameterBlockTypeLayout(); protected: 
Result _init(Builder const* builder); @@ -190,6 +191,8 @@ protected: Index m_subObjectCount = 0; uint32_t m_totalOrdinaryDataSize = 0; List m_subObjectRanges; + // The type layout to use when the shader object is bound as a parameter block. + slang::TypeLayoutReflection* m_parameterBlockTypeLayout = nullptr; }; class RootShaderObjectLayoutImpl : public ShaderObjectLayoutImpl diff --git a/tools/gfx/metal/metal-shader-object.cpp b/tools/gfx/metal/metal-shader-object.cpp index 865196c5c..6d421e065 100644 --- a/tools/gfx/metal/metal-shader-object.cpp +++ b/tools/gfx/metal/metal-shader-object.cpp @@ -54,7 +54,7 @@ SLANG_NO_THROW Result SLANG_MCALL memcpy(dest + offset, data, size); m_isConstantBufferDirty = true; - + m_isArgumentBufferDirty = true; return SLANG_OK; } @@ -89,6 +89,7 @@ SLANG_NO_THROW Result SLANG_MCALL // m_textures[bindingRange.baseIndex + offset.bindingArrayIndex] = static_cast(resourceView); break; } + m_isArgumentBufferDirty = true; return SLANG_OK; } @@ -102,6 +103,7 @@ SLANG_NO_THROW Result SLANG_MCALL ShaderObjectImpl::setSampler(ShaderOffset cons auto& bindingRange = layout->getBindingRange(offset.bindingRangeIndex); m_samplers[bindingRange.baseIndex + offset.bindingArrayIndex] = static_cast(sampler); + m_isArgumentBufferDirty = true; return SLANG_OK; } @@ -161,7 +163,7 @@ Result ShaderObjectImpl::init(IDevice* device, ShaderObjectLayoutImpl* layout) m_objects[bindingRangeInfo.subObjectIndex + i] = subObject; } } - + m_isArgumentBufferDirty = true; return SLANG_OK; } @@ -255,7 +257,6 @@ Result ShaderObjectImpl::_writeOrdinaryData( subObject->_writeOrdinaryData(subObjectDest, destSize - subObjectOffset, subObjectLayout); } } - return SLANG_OK; } @@ -325,6 +326,165 @@ Result ShaderObjectImpl::_bindOrdinaryDataBufferIfNeeded( return SLANG_OK; } +void ShaderObjectImpl::writeOrdinaryDataIntoArgumentBuffer( + slang::TypeLayoutReflection* argumentBufferTypeLayout, + slang::TypeLayoutReflection* defaultTypeLayout, + uint8_t* argumentBuffer, + uint8_t* 
srcData) +{ + // If we are pure data, just copy it over from srcData. + if (defaultTypeLayout->getCategoryCount() == 1) + { + switch (defaultTypeLayout->getCategoryByIndex(0)) + { + case slang::ParameterCategory::Uniform: + // Just copy the uniform data + memcpy(argumentBuffer, srcData, defaultTypeLayout->getSize()); + break; + } + return; + } + + for (unsigned int i = 0; i < argumentBufferTypeLayout->getFieldCount(); i++) + { + auto argumentBufferField = argumentBufferTypeLayout->getFieldByIndex(i); + auto defaultLayoutField = defaultTypeLayout->getFieldByIndex(i); + // If the field is mixed type, recurse. + writeOrdinaryDataIntoArgumentBuffer( + argumentBufferField->getTypeLayout(), + defaultLayoutField->getTypeLayout(), + argumentBuffer + argumentBufferField->getOffset(), + srcData + defaultLayoutField->getOffset()); + } +} + +BufferResourceImpl* ShaderObjectImpl::_ensureArgumentBufferUpToDate( + DeviceImpl* device, + ShaderObjectLayoutImpl* layout) +{ + auto typeLayout = layout->getParameterBlockTypeLayout(); + auto defaultTypeLayout = m_layout->getElementTypeLayout(); + + // If we have already created a buffer to hold the parameter block, then we should + // simply re-use that buffer rather than re-create it. 
+ if (!m_argumentBuffer) + { + ComPtr bufferResourcePtr; + IBufferResource::Desc bufferDesc = {}; + bufferDesc.type = IResource::Type::Buffer; + bufferDesc.sizeInBytes = typeLayout->getSize(); + bufferDesc.defaultState = ResourceState::ConstantBuffer; + bufferDesc.allowedStates = + ResourceStateSet(ResourceState::ConstantBuffer, ResourceState::CopyDestination); + bufferDesc.memoryType = MemoryType::Upload; + SLANG_RETURN_NULL_ON_FAIL( + device->createBufferResource(bufferDesc, nullptr, bufferResourcePtr.writeRef())); + m_argumentBuffer = static_cast(bufferResourcePtr.get()); + } + + if (m_isArgumentBufferDirty) + { + // Once the buffer is allocated, we can fill it in with the uniform data + // and resource bindings we have tracked, using `typeLayout` to obtain + // the offsets for each field. + // + auto dataSize = typeLayout->getSize(); + MemoryRange range = { 0, dataSize }; + void* argumentData; + SLANG_RETURN_NULL_ON_FAIL(m_argumentBuffer->map(&range, &argumentData)); + + // Now fill in argument values to `argumentData`. + int bindingRangeIndex = 0; + SLANG_ASSERT(defaultTypeLayout->getBindingRangeCount() == typeLayout->getBindingRangeCount()); + + int bufferBindingIndexOffset = layout->getTotalOrdinaryDataSize() != 0 ? 
1 : 0; + + for (unsigned int bindingRangeIndex = 0; bindingRangeIndex < defaultTypeLayout->getBindingRangeCount(); bindingRangeIndex++) + { + int bindingCount = defaultTypeLayout->getBindingRangeBindingCount(bindingRangeIndex); + int setIndex = defaultTypeLayout->getBindingRangeDescriptorSetIndex(bindingRangeIndex); + int rangeIndex = defaultTypeLayout->getBindingRangeFirstDescriptorRangeIndex(bindingRangeIndex); + int bindingOffset = defaultTypeLayout->getDescriptorSetDescriptorRangeIndexOffset(setIndex, rangeIndex); + auto bindingType = defaultTypeLayout->getBindingRangeType(bindingRangeIndex); + for (int i = 0; i < bindingCount; i++) + { + auto argumentDataOffset = typeLayout->getDescriptorSetDescriptorRangeIndexOffset(setIndex, rangeIndex) + i * sizeof(uint64_t); + auto argumentPtr = (uint8_t*)argumentData + argumentDataOffset; + auto resourceIndex = bindingOffset + i; + switch (bindingType) + { + case slang::BindingType::ConstantBuffer: + case slang::BindingType::ParameterBlock: + { + if (m_objects[resourceIndex]) + { + auto subArgumentBuffer = m_objects[resourceIndex]->_ensureArgumentBufferUpToDate(device, m_objects[resourceIndex]->getLayout()); + if (subArgumentBuffer) + { + gfx::DeviceAddress bufferPtr = subArgumentBuffer->m_buffer->gpuAddress(); + memcpy(argumentPtr, &bufferPtr, sizeof(bufferPtr)); + } + } + break; + } + case slang::BindingType::RawBuffer: + case slang::BindingType::MutableRawBuffer: + { + auto bufferViewImpl = static_cast(m_buffers[resourceIndex + bufferBindingIndexOffset].get()); + + if (bufferViewImpl) + { + gfx::DeviceAddress bufferPtr = bufferViewImpl->m_buffer->getDeviceAddress() + bufferViewImpl->m_offset; + memcpy(argumentPtr, &bufferPtr, sizeof(bufferPtr)); + } + break; + } + case slang::BindingType::Texture: + case slang::BindingType::MutableTexture: + { + auto textureViewImpl = static_cast(m_textures[resourceIndex].get()); + if (textureViewImpl) + { + auto resourceId = textureViewImpl->m_textureView->gpuResourceID(); + 
memcpy(argumentPtr, &resourceId, sizeof(resourceId)); + } + break; + } + case slang::BindingType::Sampler: + { + auto samplerStateImpl = static_cast(m_samplers[resourceIndex].get()); + auto resourceId = samplerStateImpl->m_samplerState->gpuResourceID(); + memcpy(argumentPtr, &resourceId, sizeof(resourceId)); + break; + } + } + } + } + writeOrdinaryDataIntoArgumentBuffer(typeLayout, defaultTypeLayout, (uint8_t*)argumentData, (uint8_t*)m_data.getBuffer()); + m_argumentBuffer->unmap(&range); + m_isArgumentBufferDirty = false; + } + + return m_argumentBuffer.get(); +} + +Result ShaderObjectImpl::bindAsParameterBlock( + BindingContext* context, + BindingOffset const& inOffset, + ShaderObjectLayoutImpl* layout) +{ + if (!context->device->m_hasArgumentBufferTier2) + return SLANG_FAIL; + + auto argumentBuffer = _ensureArgumentBufferUpToDate(context->device, layout); + + if (m_argumentBuffer) + { + context->setBuffer(m_argumentBuffer->m_buffer.get(), inOffset.buffer); + } + return SLANG_OK; +} + Result ShaderObjectImpl::bindAsConstantBuffer( BindingContext* context, BindingOffset const& inOffset, @@ -425,7 +585,6 @@ Result ShaderObjectImpl::bindAsValue( switch (bindingRange.bindingType) { case slang::BindingType::ConstantBuffer: - case slang::BindingType::ParameterBlock: { BindingOffset objOffset = rangeOffset; for (Index i = 0; i < count; ++i) @@ -435,12 +594,23 @@ Result ShaderObjectImpl::bindAsValue( // Unsurprisingly, we bind each object in the range as // a constant buffer. 
// - subObject->bindAsConstantBuffer(context, objOffset, subObjectLayout); + SLANG_RETURN_ON_FAIL(subObject->bindAsConstantBuffer(context, objOffset, subObjectLayout)); objOffset += rangeStride; } + break; } - break; + case slang::BindingType::ParameterBlock: + { + BindingOffset objOffset = rangeOffset; + for (Index i = 0; i < count; ++i) + { + auto subObject = m_objects[subObjectIndex + i]; + SLANG_RETURN_ON_FAIL(subObject->bindAsParameterBlock(context, objOffset, subObjectLayout)); + objOffset += rangeStride; + } + } + break; #if 0 case slang::BindingType::ExistentialValue: diff --git a/tools/gfx/metal/metal-shader-object.h b/tools/gfx/metal/metal-shader-object.h index b9695febe..8a3d5d392 100644 --- a/tools/gfx/metal/metal-shader-object.h +++ b/tools/gfx/metal/metal-shader-object.h @@ -88,6 +88,16 @@ protected: DeviceImpl* device, ShaderObjectLayoutImpl* layout); + BufferResourceImpl* _ensureArgumentBufferUpToDate( + DeviceImpl* device, + ShaderObjectLayoutImpl* layout); + + void writeOrdinaryDataIntoArgumentBuffer( + slang::TypeLayoutReflection* argumentBufferTypeLayout, + slang::TypeLayoutReflection* defaultTypeLayout, + uint8_t* argumentBuffer, + uint8_t* srcData); + /// Bind the buffer for ordinary/uniform data, if needed /// /// The `ioOffset` parameter will be updated to reflect the constant buffer @@ -105,6 +115,12 @@ public: BindingOffset const& inOffset, ShaderObjectLayoutImpl* layout); + /// Bind this object as if it was declared as a `ParameterBlock` in Slang + Result bindAsParameterBlock( + BindingContext* context, + BindingOffset const& inOffset, + ShaderObjectLayoutImpl* layout); + /// Bind this object as a value that appears in the body of another object. /// /// This case is directly used when binding an object for an interface-type @@ -137,7 +153,12 @@ public: /// Created on demand with `_createOrdinaryDataBufferIfNeeded()` RefPtr m_ordinaryDataBuffer; + /// Argument buffer created on demand to bind as a parameter block. 
+ RefPtr m_argumentBuffer; + + bool m_isConstantBufferDirty = true; + bool m_isArgumentBufferDirty = true; }; class MutableShaderObjectImpl -- cgit v1.2.3