diff options
Diffstat (limited to 'source')
| -rw-r--r-- | source/slang/core.meta.slang | 43 | ||||
| -rw-r--r-- | source/slang/hlsl.meta.slang | 99 | ||||
| -rw-r--r-- | source/slang/slang-emit-spirv-ops.h | 43 | ||||
| -rw-r--r-- | source/slang/slang-emit-spirv.cpp | 262 | ||||
| -rw-r--r-- | source/slang/slang-ir-defer-buffer-load.cpp | 14 | ||||
| -rw-r--r-- | source/slang/slang-ir-insts-stable-names.lua | 1 | ||||
| -rw-r--r-- | source/slang/slang-ir-insts.h | 21 | ||||
| -rw-r--r-- | source/slang/slang-ir-insts.lua | 1 | ||||
| -rw-r--r-- | source/slang/slang-ir-redundancy-removal.cpp | 64 | ||||
| -rw-r--r-- | source/slang/slang-ir.cpp | 39 | ||||
| -rw-r--r-- | source/slang/slang-ir.h | 2 |
11 files changed, 404 insertions, 185 deletions
diff --git a/source/slang/core.meta.slang b/source/slang/core.meta.slang index 5ffab1f9c..9b55dc35a 100644 --- a/source/slang/core.meta.slang +++ b/source/slang/core.meta.slang @@ -1368,19 +1368,20 @@ struct Ptr< __intrinsic_op($(kIROp_GetOffsetPtr)) [nonmutating] + [__NoSideEffect] ref; } }; //@hidden: __intrinsic_op($(kIROp_AlignedAttr)) -void __align_attr(int alignment); +internal int __align_attr(int alignment); __intrinsic_op($(kIROp_Load)) -T __load_aligned<T, U>(T* ptr, U alignmentAttr); +internal T __load_aligned<T>(T* ptr, int alignmentAttr); __intrinsic_op($(kIROp_Store)) -void __store_aligned<T, U>(T* ptr, T value, U alignmentAttr); +internal void __store_aligned<T>(T* ptr, T value, int alignmentAttr); //@public: @@ -1413,6 +1414,42 @@ void storeAligned<int alignment, T>(T* ptr, T value) __store_aligned(ptr, value, __align_attr(alignment)); } +//@hidden: +__intrinsic_op($(kIROp_MemoryScopeAttr)) +internal int __memoryscope_attr(MemoryScope scope); + +__intrinsic_op($(kIROp_Load)) +internal T __load_coherent<T, Access access, AddressSpace addrSpace>(Ptr<T, access, addrSpace> ptr, int alignmentAttr, int memoryScopeAttr); + +__intrinsic_op($(kIROp_Store)) +internal void __store_coherent<T, AddressSpace addrSpace>(Ptr<T, Access::ReadWrite, addrSpace> ptr, T value, int alignmentAttr, int memoryScopeAttr); + +/// Store a value coherently to a memoryscope. +/// Tighter memory scopes may be faster to operate on. +/// @param ptr The pointer to store value to. +/// @param value The value to store. +/// +[require(SPV_KHR_vulkan_memory_model)] +[ForceInline] +__generic<int alignment, MemoryScope scope, T, AddressSpace addrSpace> +void storeCoherent(Ptr<T, Access::ReadWrite, addrSpace> ptr, T value) +{ + __store_coherent<T, addrSpace>(ptr, value, __align_attr(alignment), __memoryscope_attr(scope)); +} + +/// Load a value coherently to a memoryscope. +/// Tighter memory scopes may be faster to operate on. +/// @param ptr The pointer to load from. +/// +[require(SPV_KHR_vulkan_memory_model)] +[ForceInline] +[__NoSideEffect] +__generic<int alignment, MemoryScope scope, T, Access access, AddressSpace addrSpace> +T loadCoherent(Ptr<T, access, addrSpace> ptr) +{ + return __load_coherent<T, access, addrSpace>(ptr, __align_attr(alignment), __memoryscope_attr(scope)); +} + ${{{ StringBuilder ptrTypeParameterListBuilder; ptrTypeParameterListBuilder << "T, Access access, AddressSpace addrSpace"; diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang index 73bdee96e..824a06000 100644 --- a/source/slang/hlsl.meta.slang +++ b/source/slang/hlsl.meta.slang @@ -23239,6 +23239,18 @@ extension<T, L : IBufferDataLayout> RasterizerOrderedStructuredBuffer<T, L> : IR int getCount() { uint count; uint stride; this.GetDimensions(count, stride); return count; } } +[require(vk_mem_model)] +internal void enableVMMDeviceScopeCapabilityIfNeeded(constexpr MemoryScope memoryScope) +{ + if (memoryScope == MemoryScope::Device) + { + spirv_asm + { + OpCapability VulkanMemoryModelDeviceScopeKHR; + }; + } +} + namespace linalg { @@ -23813,6 +23825,22 @@ struct CoopMat }; } + // TODO: make this function an intrinsic and support all types via the single intrinsic + [require(cooperative_matrix, vk_mem_model)] + void StoreCoherent< + let matrixLayout : CoopMatMatrixLayout + >(T* buffer, uint element, uint stride, constexpr MemoryScope memoryScope) + { + enableVMMDeviceScopeCapabilityIfNeeded(memoryScope); + let alignment = 16U; + const int32_t scope = (int32_t)memoryScope; + return spirv_asm + { + %pointer:$$T* = OpPtrAccessChain $buffer $element; + OpCooperativeMatrixStoreKHR %pointer $this $matrixLayout $stride Aligned|MakePointerAvailable|NonPrivatePointer !alignment $scope; + }; + } + [ForceInline] [require(cooperative_matrix)] void Store< @@ -23924,6 +23952,24 @@ ${{{{ }; } + // TODO: make this function an intrinsic and support all types via the single intrinsic + [ForceInline] + [__NoSideEffect] + [require(cooperative_matrix, vk_mem_model)] + static This LoadCoherent< + let matrixLayout : CoopMatMatrixLayout + >(T* buffer, uint element, uint stride, constexpr MemoryScope memoryScope) + { + enableVMMDeviceScopeCapabilityIfNeeded(memoryScope); + let alignment = 16U; + const int32_t scope = (int32_t)memoryScope; + return spirv_asm + { + %pointer:$$T* = OpPtrAccessChain $buffer $element; + result:$$CoopMat<T, S, M, N, R> = OpCooperativeMatrixLoadKHR %pointer $matrixLayout $stride Aligned|MakePointerVisible|NonPrivatePointer !alignment $scope; + }; + } + [ForceInline] [require(cooperative_matrix)] static This Load< @@ -24480,6 +24526,24 @@ CoopMat<T, S, M, N, R> coopMatLoad< } [ForceInline] +[require(cooperative_matrix, vk_mem_model)] +CoopMat<T, S, M, N, R> coopMatLoadCoherent< + T : __BuiltinArithmeticType, + let S : MemoryScope, + let M : int, + let N : int, + let R : CoopMatMatrixUse, + let matrixLayout : CoopMatMatrixLayout +>( + T* buffer, + uint element, + uint stride, + constexpr MemoryScope memoryScope) +{ + return CoopMat<T, S, M, N, R>.LoadCoherent<matrixLayout>(buffer, element, stride, memoryScope); +} + +[ForceInline] [require(cooperative_matrix)] CoopMat<T, S, M, N, R> coopMatLoad< T : __BuiltinArithmeticType, @@ -24845,6 +24909,20 @@ struct CoopVec<T : __BuiltinArithmeticType, let N : int> : IArray<T>, IArithmeti }; } + // TODO: make this function an intrinsic and support all types via the single intrinsic + [require(cooperative_vector, vk_mem_model)] + void storeCoherent(T* buffer, int32_t byteOffset16ByteAligned = 0, constexpr MemoryScope memoryScope = MemoryScope::Device) + { + enableVMMDeviceScopeCapabilityIfNeeded(memoryScope); + let pointer = Ptr<T[]>(buffer); + let alignment = 16U; + const int32_t scope = (int32_t)memoryScope; + spirv_asm + { + OpCooperativeVectorStoreNV $pointer $byteOffset16ByteAligned $this Aligned|MakePointerAvailable|NonPrivatePointer !alignment $scope; + }; + } + [ForceInline] [require(cooperative_vector)] [require(hlsl_coopvec_poc)] @@ -25017,6 +25095,20 @@ struct CoopVec<T : __BuiltinArithmeticType, let N : int> : IArray<T>, IArithmeti }; } + // TODO: make this function an intrinsic and support all types via the single intrinsic + [require(cooperative_vector, vk_mem_model)] + static CoopVec<T, N> loadCoherent(T* buffer, int32_t byteOffset16ByteAligned = 0, constexpr MemoryScope memoryScope = MemoryScope::Device) + { + enableVMMDeviceScopeCapabilityIfNeeded(memoryScope); + let pointer = Ptr<T[]>(buffer); + let alignment = 16U; + const int32_t scope = (int32_t)memoryScope; + return spirv_asm + { + result:$$CoopVec<T, N> = OpCooperativeVectorLoadNV $pointer $byteOffset16ByteAligned Aligned|MakePointerVisible|NonPrivatePointer !alignment $scope; + }; + } + // Groupshared [ForceInline] [__NoSideEffect] @@ -26457,6 +26549,13 @@ CoopVec<T, N> coopVecLoad<let N : int, T : __BuiltinArithmeticType>(T* buffer, i return CoopVec<T, N>.load(buffer, byteOffset16ByteAligned); } +[ForceInline] +[require(spirv, cooperative_vector, vk_mem_model)] +CoopVec<T, N> coopVecLoadCoherent<let N : int, T : __BuiltinArithmeticType>(T* buffer, int32_t byteOffset16ByteAligned = 0, constexpr MemoryScope memoryScope = MemoryScope::Device) +{ + return CoopVec<T, N>.loadCoherent(buffer, byteOffset16ByteAligned, memoryScope); +} + // Groupshared [ForceInline] [require(cooperative_vector)] diff --git a/source/slang/slang-emit-spirv-ops.h b/source/slang/slang-emit-spirv-ops.h index a5e4d730a..da9058b62 100644 --- a/source/slang/slang-emit-spirv-ops.h +++ b/source/slang/slang-emit-spirv-ops.h @@ -600,28 +600,6 @@ SpvInst* emitOpLoad( return emitInst(parent, inst, SpvOpLoad, idResultType, kResultID, pointer, memoryAccess); } -// https://registry.khronos.org/SPIR-V/specs/unified1/SPIRV.html#OpLoad -template<typename T1, typename T2> -SpvInst* emitOpLoadAligned( - SpvInstParent* parent, - IRInst* inst, - const T1& idResultType, - const T2& pointer, - const SpvLiteralInteger& literalInteger) -{ - static_assert(isSingular<T1>); - static_assert(isSingular<T2>); - return emitInst( - parent, - inst, - SpvOpLoad, - idResultType, - kResultID, - pointer, - SpvMemoryAccessAlignedMask, - literalInteger); -} - // https://registry.khronos.org/SPIR-V/specs/unified1/SPIRV.html#OpStore template<typename T1, typename T2> SpvInst* emitOpStore( @@ -636,27 +614,6 @@ SpvInst* emitOpStore( return emitInst(parent, inst, SpvOpStore, pointer, object, memoryAccess); } -// https://registry.khronos.org/SPIR-V/specs/unified1/SPIRV.html#OpStore -template<typename T1, typename T2> -SpvInst* emitOpStoreAligned( - SpvInstParent* parent, - IRInst* inst, - const T1& pointer, - const T2& object, - const SpvLiteralInteger& literalInteger) -{ - static_assert(isSingular<T1>); - static_assert(isSingular<T2>); - return emitInst( - parent, - inst, - SpvOpStore, - pointer, - object, - SpvMemoryAccessAlignedMask, - literalInteger); -} - // https://registry.khronos.org/SPIR-V/specs/unified1/SPIRV.html#OpAccessChain template<typename T1, typename T2, typename Ts> SpvInst* emitOpAccessChain( diff --git a/source/slang/slang-emit-spirv.cpp b/source/slang/slang-emit-spirv.cpp index 3a8a913ec..8bcd1429f 100644 --- a/source/slang/slang-emit-spirv.cpp +++ b/source/slang/slang-emit-spirv.cpp @@ -4569,33 +4569,37 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex break; case kIROp_AtomicLoad: { - IRBuilder builder{inst}; - if (isAtomicableAddressSpace(inst->getOperand(0)->getDataType())) + IRAtomicLoad* atomicLoad = as<IRAtomicLoad>(inst); + auto ptr = atomicLoad->getPtr(); + IRBuilder builder{atomicLoad}; + if (isAtomicableAddressSpace(ptr->getDataType())) { if (m_memoryModel == SpvMemoryModelVulkan) requireSPIRVCapability(SpvCapabilityVulkanMemoryModelDeviceScope); const auto memoryScope = emitIntConstant(IRIntegerValue{SpvScopeDevice}, builder.getUIntType()); - const auto memorySemantics = - emitMemorySemanticMask(inst->getOperand(1), inst->getOperand(0)); + const auto memorySemantics = emitMemorySemanticMask(inst->getOperand(1), ptr); result = emitOpAtomicLoad( parent, inst, inst->getFullType(), - inst->getOperand(0), + ptr, memoryScope, memorySemantics); ensureAtomicCapability(inst, SpvOpAtomicLoad); } else { - result = emitLoadMaybeCoherent(parent, inst); + result = emitLoad(parent, inst, ptr); } } break; case kIROp_AtomicStore: { + IRAtomicStore* atomicStore = as<IRAtomicStore>(inst); + auto ptr = atomicStore->getPtr(); + auto val = atomicStore->getVal(); IRBuilder builder{inst}; if (isAtomicableAddressSpace(inst->getOperand(0)->getDataType())) { @@ -4604,48 +4608,44 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex const auto memoryScope = emitIntConstant(IRIntegerValue{SpvScopeDevice}, builder.getUIntType()); - const auto memorySemantics = - emitMemorySemanticMask(inst->getOperand(2), inst->getOperand(0)); - result = emitOpAtomicStore( - parent, - inst, - inst->getOperand(0), - memoryScope, - memorySemantics, - inst->getOperand(1)); + const auto memorySemantics = emitMemorySemanticMask(inst->getOperand(2), ptr); + result = + emitOpAtomicStore(parent, inst, ptr, memoryScope, memorySemantics, val); ensureAtomicCapability(inst, SpvOpAtomicStore); } else { - result = emitStoreMaybeCoherent(parent, inst); + result = emitStore(parent, inst, ptr, val); } } break; case kIROp_AtomicExchange: { + IRAtomicExchange* atomicExchange = as<IRAtomicExchange>(inst); + auto ptr = atomicExchange->getPtr(); + auto val = atomicExchange->getOperand(1); IRBuilder builder{inst}; - if (isAtomicableAddressSpace(inst->getOperand(0)->getDataType())) + if (isAtomicableAddressSpace(ptr->getDataType())) { if (m_memoryModel == SpvMemoryModelVulkan) requireSPIRVCapability(SpvCapabilityVulkanMemoryModelDeviceScope); const auto memoryScope = emitIntConstant(IRIntegerValue{SpvScopeDevice}, builder.getUIntType()); - const auto memorySemantics = - emitMemorySemanticMask(inst->getOperand(2), inst->getOperand(0)); + const auto memorySemantics = emitMemorySemanticMask(inst->getOperand(2), ptr); result = emitOpAtomicExchange( parent, inst, inst->getFullType(), - inst->getOperand(0), + ptr, memoryScope, memorySemantics, - inst->getOperand(1)); + val); ensureAtomicCapability(inst, SpvOpAtomicExchange); } else { - result = emitStoreMaybeCoherent(parent, inst); + result = emitStore(parent, inst, ptr, val); } } break; @@ -7082,6 +7082,8 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex SpvInst* emitGetOffsetPtr(SpvInstParent* parent, IRInst* inst) { + requireVariableBufferCapabilityIfNeeded(inst->getDataType()); + return emitOpPtrAccessChain( parent, inst, @@ -7174,54 +7176,100 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex } } - SpvInst* emitLoad(SpvInstParent* parent, IRLoad* inst) + enum class MemoryAccessType { - requireVariableBufferCapabilityIfNeeded(inst->getDataType()); + Load, + Store + }; + + template<MemoryAccessType memoryAccessType> + void getMemoryAccessOperandsOfLoadStore( + IRInst* inst, + IRInst* ptr, + int& memoryAccessMaskOut, + int& alignmentOut, + MemoryScope& memoryScopeOut) + { + IRAlignedAttr* alignedAttr = nullptr; + IRMemoryScopeAttr* memoryScopeAttr = nullptr; - auto ptrType = as<IRPtrTypeBase>(inst->getPtr()->getDataType()); - if (ptrType && addressSpaceToStorageClass(ptrType->getAddressSpace()) == - SpvStorageClassPhysicalStorageBuffer) + for (auto attr : inst->getAllAttrs()) { - IRSizeAndAlignment sizeAndAlignment; - if (auto alignedAttr = inst->findAttr<IRAlignedAttr>()) + if (auto foundAlignedAttr = as<IRAlignedAttr>(attr)) + alignedAttr = foundAlignedAttr; + else if (auto foundMemoryScopeAttr = as<IRMemoryScopeAttr>(attr)) + memoryScopeAttr = foundMemoryScopeAttr; + } + + // Determine coherence + { + bool isCoherent = false; + if (memoryScopeAttr) { - sizeAndAlignment.alignment = (int)getIntVal(alignedAttr->getAlignment()); + memoryScopeOut = (MemoryScope)getIntVal(memoryScopeAttr->getMemoryScope()); + if (m_memoryModel != SpvMemoryModelVulkan) + SLANG_ASSERT_FAILURE( + "Explicit coherent operations require vulkan-memory-model, " + "specify the capability 'vk_mem_model'"); + isCoherent = true; } else { - getNaturalSizeAndAlignment( - m_targetProgram->getOptionSet(), - ptrType->getValueType(), - &sizeAndAlignment); + if (NeedToUseCoherentLoadOrStore(ptr)) + { + memoryScopeOut = MemoryScope::Device; + isCoherent = true; + } + } + if (isCoherent) + { + + memoryAccessMaskOut |= SpvMemoryAccessNonPrivatePointerMask; + if constexpr (memoryAccessType == MemoryAccessType::Load) + memoryAccessMaskOut |= SpvMemoryAccessMakePointerVisibleMask; + else + memoryAccessMaskOut |= SpvMemoryAccessMakePointerAvailableMask; + if (memoryScopeOut == MemoryScope::Device) + requireSPIRVCapability(SpvCapabilityVulkanMemoryModelDeviceScope); } - return emitOpLoadAligned( - parent, - inst, - inst->getDataType(), - inst->getPtr(), - SpvLiteralInteger::from32(sizeAndAlignment.alignment)); } - else + + // Determine alignment { - return emitLoadMaybeCoherent(parent, inst); + auto ptrType = as<IRPtrTypeBase>(ptr->getDataType()); + if (ptrType && addressSpaceToStorageClass(ptrType->getAddressSpace()) == + SpvStorageClassPhysicalStorageBuffer) + { + IRSizeAndAlignment sizeAndAlignment; + if (alignedAttr) + sizeAndAlignment.alignment = (int)getIntVal(alignedAttr->getAlignment()); + else + getNaturalSizeAndAlignment( + m_targetProgram->getOptionSet(), + ptrType->getValueType(), + &sizeAndAlignment); + + alignmentOut = sizeAndAlignment.alignment; + if (alignmentOut != -1) + memoryAccessMaskOut |= SpvMemoryAccessAlignedMask; + } } } - SpvInst* emitLoadMaybeCoherent(SpvInstParent* parent, IRInst* inst) + SpvInst* emitLoad(SpvInstParent* parent, IRInst* inst, IRInst* ptr) { - IRBuilder builder{inst}; - builder.setInsertBefore(inst); - - SpvInst* deviceScope = nullptr; - IRInst* pointer = inst->getOperand(0); - - bool coherentPointer = NeedToUseCoherentLoadOrStore(pointer); - if (coherentPointer) - { - requireSPIRVCapability(SpvCapabilityVulkanMemoryModelDeviceScope); - deviceScope = emitIntConstant(IRIntegerValue{SpvScopeDevice}, builder.getUIntType()); - } + requireVariableBufferCapabilityIfNeeded(inst->getDataType()); + IRBuilder builder(inst); + int memoryAccessMask = 0; + int alignment = -1; + MemoryScope memoryScope{}; + getMemoryAccessOperandsOfLoadStore<MemoryAccessType::Load>( + inst, + ptr, + memoryAccessMask, + alignment, + memoryScope); return emitInstCustomOperandFunc( parent, inst, @@ -7230,85 +7278,61 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex { emitOperand(inst->getFullType()); emitOperand(kResultID); - emitOperand(pointer); - - if (coherentPointer) + emitOperand(ptr); + if (memoryAccessMask) { - emitOperand( - SpvMemoryAccessMakePointerVisibleMask | - SpvMemoryAccessNonPrivatePointerMask); - - emitOperand(deviceScope); + emitOperand(SpvLiteralInteger::from32(memoryAccessMask)); + if (memoryAccessMask & SpvMemoryAccessAlignedMask) + emitOperand(SpvLiteralInteger::from32((uint32_t)alignment)); + if (memoryAccessMask & SpvMemoryAccessMakePointerVisibleMask) + emitOperand( + emitIntConstant((IRIntegerValue)memoryScope, builder.getIntType())); } }); } - SpvInst* emitStore(SpvInstParent* parent, IRStore* inst) + SpvInst* emitLoad(SpvInstParent* parent, IRLoad* inst) { - auto ptrType = as<IRPtrTypeBase>(inst->getPtr()->getDataType()); - if (ptrType && addressSpaceToStorageClass(ptrType->getAddressSpace()) == - SpvStorageClassPhysicalStorageBuffer) - { - IRSizeAndAlignment sizeAndAlignment; - if (auto alignedAttr = inst->findAttr<IRAlignedAttr>()) - { - sizeAndAlignment.alignment = (int)getIntVal(alignedAttr->getAlignment()); - } - else - { - getNaturalSizeAndAlignment( - m_targetProgram->getOptionSet(), - ptrType->getValueType(), - &sizeAndAlignment); - } - return emitOpStoreAligned( - parent, - inst, - inst->getPtr(), - inst->getVal(), - SpvLiteralInteger::from32(sizeAndAlignment.alignment)); - } - else - { - return emitStoreMaybeCoherent(parent, inst); - } + return emitLoad(parent, inst, inst->getPtr()); } - SpvInst* emitStoreMaybeCoherent(SpvInstParent* parent, IRInst* inst) + SpvInst* emitStore(SpvInstParent* parent, IRInst* inst, IRInst* ptr, IRInst* val) { - IRBuilder builder{inst}; - builder.setInsertBefore(inst); - - SpvInst* deviceScope = nullptr; - IRInst* pointer = inst->getOperand(0); - IRInst* object = inst->getOperand(1); - - bool coherentPointer = NeedToUseCoherentLoadOrStore(pointer); - if (coherentPointer) - { - requireSPIRVCapability(SpvCapabilityVulkanMemoryModelDeviceScope); - deviceScope = emitIntConstant(IRIntegerValue{SpvScopeDevice}, builder.getUIntType()); - } + requireVariableBufferCapabilityIfNeeded(inst->getDataType()); + IRBuilder builder(inst); + int memoryAccessMask = 0; + int alignment = -1; + MemoryScope memoryScope{}; + getMemoryAccessOperandsOfLoadStore<MemoryAccessType::Store>( + inst, + ptr, + memoryAccessMask, + alignment, + memoryScope); return emitInstCustomOperandFunc( parent, inst, SpvOpStore, [&]() { - emitOperand(pointer); - emitOperand(object); - - if (coherentPointer) + emitOperand(ptr); + emitOperand(val); + if (memoryAccessMask) { - emitOperand( - SpvMemoryAccessMakePointerAvailableMask | - SpvMemoryAccessNonPrivatePointerMask); - - emitOperand(deviceScope); + emitOperand(SpvLiteralInteger::from32(memoryAccessMask)); + if (memoryAccessMask & SpvMemoryAccessAlignedMask) + emitOperand(SpvLiteralInteger::from32((uint32_t)alignment)); + if (memoryAccessMask & SpvMemoryAccessMakePointerAvailableMask) + emitOperand( + emitIntConstant((IRIntegerValue)memoryScope, builder.getIntType())); } }); } + SpvInst* emitStore(SpvInstParent* parent, IRStore* inst) + { + return emitStore(parent, inst, inst->getPtr(), inst->getVal()); + } SpvInst* emitSwizzledStore(SpvInstParent* parent, IRSwizzledStore* inst) { @@ -8613,6 +8637,8 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex SpvInst* emitDebugValue(SpvInstParent* parent, IRDebugValue* debugValue) { + auto debugVar = debugValue->getDebugVar(); + auto debugValueVal = debugValue->getValue(); // We are asked to update the value for a debug variable. // A debug variable is already emited as a OpDebugVariable + // OpVariable + OpDebugDeclare. We only need to store the new value @@ -8628,7 +8654,7 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex // variable. If it doesn't, we can't emit a store. // List<IRInst*> irAccessChain; - auto rootVar = getRootAddr(debugValue->getDebugVar(), irAccessChain); + auto rootVar = getRootAddr(debugVar, irAccessChain); SpvInst* spvDebugVar = nullptr; if (!m_mapIRInstToSpvInst.tryGetValue(rootVar, spvDebugVar)) return nullptr; @@ -8644,7 +8670,7 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex // be fully static. We will skip emitting the debug inst if the access chain // isn't static. // - auto type = unwrapAttributedType(debugValue->getDebugVar()->getDataType()); + auto type = unwrapAttributedType(debugVar->getDataType()); List<SpvInst*> accessChain; bool isConstAccessChain = translateIRAccessChain(builder, type, irAccessChain, accessChain); @@ -8657,7 +8683,7 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex m_voidType, getNonSemanticDebugInfoExtInst(), rootVar, - debugValue->getValue(), + debugValueVal, getDwarfExpr(), accessChain); } @@ -8669,7 +8695,7 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex // The ordinary case is the debug variable has a backing ordinary variable. // We can simply emit a store into the backing variable for the DebugValue operation. // - return emitStoreMaybeCoherent(parent, debugValue); + return emitStore(parent, debugValue, debugVar, debugValueVal); } IRInst* getName(IRInst* inst) diff --git a/source/slang/slang-ir-defer-buffer-load.cpp b/source/slang/slang-ir-defer-buffer-load.cpp index 4736b4e65..3c8a9f4c7 100644 --- a/source/slang/slang-ir-defer-buffer-load.cpp +++ b/source/slang/slang-ir-defer-buffer-load.cpp @@ -151,9 +151,19 @@ struct DeferBufferLoadContext void deferBufferLoadInst(IRBuilder& builder, List<IRInst*>& workList, IRInst* loadInst) { + bool failDueToAttributeFound = false; + for (auto attr : loadInst->getAllAttrs()) + { + if (as<IRAlignedAttr>(attr) || as<IRMemoryScopeAttr>(attr)) + { + failDueToAttributeFound = true; + break; + } + } + // Don't defer the load anymore if the type is simple. - if (!isTypePreferrableToDeferLoad(codeGenContext, loadInst->getDataType()) || - loadInst->findAttr<IRAlignedAttr>()) + if (failDueToAttributeFound || + !isTypePreferrableToDeferLoad(codeGenContext, loadInst->getDataType())) { return; } diff --git a/source/slang/slang-ir-insts-stable-names.lua b/source/slang/slang-ir-insts-stable-names.lua index fefc7a956..a34dc346a 100644 --- a/source/slang/slang-ir-insts-stable-names.lua +++ b/source/slang/slang-ir-insts-stable-names.lua @@ -679,4 +679,5 @@ return { ["CastResourceToDescriptorHandle"] = 675, ["SymbolAlias"] = 676, ["Decoration.InParamProxyVar"] = 677, + ["Attr.MemoryScope"] = 678, } diff --git a/source/slang/slang-ir-insts.h b/source/slang/slang-ir-insts.h index 5c27d5e25..2255afc67 100644 --- a/source/slang/slang-ir-insts.h +++ b/source/slang/slang-ir-insts.h @@ -2191,6 +2191,13 @@ struct IRAlignedAttr : IRAttr }; FIDDLE() +struct IRMemoryScopeAttr : IRAttr +{ + FIDDLE(leafInst()) + IRInst* getMemoryScope() { return getOperand(0); } +}; + +FIDDLE() struct IRLoad : IRInst { FIDDLE(leafInst()) @@ -2242,6 +2249,17 @@ struct IRAtomicStore : IRAtomicOperation }; FIDDLE() +struct IRAtomicExchange : IRAtomicOperation +{ + FIDDLE(leafInst()) + IRUse ptr; + IRUse val; + + IRInst* getPtr() { return ptr.get(); } + IRInst* getVal() { return val.get(); } +}; + +FIDDLE() struct IRRWStructuredBufferStore : IRInst { FIDDLE(leafInst()) @@ -4365,7 +4383,7 @@ public: IRInst* emitLoad(IRType* type, IRInst* ptr); IRInst* emitLoad(IRType* type, IRInst* ptr, IRInst* align); - IRInst* emitLoad(IRType* type, IRInst* ptr, IRAlignedAttr* align); + IRInst* emitLoad(IRType* type, IRInst* ptr, ArrayView<IRInst*> attributes); IRInst* emitLoad(IRInst* ptr); IRInst* emitLoadReverseGradient(IRType* type, IRInst* diffValue); @@ -4375,6 +4393,7 @@ public: IRInst* emitStore(IRInst* dstPtr, IRInst* srcVal); IRInst* emitStore(IRInst* dstPtr, IRInst* srcVal, IRInst* align); + IRInst* emitStore(IRInst* dstPtr, IRInst* srcVal, IRInst* align, IRInst* memoryScope); IRInst* emitAtomicStore(IRInst* dstPtr, IRInst* srcVal, IRInst* memoryOrder); diff --git a/source/slang/slang-ir-insts.lua b/source/slang/slang-ir-insts.lua index a4bb4a6f2..e21fc86ae 100644 --- a/source/slang/slang-ir-insts.lua +++ b/source/slang/slang-ir-insts.lua @@ -2018,6 +2018,7 @@ local insts = { }, }, { Aligned = { struct_name = "AlignedAttr", min_operands = 1 } }, + { MemoryScope = { struct_name = "MemoryScopeAttr", min_operands = 1 } }, { SemanticAttr = { { userSemantic = { struct_name = "UserSemanticAttr", min_operands = 2 } }, diff --git a/source/slang/slang-ir-redundancy-removal.cpp b/source/slang/slang-ir-redundancy-removal.cpp index 3b1a731f9..0308b50c2 100644 --- a/source/slang/slang-ir-redundancy-removal.cpp +++ b/source/slang/slang-ir-redundancy-removal.cpp @@ -416,6 +416,47 @@ static IRInst* _getRootVar(IRInst* inst) return inst; } +// 0 is the most broad scope +static int getMemoryScopeOrder(MemoryScope scope) +{ + switch (scope) + { + case MemoryScope::CrossDevice: + return 7; + case MemoryScope::Device: + return 6; + case MemoryScope::QueueFamily: + // https://docs.vulkan.org/spec/latest/chapters/shaders.html#shaders-scope-queue-family + return 5; + case MemoryScope::ShaderCall: + // https://docs.vulkan.org/spec/latest/chapters/shaders.html#shaders-scope-shadercall + return 4; + case MemoryScope::Workgroup: + return 3; + case MemoryScope::Subgroup: + return 2; + case MemoryScope::Invocation: + default: + return 1; + } +} + +// Returns if MemoryScope x is a sub-set of y +static bool isMemoryScopeSubsetOf(MemoryScope x, MemoryScope y) +{ + return getMemoryScopeOrder(x) <= getMemoryScopeOrder(y); +} + +// Inst's are relative to a memory scope, get that memory scope. +static MemoryScope getMemoryScopeOfLoadStore(IRInst* inst) +{ + SLANG_ASSERT(as<IRLoad>(inst) || as<IRStore>(inst)); + auto memoryScope = inst->findAttr<IRMemoryScopeAttr>(); + if (!memoryScope) + return MemoryScope::Invocation; + return (MemoryScope)getIntVal(memoryScope->getMemoryScope()); +} + bool tryRemoveRedundantStore(IRGlobalValueWithCode* func, IRStore* store) { // We perform a quick and conservative check: @@ -473,15 +514,18 @@ bool tryRemoveRedundantStore(IRGlobalValueWithCode* func, IRStore* store) } } - // A store can be removed if there are subsequent stores to the same variable, + // This store can be removed if there are subsequent stores to the same variable, // and there are no insts in between the stores that can read the variable. - + // Additionally, MemoryScope of the `store` must be a sub-set of `nextStore`, + // otherwise we can not be certain that `nextStore` completely overwrites `store`. + MemoryScope memoryScopeOfStore = getMemoryScopeOfLoadStore(store); HashSet<IRBlock*> visitedBlocks; for (auto next = store->getNextInst(); next;) { if (auto nextStore = as<IRStore>(next)) { - if (nextStore->getPtr() == store->getPtr()) + if (nextStore->getPtr() == store->getPtr() && + isMemoryScopeSubsetOf(memoryScopeOfStore, getMemoryScopeOfLoadStore(nextStore))) { hasOverridingStore = true; break; @@ -585,13 +629,21 @@ bool tryRemoveRedundantLoad(IRGlobalValueWithCode* func, IRLoad* load) { bool changed = false; - // If the load is preceeded by a store without any side-effect insts - // in-between, remove the load. + // Get the memory scope we are operating on. + MemoryScope memoryScopeOfLoad = getMemoryScopeOfLoadStore(load); + + // We can replace a load with a `Store->getVal()` if that store is a super-set + // memory scope to our load. + // Ex 1: Store into Workgroup, load from Invocation. Load will be equal to the Store. + // + // Ex 2: Store into Invocation, load from Workgroup. Load may/may-not be equal to the Store + // since the cache managing the Workgroup scope may contain different data than the invocation. for (auto prev = load->getPrevInst(); prev; prev = prev->getPrevInst()) { if (auto store = as<IRStore>(prev)) { - if (store->getPtr() == load->getPtr()) + if (store->getPtr() == load->getPtr() && + isMemoryScopeSubsetOf(memoryScopeOfLoad, getMemoryScopeOfLoadStore(store))) { auto value = store->getVal(); load->replaceUsesWith(value); diff --git a/source/slang/slang-ir.cpp b/source/slang/slang-ir.cpp index 7b7d5ec17..8371d6ef5 100644 --- a/source/slang/slang-ir.cpp +++ b/source/slang/slang-ir.cpp @@ -5212,18 +5212,20 @@ IRInst* IRBuilder::emitLoad(IRType* type, IRInst* ptr, IRInst* align) return inst; } -IRInst* IRBuilder::emitLoad(IRType* type, IRInst* ptr, IRAlignedAttr* align) +IRInst* IRBuilder::emitLoad(IRType* type, IRInst* ptr, ArrayView<IRInst*> attributes) { - if (align) - { - auto inst = createInst<IRLoad>(this, kIROp_Load, type, ptr, align); - addInst(inst); - return inst; - } - else - { - return emitLoad(type, ptr); - } + ShortList<IRInst*> params; + params.add(ptr); + params.addRange(attributes); + auto inst = createInst<IRLoad>( + this, + kIROp_Load, + type, + params.getCount(), + params.getArrayView().getBuffer()); + + addInst(inst); + return inst; } IRInst* IRBuilder::emitLoad(IRInst* ptr) @@ -5279,6 +5281,21 @@ IRInst* IRBuilder::emitStore(IRInst* dstPtr, IRInst* srcVal, IRInst* align) return inst; } +IRInst* IRBuilder::emitStore(IRInst* dstPtr, IRInst* srcVal, IRInst* align, IRInst* memoryScope) +{ + auto inst = createInst<IRStore>( + this, + kIROp_Store, + nullptr, + dstPtr, + srcVal, + getAttr(kIROp_AlignedAttr, align), + getAttr(kIROp_MemoryScopeAttr, memoryScope)); + + addInst(inst); + return inst; +} + IRInst* IRBuilder::emitAtomicStore(IRInst* dstPtr, IRInst* srcVal, IRInst* memoryOrder) { auto inst = createInst<IRAtomicStore>( diff --git a/source/slang/slang-ir.h b/source/slang/slang-ir.h index 54bf23754..4f9941946 100644 --- a/source/slang/slang-ir.h +++ b/source/slang/slang-ir.h @@ -2431,7 +2431,7 @@ public: // anything to do with serialization format // const static UInt k_minSupportedModuleVersion = 1; - const static UInt k_maxSupportedModuleVersion = 1; + const static UInt k_maxSupportedModuleVersion = 2; static_assert(k_minSupportedModuleVersion <= k_maxSupportedModuleVersion); private: |
