summaryrefslogtreecommitdiffstats
path: root/source/slang
diff options
context:
space:
mode:
author16-Bit-Dog <67922228+16-Bit-Dog@users.noreply.github.com>2025-10-10 13:09:24 -0400
committerGitHub <noreply@github.com>2025-10-10 17:09:24 +0000
commit1e0908bd7107dfbdac912b693c3ab9bd6e1dc8b3 (patch)
treecc39d2e18abc954fb76f9a54b11a8d492685c6e2 /source/slang
parentb4023f715885ada9a2777ea3b0d6d9739860b39b (diff)
Addition of `Load`/`Store` coherent operations (#8395)
Fixes: https://github.com/shader-slang/slang/issues/7634 Duplicate of PR https://github.com/shader-slang/slang/pull/8052 Primary Changes: * Added `storeCoherent` and `loadCoherent` for coherent load/store via pointers. This is backed by `IRMemoryScopeAttr` which is an `IRAttr` attached to `IRLoad` and `IRStore` * Logic in `source\slang\slang-emit-spirv.cpp` for load/store emitting has been reworked to be less messy and more maintainable * Add to `hlsl.meta.slang` coop vector and coop matrix coherent load/store operations Secondary Changes: * Added a missing load/store test for coop matrix: `tests\cooperative-matrix\load-store-pointer.slang` --------- Co-authored-by: ArielG-NV <aglasroth@nvidia.com> Co-authored-by: ArielG-NV <159081215+ArielG-NV@users.noreply.github.com> Co-authored-by: slangbot <186143334+slangbot@users.noreply.github.com> Co-authored-by: Nathan V. Morrical <natemorrical@gmail.com>
Diffstat (limited to 'source/slang')
-rw-r--r--source/slang/core.meta.slang43
-rw-r--r--source/slang/hlsl.meta.slang99
-rw-r--r--source/slang/slang-emit-spirv-ops.h43
-rw-r--r--source/slang/slang-emit-spirv.cpp262
-rw-r--r--source/slang/slang-ir-defer-buffer-load.cpp14
-rw-r--r--source/slang/slang-ir-insts-stable-names.lua1
-rw-r--r--source/slang/slang-ir-insts.h21
-rw-r--r--source/slang/slang-ir-insts.lua1
-rw-r--r--source/slang/slang-ir-redundancy-removal.cpp64
-rw-r--r--source/slang/slang-ir.cpp39
-rw-r--r--source/slang/slang-ir.h2
11 files changed, 404 insertions, 185 deletions
diff --git a/source/slang/core.meta.slang b/source/slang/core.meta.slang
index 5ffab1f9c..9b55dc35a 100644
--- a/source/slang/core.meta.slang
+++ b/source/slang/core.meta.slang
@@ -1368,19 +1368,20 @@ struct Ptr<
__intrinsic_op($(kIROp_GetOffsetPtr))
[nonmutating]
+ [__NoSideEffect]
ref;
}
};
//@hidden:
__intrinsic_op($(kIROp_AlignedAttr))
-void __align_attr(int alignment);
+internal int __align_attr(int alignment);
__intrinsic_op($(kIROp_Load))
-T __load_aligned<T, U>(T* ptr, U alignmentAttr);
+internal T __load_aligned<T>(T* ptr, int alignmentAttr);
__intrinsic_op($(kIROp_Store))
-void __store_aligned<T, U>(T* ptr, T value, U alignmentAttr);
+internal void __store_aligned<T>(T* ptr, T value, int alignmentAttr);
//@public:
@@ -1413,6 +1414,42 @@ void storeAligned<int alignment, T>(T* ptr, T value)
__store_aligned(ptr, value, __align_attr(alignment));
}
+//@hidden:
+// Builds the memory-scope attribute operand (kIROp_MemoryScopeAttr) that is passed as the
+// trailing argument of the coherent load/store intrinsics declared in this hidden section.
+__intrinsic_op($(kIROp_MemoryScopeAttr))
+internal int __memoryscope_attr(MemoryScope scope);
+
+// kIROp_Load taking two trailing attribute operands: the alignment attribute, then the
+// memory-scope attribute.
+__intrinsic_op($(kIROp_Load))
+internal T __load_coherent<T, Access access, AddressSpace addrSpace>(Ptr<T, access, addrSpace> ptr, int alignmentAttr, int memoryScopeAttr);
+
+// kIROp_Store taking two trailing attribute operands: the alignment attribute, then the
+// memory-scope attribute. Only accepts read-write pointers.
+__intrinsic_op($(kIROp_Store))
+internal void __store_coherent<T, AddressSpace addrSpace>(Ptr<T, Access::ReadWrite, addrSpace> ptr, T value, int alignmentAttr, int memoryScopeAttr);
+
+//@public:
+
+/// Store `value` through `ptr` coherently with respect to the memory scope `scope`.
+/// Narrower memory scopes may be faster to operate on.
+/// @param ptr The pointer to store the value to.
+/// @param value The value to store.
+/// The generic arguments `alignment` and `scope` are forwarded as the alignment and memory-scope attributes of the store.
+[require(SPV_KHR_vulkan_memory_model)]
+[ForceInline]
+__generic<int alignment, MemoryScope scope, T, AddressSpace addrSpace>
+void storeCoherent(Ptr<T, Access::ReadWrite, addrSpace> ptr, T value)
+{
+ __store_coherent<T, addrSpace>(ptr, value, __align_attr(alignment), __memoryscope_attr(scope));
+}
+
+/// Load a value through `ptr` coherently with respect to the memory scope `scope`.
+/// Narrower memory scopes may be faster to operate on.
+/// @param ptr The pointer to load from.
+/// @return The loaded value; `alignment` and `scope` are forwarded as the alignment and memory-scope attributes of the load.
+[require(SPV_KHR_vulkan_memory_model)]
+[ForceInline]
+[__NoSideEffect]
+__generic<int alignment, MemoryScope scope, T, Access access, AddressSpace addrSpace>
+T loadCoherent(Ptr<T, access, addrSpace> ptr)
+{
+ return __load_coherent<T, access, addrSpace>(ptr, __align_attr(alignment), __memoryscope_attr(scope));
+}
+
${{{
StringBuilder ptrTypeParameterListBuilder;
ptrTypeParameterListBuilder << "T, Access access, AddressSpace addrSpace";
diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang
index 73bdee96e..824a06000 100644
--- a/source/slang/hlsl.meta.slang
+++ b/source/slang/hlsl.meta.slang
@@ -23239,6 +23239,18 @@ extension<T, L : IBufferDataLayout> RasterizerOrderedStructuredBuffer<T, L> : IR
int getCount() { uint count; uint stride; this.GetDimensions(count, stride); return count; }
}
+[require(vk_mem_model)]
+internal void enableVMMDeviceScopeCapabilityIfNeeded(constexpr MemoryScope memoryScope) // Declares OpCapability VulkanMemoryModelDeviceScopeKHR only when `memoryScope` is MemoryScope::Device.
+{
+ if (memoryScope == MemoryScope::Device)
+ {
+ spirv_asm
+ {
+ OpCapability VulkanMemoryModelDeviceScopeKHR;
+ };
+ }
+}
+
namespace linalg
{
@@ -23813,6 +23825,22 @@ struct CoopMat
};
}
+ // Coherent store of this matrix to `buffer[element]` using `stride`, made available at `memoryScope`. TODO: make this function an intrinsic and support all types via the single intrinsic
+ [require(cooperative_matrix, vk_mem_model)]
+ void StoreCoherent<
+ let matrixLayout : CoopMatMatrixLayout
+ >(T* buffer, uint element, uint stride, constexpr MemoryScope memoryScope)
+ {
+ enableVMMDeviceScopeCapabilityIfNeeded(memoryScope);
+ let alignment = 16U; // literal for the Aligned memory operand. NOTE(review): asserted for any `element` value -- confirm callers guarantee 16-byte alignment.
+ const int32_t scope = (int32_t)memoryScope; // scope operand consumed by MakePointerAvailable
+ return spirv_asm
+ {
+ %pointer:$$T* = OpPtrAccessChain $buffer $element;
+ OpCooperativeMatrixStoreKHR %pointer $this $matrixLayout $stride Aligned|MakePointerAvailable|NonPrivatePointer !alignment $scope;
+ };
+ }
+
[ForceInline]
[require(cooperative_matrix)]
void Store<
@@ -23924,6 +23952,24 @@ ${{{{
};
}
+ // Coherent load of a matrix from `buffer[element]` using `stride`, made visible at `memoryScope`. TODO: make this function an intrinsic and support all types via the single intrinsic
+ [ForceInline]
+ [__NoSideEffect]
+ [require(cooperative_matrix, vk_mem_model)]
+ static This LoadCoherent<
+ let matrixLayout : CoopMatMatrixLayout
+ >(T* buffer, uint element, uint stride, constexpr MemoryScope memoryScope)
+ {
+ enableVMMDeviceScopeCapabilityIfNeeded(memoryScope);
+ let alignment = 16U; // literal for the Aligned memory operand. NOTE(review): asserted for any `element` value -- confirm callers guarantee 16-byte alignment.
+ const int32_t scope = (int32_t)memoryScope; // scope operand consumed by MakePointerVisible
+ return spirv_asm
+ {
+ %pointer:$$T* = OpPtrAccessChain $buffer $element;
+ result:$$CoopMat<T, S, M, N, R> = OpCooperativeMatrixLoadKHR %pointer $matrixLayout $stride Aligned|MakePointerVisible|NonPrivatePointer !alignment $scope;
+ };
+ }
+
[ForceInline]
[require(cooperative_matrix)]
static This Load<
@@ -24480,6 +24526,24 @@ CoopMat<T, S, M, N, R> coopMatLoad<
}
[ForceInline]
+[require(cooperative_matrix, vk_mem_model)]
+CoopMat<T, S, M, N, R> coopMatLoadCoherent<
+ T : __BuiltinArithmeticType,
+ let S : MemoryScope, // scope argument of the CoopMat type itself; separate from the `memoryScope` parameter
+ let M : int,
+ let N : int,
+ let R : CoopMatMatrixUse,
+ let matrixLayout : CoopMatMatrixLayout
+>(
+ T* buffer,
+ uint element,
+ uint stride,
+ constexpr MemoryScope memoryScope) // scope forwarded to the coherent load
+{
+ return CoopMat<T, S, M, N, R>.LoadCoherent<matrixLayout>(buffer, element, stride, memoryScope); // free-function wrapper: forwards every argument to the static member
+}
+
+[ForceInline]
[require(cooperative_matrix)]
CoopMat<T, S, M, N, R> coopMatLoad<
T : __BuiltinArithmeticType,
@@ -24845,6 +24909,20 @@ struct CoopVec<T : __BuiltinArithmeticType, let N : int> : IArray<T>, IArithmeti
};
}
+ // Coherent store of this vector to `buffer` at `byteOffset16ByteAligned`, made available at `memoryScope` (Device by default). TODO: make this function an intrinsic and support all types via the single intrinsic
+ [require(cooperative_vector, vk_mem_model)]
+ void storeCoherent(T* buffer, int32_t byteOffset16ByteAligned = 0, constexpr MemoryScope memoryScope = MemoryScope::Device)
+ {
+ enableVMMDeviceScopeCapabilityIfNeeded(memoryScope);
+ let pointer = Ptr<T[]>(buffer); // reinterpret the element pointer as a pointer to an unsized array for the SPIR-V op
+ let alignment = 16U; // literal for the Aligned memory operand
+ const int32_t scope = (int32_t)memoryScope; // scope operand consumed by MakePointerAvailable
+ spirv_asm
+ {
+ OpCooperativeVectorStoreNV $pointer $byteOffset16ByteAligned $this Aligned|MakePointerAvailable|NonPrivatePointer !alignment $scope;
+ };
+ }
+
[ForceInline]
[require(cooperative_vector)]
[require(hlsl_coopvec_poc)]
@@ -25017,6 +25095,20 @@ struct CoopVec<T : __BuiltinArithmeticType, let N : int> : IArray<T>, IArithmeti
};
}
+ // Coherent load of a vector from `buffer` at `byteOffset16ByteAligned`, made visible at `memoryScope` (Device by default). TODO: make this function an intrinsic and support all types via the single intrinsic
+ [require(cooperative_vector, vk_mem_model)]
+ static CoopVec<T, N> loadCoherent(T* buffer, int32_t byteOffset16ByteAligned = 0, constexpr MemoryScope memoryScope = MemoryScope::Device)
+ {
+ enableVMMDeviceScopeCapabilityIfNeeded(memoryScope);
+ let pointer = Ptr<T[]>(buffer); // reinterpret the element pointer as a pointer to an unsized array for the SPIR-V op
+ let alignment = 16U; // literal for the Aligned memory operand
+ const int32_t scope = (int32_t)memoryScope; // scope operand consumed by MakePointerVisible
+ return spirv_asm
+ {
+ result:$$CoopVec<T, N> = OpCooperativeVectorLoadNV $pointer $byteOffset16ByteAligned Aligned|MakePointerVisible|NonPrivatePointer !alignment $scope;
+ };
+ }
+
// Groupshared
[ForceInline]
[__NoSideEffect]
@@ -26457,6 +26549,13 @@ CoopVec<T, N> coopVecLoad<let N : int, T : __BuiltinArithmeticType>(T* buffer, i
return CoopVec<T, N>.load(buffer, byteOffset16ByteAligned);
}
+[ForceInline]
+[require(spirv, cooperative_vector, vk_mem_model)]
+CoopVec<T, N> coopVecLoadCoherent<let N : int, T : __BuiltinArithmeticType>(T* buffer, int32_t byteOffset16ByteAligned = 0, constexpr MemoryScope memoryScope = MemoryScope::Device)
+{
+ return CoopVec<T, N>.loadCoherent(buffer, byteOffset16ByteAligned, memoryScope); // free-function wrapper: forwards every argument to the static member
+}
+
// Groupshared
[ForceInline]
[require(cooperative_vector)]
diff --git a/source/slang/slang-emit-spirv-ops.h b/source/slang/slang-emit-spirv-ops.h
index a5e4d730a..da9058b62 100644
--- a/source/slang/slang-emit-spirv-ops.h
+++ b/source/slang/slang-emit-spirv-ops.h
@@ -600,28 +600,6 @@ SpvInst* emitOpLoad(
return emitInst(parent, inst, SpvOpLoad, idResultType, kResultID, pointer, memoryAccess);
}
-// https://registry.khronos.org/SPIR-V/specs/unified1/SPIRV.html#OpLoad
-template<typename T1, typename T2>
-SpvInst* emitOpLoadAligned(
- SpvInstParent* parent,
- IRInst* inst,
- const T1& idResultType,
- const T2& pointer,
- const SpvLiteralInteger& literalInteger)
-{
- static_assert(isSingular<T1>);
- static_assert(isSingular<T2>);
- return emitInst(
- parent,
- inst,
- SpvOpLoad,
- idResultType,
- kResultID,
- pointer,
- SpvMemoryAccessAlignedMask,
- literalInteger);
-}
-
// https://registry.khronos.org/SPIR-V/specs/unified1/SPIRV.html#OpStore
template<typename T1, typename T2>
SpvInst* emitOpStore(
@@ -636,27 +614,6 @@ SpvInst* emitOpStore(
return emitInst(parent, inst, SpvOpStore, pointer, object, memoryAccess);
}
-// https://registry.khronos.org/SPIR-V/specs/unified1/SPIRV.html#OpStore
-template<typename T1, typename T2>
-SpvInst* emitOpStoreAligned(
- SpvInstParent* parent,
- IRInst* inst,
- const T1& pointer,
- const T2& object,
- const SpvLiteralInteger& literalInteger)
-{
- static_assert(isSingular<T1>);
- static_assert(isSingular<T2>);
- return emitInst(
- parent,
- inst,
- SpvOpStore,
- pointer,
- object,
- SpvMemoryAccessAlignedMask,
- literalInteger);
-}
-
// https://registry.khronos.org/SPIR-V/specs/unified1/SPIRV.html#OpAccessChain
template<typename T1, typename T2, typename Ts>
SpvInst* emitOpAccessChain(
diff --git a/source/slang/slang-emit-spirv.cpp b/source/slang/slang-emit-spirv.cpp
index 3a8a913ec..8bcd1429f 100644
--- a/source/slang/slang-emit-spirv.cpp
+++ b/source/slang/slang-emit-spirv.cpp
@@ -4569,33 +4569,37 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex
break;
case kIROp_AtomicLoad:
{
- IRBuilder builder{inst};
- if (isAtomicableAddressSpace(inst->getOperand(0)->getDataType()))
+ IRAtomicLoad* atomicLoad = as<IRAtomicLoad>(inst);
+ auto ptr = atomicLoad->getPtr();
+ IRBuilder builder{atomicLoad};
+ if (isAtomicableAddressSpace(ptr->getDataType()))
{
if (m_memoryModel == SpvMemoryModelVulkan)
requireSPIRVCapability(SpvCapabilityVulkanMemoryModelDeviceScope);
const auto memoryScope =
emitIntConstant(IRIntegerValue{SpvScopeDevice}, builder.getUIntType());
- const auto memorySemantics =
- emitMemorySemanticMask(inst->getOperand(1), inst->getOperand(0));
+ const auto memorySemantics = emitMemorySemanticMask(inst->getOperand(1), ptr);
result = emitOpAtomicLoad(
parent,
inst,
inst->getFullType(),
- inst->getOperand(0),
+ ptr,
memoryScope,
memorySemantics);
ensureAtomicCapability(inst, SpvOpAtomicLoad);
}
else
{
- result = emitLoadMaybeCoherent(parent, inst);
+ result = emitLoad(parent, inst, ptr);
}
}
break;
case kIROp_AtomicStore:
{
+ IRAtomicStore* atomicStore = as<IRAtomicStore>(inst);
+ auto ptr = atomicStore->getPtr();
+ auto val = atomicStore->getVal();
IRBuilder builder{inst};
if (isAtomicableAddressSpace(inst->getOperand(0)->getDataType()))
{
@@ -4604,48 +4608,44 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex
const auto memoryScope =
emitIntConstant(IRIntegerValue{SpvScopeDevice}, builder.getUIntType());
- const auto memorySemantics =
- emitMemorySemanticMask(inst->getOperand(2), inst->getOperand(0));
- result = emitOpAtomicStore(
- parent,
- inst,
- inst->getOperand(0),
- memoryScope,
- memorySemantics,
- inst->getOperand(1));
+ const auto memorySemantics = emitMemorySemanticMask(inst->getOperand(2), ptr);
+ result =
+ emitOpAtomicStore(parent, inst, ptr, memoryScope, memorySemantics, val);
ensureAtomicCapability(inst, SpvOpAtomicStore);
}
else
{
- result = emitStoreMaybeCoherent(parent, inst);
+ result = emitStore(parent, inst, ptr, val);
}
}
break;
case kIROp_AtomicExchange:
{
+ IRAtomicExchange* atomicExchange = as<IRAtomicExchange>(inst);
+ auto ptr = atomicExchange->getPtr();
+ auto val = atomicExchange->getOperand(1);
IRBuilder builder{inst};
- if (isAtomicableAddressSpace(inst->getOperand(0)->getDataType()))
+ if (isAtomicableAddressSpace(ptr->getDataType()))
{
if (m_memoryModel == SpvMemoryModelVulkan)
requireSPIRVCapability(SpvCapabilityVulkanMemoryModelDeviceScope);
const auto memoryScope =
emitIntConstant(IRIntegerValue{SpvScopeDevice}, builder.getUIntType());
- const auto memorySemantics =
- emitMemorySemanticMask(inst->getOperand(2), inst->getOperand(0));
+ const auto memorySemantics = emitMemorySemanticMask(inst->getOperand(2), ptr);
result = emitOpAtomicExchange(
parent,
inst,
inst->getFullType(),
- inst->getOperand(0),
+ ptr,
memoryScope,
memorySemantics,
- inst->getOperand(1));
+ val);
ensureAtomicCapability(inst, SpvOpAtomicExchange);
}
else
{
- result = emitStoreMaybeCoherent(parent, inst);
+ result = emitStore(parent, inst, ptr, val);
}
}
break;
@@ -7082,6 +7082,8 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex
SpvInst* emitGetOffsetPtr(SpvInstParent* parent, IRInst* inst)
{
+ requireVariableBufferCapabilityIfNeeded(inst->getDataType());
+
return emitOpPtrAccessChain(
parent,
inst,
@@ -7174,54 +7176,100 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex
}
}
- SpvInst* emitLoad(SpvInstParent* parent, IRLoad* inst)
+ enum class MemoryAccessType
{
- requireVariableBufferCapabilityIfNeeded(inst->getDataType());
+ Load,
+ Store
+ };
+
+ template<MemoryAccessType memoryAccessType>
+ void getMemoryAccessOperandsOfLoadStore(
+ IRInst* inst,
+ IRInst* ptr,
+ int& memoryAccessMaskOut,
+ int& alignmentOut,
+ MemoryScope& memoryScopeOut)
+ {
+ IRAlignedAttr* alignedAttr = nullptr;
+ IRMemoryScopeAttr* memoryScopeAttr = nullptr;
- auto ptrType = as<IRPtrTypeBase>(inst->getPtr()->getDataType());
- if (ptrType && addressSpaceToStorageClass(ptrType->getAddressSpace()) ==
- SpvStorageClassPhysicalStorageBuffer)
+ for (auto attr : inst->getAllAttrs())
{
- IRSizeAndAlignment sizeAndAlignment;
- if (auto alignedAttr = inst->findAttr<IRAlignedAttr>())
+ if (auto foundAlignedAttr = as<IRAlignedAttr>(attr))
+ alignedAttr = foundAlignedAttr;
+ else if (auto foundMemoryScopeAttr = as<IRMemoryScopeAttr>(attr))
+ memoryScopeAttr = foundMemoryScopeAttr;
+ }
+
+ // Determine coherence
+ {
+ bool isCoherent = false;
+ if (memoryScopeAttr)
{
- sizeAndAlignment.alignment = (int)getIntVal(alignedAttr->getAlignment());
+ memoryScopeOut = (MemoryScope)getIntVal(memoryScopeAttr->getMemoryScope());
+ if (m_memoryModel != SpvMemoryModelVulkan)
+ SLANG_ASSERT_FAILURE(
+ "Explicit coherent operations require vulkan-memory-model, "
+ "specify the capability 'vk_mem_model'");
+ isCoherent = true;
}
else
{
- getNaturalSizeAndAlignment(
- m_targetProgram->getOptionSet(),
- ptrType->getValueType(),
- &sizeAndAlignment);
+ if (NeedToUseCoherentLoadOrStore(ptr))
+ {
+ memoryScopeOut = MemoryScope::Device;
+ isCoherent = true;
+ }
+ }
+ if (isCoherent)
+ {
+
+ memoryAccessMaskOut |= SpvMemoryAccessNonPrivatePointerMask;
+ if constexpr (memoryAccessType == MemoryAccessType::Load)
+ memoryAccessMaskOut |= SpvMemoryAccessMakePointerVisibleMask;
+ else
+ memoryAccessMaskOut |= SpvMemoryAccessMakePointerAvailableMask;
+ if (memoryScopeOut == MemoryScope::Device)
+ requireSPIRVCapability(SpvCapabilityVulkanMemoryModelDeviceScope);
}
- return emitOpLoadAligned(
- parent,
- inst,
- inst->getDataType(),
- inst->getPtr(),
- SpvLiteralInteger::from32(sizeAndAlignment.alignment));
}
- else
+
+ // Determine alignment
{
- return emitLoadMaybeCoherent(parent, inst);
+ auto ptrType = as<IRPtrTypeBase>(ptr->getDataType());
+ if (ptrType && addressSpaceToStorageClass(ptrType->getAddressSpace()) ==
+ SpvStorageClassPhysicalStorageBuffer)
+ {
+ IRSizeAndAlignment sizeAndAlignment;
+ if (alignedAttr)
+ sizeAndAlignment.alignment = (int)getIntVal(alignedAttr->getAlignment());
+ else
+ getNaturalSizeAndAlignment(
+ m_targetProgram->getOptionSet(),
+ ptrType->getValueType(),
+ &sizeAndAlignment);
+
+ alignmentOut = sizeAndAlignment.alignment;
+ if (alignmentOut != -1)
+ memoryAccessMaskOut |= SpvMemoryAccessAlignedMask;
+ }
}
}
- SpvInst* emitLoadMaybeCoherent(SpvInstParent* parent, IRInst* inst)
+ SpvInst* emitLoad(SpvInstParent* parent, IRInst* inst, IRInst* ptr)
{
- IRBuilder builder{inst};
- builder.setInsertBefore(inst);
-
- SpvInst* deviceScope = nullptr;
- IRInst* pointer = inst->getOperand(0);
-
- bool coherentPointer = NeedToUseCoherentLoadOrStore(pointer);
- if (coherentPointer)
- {
- requireSPIRVCapability(SpvCapabilityVulkanMemoryModelDeviceScope);
- deviceScope = emitIntConstant(IRIntegerValue{SpvScopeDevice}, builder.getUIntType());
- }
+ requireVariableBufferCapabilityIfNeeded(inst->getDataType());
+ IRBuilder builder(inst);
+ int memoryAccessMask = 0;
+ int alignment = -1;
+ MemoryScope memoryScope{};
+ getMemoryAccessOperandsOfLoadStore<MemoryAccessType::Load>(
+ inst,
+ ptr,
+ memoryAccessMask,
+ alignment,
+ memoryScope);
return emitInstCustomOperandFunc(
parent,
inst,
@@ -7230,85 +7278,61 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex
{
emitOperand(inst->getFullType());
emitOperand(kResultID);
- emitOperand(pointer);
-
- if (coherentPointer)
+ emitOperand(ptr);
+ if (memoryAccessMask)
{
- emitOperand(
- SpvMemoryAccessMakePointerVisibleMask |
- SpvMemoryAccessNonPrivatePointerMask);
-
- emitOperand(deviceScope);
+ emitOperand(SpvLiteralInteger::from32(memoryAccessMask));
+ if (memoryAccessMask & SpvMemoryAccessAlignedMask)
+ emitOperand(SpvLiteralInteger::from32((uint32_t)alignment));
+ if (memoryAccessMask & SpvMemoryAccessMakePointerVisibleMask)
+ emitOperand(
+ emitIntConstant((IRIntegerValue)memoryScope, builder.getIntType()));
}
});
}
- SpvInst* emitStore(SpvInstParent* parent, IRStore* inst)
+ SpvInst* emitLoad(SpvInstParent* parent, IRLoad* inst)
{
- auto ptrType = as<IRPtrTypeBase>(inst->getPtr()->getDataType());
- if (ptrType && addressSpaceToStorageClass(ptrType->getAddressSpace()) ==
- SpvStorageClassPhysicalStorageBuffer)
- {
- IRSizeAndAlignment sizeAndAlignment;
- if (auto alignedAttr = inst->findAttr<IRAlignedAttr>())
- {
- sizeAndAlignment.alignment = (int)getIntVal(alignedAttr->getAlignment());
- }
- else
- {
- getNaturalSizeAndAlignment(
- m_targetProgram->getOptionSet(),
- ptrType->getValueType(),
- &sizeAndAlignment);
- }
- return emitOpStoreAligned(
- parent,
- inst,
- inst->getPtr(),
- inst->getVal(),
- SpvLiteralInteger::from32(sizeAndAlignment.alignment));
- }
- else
- {
- return emitStoreMaybeCoherent(parent, inst);
- }
+ return emitLoad(parent, inst, inst->getPtr());
}
- SpvInst* emitStoreMaybeCoherent(SpvInstParent* parent, IRInst* inst)
+ SpvInst* emitStore(SpvInstParent* parent, IRInst* inst, IRInst* ptr, IRInst* val)
{
- IRBuilder builder{inst};
- builder.setInsertBefore(inst);
-
- SpvInst* deviceScope = nullptr;
- IRInst* pointer = inst->getOperand(0);
- IRInst* object = inst->getOperand(1);
-
- bool coherentPointer = NeedToUseCoherentLoadOrStore(pointer);
- if (coherentPointer)
- {
- requireSPIRVCapability(SpvCapabilityVulkanMemoryModelDeviceScope);
- deviceScope = emitIntConstant(IRIntegerValue{SpvScopeDevice}, builder.getUIntType());
- }
+ requireVariableBufferCapabilityIfNeeded(inst->getDataType());
+ IRBuilder builder(inst);
+ int memoryAccessMask = 0;
+ int alignment = -1;
+ MemoryScope memoryScope{};
+ getMemoryAccessOperandsOfLoadStore<MemoryAccessType::Store>(
+ inst,
+ ptr,
+ memoryAccessMask,
+ alignment,
+ memoryScope);
return emitInstCustomOperandFunc(
parent,
inst,
SpvOpStore,
[&]()
{
- emitOperand(pointer);
- emitOperand(object);
-
- if (coherentPointer)
+ emitOperand(ptr);
+ emitOperand(val);
+ if (memoryAccessMask)
{
- emitOperand(
- SpvMemoryAccessMakePointerAvailableMask |
- SpvMemoryAccessNonPrivatePointerMask);
-
- emitOperand(deviceScope);
+ emitOperand(SpvLiteralInteger::from32(memoryAccessMask));
+ if (memoryAccessMask & SpvMemoryAccessAlignedMask)
+ emitOperand(SpvLiteralInteger::from32((uint32_t)alignment));
+ if (memoryAccessMask & SpvMemoryAccessMakePointerAvailableMask)
+ emitOperand(
+ emitIntConstant((IRIntegerValue)memoryScope, builder.getIntType()));
}
});
}
+ SpvInst* emitStore(SpvInstParent* parent, IRStore* inst)
+ {
+ return emitStore(parent, inst, inst->getPtr(), inst->getVal());
+ }
SpvInst* emitSwizzledStore(SpvInstParent* parent, IRSwizzledStore* inst)
{
@@ -8613,6 +8637,8 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex
SpvInst* emitDebugValue(SpvInstParent* parent, IRDebugValue* debugValue)
{
+ auto debugVar = debugValue->getDebugVar();
+ auto debugValueVal = debugValue->getValue();
// We are asked to update the value for a debug variable.
// A debug variable is already emited as a OpDebugVariable +
// OpVariable + OpDebugDeclare. We only need to store the new value
@@ -8628,7 +8654,7 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex
// variable. If it doesn't, we can't emit a store.
//
List<IRInst*> irAccessChain;
- auto rootVar = getRootAddr(debugValue->getDebugVar(), irAccessChain);
+ auto rootVar = getRootAddr(debugVar, irAccessChain);
SpvInst* spvDebugVar = nullptr;
if (!m_mapIRInstToSpvInst.tryGetValue(rootVar, spvDebugVar))
return nullptr;
@@ -8644,7 +8670,7 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex
// be fully static. We will skip emitting the debug inst if the access chain
// isn't static.
//
- auto type = unwrapAttributedType(debugValue->getDebugVar()->getDataType());
+ auto type = unwrapAttributedType(debugVar->getDataType());
List<SpvInst*> accessChain;
bool isConstAccessChain =
translateIRAccessChain(builder, type, irAccessChain, accessChain);
@@ -8657,7 +8683,7 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex
m_voidType,
getNonSemanticDebugInfoExtInst(),
rootVar,
- debugValue->getValue(),
+ debugValueVal,
getDwarfExpr(),
accessChain);
}
@@ -8669,7 +8695,7 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex
// The ordinary case is the debug variable has a backing ordinary variable.
// We can simply emit a store into the backing variable for the DebugValue operation.
//
- return emitStoreMaybeCoherent(parent, debugValue);
+ return emitStore(parent, debugValue, debugVar, debugValueVal);
}
IRInst* getName(IRInst* inst)
diff --git a/source/slang/slang-ir-defer-buffer-load.cpp b/source/slang/slang-ir-defer-buffer-load.cpp
index 4736b4e65..3c8a9f4c7 100644
--- a/source/slang/slang-ir-defer-buffer-load.cpp
+++ b/source/slang/slang-ir-defer-buffer-load.cpp
@@ -151,9 +151,19 @@ struct DeferBufferLoadContext
void deferBufferLoadInst(IRBuilder& builder, List<IRInst*>& workList, IRInst* loadInst)
{
+ bool failDueToAttributeFound = false;
+ for (auto attr : loadInst->getAllAttrs())
+ {
+ if (as<IRAlignedAttr>(attr) || as<IRMemoryScopeAttr>(attr))
+ {
+ failDueToAttributeFound = true;
+ break;
+ }
+ }
+
// Don't defer the load anymore if the type is simple.
- if (!isTypePreferrableToDeferLoad(codeGenContext, loadInst->getDataType()) ||
- loadInst->findAttr<IRAlignedAttr>())
+ if (failDueToAttributeFound ||
+ !isTypePreferrableToDeferLoad(codeGenContext, loadInst->getDataType()))
{
return;
}
diff --git a/source/slang/slang-ir-insts-stable-names.lua b/source/slang/slang-ir-insts-stable-names.lua
index fefc7a956..a34dc346a 100644
--- a/source/slang/slang-ir-insts-stable-names.lua
+++ b/source/slang/slang-ir-insts-stable-names.lua
@@ -679,4 +679,5 @@ return {
["CastResourceToDescriptorHandle"] = 675,
["SymbolAlias"] = 676,
["Decoration.InParamProxyVar"] = 677,
+ ["Attr.MemoryScope"] = 678,
}
diff --git a/source/slang/slang-ir-insts.h b/source/slang/slang-ir-insts.h
index 5c27d5e25..2255afc67 100644
--- a/source/slang/slang-ir-insts.h
+++ b/source/slang/slang-ir-insts.h
@@ -2191,6 +2191,13 @@ struct IRAlignedAttr : IRAttr
};
FIDDLE()
+struct IRMemoryScopeAttr : IRAttr // attribute carrying a memory scope; read from IRLoad/IRStore by the SPIR-V emitter and redundancy-removal pass
+{
+ FIDDLE(leafInst())
+ IRInst* getMemoryScope() { return getOperand(0); } // operand 0 holds the scope value (read via getIntVal and cast to MemoryScope by users)
+};
+
+FIDDLE()
struct IRLoad : IRInst
{
FIDDLE(leafInst())
@@ -2242,6 +2249,17 @@ struct IRAtomicStore : IRAtomicOperation
};
FIDDLE()
+struct IRAtomicExchange : IRAtomicOperation // typed view of an atomic-exchange instruction
+{
+ FIDDLE(leafInst())
+ IRUse ptr; // destination pointer operand
+ IRUse val; // value operand to exchange in
+
+ IRInst* getPtr() { return ptr.get(); }
+ IRInst* getVal() { return val.get(); }
+};
+
+FIDDLE()
struct IRRWStructuredBufferStore : IRInst
{
FIDDLE(leafInst())
@@ -4365,7 +4383,7 @@ public:
IRInst* emitLoad(IRType* type, IRInst* ptr);
IRInst* emitLoad(IRType* type, IRInst* ptr, IRInst* align);
- IRInst* emitLoad(IRType* type, IRInst* ptr, IRAlignedAttr* align);
+ IRInst* emitLoad(IRType* type, IRInst* ptr, ArrayView<IRInst*> attributes);
IRInst* emitLoad(IRInst* ptr);
IRInst* emitLoadReverseGradient(IRType* type, IRInst* diffValue);
@@ -4375,6 +4393,7 @@ public:
IRInst* emitStore(IRInst* dstPtr, IRInst* srcVal);
IRInst* emitStore(IRInst* dstPtr, IRInst* srcVal, IRInst* align);
+ IRInst* emitStore(IRInst* dstPtr, IRInst* srcVal, IRInst* align, IRInst* memoryScope);
IRInst* emitAtomicStore(IRInst* dstPtr, IRInst* srcVal, IRInst* memoryOrder);
diff --git a/source/slang/slang-ir-insts.lua b/source/slang/slang-ir-insts.lua
index a4bb4a6f2..e21fc86ae 100644
--- a/source/slang/slang-ir-insts.lua
+++ b/source/slang/slang-ir-insts.lua
@@ -2018,6 +2018,7 @@ local insts = {
},
},
{ Aligned = { struct_name = "AlignedAttr", min_operands = 1 } },
+ { MemoryScope = { struct_name = "MemoryScopeAttr", min_operands = 1 } },
{
SemanticAttr = {
{ userSemantic = { struct_name = "UserSemanticAttr", min_operands = 2 } },
diff --git a/source/slang/slang-ir-redundancy-removal.cpp b/source/slang/slang-ir-redundancy-removal.cpp
index 3b1a731f9..0308b50c2 100644
--- a/source/slang/slang-ir-redundancy-removal.cpp
+++ b/source/slang/slang-ir-redundancy-removal.cpp
@@ -416,6 +416,47 @@ static IRInst* _getRootVar(IRInst* inst)
return inst;
}
+// Ranks memory scopes by breadth: larger values are broader (CrossDevice = 7 ... Invocation = 1); unknown scopes rank as Invocation.
+static int getMemoryScopeOrder(MemoryScope scope)
+{
+ switch (scope)
+ {
+ case MemoryScope::CrossDevice:
+ return 7;
+ case MemoryScope::Device:
+ return 6;
+ case MemoryScope::QueueFamily:
+ // https://docs.vulkan.org/spec/latest/chapters/shaders.html#shaders-scope-queue-family
+ return 5;
+ case MemoryScope::ShaderCall:
+ // https://docs.vulkan.org/spec/latest/chapters/shaders.html#shaders-scope-shadercall
+ return 4;
+ case MemoryScope::Workgroup:
+ return 3;
+ case MemoryScope::Subgroup:
+ return 2;
+ case MemoryScope::Invocation:
+ default:
+ return 1;
+ }
+}
+
+// Returns true if MemoryScope x is a sub-set of y, i.e. x is no broader than y (equal scopes count as sub-sets).
+static bool isMemoryScopeSubsetOf(MemoryScope x, MemoryScope y)
+{
+ return getMemoryScopeOrder(x) <= getMemoryScopeOrder(y);
+}
+
+// Returns the memory scope of a load/store: the value of its IRMemoryScopeAttr, or Invocation when no such attribute is attached.
+static MemoryScope getMemoryScopeOfLoadStore(IRInst* inst)
+{
+ SLANG_ASSERT(as<IRLoad>(inst) || as<IRStore>(inst));
+ auto memoryScope = inst->findAttr<IRMemoryScopeAttr>();
+ if (!memoryScope)
+ return MemoryScope::Invocation;
+ return (MemoryScope)getIntVal(memoryScope->getMemoryScope());
+}
+
bool tryRemoveRedundantStore(IRGlobalValueWithCode* func, IRStore* store)
{
// We perform a quick and conservative check:
@@ -473,15 +514,18 @@ bool tryRemoveRedundantStore(IRGlobalValueWithCode* func, IRStore* store)
}
}
- // A store can be removed if there are subsequent stores to the same variable,
+ // This store can be removed if there are subsequent stores to the same variable,
// and there are no insts in between the stores that can read the variable.
-
+ // Additionally, MemoryScope of the `store` must be a sub-set of `nextStore`,
+ // otherwise we can not be certain that `nextStore` completely overwrites `store`.
+ MemoryScope memoryScopeOfStore = getMemoryScopeOfLoadStore(store);
HashSet<IRBlock*> visitedBlocks;
for (auto next = store->getNextInst(); next;)
{
if (auto nextStore = as<IRStore>(next))
{
- if (nextStore->getPtr() == store->getPtr())
+ if (nextStore->getPtr() == store->getPtr() &&
+ isMemoryScopeSubsetOf(memoryScopeOfStore, getMemoryScopeOfLoadStore(nextStore)))
{
hasOverridingStore = true;
break;
@@ -585,13 +629,21 @@ bool tryRemoveRedundantLoad(IRGlobalValueWithCode* func, IRLoad* load)
{
bool changed = false;
- // If the load is preceeded by a store without any side-effect insts
- // in-between, remove the load.
+ // Get the memory scope we are operating on.
+ MemoryScope memoryScopeOfLoad = getMemoryScopeOfLoadStore(load);
+
+ // We can replace a load with a `Store->getVal()` if that store is a super-set
+ // memory scope to our load.
+ // Ex 1: Store into Workgroup, load from Invocation. Load will be equal to the Store.
+ //
+ // Ex 2: Store into Invocation, load from Workgroup. Load may/may-not be equal to the Store
+ // since the cache managing the Workgroup scope may contain different data than the invocation.
for (auto prev = load->getPrevInst(); prev; prev = prev->getPrevInst())
{
if (auto store = as<IRStore>(prev))
{
- if (store->getPtr() == load->getPtr())
+ if (store->getPtr() == load->getPtr() &&
+ isMemoryScopeSubsetOf(memoryScopeOfLoad, getMemoryScopeOfLoadStore(store)))
{
auto value = store->getVal();
load->replaceUsesWith(value);
diff --git a/source/slang/slang-ir.cpp b/source/slang/slang-ir.cpp
index 7b7d5ec17..8371d6ef5 100644
--- a/source/slang/slang-ir.cpp
+++ b/source/slang/slang-ir.cpp
@@ -5212,18 +5212,20 @@ IRInst* IRBuilder::emitLoad(IRType* type, IRInst* ptr, IRInst* align)
return inst;
}
-IRInst* IRBuilder::emitLoad(IRType* type, IRInst* ptr, IRAlignedAttr* align)
+IRInst* IRBuilder::emitLoad(IRType* type, IRInst* ptr, ArrayView<IRInst*> attributes)
{
- if (align)
- {
- auto inst = createInst<IRLoad>(this, kIROp_Load, type, ptr, align);
- addInst(inst);
- return inst;
- }
- else
- {
- return emitLoad(type, ptr);
- }
+ ShortList<IRInst*> params;
+ params.add(ptr);
+ params.addRange(attributes);
+ auto inst = createInst<IRLoad>(
+ this,
+ kIROp_Load,
+ type,
+ params.getCount(),
+ params.getArrayView().getBuffer());
+
+ addInst(inst);
+ return inst;
}
IRInst* IRBuilder::emitLoad(IRInst* ptr)
@@ -5279,6 +5281,21 @@ IRInst* IRBuilder::emitStore(IRInst* dstPtr, IRInst* srcVal, IRInst* align)
return inst;
}
+IRInst* IRBuilder::emitStore(IRInst* dstPtr, IRInst* srcVal, IRInst* align, IRInst* memoryScope) // emits a store carrying both an AlignedAttr and a MemoryScopeAttr
+{
+ auto inst = createInst<IRStore>(
+ this,
+ kIROp_Store,
+ nullptr,
+ dstPtr,
+ srcVal,
+ getAttr(kIROp_AlignedAttr, align), // wraps `align` in an AlignedAttr; always attached (no null check on `align`)
+ getAttr(kIROp_MemoryScopeAttr, memoryScope)); // wraps `memoryScope` in a MemoryScopeAttr; always attached
+
+ addInst(inst);
+ return inst;
+}
+
IRInst* IRBuilder::emitAtomicStore(IRInst* dstPtr, IRInst* srcVal, IRInst* memoryOrder)
{
auto inst = createInst<IRAtomicStore>(
diff --git a/source/slang/slang-ir.h b/source/slang/slang-ir.h
index 54bf23754..4f9941946 100644
--- a/source/slang/slang-ir.h
+++ b/source/slang/slang-ir.h
@@ -2431,7 +2431,7 @@ public:
// anything to do with serialization format
//
const static UInt k_minSupportedModuleVersion = 1;
- const static UInt k_maxSupportedModuleVersion = 1;
+ const static UInt k_maxSupportedModuleVersion = 2;
static_assert(k_minSupportedModuleVersion <= k_maxSupportedModuleVersion);
private: