summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- source/slang/core.meta.slang 43
-rw-r--r-- source/slang/hlsl.meta.slang 99
-rw-r--r-- source/slang/slang-emit-spirv-ops.h 43
-rw-r--r-- source/slang/slang-emit-spirv.cpp 262
-rw-r--r-- source/slang/slang-ir-defer-buffer-load.cpp 14
-rw-r--r-- source/slang/slang-ir-insts-stable-names.lua 1
-rw-r--r-- source/slang/slang-ir-insts.h 21
-rw-r--r-- source/slang/slang-ir-insts.lua 1
-rw-r--r-- source/slang/slang-ir-redundancy-removal.cpp 64
-rw-r--r-- source/slang/slang-ir.cpp 39
-rw-r--r-- source/slang/slang-ir.h 2
-rw-r--r-- tests/cooperative-matrix/coherent-load-store-pointer.slang 34
-rw-r--r-- tests/cooperative-matrix/load-store-pointer.slang 35
-rw-r--r-- tests/cooperative-vector/coherent-load-store-pointer.slang 38
-rw-r--r-- tests/ir/dump-module-info.slang 2
-rw-r--r-- tests/language-feature/pointer/coherent-load-store-groupshared.slang 26
-rw-r--r-- tests/language-feature/pointer/coherent-load-store-image.slang 29
-rw-r--r-- tests/language-feature/pointer/coherent-load-store-physical-storage-buffer.slang 24
-rw-r--r-- tests/language-feature/pointer/redundant-coherent-load.slang 56
-rw-r--r-- tests/language-feature/pointer/redundant-coherent-store.slang 40
20 files changed, 687 insertions, 186 deletions
diff --git a/source/slang/core.meta.slang b/source/slang/core.meta.slang
index 5ffab1f9c..9b55dc35a 100644
--- a/source/slang/core.meta.slang
+++ b/source/slang/core.meta.slang
@@ -1368,19 +1368,20 @@ struct Ptr<
__intrinsic_op($(kIROp_GetOffsetPtr))
[nonmutating]
+ [__NoSideEffect]
ref;
}
};
//@hidden:
__intrinsic_op($(kIROp_AlignedAttr))
-void __align_attr(int alignment);
+internal int __align_attr(int alignment);
__intrinsic_op($(kIROp_Load))
-T __load_aligned<T, U>(T* ptr, U alignmentAttr);
+internal T __load_aligned<T>(T* ptr, int alignmentAttr);
__intrinsic_op($(kIROp_Store))
-void __store_aligned<T, U>(T* ptr, T value, U alignmentAttr);
+internal void __store_aligned<T>(T* ptr, T value, int alignmentAttr);
//@public:
@@ -1413,6 +1414,42 @@ void storeAligned<int alignment, T>(T* ptr, T value)
__store_aligned(ptr, value, __align_attr(alignment));
}
+//@hidden:
+__intrinsic_op($(kIROp_MemoryScopeAttr))
+internal int __memoryscope_attr(MemoryScope scope);
+
+__intrinsic_op($(kIROp_Load))
+internal T __load_coherent<T, Access access, AddressSpace addrSpace>(Ptr<T, access, addrSpace> ptr, int alignmentAttr, int memoryScopeAttr);
+
+__intrinsic_op($(kIROp_Store))
+internal void __store_coherent<T, AddressSpace addrSpace>(Ptr<T, Access::ReadWrite, addrSpace> ptr, T value, int alignmentAttr, int memoryScopeAttr);
+
+/// Store `value` through `ptr` coherently with respect to the memory scope `scope`.
+/// `alignment` is forwarded as the store's alignment attribute. Tighter memory scopes may be faster to operate on.
+/// @param ptr The pointer to store the value to.
+/// @param value The value to store.
+///
+[require(SPV_KHR_vulkan_memory_model)]
+[ForceInline]
+__generic<int alignment, MemoryScope scope, T, AddressSpace addrSpace>
+void storeCoherent(Ptr<T, Access::ReadWrite, addrSpace> ptr, T value)
+{
+ __store_coherent<T, addrSpace>(ptr, value, __align_attr(alignment), __memoryscope_attr(scope));
+}
+
+/// Load a value through `ptr` coherently with respect to the memory scope `scope`.
+/// `alignment` is forwarded as the load's alignment attribute. Tighter memory scopes may be faster to operate on.
+/// @param ptr The pointer to load from.
+/// @return The value produced by the underlying coherent load of `ptr`.
+[require(SPV_KHR_vulkan_memory_model)]
+[ForceInline]
+[__NoSideEffect]
+__generic<int alignment, MemoryScope scope, T, Access access, AddressSpace addrSpace>
+T loadCoherent(Ptr<T, access, addrSpace> ptr)
+{
+ return __load_coherent<T, access, addrSpace>(ptr, __align_attr(alignment), __memoryscope_attr(scope));
+}
+
${{{
StringBuilder ptrTypeParameterListBuilder;
ptrTypeParameterListBuilder << "T, Access access, AddressSpace addrSpace";
diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang
index 73bdee96e..824a06000 100644
--- a/source/slang/hlsl.meta.slang
+++ b/source/slang/hlsl.meta.slang
@@ -23239,6 +23239,18 @@ extension<T, L : IBufferDataLayout> RasterizerOrderedStructuredBuffer<T, L> : IR
int getCount() { uint count; uint stride; this.GetDimensions(count, stride); return count; }
}
+[require(vk_mem_model)]
+internal void enableVMMDeviceScopeCapabilityIfNeeded(constexpr MemoryScope memoryScope) // Declares the SPIR-V capability below when `memoryScope` is Device; does nothing for any other scope.
+{
+ if (memoryScope == MemoryScope::Device)
+ {
+ spirv_asm
+ {
+ OpCapability VulkanMemoryModelDeviceScopeKHR;
+ };
+ }
+}
+
namespace linalg
{
@@ -23813,6 +23825,22 @@ struct CoopMat
};
}
+ // Coherent store of this matrix to `buffer` offset by `element`, using Aligned|MakePointerAvailable|NonPrivatePointer at `memoryScope`. TODO: make this function an intrinsic and support all types via the single intrinsic
+ [require(cooperative_matrix, vk_mem_model)] // NOTE(review): unlike LoadCoherent, no [ForceInline] is applied here — confirm this is intentional.
+ void StoreCoherent<
+ let matrixLayout : CoopMatMatrixLayout
+ >(T* buffer, uint element, uint stride, constexpr MemoryScope memoryScope)
+ {
+ enableVMMDeviceScopeCapabilityIfNeeded(memoryScope);
+ let alignment = 16U; // literal operand that follows the Aligned flag in the store below
+ const int32_t scope = (int32_t)memoryScope; // passed as the trailing scope operand of the store below
+ return spirv_asm
+ {
+ %pointer:$$T* = OpPtrAccessChain $buffer $element;
+ OpCooperativeMatrixStoreKHR %pointer $this $matrixLayout $stride Aligned|MakePointerAvailable|NonPrivatePointer !alignment $scope;
+ };
+ }
+
[ForceInline]
[require(cooperative_matrix)]
void Store<
@@ -23924,6 +23952,24 @@ ${{{{
};
}
+ // Coherent load of a matrix from `buffer` offset by `element`, using Aligned|MakePointerVisible|NonPrivatePointer at `memoryScope`. TODO: make this function an intrinsic and support all types via the single intrinsic
+ [ForceInline]
+ [__NoSideEffect]
+ [require(cooperative_matrix, vk_mem_model)]
+ static This LoadCoherent<
+ let matrixLayout : CoopMatMatrixLayout
+ >(T* buffer, uint element, uint stride, constexpr MemoryScope memoryScope)
+ {
+ enableVMMDeviceScopeCapabilityIfNeeded(memoryScope);
+ let alignment = 16U; // literal operand that follows the Aligned flag in the load below
+ const int32_t scope = (int32_t)memoryScope; // passed as the trailing scope operand of the load below
+ return spirv_asm
+ {
+ %pointer:$$T* = OpPtrAccessChain $buffer $element;
+ result:$$CoopMat<T, S, M, N, R> = OpCooperativeMatrixLoadKHR %pointer $matrixLayout $stride Aligned|MakePointerVisible|NonPrivatePointer !alignment $scope;
+ };
+ }
+
[ForceInline]
[require(cooperative_matrix)]
static This Load<
@@ -24480,6 +24526,24 @@ CoopMat<T, S, M, N, R> coopMatLoad<
}
[ForceInline]
+[require(cooperative_matrix, vk_mem_model)]
+CoopMat<T, S, M, N, R> coopMatLoadCoherent<
+ T : __BuiltinArithmeticType,
+ let S : MemoryScope,
+ let M : int,
+ let N : int,
+ let R : CoopMatMatrixUse,
+ let matrixLayout : CoopMatMatrixLayout
+>(
+ T* buffer,
+ uint element,
+ uint stride,
+ constexpr MemoryScope memoryScope)
+{
+ return CoopMat<T, S, M, N, R>.LoadCoherent<matrixLayout>(buffer, element, stride, memoryScope);
+}
+
+[ForceInline]
[require(cooperative_matrix)]
CoopMat<T, S, M, N, R> coopMatLoad<
T : __BuiltinArithmeticType,
@@ -24845,6 +24909,20 @@ struct CoopVec<T : __BuiltinArithmeticType, let N : int> : IArray<T>, IArithmeti
};
}
+ // Coherent store of this vector to `buffer` at `byteOffset16ByteAligned`, using Aligned|MakePointerAvailable|NonPrivatePointer at `memoryScope` (Device by default). TODO: make this function an intrinsic and support all types via the single intrinsic
+ [require(cooperative_vector, vk_mem_model)] // NOTE(review): no [ForceInline] here, unlike the neighbouring definitions in this struct — confirm this is intentional.
+ void storeCoherent(T* buffer, int32_t byteOffset16ByteAligned = 0, constexpr MemoryScope memoryScope = MemoryScope::Device)
+ {
+ enableVMMDeviceScopeCapabilityIfNeeded(memoryScope);
+ let pointer = Ptr<T[]>(buffer); // the store below takes the buffer viewed as a pointer to an array of T
+ let alignment = 16U; // literal operand that follows the Aligned flag in the store below
+ const int32_t scope = (int32_t)memoryScope; // passed as the trailing scope operand of the store below
+ spirv_asm
+ {
+ OpCooperativeVectorStoreNV $pointer $byteOffset16ByteAligned $this Aligned|MakePointerAvailable|NonPrivatePointer !alignment $scope;
+ };
+ }
+
[ForceInline]
[require(cooperative_vector)]
[require(hlsl_coopvec_poc)]
@@ -25017,6 +25095,20 @@ struct CoopVec<T : __BuiltinArithmeticType, let N : int> : IArray<T>, IArithmeti
};
}
+ // Coherent load of a vector from `buffer` at `byteOffset16ByteAligned`, using Aligned|MakePointerVisible|NonPrivatePointer at `memoryScope` (Device by default). TODO: make this function an intrinsic and support all types via the single intrinsic
+ [require(cooperative_vector, vk_mem_model)] // NOTE(review): no [ForceInline] / [__NoSideEffect] here, unlike the neighbouring definitions in this struct — confirm this is intentional.
+ static CoopVec<T, N> loadCoherent(T* buffer, int32_t byteOffset16ByteAligned = 0, constexpr MemoryScope memoryScope = MemoryScope::Device)
+ {
+ enableVMMDeviceScopeCapabilityIfNeeded(memoryScope);
+ let pointer = Ptr<T[]>(buffer); // the load below takes the buffer viewed as a pointer to an array of T
+ let alignment = 16U; // literal operand that follows the Aligned flag in the load below
+ const int32_t scope = (int32_t)memoryScope; // passed as the trailing scope operand of the load below
+ return spirv_asm
+ {
+ result:$$CoopVec<T, N> = OpCooperativeVectorLoadNV $pointer $byteOffset16ByteAligned Aligned|MakePointerVisible|NonPrivatePointer !alignment $scope;
+ };
+ }
+
// Groupshared
[ForceInline]
[__NoSideEffect]
@@ -26457,6 +26549,13 @@ CoopVec<T, N> coopVecLoad<let N : int, T : __BuiltinArithmeticType>(T* buffer, i
return CoopVec<T, N>.load(buffer, byteOffset16ByteAligned);
}
+[ForceInline]
+[require(spirv, cooperative_vector, vk_mem_model)]
+CoopVec<T, N> coopVecLoadCoherent<let N : int, T : __BuiltinArithmeticType>(T* buffer, int32_t byteOffset16ByteAligned = 0, constexpr MemoryScope memoryScope = MemoryScope::Device)
+{
+ return CoopVec<T, N>.loadCoherent(buffer, byteOffset16ByteAligned, memoryScope); // free-function form: forwards all three arguments unchanged to CoopVec<T, N>.loadCoherent
+}
+
// Groupshared
[ForceInline]
[require(cooperative_vector)]
diff --git a/source/slang/slang-emit-spirv-ops.h b/source/slang/slang-emit-spirv-ops.h
index a5e4d730a..da9058b62 100644
--- a/source/slang/slang-emit-spirv-ops.h
+++ b/source/slang/slang-emit-spirv-ops.h
@@ -600,28 +600,6 @@ SpvInst* emitOpLoad(
return emitInst(parent, inst, SpvOpLoad, idResultType, kResultID, pointer, memoryAccess);
}
-// https://registry.khronos.org/SPIR-V/specs/unified1/SPIRV.html#OpLoad
-template<typename T1, typename T2>
-SpvInst* emitOpLoadAligned(
- SpvInstParent* parent,
- IRInst* inst,
- const T1& idResultType,
- const T2& pointer,
- const SpvLiteralInteger& literalInteger)
-{
- static_assert(isSingular<T1>);
- static_assert(isSingular<T2>);
- return emitInst(
- parent,
- inst,
- SpvOpLoad,
- idResultType,
- kResultID,
- pointer,
- SpvMemoryAccessAlignedMask,
- literalInteger);
-}
-
// https://registry.khronos.org/SPIR-V/specs/unified1/SPIRV.html#OpStore
template<typename T1, typename T2>
SpvInst* emitOpStore(
@@ -636,27 +614,6 @@ SpvInst* emitOpStore(
return emitInst(parent, inst, SpvOpStore, pointer, object, memoryAccess);
}
-// https://registry.khronos.org/SPIR-V/specs/unified1/SPIRV.html#OpStore
-template<typename T1, typename T2>
-SpvInst* emitOpStoreAligned(
- SpvInstParent* parent,
- IRInst* inst,
- const T1& pointer,
- const T2& object,
- const SpvLiteralInteger& literalInteger)
-{
- static_assert(isSingular<T1>);
- static_assert(isSingular<T2>);
- return emitInst(
- parent,
- inst,
- SpvOpStore,
- pointer,
- object,
- SpvMemoryAccessAlignedMask,
- literalInteger);
-}
-
// https://registry.khronos.org/SPIR-V/specs/unified1/SPIRV.html#OpAccessChain
template<typename T1, typename T2, typename Ts>
SpvInst* emitOpAccessChain(
diff --git a/source/slang/slang-emit-spirv.cpp b/source/slang/slang-emit-spirv.cpp
index 3a8a913ec..8bcd1429f 100644
--- a/source/slang/slang-emit-spirv.cpp
+++ b/source/slang/slang-emit-spirv.cpp
@@ -4569,33 +4569,37 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex
break;
case kIROp_AtomicLoad:
{
- IRBuilder builder{inst};
- if (isAtomicableAddressSpace(inst->getOperand(0)->getDataType()))
+ IRAtomicLoad* atomicLoad = as<IRAtomicLoad>(inst);
+ auto ptr = atomicLoad->getPtr();
+ IRBuilder builder{atomicLoad};
+ if (isAtomicableAddressSpace(ptr->getDataType()))
{
if (m_memoryModel == SpvMemoryModelVulkan)
requireSPIRVCapability(SpvCapabilityVulkanMemoryModelDeviceScope);
const auto memoryScope =
emitIntConstant(IRIntegerValue{SpvScopeDevice}, builder.getUIntType());
- const auto memorySemantics =
- emitMemorySemanticMask(inst->getOperand(1), inst->getOperand(0));
+ const auto memorySemantics = emitMemorySemanticMask(inst->getOperand(1), ptr);
result = emitOpAtomicLoad(
parent,
inst,
inst->getFullType(),
- inst->getOperand(0),
+ ptr,
memoryScope,
memorySemantics);
ensureAtomicCapability(inst, SpvOpAtomicLoad);
}
else
{
- result = emitLoadMaybeCoherent(parent, inst);
+ result = emitLoad(parent, inst, ptr);
}
}
break;
case kIROp_AtomicStore:
{
+ IRAtomicStore* atomicStore = as<IRAtomicStore>(inst);
+ auto ptr = atomicStore->getPtr();
+ auto val = atomicStore->getVal();
IRBuilder builder{inst};
if (isAtomicableAddressSpace(inst->getOperand(0)->getDataType()))
{
@@ -4604,48 +4608,44 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex
const auto memoryScope =
emitIntConstant(IRIntegerValue{SpvScopeDevice}, builder.getUIntType());
- const auto memorySemantics =
- emitMemorySemanticMask(inst->getOperand(2), inst->getOperand(0));
- result = emitOpAtomicStore(
- parent,
- inst,
- inst->getOperand(0),
- memoryScope,
- memorySemantics,
- inst->getOperand(1));
+ const auto memorySemantics = emitMemorySemanticMask(inst->getOperand(2), ptr);
+ result =
+ emitOpAtomicStore(parent, inst, ptr, memoryScope, memorySemantics, val);
ensureAtomicCapability(inst, SpvOpAtomicStore);
}
else
{
- result = emitStoreMaybeCoherent(parent, inst);
+ result = emitStore(parent, inst, ptr, val);
}
}
break;
case kIROp_AtomicExchange:
{
+ IRAtomicExchange* atomicExchange = as<IRAtomicExchange>(inst);
+ auto ptr = atomicExchange->getPtr();
+ auto val = atomicExchange->getOperand(1);
IRBuilder builder{inst};
- if (isAtomicableAddressSpace(inst->getOperand(0)->getDataType()))
+ if (isAtomicableAddressSpace(ptr->getDataType()))
{
if (m_memoryModel == SpvMemoryModelVulkan)
requireSPIRVCapability(SpvCapabilityVulkanMemoryModelDeviceScope);
const auto memoryScope =
emitIntConstant(IRIntegerValue{SpvScopeDevice}, builder.getUIntType());
- const auto memorySemantics =
- emitMemorySemanticMask(inst->getOperand(2), inst->getOperand(0));
+ const auto memorySemantics = emitMemorySemanticMask(inst->getOperand(2), ptr);
result = emitOpAtomicExchange(
parent,
inst,
inst->getFullType(),
- inst->getOperand(0),
+ ptr,
memoryScope,
memorySemantics,
- inst->getOperand(1));
+ val);
ensureAtomicCapability(inst, SpvOpAtomicExchange);
}
else
{
- result = emitStoreMaybeCoherent(parent, inst);
+ result = emitStore(parent, inst, ptr, val);
}
}
break;
@@ -7082,6 +7082,8 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex
SpvInst* emitGetOffsetPtr(SpvInstParent* parent, IRInst* inst)
{
+ requireVariableBufferCapabilityIfNeeded(inst->getDataType());
+
return emitOpPtrAccessChain(
parent,
inst,
@@ -7174,54 +7176,100 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex
}
}
- SpvInst* emitLoad(SpvInstParent* parent, IRLoad* inst)
+ enum class MemoryAccessType
{
- requireVariableBufferCapabilityIfNeeded(inst->getDataType());
+ Load,
+ Store
+ };
+
+ template<MemoryAccessType memoryAccessType>
+ void getMemoryAccessOperandsOfLoadStore(
+ IRInst* inst,
+ IRInst* ptr,
+ int& memoryAccessMaskOut,
+ int& alignmentOut,
+ MemoryScope& memoryScopeOut)
+ {
+ IRAlignedAttr* alignedAttr = nullptr;
+ IRMemoryScopeAttr* memoryScopeAttr = nullptr;
- auto ptrType = as<IRPtrTypeBase>(inst->getPtr()->getDataType());
- if (ptrType && addressSpaceToStorageClass(ptrType->getAddressSpace()) ==
- SpvStorageClassPhysicalStorageBuffer)
+ for (auto attr : inst->getAllAttrs())
{
- IRSizeAndAlignment sizeAndAlignment;
- if (auto alignedAttr = inst->findAttr<IRAlignedAttr>())
+ if (auto foundAlignedAttr = as<IRAlignedAttr>(attr))
+ alignedAttr = foundAlignedAttr;
+ else if (auto foundMemoryScopeAttr = as<IRMemoryScopeAttr>(attr))
+ memoryScopeAttr = foundMemoryScopeAttr;
+ }
+
+ // Determine coherence
+ {
+ bool isCoherent = false;
+ if (memoryScopeAttr)
{
- sizeAndAlignment.alignment = (int)getIntVal(alignedAttr->getAlignment());
+ memoryScopeOut = (MemoryScope)getIntVal(memoryScopeAttr->getMemoryScope());
+ if (m_memoryModel != SpvMemoryModelVulkan)
+ SLANG_ASSERT_FAILURE(
+ "Explicit coherent operations require vulkan-memory-model, "
+ "specify the capability 'vk_mem_model'");
+ isCoherent = true;
}
else
{
- getNaturalSizeAndAlignment(
- m_targetProgram->getOptionSet(),
- ptrType->getValueType(),
- &sizeAndAlignment);
+ if (NeedToUseCoherentLoadOrStore(ptr))
+ {
+ memoryScopeOut = MemoryScope::Device;
+ isCoherent = true;
+ }
+ }
+ if (isCoherent)
+ {
+
+ memoryAccessMaskOut |= SpvMemoryAccessNonPrivatePointerMask;
+ if constexpr (memoryAccessType == MemoryAccessType::Load)
+ memoryAccessMaskOut |= SpvMemoryAccessMakePointerVisibleMask;
+ else
+ memoryAccessMaskOut |= SpvMemoryAccessMakePointerAvailableMask;
+ if (memoryScopeOut == MemoryScope::Device)
+ requireSPIRVCapability(SpvCapabilityVulkanMemoryModelDeviceScope);
}
- return emitOpLoadAligned(
- parent,
- inst,
- inst->getDataType(),
- inst->getPtr(),
- SpvLiteralInteger::from32(sizeAndAlignment.alignment));
}
- else
+
+ // Determine alignment
{
- return emitLoadMaybeCoherent(parent, inst);
+ auto ptrType = as<IRPtrTypeBase>(ptr->getDataType());
+ if (ptrType && addressSpaceToStorageClass(ptrType->getAddressSpace()) ==
+ SpvStorageClassPhysicalStorageBuffer)
+ {
+ IRSizeAndAlignment sizeAndAlignment;
+ if (alignedAttr)
+ sizeAndAlignment.alignment = (int)getIntVal(alignedAttr->getAlignment());
+ else
+ getNaturalSizeAndAlignment(
+ m_targetProgram->getOptionSet(),
+ ptrType->getValueType(),
+ &sizeAndAlignment);
+
+ alignmentOut = sizeAndAlignment.alignment;
+ if (alignmentOut != -1)
+ memoryAccessMaskOut |= SpvMemoryAccessAlignedMask;
+ }
}
}
- SpvInst* emitLoadMaybeCoherent(SpvInstParent* parent, IRInst* inst)
+ SpvInst* emitLoad(SpvInstParent* parent, IRInst* inst, IRInst* ptr)
{
- IRBuilder builder{inst};
- builder.setInsertBefore(inst);
-
- SpvInst* deviceScope = nullptr;
- IRInst* pointer = inst->getOperand(0);
-
- bool coherentPointer = NeedToUseCoherentLoadOrStore(pointer);
- if (coherentPointer)
- {
- requireSPIRVCapability(SpvCapabilityVulkanMemoryModelDeviceScope);
- deviceScope = emitIntConstant(IRIntegerValue{SpvScopeDevice}, builder.getUIntType());
- }
+ requireVariableBufferCapabilityIfNeeded(inst->getDataType());
+ IRBuilder builder(inst);
+ int memoryAccessMask = 0;
+ int alignment = -1;
+ MemoryScope memoryScope{};
+ getMemoryAccessOperandsOfLoadStore<MemoryAccessType::Load>(
+ inst,
+ ptr,
+ memoryAccessMask,
+ alignment,
+ memoryScope);
return emitInstCustomOperandFunc(
parent,
inst,
@@ -7230,85 +7278,61 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex
{
emitOperand(inst->getFullType());
emitOperand(kResultID);
- emitOperand(pointer);
-
- if (coherentPointer)
+ emitOperand(ptr);
+ if (memoryAccessMask)
{
- emitOperand(
- SpvMemoryAccessMakePointerVisibleMask |
- SpvMemoryAccessNonPrivatePointerMask);
-
- emitOperand(deviceScope);
+ emitOperand(SpvLiteralInteger::from32(memoryAccessMask));
+ if (memoryAccessMask & SpvMemoryAccessAlignedMask)
+ emitOperand(SpvLiteralInteger::from32((uint32_t)alignment));
+ if (memoryAccessMask & SpvMemoryAccessMakePointerVisibleMask)
+ emitOperand(
+ emitIntConstant((IRIntegerValue)memoryScope, builder.getIntType()));
}
});
}
- SpvInst* emitStore(SpvInstParent* parent, IRStore* inst)
+ SpvInst* emitLoad(SpvInstParent* parent, IRLoad* inst)
{
- auto ptrType = as<IRPtrTypeBase>(inst->getPtr()->getDataType());
- if (ptrType && addressSpaceToStorageClass(ptrType->getAddressSpace()) ==
- SpvStorageClassPhysicalStorageBuffer)
- {
- IRSizeAndAlignment sizeAndAlignment;
- if (auto alignedAttr = inst->findAttr<IRAlignedAttr>())
- {
- sizeAndAlignment.alignment = (int)getIntVal(alignedAttr->getAlignment());
- }
- else
- {
- getNaturalSizeAndAlignment(
- m_targetProgram->getOptionSet(),
- ptrType->getValueType(),
- &sizeAndAlignment);
- }
- return emitOpStoreAligned(
- parent,
- inst,
- inst->getPtr(),
- inst->getVal(),
- SpvLiteralInteger::from32(sizeAndAlignment.alignment));
- }
- else
- {
- return emitStoreMaybeCoherent(parent, inst);
- }
+ return emitLoad(parent, inst, inst->getPtr());
}
- SpvInst* emitStoreMaybeCoherent(SpvInstParent* parent, IRInst* inst)
+ SpvInst* emitStore(SpvInstParent* parent, IRInst* inst, IRInst* ptr, IRInst* val)
{
- IRBuilder builder{inst};
- builder.setInsertBefore(inst);
-
- SpvInst* deviceScope = nullptr;
- IRInst* pointer = inst->getOperand(0);
- IRInst* object = inst->getOperand(1);
-
- bool coherentPointer = NeedToUseCoherentLoadOrStore(pointer);
- if (coherentPointer)
- {
- requireSPIRVCapability(SpvCapabilityVulkanMemoryModelDeviceScope);
- deviceScope = emitIntConstant(IRIntegerValue{SpvScopeDevice}, builder.getUIntType());
- }
+ requireVariableBufferCapabilityIfNeeded(inst->getDataType());
+ IRBuilder builder(inst);
+ int memoryAccessMask = 0;
+ int alignment = -1;
+ MemoryScope memoryScope{};
+ getMemoryAccessOperandsOfLoadStore<MemoryAccessType::Store>(
+ inst,
+ ptr,
+ memoryAccessMask,
+ alignment,
+ memoryScope);
return emitInstCustomOperandFunc(
parent,
inst,
SpvOpStore,
[&]()
{
- emitOperand(pointer);
- emitOperand(object);
-
- if (coherentPointer)
+ emitOperand(ptr);
+ emitOperand(val);
+ if (memoryAccessMask)
{
- emitOperand(
- SpvMemoryAccessMakePointerAvailableMask |
- SpvMemoryAccessNonPrivatePointerMask);
-
- emitOperand(deviceScope);
+ emitOperand(SpvLiteralInteger::from32(memoryAccessMask));
+ if (memoryAccessMask & SpvMemoryAccessAlignedMask)
+ emitOperand(SpvLiteralInteger::from32((uint32_t)alignment));
+ if (memoryAccessMask & SpvMemoryAccessMakePointerAvailableMask)
+ emitOperand(
+ emitIntConstant((IRIntegerValue)memoryScope, builder.getIntType()));
}
});
}
+ SpvInst* emitStore(SpvInstParent* parent, IRStore* inst) // IRStore overload of the four-argument emitStore
+ {
+ return emitStore(parent, inst, inst->getPtr(), inst->getVal()); // forwards the store's own pointer and value
+ }
SpvInst* emitSwizzledStore(SpvInstParent* parent, IRSwizzledStore* inst)
{
@@ -8613,6 +8637,8 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex
SpvInst* emitDebugValue(SpvInstParent* parent, IRDebugValue* debugValue)
{
+ auto debugVar = debugValue->getDebugVar();
+ auto debugValueVal = debugValue->getValue();
// We are asked to update the value for a debug variable.
// A debug variable is already emitted as an OpDebugVariable +
// OpVariable + OpDebugDeclare. We only need to store the new value
@@ -8628,7 +8654,7 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex
// variable. If it doesn't, we can't emit a store.
//
List<IRInst*> irAccessChain;
- auto rootVar = getRootAddr(debugValue->getDebugVar(), irAccessChain);
+ auto rootVar = getRootAddr(debugVar, irAccessChain);
SpvInst* spvDebugVar = nullptr;
if (!m_mapIRInstToSpvInst.tryGetValue(rootVar, spvDebugVar))
return nullptr;
@@ -8644,7 +8670,7 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex
// be fully static. We will skip emitting the debug inst if the access chain
// isn't static.
//
- auto type = unwrapAttributedType(debugValue->getDebugVar()->getDataType());
+ auto type = unwrapAttributedType(debugVar->getDataType());
List<SpvInst*> accessChain;
bool isConstAccessChain =
translateIRAccessChain(builder, type, irAccessChain, accessChain);
@@ -8657,7 +8683,7 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex
m_voidType,
getNonSemanticDebugInfoExtInst(),
rootVar,
- debugValue->getValue(),
+ debugValueVal,
getDwarfExpr(),
accessChain);
}
@@ -8669,7 +8695,7 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex
// The ordinary case is the debug variable has a backing ordinary variable.
// We can simply emit a store into the backing variable for the DebugValue operation.
//
- return emitStoreMaybeCoherent(parent, debugValue);
+ return emitStore(parent, debugValue, debugVar, debugValueVal);
}
IRInst* getName(IRInst* inst)
diff --git a/source/slang/slang-ir-defer-buffer-load.cpp b/source/slang/slang-ir-defer-buffer-load.cpp
index 4736b4e65..3c8a9f4c7 100644
--- a/source/slang/slang-ir-defer-buffer-load.cpp
+++ b/source/slang/slang-ir-defer-buffer-load.cpp
@@ -151,9 +151,19 @@ struct DeferBufferLoadContext
void deferBufferLoadInst(IRBuilder& builder, List<IRInst*>& workList, IRInst* loadInst)
{
+ bool failDueToAttributeFound = false;
+ for (auto attr : loadInst->getAllAttrs())
+ {
+ if (as<IRAlignedAttr>(attr) || as<IRMemoryScopeAttr>(attr))
+ {
+ failDueToAttributeFound = true;
+ break;
+ }
+ }
+
// Don't defer the load anymore if the type is simple.
- if (!isTypePreferrableToDeferLoad(codeGenContext, loadInst->getDataType()) ||
- loadInst->findAttr<IRAlignedAttr>())
+ if (failDueToAttributeFound ||
+ !isTypePreferrableToDeferLoad(codeGenContext, loadInst->getDataType()))
{
return;
}
diff --git a/source/slang/slang-ir-insts-stable-names.lua b/source/slang/slang-ir-insts-stable-names.lua
index fefc7a956..a34dc346a 100644
--- a/source/slang/slang-ir-insts-stable-names.lua
+++ b/source/slang/slang-ir-insts-stable-names.lua
@@ -679,4 +679,5 @@ return {
["CastResourceToDescriptorHandle"] = 675,
["SymbolAlias"] = 676,
["Decoration.InParamProxyVar"] = 677,
+ ["Attr.MemoryScope"] = 678,
}
diff --git a/source/slang/slang-ir-insts.h b/source/slang/slang-ir-insts.h
index 5c27d5e25..2255afc67 100644
--- a/source/slang/slang-ir-insts.h
+++ b/source/slang/slang-ir-insts.h
@@ -2191,6 +2191,13 @@ struct IRAlignedAttr : IRAttr
};
FIDDLE()
+struct IRMemoryScopeAttr : IRAttr // Attribute that records a memory scope (registered as the `MemoryScope` attr with at least one operand)
+{
+ FIDDLE(leafInst())
+ IRInst* getMemoryScope() { return getOperand(0); } // operand 0 is the scope; callers decode it with getIntVal and cast to MemoryScope
+};
+
+FIDDLE()
struct IRLoad : IRInst
{
FIDDLE(leafInst())
@@ -2242,6 +2249,17 @@ struct IRAtomicStore : IRAtomicOperation
};
FIDDLE()
+struct IRAtomicExchange : IRAtomicOperation // Typed view of an atomic-exchange instruction, exposing its pointer and value
+{
+ FIDDLE(leafInst())
+ IRUse ptr; // use slot returned by getPtr()
+ IRUse val; // use slot returned by getVal()
+
+ IRInst* getPtr() { return ptr.get(); } // pointer being exchanged on
+ IRInst* getVal() { return val.get(); } // NOTE(review): presumably the same value as operand 1 — confirm against the operand layout
+};
+
+FIDDLE()
struct IRRWStructuredBufferStore : IRInst
{
FIDDLE(leafInst())
@@ -4365,7 +4383,7 @@ public:
IRInst* emitLoad(IRType* type, IRInst* ptr);
IRInst* emitLoad(IRType* type, IRInst* ptr, IRInst* align);
- IRInst* emitLoad(IRType* type, IRInst* ptr, IRAlignedAttr* align);
+ IRInst* emitLoad(IRType* type, IRInst* ptr, ArrayView<IRInst*> attributes);
IRInst* emitLoad(IRInst* ptr);
IRInst* emitLoadReverseGradient(IRType* type, IRInst* diffValue);
@@ -4375,6 +4393,7 @@ public:
IRInst* emitStore(IRInst* dstPtr, IRInst* srcVal);
IRInst* emitStore(IRInst* dstPtr, IRInst* srcVal, IRInst* align);
+ IRInst* emitStore(IRInst* dstPtr, IRInst* srcVal, IRInst* align, IRInst* memoryScope);
IRInst* emitAtomicStore(IRInst* dstPtr, IRInst* srcVal, IRInst* memoryOrder);
diff --git a/source/slang/slang-ir-insts.lua b/source/slang/slang-ir-insts.lua
index a4bb4a6f2..e21fc86ae 100644
--- a/source/slang/slang-ir-insts.lua
+++ b/source/slang/slang-ir-insts.lua
@@ -2018,6 +2018,7 @@ local insts = {
},
},
{ Aligned = { struct_name = "AlignedAttr", min_operands = 1 } },
+ { MemoryScope = { struct_name = "MemoryScopeAttr", min_operands = 1 } },
{
SemanticAttr = {
{ userSemantic = { struct_name = "UserSemanticAttr", min_operands = 2 } },
diff --git a/source/slang/slang-ir-redundancy-removal.cpp b/source/slang/slang-ir-redundancy-removal.cpp
index 3b1a731f9..0308b50c2 100644
--- a/source/slang/slang-ir-redundancy-removal.cpp
+++ b/source/slang/slang-ir-redundancy-removal.cpp
@@ -416,6 +416,47 @@ static IRInst* _getRootVar(IRInst* inst)
return inst;
}
+// Ranks memory scopes by breadth: a larger value is a broader scope (CrossDevice = 7 is the broadest, Invocation = 1 the narrowest).
+static int getMemoryScopeOrder(MemoryScope scope)
+{
+ switch (scope)
+ {
+ case MemoryScope::CrossDevice:
+ return 7;
+ case MemoryScope::Device:
+ return 6;
+ case MemoryScope::QueueFamily:
+ // https://docs.vulkan.org/spec/latest/chapters/shaders.html#shaders-scope-queue-family
+ return 5;
+ case MemoryScope::ShaderCall:
+ // https://docs.vulkan.org/spec/latest/chapters/shaders.html#shaders-scope-shadercall
+ return 4;
+ case MemoryScope::Workgroup:
+ return 3;
+ case MemoryScope::Subgroup:
+ return 2;
+ case MemoryScope::Invocation:
+ default: // any scope not listed above is ranked the same as Invocation
+ return 1;
+ }
+}
+
+// Returns true if MemoryScope x is a subset of y, i.e. x ranks no broader than y (equal scopes count as subsets).
+static bool isMemoryScopeSubsetOf(MemoryScope x, MemoryScope y)
+{
+ return getMemoryScopeOrder(x) <= getMemoryScopeOrder(y);
+}
+
+// Returns the memory scope a load/store operates on: the value of its IRMemoryScopeAttr, or Invocation when it has none.
+static MemoryScope getMemoryScopeOfLoadStore(IRInst* inst)
+{
+ SLANG_ASSERT(as<IRLoad>(inst) || as<IRStore>(inst)); // only IRLoad / IRStore are expected here
+ auto memoryScope = inst->findAttr<IRMemoryScopeAttr>();
+ if (!memoryScope)
+ return MemoryScope::Invocation;
+ return (MemoryScope)getIntVal(memoryScope->getMemoryScope());
+}
+
bool tryRemoveRedundantStore(IRGlobalValueWithCode* func, IRStore* store)
{
// We perform a quick and conservative check:
@@ -473,15 +514,18 @@ bool tryRemoveRedundantStore(IRGlobalValueWithCode* func, IRStore* store)
}
}
- // A store can be removed if there are subsequent stores to the same variable,
+ // This store can be removed if there are subsequent stores to the same variable,
// and there are no insts in between the stores that can read the variable.
-
+ // Additionally, MemoryScope of the `store` must be a sub-set of `nextStore`,
+ // otherwise we can not be certain that `nextStore` completely overwrites `store`.
+ MemoryScope memoryScopeOfStore = getMemoryScopeOfLoadStore(store);
HashSet<IRBlock*> visitedBlocks;
for (auto next = store->getNextInst(); next;)
{
if (auto nextStore = as<IRStore>(next))
{
- if (nextStore->getPtr() == store->getPtr())
+ if (nextStore->getPtr() == store->getPtr() &&
+ isMemoryScopeSubsetOf(memoryScopeOfStore, getMemoryScopeOfLoadStore(nextStore)))
{
hasOverridingStore = true;
break;
@@ -585,13 +629,21 @@ bool tryRemoveRedundantLoad(IRGlobalValueWithCode* func, IRLoad* load)
{
bool changed = false;
- // If the load is preceeded by a store without any side-effect insts
- // in-between, remove the load.
+ // Get the memory scope we are operating on.
+ MemoryScope memoryScopeOfLoad = getMemoryScopeOfLoadStore(load);
+
+ // We can replace a load with a `Store->getVal()` if that store is a super-set
+ // memory scope to our load.
+ // Ex 1: Store into Workgroup, load from Invocation. Load will be equal to the Store.
+ //
+ // Ex 2: Store into Invocation, load from Workgroup. Load may/may-not be equal to the Store
+ // since the cache managing the Workgroup scope may contain different data than the invocation.
for (auto prev = load->getPrevInst(); prev; prev = prev->getPrevInst())
{
if (auto store = as<IRStore>(prev))
{
- if (store->getPtr() == load->getPtr())
+ if (store->getPtr() == load->getPtr() &&
+ isMemoryScopeSubsetOf(memoryScopeOfLoad, getMemoryScopeOfLoadStore(store)))
{
auto value = store->getVal();
load->replaceUsesWith(value);
diff --git a/source/slang/slang-ir.cpp b/source/slang/slang-ir.cpp
index 7b7d5ec17..8371d6ef5 100644
--- a/source/slang/slang-ir.cpp
+++ b/source/slang/slang-ir.cpp
@@ -5212,18 +5212,20 @@ IRInst* IRBuilder::emitLoad(IRType* type, IRInst* ptr, IRInst* align)
return inst;
}
-IRInst* IRBuilder::emitLoad(IRType* type, IRInst* ptr, IRAlignedAttr* align)
+IRInst* IRBuilder::emitLoad(IRType* type, IRInst* ptr, ArrayView<IRInst*> attributes)
{
- if (align)
- {
- auto inst = createInst<IRLoad>(this, kIROp_Load, type, ptr, align);
- addInst(inst);
- return inst;
- }
- else
- {
- return emitLoad(type, ptr);
- }
+ ShortList<IRInst*> params;
+ params.add(ptr);
+ params.addRange(attributes);
+ auto inst = createInst<IRLoad>(
+ this,
+ kIROp_Load,
+ type,
+ params.getCount(),
+ params.getArrayView().getBuffer());
+
+ addInst(inst);
+ return inst;
}
IRInst* IRBuilder::emitLoad(IRInst* ptr)
@@ -5279,6 +5281,21 @@ IRInst* IRBuilder::emitStore(IRInst* dstPtr, IRInst* srcVal, IRInst* align)
return inst;
}
+IRInst* IRBuilder::emitStore(IRInst* dstPtr, IRInst* srcVal, IRInst* align, IRInst* memoryScope) // Emits a store of srcVal to dstPtr carrying both an Aligned and a MemoryScope attribute.
+{
+ auto inst = createInst<IRStore>(
+ this,
+ kIROp_Store,
+ nullptr, // NOTE(review): sits where emitLoad passes its result type — presumably "no result type" for a store; confirm
+ dstPtr,
+ srcVal,
+ getAttr(kIROp_AlignedAttr, align), // align wrapped as an Aligned attribute operand
+ getAttr(kIROp_MemoryScopeAttr, memoryScope)); // memoryScope wrapped as a MemoryScope attribute operand
+
+ addInst(inst);
+ return inst;
+}
+
IRInst* IRBuilder::emitAtomicStore(IRInst* dstPtr, IRInst* srcVal, IRInst* memoryOrder)
{
auto inst = createInst<IRAtomicStore>(
diff --git a/source/slang/slang-ir.h b/source/slang/slang-ir.h
index 54bf23754..4f9941946 100644
--- a/source/slang/slang-ir.h
+++ b/source/slang/slang-ir.h
@@ -2431,7 +2431,7 @@ public:
// anything to do with serialization format
//
const static UInt k_minSupportedModuleVersion = 1;
- const static UInt k_maxSupportedModuleVersion = 1;
+ const static UInt k_maxSupportedModuleVersion = 2;
static_assert(k_minSupportedModuleVersion <= k_maxSupportedModuleVersion);
private:
diff --git a/tests/cooperative-matrix/coherent-load-store-pointer.slang b/tests/cooperative-matrix/coherent-load-store-pointer.slang
new file mode 100644
index 000000000..6057ab41f
--- /dev/null
+++ b/tests/cooperative-matrix/coherent-load-store-pointer.slang
@@ -0,0 +1,34 @@
+//TEST:SIMPLE(filecheck=SPIRV):-stage compute -entry computeMain -target spirv
+//TEST(compute):COMPARE_COMPUTE(filecheck-buffer=CHECK):-vk -output-using-type -emit-spirv-directly
+
+// Ensure SPIRV emits coherent operations here
+// SPIRV: MakePointerVisible
+// SPIRV: MakePointerAvailable
+
+// CHECK: 1
+// CHECK-NEXT: 2
+// CHECK-NEXT: 3
+// CHECK-NEXT: 4
+// CHECK-NEXT: 5
+// CHECK-NEXT: 6
+// CHECK-NEXT: 7
+// CHECK-NEXT: 8
+
+//TEST_INPUT:ubuffer(data=[1 2 3 4 5 6 7 8], stride=4, count=256):name=inputBuffer
+uniform int32_t* inputBuffer;
+
+//TEST_INPUT:ubuffer(stride=4, count=256):out,name=outputBuffer
+uniform int32_t* outputBuffer;
+
+using namespace linalg;
+
+[numthreads(32, 1, 1)]
+void computeMain()
+{
+ int32_t* ptrIn = inputBuffer;
+ int32_t* ptrOut = outputBuffer;
+
+ let stride = 16;
+ let mat = coopMatLoadCoherent<int32_t, MemoryScope.Subgroup, 16, 16, CoopMatMatrixUse.MatrixAccumulator, CoopMatMatrixLayout.RowMajor>(ptrIn, 0, stride, MemoryScope::Device);
+ mat.StoreCoherent<CoopMatMatrixLayout.RowMajor>(ptrOut, 0, 16, MemoryScope::Device);
+} \ No newline at end of file
diff --git a/tests/cooperative-matrix/load-store-pointer.slang b/tests/cooperative-matrix/load-store-pointer.slang
new file mode 100644
index 000000000..2bbd8fef1
--- /dev/null
+++ b/tests/cooperative-matrix/load-store-pointer.slang
@@ -0,0 +1,35 @@
+//TEST:SIMPLE(filecheck=SPIRV):-stage compute -entry computeMain -target spirv
+//TEST(compute):COMPARE_COMPUTE(filecheck-buffer=CHECK):-vk -output-using-type -emit-spirv-directly
+
+// Ensure SPIRV does not do coherent operations here
+// SPIRV-NOT: MakePointerAvailable
+// SPIRV-NOT: MakePointerVisible
+
+
+// CHECK: 1
+// CHECK-NEXT: 2
+// CHECK-NEXT: 3
+// CHECK-NEXT: 4
+// CHECK-NEXT: 5
+// CHECK-NEXT: 6
+// CHECK-NEXT: 7
+// CHECK-NEXT: 8
+
+//TEST_INPUT:ubuffer(data=[1 2 3 4 5 6 7 8], stride=4, count=256):name=inputBuffer
+uniform int32_t* inputBuffer;
+
+//TEST_INPUT:ubuffer(stride=4, count=256):out,name=outputBuffer
+uniform int32_t* outputBuffer;
+
+using namespace linalg;
+
+[numthreads(32, 1, 1)]
+void computeMain()
+{
+ int32_t* ptrIn = inputBuffer;
+ int32_t* ptrOut = outputBuffer;
+
+ let stride = 16;
+ let mat = coopMatLoad<int32_t, MemoryScope.Subgroup, 16, 16, CoopMatMatrixUse.MatrixAccumulator, CoopMatMatrixLayout.RowMajor>(ptrIn, 0, stride);
+ mat.Store<CoopMatMatrixLayout.RowMajor>(ptrOut, 0, 16);
+} \ No newline at end of file
diff --git a/tests/cooperative-vector/coherent-load-store-pointer.slang b/tests/cooperative-vector/coherent-load-store-pointer.slang
new file mode 100644
index 000000000..40efeee1a
--- /dev/null
+++ b/tests/cooperative-vector/coherent-load-store-pointer.slang
@@ -0,0 +1,38 @@
+//TEST:SIMPLE(filecheck=SPIRV):-stage compute -entry computeMain -target spirv
+
+// coherent CoopVec operations crash the Nvidia driver.
+//TEST(compute):COMPARE_COMPUTE(filecheck-buffer=CHECK):-vk -render-feature cooperative-vector -emit-spirv-directly
+
+// Ensure SPIRV emits coherent operations here
+// SPIRV: MakePointerVisible
+// SPIRV: MakePointerAvailable
+
+//TEST_INPUT: set inputBuffer = ubuffer(data=[1 2 3 4 5 6 7 8 9 10 11 12], stride=4);
+uniform int32_t* inputBuffer;
+
+//TEST_INPUT: set outputBuffer = out ubuffer(data=[0 0 0 0 0 0 0 0], stride=4);
+uniform int32_t* outputBuffer;
+
+// CHECK: 9
+// CHECK-NEXT: A
+// CHECK-NEXT: B
+// CHECK-NEXT: C
+// CHECK-NEXT: 1
+// CHECK-NEXT: 2
+// CHECK-NEXT: 3
+// CHECK-NEXT: 4
+
+[shader("compute")]
+[numthreads(1, 1, 1)]
+void computeMain()
+{
+ //// First half of input.
+ let a = coopVecLoadCoherent<4, int32_t>(inputBuffer, 0, MemoryScope::Device);
+ //// Second half of input.
+ let b = coopVecLoadCoherent<4, int32_t>(inputBuffer + 4, 4 * 4, MemoryScope::Device);
+ //// Store second half of input to first half of output buffer.
+ b.storeCoherent(outputBuffer, 0, MemoryScope::Device);
+ //// Store first half of input to second half of output buffer.
+ a.storeCoherent(outputBuffer, 4 * 4, MemoryScope::Device);
+}
+
diff --git a/tests/ir/dump-module-info.slang b/tests/ir/dump-module-info.slang
index c7753b440..67a43b274 100644
--- a/tests/ir/dump-module-info.slang
+++ b/tests/ir/dump-module-info.slang
@@ -6,7 +6,7 @@ module "foo";
// CHECK: Module Name: foo
// This will need bumping whenever we bump the ir module version
-// CHECK: Module Version: 1
+// CHECK: Module Version: 2
// Just check that this is in the output with some string
// CHECK: Compiler Version: {{.+}}
diff --git a/tests/language-feature/pointer/coherent-load-store-groupshared.slang b/tests/language-feature/pointer/coherent-load-store-groupshared.slang
new file mode 100644
index 000000000..2e537ef01
--- /dev/null
+++ b/tests/language-feature/pointer/coherent-load-store-groupshared.slang
@@ -0,0 +1,26 @@
+//TEST:SIMPLE(filecheck=SPIRV):-stage compute -entry computeMain -target spirv -capability vk_mem_model
+//TEST(compute):COMPARE_COMPUTE(filecheck-buffer=CHECK):-vk -emit-spirv-directly -capability vk_mem_model
+
+// Tests if we pass through and handle groupshared address space pointers correctly.
+// Ensure SPIRV emits coherent operations here
+// SPIRV: MakePointerAvailable|NonPrivatePointer
+// SPIRV: MakePointerVisible|NonPrivatePointer
+
+// CHECK: 2
+// CHECK-NEXT: 1
+// CHECK-NEXT: 0
+
+//TEST_INPUT:ubuffer(data=[0 0 0], stride=4):out,name=outputBuffer
+RWStructuredBuffer<int> outputBuffer;
+
+groupshared int[32] shared;
+
+#define THREAD_GROUP_SIZE 3
+[numthreads(THREAD_GROUP_SIZE, 1, 1)]
+void computeMain(uint3 group_thread_id: SV_GroupThreadID)
+{
+ Ptr<int, Access::ReadWrite, AddressSpace::GroupShared> ptr = __getAddress(shared[0]);
+ storeCoherent<4, MemoryScope::Workgroup>(ptr + group_thread_id.x, (int)group_thread_id.x);
+ AllMemoryBarrierWithGroupSync();
+ outputBuffer[group_thread_id.x] = loadCoherent<4, MemoryScope::Workgroup>(ptr + THREAD_GROUP_SIZE - group_thread_id.x - 1);
+} \ No newline at end of file
diff --git a/tests/language-feature/pointer/coherent-load-store-image.slang b/tests/language-feature/pointer/coherent-load-store-image.slang
new file mode 100644
index 000000000..359994a0e
--- /dev/null
+++ b/tests/language-feature/pointer/coherent-load-store-image.slang
@@ -0,0 +1,29 @@
+//DISABLE_TEST:SIMPLE(filecheck=SPIRV):-stage compute -entry computeMain -target spirv -capability vk_mem_model
+//DISABLE_TEST(compute):COMPARE_COMPUTE(filecheck-buffer=CHECK):-vk -output-using-type -emit-spirv-directly -profile spirv_1_3 -capability vk_mem_model
+// These tests are expected to fail, pointers to texels are
+// currently a broken feature and do not work.
+// Additionally, we do not allow texel pointers with `__getAddress`.
+
+
+// Ensure SPIRV emits coherent operations here
+// SPIRV: MakeTexelAvailable
+// SPIRV: MakeTexelVisible
+
+// CHECK: 0
+// CHECK-NEXT: 5
+
+//TEST_INPUT: RWTexture1D(format=R32Uint, size=8, content = one, mipMaps = 1):name=texture
+RWTexture1D<uint> texture;
+
+//TEST_INPUT: ubuffer(data=[0 0 0 0], stride=4):out,name outputBuffer
+RWStructuredBuffer<uint> outputBuffer;
+
+[numthreads(32, 1, 1)]
+void computeMain()
+{
+ Ptr<uint> ptrIn = __getAddress(texture[1]);
+ Ptr<uint> secondPtrIn = ptrIn;
+
+ storeCoherent<4, MemoryScope::Device>(ptrIn, 5);
+ outputBuffer[0] = loadCoherent<4, MemoryScope::Device>(ptrIn);
+}
diff --git a/tests/language-feature/pointer/coherent-load-store-physical-storage-buffer.slang b/tests/language-feature/pointer/coherent-load-store-physical-storage-buffer.slang
new file mode 100644
index 000000000..b70664d82
--- /dev/null
+++ b/tests/language-feature/pointer/coherent-load-store-physical-storage-buffer.slang
@@ -0,0 +1,24 @@
+//TEST:SIMPLE(filecheck=SPIRV):-stage compute -entry computeMain -target spirv -capability vk_mem_model
+//TEST(compute):COMPARE_COMPUTE(filecheck-buffer=CHECK):-vk -output-using-type -emit-spirv-directly -capability vk_mem_model
+
+// Ensure SPIRV emits coherent operations here
+// SPIRV: MakePointerVisible
+// SPIRV: MakePointerAvailable
+
+// CHECK: 2
+
+//TEST_INPUT:ubuffer(data=[1 2 3], stride=4):name=inputBuffer
+uniform int* inputBuffer;
+
+//TEST_INPUT:ubuffer(data=[0 0 0], stride=4):out,name=outputBuffer
+uniform int* outputBuffer;
+
+[shader("compute")]
+[numthreads(32, 1, 1)]
+void computeMain()
+{
+ Ptr<int> ptrIn = inputBuffer;
+ Ptr<int> secondPtrIn = ptrIn;
+ Ptr<int> ptrOut = outputBuffer;
+ storeCoherent<4, MemoryScope::Device>(ptrOut, loadCoherent<4, MemoryScope::Device>(&secondPtrIn[1]));
+} \ No newline at end of file
diff --git a/tests/language-feature/pointer/redundant-coherent-load.slang b/tests/language-feature/pointer/redundant-coherent-load.slang
new file mode 100644
index 000000000..e0c7d5e56
--- /dev/null
+++ b/tests/language-feature/pointer/redundant-coherent-load.slang
@@ -0,0 +1,56 @@
+//TEST:SIMPLE(filecheck=SPIRV):-stage compute -entry computeMain -target spirv -capability vk_mem_model
+//TEST(compute):COMPARE_COMPUTE(filecheck-buffer=CHECK):-vk -emit-spirv-directly -capability vk_mem_model -output-using-type
+
+// Tests if we optimize redundant loads correctly
+
+//TEST_INPUT:ubuffer(data=[0 0 0], stride=4):out,name=outputBuffer
+RWStructuredBuffer<int> outputBuffer;
+//TEST_INPUT:ubuffer(data=[0 0 0 11 10], stride=4),name=buffer
+uniform int* buffer;
+
+[numthreads(2, 1, 1)]
+void computeMain(uint3 group_thread_id: SV_GroupThreadID)
+{
+ Ptr<int, Access::ReadWrite, AddressSpace::Device> ptr = __getAddress(buffer[0]);
+
+ // Consider the load from this store-load pattern as redundant since
+ // the load's memory scope is a subset of the store's memory scope.
+ // Invocation == Invocation.
+ *ptr = 8;
+ outputBuffer[0] = loadCoherent<4, MemoryScope::Invocation>(ptr);
+ // CHECK: 8
+ // SPIRV: OpStore %ptr %int_8
+ // SPIRV-NOT: OpLoad
+ // SPIRV: %[[#OUTPUT_BUFFER1:]] = OpAccessChain {{.*}} %outputBuffer %{{.*}} %int_0
+ // SPIRV: OpStore %[[#OUTPUT_BUFFER1]] %int_8
+
+ // Consider the load from this store-load pattern as redundant since
+ // the load's memory scope is a subset of the store's memory scope.
+ // Device > Workgroup.
+ let offset1 = ptr + 1;
+ storeCoherent<4, MemoryScope::Device>(offset1, 9);
+ outputBuffer[1] = loadCoherent<4, MemoryScope::Workgroup>(offset1);
+ // CHECK-NEXT: 9
+ // SPIRV: %[[#PTR_OFFSET:]] = OpPtrAccessChain {{.*}} %ptr %int_1
+ // SPIRV: OpStore %[[#PTR_OFFSET]] %int_9
+ // SPIRV-NOT: OpLoad
+ // SPIRV: %[[#OUTPUT_BUFFER2:]] = OpAccessChain {{.*}} %outputBuffer %{{.*}} %int_1
+ // SPIRV: OpStore %[[#OUTPUT_BUFFER2]] %int_9
+
+ // Consider the following store-load pattern as not redundant since the data stored
+ // may not be the same data that will be loaded if Workgroup-scope contains
+ // different data than the Subgroup-scope.
+ // Subgroup < Workgroup.
+ let offset2 = ptr + 2;
+ storeCoherent<4, MemoryScope::Subgroup>(offset2, buffer[3]);
+ if(group_thread_id.x == 1)
+ {
+ storeCoherent<4, MemoryScope::Invocation>(offset2, buffer[4]);
+ let result = loadCoherent<4, MemoryScope::Workgroup>(offset2);
+ outputBuffer[2] = (result == 11 || result == 10) ? 12 : 0;
+ }
+ // CHECK-NEXT: 12
+ // SPIRV: OpStore {{.*}}MakePointerAvailable{{.*}} 4 %int_3
+ // SPIRV: OpStore {{.*}}MakePointerAvailable{{.*}} 4 %int_4
+ // SPIRV: OpLoad {{.*}}MakePointerVisible{{.*}} 4 %int_2
+} \ No newline at end of file
diff --git a/tests/language-feature/pointer/redundant-coherent-store.slang b/tests/language-feature/pointer/redundant-coherent-store.slang
new file mode 100644
index 000000000..81cba3024
--- /dev/null
+++ b/tests/language-feature/pointer/redundant-coherent-store.slang
@@ -0,0 +1,40 @@
+//TEST:SIMPLE(filecheck=SPIRV):-stage compute -entry computeMain -target spirv -capability vk_mem_model
+//TEST(compute):COMPARE_COMPUTE(filecheck-buffer=CHECK):-vk -emit-spirv-directly -capability vk_mem_model
+
+// Tests if we optimize redundant stores correctly
+
+//TEST_INPUT:ubuffer(data=[0 0], stride=4):out,name=outputBuffer
+RWStructuredBuffer<int> outputBuffer;
+//TEST_INPUT:ubuffer(data=[0 0], stride=4),name=buffer
+uniform int* buffer;
+
+[numthreads(128, 1, 1)]
+void computeMain(uint3 group_thread_id: SV_GroupThreadID)
+{
+ Ptr<int, Access::ReadWrite, AddressSpace::Device> ptr = __getAddress(buffer[0]);
+ if (group_thread_id.x == 0)
+ {
+ // This store will not optimize out, Device > Invocation.
+ // SPIRV: OpStore %ptr %int_1
+ storeCoherent<4, MemoryScope::Device>(ptr, 1);
+ // SPIRV-NEXT: OpStore %ptr %int_2
+ storeCoherent<4, MemoryScope::Invocation>(ptr, 2);
+
+ // Both of these stores will optimize out, Workgroup > Invocation.
+ // SPIRV-NOT: OpStore {{.*}} %int_3
+ *(ptr + 1) = 3;
+ // SPIRV-NOT: OpStore {{.*}} %int_4
+ storeCoherent<4, MemoryScope::Invocation>(ptr + 1, 4);
+ // SPIRV: OpStore {{.*}} %int_5
+ storeCoherent<4, MemoryScope::Workgroup>(ptr + 1, 5);
+ }
+ AllMemoryBarrierWithGroupSync();
+ if (group_thread_id.x == 127)
+ {
+ // CHECK: 1
+ outputBuffer[0] = (*ptr == 1 || *ptr == 2) ? 1 : 0;
+
+ // CHECK-NEXT: 5
+ outputBuffer[1] = loadCoherent<4, MemoryScope::Workgroup>(ptr+1);
+ }
+} \ No newline at end of file