20 files changed, 687 insertions, 186 deletions
diff --git a/source/slang/core.meta.slang b/source/slang/core.meta.slang
index 5ffab1f9c..9b55dc35a 100644
--- a/source/slang/core.meta.slang
+++ b/source/slang/core.meta.slang
@@ -1368,19 +1368,20 @@ struct Ptr<
 
         __intrinsic_op($(kIROp_GetOffsetPtr))
         [nonmutating]
+        [__NoSideEffect]
         ref;
     }
 };
 
 //@hidden:
 __intrinsic_op($(kIROp_AlignedAttr))
-void __align_attr(int alignment);
+internal int __align_attr(int alignment);
 
 __intrinsic_op($(kIROp_Load))
-T __load_aligned<T, U>(T* ptr, U alignmentAttr);
+internal T __load_aligned<T>(T* ptr, int alignmentAttr);
 
 __intrinsic_op($(kIROp_Store))
-void __store_aligned<T, U>(T* ptr, T value, U alignmentAttr);
+internal void __store_aligned<T>(T* ptr, T value, int alignmentAttr);
 
 //@public:
 
@@ -1413,6 +1414,42 @@ void storeAligned<int alignment, T>(T* ptr, T value)
     __store_aligned(ptr, value, __align_attr(alignment));
 }
 
+//@hidden:
+__intrinsic_op($(kIROp_MemoryScopeAttr))
+internal int __memoryscope_attr(MemoryScope scope);
+
+__intrinsic_op($(kIROp_Load))
+internal T __load_coherent<T, Access access, AddressSpace addrSpace>(Ptr<T, access, addrSpace> ptr, int alignmentAttr, int memoryScopeAttr);
+
+__intrinsic_op($(kIROp_Store))
+internal void __store_coherent<T, AddressSpace addrSpace>(Ptr<T, Access::ReadWrite, addrSpace> ptr, T value, int alignmentAttr, int memoryScopeAttr);
+
+/// Store a value coherently at the memory scope `scope`, using `alignment` as the access alignment.
+/// Tighter memory scopes may be faster to operate on.
+/// @param ptr The pointer to store the value to.
+/// @param value The value to store.
+///
+[require(SPV_KHR_vulkan_memory_model)]
+[ForceInline]
+__generic<int alignment, MemoryScope scope, T, AddressSpace addrSpace> 
+void storeCoherent(Ptr<T, Access::ReadWrite, addrSpace> ptr, T value)
+{
+    __store_coherent<T, addrSpace>(ptr, value, __align_attr(alignment), __memoryscope_attr(scope));
+}
+
+/// Load a value coherently at the memory scope `scope`, using `alignment` as the access alignment.
+/// Tighter memory scopes may be faster to operate on.
+/// @param ptr The pointer to load from.
+///
+[require(SPV_KHR_vulkan_memory_model)]
+[ForceInline]
+[__NoSideEffect]
+__generic<int alignment, MemoryScope scope, T, Access access, AddressSpace addrSpace> 
+T loadCoherent(Ptr<T, access, addrSpace> ptr)
+{
+    return __load_coherent<T, access, addrSpace>(ptr, __align_attr(alignment), __memoryscope_attr(scope));
+}
+
 ${{{
     StringBuilder ptrTypeParameterListBuilder;
     ptrTypeParameterListBuilder << "T, Access access, AddressSpace addrSpace";
diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang
index 73bdee96e..824a06000 100644
--- a/source/slang/hlsl.meta.slang
+++ b/source/slang/hlsl.meta.slang
@@ -23239,6 +23239,18 @@ extension<T, L : IBufferDataLayout> RasterizerOrderedStructuredBuffer<T, L> : IR
     int getCount() { uint count; uint stride; this.GetDimensions(count, stride); return count; }
 }
 
+[require(vk_mem_model)]
+internal void enableVMMDeviceScopeCapabilityIfNeeded(constexpr MemoryScope memoryScope)
+{
+    if (memoryScope == MemoryScope::Device)
+    {
+        spirv_asm
+        {
+            OpCapability VulkanMemoryModelDeviceScopeKHR;
+        };
+    }
+}
+
 namespace linalg
 {
 
@@ -23813,6 +23825,22 @@ struct CoopMat
         };
     }
 
+    // TODO: make this function an intrinsic and support all types via the single intrinsic
+    [require(cooperative_matrix, vk_mem_model)]
+    void StoreCoherent<
+        let matrixLayout : CoopMatMatrixLayout
+    >(T* buffer, uint element, uint stride, constexpr MemoryScope memoryScope)
+    {
+        enableVMMDeviceScopeCapabilityIfNeeded(memoryScope);
+        let alignment = 16U;
+        const int32_t scope = (int32_t)memoryScope;
+        return spirv_asm
+        {
+            %pointer:$$T* = OpPtrAccessChain $buffer $element;
+            OpCooperativeMatrixStoreKHR %pointer $this $matrixLayout $stride Aligned|MakePointerAvailable|NonPrivatePointer !alignment $scope;
+        };
+    }
+
     [ForceInline]
     [require(cooperative_matrix)]
     void Store<
@@ -23924,6 +23952,24 @@ ${{{{
         };
     }
 
+    // TODO: make this function an intrinsic and support all types via the single intrinsic
+    [ForceInline]
+    [__NoSideEffect]
+    [require(cooperative_matrix, vk_mem_model)]
+    static This LoadCoherent<
+        let matrixLayout : CoopMatMatrixLayout
+    >(T* buffer, uint element, uint stride, constexpr MemoryScope memoryScope)
+    {
+        enableVMMDeviceScopeCapabilityIfNeeded(memoryScope);
+        let alignment = 16U;
+        const int32_t scope = (int32_t)memoryScope;
+        return spirv_asm
+        {
+            %pointer:$$T* = OpPtrAccessChain $buffer $element;
+            result:$$CoopMat<T, S, M, N, R> = OpCooperativeMatrixLoadKHR %pointer $matrixLayout $stride Aligned|MakePointerVisible|NonPrivatePointer !alignment $scope;
+        };
+    }
+
     [ForceInline]
     [require(cooperative_matrix)]
     static This Load<
@@ -24480,6 +24526,24 @@ CoopMat<T, S, M, N, R> coopMatLoad<
 }
 
 [ForceInline]
+[require(cooperative_matrix, vk_mem_model)]
+CoopMat<T, S, M, N, R> coopMatLoadCoherent<
+    T : __BuiltinArithmeticType,
+    let S : MemoryScope,
+    let M : int,
+    let N : int,
+    let R : CoopMatMatrixUse,
+    let matrixLayout : CoopMatMatrixLayout
+>(
+    T* buffer,
+    uint element,
+    uint stride,
+    constexpr MemoryScope memoryScope)
+{
+    return CoopMat<T, S, M, N, R>.LoadCoherent<matrixLayout>(buffer, element, stride, memoryScope);
+}
+
+[ForceInline]
 [require(cooperative_matrix)]
 CoopMat<T, S, M, N, R> coopMatLoad<
     T : __BuiltinArithmeticType,
@@ -24845,6 +24909,20 @@ struct CoopVec<T : __BuiltinArithmeticType, let N : int> : IArray<T>, IArithmeti
         };
     }
 
+    // TODO: make this function an intrinsic and support all types via the single intrinsic
+    [require(cooperative_vector, vk_mem_model)]
+    void storeCoherent(T* buffer, int32_t byteOffset16ByteAligned = 0, constexpr MemoryScope memoryScope = MemoryScope::Device)
+    {
+        enableVMMDeviceScopeCapabilityIfNeeded(memoryScope);
+        let pointer = Ptr<T[]>(buffer);
+        let alignment = 16U;
+        const int32_t scope = (int32_t)memoryScope;
+        spirv_asm
+        {
+            OpCooperativeVectorStoreNV $pointer $byteOffset16ByteAligned $this Aligned|MakePointerAvailable|NonPrivatePointer !alignment $scope;
+        };
+    }
+
     [ForceInline]
     [require(cooperative_vector)]
     [require(hlsl_coopvec_poc)]
@@ -25017,6 +25095,20 @@ struct CoopVec<T : __BuiltinArithmeticType, let N : int> : IArray<T>, IArithmeti
         };
     }
 
+    // TODO: make this function an intrinsic and support all types via the single intrinsic
+    [require(cooperative_vector, vk_mem_model)]
+    static CoopVec<T, N> loadCoherent(T* buffer, int32_t byteOffset16ByteAligned = 0, constexpr MemoryScope memoryScope = MemoryScope::Device)
+    {
+        enableVMMDeviceScopeCapabilityIfNeeded(memoryScope);
+        let pointer = Ptr<T[]>(buffer);
+        let alignment = 16U;
+        const int32_t scope = (int32_t)memoryScope;
+        return spirv_asm
+        {
+            result:$$CoopVec<T, N> = OpCooperativeVectorLoadNV $pointer $byteOffset16ByteAligned Aligned|MakePointerVisible|NonPrivatePointer !alignment $scope;
+        };
+    }
+
     // Groupshared
     [ForceInline]
     [__NoSideEffect]
@@ -26457,6 +26549,13 @@ CoopVec<T, N> coopVecLoad<let N : int, T : __BuiltinArithmeticType>(T* buffer, i
     return CoopVec<T, N>.load(buffer, byteOffset16ByteAligned);
 }
 
+[ForceInline]
+[require(spirv, cooperative_vector, vk_mem_model)]
+CoopVec<T, N> coopVecLoadCoherent<let N : int, T : __BuiltinArithmeticType>(T* buffer, int32_t byteOffset16ByteAligned = 0, constexpr MemoryScope memoryScope = MemoryScope::Device)
+{
+    return CoopVec<T, N>.loadCoherent(buffer, byteOffset16ByteAligned, memoryScope);
+}
+
 // Groupshared
 [ForceInline]
 [require(cooperative_vector)]
diff --git a/source/slang/slang-emit-spirv-ops.h b/source/slang/slang-emit-spirv-ops.h
index a5e4d730a..da9058b62 100644
--- a/source/slang/slang-emit-spirv-ops.h
+++ b/source/slang/slang-emit-spirv-ops.h
@@ -600,28 +600,6 @@ SpvInst* emitOpLoad(
     return emitInst(parent, inst, SpvOpLoad, idResultType, kResultID, pointer, memoryAccess);
 }
 
-// https://registry.khronos.org/SPIR-V/specs/unified1/SPIRV.html#OpLoad
-template<typename T1, typename T2>
-SpvInst* emitOpLoadAligned(
-    SpvInstParent* parent,
-    IRInst* inst,
-    const T1& idResultType,
-    const T2& pointer,
-    const SpvLiteralInteger& literalInteger)
-{
-    static_assert(isSingular<T1>);
-    static_assert(isSingular<T2>);
-    return emitInst(
-        parent,
-        inst,
-        SpvOpLoad,
-        idResultType,
-        kResultID,
-        pointer,
-        SpvMemoryAccessAlignedMask,
-        literalInteger);
-}
-
 // https://registry.khronos.org/SPIR-V/specs/unified1/SPIRV.html#OpStore
 template<typename T1, typename T2>
 SpvInst* emitOpStore(
@@ -636,27 +614,6 @@ SpvInst* emitOpStore(
     return emitInst(parent, inst, SpvOpStore, pointer, object, memoryAccess);
 }
 
-// https://registry.khronos.org/SPIR-V/specs/unified1/SPIRV.html#OpStore
-template<typename T1, typename T2>
-SpvInst* emitOpStoreAligned(
-    SpvInstParent* parent,
-    IRInst* inst,
-    const T1& pointer,
-    const T2& object,
-    const SpvLiteralInteger& literalInteger)
-{
-    static_assert(isSingular<T1>);
-    static_assert(isSingular<T2>);
-    return emitInst(
-        parent,
-        inst,
-        SpvOpStore,
-        pointer,
-        object,
-        SpvMemoryAccessAlignedMask,
-        literalInteger);
-}
-
 // https://registry.khronos.org/SPIR-V/specs/unified1/SPIRV.html#OpAccessChain
 template<typename T1, typename T2, typename Ts>
 SpvInst* emitOpAccessChain(
diff --git a/source/slang/slang-emit-spirv.cpp b/source/slang/slang-emit-spirv.cpp
index 3a8a913ec..8bcd1429f 100644
--- a/source/slang/slang-emit-spirv.cpp
+++ b/source/slang/slang-emit-spirv.cpp
@@ -4569,33 +4569,37 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex
             break;
         case kIROp_AtomicLoad:
             {
-                IRBuilder builder{inst};
-                if (isAtomicableAddressSpace(inst->getOperand(0)->getDataType()))
+                IRAtomicLoad* atomicLoad = as<IRAtomicLoad>(inst);
+                auto ptr = atomicLoad->getPtr();
+                IRBuilder builder{atomicLoad};
+                if (isAtomicableAddressSpace(ptr->getDataType()))
                 {
                     if (m_memoryModel == SpvMemoryModelVulkan)
                         requireSPIRVCapability(SpvCapabilityVulkanMemoryModelDeviceScope);
 
                     const auto memoryScope =
                         emitIntConstant(IRIntegerValue{SpvScopeDevice}, builder.getUIntType());
-                    const auto memorySemantics =
-                        emitMemorySemanticMask(inst->getOperand(1), inst->getOperand(0));
+                    const auto memorySemantics = emitMemorySemanticMask(inst->getOperand(1), ptr);
                     result = emitOpAtomicLoad(
                         parent,
                         inst,
                         inst->getFullType(),
-                        inst->getOperand(0),
+                        ptr,
                         memoryScope,
                         memorySemantics);
                     ensureAtomicCapability(inst, SpvOpAtomicLoad);
                 }
                 else
                 {
-                    result = emitLoadMaybeCoherent(parent, inst);
+                    result = emitLoad(parent, inst, ptr);
                 }
             }
             break;
         case kIROp_AtomicStore:
             {
+                IRAtomicStore* atomicStore = as<IRAtomicStore>(inst);
+                auto ptr = atomicStore->getPtr();
+                auto val = atomicStore->getVal();
                 IRBuilder builder{inst};
                 if (isAtomicableAddressSpace(inst->getOperand(0)->getDataType()))
                 {
@@ -4604,48 +4608,44 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex
 
                     const auto memoryScope =
                         emitIntConstant(IRIntegerValue{SpvScopeDevice}, builder.getUIntType());
-                    const auto memorySemantics =
-                        emitMemorySemanticMask(inst->getOperand(2), inst->getOperand(0));
-                    result = emitOpAtomicStore(
-                        parent,
-                        inst,
-                        inst->getOperand(0),
-                        memoryScope,
-                        memorySemantics,
-                        inst->getOperand(1));
+                    const auto memorySemantics = emitMemorySemanticMask(inst->getOperand(2), ptr);
+                    result =
+                        emitOpAtomicStore(parent, inst, ptr, memoryScope, memorySemantics, val);
                     ensureAtomicCapability(inst, SpvOpAtomicStore);
                 }
                 else
                 {
-                    result = emitStoreMaybeCoherent(parent, inst);
+                    result = emitStore(parent, inst, ptr, val);
                 }
             }
             break;
         case kIROp_AtomicExchange:
             {
+                IRAtomicExchange* atomicExchange = as<IRAtomicExchange>(inst);
+                auto ptr = atomicExchange->getPtr();
+                auto val = atomicExchange->getVal(); // value to swap in
                 IRBuilder builder{inst};
-                if (isAtomicableAddressSpace(inst->getOperand(0)->getDataType()))
+                if (isAtomicableAddressSpace(ptr->getDataType()))
                 {
                     if (m_memoryModel == SpvMemoryModelVulkan)
                         requireSPIRVCapability(SpvCapabilityVulkanMemoryModelDeviceScope);
 
                     const auto memoryScope =
                         emitIntConstant(IRIntegerValue{SpvScopeDevice}, builder.getUIntType());
-                    const auto memorySemantics =
-                        emitMemorySemanticMask(inst->getOperand(2), inst->getOperand(0));
+                    const auto memorySemantics = emitMemorySemanticMask(inst->getOperand(2), ptr);
                     result = emitOpAtomicExchange(
                         parent,
                         inst,
                         inst->getFullType(),
-                        inst->getOperand(0),
+                        ptr,
                         memoryScope,
                         memorySemantics,
-                        inst->getOperand(1));
+                        val);
                     ensureAtomicCapability(inst, SpvOpAtomicExchange);
                 }
                 else
                 {
-                    result = emitStoreMaybeCoherent(parent, inst);
+                    result = emitStore(parent, inst, ptr, val);
                 }
             }
             break;
@@ -7082,6 +7082,8 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex
 
     SpvInst* emitGetOffsetPtr(SpvInstParent* parent, IRInst* inst)
     {
+        requireVariableBufferCapabilityIfNeeded(inst->getDataType());
+
         return emitOpPtrAccessChain(
             parent,
             inst,
@@ -7174,54 +7176,100 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex
         }
     }
 
-    SpvInst* emitLoad(SpvInstParent* parent, IRLoad* inst)
+    enum class MemoryAccessType
     {
-        requireVariableBufferCapabilityIfNeeded(inst->getDataType());
+        Load,
+        Store
+    };
+
+    template<MemoryAccessType memoryAccessType>
+    void getMemoryAccessOperandsOfLoadStore(
+        IRInst* inst,
+        IRInst* ptr,
+        int& memoryAccessMaskOut,
+        int& alignmentOut,
+        MemoryScope& memoryScopeOut)
+    {
+        IRAlignedAttr* alignedAttr = nullptr;
+        IRMemoryScopeAttr* memoryScopeAttr = nullptr;
 
-        auto ptrType = as<IRPtrTypeBase>(inst->getPtr()->getDataType());
-        if (ptrType && addressSpaceToStorageClass(ptrType->getAddressSpace()) ==
-                           SpvStorageClassPhysicalStorageBuffer)
+        for (auto attr : inst->getAllAttrs())
         {
-            IRSizeAndAlignment sizeAndAlignment;
-            if (auto alignedAttr = inst->findAttr<IRAlignedAttr>())
+            if (auto foundAlignedAttr = as<IRAlignedAttr>(attr))
+                alignedAttr = foundAlignedAttr;
+            else if (auto foundMemoryScopeAttr = as<IRMemoryScopeAttr>(attr))
+                memoryScopeAttr = foundMemoryScopeAttr;
+        }
+
+        // Determine coherence
+        {
+            bool isCoherent = false;
+            if (memoryScopeAttr)
             {
-                sizeAndAlignment.alignment = (int)getIntVal(alignedAttr->getAlignment());
+                memoryScopeOut = (MemoryScope)getIntVal(memoryScopeAttr->getMemoryScope());
+                if (m_memoryModel != SpvMemoryModelVulkan)
+                    SLANG_ASSERT_FAILURE(
+                        "Explicit coherent operations require vulkan-memory-model, "
+                        "specify the capability 'vk_mem_model'");
+                isCoherent = true;
             }
             else
             {
-                getNaturalSizeAndAlignment(
-                    m_targetProgram->getOptionSet(),
-                    ptrType->getValueType(),
-                    &sizeAndAlignment);
+                if (NeedToUseCoherentLoadOrStore(ptr))
+                {
+                    memoryScopeOut = MemoryScope::Device;
+                    isCoherent = true;
+                }
+            }
+            if (isCoherent)
+            {
+
+                memoryAccessMaskOut |= SpvMemoryAccessNonPrivatePointerMask;
+                if constexpr (memoryAccessType == MemoryAccessType::Load)
+                    memoryAccessMaskOut |= SpvMemoryAccessMakePointerVisibleMask;
+                else
+                    memoryAccessMaskOut |= SpvMemoryAccessMakePointerAvailableMask;
+                if (memoryScopeOut == MemoryScope::Device)
+                    requireSPIRVCapability(SpvCapabilityVulkanMemoryModelDeviceScope);
             }
-            return emitOpLoadAligned(
-                parent,
-                inst,
-                inst->getDataType(),
-                inst->getPtr(),
-                SpvLiteralInteger::from32(sizeAndAlignment.alignment));
         }
-        else
+
+        // Determine alignment
         {
-            return emitLoadMaybeCoherent(parent, inst);
+            auto ptrType = as<IRPtrTypeBase>(ptr->getDataType());
+            if (ptrType && addressSpaceToStorageClass(ptrType->getAddressSpace()) ==
+                               SpvStorageClassPhysicalStorageBuffer)
+            {
+                IRSizeAndAlignment sizeAndAlignment;
+                if (alignedAttr)
+                    sizeAndAlignment.alignment = (int)getIntVal(alignedAttr->getAlignment());
+                else
+                    getNaturalSizeAndAlignment(
+                        m_targetProgram->getOptionSet(),
+                        ptrType->getValueType(),
+                        &sizeAndAlignment);
+
+                alignmentOut = sizeAndAlignment.alignment;
+                if (alignmentOut != -1)
+                    memoryAccessMaskOut |= SpvMemoryAccessAlignedMask;
+            }
         }
     }
 
-    SpvInst* emitLoadMaybeCoherent(SpvInstParent* parent, IRInst* inst)
+    SpvInst* emitLoad(SpvInstParent* parent, IRInst* inst, IRInst* ptr)
     {
-        IRBuilder builder{inst};
-        builder.setInsertBefore(inst);
-
-        SpvInst* deviceScope = nullptr;
-        IRInst* pointer = inst->getOperand(0);
-
-        bool coherentPointer = NeedToUseCoherentLoadOrStore(pointer);
-        if (coherentPointer)
-        {
-            requireSPIRVCapability(SpvCapabilityVulkanMemoryModelDeviceScope);
-            deviceScope = emitIntConstant(IRIntegerValue{SpvScopeDevice}, builder.getUIntType());
-        }
+        requireVariableBufferCapabilityIfNeeded(inst->getDataType());
 
+        IRBuilder builder(inst);
+        int memoryAccessMask = 0;
+        int alignment = -1;
+        MemoryScope memoryScope{};
+        getMemoryAccessOperandsOfLoadStore<MemoryAccessType::Load>(
+            inst,
+            ptr,
+            memoryAccessMask,
+            alignment,
+            memoryScope);
         return emitInstCustomOperandFunc(
             parent,
             inst,
@@ -7230,85 +7278,61 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex
             {
                 emitOperand(inst->getFullType());
                 emitOperand(kResultID);
-                emitOperand(pointer);
-
-                if (coherentPointer)
+                emitOperand(ptr);
+                if (memoryAccessMask)
                 {
-                    emitOperand(
-                        SpvMemoryAccessMakePointerVisibleMask |
-                        SpvMemoryAccessNonPrivatePointerMask);
-
-                    emitOperand(deviceScope);
+                    emitOperand(SpvLiteralInteger::from32(memoryAccessMask));
+                    if (memoryAccessMask & SpvMemoryAccessAlignedMask)
+                        emitOperand(SpvLiteralInteger::from32((uint32_t)alignment));
+                    if (memoryAccessMask & SpvMemoryAccessMakePointerVisibleMask)
+                        emitOperand(
+                            emitIntConstant((IRIntegerValue)memoryScope, builder.getIntType()));
                 }
             });
     }
 
-    SpvInst* emitStore(SpvInstParent* parent, IRStore* inst)
+    SpvInst* emitLoad(SpvInstParent* parent, IRLoad* inst)
     {
-        auto ptrType = as<IRPtrTypeBase>(inst->getPtr()->getDataType());
-        if (ptrType && addressSpaceToStorageClass(ptrType->getAddressSpace()) ==
-                           SpvStorageClassPhysicalStorageBuffer)
-        {
-            IRSizeAndAlignment sizeAndAlignment;
-            if (auto alignedAttr = inst->findAttr<IRAlignedAttr>())
-            {
-                sizeAndAlignment.alignment = (int)getIntVal(alignedAttr->getAlignment());
-            }
-            else
-            {
-                getNaturalSizeAndAlignment(
-                    m_targetProgram->getOptionSet(),
-                    ptrType->getValueType(),
-                    &sizeAndAlignment);
-            }
-            return emitOpStoreAligned(
-                parent,
-                inst,
-                inst->getPtr(),
-                inst->getVal(),
-                SpvLiteralInteger::from32(sizeAndAlignment.alignment));
-        }
-        else
-        {
-            return emitStoreMaybeCoherent(parent, inst);
-        }
+        return emitLoad(parent, inst, inst->getPtr());
     }
 
-    SpvInst* emitStoreMaybeCoherent(SpvInstParent* parent, IRInst* inst)
+    SpvInst* emitStore(SpvInstParent* parent, IRInst* inst, IRInst* ptr, IRInst* val)
     {
-        IRBuilder builder{inst};
-        builder.setInsertBefore(inst);
-
-        SpvInst* deviceScope = nullptr;
-        IRInst* pointer = inst->getOperand(0);
-        IRInst* object = inst->getOperand(1);
-
-        bool coherentPointer = NeedToUseCoherentLoadOrStore(pointer);
-        if (coherentPointer)
-        {
-            requireSPIRVCapability(SpvCapabilityVulkanMemoryModelDeviceScope);
-            deviceScope = emitIntConstant(IRIntegerValue{SpvScopeDevice}, builder.getUIntType());
-        }
+        requireVariableBufferCapabilityIfNeeded(inst->getDataType());
 
+        IRBuilder builder(inst);
+        int memoryAccessMask = 0;
+        int alignment = -1;
+        MemoryScope memoryScope{};
+        getMemoryAccessOperandsOfLoadStore<MemoryAccessType::Store>(
+            inst,
+            ptr,
+            memoryAccessMask,
+            alignment,
+            memoryScope);
         return emitInstCustomOperandFunc(
             parent,
             inst,
             SpvOpStore,
             [&]()
             {
-                emitOperand(pointer);
-                emitOperand(object);
-
-                if (coherentPointer)
+                emitOperand(ptr);
+                emitOperand(val);
+                if (memoryAccessMask)
                 {
-                    emitOperand(
-                        SpvMemoryAccessMakePointerAvailableMask |
-                        SpvMemoryAccessNonPrivatePointerMask);
-
-                    emitOperand(deviceScope);
+                    emitOperand(SpvLiteralInteger::from32(memoryAccessMask));
+                    if (memoryAccessMask & SpvMemoryAccessAlignedMask)
+                        emitOperand(SpvLiteralInteger::from32((uint32_t)alignment));
+                    if (memoryAccessMask & SpvMemoryAccessMakePointerAvailableMask)
+                        emitOperand(
+                            emitIntConstant((IRIntegerValue)memoryScope, builder.getIntType()));
                 }
             });
     }
+    SpvInst* emitStore(SpvInstParent* parent, IRStore* inst)
+    {
+        return emitStore(parent, inst, inst->getPtr(), inst->getVal());
+    }
 
     SpvInst* emitSwizzledStore(SpvInstParent* parent, IRSwizzledStore* inst)
     {
@@ -8613,6 +8637,8 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex
 
     SpvInst* emitDebugValue(SpvInstParent* parent, IRDebugValue* debugValue)
     {
+        auto debugVar = debugValue->getDebugVar();
+        auto debugValueVal = debugValue->getValue();
         // We are asked to update the value for a debug variable.
         // A debug variable is already emited as a OpDebugVariable +
         // OpVariable + OpDebugDeclare. We only need to store the new value
@@ -8628,7 +8654,7 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex
         // variable. If it doesn't, we can't emit a store.
         //
         List<IRInst*> irAccessChain;
-        auto rootVar = getRootAddr(debugValue->getDebugVar(), irAccessChain);
+        auto rootVar = getRootAddr(debugVar, irAccessChain);
         SpvInst* spvDebugVar = nullptr;
         if (!m_mapIRInstToSpvInst.tryGetValue(rootVar, spvDebugVar))
             return nullptr;
@@ -8644,7 +8670,7 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex
             // be fully static. We will skip emitting the debug inst if the access chain
             // isn't static.
             //
-            auto type = unwrapAttributedType(debugValue->getDebugVar()->getDataType());
+            auto type = unwrapAttributedType(debugVar->getDataType());
             List<SpvInst*> accessChain;
             bool isConstAccessChain =
                 translateIRAccessChain(builder, type, irAccessChain, accessChain);
@@ -8657,7 +8683,7 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex
                     m_voidType,
                     getNonSemanticDebugInfoExtInst(),
                     rootVar,
-                    debugValue->getValue(),
+                    debugValueVal,
                     getDwarfExpr(),
                     accessChain);
             }
@@ -8669,7 +8695,7 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex
         // The ordinary case is the debug variable has a backing ordinary variable.
         // We can simply emit a store into the backing variable for the DebugValue operation.
         //
-        return emitStoreMaybeCoherent(parent, debugValue);
+        return emitStore(parent, debugValue, debugVar, debugValueVal);
     }
 
     IRInst* getName(IRInst* inst)
diff --git a/source/slang/slang-ir-defer-buffer-load.cpp b/source/slang/slang-ir-defer-buffer-load.cpp
index 4736b4e65..3c8a9f4c7 100644
--- a/source/slang/slang-ir-defer-buffer-load.cpp
+++ b/source/slang/slang-ir-defer-buffer-load.cpp
@@ -151,9 +151,19 @@ struct DeferBufferLoadContext
 
     void deferBufferLoadInst(IRBuilder& builder, List<IRInst*>& workList, IRInst* loadInst)
     {
+        bool failDueToAttributeFound = false;
+        for (auto attr : loadInst->getAllAttrs())
+        {
+            if (as<IRAlignedAttr>(attr) || as<IRMemoryScopeAttr>(attr))
+            {
+                failDueToAttributeFound = true;
+                break;
+            }
+        }
+
         // Don't defer the load anymore if the type is simple.
-        if (!isTypePreferrableToDeferLoad(codeGenContext, loadInst->getDataType()) ||
-            loadInst->findAttr<IRAlignedAttr>())
+        if (failDueToAttributeFound ||
+            !isTypePreferrableToDeferLoad(codeGenContext, loadInst->getDataType()))
         {
             return;
         }
diff --git a/source/slang/slang-ir-insts-stable-names.lua b/source/slang/slang-ir-insts-stable-names.lua
index fefc7a956..a34dc346a 100644
--- a/source/slang/slang-ir-insts-stable-names.lua
+++ b/source/slang/slang-ir-insts-stable-names.lua
@@ -679,4 +679,5 @@ return {
 	["CastResourceToDescriptorHandle"] = 675,
 	["SymbolAlias"] = 676,
 	["Decoration.InParamProxyVar"] = 677,
+	["Attr.MemoryScope"] = 678,
 }
diff --git a/source/slang/slang-ir-insts.h b/source/slang/slang-ir-insts.h
index 5c27d5e25..2255afc67 100644
--- a/source/slang/slang-ir-insts.h
+++ b/source/slang/slang-ir-insts.h
@@ -2191,6 +2191,13 @@ struct IRAlignedAttr : IRAttr
 };
 
 FIDDLE()
+struct IRMemoryScopeAttr : IRAttr
+{
+    FIDDLE(leafInst())
+    IRInst* getMemoryScope() { return getOperand(0); }
+};
+
+FIDDLE()
 struct IRLoad : IRInst
 {
     FIDDLE(leafInst())
@@ -2242,6 +2249,17 @@ struct IRAtomicStore : IRAtomicOperation
 };
 
 FIDDLE()
+struct IRAtomicExchange : IRAtomicOperation
+{
+    FIDDLE(leafInst())
+    IRUse ptr;
+    IRUse val;
+
+    IRInst* getPtr() { return ptr.get(); }
+    IRInst* getVal() { return val.get(); }
+};
+
+FIDDLE()
 struct IRRWStructuredBufferStore : IRInst
 {
     FIDDLE(leafInst())
@@ -4365,7 +4383,7 @@ public:
 
     IRInst* emitLoad(IRType* type, IRInst* ptr);
     IRInst* emitLoad(IRType* type, IRInst* ptr, IRInst* align);
-    IRInst* emitLoad(IRType* type, IRInst* ptr, IRAlignedAttr* align);
+    IRInst* emitLoad(IRType* type, IRInst* ptr, ArrayView<IRInst*> attributes);
     IRInst* emitLoad(IRInst* ptr);
 
     IRInst* emitLoadReverseGradient(IRType* type, IRInst* diffValue);
@@ -4375,6 +4393,7 @@ public:
 
     IRInst* emitStore(IRInst* dstPtr, IRInst* srcVal);
     IRInst* emitStore(IRInst* dstPtr, IRInst* srcVal, IRInst* align);
+    IRInst* emitStore(IRInst* dstPtr, IRInst* srcVal, IRInst* align, IRInst* memoryScope);
 
     IRInst* emitAtomicStore(IRInst* dstPtr, IRInst* srcVal, IRInst* memoryOrder);
 
diff --git a/source/slang/slang-ir-insts.lua b/source/slang/slang-ir-insts.lua
index a4bb4a6f2..e21fc86ae 100644
--- a/source/slang/slang-ir-insts.lua
+++ b/source/slang/slang-ir-insts.lua
@@ -2018,6 +2018,7 @@ local insts = {
 				},
 			},
 			{ Aligned = { struct_name = "AlignedAttr", min_operands = 1 } },
+			{ MemoryScope = { struct_name = "MemoryScopeAttr", min_operands = 1 } },
 			{
 				SemanticAttr = {
 					{ userSemantic = { struct_name = "UserSemanticAttr", min_operands = 2 } },
diff --git a/source/slang/slang-ir-redundancy-removal.cpp b/source/slang/slang-ir-redundancy-removal.cpp
index 3b1a731f9..0308b50c2 100644
--- a/source/slang/slang-ir-redundancy-removal.cpp
+++ b/source/slang/slang-ir-redundancy-removal.cpp
@@ -416,6 +416,47 @@ static IRInst* _getRootVar(IRInst* inst)
     return inst;
 }
 
+// Higher value = broader scope; CrossDevice (7) is the broadest, Invocation (1) the narrowest.
+static int getMemoryScopeOrder(MemoryScope scope)
+{
+    switch (scope)
+    {
+    case MemoryScope::CrossDevice:
+        return 7;
+    case MemoryScope::Device:
+        return 6;
+    case MemoryScope::QueueFamily:
+        // https://docs.vulkan.org/spec/latest/chapters/shaders.html#shaders-scope-queue-family
+        return 5;
+    case MemoryScope::ShaderCall:
+        // https://docs.vulkan.org/spec/latest/chapters/shaders.html#shaders-scope-shadercall
+        return 4;
+    case MemoryScope::Workgroup:
+        return 3;
+    case MemoryScope::Subgroup:
+        return 2;
+    case MemoryScope::Invocation:
+    default:
+        return 1;
+    }
+}
+
+// Returns true if MemoryScope x is a sub-set of y, i.e. x ranks no broader than y.
+static bool isMemoryScopeSubsetOf(MemoryScope x, MemoryScope y)
+{
+    return getMemoryScopeOrder(x) <= getMemoryScopeOrder(y);
+}
+
+// Gets the scope from a load/store's IRMemoryScopeAttr, or Invocation when none is attached.
+static MemoryScope getMemoryScopeOfLoadStore(IRInst* inst)
+{
+    SLANG_ASSERT(as<IRLoad>(inst) || as<IRStore>(inst));
+    auto memoryScope = inst->findAttr<IRMemoryScopeAttr>();
+    if (!memoryScope)
+        return MemoryScope::Invocation;
+    return (MemoryScope)getIntVal(memoryScope->getMemoryScope());
+}
+
 bool tryRemoveRedundantStore(IRGlobalValueWithCode* func, IRStore* store)
 {
     // We perform a quick and conservative check:
@@ -473,15 +514,18 @@ bool tryRemoveRedundantStore(IRGlobalValueWithCode* func, IRStore* store)
         }
     }
 
-    // A store can be removed if there are subsequent stores to the same variable,
+    // This store can be removed if there are subsequent stores to the same variable,
     // and there are no insts in between the stores that can read the variable.
-
+    // Additionally, MemoryScope of the `store` must be a sub-set of `nextStore`,
+    // otherwise we can not be certain that `nextStore` completely overwrites `store`.
+    MemoryScope memoryScopeOfStore = getMemoryScopeOfLoadStore(store);
     HashSet<IRBlock*> visitedBlocks;
     for (auto next = store->getNextInst(); next;)
     {
         if (auto nextStore = as<IRStore>(next))
         {
-            if (nextStore->getPtr() == store->getPtr())
+            if (nextStore->getPtr() == store->getPtr() &&
+                isMemoryScopeSubsetOf(memoryScopeOfStore, getMemoryScopeOfLoadStore(nextStore)))
             {
                 hasOverridingStore = true;
                 break;
@@ -585,13 +629,21 @@ bool tryRemoveRedundantLoad(IRGlobalValueWithCode* func, IRLoad* load)
 {
     bool changed = false;
 
-    // If the load is preceeded by a store without any side-effect insts
-    // in-between, remove the load.
+    // Get the memory scope we are operating on.
+    MemoryScope memoryScopeOfLoad = getMemoryScopeOfLoadStore(load);
+
+    // We can replace a load with a `Store->getVal()` if that store is a super-set
+    // memory scope to our load.
+    // Ex 1: Store into Workgroup, load from Invocation. Load will be equal to the Store.
+    //
+    // Ex 2: Store into Invocation, load from Workgroup. Load may/may-not be equal to the Store
+    // since the cache managing the Workgroup scope may contain different data than the invocation.
     for (auto prev = load->getPrevInst(); prev; prev = prev->getPrevInst())
     {
         if (auto store = as<IRStore>(prev))
         {
-            if (store->getPtr() == load->getPtr())
+            if (store->getPtr() == load->getPtr() &&
+                isMemoryScopeSubsetOf(memoryScopeOfLoad, getMemoryScopeOfLoadStore(store)))
             {
                 auto value = store->getVal();
                 load->replaceUsesWith(value);
diff --git a/source/slang/slang-ir.cpp b/source/slang/slang-ir.cpp
index 7b7d5ec17..8371d6ef5 100644
--- a/source/slang/slang-ir.cpp
+++ b/source/slang/slang-ir.cpp
@@ -5212,18 +5212,20 @@ IRInst* IRBuilder::emitLoad(IRType* type, IRInst* ptr, IRInst* align)
     return inst;
 }
 
-IRInst* IRBuilder::emitLoad(IRType* type, IRInst* ptr, IRAlignedAttr* align)
+IRInst* IRBuilder::emitLoad(IRType* type, IRInst* ptr, ArrayView<IRInst*> attributes)
 {
-    if (align)
-    {
-        auto inst = createInst<IRLoad>(this, kIROp_Load, type, ptr, align);
-        addInst(inst);
-        return inst;
-    }
-    else
-    {
-        return emitLoad(type, ptr);
-    }
+    ShortList<IRInst*> params;
+    params.add(ptr);
+    params.addRange(attributes);
+    auto inst = createInst<IRLoad>(
+        this,
+        kIROp_Load,
+        type,
+        params.getCount(),
+        params.getArrayView().getBuffer());
+
+    addInst(inst);
+    return inst;
 }
 
 IRInst* IRBuilder::emitLoad(IRInst* ptr)
@@ -5279,6 +5281,21 @@ IRInst* IRBuilder::emitStore(IRInst* dstPtr, IRInst* srcVal, IRInst* align)
     return inst;
 }
 
+IRInst* IRBuilder::emitStore(IRInst* dstPtr, IRInst* srcVal, IRInst* align, IRInst* memoryScope)
+{
+    auto inst = createInst<IRStore>(
+        this,
+        kIROp_Store,
+        nullptr,
+        dstPtr,
+        srcVal,
+        getAttr(kIROp_AlignedAttr, align),
+        getAttr(kIROp_MemoryScopeAttr, memoryScope));
+
+    addInst(inst);
+    return inst;
+}
+
 IRInst* IRBuilder::emitAtomicStore(IRInst* dstPtr, IRInst* srcVal, IRInst* memoryOrder)
 {
     auto inst = createInst<IRAtomicStore>(
diff --git a/source/slang/slang-ir.h b/source/slang/slang-ir.h
index 54bf23754..4f9941946 100644
--- a/source/slang/slang-ir.h
+++ b/source/slang/slang-ir.h
@@ -2431,7 +2431,7 @@ public:
     // anything to do with serialization format
     //
     const static UInt k_minSupportedModuleVersion = 1;
-    const static UInt k_maxSupportedModuleVersion = 1;
+    const static UInt k_maxSupportedModuleVersion = 2;
     static_assert(k_minSupportedModuleVersion <= k_maxSupportedModuleVersion);
 
 private:
diff --git a/tests/cooperative-matrix/coherent-load-store-pointer.slang b/tests/cooperative-matrix/coherent-load-store-pointer.slang
new file mode 100644
index 000000000..6057ab41f
--- /dev/null
+++ b/tests/cooperative-matrix/coherent-load-store-pointer.slang
@@ -0,0 +1,34 @@
+//TEST:SIMPLE(filecheck=SPIRV):-stage compute -entry computeMain -target spirv
+//TEST(compute):COMPARE_COMPUTE(filecheck-buffer=CHECK):-vk -output-using-type -emit-spirv-directly
+
+// Ensure SPIRV emits coherent operations here
+// SPIRV: MakePointerVisible
+// SPIRV: MakePointerAvailable
+
+// CHECK: 1
+// CHECK-NEXT: 2
+// CHECK-NEXT: 3
+// CHECK-NEXT: 4
+// CHECK-NEXT: 5
+// CHECK-NEXT: 6
+// CHECK-NEXT: 7
+// CHECK-NEXT: 8
+
+//TEST_INPUT:ubuffer(data=[1 2 3 4 5 6 7 8], stride=4, count=256):name=inputBuffer
+uniform int32_t* inputBuffer;
+
+//TEST_INPUT:ubuffer(stride=4, count=256):out,name=outputBuffer
+uniform int32_t* outputBuffer;
+
+using namespace linalg;
+
+[numthreads(32, 1, 1)]
+void computeMain()
+{
+    int32_t* ptrIn = inputBuffer;
+    int32_t* ptrOut = outputBuffer;
+
+    let stride = 16;
+    let mat = coopMatLoadCoherent<int32_t, MemoryScope.Subgroup, 16, 16, CoopMatMatrixUse.MatrixAccumulator, CoopMatMatrixLayout.RowMajor>(ptrIn, 0, stride, MemoryScope::Device);
+    mat.StoreCoherent<CoopMatMatrixLayout.RowMajor>(ptrOut, 0, 16, MemoryScope::Device);
+}
+\ No newline at end of file
diff --git a/tests/cooperative-matrix/load-store-pointer.slang b/tests/cooperative-matrix/load-store-pointer.slang
new file mode 100644
index 000000000..2bbd8fef1
--- /dev/null
+++ b/tests/cooperative-matrix/load-store-pointer.slang
@@ -0,0 +1,35 @@
+//TEST:SIMPLE(filecheck=SPIRV):-stage compute -entry computeMain -target spirv
+//TEST(compute):COMPARE_COMPUTE(filecheck-buffer=CHECK):-vk -output-using-type -emit-spirv-directly
+
+// Ensure SPIRV does not do coherent operations here
+// SPIRV-NOT: MakePointerAvailable
+// SPIRV-NOT: MakePointerVisible
+
+
+// CHECK: 1
+// CHECK-NEXT: 2
+// CHECK-NEXT: 3
+// CHECK-NEXT: 4
+// CHECK-NEXT: 5
+// CHECK-NEXT: 6
+// CHECK-NEXT: 7
+// CHECK-NEXT: 8
+
+//TEST_INPUT:ubuffer(data=[1 2 3 4 5 6 7 8], stride=4, count=256):name=inputBuffer
+uniform int32_t* inputBuffer;
+
+//TEST_INPUT:ubuffer(stride=4, count=256):out,name=outputBuffer
+uniform int32_t* outputBuffer;
+
+using namespace linalg;
+
+[numthreads(32, 1, 1)]
+void computeMain()
+{
+    int32_t* ptrIn = inputBuffer;
+    int32_t* ptrOut = outputBuffer;
+
+    let stride = 16;
+    let mat = coopMatLoad<int32_t, MemoryScope.Subgroup, 16, 16, CoopMatMatrixUse.MatrixAccumulator, CoopMatMatrixLayout.RowMajor>(ptrIn, 0, stride);
+    mat.Store<CoopMatMatrixLayout.RowMajor>(ptrOut, 0, 16);
+}
+\ No newline at end of file
diff --git a/tests/cooperative-vector/coherent-load-store-pointer.slang b/tests/cooperative-vector/coherent-load-store-pointer.slang
new file mode 100644
index 000000000..40efeee1a
--- /dev/null
+++ b/tests/cooperative-vector/coherent-load-store-pointer.slang
@@ -0,0 +1,38 @@
+//TEST:SIMPLE(filecheck=SPIRV):-stage compute -entry computeMain -target spirv
+
+// coherent CoopVec operations crash the Nvidia driver.
+//TEST(compute):COMPARE_COMPUTE(filecheck-buffer=CHECK):-vk -render-feature cooperative-vector -emit-spirv-directly
+
+// Ensure SPIRV emits coherent operations here
+// SPIRV: MakePointerVisible
+// SPIRV: MakePointerAvailable
+
+//TEST_INPUT: set inputBuffer = ubuffer(data=[1 2 3 4 5 6 7 8 9 10 11 12], stride=4);
+uniform int32_t* inputBuffer;
+
+//TEST_INPUT: set outputBuffer = out ubuffer(data=[0 0 0 0 0 0 0 0], stride=4);
+uniform int32_t* outputBuffer;
+
+// CHECK: 9
+// CHECK-NEXT: A
+// CHECK-NEXT: B
+// CHECK-NEXT: C
+// CHECK-NEXT: 1
+// CHECK-NEXT: 2
+// CHECK-NEXT: 3
+// CHECK-NEXT: 4
+
+[shader("compute")]
+[numthreads(1, 1, 1)]
+void computeMain()
+{
+    //// First four elements of input.
+    let a = coopVecLoadCoherent<4, int32_t>(inputBuffer, 0, MemoryScope::Device);
+    //// Last four elements of input (elements 8..11).
+    let b = coopVecLoadCoherent<4, int32_t>(inputBuffer + 4, 4 * 4, MemoryScope::Device);
+    //// Store the last four input elements to the first half of the output buffer.
+    b.storeCoherent(outputBuffer, 0, MemoryScope::Device);
+    //// Store the first four input elements to the second half of the output buffer.
+    a.storeCoherent(outputBuffer, 4 * 4, MemoryScope::Device);
+}
+
diff --git a/tests/ir/dump-module-info.slang b/tests/ir/dump-module-info.slang
index c7753b440..67a43b274 100644
--- a/tests/ir/dump-module-info.slang
+++ b/tests/ir/dump-module-info.slang
@@ -6,7 +6,7 @@ module "foo";
 
 // CHECK: Module Name: foo
 // This will need bumping whenever we bump the ir module version
-// CHECK: Module Version: 1
+// CHECK: Module Version: 2
 // Just check that this is in the output with some string
 // CHECK: Compiler Version: {{.+}}
 
diff --git a/tests/language-feature/pointer/coherent-load-store-groupshared.slang b/tests/language-feature/pointer/coherent-load-store-groupshared.slang
new file mode 100644
index 000000000..2e537ef01
--- /dev/null
+++ b/tests/language-feature/pointer/coherent-load-store-groupshared.slang
@@ -0,0 +1,26 @@
+//TEST:SIMPLE(filecheck=SPIRV):-stage compute -entry computeMain -target spirv -capability vk_mem_model
+//TEST(compute):COMPARE_COMPUTE(filecheck-buffer=CHECK):-vk -emit-spirv-directly -capability vk_mem_model
+
+// Tests if we pass-through and handle groupshared address space pointers correctly.
+// Ensure SPIRV emits coherent operations here
+// SPIRV: MakePointerAvailable|NonPrivatePointer
+// SPIRV: MakePointerVisible|NonPrivatePointer
+
+// CHECK: 2
+// CHECK-NEXT: 1
+// CHECK-NEXT: 0
+
+//TEST_INPUT:ubuffer(data=[0 0 0], stride=4):out,name=outputBuffer
+RWStructuredBuffer<int> outputBuffer;
+
+groupshared int[32] shared;
+
+#define THREAD_GROUP_SIZE 3
+[numthreads(THREAD_GROUP_SIZE, 1, 1)]
+void computeMain(uint3 group_thread_id: SV_GroupThreadID)
+{
+    Ptr<int, Access::ReadWrite, AddressSpace::GroupShared> ptr = __getAddress(shared[0]);
+    storeCoherent<4, MemoryScope::Workgroup>(ptr + group_thread_id.x, (int)group_thread_id.x);
+    AllMemoryBarrierWithGroupSync();
+    outputBuffer[group_thread_id.x] = loadCoherent<4, MemoryScope::Workgroup>(ptr + THREAD_GROUP_SIZE - group_thread_id.x - 1);
+}
+\ No newline at end of file
diff --git a/tests/language-feature/pointer/coherent-load-store-image.slang b/tests/language-feature/pointer/coherent-load-store-image.slang
new file mode 100644
index 000000000..359994a0e
--- /dev/null
+++ b/tests/language-feature/pointer/coherent-load-store-image.slang
@@ -0,0 +1,29 @@
+//DISABLE_TEST:SIMPLE(filecheck=SPIRV):-stage compute -entry computeMain -target spirv -capability vk_mem_model
+//DISABLE_TEST(compute):COMPARE_COMPUTE(filecheck-buffer=CHECK):-vk -output-using-type -emit-spirv-directly -profile spirv_1_3 -capability vk_mem_model
+// These tests are expected to fail, pointers to texels are
+// currently a broken feature and do not work.
+// Additionally, we do not allow texel pointers with `__getAddress`.
+
+
+// Ensure SPIRV emits coherent operations here
+// SPIRV: MakeTexelAvailable
+// SPIRV: MakeTexelVisible
+
+// CHECK: 0
+// CHECK-NEXT: 5
+
+//TEST_INPUT: RWTexture1D(format=R32Uint, size=8, content = one, mipMaps = 1):name=texture
+RWTexture1D<uint> texture;
+
+//TEST_INPUT: ubuffer(data=[0 0 0 0], stride=4):out,name=outputBuffer
+RWStructuredBuffer<uint> outputBuffer;
+
+[numthreads(32, 1, 1)]
+void computeMain()
+{
+    Ptr<uint> ptrIn = __getAddress(texture[1]);
+    Ptr<uint> secondPtrIn = ptrIn;
+
+    storeCoherent<4, MemoryScope::Device>(ptrIn, 5);
+    outputBuffer[0] = loadCoherent<4, MemoryScope::Device>(ptrIn);
+}
diff --git a/tests/language-feature/pointer/coherent-load-store-physical-storage-buffer.slang b/tests/language-feature/pointer/coherent-load-store-physical-storage-buffer.slang
new file mode 100644
index 000000000..b70664d82
--- /dev/null
+++ b/tests/language-feature/pointer/coherent-load-store-physical-storage-buffer.slang
@@ -0,0 +1,24 @@
+//TEST:SIMPLE(filecheck=SPIRV):-stage compute -entry computeMain -target spirv -capability vk_mem_model
+//TEST(compute):COMPARE_COMPUTE(filecheck-buffer=CHECK):-vk -output-using-type -emit-spirv-directly -capability vk_mem_model
+
+// Ensure SPIRV emits coherent operations here
+// SPIRV: MakePointerVisible
+// SPIRV: MakePointerAvailable
+
+// CHECK: 2
+
+//TEST_INPUT:ubuffer(data=[1 2 3], stride=4):name=inputBuffer
+uniform int* inputBuffer;
+
+//TEST_INPUT:ubuffer(data=[0 0 0], stride=4):out,name=outputBuffer
+uniform int* outputBuffer;
+
+[shader("compute")]
+[numthreads(32, 1, 1)]
+void computeMain()
+{
+    Ptr<int> ptrIn = inputBuffer;
+    Ptr<int> secondPtrIn = ptrIn;
+    Ptr<int> ptrOut = outputBuffer;
+    storeCoherent<4, MemoryScope::Device>(ptrOut, loadCoherent<4, MemoryScope::Device>(&secondPtrIn[1]));
+}
+\ No newline at end of file
diff --git a/tests/language-feature/pointer/redundant-coherent-load.slang b/tests/language-feature/pointer/redundant-coherent-load.slang
new file mode 100644
index 000000000..e0c7d5e56
--- /dev/null
+++ b/tests/language-feature/pointer/redundant-coherent-load.slang
@@ -0,0 +1,56 @@
+//TEST:SIMPLE(filecheck=SPIRV):-stage compute -entry computeMain -target spirv -capability vk_mem_model
+//TEST(compute):COMPARE_COMPUTE(filecheck-buffer=CHECK):-vk -emit-spirv-directly -capability vk_mem_model -output-using-type
+
+// Tests if we optimize redundant loads correctly
+
+//TEST_INPUT:ubuffer(data=[0 0 0], stride=4):out,name=outputBuffer
+RWStructuredBuffer<int> outputBuffer;
+//TEST_INPUT:ubuffer(data=[0 0 0 11 10], stride=4):name=buffer
+uniform int* buffer;
+
+[numthreads(2, 1, 1)]
+void computeMain(uint3 group_thread_id: SV_GroupThreadID)
+{
+    Ptr<int, Access::ReadWrite, AddressSpace::Device> ptr = __getAddress(buffer[0]);
+    
+    // Consider the load from this store-load pattern as redundant since 
+    // the load is a sub-set memory-scope of the memory-scope of the store.
+    // Invocation == Invocation.
+    *ptr = 8;
+    outputBuffer[0] = loadCoherent<4, MemoryScope::Invocation>(ptr);
+    // CHECK: 8
+    // SPIRV: OpStore %ptr %int_8
+    // SPIRV-NOT: OpLoad
+    // SPIRV: %[[#OUTPUT_BUFFER1:]] = OpAccessChain {{.*}} %outputBuffer %{{.*}} %int_0
+    // SPIRV: OpStore %[[#OUTPUT_BUFFER1]] %int_8
+
+    // Consider the load from this store-load pattern as redundant since 
+    // the load is a sub-set memory-scope of the memory-scope of the store.
+    // Device > Workgroup.
+    let offset1 = ptr + 1;
+    storeCoherent<4, MemoryScope::Device>(offset1, 9);
+    outputBuffer[1] = loadCoherent<4, MemoryScope::Workgroup>(offset1);
+    // CHECK-NEXT: 9
+    // SPIRV: %[[#PTR_OFFSET:]] = OpPtrAccessChain {{.*}} %ptr %int_1
+    // SPIRV: OpStore %[[#PTR_OFFSET]] %int_9
+    // SPIRV-NOT: OpLoad
+    // SPIRV: %[[#OUTPUT_BUFFER2:]] = OpAccessChain {{.*}} %outputBuffer %{{.*}} %int_1
+    // SPIRV: OpStore %[[#OUTPUT_BUFFER2]] %int_9
+
+    // Consider the following store-load pattern as not redundant since the data stored
+    // may not be the same data that will be loaded if Workgroup-scope contains
+    // different data than the Subgroup-scope.
+    // Subgroup < Workgroup.
+    let offset2 = ptr + 2;
+    storeCoherent<4, MemoryScope::Subgroup>(offset2, buffer[3]);
+    if(group_thread_id.x == 1)
+    {
+        storeCoherent<4, MemoryScope::Invocation>(offset2, buffer[4]);
+        let result = loadCoherent<4, MemoryScope::Workgroup>(offset2);
+        outputBuffer[2] = (result == 11 || result == 10) ? 12 : 0;
+    }
+    // CHECK-NEXT: 12
+    // SPIRV: OpStore {{.*}}MakePointerAvailable{{.*}} 4 %int_3
+    // SPIRV: OpStore {{.*}}MakePointerAvailable{{.*}} 4 %int_4
+    // SPIRV: OpLoad {{.*}}MakePointerVisible{{.*}} 4 %int_2
+}
+\ No newline at end of file
diff --git a/tests/language-feature/pointer/redundant-coherent-store.slang b/tests/language-feature/pointer/redundant-coherent-store.slang
new file mode 100644
index 000000000..81cba3024
--- /dev/null
+++ b/tests/language-feature/pointer/redundant-coherent-store.slang
@@ -0,0 +1,40 @@
+//TEST:SIMPLE(filecheck=SPIRV):-stage compute -entry computeMain -target spirv -capability vk_mem_model
+//TEST(compute):COMPARE_COMPUTE(filecheck-buffer=CHECK):-vk -emit-spirv-directly -capability vk_mem_model
+
+// Tests if we optimize redundant stores correctly
+
+//TEST_INPUT:ubuffer(data=[0 0], stride=4):out,name=outputBuffer
+RWStructuredBuffer<int> outputBuffer;
+//TEST_INPUT:ubuffer(data=[0 0], stride=4):name=buffer
+uniform int* buffer;
+
+[numthreads(128, 1, 1)]
+void computeMain(uint3 group_thread_id: SV_GroupThreadID)
+{
+    Ptr<int, Access::ReadWrite, AddressSpace::Device> ptr = __getAddress(buffer[0]);
+    if (group_thread_id.x == 0)
+    {
+        // This store will not optimize out, Device > Invocation.
+        // SPIRV: OpStore %ptr %int_1
+        storeCoherent<4, MemoryScope::Device>(ptr, 1);
+        // SPIRV-NEXT: OpStore %ptr %int_2
+        storeCoherent<4, MemoryScope::Invocation>(ptr, 2);
+
+        // Both of these stores will optimize out, Workgroup > Invocation.
+        // SPIRV-NOT: OpStore {{.*}} %int_3
+        *(ptr + 1) = 3;
+        // SPIRV-NOT: OpStore {{.*}} %int_4
+        storeCoherent<4, MemoryScope::Invocation>(ptr + 1, 4);
+        // SPIRV: OpStore {{.*}} %int_5
+        storeCoherent<4, MemoryScope::Workgroup>(ptr + 1, 5);
+    }
+    AllMemoryBarrierWithGroupSync();
+    if (group_thread_id.x == 127)
+    {
+        // CHECK: 1
+        outputBuffer[0] = (*ptr == 1 || *ptr == 2) ? 1 : 0;
+
+        // CHECK-NEXT: 5
+        outputBuffer[1] = loadCoherent<4, MemoryScope::Workgroup>(ptr+1);
+    }
+}
+\ No newline at end of file