Addition of `Load`/`Store` coherent operations (#8395)

Fixes: https://github.com/shader-slang/slang/issues/7634 Duplicate of PR https://github.com/shader-slang/slang/pull/8052 Primary Changes: * Added `storeCoherent` and `loadCoherent` for coherent load/store via pointers. This is backed by `IRMemoryScopeAttr` which is an `IRAttr` attached to `IRLoad` and `IRStore` * Logic in `source\slang\slang-emit-spirv.cpp` for load/store emitting has been reworked to be less messy and more maintainable * Add to `hlsl.meta.slang` coop vector and coop matrix coherent load/store operations Secondary Changes: * Added a missing load/store test for coop matrix: `tests\cooperative-matrix\load-store-pointer.slang` --------- Co-authored-by: ArielG-NV <aglasroth@nvidia.com> Co-authored-by: ArielG-NV <159081215+ArielG-NV@users.noreply.github.com> Co-authored-by: slangbot <186143334+slangbot@users.noreply.github.com> Co-authored-by: Nathan V. Morrical <natemorrical@gmail.com>
author: 16-Bit-Dog <67922228+16-Bit-Dog@users.noreply.github.com> 2025-10-10 13:09:24 -0400
committer: GitHub <noreply@github.com> 2025-10-10 17:09:24 +0000
commit: 1e0908bd7107dfbdac912b693c3ab9bd6e1dc8b3 (patch)
tree: cc39d2e18abc954fb76f9a54b11a8d492685c6e2 /tests/language-feature/pointer
parent: b4023f715885ada9a2777ea3b0d6d9739860b39b (diff)
5 files changed, 175 insertions, 0 deletions
diff --git a/tests/language-feature/pointer/coherent-load-store-groupshared.slang b/tests/language-feature/pointer/coherent-load-store-groupshared.slang
new file mode 100644
index 000000000..2e537ef01
--- /dev/null
+++ b/tests/language-feature/pointer/coherent-load-store-groupshared.slang
@@ -0,0 +1,26 @@
+//TEST:SIMPLE(filecheck=SPIRV):-stage compute -entry computeMain -target spirv -capability vk_mem_model
+//TEST(compute):COMPARE_COMPUTE(filecheck-buffer=CHECK):-vk -emit-spirv-directly -capability vk_mem_model
+
+// Tests that groupshared address-space pointers are passed through and handled correctly.
+// Ensure SPIRV emits coherent operations here
+// SPIRV: MakePointerAvailable|NonPrivatePointer
+// SPIRV: MakePointerVisible|NonPrivatePointer
+
+// CHECK: 2
+// CHECK-NEXT: 1
+// CHECK-NEXT: 0
+
+//TEST_INPUT:ubuffer(data=[0 0 0], stride=4):out,name=outputBuffer
+RWStructuredBuffer<int> outputBuffer;
+
+groupshared int[32] shared;
+
+#define THREAD_GROUP_SIZE 3
+[numthreads(THREAD_GROUP_SIZE, 1, 1)]
+void computeMain(uint3 group_thread_id: SV_GroupThreadID)
+{
+    Ptr<int, Access::ReadWrite, AddressSpace::GroupShared> ptr = __getAddress(shared[0]); // pointer to the first groupshared element
+    storeCoherent<4, MemoryScope::Workgroup>(ptr + group_thread_id.x, (int)group_thread_id.x); // thread x writes the value x into slot x
+    AllMemoryBarrierWithGroupSync(); // every thread must have stored before any thread reads a slot
+    outputBuffer[group_thread_id.x] = loadCoherent<4, MemoryScope::Workgroup>(ptr + THREAD_GROUP_SIZE - group_thread_id.x - 1); // thread x reads the mirrored slot (2 - x), so the output is 2, 1, 0
+}
+\ No newline at end of file
diff --git a/tests/language-feature/pointer/coherent-load-store-image.slang b/tests/language-feature/pointer/coherent-load-store-image.slang
new file mode 100644
index 000000000..359994a0e
--- /dev/null
+++ b/tests/language-feature/pointer/coherent-load-store-image.slang
@@ -0,0 +1,29 @@
+//DISABLE_TEST:SIMPLE(filecheck=SPIRV):-stage compute -entry computeMain -target spirv -capability vk_mem_model
+//DISABLE_TEST(compute):COMPARE_COMPUTE(filecheck-buffer=CHECK):-vk -output-using-type -emit-spirv-directly -profile spirv_1_3 -capability vk_mem_model
+// These tests are disabled because they are expected to fail: pointers to
+// texels are currently broken and do not work.
+// Additionally, `__getAddress` does not allow taking the address of a texel.
+
+
+// Ensure SPIRV emits coherent operations here
+// SPIRV: MakeTexelAvailable
+// SPIRV: MakeTexelVisible
+
+// CHECK: 0
+// CHECK-NEXT: 5
+
+//TEST_INPUT: RWTexture1D(format=R32Uint, size=8, content = one, mipMaps = 1):name=texture
+RWTexture1D<uint> texture;
+
+//TEST_INPUT: ubuffer(data=[0 0 0 0], stride=4):out,name=outputBuffer
+RWStructuredBuffer<uint> outputBuffer;
+
+[numthreads(32, 1, 1)]
+void computeMain()
+{
+    Ptr<uint> ptrIn = __getAddress(texture[1]); // pointer to texel 1 of the 1D texture
+    Ptr<uint> secondPtrIn = ptrIn; // NOTE(review): copy of ptrIn that is never read in this test
+
+    storeCoherent<4, MemoryScope::Device>(ptrIn, 5); // coherent store of 5 through the texel pointer
+    outputBuffer[0] = loadCoherent<4, MemoryScope::Device>(ptrIn); // coherent read-back of the same texel into the output
+}
diff --git a/tests/language-feature/pointer/coherent-load-store-physical-storage-buffer.slang b/tests/language-feature/pointer/coherent-load-store-physical-storage-buffer.slang
new file mode 100644
index 000000000..b70664d82
--- /dev/null
+++ b/tests/language-feature/pointer/coherent-load-store-physical-storage-buffer.slang
@@ -0,0 +1,24 @@
+//TEST:SIMPLE(filecheck=SPIRV):-stage compute -entry computeMain -target spirv -capability vk_mem_model
+//TEST(compute):COMPARE_COMPUTE(filecheck-buffer=CHECK):-vk -output-using-type -emit-spirv-directly -capability vk_mem_model
+
+// Ensure SPIRV emits coherent operations here
+// SPIRV: MakePointerVisible
+// SPIRV: MakePointerAvailable
+
+// CHECK: 2
+
+//TEST_INPUT:ubuffer(data=[1 2 3], stride=4):name=inputBuffer
+uniform int* inputBuffer;
+
+//TEST_INPUT:ubuffer(data=[0 0 0], stride=4):out,name=outputBuffer
+uniform int* outputBuffer;
+
+[shader("compute")]
+[numthreads(32, 1, 1)]
+void computeMain()
+{
+    Ptr<int> ptrIn = inputBuffer;
+    Ptr<int> secondPtrIn = ptrIn; // second copy of the input pointer; the load below goes through this copy
+    Ptr<int> ptrOut = outputBuffer;
+    storeCoherent<4, MemoryScope::Device>(ptrOut, loadCoherent<4, MemoryScope::Device>(&secondPtrIn[1])); // coherently load inputBuffer[1] (2 in the test data) and coherently store it to outputBuffer[0]
+}
+\ No newline at end of file
diff --git a/tests/language-feature/pointer/redundant-coherent-load.slang b/tests/language-feature/pointer/redundant-coherent-load.slang
new file mode 100644
index 000000000..e0c7d5e56
--- /dev/null
+++ b/tests/language-feature/pointer/redundant-coherent-load.slang
@@ -0,0 +1,56 @@
+//TEST:SIMPLE(filecheck=SPIRV):-stage compute -entry computeMain -target spirv -capability vk_mem_model
+//TEST(compute):COMPARE_COMPUTE(filecheck-buffer=CHECK):-vk -emit-spirv-directly -capability vk_mem_model -output-using-type
+
+// Tests that redundant loads are optimized correctly
+
+//TEST_INPUT:ubuffer(data=[0 0 0], stride=4):out,name=outputBuffer
+RWStructuredBuffer<int> outputBuffer;
+//TEST_INPUT:ubuffer(data=[0 0 0 11 10], stride=4),name=buffer
+uniform int* buffer;
+
+[numthreads(2, 1, 1)]
+void computeMain(uint3 group_thread_id: SV_GroupThreadID)
+{
+    Ptr<int, Access::ReadWrite, AddressSpace::Device> ptr = __getAddress(buffer[0]);
+    
+    // The load in this store-load pattern is redundant: the load's memory scope
+    // is a subset of (here, equal to) the memory scope of the preceding store.
+    // Invocation == Invocation.
+    *ptr = 8;
+    outputBuffer[0] = loadCoherent<4, MemoryScope::Invocation>(ptr);
+    // CHECK: 8
+    // SPIRV: OpStore %ptr %int_8
+    // SPIRV-NOT: OpLoad
+    // SPIRV: %[[#OUTPUT_BUFFER1:]] = OpAccessChain {{.*}} %outputBuffer %{{.*}} %int_0
+    // SPIRV: OpStore %[[#OUTPUT_BUFFER1]] %int_8
+
+    // The load in this store-load pattern is also redundant: the load's memory
+    // scope (Workgroup) is a subset of the store's memory scope (Device).
+    // Device > Workgroup.
+    let offset1 = ptr + 1;
+    storeCoherent<4, MemoryScope::Device>(offset1, 9);
+    outputBuffer[1] = loadCoherent<4, MemoryScope::Workgroup>(offset1);
+    // CHECK-NEXT: 9
+    // SPIRV: %[[#PTR_OFFSET:]] = OpPtrAccessChain {{.*}} %ptr %int_1
+    // SPIRV: OpStore %[[#PTR_OFFSET]] %int_9
+    // SPIRV-NOT: OpLoad
+    // SPIRV: %[[#OUTPUT_BUFFER2:]] = OpAccessChain {{.*}} %outputBuffer %{{.*}} %int_1
+    // SPIRV: OpStore %[[#OUTPUT_BUFFER2]] %int_9
+
+    // This store-load pattern is NOT redundant: the value stored may differ from
+    // the value that is later loaded, because the Workgroup scope may contain
+    // different data than the Subgroup scope.
+    // Subgroup < Workgroup.
+    let offset2 = ptr + 2;
+    storeCoherent<4, MemoryScope::Subgroup>(offset2, buffer[3]);
+    if(group_thread_id.x == 1)
+    {
+        storeCoherent<4, MemoryScope::Invocation>(offset2, buffer[4]);
+        let result = loadCoherent<4, MemoryScope::Workgroup>(offset2);
+        outputBuffer[2] = (result == 11 || result == 10) ? 12 : 0;
+    }
+    // CHECK-NEXT: 12
+    // SPIRV: OpStore {{.*}}MakePointerAvailable{{.*}} 4 %int_3
+    // SPIRV: OpStore {{.*}}MakePointerAvailable{{.*}} 4 %int_4
+    // SPIRV: OpLoad {{.*}}MakePointerVisible{{.*}} 4 %int_2
+}
+\ No newline at end of file
diff --git a/tests/language-feature/pointer/redundant-coherent-store.slang b/tests/language-feature/pointer/redundant-coherent-store.slang
new file mode 100644
index 000000000..81cba3024
--- /dev/null
+++ b/tests/language-feature/pointer/redundant-coherent-store.slang
@@ -0,0 +1,40 @@
+//TEST:SIMPLE(filecheck=SPIRV):-stage compute -entry computeMain -target spirv -capability vk_mem_model
+//TEST(compute):COMPARE_COMPUTE(filecheck-buffer=CHECK):-vk -emit-spirv-directly -capability vk_mem_model
+
+// Tests that redundant stores are optimized correctly
+
+//TEST_INPUT:ubuffer(data=[0 0], stride=4):out,name=outputBuffer
+RWStructuredBuffer<int> outputBuffer;
+//TEST_INPUT:ubuffer(data=[0 0], stride=4),name=buffer
+uniform int* buffer;
+
+[numthreads(128, 1, 1)]
+void computeMain(uint3 group_thread_id: SV_GroupThreadID)
+{
+    Ptr<int, Access::ReadWrite, AddressSpace::Device> ptr = __getAddress(buffer[0]);
+    if (group_thread_id.x == 0)
+    {
+        // This store is not optimized out: its scope is wider than the next store's, Device > Invocation.
+        // SPIRV: OpStore %ptr %int_1
+        storeCoherent<4, MemoryScope::Device>(ptr, 1);
+        // SPIRV-NEXT: OpStore %ptr %int_2
+        storeCoherent<4, MemoryScope::Invocation>(ptr, 2);
+
+        // Both of these stores are optimized out: the final store's scope is wider, Workgroup > Invocation.
+        // SPIRV-NOT: OpStore {{.*}} %int_3
+        *(ptr + 1) = 3;
+        // SPIRV-NOT: OpStore {{.*}} %int_4
+        storeCoherent<4, MemoryScope::Invocation>(ptr + 1, 4);
+        // SPIRV: OpStore {{.*}} %int_5
+        storeCoherent<4, MemoryScope::Workgroup>(ptr + 1, 5);
+    }
+    AllMemoryBarrierWithGroupSync();
+    if (group_thread_id.x == 127)
+    {
+        // CHECK: 1
+        outputBuffer[0] = (*ptr == 1 || *ptr == 2) ? 1 : 0;
+
+        // CHECK-NEXT: 5
+        outputBuffer[1] = loadCoherent<4, MemoryScope::Workgroup>(ptr+1);
+    }
+}
+\ No newline at end of file
author	16-Bit-Dog <67922228+16-Bit-Dog@users.noreply.github.com>	2025-10-10 13:09:24 -0400
committer	GitHub <noreply@github.com>	2025-10-10 17:09:24 +0000
commit	1e0908bd7107dfbdac912b693c3ab9bd6e1dc8b3 (patch)
tree	cc39d2e18abc954fb76f9a54b11a8d492685c6e2 /tests/language-feature/pointer
parent	b4023f715885ada9a2777ea3b0d6d9739860b39b (diff)