diff options
| author | 16-Bit-Dog <67922228+16-Bit-Dog@users.noreply.github.com> | 2025-10-10 13:09:24 -0400 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-10-10 17:09:24 +0000 |
| commit | 1e0908bd7107dfbdac912b693c3ab9bd6e1dc8b3 (patch) | |
| tree | cc39d2e18abc954fb76f9a54b11a8d492685c6e2 /tests/language-feature/pointer | |
| parent | b4023f715885ada9a2777ea3b0d6d9739860b39b (diff) | |
Addition of `Load`/`Store` coherent operations (#8395)
Fixes: https://github.com/shader-slang/slang/issues/7634
Duplicate of PR https://github.com/shader-slang/slang/pull/8052
Primary Changes:
* Added `storeCoherent` and `loadCoherent` for coherent load/store via
pointers. These are backed by `IRMemoryScopeAttr`, an `IRAttr` that is
attached to `IRLoad` and `IRStore`.
* The load/store emission logic in `source/slang/slang-emit-spirv.cpp`
has been reworked to be less messy and more maintainable.
* Added cooperative-vector and cooperative-matrix coherent load/store
operations to `hlsl.meta.slang`.
Secondary Changes:
* Added a missing load/store test for cooperative matrices:
`tests/cooperative-matrix/load-store-pointer.slang`
---------
Co-authored-by: ArielG-NV <aglasroth@nvidia.com>
Co-authored-by: ArielG-NV <159081215+ArielG-NV@users.noreply.github.com>
Co-authored-by: slangbot <186143334+slangbot@users.noreply.github.com>
Co-authored-by: Nathan V. Morrical <natemorrical@gmail.com>
Diffstat (limited to 'tests/language-feature/pointer')
5 files changed, 175 insertions, 0 deletions
diff --git a/tests/language-feature/pointer/coherent-load-store-groupshared.slang b/tests/language-feature/pointer/coherent-load-store-groupshared.slang new file mode 100644 index 000000000..2e537ef01 --- /dev/null +++ b/tests/language-feature/pointer/coherent-load-store-groupshared.slang @@ -0,0 +1,26 @@ +//TEST:SIMPLE(filecheck=SPIRV):-stage compute -entry computeMain -target spirv -capability vk_mem_model +//TEST(compute):COMPARE_COMPUTE(filecheck-buffer=CHECK):-vk -emit-spirv-directly -capability vk_mem_model + +// Tests if we pass-through and handle groupshared address space pointers correctly. +// Ensure SPIRV emits coherent operations here +// SPIRV: MakePointerAvailable|NonPrivatePointer +// SPIRV: MakePointerVisible|NonPrivatePointer + +// CHECK: 2 +// CHECK-NEXT: 1 +// CHECK-NEXT: 0 + +//TEST_INPUT:ubuffer(data=[0 0 0], stride=4):out,name=outputBuffer +RWStructuredBuffer<int> outputBuffer; + +groupshared int[32] shared; + +#define THREAD_GROUP_SIZE 3 +[numthreads(THREAD_GROUP_SIZE, 1, 1)] +void computeMain(uint3 group_thread_id: SV_GroupThreadID) +{ + Ptr<int, Access::ReadWrite, AddressSpace::GroupShared> ptr = __getAddress(shared[0]); + storeCoherent<4, MemoryScope::Workgroup>(ptr + group_thread_id.x, (int)group_thread_id.x); + AllMemoryBarrierWithGroupSync(); + outputBuffer[group_thread_id.x] = loadCoherent<4, MemoryScope::Workgroup>(ptr + THREAD_GROUP_SIZE - group_thread_id.x - 1); +}
\ No newline at end of file diff --git a/tests/language-feature/pointer/coherent-load-store-image.slang b/tests/language-feature/pointer/coherent-load-store-image.slang new file mode 100644 index 000000000..359994a0e --- /dev/null +++ b/tests/language-feature/pointer/coherent-load-store-image.slang @@ -0,0 +1,29 @@ +//DISABLE_TEST:SIMPLE(filecheck=SPIRV):-stage compute -entry computeMain -target spirv -capability vk_mem_model +//DISABLE_TEST(compute):COMPARE_COMPUTE(filecheck-buffer=CHECK):-vk -output-using-type -emit-spirv-directly -profile spirv_1_3 -capability vk_mem_model +// These tests are expected to fail, pointers to texels are +// currently a broken feature and do not work. +// Additionally, we do not allow texel pointers with `__getAddress`. + + +// Ensure SPIRV emits coherent operations here +// SPIRV: MakeTexelAvailable +// SPIRV: MakeTexelVisible + +// CHECK: 0 +// CHECK-NEXT: 5 + +//TEST_INPUT: RWTexture1D(format=R32Uint, size=8, content = one, mipMaps = 1):name=texture +RWTexture1D<uint> texture; + +//TEST_INPUT: ubuffer(data=[0 0 0 0], stride=4):out,name outputBuffer +RWStructuredBuffer<uint> outputBuffer; + +[numthreads(32, 1, 1)] +void computeMain() +{ + Ptr<uint> ptrIn = __getAddress(texture[1]); + Ptr<uint> secondPtrIn = ptrIn; + + storeCoherent<4, MemoryScope::Device>(ptrIn, 5); + outputBuffer[0] = loadCoherent<4, MemoryScope::Device>(ptrIn); +} diff --git a/tests/language-feature/pointer/coherent-load-store-physical-storage-buffer.slang b/tests/language-feature/pointer/coherent-load-store-physical-storage-buffer.slang new file mode 100644 index 000000000..b70664d82 --- /dev/null +++ b/tests/language-feature/pointer/coherent-load-store-physical-storage-buffer.slang @@ -0,0 +1,24 @@ +//TEST:SIMPLE(filecheck=SPIRV):-stage compute -entry computeMain -target spirv -capability vk_mem_model +//TEST(compute):COMPARE_COMPUTE(filecheck-buffer=CHECK):-vk -output-using-type -emit-spirv-directly -capability vk_mem_model + +// Ensure SPIRV emits coherent 
operations here +// SPIRV: MakePointerVisible +// SPIRV: MakePointerAvailable + +// CHECK: 2 + +//TEST_INPUT:ubuffer(data=[1 2 3], stride=4):name=inputBuffer +uniform int* inputBuffer; + +//TEST_INPUT:ubuffer(data=[0 0 0], stride=4):out,name=outputBuffer +uniform int* outputBuffer; + +[shader("compute")] +[numthreads(32, 1, 1)] +void computeMain() +{ + Ptr<int> ptrIn = inputBuffer; + Ptr<int> secondPtrIn = ptrIn; + Ptr<int> ptrOut = outputBuffer; + storeCoherent<4, MemoryScope::Device>(ptrOut, loadCoherent<4, MemoryScope::Device>(&secondPtrIn[1])); +}
\ No newline at end of file diff --git a/tests/language-feature/pointer/redundant-coherent-load.slang b/tests/language-feature/pointer/redundant-coherent-load.slang new file mode 100644 index 000000000..e0c7d5e56 --- /dev/null +++ b/tests/language-feature/pointer/redundant-coherent-load.slang @@ -0,0 +1,56 @@ +//TEST:SIMPLE(filecheck=SPIRV):-stage compute -entry computeMain -target spirv -capability vk_mem_model +//TEST(compute):COMPARE_COMPUTE(filecheck-buffer=CHECK):-vk -emit-spirv-directly -capability vk_mem_model -output-using-type + +// Tests if we optimize redundant load's correctly + +//TEST_INPUT:ubuffer(data=[0 0 0], stride=4):out,name=outputBuffer +RWStructuredBuffer<int> outputBuffer; +//TEST_INPUT:ubuffer(data=[0 0 0 11 10], stride=4),name=buffer +uniform int* buffer; + +[numthreads(2, 1, 1)] +void computeMain(uint3 group_thread_id: SV_GroupThreadID) +{ + Ptr<int, Access::ReadWrite, AddressSpace::Device> ptr = __getAddress(buffer[0]); + + // Consider the load from this store-load pattern as redundant since + // the load is a sub-set memory-scope of the memory-scope of the store. + // Invocation == Invocation. + *ptr = 8; + outputBuffer[0] = loadCoherent<4, MemoryScope::Invocation>(ptr); + // CHECK: 8 + // SPIRV: OpStore %ptr %int_8 + // SPIRV-NOT: OpLoad + // SPIRV: %[[#OUTPUT_BUFFER1:]] = OpAccessChain {{.*}} %outputBuffer %{{.*}} %int_0 + // SPIRV: OpStore %[[#OUTPUT_BUFFER1]] %int_8 + + // Consider the load from this store-load pattern as redundant since + // the load is a sub-set memory-scope of the memory-scope of the store. + // Device > Workgroup. 
+ let offset1 = ptr + 1; + storeCoherent<4, MemoryScope::Device>(offset1, 9); + outputBuffer[1] = loadCoherent<4, MemoryScope::Workgroup>(offset1); + // CHECK-NEXT: 9 + // SPIRV: %[[#PTR_OFFSET:]] = OpPtrAccessChain {{.*}} %ptr %int_1 + // SPIRV: OpStore %[[#PTR_OFFSET]] %int_9 + // SPIRV-NOT: OpLoad + // SPIRV: %[[#OUTPUT_BUFFER2:]] = OpAccessChain {{.*}} %outputBuffer %{{.*}} %int_1 + // SPIRV: OpStore %[[#OUTPUT_BUFFER2]] %int_9 + + // Consider the following store-load pattern as not redundant since the data stored + // may not be the same data that will be loaded if Workgroup-scope contains + // different data than the Subgroup-scope. + // Subgroup < Workgroup. + let offset2 = ptr + 2; + storeCoherent<4, MemoryScope::Subgroup>(offset2, buffer[3]); + if(group_thread_id.x == 1) + { + storeCoherent<4, MemoryScope::Invocation>(offset2, buffer[4]); + let result = loadCoherent<4, MemoryScope::Workgroup>(offset2); + outputBuffer[2] = (result == 11 || result == 10) ? 12 : 0; + } + // CHECK-NEXT: 12 + // SPIRV: OpStore {{.*}}MakePointerAvailable{{.*}} 4 %int_3 + // SPIRV: OpStore {{.*}}MakePointerAvailable{{.*}} 4 %int_4 + // SPIRV: OpLoad {{.*}}MakePointerVisible{{.*}} 4 %int_2 +}
\ No newline at end of file diff --git a/tests/language-feature/pointer/redundant-coherent-store.slang b/tests/language-feature/pointer/redundant-coherent-store.slang new file mode 100644 index 000000000..81cba3024 --- /dev/null +++ b/tests/language-feature/pointer/redundant-coherent-store.slang @@ -0,0 +1,40 @@ +//TEST:SIMPLE(filecheck=SPIRV):-stage compute -entry computeMain -target spirv -capability vk_mem_model +//TEST(compute):COMPARE_COMPUTE(filecheck-buffer=CHECK):-vk -emit-spirv-directly -capability vk_mem_model + +// Tests if we optimize redundant store's correctly + +//TEST_INPUT:ubuffer(data=[0 0], stride=4):out,name=outputBuffer +RWStructuredBuffer<int> outputBuffer; +//TEST_INPUT:ubuffer(data=[0 0], stride=4),name=buffer +uniform int* buffer; + +[numthreads(128, 1, 1)] +void computeMain(uint3 group_thread_id: SV_GroupThreadID) +{ + Ptr<int, Access::ReadWrite, AddressSpace::Device> ptr = __getAddress(buffer[0]); + if (group_thread_id.x == 0) + { + // This store will not optimize out, Device > Invocation. + // SPIRV: OpStore %ptr %int_1 + storeCoherent<4, MemoryScope::Device>(ptr, 1); + // SPIRV-NEXT: OpStore %ptr %int_2 + storeCoherent<4, MemoryScope::Invocation>(ptr, 2); + + // Both of these stores will optimize out, Subgroup > Invocation. + // SPIRV-NOT: OpStore {{.*}} %int_3 + *(ptr + 1) = 3; + // SPIRV-NOT: OpStore {{.*}} %int_4 + storeCoherent<4, MemoryScope::Invocation>(ptr + 1, 4); + // SPIRV: OpStore {{.*}} %int_5 + storeCoherent<4, MemoryScope::Workgroup>(ptr + 1, 5); + } + AllMemoryBarrierWithGroupSync(); + if (group_thread_id.x == 127) + { + // CHECK: 1 + outputBuffer[0] = (*ptr == 1 || *ptr == 2) ? 1 : 0; + + // CHECK-NEXT: 5 + outputBuffer[1] = loadCoherent<4, MemoryScope::Workgroup>(ptr+1); + } +}
\ No newline at end of file |
