summaryrefslogtreecommitdiffstats
path: root/tests
diff options
context:
space:
mode:
authorJay Kwak <82421531+jkwak-work@users.noreply.github.com>2024-06-25 22:07:41 -0700
committerGitHub <noreply@github.com>2024-06-25 22:07:41 -0700
commit969dd4cc7246bfe89103efcb00f399606e804e98 (patch)
tree6b44527d72a08f4b39848bf5cc2efe03ed5e6c90 /tests
parent63e0064bd3a2007adf17a35d3c58894d90ddc04a (diff)
Support atomic intrinsics for Metal (#4473)
* Support atomic intrinsics for Metal This commit adds a support for the atomic intrinsics in Metal. The atomic member functions for buffers is not implemented yet. Metal requires the first argument for the atomic functions to be an atomic data type. This implementation rely on the fact that we can do a C-style type casting from a regular data type to an atomic data type.
Diffstat (limited to 'tests')
-rw-r--r--tests/bugs/atomic-coerce.slang2
-rw-r--r--tests/compute/atomics-groupshared.slang2
-rw-r--r--tests/compute/atomics.slang2
-rw-r--r--tests/metal/atomic-intrinsics.slang352
4 files changed, 355 insertions, 3 deletions
diff --git a/tests/bugs/atomic-coerce.slang b/tests/bugs/atomic-coerce.slang
index 2fe927355..bfb0eeb63 100644
--- a/tests/bugs/atomic-coerce.slang
+++ b/tests/bugs/atomic-coerce.slang
@@ -1,6 +1,6 @@
//TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -shaderobj
//TEST(compute,vulkan):COMPARE_COMPUTE_EX:-vk -slang -compute -shaderobj
-//DISABLE_TEST(compute):COMPARE_COMPUTE:-slang -shaderobj -mtl
+//TEST(compute):COMPARE_COMPUTE:-slang -shaderobj -mtl
//TEST_INPUT:ubuffer(data=[0 0 0 0 ], stride=4):out,name outputBuffer
RWStructuredBuffer<int> outputBuffer;
diff --git a/tests/compute/atomics-groupshared.slang b/tests/compute/atomics-groupshared.slang
index fcfc9c8d7..a01f7bf6a 100644
--- a/tests/compute/atomics-groupshared.slang
+++ b/tests/compute/atomics-groupshared.slang
@@ -4,7 +4,7 @@
//TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -dx12 -shaderobj
//TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -vk -shaderobj
//TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -cuda -shaderobj
-//DISABLE_TEST(compute):COMPARE_COMPUTE:-slang -shaderobj -mtl
+//TEST(compute):COMPARE_COMPUTE:-slang -shaderobj -mtl
//TEST_INPUT:ubuffer(data=[0 0 0 0], stride=4):out,name outputBuffer
diff --git a/tests/compute/atomics.slang b/tests/compute/atomics.slang
index b00f437f5..ee02c623f 100644
--- a/tests/compute/atomics.slang
+++ b/tests/compute/atomics.slang
@@ -4,7 +4,7 @@
//TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -dx12 -shaderobj
//TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -vk -shaderobj
//TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -cuda -shaderobj
-//DISABLE_TEST(compute):COMPARE_COMPUTE:-slang -shaderobj -mtl
+//TEST(compute):COMPARE_COMPUTE:-slang -shaderobj -mtl
//TEST_INPUT:ubuffer(data=[0 0 0 0], stride=4):out, name outputBuffer
diff --git a/tests/metal/atomic-intrinsics.slang b/tests/metal/atomic-intrinsics.slang
new file mode 100644
index 000000000..3533ea2aa
--- /dev/null
+++ b/tests/metal/atomic-intrinsics.slang
@@ -0,0 +1,352 @@
+//TEST:SIMPLE(filecheck=MTL):-target metal -entry computeMain -stage compute -DMETAL
+//TEST:SIMPLE(filecheck=LIB):-target metallib -entry computeMain -stage compute -DMETAL
+//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=CHK):-slang -compute -dx12 -profile cs_6_0 -use-dxil -shaderobj -output-using-type
+//TEST(compute, vulkan):COMPARE_COMPUTE_EX(filecheck-buffer=CHK):-vk -emit-spirv-directly -compute -shaderobj -output-using-type
+
+//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-cpu -compute -shaderobj -output-using-type
+//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-cuda -compute -shaderobj -output-using-type
+
+
+//TEST_INPUT:ubuffer(data=[0 1 2 3], stride=4):name=uintBuffer
+RWStructuredBuffer<uint> uintBuffer;
+//TEST_INPUT:ubuffer(data=[0 1 2 3], stride=4):name=intBuffer
+RWStructuredBuffer<int> intBuffer;
+
+groupshared uint shareMemUI[4];
+groupshared int shareMemI[4];
+
+//TEST_INPUT: ubuffer(data=[0 0 0 0], stride=4):out,name outputBuffer
+RWStructuredBuffer<float> outputBuffer;
+
+[numthreads(4, 1, 1)]
+void computeMain(uint groupIndex : SV_GroupIndex)
+{
+ if (groupIndex == 0)
+ {
+ for (int i = 0; i < 4; ++i)
+ {
+ shareMemUI[i] = 0U;
+ shareMemI[i] = 0;
+ }
+ }
+ AllMemoryBarrierWithGroupSync();
+
+ int idx = groupIndex;
+ float val = 0.0f;
+
+ // InterlockedAdd
+ //MTL: atomic_uint threadgroup* {{.*}}shareMemUI
+ //LIB: call {{.*}}.atomic.local.add.u.i32
+ InterlockedAdd(shareMemUI[idx], uint(1));
+ val += shareMemUI[idx];
+
+ //MTL: atomic_int threadgroup* {{.*}}shareMemI
+ //LIB: call {{.*}}.atomic.local.add.s.i32
+ InterlockedAdd(shareMemI[idx], 2);
+ val += shareMemI[idx];
+
+ //MTL: atomic_uint device* {{.*}}uintBuffer
+ //LIB: call {{.*}}.atomic.global.add.u.i32
+ InterlockedAdd(uintBuffer[idx], 1);
+ val += uintBuffer[idx];
+
+ //MTL: atomic_int device* {{.*}}intBuffer
+ //LIB: call {{.*}}.atomic.global.add.s.i32
+ InterlockedAdd(intBuffer[idx], 2);
+ val += intBuffer[idx];
+
+ //LIB: call {{.*}}.atomic.local.add.s.i32
+ InterlockedAdd(shareMemI[idx], -1);
+ val += shareMemI[idx];
+
+ //LIB: call {{.*}}.atomic.global.add.s.i32
+ InterlockedAdd(intBuffer[idx], -1);
+ val += intBuffer[idx];
+
+ // InterlockedAdd - original_value
+ uint origui = 0;
+ //LIB: call {{.*}}.atomic.local.add.u.i32
+ InterlockedAdd(shareMemUI[idx], 1, origui);
+ val += shareMemUI[idx];
+ val += origui;
+
+ int origi = 0;
+ //LIB: call {{.*}}.atomic.local.add.s.i32
+ InterlockedAdd(shareMemI[idx], 2, origi);
+ val += shareMemI[idx];
+ val += origi;
+
+ //LIB: call {{.*}}.atomic.global.add.u.i32
+ InterlockedAdd(uintBuffer[idx], 1, origui);
+ val += uintBuffer[idx];
+ val += origui;
+
+ //LIB: call {{.*}}.atomic.global.add.s.i32
+ InterlockedAdd(intBuffer[idx], 2, origi);
+ val += intBuffer[idx];
+ val += origi;
+
+ //LIB: call {{.*}}.atomic.local.add.s.i32
+ InterlockedAdd(shareMemI[idx], -1, origi);
+ val += shareMemI[idx];
+ val += origi;
+
+ //LIB: call {{.*}}.atomic.global.add.s.i32
+ InterlockedAdd(intBuffer[idx], -1, origi);
+ val += intBuffer[idx];
+ val += origi;
+
+ // InterlockedAnd
+ //LIB: call {{.*}}.atomic.local.and.u.i32
+ InterlockedAnd(shareMemUI[idx], 255);
+ val += shareMemUI[idx];
+
+ //LIB: call {{.*}}.atomic.local.and.s.i32
+ InterlockedAnd(shareMemI[idx], 255);
+ val += shareMemI[idx];
+
+ //LIB: call {{.*}}.atomic.global.and.u.i32
+ InterlockedAnd(uintBuffer[idx], 255);
+ val += uintBuffer[idx];
+
+ //LIB: call {{.*}}.atomic.global.and.s.i32
+ InterlockedAnd(intBuffer[idx], 255);
+ val += intBuffer[idx];
+
+ // InterlockedAnd - original_value
+ //LIB: call {{.*}}.atomic.local.and.u.i32
+ InterlockedAnd(shareMemUI[idx], 255, origui);
+ val += shareMemUI[idx];
+ val += origui;
+
+ //LIB: call {{.*}}.atomic.local.and.s.i32
+ InterlockedAnd(shareMemI[idx], 255, origi);
+ val += shareMemI[idx];
+ val += origi;
+
+ //LIB: call {{.*}}.atomic.global.and.u.i32
+ InterlockedAnd(uintBuffer[idx], 255, origui);
+ val += uintBuffer[idx];
+ val += origui;
+
+ //LIB: call {{.*}}.atomic.global.and.s.i32
+ InterlockedAnd(intBuffer[idx], 255, origi);
+ val += intBuffer[idx];
+ val += origi;
+
+ // InterlockedCompareExchange
+ //LIB: call {{.*}}.atomic.local.cmpxchg.weak.i32
+ InterlockedCompareExchange(shareMemUI[idx], 1, 0, origui);
+ val += shareMemUI[idx];
+ val += origui;
+
+ //LIB: call {{.*}}.atomic.local.cmpxchg.weak.i32
+ InterlockedCompareExchange(shareMemI[idx], 1, 0, origi);
+ val += shareMemI[idx];
+ val += origi;
+
+ //LIB: call {{.*}}.atomic.global.cmpxchg.weak.i32
+ InterlockedCompareExchange(uintBuffer[idx], 1, 0, origui);
+ val += uintBuffer[idx];
+ val += origui;
+
+ //LIB: call {{.*}}.atomic.global.cmpxchg.weak.i32
+ InterlockedCompareExchange(intBuffer[idx], 1, 0, origi);
+ val += intBuffer[idx];
+ val += origi;
+
+ // InterlockedCompareStore is not supported by Metal
+#if !defined(METAL)
+ InterlockedCompareStore(shareMemUI[idx], 255, 0);
+ val += shareMemUI[idx];
+
+ InterlockedCompareStore(shareMemI[idx], 255, 0);
+ val += shareMemI[idx];
+
+ InterlockedCompareStore(uintBuffer[idx], 255, 0);
+ val += uintBuffer[idx];
+
+ InterlockedCompareStore(intBuffer[idx], 255, 0);
+ val += intBuffer[idx];
+#endif
+
+ // InterlockedExchange
+ //LIB: call {{.*}}.atomic.local.xchg.i32
+ InterlockedExchange(shareMemUI[idx], 1, origui);
+ val += shareMemUI[idx];
+ val += origui;
+
+ //LIB: call {{.*}}.atomic.local.xchg.i32
+ InterlockedExchange(shareMemI[idx], 1, origi);
+ val += shareMemI[idx];
+ val += origi;
+
+ //LIB: call {{.*}}.atomic.global.xchg.i32
+ InterlockedExchange(uintBuffer[idx], 1, origui);
+ val += uintBuffer[idx];
+ val += origui;
+
+ //LIB: call {{.*}}.atomic.global.xchg.i32
+ InterlockedExchange(intBuffer[idx], 1, origi);
+ val += intBuffer[idx];
+ val += origi;
+
+ // InterlockedMax
+ //LIB: call {{.*}}.atomic.local.max.u.i32
+ InterlockedMax(shareMemUI[idx], 0);
+ val += shareMemUI[idx];
+
+ //LIB: call {{.*}}.atomic.local.max.s.i32
+ InterlockedMax(shareMemI[idx], 0);
+ val += shareMemI[idx];
+
+ //LIB: call {{.*}}.atomic.global.max.u.i32
+ InterlockedMax(uintBuffer[idx], 0);
+ val += uintBuffer[idx];
+
+ //LIB: call {{.*}}.atomic.global.max.s.i32
+ InterlockedMax(intBuffer[idx], 0);
+ val += intBuffer[idx];
+
+ // InterlockedMax - original_value
+ //LIB: call {{.*}}.atomic.local.max.u.i32
+ InterlockedMax(shareMemUI[idx], 0, origui);
+ val += shareMemUI[idx];
+ val += origui;
+
+ //LIB: call {{.*}}.atomic.local.max.s.i32
+ InterlockedMax(shareMemI[idx], 0, origi);
+ val += shareMemI[idx];
+ val += origi;
+
+ //LIB: call {{.*}}.atomic.global.max.u.i32
+ InterlockedMax(uintBuffer[idx], 0, origui);
+ val += uintBuffer[idx];
+ val += origui;
+
+ //LIB: call {{.*}}.atomic.global.max.s.i32
+ InterlockedMax(intBuffer[idx], 0, origi);
+ val += intBuffer[idx];
+ val += origi;
+
+ // InterlockedMin
+ //LIB: call {{.*}}.atomic.local.min.u.i32
+ InterlockedMin(shareMemUI[idx], 0);
+ val += shareMemUI[idx];
+
+ //LIB: call {{.*}}.atomic.local.min.s.i32
+ InterlockedMin(shareMemI[idx], 0);
+ val += shareMemI[idx];
+
+ //LIB: call {{.*}}.atomic.global.min.u.i32
+ InterlockedMin(uintBuffer[idx], 0);
+ val += uintBuffer[idx];
+
+ //LIB: call {{.*}}.atomic.global.min.s.i32
+ InterlockedMin(intBuffer[idx], 0);
+ val += intBuffer[idx];
+
+ // InterlockedMin - original_value
+ //LIB: call {{.*}}.atomic.local.min.u.i32
+ InterlockedMin(shareMemUI[idx], 0, origui);
+ val += shareMemUI[idx];
+ val += origui;
+
+ //LIB: call {{.*}}.atomic.local.min.s.i32
+ InterlockedMin(shareMemI[idx], 0, origi);
+ val += shareMemI[idx];
+ val += origi;
+
+ //LIB: call {{.*}}.atomic.global.min.u.i32
+ InterlockedMin(uintBuffer[idx], 0, origui);
+ val += uintBuffer[idx];
+ val += origui;
+
+ //LIB: call {{.*}}.atomic.global.min.s.i32
+ InterlockedMin(intBuffer[idx], 0, origi);
+ val += intBuffer[idx];
+ val += origi;
+
+ // InterlockedOr
+ //LIB: call {{.*}}.atomic.local.or.u.i32
+ InterlockedOr(shareMemUI[idx], 2);
+ val += shareMemUI[idx];
+
+ //LIB: call {{.*}}.atomic.local.or.s.i32
+ InterlockedOr(shareMemI[idx], 4);
+ val += shareMemI[idx];
+
+ //LIB: call {{.*}}.atomic.global.or.u.i32
+ InterlockedOr(uintBuffer[idx], 6);
+ val += uintBuffer[idx];
+
+ //LIB: call {{.*}}.atomic.global.or.s.i32
+ InterlockedOr(intBuffer[idx], 8);
+ val += intBuffer[idx];
+
+ // InterlockedOr - original_value
+ //LIB: call {{.*}}.atomic.local.or.u.i32
+ InterlockedOr(shareMemUI[idx], 2, origui);
+ val += shareMemUI[idx];
+ val += origui;
+
+ //LIB: call {{.*}}.atomic.local.or.s.i32
+ InterlockedOr(shareMemI[idx], 4, origi);
+ val += shareMemI[idx];
+ val += origi;
+
+ //LIB: call {{.*}}.atomic.global.or.u.i32
+ InterlockedOr(uintBuffer[idx], 6, origui);
+ val += uintBuffer[idx];
+ val += origui;
+
+ //LIB: call {{.*}}.atomic.global.or.s.i32
+ InterlockedOr(intBuffer[idx], 8, origi);
+ val += intBuffer[idx];
+ val += origi;
+
+ // InterlockedXor
+ //LIB: call {{.*}}.atomic.local.xor.u.i32
+ InterlockedXor(shareMemUI[idx], 2);
+ val += shareMemUI[idx];
+
+ //LIB: call {{.*}}.atomic.local.xor.s.i32
+ InterlockedXor(shareMemI[idx], 4);
+ val += shareMemI[idx];
+
+ //LIB: call {{.*}}.atomic.global.xor.u.i32
+ InterlockedXor(uintBuffer[idx], 6);
+ val += uintBuffer[idx];
+
+ //LIB: call {{.*}}.atomic.global.xor.s.i32
+ InterlockedXor(intBuffer[idx], 8);
+ val += intBuffer[idx];
+
+ // InterlockedXor - original_value
+ //LIB: call {{.*}}.atomic.local.xor.u.i32
+ InterlockedXor(shareMemUI[idx], 2, origui);
+ val += shareMemUI[idx];
+ val += origui;
+
+ //LIB: call {{.*}}.atomic.local.xor.s.i32
+ InterlockedXor(shareMemI[idx], 4, origi);
+ val += shareMemI[idx];
+ val += origi;
+
+ //LIB: call {{.*}}.atomic.global.xor.u.i32
+ InterlockedXor(uintBuffer[idx], 6, origui);
+ val += uintBuffer[idx];
+ val += origui;
+
+ //LIB: call {{.*}}.atomic.global.xor.s.i32
+ InterlockedXor(intBuffer[idx], 8, origi);
+ val += intBuffer[idx];
+ val += origi;
+
+ outputBuffer[idx] = val;
+}
+
+// CHK: 184
+// CHK: 207
+// CHK: 230
+// CHK: 253