diff options
| author | Jay Kwak <82421531+jkwak-work@users.noreply.github.com> | 2024-06-25 22:07:41 -0700 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-06-25 22:07:41 -0700 |
| commit | 969dd4cc7246bfe89103efcb00f399606e804e98 (patch) | |
| tree | 6b44527d72a08f4b39848bf5cc2efe03ed5e6c90 /tests | |
| parent | 63e0064bd3a2007adf17a35d3c58894d90ddc04a (diff) | |
Support atomic intrinsics for Metal (#4473)
* Support atomic intrinsics for Metal
This commit adds support for the atomic intrinsics in Metal.
The atomic member functions for buffers are not implemented yet.
Metal requires the first argument for the atomic functions to be an
atomic data type. This implementation relies on the fact that we can do a
C-style type cast from a regular data type to an atomic data type.
Diffstat (limited to 'tests')
| -rw-r--r-- | tests/bugs/atomic-coerce.slang | 2 | ||||
| -rw-r--r-- | tests/compute/atomics-groupshared.slang | 2 | ||||
| -rw-r--r-- | tests/compute/atomics.slang | 2 | ||||
| -rw-r--r-- | tests/metal/atomic-intrinsics.slang | 352 |
4 files changed, 355 insertions, 3 deletions
diff --git a/tests/bugs/atomic-coerce.slang b/tests/bugs/atomic-coerce.slang index 2fe927355..bfb0eeb63 100644 --- a/tests/bugs/atomic-coerce.slang +++ b/tests/bugs/atomic-coerce.slang @@ -1,6 +1,6 @@ //TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -shaderobj //TEST(compute,vulkan):COMPARE_COMPUTE_EX:-vk -slang -compute -shaderobj -//DISABLE_TEST(compute):COMPARE_COMPUTE:-slang -shaderobj -mtl +//TEST(compute):COMPARE_COMPUTE:-slang -shaderobj -mtl //TEST_INPUT:ubuffer(data=[0 0 0 0 ], stride=4):out,name outputBuffer RWStructuredBuffer<int> outputBuffer; diff --git a/tests/compute/atomics-groupshared.slang b/tests/compute/atomics-groupshared.slang index fcfc9c8d7..a01f7bf6a 100644 --- a/tests/compute/atomics-groupshared.slang +++ b/tests/compute/atomics-groupshared.slang @@ -4,7 +4,7 @@ //TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -dx12 -shaderobj //TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -vk -shaderobj //TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -cuda -shaderobj -//DISABLE_TEST(compute):COMPARE_COMPUTE:-slang -shaderobj -mtl +//TEST(compute):COMPARE_COMPUTE:-slang -shaderobj -mtl //TEST_INPUT:ubuffer(data=[0 0 0 0], stride=4):out,name outputBuffer diff --git a/tests/compute/atomics.slang b/tests/compute/atomics.slang index b00f437f5..ee02c623f 100644 --- a/tests/compute/atomics.slang +++ b/tests/compute/atomics.slang @@ -4,7 +4,7 @@ //TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -dx12 -shaderobj //TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -vk -shaderobj //TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -cuda -shaderobj -//DISABLE_TEST(compute):COMPARE_COMPUTE:-slang -shaderobj -mtl +//TEST(compute):COMPARE_COMPUTE:-slang -shaderobj -mtl //TEST_INPUT:ubuffer(data=[0 0 0 0], stride=4):out, name outputBuffer diff --git a/tests/metal/atomic-intrinsics.slang b/tests/metal/atomic-intrinsics.slang new file mode 100644 index 000000000..3533ea2aa --- /dev/null +++ b/tests/metal/atomic-intrinsics.slang @@ -0,0 +1,352 @@ 
+//TEST:SIMPLE(filecheck=MTL):-target metal -entry computeMain -stage compute -DMETAL +//TEST:SIMPLE(filecheck=LIB):-target metallib -entry computeMain -stage compute -DMETAL +//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=CHK):-slang -compute -dx12 -profile cs_6_0 -use-dxil -shaderobj -output-using-type +//TEST(compute, vulkan):COMPARE_COMPUTE_EX(filecheck-buffer=CHK):-vk -emit-spirv-directly -compute -shaderobj -output-using-type + +//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-cpu -compute -shaderobj -output-using-type +//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-cuda -compute -shaderobj -output-using-type + + +//TEST_INPUT:ubuffer(data=[0 1 2 3], stride=4):name=uintBuffer +RWStructuredBuffer<uint> uintBuffer; +//TEST_INPUT:ubuffer(data=[0 1 2 3], stride=4):name=intBuffer +RWStructuredBuffer<int> intBuffer; + +groupshared uint shareMemUI[4]; +groupshared int shareMemI[4]; + +//TEST_INPUT: ubuffer(data=[0 0 0 0], stride=4):out,name outputBuffer +RWStructuredBuffer<float> outputBuffer; + +[numthreads(4, 1, 1)] +void computeMain(uint groupIndex : SV_GroupIndex) +{ + if (groupIndex == 0) + { + for (int i = 0; i < 4; ++i) + { + shareMemUI[i] = 0U; + shareMemI[i] = 0; + } + } + AllMemoryBarrierWithGroupSync(); + + int idx = groupIndex; + float val = 0.0f; + + // InterlockedAdd + //MTL: atomic_uint threadgroup* {{.*}}shareMemUI + //LIB: call {{.*}}.atomic.local.add.u.i32 + InterlockedAdd(shareMemUI[idx], uint(1)); + val += shareMemUI[idx]; + + //MTL: atomic_int threadgroup* {{.*}}shareMemI + //LIB: call {{.*}}.atomic.local.add.s.i32 + InterlockedAdd(shareMemI[idx], 2); + val += shareMemI[idx]; + + //MTL: atomic_uint device* {{.*}}uintBuffer + //LIB: call {{.*}}.atomic.global.add.u.i32 + InterlockedAdd(uintBuffer[idx], 1); + val += uintBuffer[idx]; + + //MTL: atomic_int device* {{.*}}intBuffer + //LIB: call {{.*}}.atomic.global.add.s.i32 + InterlockedAdd(intBuffer[idx], 2); + val += intBuffer[idx]; + + //LIB: call {{.*}}.atomic.local.add.s.i32 + 
InterlockedAdd(shareMemI[idx], -1); + val += shareMemI[idx]; + + //LIB: call {{.*}}.atomic.global.add.s.i32 + InterlockedAdd(intBuffer[idx], -1); + val += intBuffer[idx]; + + // InterlockedAdd - original_value + uint origui = 0; + //LIB: call {{.*}}.atomic.local.add.u.i32 + InterlockedAdd(shareMemUI[idx], 1, origui); + val += shareMemUI[idx]; + val += origui; + + int origi = 0; + //LIB: call {{.*}}.atomic.local.add.s.i32 + InterlockedAdd(shareMemI[idx], 2, origi); + val += shareMemI[idx]; + val += origi; + + //LIB: call {{.*}}.atomic.global.add.u.i32 + InterlockedAdd(uintBuffer[idx], 1, origui); + val += uintBuffer[idx]; + val += origui; + + //LIB: call {{.*}}.atomic.global.add.s.i32 + InterlockedAdd(intBuffer[idx], 2, origi); + val += intBuffer[idx]; + val += origi; + + //LIB: call {{.*}}.atomic.local.add.s.i32 + InterlockedAdd(shareMemI[idx], -1, origi); + val += shareMemI[idx]; + val += origi; + + //LIB: call {{.*}}.atomic.global.add.s.i32 + InterlockedAdd(intBuffer[idx], -1, origi); + val += intBuffer[idx]; + val += origi; + + // InterlockedAnd + //LIB: call {{.*}}.atomic.local.and.u.i32 + InterlockedAnd(shareMemUI[idx], 255); + val += shareMemUI[idx]; + + //LIB: call {{.*}}.atomic.local.and.s.i32 + InterlockedAnd(shareMemI[idx], 255); + val += shareMemI[idx]; + + //LIB: call {{.*}}.atomic.global.and.u.i32 + InterlockedAnd(uintBuffer[idx], 255); + val += uintBuffer[idx]; + + //LIB: call {{.*}}.atomic.global.and.s.i32 + InterlockedAnd(intBuffer[idx], 255); + val += intBuffer[idx]; + + // InterlockedAnd - original_value + //LIB: call {{.*}}.atomic.local.and.u.i32 + InterlockedAnd(shareMemUI[idx], 255, origui); + val += shareMemUI[idx]; + val += origui; + + //LIB: call {{.*}}.atomic.local.and.s.i32 + InterlockedAnd(shareMemI[idx], 255, origi); + val += shareMemI[idx]; + val += origi; + + //LIB: call {{.*}}.atomic.global.and.u.i32 + InterlockedAnd(uintBuffer[idx], 255, origui); + val += uintBuffer[idx]; + val += origui; + + //LIB: call 
{{.*}}.atomic.global.and.s.i32 + InterlockedAnd(intBuffer[idx], 255, origi); + val += intBuffer[idx]; + val += origi; + + // InterlockedCompareExchange + //LIB: call {{.*}}.atomic.local.cmpxchg.weak.i32 + InterlockedCompareExchange(shareMemUI[idx], 1, 0, origui); + val += shareMemUI[idx]; + val += origui; + + //LIB: call {{.*}}.atomic.local.cmpxchg.weak.i32 + InterlockedCompareExchange(shareMemI[idx], 1, 0, origi); + val += shareMemI[idx]; + val += origi; + + //LIB: call {{.*}}.atomic.global.cmpxchg.weak.i32 + InterlockedCompareExchange(uintBuffer[idx], 1, 0, origui); + val += uintBuffer[idx]; + val += origui; + + //LIB: call {{.*}}.atomic.global.cmpxchg.weak.i32 + InterlockedCompareExchange(intBuffer[idx], 1, 0, origi); + val += intBuffer[idx]; + val += origi; + + // InterlockedCompareStore is not supported by Metal +#if !defined(METAL) + InterlockedCompareStore(shareMemUI[idx], 255, 0); + val += shareMemUI[idx]; + + InterlockedCompareStore(shareMemI[idx], 255, 0); + val += shareMemI[idx]; + + InterlockedCompareStore(uintBuffer[idx], 255, 0); + val += uintBuffer[idx]; + + InterlockedCompareStore(intBuffer[idx], 255, 0); + val += intBuffer[idx]; +#endif + + // InterlockedExchange + //LIB: call {{.*}}.atomic.local.xchg.i32 + InterlockedExchange(shareMemUI[idx], 1, origui); + val += shareMemUI[idx]; + val += origui; + + //LIB: call {{.*}}.atomic.local.xchg.i32 + InterlockedExchange(shareMemI[idx], 1, origi); + val += shareMemI[idx]; + val += origi; + + //LIB: call {{.*}}.atomic.global.xchg.i32 + InterlockedExchange(uintBuffer[idx], 1, origui); + val += uintBuffer[idx]; + val += origui; + + //LIB: call {{.*}}.atomic.global.xchg.i32 + InterlockedExchange(intBuffer[idx], 1, origi); + val += intBuffer[idx]; + val += origi; + + // InterlockedMax + //LIB: call {{.*}}.atomic.local.max.u.i32 + InterlockedMax(shareMemUI[idx], 0); + val += shareMemUI[idx]; + + //LIB: call {{.*}}.atomic.local.max.s.i32 + InterlockedMax(shareMemI[idx], 0); + val += shareMemI[idx]; + + //LIB: 
call {{.*}}.atomic.global.max.u.i32 + InterlockedMax(uintBuffer[idx], 0); + val += uintBuffer[idx]; + + //LIB: call {{.*}}.atomic.global.max.s.i32 + InterlockedMax(intBuffer[idx], 0); + val += intBuffer[idx]; + + // InterlockedMax - original_value + //LIB: call {{.*}}.atomic.local.max.u.i32 + InterlockedMax(shareMemUI[idx], 0, origui); + val += shareMemUI[idx]; + val += origui; + + //LIB: call {{.*}}.atomic.local.max.s.i32 + InterlockedMax(shareMemI[idx], 0, origi); + val += shareMemI[idx]; + val += origi; + + //LIB: call {{.*}}.atomic.global.max.u.i32 + InterlockedMax(uintBuffer[idx], 0, origui); + val += uintBuffer[idx]; + val += origui; + + //LIB: call {{.*}}.atomic.global.max.s.i32 + InterlockedMax(intBuffer[idx], 0, origi); + val += intBuffer[idx]; + val += origi; + + // InterlockedMin + //LIB: call {{.*}}.atomic.local.min.u.i32 + InterlockedMin(shareMemUI[idx], 0); + val += shareMemUI[idx]; + + //LIB: call {{.*}}.atomic.local.min.s.i32 + InterlockedMin(shareMemI[idx], 0); + val += shareMemI[idx]; + + //LIB: call {{.*}}.atomic.global.min.u.i32 + InterlockedMin(uintBuffer[idx], 0); + val += uintBuffer[idx]; + + //LIB: call {{.*}}.atomic.global.min.s.i32 + InterlockedMin(intBuffer[idx], 0); + val += intBuffer[idx]; + + // InterlockedMin - original_value + //LIB: call {{.*}}.atomic.local.min.u.i32 + InterlockedMin(shareMemUI[idx], 0, origui); + val += shareMemUI[idx]; + val += origui; + + //LIB: call {{.*}}.atomic.local.min.s.i32 + InterlockedMin(shareMemI[idx], 0, origi); + val += shareMemI[idx]; + val += origi; + + //LIB: call {{.*}}.atomic.global.min.u.i32 + InterlockedMin(uintBuffer[idx], 0, origui); + val += uintBuffer[idx]; + val += origui; + + //LIB: call {{.*}}.atomic.global.min.s.i32 + InterlockedMin(intBuffer[idx], 0, origi); + val += intBuffer[idx]; + val += origi; + + // InterlockedOr + //LIB: call {{.*}}.atomic.local.or.u.i32 + InterlockedOr(shareMemUI[idx], 2); + val += shareMemUI[idx]; + + //LIB: call {{.*}}.atomic.local.or.s.i32 + 
InterlockedOr(shareMemI[idx], 4); + val += shareMemI[idx]; + + //LIB: call {{.*}}.atomic.global.or.u.i32 + InterlockedOr(uintBuffer[idx], 6); + val += uintBuffer[idx]; + + //LIB: call {{.*}}.atomic.global.or.s.i32 + InterlockedOr(intBuffer[idx], 8); + val += intBuffer[idx]; + + // InterlockedOr - original_value + //LIB: call {{.*}}.atomic.local.or.u.i32 + InterlockedOr(shareMemUI[idx], 2, origui); + val += shareMemUI[idx]; + val += origui; + + //LIB: call {{.*}}.atomic.local.or.s.i32 + InterlockedOr(shareMemI[idx], 4, origi); + val += shareMemI[idx]; + val += origi; + + //LIB: call {{.*}}.atomic.global.or.u.i32 + InterlockedOr(uintBuffer[idx], 6, origui); + val += uintBuffer[idx]; + val += origui; + + //LIB: call {{.*}}.atomic.global.or.s.i32 + InterlockedOr(intBuffer[idx], 8, origi); + val += intBuffer[idx]; + val += origi; + + // InterlockedXor + //LIB: call {{.*}}.atomic.local.xor.u.i32 + InterlockedXor(shareMemUI[idx], 2); + val += shareMemUI[idx]; + + //LIB: call {{.*}}.atomic.local.xor.s.i32 + InterlockedXor(shareMemI[idx], 4); + val += shareMemI[idx]; + + //LIB: call {{.*}}.atomic.global.xor.u.i32 + InterlockedXor(uintBuffer[idx], 6); + val += uintBuffer[idx]; + + //LIB: call {{.*}}.atomic.global.xor.s.i32 + InterlockedXor(intBuffer[idx], 8); + val += intBuffer[idx]; + + // InterlockedXor - original_value + //LIB: call {{.*}}.atomic.local.xor.u.i32 + InterlockedXor(shareMemUI[idx], 2, origui); + val += shareMemUI[idx]; + val += origui; + + //LIB: call {{.*}}.atomic.local.xor.s.i32 + InterlockedXor(shareMemI[idx], 4, origi); + val += shareMemI[idx]; + val += origi; + + //LIB: call {{.*}}.atomic.global.xor.u.i32 + InterlockedXor(uintBuffer[idx], 6, origui); + val += uintBuffer[idx]; + val += origui; + + //LIB: call {{.*}}.atomic.global.xor.s.i32 + InterlockedXor(intBuffer[idx], 8, origi); + val += intBuffer[idx]; + val += origi; + + outputBuffer[idx] = val; +} + +// CHK: 184 +// CHK: 207 +// CHK: 230 +// CHK: 253 |
