Add support for Metal subgroup/simd operations (#6247)

* initial work for metal subgroups
* add glsl intrinsics
* enable wave tests
* enable glsl subgroup tests, glsl barrier fixes
* minor fixes
* fix incorrect test target
* disable some glsl functional tests
* disable failing glsl test
---------
Co-authored-by: Yong He <yonghe@outlook.com>
author: Darren Wihandi <65404740+fairywreath@users.noreply.github.com> 2025-02-10 19:40:39 -0500
committer: GitHub <noreply@github.com> 2025-02-10 16:40:39 -0800
commit: 133bd259c00984c6a01869f71951a7feb919463a (patch)
tree: a69f1a6b3caff0ac4d958453fde6176ab3c66c91 /tests
parent: f761ab0586353da67bf7b3ae395ad7b090cd904f (diff)
19 files changed, 157 insertions, 102 deletions
diff --git a/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-arithmetic_Exclusive.slang b/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-arithmetic_Exclusive.slang
index d44a29c14..0a0fcade5 100644
--- a/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-arithmetic_Exclusive.slang
+++ b/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-arithmetic_Exclusive.slang
@@ -10,6 +10,9 @@
 //TEST(compute, vulkan):COMPARE_COMPUTE(filecheck-buffer=BUF):-vk -compute -entry computeMain -allow-glsl -emit-spirv-directly
 //TEST(compute):COMPARE_COMPUTE(filecheck-buffer=BUF):-wgpu -compute -entry computeMain -allow-glsl -xslang -DWGPU
 
+// Not testing because CI runners may not support Metal's intrinsics.
+//DISABLE_TEST(compute):COMPARE_COMPUTE(filecheck-buffer=BUF):-metal -compute -entry computeMain -allow-glsl -xslang -DMETAL
+
 #version 430
 
 #if 1                        \
@@ -100,8 +103,8 @@ bool test1Arithmetic() {
         & subgroupExclusiveAdd(T(1)) == T(3)
         & subgroupExclusiveMul(T(1)) == T(1)
 
-        // WGSL does not support exclusive min/max.
-#if !defined(WGPU)
+        // WGSL and Metal do not support exclusive min/max.
+#if !defined(WGPU) && !defined(METAL)
         & subgroupExclusiveMin(T(1)) == T(1)
         & subgroupExclusiveMax(T(1)) == T(1)
 #endif
@@ -115,8 +118,8 @@ bool testVArithmetic() {
         & subgroupExclusiveAdd(gvec(T(1))) == gvec(T(3))
         & subgroupExclusiveMul(gvec(T(1))) == gvec(T(1))
 
-        // WGSL does not support exclusive min/max.
-#if !defined(WGPU)
+        // WGSL and Metal do not support exclusive min/max.
+#if !defined(WGPU) && !defined(METAL)
         & subgroupExclusiveMin(gvec(T(1))) == gvec(T(1))
         & subgroupExclusiveMax(gvec(T(1))) == gvec(T(1))
 #endif
@@ -142,8 +145,8 @@ bool testArithmetic() {
         & testVArithmetic<uint, 3>()
         & testVArithmetic<uint, 4>()
 
-        // Disabled on WGPU as these built-in types are not supported as of time of writing.
-#if !defined (WGPU)
+        // Disabled on WGPU and Metal as these built-in types are not supported as of time of writing.
+#if !defined(WGPU) && !defined(METAL)
         & test1Arithmetic<double>() // WARNING: intel GPU's lack FP64 support
         & testVArithmetic<double, 2>()
         & testVArithmetic<double, 3>()
@@ -152,10 +155,6 @@ bool testArithmetic() {
         & testVArithmetic<int8_t, 2>()
         & testVArithmetic<int8_t, 3>()
         & testVArithmetic<int8_t, 4>()
-        & test1Arithmetic<int16_t>() 
-        & testVArithmetic<int16_t, 2>()
-        & testVArithmetic<int16_t, 3>()
-        & testVArithmetic<int16_t, 4>()
         & test1Arithmetic<int64_t>() 
         & testVArithmetic<int64_t, 2>()
         & testVArithmetic<int64_t, 3>()
@@ -164,15 +163,23 @@ bool testArithmetic() {
         & testVArithmetic<uint8_t, 2>()
         & testVArithmetic<uint8_t, 3>()
         & testVArithmetic<uint8_t, 4>()
-        & test1Arithmetic<uint16_t>() 
-        & testVArithmetic<uint16_t, 2>()
-        & testVArithmetic<uint16_t, 3>()
-        & testVArithmetic<uint16_t, 4>()
         & test1Arithmetic<uint64_t>() 
         & testVArithmetic<uint64_t, 2>()
         & testVArithmetic<uint64_t, 3>()
         & testVArithmetic<uint64_t, 4>()
 #endif
+
+        // Disabled on WGPU as these built-in types are not supported as of time of writing.
+#if !defined (WGPU)
+        & test1Arithmetic<int16_t>() 
+        & testVArithmetic<int16_t, 2>()
+        & testVArithmetic<int16_t, 3>()
+        & testVArithmetic<int16_t, 4>()
+        & test1Arithmetic<uint16_t>() 
+        & testVArithmetic<uint16_t, 2>()
+        & testVArithmetic<uint16_t, 3>()
+        & testVArithmetic<uint16_t, 4>()
+#endif
         ;
 }
 
@@ -180,8 +187,8 @@ void computeMain()
 {
 
     bool res0 = true
-    // WGSL does not support bitwise exclusive intrinsics.
-#if !defined(WGPU)
+    // WGSL and Metal do not support bitwise exclusive intrinsics.
+#if !defined(WGPU) && !defined(METAL)
             & testLogical()
 #endif
             ;
diff --git a/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-arithmetic_Inclusive.slang b/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-arithmetic_Inclusive.slang
index 0c94d4c90..58c7d5aaa 100644
--- a/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-arithmetic_Inclusive.slang
+++ b/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-arithmetic_Inclusive.slang
@@ -10,6 +10,9 @@
 //TEST(compute, vulkan):COMPARE_COMPUTE(filecheck-buffer=BUF):-vk -compute -entry computeMain -allow-glsl -emit-spirv-directly
 //TEST(compute):COMPARE_COMPUTE(filecheck-buffer=BUF):-wgpu -compute -entry computeMain -allow-glsl -xslang -DWGPU
 
+// Not testing because CI runners may not support Metal's intrinsics.
+//DISABLE_TEST(compute):COMPARE_COMPUTE(filecheck-buffer=BUF):-metal -compute -entry computeMain -allow-glsl -xslang -DMETAL
+
 #version 430
 
 #if 1                        \
@@ -100,8 +103,8 @@ bool test1Arithmetic() {
         & subgroupInclusiveAdd(T(1)) == T(4)
         & subgroupInclusiveMul(T(1)) == T(1)
     
-        // WGSL does not support inclusive min/max
-#if !defined(WGPU)
+        // WGSL and Metal do not support inclusive min/max
+#if !defined(WGPU) && !defined(METAL)
         & subgroupInclusiveMin(T(1)) == T(1)
         & subgroupInclusiveMax(T(1)) == T(1)
 #endif
@@ -115,8 +118,8 @@ bool testVArithmetic() {
         & subgroupInclusiveAdd(gvec(T(1))) == gvec(T(4)) 
         // & subgroupInclusiveMul(gvec(T(1))) == gvec(T(1))
 
-        // WGSL does not support inclusive min/max
-#if !defined(WGPU)
+        // WGSL and Metal do not support inclusive min/max
+#if !defined(WGPU) && !defined(METAL)
         & subgroupInclusiveMin(gvec(T(1))) == gvec(T(1))
         & subgroupInclusiveMax(gvec(T(1))) == gvec(T(1))
 #endif
@@ -142,20 +145,16 @@ bool testArithmetic() {
         // & testVArithmetic<uint, 3>()
         // & testVArithmetic<uint, 4>()
 
-        // Disabled on WGPU as these built-in types are not supported as of time of writing.
-#if !defined (WGPU)
+        // Disabled on WGPU and Metal as these built-in types are not supported as of time of writing.
+#if !defined(WGPU) && !defined(METAL)
         & test1Arithmetic<double>() // WARNING: intel GPU's lack FP64 support
         & testVArithmetic<double, 2>()
         & testVArithmetic<double, 3>()
         & testVArithmetic<double, 4>()
-        & test1Arithmetic<uint8_t>() 
+        & test1Arithmetic<uint8_t>()
         & testVArithmetic<uint8_t, 2>()
         & testVArithmetic<uint8_t, 3>()
         & testVArithmetic<uint8_t, 4>()
-        & test1Arithmetic<uint16_t>() 
-        & testVArithmetic<uint16_t, 2>()
-        & testVArithmetic<uint16_t, 3>()
-        & testVArithmetic<uint16_t, 4>()
         & test1Arithmetic<uint64_t>() 
         & testVArithmetic<uint64_t, 2>()
         & testVArithmetic<uint64_t, 3>()
@@ -164,16 +163,23 @@ bool testArithmetic() {
         & testVArithmetic<int8_t, 2>()
         & testVArithmetic<int8_t, 3>()
         & testVArithmetic<int8_t, 4>()
-        & test1Arithmetic<int16_t>() 
-        & testVArithmetic<int16_t, 2>()
-        & testVArithmetic<int16_t, 3>()
-        & testVArithmetic<int16_t, 4>()
         & test1Arithmetic<int64_t>() 
         & testVArithmetic<int64_t, 2>()
         & testVArithmetic<int64_t, 3>()
         & testVArithmetic<int64_t, 4>()
 #endif
 
+        // Disabled on WGPU as these built-in types are not supported as of time of writing.
+#if !defined (WGPU)
+        & test1Arithmetic<uint16_t>() 
+        & testVArithmetic<uint16_t, 2>()
+        & testVArithmetic<uint16_t, 3>()
+        & testVArithmetic<uint16_t, 4>()
+        & test1Arithmetic<int16_t>() 
+        & testVArithmetic<int16_t, 2>()
+        & testVArithmetic<int16_t, 3>()
+        & testVArithmetic<int16_t, 4>()
+#endif
         ;
 }
 
@@ -181,8 +187,8 @@ void computeMain()
 {
 
     bool res0 = true
-    // WGSL does not support bitwise inclusive intrinsics.
-#if !defined(WGPU)
+    // WGSL and Metal do not support bitwise inclusive intrinsics.
+#if !defined(WGPU) && !defined(METAL)
             & testLogical()
 #endif
             ;
diff --git a/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-arithmetic_None.slang b/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-arithmetic_None.slang
index e502e3608..bb6316a59 100644
--- a/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-arithmetic_None.slang
+++ b/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-arithmetic_None.slang
@@ -10,6 +10,9 @@
 //TEST(compute, vulkan):COMPARE_COMPUTE(filecheck-buffer=BUF):-vk -compute -entry computeMain -allow-glsl -emit-spirv-directly
 //TEST(compute):COMPARE_COMPUTE(filecheck-buffer=BUF):-wgpu -compute -entry computeMain -allow-glsl -xslang -DWGPU
 
+// Not testing because CI runners may not support Metal's intrinsics.
+//DISABLE_TEST(compute):COMPARE_COMPUTE(filecheck-buffer=BUF):-metal -compute -entry computeMain -allow-glsl -xslang -DMETAL
+
 #version 430
 
 #if 1                        \
@@ -64,16 +67,12 @@ bool testLogical() {
         & testVLogical<uint, 3>()
         & testVLogical<uint, 4>()
 
-        // Disabled on WGPU as these built-in types are not supported as of time of writing.
-#if !defined (WGPU)
+        // Disabled on WGPU and Metal as these built-in types are not supported as of time of writing.
+#if !defined(WGPU) && !defined(METAL)
         & test1Logical<int8_t>()
         & testVLogical<int8_t, 2>()
         & testVLogical<int8_t, 3>()
         & testVLogical<int8_t, 4>()
-        & test1Logical<int16_t>()
-        & testVLogical<int16_t, 2>()
-        & testVLogical<int16_t, 3>()
-        & testVLogical<int16_t, 4>()
         & test1Logical<int64_t>()
         & testVLogical<int64_t, 2>()
         & testVLogical<int64_t, 3>()
@@ -82,10 +81,6 @@ bool testLogical() {
         & testVLogical<uint8_t, 2>()
         & testVLogical<uint8_t, 3>()
         & testVLogical<uint8_t, 4>()
-        & test1Logical<uint16_t>()
-        & testVLogical<uint16_t, 2>()
-        & testVLogical<uint16_t, 3>()
-        & testVLogical<uint16_t, 4>()
         & test1Logical<uint64_t>()
         & testVLogical<uint64_t, 2>()
         & testVLogical<uint64_t, 3>()
@@ -95,6 +90,18 @@ bool testLogical() {
         & testVLogical<bool, 3>()
         & testVLogical<bool, 4>()
 #endif
+
+        // Disabled on WGPU as these built-in types are not supported as of time of writing.
+#if !defined (WGPU)
+        & test1Logical<int16_t>()
+        & testVLogical<int16_t, 2>()
+        & testVLogical<int16_t, 3>()
+        & testVLogical<int16_t, 4>()
+        & test1Logical<uint16_t>()
+        & testVLogical<uint16_t, 2>()
+        & testVLogical<uint16_t, 3>()
+        & testVLogical<uint16_t, 4>()
+#endif
         ;
 }
 
@@ -138,8 +145,8 @@ bool testArithmetic() {
         & testVArithmetic<uint, 3>()
         & testVArithmetic<uint, 4>()
 
-        // Disabled on WGPU as these built-in types are not supported as of time of writing.
-#if !defined (WGPU)
+        // Disabled on WGPU and Metal as these built-in types are not supported as of time of writing.
+#if !defined(WGPU) && !defined(METAL)
         & test1Arithmetic<double>() // WARNING: intel GPU's lack FP64 support
         & testVArithmetic<double, 2>()
         & testVArithmetic<double, 3>()
@@ -148,10 +155,6 @@ bool testArithmetic() {
         & testVArithmetic<int8_t, 2>()
         & testVArithmetic<int8_t, 3>()
         & testVArithmetic<int8_t, 4>()
-        & test1Arithmetic<int16_t>() 
-        & testVArithmetic<int16_t, 2>()
-        & testVArithmetic<int16_t, 3>()
-        & testVArithmetic<int16_t, 4>()
         & test1Arithmetic<int64_t>() 
         & testVArithmetic<int64_t, 2>()
         & testVArithmetic<int64_t, 3>()
@@ -160,14 +163,21 @@ bool testArithmetic() {
         & testVArithmetic<uint8_t, 2>()
         & testVArithmetic<uint8_t, 3>()
         & testVArithmetic<uint8_t, 4>()
+        & test1Arithmetic<uint64_t>()
+        & testVArithmetic<uint64_t, 2>()
+        & testVArithmetic<uint64_t, 3>()
+        & testVArithmetic<uint64_t, 4>()
+#endif
+        // Disabled on WGPU as these built-in types are not supported as of time of writing.
+#if !defined (WGPU)
+        & test1Arithmetic<int16_t>() 
+        & testVArithmetic<int16_t, 2>()
+        & testVArithmetic<int16_t, 3>()
+        & testVArithmetic<int16_t, 4>()
         & test1Arithmetic<uint16_t>() 
         & testVArithmetic<uint16_t, 2>()
         & testVArithmetic<uint16_t, 3>()
         & testVArithmetic<uint16_t, 4>()
-        & test1Arithmetic<uint64_t>() 
-        & testVArithmetic<uint64_t, 2>()
-        & testVArithmetic<uint64_t, 3>()
-        & testVArithmetic<uint64_t, 4>()
 #endif
         ;
 }
diff --git a/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-ballot.slang b/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-ballot.slang
index d1ed4cc78..04f1b935a 100644
--- a/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-ballot.slang
+++ b/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-ballot.slang
@@ -11,6 +11,9 @@
 //TEST(compute, vulkan):COMPARE_COMPUTE(filecheck-buffer=BUF):-vk -compute -entry computeMain -allow-glsl -emit-spirv-directly
 //TEST(compute):COMPARE_COMPUTE(filecheck-buffer=BUF):-wgpu -compute -entry computeMain -allow-glsl -xslang -DWGPU
 
+// Not testing because CI runners may not support Metal's intrinsics.
+//DISABLE_TEST(compute):COMPARE_COMPUTE(filecheck-buffer=BUF):-metal -compute -entry computeMain -allow-glsl -xslang -DMETAL
+
 #version 430
 
 // breaks on Nvidia GPU by returning 0 which is trivially wrong (works on Intel Iris Xe)
@@ -76,8 +79,8 @@ bool testBroadcastX() {
         & testVBroadcastX<uint, 3>()
         & testVBroadcastX<uint, 4>()
 
-        // Disabled on WGPU as these built-in types are not supported as of time of writing.
-#if !defined(WGPU)
+        // Disabled on WGPU and Metal as these built-in types are not supported as of time of writing.
+#if !defined(WGPU) && !defined(METAL)
         & test1BroadcastX<double>() // WARNING: intel GPU's lack FP64 support
         & testVBroadcastX<double, 2>()
         & testVBroadcastX<double, 3>()
@@ -86,10 +89,6 @@ bool testBroadcastX() {
         & testVBroadcastX<int8_t, 2>()
         & testVBroadcastX<int8_t, 3>()
         & testVBroadcastX<int8_t, 4>()
-        & test1BroadcastX<int16_t>() 
-        & testVBroadcastX<int16_t, 2>()
-        & testVBroadcastX<int16_t, 3>()
-        & testVBroadcastX<int16_t, 4>()
         & test1BroadcastX<int64_t>() 
         & testVBroadcastX<int64_t, 2>()
         & testVBroadcastX<int64_t, 3>()
@@ -98,10 +97,6 @@ bool testBroadcastX() {
         & testVBroadcastX<uint8_t, 2>()
         & testVBroadcastX<uint8_t, 3>()
         & testVBroadcastX<uint8_t, 4>()
-        & test1BroadcastX<uint16_t>() 
-        & testVBroadcastX<uint16_t, 2>()
-        & testVBroadcastX<uint16_t, 3>()
-        & testVBroadcastX<uint16_t, 4>()
         & test1BroadcastX<uint64_t>() 
         & testVBroadcastX<uint64_t, 2>()
         & testVBroadcastX<uint64_t, 3>()
@@ -111,6 +106,18 @@ bool testBroadcastX() {
         & testVBroadcastX<bool, 3>()
         & testVBroadcastX<bool, 4>()
 #endif
+
+        // Disabled on WGPU as these built-in types are not supported as of time of writing.
+#if !defined(WGPU)
+        & test1BroadcastX<int16_t>() 
+        & testVBroadcastX<int16_t, 2>()
+        & testVBroadcastX<int16_t, 3>()
+        & testVBroadcastX<int16_t, 4>()
+        & test1BroadcastX<uint16_t>() 
+        & testVBroadcastX<uint16_t, 2>()
+        & testVBroadcastX<uint16_t, 3>()
+        & testVBroadcastX<uint16_t, 4>()
+#endif
         ;
 }
 
@@ -118,7 +125,7 @@ bool testBallot() {
     return true 
         & (subgroupBallot(true).x == 0xFFFFFFFF)
 
-#if !defined(WGPU)
+#if !defined(WGPU) && !defined(METAL)
         & (subgroupInverseBallot(uvec4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF)) == true)
         & (subgroupBallotBitExtract(uvec4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF), 0) == true)
         & (subgroupBallotBitCount(uvec4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF)) == 32)
diff --git a/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-basic.slang b/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-basic.slang
index b862d289c..834b4c5cd 100644
--- a/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-basic.slang
+++ b/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-basic.slang
@@ -10,6 +10,7 @@
 //TEST(compute, vulkan):COMPARE_COMPUTE(filecheck-buffer=BUF):-vk -compute -entry computeMain -allow-glsl
 //TEST(compute, vulkan):COMPARE_COMPUTE(filecheck-buffer=BUF):-vk -compute -entry computeMain -allow-glsl -emit-spirv-directly
 //TEST(compute):COMPARE_COMPUTE(filecheck-buffer=BUF):-wgpu -compute -entry computeMain -allow-glsl -xslang -DWGPU
+//TEST(compute):COMPARE_COMPUTE(filecheck-buffer=BUF):-metal -compute -entry computeMain -allow-glsl
 
 #version 430
 
diff --git a/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-shuffle-relative.slang b/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-shuffle-relative.slang
index 5290ddfae..f9abfd8e5 100644
--- a/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-shuffle-relative.slang
+++ b/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-shuffle-relative.slang
@@ -11,6 +11,7 @@
 //TEST(compute, vulkan):COMPARE_COMPUTE(filecheck-buffer=BUF):-vk -compute -entry computeMain -allow-glsl
 //TEST(compute, vulkan):COMPARE_COMPUTE(filecheck-buffer=BUF):-vk -compute -entry computeMain -allow-glsl -emit-spirv-directly
 //TEST(compute):COMPARE_COMPUTE(filecheck-buffer=BUF):-wgpu -compute -entry computeMain -allow-glsl -xslang -DWGPU
+//TEST(compute):COMPARE_COMPUTE(filecheck-buffer=BUF):-metal -compute -entry computeMain -allow-glsl -xslang -DMETAL
 
 #version 430
 
@@ -74,8 +75,8 @@ bool testShuffleX() {
         & testVShuffleX<uint, 3>()
         & testVShuffleX<uint, 4>()
 
-        // Disabled on WGPU as these built-in types are not supported as of time of writing.
-#if !defined(WGPU)
+        // Disabled on WGPU and Metal as these built-in types are not supported as of time of writing.
+#if !defined(WGPU) && !defined(METAL)
         & test1ShuffleX<double>() // WARNING: intel GPU's lack FP64 support
         & testVShuffleX<double, 2>()
         & testVShuffleX<double, 3>()
@@ -84,10 +85,6 @@ bool testShuffleX() {
         & testVShuffleX<int8_t, 2>()
         & testVShuffleX<int8_t, 3>()
         & testVShuffleX<int8_t, 4>()
-        & test1ShuffleX<int16_t>() 
-        & testVShuffleX<int16_t, 2>()
-        & testVShuffleX<int16_t, 3>()
-        & testVShuffleX<int16_t, 4>()
         & test1ShuffleX<int64_t>() 
         & testVShuffleX<int64_t, 2>()
         & testVShuffleX<int64_t, 3>()
@@ -96,10 +93,6 @@ bool testShuffleX() {
         & testVShuffleX<uint8_t, 2>()
         & testVShuffleX<uint8_t, 3>()
         & testVShuffleX<uint8_t, 4>()
-        & test1ShuffleX<uint16_t>() 
-        & testVShuffleX<uint16_t, 2>()
-        & testVShuffleX<uint16_t, 3>()
-        & testVShuffleX<uint16_t, 4>()
         & test1ShuffleX<uint64_t>() 
         & testVShuffleX<uint64_t, 2>()
         & testVShuffleX<uint64_t, 3>()
@@ -109,6 +102,18 @@ bool testShuffleX() {
         & testVShuffleX<bool, 3>()
         & testVShuffleX<bool, 4>()
 #endif
+
+        // Disabled on WGPU as these built-in types are not supported as of time of writing.
+#if !defined(WGPU)
+        & test1ShuffleX<int16_t>()
+        & testVShuffleX<int16_t, 2>()
+        & testVShuffleX<int16_t, 3>()
+        & testVShuffleX<int16_t, 4>()
+        & test1ShuffleX<uint16_t>()
+        & testVShuffleX<uint16_t, 2>()
+        & testVShuffleX<uint16_t, 3>()
+        & testVShuffleX<uint16_t, 4>()
+#endif
         ;
 }
 
diff --git a/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-shuffle.slang b/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-shuffle.slang
index ea9b8c120..62af93f3e 100644
--- a/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-shuffle.slang
+++ b/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-shuffle.slang
@@ -11,6 +11,7 @@
 //TEST(compute, vulkan):COMPARE_COMPUTE(filecheck-buffer=BUF):-vk -compute -entry computeMain -allow-glsl
 //TEST(compute, vulkan):COMPARE_COMPUTE(filecheck-buffer=BUF):-vk -compute -entry computeMain -allow-glsl -emit-spirv-directly
 //TEST(compute):COMPARE_COMPUTE(filecheck-buffer=BUF):-wgpu -compute -entry computeMain -allow-glsl -xslang -DWGPU
+//TEST(compute):COMPARE_COMPUTE(filecheck-buffer=BUF):-metal -compute -entry computeMain -allow-glsl -xslang -DMETAL
 
 #version 430
 
@@ -89,9 +90,9 @@ bool testShuffleX() {
         & testVShuffleX<uint, 2>()
         & testVShuffleX<uint, 3>()
         & testVShuffleX<uint, 4>()
-        
-        // Disabled on WGPU as these built-in types are not supported as of time of writing.
-#if !defined(WGPU)
+
+        // Disabled on WGPU and Metal as these built-in types are not supported as of time of writing.
+#if !defined(WGPU) && !defined(METAL)
         & test1ShuffleX<double>() // WARNING: intel GPU's lack FP64 support
         & testVShuffleX<double, 2>()
         & testVShuffleX<double, 3>()
@@ -100,30 +101,33 @@ bool testShuffleX() {
         & testVShuffleX<int8_t, 2>()
         & testVShuffleX<int8_t, 3>()
         & testVShuffleX<int8_t, 4>()
-        & test1ShuffleX<int16_t>() 
-        & testVShuffleX<int16_t, 2>()
-        & testVShuffleX<int16_t, 3>()
-        & testVShuffleX<int16_t, 4>()
-        & test1ShuffleX<int64_t>() 
+        & test1ShuffleX<int64_t>()
         & testVShuffleX<int64_t, 2>()
         & testVShuffleX<int64_t, 3>()
         & testVShuffleX<int64_t, 4>()
-        & test1ShuffleX<uint8_t>() 
+        & test1ShuffleX<uint8_t>()
         & testVShuffleX<uint8_t, 2>()
         & testVShuffleX<uint8_t, 3>()
         & testVShuffleX<uint8_t, 4>()
-        & test1ShuffleX<uint16_t>() 
-        & testVShuffleX<uint16_t, 2>()
-        & testVShuffleX<uint16_t, 3>()
-        & testVShuffleX<uint16_t, 4>()
-        & test1ShuffleX<uint64_t>() 
-        & testVShuffleX<uint64_t, 2>()
-        & testVShuffleX<uint64_t, 3>()
-        & testVShuffleX<uint64_t, 4>()
         & test1ShuffleX<bool>()
         & testVShuffleX<bool, 2>()
         & testVShuffleX<bool, 3>()
         & testVShuffleX<bool, 4>()
+        & test1ShuffleX<uint64_t>()
+        & testVShuffleX<uint64_t, 2>()
+        & testVShuffleX<uint64_t, 3>()
+        & testVShuffleX<uint64_t, 4>()
+#endif
+        // Disabled on WGPU as these built-in types are not supported as of time of writing.
+#if !defined(WGPU)
+        & test1ShuffleX<int16_t>()
+        & testVShuffleX<int16_t, 2>()
+        & testVShuffleX<int16_t, 3>()
+        & testVShuffleX<int16_t, 4>()
+        & test1ShuffleX<uint16_t>()
+        & testVShuffleX<uint16_t, 2>()
+        & testVShuffleX<uint16_t, 3>()
+        & testVShuffleX<uint16_t, 4>()
 #endif
         ;
 }
diff --git a/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-vote.slang b/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-vote.slang
index 3f356e647..c0b6e3788 100644
--- a/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-vote.slang
+++ b/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-vote.slang
@@ -11,6 +11,9 @@
 //TEST(compute, vulkan):COMPARE_COMPUTE(filecheck-buffer=BUF):-vk -compute -entry computeMain -allow-glsl -emit-spirv-directly
 //TEST(compute):COMPARE_COMPUTE(filecheck-buffer=BUF):-wgpu -compute -entry computeMain -allow-glsl -xslang -DWGPU
 
+// Not testing because CI runners may not support Metal's intrinsics.
+//DISABLE_TEST(compute):COMPARE_COMPUTE(filecheck-buffer=BUF):-metal -compute -entry computeMain -allow-glsl -xslang -DMETAL
+
 #version 430
 
 //TEST_INPUT:ubuffer(data=[9], stride=4):name=inputBuffer
@@ -168,7 +171,7 @@ void computeMain()
     outputBuffer.data[4] = 1;
 
     // All equal intrinsic is not supported on WGSL as of time of writing.
-#if !defined(WGPU)
+#if !defined(WGPU) && !defined(METAL)
     if (testAllEqual()) {
         subgroupBarrier();
         outputBuffer.data[4] = 2;
diff --git a/tests/hlsl-intrinsic/wave-active-product.slang b/tests/hlsl-intrinsic/wave-active-product.slang
index 1a17f88e9..a15cbfc6d 100644
--- a/tests/hlsl-intrinsic/wave-active-product.slang
+++ b/tests/hlsl-intrinsic/wave-active-product.slang
@@ -5,6 +5,7 @@
 //TEST(vulkan):COMPARE_COMPUTE_EX:-vk -compute -shaderobj -render-feature hardware-device
 //TEST:COMPARE_COMPUTE_EX:-cuda -compute -render-features cuda_sm_7_0 -shaderobj
 //TEST:COMPARE_COMPUTE_EX:-wgpu -compute -shaderobj
+//TEST:COMPARE_COMPUTE_EX:-metal -compute -shaderobj
 
 //TEST_INPUT:ubuffer(data=[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0], stride=4):out,name outputBuffer
 RWStructuredBuffer<int> outputBuffer;
@@ -30,4 +31,4 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID)
 
     outputBuffer[idx] = WaveActiveProduct((idx & 3) + 1);
 #endif
-}
-\ No newline at end of file
+}
diff --git a/tests/hlsl-intrinsic/wave-broadcast-lane-at-vk.slang b/tests/hlsl-intrinsic/wave-broadcast-lane-at-vk.slang
index e51fdb3f9..b0cff08a9 100644
--- a/tests/hlsl-intrinsic/wave-broadcast-lane-at-vk.slang
+++ b/tests/hlsl-intrinsic/wave-broadcast-lane-at-vk.slang
@@ -2,6 +2,7 @@
 //TEST:COMPARE_COMPUTE_EX:-slang -compute -dx12 -use-dxil -profile cs_6_0 -shaderobj
 //TEST(vulkan):COMPARE_COMPUTE_EX:-vk -compute -shaderobj
 //TEST:COMPARE_COMPUTE_EX:-wgpu -compute -shaderobj
+//TEST:COMPARE_COMPUTE_EX:-metal -compute -shaderobj
 
 //TEST_INPUT:ubuffer(data=[0 0 0 0], stride=4):out,name outputBuffer
 RWStructuredBuffer<int> outputBuffer;
@@ -27,4 +28,4 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID)
     }            
    
     outputBuffer[idx] = value;
-}
-\ No newline at end of file
+}
diff --git a/tests/hlsl-intrinsic/wave-diverge.slang b/tests/hlsl-intrinsic/wave-diverge.slang
index 56e9c1841..a18e99f58 100644
--- a/tests/hlsl-intrinsic/wave-diverge.slang
+++ b/tests/hlsl-intrinsic/wave-diverge.slang
@@ -5,6 +5,7 @@
 //TEST(vulkan):COMPARE_COMPUTE_EX:-vk -compute -shaderobj
 //TEST:COMPARE_COMPUTE_EX:-cuda -compute -render-features cuda_sm_7_0 -shaderobj
 //TEST:COMPARE_COMPUTE_EX:-wgpu -compute -shaderobj
+//TEST:COMPARE_COMPUTE_EX:-metal -compute -shaderobj
 
 //TEST_INPUT:ubuffer(data=[0 0 0 0], stride=4):out,name outputBuffer
 RWStructuredBuffer<int> outputBuffer;
@@ -25,4 +26,4 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID)
     value = WaveActiveMin(idx + 1);
     
     outputBuffer[idx] = value;
-}
-\ No newline at end of file
+}
diff --git a/tests/hlsl-intrinsic/wave-is-first-lane.slang b/tests/hlsl-intrinsic/wave-is-first-lane.slang
index 03dcab507..220a5758b 100644
--- a/tests/hlsl-intrinsic/wave-is-first-lane.slang
+++ b/tests/hlsl-intrinsic/wave-is-first-lane.slang
@@ -5,6 +5,7 @@
 //TEST(vulkan):COMPARE_COMPUTE_EX:-vk -compute -shaderobj -render-feature hardware-device
 //TEST:COMPARE_COMPUTE_EX:-cuda -compute -render-features cuda_sm_7_0 -shaderobj
 //TEST:COMPARE_COMPUTE_EX:-wgpu -compute -shaderobj
+//TEST:COMPARE_COMPUTE_EX:-metal -compute -shaderobj
 
 //TEST_INPUT:ubuffer(data=[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0], stride=4):out,name outputBuffer
 RWStructuredBuffer<int> outputBuffer;
@@ -23,4 +24,4 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID)
     
     int value = 0;
     outputBuffer[idx] = WaveIsFirstLane();
-}
-\ No newline at end of file
+}
diff --git a/tests/hlsl-intrinsic/wave-prefix-product.slang b/tests/hlsl-intrinsic/wave-prefix-product.slang
index dfd11a654..774f5996e 100644
--- a/tests/hlsl-intrinsic/wave-prefix-product.slang
+++ b/tests/hlsl-intrinsic/wave-prefix-product.slang
@@ -5,6 +5,7 @@
 //TEST(vulkan):COMPARE_COMPUTE_EX:-vk -compute -shaderobj -render-feature hardware-device
 //TEST:COMPARE_COMPUTE_EX:-cuda -compute -render-features cuda_sm_7_0 -shaderobj
 //TEST:COMPARE_COMPUTE_EX:-wgpu -compute -shaderobj
+//TEST:COMPARE_COMPUTE_EX:-metal -compute -shaderobj
 
 //TEST_INPUT:ubuffer(data=[0 0 0 0 0 0 0 0], stride=4):out,name outputBuffer
 RWStructuredBuffer<int> outputBuffer;
@@ -23,4 +24,4 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID)
     
     outputBuffer[idx] = r0 + (r2 << 16);
     
-}
-\ No newline at end of file
+}
diff --git a/tests/hlsl-intrinsic/wave-prefix-sum-fp16.slang b/tests/hlsl-intrinsic/wave-prefix-sum-fp16.slang
index dc8cfa5bf..03fa39da8 100644
--- a/tests/hlsl-intrinsic/wave-prefix-sum-fp16.slang
+++ b/tests/hlsl-intrinsic/wave-prefix-sum-fp16.slang
@@ -1,6 +1,7 @@
 //TEST:SIMPLE(filecheck=CHECK_SPV):-target spirv -entry computeMain -stage compute -emit-spirv-directly
 //TEST:SIMPLE(filecheck=CHECK_SPV):-target spirv -entry computeMain -stage compute
 //TEST:SIMPLE(filecheck=CHECK_WGSL):-target wgsl -entry computeMain -stage compute
+//TEST:SIMPLE(filecheck=CHECK_METAL):-target metal -entry computeMain -stage compute
 
 //TEST_INPUT:ubuffer(data=[0 0 0 0 0 0 0 0], stride=4):out,name outputBuffer
 RWStructuredBuffer<int> outputBuffer;
@@ -14,7 +15,8 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID)
     
     // CHECK_SPV: OpGroupNonUniformFAdd
     // CHECK_WGSL: subgroupExclusiveAdd
+    // CHECK_METAL: simd_prefix_exclusive_sum
     float2 r1 = WavePrefixSum(v1);
     
     outputBuffer[idx] = (int)r1.x;
-}
-\ No newline at end of file
+}
diff --git a/tests/hlsl-intrinsic/wave-prefix-sum.slang b/tests/hlsl-intrinsic/wave-prefix-sum.slang
index ab3480646..4f7c2912d 100644
--- a/tests/hlsl-intrinsic/wave-prefix-sum.slang
+++ b/tests/hlsl-intrinsic/wave-prefix-sum.slang
@@ -5,6 +5,7 @@
 //TEST(vulkan):COMPARE_COMPUTE_EX:-vk -compute -shaderobj -render-feature hardware-device
 //TEST:COMPARE_COMPUTE_EX:-cuda -compute -render-features cuda_sm_7_0 -shaderobj
 //TEST:COMPARE_COMPUTE_EX:-wgpu -compute -shaderobj
+//TEST:COMPARE_COMPUTE_EX:-metal -compute -shaderobj
 
 //TEST_INPUT:ubuffer(data=[0 0 0 0 0 0 0 0], stride=4):out,name outputBuffer
 RWStructuredBuffer<int> outputBuffer;
@@ -22,4 +23,4 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID)
     int r2 = int(r1.x) + int(r1.y) - idx;
     
     outputBuffer[idx] = r0 + (r2 << 16);
-}
-\ No newline at end of file
+}
diff --git a/tests/hlsl-intrinsic/wave-read-lane-at-vk.slang b/tests/hlsl-intrinsic/wave-read-lane-at-vk.slang
index 4f8a27a74..89ea47415 100644
--- a/tests/hlsl-intrinsic/wave-read-lane-at-vk.slang
+++ b/tests/hlsl-intrinsic/wave-read-lane-at-vk.slang
@@ -4,6 +4,7 @@
 //TEST:COMPARE_COMPUTE_EX:-slang -compute -dx12 -use-dxil -profile cs_6_0 -shaderobj -render-feature hardware-device
 //TEST(vulkan):COMPARE_COMPUTE_EX:-vk -compute -shaderobj
 //TEST:COMPARE_COMPUTE_EX:-wgpu -compute -shaderobj
+//TEST:COMPARE_COMPUTE_EX:-metal -compute -shaderobj
 
 //TEST_INPUT:ubuffer(data=[0 0 0 0], stride=4):out,name outputBuffer
 RWStructuredBuffer<int> outputBuffer;
@@ -42,4 +43,4 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID)
     }
    
     outputBuffer[idx] = value;
-}
-\ No newline at end of file
+}
diff --git a/tests/hlsl-intrinsic/wave-shuffle-vk.slang b/tests/hlsl-intrinsic/wave-shuffle-vk.slang
index 980a8e3b4..fe8defa23 100644
--- a/tests/hlsl-intrinsic/wave-shuffle-vk.slang
+++ b/tests/hlsl-intrinsic/wave-shuffle-vk.slang
@@ -6,6 +6,7 @@
 //TEST(vulkan):COMPARE_COMPUTE_EX:-vk -compute -shaderobj
 //TEST:COMPARE_COMPUTE_EX:-cuda -compute -render-features cuda_sm_7_0 -shaderobj
 //TEST:COMPARE_COMPUTE_EX:-wgpu -compute -shaderobj
+//TEST:COMPARE_COMPUTE_EX:-metal -compute -shaderobj
 
 //TEST_INPUT:ubuffer(data=[0 0 0 0], stride=4):out,name outputBuffer
 RWStructuredBuffer<int> outputBuffer;
@@ -31,4 +32,4 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID)
     }
     
     outputBuffer[idx] = value;
-}
-\ No newline at end of file
+}
diff --git a/tests/hlsl-intrinsic/wave-vector.slang b/tests/hlsl-intrinsic/wave-vector.slang
index d4d99b776..f786794ec 100644
--- a/tests/hlsl-intrinsic/wave-vector.slang
+++ b/tests/hlsl-intrinsic/wave-vector.slang
@@ -5,6 +5,7 @@
 //TEST(vulkan):COMPARE_COMPUTE_EX:-vk -compute -shaderobj -render-feature hardware-device
 //TEST:COMPARE_COMPUTE_EX:-cuda -compute -render-features cuda_sm_7_0 -shaderobj
 //TEST:COMPARE_COMPUTE_EX:-wgpu -compute -shaderobj
+//TEST:COMPARE_COMPUTE_EX:-metal -compute -shaderobj
 
 //TEST_INPUT:ubuffer(data=[0 0 0 0 0 0 0 0], stride=4):out,name outputBuffer
 RWStructuredBuffer<int> outputBuffer;
@@ -28,4 +29,4 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID)
     int2 r = r0 + int2(r1) + r2 + r3 + r4;
    
     outputBuffer[idx] = r.x + r.y;
-}
-\ No newline at end of file
+}
diff --git a/tests/hlsl-intrinsic/wave.slang b/tests/hlsl-intrinsic/wave.slang
index c15233e9c..f7e52b887 100644
--- a/tests/hlsl-intrinsic/wave.slang
+++ b/tests/hlsl-intrinsic/wave.slang
@@ -5,6 +5,7 @@
 //TEST(vulkan):COMPARE_COMPUTE_EX:-vk -compute -shaderobj
 //TEST:COMPARE_COMPUTE_EX:-cuda -compute -render-features cuda_sm_7_0 -shaderobj
 //TEST:COMPARE_COMPUTE_EX:-wgpu -compute -shaderobj
+//TEST:COMPARE_COMPUTE_EX:-metal -compute -shaderobj
 
 //TEST_INPUT:ubuffer(data=[0 0 0 0], stride=4):out,name outputBuffer
 RWStructuredBuffer<int> outputBuffer;
author	Darren Wihandi <65404740+fairywreath@users.noreply.github.com>	2025-02-10 19:40:39 -0500
committer	GitHub <noreply@github.com>	2025-02-10 16:40:39 -0800
commit	133bd259c00984c6a01869f71951a7feb919463a (patch)
tree	a69f1a6b3caff0ac4d958453fde6176ab3c66c91 /tests
parent	f761ab0586353da67bf7b3ae395ad7b090cd904f (diff)