summaryrefslogtreecommitdiff
path: root/tests
diff options
context:
space:
mode:
Diffstat (limited to 'tests')
-rw-r--r--tests/glsl-intrinsic/intrinsic-texture.slang4
-rw-r--r--tests/glsl-intrinsic/shader-subgroup/shader-subgroup-arithmetic_Exclusive.slang191
-rw-r--r--tests/glsl-intrinsic/shader-subgroup/shader-subgroup-arithmetic_Inclusive.slang191
-rw-r--r--tests/glsl-intrinsic/shader-subgroup/shader-subgroup-arithmetic_None.slang191
-rw-r--r--tests/glsl-intrinsic/shader-subgroup/shader-subgroup-ballot.slang142
-rw-r--r--tests/glsl-intrinsic/shader-subgroup/shader-subgroup-basic.slang66
-rw-r--r--tests/glsl-intrinsic/shader-subgroup/shader-subgroup-builtin-variables.slang44
-rw-r--r--tests/glsl-intrinsic/shader-subgroup/shader-subgroup-clustered.slang171
-rw-r--r--tests/glsl-intrinsic/shader-subgroup/shader-subgroup-quad.slang129
-rw-r--r--tests/glsl-intrinsic/shader-subgroup/shader-subgroup-shuffle-relative.slang121
-rw-r--r--tests/glsl-intrinsic/shader-subgroup/shader-subgroup-shuffle.slang139
-rw-r--r--tests/glsl-intrinsic/shader-subgroup/shader-subgroup-vote.slang167
12 files changed, 1554 insertions, 2 deletions
diff --git a/tests/glsl-intrinsic/intrinsic-texture.slang b/tests/glsl-intrinsic/intrinsic-texture.slang
index 3b42be715..591ced099 100644
--- a/tests/glsl-intrinsic/intrinsic-texture.slang
+++ b/tests/glsl-intrinsic/intrinsic-texture.slang
@@ -6,8 +6,8 @@
//TEST:SIMPLE(filecheck=CHECK_CUDA): -allow-glsl -stage fragment -entry computeMain -target cuda
// Disabling following targets because they are currently causing compile errors.
-//T-EST:SIMPLE(filecheck=CHECK_HLSL): -allow-glsl -stage fragment -entry computeMain -target hlsl
-//T-EST:SIMPLE(filecheck=CHECK_CPP): -allow-glsl -stage fragment -entry computeMain -target cpp
+//DISABLE_TEST:SIMPLE(filecheck=CHECK_HLSL): -allow-glsl -stage fragment -entry computeMain -target hlsl
+//DISABLE_TEST:SIMPLE(filecheck=CHECK_CPP): -allow-glsl -stage fragment -entry computeMain -target cpp
// "Offset" family of texture functions in GLSL requires offset parameter to be a constant value.
// It appears that slangc removes the constant-ness of constant values.
diff --git a/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-arithmetic_Exclusive.slang b/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-arithmetic_Exclusive.slang
new file mode 100644
index 000000000..7bfc4d886
--- /dev/null
+++ b/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-arithmetic_Exclusive.slang
@@ -0,0 +1,191 @@
+//TEST:SIMPLE(filecheck=CHECK_GLSL): -allow-glsl -stage compute -entry computeMain -target glsl -DTARGET_GLSL
+//TEST:SIMPLE(filecheck=CHECK_SPV): -allow-glsl -stage compute -entry computeMain -target spirv -emit-spirv-directly -DTARGET_SPIRV
+//TEST:SIMPLE(filecheck=CHECK_HLSL): -allow-glsl -stage compute -entry computeMain -target hlsl -DTARGET_HLSL
+//TEST:SIMPLE(filecheck=CHECK_CUDA): -allow-glsl -stage compute -entry computeMain -target cuda -DTARGET_CUDA
+
+// not testing cpp due to missing impl
+//DISABLE_TEST:SIMPLE(filecheck=CHECK_CPP): -allow-glsl -stage compute -entry computeMain -target cpp -DTARGET_CPP
+
+//TEST(compute, vulkan):COMPARE_COMPUTE(filecheck-buffer=BUF):-vk -compute -entry computeMain -allow-glsl
+//TEST(compute, vulkan):COMPARE_COMPUTE(filecheck-buffer=BUF):-vk -compute -entry computeMain -allow-glsl -emit-spirv-directly
+#version 430
+
+#if 1 \
+ && !defined(TARGET_HLSL) \
+ && !defined(TARGET_CUDA)
+// hlsl does not treat boolean types with subgroup.* as a logical operator
+// cuda is missing an implementation
+#define TEST_when_logical_operators_are_implemented
+#endif
+
+//TEST_INPUT:ubuffer(data=[0 0], stride=4):out,name=outputBuffer
+buffer MyBlockName2
+{
+ uint data[];
+} outputBuffer;
+
+#define local_size_x_v 4
+layout(local_size_x = local_size_x_v) in;
+
+__generic<T : __BuiltinLogicalType>
+bool test1Logical() {
+ return true
+#if defined(TEST_when_logical_operators_are_implemented)
+ && subgroupExclusiveAnd(T(1)) == T(1)
+ && subgroupExclusiveOr(T(1)) == T(1)
+ && subgroupExclusiveXor(T(1)) == T(1)
+#endif // #if defined(TEST_when_logical_operators_are_implemented)
+ ;
+}
+
+__generic<T : __BuiltinLogicalType, let N : int>
+bool testVLogical() {
+ typealias gvec = vector<T, N>;
+
+ return true
+#if defined(TEST_when_logical_operators_are_implemented)
+ && subgroupExclusiveAnd(gvec(T(1))) == gvec(T(1))
+ && subgroupExclusiveOr(gvec(T(1))) == gvec(T(1))
+ && subgroupExclusiveXor(gvec(T(1))) == gvec(T(1))
+#endif // #if defined(TEST_when_logical_operators_are_implemented)
+ ;
+}
+
+bool testLogical() {
+ return true
+ && test1Logical<int>()
+ && testVLogical<int, 2>()
+ && testVLogical<int, 3>()
+ && testVLogical<int, 4>()
+ && test1Logical<int8_t>()
+ && testVLogical<int8_t, 2>()
+ && testVLogical<int8_t, 3>()
+ && testVLogical<int8_t, 4>()
+ && test1Logical<int16_t>()
+ && testVLogical<int16_t, 2>()
+ && testVLogical<int16_t, 3>()
+ && testVLogical<int16_t, 4>()
+ && test1Logical<int64_t>()
+ && testVLogical<int64_t, 2>()
+ && testVLogical<int64_t, 3>()
+ && testVLogical<int64_t, 4>()
+ && test1Logical<uint>()
+ && testVLogical<uint, 2>()
+ && testVLogical<uint, 3>()
+ && testVLogical<uint, 4>()
+ && test1Logical<uint8_t>()
+ && testVLogical<uint8_t, 2>()
+ && testVLogical<uint8_t, 3>()
+ && testVLogical<uint8_t, 4>()
+ && test1Logical<uint16_t>()
+ && testVLogical<uint16_t, 2>()
+ && testVLogical<uint16_t, 3>()
+ && testVLogical<uint16_t, 4>()
+ && test1Logical<uint64_t>()
+ && testVLogical<uint64_t, 2>()
+ && testVLogical<uint64_t, 3>()
+ && testVLogical<uint64_t, 4>()
+ && test1Logical<bool>()
+ && testVLogical<bool, 2>()
+ && testVLogical<bool, 3>()
+ && testVLogical<bool, 4>()
+ ;
+}
+
+__generic<T : __BuiltinArithmeticType>
+bool test1Arithmetic() {
+ return true
+ && subgroupExclusiveAdd(T(1)) == T(3)
+ && subgroupExclusiveMul(T(1)) == T(1)
+ && subgroupExclusiveMin(T(1)) == T(1)
+ && subgroupExclusiveMax(T(1)) == T(1)
+ ;
+}
+__generic<T : __BuiltinArithmeticType, let N : int>
+bool testVArithmetic() {
+ typealias gvec = vector<T, N>;
+
+ return true
+ && subgroupExclusiveAdd(gvec(T(1))) == gvec(T(3))
+ && subgroupExclusiveMul(gvec(T(1))) == gvec(T(1))
+ && subgroupExclusiveMin(gvec(T(1))) == gvec(T(1))
+ && subgroupExclusiveMax(gvec(T(1))) == gvec(T(1))
+ ;
+}
+
+bool testArithmetic() {
+ return true
+ && test1Arithmetic<float>()
+ && testVArithmetic<float, 2>()
+ && testVArithmetic<float, 3>()
+ && testVArithmetic<float, 4>()
+ && test1Arithmetic<double>() // WARNING: intel GPU's lack FP64 support
+ && testVArithmetic<double, 2>()
+ && testVArithmetic<double, 3>()
+ && testVArithmetic<double, 4>()
+ && test1Arithmetic<half>()
+ && testVArithmetic<half, 2>()
+ && testVArithmetic<half, 3>()
+ && testVArithmetic<half, 4>()
+ && test1Arithmetic<int>()
+ && testVArithmetic<int, 2>()
+ && testVArithmetic<int, 3>()
+ && testVArithmetic<int, 4>()
+ && test1Arithmetic<int8_t>()
+ && testVArithmetic<int8_t, 2>()
+ && testVArithmetic<int8_t, 3>()
+ && testVArithmetic<int8_t, 4>()
+ && test1Arithmetic<int16_t>()
+ && testVArithmetic<int16_t, 2>()
+ && testVArithmetic<int16_t, 3>()
+ && testVArithmetic<int16_t, 4>()
+ && test1Arithmetic<int64_t>()
+ && testVArithmetic<int64_t, 2>()
+ && testVArithmetic<int64_t, 3>()
+ && testVArithmetic<int64_t, 4>()
+ && test1Arithmetic<uint>()
+ && testVArithmetic<uint, 2>()
+ && testVArithmetic<uint, 3>()
+ && testVArithmetic<uint, 4>()
+ && test1Arithmetic<uint8_t>()
+ && testVArithmetic<uint8_t, 2>()
+ && testVArithmetic<uint8_t, 3>()
+ && testVArithmetic<uint8_t, 4>()
+ && test1Arithmetic<uint16_t>()
+ && testVArithmetic<uint16_t, 2>()
+ && testVArithmetic<uint16_t, 3>()
+ && testVArithmetic<uint16_t, 4>()
+ && test1Arithmetic<uint64_t>()
+ && testVArithmetic<uint64_t, 2>()
+ && testVArithmetic<uint64_t, 3>()
+ && testVArithmetic<uint64_t, 4>()
+ ;
+}
+
+void computeMain()
+{
+
+ bool res0 = true
+ && testLogical()
+ ;
+
+ bool res1 = true
+ && testArithmetic()
+ ;
+
+ if (gl_LocalInvocationID.x == 3) {
+ // Only invocation 3 writes. Logical and arithmetic results are kept
+ // separate so a failure is isolated to one of the 2 groups without
+ // polluting the file with a bunch of individual test values.
+ outputBuffer.data[0] = res0;
+ outputBuffer.data[1] = res1;
+ }
+
+ // CHECK_GLSL: void main(
+ // CHECK_SPV: OpEntryPoint
+ // CHECK_HLSL: void computeMain(
+ // CHECK_CUDA: void computeMain(
+ // CHECK_CPP: void _computeMain(
+ // BUF: 1
+ // BUF-NEXT: 1
+}
diff --git a/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-arithmetic_Inclusive.slang b/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-arithmetic_Inclusive.slang
new file mode 100644
index 000000000..09c6bdbdf
--- /dev/null
+++ b/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-arithmetic_Inclusive.slang
@@ -0,0 +1,191 @@
+//TEST:SIMPLE(filecheck=CHECK_GLSL): -allow-glsl -stage compute -entry computeMain -target glsl -DTARGET_GLSL
+//TEST:SIMPLE(filecheck=CHECK_SPV): -allow-glsl -stage compute -entry computeMain -target spirv -emit-spirv-directly -DTARGET_SPIRV
+//TEST:SIMPLE(filecheck=CHECK_HLSL): -allow-glsl -stage compute -entry computeMain -target hlsl -DTARGET_HLSL
+//TEST:SIMPLE(filecheck=CHECK_CUDA): -allow-glsl -stage compute -entry computeMain -target cuda -DTARGET_CUDA
+
+// not testing cpp due to missing impl
+//DISABLE_TEST:SIMPLE(filecheck=CHECK_CPP): -allow-glsl -stage compute -entry computeMain -target cpp -DTARGET_CPP
+
+//TEST(compute, vulkan):COMPARE_COMPUTE(filecheck-buffer=BUF):-vk -compute -entry computeMain -allow-glsl
+//TEST(compute, vulkan):COMPARE_COMPUTE(filecheck-buffer=BUF):-vk -compute -entry computeMain -allow-glsl -emit-spirv-directly
+#version 430
+
+#if 1 \
+ && !defined(TARGET_HLSL) \
+ && !defined(TARGET_CUDA)
+// hlsl does not treat boolean types with subgroup.* as a logical operator
+// cuda is missing an implementation
+#define TEST_when_logical_operators_are_implemented
+#endif
+
+//TEST_INPUT:ubuffer(data=[0 0], stride=4):out,name=outputBuffer
+buffer MyBlockName2
+{
+ uint data[];
+} outputBuffer;
+
+#define local_size_x_v 4
+layout(local_size_x = local_size_x_v) in;
+
+__generic<T : __BuiltinLogicalType>
+bool test1Logical() {
+ return true
+#if defined(TEST_when_logical_operators_are_implemented)
+ && subgroupInclusiveAnd(T(1)) == T(1)
+ && subgroupInclusiveOr(T(1)) == T(1)
+ && subgroupInclusiveXor(T(1)) == T(0)
+#endif // #if defined(TEST_when_logical_operators_are_implemented)
+ ;
+}
+
+__generic<T : __BuiltinLogicalType, let N : int>
+bool testVLogical() {
+ typealias gvec = vector<T, N>;
+
+ return true
+#if defined(TEST_when_logical_operators_are_implemented)
+ && subgroupInclusiveAnd(gvec(T(1))) == gvec(T(1))
+ && subgroupInclusiveOr(gvec(T(1))) == gvec(T(1))
+ && subgroupInclusiveXor(gvec(T(1))) == gvec(T(0))
+#endif // #if defined(TEST_when_logical_operators_are_implemented)
+ ;
+}
+
+bool testLogical() {
+ return true
+ && test1Logical<int>()
+ && testVLogical<int, 2>()
+ && testVLogical<int, 3>()
+ && testVLogical<int, 4>()
+ && test1Logical<int8_t>()
+ && testVLogical<int8_t, 2>()
+ && testVLogical<int8_t, 3>()
+ && testVLogical<int8_t, 4>()
+ && test1Logical<int16_t>()
+ && testVLogical<int16_t, 2>()
+ && testVLogical<int16_t, 3>()
+ && testVLogical<int16_t, 4>()
+ && test1Logical<int64_t>()
+ && testVLogical<int64_t, 2>()
+ && testVLogical<int64_t, 3>()
+ && testVLogical<int64_t, 4>()
+ && test1Logical<uint>()
+ && testVLogical<uint, 2>()
+ && testVLogical<uint, 3>()
+ && testVLogical<uint, 4>()
+ && test1Logical<uint8_t>()
+ && testVLogical<uint8_t, 2>()
+ && testVLogical<uint8_t, 3>()
+ && testVLogical<uint8_t, 4>()
+ && test1Logical<uint16_t>()
+ && testVLogical<uint16_t, 2>()
+ && testVLogical<uint16_t, 3>()
+ && testVLogical<uint16_t, 4>()
+ && test1Logical<uint64_t>()
+ && testVLogical<uint64_t, 2>()
+ && testVLogical<uint64_t, 3>()
+ && testVLogical<uint64_t, 4>()
+ && test1Logical<bool>()
+ && testVLogical<bool, 2>()
+ && testVLogical<bool, 3>()
+ && testVLogical<bool, 4>()
+ ;
+}
+
+__generic<T : __BuiltinArithmeticType>
+bool test1Arithmetic() {
+ return true
+ && subgroupInclusiveAdd(T(1)) == T(4)
+ && subgroupInclusiveMul(T(1)) == T(1)
+ && subgroupInclusiveMin(T(1)) == T(1)
+ && subgroupInclusiveMax(T(1)) == T(1)
+ ;
+}
+__generic<T : __BuiltinArithmeticType, let N : int>
+bool testVArithmetic() {
+ typealias gvec = vector<T, N>;
+
+ return true
+ && subgroupInclusiveAdd(gvec(T(1))) == gvec(T(4))
+ && subgroupInclusiveMul(gvec(T(1))) == gvec(T(1))
+ && subgroupInclusiveMin(gvec(T(1))) == gvec(T(1))
+ && subgroupInclusiveMax(gvec(T(1))) == gvec(T(1))
+ ;
+}
+
+bool testArithmetic() {
+ return true
+ && test1Arithmetic<float>()
+ && testVArithmetic<float, 2>()
+ && testVArithmetic<float, 3>()
+ && testVArithmetic<float, 4>()
+ && test1Arithmetic<double>() // WARNING: intel GPU's lack FP64 support
+ && testVArithmetic<double, 2>()
+ && testVArithmetic<double, 3>()
+ && testVArithmetic<double, 4>()
+ && test1Arithmetic<half>()
+ && testVArithmetic<half, 2>()
+ && testVArithmetic<half, 3>()
+ && testVArithmetic<half, 4>()
+ && test1Arithmetic<int>()
+ && testVArithmetic<int, 2>()
+ && testVArithmetic<int, 3>()
+ && testVArithmetic<int, 4>()
+ && test1Arithmetic<int8_t>()
+ && testVArithmetic<int8_t, 2>()
+ && testVArithmetic<int8_t, 3>()
+ && testVArithmetic<int8_t, 4>()
+ && test1Arithmetic<int16_t>()
+ && testVArithmetic<int16_t, 2>()
+ && testVArithmetic<int16_t, 3>()
+ && testVArithmetic<int16_t, 4>()
+ && test1Arithmetic<int64_t>()
+ && testVArithmetic<int64_t, 2>()
+ && testVArithmetic<int64_t, 3>()
+ && testVArithmetic<int64_t, 4>()
+ && test1Arithmetic<uint>()
+ && testVArithmetic<uint, 2>()
+ && testVArithmetic<uint, 3>()
+ && testVArithmetic<uint, 4>()
+ && test1Arithmetic<uint8_t>()
+ && testVArithmetic<uint8_t, 2>()
+ && testVArithmetic<uint8_t, 3>()
+ && testVArithmetic<uint8_t, 4>()
+ && test1Arithmetic<uint16_t>()
+ && testVArithmetic<uint16_t, 2>()
+ && testVArithmetic<uint16_t, 3>()
+ && testVArithmetic<uint16_t, 4>()
+ && test1Arithmetic<uint64_t>()
+ && testVArithmetic<uint64_t, 2>()
+ && testVArithmetic<uint64_t, 3>()
+ && testVArithmetic<uint64_t, 4>()
+ ;
+}
+
+void computeMain()
+{
+
+ bool res0 = true
+ && testLogical()
+ ;
+
+ bool res1 = true
+ && testArithmetic()
+ ;
+
+ if (gl_LocalInvocationID.x == 3) {
+ // Only invocation 3 writes. Logical and arithmetic results are kept
+ // separate so a failure is isolated to one of the 2 groups without
+ // polluting the file with a bunch of individual test values.
+ outputBuffer.data[0] = res0;
+ outputBuffer.data[1] = res1;
+ }
+
+ // CHECK_GLSL: void main(
+ // CHECK_SPV: OpEntryPoint
+ // CHECK_HLSL: void computeMain(
+ // CHECK_CUDA: void computeMain(
+ // CHECK_CPP: void _computeMain(
+ // BUF: 1
+ // BUF-NEXT: 1
+}
diff --git a/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-arithmetic_None.slang b/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-arithmetic_None.slang
new file mode 100644
index 000000000..5300e6796
--- /dev/null
+++ b/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-arithmetic_None.slang
@@ -0,0 +1,191 @@
+//TEST:SIMPLE(filecheck=CHECK_GLSL): -allow-glsl -stage compute -entry computeMain -target glsl -DTARGET_GLSL
+//TEST:SIMPLE(filecheck=CHECK_SPV): -allow-glsl -stage compute -entry computeMain -target spirv -emit-spirv-directly -DTARGET_SPIRV
+//TEST:SIMPLE(filecheck=CHECK_HLSL): -allow-glsl -stage compute -entry computeMain -target hlsl -DTARGET_HLSL
+//TEST:SIMPLE(filecheck=CHECK_CUDA): -allow-glsl -stage compute -entry computeMain -target cuda -DTARGET_CUDA
+
+// not testing cpp due to missing impl
+//DISABLE_TEST:SIMPLE(filecheck=CHECK_CPP): -allow-glsl -stage compute -entry computeMain -target cpp -DTARGET_CPP
+
+//TEST(compute, vulkan):COMPARE_COMPUTE(filecheck-buffer=BUF):-vk -compute -entry computeMain -allow-glsl
+//TEST(compute, vulkan):COMPARE_COMPUTE(filecheck-buffer=BUF):-vk -compute -entry computeMain -allow-glsl -emit-spirv-directly
+#version 430
+
+#if 1 \
+ && !defined(TARGET_HLSL) \
+ && !defined(TARGET_CUDA)
+// hlsl does not treat boolean types with subgroup.* as a logical operator
+// cuda is missing an implementation
+#define TEST_when_logical_operators_are_implemented
+#endif
+
+//TEST_INPUT:ubuffer(data=[0 0], stride=4):out,name=outputBuffer
+buffer MyBlockName2
+{
+ uint data[];
+} outputBuffer;
+
+#define local_size_x_v 4
+layout(local_size_x = local_size_x_v) in;
+
+__generic<T : __BuiltinLogicalType>
+bool test1Logical() {
+ return true
+#if defined(TEST_when_logical_operators_are_implemented)
+ && subgroupAnd(T(1)) == T(1)
+ && subgroupOr(T(1)) == T(1)
+ && subgroupXor(T(1)) == T(0)
+#endif // #if defined(TEST_when_logical_operators_are_implemented)
+ ;
+}
+
+__generic<T : __BuiltinLogicalType, let N : int>
+bool testVLogical() {
+ typealias gvec = vector<T, N>;
+
+ return true
+#if defined(TEST_when_logical_operators_are_implemented)
+ && subgroupAnd(gvec(T(1))) == gvec(T(1))
+ && subgroupOr(gvec(T(1))) == gvec(T(1))
+ && subgroupXor(gvec(T(1))) == gvec(T(0))
+#endif // #if defined(TEST_when_logical_operators_are_implemented)
+ ;
+}
+
+bool testLogical() {
+ return true
+ && test1Logical<int>()
+ && testVLogical<int, 2>()
+ && testVLogical<int, 3>()
+ && testVLogical<int, 4>()
+ && test1Logical<int8_t>()
+ && testVLogical<int8_t, 2>()
+ && testVLogical<int8_t, 3>()
+ && testVLogical<int8_t, 4>()
+ && test1Logical<int16_t>()
+ && testVLogical<int16_t, 2>()
+ && testVLogical<int16_t, 3>()
+ && testVLogical<int16_t, 4>()
+ && test1Logical<int64_t>()
+ && testVLogical<int64_t, 2>()
+ && testVLogical<int64_t, 3>()
+ && testVLogical<int64_t, 4>()
+ && test1Logical<uint>()
+ && testVLogical<uint, 2>()
+ && testVLogical<uint, 3>()
+ && testVLogical<uint, 4>()
+ && test1Logical<uint8_t>()
+ && testVLogical<uint8_t, 2>()
+ && testVLogical<uint8_t, 3>()
+ && testVLogical<uint8_t, 4>()
+ && test1Logical<uint16_t>()
+ && testVLogical<uint16_t, 2>()
+ && testVLogical<uint16_t, 3>()
+ && testVLogical<uint16_t, 4>()
+ && test1Logical<uint64_t>()
+ && testVLogical<uint64_t, 2>()
+ && testVLogical<uint64_t, 3>()
+ && testVLogical<uint64_t, 4>()
+ && test1Logical<bool>()
+ && testVLogical<bool, 2>()
+ && testVLogical<bool, 3>()
+ && testVLogical<bool, 4>()
+ ;
+}
+
+__generic<T : __BuiltinArithmeticType>
+bool test1Arithmetic() {
+ return true
+ && subgroupAdd(T(1)) == T(local_size_x_v) // local_size_x_v is defined as 4 in this file, not 32
+ && subgroupMul(T(1)) == T(1)
+ && subgroupMin(T(1)) == T(1)
+ && subgroupMax(T(1)) == T(1)
+ ;
+}
+__generic<T : __BuiltinArithmeticType, let N : int>
+bool testVArithmetic() {
+ typealias gvec = vector<T, N>;
+
+ return true
+ && subgroupAdd(gvec(T(1))) == gvec(T(local_size_x_v)) // local_size_x_v is defined as 4 in this file, not 32
+ && subgroupMul(gvec(T(1))) == gvec(T(1))
+ && subgroupMin(gvec(T(1))) == gvec(T(1))
+ && subgroupMax(gvec(T(1))) == gvec(T(1))
+ ;
+}
+
+bool testArithmetic() {
+ return true
+ && test1Arithmetic<float>()
+ && testVArithmetic<float, 2>()
+ && testVArithmetic<float, 3>()
+ && testVArithmetic<float, 4>()
+ && test1Arithmetic<double>() // WARNING: intel GPU's lack FP64 support
+ && testVArithmetic<double, 2>()
+ && testVArithmetic<double, 3>()
+ && testVArithmetic<double, 4>()
+ && test1Arithmetic<half>()
+ && testVArithmetic<half, 2>()
+ && testVArithmetic<half, 3>()
+ && testVArithmetic<half, 4>()
+ && test1Arithmetic<int>()
+ && testVArithmetic<int, 2>()
+ && testVArithmetic<int, 3>()
+ && testVArithmetic<int, 4>()
+ && test1Arithmetic<int8_t>()
+ && testVArithmetic<int8_t, 2>()
+ && testVArithmetic<int8_t, 3>()
+ && testVArithmetic<int8_t, 4>()
+ && test1Arithmetic<int16_t>()
+ && testVArithmetic<int16_t, 2>()
+ && testVArithmetic<int16_t, 3>()
+ && testVArithmetic<int16_t, 4>()
+ && test1Arithmetic<int64_t>()
+ && testVArithmetic<int64_t, 2>()
+ && testVArithmetic<int64_t, 3>()
+ && testVArithmetic<int64_t, 4>()
+ && test1Arithmetic<uint>()
+ && testVArithmetic<uint, 2>()
+ && testVArithmetic<uint, 3>()
+ && testVArithmetic<uint, 4>()
+ && test1Arithmetic<uint8_t>()
+ && testVArithmetic<uint8_t, 2>()
+ && testVArithmetic<uint8_t, 3>()
+ && testVArithmetic<uint8_t, 4>()
+ && test1Arithmetic<uint16_t>()
+ && testVArithmetic<uint16_t, 2>()
+ && testVArithmetic<uint16_t, 3>()
+ && testVArithmetic<uint16_t, 4>()
+ && test1Arithmetic<uint64_t>()
+ && testVArithmetic<uint64_t, 2>()
+ && testVArithmetic<uint64_t, 3>()
+ && testVArithmetic<uint64_t, 4>()
+ ;
+}
+
+void computeMain()
+{
+
+ bool res0 = true
+ && testLogical()
+ ;
+
+ bool res1 = true
+ && testArithmetic()
+ ;
+
+ if (gl_LocalInvocationID.x == 3) {
+ // Only invocation 3 writes. Logical and arithmetic results are kept
+ // separate so a failure is isolated to one of the 2 groups without
+ // polluting the file with a bunch of individual test values.
+ outputBuffer.data[0] = res0;
+ outputBuffer.data[1] = res1;
+ }
+
+ // CHECK_GLSL: void main(
+ // CHECK_SPV: OpEntryPoint
+ // CHECK_HLSL: void computeMain(
+ // CHECK_CUDA: void computeMain(
+ // CHECK_CPP: void _computeMain(
+ // BUF: 1
+ // BUF-NEXT: 1
+}
diff --git a/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-ballot.slang b/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-ballot.slang
new file mode 100644
index 000000000..8bbd60689
--- /dev/null
+++ b/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-ballot.slang
@@ -0,0 +1,142 @@
+//TEST:SIMPLE(filecheck=CHECK_GLSL): -allow-glsl -stage compute -entry computeMain -target glsl
+//TEST:SIMPLE(filecheck=CHECK_SPV): -allow-glsl -stage compute -entry computeMain -target spirv -emit-spirv-directly
+//TEST:SIMPLE(filecheck=CHECK_HLSL): -allow-glsl -stage compute -entry computeMain -target hlsl -DTARGET_HLSL
+
+// not testing cuda due to missing impl
+//DISABLE_TEST:SIMPLE(filecheck=CHECK_CUDA): -allow-glsl -stage compute -entry computeMain -target cuda -DTARGET_CUDA
+// not testing cpp due to missing impl
+//DISABLE_TEST:SIMPLE(filecheck=CHECK_CPP): -allow-glsl -stage compute -entry computeMain -target cpp -DTARGET_CPP
+
+//TEST(compute, vulkan):COMPARE_COMPUTE(filecheck-buffer=BUF):-vk -compute -entry computeMain -allow-glsl
+//TEST(compute, vulkan):COMPARE_COMPUTE(filecheck-buffer=BUF):-vk -compute -entry computeMain -allow-glsl -emit-spirv-directly
+#version 430
+
+// breaks on Nvidia GPU by returning 0 which is trivially wrong (works on Intel Iris Xe)
+//#define TEST_when_glsl_subgroupBallotExclusiveBitCount_is_not_bugged
+
+//TEST_INPUT:ubuffer(data=[0 0], stride=4):out,name=outputBuffer
+buffer MyBlockName2
+{
+ uint data[];
+} outputBuffer;
+
+layout(local_size_x = 32) in;
+
+__generic<T : __BuiltinLogicalType>
+bool test1BroadcastX() {
+ return true
+ && subgroupBroadcast(T(1), 0) == T(1)
+ && subgroupBroadcastFirst(T(1)) == T(1)
+ ;
+}
+__generic<T : __BuiltinLogicalType, let N : int>
+bool testVBroadcastX() {
+ typealias gvec = vector<T, N>;
+
+ return true
+ && subgroupBroadcast(gvec(T(1)), 0) == gvec(T(1))
+ && subgroupBroadcastFirst(gvec(T(1))) == gvec(T(1))
+ ;
+}
+
+__generic<T : __BuiltinFloatingPointType>
+bool test1BroadcastX() {
+ return true
+ && subgroupBroadcast(T(1), 0) == T(1)
+ && subgroupBroadcastFirst(T(1)) == T(1)
+ ;
+}
+__generic<T : __BuiltinFloatingPointType, let N : int>
+bool testVBroadcastX() {
+ typealias gvec = vector<T, N>;
+
+ return true
+ && subgroupBroadcast(gvec(T(1)), 0) == gvec(T(1))
+ && subgroupBroadcastFirst(gvec(T(1))) == gvec(T(1))
+ ;
+}
+bool testBroadcastX() {
+ return true
+ && test1BroadcastX<float>()
+ && testVBroadcastX<float, 2>()
+ && testVBroadcastX<float, 3>()
+ && testVBroadcastX<float, 4>()
+ && test1BroadcastX<double>() // WARNING: intel GPU's lack FP64 support
+ && testVBroadcastX<double, 2>()
+ && testVBroadcastX<double, 3>()
+ && testVBroadcastX<double, 4>()
+ && test1BroadcastX<half>()
+ && testVBroadcastX<half, 2>()
+ && testVBroadcastX<half, 3>()
+ && testVBroadcastX<half, 4>()
+ && test1BroadcastX<int>()
+ && testVBroadcastX<int, 2>()
+ && testVBroadcastX<int, 3>()
+ && testVBroadcastX<int, 4>()
+ && test1BroadcastX<int8_t>()
+ && testVBroadcastX<int8_t, 2>()
+ && testVBroadcastX<int8_t, 3>()
+ && testVBroadcastX<int8_t, 4>()
+ && test1BroadcastX<int16_t>()
+ && testVBroadcastX<int16_t, 2>()
+ && testVBroadcastX<int16_t, 3>()
+ && testVBroadcastX<int16_t, 4>()
+ && test1BroadcastX<int64_t>()
+ && testVBroadcastX<int64_t, 2>()
+ && testVBroadcastX<int64_t, 3>()
+ && testVBroadcastX<int64_t, 4>()
+ && test1BroadcastX<uint>()
+ && testVBroadcastX<uint, 2>()
+ && testVBroadcastX<uint, 3>()
+ && testVBroadcastX<uint, 4>()
+ && test1BroadcastX<uint8_t>()
+ && testVBroadcastX<uint8_t, 2>()
+ && testVBroadcastX<uint8_t, 3>()
+ && testVBroadcastX<uint8_t, 4>()
+ && test1BroadcastX<uint16_t>()
+ && testVBroadcastX<uint16_t, 2>()
+ && testVBroadcastX<uint16_t, 3>()
+ && testVBroadcastX<uint16_t, 4>()
+ && test1BroadcastX<uint64_t>()
+ && testVBroadcastX<uint64_t, 2>()
+ && testVBroadcastX<uint64_t, 3>()
+ && testVBroadcastX<uint64_t, 4>()
+ && test1BroadcastX<bool>()
+ && testVBroadcastX<bool, 2>()
+ && testVBroadcastX<bool, 3>()
+ && testVBroadcastX<bool, 4>()
+ ;
+}
+
+bool testBallot() {
+ return true
+ && (subgroupBallot(true).x == 0xFFFFFFFF)
+ && (subgroupInverseBallot(uvec4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF)) == true)
+ && (subgroupBallotBitExtract(uvec4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF), 0) == true)
+ && (subgroupBallotBitCount(uvec4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF)) == 32)
+ && (subgroupBallotInclusiveBitCount(uvec4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF)) != 0)
+#ifdef TEST_when_glsl_subgroupBallotExclusiveBitCount_is_not_bugged
+ && (subgroupBallotExclusiveBitCount(uvec4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF)) != 0)
+#endif
+ && (subgroupBallotFindLSB(uvec4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF)) == 0)
+ && (subgroupBallotFindMSB(uvec4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF)) == 31)
+ ;
+}
+
+void computeMain()
+{
+ outputBuffer.data[0] = true
+ && testBroadcastX()
+ ;
+ outputBuffer.data[1] = true
+ && testBallot()
+ ;
+
+ // CHECK_GLSL: void main(
+ // CHECK_SPV: OpEntryPoint
+ // CHECK_HLSL: void computeMain(
+ // CHECK_CUDA: void computeMain(
+ // CHECK_CPP: void _computeMain(
+ // BUF: 1
+ // BUF-NEXT: 1
+}
diff --git a/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-basic.slang b/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-basic.slang
new file mode 100644
index 000000000..82f2dc8e2
--- /dev/null
+++ b/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-basic.slang
@@ -0,0 +1,66 @@
+//TEST:SIMPLE(filecheck=CHECK_GLSL): -allow-glsl -stage compute -entry computeMain -target glsl
+//TEST:SIMPLE(filecheck=CHECK_SPV): -allow-glsl -stage compute -entry computeMain -target spirv -emit-spirv-directly
+//TEST:SIMPLE(filecheck=CHECK_HLSL): -allow-glsl -stage compute -entry computeMain -target hlsl -DTARGET_HLSL
+
+// not testing cuda due to missing impl
+//DISABLE_TEST:SIMPLE(filecheck=CHECK_CUDA): -allow-glsl -stage compute -entry computeMain -target cuda -DTARGET_CUDA
+// not testing cpp due to missing impl
+//DISABLE_TEST:SIMPLE(filecheck=CHECK_CPP): -allow-glsl -stage compute -entry computeMain -target cpp -DTARGET_CPP
+
+//TEST(compute, vulkan):COMPARE_COMPUTE(filecheck-buffer=BUF):-vk -compute -entry computeMain -allow-glsl
+//TEST(compute, vulkan):COMPARE_COMPUTE(filecheck-buffer=BUF):-vk -compute -entry computeMain -allow-glsl -emit-spirv-directly
+#version 430
+
+//TEST_INPUT:ubuffer(data=[0 0 0 0 0], stride=4):out,name=outputBuffer
+buffer MyBlockName2
+{
+ uint data[];
+} outputBuffer;
+
+layout(local_size_x = 32) in;
+
+shared uint shareMem;
+
+void computeMain()
+{
+ // TODO: subgroupMemoryBarrierImage() (image memory) is not tested here.
+ // Each barrier is exercised in its own section since this is concurrency testing.
+
+ shareMem = 100;
+ subgroupMemoryBarrierShared();
+ outputBuffer.data[0] = 1;
+ subgroupBarrier();
+ outputBuffer.data[0] = 2;
+ subgroupBarrier();
+
+ outputBuffer.data[1] = 1;
+ subgroupMemoryBarrier();
+ outputBuffer.data[1] = 2;
+ subgroupBarrier();
+
+ outputBuffer.data[2] = 1;
+ subgroupMemoryBarrierBuffer();
+ outputBuffer.data[2] = 2;
+ subgroupBarrier();
+
+ shareMem = 2;
+ subgroupMemoryBarrierShared();
+ outputBuffer.data[3] = shareMem;
+ subgroupBarrier();
+
+ if (subgroupElect()) {
+ outputBuffer.data[4] = gl_GlobalInvocationID.x + 2;
+ }
+
+ // CHECK_GLSL: void main(
+ // CHECK_SPV: OpEntryPoint
+ // CHECK_HLSL: void computeMain(
+ // CHECK_CUDA: void computeMain(
+ // CHECK_CPP: void _computeMain(
+
+ // BUF: 2
+ // BUF-NEXT: 2
+ // BUF-NEXT: 2
+ // BUF-NEXT: 2
+ // BUF-NEXT: 2
+}
diff --git a/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-builtin-variables.slang b/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-builtin-variables.slang
new file mode 100644
index 000000000..21b533178
--- /dev/null
+++ b/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-builtin-variables.slang
@@ -0,0 +1,44 @@
+//TEST:SIMPLE(filecheck=CHECK_GLSL): -allow-glsl -stage compute -entry computeMain -target glsl
+//TEST:SIMPLE(filecheck=CHECK_SPV): -allow-glsl -stage compute -entry computeMain -target spirv -emit-spirv-directly
+
+// missing implementation of most builtin values due to non trivial translation
+//DISABLE_TEST:SIMPLE(filecheck=CHECK_HLSL): -allow-glsl -stage compute -entry computeMain -target hlsl -DTARGET_HLSL
+// missing implementation of most builtin values due to non trivial translation
+//DISABLE_TEST:SIMPLE(filecheck=CHECK_CUDA): -allow-glsl -stage compute -entry computeMain -target cuda -DTARGET_CUDA
+//missing implementation of system (varying?) values
+//DISABLE_TEST:SIMPLE(filecheck=CHECK_CPP): -allow-glsl -stage compute -entry computeMain -target cpp -DTARGET_CPP
+
+//TEST(compute, vulkan):COMPARE_COMPUTE(filecheck-buffer=BUF):-vk -compute -entry computeMain -allow-glsl
+//TEST(compute, vulkan):COMPARE_COMPUTE(filecheck-buffer=BUF):-vk -compute -entry computeMain -allow-glsl -emit-spirv-directly
+#version 430
+
+//TEST_INPUT:ubuffer(data=[0], stride=4):out,name=outputBuffer
+buffer MyBlockName2
+{
+ uint data[];
+} outputBuffer;
+
+layout(local_size_x = 32) in;
+
+void computeMain()
+{
+ if (gl_GlobalInvocationID.x == 3) {
+ outputBuffer.data[0] = true
+ && gl_NumSubgroups == 1
+ && gl_SubgroupID == 0 //1 subgroup, 0 based indexing
+ && gl_SubgroupSize == 32
+ && gl_SubgroupInvocationID == 3
+ && gl_SubgroupEqMask == uvec4(0b1000,0,0,0)
+ && gl_SubgroupGeMask == uvec4(0xFFFFFFF8,0,0,0)
+ && gl_SubgroupGtMask == uvec4(0xFFFFFFF0,0,0,0)
+ && gl_SubgroupLeMask == uvec4(0b1111,0,0,0)
+ && gl_SubgroupLtMask == uvec4(0b111,0,0,0)
+ ;
+ }
+ // CHECK_GLSL: void main(
+ // CHECK_SPV: OpEntryPoint
+ // CHECK_HLSL: void computeMain(
+ // CHECK_CUDA: void computeMain(
+ // CHECK_CPP: void _computeMain(
+ // BUF: 1
+}
diff --git a/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-clustered.slang b/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-clustered.slang
new file mode 100644
index 000000000..9e9b089d2
--- /dev/null
+++ b/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-clustered.slang
@@ -0,0 +1,171 @@
+//TEST:SIMPLE(filecheck=CHECK_GLSL): -allow-glsl -stage compute -entry computeMain -target glsl
+//TEST:SIMPLE(filecheck=CHECK_SPV): -allow-glsl -stage compute -entry computeMain -target spirv -emit-spirv-directly
+
+// not testing hlsl due to missing impl
+//DISABLE_TEST:SIMPLE(filecheck=CHECK_HLSL): -allow-glsl -stage compute -entry computeMain -target hlsl -DTARGET_HLSL
+// not testing cuda due to missing impl
+//DISABLE_TEST:SIMPLE(filecheck=CHECK_CUDA): -allow-glsl -stage compute -entry computeMain -target cuda -DTARGET_CUDA
+// not testing cpp due to missing impl
+//DISABLE_TEST:SIMPLE(filecheck=CHECK_CPP): -allow-glsl -stage compute -entry computeMain -target cpp -DTARGET_CPP
+
+//TEST(compute, vulkan):COMPARE_COMPUTE(filecheck-buffer=BUF):-vk -compute -entry computeMain -allow-glsl
+//TEST(compute, vulkan):COMPARE_COMPUTE(filecheck-buffer=BUF):-vk -compute -entry computeMain -allow-glsl -emit-spirv-directly
+#version 430
+
+//TEST_INPUT:ubuffer(data=[0 0], stride=4):out,name=outputBuffer
+buffer MyBlockName2
+{
+ uint data[]; // data[0]: testLogical() result, data[1]: testArithmetic() result (written in computeMain)
+} outputBuffer;
+
+layout(local_size_x = 32) in; // 32 invocations per workgroup along x
+
+__generic<T : __BuiltinLogicalType>
+bool test1Logical() { // scalar case: each clustered logical op on T(1) with second argument 1 must return T(1)
+ return true
+ && subgroupClusteredAnd(T(1), 1) == T(1)
+ && subgroupClusteredOr(T(1), 1) == T(1)
+ && subgroupClusteredXor(T(1), 1) == T(1)
+ ;
+}
+
+__generic<T : __BuiltinLogicalType, let N : int>
+bool testVLogical() { // vector case: same checks as test1Logical, on an N-component vector filled with T(1)
+ typealias gvec = vector<T, N>; // shorthand for the vector type under test
+
+ return true
+ && subgroupClusteredAnd(gvec(T(1)), 1) == gvec(T(1))
+ && subgroupClusteredOr(gvec(T(1)), 1) == gvec(T(1))
+ && subgroupClusteredXor(gvec(T(1)), 1) == gvec(T(1))
+ ;
+}
+
+bool testLogical() { // runs the scalar and 2-, 3- and 4-component logical checks for each listed integer type and for bool
+ return true
+ && test1Logical<int>()
+ && testVLogical<int, 2>()
+ && testVLogical<int, 3>()
+ && testVLogical<int, 4>()
+ && test1Logical<int8_t>()
+ && testVLogical<int8_t, 2>()
+ && testVLogical<int8_t, 3>()
+ && testVLogical<int8_t, 4>()
+ && test1Logical<int16_t>()
+ && testVLogical<int16_t, 2>()
+ && testVLogical<int16_t, 3>()
+ && testVLogical<int16_t, 4>()
+ && test1Logical<int64_t>()
+ && testVLogical<int64_t, 2>()
+ && testVLogical<int64_t, 3>()
+ && testVLogical<int64_t, 4>()
+ && test1Logical<uint>()
+ && testVLogical<uint, 2>()
+ && testVLogical<uint, 3>()
+ && testVLogical<uint, 4>()
+ && test1Logical<uint8_t>()
+ && testVLogical<uint8_t, 2>()
+ && testVLogical<uint8_t, 3>()
+ && testVLogical<uint8_t, 4>()
+ && test1Logical<uint16_t>()
+ && testVLogical<uint16_t, 2>()
+ && testVLogical<uint16_t, 3>()
+ && testVLogical<uint16_t, 4>()
+ && test1Logical<uint64_t>()
+ && testVLogical<uint64_t, 2>()
+ && testVLogical<uint64_t, 3>()
+ && testVLogical<uint64_t, 4>()
+ && test1Logical<bool>()
+ && testVLogical<bool, 2>()
+ && testVLogical<bool, 3>()
+ && testVLogical<bool, 4>()
+ ;
+}
+
+__generic<T : __BuiltinArithmeticType>
+bool test1Arithmetic() { // scalar case: each clustered arithmetic op on T(1) with second argument 1 must return T(1)
+ return true
+ && subgroupClusteredAdd(T(1), 1) == T(1)
+ && subgroupClusteredMul(T(1), 1) == T(1)
+ && subgroupClusteredMin(T(1), 1) == T(1)
+ && subgroupClusteredMax(T(1), 1) == T(1)
+ ;
+}
+
+__generic<T : __BuiltinArithmeticType, let N : int>
+bool testVArithmetic() { // vector case: same checks as test1Arithmetic, on an N-component vector filled with T(1)
+ typealias gvec = vector<T, N>; // shorthand for the vector type under test
+
+ return true
+ && subgroupClusteredAdd(gvec(T(1)), 1) == gvec(T(1))
+ && subgroupClusteredMul(gvec(T(1)), 1) == gvec(T(1))
+ && subgroupClusteredMin(gvec(T(1)), 1) == gvec(T(1))
+ && subgroupClusteredMax(gvec(T(1)), 1) == gvec(T(1))
+ ;
+}
+
+bool testArithmetic() { // runs the scalar and 2-, 3- and 4-component arithmetic checks for each listed float and integer type
+ return true
+ && test1Arithmetic<float>()
+ && testVArithmetic<float, 2>()
+ && testVArithmetic<float, 3>()
+ && testVArithmetic<float, 4>()
+ && test1Arithmetic<double>() // WARNING: Intel GPUs lack FP64 support
+ && testVArithmetic<double, 2>()
+ && testVArithmetic<double, 3>()
+ && testVArithmetic<double, 4>()
+ && test1Arithmetic<half>()
+ && testVArithmetic<half, 2>()
+ && testVArithmetic<half, 3>()
+ && testVArithmetic<half, 4>()
+ && test1Arithmetic<int>()
+ && testVArithmetic<int, 2>()
+ && testVArithmetic<int, 3>()
+ && testVArithmetic<int, 4>()
+ && test1Arithmetic<int8_t>()
+ && testVArithmetic<int8_t, 2>()
+ && testVArithmetic<int8_t, 3>()
+ && testVArithmetic<int8_t, 4>()
+ && test1Arithmetic<int16_t>()
+ && testVArithmetic<int16_t, 2>()
+ && testVArithmetic<int16_t, 3>()
+ && testVArithmetic<int16_t, 4>()
+ && test1Arithmetic<int64_t>()
+ && testVArithmetic<int64_t, 2>()
+ && testVArithmetic<int64_t, 3>()
+ && testVArithmetic<int64_t, 4>()
+ && test1Arithmetic<uint>()
+ && testVArithmetic<uint, 2>()
+ && testVArithmetic<uint, 3>()
+ && testVArithmetic<uint, 4>()
+ && test1Arithmetic<uint8_t>()
+ && testVArithmetic<uint8_t, 2>()
+ && testVArithmetic<uint8_t, 3>()
+ && testVArithmetic<uint8_t, 4>()
+ && test1Arithmetic<uint16_t>()
+ && testVArithmetic<uint16_t, 2>()
+ && testVArithmetic<uint16_t, 3>()
+ && testVArithmetic<uint16_t, 4>()
+ && test1Arithmetic<uint64_t>()
+ && testVArithmetic<uint64_t, 2>()
+ && testVArithmetic<uint64_t, 3>()
+ && testVArithmetic<uint64_t, 4>()
+ ;
+}
+
+void computeMain()
+{
+ outputBuffer.data[0] = true // slot 0: 1 when every clustered logical check passes
+ && testLogical()
+ ;
+ outputBuffer.data[1] = true // slot 1: 1 when every clustered arithmetic check passes
+ && testArithmetic()
+ ;
+
+ // CHECK_GLSL: void main(
+ // CHECK_SPV: OpEntryPoint
+ // CHECK_HLSL: void computeMain(
+ // CHECK_CUDA: void computeMain(
+ // CHECK_CPP: void _computeMain(
+ // BUF: 1
+ // BUF-NEXT: 1
+}
diff --git a/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-quad.slang b/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-quad.slang
new file mode 100644
index 000000000..5ed6398b2
--- /dev/null
+++ b/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-quad.slang
@@ -0,0 +1,129 @@
+//TEST:SIMPLE(filecheck=CHECK_GLSL): -allow-glsl -stage compute -entry computeMain -target glsl
+//TEST:SIMPLE(filecheck=CHECK_SPV): -allow-glsl -stage compute -entry computeMain -target spirv -emit-spirv-directly
+//TEST:SIMPLE(filecheck=CHECK_HLSL): -allow-glsl -stage compute -entry computeMain -target hlsl -DTARGET_HLSL
+
+// not testing cuda due to missing impl
+//DISABLE_TEST:SIMPLE(filecheck=CHECK_CUDA): -allow-glsl -stage compute -entry computeMain -target cuda -DTARGET_CUDA
+// not testing cpp due to missing impl
+//DISABLE_TEST:SIMPLE(filecheck=CHECK_CPP): -allow-glsl -stage compute -entry computeMain -target cpp -DTARGET_CPP
+
+//TEST(compute, vulkan):COMPARE_COMPUTE(filecheck-buffer=BUF):-vk -compute -entry computeMain -allow-glsl
+//TEST(compute, vulkan):COMPARE_COMPUTE(filecheck-buffer=BUF):-vk -compute -entry computeMain -allow-glsl -emit-spirv-directly
+#version 430
+
+//TEST_INPUT:ubuffer(data=[0], stride=4):out,name=outputBuffer
+buffer MyBlockName2
+{
+ uint data[]; // data[0] receives the testQuadSwapX() result (written in computeMain)
+} outputBuffer;
+
+layout(local_size_x = 4) in; // 4 invocations per workgroup along x
+
+__generic<T : __BuiltinLogicalType>
+bool test1QuadX() { // scalar logical-type case: every invocation passes the same constant and expects it back from each quad op
+ return true
+ && subgroupQuadSwapHorizontal(T(2)) == T(2)
+ && subgroupQuadSwapVertical(T(2)) == T(2)
+ && subgroupQuadSwapDiagonal(T(3)) == T(3)
+ && subgroupQuadBroadcast(T(1), 1) == T(1)
+ ;
+}
+__generic<T : __BuiltinLogicalType, let N : int>
+bool testVQuadX() { // vector logical-type case: same checks as the scalar overload, on N-component vectors
+ typealias gvec = vector<T, N>; // shorthand for the vector type under test
+
+ return true
+ && subgroupQuadSwapHorizontal(gvec(T(2))) == gvec(T(2))
+ && subgroupQuadSwapVertical(gvec(T(2))) == gvec(T(2))
+ && subgroupQuadSwapDiagonal(gvec(T(3))) == gvec(T(3))
+ && subgroupQuadBroadcast(gvec(T(1)), 1) == gvec(T(1))
+ ;
+}
+
+__generic<T : __BuiltinFloatingPointType>
+bool test1QuadX() { // scalar floating-point overload of the quad checks; body matches the logical-type overload
+ return true
+ && subgroupQuadSwapHorizontal(T(2)) == T(2)
+ && subgroupQuadSwapVertical(T(2)) == T(2)
+ && subgroupQuadSwapDiagonal(T(3)) == T(3)
+ && subgroupQuadBroadcast(T(1), 1) == T(1)
+ ;
+}
+__generic<T : __BuiltinFloatingPointType, let N : int>
+bool testVQuadX() { // vector floating-point overload of the quad checks; body matches the logical-type overload
+ typealias gvec = vector<T, N>; // shorthand for the vector type under test
+
+ return true
+ && subgroupQuadSwapHorizontal(gvec(T(2))) == gvec(T(2))
+ && subgroupQuadSwapVertical(gvec(T(2))) == gvec(T(2))
+ && subgroupQuadSwapDiagonal(gvec(T(3))) == gvec(T(3))
+ && subgroupQuadBroadcast(gvec(T(1)), 1) == gvec(T(1))
+ ;
+}
+bool testQuadSwapX() { // runs the scalar and 2-, 3- and 4-component quad checks for each listed float type, integer type and bool
+ return true
+ && test1QuadX<float>()
+ && testVQuadX<float, 2>()
+ && testVQuadX<float, 3>()
+ && testVQuadX<float, 4>()
+ && test1QuadX<double>() // WARNING: Intel GPUs lack FP64 support
+ && testVQuadX<double, 2>()
+ && testVQuadX<double, 3>()
+ && testVQuadX<double, 4>()
+ && test1QuadX<half>()
+ && testVQuadX<half, 2>()
+ && testVQuadX<half, 3>()
+ && testVQuadX<half, 4>()
+ && test1QuadX<int>()
+ && testVQuadX<int, 2>()
+ && testVQuadX<int, 3>()
+ && testVQuadX<int, 4>()
+ && test1QuadX<int8_t>()
+ && testVQuadX<int8_t, 2>()
+ && testVQuadX<int8_t, 3>()
+ && testVQuadX<int8_t, 4>()
+ && test1QuadX<int16_t>()
+ && testVQuadX<int16_t, 2>()
+ && testVQuadX<int16_t, 3>()
+ && testVQuadX<int16_t, 4>()
+ && test1QuadX<int64_t>()
+ && testVQuadX<int64_t, 2>()
+ && testVQuadX<int64_t, 3>()
+ && testVQuadX<int64_t, 4>()
+ && test1QuadX<uint>()
+ && testVQuadX<uint, 2>()
+ && testVQuadX<uint, 3>()
+ && testVQuadX<uint, 4>()
+ && test1QuadX<uint8_t>()
+ && testVQuadX<uint8_t, 2>()
+ && testVQuadX<uint8_t, 3>()
+ && testVQuadX<uint8_t, 4>()
+ && test1QuadX<uint16_t>()
+ && testVQuadX<uint16_t, 2>()
+ && testVQuadX<uint16_t, 3>()
+ && testVQuadX<uint16_t, 4>()
+ && test1QuadX<uint64_t>()
+ && testVQuadX<uint64_t, 2>()
+ && testVQuadX<uint64_t, 3>()
+ && testVQuadX<uint64_t, 4>()
+ && test1QuadX<bool>()
+ && testVQuadX<bool, 2>()
+ && testVQuadX<bool, 3>()
+ && testVQuadX<bool, 4>()
+ ;
+}
+
+void computeMain()
+{
+
+ outputBuffer.data[0] = true // slot 0: 1 when every quad check passes
+ && testQuadSwapX()
+ ;
+
+ // CHECK_GLSL: void main(
+ // CHECK_SPV: OpEntryPoint
+ // CHECK_HLSL: void computeMain(
+ // CHECK_CUDA: void computeMain(
+ // CHECK_CPP: void _computeMain(
+ // BUF: 1
+}
diff --git a/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-shuffle-relative.slang b/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-shuffle-relative.slang
new file mode 100644
index 000000000..0e187c568
--- /dev/null
+++ b/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-shuffle-relative.slang
@@ -0,0 +1,121 @@
+//TEST:SIMPLE(filecheck=CHECK_GLSL): -allow-glsl -stage compute -entry computeMain -target glsl
+//TEST:SIMPLE(filecheck=CHECK_SPV): -allow-glsl -stage compute -entry computeMain -target spirv -emit-spirv-directly
+
+// not testing hlsl due to missing impl
+//DISABLE_TEST:SIMPLE(filecheck=CHECK_HLSL): -allow-glsl -stage compute -entry computeMain -target hlsl -DTARGET_HLSL
+// not testing cuda due to missing impl
+//DISABLE_TEST:SIMPLE(filecheck=CHECK_CUDA): -allow-glsl -stage compute -entry computeMain -target cuda -DTARGET_CUDA
+// not testing cpp due to missing impl
+//DISABLE_TEST:SIMPLE(filecheck=CHECK_CPP): -allow-glsl -stage compute -entry computeMain -target cpp -DTARGET_CPP
+
+//TEST(compute, vulkan):COMPARE_COMPUTE(filecheck-buffer=BUF):-vk -compute -entry computeMain -allow-glsl
+//TEST(compute, vulkan):COMPARE_COMPUTE(filecheck-buffer=BUF):-vk -compute -entry computeMain -allow-glsl -emit-spirv-directly
+#version 430
+
+//TEST_INPUT:ubuffer(data=[0], stride=4):out,name=outputBuffer
+buffer MyBlockName2
+{
+ uint data[]; // data[0] receives the testShuffleX() result (written in computeMain)
+} outputBuffer;
+
+layout(local_size_x = 32) in; // 32 invocations per workgroup along x
+
+__generic<T : __BuiltinLogicalType>
+bool test1ShuffleX() { // scalar logical-type case: every invocation passes T(1) with second argument 1 and expects T(1) back
+ return true
+ && subgroupShuffleUp(T(1), 1) == T(1)
+ && subgroupShuffleDown(T(1), 1) == T(1)
+ ;
+}
+__generic<T : __BuiltinLogicalType, let N : int>
+bool testVShuffleX() { // vector logical-type case: same checks as the scalar overload, on N-component vectors
+ typealias gvec = vector<T, N>; // shorthand for the vector type under test
+
+ return true
+ && subgroupShuffleUp(gvec(T(1)), 1) == gvec(T(1))
+ && subgroupShuffleDown(gvec(T(1)), 1) == gvec(T(1))
+ ;
+}
+
+__generic<T : __BuiltinFloatingPointType>
+bool test1ShuffleX() { // scalar floating-point overload of the shuffle up/down checks; body matches the logical-type overload
+ return true
+ && subgroupShuffleUp(T(1), 1) == T(1)
+ && subgroupShuffleDown(T(1), 1) == T(1)
+ ;
+}
+__generic<T : __BuiltinFloatingPointType, let N : int>
+bool testVShuffleX() { // vector floating-point overload of the shuffle up/down checks; body matches the logical-type overload
+ typealias gvec = vector<T, N>; // shorthand for the vector type under test
+
+ return true
+ && subgroupShuffleUp(gvec(T(1)), 1) == gvec(T(1))
+ && subgroupShuffleDown(gvec(T(1)), 1) == gvec(T(1))
+ ;
+}
+bool testShuffleX() { // runs the scalar and 2-, 3- and 4-component shuffle up/down checks for each listed float type, integer type and bool
+ return true
+ && test1ShuffleX<float>()
+ && testVShuffleX<float, 2>()
+ && testVShuffleX<float, 3>()
+ && testVShuffleX<float, 4>()
+ && test1ShuffleX<double>() // WARNING: Intel GPUs lack FP64 support
+ && testVShuffleX<double, 2>()
+ && testVShuffleX<double, 3>()
+ && testVShuffleX<double, 4>()
+ && test1ShuffleX<half>()
+ && testVShuffleX<half, 2>()
+ && testVShuffleX<half, 3>()
+ && testVShuffleX<half, 4>()
+ && test1ShuffleX<int>()
+ && testVShuffleX<int, 2>()
+ && testVShuffleX<int, 3>()
+ && testVShuffleX<int, 4>()
+ && test1ShuffleX<int8_t>()
+ && testVShuffleX<int8_t, 2>()
+ && testVShuffleX<int8_t, 3>()
+ && testVShuffleX<int8_t, 4>()
+ && test1ShuffleX<int16_t>()
+ && testVShuffleX<int16_t, 2>()
+ && testVShuffleX<int16_t, 3>()
+ && testVShuffleX<int16_t, 4>()
+ && test1ShuffleX<int64_t>()
+ && testVShuffleX<int64_t, 2>()
+ && testVShuffleX<int64_t, 3>()
+ && testVShuffleX<int64_t, 4>()
+ && test1ShuffleX<uint>()
+ && testVShuffleX<uint, 2>()
+ && testVShuffleX<uint, 3>()
+ && testVShuffleX<uint, 4>()
+ && test1ShuffleX<uint8_t>()
+ && testVShuffleX<uint8_t, 2>()
+ && testVShuffleX<uint8_t, 3>()
+ && testVShuffleX<uint8_t, 4>()
+ && test1ShuffleX<uint16_t>()
+ && testVShuffleX<uint16_t, 2>()
+ && testVShuffleX<uint16_t, 3>()
+ && testVShuffleX<uint16_t, 4>()
+ && test1ShuffleX<uint64_t>()
+ && testVShuffleX<uint64_t, 2>()
+ && testVShuffleX<uint64_t, 3>()
+ && testVShuffleX<uint64_t, 4>()
+ && test1ShuffleX<bool>()
+ && testVShuffleX<bool, 2>()
+ && testVShuffleX<bool, 3>()
+ && testVShuffleX<bool, 4>()
+ ;
+}
+
+void computeMain()
+{
+ outputBuffer.data[0] = true // slot 0: 1 when every shuffle up/down check passes
+ && testShuffleX()
+ ;
+
+ // CHECK_GLSL: void main(
+ // CHECK_SPV: OpEntryPoint
+ // CHECK_HLSL: void computeMain(
+ // CHECK_CUDA: void computeMain(
+ // CHECK_CPP: void _computeMain(
+ // BUF: 1
+}
diff --git a/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-shuffle.slang b/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-shuffle.slang
new file mode 100644
index 000000000..5dca1a588
--- /dev/null
+++ b/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-shuffle.slang
@@ -0,0 +1,139 @@
+//TEST:SIMPLE(filecheck=CHECK_GLSL): -allow-glsl -stage compute -entry computeMain -target glsl
+//TEST:SIMPLE(filecheck=CHECK_SPV): -allow-glsl -stage compute -entry computeMain -target spirv -emit-spirv-directly
+
+// not testing hlsl due to missing impl
+//DISABLE_TEST:SIMPLE(filecheck=CHECK_HLSL): -allow-glsl -stage compute -entry computeMain -target hlsl -DTARGET_HLSL
+// not testing cuda due to missing impl
+//DISABLE_TEST:SIMPLE(filecheck=CHECK_CUDA): -allow-glsl -stage compute -entry computeMain -target cuda -DTARGET_CUDA
+// not testing cpp due to missing impl
+//DISABLE_TEST:SIMPLE(filecheck=CHECK_CPP): -allow-glsl -stage compute -entry computeMain -target cpp -DTARGET_CPP
+
+//TEST(compute, vulkan):COMPARE_COMPUTE(filecheck-buffer=BUF):-vk -compute -entry computeMain -allow-glsl
+//TEST(compute, vulkan):COMPARE_COMPUTE(filecheck-buffer=BUF):-vk -compute -entry computeMain -allow-glsl -emit-spirv-directly
+#version 430
+
+#if 1 \
+ && !defined(TARGET_HLSL) \
+ && !defined(TARGET_CUDA)
+// HLSL is missing an implementation, so the subgroupShuffleXor checks are compiled out when TARGET_HLSL is defined
+// CUDA is missing an implementation, so the subgroupShuffleXor checks are compiled out when TARGET_CUDA is defined
+#define TEST_when_subgroupShuffleXor_is_implemented
+#endif
+
+//TEST_INPUT:ubuffer(data=[0], stride=4):out,name=outputBuffer
+buffer MyBlockName2
+{
+ uint data[]; // data[0] receives the testShuffleX() result (written in computeMain)
+} outputBuffer;
+
+layout(local_size_x = 32) in; // 32 invocations per workgroup along x
+
+__generic<T : __BuiltinLogicalType>
+bool test1ShuffleX() { // scalar logical-type case: every invocation passes T(1) with second argument 1 and expects T(1) back
+ return true
+ && subgroupShuffle(T(1), 1) == T(1)
+#ifdef TEST_when_subgroupShuffleXor_is_implemented
+ && subgroupShuffleXor(T(1), 1) == T(1)
+#endif // #ifdef TEST_when_subgroupShuffleXor_is_implemented
+ ;
+}
+__generic<T : __BuiltinLogicalType, let N : int>
+bool testVShuffleX() { // vector logical-type case: same checks as the scalar overload, on N-component vectors
+ typealias gvec = vector<T, N>; // shorthand for the vector type under test
+
+ return true
+ && subgroupShuffle(gvec(T(1)), 1) == gvec(T(1))
+#ifdef TEST_when_subgroupShuffleXor_is_implemented
+ && subgroupShuffleXor(gvec(T(1)), 1) == gvec(T(1))
+#endif // #ifdef TEST_when_subgroupShuffleXor_is_implemented
+ ;
+}
+
+__generic<T : __BuiltinFloatingPointType>
+bool test1ShuffleX() { // scalar floating-point overload: every invocation passes T(1) with second argument 1 and expects T(1) back
+ return true
+ && subgroupShuffle(T(1), 1) == T(1)
+#ifdef TEST_when_subgroupShuffleXor_is_implemented
+ && subgroupShuffleXor(T(1), 1) == T(1) // guarded by the same file-level macro as the logical-type overload, so both follow one condition
+#endif // #ifdef TEST_when_subgroupShuffleXor_is_implemented
+ ;
+}
+__generic<T : __BuiltinFloatingPointType, let N : int>
+bool testVShuffleX() { // vector floating-point overload: same checks as the scalar overload, on N-component vectors
+ typealias gvec = vector<T, N>; // shorthand for the vector type under test
+
+ return true
+ && subgroupShuffle(gvec(T(1)), 1) == gvec(T(1))
+#ifdef TEST_when_subgroupShuffleXor_is_implemented
+ && subgroupShuffleXor(gvec(T(1)), 1) == gvec(T(1)) // guarded by the same file-level macro as the logical-type overload, so both follow one condition
+#endif // #ifdef TEST_when_subgroupShuffleXor_is_implemented
+ ;
+}
+bool testShuffleX() { // runs the scalar and 2-, 3- and 4-component shuffle checks for each listed float type, integer type and bool
+ return true
+ && test1ShuffleX<float>()
+ && testVShuffleX<float, 2>()
+ && testVShuffleX<float, 3>()
+ && testVShuffleX<float, 4>()
+ && test1ShuffleX<double>() // WARNING: Intel GPUs lack FP64 support
+ && testVShuffleX<double, 2>()
+ && testVShuffleX<double, 3>()
+ && testVShuffleX<double, 4>()
+ && test1ShuffleX<half>()
+ && testVShuffleX<half, 2>()
+ && testVShuffleX<half, 3>()
+ && testVShuffleX<half, 4>()
+ && test1ShuffleX<int>()
+ && testVShuffleX<int, 2>()
+ && testVShuffleX<int, 3>()
+ && testVShuffleX<int, 4>()
+ && test1ShuffleX<int8_t>()
+ && testVShuffleX<int8_t, 2>()
+ && testVShuffleX<int8_t, 3>()
+ && testVShuffleX<int8_t, 4>()
+ && test1ShuffleX<int16_t>()
+ && testVShuffleX<int16_t, 2>()
+ && testVShuffleX<int16_t, 3>()
+ && testVShuffleX<int16_t, 4>()
+ && test1ShuffleX<int64_t>()
+ && testVShuffleX<int64_t, 2>()
+ && testVShuffleX<int64_t, 3>()
+ && testVShuffleX<int64_t, 4>()
+ && test1ShuffleX<uint>()
+ && testVShuffleX<uint, 2>()
+ && testVShuffleX<uint, 3>()
+ && testVShuffleX<uint, 4>()
+ && test1ShuffleX<uint8_t>()
+ && testVShuffleX<uint8_t, 2>()
+ && testVShuffleX<uint8_t, 3>()
+ && testVShuffleX<uint8_t, 4>()
+ && test1ShuffleX<uint16_t>()
+ && testVShuffleX<uint16_t, 2>()
+ && testVShuffleX<uint16_t, 3>()
+ && testVShuffleX<uint16_t, 4>()
+ && test1ShuffleX<uint64_t>()
+ && testVShuffleX<uint64_t, 2>()
+ && testVShuffleX<uint64_t, 3>()
+ && testVShuffleX<uint64_t, 4>()
+ && test1ShuffleX<bool>()
+ && testVShuffleX<bool, 2>()
+ && testVShuffleX<bool, 3>()
+ && testVShuffleX<bool, 4>()
+ ;
+}
+
+
+void computeMain()
+{
+
+ outputBuffer.data[0] = true // slot 0: 1 when every shuffle check passes
+ && testShuffleX()
+ ;
+
+ // CHECK_GLSL: void main(
+ // CHECK_SPV: OpEntryPoint
+ // CHECK_HLSL: void computeMain(
+ // CHECK_CUDA: void computeMain(
+ // CHECK_CPP: void _computeMain(
+ // BUF: 1
+}
diff --git a/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-vote.slang b/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-vote.slang
new file mode 100644
index 000000000..bcd4aeb56
--- /dev/null
+++ b/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-vote.slang
@@ -0,0 +1,167 @@
+//TEST:SIMPLE(filecheck=CHECK_GLSL): -allow-glsl -stage compute -entry computeMain -target glsl
+//TEST:SIMPLE(filecheck=CHECK_SPV): -allow-glsl -stage compute -entry computeMain -target spirv -emit-spirv-directly
+//TEST:SIMPLE(filecheck=CHECK_HLSL): -allow-glsl -stage compute -entry computeMain -target hlsl -DTARGET_HLSL
+
+// not testing cuda due to missing impl
+//DISABLE_TEST:SIMPLE(filecheck=CHECK_CUDA): -allow-glsl -stage compute -entry computeMain -target cuda -DTARGET_CUDA
+// not testing cpp due to missing impl
+//DISABLE_TEST:SIMPLE(filecheck=CHECK_CPP): -allow-glsl -stage compute -entry computeMain -target cpp -DTARGET_CPP
+
+//TEST(compute, vulkan):COMPARE_COMPUTE(filecheck-buffer=BUF):-vk -compute -entry computeMain -allow-glsl
+//TEST(compute, vulkan):COMPARE_COMPUTE(filecheck-buffer=BUF):-vk -compute -entry computeMain -allow-glsl -emit-spirv-directly
+#version 430
+
+//TEST_INPUT:ubuffer(data=[9], stride=4):name=inputBuffer
+buffer MyBlockName
+{
+ uint data[]; // data[0] is compared against gl_GlobalInvocationID.x in computeMain to make one invocation vote true
+} inputBuffer;
+
+//TEST_INPUT:ubuffer(data=[0 0 0 0 0], stride=4):out,name=outputBuffer
+buffer MyBlockName2
+{
+ uint data[]; // data[0..4]: one result slot per vote test in computeMain (set to 1, then 2 on success)
+} outputBuffer;
+
+layout(local_size_x = 32) in; // 32 invocations per workgroup along x
+
+__generic<T : __BuiltinLogicalType>
+bool test1AllEqual() { // scalar logical-type case: a constant must compare all-equal, a value built from the invocation id must not
+ return true
+ && subgroupAllEqual(T(1)) == true
+ && subgroupAllEqual(T(gl_GlobalInvocationID.x)) == false
+ ;
+}
+__generic<T : __BuiltinLogicalType, let N : int>
+bool testVAllEqual() { // vector logical-type case: same checks as the scalar overload, on N-component vectors
+ typealias gvec = vector<T, N>; // shorthand for the vector type under test
+
+ return true
+ && subgroupAllEqual(gvec(T(1))) == true
+ && subgroupAllEqual(gvec(T(gl_GlobalInvocationID.x))) == false
+ ;
+}
+
+__generic<T : __BuiltinFloatingPointType>
+bool test1AllEqual() { // scalar floating-point overload of the all-equal checks; body matches the logical-type overload
+ return true
+ && subgroupAllEqual(T(1)) == true
+ && subgroupAllEqual(T(gl_GlobalInvocationID.x)) == false
+ ;
+}
+__generic<T : __BuiltinFloatingPointType, let N : int>
+bool testVAllEqual() { // vector floating-point overload of the all-equal checks; body matches the logical-type overload
+ typealias gvec = vector<T, N>; // shorthand for the vector type under test
+
+ return true
+ && subgroupAllEqual(gvec(T(1))) == true
+ && subgroupAllEqual(gvec(T(gl_GlobalInvocationID.x))) == false
+ ;
+}
+bool testAllEqual() { // runs the scalar and 2-, 3- and 4-component all-equal checks for each listed float type, integer type and bool
+ return true
+ && test1AllEqual<float>()
+ && testVAllEqual<float, 2>()
+ && testVAllEqual<float, 3>()
+ && testVAllEqual<float, 4>()
+ && test1AllEqual<double>() // WARNING: Intel GPUs lack FP64 support
+ && testVAllEqual<double, 2>()
+ && testVAllEqual<double, 3>()
+ && testVAllEqual<double, 4>()
+ && test1AllEqual<half>()
+ && testVAllEqual<half, 2>()
+ && testVAllEqual<half, 3>()
+ && testVAllEqual<half, 4>()
+ && test1AllEqual<int>()
+ && testVAllEqual<int, 2>()
+ && testVAllEqual<int, 3>()
+ && testVAllEqual<int, 4>()
+ && test1AllEqual<int8_t>()
+ && testVAllEqual<int8_t, 2>()
+ && testVAllEqual<int8_t, 3>()
+ && testVAllEqual<int8_t, 4>()
+ && test1AllEqual<int16_t>()
+ && testVAllEqual<int16_t, 2>()
+ && testVAllEqual<int16_t, 3>()
+ && testVAllEqual<int16_t, 4>()
+ && test1AllEqual<int64_t>()
+ && testVAllEqual<int64_t, 2>()
+ && testVAllEqual<int64_t, 3>()
+ && testVAllEqual<int64_t, 4>()
+ && test1AllEqual<uint>()
+ && testVAllEqual<uint, 2>()
+ && testVAllEqual<uint, 3>()
+ && testVAllEqual<uint, 4>()
+ && test1AllEqual<uint8_t>()
+ && testVAllEqual<uint8_t, 2>()
+ && testVAllEqual<uint8_t, 3>()
+ && testVAllEqual<uint8_t, 4>()
+ && test1AllEqual<uint16_t>()
+ && testVAllEqual<uint16_t, 2>()
+ && testVAllEqual<uint16_t, 3>()
+ && testVAllEqual<uint16_t, 4>()
+ && test1AllEqual<uint64_t>()
+ && testVAllEqual<uint64_t, 2>()
+ && testVAllEqual<uint64_t, 3>()
+ && testVAllEqual<uint64_t, 4>()
+ && test1AllEqual<bool>()
+ && testVAllEqual<bool, 2>()
+ && testVAllEqual<bool, 3>()
+ && testVAllEqual<bool, 4>()
+ ;
+}
+
+void computeMain()
+{
+ // separate tests since testing concurrency
+
+ // one is true, rest false, positive
+ outputBuffer.data[0] = 1; // 1 = vote did not pass; overwritten with 2 below when it does
+ bool t1 = inputBuffer.data[0] == gl_GlobalInvocationID.x; // true only on the invocation whose x id equals inputBuffer.data[0]
+ if (subgroupAny(t1)) {
+ subgroupBarrier();
+ outputBuffer.data[0] = 2;
+ }
+
+ // all false, negative
+ outputBuffer.data[1] = 1;
+ t1 = false;
+ if (!subgroupAny(t1)) { // expects subgroupAny to report false when no invocation votes true
+ subgroupBarrier();
+ outputBuffer.data[1] = 2;
+ }
+
+ // all true, positive
+ outputBuffer.data[2] = 1;
+ t1 = true;
+ if (subgroupAll(t1)) {
+ subgroupBarrier();
+ outputBuffer.data[2] = 2;
+ }
+
+ // all false, negative
+ outputBuffer.data[3] = 1;
+ t1 = false;
+ if (!subgroupAll(t1)) { // expects subgroupAll to report false when no invocation votes true
+ subgroupBarrier();
+ outputBuffer.data[3] = 2;
+ }
+
+ outputBuffer.data[4] = 1; // slot 4 covers the subgroupAllEqual checks in testAllEqual()
+
+ if (testAllEqual()) {
+ subgroupBarrier();
+ outputBuffer.data[4] = 2;
+ }
+
+ // CHECK_GLSL: void main(
+ // CHECK_SPV: OpEntryPoint
+ // CHECK_HLSL: void computeMain(
+ // CHECK_CUDA: void computeMain(
+ // CHECK_CPP: void _computeMain(
+ // BUF: 2
+ // BUF-NEXT: 2
+ // BUF-NEXT: 2
+ // BUF-NEXT: 2
+ // BUF-NEXT: 2
+}