diff options
| -rw-r--r-- | docs/user-guide/a3-02-reference-capability-atoms.md | 12 | ||||
| -rw-r--r-- | docs/wave-intrinsics.md | 14 | ||||
| -rw-r--r-- | source/slang/glsl.meta.slang | 70 | ||||
| -rw-r--r-- | source/slang/hlsl.meta.slang | 149 | ||||
| -rw-r--r-- | source/slang/slang-capabilities.capdef | 20 | ||||
| -rw-r--r-- | tests/diagnostics/wave-operations-types.slang | 14 | ||||
| -rw-r--r-- | tests/hlsl-intrinsic/wave-rotate/wave-rotate-clustered.slang | 133 | ||||
| -rw-r--r-- | tests/hlsl-intrinsic/wave-rotate/wave-rotate.slang | 134 |
8 files changed, 507 insertions, 39 deletions
diff --git a/docs/user-guide/a3-02-reference-capability-atoms.md b/docs/user-guide/a3-02-reference-capability-atoms.md index 296614716..a9455761c 100644 --- a/docs/user-guide/a3-02-reference-capability-atoms.md +++ b/docs/user-guide/a3-02-reference-capability-atoms.md @@ -407,6 +407,9 @@ Extensions `SPV_NV_shader_subgroup_partitioned` > Represents the SPIR-V extension for shader subgroup partitioned. +`SPV_KHR_subgroup_rotate` +> Represents the SPIR-V extension that enables rotating values across invocations within a subgroup. + `SPV_NV_ray_tracing_motion_blur` > Represents the SPIR-V extension for ray tracing motion blur. @@ -501,6 +504,9 @@ Extensions `spvGroupNonUniformPartitionedNV` > Represents the SPIR-V capability for group non-uniform partitioned operations. +`spvGroupNonUniformRotateKHR` +> Represents the SPIR-V capability for group non-uniform rotate operations. + `spvRayTracingMotionBlurNV` > Represents the SPIR-V capability for ray tracing motion blur. @@ -699,6 +705,9 @@ Extensions `GL_KHR_shader_subgroup_vote` > Represents the GL_KHR_shader_subgroup_vote extension. +`GL_KHR_shader_subgroup_rotate` +> Represents the GL_KHR_shader_subgroup_rotate extension. + `GL_NV_compute_shader_derivatives` > Represents the GL_NV_compute_shader_derivatives extension. @@ -1132,6 +1141,9 @@ Compound Capabilities `subgroup_partitioned` > Capabilities required to use GLSL-style subgroup operations 'subgroup_partitioned' +`subgroup_rotate` +> Capabilities required to use GLSL-style subgroup rotate operations 'subgroup_rotate' + `atomic_glsl_hlsl_nvapi_cuda_metal_float1` > (All implemented targets) Capabilities required to use atomic operations of GLSL tier-1 float atomics diff --git a/docs/wave-intrinsics.md b/docs/wave-intrinsics.md index aa46f72a1..7f6fb7b77 100644 --- a/docs/wave-intrinsics.md +++ b/docs/wave-intrinsics.md @@ -236,6 +236,20 @@ void GroupMemoryBarrierWithWaveSync(); Synchronizes all lanes to the same GroupMemoryBarrierWithWaveSync in program flow. 
Orders group shared memory accesses such that accesses after the barrier can be seen by writes before. +Wave Rotate Intrinsics +====================== + +These intrinsics are specific to Slang and were added to support the subgroup rotate functionalities provided by SPIRV (through the `GroupNonUniformRotateKHR` capability), GLSL (through the `GL_KHR_shader_subgroup_rotate` +extension), and Metal. + +``` +// Supported on SPIRV, GLSL, and Metal targets. +T WaveRotate(T value, uint delta); + +// Supported on SPIRV and GLSL targets. +T WaveClusteredRotate(T value, uint delta, constexpr uint clusterSize); +``` + Wave Mask Intrinsics ==================== diff --git a/source/slang/glsl.meta.slang b/source/slang/glsl.meta.slang index bbf0c40dd..85c8b174c 100644 --- a/source/slang/glsl.meta.slang +++ b/source/slang/glsl.meta.slang @@ -6110,45 +6110,6 @@ public void traceRayMotionNV( } } -__generic<T : __BuiltinType> -[ForceInline] -void typeRequireChecks_shader_subgroup_GLSL() { - // the following is a seperate function call, since else the `__requireTargetExtension` and associated __intrinsic_asm is ignored if the calling function also calls an __intrinsic_asm - __target_switch - { - case glsl: - if (__type_equals<T, half>() - || __type_equals<T, float16_t>() - ) __requireTargetExtension("GL_EXT_shader_subgroup_extended_types_float16"); - else if (__type_equals<T, uint8_t>() - || __type_equals<T, int8_t>() - ) __requireTargetExtension("GL_EXT_shader_subgroup_extended_types_int8"); - else if (__type_equals<T, uint16_t>() - || __type_equals<T, int16_t>() - ) __requireTargetExtension("GL_EXT_shader_subgroup_extended_types_int16"); - else if (__type_equals<T, uint64_t>() - || __type_equals<T, int64_t>() - ) __requireTargetExtension("GL_EXT_shader_subgroup_extended_types_int64"); - - __intrinsic_asm ""; - } -} - -__generic<T : __BuiltinType> -void shader_subgroup_preamble() { - // checks needed for shader_subgroup functions; __requireTargetExtension does not work - // (does 
not add the ext specified correctly to the compile output; using extended type - // will result in error for using the type) - __target_switch - { - case glsl: - typeRequireChecks_shader_subgroup_GLSL<T>(); - default: - return; - } - -} - // GL_KHR_shader_subgroup_basic Built-in Variables [require(cpp_cuda_glsl_hlsl_spirv_wgsl, subgroup_basic)] @@ -8176,6 +8137,37 @@ public vector<T,N> subgroupQuadSwapDiagonal(vector<T,N> value) return QuadReadAcrossDiagonal(value); } +// GL_KHR_shader_subgroup_rotate + +__generic<T : __BuiltinType> +[require(glsl_metal_spirv, subgroup_rotate)] +public T subgroupRotate(T value, uint delta) +{ + return WaveRotate(value, delta); +} + +__generic<T : __BuiltinType, let N : int> +[require(glsl_metal_spirv, subgroup_rotate)] +public vector<T, N> subgroupRotate(vector<T, N> value, uint delta) +{ + return WaveRotate(value, delta); +} + +__generic<T : __BuiltinType> +[require(glsl_spirv, subgroup_rotate)] +public T subgroupClusteredRotate(T value, uint delta, constexpr uint clusterSize) +{ + return WaveClusteredRotate(value, delta, clusterSize); + +} + +__generic<T : __BuiltinType, let N : int> +[require(glsl_spirv, subgroup_rotate)] +public vector<T, N> subgroupClusteredRotate(vector<T, N> value, uint delta, constexpr uint clusterSize) +{ + return WaveClusteredRotate(value, delta, clusterSize); +} + //// GLSL atomic // The following type internally is a Shader Storage Buffer diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang index c8a2c8c58..03321bfaf 100644 --- a/source/slang/hlsl.meta.slang +++ b/source/slang/hlsl.meta.slang @@ -16368,6 +16368,155 @@ bool IsHelperLane() } } +//@hidden: + +__generic<T : __BuiltinType> +[ForceInline] +[require(glsl)] +void __requireGLSLShaderSubgroupTypeExtension() +{ + // the following is a separate function call, since otherwise the `__requireTargetExtension` and associated __intrinsic_asm is ignored if the calling function also calls an __intrinsic_asm + if (__type_equals<T, half>() + 
|| __type_equals<T, float16_t>() + ) __requireTargetExtension("GL_EXT_shader_subgroup_extended_types_float16"); + else if (__type_equals<T, uint8_t>() + || __type_equals<T, int8_t>() + ) __requireTargetExtension("GL_EXT_shader_subgroup_extended_types_int8"); + else if (__type_equals<T, uint16_t>() + || __type_equals<T, int16_t>() + ) __requireTargetExtension("GL_EXT_shader_subgroup_extended_types_int16"); + else if (__type_equals<T, uint64_t>() + || __type_equals<T, int64_t>() + ) __requireTargetExtension("GL_EXT_shader_subgroup_extended_types_int64"); + + __intrinsic_asm ""; +} + +__generic<T : __BuiltinType> +[ForceInline] +[require(metal)] +void __checkMetalShaderSubgroupType() +{ + // These builtin types are not supported for Metal's `simd` operations. + if (__type_equals<T, uint8_t>() + || __type_equals<T, int8_t>() + || __type_equals<T, uint64_t>() + || __type_equals<T, int64_t>() + || __isBool<T>() + ) + { + static_assert(false, "Unsupported type for subgroup operations in Metal. Valid types include scalars and vectors of uint/uint32_t, int/int32_t, uint16_t, int16_t, float, and half."); + } +} + +__generic<T : __BuiltinType> +void shader_subgroup_preamble() +{ + // checks needed for shader_subgroup functions; __requireTargetExtension does not work + // (does not add the ext specified correctly to the compile output; using extended type + // will result in error for using the type) + __target_switch + { + case glsl: + __requireGLSLShaderSubgroupTypeExtension<T>(); + case metal: + __checkMetalShaderSubgroupType<T>(); + default: + return; + } +} + +//@public: + +// +// Wave Rotate intrinsics. +// These are Slang specific intrinsics to rotate values within a subgroup. 
+// + +__generic<T : __BuiltinType> +__glsl_extension(GL_KHR_shader_subgroup_rotate) +[require(glsl_metal_spirv, subgroup_rotate)] +T WaveRotate(T value, uint delta) +{ + shader_subgroup_preamble<T>(); + __target_switch + { + case glsl: + __intrinsic_asm "subgroupRotate"; + case metal: + __intrinsic_asm "simd_shuffle_rotate_down"; + case spirv: + return spirv_asm + { + OpExtension "SPV_KHR_subgroup_rotate"; + OpCapability GroupNonUniformRotateKHR; + result:$$T = OpGroupNonUniformRotateKHR Subgroup $value $delta; + }; + } +} + +__generic<T : __BuiltinType, let N : int> +__glsl_extension(GL_KHR_shader_subgroup_rotate) +[require(glsl_metal_spirv, subgroup_rotate)] +vector<T, N> WaveRotate(vector<T, N> value, uint delta) +{ + shader_subgroup_preamble<T>(); + __target_switch + { + case glsl: + __intrinsic_asm "subgroupRotate"; + case metal: + __intrinsic_asm "simd_shuffle_rotate_down"; + case spirv: + return spirv_asm + { + OpExtension "SPV_KHR_subgroup_rotate"; + OpCapability GroupNonUniformRotateKHR; + result:$$vector<T,N> = OpGroupNonUniformRotateKHR Subgroup $value $delta; + }; + } +} + +__generic<T : __BuiltinType> +__glsl_extension(GL_KHR_shader_subgroup_rotate) +[require(glsl_spirv, subgroup_rotate)] +T WaveClusteredRotate(T value, uint delta, constexpr uint clusterSize) +{ + shader_subgroup_preamble<T>(); + __target_switch + { + case glsl: + __intrinsic_asm "subgroupClusteredRotate"; + case spirv: + return spirv_asm + { + OpExtension "SPV_KHR_subgroup_rotate"; + OpCapability GroupNonUniformRotateKHR; + result:$$T = OpGroupNonUniformRotateKHR Subgroup $value $delta $clusterSize; + }; + } +} + +__generic<T : __BuiltinType, let N : int> +__glsl_extension(GL_KHR_shader_subgroup_rotate) +[require(glsl_spirv, subgroup_rotate)] +vector<T, N> WaveClusteredRotate(vector<T, N> value, uint delta, constexpr uint clusterSize) +{ + shader_subgroup_preamble<T>(); + __target_switch + { + case glsl: + __intrinsic_asm "subgroupClusteredRotate"; + case spirv: + return spirv_asm + 
{ + OpExtension "SPV_KHR_subgroup_rotate"; + OpCapability GroupNonUniformRotateKHR; + result:$$vector<T,N> = OpGroupNonUniformRotateKHR Subgroup $value $delta $clusterSize; + }; + } +} + // // Quad Control intrinsics // diff --git a/source/slang/slang-capabilities.capdef b/source/slang/slang-capabilities.capdef index b62de0f08..f4ae94978 100644 --- a/source/slang/slang-capabilities.capdef +++ b/source/slang/slang-capabilities.capdef @@ -513,6 +513,10 @@ def SPV_KHR_shader_clock : _spirv_1_0; /// [EXT] def SPV_NV_shader_subgroup_partitioned : _spirv_1_0; +/// Represents the SPIR-V extension that enables rotating values across invocations within a subgroup. +/// [EXT] +def SPV_KHR_subgroup_rotate : _spirv_1_3; + /// Represents the SPIR-V extension for ray tracing motion blur. /// [EXT] def SPV_NV_ray_tracing_motion_blur : _spirv_1_0; @@ -640,6 +644,10 @@ def spvGroupNonUniformVote : _spirv_1_3; /// [EXT] def spvGroupNonUniformPartitionedNV : _spirv_1_3 + SPV_NV_shader_subgroup_partitioned; +/// Represents the SPIR-V capability for group non-uniform rotate operations. +/// [EXT] +def spvGroupNonUniformRotateKHR : _spirv_1_3; + /// Represents the SPIR-V capability for ray tracing motion blur. /// [EXT] def spvRayTracingMotionBlurNV : SPV_NV_ray_tracing_motion_blur; @@ -777,6 +785,7 @@ def _GL_KHR_shader_subgroup_quad : _GLSL_140; def _GL_KHR_shader_subgroup_shuffle : _GLSL_140; def _GL_KHR_shader_subgroup_shuffle_relative : _GLSL_140; def _GL_KHR_shader_subgroup_vote : _GLSL_140; +def _GL_KHR_shader_subgroup_rotate : _GLSL_140; def _GL_NV_compute_shader_derivatives : _GLSL_450; def _GL_NV_fragment_shader_barycentric : _GL_EXT_fragment_shader_barycentric; @@ -982,6 +991,10 @@ alias GL_KHR_shader_subgroup_shuffle_relative = _GL_KHR_shader_subgroup_shuffle_ /// [EXT] alias GL_KHR_shader_subgroup_vote = _GL_KHR_shader_subgroup_vote | spvGroupNonUniformVote; +/// Represents the GL_KHR_shader_subgroup_rotate extension. 
+/// [EXT] +alias GL_KHR_shader_subgroup_rotate = _GL_KHR_shader_subgroup_rotate | spvGroupNonUniformRotateKHR; + /// Represents the GL_NV_compute_shader_derivatives extension. /// [EXT] alias GL_NV_compute_shader_derivatives = _GL_NV_compute_shader_derivatives | SPV_KHR_compute_shader_derivatives | _sm_6_6; @@ -2069,6 +2082,13 @@ alias subgroup_quad = GL_KHR_shader_subgroup_quad /// [Compound] alias subgroup_partitioned = GL_NV_shader_subgroup_partitioned + subgroup_ballot_activemask | _sm_6_5 | _cuda_sm_7_0; + +/// Capabilities required to use GLSL-style subgroup rotate operations 'subgroup_rotate' +/// [Compound] +alias subgroup_rotate = GL_KHR_shader_subgroup_rotate + | metal + ; + /// (All implemented targets) Capabilities required to use atomic operations of GLSL tier-1 float atomics /// [Compound] alias atomic_glsl_hlsl_nvapi_cuda_metal_float1 = atomic_glsl_float1 | hlsl_nvapi + _sm_4_0 | _cuda_sm_2_0 | metal; diff --git a/tests/diagnostics/wave-operations-types.slang b/tests/diagnostics/wave-operations-types.slang new file mode 100644 index 000000000..55a6a8e91 --- /dev/null +++ b/tests/diagnostics/wave-operations-types.slang @@ -0,0 +1,14 @@ +//DIAGNOSTIC_TEST:SIMPLE(filecheck=CHECK): -entry computeMain -stage compute -target metal + +RWStructuredBuffer<uint> out; + +[shader("compute")] +void computeMain(uint3 dispatchID : SV_DispatchThreadID) +{ + // CHECK: Unsupported type for subgroup operations in Metal. Valid types include + // CHECK: Unsupported type for subgroup operations in Metal. Valid types include + // CHECK: Unsupported type for subgroup operations in Metal. 
Valid types include + out[0] = WaveRotate(true, 1); + out[1] = WaveRotate(uint8_t(dispatchID.x), 1); + out[2] = WaveRotate(uint64_t(dispatchID.x), 1); +} diff --git a/tests/hlsl-intrinsic/wave-rotate/wave-rotate-clustered.slang b/tests/hlsl-intrinsic/wave-rotate/wave-rotate-clustered.slang new file mode 100644 index 000000000..d52384c15 --- /dev/null +++ b/tests/hlsl-intrinsic/wave-rotate/wave-rotate-clustered.slang @@ -0,0 +1,133 @@ +//TEST_CATEGORY(wave, compute) +//TEST:COMPARE_COMPUTE_EX(filecheck-buffer=CHECK):-vk -compute -shaderobj -emit-spirv-directly +//TEST:COMPARE_COMPUTE_EX(filecheck-buffer=CHECK):-vk -compute -shaderobj -emit-spirv-via-glsl + +//TEST:COMPARE_COMPUTE_EX(filecheck-buffer=CHECK):-vk -compute -shaderobj -emit-spirv-directly -xslang -DUSE_GLSL_SYNTAX -allow-glsl +//TEST:COMPARE_COMPUTE_EX(filecheck-buffer=CHECK):-vk -compute -shaderobj -emit-spirv-via-glsl -xslang -DUSE_GLSL_SYNTAX -allow-glsl + +#if defined(USE_GLSL_SYNTAX) +#define __clusteredRotate subgroupClusteredRotate +#else +#define __clusteredRotate WaveClusteredRotate +#endif + +//TEST_INPUT:ubuffer(data=[0], stride=4):out,name=outputBuffer +RWStructuredBuffer<uint> outputBuffer; + +#define SUBGROUP_SIZE 32 +#define DELTA 3 +#define CLUSTER_SIZE 8 + +static uint threadIndex; +static uint clusterIndex; +static uint rotatedValue; + +__generic<T : __BuiltinArithmeticType> +bool test1ClusteredRotate() +{ + return __clusteredRotate(T(threadIndex), DELTA, CLUSTER_SIZE) == T(rotatedValue); +} + +__generic<T : __BuiltinArithmeticType, let N : int> +bool testVRClusteredRotate() +{ + typealias gvec = vector<T, N>; + +#if defined(USE_GLSL_SYNTAX) + return (__clusteredRotate(gvec(T(threadIndex)), DELTA, CLUSTER_SIZE) == gvec(T(rotatedValue))); +#else + return (__clusteredRotate(gvec(T(threadIndex)), DELTA, CLUSTER_SIZE) == gvec(T(rotatedValue)))[0]; +#endif +} + +bool test1ClusteredRotateBool() +{ + bool currentValue = (threadIndex % 2 == 0) ? 
true : false; + bool rotatedValueBool = (threadIndex % 2 == 0) ? false : true; + return __clusteredRotate(currentValue, DELTA, CLUSTER_SIZE) == rotatedValueBool; +} + +__generic<let N : int> +bool testVRClusteredRotateBool() +{ + typealias gvec = vector<bool, N>; + bool currentValue = (threadIndex % 2 == 0) ? true : false; + bool rotatedValueBool = (threadIndex % 2 == 0) ? false : true; + +#if defined(USE_GLSL_SYNTAX) + return (__clusteredRotate(gvec(currentValue), DELTA, CLUSTER_SIZE) == gvec(rotatedValueBool)); +#else + return (__clusteredRotate(gvec(currentValue), DELTA, CLUSTER_SIZE) == gvec(rotatedValueBool))[0]; +#endif +} + +bool testClusteredRotate() +{ + return true + & test1ClusteredRotate<float>() + & testVRClusteredRotate<float, 2>() + & testVRClusteredRotate<float, 3>() + & testVRClusteredRotate<float, 4>() + & test1ClusteredRotate<half>() + & testVRClusteredRotate<half, 2>() + & testVRClusteredRotate<half, 3>() + & testVRClusteredRotate<half, 4>() + & test1ClusteredRotate<uint>() + & testVRClusteredRotate<uint, 2>() + & testVRClusteredRotate<uint, 3>() + & testVRClusteredRotate<uint, 4>() + & test1ClusteredRotate<uint16_t>() + & testVRClusteredRotate<uint16_t, 2>() + & testVRClusteredRotate<uint16_t, 3>() + & testVRClusteredRotate<uint16_t, 4>() + & test1ClusteredRotate<int>() + & testVRClusteredRotate<int, 2>() + & testVRClusteredRotate<int, 3>() + & testVRClusteredRotate<int, 4>() + & test1ClusteredRotate<int16_t>() + & testVRClusteredRotate<int16_t, 2>() + & testVRClusteredRotate<int16_t, 3>() + & testVRClusteredRotate<int16_t, 4>() + & test1ClusteredRotate<uint8_t>() + & testVRClusteredRotate<uint8_t, 2>() + & testVRClusteredRotate<uint8_t, 3>() + & testVRClusteredRotate<uint8_t, 4>() + & test1ClusteredRotate<uint64_t>() + & testVRClusteredRotate<uint64_t, 2>() + & testVRClusteredRotate<uint64_t, 3>() + & testVRClusteredRotate<uint64_t, 4>() + & test1ClusteredRotate<int8_t>() + & testVRClusteredRotate<int8_t, 2>() + & testVRClusteredRotate<int8_t, 
3>() + & testVRClusteredRotate<int8_t, 4>() + & test1ClusteredRotate<int64_t>() + & testVRClusteredRotate<int64_t, 2>() + & testVRClusteredRotate<int64_t, 3>() + & testVRClusteredRotate<int64_t, 4>() + & test1ClusteredRotateBool() + & testVRClusteredRotateBool<2>() + & testVRClusteredRotateBool<3>() + & testVRClusteredRotateBool<4>() + ; +} + +[shader("compute")] +[numthreads(SUBGROUP_SIZE, 1, 1)] +void computeMain(uint3 dispatchID : SV_DispatchThreadID) +{ + threadIndex = dispatchID.x; + clusterIndex = dispatchID.x % CLUSTER_SIZE; + + // Determine expected value of clustered rotate in current invocation. + // The values passed in are global invocation ids, and we rotate them within a cluster of size `CLUSTER_SIZE`. + uint clusterStart = (threadIndex / CLUSTER_SIZE) * CLUSTER_SIZE; + rotatedValue = clusterStart + ((threadIndex - clusterStart + DELTA) % CLUSTER_SIZE); + + bool result = true + & testClusteredRotate() + ; + + // CHECK: 1 + outputBuffer[0] = uint(result); +} + diff --git a/tests/hlsl-intrinsic/wave-rotate/wave-rotate.slang b/tests/hlsl-intrinsic/wave-rotate/wave-rotate.slang new file mode 100644 index 000000000..4b815c265 --- /dev/null +++ b/tests/hlsl-intrinsic/wave-rotate/wave-rotate.slang @@ -0,0 +1,134 @@ +//TEST_CATEGORY(wave, compute) +//TEST:COMPARE_COMPUTE_EX(filecheck-buffer=CHECK):-vk -compute -shaderobj -emit-spirv-directly +//TEST:COMPARE_COMPUTE_EX(filecheck-buffer=CHECK):-vk -compute -shaderobj -emit-spirv-via-glsl +//TEST:COMPARE_COMPUTE_EX(filecheck-buffer=CHECK):-metal -compute -shaderobj -xslang -DMETAL + + +//TEST:COMPARE_COMPUTE_EX(filecheck-buffer=CHECK):-vk -compute -shaderobj -emit-spirv-directly -xslang -DUSE_GLSL_SYNTAX -allow-glsl +//TEST:COMPARE_COMPUTE_EX(filecheck-buffer=CHECK):-vk -compute -shaderobj -emit-spirv-via-glsl -xslang -DUSE_GLSL_SYNTAX -allow-glsl +//TEST:COMPARE_COMPUTE_EX(filecheck-buffer=CHECK):-metal -compute -shaderobj -xslang -DMETAL -xslang -DUSE_GLSL_SYNTAX -allow-glsl + + +#if defined(USE_GLSL_SYNTAX) 
+#define __rotate subgroupRotate +#else +#define __rotate WaveRotate +#endif + +//TEST_INPUT:ubuffer(data=[0], stride=4):out,name=outputBuffer +RWStructuredBuffer<uint> outputBuffer; + +#define SUBGROUP_SIZE 32 +#define DELTA 3 + +static uint threadIndex; +static uint rotatedValue; + +__generic<T : __BuiltinArithmeticType> +bool test1Rotate() +{ + return __rotate(T(threadIndex), DELTA) == T(rotatedValue); +} + +__generic<T : __BuiltinArithmeticType, let N : int> +bool testVRotate() +{ + typealias gvec = vector<T, N>; + +#if defined(USE_GLSL_SYNTAX) + return (__rotate(gvec(T(threadIndex)), DELTA) == gvec(T(rotatedValue))); +#else + return (__rotate(gvec(T(threadIndex)), DELTA) == gvec(T(rotatedValue)))[0]; +#endif +} + +bool test1RotateBool() +{ + bool currentValue = (threadIndex % 2 == 0) ? true : false; + bool rotatedValueBool = (threadIndex % 2 == 0) ? false : true; + return __rotate(currentValue, DELTA) == rotatedValueBool; +} + +__generic<let N : int> +bool testVRotateBool() +{ + typealias gvec = vector<bool, N>; + bool currentValue = (threadIndex % 2 == 0) ? true : false; + bool rotatedValueBool = (threadIndex % 2 == 0) ? 
false : true; + +#if defined(USE_GLSL_SYNTAX) + return (__rotate(gvec(currentValue), DELTA) == gvec(rotatedValueBool)); +#else + return (__rotate(gvec(currentValue), DELTA) == gvec(rotatedValueBool))[0]; +#endif +} + +bool testRotate() +{ + return true + & test1Rotate<float>() + & testVRotate<float, 2>() + & testVRotate<float, 3>() + & testVRotate<float, 4>() + & test1Rotate<half>() + & testVRotate<half, 2>() + & testVRotate<half, 3>() + & testVRotate<half, 4>() + & test1Rotate<uint>() + & testVRotate<uint, 2>() + & testVRotate<uint, 3>() + & testVRotate<uint, 4>() + & test1Rotate<uint16_t>() + & testVRotate<uint16_t, 2>() + & testVRotate<uint16_t, 3>() + & testVRotate<uint16_t, 4>() + & test1Rotate<int>() + & testVRotate<int, 2>() + & testVRotate<int, 3>() + & testVRotate<int, 4>() + & test1Rotate<int16_t>() + & testVRotate<int16_t, 2>() + & testVRotate<int16_t, 3>() + & testVRotate<int16_t, 4>() + + // Subgroup rotate operations on these builtin types are not supported on Metal. +#if !defined(METAL) + & test1Rotate<uint8_t>() + & testVRotate<uint8_t, 2>() + & testVRotate<uint8_t, 3>() + & testVRotate<uint8_t, 4>() + & test1Rotate<uint64_t>() + & testVRotate<uint64_t, 2>() + & testVRotate<uint64_t, 3>() + & testVRotate<uint64_t, 4>() + & test1Rotate<int8_t>() + & testVRotate<int8_t, 2>() + & testVRotate<int8_t, 3>() + & testVRotate<int8_t, 4>() + & test1Rotate<int64_t>() + & testVRotate<int64_t, 2>() + & testVRotate<int64_t, 3>() + & testVRotate<int64_t, 4>() + & test1RotateBool() + & testVRotateBool<2>() + & testVRotateBool<3>() + & testVRotateBool<4>() +#endif + ; +} + +[shader("compute")] +[numthreads(SUBGROUP_SIZE, 1, 1)] +void computeMain(uint3 dispatchID : SV_DispatchThreadID) +{ + threadIndex = dispatchID.x; + rotatedValue = (threadIndex + DELTA) % SUBGROUP_SIZE; + + bool result = true + & testRotate() + ; + + // CHECK: 1 + outputBuffer[0] = uint(result); +} + |
