diff options
| author | Darren Wihandi <65404740+fairywreath@users.noreply.github.com> | 2025-04-22 14:04:56 -0600 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-04-22 20:04:56 +0000 |
| commit | ed5940a629ae05e9571bfe355d22f0728347dcb4 (patch) | |
| tree | 90a36c6543f0ee3748b80112a478897b027dddab /source | |
| parent | d5220b327632a8aeeb9a89494bb37bd82fec30cb (diff) | |
Implement shader subgroup rotate intrinsics (#6878)
* Initial implementation for SPIRV, GLSL and Metal
* Add bool test
* Fix and improve subgroup rotate tests
* Add proper GLSL extensions and proper Metal type checking
* Clean up tests and add diagnostics test for subgroup type for Metal
* Update wave-intrinsics docs
Diffstat (limited to 'source')
| -rw-r--r-- | source/slang/glsl.meta.slang | 70 | ||||
| -rw-r--r-- | source/slang/hlsl.meta.slang | 149 | ||||
| -rw-r--r-- | source/slang/slang-capabilities.capdef | 20 |
3 files changed, 200 insertions, 39 deletions
diff --git a/source/slang/glsl.meta.slang b/source/slang/glsl.meta.slang index bbf0c40dd..85c8b174c 100644 --- a/source/slang/glsl.meta.slang +++ b/source/slang/glsl.meta.slang @@ -6110,45 +6110,6 @@ public void traceRayMotionNV( } } -__generic<T : __BuiltinType> -[ForceInline] -void typeRequireChecks_shader_subgroup_GLSL() { - // the following is a seperate function call, since else the `__requireTargetExtension` and associated __intrinsic_asm is ignored if the calling function also calls an __intrinsic_asm - __target_switch - { - case glsl: - if (__type_equals<T, half>() - || __type_equals<T, float16_t>() - ) __requireTargetExtension("GL_EXT_shader_subgroup_extended_types_float16"); - else if (__type_equals<T, uint8_t>() - || __type_equals<T, int8_t>() - ) __requireTargetExtension("GL_EXT_shader_subgroup_extended_types_int8"); - else if (__type_equals<T, uint16_t>() - || __type_equals<T, int16_t>() - ) __requireTargetExtension("GL_EXT_shader_subgroup_extended_types_int16"); - else if (__type_equals<T, uint64_t>() - || __type_equals<T, int64_t>() - ) __requireTargetExtension("GL_EXT_shader_subgroup_extended_types_int64"); - - __intrinsic_asm ""; - } -} - -__generic<T : __BuiltinType> -void shader_subgroup_preamble() { - // checks needed for shader_subgroup functions; __requireTargetExtension does not work - // (does not add the ext specified correctly to the compile output; using extended type - // will result in error for using the type) - __target_switch - { - case glsl: - typeRequireChecks_shader_subgroup_GLSL<T>(); - default: - return; - } - -} - // GL_KHR_shader_subgroup_basic Built-in Variables [require(cpp_cuda_glsl_hlsl_spirv_wgsl, subgroup_basic)] @@ -8176,6 +8137,37 @@ public vector<T,N> subgroupQuadSwapDiagonal(vector<T,N> value) return QuadReadAcrossDiagonal(value); } +// GL_KHR_shader_subgroup_rotate + +__generic<T : __BuiltinType> +[require(glsl_metal_spirv, subgroup_rotate)] +public T subgroupRotate(T value, uint delta) +{ + return WaveRotate(value, 
delta); +} + +__generic<T : __BuiltinType, let N : int> +[require(glsl_metal_spirv, subgroup_rotate)] +public vector<T, N> subgroupRotate(vector<T, N> value, uint delta) +{ + return WaveRotate(value, delta); +} + +__generic<T : __BuiltinType> +[require(glsl_spirv, subgroup_rotate)] +public T subgroupClusteredRotate(T value, uint delta, constexpr uint clusterSize) +{ + return WaveClusteredRotate(value, delta, clusterSize); + +} + +__generic<T : __BuiltinType, let N : int> +[require(glsl_spirv, subgroup_rotate)] +public vector<T, N> subgroupClusteredRotate(vector<T, N> value, uint delta, constexpr uint clusterSize) +{ + return WaveClusteredRotate(value, delta, clusterSize); +} + //// GLSL atomic // The following type internally is a Shader Storage Buffer diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang index c8a2c8c58..03321bfaf 100644 --- a/source/slang/hlsl.meta.slang +++ b/source/slang/hlsl.meta.slang @@ -16368,6 +16368,155 @@ bool IsHelperLane() } } +//@hidden: + +__generic<T : __BuiltinType> +[ForceInline] +[require(glsl)] +void __requireGLSLShaderSubgroupTypeExtension() +{ + // the following is a seperate function call, since else the `__requireTargetExtension` and associated __intrinsic_asm is ignored if the calling function also calls an __intrinsic_asm + if (__type_equals<T, half>() + || __type_equals<T, float16_t>() + ) __requireTargetExtension("GL_EXT_shader_subgroup_extended_types_float16"); + else if (__type_equals<T, uint8_t>() + || __type_equals<T, int8_t>() + ) __requireTargetExtension("GL_EXT_shader_subgroup_extended_types_int8"); + else if (__type_equals<T, uint16_t>() + || __type_equals<T, int16_t>() + ) __requireTargetExtension("GL_EXT_shader_subgroup_extended_types_int16"); + else if (__type_equals<T, uint64_t>() + || __type_equals<T, int64_t>() + ) __requireTargetExtension("GL_EXT_shader_subgroup_extended_types_int64"); + + __intrinsic_asm ""; +} + +__generic<T : __BuiltinType> +[ForceInline] +[require(metal)] +void 
__checkMetalShaderSubgroupType() +{ + // These builtin types are not supported for Metal's `simd` operations. + if (__type_equals<T, uint8_t>() + || __type_equals<T, int8_t>() + || __type_equals<T, uint64_t>() + || __type_equals<T, int64_t>() + || __isBool<T>() + ) + { + static_assert(false, "Unsupported type for subgroup operations in Metal. Valid types include scalars and vectors of uint/uint32_t, int/int32_t, uint16_t, int16_t, float, and half."); + } +} + +__generic<T : __BuiltinType> +void shader_subgroup_preamble() +{ + // checks needed for shader_subgroup functions; __requireTargetExtension does not work + // (does not add the ext specified correctly to the compile output; using extended type + // will result in error for using the type) + __target_switch + { + case glsl: + __requireGLSLShaderSubgroupTypeExtension<T>(); + case metal: + __checkMetalShaderSubgroupType<T>(); + default: + return; + } +} + +//@public: + +// +// Wave Rotate intrinsics. +// These are Slang specific intrinsics to rotate values within a subgroup. 
+// + +__generic<T : __BuiltinType> +__glsl_extension(GL_KHR_shader_subgroup_rotate) +[require(glsl_metal_spirv, subgroup_rotate)] +T WaveRotate(T value, uint delta) +{ + shader_subgroup_preamble<T>(); + __target_switch + { + case glsl: + __intrinsic_asm "subgroupRotate"; + case metal: + __intrinsic_asm "simd_shuffle_rotate_down"; + case spirv: + return spirv_asm + { + OpExtension "SPV_KHR_subgroup_rotate"; + OpCapability GroupNonUniformRotateKHR; + result:$$T = OpGroupNonUniformRotateKHR Subgroup $value $delta; + }; + } +} + +__generic<T : __BuiltinType, let N : int> +__glsl_extension(GL_KHR_shader_subgroup_rotate) +[require(glsl_metal_spirv, subgroup_rotate)] +vector<T, N> WaveRotate(vector<T, N> value, uint delta) +{ + shader_subgroup_preamble<T>(); + __target_switch + { + case glsl: + __intrinsic_asm "subgroupRotate"; + case metal: + __intrinsic_asm "simd_shuffle_rotate_down"; + case spirv: + return spirv_asm + { + OpExtension "SPV_KHR_subgroup_rotate"; + OpCapability GroupNonUniformRotateKHR; + result:$$vector<T,N> = OpGroupNonUniformRotateKHR Subgroup $value $delta; + }; + } +} + +__generic<T : __BuiltinType> +__glsl_extension(GL_KHR_shader_subgroup_rotate) +[require(glsl_spirv, subgroup_rotate)] +T WaveClusteredRotate(T value, uint delta, constexpr uint clusterSize) +{ + shader_subgroup_preamble<T>(); + __target_switch + { + case glsl: + __intrinsic_asm "subgroupClusteredRotate"; + case spirv: + return spirv_asm + { + OpExtension "SPV_KHR_subgroup_rotate"; + OpCapability GroupNonUniformRotateKHR; + result:$$T = OpGroupNonUniformRotateKHR Subgroup $value $delta $clusterSize; + }; + } +} + +__generic<T : __BuiltinType, let N : int> +__glsl_extension(GL_KHR_shader_subgroup_rotate) +[require(glsl_spirv, subgroup_rotate)] +vector<T, N> WaveClusteredRotate(vector<T, N> value, uint delta, constexpr uint clusterSize) +{ + shader_subgroup_preamble<T>(); + __target_switch + { + case glsl: + __intrinsic_asm "subgroupClusteredRotate"; + case spirv: + return spirv_asm + 
{ + OpExtension "SPV_KHR_subgroup_rotate"; + OpCapability GroupNonUniformRotateKHR; + result:$$vector<T,N> = OpGroupNonUniformRotateKHR Subgroup $value $delta $clusterSize; + }; + } +} + // // Quad Control intrinsics // diff --git a/source/slang/slang-capabilities.capdef b/source/slang/slang-capabilities.capdef index b62de0f08..f4ae94978 100644 --- a/source/slang/slang-capabilities.capdef +++ b/source/slang/slang-capabilities.capdef @@ -513,6 +513,10 @@ def SPV_KHR_shader_clock : _spirv_1_0; /// [EXT] def SPV_NV_shader_subgroup_partitioned : _spirv_1_0; +/// Represents the SPIR-V extension that enables rotating values across invocations within a subgroup. +/// [EXT] +def SPV_KHR_subgroup_rotate : _spirv_1_3; + /// Represents the SPIR-V extension for ray tracing motion blur. /// [EXT] def SPV_NV_ray_tracing_motion_blur : _spirv_1_0; @@ -640,6 +644,10 @@ def spvGroupNonUniformVote : _spirv_1_3; /// [EXT] def spvGroupNonUniformPartitionedNV : _spirv_1_3 + SPV_NV_shader_subgroup_partitioned; +/// Represents the SPIR-V capability for group non-uniform rotate operations. +/// [EXT] +def spvGroupNonUniformRotateKHR : _spirv_1_3; + /// Represents the SPIR-V capability for ray tracing motion blur. /// [EXT] def spvRayTracingMotionBlurNV : SPV_NV_ray_tracing_motion_blur; @@ -777,6 +785,7 @@ def _GL_KHR_shader_subgroup_quad : _GLSL_140; def _GL_KHR_shader_subgroup_shuffle : _GLSL_140; def _GL_KHR_shader_subgroup_shuffle_relative : _GLSL_140; def _GL_KHR_shader_subgroup_vote : _GLSL_140; +def _GL_KHR_shader_subgroup_rotate : _GLSL_140; def _GL_NV_compute_shader_derivatives : _GLSL_450; def _GL_NV_fragment_shader_barycentric : _GL_EXT_fragment_shader_barycentric; @@ -982,6 +991,10 @@ alias GL_KHR_shader_subgroup_shuffle_relative = _GL_KHR_shader_subgroup_shuffle_ /// [EXT] alias GL_KHR_shader_subgroup_vote = _GL_KHR_shader_subgroup_vote | spvGroupNonUniformVote; +/// Represents the GL_KHR_shader_subgroup_rotate extension. 
+/// [EXT] +alias GL_KHR_shader_subgroup_rotate = _GL_KHR_shader_subgroup_rotate | spvGroupNonUniformRotateKHR; + /// Represents the GL_NV_compute_shader_derivatives extension. /// [EXT] alias GL_NV_compute_shader_derivatives = _GL_NV_compute_shader_derivatives | SPV_KHR_compute_shader_derivatives | _sm_6_6; @@ -2069,6 +2082,13 @@ alias subgroup_quad = GL_KHR_shader_subgroup_quad /// [Compound] alias subgroup_partitioned = GL_NV_shader_subgroup_partitioned + subgroup_ballot_activemask | _sm_6_5 | _cuda_sm_7_0; + +/// Capabilities required to use GLSL-style subgroup rotate operations 'subgroup_rotate' +/// [Compound] +alias subgroup_rotate = GL_KHR_shader_subgroup_rotate + | metal + ; + /// (All implemented targets) Capabilities required to use atomic operations of GLSL tier-1 float atomics /// [Compound] alias atomic_glsl_hlsl_nvapi_cuda_metal_float1 = atomic_glsl_float1 | hlsl_nvapi + _sm_4_0 | _cuda_sm_2_0 | metal; |
