summaryrefslogtreecommitdiffstats
path: root/source
diff options
context:
space:
mode:
authorDarren Wihandi <65404740+fairywreath@users.noreply.github.com>2025-04-22 14:04:56 -0600
committerGitHub <noreply@github.com>2025-04-22 20:04:56 +0000
commited5940a629ae05e9571bfe355d22f0728347dcb4 (patch)
tree90a36c6543f0ee3748b80112a478897b027dddab /source
parentd5220b327632a8aeeb9a89494bb37bd82fec30cb (diff)
Implement shader subgroup rotate intrinsics (#6878)
* Initial implementation for SPIRV, GLSL and Metal * test add bool test * Fix and improve subgroup rotate tests * Add proper GLSL extensions and proper Metal type checking * Clean up tests and add diagnostics test for subgroup type for Metal * Update wave-intrinsics docs
Diffstat (limited to 'source')
-rw-r--r--source/slang/glsl.meta.slang70
-rw-r--r--source/slang/hlsl.meta.slang149
-rw-r--r--source/slang/slang-capabilities.capdef20
3 files changed, 200 insertions, 39 deletions
diff --git a/source/slang/glsl.meta.slang b/source/slang/glsl.meta.slang
index bbf0c40dd..85c8b174c 100644
--- a/source/slang/glsl.meta.slang
+++ b/source/slang/glsl.meta.slang
@@ -6110,45 +6110,6 @@ public void traceRayMotionNV(
}
}
-__generic<T : __BuiltinType>
-[ForceInline]
-void typeRequireChecks_shader_subgroup_GLSL() {
- // the following is a seperate function call, since else the `__requireTargetExtension` and associated __intrinsic_asm is ignored if the calling function also calls an __intrinsic_asm
- __target_switch
- {
- case glsl:
- if (__type_equals<T, half>()
- || __type_equals<T, float16_t>()
- ) __requireTargetExtension("GL_EXT_shader_subgroup_extended_types_float16");
- else if (__type_equals<T, uint8_t>()
- || __type_equals<T, int8_t>()
- ) __requireTargetExtension("GL_EXT_shader_subgroup_extended_types_int8");
- else if (__type_equals<T, uint16_t>()
- || __type_equals<T, int16_t>()
- ) __requireTargetExtension("GL_EXT_shader_subgroup_extended_types_int16");
- else if (__type_equals<T, uint64_t>()
- || __type_equals<T, int64_t>()
- ) __requireTargetExtension("GL_EXT_shader_subgroup_extended_types_int64");
-
- __intrinsic_asm "";
- }
-}
-
-__generic<T : __BuiltinType>
-void shader_subgroup_preamble() {
- // checks needed for shader_subgroup functions; __requireTargetExtension does not work
- // (does not add the ext specified correctly to the compile output; using extended type
- // will result in error for using the type)
- __target_switch
- {
- case glsl:
- typeRequireChecks_shader_subgroup_GLSL<T>();
- default:
- return;
- }
-
-}
-
// GL_KHR_shader_subgroup_basic Built-in Variables
[require(cpp_cuda_glsl_hlsl_spirv_wgsl, subgroup_basic)]
@@ -8176,6 +8137,37 @@ public vector<T,N> subgroupQuadSwapDiagonal(vector<T,N> value)
return QuadReadAcrossDiagonal(value);
}
+// GL_KHR_shader_subgroup_rotate
+
+__generic<T : __BuiltinType>
+[require(glsl_metal_spirv, subgroup_rotate)]
+public T subgroupRotate(T value, uint delta)
+{
+ return WaveRotate(value, delta);
+}
+
+__generic<T : __BuiltinType, let N : int>
+[require(glsl_metal_spirv, subgroup_rotate)]
+public vector<T, N> subgroupRotate(vector<T, N> value, uint delta)
+{
+ return WaveRotate(value, delta);
+}
+
+__generic<T : __BuiltinType>
+[require(glsl_spirv, subgroup_rotate)]
+public T subgroupClusteredRotate(T value, uint delta, constexpr uint clusterSize)
+{
+ return WaveClusteredRotate(value, delta, clusterSize);
+
+}
+
+__generic<T : __BuiltinType, let N : int>
+[require(glsl_spirv, subgroup_rotate)]
+public vector<T, N> subgroupClusteredRotate(vector<T, N> value, uint delta, constexpr uint clusterSize)
+{
+ return WaveClusteredRotate(value, delta, clusterSize);
+}
+
//// GLSL atomic
// The following type internally is a Shader Storage Buffer
diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang
index c8a2c8c58..03321bfaf 100644
--- a/source/slang/hlsl.meta.slang
+++ b/source/slang/hlsl.meta.slang
@@ -16368,6 +16368,155 @@ bool IsHelperLane()
}
}
+//@hidden:
+
+__generic<T : __BuiltinType>
+[ForceInline]
+[require(glsl)]
+void __requireGLSLShaderSubgroupTypeExtension()
+{
+ // This is a separate function, since otherwise the `__requireTargetExtension` and associated __intrinsic_asm are ignored if the calling function also calls an __intrinsic_asm
+ if (__type_equals<T, half>()
+ || __type_equals<T, float16_t>()
+ ) __requireTargetExtension("GL_EXT_shader_subgroup_extended_types_float16");
+ else if (__type_equals<T, uint8_t>()
+ || __type_equals<T, int8_t>()
+ ) __requireTargetExtension("GL_EXT_shader_subgroup_extended_types_int8");
+ else if (__type_equals<T, uint16_t>()
+ || __type_equals<T, int16_t>()
+ ) __requireTargetExtension("GL_EXT_shader_subgroup_extended_types_int16");
+ else if (__type_equals<T, uint64_t>()
+ || __type_equals<T, int64_t>()
+ ) __requireTargetExtension("GL_EXT_shader_subgroup_extended_types_int64");
+
+ __intrinsic_asm "";
+}
+
+__generic<T : __BuiltinType>
+[ForceInline]
+[require(metal)]
+void __checkMetalShaderSubgroupType()
+{
+ // These builtin types are not supported for Metal's `simd` operations.
+ if (__type_equals<T, uint8_t>()
+ || __type_equals<T, int8_t>()
+ || __type_equals<T, uint64_t>()
+ || __type_equals<T, int64_t>()
+ || __isBool<T>()
+ )
+ {
+ static_assert(false, "Unsupported type for subgroup operations in Metal. Valid types include scalars and vectors of uint/uint32_t, int/int32_t, uint16_t, int16_t, float, and half.");
+ }
+}
+
+__generic<T : __BuiltinType>
+void shader_subgroup_preamble()
+{
+ // checks needed for shader_subgroup functions; __requireTargetExtension does not work
+ // (does not add the ext specified correctly to the compile output; using extended type
+ // will result in error for using the type)
+ __target_switch
+ {
+ case glsl:
+ __requireGLSLShaderSubgroupTypeExtension<T>();
+ case metal:
+ __checkMetalShaderSubgroupType<T>();
+ default:
+ return;
+ }
+}
+
+//@public:
+
+//
+// Wave Rotate intrinsics.
+// These are Slang specific intrinsics to rotate values within a subgroup.
+//
+
+__generic<T : __BuiltinType>
+__glsl_extension(GL_KHR_shader_subgroup_rotate)
+[require(glsl_metal_spirv, subgroup_rotate)]
+T WaveRotate(T value, uint delta)
+{
+ shader_subgroup_preamble<T>();
+ __target_switch
+ {
+ case glsl:
+ __intrinsic_asm "subgroupRotate";
+ case metal:
+ __intrinsic_asm "simd_shuffle_rotate_down";
+ case spirv:
+ return spirv_asm
+ {
+ OpExtension "SPV_KHR_subgroup_rotate";
+ OpCapability GroupNonUniformRotateKHR;
+ result:$$T = OpGroupNonUniformRotateKHR Subgroup $value $delta;
+ };
+ }
+}
+
+__generic<T : __BuiltinType, let N : int>
+__glsl_extension(GL_KHR_shader_subgroup_rotate)
+[require(glsl_metal_spirv, subgroup_rotate)]
+vector<T, N> WaveRotate(vector<T, N> value, uint delta)
+{
+ shader_subgroup_preamble<T>();
+ __target_switch
+ {
+ case glsl:
+ __intrinsic_asm "subgroupRotate";
+ case metal:
+ __intrinsic_asm "simd_shuffle_rotate_down";
+ case spirv:
+ return spirv_asm
+ {
+ OpExtension "SPV_KHR_subgroup_rotate";
+ OpCapability GroupNonUniformRotateKHR;
+ result:$$vector<T,N> = OpGroupNonUniformRotateKHR Subgroup $value $delta;
+ };
+ }
+}
+
+__generic<T : __BuiltinType>
+__glsl_extension(GL_KHR_shader_subgroup_rotate)
+[require(glsl_spirv, subgroup_rotate)]
+T WaveClusteredRotate(T value, uint delta, constexpr uint clusterSize)
+{
+ shader_subgroup_preamble<T>();
+ __target_switch
+ {
+ case glsl:
+ __intrinsic_asm "subgroupClusteredRotate";
+ case spirv:
+ return spirv_asm
+ {
+ OpExtension "SPV_KHR_subgroup_rotate";
+ OpCapability GroupNonUniformRotateKHR;
+ result:$$T = OpGroupNonUniformRotateKHR Subgroup $value $delta $clusterSize;
+ };
+ }
+}
+
+__generic<T : __BuiltinType, let N : int>
+__glsl_extension(GL_KHR_shader_subgroup_rotate)
+[require(glsl_spirv, subgroup_rotate)]
+vector<T, N> WaveClusteredRotate(vector<T, N> value, uint delta, constexpr uint clusterSize)
+{
+ shader_subgroup_preamble<T>();
+ __target_switch
+ {
+ case glsl:
+ __intrinsic_asm "subgroupClusteredRotate";
+ case spirv:
+ return spirv_asm
+ {
+ OpExtension "SPV_KHR_subgroup_rotate";
+ OpCapability GroupNonUniformRotateKHR;
+ result:$$vector<T,N> = OpGroupNonUniformRotateKHR Subgroup $value $delta $clusterSize;
+ };
+ }
+}
+
//
// Quad Control intrinsics
//
diff --git a/source/slang/slang-capabilities.capdef b/source/slang/slang-capabilities.capdef
index b62de0f08..f4ae94978 100644
--- a/source/slang/slang-capabilities.capdef
+++ b/source/slang/slang-capabilities.capdef
@@ -513,6 +513,10 @@ def SPV_KHR_shader_clock : _spirv_1_0;
/// [EXT]
def SPV_NV_shader_subgroup_partitioned : _spirv_1_0;
+/// Represents the SPIR-V extension that enables rotating values across invocations within a subgroup.
+/// [EXT]
+def SPV_KHR_subgroup_rotate : _spirv_1_3;
+
/// Represents the SPIR-V extension for ray tracing motion blur.
/// [EXT]
def SPV_NV_ray_tracing_motion_blur : _spirv_1_0;
@@ -640,6 +644,10 @@ def spvGroupNonUniformVote : _spirv_1_3;
/// [EXT]
def spvGroupNonUniformPartitionedNV : _spirv_1_3 + SPV_NV_shader_subgroup_partitioned;
+/// Represents the SPIR-V capability for group non-uniform rotate operations.
+/// [EXT]
+def spvGroupNonUniformRotateKHR : _spirv_1_3;
+
/// Represents the SPIR-V capability for ray tracing motion blur.
/// [EXT]
def spvRayTracingMotionBlurNV : SPV_NV_ray_tracing_motion_blur;
@@ -777,6 +785,7 @@ def _GL_KHR_shader_subgroup_quad : _GLSL_140;
def _GL_KHR_shader_subgroup_shuffle : _GLSL_140;
def _GL_KHR_shader_subgroup_shuffle_relative : _GLSL_140;
def _GL_KHR_shader_subgroup_vote : _GLSL_140;
+def _GL_KHR_shader_subgroup_rotate : _GLSL_140;
def _GL_NV_compute_shader_derivatives : _GLSL_450;
def _GL_NV_fragment_shader_barycentric : _GL_EXT_fragment_shader_barycentric;
@@ -982,6 +991,10 @@ alias GL_KHR_shader_subgroup_shuffle_relative = _GL_KHR_shader_subgroup_shuffle_
/// [EXT]
alias GL_KHR_shader_subgroup_vote = _GL_KHR_shader_subgroup_vote | spvGroupNonUniformVote;
+/// Represents the GL_KHR_shader_subgroup_rotate extension.
+/// [EXT]
+alias GL_KHR_shader_subgroup_rotate = _GL_KHR_shader_subgroup_rotate | spvGroupNonUniformRotateKHR;
+
/// Represents the GL_NV_compute_shader_derivatives extension.
/// [EXT]
alias GL_NV_compute_shader_derivatives = _GL_NV_compute_shader_derivatives | SPV_KHR_compute_shader_derivatives | _sm_6_6;
@@ -2069,6 +2082,13 @@ alias subgroup_quad = GL_KHR_shader_subgroup_quad
/// [Compound]
alias subgroup_partitioned = GL_NV_shader_subgroup_partitioned + subgroup_ballot_activemask | _sm_6_5 | _cuda_sm_7_0;
+
+/// Capabilities required to use GLSL-style subgroup rotate operations 'subgroup_rotate'
+/// [Compound]
+alias subgroup_rotate = GL_KHR_shader_subgroup_rotate
+ | metal
+ ;
+
/// (All implemented targets) Capabilities required to use atomic operations of GLSL tier-1 float atomics
/// [Compound]
alias atomic_glsl_hlsl_nvapi_cuda_metal_float1 = atomic_glsl_float1 | hlsl_nvapi + _sm_4_0 | _cuda_sm_2_0 | metal;