summary | refs | log | tree | commit | diff | stats
path: root/source
diff options
context:
space:
mode:
author Darren Wihandi <65404740+fairywreath@users.noreply.github.com> 2025-01-28 23:12:51 -0500
committer GitHub <noreply@github.com> 2025-01-29 04:12:51 +0000
commit 1c282b80b9fbcfea9dc3dab7f5f546b069143e01 (patch)
tree 626a858fff466a0f0c54d4afbe4148a1a58caed4 /source
parent cf66563cfdcff9b7d76017e5b73319705ccdb735 (diff)
Implement WaveMultiPrefix* for SPIRV and GLSL (#6182)
Diffstat (limited to 'source')
-rw-r--r-- source/slang/hlsl.meta.slang 250
-rw-r--r-- source/slang/slang-capabilities.capdef 4
2 files changed, 216 insertions, 38 deletions
diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang
index ba5c95a0c..1853a82b6 100644
--- a/source/slang/hlsl.meta.slang
+++ b/source/slang/hlsl.meta.slang
@@ -15517,7 +15517,7 @@ uint4 WaveMatch(matrix<T,N,M> value)
}
/// @category wave
-[require(cuda_hlsl, waveprefix)]
+[require(cuda_hlsl, wave_multi_prefix)]
uint WaveMultiPrefixCountBits(bool value, uint4 mask)
{
__target_switch
@@ -15528,190 +15528,366 @@ uint WaveMultiPrefixCountBits(bool value, uint4 mask)
}
/// @category wave
-__generic<T : __BuiltinArithmeticType>
-__glsl_extension(GL_KHR_shader_subgroup_arithmetic)
+__generic<T : __BuiltinIntegerType>
+__glsl_extension(GL_NV_shader_subgroup_partitioned)
__spirv_version(1.3)
-[require(cuda_glsl_hlsl, waveprefix)]
+[require(cuda_glsl_hlsl_spirv, wave_multi_prefix)]
T WaveMultiPrefixBitAnd(T expr, uint4 mask)
{
__target_switch
{
case cuda: __intrinsic_asm "_wavePrefixAnd(_getMultiPrefixMask(($1).x), $0)";
- case glsl: __intrinsic_asm "subgroupExclusiveAnd($0)";
case hlsl: __intrinsic_asm "WaveMultiPrefixBitAnd";
+ case glsl: __intrinsic_asm "subgroupPartitionedExclusiveAndNV";
+ case spirv:
+ return spirv_asm
+ {
+ OpExtension "SPV_NV_shader_subgroup_partitioned";
+ OpCapability GroupNonUniformPartitionedNV;
+ result:$$T = OpGroupNonUniformBitwiseAnd Subgroup PartitionedExclusiveScanNV $expr $mask
+ };
}
}
-__glsl_extension(GL_KHR_shader_subgroup_arithmetic)
+__generic<T : __BuiltinIntegerType, let N : int>
+__glsl_extension(GL_NV_shader_subgroup_partitioned)
__spirv_version(1.3)
-__generic<T : __BuiltinArithmeticType, let N : int>
-[require(cuda_glsl_hlsl, waveprefix)]
+[require(cuda_glsl_hlsl_spirv, wave_multi_prefix)]
vector<T,N> WaveMultiPrefixBitAnd(vector<T,N> expr, uint4 mask)
{
__target_switch
{
case cuda: __intrinsic_asm "_wavePrefixAndMultiple(_getMultiPrefixMask(($1).x), $0)";
- case glsl: __intrinsic_asm "subgroupExclusiveAnd($0)";
case hlsl: __intrinsic_asm "WaveMultiPrefixBitAnd";
+ case glsl: __intrinsic_asm "subgroupPartitionedExclusiveAndNV";
+ case spirv:
+ return spirv_asm
+ {
+ OpExtension "SPV_NV_shader_subgroup_partitioned";
+ OpCapability GroupNonUniformPartitionedNV;
+ result:$$vector<T,N> = OpGroupNonUniformBitwiseAnd Subgroup PartitionedExclusiveScanNV $expr $mask
+ };
}
}
-__generic<T : __BuiltinArithmeticType, let N : int, let M : int>
-[require(cuda_hlsl, waveprefix)]
+__generic<T : __BuiltinIntegerType, let N : int, let M : int>
+[require(cuda_glsl_hlsl_spirv, wave_multi_prefix)]
matrix<T,N,M> WaveMultiPrefixBitAnd(matrix<T,N,M> expr, uint4 mask)
{
__target_switch
{
case cuda: __intrinsic_asm "_wavePrefixAndMultiple(_getMultiPrefixMask(($1).x), $0)";
case hlsl: __intrinsic_asm "WaveMultiPrefixBitAnd";
+ case glsl:
+ case spirv:
+ matrix<T, N, M> result;
+ for (int i = 0; i < N; ++i)
+ result[i] = WaveMultiPrefixBitAnd(expr[i], mask);
+ return result;
}
}
/// @category wave
-__generic<T : __BuiltinArithmeticType>
-__glsl_extension(GL_KHR_shader_subgroup_arithmetic)
+__generic<T : __BuiltinIntegerType>
+__glsl_extension(GL_NV_shader_subgroup_partitioned)
__spirv_version(1.3)
-[require(cuda_glsl_hlsl, waveprefix)]
+[require(cuda_glsl_hlsl_spirv, wave_multi_prefix)]
T WaveMultiPrefixBitOr(T expr, uint4 mask)
{
__target_switch
{
- case cuda: __intrinsic_asm "_wavePrefixOr(, _getMultiPrefixMask(($1).x), $0)";
+ case cuda: __intrinsic_asm "_wavePrefixOr(_getMultiPrefixMask(($1).x), $0)";
- case glsl: __intrinsic_asm "subgroupExclusiveOr($0)";
case hlsl: __intrinsic_asm "WaveMultiPrefixBitOr";
+ case glsl: __intrinsic_asm "subgroupPartitionedExclusiveOrNV";
+ case spirv:
+ return spirv_asm
+ {
+ OpExtension "SPV_NV_shader_subgroup_partitioned";
+ OpCapability GroupNonUniformPartitionedNV;
+ result:$$T = OpGroupNonUniformBitwiseOr Subgroup PartitionedExclusiveScanNV $expr $mask
+ };
}
}
-__generic<T : __BuiltinArithmeticType, let N : int>
-__glsl_extension(GL_KHR_shader_subgroup_arithmetic)
+__generic<T : __BuiltinIntegerType, let N : int>
+__glsl_extension(GL_NV_shader_subgroup_partitioned)
__spirv_version(1.3)
-[require(cuda_glsl_hlsl, waveprefix)]
+[require(cuda_glsl_hlsl_spirv, wave_multi_prefix)]
vector<T,N> WaveMultiPrefixBitOr(vector<T,N> expr, uint4 mask)
{
__target_switch
{
case cuda: __intrinsic_asm "_wavePrefixOrMultiple(_getMultiPrefixMask(($1).x), $0)";
- case glsl: __intrinsic_asm "subgroupExclusiveOr($0)";
case hlsl: __intrinsic_asm "WaveMultiPrefixBitOr";
+ case glsl: __intrinsic_asm "subgroupPartitionedExclusiveOrNV";
+ case spirv:
+ return spirv_asm
+ {
+ OpExtension "SPV_NV_shader_subgroup_partitioned";
+ OpCapability GroupNonUniformPartitionedNV;
+ result:$$vector<T,N> = OpGroupNonUniformBitwiseOr Subgroup PartitionedExclusiveScanNV $expr $mask
+ };
}
}
-__generic<T : __BuiltinArithmeticType, let N : int, let M : int>
-[require(cuda_hlsl, waveprefix)]
+__generic<T : __BuiltinIntegerType, let N : int, let M : int>
+[require(cuda_glsl_hlsl_spirv, wave_multi_prefix)]
matrix<T,N,M> WaveMultiPrefixBitOr(matrix<T,N,M> expr, uint4 mask)
{
__target_switch
{
case cuda: __intrinsic_asm "_wavePrefixOrMultiple(_getMultiPrefixMask(($1).x), $0)";
case hlsl: __intrinsic_asm "WaveMultiPrefixBitOr";
+ case glsl:
+ case spirv:
+ matrix<T, N, M> result;
+ for (int i = 0; i < N; ++i)
+ result[i] = WaveMultiPrefixBitOr(expr[i], mask);
+ return result;
}
}
/// @category wave
-__generic<T : __BuiltinArithmeticType>
-__glsl_extension(GL_KHR_shader_subgroup_arithmetic)
+__generic<T : __BuiltinIntegerType>
+__glsl_extension(GL_NV_shader_subgroup_partitioned)
__spirv_version(1.3)
-[require(cuda_glsl_hlsl, waveprefix)]
+[require(cuda_glsl_hlsl_spirv, wave_multi_prefix)]
T WaveMultiPrefixBitXor(T expr, uint4 mask)
{
__target_switch
{
case cuda: __intrinsic_asm "_wavePrefixXor(_getMultiPrefixMask(($1).x), $0)";
- case glsl: __intrinsic_asm "subgroupExclusiveXor($0)";
case hlsl: __intrinsic_asm "WaveMultiPrefixBitXor";
+ case glsl: __intrinsic_asm "subgroupPartitionedExclusiveXorNV";
+ case spirv:
+ return spirv_asm
+ {
+ OpExtension "SPV_NV_shader_subgroup_partitioned";
+ OpCapability GroupNonUniformPartitionedNV;
+ result:$$T = OpGroupNonUniformBitwiseXor Subgroup PartitionedExclusiveScanNV $expr $mask
+ };
}
}
-__generic<T : __BuiltinArithmeticType, let N : int>
-__glsl_extension(GL_KHR_shader_subgroup_arithmetic)
+__generic<T : __BuiltinIntegerType, let N : int>
+__glsl_extension(GL_NV_shader_subgroup_partitioned)
__spirv_version(1.3)
-[require(cuda_glsl_hlsl, waveprefix)]
+[require(cuda_glsl_hlsl_spirv, wave_multi_prefix)]
vector<T,N> WaveMultiPrefixBitXor(vector<T,N> expr, uint4 mask)
{
__target_switch
{
case cuda: __intrinsic_asm "_wavePrefixXorMultiple(_getMultiPrefixMask(($1).x), $0)";
- case glsl: __intrinsic_asm "subgroupExclusiveXor($0)";
case hlsl: __intrinsic_asm "WaveMultiPrefixBitXor";
+ case glsl: __intrinsic_asm "subgroupPartitionedExclusiveXorNV";
+ case spirv:
+ return spirv_asm
+ {
+ OpExtension "SPV_NV_shader_subgroup_partitioned";
+ OpCapability GroupNonUniformPartitionedNV;
+ result:$$vector<T,N> = OpGroupNonUniformBitwiseXor Subgroup PartitionedExclusiveScanNV $expr $mask
+ };
}
}
-__generic<T : __BuiltinArithmeticType, let N : int, let M : int>
-[require(cuda_hlsl, waveprefix)]
+__generic<T : __BuiltinIntegerType, let N : int, let M : int>
+[require(cuda_glsl_hlsl_spirv, wave_multi_prefix)]
matrix<T,N,M> WaveMultiPrefixBitXor(matrix<T,N,M> expr, uint4 mask)
{
__target_switch
{
case cuda: __intrinsic_asm "_wavePrefixXorMultiple(_getMultiPrefixMask(($1).x), $0)";
case hlsl: __intrinsic_asm "WaveMultiPrefixBitXor";
+ case glsl:
+ case spirv:
+ matrix<T, N, M> result;
+ for (int i = 0; i < N; ++i)
+ result[i] = WaveMultiPrefixBitXor(expr[i], mask);
+ return result;
}
}
/// @category wave
__generic<T : __BuiltinArithmeticType>
-[require(cuda_hlsl, waveprefix)]
+__glsl_extension(GL_NV_shader_subgroup_partitioned)
+__spirv_version(1.3)
+[require(cuda_glsl_hlsl_spirv, wave_multi_prefix)]
T WaveMultiPrefixProduct(T value, uint4 mask)
{
__target_switch
{
case cuda: __intrinsic_asm "_wavePrefixProduct(_getMultiPrefixMask(($1).x), $0)";
case hlsl: __intrinsic_asm "WaveMultiPrefixProduct";
+ case glsl: __intrinsic_asm "subgroupPartitionedExclusiveMulNV";
+ case spirv:
+ {
+ spirv_asm
+ {
+ OpExtension "SPV_NV_shader_subgroup_partitioned";
+ OpCapability GroupNonUniformPartitionedNV;
+ };
+
+ if (__isFloat<T>())
+ {
+ return spirv_asm
+ {
+ result:$$T = OpGroupNonUniformFMul Subgroup PartitionedExclusiveScanNV $value $mask
+ };
+ }
+ else
+ {
+ return spirv_asm
+ {
+ result:$$T = OpGroupNonUniformIMul Subgroup PartitionedExclusiveScanNV $value $mask
+ };
+ }
+ }
}
}
__generic<T : __BuiltinArithmeticType, let N : int>
-[require(cuda_hlsl, waveprefix)]
+__glsl_extension(GL_NV_shader_subgroup_partitioned)
+__spirv_version(1.3)
+[require(cuda_glsl_hlsl_spirv, wave_multi_prefix)]
vector<T,N> WaveMultiPrefixProduct(vector<T,N> value, uint4 mask)
{
__target_switch
{
case cuda: __intrinsic_asm "_wavePrefixProductMultiple(_getMultiPrefixMask(($1).x), $0)";
case hlsl: __intrinsic_asm "WaveMultiPrefixProduct";
+ case glsl: __intrinsic_asm "subgroupPartitionedExclusiveMulNV";
+ case spirv:
+ {
+ spirv_asm
+ {
+ OpExtension "SPV_NV_shader_subgroup_partitioned";
+ OpCapability GroupNonUniformPartitionedNV;
+ };
+
+ if (__isFloat<T>())
+ {
+ return spirv_asm
+ {
+ result:$$vector<T,N> = OpGroupNonUniformFMul Subgroup PartitionedExclusiveScanNV $value $mask
+ };
+ }
+ else
+ {
+ return spirv_asm
+ {
+ result:$$vector<T,N> = OpGroupNonUniformIMul Subgroup PartitionedExclusiveScanNV $value $mask
+ };
+ }
+ }
}
}
__generic<T : __BuiltinArithmeticType, let N : int, let M : int>
-[require(cuda_hlsl, waveprefix)]
+[require(cuda_glsl_hlsl_spirv, wave_multi_prefix)]
matrix<T,N,M> WaveMultiPrefixProduct(matrix<T,N,M> value, uint4 mask)
{
__target_switch
{
case cuda: __intrinsic_asm "_wavePrefixProductMultiple(_getMultiPrefixMask(($1).x), $0)";
case hlsl: __intrinsic_asm "WaveMultiPrefixProduct";
+ case glsl:
+ case spirv:
+ matrix<T, N, M> result;
+ for (int i = 0; i < N; ++i)
+ result[i] = WaveMultiPrefixProduct(value[i], mask);
+ return result;
}
}
/// @category wave
__generic<T : __BuiltinArithmeticType>
-[require(cuda_hlsl, waveprefix)]
+__glsl_extension(GL_NV_shader_subgroup_partitioned)
+__spirv_version(1.3)
+[require(cuda_glsl_hlsl_spirv, wave_multi_prefix)]
T WaveMultiPrefixSum(T value, uint4 mask)
{
__target_switch
{
case cuda: __intrinsic_asm "_wavePrefixSum(_getMultiPrefixMask(($1).x), $0)";
case hlsl: __intrinsic_asm "WaveMultiPrefixSum";
+ case glsl: __intrinsic_asm "subgroupPartitionedExclusiveAddNV";
+ case spirv:
+ {
+ spirv_asm
+ {
+ OpExtension "SPV_NV_shader_subgroup_partitioned";
+ OpCapability GroupNonUniformPartitionedNV;
+ };
+
+ if (__isFloat<T>())
+ {
+ return spirv_asm
+ {
+ result:$$T = OpGroupNonUniformFAdd Subgroup PartitionedExclusiveScanNV $value $mask
+ };
+ }
+ else
+ {
+ return spirv_asm
+ {
+ result:$$T = OpGroupNonUniformIAdd Subgroup PartitionedExclusiveScanNV $value $mask
+ };
+ }
+ }
}
}
__generic<T : __BuiltinArithmeticType, let N : int>
-[require(cuda_hlsl, waveprefix)]
+__glsl_extension(GL_NV_shader_subgroup_partitioned)
+[require(cuda_glsl_hlsl_spirv, wave_multi_prefix)]
+__spirv_version(1.3)
vector<T,N> WaveMultiPrefixSum(vector<T,N> value, uint4 mask)
{
__target_switch
{
case cuda: __intrinsic_asm "_wavePrefixSumMultiple(_getMultiPrefixMask(($1).x), $0 )";
case hlsl: __intrinsic_asm "WaveMultiPrefixSum";
+ case glsl: __intrinsic_asm "subgroupPartitionedExclusiveAddNV";
+ case spirv:
+ {
+ spirv_asm
+ {
+ OpExtension "SPV_NV_shader_subgroup_partitioned";
+ OpCapability GroupNonUniformPartitionedNV;
+ };
+
+ if (__isFloat<T>())
+ {
+ return spirv_asm
+ {
+ result:$$vector<T,N> = OpGroupNonUniformFAdd Subgroup PartitionedExclusiveScanNV $value $mask
+ };
+ }
+ else
+ {
+ return spirv_asm
+ {
+ result:$$vector<T,N> = OpGroupNonUniformIAdd Subgroup PartitionedExclusiveScanNV $value $mask
+ };
+ }
+ }
}
}
__generic<T : __BuiltinArithmeticType, let N : int, let M : int>
-[require(cuda_hlsl, waveprefix)]
+[require(cuda_glsl_hlsl_spirv, wave_multi_prefix)]
matrix<T,N,M> WaveMultiPrefixSum(matrix<T,N,M> value, uint4 mask)
{
__target_switch
{
case cuda: __intrinsic_asm "_wavePrefixSumMultiple(_getMultiPrefixMask(($1).x), $0)";
case hlsl: __intrinsic_asm "WaveMultiPrefixSum";
+ case glsl:
+ case spirv:
+ matrix<T, N, M> result;
+ for (int i = 0; i < N; ++i)
+ result[i] = WaveMultiPrefixSum(value[i], mask);
+ return result;
}
}
diff --git a/source/slang/slang-capabilities.capdef b/source/slang/slang-capabilities.capdef
index 4f6357779..3bc54c080 100644
--- a/source/slang/slang-capabilities.capdef
+++ b/source/slang/slang-capabilities.capdef
@@ -1024,7 +1024,9 @@ alias fragmentshaderbarycentric = GL_EXT_fragment_shader_barycentric | _sm_6_1;
alias shadermemorycontrol = glsl | _spirv_1_0 | _sm_5_0;
/// Capabilities needed to use HLSL tier wave operations
/// [Compound]
-alias waveprefix = _sm_6_5 | _cuda_sm_7_0 | GL_KHR_shader_subgroup_arithmetic;
+alias wave_multi_prefix = _sm_6_5
+ | _cuda_sm_7_0
+ | GL_KHR_shader_subgroup_ballot + GL_KHR_shader_subgroup_arithmetic + GL_NV_shader_subgroup_partitioned;
/// Capabilities needed to use GLSL buffer-reference's
/// [Compound]
alias bufferreference = GL_EXT_buffer_reference;