diff options
Diffstat (limited to 'source')
| -rw-r--r-- | source/slang/hlsl.meta.slang | 86 |
1 files changed, 47 insertions, 39 deletions
diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang index b43cd009f..20158c1b1 100644 --- a/source/slang/hlsl.meta.slang +++ b/source/slang/hlsl.meta.slang @@ -2498,6 +2498,7 @@ __generic<T : __BuiltinArithmeticType> __glsl_extension(GL_KHR_shader_subgroup_arithmetic) __spirv_version(1.3) __target_intrinsic(glsl, "subgroupExclusiveMul($0)") +__target_intrinsic(cuda, "_wavePrefixProduct($0)") T WavePrefixProduct(T expr); __generic<T : __BuiltinArithmeticType, let N : int> __glsl_extension(GL_KHR_shader_subgroup_arithmetic) @@ -2521,10 +2522,54 @@ vector<T,N> WavePrefixSum(vector<T,N> expr); __generic<T : __BuiltinArithmeticType, let N : int, let M : int> matrix<T,N,M> WavePrefixSum(matrix<T,N,M> expr); +__generic<T : __BuiltinType> +__glsl_extension(GL_KHR_shader_subgroup_ballot) +__spirv_version(1.3) +__target_intrinsic(glsl, "subgroupBroadcastFirst($0)") +__target_intrinsic(cuda, "_waveReadFirst($0)") +T WaveReadLaneFirst(T expr); +__generic<T : __BuiltinType, let N : int> +__glsl_extension(GL_KHR_shader_subgroup_ballot) +__spirv_version(1.3) +__target_intrinsic(glsl, "subgroupBroadcastFirst($0)") +__target_intrinsic(cuda, "_waveReadFirstMultiple($0)") +vector<T,N> WaveReadLaneFirst(vector<T,N> expr); +__generic<T : __BuiltinType, let N : int, let M : int> +__target_intrinsic(cuda, "_waveReadFirstMultiple($0)") +matrix<T,N,M> WaveReadLaneFirst(matrix<T,N,M> expr); + +// NOTE! On GLSL based targets the lane index *must* be a compile time expression! +// See https://github.com/KhronosGroup/GLSL/blob/master/extensions/khr/GL_KHR_shader_subgroup.txt +__generic<T : __BuiltinType> +__glsl_extension(GL_KHR_shader_subgroup_ballot) +__spirv_version(1.3) +__target_intrinsic(glsl, "subgroupBroadcast($0, $1)") +__target_intrinsic(cuda, "__shfl_sync(__activemask(), $0, $1)") +T WaveReadLaneAt(T value, int lane); +__generic<T : __BuiltinType, let N : int> +__spirv_version(1.3) +__target_intrinsic(glsl, "subgroupBroadcast($0, $1)") +__target_intrinsic(cuda, "_waveReadLaneAtMultiple($0, $1)") +vector<T,N> WaveReadLaneAt(vector<T,N> value, int lane); +__generic<T : __BuiltinType, let N : int, let M : int> +__target_intrinsic(cuda, "_waveReadLaneAtMultiple($0, $1)") +matrix<T,N,M> WaveReadLaneAt(matrix<T,N,M> value, int lane); + +__glsl_extension(GL_KHR_shader_subgroup_ballot) +__spirv_version(1.3) +__target_intrinsic(glsl, "subgroupBallotExclusiveBitCount(subgroupBallot($0))") +__target_intrinsic(cuda, "__popc(__ballot_sync(__activemask(), $0) & _getLaneLtMask())") +uint WavePrefixCountBits(bool value); + +// Shader model 6.5 stuff +// https://github.com/microsoft/DirectX-Specs/blob/master/d3d/HLSL_ShaderModel6_5.md +// TODO(JS): Looks like they need a mask parameter + __generic<T : __BuiltinArithmeticType> __glsl_extension(GL_KHR_shader_subgroup_arithmetic) __spirv_version(1.3) __target_intrinsic(glsl, "subgroupExclusiveAnd($0)") +__target_intrinsic(cuda, "_wavePrefixAnd($0)") T WaveMultiPrefixBitAnd(T expr); __glsl_extension(GL_KHR_shader_subgroup_arithmetic) __spirv_version(1.3) @@ -2538,6 +2583,7 @@ __generic<T : __BuiltinArithmeticType> __glsl_extension(GL_KHR_shader_subgroup_arithmetic) __spirv_version(1.3) __target_intrinsic(glsl, "subgroupExclusiveOr($0)") +__target_intrinsic(cuda, "_wavePrefixOr($0)") T WaveMultiPrefixBitOr(T expr); __generic<T : __BuiltinArithmeticType, let N : int> __glsl_extension(GL_KHR_shader_subgroup_arithmetic) @@ -2551,6 +2597,7 @@ __generic<T : __BuiltinArithmeticType> __glsl_extension(GL_KHR_shader_subgroup_arithmetic) __spirv_version(1.3) __target_intrinsic(glsl, "subgroupExclusiveXor($0)") +__target_intrinsic(cuda, "_wavePrefixXor($0)") T WaveMultiPrefixBitXor(T expr); __generic<T : __BuiltinArithmeticType, let N : int> __glsl_extension(GL_KHR_shader_subgroup_arithmetic) @@ -2560,11 +2607,6 @@ vector<T,N> WaveMultiPrefixBitXor(vector<T,N> expr); __generic<T : __BuiltinArithmeticType, let N : int, let M : int> matrix<T,N,M> WaveMultiPrefixBitXor(matrix<T,N,M> expr); -__glsl_extension(GL_KHR_shader_subgroup_ballot) -__spirv_version(1.3) -__target_intrinsic(glsl, "subgroupBallotExclusiveBitCount(subgroupBallot($0))") -__target_intrinsic(cuda, "__popc(__ballot_sync(__activemask(), $0) & _getLaneLtMask())") -uint WavePrefixCountBits(bool value); uint WaveMultiPrefixCountBits(bool value, uint4 mask); @@ -2576,40 +2618,6 @@ __generic<T : __BuiltinArithmeticType> T WaveMultiPrefixSum(T value, uint4 mask) __generic<T : __BuiltinArithmeticType, let N : int> vector<T,N> WaveMultiPrefixSum(vector<T,N> value, uint4 mask); __generic<T : __BuiltinArithmeticType, let N : int, let M : int> matrix<T,N,M> WaveMultiPrefixSum(matrix<T,N,M> value, uint4 mask); -__generic<T : __BuiltinType> -__glsl_extension(GL_KHR_shader_subgroup_ballot) -__spirv_version(1.3) -__target_intrinsic(glsl, "subgroupBroadcastFirst($0)") -__target_intrinsic(cuda, "_waveReadFirst($0)") -T WaveReadLaneFirst(T expr); -__generic<T : __BuiltinType, let N : int> -__glsl_extension(GL_KHR_shader_subgroup_ballot) -__spirv_version(1.3) -__target_intrinsic(glsl, "subgroupBroadcastFirst($0)") -__target_intrinsic(cuda, "_waveReadFirstMultiple($0)") -vector<T,N> WaveReadLaneFirst(vector<T,N> expr); -__generic<T : __BuiltinType, let N : int, let M : int> -__target_intrinsic(cuda, "_waveReadFirstMultiple($0)") -matrix<T,N,M> WaveReadLaneFirst(matrix<T,N,M> expr); - -// NOTE! On GLSL based targets the lane index *must* be a compile time expression! -// See https://github.com/KhronosGroup/GLSL/blob/master/extensions/khr/GL_KHR_shader_subgroup.txt -__generic<T : __BuiltinType> -__glsl_extension(GL_KHR_shader_subgroup_ballot) -__spirv_version(1.3) -__target_intrinsic(glsl, "subgroupBroadcast($0, $1)") -__target_intrinsic(cuda, "__shfl_sync(__activemask(), $0, $1)") -T WaveReadLaneAt(T value, int lane); -__generic<T : __BuiltinType, let N : int> -__spirv_version(1.3) -__target_intrinsic(glsl, "subgroupBroadcast($0, $1)") -__target_intrinsic(cuda, "_waveReadLaneAtMultiple($0, $1)") -vector<T,N> WaveReadLaneAt(vector<T,N> value, int lane); -__generic<T : __BuiltinType, let N : int, let M : int> -__target_intrinsic(cuda, "_waveReadLaneAtMultiple($0, $1)") -matrix<T,N,M> WaveReadLaneAt(matrix<T,N,M> value, int lane); - - // `typedef`s to help with the fact that HLSL has been sorta-kinda case insensitive at various points typedef Texture2D texture2D; |
