summaryrefslogtreecommitdiffstats
path: root/source
diff options
context:
space:
mode:
authorjsmall-nvidia <jsmall@nvidia.com>2020-04-21 09:32:21 -0400
committerGitHub <noreply@github.com>2020-04-21 09:32:21 -0400
commit7de5f63225cde20401da7c1c69b00d0b7dc8d89f (patch)
tree4935a8c8feb175582d3b8fda79592b0f4def668b /source
parent6d4fa92a86fe5d05dbfa248524cf976ab27f4444 (diff)
WaveMask remaining intrinsics and tests (#1327)
* Fix issues in wave-mask/wave.slang tests. WaveGetActiveMask -> WaveGetConvergedMask. Update target-compatibility.md
* First pass at wave-intrinsics.md documentation. Write up around WaveMaskSharedSync.
* Added more of the Wave intrinsics as WaveMask intrinsics. Improvements to documentation around wave-intrinsics.
* Add the Wave intrinsics for SM6.5 for WaveMask. Expand WaveMask intrinsics. Improve WaveMask documentation.
* Added WaveMaskIsFirstLane.

Co-authored-by: Tim Foley <tfoleyNV@users.noreply.github.com>
Diffstat (limited to 'source')
-rw-r--r--source/slang/hlsl.meta.slang88
1 files changed, 84 insertions, 4 deletions
diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang
index e9da539bf..55c66ffc0 100644
--- a/source/slang/hlsl.meta.slang
+++ b/source/slang/hlsl.meta.slang
@@ -2485,13 +2485,20 @@ matrix<T, N, M> trunc(matrix<T, N, M> x)
MATRIX_MAP_UNARY(T, N, M, trunc, x);
}
-// Slang Specific Mask Wave Intrinsics
+// Slang Specific 'Mask' Wave Intrinsics
typedef uint WaveMask;
// On CUDA this call is emitted as `__activemask()`. The body supplied here
// returns 0xffffffff (all 32 bits set); presumably it is what targets without
// an intrinsic mapping use -- confirm.
__target_intrinsic(cuda, "__activemask()")
WaveMask WaveGetConvergedMask() { return 0xffffffff; }
+// Reports whether the calling lane is the first lane of `mask`.
+// GLSL emits `subgroupElect()` and HLSL emits `WaveIsFirstLane()`; neither
+// expansion references the mask argument.
+// CUDA: `$0 & -$0` isolates the lowest set bit of the mask, which is then
+// compared with `WarpMask(1) << _getLaneId()` (presumably the calling lane's
+// own bit -- confirm against the CUDA prelude).
+__glsl_extension(GL_KHR_shader_subgroup_basic)
+__spirv_version(1.3)
+__target_intrinsic(glsl, "subgroupElect()")
+__target_intrinsic(cuda, "(($0 & -$0) == (WarpMask(1) << _getLaneId()))")
+__target_intrinsic(hlsl, "WaveIsFirstLane()")
+bool WaveMaskIsFirstLane(WaveMask mask);
+
__glsl_extension(GL_KHR_shader_subgroup_vote)
__spirv_version(1.3)
__target_intrinsic(glsl, "subgroupAll($1)")
@@ -2564,7 +2571,6 @@ __target_intrinsic(glsl, "subgroupBarrier()")
__target_intrinsic(hlsl, "GroupMemoryBarrier()")
void GroupMemoryBarrierWithWaveMaskSync(WaveMask mask);
-
__glsl_extension(GL_KHR_shader_subgroup_basic)
__spirv_version(1.3)
__target_intrinsic(glsl, "subgroupBarrier()")
@@ -2620,7 +2626,7 @@ __target_intrinsic(cuda, "_waveShuffleMultiple($0, $1, $2)")
__target_intrinsic(hlsl, "WaveReadLaneAt($1, $2)")
vector<T,N> WaveMaskReadLaneAt(WaveMask mask, vector<T,N> value, int lane);
// Matrix overload of WaveMaskReadLaneAt. The CUDA expansion forwards all three
// arguments ($0, $1, $2); the removed line below omitted the third (`lane`).
// The HLSL expansion uses only $1 and $2, i.e. it does not pass the mask.
__generic<T : __BuiltinType, let N : int, let M : int>
-__target_intrinsic(cuda, "_waveShuffleMultiple($0, $1)")
+__target_intrinsic(cuda, "_waveShuffleMultiple($0, $1, $2)")
__target_intrinsic(hlsl, "WaveReadLaneAt($1, $2)")
matrix<T,N,M> WaveMaskReadLaneAt(WaveMask mask, matrix<T,N,M> value, int lane);
@@ -2689,7 +2695,7 @@ __target_intrinsic(cuda, "_waveOrMultiple($0, $1)")
__target_intrinsic(hlsl, "WaveActiveBitOr($1)")
vector<T,N> WaveMaskBitOr(WaveMask mask, vector<T,N> expr);
// Matrix overload of WaveMaskBitOr, restricted to integer element types.
// The removed line below had a stray `_` in front of `$0` in the CUDA
// expansion; the added line passes `$0` unmodified.
__generic<T : __BuiltinIntegerType, let N : int, let M : int>
-__target_intrinsic(cuda, "_waveOrMultiple(_$0, $1)")
+__target_intrinsic(cuda, "_waveOrMultiple($0, $1)")
__target_intrinsic(hlsl, "WaveActiveBitOr($1)")
matrix<T,N,M> WaveMaskBitOr(WaveMask mask, matrix<T,N,M> expr);
@@ -2866,8 +2872,82 @@ __generic<T : __BuiltinType, let N : int, let M : int>
__target_intrinsic(cuda, "_waveReadFirstMultiple($0, $1)")
matrix<T,N,M> WaveMaskReadLaneFirst(WaveMask mask, matrix<T,N,M> expr);
+// WaveMask SM6.5-like intrinsics
+// TODO(JS): On HLSL this only works for 32 bits or less, because the expansion
+// keeps just the `.x` component of the `WaveMatch` result.
+//
+// Scalar overload. The HLSL expansion does not reference the mask argument.
+// The CUDA mapping to `_waveMatchScalar` is tagged `__cuda_sm_version(7.0)`.
+__generic<T : __BuiltinType>
+__target_intrinsic(hlsl, "WaveMatch($1).x")
+__cuda_sm_version(7.0)
+__target_intrinsic(cuda, "_waveMatchScalar($0, $1)")
+WaveMask WaveMaskMatch(WaveMask mask, T value);
+// Vector overload of WaveMaskMatch. HLSL keeps only the `.x` component of
+// `WaveMatch` and does not reference the mask; CUDA maps to
+// `_waveMatchMultiple` and is tagged `__cuda_sm_version(7.0)`.
+__generic<T : __BuiltinType, let N : int>
+__target_intrinsic(hlsl, "WaveMatch($1).x")
+__cuda_sm_version(7.0)
+__target_intrinsic(cuda, "_waveMatchMultiple($0, $1)")
+WaveMask WaveMaskMatch(WaveMask mask, vector<T,N> value);
+// Matrix overload of WaveMaskMatch. Uses the same expansions as the vector
+// overload: HLSL `WaveMatch($1).x`, CUDA `_waveMatchMultiple($0, $1)` tagged
+// `__cuda_sm_version(7.0)`.
+__generic<T : __BuiltinType, let N : int, let M : int>
+__target_intrinsic(hlsl, "WaveMatch($1).x")
+__cuda_sm_version(7.0)
+__target_intrinsic(cuda, "_waveMatchMultiple($0, $1)")
+WaveMask WaveMaskMatch(WaveMask mask, matrix<T,N,M> value);
+
+// Scalar WaveMaskPrefixBitAnd. HLSL maps to `WaveMultiPrefixBitAnd`, passing the
+// mask as the `.x` component of a `uint4`; CUDA maps to `_wavePrefixAnd`.
+// NOTE(review): T is constrained to __BuiltinArithmeticType, whereas
+// WaveMaskBitOr uses __BuiltinIntegerType -- confirm non-integer T is intended.
+__generic<T : __BuiltinArithmeticType>
+__target_intrinsic(hlsl, "WaveMultiPrefixBitAnd($1, uint4($0, 0, 0, 0))")
+__glsl_extension(GL_KHR_shader_subgroup_arithmetic)
+__spirv_version(1.3)
+// The GLSL mapping is commented out here, although the extension and SPIR-V
+// version requirements above are still declared.
+//__target_intrinsic(glsl, "subgroupExclusiveAnd($1)")
+__target_intrinsic(cuda, "_wavePrefixAnd($0, $1)")
+T WaveMaskPrefixBitAnd(WaveMask mask, T expr);
+// Vector WaveMaskPrefixBitAnd. HLSL maps to `WaveMultiPrefixBitAnd`, passing the
+// mask as the `.x` component of a `uint4`; GLSL maps to `subgroupExclusiveAnd`
+// (which does not reference the mask); CUDA maps to `_wavePrefixAndMultiple`.
+// `__generic` is placed first, matching every other declaration in this file.
+__generic<T : __BuiltinArithmeticType, let N : int>
+__target_intrinsic(hlsl, "WaveMultiPrefixBitAnd($1, uint4($0, 0, 0, 0))")
+__glsl_extension(GL_KHR_shader_subgroup_arithmetic)
+__spirv_version(1.3)
+__target_intrinsic(glsl, "subgroupExclusiveAnd($1)")
+__target_intrinsic(cuda, "_wavePrefixAndMultiple($0, $1)")
+vector<T,N> WaveMaskPrefixBitAnd(WaveMask mask, vector<T,N> expr);
+// Matrix WaveMaskPrefixBitAnd. HLSL maps to `WaveMultiPrefixBitAnd`, passing the
+// mask as the `.x` component of a `uint4`. No GLSL mapping is declared.
+// The CUDA expansion passes the mask and the value straight through, in the
+// same form as the matrix overloads of WaveMaskPrefixBitOr/BitXor; the call's
+// parentheses must balance because the string is pasted into generated code.
+__generic<T : __BuiltinArithmeticType, let N : int, let M : int>
+__target_intrinsic(hlsl, "WaveMultiPrefixBitAnd($1, uint4($0, 0, 0, 0))")
+__target_intrinsic(cuda, "_wavePrefixAndMultiple($0, $1)")
+matrix<T,N,M> WaveMaskPrefixBitAnd(WaveMask mask, matrix<T,N,M> expr);
+
+// Scalar WaveMaskPrefixBitOr. HLSL maps to `WaveMultiPrefixBitOr`, passing the
+// mask as the `.x` component of a `uint4`; CUDA maps to `_wavePrefixOr`.
+__generic<T : __BuiltinArithmeticType>
+__target_intrinsic(hlsl, "WaveMultiPrefixBitOr($1, uint4($0, 0, 0, 0))")
+__glsl_extension(GL_KHR_shader_subgroup_arithmetic)
+__spirv_version(1.3)
+// The GLSL mapping is commented out here, although the extension and SPIR-V
+// version requirements above are still declared.
+//__target_intrinsic(glsl, "subgroupExclusiveOr($1)")
+__target_intrinsic(cuda, "_wavePrefixOr($0, $1)")
+T WaveMaskPrefixBitOr(WaveMask mask, T expr);
+// Vector WaveMaskPrefixBitOr. HLSL maps to `WaveMultiPrefixBitOr`, passing the
+// mask as the `.x` component of a `uint4`; CUDA maps to `_wavePrefixOrMultiple`.
+__generic<T : __BuiltinArithmeticType, let N : int>
+__target_intrinsic(hlsl, "WaveMultiPrefixBitOr($1, uint4($0, 0, 0, 0))")
+__glsl_extension(GL_KHR_shader_subgroup_arithmetic)
+__spirv_version(1.3)
+// The GLSL mapping is commented out here, although the extension and SPIR-V
+// version requirements above are still declared.
+//__target_intrinsic(glsl, "subgroupExclusiveOr($1)")
+__target_intrinsic(cuda, "_wavePrefixOrMultiple($0, $1)")
+vector<T,N> WaveMaskPrefixBitOr(WaveMask mask, vector<T,N> expr);
+// Matrix WaveMaskPrefixBitOr. Only HLSL (`WaveMultiPrefixBitOr`) and CUDA
+// (`_wavePrefixOrMultiple`) mappings are declared; there is no GLSL mapping.
+__generic<T : __BuiltinArithmeticType, let N : int, let M : int>
+__target_intrinsic(hlsl, "WaveMultiPrefixBitOr($1, uint4($0, 0, 0, 0))")
+__target_intrinsic(cuda, "_wavePrefixOrMultiple($0, $1)")
+matrix<T,N,M> WaveMaskPrefixBitOr(WaveMask mask, matrix<T,N,M> expr);
+
+// Scalar WaveMaskPrefixBitXor. HLSL maps to `WaveMultiPrefixBitXor`, passing the
+// mask as the `.x` component of a `uint4`; GLSL maps to `subgroupExclusiveXor`
+// (which does not reference the mask); CUDA maps to `_wavePrefixXor`.
+__generic<T : __BuiltinArithmeticType>
+__target_intrinsic(hlsl, "WaveMultiPrefixBitXor($1, uint4($0, 0, 0, 0))")
+__glsl_extension(GL_KHR_shader_subgroup_arithmetic)
+__spirv_version(1.3)
+__target_intrinsic(glsl, "subgroupExclusiveXor($1)")
+__target_intrinsic(cuda, "_wavePrefixXor($0, $1)")
+T WaveMaskPrefixBitXor(WaveMask mask, T expr);
+// Vector WaveMaskPrefixBitXor. HLSL maps to `WaveMultiPrefixBitXor`, passing the
+// mask as the `.x` component of a `uint4`; GLSL maps to `subgroupExclusiveXor`
+// (which does not reference the mask); CUDA maps to `_wavePrefixXorMultiple`.
+__generic<T : __BuiltinArithmeticType, let N : int>
+__target_intrinsic(hlsl, "WaveMultiPrefixBitXor($1, uint4($0, 0, 0, 0))")
+__glsl_extension(GL_KHR_shader_subgroup_arithmetic)
+__spirv_version(1.3)
+__target_intrinsic(glsl, "subgroupExclusiveXor($1)")
+__target_intrinsic(cuda, "_wavePrefixXorMultiple($0, $1)")
+vector<T,N> WaveMaskPrefixBitXor(WaveMask mask, vector<T,N> expr);
+// Matrix WaveMaskPrefixBitXor. Only HLSL (`WaveMultiPrefixBitXor`) and CUDA
+// (`_wavePrefixXorMultiple`) mappings are declared; there is no GLSL mapping.
+__generic<T : __BuiltinArithmeticType, let N : int, let M : int>
+__target_intrinsic(hlsl, "WaveMultiPrefixBitXor($1, uint4($0, 0, 0, 0))")
+__target_intrinsic(cuda, "_wavePrefixXorMultiple($0, $1)")
+matrix<T,N,M> WaveMaskPrefixBitXor(WaveMask mask, matrix<T,N,M> expr);
// Shader model 6.0 stuff