summaryrefslogtreecommitdiffstats
path: root/source
diff options
context:
space:
mode:
author: jsmall-nvidia <jsmall@nvidia.com> 2020-04-21 14:09:36 -0400
committer: GitHub <noreply@github.com> 2020-04-21 11:09:36 -0700
commit: 77d59713ac665785b7ebee4ad2b5dcbb73cf5af5 (patch)
tree: 65efc2b0fe49888c5965798447820452bd4b67ba /source
parent: 7de5f63225cde20401da7c1c69b00d0b7dc8d89f (diff)
Small Improvements around Wave Intrinsics (#1328)
* Fix issues in wave-mask/wave.slang tests. WaveGetActiveMask -> WaveGetConvergedMask. Update target-compatibility.md. * First pass at wave-intrinsics.md documentation. Write up around WaveMaskSharedSync. * Added more of the Wave intrinsics as WaveMask intrinsics. Improvements to documentation around wave-intrinsics. * Add the Wave intrinsics for SM6.5 for WaveMask. Expand WaveMask intrinsics. Improve WaveMask documentation. * Added WaveMaskIsFirstLane. * Added WaveGetConvergedMask for glsl and hlsl. Added wave-get-converged-mask.slang test. * WaveGetActiveMask/Multi and WaveGetConvergedMask/Multi. * Improve Wave intrinsics docs. Added WaveGetActiveMulti, WaveGetConvergedMulti, WaveGetActiveMask (for vk/hlsl). * Enable GLSL WaveMultiPrefixBitAnd. * Re-add definitions of f16tof32 and f32tof16 from #1326. * Remove multiple definition of f32tof16. Disable optix call to Ray trace test, if OPTIX not available. * Improve wave intrinsics documentation - remove the __generic as part of definitions, small improvements. * Change comment to try and trigger build.
Diffstat (limited to 'source')
-rw-r--r--source/slang/hlsl.meta.slang29
1 files changed, 27 insertions, 2 deletions
diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang
index 55c66ffc0..67f44cdac 100644
--- a/source/slang/hlsl.meta.slang
+++ b/source/slang/hlsl.meta.slang
@@ -1156,6 +1156,7 @@ matrix<T,N,M> exp2(matrix<T,N,M> x)
MATRIX_MAP_UNARY(T, N, M, exp2, x);
}
+
// Convert 16-bit float stored in low bits of integer
__target_intrinsic(glsl, "unpackHalf2x16($0).x")
__glsl_version(420)
@@ -2489,8 +2490,18 @@ matrix<T, N, M> trunc(matrix<T, N, M> x)
typedef uint WaveMask;
+__glsl_extension(GL_KHR_shader_subgroup_ballot)
+__spirv_version(1.3)
+__target_intrinsic(glsl, "subgroupBallot(true).x")
__target_intrinsic(cuda, "__activemask()")
-WaveMask WaveGetConvergedMask() { return 0xffffffff; }
+__target_intrinsic(hlsl, "WaveActiveBallot(true).x")
+WaveMask WaveGetConvergedMask();
+
+__glsl_extension(GL_KHR_shader_subgroup_ballot)
+__spirv_version(1.3)
+__target_intrinsic(glsl, "subgroupBallot(true).x")
+__target_intrinsic(hlsl, "WaveActiveBallot(true).x")
+WaveMask WaveGetActiveMask();
__glsl_extension(GL_KHR_shader_subgroup_basic)
__spirv_version(1.3)
@@ -3267,6 +3278,20 @@ __target_intrinsic(glsl, "subgroupBallotExclusiveBitCount(subgroupBallot($0))")
__target_intrinsic(cuda, "__popc(__ballot_sync(__activemask(), $0) & _getLaneLtMask())")
uint WavePrefixCountBits(bool value);
+
+__glsl_extension(GL_KHR_shader_subgroup_ballot)
+__spirv_version(1.3)
+__target_intrinsic(glsl, "subgroupBallot(true)")
+__target_intrinsic(cuda, "make_uint4(__activemask(), 0, 0, 0)")
+__target_intrinsic(hlsl, "WaveActiveBallot(true)")
+uint4 WaveGetConvergedMulti();
+
+__glsl_extension(GL_KHR_shader_subgroup_ballot)
+__spirv_version(1.3)
+__target_intrinsic(glsl, "subgroupBallot(true)")
+__target_intrinsic(hlsl, "WaveActiveBallot(true)")
+uint4 WaveGetActiveMulti();
+
// Shader model 6.5 stuff
// https://github.com/microsoft/DirectX-Specs/blob/master/d3d/HLSL_ShaderModel6_5.md
@@ -3294,7 +3319,7 @@ __generic<T : __BuiltinArithmeticType>
__target_intrinsic(hlsl)
__glsl_extension(GL_KHR_shader_subgroup_arithmetic)
__spirv_version(1.3)
-//__target_intrinsic(glsl, "subgroupExclusiveAnd($0)")
+__target_intrinsic(glsl, "subgroupExclusiveAnd($0)")
__target_intrinsic(cuda, "_wavePrefixAnd(_getMultiPrefixMask(($1).x), $0)")
T WaveMultiPrefixBitAnd(T expr, uint4 mask);
__target_intrinsic(hlsl)