From 77d59713ac665785b7ebee4ad2b5dcbb73cf5af5 Mon Sep 17 00:00:00 2001 From: jsmall-nvidia Date: Tue, 21 Apr 2020 14:09:36 -0400 Subject: Small Improvements around Wave Intrinsics (#1328) * Fix issues in wave-mask/wave.slang tests. WaveGetActiveMask -> WaveGetConvergedMask. Update target-compatibility.md * First pass at wave-intrinsics.md documentation. Write up around WaveMaskSharedSync. * Added more of the Wave intrinsics as WaveMask intrinsics. Improvements to documentation around wave-intrinsics. * Add the Wave intrinsics for SM6.5 for WaveMask Expand WaveMask intrinsics Improve WaveMask documentation * Added WaveMaskIsFirstLane. * Added WaveGetConvergedMask for glsl and hlsl. Added wave-get-converged-mask.slang test. * WaveGetActiveMask/Multi and WaveGetConvergedMask/Multi * Improve Wave intrinsics docs. Added WaveGetActiveMulti, WaveGetConvergedMulti, WaveGetActiveMask (for vk/hlsl). * Enable GLSL WaveMultiPrefixBitAnd. * Re-add definitions of f16tof32 and f32tof16 from #1326 * Remove multiple definition of f32tof16 Disable optix call to Ray trace test, if OPTIX not available. * Improve wave intrinsics documentation - remove the __generic as part of definitions, small improvements. * Change comment to try and trigger build. 
--- source/slang/hlsl.meta.slang | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) (limited to 'source') diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang index 55c66ffc0..67f44cdac 100644 --- a/source/slang/hlsl.meta.slang +++ b/source/slang/hlsl.meta.slang @@ -1156,6 +1156,7 @@ matrix exp2(matrix x) MATRIX_MAP_UNARY(T, N, M, exp2, x); } + // Convert 16-bit float stored in low bits of integer __target_intrinsic(glsl, "unpackHalf2x16($0).x") __glsl_version(420) @@ -2489,8 +2490,18 @@ matrix trunc(matrix x) typedef uint WaveMask; +__glsl_extension(GL_KHR_shader_subgroup_ballot) +__spirv_version(1.3) +__target_intrinsic(glsl, "subgroupBallot(true).x") __target_intrinsic(cuda, "__activemask()") -WaveMask WaveGetConvergedMask() { return 0xffffffff; } +__target_intrinsic(hlsl, "WaveActiveBallot(true).x") +WaveMask WaveGetConvergedMask(); + +__glsl_extension(GL_KHR_shader_subgroup_ballot) +__spirv_version(1.3) +__target_intrinsic(glsl, "subgroupBallot(true).x") +__target_intrinsic(hlsl, "WaveActiveBallot(true).x") +WaveMask WaveGetActiveMask(); __glsl_extension(GL_KHR_shader_subgroup_basic) __spirv_version(1.3) @@ -3267,6 +3278,20 @@ __target_intrinsic(glsl, "subgroupBallotExclusiveBitCount(subgroupBallot($0))") __target_intrinsic(cuda, "__popc(__ballot_sync(__activemask(), $0) & _getLaneLtMask())") uint WavePrefixCountBits(bool value); + +__glsl_extension(GL_KHR_shader_subgroup_ballot) +__spirv_version(1.3) +__target_intrinsic(glsl, "subgroupBallot(true)") +__target_intrinsic(cuda, "make_uint4(__activemask(), 0, 0, 0)") +__target_intrinsic(hlsl, "WaveActiveBallot(true)") +uint4 WaveGetConvergedMulti(); + +__glsl_extension(GL_KHR_shader_subgroup_ballot) +__spirv_version(1.3) +__target_intrinsic(glsl, "subgroupBallot(true)") +__target_intrinsic(hlsl, "WaveActiveBallot(true)") +uint4 WaveGetActiveMulti(); + // Shader model 6.5 stuff // https://github.com/microsoft/DirectX-Specs/blob/master/d3d/HLSL_ShaderModel6_5.md 
@@ -3294,7 +3319,7 @@ __generic __target_intrinsic(hlsl) __glsl_extension(GL_KHR_shader_subgroup_arithmetic) __spirv_version(1.3) -//__target_intrinsic(glsl, "subgroupExclusiveAnd($0)") +__target_intrinsic(glsl, "subgroupExclusiveAnd($0)") __target_intrinsic(cuda, "_wavePrefixAnd(_getMultiPrefixMask(($1).x), $0)") T WaveMultiPrefixBitAnd(T expr, uint4 mask); __target_intrinsic(hlsl) -- cgit v1.2.3