diff options
| author | jsmall-nvidia <jsmall@nvidia.com> | 2020-04-21 14:09:36 -0400 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2020-04-21 11:09:36 -0700 |
| commit | 77d59713ac665785b7ebee4ad2b5dcbb73cf5af5 (patch) | |
| tree | 65efc2b0fe49888c5965798447820452bd4b67ba /source | |
| parent | 7de5f63225cde20401da7c1c69b00d0b7dc8d89f (diff) | |
Small Improvements around Wave Intrinsics (#1328)
* Fix issues in wave-mask/wave.slang tests.
WaveGetActiveMask -> WaveGetConvergedMask.
Update target-compatibility.md
* First pass at wave-intrinsics.md documentation.
Write up around WaveMaskSharedSync.
* Added more of the Wave intrinsics as WaveMask intrinsics.
Improvements to documentation around wave-intrinsics.
* Add the Wave intrinsics for SM6.5 for WaveMask
Expand WaveMask intrinsics
Improve WaveMask documentation
* Added WaveMaskIsFirstLane.
* Added WaveGetConvergedMask for glsl and hlsl.
Added wave-get-converged-mask.slang test.
* WaveGetActiveMask/Multi and WaveGetConvergedMask/Multi
* Improve Wave intrinsics docs.
Added WaveGetActiveMulti, WaveGetConvergedMulti, WaveGetActiveMask (for vk/hlsl).
* Enable GLSL WaveMultiPrefixBitAnd.
* Re-add definitions of f16tof32 and f32tof16 from #1326
* Remove multiple definition of f32tof16
Disable optix call to Ray trace test, if OPTIX not available.
* Improve wave intrinsics documentation - remove the __generic as part of definitions, small improvements.
* Change comment to try and trigger build.
Diffstat (limited to 'source')
| -rw-r--r-- | source/slang/hlsl.meta.slang | 29 |
1 files changed, 27 insertions, 2 deletions
diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang index 55c66ffc0..67f44cdac 100644 --- a/source/slang/hlsl.meta.slang +++ b/source/slang/hlsl.meta.slang @@ -1156,6 +1156,7 @@ matrix<T,N,M> exp2(matrix<T,N,M> x) MATRIX_MAP_UNARY(T, N, M, exp2, x); } + // Convert 16-bit float stored in low bits of integer __target_intrinsic(glsl, "unpackHalf2x16($0).x") __glsl_version(420) @@ -2489,8 +2490,18 @@ matrix<T, N, M> trunc(matrix<T, N, M> x) typedef uint WaveMask; +__glsl_extension(GL_KHR_shader_subgroup_ballot) +__spirv_version(1.3) +__target_intrinsic(glsl, "subgroupBallot(true).x") __target_intrinsic(cuda, "__activemask()") -WaveMask WaveGetConvergedMask() { return 0xffffffff; } +__target_intrinsic(hlsl, "WaveActiveBallot(true).x") +WaveMask WaveGetConvergedMask(); + +__glsl_extension(GL_KHR_shader_subgroup_ballot) +__spirv_version(1.3) +__target_intrinsic(glsl, "subgroupBallot(true).x") +__target_intrinsic(hlsl, "WaveActiveBallot(true).x") +WaveMask WaveGetActiveMask(); __glsl_extension(GL_KHR_shader_subgroup_basic) __spirv_version(1.3) @@ -3267,6 +3278,20 @@ __target_intrinsic(glsl, "subgroupBallotExclusiveBitCount(subgroupBallot($0))") __target_intrinsic(cuda, "__popc(__ballot_sync(__activemask(), $0) & _getLaneLtMask())") uint WavePrefixCountBits(bool value); + +__glsl_extension(GL_KHR_shader_subgroup_ballot) +__spirv_version(1.3) +__target_intrinsic(glsl, "subgroupBallot(true)") +__target_intrinsic(cuda, "make_uint4(__activemask(), 0, 0, 0)") +__target_intrinsic(hlsl, "WaveActiveBallot(true)") +uint4 WaveGetConvergedMulti(); + +__glsl_extension(GL_KHR_shader_subgroup_ballot) +__spirv_version(1.3) +__target_intrinsic(glsl, "subgroupBallot(true)") +__target_intrinsic(hlsl, "WaveActiveBallot(true)") +uint4 WaveGetActiveMulti(); + // Shader model 6.5 stuff // https://github.com/microsoft/DirectX-Specs/blob/master/d3d/HLSL_ShaderModel6_5.md @@ -3294,7 +3319,7 @@ __generic<T : __BuiltinArithmeticType> __target_intrinsic(hlsl) 
__glsl_extension(GL_KHR_shader_subgroup_arithmetic) __spirv_version(1.3) -//__target_intrinsic(glsl, "subgroupExclusiveAnd($0)") +__target_intrinsic(glsl, "subgroupExclusiveAnd($0)") __target_intrinsic(cuda, "_wavePrefixAnd(_getMultiPrefixMask(($1).x), $0)") T WaveMultiPrefixBitAnd(T expr, uint4 mask); __target_intrinsic(hlsl) |
