From e267ce24e37b9b7f98921f75abc150c1463b1d6d Mon Sep 17 00:00:00 2001 From: jsmall-nvidia Date: Fri, 27 Mar 2020 16:16:27 -0400 Subject: Adds WaveShuffle intrinsic (#1298) * Support for WaveReadLaneAt with dynamic (but uniform across Wave) on Vk by enabling VK1.4. Fixed wave-lane-at.slang test to test with laneId that is uniform across the Wave. * Added WaveShuffle intrinsic. Test for WaveShuffle intrinsic. * Added some documentation on WaveShuffle * Fix that version required for subgroupBroadcast to be non constexpr is actually 1.5 --- source/slang/hlsl.meta.slang | 31 +++++++++++++++++++++++++++---- 1 file changed, 27 insertions(+), 4 deletions(-) (limited to 'source') diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang index 739b8579d..e29e47581 100644 --- a/source/slang/hlsl.meta.slang +++ b/source/slang/hlsl.meta.slang @@ -2722,17 +2722,18 @@ matrix WaveReadLaneFirst(matrix expr); // NOTE! On GLSL based targets the lane index *must* be a compile time expression! // See https://github.com/KhronosGroup/GLSL/blob/master/extensions/khr/GL_KHR_shader_subgroup.txt -// It is allowed to be 'dynamically uniform within the subgroup' if it's SPIR-V 1.4. -// TODO(JS): For now we'll use 1.4, but aim for the future for the compiler to determine +// It is allowed to be 'dynamically uniform within the subgroup' if it's SPIR-V 1.5. 
+// TODO(JS): For now we'll use 1.5, but aim for the future for the compiler to determine // if the lane is a compile-time constant, and reduce the requirement to 1.3 __generic __glsl_extension(GL_KHR_shader_subgroup_ballot) -__spirv_version(1.4) +__spirv_version(1.5) __target_intrinsic(glsl, "subgroupBroadcast($0, $1)") __target_intrinsic(cuda, "__shfl_sync(__activemask(), $0, $1)") T WaveReadLaneAt(T value, int lane); __generic -__spirv_version(1.4) +__spirv_version(1.5) +__glsl_extension(GL_KHR_shader_subgroup_ballot) __target_intrinsic(glsl, "subgroupBroadcast($0, $1)") __target_intrinsic(cuda, "_waveReadLaneAtMultiple($0, $1)") vector WaveReadLaneAt(vector value, int lane); @@ -2740,6 +2741,28 @@ __generic __target_intrinsic(cuda, "_waveReadLaneAtMultiple($0, $1)") matrix WaveReadLaneAt(matrix value, int lane); +// NOTE! WaveShuffle is a NON-STANDARD HLSL intrinsic! It maps to WaveReadLaneAt on HLSL, +// which means it only works on hardware that allows arbitrary lane ids. That is not true +// in general, because a per-lane index breaks the HLSL standard, which requires the lane index to be 'dynamically uniform' across the Wave.
+__generic +__glsl_extension(GL_KHR_shader_subgroup_shuffle) +__spirv_version(1.3) +__target_intrinsic(glsl, "subgroupShuffle($0, $1)") +__target_intrinsic(cuda, "__shfl_sync(__activemask(), $0, $1)") +__target_intrinsic(hlsl, "WaveReadLaneAt") +T WaveShuffle(T value, int lane); +__generic +__glsl_extension(GL_KHR_shader_subgroup_shuffle) +__spirv_version(1.3) +__target_intrinsic(glsl, "subgroupShuffle($0, $1)") +__target_intrinsic(cuda, "_waveReadLaneAtMultiple($0, $1)") +__target_intrinsic(hlsl, "WaveReadLaneAt") +vector WaveShuffle(vector value, int lane); +__generic +__target_intrinsic(cuda, "_waveReadLaneAtMultiple($0, $1)") +__target_intrinsic(hlsl, "WaveReadLaneAt") +matrix WaveShuffle(matrix value, int lane); + __glsl_extension(GL_KHR_shader_subgroup_ballot) __spirv_version(1.3) __target_intrinsic(glsl, "subgroupBallotExclusiveBitCount(subgroupBallot($0))") -- cgit v1.2.3