Adds WaveShuffle intrinsic (#1298)

* Support for WaveReadLaneAt with a dynamic lane index (but uniform across the Wave) on Vk by enabling VK1.4. Fixed the wave-lane-at.slang test to use a laneId that is uniform across the Wave.
* Added WaveShuffle intrinsic. Added a test for the WaveShuffle intrinsic.
* Added some documentation on WaveShuffle.
* Fix: the version required for subgroupBroadcast to accept a non-constexpr lane index is actually 1.5.
author: jsmall-nvidia <jsmall@nvidia.com> 2020-03-27 16:16:27 -0400
committer: GitHub <noreply@github.com> 2020-03-27 16:16:27 -0400
commit: e267ce24e37b9b7f98921f75abc150c1463b1d6d (patch)
tree: 331660a83ae0b72116b79b5cc8bf7a9c06555db5 /source
parent: 5b0b8436123aa2faa9b682ed45efe2bd7edbf01b (diff)
1 files changed, 27 insertions, 4 deletions
diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang
index 739b8579d..e29e47581 100644
--- a/source/slang/hlsl.meta.slang
+++ b/source/slang/hlsl.meta.slang
@@ -2722,17 +2722,18 @@ matrix<T,N,M> WaveReadLaneFirst(matrix<T,N,M> expr);
 
 // NOTE! On GLSL based targets the lane index *must* be a compile time expression!
 // See https://github.com/KhronosGroup/GLSL/blob/master/extensions/khr/GL_KHR_shader_subgroup.txt
-// It is allowed to be 'dynamically uniform within the subgroup' if it's SPIR-V 1.4.
-// TODO(JS): For now we'll use 1.4, but aim for the future for the compiler to determine
+// It is allowed to be 'dynamically uniform within the subgroup' if it's SPIR-V 1.5.
+// TODO(JS): For now we'll use 1.5, but aim for the future for the compiler to determine
 // if the line the is compile constant, and reduce requirement to 1.3
 __generic<T : __BuiltinType>
 __glsl_extension(GL_KHR_shader_subgroup_ballot)
-__spirv_version(1.4)
+__spirv_version(1.5)
 __target_intrinsic(glsl, "subgroupBroadcast($0, $1)")
 __target_intrinsic(cuda, "__shfl_sync(__activemask(), $0, $1)")
 T WaveReadLaneAt(T value, int lane);
 __generic<T : __BuiltinType, let N : int>
-__spirv_version(1.4)
+__spirv_version(1.5)
+__glsl_extension(GL_KHR_shader_subgroup_ballot)
 __target_intrinsic(glsl, "subgroupBroadcast($0, $1)")
 __target_intrinsic(cuda, "_waveReadLaneAtMultiple($0, $1)")
 vector<T,N> WaveReadLaneAt(vector<T,N> value, int lane);
@@ -2740,6 +2741,28 @@ __generic<T : __BuiltinType, let N : int, let M : int>
 __target_intrinsic(cuda, "_waveReadLaneAtMultiple($0, $1)")
 matrix<T,N,M> WaveReadLaneAt(matrix<T,N,M> value, int lane);
 
+// NOTE! WaveShuffle is a NON STANDARD HLSL intrinsic! On HLSL it maps to WaveReadLaneAt,
+// which means it will only work on hardware that allows arbitrary laneIds. That is not true in
+// general, because the HLSL standard requires the laneId to be 'dynamically uniform' across the Wave.
+__generic<T : __BuiltinType>
+__glsl_extension(GL_KHR_shader_subgroup_shuffle)
+__spirv_version(1.3)
+__target_intrinsic(glsl, "subgroupShuffle($0, $1)")
+__target_intrinsic(cuda, "__shfl_sync(__activemask(), $0, $1)")
+__target_intrinsic(hlsl, "WaveReadLaneAt")
+T WaveShuffle(T value, int lane);
+__generic<T : __BuiltinType, let N : int>
+__glsl_extension(GL_KHR_shader_subgroup_shuffle)
+__spirv_version(1.3)
+__target_intrinsic(glsl, "subgroupShuffle($0, $1)")
+__target_intrinsic(cuda, "_waveReadLaneAtMultiple($0, $1)")
+__target_intrinsic(hlsl, "WaveReadLaneAt")
+vector<T,N> WaveShuffle(vector<T,N> value, int lane);
+__generic<T : __BuiltinType, let N : int, let M : int>
+__target_intrinsic(cuda, "_waveReadLaneAtMultiple($0, $1)")
+__target_intrinsic(hlsl, "WaveReadLaneAt")
+matrix<T,N,M> WaveShuffle(matrix<T,N,M> value, int lane);
+
 __glsl_extension(GL_KHR_shader_subgroup_ballot)
 __spirv_version(1.3)
 __target_intrinsic(glsl, "subgroupBallotExclusiveBitCount(subgroupBallot($0))")
author	jsmall-nvidia <jsmall@nvidia.com>	2020-03-27 16:16:27 -0400
committer	GitHub <noreply@github.com>	2020-03-27 16:16:27 -0400
commit	e267ce24e37b9b7f98921f75abc150c1463b1d6d (patch)
tree	331660a83ae0b72116b79b5cc8bf7a9c06555db5 /source
parent	5b0b8436123aa2faa9b682ed45efe2bd7edbf01b (diff)