diff options
| author | jsmall-nvidia <jsmall@nvidia.com> | 2020-03-27 18:35:06 -0400 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2020-03-27 22:35:06 +0000 |
| commit | 6f43b2698a99cc4f4bb4e905749fb87f24bf391b (patch) | |
| tree | 567927f4e36ee42481c200ca4caa8a7ea47e3150 /source | |
| parent | e267ce24e37b9b7f98921f75abc150c1463b1d6d (diff) | |
WaveBroadcastAt/WaveShuffle (#1299)
* Support for WaveReadLaneAt with a dynamic lane index (but uniform across the Wave) on Vk by enabling VK1.4.
Fixed wave-lane-at.slang test to test with laneId that is uniform across the Wave.
* Added WaveShuffle intrinsic.
Test for WaveShuffle intrinsic.
* Added some documentation on WaveShuffle
* Fix: the version required for subgroupBroadcast to accept a non-constexpr lane is actually 1.5
* Added WaveBroadcastLaneAt
Documented WaveShuffle/BroadcastLaneAt/ReadLaneAt
* Update docs around WaveBroadcast/Read/Shuffle.
Use `_waveShuffle` as name in CUDA prelude to better describe its more flexible behavior.
Diffstat (limited to 'source')
| -rw-r--r-- | source/slang/hlsl.meta.slang | 46 |
1 files changed, 34 insertions, 12 deletions
diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang index e29e47581..e2e745773 100644 --- a/source/slang/hlsl.meta.slang +++ b/source/slang/hlsl.meta.slang @@ -2720,25 +2720,47 @@ __generic<T : __BuiltinType, let N : int, let M : int> __target_intrinsic(cuda, "_waveReadFirstMultiple($0)") matrix<T,N,M> WaveReadLaneFirst(matrix<T,N,M> expr); -// NOTE! On GLSL based targets the lane index *must* be a compile time expression! -// See https://github.com/KhronosGroup/GLSL/blob/master/extensions/khr/GL_KHR_shader_subgroup.txt -// It is allowed to be 'dynamically uniform within the subgroup' if it's SPIR-V 1.5. -// TODO(JS): For now we'll use 1.5, but aim for the future for the compiler to determine -// if the line the is compile constant, and reduce requirement to 1.3 +// NOTE! WaveBroadcastLaneAt is *NOT* standard HLSL +// It is provided as access to subgroupBroadcast which can only take a +// constexpr laneId. +// https://github.com/KhronosGroup/GLSL/blob/master/extensions/khr/GL_KHR_shader_subgroup.txt +// Versions SPIR-V greater than 1.4 loosen this restriction, and allow 'dynamic uniform' index +// If that's the behavior required then client code should use WaveReadLaneAt which works this way. 
__generic<T : __BuiltinType> __glsl_extension(GL_KHR_shader_subgroup_ballot) -__spirv_version(1.5) +__spirv_version(1.3) __target_intrinsic(glsl, "subgroupBroadcast($0, $1)") __target_intrinsic(cuda, "__shfl_sync(__activemask(), $0, $1)") -T WaveReadLaneAt(T value, int lane); +__target_intrinsic(hlsl, "WaveReadLaneAt") +T WaveBroadcastLaneAt(T value, constexpr int lane); __generic<T : __BuiltinType, let N : int> -__spirv_version(1.5) __glsl_extension(GL_KHR_shader_subgroup_ballot) +__spirv_version(1.3) __target_intrinsic(glsl, "subgroupBroadcast($0, $1)") -__target_intrinsic(cuda, "_waveReadLaneAtMultiple($0, $1)") +__target_intrinsic(cuda, "_waveShuffleMultiple($0, $1)") +__target_intrinsic(hlsl, "WaveReadLaneAt") +vector<T,N> WaveBroadcastLaneAt(vector<T,N> value, constexpr int lane); +__generic<T : __BuiltinType, let N : int, let M : int> +__target_intrinsic(cuda, "_waveShuffleMultiple($0, $1)") +__target_intrinsic(hlsl, "WaveReadLaneAt") +matrix<T,N,M> WaveBroadcastLaneAt(matrix<T,N,M> value, constexpr int lane); + +// TODO(JS): If it can be determines that the `laneId` is constExpr, then subgroupBroadcast +// could be used on GLSL. For now we just use subgroupShuffle +__generic<T : __BuiltinType> +__glsl_extension(GL_KHR_shader_subgroup_shuffle) +__spirv_version(1.3) +__target_intrinsic(glsl, "subgroupShuffle($0, $1)") +__target_intrinsic(cuda, "__shfl_sync(__activemask(), $0, $1)") +T WaveReadLaneAt(T value, int lane); +__generic<T : __BuiltinType, let N : int> +__spirv_version(1.3) +__glsl_extension(GL_KHR_shader_subgroup_shuffle) +__target_intrinsic(glsl, "subgroupShuffle($0, $1)") +__target_intrinsic(cuda, "_waveShuffleMultiple($0, $1)") vector<T,N> WaveReadLaneAt(vector<T,N> value, int lane); __generic<T : __BuiltinType, let N : int, let M : int> -__target_intrinsic(cuda, "_waveReadLaneAtMultiple($0, $1)") +__target_intrinsic(cuda, "_waveShuffleMultiple($0, $1)") matrix<T,N,M> WaveReadLaneAt(matrix<T,N,M> value, int lane); // NOTE! 
WaveShuffle is a NON STANDARD HLSL intrinsic! It will map to WaveReadLaneAt on HLSL @@ -2755,11 +2777,11 @@ __generic<T : __BuiltinType, let N : int> __glsl_extension(GL_KHR_shader_subgroup_shuffle) __spirv_version(1.3) __target_intrinsic(glsl, "subgroupShuffle($0, $1)") -__target_intrinsic(cuda, "_waveReadLaneAtMultiple($0, $1)") +__target_intrinsic(cuda, "_waveShuffleMultiple($0, $1)") __target_intrinsic(hlsl, "WaveReadLaneAt") vector<T,N> WaveShuffle(vector<T,N> value, int lane); __generic<T : __BuiltinType, let N : int, let M : int> -__target_intrinsic(cuda, "_waveReadLaneAtMultiple($0, $1)") +__target_intrinsic(cuda, "_waveShuffleMultiple($0, $1)") __target_intrinsic(hlsl, "WaveReadLaneAt") matrix<T,N,M> WaveShuffle(matrix<T,N,M> value, int lane); |
