diff options
| author | jsmall-nvidia <jsmall@nvidia.com> | 2020-03-09 20:03:42 -0400 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2020-03-10 00:03:42 +0000 |
| commit | 721d2e8a2d457081cd3d9b081979d436b7002c2c (patch) | |
| tree | 57a1872eb9751c5f14a18c810ec219713351bdf6 /source/slang | |
| parent | 7e0aa9315f7f65033229c1f76d7df47ccd2da3d0 (diff) | |
CUDA Wave intrinsic vector/matrix support (#1267)
* Distinguish between __activemask() and _getConvergedMask().
Remove need to pass in mask to CUDA wave impls.
* Add support for vector/matrix Wave intrinsics for CUDA.
Fix issue with CUDA parsing of errors.
* Fix typo.
Make WaveReadLaneAt and WaveReadLaneFirst work for vector/matrix types.
* Fix typo.
* Added equality wave intrinsic test.
* Fix some typos
* Added wave-lane-at.slang
Diffstat (limited to 'source/slang')
| -rw-r--r-- | source/slang/hlsl.meta.slang | 12 |
1 files changed, 9 insertions, 3 deletions
diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang index 39cea9ba3..4b717d540 100644 --- a/source/slang/hlsl.meta.slang +++ b/source/slang/hlsl.meta.slang @@ -2441,10 +2441,10 @@ __glsl_extension(GL_KHR_shader_subgroup_vote) __spirv_version(1.3) __target_intrinsic(glsl, "subgroupAllEqual($0)") __target_intrinsic(cuda, "_waveAllEqualMultiple($0)") -vector<bool,N> WaveActiveAllEqual(vector<T,N> value); +bool WaveActiveAllEqual(vector<T,N> value); __generic<T : __BuiltinType, let N : int, let M : int> __target_intrinsic(cuda, "_waveAllEqualMultiple($0)") -matrix<bool,N,M> WaveActiveAllEqual(matrix<T,N,M> value); +bool WaveActiveAllEqual(matrix<T,N,M> value); __generic<T : __BuiltinType> uint4 WaveMatch(T value); __generic<T : __BuiltinType, let N : int> uint4 WaveMatch(vector<T,N> value); @@ -2585,21 +2585,27 @@ __generic<T : __BuiltinType, let N : int> __glsl_extension(GL_KHR_shader_subgroup_ballot) __spirv_version(1.3) __target_intrinsic(glsl, "subgroupBroadcastFirst($0)") +__target_intrinsic(cuda, "_waveReadFirstMultiple($0)") vector<T,N> WaveReadLaneFirst(vector<T,N> expr); __generic<T : __BuiltinType, let N : int, let M : int> +__target_intrinsic(cuda, "_waveReadFirstMultiple($0)") matrix<T,N,M> WaveReadLaneFirst(matrix<T,N,M> expr); +// NOTE! On GLSL based targets the lane index *must* be a compile time expression! 
+// See https://github.com/KhronosGroup/GLSL/blob/master/extensions/khr/GL_KHR_shader_subgroup.txt __generic<T : __BuiltinType> __glsl_extension(GL_KHR_shader_subgroup_ballot) __spirv_version(1.3) __target_intrinsic(glsl, "subgroupBroadcast($0, $1)") -__target_intrinsic(cuda, "__shfl_sync(_activemask(), $0, $1)") +__target_intrinsic(cuda, "__shfl_sync(__activemask(), $0, $1)") T WaveReadLaneAt(T value, int lane); __generic<T : __BuiltinType, let N : int> __spirv_version(1.3) __target_intrinsic(glsl, "subgroupBroadcast($0, $1)") +__target_intrinsic(cuda, "_waveReadLaneAtMultiple($0, $1)") vector<T,N> WaveReadLaneAt(vector<T,N> value, int lane); __generic<T : __BuiltinType, let N : int, let M : int> +__target_intrinsic(cuda, "_waveReadLaneAtMultiple($0, $1)") matrix<T,N,M> WaveReadLaneAt(matrix<T,N,M> value, int lane); |
