From 721d2e8a2d457081cd3d9b081979d436b7002c2c Mon Sep 17 00:00:00 2001 From: jsmall-nvidia Date: Mon, 9 Mar 2020 20:03:42 -0400 Subject: CUDA Wave intrinsic vector/matrix support (#1267) * Distinguish between __activemask() and _getConvergedMask(). Remove need to pass in mask to CUDA wave impls. * Add support for vector/matrix Wave intrinsics for CUDA. Fix issue with CUDA parsing of errors. * Fix typo. Make WaveReadLaneAt and WaveReadLaneFirst work for vector/matrix types. * Fix typo. * Added equality wave intrinsic test. * Fix some typos * Added wave-lane-at.slang --- source/slang/hlsl.meta.slang | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'source') diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang index 39cea9ba3..4b717d540 100644 --- a/source/slang/hlsl.meta.slang +++ b/source/slang/hlsl.meta.slang @@ -2441,10 +2441,10 @@ __glsl_extension(GL_KHR_shader_subgroup_vote) __spirv_version(1.3) __target_intrinsic(glsl, "subgroupAllEqual($0)") __target_intrinsic(cuda, "_waveAllEqualMultiple($0)") -vector WaveActiveAllEqual(vector value); +bool WaveActiveAllEqual(vector value); __generic __target_intrinsic(cuda, "_waveAllEqualMultiple($0)") -matrix WaveActiveAllEqual(matrix value); +bool WaveActiveAllEqual(matrix value); __generic uint4 WaveMatch(T value); __generic uint4 WaveMatch(vector value); @@ -2585,21 +2585,27 @@ __generic __glsl_extension(GL_KHR_shader_subgroup_ballot) __spirv_version(1.3) __target_intrinsic(glsl, "subgroupBroadcastFirst($0)") +__target_intrinsic(cuda, "_waveReadFirstMultiple($0)") vector WaveReadLaneFirst(vector expr); __generic +__target_intrinsic(cuda, "_waveReadFirstMultiple($0)") matrix WaveReadLaneFirst(matrix expr); +// NOTE! On GLSL based targets the lane index *must* be a compile time expression! 
+// See https://github.com/KhronosGroup/GLSL/blob/master/extensions/khr/GL_KHR_shader_subgroup.txt __generic __glsl_extension(GL_KHR_shader_subgroup_ballot) __spirv_version(1.3) __target_intrinsic(glsl, "subgroupBroadcast($0, $1)") -__target_intrinsic(cuda, "__shfl_sync(_activemask(), $0, $1)") +__target_intrinsic(cuda, "__shfl_sync(__activemask(), $0, $1)") T WaveReadLaneAt(T value, int lane); __generic __spirv_version(1.3) __target_intrinsic(glsl, "subgroupBroadcast($0, $1)") +__target_intrinsic(cuda, "_waveReadLaneAtMultiple($0, $1)") vector WaveReadLaneAt(vector value, int lane); __generic +__target_intrinsic(cuda, "_waveReadLaneAtMultiple($0, $1)") matrix WaveReadLaneAt(matrix value, int lane); -- cgit v1.2.3