From 5d3a737e75346b6ced204829a60be2837589e9ad Mon Sep 17 00:00:00 2001 From: Tim Foley Date: Mon, 4 May 2020 09:06:55 -0700 Subject: Make stdlib WaveActive* call WaveMask* (#1336) This change makes the various `WaveActive*()` functions have default implementations that call `WaveMask*()` passing `WaveGetActiveMask()`. The new definitions will be used during CUDA code generation, which simplifies some of the duplication that was occurring in the `__target_intrinsic` modifiers. This change does *not* add logic to make computation of `WaveGetActiveMask()` correct on CUDA, so these functions will still fail to provide the behavior that users need/expect. A future change will need to add logic to synthesize the value of `WaveGetActiveMask()` automatically. --- source/slang/hlsl.meta.slang | 399 +++++++++++++++++++++++++++++++------------ 1 file changed, 289 insertions(+), 110 deletions(-) diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang index a84e88ca8..c0dba51e3 100644 --- a/source/slang/hlsl.meta.slang +++ b/source/slang/hlsl.meta.slang @@ -2531,6 +2531,7 @@ __glsl_extension(GL_KHR_shader_subgroup_ballot) __spirv_version(1.3) __target_intrinsic(glsl, "subgroupBallot(true).x") __target_intrinsic(hlsl, "WaveActiveBallot(true).x") +__target_intrinsic(cuda, "__activemask()") // Note: semantically incorrect, but best we can do for now. 
WaveMask WaveGetActiveMask(); __glsl_extension(GL_KHR_shader_subgroup_basic) @@ -2920,7 +2921,7 @@ matrix WaveMaskReadLaneFirst(WaveMask mask, matrix expr); __generic __target_intrinsic(hlsl, "WaveMatch($1).x") __cuda_sm_version(7.0) -__target_intrinsic(cuda, "_waveMatchScalar($0, $1)") +__target_intrinsic(cuda, "_waveMatchScalar($0, $1).x") WaveMask WaveMaskMatch(WaveMask mask, T value); __generic __target_intrinsic(hlsl, "WaveMatch($1).x") @@ -3016,158 +3017,253 @@ __generic __glsl_extension(GL_KHR_shader_subgroup_arithmetic) __spirv_version(1.3) __target_intrinsic(glsl, "subgroupAnd($0)") -__target_intrinsic(cuda, "_waveAnd(_getActiveMask(), $0)") -T WaveActiveBitAnd(T expr); +__target_intrinsic(hlsl) +T WaveActiveBitAnd(T expr) +{ + return WaveMaskBitAnd(WaveGetActiveMask(), expr); +} + __generic __glsl_extension(GL_KHR_shader_subgroup_arithmetic) __spirv_version(1.3) __target_intrinsic(glsl, "subgroupAnd($0)") -__target_intrinsic(cuda, "_waveAndMultiple(_getActiveMask(), $0)") -vector WaveActiveBitAnd(vector expr); +__target_intrinsic(hlsl) +vector WaveActiveBitAnd(vector expr) +{ + return WaveMaskBitAnd(WaveGetActiveMask(), expr); +} + __generic -__target_intrinsic(cuda, "_waveAndMultiple(_getActiveMask(), $0)") -matrix WaveActiveBitAnd(matrix expr); +__target_intrinsic(hlsl) +matrix WaveActiveBitAnd(matrix expr) +{ + return WaveMaskBitAnd(WaveGetActiveMask(), expr); +} __generic __glsl_extension(GL_KHR_shader_subgroup_arithmetic) __spirv_version(1.3) __target_intrinsic(glsl, "subgroupOr($0)") -__target_intrinsic(cuda, "_waveOr(_getActiveMask(), $0)") -T WaveActiveBitOr(T expr); +__target_intrinsic(hlsl) +T WaveActiveBitOr(T expr) +{ + return WaveMaskBitOr(WaveGetActiveMask(), expr); +} + __generic __glsl_extension(GL_KHR_shader_subgroup_arithmetic) __spirv_version(1.3) __target_intrinsic(glsl, "subgroupOr($0)") -__target_intrinsic(cuda, "_waveOrMultiple(_getActiveMask(), $0)") -vector WaveActiveBitOr(vector expr); +__target_intrinsic(hlsl) +vector 
WaveActiveBitOr(vector expr) +{ + return WaveMaskBitOr(WaveGetActiveMask(), expr); +} + __generic -__target_intrinsic(cuda, "_waveOrMultiple(_getActiveMask(), $0)") -matrix WaveActiveBitOr(matrix expr); +__target_intrinsic(hlsl) +matrix WaveActiveBitOr(matrix expr) +{ + return WaveMaskBitOr(WaveGetActiveMask(), expr); +} __generic __glsl_extension(GL_KHR_shader_subgroup_arithmetic) __spirv_version(1.3) __target_intrinsic(glsl, "subgroupXor($0)") -__target_intrinsic(cuda, "_waveXor(_getActiveMask(), $0)") -T WaveActiveBitXor(T expr); +__target_intrinsic(hlsl) +T WaveActiveBitXor(T expr) +{ + return WaveMaskBitXor(WaveGetActiveMask(), expr); +} + __generic __glsl_extension(GL_KHR_shader_subgroup_arithmetic) __spirv_version(1.3) __target_intrinsic(glsl, "subgroupXor($0)") -__target_intrinsic(cuda, "_waveXorMultiple(_getActiveMask(), $0)") -vector WaveActiveBitXor(vector expr); +__target_intrinsic(hlsl) +vector WaveActiveBitXor(vector expr) +{ + return WaveMaskBitXor(WaveGetActiveMask(), expr); +} + __generic -__target_intrinsic(cuda, "_waveXorMultiple(_getActiveMask(), $0)") -matrix WaveActiveBitXor(matrix expr); +__target_intrinsic(hlsl) +matrix WaveActiveBitXor(matrix expr) +{ + return WaveMaskBitXor(WaveGetActiveMask(), expr); +} __generic __glsl_extension(GL_KHR_shader_subgroup_arithmetic) __spirv_version(1.3) __target_intrinsic(glsl, "subgroupMax($0)") -__target_intrinsic(cuda, "_waveMax(_getActiveMask(), $0)") -T WaveActiveMax(T expr); +__target_intrinsic(hlsl) +T WaveActiveMax(T expr) +{ + return WaveMaskMax(WaveGetActiveMask(), expr); +} + __generic __glsl_extension(GL_KHR_shader_subgroup_arithmetic) __spirv_version(1.3) __target_intrinsic(glsl, "subgroupMax($0)") -__target_intrinsic(cuda, "_waveMaxMultiple(_getActiveMask(), $0)") -vector WaveActiveMax(vector expr); +__target_intrinsic(hlsl) +vector WaveActiveMax(vector expr) +{ + return WaveMaskMax(WaveGetActiveMask(), expr); +} + __generic -__target_intrinsic(cuda, "_waveMaxMultiple(_getActiveMask(), $0)") 
-matrix WaveActiveMax(matrix expr); +__target_intrinsic(hlsl) +matrix WaveActiveMax(matrix expr) +{ + return WaveMaskMax(WaveGetActiveMask(), expr); +} __generic __glsl_extension(GL_KHR_shader_subgroup_arithmetic) __spirv_version(1.3) __target_intrinsic(glsl, "subgroupMin($0)") -__target_intrinsic(cuda, "_waveMin(_getActiveMask(), $0)") -T WaveActiveMin(T expr); +__target_intrinsic(hlsl) +T WaveActiveMin(T expr) +{ + return WaveMaskMin(WaveGetActiveMask(), expr); +} + __generic __glsl_extension(GL_KHR_shader_subgroup_arithmetic) __spirv_version(1.3) __target_intrinsic(glsl, "subgroupMin($0)") -__target_intrinsic(cuda, "_waveMinMultiple(_getActiveMask(), $0)") -vector WaveActiveMin(vector expr); +__target_intrinsic(hlsl) +vector WaveActiveMin(vector expr) +{ + return WaveMaskMin(WaveGetActiveMask(), expr); +} + __generic -__target_intrinsic(cuda, "_waveMinMultiple(_getActiveMask(), $0)") -matrix WaveActiveMin(matrix expr); +__target_intrinsic(hlsl) +matrix WaveActiveMin(matrix expr) +{ + return WaveMaskMin(WaveGetActiveMask(), expr); +} __generic __glsl_extension(GL_KHR_shader_subgroup_arithmetic) __spirv_version(1.3) __target_intrinsic(glsl, "subgroupMul($0)") -__target_intrinsic(cuda, "_waveProduct(_getActiveMask(), $0)") -T WaveActiveProduct(T expr); +__target_intrinsic(hlsl) +T WaveActiveProduct(T expr) +{ + return WaveMaskProduct(WaveGetActiveMask(), expr); +} + __generic __glsl_extension(GL_KHR_shader_subgroup_arithmetic) __spirv_version(1.3) __target_intrinsic(glsl, "subgroupMul($0)") -__target_intrinsic(cuda, "_waveProductMultiple(_getActiveMask(), $0)") -vector WaveActiveProduct(vector expr); +__target_intrinsic(hlsl) +vector WaveActiveProduct(vector expr) +{ + return WaveMaskProduct(WaveGetActiveMask(), expr); +} + __generic -__target_intrinsic(cuda, "_waveProductMultiple(_getActiveMask(), $0)") -matrix WaveActiveProduct(matrix expr); +__target_intrinsic(hlsl) +matrix WaveActiveProduct(matrix expr) +{ + return WaveMaskProduct(WaveGetActiveMask(), expr); +} 
__generic __glsl_extension(GL_KHR_shader_subgroup_arithmetic) __spirv_version(1.3) __target_intrinsic(glsl, "subgroupAdd($0)") -__target_intrinsic(cuda, "_waveSum(_getActiveMask(), $0)") -T WaveActiveSum(T expr); +__target_intrinsic(hlsl) +T WaveActiveSum(T expr) +{ + return WaveMaskSum(WaveGetActiveMask(), expr); +} + __generic __glsl_extension(GL_KHR_shader_subgroup_arithmetic) __spirv_version(1.3) __target_intrinsic(glsl, "subgroupAdd($0)") -__target_intrinsic(cuda, "_waveSumMultiple(_getActiveMask(), $0)") -vector WaveActiveSum(vector expr); +__target_intrinsic(hlsl) +vector WaveActiveSum(vector expr) +{ + return WaveMaskSum(WaveGetActiveMask(), expr); +} + __generic -__target_intrinsic(cuda, "_waveSumMultiple(_getActiveMask(), $0)") -matrix WaveActiveSum(matrix expr); +__target_intrinsic(hlsl) +matrix WaveActiveSum(matrix expr) +{ + return WaveMaskSum(WaveGetActiveMask(), expr); +} __generic __glsl_extension(GL_KHR_shader_subgroup_vote) __spirv_version(1.3) __target_intrinsic(glsl, "subgroupAllEqual($0)") -__cuda_sm_version(7.0) -__target_intrinsic(cuda, "_waveAllEqual(_getActiveMask(), $0)") -bool WaveActiveAllEqual(T value); +__target_intrinsic(hlsl) +bool WaveActiveAllEqual(T value) +{ + return WaveMaskAllEqual(WaveGetActiveMask(), value); +} + __generic __glsl_extension(GL_KHR_shader_subgroup_vote) __spirv_version(1.3) __target_intrinsic(glsl, "subgroupAllEqual($0)") -__cuda_sm_version(7.0) -__target_intrinsic(cuda, "_waveAllEqualMultiple(_getActiveMask(), $0)") -bool WaveActiveAllEqual(vector value); -__generic -__cuda_sm_version(7.0) -__target_intrinsic(cuda, "_waveAllEqualMultiple(_getActiveMask(), $0)") -bool WaveActiveAllEqual(matrix value); - +__target_intrinsic(hlsl) +bool WaveActiveAllEqual(vector value) +{ + return WaveMaskAllEqual(WaveGetActiveMask(), value); +} +__generic +__target_intrinsic(hlsl) +bool WaveActiveAllEqual(matrix value) +{ + return WaveMaskAllEqual(WaveGetActiveMask(), value); +} __glsl_extension(GL_KHR_shader_subgroup_vote) 
__spirv_version(1.3) __target_intrinsic(glsl, "subgroupAll($0)") -__target_intrinsic(cuda, "(__all_sync(__activemask(), $0) != 0)") -bool WaveActiveAllTrue(bool condition); +__target_intrinsic(hlsl) +bool WaveActiveAllTrue(bool condition) +{ + return WaveMaskAllTrue(WaveGetActiveMask(), condition); +} __glsl_extension(GL_KHR_shader_subgroup_vote) __spirv_version(1.3) __target_intrinsic(glsl, "subgroupAny($0)") -__target_intrinsic(cuda, "(__any_sync(__activemask(), $0) != 0)") -bool WaveActiveAnyTrue(bool condition); +__target_intrinsic(hlsl) +bool WaveActiveAnyTrue(bool condition) +{ + return WaveMaskAnyTrue(WaveGetActiveMask(), condition); +} __glsl_extension(GL_KHR_shader_subgroup_ballot) __spirv_version(1.3) __target_intrinsic(glsl, "subgroupBallot($0)") -__target_intrinsic(cuda, "make_uint4(__ballot_sync(__activemask(), $0), 0, 0, 0)") -uint4 WaveActiveBallot(bool condition); +__target_intrinsic(hlsl) +uint4 WaveActiveBallot(bool condition) +{ + return WaveMaskBallot(WaveGetActiveMask(), condition); +} __glsl_extension(GL_KHR_shader_subgroup_ballot) __spirv_version(1.3) __target_intrinsic(glsl, "bitCount(subgroupBallot($0))") -__target_intrinsic(cuda, "__popc(__ballot_sync(__activemask(), $0))") -uint WaveActiveCountBits(bool value); +__target_intrinsic(hlsl) +uint WaveActiveCountBits(bool value) +{ + return WaveMaskCountBits(WaveGetActiveMask(), value); +} __glsl_extension(GL_KHR_shader_subgroup_basic) __spirv_version(1.3) @@ -3184,8 +3280,11 @@ uint WaveGetLaneIndex(); __glsl_extension(GL_KHR_shader_subgroup_basic) __spirv_version(1.3) __target_intrinsic(glsl, "subgroupElect()") -__target_intrinsic(cuda, "_waveIsFirstLane()") -bool WaveIsFirstLane(); +__target_intrinsic(hlsl) +bool WaveIsFirstLane() +{ + return WaveMaskIsFirstLane(WaveGetActiveMask()); +} // Prefix @@ -3193,49 +3292,83 @@ __generic __glsl_extension(GL_KHR_shader_subgroup_arithmetic) __spirv_version(1.3) __target_intrinsic(glsl, "subgroupExclusiveMul($0)") -__target_intrinsic(cuda, 
"_wavePrefixProduct(_getActiveMask(), $0)") -T WavePrefixProduct(T expr); +__target_intrinsic(hlsl) +T WavePrefixProduct(T expr) +{ + return WaveMaskPrefixProduct(WaveGetActiveMask(), expr); +} + + __generic __glsl_extension(GL_KHR_shader_subgroup_arithmetic) __spirv_version(1.3) __target_intrinsic(glsl, "subgroupExclusiveMul($0)") -__target_intrinsic(cuda, "_wavePrefixProductMultiple(_getActiveMask(), $0)") -vector WavePrefixProduct(vector expr); +__target_intrinsic(hlsl) +vector WavePrefixProduct(vector expr) +{ + return WaveMaskPrefixProduct(WaveGetActiveMask(), expr); +} + __generic -__target_intrinsic(cuda, "_wavePrefixProductMultiple(_getActiveMask(), $0)") -matrix WavePrefixProduct(matrix expr); +__target_intrinsic(hlsl) +matrix WavePrefixProduct(matrix expr) +{ + return WaveMaskPrefixProduct(WaveGetActiveMask(), expr); +} __generic __glsl_extension(GL_KHR_shader_subgroup_arithmetic) __spirv_version(1.3) __target_intrinsic(glsl, "subgroupExclusiveAdd($0)") -__target_intrinsic(cuda, "_wavePrefixSum(_getActiveMask(), $0)") -T WavePrefixSum(T expr); +__target_intrinsic(hlsl) +T WavePrefixSum(T expr) +{ + return WaveMaskPrefixSum(WaveGetActiveMask(), expr); +} + __generic __glsl_extension(GL_KHR_shader_subgroup_arithmetic) __spirv_version(1.3) __target_intrinsic(glsl, "subgroupExclusiveAdd($0)") -__target_intrinsic(cuda, "_wavePrefixSumMultiple(_getActiveMask(), $0)") -vector WavePrefixSum(vector expr); +__target_intrinsic(hlsl) +vector WavePrefixSum(vector expr) +{ + return WaveMaskPrefixSum(WaveGetActiveMask(), expr); +} + __generic -__target_intrinsic(cuda, "_wavePrefixSumMultiple(_getActiveMask(), $0)") -matrix WavePrefixSum(matrix expr); +__target_intrinsic(hlsl) +matrix WavePrefixSum(matrix expr) +{ + return WaveMaskPrefixSum(WaveGetActiveMask(), expr); +} __generic __glsl_extension(GL_KHR_shader_subgroup_ballot) __spirv_version(1.3) __target_intrinsic(glsl, "subgroupBroadcastFirst($0)") -__target_intrinsic(cuda, "_waveReadFirst(_getActiveMask(), $0)") -T 
WaveReadLaneFirst(T expr); +__target_intrinsic(hlsl) +T WaveReadLaneFirst(T expr) +{ + return WaveMaskReadLaneFirst(WaveGetActiveMask(), expr); +} + __generic __glsl_extension(GL_KHR_shader_subgroup_ballot) __spirv_version(1.3) __target_intrinsic(glsl, "subgroupBroadcastFirst($0)") -__target_intrinsic(cuda, "_waveReadFirstMultiple(_getActiveMask(), $0)") -vector WaveReadLaneFirst(vector expr); +__target_intrinsic(hlsl) +vector WaveReadLaneFirst(vector expr) +{ + return WaveMaskReadLaneFirst(WaveGetActiveMask(), expr); +} + __generic -__target_intrinsic(cuda, "_waveReadFirstMultiple(_getActiveMask(), $0)") -matrix WaveReadLaneFirst(matrix expr); +__target_intrinsic(hlsl) +matrix WaveReadLaneFirst(matrix expr) +{ + return WaveMaskReadLaneFirst(WaveGetActiveMask(), expr); +} // NOTE! WaveBroadcastLaneAt is *NOT* standard HLSL // It is provided as access to subgroupBroadcast which can only take a @@ -3247,20 +3380,29 @@ __generic __glsl_extension(GL_KHR_shader_subgroup_ballot) __spirv_version(1.3) __target_intrinsic(glsl, "subgroupBroadcast($0, $1)") -__target_intrinsic(cuda, "__shfl_sync(__activemask(), $0, $1)") __target_intrinsic(hlsl, "WaveReadLaneAt") -T WaveBroadcastLaneAt(T value, constexpr int lane); +T WaveBroadcastLaneAt(T value, constexpr int lane) +{ + return WaveMaskBroadcastLaneAt(WaveGetActiveMask(), value, lane); +} + __generic __glsl_extension(GL_KHR_shader_subgroup_ballot) __spirv_version(1.3) __target_intrinsic(glsl, "subgroupBroadcast($0, $1)") -__target_intrinsic(cuda, "_waveShuffleMultiple(_getActiveMask(), $0, $1)") __target_intrinsic(hlsl, "WaveReadLaneAt") -vector WaveBroadcastLaneAt(vector value, constexpr int lane); +vector WaveBroadcastLaneAt(vector value, constexpr int lane) +{ + return WaveMaskBroadcastLaneAt(WaveGetActiveMask(), value, lane); +} + __generic __target_intrinsic(cuda, "_waveShuffleMultiple(_getActiveMask(), $0, $1)") __target_intrinsic(hlsl, "WaveReadLaneAt") -matrix WaveBroadcastLaneAt(matrix value, constexpr int lane); 
+matrix WaveBroadcastLaneAt(matrix value, constexpr int lane) +{ + return WaveMaskBroadcastLaneAt(WaveGetActiveMask(), value, lane); +} // TODO(JS): If it can be determines that the `laneId` is constExpr, then subgroupBroadcast // could be used on GLSL. For now we just use subgroupShuffle @@ -3268,17 +3410,29 @@ __generic __glsl_extension(GL_KHR_shader_subgroup_shuffle) __spirv_version(1.3) __target_intrinsic(glsl, "subgroupShuffle($0, $1)") -__target_intrinsic(cuda, "__shfl_sync(__activemask(), $0, $1)") -T WaveReadLaneAt(T value, int lane); +__target_intrinsic(hlsl) +T WaveReadLaneAt(T value, int lane) +{ + return WaveMaskReadLaneAt(WaveGetActiveMask(), value, lane); +} + __generic __spirv_version(1.3) __glsl_extension(GL_KHR_shader_subgroup_shuffle) __target_intrinsic(glsl, "subgroupShuffle($0, $1)") -__target_intrinsic(cuda, "_waveShuffleMultiple(_getActiveMask(), $0, $1)") -vector WaveReadLaneAt(vector value, int lane); +__target_intrinsic(hlsl) +vector WaveReadLaneAt(vector value, int lane) +{ + return WaveMaskReadLaneAt(WaveGetActiveMask(), value, lane); +} + __generic __target_intrinsic(cuda, "_waveShuffleMultiple(_getActiveMask(), $0, $1)") -matrix WaveReadLaneAt(matrix value, int lane); +__target_intrinsic(hlsl) +matrix WaveReadLaneAt(matrix value, int lane) +{ + return WaveMaskReadLaneAt(WaveGetActiveMask(), value, lane); +} // NOTE! WaveShuffle is a NON STANDARD HLSL intrinsic! 
It will map to WaveReadLaneAt on HLSL // which means it will only work on hardware which allows arbitrary laneIds which is not true @@ -3287,27 +3441,37 @@ __generic __glsl_extension(GL_KHR_shader_subgroup_shuffle) __spirv_version(1.3) __target_intrinsic(glsl, "subgroupShuffle($0, $1)") -__target_intrinsic(cuda, "__shfl_sync(__activemask(), $0, $1)") __target_intrinsic(hlsl, "WaveReadLaneAt") -T WaveShuffle(T value, int lane); +T WaveShuffle(T value, int lane) +{ + return WaveMaskShuffle(WaveGetActiveMask(), value, lane); +} + __generic __glsl_extension(GL_KHR_shader_subgroup_shuffle) __spirv_version(1.3) __target_intrinsic(glsl, "subgroupShuffle($0, $1)") -__target_intrinsic(cuda, "_waveShuffleMultiple(_getActiveMask(), $0, $1)") __target_intrinsic(hlsl, "WaveReadLaneAt") -vector WaveShuffle(vector value, int lane); +vector WaveShuffle(vector value, int lane) +{ + return WaveMaskShuffle(WaveGetActiveMask(), value, lane); +} + __generic -__target_intrinsic(cuda, "_waveShuffleMultiple(_getActiveMask(), $0, $1)") __target_intrinsic(hlsl, "WaveReadLaneAt") -matrix WaveShuffle(matrix value, int lane); +matrix WaveShuffle(matrix value, int lane) +{ + return WaveMaskShuffle(WaveGetActiveMask(), value, lane); +} __glsl_extension(GL_KHR_shader_subgroup_ballot) __spirv_version(1.3) __target_intrinsic(glsl, "subgroupBallotExclusiveBitCount(subgroupBallot($0))") -__target_intrinsic(cuda, "__popc(__ballot_sync(__activemask(), $0) & _getLaneLtMask())") -uint WavePrefixCountBits(bool value); - +__target_intrinsic(hlsl) +uint WavePrefixCountBits(bool value) +{ + return WaveMaskPrefixCountBits(WaveGetActiveMask(), value); +} __glsl_extension(GL_KHR_shader_subgroup_ballot) __spirv_version(1.3) @@ -3327,19 +3491,24 @@ uint4 WaveGetActiveMulti(); __generic __target_intrinsic(hlsl) -__cuda_sm_version(7.0) -__target_intrinsic(cuda, "_waveMatchScalar(_getActiveMask(), $0)") -uint4 WaveMatch(T value); +uint4 WaveMatch(T value) +{ + return WaveMaskMatch(WaveGetActiveMask(), value); +} + 
__generic __target_intrinsic(hlsl) -__cuda_sm_version(7.0) -__target_intrinsic(cuda, "_waveMatchMultiple(_getActiveMask(), $0)") -uint4 WaveMatch(vector value); +uint4 WaveMatch(vector value) +{ + return WaveMaskMatch(WaveGetActiveMask(), value); +} + __generic __target_intrinsic(hlsl) -__cuda_sm_version(7.0) -__target_intrinsic(cuda, "_waveMatchMultiple(_getActiveMask(), $0)") -uint4 WaveMatch(matrix value); +uint4 WaveMatch(matrix value) +{ + return WaveMaskMatch(WaveGetActiveMask(), value); +} __target_intrinsic(hlsl) __target_intrinsic(cuda, "_popc(__ballot_sync(($1).x, $0) & _getLaneLtMask())") @@ -3352,6 +3521,7 @@ __spirv_version(1.3) __target_intrinsic(glsl, "subgroupExclusiveAnd($0)") __target_intrinsic(cuda, "_wavePrefixAnd(_getMultiPrefixMask(($1).x), $0)") T WaveMultiPrefixBitAnd(T expr, uint4 mask); + __target_intrinsic(hlsl) __glsl_extension(GL_KHR_shader_subgroup_arithmetic) __spirv_version(1.3) @@ -3359,6 +3529,7 @@ __target_intrinsic(glsl, "subgroupExclusiveAnd($0)") __target_intrinsic(cuda, "_wavePrefixAndMultiple(_getMultiPrefixMask(($1).x), $0)") __generic vector WaveMultiPrefixBitAnd(vector expr, uint4 mask); + __generic __target_intrinsic(hlsl) __target_intrinsic(cuda, "_wavePrefixAndMultiple(_getMultiPrefixMask(($1).x), $0)") @@ -3371,6 +3542,7 @@ __spirv_version(1.3) //__target_intrinsic(glsl, "subgroupExclusiveOr($0)") __target_intrinsic(cuda, "_wavePrefixOr(, _getMultiPrefixMask(($1).x), $0)") T WaveMultiPrefixBitOr(T expr, uint4 mask); + __generic __target_intrinsic(hlsl) __glsl_extension(GL_KHR_shader_subgroup_arithmetic) @@ -3378,6 +3550,7 @@ __spirv_version(1.3) //__target_intrinsic(glsl, "subgroupExclusiveOr($0)") __target_intrinsic(cuda, "_wavePrefixOrMultiple(_getMultiPrefixMask(($1).x), $0)") vector WaveMultiPrefixBitOr(vector expr, uint4 mask); + __generic __target_intrinsic(hlsl) __target_intrinsic(cuda, "_wavePrefixOrMultiple(_getMultiPrefixMask(($1).x), $0)") @@ -3390,6 +3563,7 @@ __spirv_version(1.3) __target_intrinsic(glsl, 
"subgroupExclusiveXor($0)") __target_intrinsic(cuda, "_wavePrefixXor(_getMultiPrefixMask(($1).x), $0)") T WaveMultiPrefixBitXor(T expr, uint4 mask); + __generic __target_intrinsic(hlsl) __glsl_extension(GL_KHR_shader_subgroup_arithmetic) @@ -3397,6 +3571,7 @@ __spirv_version(1.3) __target_intrinsic(glsl, "subgroupExclusiveXor($0)") __target_intrinsic(cuda, "_wavePrefixXorMultiple(_getMultiPrefixMask(($1).x), $0)") vector WaveMultiPrefixBitXor(vector expr, uint4 mask); + __generic __target_intrinsic(hlsl) __target_intrinsic(cuda, "_wavePrefixXorMultiple(_getMultiPrefixMask(($1).x), $0)") @@ -3406,10 +3581,12 @@ __generic __target_intrinsic(hlsl) __target_intrinsic(cuda, "_wavePrefixProduct(_getMultiPrefixMask(($1).x), $0)") T WaveMultiPrefixProduct(T value, uint4 mask); + __generic __target_intrinsic(hlsl) __target_intrinsic(cuda, "_wavePrefixProductMultiple(_getMultiPrefixMask(($1).x), $0)") vector WaveMultiPrefixProduct(vector value, uint4 mask); + __generic __target_intrinsic(hlsl) __target_intrinsic(cuda, "_wavePrefixProductMultiple(_getMultiPrefixMask(($1).x), $0)") @@ -3419,10 +3596,12 @@ __generic __target_intrinsic(hlsl) __target_intrinsic(cuda, "_wavePrefixSum(_getMultiPrefixMask(($1).x), $0)") T WaveMultiPrefixSum(T value, uint4 mask); + __generic __target_intrinsic(hlsl) __target_intrinsic(cuda, "_wavePrefixSumMultiple(_getMultiPrefixMask(($1).x), $0 )") vector WaveMultiPrefixSum(vector value, uint4 mask); + __generic __target_intrinsic(hlsl) __target_intrinsic(cuda, "_wavePrefixSumMultiple(_getMultiPrefixMask(($1).x), $0)") -- cgit v1.2.3