From 7de5f63225cde20401da7c1c69b00d0b7dc8d89f Mon Sep 17 00:00:00 2001 From: jsmall-nvidia Date: Tue, 21 Apr 2020 09:32:21 -0400 Subject: WaveMask remaining intrinsics and tests (#1327) * Fix issues in wave-mask/wave.slang tests. WaveGetActiveMask -> WaveGetConvergedMask. Update target-compatibility.md * First pass at wave-intrinsics.md documentation. Write up around WaveMaskSharedSync. * Added more of the Wave intrinsics as WaveMask intrinsics. Improvements to documentation around wave-intrinsics. * Add the Wave intrinsics for SM6.5 for WaveMask Expand WaveMask intrinsics Improve WaveMask documentation * Added WaveMaskIsFirstLane. Co-authored-by: Tim Foley --- docs/wave-intrinsics.md | 195 +++++++++++++++++++++ source/slang/hlsl.meta.slang | 88 +++++++++- .../wave-mask/wave-active-product.slang | 28 +++ .../wave-active-product.slang.expected.txt | 0 .../wave-mask/wave-broadcast-lane-at-vk.slang | 30 ++++ .../wave-broadcast-lane-at-vk.slang.expected.txt | 4 + .../wave-mask/wave-broadcast-lane-at.slang | 43 +++++ .../wave-broadcast-lane-at.slang.expected.txt | 4 + tests/hlsl-intrinsic/wave-mask/wave-diverge.slang | 32 ++++ .../wave-mask/wave-diverge.slang.expected.txt | 4 + tests/hlsl-intrinsic/wave-mask/wave-equality.slang | 33 ++++ .../wave-mask/wave-equality.slang.expected.txt | 4 + .../wave-mask/wave-is-first-lane.slang | 28 +++ .../wave-is-first-lane.slang.expected.txt | 16 ++ .../wave-mask/wave-mask-prefix.slang | 26 +++ .../wave-mask/wave-mask-prefix.slang.expected.txt | 8 + tests/hlsl-intrinsic/wave-mask/wave-matrix.slang | 39 +++++ .../wave-mask/wave-matrix.slang.expected.txt | 8 + .../wave-mask/wave-prefix-product.slang | 27 +++ .../wave-prefix-product.slang.expected.txt | 8 + .../hlsl-intrinsic/wave-mask/wave-prefix-sum.slang | 25 +++ .../wave-mask/wave-prefix-sum.slang.expected.txt | 8 + .../wave-mask/wave-read-lane-at-vk.slang | 46 +++++ .../wave-read-lane-at-vk.slang.expected.txt | 4 + .../wave-mask/wave-read-lane-at.slang | 56 ++++++ 
.../wave-mask/wave-read-lane-at.slang.expected.txt | 4 + .../hlsl-intrinsic/wave-mask/wave-shuffle-vk.slang | 34 ++++ .../wave-mask/wave-shuffle-vk.slang.expected.txt | 4 + tests/hlsl-intrinsic/wave-mask/wave-shuffle.slang | 44 +++++ .../wave-mask/wave-shuffle.slang.expected.txt | 4 + tests/hlsl-intrinsic/wave-mask/wave-vector.slang | 31 ++++ .../wave-mask/wave-vector.slang.expected.txt | 8 + 32 files changed, 889 insertions(+), 4 deletions(-) create mode 100644 tests/hlsl-intrinsic/wave-mask/wave-active-product.slang create mode 100644 tests/hlsl-intrinsic/wave-mask/wave-active-product.slang.expected.txt create mode 100644 tests/hlsl-intrinsic/wave-mask/wave-broadcast-lane-at-vk.slang create mode 100644 tests/hlsl-intrinsic/wave-mask/wave-broadcast-lane-at-vk.slang.expected.txt create mode 100644 tests/hlsl-intrinsic/wave-mask/wave-broadcast-lane-at.slang create mode 100644 tests/hlsl-intrinsic/wave-mask/wave-broadcast-lane-at.slang.expected.txt create mode 100644 tests/hlsl-intrinsic/wave-mask/wave-diverge.slang create mode 100644 tests/hlsl-intrinsic/wave-mask/wave-diverge.slang.expected.txt create mode 100644 tests/hlsl-intrinsic/wave-mask/wave-equality.slang create mode 100644 tests/hlsl-intrinsic/wave-mask/wave-equality.slang.expected.txt create mode 100644 tests/hlsl-intrinsic/wave-mask/wave-is-first-lane.slang create mode 100644 tests/hlsl-intrinsic/wave-mask/wave-is-first-lane.slang.expected.txt create mode 100644 tests/hlsl-intrinsic/wave-mask/wave-mask-prefix.slang create mode 100644 tests/hlsl-intrinsic/wave-mask/wave-mask-prefix.slang.expected.txt create mode 100644 tests/hlsl-intrinsic/wave-mask/wave-matrix.slang create mode 100644 tests/hlsl-intrinsic/wave-mask/wave-matrix.slang.expected.txt create mode 100644 tests/hlsl-intrinsic/wave-mask/wave-prefix-product.slang create mode 100644 tests/hlsl-intrinsic/wave-mask/wave-prefix-product.slang.expected.txt create mode 100644 tests/hlsl-intrinsic/wave-mask/wave-prefix-sum.slang create mode 100644 
tests/hlsl-intrinsic/wave-mask/wave-prefix-sum.slang.expected.txt create mode 100644 tests/hlsl-intrinsic/wave-mask/wave-read-lane-at-vk.slang create mode 100644 tests/hlsl-intrinsic/wave-mask/wave-read-lane-at-vk.slang.expected.txt create mode 100644 tests/hlsl-intrinsic/wave-mask/wave-read-lane-at.slang create mode 100644 tests/hlsl-intrinsic/wave-mask/wave-read-lane-at.slang.expected.txt create mode 100644 tests/hlsl-intrinsic/wave-mask/wave-shuffle-vk.slang create mode 100644 tests/hlsl-intrinsic/wave-mask/wave-shuffle-vk.slang.expected.txt create mode 100644 tests/hlsl-intrinsic/wave-mask/wave-shuffle.slang create mode 100644 tests/hlsl-intrinsic/wave-mask/wave-shuffle.slang.expected.txt create mode 100644 tests/hlsl-intrinsic/wave-mask/wave-vector.slang create mode 100644 tests/hlsl-intrinsic/wave-mask/wave-vector.slang.expected.txt diff --git a/docs/wave-intrinsics.md b/docs/wave-intrinsics.md index 6a63d628c..eb670bf48 100644 --- a/docs/wave-intrinsics.md +++ b/docs/wave-intrinsics.md @@ -5,6 +5,110 @@ Slang has support for Wave intrinsics introduced to HLSL in SM6.0 and SM6.5. All Another wrinkle in compatibility is that on GLSL targets such as Vulkan, the is not built in language support for Matrix versions of Wave intrinsics. Currently this means that Matrix is not a supported type for Wave intrinsics on Vulkan, but may be in the future. +The Wave Intrinsics supported on Slang are listed below. Note that typically T generic types also include vector and matrix forms. 
+ +``` + +// Lane info + +uint WaveGetLaneCount(); + +uint WaveGetLaneIndex(); + +bool WaveIsFirstLane(); + +// Ballot + +bool WaveActiveAllTrue(bool condition); + +bool WaveActiveAnyTrue(bool condition); + +uint4 WaveActiveBallot(bool condition); + +uint WaveActiveCountBits(bool value); + +// Barriers + +void AllMemoryBarrierWithWaveSync(); + +void GroupMemoryBarrierWithWaveSync(); + +// Across Lanes + +__generic +T WaveActiveBitAnd(T expr); + +__generic +T WaveActiveBitOr(T expr); + +__generic +T WaveActiveBitXor(T expr); + +__generic +T WaveActiveMax(T expr); + +__generic +T WaveActiveMin(T expr); + +__generic +T WaveActiveProduct(T expr); + +__generic +T WaveActiveSum(T expr); + +__generic +bool WaveActiveAllEqual(T value); + +// Prefix + +__generic +T WavePrefixProduct(T expr); + +__generic +T WavePrefixSum(T expr); + +// Communication + +__generic +T WaveReadLaneFirst(T expr); + +__generic +T WaveBroadcastLaneAt(T value, constexpr int lane); + +__generic +T WaveReadLaneAt(T value, int lane); + +__generic +T WaveShuffle(T value, int lane); + +// Prefix + +uint WavePrefixCountBits(bool value); + +// Shader model 6.5 stuff +// https://github.com/microsoft/DirectX-Specs/blob/master/d3d/HLSL_ShaderModel6_5.md + +__generic +uint4 WaveMatch(T value); + +uint WaveMultiPrefixCountBits(bool value, uint4 mask); + +__generic +T WaveMultiPrefixBitAnd(T expr, uint4 mask); + +__generic +T WaveMultiPrefixBitOr(T expr, uint4 mask); + +__generic +T WaveMultiPrefixBitXor(T expr, uint4 mask); + +__generic +T WaveMultiPrefixProduct(T value, uint4 mask); + +__generic +T WaveMultiPrefixSum(T value, uint4 mask); +``` + Additional Wave Intrinsics ========================== @@ -39,6 +143,9 @@ void GroupMemoryBarrierWithWaveSync(); Synchronizes all lanes to the same GroupMemoryBarrierWithWaveSync in program flow. Orders group shared memory accesses such that accesses after the barrier can be seen by writes before. 
+ + + Wave Mask Intrinsics ==================== @@ -70,5 +177,93 @@ void GroupMemoryBarrierWithWaveMaskSync(WaveMask mask); Same as GroupMemoryBarrierWithWaveSync but takes a mask of active lanes to sync with. +The intrinsics that make up the Slang `WaveMask` extension. +``` +// Lane info + +WaveMask WaveGetConvergedMask(); + +bool WaveMaskIsFirstLane(WaveMask mask); + +// Ballot + +bool WaveMaskAllTrue(WaveMask mask, bool condition); + +bool WaveMaskAnyTrue(WaveMask mask, bool condition); + +WaveMask WaveMaskBallot(WaveMask mask, bool condition); + +WaveMask WaveMaskCountBits(WaveMask mask, bool value); + +__generic +WaveMask WaveMaskMatch(WaveMask mask, T value); + +// Barriers + +void AllMemoryBarrierWithWaveMaskSync(WaveMask mask); + +void GroupMemoryBarrierWithWaveMaskSync(WaveMask mask); + +// Across lane ops + +__generic +T WaveMaskBitAnd(WaveMask mask, T expr); + +__generic +T WaveMaskBitOr(WaveMask mask, T expr); + +__generic +T WaveMaskBitXor(WaveMask mask, T expr); + +__generic +T WaveMaskMax(WaveMask mask, T expr); + +__generic +T WaveMaskMin(WaveMask mask, T expr); + +__generic +T WaveMaskProduct(WaveMask mask, T expr); + +__generic +T WaveMaskSum(WaveMask mask, T expr); + +__generic +bool WaveMaskAllEqual(WaveMask mask, T value); + +// Prefix + +__generic +T WaveMaskPrefixProduct(WaveMask mask, T expr); + +__generic +T WaveMaskPrefixSum(WaveMask mask, T expr); + +__generic +T WaveMaskPrefixBitAnd(WaveMask mask, T expr); + +__generic +T WaveMaskPrefixBitOr(WaveMask mask, T expr); + +__generic +T WaveMaskPrefixBitXor(WaveMask mask, T expr); + +uint WaveMaskPrefixCountBits(WaveMask mask, bool value); + +// Communication + +__generic +T WaveMaskReadLaneFirst(WaveMask mask, T expr); + +__generic +T WaveMaskBroadcastLaneAt(WaveMask mask, T value, constexpr int lane); + +__generic +T WaveMaskReadLaneAt(WaveMask mask, T value, int lane); + +__generic +T WaveMaskShuffle(WaveMask mask, T value, int lane); + +``` + \ No newline at end of file diff --git 
a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang index e9da539bf..55c66ffc0 100644 --- a/source/slang/hlsl.meta.slang +++ b/source/slang/hlsl.meta.slang @@ -2485,13 +2485,20 @@ matrix trunc(matrix x) MATRIX_MAP_UNARY(T, N, M, trunc, x); } -// Slang Specific Mask Wave Intrinsics +// Slang Specific 'Mask' Wave Intrinsics typedef uint WaveMask; __target_intrinsic(cuda, "__activemask()") WaveMask WaveGetConvergedMask() { return 0xffffffff; } +__glsl_extension(GL_KHR_shader_subgroup_basic) +__spirv_version(1.3) +__target_intrinsic(glsl, "subgroupElect()") +__target_intrinsic(cuda, "(($0 & -$0) == (WarpMask(1) << _getLaneId()))") +__target_intrinsic(hlsl, "WaveIsFirstLane()") +bool WaveMaskIsFirstLane(WaveMask mask); + __glsl_extension(GL_KHR_shader_subgroup_vote) __spirv_version(1.3) __target_intrinsic(glsl, "subgroupAll($1)") @@ -2564,7 +2571,6 @@ __target_intrinsic(glsl, "subgroupBarrier()") __target_intrinsic(hlsl, "GroupMemoryBarrier()") void GroupMemoryBarrierWithWaveMaskSync(WaveMask mask); - __glsl_extension(GL_KHR_shader_subgroup_basic) __spirv_version(1.3) __target_intrinsic(glsl, "subgroupBarrier()") @@ -2620,7 +2626,7 @@ __target_intrinsic(cuda, "_waveShuffleMultiple($0, $1, $2)") __target_intrinsic(hlsl, "WaveReadLaneAt($1, $2)") vector WaveMaskReadLaneAt(WaveMask mask, vector value, int lane); __generic -__target_intrinsic(cuda, "_waveShuffleMultiple($0, $1)") +__target_intrinsic(cuda, "_waveShuffleMultiple($0, $1, $2)") __target_intrinsic(hlsl, "WaveReadLaneAt($1, $2)") matrix WaveMaskReadLaneAt(WaveMask mask, matrix value, int lane); @@ -2689,7 +2695,7 @@ __target_intrinsic(cuda, "_waveOrMultiple($0, $1)") __target_intrinsic(hlsl, "WaveActiveBitOr($1)") vector WaveMaskBitOr(WaveMask mask, vector expr); __generic -__target_intrinsic(cuda, "_waveOrMultiple(_$0, $1)") +__target_intrinsic(cuda, "_waveOrMultiple($0, $1)") __target_intrinsic(hlsl, "WaveActiveBitOr($1)") matrix WaveMaskBitOr(WaveMask mask, matrix expr); @@ -2866,8 +2872,82 @@ 
__generic __target_intrinsic(cuda, "_waveReadFirstMultiple($0, $1)") matrix WaveMaskReadLaneFirst(WaveMask mask, matrix expr); +// WaveMask SM6.5 like intrinsics +// TODO(JS): On HLSL it only works for 32 bits or less +__generic +__target_intrinsic(hlsl, "WaveMatch($1).x") +__cuda_sm_version(7.0) +__target_intrinsic(cuda, "_waveMatchScalar($0, $1)") +WaveMask WaveMaskMatch(WaveMask mask, T value); +__generic +__target_intrinsic(hlsl, "WaveMatch($1).x") +__cuda_sm_version(7.0) +__target_intrinsic(cuda, "_waveMatchMultiple($0, $1)") +WaveMask WaveMaskMatch(WaveMask mask, vector value); +__generic +__target_intrinsic(hlsl, "WaveMatch($1).x") +__cuda_sm_version(7.0) +__target_intrinsic(cuda, "_waveMatchMultiple($0, $1)") +WaveMask WaveMaskMatch(WaveMask mask, matrix value); + +__generic +__target_intrinsic(hlsl, "WaveMultiPrefixBitAnd($1, uint4($0, 0, 0, 0))") +__glsl_extension(GL_KHR_shader_subgroup_arithmetic) +__spirv_version(1.3) +//__target_intrinsic(glsl, "subgroupExclusiveAnd($1)") +__target_intrinsic(cuda, "_wavePrefixAnd($0, $1)") +T WaveMaskPrefixBitAnd(WaveMask mask, T expr); +__generic +__target_intrinsic(hlsl, "WaveMultiPrefixBitAnd($1, uint4($0, 0, 0, 0))") +__glsl_extension(GL_KHR_shader_subgroup_arithmetic) +__spirv_version(1.3) +__target_intrinsic(glsl, "subgroupExclusiveAnd($1)") +__target_intrinsic(cuda, "_wavePrefixAndMultiple($0, $1)") +vector WaveMaskPrefixBitAnd(WaveMask mask, vector expr); +__generic +__target_intrinsic(hlsl, "WaveMultiPrefixBitAnd($1, uint4($0, 0, 0, 0))") +__target_intrinsic(cuda, "_wavePrefixAndMultiple($0, $1)") +matrix WaveMaskPrefixBitAnd(WaveMask mask, matrix expr); + +__generic +__target_intrinsic(hlsl, "WaveMultiPrefixBitOr($1, uint4($0, 0, 0, 0))") +__glsl_extension(GL_KHR_shader_subgroup_arithmetic) +__spirv_version(1.3) +//__target_intrinsic(glsl, "subgroupExclusiveOr($1)") +__target_intrinsic(cuda, "_wavePrefixOr($0, $1)") +T WaveMaskPrefixBitOr(WaveMask mask, T expr); +__generic 
+__target_intrinsic(hlsl, "WaveMultiPrefixBitOr($1, uint4($0, 0, 0, 0))") +__glsl_extension(GL_KHR_shader_subgroup_arithmetic) +__spirv_version(1.3) +//__target_intrinsic(glsl, "subgroupExclusiveOr($1)") +__target_intrinsic(cuda, "_wavePrefixOrMultiple($0, $1)") +vector WaveMaskPrefixBitOr(WaveMask mask, vector expr); +__generic +__target_intrinsic(hlsl, "WaveMultiPrefixBitOr($1, uint4($0, 0, 0, 0))") +__target_intrinsic(cuda, "_wavePrefixOrMultiple($0, $1)") +matrix WaveMaskPrefixBitOr(WaveMask mask, matrix expr); + +__generic +__target_intrinsic(hlsl, "WaveMultiPrefixBitXor($1, uint4($0, 0, 0, 0))") +__glsl_extension(GL_KHR_shader_subgroup_arithmetic) +__spirv_version(1.3) +__target_intrinsic(glsl, "subgroupExclusiveXor($1)") +__target_intrinsic(cuda, "_wavePrefixXor($0, $1)") +T WaveMaskPrefixBitXor(WaveMask mask, T expr); +__generic +__target_intrinsic(hlsl, "WaveMultiPrefixBitXor($1, uint4($0, 0, 0, 0))") +__glsl_extension(GL_KHR_shader_subgroup_arithmetic) +__spirv_version(1.3) +__target_intrinsic(glsl, "subgroupExclusiveXor($1)") +__target_intrinsic(cuda, "_wavePrefixXorMultiple($0, $1)") +vector WaveMaskPrefixBitXor(WaveMask mask, vector expr); +__generic +__target_intrinsic(hlsl, "WaveMultiPrefixBitXor($1, uint4($0, 0, 0, 0))") +__target_intrinsic(cuda, "_wavePrefixXorMultiple($0, $1)") +matrix WaveMaskPrefixBitXor(WaveMask mask, matrix expr); // Shader model 6.0 stuff diff --git a/tests/hlsl-intrinsic/wave-mask/wave-active-product.slang b/tests/hlsl-intrinsic/wave-mask/wave-active-product.slang new file mode 100644 index 000000000..3135586a5 --- /dev/null +++ b/tests/hlsl-intrinsic/wave-mask/wave-active-product.slang @@ -0,0 +1,28 @@ +//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-cpu -compute +//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-slang -compute +//TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -dx12 -use-dxil -profile cs_6_0 +//TEST(compute, vulkan):COMPARE_COMPUTE_EX:-vk -compute +//TEST(compute):COMPARE_COMPUTE_EX:-cuda -compute + 
+//TEST_INPUT:ubuffer(data=[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0], stride=4):out,name outputBuffer +RWStructuredBuffer outputBuffer; + +[numthreads(8, 1, 1)] +void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) +{ + const int idx = int(dispatchThreadID.x); + const WaveMask mask0 = 0xff; + + const WaveMask mask1 = WaveMaskBallot(mask0, idx < 3); + + if (idx < 3) + { + // Diverge!! + outputBuffer[idx] = -1; + return; + } + + const WaveMask mask2 = mask0 & ~mask1; + + outputBuffer[idx] = WaveMaskProduct(mask2, idx); +} \ No newline at end of file diff --git a/tests/hlsl-intrinsic/wave-mask/wave-active-product.slang.expected.txt b/tests/hlsl-intrinsic/wave-mask/wave-active-product.slang.expected.txt new file mode 100644 index 000000000..e69de29bb diff --git a/tests/hlsl-intrinsic/wave-mask/wave-broadcast-lane-at-vk.slang b/tests/hlsl-intrinsic/wave-mask/wave-broadcast-lane-at-vk.slang new file mode 100644 index 000000000..94a44df97 --- /dev/null +++ b/tests/hlsl-intrinsic/wave-mask/wave-broadcast-lane-at-vk.slang @@ -0,0 +1,30 @@ +//TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -dx12 -use-dxil -profile cs_6_0 +//TEST(compute, vulkan):COMPARE_COMPUTE_EX:-vk -compute + +//TEST_INPUT:ubuffer(data=[0 0 0 0], stride=4):out,name outputBuffer +RWStructuredBuffer outputBuffer; + +[numthreads(4, 1, 1)] +void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) +{ + const WaveMask mask = 0xf; + + int idx = int(dispatchThreadID.x); + + int value = 0; + + // Scalar + + value += WaveMaskBroadcastLaneAt(mask, idx, 1); + + // vector + + { + float2 v = float2(idx + 1, idx + 2); + float2 readValue = WaveMaskBroadcastLaneAt(mask, v, 4 & 3); + + value += int(readValue[0] + readValue[1]); + } + + outputBuffer[idx] = value; +} \ No newline at end of file diff --git a/tests/hlsl-intrinsic/wave-mask/wave-broadcast-lane-at-vk.slang.expected.txt b/tests/hlsl-intrinsic/wave-mask/wave-broadcast-lane-at-vk.slang.expected.txt new file mode 100644 index 000000000..e785149d2 --- 
/dev/null +++ b/tests/hlsl-intrinsic/wave-mask/wave-broadcast-lane-at-vk.slang.expected.txt @@ -0,0 +1,4 @@ +4 +4 +4 +4 diff --git a/tests/hlsl-intrinsic/wave-mask/wave-broadcast-lane-at.slang b/tests/hlsl-intrinsic/wave-mask/wave-broadcast-lane-at.slang new file mode 100644 index 000000000..62d29085f --- /dev/null +++ b/tests/hlsl-intrinsic/wave-mask/wave-broadcast-lane-at.slang @@ -0,0 +1,43 @@ +//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-cpu -compute +//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-slang -compute +//TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -dx12 -use-dxil -profile cs_6_0 +// Disabled on VK because glsl can't do WaveReadLaneAt on matrix. +//DISABLE_TEST(compute, vulkan):COMPARE_COMPUTE_EX:-vk -compute +//TEST(compute):COMPARE_COMPUTE_EX:-cuda -compute + +//TEST_INPUT:ubuffer(data=[0 0 0 0], stride=4):out,name outputBuffer +RWStructuredBuffer outputBuffer; + +[numthreads(4, 1, 1)] +void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) +{ + int idx = int(dispatchThreadID.x); + + const WaveMask mask = 0xf; + + int value = 0; + + // Scalar + + value += WaveMaskBroadcastLaneAt(mask, idx, 1); + + // vector + + { + float2 v = float2(idx + 1, idx + 2); + float2 readValue = WaveMaskBroadcastLaneAt(mask, v, 2); + + value += int(readValue[0] + readValue[1]); + } + + // matrix + { + matrix v = matrix(idx, idx - 1, idx * 3, idx - 2); + + matrix readValue = WaveMaskBroadcastLaneAt(mask, v, 3); + + value += int(readValue[0][0] + readValue[0][1] + readValue[1][0] + readValue[1][1]); + } + + outputBuffer[idx] = value; +} \ No newline at end of file diff --git a/tests/hlsl-intrinsic/wave-mask/wave-broadcast-lane-at.slang.expected.txt b/tests/hlsl-intrinsic/wave-mask/wave-broadcast-lane-at.slang.expected.txt new file mode 100644 index 000000000..5ce1f8639 --- /dev/null +++ b/tests/hlsl-intrinsic/wave-mask/wave-broadcast-lane-at.slang.expected.txt @@ -0,0 +1,4 @@ +17 +17 +17 +17 diff --git a/tests/hlsl-intrinsic/wave-mask/wave-diverge.slang 
b/tests/hlsl-intrinsic/wave-mask/wave-diverge.slang new file mode 100644 index 000000000..51b7d3aeb --- /dev/null +++ b/tests/hlsl-intrinsic/wave-mask/wave-diverge.slang @@ -0,0 +1,32 @@ +//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-cpu -compute +//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-slang -compute +//TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -dx12 -use-dxil -profile cs_6_0 +//TEST(compute, vulkan):COMPARE_COMPUTE_EX:-vk -compute +//TEST(compute):COMPARE_COMPUTE_EX:-cuda -compute + +//TEST_INPUT:ubuffer(data=[0 0 0 0], stride=4):out,name outputBuffer +RWStructuredBuffer outputBuffer; + +[numthreads(4, 1, 1)] +void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) +{ + const WaveMask mask0 = 0xf; + + int idx = int(dispatchThreadID.x); + + int value = 0; + + const WaveMask mask1 = WaveMaskBallot(mask0, idx == 2); + + if (idx == 2) + { + // diverge + return; + } + + const WaveMask mask2 = mask0 & ~mask1; + + value = WaveMaskMin(mask2, idx + 1); + + outputBuffer[idx] = value; +} \ No newline at end of file diff --git a/tests/hlsl-intrinsic/wave-mask/wave-diverge.slang.expected.txt b/tests/hlsl-intrinsic/wave-mask/wave-diverge.slang.expected.txt new file mode 100644 index 000000000..68b8a88e2 --- /dev/null +++ b/tests/hlsl-intrinsic/wave-mask/wave-diverge.slang.expected.txt @@ -0,0 +1,4 @@ +1 +1 +0 +1 diff --git a/tests/hlsl-intrinsic/wave-mask/wave-equality.slang b/tests/hlsl-intrinsic/wave-mask/wave-equality.slang new file mode 100644 index 000000000..13887b2c8 --- /dev/null +++ b/tests/hlsl-intrinsic/wave-mask/wave-equality.slang @@ -0,0 +1,33 @@ +//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-cpu -compute +//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-slang -compute +//TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -dx12 -use-dxil -profile cs_6_0 +//TEST(compute, vulkan):COMPARE_COMPUTE_EX:-vk -compute +//TEST(compute):COMPARE_COMPUTE_EX:-cuda -compute -render-features cuda_sm_7_0 + +//TEST_INPUT:ubuffer(data=[0 0 0 0], stride=4):out,name 
outputBuffer +RWStructuredBuffer outputBuffer; + +[numthreads(4, 1, 1)] +void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) +{ + const WaveMask mask = 0xf; + + int idx = int(dispatchThreadID.x); + + int value = 0; + + // Scalar + + value |= WaveMaskAllEqual(mask, idx * 0 + 1) ? 1 : 0; // true + value |= WaveMaskAllEqual(mask, idx & 2) ? 2 : 0; // false + + // Vector + + int2 v0 = int2(idx & 0xf0, (idx & 0xf00) + 1); // (0, 1) + int2 v1 = int2(idx & 2, (idx & 2) + 1); + + value |= WaveMaskAllEqual(mask, v0) ? 0x10 : 0; // true + value |= WaveMaskAllEqual(mask, v1) ? 0x20 : 0; // false + + outputBuffer[idx] = value; +} \ No newline at end of file diff --git a/tests/hlsl-intrinsic/wave-mask/wave-equality.slang.expected.txt b/tests/hlsl-intrinsic/wave-mask/wave-equality.slang.expected.txt new file mode 100644 index 000000000..2bf571888 --- /dev/null +++ b/tests/hlsl-intrinsic/wave-mask/wave-equality.slang.expected.txt @@ -0,0 +1,4 @@ +11 +11 +11 +11 diff --git a/tests/hlsl-intrinsic/wave-mask/wave-is-first-lane.slang b/tests/hlsl-intrinsic/wave-mask/wave-is-first-lane.slang new file mode 100644 index 000000000..2b332bfb5 --- /dev/null +++ b/tests/hlsl-intrinsic/wave-mask/wave-is-first-lane.slang @@ -0,0 +1,28 @@ +//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-cpu -compute +//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-slang -compute +//TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -dx12 -use-dxil -profile cs_6_0 +//TEST(compute, vulkan):COMPARE_COMPUTE_EX:-vk -compute +//TEST(compute, vulkan):COMPARE_COMPUTE_EX:-cuda -compute + +//TEST_INPUT:ubuffer(data=[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0], stride=4):out,name outputBuffer +RWStructuredBuffer outputBuffer; + +[numthreads(8, 1, 1)] +void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) +{ + const WaveMask mask0 = 0xff; + + int idx = int(dispatchThreadID.x); + + const WaveMask mask1 = WaveMaskBallot(mask0, idx < 3); + if (idx < 3) + { + // Diverge!! 
+ outputBuffer[idx] = -1; + return; + } + + const WaveMask mask2 = mask0 & ~mask1; + + outputBuffer[idx] = WaveMaskIsFirstLane(mask2); +} \ No newline at end of file diff --git a/tests/hlsl-intrinsic/wave-mask/wave-is-first-lane.slang.expected.txt b/tests/hlsl-intrinsic/wave-mask/wave-is-first-lane.slang.expected.txt new file mode 100644 index 000000000..43debbc9d --- /dev/null +++ b/tests/hlsl-intrinsic/wave-mask/wave-is-first-lane.slang.expected.txt @@ -0,0 +1,16 @@ +FFFFFFFF +FFFFFFFF +FFFFFFFF +1 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 diff --git a/tests/hlsl-intrinsic/wave-mask/wave-mask-prefix.slang b/tests/hlsl-intrinsic/wave-mask/wave-mask-prefix.slang new file mode 100644 index 000000000..a2f25d71b --- /dev/null +++ b/tests/hlsl-intrinsic/wave-mask/wave-mask-prefix.slang @@ -0,0 +1,26 @@ +//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-cpu -compute +//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-slang -compute +// We need SM6.5 for these tests +// Disable because version of dxc we are currently using doesn't support SM6.5 +//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -dx12 -use-dxil -profile sm_6_5 +// Disabled because we don't have GLSL intrinsics for these it seems +//DISABLE_TEST(compute, vulkan):COMPARE_COMPUTE_EX:-vk -compute +//TEST(compute):COMPARE_COMPUTE_EX:-cuda -compute -render-features cuda_sm_7_0 + +//TEST_INPUT:ubuffer(data=[0 0 0 0 0 0 0 0], stride=4):out,name outputBuffer +RWStructuredBuffer outputBuffer; + +[numthreads(8, 1, 1)] +void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) +{ + int idx = int(dispatchThreadID.x); + + int value = 0; + + WaveMask mask = 0xff; + + // Scalar + value += WaveMaskPrefixSum(mask, 1 << idx); + + outputBuffer[idx] = value; +} \ No newline at end of file diff --git a/tests/hlsl-intrinsic/wave-mask/wave-mask-prefix.slang.expected.txt b/tests/hlsl-intrinsic/wave-mask/wave-mask-prefix.slang.expected.txt new file mode 100644 index 000000000..6ec6deeea --- /dev/null +++ 
b/tests/hlsl-intrinsic/wave-mask/wave-mask-prefix.slang.expected.txt @@ -0,0 +1,8 @@ +0 +1 +3 +7 +F +1F +3F +7F diff --git a/tests/hlsl-intrinsic/wave-mask/wave-matrix.slang b/tests/hlsl-intrinsic/wave-mask/wave-matrix.slang new file mode 100644 index 000000000..dadce4051 --- /dev/null +++ b/tests/hlsl-intrinsic/wave-mask/wave-matrix.slang @@ -0,0 +1,39 @@ +//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-cpu -compute +//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-slang -compute +//TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -dx12 -use-dxil -profile cs_6_0 +//DISABLE_TEST(compute, vulkan):COMPARE_COMPUTE_EX:-vk -compute +//TEST(compute):COMPARE_COMPUTE_EX:-cuda -compute + +//TEST_INPUT:ubuffer(data=[0 0 0 0 0 0 0 0], stride=4):out,name outputBuffer +RWStructuredBuffer outputBuffer; + +[numthreads(8, 1, 1)] +void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) +{ + const WaveMask mask = 0xff; + + const int idx = int(dispatchThreadID.x); + + // NOTE! dxc only supports bit ops on uint and associated types NOT int + // Also GLSL does not have built in support for int matrices. 
So we'll just try with float for now + // GLSL does not support matrix types for Wave like intrinsics + + matrix v0 = matrix(idx + 1, idx + 2, idx + 3, idx + 4); + matrix v1 = matrix(v0) + matrix(1, 1, 1, 1); + + + matrix uv0 = matrix(v0[0][0], v0[0][1], v0[1][0], v0[0][1]); + + matrix r0 = WaveMaskSum(mask, v0); + matrix r1 = WaveMaskSum(mask, v1); + matrix r2 = WaveMaskBitXor(mask, uv0); + matrix r3 = WaveMaskBitOr(mask, uv0); + matrix r4 = WaveMaskBitAnd(mask, uv0); + + matrix r5 = r2 + r3 + r4; + matrix r6 = matrix(r5[0][0], r5[0][1], r5[1][0], r5[1][1]); + + matrix r = r0 + matrix(r1) + r6; + + outputBuffer[idx] = r[0][0] + r[0][1] + r[1][0] + r[1][1]; +} \ No newline at end of file diff --git a/tests/hlsl-intrinsic/wave-mask/wave-matrix.slang.expected.txt b/tests/hlsl-intrinsic/wave-mask/wave-matrix.slang.expected.txt new file mode 100644 index 000000000..23f9285c3 --- /dev/null +++ b/tests/hlsl-intrinsic/wave-mask/wave-matrix.slang.expected.txt @@ -0,0 +1,8 @@ +1EC +1EC +1EC +1EC +1EC +1EC +1EC +1EC diff --git a/tests/hlsl-intrinsic/wave-mask/wave-prefix-product.slang b/tests/hlsl-intrinsic/wave-mask/wave-prefix-product.slang new file mode 100644 index 000000000..14ac8d652 --- /dev/null +++ b/tests/hlsl-intrinsic/wave-mask/wave-prefix-product.slang @@ -0,0 +1,27 @@ +//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-cpu -compute +//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-slang -compute +//TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -dx12 -use-dxil -profile cs_6_0 +//TEST(compute, vulkan):COMPARE_COMPUTE_EX:-vk -compute +//TEST(compute):COMPARE_COMPUTE_EX:-cuda -compute + +//TEST_INPUT:ubuffer(data=[0 0 0 0 0 0 0 0], stride=4):out,name outputBuffer +RWStructuredBuffer outputBuffer; + +[numthreads(8, 1, 1)] +void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) +{ + WaveMask mask = 0xff; + + int idx = int(dispatchThreadID.x); + + + float2 v1 = float2(1, idx + 1); + + int r0 = WaveMaskPrefixProduct(mask, idx + 1); + float2 r1 = 
WaveMaskPrefixProduct(mask, v1); + + int r2 = int(r1.x) + int(r1.y) - 1; + + outputBuffer[idx] = r0 + (r2 << 16); + +} \ No newline at end of file diff --git a/tests/hlsl-intrinsic/wave-mask/wave-prefix-product.slang.expected.txt b/tests/hlsl-intrinsic/wave-mask/wave-prefix-product.slang.expected.txt new file mode 100644 index 000000000..1b233efaf --- /dev/null +++ b/tests/hlsl-intrinsic/wave-mask/wave-prefix-product.slang.expected.txt @@ -0,0 +1,8 @@ +10001 +10001 +20002 +60006 +180018 +780078 +2D002D0 +13B013B0 diff --git a/tests/hlsl-intrinsic/wave-mask/wave-prefix-sum.slang b/tests/hlsl-intrinsic/wave-mask/wave-prefix-sum.slang new file mode 100644 index 000000000..f756398b8 --- /dev/null +++ b/tests/hlsl-intrinsic/wave-mask/wave-prefix-sum.slang @@ -0,0 +1,25 @@ +//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-cpu -compute +//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-slang -compute +//TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -dx12 -use-dxil -profile cs_6_0 +//TEST(compute, vulkan):COMPARE_COMPUTE_EX:-vk -compute +//TEST(compute):COMPARE_COMPUTE_EX:-cuda -compute + +//TEST_INPUT:ubuffer(data=[0 0 0 0 0 0 0 0], stride=4):out,name outputBuffer +RWStructuredBuffer outputBuffer; + +[numthreads(8, 1, 1)] +void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) +{ + WaveMask mask = 0xff; + + int idx = int(dispatchThreadID.x); + + float2 v1 = float2(1, 1 << idx); + + int r0 = WaveMaskPrefixSum(mask, 1 << idx); + float2 r1 = WaveMaskPrefixSum(mask, v1); + + int r2 = int(r1.x) + int(r1.y) - idx; + + outputBuffer[idx] = r0 + (r2 << 16); +} \ No newline at end of file diff --git a/tests/hlsl-intrinsic/wave-mask/wave-prefix-sum.slang.expected.txt b/tests/hlsl-intrinsic/wave-mask/wave-prefix-sum.slang.expected.txt new file mode 100644 index 000000000..4b4230415 --- /dev/null +++ b/tests/hlsl-intrinsic/wave-mask/wave-prefix-sum.slang.expected.txt @@ -0,0 +1,8 @@ +0 +10001 +30003 +70007 +F000F +1F001F +3F003F +7F007F diff --git 
a/tests/hlsl-intrinsic/wave-mask/wave-read-lane-at-vk.slang b/tests/hlsl-intrinsic/wave-mask/wave-read-lane-at-vk.slang new file mode 100644 index 000000000..e0464ae68 --- /dev/null +++ b/tests/hlsl-intrinsic/wave-mask/wave-read-lane-at-vk.slang @@ -0,0 +1,46 @@ +// This is similar to wave-read-lane-at.slang but tests more limited supported types for vk. +// We have this 'simple' test, because we can't do matrix (or imat) operations on GLSL/Vk target + +//TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -dx12 -use-dxil -profile cs_6_0 +//TEST(compute, vulkan):COMPARE_COMPUTE_EX:-vk -compute + +//TEST_INPUT:ubuffer(data=[0 0 0 0], stride=4):out,name outputBuffer +RWStructuredBuffer outputBuffer; + +// Note from HLSL: `The input lane index must be uniform across the wave.`. +// The same restriction applies to glsl/SPIR-V 1.5 +// So we are going to use the input buffer to achieve this. + +//TEST_INPUT:ubuffer(data=[1 2 3 0], stride=4):name inputBuffer +RWStructuredBuffer inputBuffer; + +[numthreads(4, 1, 1)] +void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) +{ + WaveMask mask = 0xf; + + int idx = int(dispatchThreadID.x); + + int value = 0; + + for (int i = 0; i < 4; ++i) + { + // Scalar + + // The laneId is 'dynamic' but it is also uniform across the wave (as required by spec) + const int laneId = inputBuffer[i]; + + value += WaveMaskReadLaneAt(mask, idx, laneId); + + // vector + + { + float2 v = float2(idx + 1, idx + 2); + float2 readValue = WaveMaskReadLaneAt(mask, v, (laneId + 1) & 3); + + value += int(readValue[0] + readValue[1]); + } + } + + outputBuffer[idx] = value; +} \ No newline at end of file diff --git a/tests/hlsl-intrinsic/wave-mask/wave-read-lane-at-vk.slang.expected.txt b/tests/hlsl-intrinsic/wave-mask/wave-read-lane-at-vk.slang.expected.txt new file mode 100644 index 000000000..4e98888c6 --- /dev/null +++ b/tests/hlsl-intrinsic/wave-mask/wave-read-lane-at-vk.slang.expected.txt @@ -0,0 +1,4 @@ +1E +1E +1E +1E diff --git 
a/tests/hlsl-intrinsic/wave-mask/wave-read-lane-at.slang b/tests/hlsl-intrinsic/wave-mask/wave-read-lane-at.slang new file mode 100644 index 000000000..1b231fbc1 --- /dev/null +++ b/tests/hlsl-intrinsic/wave-mask/wave-read-lane-at.slang @@ -0,0 +1,56 @@ +//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-cpu -compute +//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-slang -compute +//TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -dx12 -use-dxil -profile cs_6_0 +// Disabled on VK because glsl can't do WaveReadLaneAt on matrix types. +//DISABLE_TEST(compute, vulkan):COMPARE_COMPUTE_EX:-vk -compute +//TEST(compute):COMPARE_COMPUTE_EX:-cuda -compute + +//TEST_INPUT:ubuffer(data=[0 0 0 0], stride=4):out,name outputBuffer +RWStructuredBuffer outputBuffer; + +// Note from HLSL: `The input lane index must be uniform across the wave.`. +// The same restriction applies to glsl/SPIR-V 1.5 +// So the lane indices are read from inputBuffer, indexed by the loop counter, so every thread uses the same value. + +//TEST_INPUT:ubuffer(data=[1 2 3 0], stride=4):name inputBuffer +RWStructuredBuffer inputBuffer; + +[numthreads(4, 1, 1)] +void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) +{ + WaveMask mask = 0xf; + + int idx = int(dispatchThreadID.x); + + int value = 0; + + for (int i = 0; i < 4; ++i) + { + // Scalar + + // The laneId is 'dynamic' but it is also uniform across the wave (as required by spec) + const int laneId = inputBuffer[i]; + + value += WaveMaskReadLaneAt(mask, idx, laneId); + + // vector + + { + float2 v = float2(idx + 1, idx + 2); + float2 readValue = WaveMaskReadLaneAt(mask, v, (laneId + 1) & 3); // & 3 keeps the lane index in the range 0..3 + + value += int(readValue[0] + readValue[1]); + } + + // matrix + { + matrix v = matrix(idx, idx - 1, idx * 3, idx - 2); + + matrix readValue = WaveMaskReadLaneAt(mask, v, (laneId - 1) & 3); // & 3 keeps the lane index in the range 0..3 + + value += int(readValue[0][0] + readValue[0][1] + readValue[1][0] + readValue[1][1]); + } + } + + outputBuffer[idx] = value; +} \ No newline at end of file diff --git a/tests/hlsl-intrinsic/wave-mask/wave-read-lane-at.slang.expected.txt 
b/tests/hlsl-intrinsic/wave-mask/wave-read-lane-at.slang.expected.txt new file mode 100644 index 000000000..c6167dbae --- /dev/null +++ b/tests/hlsl-intrinsic/wave-mask/wave-read-lane-at.slang.expected.txt @@ -0,0 +1,4 @@ +36 +36 +36 +36 diff --git a/tests/hlsl-intrinsic/wave-mask/wave-shuffle-vk.slang b/tests/hlsl-intrinsic/wave-mask/wave-shuffle-vk.slang new file mode 100644 index 000000000..fa9f4b3e5 --- /dev/null +++ b/tests/hlsl-intrinsic/wave-mask/wave-shuffle-vk.slang @@ -0,0 +1,34 @@ +// Other targets are disabled here because the main test is wave-shuffle.slang; this file covers only scalar and vector shuffles and is enabled for VK and CUDA +//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-cpu -compute +//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-slang -compute +//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -dx12 -use-dxil -profile cs_6_0 +//TEST(compute, vulkan):COMPARE_COMPUTE_EX:-vk -compute +//TEST(compute):COMPARE_COMPUTE_EX:-cuda -compute + +//TEST_INPUT:ubuffer(data=[0 0 0 0], stride=4):out,name outputBuffer +RWStructuredBuffer outputBuffer; + +[numthreads(4, 1, 1)] +void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) +{ + const WaveMask mask = 0xf; + + int idx = int(dispatchThreadID.x); + + int value = 0; + + // Scalar + + value += WaveMaskShuffle(mask, idx, (idx + 1) & 3); // source lane is derived from idx, so it differs per thread; & 3 keeps it in 0..3 + + // vector + + { + float2 v = float2(idx + 1, idx + 2); + float2 readValue = WaveMaskShuffle(mask, v, (idx - 1) & 3); + + value += int(readValue[0] + readValue[1]); + } + + outputBuffer[idx] = value; +} \ No newline at end of file diff --git a/tests/hlsl-intrinsic/wave-mask/wave-shuffle-vk.slang.expected.txt b/tests/hlsl-intrinsic/wave-mask/wave-shuffle-vk.slang.expected.txt new file mode 100644 index 000000000..b20444fc5 --- /dev/null +++ b/tests/hlsl-intrinsic/wave-mask/wave-shuffle-vk.slang.expected.txt @@ -0,0 +1,4 @@ +A +5 +8 +7 diff --git a/tests/hlsl-intrinsic/wave-mask/wave-shuffle.slang b/tests/hlsl-intrinsic/wave-mask/wave-shuffle.slang new file mode 100644 index 000000000..a559aae8f --- /dev/null +++ 
b/tests/hlsl-intrinsic/wave-mask/wave-shuffle.slang @@ -0,0 +1,44 @@ +//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-cpu -compute +//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-slang -compute +// Disabled on D3D because, in general, WaveShuffle requires hardware that doesn't have the 'uniform laneId across Wave' restriction. +//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -dx12 -use-dxil -profile cs_6_0 +// Disabled on VK because vk doesn't currently support matrix types. See wave-shuffle-vk.slang for the VK variant. +//DISABLE_TEST(compute, vulkan):COMPARE_COMPUTE_EX:-vk -compute +//TEST(compute):COMPARE_COMPUTE_EX:-cuda -compute + +//TEST_INPUT:ubuffer(data=[0 0 0 0], stride=4):out,name outputBuffer +RWStructuredBuffer outputBuffer; + +[numthreads(4, 1, 1)] +void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) +{ + const WaveMask mask = 0xf; + + int idx = int(dispatchThreadID.x); + + int value = 0; + + // Scalar + + value += WaveMaskShuffle(mask, idx, (idx + 1) & 3); // source lane is derived from idx, so it differs per thread; & 3 keeps it in 0..3 + + // vector + + { + float2 v = float2(idx + 1, idx + 2); + float2 readValue = WaveMaskShuffle(mask, v, (idx - 1) & 3); + + value += int(readValue[0] + readValue[1]); + } + + // matrix + { + matrix v = matrix(idx, idx - 1, idx * 3, idx - 2); + + matrix readValue = WaveMaskShuffle(mask, v, (idx - 1) & 3); + + value += int(readValue[0][0] + readValue[0][1] + readValue[1][0] + readValue[1][1]); + } + + outputBuffer[idx] = value; +} \ No newline at end of file diff --git a/tests/hlsl-intrinsic/wave-mask/wave-shuffle.slang.expected.txt b/tests/hlsl-intrinsic/wave-mask/wave-shuffle.slang.expected.txt new file mode 100644 index 000000000..a327b0804 --- /dev/null +++ b/tests/hlsl-intrinsic/wave-mask/wave-shuffle.slang.expected.txt @@ -0,0 +1,4 @@ +19 +2 +B +10 diff --git a/tests/hlsl-intrinsic/wave-mask/wave-vector.slang b/tests/hlsl-intrinsic/wave-mask/wave-vector.slang new file mode 100644 index 000000000..083e1b5b9 --- /dev/null +++ b/tests/hlsl-intrinsic/wave-mask/wave-vector.slang @@ -0,0 +1,31 @@ 
+//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-cpu -compute +//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-slang -compute +//TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -dx12 -use-dxil -profile cs_6_0 +//TEST(compute, vulkan):COMPARE_COMPUTE_EX:-vk -compute +//TEST(compute):COMPARE_COMPUTE_EX:-cuda -compute + +//TEST_INPUT:ubuffer(data=[0 0 0 0 0 0 0 0], stride=4):out,name outputBuffer +RWStructuredBuffer outputBuffer; + +[numthreads(8, 1, 1)] +void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) +{ + const WaveMask mask = 0xff; + + const int idx = int(dispatchThreadID.x); + + int2 v0 = int2(idx + 1, idx + 2); + float2 v1 = float2(idx + 2, idx + 3); + // NOTE! dxc only supports bit ops on uint and associated types, NOT int, so the WaveMaskBit* calls below take this unsigned copy of v0 + uint2 uv0 = v0; + + int2 r0 = WaveMaskSum(mask, v0); + float2 r1 = WaveMaskSum(mask, v1); + int2 r2 = WaveMaskBitXor(mask, uv0); + int2 r3 = WaveMaskBitOr(mask, uv0); + int2 r4 = WaveMaskBitAnd(mask, uv0); + + int2 r = r0 + int2(r1) + r2 + r3 + r4; + + outputBuffer[idx] = r.x + r.y; // both components of the combined result are folded into one int; the expected output is D6 for all 8 threads +} \ No newline at end of file diff --git a/tests/hlsl-intrinsic/wave-mask/wave-vector.slang.expected.txt b/tests/hlsl-intrinsic/wave-mask/wave-vector.slang.expected.txt new file mode 100644 index 000000000..eb6984bb6 --- /dev/null +++ b/tests/hlsl-intrinsic/wave-mask/wave-vector.slang.expected.txt @@ -0,0 +1,8 @@ +D6 +D6 +D6 +D6 +D6 +D6 +D6 +D6 -- cgit v1.2.3