From 6f43b2698a99cc4f4bb4e905749fb87f24bf391b Mon Sep 17 00:00:00 2001 From: jsmall-nvidia Date: Fri, 27 Mar 2020 18:35:06 -0400 Subject: WaveBroadcastAt/WaveShuffle (#1299) * Support for WaveReadLaneAt with dynamic (but uniform across Wave) on Vk by enabling VK1.4. Fixed wave-lane-at.slang test to test with laneId that is uniform across the Wave. * Added WaveShuffle intrinsic. Test for WaveShuffle intrinsic. * Added some documentation on WaveShuffle * Fix that version required for subgroupBroadcast to be non constexpr is actually 1.5 * Added WaveBroadcastLaneAt Documented WaveShuffle/BroadcastLaneAt/ReadLaneAt * Update docs around WaveBroadcast/Read/Shuffle. Use `_waveShuffle` as name in CUDA prelude to better describe its more flexible behavior. --- .../hlsl-intrinsic/wave-broadcast-lane-at-vk.slang | 28 +++++++++++ .../wave-broadcast-lane-at-vk.slang.expected.txt | 4 ++ tests/hlsl-intrinsic/wave-broadcast-lane-at.slang | 41 ++++++++++++++++ .../wave-broadcast-lane-at.slang.expected.txt | 4 ++ tests/hlsl-intrinsic/wave-lane-at-vk.slang | 45 ------------------ .../wave-lane-at-vk.slang.expected.txt | 4 -- tests/hlsl-intrinsic/wave-lane-at.slang | 54 ---------------------- .../hlsl-intrinsic/wave-lane-at.slang.expected.txt | 4 -- tests/hlsl-intrinsic/wave-read-lane-at-vk.slang | 44 ++++++++++++++++++ .../wave-read-lane-at-vk.slang.expected.txt | 4 ++ tests/hlsl-intrinsic/wave-read-lane-at.slang | 54 ++++++++++++++++++++++ .../wave-read-lane-at.slang.expected.txt | 4 ++ tests/hlsl-intrinsic/wave-shuffle-vk.slang | 1 - 13 files changed, 183 insertions(+), 108 deletions(-) create mode 100644 tests/hlsl-intrinsic/wave-broadcast-lane-at-vk.slang create mode 100644 tests/hlsl-intrinsic/wave-broadcast-lane-at-vk.slang.expected.txt create mode 100644 tests/hlsl-intrinsic/wave-broadcast-lane-at.slang create mode 100644 tests/hlsl-intrinsic/wave-broadcast-lane-at.slang.expected.txt delete mode 100644 tests/hlsl-intrinsic/wave-lane-at-vk.slang delete mode 100644 
tests/hlsl-intrinsic/wave-lane-at-vk.slang.expected.txt delete mode 100644 tests/hlsl-intrinsic/wave-lane-at.slang delete mode 100644 tests/hlsl-intrinsic/wave-lane-at.slang.expected.txt create mode 100644 tests/hlsl-intrinsic/wave-read-lane-at-vk.slang create mode 100644 tests/hlsl-intrinsic/wave-read-lane-at-vk.slang.expected.txt create mode 100644 tests/hlsl-intrinsic/wave-read-lane-at.slang create mode 100644 tests/hlsl-intrinsic/wave-read-lane-at.slang.expected.txt (limited to 'tests') diff --git a/tests/hlsl-intrinsic/wave-broadcast-lane-at-vk.slang b/tests/hlsl-intrinsic/wave-broadcast-lane-at-vk.slang new file mode 100644 index 000000000..3c746476a --- /dev/null +++ b/tests/hlsl-intrinsic/wave-broadcast-lane-at-vk.slang @@ -0,0 +1,28 @@ +//TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -dx12 -use-dxil -profile cs_6_0 +//TEST(compute, vulkan):COMPARE_COMPUTE_EX:-vk -compute + +//TEST_INPUT:ubuffer(data=[0 0 0 0], stride=4):out,name outputBuffer +RWStructuredBuffer outputBuffer; + +[numthreads(4, 1, 1)] +void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) +{ + int idx = int(dispatchThreadID.x); + + int value = 0; + + // Scalar + + value += WaveBroadcastLaneAt(idx, 1); + + // vector + + { + float2 v = float2(idx + 1, idx + 2); + float2 readValue = WaveBroadcastLaneAt(v, 4 & 3); + + value += int(readValue[0] + readValue[1]); + } + + outputBuffer[idx] = value; +} \ No newline at end of file diff --git a/tests/hlsl-intrinsic/wave-broadcast-lane-at-vk.slang.expected.txt b/tests/hlsl-intrinsic/wave-broadcast-lane-at-vk.slang.expected.txt new file mode 100644 index 000000000..e785149d2 --- /dev/null +++ b/tests/hlsl-intrinsic/wave-broadcast-lane-at-vk.slang.expected.txt @@ -0,0 +1,4 @@ +4 +4 +4 +4 diff --git a/tests/hlsl-intrinsic/wave-broadcast-lane-at.slang b/tests/hlsl-intrinsic/wave-broadcast-lane-at.slang new file mode 100644 index 000000000..b6f5d3847 --- /dev/null +++ b/tests/hlsl-intrinsic/wave-broadcast-lane-at.slang @@ -0,0 +1,41 @@ 
+//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-cpu -compute +//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-slang -compute +//TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -dx12 -use-dxil -profile cs_6_0 +// Disabled on VK because glsl can't do WaveReadLaneAt on matrix. +//DISABLE_TEST(compute, vulkan):COMPARE_COMPUTE_EX:-vk -compute +//TEST(compute):COMPARE_COMPUTE_EX:-cuda -compute + +//TEST_INPUT:ubuffer(data=[0 0 0 0], stride=4):out,name outputBuffer +RWStructuredBuffer outputBuffer; + +[numthreads(4, 1, 1)] +void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) +{ + int idx = int(dispatchThreadID.x); + + int value = 0; + + // Scalar + + value += WaveBroadcastLaneAt(idx, 1); + + // vector + + { + float2 v = float2(idx + 1, idx + 2); + float2 readValue = WaveBroadcastLaneAt(v, 2); + + value += int(readValue[0] + readValue[1]); + } + + // matrix + { + matrix v = matrix(idx, idx - 1, idx * 3, idx - 2); + + matrix readValue = WaveBroadcastLaneAt(v, 3); + + value += int(readValue[0][0] + readValue[0][1] + readValue[1][0] + readValue[1][1]); + } + + outputBuffer[idx] = value; +} \ No newline at end of file diff --git a/tests/hlsl-intrinsic/wave-broadcast-lane-at.slang.expected.txt b/tests/hlsl-intrinsic/wave-broadcast-lane-at.slang.expected.txt new file mode 100644 index 000000000..5ce1f8639 --- /dev/null +++ b/tests/hlsl-intrinsic/wave-broadcast-lane-at.slang.expected.txt @@ -0,0 +1,4 @@ +17 +17 +17 +17 diff --git a/tests/hlsl-intrinsic/wave-lane-at-vk.slang b/tests/hlsl-intrinsic/wave-lane-at-vk.slang deleted file mode 100644 index 0d52f781e..000000000 --- a/tests/hlsl-intrinsic/wave-lane-at-vk.slang +++ /dev/null @@ -1,45 +0,0 @@ -// This is similar to wave-lane-at.slang but tests more limited supported types for vk. 
-// We have this 'simple' test, because we can't do matrix (or imat) operations on GLSL/Vk target - -//TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -dx12 -use-dxil -profile cs_6_0 -// TODO(JS): Disabled for now, as requires upgraded glslang -//DISABLE_TEST(compute, vulkan):COMPARE_COMPUTE_EX:-vk -compute - -//TEST_INPUT:ubuffer(data=[0 0 0 0], stride=4):out,name outputBuffer -RWStructuredBuffer outputBuffer; - -// `The input lane index must be uniform across the wave.`. -// The same restriction applies to glsl/SPIR-V 1.5 -// So we are going to use the input buffer to achieve this. - -//TEST_INPUT:ubuffer(data=[1 2 3 0], stride=4):name inputBuffer -RWStructuredBuffer inputBuffer; - -[numthreads(4, 1, 1)] -void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) -{ - int idx = int(dispatchThreadID.x); - - int value = 0; - - for (int i = 0; i < 4; ++i) - { - // Scalar - - // The landId is 'dynamic' but it also uniform across the wave (as required by spec) - const int laneId = inputBuffer[i]; - - value += WaveReadLaneAt(idx, laneId); - - // vector - - { - float2 v = float2(idx + 1, idx + 2); - float2 readValue = WaveReadLaneAt(v, (laneId + 1) & 3); - - value += int(readValue[0] + readValue[1]); - } - } - - outputBuffer[idx] = value; -} \ No newline at end of file diff --git a/tests/hlsl-intrinsic/wave-lane-at-vk.slang.expected.txt b/tests/hlsl-intrinsic/wave-lane-at-vk.slang.expected.txt deleted file mode 100644 index 4e98888c6..000000000 --- a/tests/hlsl-intrinsic/wave-lane-at-vk.slang.expected.txt +++ /dev/null @@ -1,4 +0,0 @@ -1E -1E -1E -1E diff --git a/tests/hlsl-intrinsic/wave-lane-at.slang b/tests/hlsl-intrinsic/wave-lane-at.slang deleted file mode 100644 index c3caaa4e8..000000000 --- a/tests/hlsl-intrinsic/wave-lane-at.slang +++ /dev/null @@ -1,54 +0,0 @@ -//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-cpu -compute -//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -//TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -dx12 -use-dxil -profile 
cs_6_0 -// Disabled on VK because glsl can't do WaveReadLaneAt on matrix. -//DISABLE_TEST(compute, vulkan):COMPARE_COMPUTE_EX:-vk -compute -//TEST(compute):COMPARE_COMPUTE_EX:-cuda -compute - -//TEST_INPUT:ubuffer(data=[0 0 0 0], stride=4):out,name outputBuffer -RWStructuredBuffer outputBuffer; - -// Note from HLSL: `The input lane index must be uniform across the wave.`. -// The same restriction applies to glsl/SPIR-V 1.5 -// So we are going to use the input buffer to achieve this. - -//TEST_INPUT:ubuffer(data=[1 2 3 0], stride=4):name inputBuffer -RWStructuredBuffer inputBuffer; - -[numthreads(4, 1, 1)] -void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) -{ - int idx = int(dispatchThreadID.x); - - int value = 0; - - for (int i = 0; i < 4; ++i) - { - // Scalar - - // The landId is 'dynamic' but it also uniform across the wave (as required by spec) - const int laneId = inputBuffer[i]; - - value += WaveReadLaneAt(idx, laneId); - - // vector - - { - float2 v = float2(idx + 1, idx + 2); - float2 readValue = WaveReadLaneAt(v, (laneId + 1) & 3); - - value += int(readValue[0] + readValue[1]); - } - - // matrix - { - matrix v = matrix(idx, idx - 1, idx * 3, idx - 2); - - matrix readValue = WaveReadLaneAt(v, (laneId - 1) & 3); - - value += int(readValue[0][0] + readValue[0][1] + readValue[1][0] + readValue[1][1]); - } - } - - outputBuffer[idx] = value; -} \ No newline at end of file diff --git a/tests/hlsl-intrinsic/wave-lane-at.slang.expected.txt b/tests/hlsl-intrinsic/wave-lane-at.slang.expected.txt deleted file mode 100644 index c6167dbae..000000000 --- a/tests/hlsl-intrinsic/wave-lane-at.slang.expected.txt +++ /dev/null @@ -1,4 +0,0 @@ -36 -36 -36 -36 diff --git a/tests/hlsl-intrinsic/wave-read-lane-at-vk.slang b/tests/hlsl-intrinsic/wave-read-lane-at-vk.slang new file mode 100644 index 000000000..3bd6b36b8 --- /dev/null +++ b/tests/hlsl-intrinsic/wave-read-lane-at-vk.slang @@ -0,0 +1,44 @@ +// This is similar to wave-lane-at.slang but tests more limited 
supported types for vk. +// We have this 'simple' test, because we can't do matrix (or imat) operations on GLSL/Vk target + +//TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -dx12 -use-dxil -profile cs_6_0 +//TEST(compute, vulkan):COMPARE_COMPUTE_EX:-vk -compute + +//TEST_INPUT:ubuffer(data=[0 0 0 0], stride=4):out,name outputBuffer +RWStructuredBuffer outputBuffer; + +// `The input lane index must be uniform across the wave.`. +// The same restriction applies to glsl/SPIR-V 1.5 +// So we are going to use the input buffer to achieve this. + +//TEST_INPUT:ubuffer(data=[1 2 3 0], stride=4):name inputBuffer +RWStructuredBuffer inputBuffer; + +[numthreads(4, 1, 1)] +void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) +{ + int idx = int(dispatchThreadID.x); + + int value = 0; + + for (int i = 0; i < 4; ++i) + { + // Scalar + + // The landId is 'dynamic' but it also uniform across the wave (as required by spec) + const int laneId = inputBuffer[i]; + + value += WaveReadLaneAt(idx, laneId); + + // vector + + { + float2 v = float2(idx + 1, idx + 2); + float2 readValue = WaveReadLaneAt(v, (laneId + 1) & 3); + + value += int(readValue[0] + readValue[1]); + } + } + + outputBuffer[idx] = value; +} \ No newline at end of file diff --git a/tests/hlsl-intrinsic/wave-read-lane-at-vk.slang.expected.txt b/tests/hlsl-intrinsic/wave-read-lane-at-vk.slang.expected.txt new file mode 100644 index 000000000..4e98888c6 --- /dev/null +++ b/tests/hlsl-intrinsic/wave-read-lane-at-vk.slang.expected.txt @@ -0,0 +1,4 @@ +1E +1E +1E +1E diff --git a/tests/hlsl-intrinsic/wave-read-lane-at.slang b/tests/hlsl-intrinsic/wave-read-lane-at.slang new file mode 100644 index 000000000..c3caaa4e8 --- /dev/null +++ b/tests/hlsl-intrinsic/wave-read-lane-at.slang @@ -0,0 +1,54 @@ +//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-cpu -compute +//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-slang -compute +//TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -dx12 -use-dxil -profile cs_6_0 +// Disabled on VK 
because glsl can't do WaveReadLaneAt on matrix. +//DISABLE_TEST(compute, vulkan):COMPARE_COMPUTE_EX:-vk -compute +//TEST(compute):COMPARE_COMPUTE_EX:-cuda -compute + +//TEST_INPUT:ubuffer(data=[0 0 0 0], stride=4):out,name outputBuffer +RWStructuredBuffer outputBuffer; + +// Note from HLSL: `The input lane index must be uniform across the wave.`. +// The same restriction applies to glsl/SPIR-V 1.5 +// So we are going to use the input buffer to achieve this. + +//TEST_INPUT:ubuffer(data=[1 2 3 0], stride=4):name inputBuffer +RWStructuredBuffer inputBuffer; + +[numthreads(4, 1, 1)] +void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) +{ + int idx = int(dispatchThreadID.x); + + int value = 0; + + for (int i = 0; i < 4; ++i) + { + // Scalar + + // The landId is 'dynamic' but it also uniform across the wave (as required by spec) + const int laneId = inputBuffer[i]; + + value += WaveReadLaneAt(idx, laneId); + + // vector + + { + float2 v = float2(idx + 1, idx + 2); + float2 readValue = WaveReadLaneAt(v, (laneId + 1) & 3); + + value += int(readValue[0] + readValue[1]); + } + + // matrix + { + matrix v = matrix(idx, idx - 1, idx * 3, idx - 2); + + matrix readValue = WaveReadLaneAt(v, (laneId - 1) & 3); + + value += int(readValue[0][0] + readValue[0][1] + readValue[1][0] + readValue[1][1]); + } + } + + outputBuffer[idx] = value; +} \ No newline at end of file diff --git a/tests/hlsl-intrinsic/wave-read-lane-at.slang.expected.txt b/tests/hlsl-intrinsic/wave-read-lane-at.slang.expected.txt new file mode 100644 index 000000000..c6167dbae --- /dev/null +++ b/tests/hlsl-intrinsic/wave-read-lane-at.slang.expected.txt @@ -0,0 +1,4 @@ +36 +36 +36 +36 diff --git a/tests/hlsl-intrinsic/wave-shuffle-vk.slang b/tests/hlsl-intrinsic/wave-shuffle-vk.slang index 01fb59155..75aa392ea 100644 --- a/tests/hlsl-intrinsic/wave-shuffle-vk.slang +++ b/tests/hlsl-intrinsic/wave-shuffle-vk.slang @@ -2,7 +2,6 @@ //DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-cpu -compute 
//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-slang -compute //DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -dx12 -use-dxil -profile cs_6_0 - //TEST(compute, vulkan):COMPARE_COMPUTE_EX:-vk -compute //TEST(compute):COMPARE_COMPUTE_EX:-cuda -compute -- cgit v1.2.3