diff options
| author | jsmall-nvidia <jsmall@nvidia.com> | 2020-03-02 16:18:20 -0500 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2020-03-02 16:18:20 -0500 |
| commit | 8899c149b05def1cce626ea649012c4c974861de (patch) | |
| tree | 77e97c2997a653ba9262b32f55e9e3f37e166653 /tests | |
| parent | b85ca6f86d46ee3c4d5784d0bd4ebc8509e2a9bd (diff) | |
Additional Wave Intrinsic Support (#1252)
* Test for some wave intrinsics.
More wave intrinsic support on CUDA.
* Use shfl_xor_sync.
* Improvements around wave intrinsics.
Fix built-in integer types so that they belong to __BuiltinIntegerType.
* Improvements and fixes around Wave intrinsics.
* Added WaveIsFirstLane test.
No longer use __wavemask_lt, as it appears not to be available as an intrinsic.
* Small fixes to CUDA prelude.
* Add wave-active-product test.
Handle the special case for arbitrary sums.
* Used macro to implement CUDA wave intrinsics.
Diffstat (limited to 'tests')
| -rw-r--r-- | tests/hlsl-intrinsic/wave-active-product.slang | 31 | ||||
| -rw-r--r-- | tests/hlsl-intrinsic/wave-active-product.slang.expected.txt | 16 | ||||
| -rw-r--r-- | tests/hlsl-intrinsic/wave-is-first-lane.slang | 24 | ||||
| -rw-r--r-- | tests/hlsl-intrinsic/wave-is-first-lane.slang.expected.txt | 16 | ||||
| -rw-r--r-- | tests/hlsl-intrinsic/wave.slang | 36 | ||||
| -rw-r--r-- | tests/hlsl-intrinsic/wave.slang.expected.txt | 4 |
6 files changed, 127 insertions, 0 deletions
diff --git a/tests/hlsl-intrinsic/wave-active-product.slang b/tests/hlsl-intrinsic/wave-active-product.slang new file mode 100644 index 000000000..cacc0a539 --- /dev/null +++ b/tests/hlsl-intrinsic/wave-active-product.slang @@ -0,0 +1,31 @@ +//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-cpu -compute +//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-slang -compute +//TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -dx12 -use-dxil -profile cs_6_0 +//DISABLE_TEST(compute, vulkan):COMPARE_COMPUTE_EX:-vk -compute +//TEST(compute, vulkan):COMPARE_COMPUTE_EX:-cuda -compute + +//TEST_INPUT:ubuffer(data=[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0], stride=4):out,name outputBuffer +RWStructuredBuffer<int> outputBuffer; + +[numthreads(8, 1, 1)] +void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) +{ + const int idx = int(dispatchThreadID.x); + +#if 1 + if (idx < 3) + { + // Diverge!! + outputBuffer[idx] = -1; + return; + } + outputBuffer[idx] = WaveActiveProduct(idx); +#else + + /// NOTE! Can't say I totally understand WaveActiveProduct. + /// The following returns 0x240 on CUDA - which is what I'd expect + /// On DX12, it returns 0 + + outputBuffer[idx] = WaveActiveProduct((idx & 3) + 1); +#endif +}
\ No newline at end of file diff --git a/tests/hlsl-intrinsic/wave-active-product.slang.expected.txt b/tests/hlsl-intrinsic/wave-active-product.slang.expected.txt new file mode 100644 index 000000000..dbe392009 --- /dev/null +++ b/tests/hlsl-intrinsic/wave-active-product.slang.expected.txt @@ -0,0 +1,16 @@ +FFFFFFFF +FFFFFFFF +FFFFFFFF +9D8 +9D8 +9D8 +9D8 +9D8 +0 +0 +0 +0 +0 +0 +0 +0 diff --git a/tests/hlsl-intrinsic/wave-is-first-lane.slang b/tests/hlsl-intrinsic/wave-is-first-lane.slang new file mode 100644 index 000000000..39a19336d --- /dev/null +++ b/tests/hlsl-intrinsic/wave-is-first-lane.slang @@ -0,0 +1,24 @@ +//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-cpu -compute +//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-slang -compute +//TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -dx12 -use-dxil -profile cs_6_0 +//DISABLE_TEST(compute, vulkan):COMPARE_COMPUTE_EX:-vk -compute +//TEST(compute, vulkan):COMPARE_COMPUTE_EX:-cuda -compute + +//TEST_INPUT:ubuffer(data=[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0], stride=4):out,name outputBuffer +RWStructuredBuffer<int> outputBuffer; + +[numthreads(8, 1, 1)] +void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) +{ + int idx = int(dispatchThreadID.x); + + if (idx < 3) + { + // Diverge!! + outputBuffer[idx] = -1; + return; + } + + int value = 0; + outputBuffer[idx] = WaveIsFirstLane(); +}
\ No newline at end of file diff --git a/tests/hlsl-intrinsic/wave-is-first-lane.slang.expected.txt b/tests/hlsl-intrinsic/wave-is-first-lane.slang.expected.txt new file mode 100644 index 000000000..43debbc9d --- /dev/null +++ b/tests/hlsl-intrinsic/wave-is-first-lane.slang.expected.txt @@ -0,0 +1,16 @@ +FFFFFFFF +FFFFFFFF +FFFFFFFF +1 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 diff --git a/tests/hlsl-intrinsic/wave.slang b/tests/hlsl-intrinsic/wave.slang new file mode 100644 index 000000000..bc30da4ad --- /dev/null +++ b/tests/hlsl-intrinsic/wave.slang @@ -0,0 +1,36 @@ +//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-cpu -compute +//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-slang -compute +//TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -dx12 -use-dxil -profile cs_6_0 +//DISABLE_TEST(compute, vulkan):COMPARE_COMPUTE_EX:-vk -compute +//TEST(compute, vulkan):COMPARE_COMPUTE_EX:-cuda -compute + +//TEST_INPUT:ubuffer(data=[0 0 0 0], stride=4):out,name outputBuffer +RWStructuredBuffer<int> outputBuffer; + +[numthreads(4, 1, 1)] +void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) +{ + int idx = int(dispatchThreadID.x); + + int value = 0; + + value |= WaveActiveAllTrue(idx < 4 ) ? 1 : 0; + value |= WaveActiveAnyTrue(idx == 2) ? 2 : 0; + value |= WaveActiveAnyTrue(idx == -1) ? 4 : 0; + value |= WaveActiveAllTrue(idx == 3) ? 8 : 0; + + int sum = WaveActiveSum(idx); + value |= (sum << 4); + + // TODO(JS): + // This result is unexpected. I expect 1 * 2 * 1 * 2 = 4. But we get 0 on DX (so disable for now). On CUDA I get 4. + // int product = WaveActiveProduct((idx & 1) + 1); + /// value |= (product << 8); + + // TODO(JS): NOTE! This only works with uint, *NOT* int on HLSL/DXC. + // We need to update the stdlib to reflect this. + uint xor = WaveActiveBitXor(uint(idx + 1)); + value |= int(xor << 12); + + outputBuffer[idx] = value; +}
\ No newline at end of file diff --git a/tests/hlsl-intrinsic/wave.slang.expected.txt b/tests/hlsl-intrinsic/wave.slang.expected.txt new file mode 100644 index 000000000..a3dff7d2d --- /dev/null +++ b/tests/hlsl-intrinsic/wave.slang.expected.txt @@ -0,0 +1,4 @@ +4063 +4063 +4063 +4063 |
