From 76b9ff6e65b4bd2be04a5bab0eb1464455c4b3ff Mon Sep 17 00:00:00 2001 From: jsmall-nvidia Date: Mon, 16 Mar 2020 15:01:21 -0400 Subject: CUDA support of MultiPrefix Wave intrinsics. (#1275) Support for cs_6_5 and cs_6_4 in profile Added wave-multi-prefix.slang test --- tests/hlsl-intrinsic/wave-multi-prefix.slang | 26 ++++++++++++++++++++++ .../wave-multi-prefix.slang.expected.txt | 8 +++++++ 2 files changed, 34 insertions(+) create mode 100644 tests/hlsl-intrinsic/wave-multi-prefix.slang create mode 100644 tests/hlsl-intrinsic/wave-multi-prefix.slang.expected.txt (limited to 'tests') diff --git a/tests/hlsl-intrinsic/wave-multi-prefix.slang b/tests/hlsl-intrinsic/wave-multi-prefix.slang new file mode 100644 index 000000000..3eee16e31 --- /dev/null +++ b/tests/hlsl-intrinsic/wave-multi-prefix.slang @@ -0,0 +1,26 @@ +//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-cpu -compute +//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-slang -compute +// We need SM6.5 for these tests +// Disable because version of dxc we are currently using doesn't support SM6.5 +//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -dx12 -use-dxil -profile sm_6_5 +// Disabled because we don't have GLSL intrinsics for these it seems +//DISABLE_TEST(compute, vulkan):COMPARE_COMPUTE_EX:-vk -compute +//TEST(compute):COMPARE_COMPUTE_EX:-cuda -compute + +//TEST_INPUT:ubuffer(data=[0 0 0 0 0 0 0 0], stride=4):out,name outputBuffer +RWStructuredBuffer<int> outputBuffer; + +[numthreads(8, 1, 1)] +void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) +{ + int idx = int(dispatchThreadID.x); + + int value = 0; + + uint4 mask = WaveMatch(true); + + // Scalar + value += WaveMultiPrefixSum(1 << idx, mask); + + outputBuffer[idx] = value; +} \ No newline at end of file diff --git a/tests/hlsl-intrinsic/wave-multi-prefix.slang.expected.txt b/tests/hlsl-intrinsic/wave-multi-prefix.slang.expected.txt new file mode 100644 index 000000000..6ec6deeea --- /dev/null +++ 
b/tests/hlsl-intrinsic/wave-multi-prefix.slang.expected.txt @@ -0,0 +1,8 @@ +0 +1 +3 +7 +F +1F +3F +7F -- cgit v1.2.3