From b380b1af6ba6f5f58e3841c2a5b14db7ee8c372d Mon Sep 17 00:00:00 2001 From: jsmall-nvidia Date: Tue, 10 Mar 2020 16:43:41 -0400 Subject: Wave Prefix Product (#1270) * Fix some typos. * Add wave-prefix-sum.slang test * First pass at implementing prefixSum. * Small improvements to prefixSum CUDA. * Small improvement to prefix sum. * Enable prefix sum in stdlib. * Wave prefix product without using a divide. * Split out SM6.5 Wave intrinsics. Template mechanism to do prefix calculations. --- tests/hlsl-intrinsic/wave-prefix-product.slang | 16 ++++++++++++++++ .../wave-prefix-product.slang.expected.txt | 8 ++++++++ 2 files changed, 24 insertions(+) create mode 100644 tests/hlsl-intrinsic/wave-prefix-product.slang create mode 100644 tests/hlsl-intrinsic/wave-prefix-product.slang.expected.txt (limited to 'tests') diff --git a/tests/hlsl-intrinsic/wave-prefix-product.slang b/tests/hlsl-intrinsic/wave-prefix-product.slang new file mode 100644 index 000000000..bc324ed7d --- /dev/null +++ b/tests/hlsl-intrinsic/wave-prefix-product.slang @@ -0,0 +1,16 @@ +//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-cpu -compute +//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-slang -compute +//TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -dx12 -use-dxil -profile cs_6_0 +//TEST(compute, vulkan):COMPARE_COMPUTE_EX:-vk -compute +//TEST(compute):COMPARE_COMPUTE_EX:-cuda -compute + +//TEST_INPUT:ubuffer(data=[0 0 0 0 0 0 0 0], stride=4):out,name outputBuffer +RWStructuredBuffer<int> outputBuffer; + +[numthreads(8, 1, 1)] +void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) +{ + int idx = int(dispatchThreadID.x); + int val = WavePrefixProduct(idx + 1); + outputBuffer[idx] = val; +} \ No newline at end of file diff --git a/tests/hlsl-intrinsic/wave-prefix-product.slang.expected.txt b/tests/hlsl-intrinsic/wave-prefix-product.slang.expected.txt new file mode 100644 index 000000000..03cb63ab9 --- /dev/null +++ b/tests/hlsl-intrinsic/wave-prefix-product.slang.expected.txt @@ -0,0 +1,8 @@ +1 +1 +2 
+6 +18 +78 +2D0 +13B0 -- cgit v1.2.3