summaryrefslogtreecommitdiffstats
path: root/tests
diff options
context:
space:
mode:
authorjsmall-nvidia <jsmall@nvidia.com>2020-04-15 14:14:58 -0400
committerGitHub <noreply@github.com>2020-04-15 14:14:58 -0400
commitd5d32221daf950b2f923122a179e791572dd6cb6 (patch)
tree0f4bd215c11abc98d0e1f9b3da920838e6e5862b /tests
parentfbac017938343724407ab036abd736c942b4e187 (diff)
First support for 'WaveMask' intrinsics (#1321)
* WIP tests to confirm divergence on CUDA. * Added wave.slang test that uses masks. Made all CUDA intrinsic impls take a mask explicitly. Added initial WaveMaskXXX intrinsics. * Added WaveMaskSharedSync. * Improvements around WaveMaskSharedSync/WaveMaskSync * Remove tabs.
Diffstat (limited to 'tests')
-rw-r--r--tests/hlsl-intrinsic/wave-mask/wave.slang64
1 files changed, 64 insertions, 0 deletions
diff --git a/tests/hlsl-intrinsic/wave-mask/wave.slang b/tests/hlsl-intrinsic/wave-mask/wave.slang
new file mode 100644
index 000000000..6b641906d
--- /dev/null
+++ b/tests/hlsl-intrinsic/wave-mask/wave.slang
@@ -0,0 +1,64 @@
+//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-cpu -compute
+//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-slang -compute
+//TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -dx12 -use-dxil -profile cs_6_0
+//TEST(compute, vulkan):COMPARE_COMPUTE_EX:-vk -compute
+//TEST(compute):COMPARE_COMPUTE_EX:-cuda -compute
+// Result buffer: computeMain stores one int per thread here; the directive below lists 16 zero values for it.
+//TEST_INPUT:ubuffer(data=[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0], stride=4):out,name outputBuffer
+RWStructuredBuffer<int> outputBuffer;
+// Source buffer: computeMain reads inputBuffer[index] as the value fed into the prefix sum.
+//TEST_INPUT:ubuffer(data=[3 10 2 -1 4 53 4 6 1 2 3 4 7 5 3 1], stride=4):name inputBuffer
+RWStructuredBuffer<int> inputBuffer;
+// Groupshared scratch, 32 int slots: exclusivePrefixSum writes sharedMem[index] and computeMain reads it back.
+groupshared int sharedMem[32];
+
+// Computes the exclusive prefix sum of `originalValue` across the lanes of `mask` that have waveLaneId < elementCount.
+// The sum is written to sharedMem[index] and returned; lanes with waveLaneId >= elementCount write and return 0.
+int exclusivePrefixSum(WaveMask mask, int index, int waveLaneId, int originalValue, int elementCount)
+{
+    WaveMask localMask = WaveMaskBallot(mask, waveLaneId < elementCount);
+    // Default for lanes that skip the scan below.
+    sharedMem[index] = 0;
+
+    if(waveLaneId < elementCount)
+    {
+        int val = originalValue;
+        // Inclusive scan with the stride doubling each pass (i = 1, 2, 4, ...). The shuffle stays outside the `if` so every
+        // lane in localMask issues it; a lane with no partner `i` lanes below reads itself, never a negative lane index.
+        for(int i = 1; i < elementCount; i += i)
+        {
+            const int srcLane = (waveLaneId >= i) ? (waveLaneId - i) : waveLaneId;
+            const int partner = WaveMaskShuffle(localMask, val, srcLane);
+            if(waveLaneId >= i)
+            {
+                val += partner;
+            }
+        }
+        // Make it an exclusive prefix sum
+        val -= originalValue;
+        // Write to shared memory
+        sharedMem[index] = val;
+        // Synchronizes on the mask, and ensures memory fence for shared data write
+        WaveMaskSharedSync(localMask);
+        return val;
+    }
+
+    return 0;
+}
+
+// Entry point, 32 threads per group: runs the masked prefix sum over the first 9 lanes and copies each thread's result out.
+[numthreads(32, 1, 1)]
+void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID)
+{
+    const int scanWidth = 9; // number of leading lanes that take part in the scan
+    const int slot = int(dispatchThreadID.x);
+    const int laneId = WaveGetLaneIndex();
+    const int laneInput = inputBuffer[slot];
+
+    // NOTE(review): the group is 32 threads wide, yet the input/output test buffers list only 16 values - confirm that
+    // accesses from threads 16..31 are tolerated on every target.
+    // The return value is deliberately dropped: the result is re-read from shared memory to check that path.
+    exclusivePrefixSum(WaveGetActiveMask(), slot, laneId, laneInput, scanWidth);
+
+    outputBuffer[slot] = sharedMem[slot];
+} \ No newline at end of file