summaryrefslogtreecommitdiffstats
path: root/tests/hlsl-intrinsic/wave-active-product.slang
blob: 49d87f474d5406c798fd5dacf8803a90a0b479ca (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
//TEST_CATEGORY(wave, compute)
//DISABLE_TEST:COMPARE_COMPUTE_EX:-cpu -compute -shaderobj
//DISABLE_TEST:COMPARE_COMPUTE_EX:-slang -compute -shaderobj
//TEST:COMPARE_COMPUTE_EX:-slang -compute -dx12 -profile cs_6_0 -shaderobj -render-feature hardware-device
//TEST(vulkan):COMPARE_COMPUTE_EX:-vk -compute -shaderobj -render-feature hardware-device
//TEST:COMPARE_COMPUTE_EX:-cuda -compute -capability cuda_sm_7_0 -shaderobj
//TEST:COMPARE_COMPUTE_EX:-metal -compute -shaderobj

// WGSL doesn't support wave functions in dynamic control flow; they only work in uniform control flow.
//DISABLE_TEST:COMPARE_COMPUTE_EX:-wgpu -compute -shaderobj

// Result buffer: eight 4-byte elements, all initialized to zero and marked as a
// test output. computeMain writes one element per thread, indexed by thread id.
//TEST_INPUT:ubuffer(data=[0 0 0 0  0 0 0 0], stride=4):out,name outputBuffer
RWStructuredBuffer<int> outputBuffer;

// Compute entry point, declared with 8 threads per thread group. Each thread
// writes exactly one element of outputBuffer, indexed by its dispatch thread id.
//
// The enabled (#if 1) path makes threads 0..2 return early and then calls
// WaveActiveProduct from the remaining threads, i.e. the intrinsic is invoked
// after the threads have diverged.
[numthreads(8, 1, 1)]
void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID)
{
    const int idx = int(dispatchThreadID.x);

#if 1
    if (idx < 3)
    {
        // Diverge!!
        outputBuffer[idx] = -1; // thread 0, 1 and 2 will get 0xFFFFFFFF
        return;
    }

    // Only threads 3, 4, 5, 6 and 7 reach this point (0, 1 and 2 returned above),
    // so they are expected to get the result of 3 * 4 * 5 * 6 * 7,
    // which is 2520, which is 9D8 in hex
    outputBuffer[idx] = WaveActiveProduct(idx);
#else

    /// NOTE! Can't say I totally understand WaveActiveProduct.
    /// The following returns 0x240 on CUDA - which is what I'd expect
    /// ((idx & 3) + 1 is 1, 2, 3, 4, 1, 2, 3, 4 for idx 0..7; the product is 576 = 0x240)
    /// On DX12, it returns 0

    outputBuffer[idx] = WaveActiveProduct((idx & 3) + 1);
#endif
}