summaryrefslogtreecommitdiffstats
path: root/tests/hlsl-intrinsic/wave-read-lane-at.slang
blob: ad26b6d9e37ebc34a28fb257948720b07a59e871 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
//TEST_CATEGORY(wave, compute)
//DISABLE_TEST:COMPARE_COMPUTE_EX:-cpu -compute -shaderobj
//DISABLE_TEST:COMPARE_COMPUTE_EX:-slang -compute -shaderobj
//TEST:COMPARE_COMPUTE_EX:-slang -compute -dx12 -profile cs_6_0 -shaderobj -render-feature hardware-device
// Disabled on Vulkan because GLSL cannot perform WaveReadLaneAt on a matrix.
//DISABLE_TEST(vulkan):COMPARE_COMPUTE_EX:-vk -compute -shaderobj
//TEST:COMPARE_COMPUTE_EX:-cuda -compute -capability cuda_sm_7_0 -shaderobj

// Result buffer: computeMain stores each thread's accumulated value at
// outputBuffer[idx], where idx is the thread's dispatch index along x.
// NOTE(review): the `out` flag presumably marks this buffer as the one the
// test harness compares against the expected output - confirm.
//TEST_INPUT:ubuffer(data=[0 0 0 0], stride=4):out,name outputBuffer
RWStructuredBuffer<int> outputBuffer;

// Note from the HLSL documentation: "The input lane index must be uniform across the wave."
// The same restriction applies to GLSL/SPIR-V 1.5.
// To satisfy it, the lane indices are stored in this input buffer and fetched
// with the loop counter only, so every thread reads the same lane index on a
// given iteration while the value is still not a compile-time constant.

//TEST_INPUT:ubuffer(data=[1 2 3 0], stride=4):name inputBuffer
RWStructuredBuffer<int> inputBuffer;

// Compute entry point. Each thread accumulates values fetched from other lanes
// via WaveReadLaneAt - once with a scalar operand, once with a float2 and once
// with a 2x2 int matrix - over four iterations, then writes its total to
// outputBuffer at its own thread index.
[numthreads(4, 1, 1)]
void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID)
{
    const int threadIdx = int(dispatchThreadID.x);

    // The vector and matrix operands depend only on the thread index, so they
    // are built once here rather than on every loop iteration.
    float2 vecOperand = float2(threadIdx + 1, threadIdx + 2);
    matrix<int, 2, 2> matOperand =
        matrix<int, 2, 2>(threadIdx, threadIdx - 1, threadIdx * 3, threadIdx - 2);

    int total = 0;

    for (int step = 0; step < 4; step++)
    {
        // The lane id is 'dynamic' (loaded from a buffer) but also uniform
        // across the wave, as the spec requires: it is indexed by the loop
        // counter only, so every thread loads the same element.
        const int srcLane = inputBuffer[step];

        // Scalar: fetch the thread index held by the source lane.
        total += WaveReadLaneAt(threadIdx, srcLane);

        // Vector: fetch from lane (srcLane + 1), masked with 3 to stay in 0..3.
        float2 vecRead = WaveReadLaneAt(vecOperand, (srcLane + 1) & 3);
        total += int(vecRead[0] + vecRead[1]);

        // Matrix: fetch from lane (srcLane - 1), masked with 3 to stay in 0..3,
        // and add up all four elements.
        matrix<int, 2, 2> matRead = WaveReadLaneAt(matOperand, (srcLane - 1) & 3);
        total += int(matRead[0][0] + matRead[0][1] + matRead[1][0] + matRead[1][1]);
    }

    outputBuffer[threadIdx] = total;
}