// Source: tests/cuda/copy-elision-this-1.slang
// Blob: 376ef1f80ddc8481a876144120b8247bbc1fb377
//TEST:SIMPLE(filecheck=CUDA): -stage compute -entry computeMain -target cuda -line-directive-mode none
//TEST:SIMPLE(filecheck=PTX): -stage compute -entry computeMain -target cuda

// Parameter bundle used by this test: two read-only float buffers, one
// writable float buffer, and an index buffer that selects which input
// buffer to read on each loop iteration of the entry point.
struct Data {
    // Two read-only float buffers; `fetch` selects one by its first argument.
    StructuredBuffer<float> input[2];
    // Destination buffer written by the entry point.
    RWStructuredBuffer<float> output;
    // Not read anywhere in the visible code of this file.
    uint input_tensor_count;
    // Per-iteration selector values; each element is passed to `fetch` as
    // the `buffer` argument.
    StructuredBuffer<uint> index_buffer;
    // Number of elements of `index_buffer` that the entry point iterates over.
    uint index_count;

    // The three check directives directly below are matched against the
    // generated CUDA source. They expect a device function for `fetch` taking
    // two ints, whose first line after the opening brace indexes `input`
    // through the global parameter pointer.
    // NOTE(review): going by the test file name, this presumably guards
    // against a local copy of `this` being emitted before the buffer access;
    // confirm against the originating change.
    // CUDA: __device__ float Data_fetch{{.*}}(int {{.*}}, int {{.*}})
    // CUDA-NEXT: {
    // CUDA-NEXT: globalParams{{.*}}->data{{.*}}->input{{.*}}[{{.*}}]
    // Returns element `index` of `input[buffer]`.
    // Neither `buffer` nor `index` is range-checked here.
    float fetch(int buffer, int index)
    {
        return input[buffer][index];
    }
};

// Global parameter block holding the test's buffers and counts. The check
// pattern on `fetch` expects accesses to go through `globalParams...->data...`,
// so the name `data` is part of what the test matches.
ParameterBlock<Data> data;

// PTX: computeMain

// Compute entry point (8x8x1 threads per group).
// For each i in [0, data.index_count), reads a selector from
// data.index_buffer[i], fetches one float from the selected input buffer at
// element tid.x * 1024 + tid.y, and accumulates it. The sum is then stored to
// data.output at that same element index. tid.z is not used.
[shader("compute")]
[numthreads(8, 8, 1)]
void computeMain(uint3 tid: SV_DispatchThreadID)
{
    float result = 0.0;
    // `i` is a signed int compared against the unsigned `index_count`.
    for (int i = 0; i < data.index_count; ++i) {
        // Selector is read as uint and passed to `fetch`'s int parameter.
        // NOTE(review): the value is not checked against the size of
        // `input` (2) or against `input_tensor_count`.
        uint buffer = data.index_buffer[i];
        // Member call on the global parameter block; this is the call whose
        // generated callee the check directives on `fetch` inspect.
        result += data.fetch(buffer, tid.x * 1024 + tid.y);
    }
    data.output[tid.x * 1024 + tid.y] = result;
}