summaryrefslogtreecommitdiffstats
path: root/tests/autodiff/autobind-struct-with-builtin-types.slang
blob: 70832cc40529b1a110c13e81d3f03defd20f23d6 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
//TEST:SIMPLE(filecheck=CUDA): -target cuda -line-directive-mode none
//TEST:SIMPLE(filecheck=TORCH): -target torch -line-directive-mode none

// Aggregate of built-in matrix/vector types, passed by value as the `input`
// parameter of plain_copy. The expected kernel signatures listed inside
// plain_copy refer to this parameter's type as U_StructStorage_MyStruct_0.
struct MyStruct
{
    // 3x3 float matrix; plain_copy reads elements [0][0] and [2][2].
    float3x3 data;
    // 3-component float vector; plain_copy reads element [1].
    float3 vec;
};

// Single-scalar struct, passed by value as the `input2` parameter of
// plain_copy. The expected kernel signatures listed inside plain_copy refer
// to this parameter's type as MyStruct2_0.
struct MyStruct2
{
    // Not read anywhere in the plain_copy body.
    float data;
};

// Test kernel: pulls three scalars out of the by-value `input` struct and
// stores them in the first three slots of `output`. `input2` is accepted but
// never read.
[AutoPyBindCUDA]
[CUDAKernel]
void plain_copy(MyStruct input, MyStruct2 input2, TensorView<float> output)
{
    // Expected kernel signatures in the generated code, one per test target:
    // CUDA: __global__ void __kernel__plain_copy(U_StructStorage_MyStruct_0 input_0, MyStruct2_0 input2_0, TensorView output_0)
    // TORCH:    void __kernel__plain_copy(U_StructStorage_MyStruct_0 _0, MyStruct2_0 _1, TensorView _2);

    // Global thread index: block index scaled by block size, plus the index
    // of the thread within its block.
    uint3 globalIdx = cudaBlockIdx() * cudaBlockDim() + cudaThreadIdx();

    // Only threads whose global x index is 0 perform the copy; all others
    // fall through without touching `output`.
    if (globalIdx.x < 1)
    {
        output[0] = input.data[0][0];
        output[1] = input.vec[1];
        output[2] = input.data[2][2];
    }
}