tests/autodiff/autopybind-basic.slang


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25

//TEST:SIMPLE(filecheck=CUDA): -target cuda -line-directive-mode none
//TEST:SIMPLE(filecheck=TORCH): -target torch -line-directive-mode none

// CUDA: __global__ void __kernel__myKernel(TensorView inValues_[[#]], TensorView outValues_[[#]])
[AutoPyBindCUDA]
[CudaKernel]
void myKernel(TensorView<float> inValues, TensorView<float> outValues)
{
    if (cudaThreadIdx().x > 0)
        return;
    outValues.store(cudaThreadIdx().x, sin(inValues.load(cudaThreadIdx().x)));
}

// TORCH:      {{^SLANG_PRELUDE_EXPORT$}}
// TORCH-NEXT: void __kernel__myKernel(TensorView {{[[:alnum:]_]+}}, TensorView {{[[:alnum:]_]+}});

// TORCH:      {{^SLANG_PRELUDE_EXPORT$}}
// TORCH-NEXT: void myKernel(std::tuple<uint32_t, uint32_t, uint32_t> {{[[:alnum:]_]+}}, std::tuple<uint32_t, uint32_t, uint32_t> {{[[:alnum:]_]+}}, torch::Tensor {{[[:alnum:]_]+}}, torch::Tensor {{[[:alnum:]_]+}})

// TORCH:      {{^SLANG_PRELUDE_EXPORT$}}
// TORCH-NEXT: std::tuple<std::tuple<const char*, const char*, const char*, const char*>, std::tuple<const char*, const char*>, const char*, const char*> __funcinfo__myKernel()

// TORCH:      m.def("myKernel", &myKernel, "myKernel");

// TORCH:      m.def("__funcinfo__myKernel", &__funcinfo__myKernel, "__funcinfo__myKernel");