blob: 5568f26c51ec1afef773e6d52fb18bc1fed8a6e8 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
|
//TEST:SIMPLE(filecheck=HLSL): -target hlsl -line-directive-mode none -entry computeMain -stage compute
//TEST:SIMPLE(filecheck=CUDA): -target cuda -line-directive-mode none
//TEST:SIMPLE(filecheck=TORCH): -target torch -line-directive-mode none
//TEST_INPUT:ubuffer(data=[0 0 0 0 0], stride=4):out,name=outputBuffer
RWStructuredBuffer<float> outputBuffer; // Bound by name through the TEST_INPUT directive above; written by computeMain.
// Shorthand for a float (primal, derivative) pair as consumed by fwd_diff.
typedef DifferentialPair<float> dpfloat;
// Shorthand for float's differential type. Not referenced in the visible part of this test.
typedef float.Differential dfloat;
// Differentiable helper shared by the compute entry point and the torch kernel:
// returns its argument multiplied by 4.
[Differentiable]
float func1(float x)
{
    float scaled = x * 4;
    return scaled;
}
// CUDA kernel with an auto-generated Python binding. Applies func1 in place to
// the first two elements of the tensor view.
[AutoPyBindCUDA]
[CUDAKernel]
void torchMain(TensorView<float> v)
{
    // Element 0: read, transform, write back.
    float first = func1(v[0]);
    v[0] = first;

    // Element 1: read, transform, write back.
    float second = func1(v[1]);
    v[1] = second;
}
// Shouldn't see torchMain (or its transformations) anywhere in the HLSL output
// HLSL-NOT:torchMain
// HLSL:func1
// HLSL-NOT:torchMain
// HLSL:computeMain
// HLSL-NOT:torchMain
// Differentiable identity function: hands back its argument unchanged.
[Differentiable]
float func2(float a)
{
    float result = a;
    return result;
}
// Compute entry point (selected with `-entry computeMain` on the HLSL test line).
// Runs forward-mode differentiation of func1 and func2 and stores the resulting
// derivatives in outputBuffer[0] and outputBuffer[1].
[numthreads(1, 1, 1)]
void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID)
{
    // Input pair: primal value 2.0 with a unit derivative.
    dpfloat dpa = dpfloat(2.0, 1.0);

    // func1(x) = x * 4, so a unit input derivative yields 4.
    outputBuffer[0] = fwd_diff(func1)(dpa).d; // Expect: 4

    // The input derivative is 0 here and func2 is the identity, so the result is 0.
    outputBuffer[1] = fwd_diff(func2)(dpfloat(dpa.p, 0.0)).d; // Expect: 0
}
// Ensure that the generated CUDA and Torch kernels do have torchMain & its transformations
// TORCH: {{^SLANG_PRELUDE_EXPORT$}}
// TORCH-NEXT: void __kernel__torchMain(TensorView {{[[:alnum:]_]+}});
// CUDA: __global__ void __kernel__torchMain(TensorView {{[[:alnum:]_]+}})
|