1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
|
// TODO(JS):
// fxc, dxc and vk do not appear to support double versions of many of the intrinsics, so the tests for those
// targets are disabled below.
// Arguably, simple intrinsics that are missing on a target should be implemented in the core module.
// More complicated functions (such as sin) could also be written when unavailable on a target, but doing so
// requires significant care.
// TODO(JS):
// NOTE! The war-double-host-callable test category is a workaround for issues with x86 and host-callable.
//TEST(compute, war-double-host-callable):COMPARE_COMPUTE_EX:-cpu -compute -output-using-type
//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -output-using-type
//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -dx12 -output-using-type
//DISABLE_TEST(compute, vulkan):COMPARE_COMPUTE_EX:-vk -compute -output-using-type -render-feature double
//TEST(compute):COMPARE_COMPUTE_EX:-cuda -compute -output-using-type
//TEST_INPUT:ubuffer(data=[0 0 0 0 0 0 0 0], stride=4):out,name outputBuffer
// Result buffer: computeMain writes one double per thread index.
RWStructuredBuffer<double> outputBuffer;
// Scalar type under test; every matrix/vector alias below is built around double or a 64-bit integer.
typedef double Float;
// 2x2 matrix of Float, the main operand type for the intrinsics exercised in this file.
typedef matrix<Float, 2, 2> FloatMatrix;
// 2x2 matrix of signed 64-bit integers, used for Float <-> integer round trips.
typedef matrix<int64_t, 2, 2> IntMatrix;
// 2x2 matrix of unsigned 64-bit integers (only referenced from the disabled code in test2).
typedef matrix<uint64_t, 2, 2> UIntMatrix;
// 2-component vector of Float; the type of a single FloatMatrix row.
typedef vector<Float, 2> FloatVector;
// Returns the sum of the two components of `v` (x first, then y).
Float calcTotal(FloatVector v)
{
    Float total = v.x;
    total += v.y;
    return total;
}
// Returns the sum of all four elements of `v`: each row is summed on its
// own, then the two row sums are added together.
Float calcTotal(FloatMatrix v)
{
    Float firstRowSum = calcTotal(v[0]);
    Float secondRowSum = calcTotal(v[1]);
    return firstRowSum + secondRowSum;
}
// Builds a FloatMatrix by passing the single scalar `f` to the FloatMatrix constructor.
FloatMatrix makeFloatMatrix(Float f)
{
    FloatMatrix splatted = FloatMatrix(f);
    return splatted;
}
// Builds an IntMatrix whose four elements all hold the value `v`.
IntMatrix makeIntMatrix(int v)
{
    // Widen once to the matrix element type, then reuse it for every element.
    int64_t element = v;
    IntMatrix filled = { { element, element }, { element, element } };
    return filled;
}
// Exercises the single-operand (and a few two-operand) intrinsics on double matrices,
// folding every result into the running accumulator `ft`.
//
// ft  - accumulator; mostly added to, but overwritten once by `ft = f * ft`, so the
//       statement order below determines the final value.
// f   - input matrix; declared inout but only read here.
// idx - index into the local 4-entry table used by the log10 check.
void test1(inout FloatMatrix ft, inout FloatMatrix f, int idx)
{
// NOTE(review): presumably marks `f` as intentionally left unmodified despite being inout - confirm.
unmodified(f);
// fmod
// The remainder is scaled by 100, offset by 0.5 and passed through IntMatrix and back,
// so the contribution added to ft is integer-valued.
ft += FloatMatrix(IntMatrix(((f % makeFloatMatrix(0.11f)) * makeFloatMatrix(100)) + makeFloatMatrix(0.5)));
ft += sin(f);
// Let's try some matrix/matrix
// Note: this replaces ft instead of accumulating into it.
ft = f * ft;
// Let's try some vector matrix
{
// First row is vector * matrix, second row is matrix * vector.
FloatMatrix r = { mul(f[0], ft), mul(ft, f[1]) };
ft += r;
}
// Back to the transcendentals
ft += cos(f);
ft += tan(f);
ft += asin(f);
ft += acos(f);
ft += atan(f);
ft += atan2(f, makeFloatMatrix(2));
{
// sincos writes its two results through the sf and cf output arguments.
FloatMatrix sf, cf;
sincos(f, sf, cf);
ft += sf;
ft += cf;
}
// 1.0 is added before rcp (and before rsqrt below) so the argument is offset from f.
ft += rcp(makeFloatMatrix(1.0) + f);
ft += FloatMatrix(sign(f - makeFloatMatrix(0.5)));
ft += saturate(f * makeFloatMatrix(4) - makeFloatMatrix(2.0));
ft += sqrt(f);
ft += rsqrt(makeFloatMatrix(1.0f) + f);
ft += exp2(f);
ft += exp(f);
ft += exp10(f);
// Rounding-style intrinsics; f is scaled and shifted before each call.
ft += frac(f * makeFloatMatrix(3));
ft += ceil(f * makeFloatMatrix(5) - makeFloatMatrix(3));
ft += floor(f * makeFloatMatrix(10) - makeFloatMatrix(7));
ft += trunc(f * makeFloatMatrix(7));
ft += log(f + makeFloatMatrix(10.0));
ft += log2(f * makeFloatMatrix(3) + makeFloatMatrix(2));
{
// log10 is fed a table entry selected by idx; the result is offset by 0.5 and passed
// through IntMatrix and back before being accumulated.
// NOTE(review): the table has 4 entries, so idx is assumed to be in [0, 3] - confirm against the dispatch size.
float scalarVs[] = { 1, 10, 100, 1000 };
ft += FloatMatrix(IntMatrix(log10(makeFloatMatrix(Float(scalarVs[idx]))) + makeFloatMatrix(0.5f)));
}
ft += abs(f * makeFloatMatrix(4) - makeFloatMatrix(2.0f));
ft += min(makeFloatMatrix(0.5), f);
ft += max(f, makeFloatMatrix(0.75));
}
// Exercises the multi-operand intrinsics (pow, smoothstep, lerp, clamp, step) on double
// matrices, adding every result into the accumulator `ft`.
//
// ft - accumulator, only ever added to in this function.
// f  - input matrix; declared inout but only read here.
void test2(inout FloatMatrix ft, inout FloatMatrix f)
{
// NOTE(review): presumably marks `f` as intentionally left unmodified despite being inout - confirm.
unmodified(f);
ft += pow(makeFloatMatrix(0.5), f);
ft += smoothstep(makeFloatMatrix(0.2), makeFloatMatrix(0.7), f);
ft += lerp(makeFloatMatrix(-100), makeFloatMatrix(100), f);
ft += clamp(f, makeFloatMatrix(0.1), makeFloatMatrix(0.3));
ft += step(f, makeFloatMatrix(0.5));
// Bit-cast round trips are compiled out.
// NOTE(review): the disabled code references `idx`, which is not a parameter of this
// function, so it would need updating before it could be re-enabled.
#if 0
IntMatrix vi = asint(makeFloatMatrix(idx));
ft += asfloat(vi);
UIntMatrix vu = asuint(f);
ft += asfloat(vu);
#endif
}
// Compute entry point. Each thread derives a per-thread input matrix from its
// x dispatch index, runs both intrinsic test passes over it and stores the
// sum of the accumulated matrix in outputBuffer at that index.
[numthreads(4, 1, 1)]
void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID)
{
    int threadIndex = int(dispatchThreadID.x);

    // Per-thread base value: the thread index scaled by one quarter.
    Float base = threadIndex * (1.0f / (4.0f));

    // Input matrix: the base value plus a small, distinct offset per element.
    FloatMatrix input = { { base + 0.01, base + 0.02 }, { base + 0.011, base + 0.022 } };

    // Accumulator starts out default-initialized.
    FloatMatrix accumulated = {};

    test1(accumulated, input, threadIndex);
    test2(accumulated, input);

    outputBuffer[threadIndex] = calcTotal(accumulated);
}
|