1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
|
module mlp;
import common;
__include mlvec;
// Component type handed to the coopVec* intrinsics called by FeedForwardLayer
// (for the input vector, the weights matrix, the biases and the gradient matrix).
// We use Float16 for the CoopVec component type since it is more widely supported.
static const CoopVecComponentType kComponentType = CoopVecComponentType.Float16;
/// A fully connected layer followed by a leaky-ReLU activation:
///
///     eval(x) = leakyRelu(mul(weights, x) + biases)
///
/// `InputSize` is the number of input components and `OutputSize` the number
/// of output components. `eval` has a hand-written backward derivative,
/// `evalBwd`, which must stay in sync with the forward computation.
public struct FeedForwardLayer<int InputSize, int OutputSize>
{
    // Slope the leaky ReLU applies to negative values. It is shared by the
    // forward pass (`eval`) and its derivative (`evalBwd`) so the two can
    // never disagree about the activation's gradient.
    internal static const half kLeakyReluSlope = 0.01h;

    // Weights matrix, read by `eval` as OutputSize(row) x InputSize(col), row-major.
    internal void* weights;

    // Destination of the weights gradient written by `evalBwd`
    // (TrainingOptimal matrix layout).
    internal void* weightsGrad;

    // Bias vector added to the matrix-vector product in `eval`.
    internal void* biases;

    // Destination of the bias gradient written by `evalBwd`.
    internal void* biasesGrad;

    /// Forward pass: returns leakyRelu(mul(weights, input) + biases).
    public MLVec<OutputSize> eval(MLVec<InputSize> input)
    {
        // Compute mul(weights, inputVec) + biases.
        // `weights` is treated as an OutputSize(row) x InputSize(col) matrix.
        var output = coopVecMatMulAdd<NFloat, OutputSize>(
            input.data, kComponentType,   // input and format
            weights, kComponentType,      // weights and format
            biases, kComponentType,       // biases and format
            CoopVecMatrixLayout.RowMajor, // matrix layout
            false,                        // transpose matrix? must be `false` since we specified RowMajor.
            InputSize * sizeof(NFloat));  // matrix stride

        // Leaky ReLU activation: for a slope in (0, 1), max(x, slope * x) keeps
        // positive values and scales negative values by the slope.
        output = max(output, output * kLeakyReluSlope);
        return {output};
    }

    /// Backward pass of `eval`.
    ///
    /// `input.p` is the primal input that was given to `eval`; on return the
    /// differential part of `input` holds the gradient with respect to the
    /// input. `resultGrad` is the gradient with respect to the output of `eval`.
    /// The gradients for the weights and biases are accumulated into
    /// `weightsGrad` and `biasesGrad`.
    [BackwardDerivativeOf(eval)]
    public void evalBwd(
        inout DifferentialPair<MLVec<InputSize>> input,
        MLVec<OutputSize> resultGrad)
    {
        // Re-run the forward pass to find out which outputs were on the
        // negative side of the activation. The activation preserves sign, so
        // testing the activated output is equivalent to testing the
        // pre-activation value.
        let fwd = eval(input.p);

        // Back-prop resultGrad through activation: the derivative is 1 for
        // non-negative outputs and kLeakyReluSlope for negative ones.
        [ForceUnroll]
        for (int i = 0; i < OutputSize; i++)
        {
            if (fwd.data[i] < 0.0)
                resultGrad.data[i] *= kLeakyReluSlope;
        }

        // Back-prop gradients to the weights matrix.
        coopVecOuterProductAccumulate(
            resultGrad.data,
            input.p.data,
            weightsGrad,
            0,                                   // matrixStride, ignored since layout is TrainingOptimal
            CoopVecMatrixLayout.TrainingOptimal, // matrix layout, must be TrainingOptimal.
            kComponentType);

        // Back-prop gradients to the biases vector.
        coopVecReduceSumAccumulate(resultGrad.data, biasesGrad);

        // Back-prop gradients to the input vector by computing
        // mul(transpose(weights), resultGrad).
        // By specifying the matrix layout as ColumnMajor, we can
        // achieve the effect of transposing the weights matrix.
        let dInput = coopVecMatMul<NFloat, InputSize>(
            resultGrad.data, kComponentType,
            weights, kComponentType,
            CoopVecMatrixLayout.ColumnMajor,
            false,                       // transpose, must be `false` since we specified ColumnMajor.
            InputSize * sizeof(NFloat)); // same stride as in `eval`: one row of `weights` is one column here

        // Keep the primal value and store the input gradient as the differential.
        input = {input.p, {dInput}};
    }
}
|