summaryrefslogtreecommitdiffstats
path: root/examples/mlp-training/mlp_sw.slang
blob: 1e222b99fa4f01d6f7d913d8efabe622eff34ebd (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
module mlp_sw;

import common;

__include mlvec_sw;

/// A single feed-forward layer: `matMulAdd(input, weights, biases)` followed by
/// a leaky-ReLU activation, with a hand-written backward pass (`evalBwd`).
///
/// `InputSize` and `OutputSize` are the element counts of the layer's input and
/// output vectors.
public struct FeedForwardLayer<int InputSize, int OutputSize>
{
    /// Slope applied to negative pre-activations (leaky ReLU).
    /// Shared by `eval` and `evalBwd` so that the analytic derivative always
    /// matches the forward activation; previously the two passes used
    /// different literals (0.001 forward vs. 0.01 backward), which made the
    /// back-propagated gradient 10x too large for negative units.
    static const half kNegativeSlope = 0.01h;

    /// Layer parameters, read by `matMulAdd` / `matMulTransposed`.
    /// NOTE(review): memory layout of the matrix is defined by those helpers — confirm there.
    public NFloat* weights;
    /// Accumulation target for the weight gradients (written via `outerProductAccumulate`).
    public NFloat* weightsGrad;
    /// Per-output bias values, read by `matMulAdd`.
    public NFloat* biases;
    /// Accumulation target for the bias gradients; element `i` receives `resultGrad.data[i]`.
    public NFloat* biasesGrad;

    /// Forward pass: computes the affine transform of `input` and applies a
    /// leaky ReLU (negative elements are scaled by `kNegativeSlope`).
    ///
    /// The derivative is supplied manually by `evalBwd`.
    [BackwardDerivative(evalBwd)]
    public MLVec<OutputSize> eval(MLVec<InputSize> input)
    {
        var output = matMulAdd<OutputSize>(
            input,
            weights,
            biases);
        // Leaky ReLU activation: keep non-negative values, damp negative ones.
        for (int i = 0; i < OutputSize; i++)
            if (output.data[i] < 0.0)
                output.data[i] *= kNegativeSlope;
        return output;
    }

    /// Backward pass for `eval`.
    ///
    /// - `input`: on entry, `input.p` holds the primal input; on exit the pair
    ///   carries the same primal and the gradient with respect to the input.
    /// - `resultGrad`: gradient of the loss with respect to `eval`'s output.
    ///
    /// Side effects: accumulates into `weightsGrad` and `biasesGrad`.
    public void evalBwd(
        inout DifferentialPair<MLVec<InputSize>> input,
        MLVec<OutputSize> resultGrad)
    {
        // Recompute the forward result to know which units were in the
        // negative (damped) regime. The activated value has the same sign as
        // the pre-activation because the slope is positive.
        let fwd = eval(input.p);

        // Back-prop resultGrad through activation: derivative is
        // kNegativeSlope for negative units and 1 otherwise.
        for (int i = 0; i < OutputSize; i++)
        {
            if (fwd.data[i] < 0.0)
                resultGrad.data[i] *= kNegativeSlope;
        }

        // Back-prop gradients to the weights matrix.
        outerProductAccumulate(
            resultGrad,
            input.p,
            weightsGrad);

        // Back-prop gradients to the biases vector.
        // NOTE(review): presumably an atomic add so concurrent invocations can
        // accumulate into the same buffer — confirm in InterlockedAddF16Emulated.
        for (int i = 0; i < OutputSize; i++)
        {
            NFloat originalValue; // Required out-argument; its value is not used here.
            InterlockedAddF16Emulated(biasesGrad + i, resultGrad.data[i], originalValue);
        }

        // Back-prop gradients to the input vector.
        let dInput = matMulTransposed<InputSize>(resultGrad, weights);

        input = {input.p, dInput};
    }
}