summary | refs | log | tree | commit | diff | stats
path: root/examples/mlp-training/mlvec_sw.slang
blob: 695755706652b0cac3332ef069cdfffdc7f4333a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
implementing mlp_sw;

/// Fixed-size vector of `N` `NFloat` elements that conforms to `IDifferentiable`.
public struct MLVec<int N> : IDifferentiable
{
    /// Element storage; `data[i]` is the i-th component of the vector.
    public NFloat data[N];

    /// Returns the vector's elements as a plain `NFloat[N]` array.
    [Differentiable]
    public NFloat[N] toArray()
    {
        return data;
    }

    /// Builds an `MLVec<N>` whose elements are copied one by one from `values`.
    [Differentiable]
    public static MLVec<N> fromArray(NFloat[N] values)
    {
        MLVec<N> vec;
        // Every element is written by the loop below, so `vec` needs no
        // explicit initializer.
        [ForceUnroll]
        for (int idx = 0; idx < N; idx++)
        {
            vec.data[idx] = values[idx];
        }
        return vec;
    }
}

/// Computes `matrix * input + bias` and returns the `OutputSize`-element result.
///
/// `matrix` is indexed as `OutputSize` rows of `InputSize` consecutive
/// elements (element (row, col) lives at `matrix[row * InputSize + col]`),
/// and `bias` is read at indices `0 .. OutputSize-1`.
MLVec<OutputSize> matMulAdd<int OutputSize, int InputSize>(MLVec<InputSize> input, NFloat* matrix, NFloat* bias)
{
    MLVec<OutputSize> output = {};
    for (int row = 0; row < OutputSize; row++)
    {
        // Start the accumulator from the bias term, then add the dot product
        // of this matrix row with the input, in ascending column order.
        NFloat acc = bias[row];
        for (int col = 0; col < InputSize; col++)
        {
            acc += matrix[row * InputSize + col] * input.data[col];
        }
        output.data[row] = acc;
    }
    return output;
}

/// Multiplies `input` by the transpose of the matrix stored at `matrix` and
/// returns the `OutputSize`-element result.
///
/// The stored matrix is indexed as `InputSize` rows of `OutputSize`
/// consecutive elements; output element `o` is the sum over `k` of
/// `matrix[k * OutputSize + o] * input.data[k]`.
MLVec<OutputSize> matMulTransposed<int OutputSize, int InputSize>(MLVec<InputSize> input, NFloat* matrix)
{
    MLVec<OutputSize> output = {};
    for (int outIdx = 0; outIdx < OutputSize; outIdx++)
    {
        // Zero-initialized accumulator for this output component.
        NFloat acc = {};
        for (int inIdx = 0; inIdx < InputSize; inIdx++)
        {
            // Walk down column `outIdx` of the stored matrix.
            acc += matrix[inIdx * OutputSize + outIdx] * input.data[inIdx];
        }
        output.data[outIdx] = acc;
    }
    return output;
}

/// Adds the outer product of `v0` and `v1` into the buffer at `matrix`.
///
/// For every pair (row, col) with `row < M` and `col < N`, the product
/// `v0.data[row] * v1.data[col]` is added to `matrix[row * N + col]` through
/// `InterlockedAddF16Emulated`.
/// NOTE(review): the helper is presumably an atomic half-precision add;
/// confirm against its declaration.
void outerProductAccumulate<int M, int N>(MLVec<M> v0, MLVec<N> v1, NFloat* matrix)
{
    for (int row = 0; row < M; row++)
    {
        // The left-hand factor is constant across the inner loop.
        let lhs = v0.data[row];
        for (int col = 0; col < N; col++)
        {
            let product = lhs * v1.data[col];
            // Receives the helper's third (output) argument; not used here.
            half previous;
            InterlockedAddF16Emulated(matrix + (row * N + col), product, previous);
        }
    }
}