// Whisper/CPU/DecoderTensors.cpp

#include "stdafx.h"
#include "DecoderTensors.h"
using namespace CpuCompute;

#if TENSOR_GGML_COMPAT
namespace
{
	class CompatContext
	{
		std::vector<ggml_tensor>& vec;
		size_t index;

	public:
		CompatContext( std::vector<ggml_tensor>& dest, size_t layers ) :
			vec( dest )
		{
			constexpr size_t tensorsPerLayer = 21;
			const size_t count = tensorsPerLayer * layers + 4;
			vec.resize( count );
			index = 0;
		}

		void add( const Tensor& rsi, ggml_tensor*& res )
		{
			ggml_tensor& ten = vec[ index ];
			index++;
			ten = rsi.ggml();
			res = &ten;
		}

		void add2( const TensorPair& rsi, ggml_tensor*& w, ggml_tensor*& b )
		{
			add( rsi.w, w );
			add( rsi.b, b );
		}

		bool isComplete() const
		{
			return index == vec.size();
		}
	};
}

void DecoderTensors::makeCompatTensors()
{
	CompatContext ctx( ggml, layers.size() );

	ctx.add( positionalEmbedding, d_pe );
	ctx.add( tokenEmbedding, d_te );
	ctx.add2( ln, d_ln_w, d_ln_b );

	for( auto& i : layers )
	{
		ctx.add2( i.attnLn0, i.attn_ln_0_w, i.attn_ln_0_b );
		ctx.add2( i.attnLn1, i.attn_ln_1_w, i.attn_ln_1_b );
		ctx.add2( i.attnQuery, i.attn_q_w, i.attn_q_b );
		ctx.add( i.attnKey, i.attn_k_w );
		ctx.add2( i.attnValue, i.attn_v_w, i.attn_v_b );
		ctx.add2( i.crossAttnLn0, i.cross_attn_ln_0_w, i.cross_attn_ln_0_b );
		ctx.add2( i.crossAttnLn1, i.cross_attn_ln_1_w, i.cross_attn_ln_1_b );
		ctx.add2( i.crossAttnQuery, i.cross_attn_q_w, i.cross_attn_q_b );
		ctx.add2( i.mlpLn, i.mlp_ln_w, i.mlp_ln_b );
		ctx.add2( i.mlp0, i.mlp_0_w, i.mlp_0_b );
		ctx.add2( i.mlp1, i.mlp_1_w, i.mlp_1_b );
	}
	assert( ctx.isComplete() );
}
#endif