Whisper/MF/AudioBuffer.h


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69

#pragma once
#include <algorithm>
#include <vector>

namespace Whisper
{
	struct AudioBuffer
	{
		std::vector<float> mono;
		std::vector<float> stereo;

		void appendMono( const float* rsi, size_t countFloats );
		void appendDownmixedStereo( const float* rsi, size_t countFloats );
		void appendStereo( const float* rsi, size_t countFloats );

		using pfnAppendSamples = void( AudioBuffer::* )( const float* rsi, size_t countFloats );

		inline static pfnAppendSamples appendSamplesFunc( bool sourceMono, bool wantStereo )
		{
			if( sourceMono )
				return &AudioBuffer::appendMono;
			else if( !wantStereo )
				return &AudioBuffer::appendDownmixedStereo;
			else
				return &AudioBuffer::appendStereo;
		}

		void clear()
		{
			mono.clear();
			stereo.clear();
		}

		void swap( AudioBuffer& that )
		{
			mono.swap( that.mono );
			stereo.swap( that.stereo );
		}

		void resize( size_t len )
		{
			assert( len <= mono.size() );
			mono.resize( len );
			if( !stereo.empty() )
				stereo.resize( len * 2 );
		}

		void dropFirst(size_t len)
		{
			assert(len <= mono.size());
			size_t remainder = mono.size() - len;
			auto tmp = std::vector<float>(remainder);
			memcpy(tmp.data(), mono.data() + len, remainder);
			mono = std::move(tmp);
		}

		void normalize()
		{
			const auto &min = *std::min_element(mono.begin(), mono.end());
			const auto &max = *std::max_element(mono.begin(), mono.end());

			for (auto& elm : mono) {
				elm -= min;
				elm /= (max - min) + 1;
				elm *= 255.0;
			}
		}
	};
}