summaryrefslogtreecommitdiffstats
path: root/Whisper/MF/PcmReader.h
blob: aebfc1f0b20548cc1a402445fd952477ba73d1db (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
#pragma once
#include "../Whisper/audioConstants.h"
#include <mfidl.h>
#include <mfreadwrite.h>
#include "AudioBuffer.h"
#include "../API/iMediaFoundation.cl.h"

namespace Whisper
{
	// PCM buffer with 10 milliseconds of single-channel audio.
	// Holds exactly FFT_STEP FP32 samples; PcmReader::readChunk takes a reference to this structure as its mono output.
	struct PcmMonoChunk
	{
		// The samples of the chunk. There is no default initializer here, so a default-constructed chunk has indeterminate content
		std::array<float, FFT_STEP> mono;
	};
	// PCM buffer with 10 milliseconds of interleaved stereo.
	// Holds FFT_STEP * 2 FP32 values, i.e. twice the size of the mono chunk; PcmReader::readChunk takes a pointer to this structure as its stereo output.
	struct PcmStereoChunk
	{
		// The interleaved samples of the chunk. There is no default initializer here, so a default-constructed chunk has indeterminate content
		std::array<float, FFT_STEP * 2> stereo;
	};

	// Forward declaration of the sample handler interface.
	// This header only stores a pointer to it inside PcmReader, so the complete definition is not required here.
	__interface iSampleHandler;

	// HRESULT form of the Win32 ERROR_HANDLE_EOF error code.
	// NOTE(review): presumably the status PcmReader::readChunk returns once the stream is exhausted — confirm against the implementation
	constexpr HRESULT E_EOF = HRESULT_FROM_WIN32( ERROR_HANDLE_EOF );

	// Utility class which reads chunks of FFT_STEP FP32 PCM samples from the MF source reader.
	// The class always delivers mono chunks, and can optionally deliver stereo in a separate buffer.
	// Every call to readChunk() produces one chunk, i.e. 10 milliseconds of audio.
	class PcmReader
	{
		// A small intermediate buffer with PCM data for complete media foundation samples
		AudioBuffer pcm;
		// Index of the first unconsumed sample in the pcm buffer
		size_t bufferReadOffset = 0;
		// Utility object to abstract away mono versus stereo shenanigans.
		// Defaults to null, like the explicit defaults of the other members, so the pointer never holds an indeterminate value
		const iSampleHandler* sampleHandler = nullptr;
		// The underlying MF source reader which delivers audio data
		CComPtr<IMFSourceReader> reader;
		// True after we consumed all available media samples from the reader
		bool m_readerEndOfFile = false;
		// True if this object delivers stereo samples
		bool m_stereoOutput = false;
		// The count of chunks we expect to get from the reader
		size_t m_length = 0;
		// Read next sample from the reader, store in the PCM buffer in this class.
		// Returns an HRESULT status code
		HRESULT readNextSample();

	public:

		// Create the PCM reader on top of the supplied audio reader interface.
		// NOTE(review): the pointer is presumably expected to be non-null — confirm against the implementation
		PcmReader( const iAudioReader* reader );

		// Count of chunks in the MEL spectrogram.
		// The PCM audio is generally slightly longer than that, due to the incomplete last chunk.
		size_t getLength() const noexcept
		{
			return m_length;
		}

		// True when stereo output was requested, and the audio stream actually has 2 or more audio channels.
		// NOTE(review): the constructor has no separate stereo flag; presumably the request comes from the iAudioReader — confirm
		bool outputsStereo() const noexcept
		{
			return m_stereoOutput;
		}

		// Load another 10ms chunk from the stream.
		// The mono chunk is always written; the stereo chunk is a separate, optional output.
		// NOTE(review): the stereo pointer is presumably allowed to be null when outputsStereo() is false — confirm
		// For the last chunk in the stream, the output buffers are padded with zeros.
		// Returns an HRESULT status code
		HRESULT readChunk( PcmMonoChunk& mono, PcmStereoChunk* stereo );
	};
}