blob: 18e56dea0db05e321dce41b05a3793baac8ad391 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
|
#pragma once
#include "../../ComLightLib/comLightCommon.h"
#include "iTranscribeResult.cl.h"
#include "SpecialTokens.h"
#include "loggerApi.h"
#include "sLanguageList.h"
#include "sLoadModelCallbacks.h"
#include "eGpuModelFlags.h"
namespace Whisper
{
struct iModel;
struct iAudioBuffer;
struct iAudioReader;
struct iAudioCapture;
struct sCaptureCallbacks;
struct sFullParams;
enum struct eModelImplementation : uint32_t;
enum struct eSamplingStrategy : int;
using whisper_token = int;
struct sProgressSink;
struct DECLSPEC_NOVTABLE iContext : public ComLight::IUnknown
{
DEFINE_INTERFACE_ID( "{b9956374-3b18-4943-90f2-2ab18a404537}" );
// Run the entire model: PCM -> log mel spectrogram -> encoder -> decoder -> text
// Uses the specified decoding strategy to obtain the text.
virtual HRESULT COMLIGHTCALL runFull( const sFullParams& params, const iAudioBuffer* buffer ) = 0;
virtual HRESULT COMLIGHTCALL runStreamed( const sFullParams& params, const sProgressSink& progress, const iAudioReader* reader ) = 0;
virtual HRESULT COMLIGHTCALL runCapture( const sFullParams& params, const sCaptureCallbacks& callbacks, const iAudioCapture* reader ) = 0;
virtual HRESULT COMLIGHTCALL getResults( eResultFlags flags, iTranscribeResult** pp ) const = 0;
// Try to detect speaker by comparing channels of the stereo PCM data
virtual HRESULT COMLIGHTCALL detectSpeaker( const sTimeInterval& time, eSpeakerChannel& result ) const = 0;
virtual HRESULT COMLIGHTCALL getModel( iModel** pp ) = 0;
virtual HRESULT COMLIGHTCALL fullDefaultParams( eSamplingStrategy strategy, sFullParams* rdi ) = 0;
// Performance information
virtual HRESULT COMLIGHTCALL timingsPrint() = 0;
virtual HRESULT COMLIGHTCALL timingsReset() = 0;
};
struct DECLSPEC_NOVTABLE iModel : public ComLight::IUnknown
{
DEFINE_INTERFACE_ID( "{abefb4c9-e8d8-46a3-8747-5afbadef1adb}" );
virtual HRESULT COMLIGHTCALL createContext( iContext** pp ) = 0;
virtual HRESULT COMLIGHTCALL isMultilingual() = 0;
virtual HRESULT COMLIGHTCALL getSpecialTokens( SpecialTokens& rdi ) = 0;
// Token Id -> String
virtual const char* COMLIGHTCALL stringFromToken( whisper_token token ) = 0;
};
HRESULT COMLIGHTCALL setupLogger( const sLoggerSetup& setup );
HRESULT COMLIGHTCALL loadModel( const wchar_t* path, eModelImplementation impl, uint32_t flags, const sLoadModelCallbacks* callbacks, iModel** pp );
uint32_t COMLIGHTCALL findLanguageKeyW( const wchar_t* lang );
uint32_t COMLIGHTCALL findLanguageKeyA( const char* lang );
HRESULT COMLIGHTCALL getSupportedLanguages( sLanguageList& rdi );
}
#include "sFullParams.h"
|