diff options
| author | Konstantin <const@const.me> | 2023-01-28 15:18:02 +0100 |
|---|---|---|
| committer | Konstantin <const@const.me> | 2023-01-28 15:18:02 +0100 |
| commit | e1e3ac09a97d602a6ea60ff1928de77de81d99a7 (patch) | |
| tree | db38b886ef5d93b76aaecfe89de8fc421616646e | |
| parent | 214aacaa5c0a685f8be1cbe4fe06f5a1af8ad2d4 (diff) | |
DLL API for diarize feature
| -rw-r--r-- | Whisper/API/TranscribeStructs.h | 9 | ||||
| -rw-r--r-- | Whisper/API/iContext.cl.h | 2 | ||||
| -rw-r--r-- | Whisper/API/iContext.h | 2 | ||||
| -rw-r--r-- | Whisper/Whisper/ContextImpl.h | 1 | ||||
| -rw-r--r-- | Whisper/Whisper/ContextImpl.misc.cpp | 5 | ||||
| -rw-r--r-- | WhisperNet/API/eSpeakerChannel.cs | 15 | ||||
| -rw-r--r-- | WhisperNet/Internal/iContext.cs | 4 |
7 files changed, 38 insertions, 0 deletions
diff --git a/Whisper/API/TranscribeStructs.h b/Whisper/API/TranscribeStructs.h index ac28357..29bb9ab 100644 --- a/Whisper/API/TranscribeStructs.h +++ b/Whisper/API/TranscribeStructs.h @@ -124,4 +124,13 @@ namespace Whisper { return 0 != ( (uint32_t)a & (uint32_t)b ); } + + // Output value for iContext.detectSpeaker method + enum struct eSpeakerChannel : uint8_t + { + Unsure = 0, + Left = 1, + Right = 2, + NoStereoData = 0xFF, + }; }
\ No newline at end of file diff --git a/Whisper/API/iContext.cl.h b/Whisper/API/iContext.cl.h index fdb15ce..18e56de 100644 --- a/Whisper/API/iContext.cl.h +++ b/Whisper/API/iContext.cl.h @@ -31,6 +31,8 @@ namespace Whisper virtual HRESULT COMLIGHTCALL runCapture( const sFullParams& params, const sCaptureCallbacks& callbacks, const iAudioCapture* reader ) = 0; virtual HRESULT COMLIGHTCALL getResults( eResultFlags flags, iTranscribeResult** pp ) const = 0; + // Try to detect speaker by comparing channels of the stereo PCM data + virtual HRESULT COMLIGHTCALL detectSpeaker( const sTimeInterval& time, eSpeakerChannel& result ) const = 0; virtual HRESULT COMLIGHTCALL getModel( iModel** pp ) = 0; diff --git a/Whisper/API/iContext.h b/Whisper/API/iContext.h index d6ca29c..fc38a53 100644 --- a/Whisper/API/iContext.h +++ b/Whisper/API/iContext.h @@ -28,6 +28,8 @@ namespace Whisper HRESULT __stdcall runCapture( const sFullParams& params, const sCaptureCallbacks& callbacks, const iAudioCapture* reader ); HRESULT __stdcall getResults( eResultFlags flags, iTranscribeResult** pp ) const; + // Try to detect speaker by comparing channels of the stereo PCM data + HRESULT __stdcall detectSpeaker( const sTimeInterval& time, eSpeakerChannel& result ) const; HRESULT __stdcall getModel( iModel** pp ); diff --git a/Whisper/Whisper/ContextImpl.h b/Whisper/Whisper/ContextImpl.h index 971f629..448efd5 100644 --- a/Whisper/Whisper/ContextImpl.h +++ b/Whisper/Whisper/ContextImpl.h @@ -63,6 +63,7 @@ namespace Whisper HRESULT COMLIGHTCALL makeResults( eResultFlags flags, TranscribeResult& res ) const noexcept; HRESULT COMLIGHTCALL getResults( eResultFlags flags, iTranscribeResult** pp ) const noexcept override final; + HRESULT COMLIGHTCALL detectSpeaker( const sTimeInterval& time, eSpeakerChannel& result ) const noexcept override final; int defaultThreadsCount() const; diff --git a/Whisper/Whisper/ContextImpl.misc.cpp b/Whisper/Whisper/ContextImpl.misc.cpp index 9a156fb..9eb4c04 100644 --- a/Whisper/Whisper/ContextImpl.misc.cpp +++ b/Whisper/Whisper/ContextImpl.misc.cpp @@ -401,4 +401,9 @@ HRESULT COMLIGHTCALL ContextImpl::runStreamed( const sFullParams& params, const { return hr; } +} + +HRESULT COMLIGHTCALL ContextImpl::detectSpeaker( const sTimeInterval& time, eSpeakerChannel& result ) const noexcept +{ + return E_NOTIMPL; }
\ No newline at end of file diff --git a/WhisperNet/API/eSpeakerChannel.cs b/WhisperNet/API/eSpeakerChannel.cs new file mode 100644 index 0000000..edb96e0 --- /dev/null +++ b/WhisperNet/API/eSpeakerChannel.cs @@ -0,0 +1,15 @@ +namespace Whisper +{ + /// <summary>Output value for iContext.detectSpeaker method</summary> + public enum eSpeakerChannel: byte + { + /// <summary>Unable to detect</summary> + Unsure = 0, + /// <summary>The speech was mostly in the left channel</summary> + Left = 1, + /// <summary>The speech was mostly in the right channel</summary> + Right = 2, + /// <summary>The audio only has 1 channel</summary> + NoStereoData = 0xFF, + } +}
\ No newline at end of file diff --git a/WhisperNet/Internal/iContext.cs b/WhisperNet/Internal/iContext.cs index 6adf8c5..010c139 100644 --- a/WhisperNet/Internal/iContext.cs +++ b/WhisperNet/Internal/iContext.cs @@ -21,6 +21,10 @@ namespace Whisper.Internal [RetValIndex( 1 )] iTranscribeResult getResults( eResultFlags flags ); + /// <summary>Try to detect speaker by comparing channels of the stereo PCM data</summary> + [RetValIndex( 1 )] + eSpeakerChannel detectSpeaker( [In] ref sTimeInterval interval ); + /// <summary>Get the model which was used to create this context</summary> [RetValIndex] iModel getModel(); |
