From e1e3ac09a97d602a6ea60ff1928de77de81d99a7 Mon Sep 17 00:00:00 2001 From: Konstantin Date: Sat, 28 Jan 2023 15:18:02 +0100 Subject: DLL API for diarize feature --- Whisper/API/TranscribeStructs.h | 9 +++++++++ Whisper/API/iContext.cl.h | 2 ++ Whisper/API/iContext.h | 2 ++ Whisper/Whisper/ContextImpl.h | 1 + Whisper/Whisper/ContextImpl.misc.cpp | 5 +++++ WhisperNet/API/eSpeakerChannel.cs | 15 +++++++++++++++ WhisperNet/Internal/iContext.cs | 4 ++++ 7 files changed, 38 insertions(+) create mode 100644 WhisperNet/API/eSpeakerChannel.cs diff --git a/Whisper/API/TranscribeStructs.h b/Whisper/API/TranscribeStructs.h index ac28357..29bb9ab 100644 --- a/Whisper/API/TranscribeStructs.h +++ b/Whisper/API/TranscribeStructs.h @@ -124,4 +124,13 @@ namespace Whisper { return 0 != ( (uint32_t)a & (uint32_t)b ); } + + // Output value for iContext.detectSpeaker method + enum struct eSpeakerChannel : uint8_t + { + Unsure = 0, + Left = 1, + Right = 2, + NoStereoData = 0xFF, + }; } \ No newline at end of file diff --git a/Whisper/API/iContext.cl.h b/Whisper/API/iContext.cl.h index fdb15ce..18e56de 100644 --- a/Whisper/API/iContext.cl.h +++ b/Whisper/API/iContext.cl.h @@ -31,6 +31,8 @@ namespace Whisper virtual HRESULT COMLIGHTCALL runCapture( const sFullParams& params, const sCaptureCallbacks& callbacks, const iAudioCapture* reader ) = 0; virtual HRESULT COMLIGHTCALL getResults( eResultFlags flags, iTranscribeResult** pp ) const = 0; + // Try to detect speaker by comparing channels of the stereo PCM data + virtual HRESULT COMLIGHTCALL detectSpeaker( const sTimeInterval& time, eSpeakerChannel& result ) const = 0; virtual HRESULT COMLIGHTCALL getModel( iModel** pp ) = 0; diff --git a/Whisper/API/iContext.h b/Whisper/API/iContext.h index d6ca29c..fc38a53 100644 --- a/Whisper/API/iContext.h +++ b/Whisper/API/iContext.h @@ -28,6 +28,8 @@ namespace Whisper HRESULT __stdcall runCapture( const sFullParams& params, const sCaptureCallbacks& callbacks, const iAudioCapture* reader ); HRESULT __stdcall getResults( eResultFlags flags, iTranscribeResult** pp ) const; + // Try to detect speaker by comparing channels of the stereo PCM data + HRESULT __stdcall detectSpeaker( const sTimeInterval& time, eSpeakerChannel& result ) const; HRESULT __stdcall getModel( iModel** pp ); diff --git a/Whisper/Whisper/ContextImpl.h b/Whisper/Whisper/ContextImpl.h index 971f629..448efd5 100644 --- a/Whisper/Whisper/ContextImpl.h +++ b/Whisper/Whisper/ContextImpl.h @@ -63,6 +63,7 @@ namespace Whisper HRESULT COMLIGHTCALL makeResults( eResultFlags flags, TranscribeResult& res ) const noexcept; HRESULT COMLIGHTCALL getResults( eResultFlags flags, iTranscribeResult** pp ) const noexcept override final; + HRESULT COMLIGHTCALL detectSpeaker( const sTimeInterval& time, eSpeakerChannel& result ) const noexcept override final; int defaultThreadsCount() const; diff --git a/Whisper/Whisper/ContextImpl.misc.cpp b/Whisper/Whisper/ContextImpl.misc.cpp index 9a156fb..9eb4c04 100644 --- a/Whisper/Whisper/ContextImpl.misc.cpp +++ b/Whisper/Whisper/ContextImpl.misc.cpp @@ -401,4 +401,9 @@ HRESULT COMLIGHTCALL ContextImpl::runStreamed( const sFullParams& params, const { return hr; } +} + +HRESULT COMLIGHTCALL ContextImpl::detectSpeaker( const sTimeInterval& time, eSpeakerChannel& result ) const noexcept +{ + return E_NOTIMPL; } \ No newline at end of file diff --git a/WhisperNet/API/eSpeakerChannel.cs b/WhisperNet/API/eSpeakerChannel.cs new file mode 100644 index 0000000..edb96e0 --- /dev/null +++ b/WhisperNet/API/eSpeakerChannel.cs @@ -0,0 +1,15 @@ +namespace Whisper +{ + /// Output value for iContext.detectSpeaker method + public enum eSpeakerChannel: byte + { + /// Unable to detect + Unsure = 0, + /// The speech was mostly in the left channel + Left = 1, + /// The speech was mostly in the right channel + Right = 2, + /// The audio only has 1 channel + NoStereoData = 0xFF, + } +} \ No newline at end of file diff --git a/WhisperNet/Internal/iContext.cs b/WhisperNet/Internal/iContext.cs index 6adf8c5..010c139 100644 --- a/WhisperNet/Internal/iContext.cs +++ b/WhisperNet/Internal/iContext.cs @@ -21,6 +21,10 @@ namespace Whisper.Internal [RetValIndex( 1 )] iTranscribeResult getResults( eResultFlags flags ); + /// Try to detect speaker by comparing channels of the stereo PCM data + [RetValIndex( 1 )] + eSpeakerChannel detectSpeaker( [In] ref sTimeInterval interval ); + /// Get the model which was used to create this context [RetValIndex] iModel getModel(); -- cgit v1.2.3