summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Konstantin <const@const.me> 2023-01-28 15:18:02 +0100
committer Konstantin <const@const.me> 2023-01-28 15:18:02 +0100
commit e1e3ac09a97d602a6ea60ff1928de77de81d99a7 (patch)
tree db38b886ef5d93b76aaecfe89de8fc421616646e
parent 214aacaa5c0a685f8be1cbe4fe06f5a1af8ad2d4 (diff)
DLL API for diarize feature
-rw-r--r-- Whisper/API/TranscribeStructs.h 9
-rw-r--r-- Whisper/API/iContext.cl.h 2
-rw-r--r-- Whisper/API/iContext.h 2
-rw-r--r-- Whisper/Whisper/ContextImpl.h 1
-rw-r--r-- Whisper/Whisper/ContextImpl.misc.cpp 5
-rw-r--r-- WhisperNet/API/eSpeakerChannel.cs 15
-rw-r--r-- WhisperNet/Internal/iContext.cs 4
7 files changed, 38 insertions, 0 deletions
diff --git a/Whisper/API/TranscribeStructs.h b/Whisper/API/TranscribeStructs.h
index ac28357..29bb9ab 100644
--- a/Whisper/API/TranscribeStructs.h
+++ b/Whisper/API/TranscribeStructs.h
@@ -124,4 +124,13 @@ namespace Whisper
{
return 0 != ( (uint32_t)a & (uint32_t)b );
}
+
+ // Output value for iContext.detectSpeaker method; one byte wide, same numeric values as the .NET eSpeakerChannel enum
+ enum struct eSpeakerChannel : uint8_t
+ {
+ Unsure = 0, // Unable to detect
+ Left = 1, // The speech was mostly in the left channel
+ Right = 2, // The speech was mostly in the right channel
+ NoStereoData = 0xFF, // The audio only has 1 channel
+ };
} \ No newline at end of file
diff --git a/Whisper/API/iContext.cl.h b/Whisper/API/iContext.cl.h
index fdb15ce..18e56de 100644
--- a/Whisper/API/iContext.cl.h
+++ b/Whisper/API/iContext.cl.h
@@ -31,6 +31,8 @@ namespace Whisper
virtual HRESULT COMLIGHTCALL runCapture( const sFullParams& params, const sCaptureCallbacks& callbacks, const iAudioCapture* reader ) = 0;
virtual HRESULT COMLIGHTCALL getResults( eResultFlags flags, iTranscribeResult** pp ) const = 0;
+ // Try to detect speaker for the `time` interval by comparing channels of the stereo PCM data; the outcome is written to `result`
+ virtual HRESULT COMLIGHTCALL detectSpeaker( const sTimeInterval& time, eSpeakerChannel& result ) const = 0;
virtual HRESULT COMLIGHTCALL getModel( iModel** pp ) = 0;
diff --git a/Whisper/API/iContext.h b/Whisper/API/iContext.h
index d6ca29c..fc38a53 100644
--- a/Whisper/API/iContext.h
+++ b/Whisper/API/iContext.h
@@ -28,6 +28,8 @@ namespace Whisper
HRESULT __stdcall runCapture( const sFullParams& params, const sCaptureCallbacks& callbacks, const iAudioCapture* reader );
HRESULT __stdcall getResults( eResultFlags flags, iTranscribeResult** pp ) const;
+ // Try to detect speaker for the `time` interval by comparing channels of the stereo PCM data; the outcome is written to `result`
+ HRESULT __stdcall detectSpeaker( const sTimeInterval& time, eSpeakerChannel& result ) const;
HRESULT __stdcall getModel( iModel** pp );
diff --git a/Whisper/Whisper/ContextImpl.h b/Whisper/Whisper/ContextImpl.h
index 971f629..448efd5 100644
--- a/Whisper/Whisper/ContextImpl.h
+++ b/Whisper/Whisper/ContextImpl.h
@@ -63,6 +63,7 @@ namespace Whisper
HRESULT COMLIGHTCALL makeResults( eResultFlags flags, TranscribeResult& res ) const noexcept;
HRESULT COMLIGHTCALL getResults( eResultFlags flags, iTranscribeResult** pp ) const noexcept override final;
+ HRESULT COMLIGHTCALL detectSpeaker( const sTimeInterval& time, eSpeakerChannel& result ) const noexcept override final; // Overrides the interface's detectSpeaker; the outcome is returned through `result`
int defaultThreadsCount() const;
diff --git a/Whisper/Whisper/ContextImpl.misc.cpp b/Whisper/Whisper/ContextImpl.misc.cpp
index 9a156fb..9eb4c04 100644
--- a/Whisper/Whisper/ContextImpl.misc.cpp
+++ b/Whisper/Whisper/ContextImpl.misc.cpp
@@ -401,4 +401,9 @@ HRESULT COMLIGHTCALL ContextImpl::runStreamed( const sFullParams& params, const
{
return hr;
}
+}
+
+HRESULT COMLIGHTCALL ContextImpl::detectSpeaker( const sTimeInterval& time, eSpeakerChannel& result ) const noexcept
+{
+ return E_NOTIMPL; // Stub: always fails with E_NOTIMPL; `time` is not read and `result` is not written
} \ No newline at end of file
diff --git a/WhisperNet/API/eSpeakerChannel.cs b/WhisperNet/API/eSpeakerChannel.cs
new file mode 100644
index 0000000..edb96e0
--- /dev/null
+++ b/WhisperNet/API/eSpeakerChannel.cs
@@ -0,0 +1,15 @@
+namespace Whisper
+{
+ /// <summary>Output value for iContext.detectSpeaker method; one byte wide, same numeric values as the native eSpeakerChannel enum</summary>
+ public enum eSpeakerChannel: byte
+ {
+ /// <summary>Unable to detect which channel the speech was in</summary>
+ Unsure = 0,
+ /// <summary>The speech was mostly in the left channel</summary>
+ Left = 1,
+ /// <summary>The speech was mostly in the right channel</summary>
+ Right = 2,
+ /// <summary>The audio only has 1 channel, so there is no stereo data to compare</summary>
+ NoStereoData = 0xFF,
+ }
+} \ No newline at end of file
diff --git a/WhisperNet/Internal/iContext.cs b/WhisperNet/Internal/iContext.cs
index 6adf8c5..010c139 100644
--- a/WhisperNet/Internal/iContext.cs
+++ b/WhisperNet/Internal/iContext.cs
@@ -21,6 +21,10 @@ namespace Whisper.Internal
[RetValIndex( 1 )]
iTranscribeResult getResults( eResultFlags flags );
+ /// <summary>Try to detect speaker for the supplied time interval by comparing channels of the stereo PCM data</summary>
+ [RetValIndex( 1 )] // NOTE(review): presumably maps the return value to the native `result` out-parameter (argument index 1) — confirm against ComLight
+ eSpeakerChannel detectSpeaker( [In] ref sTimeInterval interval );
+
/// <summary>Get the model which was used to create this context</summary>
[RetValIndex]
iModel getModel();