From 6238fc31c6a0a6004cfb6791a7bf85c829c8acc9 Mon Sep 17 00:00:00 2001 From: Konstantin Date: Fri, 3 Feb 2023 12:52:05 +0100 Subject: Minor, API documentation --- WhisperNet/API/iAudioBuffer.cs | 13 +++++++------ WhisperNet/API/iAudioReader.cs | 4 ++-- WhisperNet/API/iMediaFoundation.cs | 7 ++++--- 3 files changed, 13 insertions(+), 11 deletions(-) (limited to 'WhisperNet/API') diff --git a/WhisperNet/API/iAudioBuffer.cs b/WhisperNet/API/iAudioBuffer.cs index 1b35621..c4e86b9 100644 --- a/WhisperNet/API/iAudioBuffer.cs +++ b/WhisperNet/API/iAudioBuffer.cs @@ -5,23 +5,24 @@ namespace Whisper { /// A buffer with a chunk of audio. /// Note the interface supports both marshaling directions.
- /// I have not tested, but you should be able to implement this interface in C#, to supply PCM audio data to the native code
+ /// I have not tested, but you should be able to implement this interface in C#, to supply PCM audio data to the native code. [ComInterface( "013583aa-c9eb-42bc-83db-633c2c317051", eMarshalDirection.BothWays )] public interface iAudioBuffer: IDisposable { - /// Count of samples in the buffer + /// Count of samples in the buffer, equal to ( length in seconds ) * 16000 int countSamples(); - /// Unmanaged pointer to the internal buffer containing single-channel FP32 samples. + /// Unmanaged pointer to the internal buffer with single-channel float PCM samples @ 16 kHz sample rate. /// If you implementing this interface in C# and your audio data is on the managed heap, use to make sure it doesn't move.
/// Or better yet, move the data to unmanaged buffer allocated with or method.
IntPtr getPcmMono(); - /// Unmanaged pointer to the internal buffer containing stereo FP32 samples. - /// When the buffer doesn’t have stereo data, the method gonna return . + /// Unmanaged pointer to the internal buffer with interleaved stereo float PCM samples @ 16 kHz sample rate. + /// When the buffer doesn’t have stereo data, the method should return . IntPtr getPcmStereo(); - /// Start time of the buffer, relative to the start of the media + /// Start time of the buffer, relative to the start of the media. + /// The value is used to produce timestamps in and fields. void getTime( out TimeSpan time ); } } \ No newline at end of file diff --git a/WhisperNet/API/iAudioReader.cs b/WhisperNet/API/iAudioReader.cs index 68cf916..8e0e047 100644 --- a/WhisperNet/API/iAudioReader.cs +++ b/WhisperNet/API/iAudioReader.cs @@ -3,8 +3,8 @@ namespace Whisper { /// Audio stream reader object - /// The implementation is forward-only, and these objects ain’t reusable.
- /// To read a source file multiple time, dispose and re-create the reader.
+ /// The implementation is forward-only, and these objects aren’t reusable.
+ /// To read an audio file multiple times, dispose this object, and create a new one from the same source file.
[ComInterface( "35b988da-04a6-476a-a193-d8891d5dc390", eMarshalDirection.ToManaged )] public interface iAudioReader: IDisposable { diff --git a/WhisperNet/API/iMediaFoundation.cs b/WhisperNet/API/iMediaFoundation.cs index 535f904..b8d7449 100644 --- a/WhisperNet/API/iMediaFoundation.cs +++ b/WhisperNet/API/iMediaFoundation.cs @@ -12,7 +12,7 @@ namespace Whisper { /// Decode complete audio file into a new memory buffer. /// - /// Under the hood, the method asks MF to resample and convert audio into the suitable type for the Whisper model.
+ /// The method asks MF to resample and convert audio into a suitable type for the Whisper model.
/// If the path is a video file, the method will decode the first audio track. ///
[RetValIndex( 2 )] @@ -20,8 +20,9 @@ namespace Whisper /// Create a reader to stream the audio file from disk /// - /// Under the hood, the method asks MF to resample and convert audio into the suitable type for the Whisper model.
- /// If the path is a video file, the method will decode the first audio track. + /// The method returns an object which can be used to decode the audio file incrementally.
+ /// For long audio files, this saves both memory (no need for a large uncompressed PCM buffer), and time (decode and transcribe run concurrently on different CPU threads).
+ /// If the path is a video file, the implementation will use the first audio track. ///
[RetValIndex( 2 )] iAudioReader openAudioFile( [MarshalAs( UnmanagedType.LPWStr )] string path, [MarshalAs( UnmanagedType.U1 )] bool stereo = false ); -- cgit v1.2.3