diff options
| author | Konstantin <const@const.me> | 2023-02-03 12:52:05 +0100 |
|---|---|---|
| committer | Konstantin <const@const.me> | 2023-02-03 12:52:05 +0100 |
| commit | 6238fc31c6a0a6004cfb6791a7bf85c829c8acc9 (patch) | |
| tree | 7645adb3aea2422ebf35640906a52c395b37a5f1 /WhisperNet | |
| parent | 2deddc6e541bc61f72e07f7cc82f91d7e117f619 (diff) | |
Minor, API documentation
Diffstat (limited to 'WhisperNet')
| -rw-r--r-- | WhisperNet/API/iAudioBuffer.cs | 13 | ||||
| -rw-r--r-- | WhisperNet/API/iAudioReader.cs | 4 | ||||
| -rw-r--r-- | WhisperNet/API/iMediaFoundation.cs | 7 |
3 files changed, 13 insertions, 11 deletions
diff --git a/WhisperNet/API/iAudioBuffer.cs b/WhisperNet/API/iAudioBuffer.cs index 1b35621..c4e86b9 100644 --- a/WhisperNet/API/iAudioBuffer.cs +++ b/WhisperNet/API/iAudioBuffer.cs @@ -5,23 +5,24 @@ namespace Whisper { /// <summary>A buffer with a chunk of audio.</summary> /// <remarks>Note the interface supports both marshaling directions.<br/> - /// I have not tested, but you should be able to implement this interface in C#, to supply PCM audio data to the native code</remarks> + /// I have not tested, but you should be able to implement this interface in C#, to supply PCM audio data to the native code.</remarks> [ComInterface( "013583aa-c9eb-42bc-83db-633c2c317051", eMarshalDirection.BothWays )] public interface iAudioBuffer: IDisposable { - /// <summary>Count of samples in the buffer</summary> + /// <summary>Count of samples in the buffer, equal to ( length in seconds ) * 16000</summary> int countSamples(); - /// <summary>Unmanaged pointer to the internal buffer containing single-channel FP32 samples.</summary> + /// <summary>Unmanaged pointer to the internal buffer with single-channel <c>float</c> PCM samples @ 16 kHz sample rate.</summary> /// <remarks>If you implementing this interface in C# and your audio data is on the managed heap, use <see cref="GCHandle" /> to make sure it doesn't move.<br/> /// Or better yet, move the data to unmanaged buffer allocated with <see cref="Marshal.AllocHGlobal(int)" /> or <see cref="Marshal.AllocCoTaskMem(int)" /> method.</remarks> IntPtr getPcmMono(); - /// <summary>Unmanaged pointer to the internal buffer containing stereo FP32 samples.</summary> - /// <remarks>When the buffer doesn’t have stereo data, the method gonna return <see cref="IntPtr.Zero" />.</remarks> + /// <summary>Unmanaged pointer to the internal buffer with interleaved stereo <c>float</c> PCM samples @ 16 kHz sample rate.</summary> + /// <remarks>When the buffer doesn’t have stereo data, the method should return <see cref="IntPtr.Zero" />.</remarks> IntPtr 
getPcmStereo(); - /// <summary>Start time of the buffer, relative to the start of the media</summary> + /// <summary>Start time of the buffer, relative to the start of the media.</summary> + /// <remarks>The value is used to produce timestamps in <see cref="sSegment.time" /> and <see cref="sToken.time" /> fields.</remarks> void getTime( out TimeSpan time ); } }
\ No newline at end of file diff --git a/WhisperNet/API/iAudioReader.cs b/WhisperNet/API/iAudioReader.cs index 68cf916..8e0e047 100644 --- a/WhisperNet/API/iAudioReader.cs +++ b/WhisperNet/API/iAudioReader.cs @@ -3,8 +3,8 @@ namespace Whisper { /// <summary>Audio stream reader object</summary> - /// <remarks>The implementation is forward-only, and these objects ain’t reusable.<br/> - /// To read a source file multiple time, dispose and re-create the reader.</remarks> + /// <remarks>The implementation is forward-only, and these objects aren’t reusable.<br/> + /// To read an audio file multiple times, dispose this object, and create a new one from the same source file.</remarks> [ComInterface( "35b988da-04a6-476a-a193-d8891d5dc390", eMarshalDirection.ToManaged )] public interface iAudioReader: IDisposable { diff --git a/WhisperNet/API/iMediaFoundation.cs b/WhisperNet/API/iMediaFoundation.cs index 535f904..b8d7449 100644 --- a/WhisperNet/API/iMediaFoundation.cs +++ b/WhisperNet/API/iMediaFoundation.cs @@ -12,7 +12,7 @@ namespace Whisper { /// <summary>Decode complete audio file into a new memory buffer.</summary> /// <returns> - /// Under the hood, the method asks MF to resample and convert audio into the suitable type for the Whisper model.<br/> + /// The method asks MF to resample and convert audio into the suitable type for the Whisper model.<br/> /// If the path is a video file, the method will decode the first audio track. /// </returns> [RetValIndex( 2 )] @@ -20,8 +20,9 @@ namespace Whisper /// <summary>Create a reader to stream the audio file from disk</summary> /// <returns> - /// Under the hood, the method asks MF to resample and convert audio into the suitable type for the Whisper model.<br/> - /// If the path is a video file, the method will decode the first audio track. 
+ /// The method returns an object which can be used to decode the audio file incrementally.<br/> + /// For long audio files, this saves both memory (no need for large uncompressed PCM buffer), and time (decode and transcribe run concurrently on different CPU threads).<br/> + /// If the path is a video file, the implementation will use the first audio track. /// </returns> [RetValIndex( 2 )] iAudioReader openAudioFile( [MarshalAs( UnmanagedType.LPWStr )] string path, [MarshalAs( UnmanagedType.U1 )] bool stereo = false ); |
