summary | refs | log | tree | commit | diff | stats
path: root/WhisperNet/API
diff options
context:
space:
mode:
Diffstat (limited to 'WhisperNet/API')
-rw-r--r-- WhisperNet/API/iAudioBuffer.cs 13
-rw-r--r-- WhisperNet/API/iAudioReader.cs 4
-rw-r--r-- WhisperNet/API/iMediaFoundation.cs 7
3 files changed, 13 insertions, 11 deletions
diff --git a/WhisperNet/API/iAudioBuffer.cs b/WhisperNet/API/iAudioBuffer.cs
index 1b35621..c4e86b9 100644
--- a/WhisperNet/API/iAudioBuffer.cs
+++ b/WhisperNet/API/iAudioBuffer.cs
@@ -5,23 +5,24 @@ namespace Whisper
{
/// <summary>A buffer with a chunk of audio.</summary>
/// <remarks>Note the interface supports both marshaling directions.<br/>
- /// I have not tested, but you should be able to implement this interface in C#, to supply PCM audio data to the native code</remarks>
+ /// I have not tested, but you should be able to implement this interface in C#, to supply PCM audio data to the native code.</remarks>
[ComInterface( "013583aa-c9eb-42bc-83db-633c2c317051", eMarshalDirection.BothWays )]
public interface iAudioBuffer: IDisposable
{
- /// <summary>Count of samples in the buffer</summary>
+ /// <summary>Count of samples in the buffer, equal to ( length in seconds ) * 16000</summary>
int countSamples();
- /// <summary>Unmanaged pointer to the internal buffer containing single-channel FP32 samples.</summary>
+ /// <summary>Unmanaged pointer to the internal buffer with single-channel <c>float</c> PCM samples @ 16 kHz sample rate.</summary>
/// <remarks>If you are implementing this interface in C# and your audio data is on the managed heap, use <see cref="GCHandle" /> to make sure it doesn't move.<br/>
/// Or better yet, move the data to unmanaged buffer allocated with <see cref="Marshal.AllocHGlobal(int)" /> or <see cref="Marshal.AllocCoTaskMem(int)" /> method.</remarks>
IntPtr getPcmMono();
- /// <summary>Unmanaged pointer to the internal buffer containing stereo FP32 samples.</summary>
- /// <remarks>When the buffer doesn’t have stereo data, the method gonna return <see cref="IntPtr.Zero" />.</remarks>
+ /// <summary>Unmanaged pointer to the internal buffer with interleaved stereo <c>float</c> PCM samples @ 16 kHz sample rate.</summary>
+ /// <remarks>When the buffer doesn’t have stereo data, the method should return <see cref="IntPtr.Zero" />.</remarks>
IntPtr getPcmStereo();
- /// <summary>Start time of the buffer, relative to the start of the media</summary>
+ /// <summary>Start time of the buffer, relative to the start of the media.</summary>
+ /// <remarks>The value is used to produce timestamps in <see cref="sSegment.time" /> and <see cref="sToken.time" /> fields.</remarks>
void getTime( out TimeSpan time );
}
} \ No newline at end of file
diff --git a/WhisperNet/API/iAudioReader.cs b/WhisperNet/API/iAudioReader.cs
index 68cf916..8e0e047 100644
--- a/WhisperNet/API/iAudioReader.cs
+++ b/WhisperNet/API/iAudioReader.cs
@@ -3,8 +3,8 @@
namespace Whisper
{
/// <summary>Audio stream reader object</summary>
- /// <remarks>The implementation is forward-only, and these objects ain’t reusable.<br/>
- /// To read a source file multiple time, dispose and re-create the reader.</remarks>
+ /// <remarks>The implementation is forward-only, and these objects aren’t reusable.<br/>
+ /// To read an audio file multiple times, dispose this object, and create a new one from the same source file.</remarks>
[ComInterface( "35b988da-04a6-476a-a193-d8891d5dc390", eMarshalDirection.ToManaged )]
public interface iAudioReader: IDisposable
{
diff --git a/WhisperNet/API/iMediaFoundation.cs b/WhisperNet/API/iMediaFoundation.cs
index 535f904..b8d7449 100644
--- a/WhisperNet/API/iMediaFoundation.cs
+++ b/WhisperNet/API/iMediaFoundation.cs
@@ -12,7 +12,7 @@ namespace Whisper
{
/// <summary>Decode complete audio file into a new memory buffer.</summary>
/// <returns>
- /// Under the hood, the method asks MF to resample and convert audio into the suitable type for the Whisper model.<br/>
+ /// The method asks MF to resample and convert audio into the suitable type for the Whisper model.<br/>
/// If the path is a video file, the method will decode the first audio track.
/// </returns>
[RetValIndex( 2 )]
@@ -20,8 +20,9 @@ namespace Whisper
/// <summary>Create a reader to stream the audio file from disk</summary>
/// <returns>
- /// Under the hood, the method asks MF to resample and convert audio into the suitable type for the Whisper model.<br/>
- /// If the path is a video file, the method will decode the first audio track.
+ /// The method returns an object which can be used to decode the audio file incrementally.<br/>
+ /// For long audio files, this saves both memory (no need for large uncompressed PCM buffer), and time (decode and transcribe run concurrently on different CPU threads).<br/>
+ /// If the path is a video file, the implementation will use the first audio track.
/// </returns>
[RetValIndex( 2 )]
iAudioReader openAudioFile( [MarshalAs( UnmanagedType.LPWStr )] string path, [MarshalAs( UnmanagedType.U1 )] bool stereo = false );