From 8c4603c73675958efc960fbd4bb599a2909d106a Mon Sep 17 00:00:00 2001
From: Konstantin <const@const.me>
Date: Mon, 16 Jan 2023 14:52:43 +0100
Subject: Source codes

---
 WhisperNet/API/CaptureDeviceId.cs      |  24 ++++
 WhisperNet/API/Parameters.cs           |  95 +++++++++++++++
 WhisperNet/API/SpecialTokens.cs        |  23 ++++
 WhisperNet/API/eCaptureStatus.cs       |  19 +++
 WhisperNet/API/eLanguage.cs            | 206 +++++++++++++++++++++++++++++++++
 WhisperNet/API/eLogLevel.cs            |  34 ++++++
 WhisperNet/API/eModelImplementation.cs |  25 ++++
 WhisperNet/API/eResultFlags.cs         |  21 ++++
 WhisperNet/API/iAudioBuffer.cs         |  27 +++++
 WhisperNet/API/iAudioReader.cs         |  23 ++++
 WhisperNet/API/iMediaFoundation.cs     |  36 ++++++
 WhisperNet/API/iModel.cs               |  27 +++++
 WhisperNet/API/sCaptureParams.cs       |  37 ++++++
 13 files changed, 597 insertions(+)
 create mode 100644 WhisperNet/API/CaptureDeviceId.cs
 create mode 100644 WhisperNet/API/Parameters.cs
 create mode 100644 WhisperNet/API/SpecialTokens.cs
 create mode 100644 WhisperNet/API/eCaptureStatus.cs
 create mode 100644 WhisperNet/API/eLanguage.cs
 create mode 100644 WhisperNet/API/eLogLevel.cs
 create mode 100644 WhisperNet/API/eModelImplementation.cs
 create mode 100644 WhisperNet/API/eResultFlags.cs
 create mode 100644 WhisperNet/API/iAudioBuffer.cs
 create mode 100644 WhisperNet/API/iAudioReader.cs
 create mode 100644 WhisperNet/API/iMediaFoundation.cs
 create mode 100644 WhisperNet/API/iModel.cs
 create mode 100644 WhisperNet/API/sCaptureParams.cs

(limited to 'WhisperNet/API')
diff --git a/WhisperNet/API/CaptureDeviceId.cs b/WhisperNet/API/CaptureDeviceId.cs
new file mode 100644
index 0000000..9636e53
--- /dev/null
+++ b/WhisperNet/API/CaptureDeviceId.cs
@@ -0,0 +1,24 @@
+﻿using Whisper.Internal;
+
+namespace Whisper
+{
+	/// <summary>Identifiers for an audio capture device: a display name for the user, and an endpoint ID for the system</summary>
+	public record struct CaptureDeviceId
+	{
+		/// <summary>The display name is suitable for showing to the user, but might not be unique.</summary>
+		public string displayName;
+
+		/// <summary>Endpoint ID for an audio capture device.<br/>
+		/// It uniquely identifies the device on the system, but is not a readable string.</summary>
+		public string endpoint;
+
+		internal CaptureDeviceId( in sCaptureDevice rsi )
+		{
+			displayName = rsi.displayName ?? "<display name unavailable>";
+			endpoint = rsi.endpoint ?? throw new ApplicationException( "The device has no endpoint ID" );
+		}
+
+		/// <summary>Returns a string for display which includes the display name of the device, but not the endpoint ID</summary>
+		public override string ToString() => $"Capture device: \"{displayName}\"";
+	}
+}
\ No newline at end of file
diff --git a/WhisperNet/API/Parameters.cs b/WhisperNet/API/Parameters.cs
new file mode 100644
index 0000000..d2b53f9
--- /dev/null
+++ b/WhisperNet/API/Parameters.cs
@@ -0,0 +1,95 @@
+﻿// Missing XML comment for publicly visible type or member
+// TODO: remove this line and document them.
+#pragma warning disable CS1591
+
+namespace Whisper
+{
+	/// <summary>Available sampling strategies, i.e. ways to select the output tokens</summary>
+	public enum eSamplingStrategy: int
+	{
+		/// <summary>Always select the most probable token</summary>
+		Greedy,
+		/// <summary>Beam search. TODO: not implemented yet!</summary>
+		BeamSearch,
+	};
+
+	[Flags]
+	public enum eFullParamsFlags: uint
+	{
+		None = 0,
+		Translate = 1,
+		NoContext = 2,
+		SingleSegment = 4,
+		PrintSpecial = 8,
+		PrintProgress = 0x10,
+		PrintRealtime = 0x20,
+		PrintTimestamps = 0x40,
+
+		// Experimental flags; note these start at 0x100, the 0x80 bit is not assigned to any flag
+		TokenTimestamps = 0x100,
+		SpeedupAudio = 0x200,
+	};
+
+	/// <summary>Transcribe parameters</summary>
+	public struct Parameters
+	{
+		/// <summary>Sampling strategy</summary>
+		public eSamplingStrategy strategy;
+
+		/// <summary>Count of CPU worker threads to use</summary>
+		/// <remarks>So far, the GPU model only uses CPU threads for MEL spectrograms</remarks>
+		public int cpuThreads;
+
+		public int n_max_text_ctx; // NOTE(review): undocumented; the name suggests a limit on text context tokens, confirm
+		/// <summary>Start offset, in milliseconds</summary>
+		public int offset_ms;
+		/// <summary>Audio duration to process, in milliseconds</summary>
+		public int duration_ms;
+		public eFullParamsFlags flags; // Bit mask of options; setFlag() changes individual bits
+
+		/// <summary>Set or clear the specified flag in the <see cref="flags" /> field of this structure</summary>
+		/// <param name="flag">One or more flag bits to change; must not be <see cref="eFullParamsFlags.None" /></param>
+		/// <param name="set">True to set the bits, false to clear them</param>
+		/// <exception cref="ArgumentException">Thrown when <paramref name="flag" /> is <see cref="eFullParamsFlags.None" /></exception>
+		public void setFlag( eFullParamsFlags flag, bool set )
+		{
+			if( flag == eFullParamsFlags.None )
+				throw new ArgumentException( "The flag to set or clear must not be eFullParamsFlags.None", nameof( flag ) );
+			if( set )
+				flags |= flag;
+			else
+				flags &= ~flag;
+		}
+
+		/// <summary>Language</summary>
+		public eLanguage language;
+
+		// [EXPERIMENTAL] token-level timestamps
+		/// <summary>Timestamp token probability threshold (~0.01)</summary>
+		public float thold_pt;
+		/// <summary>Timestamp token sum probability threshold (~0.01)</summary>
+		public float thold_ptsum;
+		/// <summary>Max segment length in characters</summary>
+		public int max_len;
+		/// <summary>Max tokens per segment (0 = no limit)</summary>
+		public int max_tokens;
+
+		public struct sGreedy
+		{
+			public int n_past;
+		}
+		public sGreedy greedy;
+
+		public struct sBeamSearch
+		{
+			public int n_past;
+			public int beam_width;
+			public int n_best;
+		}
+		public sBeamSearch beamSearch;
+
+		// [EXPERIMENTAL] speed-up techniques
+		/// <summary>Overwrite the audio context size (0 = use default)</summary>
+		public int audioContextSize;
+	}
+}
\ No newline at end of file
diff --git a/WhisperNet/API/SpecialTokens.cs b/WhisperNet/API/SpecialTokens.cs
new file mode 100644
index 0000000..d672369
--- /dev/null
+++ b/WhisperNet/API/SpecialTokens.cs
@@ -0,0 +1,23 @@
+﻿namespace Whisper
+{
+	/// <summary>Integer IDs of the special tokens defined in the model</summary>
+	public readonly struct SpecialTokens
+	{
+		/// <summary>The end of a transcription, <c>token_eot</c></summary>
+		public readonly int TranscriptionEnd; // token_eot
+		/// <summary>Start of a transcription, <c>token_sot</c></summary>
+		public readonly int TranscriptionStart;   // token_sot
+		/// <summary><c>token_prev</c>. NOTE(review): in upstream Whisper this looks like the "start of previous text" marker which introduces prior context, not a token for one previous word — confirm.</summary>
+		public readonly int PreviousWord;   // token_prev
+		/// <summary><c>token_solm</c>. NOTE(review): in upstream Whisper this looks like the "start of language model" marker rather than a start of a sentence — confirm.</summary>
+		public readonly int SentenceStart;   // token_solm
+		/// <summary><c>token_not</c>. NOTE(review): in upstream Whisper this looks like the "no timestamps" marker rather than the English word "not" — confirm.</summary>
+		public readonly int Not;    // token_not
+		/// <summary><c>token_beg</c>. NOTE(review): in upstream Whisper this looks like the first timestamp token rather than a "new transcription" marker — confirm.</summary>
+		public readonly int TranscriptionBegin;    // token_beg
+		/// <summary><c>token_translate</c>; going by the name, the token which selects the translate task</summary>
+		public readonly int TaskTranslate;
+		/// <summary><c>token_transcribe</c>; going by the name, the token which selects the transcribe task</summary>
+		public readonly int TaskTranscribe;
+	}
+}
\ No newline at end of file
diff --git a/WhisperNet/API/eCaptureStatus.cs b/WhisperNet/API/eCaptureStatus.cs
new file mode 100644
index 0000000..41f05fb
--- /dev/null
+++ b/WhisperNet/API/eCaptureStatus.cs
@@ -0,0 +1,19 @@
+﻿namespace Whisper
+{
+	/// <summary>Status of the voice capture; this is a bit mask, the values can be combined</summary>
+	[Flags]
+	public enum eCaptureStatus: byte
+	{
+		/// <summary>Doing nothing</summary>
+		None = 0,
+		/// <summary>Capturing the audio</summary>
+		Listening = 1,
+		/// <summary>A voice is detected in the captured audio, recording</summary>
+		Voice = 2,
+		/// <summary>Transcribing a recorded piece of the audio</summary>
+		Transcribing = 4,
+		/// <summary>The computer is unable to transcribe the audio quickly enough,<br/>
+		/// and the capture is dropping the incoming audio samples.</summary>
+		Stalled = 0x80,
+	}
+}
\ No newline at end of file
diff --git a/WhisperNet/API/eLanguage.cs b/WhisperNet/API/eLanguage.cs
new file mode 100644
index 0000000..1241077
--- /dev/null
+++ b/WhisperNet/API/eLanguage.cs
@@ -0,0 +1,206 @@
+// This file is generated by a tool, from the `languageCodez.tsv` file in this repository
+namespace Whisper
+{
+	/// <summary>Supported languages. Each value packs the ASCII characters of the language code, first character in the lowest byte: <c>0x6E65</c> is "en"</summary>
+	public enum eLanguage: uint
+	{
+		/// <summary>Afrikaans</summary>
+		Afrikaans = 0x6661,
+		/// <summary>Albanian</summary>
+		Albanian = 0x7173,
+		/// <summary>Amharic</summary>
+		Amharic = 0x6D61,
+		/// <summary>Arabic</summary>
+		Arabic = 0x7261,
+		/// <summary>Armenian</summary>
+		Armenian = 0x7968,
+		/// <summary>Assamese</summary>
+		Assamese = 0x7361,
+		/// <summary>Azerbaijani</summary>
+		Azerbaijani = 0x7A61,
+		/// <summary>Bashkir</summary>
+		Bashkir = 0x6162,
+		/// <summary>Basque</summary>
+		Basque = 0x7565,
+		/// <summary>Belarusian</summary>
+		Belarusian = 0x6562,
+		/// <summary>Bengali</summary>
+		Bengali = 0x6E62,
+		/// <summary>Bosnian</summary>
+		Bosnian = 0x7362,
+		/// <summary>Breton</summary>
+		Breton = 0x7262,
+		/// <summary>Bulgarian</summary>
+		Bulgarian = 0x6762,
+		/// <summary>Catalan</summary>
+		Catalan = 0x6163,
+		/// <summary>Chinese</summary>
+		Chinese = 0x687A,
+		/// <summary>Croatian</summary>
+		Croatian = 0x7268,
+		/// <summary>Czech</summary>
+		Czech = 0x7363,
+		/// <summary>Danish</summary>
+		Danish = 0x6164,
+		/// <summary>Dutch</summary>
+		Dutch = 0x6C6E,
+		/// <summary>English</summary>
+		English = 0x6E65,
+		/// <summary>Estonian</summary>
+		Estonian = 0x7465,
+		/// <summary>Faroese</summary>
+		Faroese = 0x6F66,
+		/// <summary>Finnish</summary>
+		Finnish = 0x6966,
+		/// <summary>French</summary>
+		French = 0x7266,
+		/// <summary>Galician</summary>
+		Galician = 0x6C67,
+		/// <summary>Georgian</summary>
+		Georgian = 0x616B,
+		/// <summary>German</summary>
+		German = 0x6564,
+		/// <summary>Greek</summary>
+		Greek = 0x6C65,
+		/// <summary>Gujarati</summary>
+		Gujarati = 0x7567,
+		/// <summary>Haitian Creole</summary>
+		HaitianCreole = 0x7468,
+		/// <summary>Hausa</summary>
+		Hausa = 0x6168,
+		/// <summary>Hawaiian; the only value in this enum with a three-character code, "haw"</summary>
+		Hawaiian = 0x776168,
+		/// <summary>Hebrew</summary>
+		Hebrew = 0x7769,
+		/// <summary>Hindi</summary>
+		Hindi = 0x6968,
+		/// <summary>Hungarian</summary>
+		Hungarian = 0x7568,
+		/// <summary>Icelandic</summary>
+		Icelandic = 0x7369,
+		/// <summary>Indonesian</summary>
+		Indonesian = 0x6469,
+		/// <summary>Italian</summary>
+		Italian = 0x7469,
+		/// <summary>Japanese</summary>
+		Japanese = 0x616A,
+		/// <summary>Javanese</summary>
+		Javanese = 0x776A,
+		/// <summary>Kannada</summary>
+		Kannada = 0x6E6B,
+		/// <summary>Kazakh</summary>
+		Kazakh = 0x6B6B,
+		/// <summary>Khmer</summary>
+		Khmer = 0x6D6B,
+		/// <summary>Korean</summary>
+		Korean = 0x6F6B,
+		/// <summary>Lao</summary>
+		Lao = 0x6F6C,
+		/// <summary>Latin</summary>
+		Latin = 0x616C,
+		/// <summary>Latvian</summary>
+		Latvian = 0x766C,
+		/// <summary>Lingala</summary>
+		Lingala = 0x6E6C,
+		/// <summary>Lithuanian</summary>
+		Lithuanian = 0x746C,
+		/// <summary>Luxembourgish</summary>
+		Luxembourgish = 0x626C,
+		/// <summary>Macedonian</summary>
+		Macedonian = 0x6B6D,
+		/// <summary>Malagasy</summary>
+		Malagasy = 0x676D,
+		/// <summary>Malay</summary>
+		Malay = 0x736D,
+		/// <summary>Malayalam</summary>
+		Malayalam = 0x6C6D,
+		/// <summary>Maltese</summary>
+		Maltese = 0x746D,
+		/// <summary>Maori</summary>
+		Maori = 0x696D,
+		/// <summary>Marathi</summary>
+		Marathi = 0x726D,
+		/// <summary>Mongolian</summary>
+		Mongolian = 0x6E6D,
+		/// <summary>Myanmar</summary>
+		Myanmar = 0x796D,
+		/// <summary>Nepali</summary>
+		Nepali = 0x656E,
+		/// <summary>Norwegian</summary>
+		Norwegian = 0x6F6E,
+		/// <summary>Nynorsk</summary>
+		Nynorsk = 0x6E6E,
+		/// <summary>Occitan</summary>
+		Occitan = 0x636F,
+		/// <summary>Pashto</summary>
+		Pashto = 0x7370,
+		/// <summary>Persian</summary>
+		Persian = 0x6166,
+		/// <summary>Polish</summary>
+		Polish = 0x6C70,
+		/// <summary>Portuguese</summary>
+		Portuguese = 0x7470,
+		/// <summary>Punjabi</summary>
+		Punjabi = 0x6170,
+		/// <summary>Romanian</summary>
+		Romanian = 0x6F72,
+		/// <summary>Russian</summary>
+		Russian = 0x7572,
+		/// <summary>Sanskrit</summary>
+		Sanskrit = 0x6173,
+		/// <summary>Serbian</summary>
+		Serbian = 0x7273,
+		/// <summary>Shona</summary>
+		Shona = 0x6E73,
+		/// <summary>Sindhi</summary>
+		Sindhi = 0x6473,
+		/// <summary>Sinhala</summary>
+		Sinhala = 0x6973,
+		/// <summary>Slovak</summary>
+		Slovak = 0x6B73,
+		/// <summary>Slovenian</summary>
+		Slovenian = 0x6C73,
+		/// <summary>Somali</summary>
+		Somali = 0x6F73,
+		/// <summary>Spanish</summary>
+		Spanish = 0x7365,
+		/// <summary>Sundanese</summary>
+		Sundanese = 0x7573,
+		/// <summary>Swahili</summary>
+		Swahili = 0x7773,
+		/// <summary>Swedish</summary>
+		Swedish = 0x7673,
+		/// <summary>Tagalog</summary>
+		Tagalog = 0x6C74,
+		/// <summary>Tajik</summary>
+		Tajik = 0x6774,
+		/// <summary>Tamil</summary>
+		Tamil = 0x6174,
+		/// <summary>Tatar</summary>
+		Tatar = 0x7474,
+		/// <summary>Telugu</summary>
+		Telugu = 0x6574,
+		/// <summary>Thai</summary>
+		Thai = 0x6874,
+		/// <summary>Tibetan</summary>
+		Tibetan = 0x6F62,
+		/// <summary>Turkish</summary>
+		Turkish = 0x7274,
+		/// <summary>Turkmen</summary>
+		Turkmen = 0x6B74,
+		/// <summary>Ukrainian</summary>
+		Ukrainian = 0x6B75,
+		/// <summary>Urdu</summary>
+		Urdu = 0x7275,
+		/// <summary>Uzbek</summary>
+		Uzbek = 0x7A75,
+		/// <summary>Vietnamese</summary>
+		Vietnamese = 0x6976,
+		/// <summary>Welsh</summary>
+		Welsh = 0x7963,
+		/// <summary>Yiddish</summary>
+		Yiddish = 0x6979,
+		/// <summary>Yoruba</summary>
+		Yoruba = 0x6F79,
+	}
+}
\ No newline at end of file
diff --git a/WhisperNet/API/eLogLevel.cs b/WhisperNet/API/eLogLevel.cs
new file mode 100644
index 0000000..ae494d4
--- /dev/null
+++ b/WhisperNet/API/eLogLevel.cs
@@ -0,0 +1,34 @@
+﻿namespace Whisper
+{
+	/// <summary>Message log level; numerically lower values are more severe</summary>
+	public enum eLogLevel: byte
+	{
+		/// <summary>Error message</summary>
+		Error = 0,
+		/// <summary>Warning message</summary>
+		Warning = 1,
+		/// <summary>Informational message</summary>
+		Info = 2,
+		/// <summary>Debug message</summary>
+		Debug = 3
+	}
+
+	/// <summary>A delegate to receive log messages from the library, each one with the level and the text of the message</summary>
+	public delegate void pfnLogMessage( eLogLevel level, string message );
+
+	/// <summary>Log destination flags</summary>
+	[Flags]
+	public enum eLoggerFlags: byte
+	{
+		/// <summary>No special flags</summary>
+		None = 0,
+
+		/// <summary>In addition to calling the delegate, print messages to standard error</summary>
+		UseStandardError = 1,
+
+		/// <summary>Don’t format error codes into messages</summary>
+		/// <remarks>It’s recommended to use this flag in .NET.<br/>
+		/// The standard library already formats these messages automatically, as needed.</remarks>
+		SkipFormatMessage = 2,
+	}
+}
\ No newline at end of file
diff --git a/WhisperNet/API/eModelImplementation.cs b/WhisperNet/API/eModelImplementation.cs
new file mode 100644
index 0000000..1b0a079
--- /dev/null
+++ b/WhisperNet/API/eModelImplementation.cs
@@ -0,0 +1,25 @@
+﻿namespace Whisper
+{
+	/// <summary>Implementation value for the <see cref="Library.loadModel(string, eModelImplementation)" /> factory function</summary>
+	public enum eModelImplementation: uint
+	{
+		/// <summary>GPGPU implementation based on Direct3D 11.0 compute shaders</summary>
+		GPU = 1,
+
+		/// <summary>A hybrid implementation which uses DirectCompute for encode, and decodes on CPU</summary>
+		/// <remarks>
+		/// <para>The build of the native DLL included in this NuGet package doesn’t implement this version.<br/>
+		/// To enable, edit <c>stdafx.h</c> in the Whisper project, change the value of the <c>BUILD_HYBRID_VERSION</c> macro from zero to one, and build.</para>
+		/// <para>This implementation requires a CPU with AVX1, FMA3, F16C and BMI1 instruction set extensions.</para>
+		/// </remarks>
+		Hybrid = 2,
+
+		/// <summary>A reference implementation which uses the original GGML CPU-running code.</summary>
+		/// <remarks>
+		/// <para>The build of the native DLL included in this NuGet package doesn’t implement this version either.<br/>
+		/// To enable, edit <c>stdafx.h</c> in the Whisper project, change the value of the <c>BUILD_BOTH_VERSIONS</c> macro from zero to one, and build the project.</para>
+		/// <para>This implementation requires a CPU with AVX1, FMA3, and F16C instruction set extensions.</para>
+		/// </remarks>
+		Reference = 3,
+	}
+}
\ No newline at end of file
diff --git a/WhisperNet/API/eResultFlags.cs b/WhisperNet/API/eResultFlags.cs
new file mode 100644
index 0000000..1de61ab
--- /dev/null
+++ b/WhisperNet/API/eResultFlags.cs
@@ -0,0 +1,21 @@
+﻿namespace Whisper
+{
+	/// <summary>Flags for the <see cref="Context.results(eResultFlags)" /> method; this is a bit mask, the values can be combined</summary>
+	[Flags]
+	public enum eResultFlags: uint
+	{
+		/// <summary>No flags</summary>
+		None = 0,
+
+		/// <summary>Return individual tokens in addition to the segments</summary>
+		Tokens = 1,
+
+		/// <summary>Return timestamps</summary>
+		Timestamps = 2,
+
+		/// <summary>Create a new COM object for the results.</summary>
+		/// <remarks>Without this flag, the context returns a pointer to the COM object stored in the context.<br/>
+		/// The content of that object is replaced every time you call the <see cref="Internal.iContext.getResults(eResultFlags)" /> method.</remarks>
+		NewObject = 0x100,
+	}
+}
\ No newline at end of file
diff --git a/WhisperNet/API/iAudioBuffer.cs b/WhisperNet/API/iAudioBuffer.cs
new file mode 100644
index 0000000..1b35621
--- /dev/null
+++ b/WhisperNet/API/iAudioBuffer.cs
@@ -0,0 +1,27 @@
+﻿using ComLight;
+using System.Runtime.InteropServices;
+
+namespace Whisper
+{
+	/// <summary>A buffer with a chunk of audio.</summary>
+	/// <remarks>Note the interface supports both marshaling directions.<br/>
+	/// This has not been tested, but it should be possible to implement this interface in C#, to supply PCM audio data to the native code</remarks>
+	[ComInterface( "013583aa-c9eb-42bc-83db-633c2c317051", eMarshalDirection.BothWays )]
+	public interface iAudioBuffer: IDisposable
+	{
+		/// <summary>Count of samples in the buffer</summary>
+		int countSamples();
+
+		/// <summary>Unmanaged pointer to the internal buffer containing single-channel FP32 samples.</summary>
+		/// <remarks>If you are implementing this interface in C# and your audio data is on the managed heap, use <see cref="GCHandle" /> to make sure it doesn't move.<br/>
+		/// Or better yet, move the data to an unmanaged buffer allocated with the <see cref="Marshal.AllocHGlobal(int)" /> or <see cref="Marshal.AllocCoTaskMem(int)" /> method.</remarks>
+		IntPtr getPcmMono();
+
+		/// <summary>Unmanaged pointer to the internal buffer containing stereo FP32 samples.</summary>
+		/// <remarks>When the buffer doesn’t have stereo data, the method returns <see cref="IntPtr.Zero" />.</remarks>
+		IntPtr getPcmStereo();
+
+		/// <summary>Start time of the buffer, relative to the start of the media</summary>
+		void getTime( out TimeSpan time );
+	}
+}
\ No newline at end of file
diff --git a/WhisperNet/API/iAudioReader.cs b/WhisperNet/API/iAudioReader.cs
new file mode 100644
index 0000000..68cf916
--- /dev/null
+++ b/WhisperNet/API/iAudioReader.cs
@@ -0,0 +1,23 @@
+﻿using ComLight;
+
+namespace Whisper
+{
+	/// <summary>Audio stream reader object</summary>
+	/// <remarks>The implementation is forward-only, and these objects are not reusable.<br/>
+	/// To read a source file multiple times, dispose and re-create the reader.</remarks>
+	[ComInterface( "35b988da-04a6-476a-a193-d8891d5dc390", eMarshalDirection.ToManaged )]
+	public interface iAudioReader: IDisposable
+	{
+		/// <summary>Get the duration of the media file</summary>
+		[RetValIndex]
+		TimeSpan getDuration();
+	}
+
+	/// <summary>Audio capture reader object</summary>
+	/// <remarks>This interface declares no methods of its own which are callable from C#.<br/>
+	/// It’s only here to pass data between different functions implemented in C++.</remarks>
+	[ComInterface( "747752c2-d9fd-40df-8847-583c781bf013", eMarshalDirection.ToManaged )]
+	public interface iAudioCapture: IDisposable
+	{
+	}
+}
\ No newline at end of file
diff --git a/WhisperNet/API/iMediaFoundation.cs b/WhisperNet/API/iMediaFoundation.cs
new file mode 100644
index 0000000..535f904
--- /dev/null
+++ b/WhisperNet/API/iMediaFoundation.cs
@@ -0,0 +1,36 @@
+﻿using ComLight;
+using System.Runtime.InteropServices;
+using Whisper.Internal;
+
+namespace Whisper
+{
+	/// <summary>Exposes a small subset of the MS Media Foundation framework.</summary>
+	/// <remarks>That framework is a part of Windows OS, since Vista.</remarks>
+	/// <seealso href="https://learn.microsoft.com/en-us/windows/win32/medfound/microsoft-media-foundation-sdk" />
+	[ComInterface( "fb9763a5-d77d-4b6e-aff8-f494813cebd8", eMarshalDirection.ToManaged ), CustomConventions( typeof( NativeLogger ) )]
+	public interface iMediaFoundation: IDisposable
+	{
+		/// <summary>Decode complete audio file into a new memory buffer.</summary>
+		/// <remarks>
+		/// Under the hood, the method asks MF to resample and convert audio into the suitable type for the Whisper model.<br/>
+		/// If the path is a video file, the method will decode the first audio track.
+		/// </remarks>
+		[RetValIndex( 2 )]
+		iAudioBuffer loadAudioFile( [MarshalAs( UnmanagedType.LPWStr )] string path, [MarshalAs( UnmanagedType.U1 )] bool stereo = false );
+
+		/// <summary>Create a reader to stream the audio file from disk</summary>
+		/// <remarks>
+		/// Under the hood, the method asks MF to resample and convert audio into the suitable type for the Whisper model.<br/>
+		/// If the path is a video file, the method will decode the first audio track.
+		/// </remarks>
+		[RetValIndex( 2 )]
+		iAudioReader openAudioFile( [MarshalAs( UnmanagedType.LPWStr )] string path, [MarshalAs( UnmanagedType.U1 )] bool stereo = false );
+
+		/// <summary>List capture devices, passing them to the supplied function pointer</summary>
+		void listCaptureDevices( [MarshalAs( UnmanagedType.FunctionPtr )] pfnFoundCaptureDevices pfn, IntPtr pv );
+
+		/// <summary>Open the audio capture device identified by the endpoint string, with the supplied capture parameters</summary>
+		[RetValIndex( 2 )]
+		iAudioCapture openCaptureDevice( [MarshalAs( UnmanagedType.LPWStr )] string endpoint, [In] ref sCaptureParams captureParams );
+	}
+}
\ No newline at end of file
diff --git a/WhisperNet/API/iModel.cs b/WhisperNet/API/iModel.cs
new file mode 100644
index 0000000..8ec6d17
--- /dev/null
+++ b/WhisperNet/API/iModel.cs
@@ -0,0 +1,27 @@
+﻿using ComLight;
+using System.ComponentModel;
+
+namespace Whisper
+{
+	/// <summary>A model in VRAM, loaded from GGML file.</summary>
+	/// <remarks>This object doesn't keep any mutable state, and can be safely used from multiple threads concurrently</remarks>
+	[ComInterface( "abefb4c9-e8d8-46a3-8747-5afbadef1adb", eMarshalDirection.ToManaged ), CustomConventions( typeof( Internal.NativeLogger ) )]
+	public interface iModel: IDisposable
+	{
+		/// <summary>Create a context to transcribe audio with this model</summary>
+		/// <remarks>Don't call this method, use <see cref="ExtensionMethods.createContext(iModel)" /> instead.</remarks>
+		[RetValIndex, EditorBrowsable( EditorBrowsableState.Never )]
+		Internal.iContext createContextInternal();
+
+		/// <summary>True if this model is multi-lingual</summary>
+		bool isMultilingual();
+
+		/// <summary>Retrieve integer IDs of the special tokens defined by the model</summary>
+		[RetValIndex]
+		SpecialTokens getSpecialTokens();
+
+		/// <summary>Try to resolve an integer token ID into a string; this low-level version returns an unmanaged pointer.</summary>
+		/// <remarks>Don't call this method, use <see cref="ExtensionMethods.stringFromToken(iModel, int)" /> instead.</remarks>
+		IntPtr stringFromTokenInternal( int id );
+	}
+}
\ No newline at end of file
diff --git a/WhisperNet/API/sCaptureParams.cs b/WhisperNet/API/sCaptureParams.cs
new file mode 100644
index 0000000..7595a69
--- /dev/null
+++ b/WhisperNet/API/sCaptureParams.cs
@@ -0,0 +1,37 @@
+﻿namespace Whisper
+{
+	/// <summary>Flags for the audio capture; see the <see cref="sCaptureParams.flags" /> field</summary>
+	[Flags]
+	public enum eCaptureFlags: uint
+	{
+		/// <summary>No special flags</summary>
+		None = 0,
+		/// <summary>When the capture device supports stereo, keep stereo PCM samples in addition to mono</summary>
+		Stereo = 1,
+	}
+
+	/// <summary>Parameters for audio capture</summary>
+	public struct sCaptureParams
+	{
+		/// <summary>Minimum transcribe duration in seconds</summary>
+		public float minDuration;
+		/// <summary>Maximum transcribe duration in seconds</summary>
+		public float maxDuration;
+		/// <summary>NOTE(review): undocumented. Going by the name and the default value, a duration in seconds of the initial silence to drop; confirm against the native code</summary>
+		public float dropStartSilence;
+		/// <summary>NOTE(review): undocumented. Going by the name and the default value, a duration of a pause in seconds; confirm against the native code</summary>
+		public float pauseDuration;
+		/// <summary>Flags for the audio capture</summary>
+		public eCaptureFlags flags;
+
+		/// <summary>Initialize the structure with some reasonable default values; all durations are in seconds</summary>
+		public sCaptureParams()
+		{
+			minDuration = 7.0f;			// 7 seconds
+			maxDuration = 11.0f;		// 11 seconds
+			dropStartSilence = 0.25f;	// 250 ms
+			pauseDuration = 0.333f;		// 333 ms
+			flags = eCaptureFlags.None;
+		}
+	}
+}
\ No newline at end of file
-- 
cgit v1.2.3