summaryrefslogtreecommitdiffstats
path: root/WhisperNet/API/Parameters.cs
diff options
context:
space:
mode:
authorKonstantin <const@const.me>2023-01-16 14:52:43 +0100
committerKonstantin <const@const.me>2023-01-16 14:52:43 +0100
commit8c4603c73675958efc960fbd4bb599a2909d106a (patch)
tree714dc6fc9a1672d5fd7f89676b97e10959662abc /WhisperNet/API/Parameters.cs
parent990a8d0dbaefc996244097397259e92758b15cce (diff)
Source codes
Diffstat (limited to 'WhisperNet/API/Parameters.cs')
-rw-r--r--WhisperNet/API/Parameters.cs95
1 files changed, 95 insertions, 0 deletions
diff --git a/WhisperNet/API/Parameters.cs b/WhisperNet/API/Parameters.cs
new file mode 100644
index 0000000..d2b53f9
--- /dev/null
+++ b/WhisperNet/API/Parameters.cs
@@ -0,0 +1,95 @@
+// CS1591 is "Missing XML comment for publicly visible type or member".
+// TODO: document the remaining public members, then remove the #pragma below.
+#pragma warning disable CS1591
+
+namespace Whisper
+{
+ /// <summary>Sampling strategies which can be selected in <see cref="Parameters.strategy" /></summary>
+ public enum eSamplingStrategy: int
+ {
+     /// <summary>Always pick the token with the highest probability</summary>
+     Greedy = 0,
+     /// <summary>Beam search. TODO: not implemented yet!</summary>
+     BeamSearch = 1,
+ }
+
+ /// <summary>Bit flags stored in <see cref="Parameters.flags" />; they can be toggled with <see cref="Parameters.setFlag" /></summary>
+ /// <remarks>Every named member occupies exactly one bit. Bit 7 (0x80) has no named member in this enumeration.</remarks>
+ [Flags]
+ public enum eFullParamsFlags: uint
+ {
+     /// <summary>No bits set</summary>
+     None = 0,
+     Translate = 1u << 0,
+     NoContext = 1u << 1,
+     SingleSegment = 1u << 2,
+     PrintSpecial = 1u << 3,
+     PrintProgress = 1u << 4,
+     PrintRealtime = 1u << 5,
+     PrintTimestamps = 1u << 6,
+
+     // Experimental
+     TokenTimestamps = 1u << 8,
+     SpeedupAudio = 1u << 9,
+ }
+
+ /// <summary>Transcribe parameters</summary>
+ /// <remarks>NOTE(review): this looks like a plain-data structure which is handed over to native code.
+ /// If so, the order and the types of the fields are part of the contract; confirm before reordering anything.</remarks>
+ public struct Parameters
+ {
+     /// <summary>Sampling strategy</summary>
+     public eSamplingStrategy strategy;
+
+     /// <summary>Count of CPU worker threads to use</summary>
+     /// <remarks>So far, the GPU model only uses CPU threads for MEL spectrograms</remarks>
+     public int cpuThreads;
+
+     /// <summary>NOTE(review): undocumented in the original; the name suggests an upper limit
+     /// for the count of text context tokens. Confirm the meaning and the units.</summary>
+     public int n_max_text_ctx;
+     /// <summary>start offset in ms</summary>
+     public int offset_ms;
+     /// <summary>audio duration to process in ms</summary>
+     public int duration_ms;
+     /// <summary>Combination of <see cref="eFullParamsFlags" /> bits; <see cref="setFlag" /> changes individual bits</summary>
+     public eFullParamsFlags flags;
+
+     /// <summary>Set or clear the specified flag in the <see cref="flags" /> field of this structure</summary>
+     /// <param name="flag">The bit, or a combination of bits, to change. Must not be <see cref="eFullParamsFlags.None" />.</param>
+     /// <param name="set">True to set the bits, false to clear them</param>
+     /// <exception cref="ArgumentException"><paramref name="flag" /> is <see cref="eFullParamsFlags.None" /></exception>
+     public void setFlag( eFullParamsFlags flag, bool set )
+     {
+         // An empty mask would turn the call into a silent no-op, so it is rejected up front.
+         // The parameter name and the message make the failure self-explanatory for the caller.
+         if( flag == eFullParamsFlags.None )
+             throw new ArgumentException( "At least one flag bit must be specified", nameof( flag ) );
+
+         if( set )
+             flags |= flag;
+         else
+             flags &= ~flag;
+     }
+
+     /// <summary>Language</summary>
+     public eLanguage language;
+
+     // [EXPERIMENTAL] token-level timestamps
+     /// <summary>timestamp token probability threshold (~0.01)</summary>
+     public float thold_pt;
+     /// <summary>timestamp token sum probability threshold (~0.01)</summary>
+     public float thold_ptsum;
+     /// <summary>max segment length in characters</summary>
+     public int max_len;
+     /// <summary>max tokens per segment (0 = no limit)</summary>
+     public int max_tokens;
+
+     /// <summary>Settings kept in the <see cref="greedy" /> field</summary>
+     /// <remarks>NOTE(review): presumably only relevant for <see cref="eSamplingStrategy.Greedy" />; confirm.</remarks>
+     public struct sGreedy
+     {
+         public int n_past;
+     }
+     public sGreedy greedy;
+
+     /// <summary>Settings kept in the <see cref="beamSearch" /> field</summary>
+     /// <remarks>NOTE(review): presumably only relevant for <see cref="eSamplingStrategy.BeamSearch" />,
+     /// which is marked as not implemented yet; confirm.</remarks>
+     public struct sBeamSearch
+     {
+         public int n_past;
+         public int beam_width;
+         public int n_best;
+     }
+     public sBeamSearch beamSearch;
+
+     // [EXPERIMENTAL] speed-up techniques
+     /// <summary>overwrite the audio context size (0 = use default)</summary>
+     public int audioContextSize;
+ }
+} \ No newline at end of file