// Provenance: WhisperNet/API/Parameters.cs, as introduced by commit
// 8c4603c73675958efc960fbd4bb599a2909d106a ("Source codes", Konstantin, 2023-01-16).
// Reconstructed from a whitespace-collapsed patch; diff markers were removed.

// Missing XML comment for publicly visible type or member
// TODO: remove this line and document them.
#pragma warning disable CS1591

namespace Whisper
{
    /// <summary>Available sampling strategies</summary>
    public enum eSamplingStrategy: int
    {
        /// <summary>Always select the most probable token</summary>
        Greedy,
        /// <summary>TODO: not implemented yet!</summary>
        BeamSearch,
    }

    /// <summary>Bit flags stored in <see cref="Parameters.flags"/>; values can be combined with bitwise OR.</summary>
    /// <remarks>
    /// NOTE(review): the individual flag meanings are not documented in this file.
    /// The names look like they mirror boolean options of the underlying Whisper implementation - confirm before documenting each one.
    /// </remarks>
    // Fully qualified so the file compiles with or without an implicit `using System;`
    [System.Flags]
    public enum eFullParamsFlags: uint
    {
        /// <summary>No flags set</summary>
        None = 0,
        Translate = 1,
        NoContext = 2,
        SingleSegment = 4,
        PrintSpecial = 8,
        PrintProgress = 0x10,
        PrintRealtime = 0x20,
        PrintTimestamps = 0x40,

        // Experimental
        TokenTimestamps = 0x100,
        SpeedupAudio = 0x200,
    }

    /// <summary>Transcribe parameters</summary>
    /// <remarks>
    /// NOTE(review): this is a plain struct of public fields, presumably passed to native code.
    /// Do not reorder fields or change their types without verifying the consuming side.
    /// </remarks>
    public struct Parameters
    {
        /// <summary>Sampling strategy</summary>
        public eSamplingStrategy strategy;

        /// <summary>Count of CPU worker threads to use</summary>
        /// <remarks>So far, the GPU model only uses CPU threads for MEL spectrograms</remarks>
        public int cpuThreads;

        // NOTE(review): undocumented in the original; presumably the maximum text context size - TODO confirm
        public int n_max_text_ctx;
        /// <summary>start offset in ms</summary>
        public int offset_ms;
        /// <summary>audio duration to process in ms</summary>
        public int duration_ms;
        /// <summary>Combination of <see cref="eFullParamsFlags"/> bits; see <see cref="setFlag"/></summary>
        public eFullParamsFlags flags;

        /// <summary>Set or clear the specified flag in the <see cref="flags"/> field of this structure</summary>
        /// <param name="flag">One or more flag bits to change; must not be <see cref="eFullParamsFlags.None"/></param>
        /// <param name="set"><c>true</c> to set the bits, <c>false</c> to clear them</param>
        /// <exception cref="System.ArgumentException"><paramref name="flag"/> is <see cref="eFullParamsFlags.None"/></exception>
        public void setFlag( eFullParamsFlags flag, bool set )
        {
            // Reject the empty value up front: changing zero bits would silently do nothing
            if( flag == eFullParamsFlags.None )
                throw new System.ArgumentException( "At least one flag bit must be specified.", nameof( flag ) );

            if( set )
                flags |= flag;
            else
                flags &= ~flag;
        }

        /// <summary>Language</summary>
        public eLanguage language;

        // [EXPERIMENTAL] token-level timestamps
        /// <summary>timestamp token probability threshold (~0.01)</summary>
        public float thold_pt;
        /// <summary>timestamp token sum probability threshold (~0.01)</summary>
        public float thold_ptsum;
        /// <summary>max segment length in characters</summary>
        public int max_len;
        /// <summary>max tokens per segment (0 = no limit)</summary>
        public int max_tokens;

        /// <summary>Options specific to <see cref="eSamplingStrategy.Greedy"/></summary>
        public struct sGreedy
        {
            // NOTE(review): meaning not documented in this file - TODO confirm
            public int n_past;
        }
        /// <summary>Settings for the greedy sampling strategy</summary>
        public sGreedy greedy;

        /// <summary>Options specific to <see cref="eSamplingStrategy.BeamSearch"/></summary>
        public struct sBeamSearch
        {
            // NOTE(review): meanings of these three fields are not documented in this file - TODO confirm
            public int n_past;
            public int beam_width;
            public int n_best;
        }
        /// <summary>Settings for the beam search sampling strategy</summary>
        public sBeamSearch beamSearch;

        // [EXPERIMENTAL] speed-up techniques
        /// <summary>overwrite the audio context size (0 = use default)</summary>
        public int audioContextSize;
    }
}