// Missing XML comment for publicly visible type or member
// TODO: remove this line and document them.
#pragma warning disable CS1591
namespace Whisper
{
/// <summary>Available sampling strategies</summary>
public enum eSamplingStrategy: int
{
	/// <summary>Always select the most probable token</summary>
	Greedy = 0,

	/// <summary>TODO: not implemented yet!</summary>
	BeamSearch = 1,
}
/// <summary>Bit flags; individual values occupy distinct bits and can be combined with bitwise OR</summary>
[System.Flags]
public enum eFullParamsFlags: uint
{
	/// <summary>No flags are set</summary>
	None = 0,

	Translate = 1u << 0,        // 0x01
	NoContext = 1u << 1,        // 0x02
	SingleSegment = 1u << 2,    // 0x04
	PrintSpecial = 1u << 3,     // 0x08
	PrintProgress = 1u << 4,    // 0x10
	PrintRealtime = 1u << 5,    // 0x20
	PrintTimestamps = 1u << 6,  // 0x40

	// Bit 7 ( 0x80 ) is not assigned to any member of this enum.

	// Experimental
	TokenTimestamps = 1u << 8,  // 0x100
	SpeedupAudio = 1u << 9,     // 0x200
}
/// <summary>Transcribe parameters</summary>
// NOTE(review): the order of the fields below is kept exactly as it was. Presumably this
// structure is passed to native code with a fixed layout - confirm before reordering anything.
public struct Parameters
{
	/// <summary>Sampling strategy</summary>
	public eSamplingStrategy strategy;

	/// <summary>Count of CPU worker threads to use</summary>
	/// <remarks>So far, the GPU model only uses CPU threads for MEL spectrograms</remarks>
	public int cpuThreads;

	// NOTE(review): undocumented; judging by the name, a limit on the text context size - confirm
	public int n_max_text_ctx;

	/// <summary>start offset in ms</summary>
	public int offset_ms;

	/// <summary>audio duration to process in ms</summary>
	public int duration_ms;

	/// <summary>Combination of <see cref="eFullParamsFlags" /> bits; use <see cref="setFlag" /> to change individual bits</summary>
	public eFullParamsFlags flags;

	/// <summary>Set or clear the specified flag in the field of this structure</summary>
	/// <param name="flag">The bit, or combination of bits, to change in <see cref="flags" />; must not be <see cref="eFullParamsFlags.None" /></param>
	/// <param name="set"><c>true</c> to set the bits, <c>false</c> to clear them</param>
	/// <exception cref="System.ArgumentException"><paramref name="flag" /> is <see cref="eFullParamsFlags.None" /></exception>
	public void setFlag( eFullParamsFlags flag, bool set )
	{
		// None has no bits, so setting or clearing it would silently do nothing; reject it instead.
		if( flag == eFullParamsFlags.None )
			throw new System.ArgumentException( "The flag to set or clear must not be eFullParamsFlags.None", nameof( flag ) );

		if( set )
			flags |= flag;
		else
			flags &= ~flag;
	}

	/// <summary>Language</summary>
	public eLanguage language;

	// [EXPERIMENTAL] token-level timestamps

	/// <summary>timestamp token probability threshold (~0.01)</summary>
	public float thold_pt;

	/// <summary>timestamp token sum probability threshold (~0.01)</summary>
	public float thold_ptsum;

	/// <summary>max segment length in characters</summary>
	public int max_len;

	/// <summary>max tokens per segment (0 = no limit)</summary>
	public int max_tokens;

	// NOTE(review): undocumented; presumably the settings used with eSamplingStrategy.Greedy - confirm
	public struct sGreedy
	{
		public int n_past;
	}
	public sGreedy greedy;

	// NOTE(review): undocumented; presumably the settings used with eSamplingStrategy.BeamSearch - confirm
	public struct sBeamSearch
	{
		public int n_past;
		public int beam_width;
		public int n_best;
	}
	public sBeamSearch beamSearch;

	// [EXPERIMENTAL] speed-up techniques

	/// <summary>overwrite the audio context size (0 = use default)</summary>
	public int audioContextSize;
}
}