summary | refs | log | tree | commit | diff | stats
path: root/Examples/main/params.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'Examples/main/params.cpp')
-rw-r--r-- Examples/main/params.cpp 101
1 files changed, 101 insertions, 0 deletions
diff --git a/Examples/main/params.cpp b/Examples/main/params.cpp
new file mode 100644
index 0000000..ff1cfdd
--- /dev/null
+++ b/Examples/main/params.cpp
@@ -0,0 +1,101 @@
+#include "params.h"
+#include <algorithm>
+#include <cstdio>
+#include <stdexcept>
+#include <string>
+#include <thread>
+#include "miscUtils.h"
+
+// Sets the default number of worker threads.
+whisper_params::whisper_params()
+{
+#ifdef _DEBUG
+	// When _DEBUG is defined, a fixed count of 2 is used
+	n_threads = 2;
+#else
+	// hardware_concurrency() is allowed to return 0 when the value is not computable;
+	// clamp the result to [ 1 .. 4 ] so the default never becomes zero threads
+	const unsigned int hardwareThreads = std::thread::hardware_concurrency();
+	n_threads = std::max( 1u, std::min( 4u, hardwareThreads ) );
+#endif
+}
+
+namespace
+{
+	// Text form of a boolean, as shown in the usage message: "true" or "false"
+	const char* cstr( bool b )
+	{
+		if( b )
+			return "true";
+		return "false";
+	}
+}
+
+// Writes the command-line help text to stderr.
+// argv[ 0 ] is printed as the program name; the bracketed defaults are read from params.
+// argc is accepted but not used by this function.
+void whisper_print_usage( int argc, wchar_t** argv, const whisper_params& params )
+{
+	FILE* const out = stderr;
+	// argv holds wide strings, which is why the format strings below use %S for it
+	const wchar_t* const programName = argv[ 0 ];
+	const char* const language = params.language.c_str();
+
+	// Header
+	fprintf( out, "\n" );
+	fprintf( out, "usage: %S [options] file0.wav file1.wav ...\n", programName );
+	fprintf( out, "\n" );
+	fprintf( out, "options:\n" );
+	fprintf( out, " -h, --help [default] show this help message and exit\n" );
+
+	// Numeric options
+	fprintf( out, " -t N, --threads N [%-7d] number of threads to use during computation\n", params.n_threads );
+	fprintf( out, " -p N, --processors N [%-7d] number of processors to use during computation\n", params.n_processors );
+	fprintf( out, " -ot N, --offset-t N [%-7d] time offset in milliseconds\n", params.offset_t_ms );
+	fprintf( out, " -on N, --offset-n N [%-7d] segment index offset\n", params.offset_n );
+	fprintf( out, " -d N, --duration N [%-7d] duration of audio to process in milliseconds\n", params.duration_ms );
+	fprintf( out, " -mc N, --max-context N [%-7d] maximum number of text context tokens to store\n", params.max_context );
+	fprintf( out, " -ml N, --max-len N [%-7d] maximum segment length in characters\n", params.max_len );
+	fprintf( out, " -wt N, --word-thold N [%-7.2f] word timestamp probability threshold\n", params.word_thold );
+
+	// Boolean switches
+	fprintf( out, " -su, --speed-up [%-7s] speed up audio by x2 (reduced accuracy)\n", cstr( params.speed_up ) );
+	fprintf( out, " -tr, --translate [%-7s] translate from source language to english\n", cstr( params.translate ) );
+	fprintf( out, " -di, --diarize [%-7s] stereo audio diarization\n", cstr( params.diarize ) );
+	fprintf( out, " -otxt, --output-txt [%-7s] output result in a text file\n", cstr( params.output_txt ) );
+	fprintf( out, " -ovtt, --output-vtt [%-7s] output result in a vtt file\n", cstr( params.output_vtt ) );
+	fprintf( out, " -osrt, --output-srt [%-7s] output result in a srt file\n", cstr( params.output_srt ) );
+	fprintf( out, " -owts, --output-words [%-7s] output script for generating karaoke video\n", cstr( params.output_wts ) );
+	fprintf( out, " -ps, --print-special [%-7s] print special tokens\n", cstr( params.print_special ) );
+	// The switch disables colors, so the value shown is the inverse of print_colors
+	fprintf( out, " -nc, --no-colors [%-7s] do not print colors\n", cstr( !params.print_colors ) );
+	fprintf( out, " -nt, --no-timestamps [%-7s] do not print timestamps\n", cstr( params.no_timestamps ) );
+
+	// String options
+	fprintf( out, " -l LANG, --language LANG [%-7s] spoken language\n", language );
+	fprintf( out, " -m FNAME, --model FNAME [%-7S] model path\n", params.model.c_str() );
+	fprintf( out, " -f FNAME, --file FNAME [%-7s] path of the input audio file\n", "" );
+	fprintf( out, "\n" );
+}
+
+// Parses the command line into this object.
+// Arguments which do not start with '-' are appended to fname_inp.
+// Returns true when every argument was consumed.
+// Returns false after printing the usage text when help was requested, an argument is unknown,
+// an option is missing its value, or a numeric value can not be converted.
+bool whisper_params::parse( int argc, wchar_t* argv[] )
+{
+	for( int i = 1; i < argc; i++ )
+	{
+		const std::wstring arg = argv[ i ];
+
+		if( arg[ 0 ] != '-' )
+		{
+			fname_inp.push_back( arg );
+			continue;
+		}
+
+		if( arg == L"-h" || arg == L"--help" )
+		{
+			whisper_print_usage( argc, argv, *this );
+			return false;
+		}
+
+		// Consumes and returns the value which follows the current option.
+		// Throws instead of reading argv[ argc ] when the option is the last argument.
+		const auto value = [ & ]() -> wchar_t*
+		{
+			if( i + 1 >= argc )
+				throw std::invalid_argument( "missing option value" );
+			return argv[ ++i ];
+		};
+
+		try
+		{
+			if( arg == L"-t" || arg == L"--threads" ) { n_threads = std::stoul( value() ); }
+			else if( arg == L"-p" || arg == L"--processors" ) { n_processors = std::stoul( value() ); }
+			else if( arg == L"-ot" || arg == L"--offset-t" ) { offset_t_ms = std::stoul( value() ); }
+			else if( arg == L"-on" || arg == L"--offset-n" ) { offset_n = std::stoul( value() ); }
+			else if( arg == L"-d" || arg == L"--duration" ) { duration_ms = std::stoul( value() ); }
+			else if( arg == L"-mc" || arg == L"--max-context" ) { max_context = std::stoul( value() ); }
+			else if( arg == L"-ml" || arg == L"--max-len" ) { max_len = std::stoul( value() ); }
+			else if( arg == L"-wt" || arg == L"--word-thold" ) { word_thold = std::stof( value() ); }
+			else if( arg == L"-su" || arg == L"--speed-up" ) { speed_up = true; }
+			else if( arg == L"-tr" || arg == L"--translate" ) { translate = true; }
+			else if( arg == L"-di" || arg == L"--diarize" ) { diarize = true; }
+			else if( arg == L"-otxt" || arg == L"--output-txt" ) { output_txt = true; }
+			else if( arg == L"-ovtt" || arg == L"--output-vtt" ) { output_vtt = true; }
+			else if( arg == L"-osrt" || arg == L"--output-srt" ) { output_srt = true; }
+			else if( arg == L"-owts" || arg == L"--output-words" ) { output_wts = true; }
+			else if( arg == L"-ps" || arg == L"--print-special" ) { print_special = true; }
+			else if( arg == L"-nc" || arg == L"--no-colors" ) { print_colors = false; }
+			else if( arg == L"-nt" || arg == L"--no-timestamps" ) { no_timestamps = true; }
+			else if( arg == L"-l" || arg == L"--language" ) { language = utf8( value() ); }
+			else if( arg == L"-m" || arg == L"--model" ) { model = value(); }
+			else if( arg == L"-f" || arg == L"--file" ) { fname_inp.push_back( value() ); }
+			else
+			{
+				fprintf( stderr, "error: unknown argument: %S\n", arg.c_str() );
+				whisper_print_usage( argc, argv, *this );
+				return false;
+			}
+		}
+		catch( const std::logic_error& )
+		{
+			// std::invalid_argument or std::out_of_range, thrown by value(), std::stoul or std::stof
+			fprintf( stderr, "error: missing or invalid value for argument: %S\n", arg.c_str() );
+			whisper_print_usage( argc, argv, *this );
+			return false;
+		}
+	}
+	return true;
+} \ No newline at end of file