From 6bed3a15edf134fa176ca4866b4346017bc97ada Mon Sep 17 00:00:00 2001 From: yum Date: Sun, 1 Jan 2023 21:09:32 -0800 Subject: Portability bugfixes * Expose option to run transcription engine on CPU instead of GPU * Use embedded git when setting up the Python virtual environment --- GUI/GUI/GUI/Frame.cpp | 17 ++++++++++++++++- GUI/GUI/GUI/Frame.h | 1 + GUI/GUI/GUI/PythonWrapper.cpp | 31 ++++++++++++++++++++++++++----- GUI/GUI/GUI/PythonWrapper.h | 10 +++++++++- Scripts/transcribe.py | 28 ++++++++++++++++++---------- 5 files changed, 70 insertions(+), 17 deletions(-) diff --git a/GUI/GUI/GUI/Frame.cpp b/GUI/GUI/GUI/Frame.cpp index bd4bbec..cfb2060 100644 --- a/GUI/GUI/GUI/Frame.cpp +++ b/GUI/GUI/GUI/Frame.cpp @@ -32,6 +32,7 @@ namespace { ID_PY_APP_BYTES_PER_CHAR, ID_PY_APP_MODEL_PANEL, ID_PY_APP_ENABLE_LOCAL_BEEP, + ID_PY_APP_USE_CPU, ID_PY_APP_ROWS, ID_PY_APP_COLS, ID_PY_APP_WINDOW_DURATION, @@ -394,6 +395,17 @@ Frame::Frame() ); py_app_enable_local_beep_ = py_app_enable_local_beep; + auto* py_app_use_cpu = new wxCheckBox(py_config_panel, + ID_PY_APP_USE_CPU, "Use CPU"); + py_app_use_cpu->SetValue(false); + py_app_use_cpu->SetToolTip( + "If checked, the transcription engine will run on your " + "CPU instead of your GPU. This is typically much slower " + "and should only be used if you aren't able to use your " + "GPU." + ); + py_app_use_cpu_ = py_app_use_cpu; + auto* py_app_start_button = new wxButton(py_config_panel, ID_PY_APP_START_BUTTON, "Begin transcribing"); auto* py_app_stop_button = new wxButton(py_config_panel, ID_PY_APP_STOP_BUTTON, "Stop transcribing"); @@ -403,6 +415,7 @@ Frame::Frame() sizer->Add(py_dump_mics_button, /*proportion=*/0, /*flags=*/wxEXPAND); sizer->Add(py_app_config_panel_pairs, /*proportion=*/0, /*flags=*/wxEXPAND); sizer->Add(py_app_enable_local_beep, /*proportion=*/0, /*flags=*/wxEXPAND); + sizer->Add(py_app_use_cpu, /*proportion=*/0, /*flags=*/wxEXPAND); sizer->Add(py_app_start_button, /*proportion=*/0, /*flags=*/wxEXPAND); sizer->Add(py_app_stop_button, /*proportion=*/0, /*flags=*/wxEXPAND); } @@ -888,6 +901,7 @@ void Frame::OnAppStart(wxCommandEvent& event) { bytes_per_char_idx = kBytesDefault; } const bool enable_local_beep = py_app_enable_local_beep_->GetValue(); + const bool use_cpu = py_app_use_cpu_->GetValue(); std::string rows_str = py_app_rows_->GetValue().ToStdString(); std::string cols_str = py_app_cols_->GetValue().ToStdString(); std::string window_duration_str = py_app_window_duration_->GetValue().ToStdString(); @@ -929,7 +943,8 @@ void Frame::OnAppStart(wxCommandEvent& event) { rows, cols, window_duration, - enable_local_beep); + enable_local_beep, + use_cpu); if (!p) { Log(transcribe_out_, "Failed to launch transcription engine\n"); return; diff --git a/GUI/GUI/GUI/Frame.h b/GUI/GUI/GUI/Frame.h index 2a2760a..cd62127 100644 --- a/GUI/GUI/GUI/Frame.h +++ b/GUI/GUI/GUI/Frame.h @@ -50,6 +50,7 @@ private: wxChoice* unity_bytes_per_char_; wxCheckBox* py_app_enable_local_beep_; + wxCheckBox* py_app_use_cpu_; wxProcess* py_app_; wxTimer py_app_drain_; diff --git a/GUI/GUI/GUI/PythonWrapper.cpp b/GUI/GUI/GUI/PythonWrapper.cpp index da63a4a..81366e5 100644 --- a/GUI/GUI/GUI/PythonWrapper.cpp +++ b/GUI/GUI/GUI/PythonWrapper.cpp @@ -42,17 +42,29 @@ wxProcess* PythonWrapper::InvokeAsyncWithArgs(std::vector&& args, return p; } -bool PythonWrapper::InvokeWithArgs(std::vector&& args, +bool PythonWrapper::InvokeCommandWithArgs( + const std::string& cmd, + std::vector&& args, std::string* py_stdout, std::string* py_stderr) { std::ostringstream cmd_oss; - cmd_oss << "Resources/Python/python.exe"; + cmd_oss << cmd; for (const auto& arg : args) { cmd_oss << " " << arg; } + wxString path; + if (!wxGetEnv("PATH", &path)) { + *py_stderr = "Failed to get PATH"; + return false; + } + if (!wxSetEnv("PATH", path + ";Resources/PortableGit/bin")) { + *py_stderr = "Failed to append to PATH"; + return false; + } + wxArrayString cmd_stdout; wxArrayString cmd_stderr; - long result = wxExecute(cmd_oss.str(), cmd_stdout, cmd_stderr); + long result = wxExecute(cmd_oss.str(), cmd_stdout, cmd_stderr, /*flags=*/0); std::ostringstream cmd_stdout_oss; for (const auto& line : cmd_stdout) { if (!cmd_stdout_oss.str().empty()) { @@ -69,7 +81,9 @@ bool PythonWrapper::InvokeWithArgs(std::vector&& args, } if (result == -1) { std::ostringstream err_oss; - err_oss << "Error while executing python command \"" << cmd_oss.str() << "\": Failed to launch process"; + err_oss << "Error while executing python command \"" << cmd_oss.str() << "\": Failed to launch process" << std::endl; + err_oss << cmd_stdout_oss.str() << std::endl; + err_oss << cmd_stderr_oss.str() << std::endl; if (py_stderr) { *py_stderr = err_oss.str(); } @@ -94,6 +108,11 @@ bool PythonWrapper::InvokeWithArgs(std::vector&& args, return true; } +bool PythonWrapper::InvokeWithArgs(std::vector&& args, + std::string* py_stdout, std::string* py_stderr) { + return InvokeCommandWithArgs("Resources/Python/python.exe", + std::move(args), py_stdout, py_stderr); +} std::string PythonWrapper::GetVersion() { std::string py_stdout, py_stderr; @@ -125,7 +144,8 @@ wxProcess* PythonWrapper::StartApp( std::function&& exit_callback, const std::string& mic, const std::string& lang, const std::string& model, const std::string& chars_per_sync, const std::string& bytes_per_char, - int rows, int cols, int window_duration_s, bool enable_local_beep) { + int rows, int cols, int window_duration_s, bool enable_local_beep, + bool use_cpu) { return InvokeAsyncWithArgs({ "-u", "Resources/Scripts/transcribe.py", @@ -138,6 +158,7 @@ wxProcess* PythonWrapper::StartApp( "--rows", std::to_string(rows), "--cols", std::to_string(cols), "--window_duration_s", std::to_string(window_duration_s), + "--cpu", use_cpu ? "1" : "0", }, std::move(exit_callback)); } diff --git a/GUI/GUI/GUI/PythonWrapper.h b/GUI/GUI/GUI/PythonWrapper.h index 25855a4..a60bdae 100644 --- a/GUI/GUI/GUI/PythonWrapper.h +++ b/GUI/GUI/GUI/PythonWrapper.h @@ -22,6 +22,13 @@ namespace PythonWrapper wxProcess* InvokeAsyncWithArgs(std::vector&& args, std::function&& exit_callback); + // Invoke a command on the shell with arguments. + // On error, sets `out` to an error message and returns false. + bool InvokeCommandWithArgs(const std::string& cmd, + std::vector&& args, + std::string* py_stdout, + std::string* py_stderr = NULL); + // Invoke the interpreter with arguments. // On error, sets `out` to an error message and returns false. bool InvokeWithArgs(std::vector&& args, std::string* py_stdout, @@ -44,7 +51,8 @@ namespace PythonWrapper std::function&& exit_callback, const std::string& mic, const std::string& lang, const std::string& model, const std::string& chars_per_sync, const std::string& bytes_per_char, - int rows, int cols, int window_duration_s, bool enable_local_beep + int rows, int cols, int window_duration_s, bool enable_local_beep, + bool use_cpu ); bool GenerateAnimator( diff --git a/Scripts/transcribe.py b/Scripts/transcribe.py index 8491e4d..4d36e53 100644 --- a/Scripts/transcribe.py +++ b/Scripts/transcribe.py @@ -202,8 +202,7 @@ def resetAudio(audio_state): audio_state.transcribe_lock.release() # Transcribe the audio recorded in a file. -def transcribe(audio_state, model, frames): - +def transcribe(audio_state, model, frames, use_cpu: bool): start_time = time.time() frames = audio_state.frames @@ -223,8 +222,10 @@ def transcribe(audio_state, model, frames): #for temp in (0.00, 0.05, 0.10, 0.15, 0.20): #for temp in (0.00, 0.05): for temp in (0.00,): + use_gpu = not use_cpu options = whisper.DecodingOptions(language = audio_state.language, - beam_size = 5, temperature = temp, without_timestamps = True) + beam_size = 5, temperature = temp, without_timestamps = True, + fp16 = use_gpu) result = whisper.decode(model, mel, options) if result.avg_logprob < -1.0: @@ -247,7 +248,7 @@ def transcribe(audio_state, model, frames): return result -def transcribeAudio(audio_state, model): +def transcribeAudio(audio_state, model, use_cpu: bool): last_transcribe_time = time.time() while audio_state.run_app == True: # Pace this out @@ -266,7 +267,7 @@ def transcribeAudio(audio_state, model): audio_state.transcribe_sleep_duration_max_s, longer_sleep_dur) - text = transcribe(audio_state, model, audio_state.frames) + text = transcribe(audio_state, model, audio_state.frames, use_cpu) if not text: print("no transcription, spin ({} seconds)".format(time.time() - last_transcribe_time)) last_transcribe_time = time.time() @@ -373,7 +374,7 @@ def readControllerInput(audio_state, enable_local_beep): # model should correspond to one of the Whisper models defined in # whisper/__init__.py. Examples: tiny, base, small, medium. -def transcribeLoop(mic: str, language: str, model: str, enable_local_beep: bool): +def transcribeLoop(mic: str, language: str, model: str, enable_local_beep: bool, use_cpu: bool): audio_state = getMicStream(mic) audio_state.language = whisper.tokenizer.TO_LANGUAGE_CODE[language] @@ -386,7 +387,7 @@ def transcribeLoop(mic: str, language: str, model: str, enable_local_beep: bool) print("Model {} will be saved to {}".format(model, model_root)) model = whisper.load_model(model, download_root=model_root) - transcribe_audio_thd = threading.Thread(target = transcribeAudio, args = [audio_state, model]) + transcribe_audio_thd = threading.Thread(target = transcribeAudio, args = [audio_state, model, use_cpu]) transcribe_audio_thd.daemon = True transcribe_audio_thd.start() @@ -432,10 +433,11 @@ if __name__ == "__main__": parser.add_argument("--model", type=str, help="Which AI model to use. Ex: tiny, base, small, medium") parser.add_argument("--bytes_per_char", type=str, help="The number of bytes to use to represent each character") parser.add_argument("--chars_per_sync", type=str, help="The number of characters to send on each sync event") - parser.add_argument("--enable_local_beep", type=int, help="Whether to play a local auditory indicator when transcription starts/stops."); + parser.add_argument("--enable_local_beep", type=int, help="Whether to play a local auditory indicator when transcription starts/stops.") parser.add_argument("--rows", type=int, help="The number of rows on the board") parser.add_argument("--cols", type=int, help="The number of columns on the board") - parser.add_argument("--window_duration_s", type=int, help="The length in seconds of the audio recording handed to the transcription algorithm"); + parser.add_argument("--window_duration_s", type=int, help="The length in seconds of the audio recording handed to the transcription algorithm") + parser.add_argument("--cpu", type=int, help="If set to 1, use CPU instead of GPU") args = parser.parse_args() if not args.mic: @@ -458,10 +460,16 @@ if __name__ == "__main__": if args.window_duration_s: config.MAX_LENGTH_S = int(args.window_duration_s) + if args.cpu == 1: + args.cpu = True + else: + args.cpu = False + generate_utils.config.BYTES_PER_CHAR = int(args.bytes_per_char) generate_utils.config.CHARS_PER_SYNC = int(args.chars_per_sync) generate_utils.config.BOARD_ROWS = int(args.rows) generate_utils.config.BOARD_COLS = int(args.cols) - transcribeLoop(args.mic, args.language, args.model, args.enable_local_beep) + transcribeLoop(args.mic, args.language, args.model, args.enable_local_beep, + args.cpu) -- cgit v1.2.3