From 6bed3a15edf134fa176ca4866b4346017bc97ada Mon Sep 17 00:00:00 2001
From: yum <yum.food.vr@gmail.com>
Date: Sun, 1 Jan 2023 21:09:32 -0800
Subject: Portability bugfixes

* Expose option to run transcription engine on CPU instead of GPU
* Use embedded git when setting up the Python virtual environment
---
 GUI/GUI/GUI/Frame.cpp         | 17 ++++++++++++++++-
 GUI/GUI/GUI/Frame.h           |  1 +
 GUI/GUI/GUI/PythonWrapper.cpp | 31 ++++++++++++++++++++++++++-----
 GUI/GUI/GUI/PythonWrapper.h   | 10 +++++++++-
 Scripts/transcribe.py         | 28 ++++++++++++++++++----------
 5 files changed, 70 insertions(+), 17 deletions(-)

diff --git a/GUI/GUI/GUI/Frame.cpp b/GUI/GUI/GUI/Frame.cpp
index bd4bbec..cfb2060 100644
--- a/GUI/GUI/GUI/Frame.cpp
+++ b/GUI/GUI/GUI/Frame.cpp
@@ -32,6 +32,7 @@ namespace {
         ID_PY_APP_BYTES_PER_CHAR,
         ID_PY_APP_MODEL_PANEL,
         ID_PY_APP_ENABLE_LOCAL_BEEP,
+        ID_PY_APP_USE_CPU,
         ID_PY_APP_ROWS,
         ID_PY_APP_COLS,
         ID_PY_APP_WINDOW_DURATION,
@@ -394,6 +395,17 @@ Frame::Frame()
                 );
                 py_app_enable_local_beep_ = py_app_enable_local_beep;
 
+                auto* py_app_use_cpu = new wxCheckBox(py_config_panel,
+                    ID_PY_APP_USE_CPU, "Use CPU");
+                py_app_use_cpu->SetValue(false);
+                py_app_use_cpu->SetToolTip(
+                    "If checked, the transcription engine will run on your "
+                    "CPU instead of your GPU. This is typically much slower "
+                    "and should only be used if you aren't able to use your "
+                    "GPU."
+                );
+                py_app_use_cpu_ = py_app_use_cpu;
+
                 auto* py_app_start_button = new wxButton(py_config_panel, ID_PY_APP_START_BUTTON, "Begin transcribing");
                 auto* py_app_stop_button = new wxButton(py_config_panel, ID_PY_APP_STOP_BUTTON, "Stop transcribing");
 
@@ -403,6 +415,7 @@ Frame::Frame()
                 sizer->Add(py_dump_mics_button, /*proportion=*/0, /*flags=*/wxEXPAND);
                 sizer->Add(py_app_config_panel_pairs, /*proportion=*/0, /*flags=*/wxEXPAND);
                 sizer->Add(py_app_enable_local_beep, /*proportion=*/0, /*flags=*/wxEXPAND);
+                sizer->Add(py_app_use_cpu, /*proportion=*/0, /*flags=*/wxEXPAND);
                 sizer->Add(py_app_start_button, /*proportion=*/0, /*flags=*/wxEXPAND);
                 sizer->Add(py_app_stop_button, /*proportion=*/0, /*flags=*/wxEXPAND);
             }
@@ -888,6 +901,7 @@ void Frame::OnAppStart(wxCommandEvent& event) {
         bytes_per_char_idx = kBytesDefault;
     }
     const bool enable_local_beep = py_app_enable_local_beep_->GetValue();
+    const bool use_cpu = py_app_use_cpu_->GetValue();
     std::string rows_str = py_app_rows_->GetValue().ToStdString();
     std::string cols_str = py_app_cols_->GetValue().ToStdString();
     std::string window_duration_str = py_app_window_duration_->GetValue().ToStdString();
@@ -929,7 +943,8 @@ void Frame::OnAppStart(wxCommandEvent& event) {
         rows,
         cols,
         window_duration,
-        enable_local_beep);
+        enable_local_beep,
+        use_cpu);
     if (!p) {
         Log(transcribe_out_, "Failed to launch transcription engine\n");
         return;
diff --git a/GUI/GUI/GUI/Frame.h b/GUI/GUI/GUI/Frame.h
index 2a2760a..cd62127 100644
--- a/GUI/GUI/GUI/Frame.h
+++ b/GUI/GUI/GUI/Frame.h
@@ -50,6 +50,7 @@ private:
     wxChoice* unity_bytes_per_char_;
 
     wxCheckBox* py_app_enable_local_beep_;
+    wxCheckBox* py_app_use_cpu_;
 
     wxProcess* py_app_;
     wxTimer py_app_drain_;
diff --git a/GUI/GUI/GUI/PythonWrapper.cpp b/GUI/GUI/GUI/PythonWrapper.cpp
index da63a4a..81366e5 100644
--- a/GUI/GUI/GUI/PythonWrapper.cpp
+++ b/GUI/GUI/GUI/PythonWrapper.cpp
@@ -42,17 +42,29 @@ wxProcess* PythonWrapper::InvokeAsyncWithArgs(std::vector<std::string>&& args,
 	return p;
 }
 
-bool PythonWrapper::InvokeWithArgs(std::vector<std::string>&& args,
+bool PythonWrapper::InvokeCommandWithArgs(
+	const std::string& cmd,
+	std::vector<std::string>&& args,
 	std::string* py_stdout, std::string* py_stderr) {
 	std::ostringstream cmd_oss;
-	cmd_oss << "Resources/Python/python.exe";
+	cmd_oss << cmd;
 	for (const auto& arg : args) {
 		cmd_oss << " " << arg;
 	}
 
+	wxString path;  // Current PATH; the bundled PortableGit bin dir is appended below.
+	if (!wxGetEnv("PATH", &path)) {
+		if (py_stderr) { *py_stderr = "Failed to get PATH"; }  // py_stderr is optional (may be null)
+		return false;
+	}
+	if (!wxSetEnv("PATH", path + ";Resources/PortableGit/bin")) {
+		if (py_stderr) { *py_stderr = "Failed to append to PATH"; }  // same null guard as above
+		return false;
+	}
+
 	wxArrayString cmd_stdout;
 	wxArrayString cmd_stderr;
-	long result = wxExecute(cmd_oss.str(), cmd_stdout, cmd_stderr);
+	long result = wxExecute(cmd_oss.str(), cmd_stdout, cmd_stderr, /*flags=*/0);
 	std::ostringstream cmd_stdout_oss;
 	for (const auto& line : cmd_stdout) {
 		if (!cmd_stdout_oss.str().empty()) {
@@ -69,7 +81,9 @@ bool PythonWrapper::InvokeWithArgs(std::vector<std::string>&& args,
 	}
 	if (result == -1) {
 		std::ostringstream err_oss;
-		err_oss << "Error while executing python command \"" << cmd_oss.str() << "\": Failed to launch process";
+		err_oss << "Error while executing python command \"" << cmd_oss.str() << "\": Failed to launch process" << std::endl;
+		err_oss << cmd_stdout_oss.str() << std::endl;
+		err_oss << cmd_stderr_oss.str() << std::endl;
 		if (py_stderr) {
 			*py_stderr = err_oss.str();
 		}
@@ -94,6 +108,11 @@ bool PythonWrapper::InvokeWithArgs(std::vector<std::string>&& args,
 	return true;
 }
 
+// Runs the bundled Python interpreter by forwarding to InvokeCommandWithArgs.
+bool PythonWrapper::InvokeWithArgs(std::vector<std::string>&& args, std::string* py_stdout, std::string* py_stderr) {
+	const std::string interpreter = "Resources/Python/python.exe";
+	return InvokeCommandWithArgs(interpreter, std::move(args), py_stdout, py_stderr);
+}
 
 std::string PythonWrapper::GetVersion() {
 	std::string py_stdout, py_stderr;
@@ -125,7 +144,8 @@ wxProcess* PythonWrapper::StartApp(
 	std::function<void(wxProcess* proc, int ret)>&& exit_callback,
 	const std::string& mic, const std::string& lang, const std::string& model,
 	const std::string& chars_per_sync, const std::string& bytes_per_char,
-	int rows, int cols, int window_duration_s, bool enable_local_beep) {
+	int rows, int cols, int window_duration_s, bool enable_local_beep,
+	bool use_cpu) {
 	return InvokeAsyncWithArgs({
 		"-u",
 		"Resources/Scripts/transcribe.py",
@@ -138,6 +158,7 @@ wxProcess* PythonWrapper::StartApp(
 		"--rows", std::to_string(rows),
 		"--cols", std::to_string(cols),
 		"--window_duration_s", std::to_string(window_duration_s),
+		"--cpu", use_cpu ? "1" : "0",
 		},
 		std::move(exit_callback));
 }
diff --git a/GUI/GUI/GUI/PythonWrapper.h b/GUI/GUI/GUI/PythonWrapper.h
index 25855a4..a60bdae 100644
--- a/GUI/GUI/GUI/PythonWrapper.h
+++ b/GUI/GUI/GUI/PythonWrapper.h
@@ -22,6 +22,13 @@ namespace PythonWrapper
 	wxProcess* InvokeAsyncWithArgs(std::vector<std::string>&& args,
 		std::function<void(wxProcess* proc, int ret)>&& exit_callback);
 
+	// Invoke a command on the shell with arguments.
+	// On error, sets `py_stderr` to an error message and returns false.
+	bool InvokeCommandWithArgs(const std::string& cmd,
+		std::vector<std::string>&& args,
+		std::string* py_stdout,
+		std::string* py_stderr = NULL);
+
 	// Invoke the interpreter with arguments.
 	// On error, sets `out` to an error message and returns false.
 	bool InvokeWithArgs(std::vector<std::string>&& args, std::string* py_stdout,
@@ -44,7 +51,8 @@ namespace PythonWrapper
 		std::function<void(wxProcess* proc, int ret)>&& exit_callback,
 		const std::string& mic, const std::string& lang, const std::string& model,
 		const std::string& chars_per_sync, const std::string& bytes_per_char,
-		int rows, int cols, int window_duration_s, bool enable_local_beep
+		int rows, int cols, int window_duration_s, bool enable_local_beep,
+		bool use_cpu
 		);
 
 	bool GenerateAnimator(
diff --git a/Scripts/transcribe.py b/Scripts/transcribe.py
index 8491e4d..4d36e53 100644
--- a/Scripts/transcribe.py
+++ b/Scripts/transcribe.py
@@ -202,8 +202,7 @@ def resetAudio(audio_state):
     audio_state.transcribe_lock.release()
 
 # Transcribe the audio recorded in a file.
-def transcribe(audio_state, model, frames):
-
+def transcribe(audio_state, model, frames, use_cpu: bool):
     start_time = time.time()
 
     frames = audio_state.frames
@@ -223,8 +222,10 @@ def transcribe(audio_state, model, frames):
     #for temp in (0.00, 0.05, 0.10, 0.15, 0.20):
     #for temp in (0.00, 0.05):
     for temp in (0.00,):
+        use_gpu = not use_cpu
         options = whisper.DecodingOptions(language = audio_state.language,
-                beam_size = 5, temperature = temp, without_timestamps = True)
+                beam_size = 5, temperature = temp, without_timestamps = True,
+                fp16 = use_gpu)
         result = whisper.decode(model, mel, options)
 
         if result.avg_logprob < -1.0:
@@ -247,7 +248,7 @@ def transcribe(audio_state, model, frames):
 
     return result
 
-def transcribeAudio(audio_state, model):
+def transcribeAudio(audio_state, model, use_cpu: bool):
     last_transcribe_time = time.time()
     while audio_state.run_app == True:
         # Pace this out
@@ -266,7 +267,7 @@ def transcribeAudio(audio_state, model):
                 audio_state.transcribe_sleep_duration_max_s,
                 longer_sleep_dur)
 
-        text = transcribe(audio_state, model, audio_state.frames)
+        text = transcribe(audio_state, model, audio_state.frames, use_cpu)
         if not text:
             print("no transcription, spin ({} seconds)".format(time.time() - last_transcribe_time))
             last_transcribe_time = time.time()
@@ -373,7 +374,7 @@ def readControllerInput(audio_state, enable_local_beep):
 
 # model should correspond to one of the Whisper models defined in
 # whisper/__init__.py. Examples: tiny, base, small, medium.
-def transcribeLoop(mic: str, language: str, model: str, enable_local_beep: bool):
+def transcribeLoop(mic: str, language: str, model: str, enable_local_beep: bool, use_cpu: bool):
     audio_state = getMicStream(mic)
     audio_state.language = whisper.tokenizer.TO_LANGUAGE_CODE[language]
 
@@ -386,7 +387,7 @@ def transcribeLoop(mic: str, language: str, model: str, enable_local_beep: bool)
     print("Model {} will be saved to {}".format(model, model_root))
     model = whisper.load_model(model, download_root=model_root)
 
-    transcribe_audio_thd = threading.Thread(target = transcribeAudio, args = [audio_state, model])
+    transcribe_audio_thd = threading.Thread(target = transcribeAudio, args = [audio_state, model, use_cpu])
     transcribe_audio_thd.daemon = True
     transcribe_audio_thd.start()
 
@@ -432,10 +433,11 @@ if __name__ == "__main__":
     parser.add_argument("--model", type=str, help="Which AI model to use. Ex: tiny, base, small, medium")
     parser.add_argument("--bytes_per_char", type=str, help="The number of bytes to use to represent each character")
     parser.add_argument("--chars_per_sync", type=str, help="The number of characters to send on each sync event")
-    parser.add_argument("--enable_local_beep", type=int, help="Whether to play a local auditory indicator when transcription starts/stops.");
+    parser.add_argument("--enable_local_beep", type=int, help="Whether to play a local auditory indicator when transcription starts/stops.")
     parser.add_argument("--rows", type=int, help="The number of rows on the board")
     parser.add_argument("--cols", type=int, help="The number of columns on the board")
-    parser.add_argument("--window_duration_s", type=int, help="The length in seconds of the audio recording handed to the transcription algorithm");
+    parser.add_argument("--window_duration_s", type=int, help="The length in seconds of the audio recording handed to the transcription algorithm")
+    parser.add_argument("--cpu", type=int, help="If set to 1, use CPU instead of GPU")
     args = parser.parse_args()
 
     if not args.mic:
@@ -458,10 +460,16 @@ if __name__ == "__main__":
     if args.window_duration_s:
         config.MAX_LENGTH_S = int(args.window_duration_s)
 
+    if args.cpu == 1:
+        args.cpu = True
+    else:
+        args.cpu = False
+
     generate_utils.config.BYTES_PER_CHAR = int(args.bytes_per_char)
     generate_utils.config.CHARS_PER_SYNC = int(args.chars_per_sync)
     generate_utils.config.BOARD_ROWS = int(args.rows)
     generate_utils.config.BOARD_COLS = int(args.cols)
 
-    transcribeLoop(args.mic, args.language, args.model, args.enable_local_beep)
+    transcribeLoop(args.mic, args.language, args.model, args.enable_local_beep,
+            args.cpu)
 
-- 
cgit v1.2.3