From 2d544407e64bed9328ee493d3daa095b20445b52 Mon Sep 17 00:00:00 2001 From: yum Date: Sun, 21 May 2023 21:27:50 -0700 Subject: Enable selecting specific GPU when transcribing Useful on devices with multiple GPUs, such as gaming laptops. * Update GUI/README.md. --- GUI/GUI/GUI/Config.cpp | 3 +++ GUI/GUI/GUI/Config.h | 1 + GUI/GUI/GUI/Frame.cpp | 37 +++++++++++++++++++++++++++++++------ GUI/GUI/GUI/Frame.h | 1 + GUI/GUI/GUI/Logging.cpp | 4 ++-- GUI/GUI/GUI/PythonWrapper.cpp | 1 + GUI/README.md | 33 +++++++++++++++++++++++++++++---- GUI/package.ps1 | 8 +++++++- Scripts/transcribe.py | 11 +++++++++-- 9 files changed, 84 insertions(+), 15 deletions(-) diff --git a/GUI/GUI/GUI/Config.cpp b/GUI/GUI/GUI/Config.cpp index f45aa45..02646ab 100644 --- a/GUI/GUI/GUI/Config.cpp +++ b/GUI/GUI/GUI/Config.cpp @@ -71,6 +71,7 @@ AppConfig::AppConfig(wxTextCtrl* out) enable_local_beep(true), use_cpu(false), use_builtin(false), + gpu_idx(0), chars_per_sync(8), bytes_per_char(1), @@ -113,6 +114,7 @@ bool AppConfig::Serialize(const std::filesystem::path& path) { cm.Set("enable_local_beep", enable_local_beep); cm.Set("use_cpu", use_cpu); cm.Set("use_builtin", use_builtin); + cm.Set("gpu_idx", gpu_idx); cm.Set("chars_per_sync", chars_per_sync); cm.Set("bytes_per_char", bytes_per_char); @@ -168,6 +170,7 @@ bool AppConfig::Deserialize(const std::filesystem::path& path) { cm.Get("enable_local_beep", c.enable_local_beep); cm.Get("use_cpu", c.use_cpu); cm.Get("use_builtin", c.use_builtin); + cm.Get("gpu_idx", c.gpu_idx); cm.Get("chars_per_sync", c.chars_per_sync); cm.Get("bytes_per_char", c.bytes_per_char); diff --git a/GUI/GUI/GUI/Config.h b/GUI/GUI/GUI/Config.h index f0babc2..d86c8d8 100644 --- a/GUI/GUI/GUI/Config.h +++ b/GUI/GUI/GUI/Config.h @@ -57,6 +57,7 @@ public: bool enable_local_beep; bool use_cpu; bool use_builtin; + int gpu_idx; // Unity and transcription shared settings. 
int chars_per_sync; diff --git a/GUI/GUI/GUI/Frame.cpp b/GUI/GUI/GUI/Frame.cpp index 1195540..ae07ad9 100644 --- a/GUI/GUI/GUI/Frame.cpp +++ b/GUI/GUI/GUI/Frame.cpp @@ -42,6 +42,7 @@ namespace { ID_PY_APP_ROWS, ID_PY_APP_COLS, ID_PY_APP_WINDOW_DURATION, + ID_PY_APP_GPU_IDX, ID_UNITY_PANEL, ID_UNITY_CONFIG_PANEL, ID_UNITY_OUT, @@ -526,6 +527,16 @@ Frame::Frame() "but are far more accurate."); py_app_window_duration_ = py_app_window_duration; + auto* py_app_gpu_idx = new wxTextCtrl( + py_app_config_panel_pairs, ID_PY_APP_GPU_IDX, + std::to_string(app_c_->gpu_idx), wxDefaultPosition, + wxDefaultSize, /*style=*/0); + py_app_gpu_idx->SetToolTip( + "The index of the GPU to use. 0 is usually your CPU's " + "onboard GPU (if you have one), 1 is usually your " + "discrete GPU."); + py_app_gpu_idx_ = py_app_gpu_idx; + auto* sizer = new wxFlexGridSizer(/*cols=*/2); py_app_config_panel_pairs->SetSizer(sizer); @@ -573,6 +584,11 @@ Frame::Frame() wxID_ANY, /*label=*/"Window duration (s):")); sizer->Add(py_app_window_duration, /*proportion=*/0, /*flags=*/wxEXPAND); + + sizer->Add(new wxStaticText(py_app_config_panel_pairs, + wxID_ANY, /*label=*/"GPU index:")); + sizer->Add(py_app_gpu_idx, /*proportion=*/0, + /*flags=*/wxEXPAND); } auto* py_app_enable_local_beep = new wxCheckBox(py_config_panel, @@ -2100,19 +2116,23 @@ void Frame::OnAppStart(wxCommandEvent& event) { kBytesPerChar[bytes_per_char_idx].ToStdString(); std::string window_duration_str = py_app_window_duration_->GetValue().ToStdString(); - int rows, cols, chars_per_sync, bytes_per_char, window_duration; + std::string gpu_idx_str = + py_app_gpu_idx_->GetValue().ToStdString(); + int rows, cols, chars_per_sync, bytes_per_char, window_duration, gpu_idx; try { rows = std::stoi(rows_str); cols = std::stoi(cols_str); chars_per_sync = std::stoi(chars_per_sync_str); bytes_per_char = std::stoi(bytes_per_char_str); window_duration = std::stoi(window_duration_str); + gpu_idx = std::stoi(gpu_idx_str); } catch (const 
std::invalid_argument&) { Log(transcribe_out_, "Could not parse rows \"{}\", cols \"{}\", chars " - "per sync \"{}\", bytes per char \"{}\" or window duration \"{}\" " + "per sync \"{}\", bytes per char \"{}\" window duration \"{}\" " + "or gpu_idx \"{}\"" "as an integer\n", rows_str, cols_str, chars_per_sync_str, - bytes_per_char_str, window_duration_str); + bytes_per_char_str, window_duration_str, gpu_idx_str); return; } catch (const std::out_of_range&) { @@ -2126,15 +2146,19 @@ void Frame::OnAppStart(wxCommandEvent& event) { const int max_cols = 240; const int min_window_duration_s = 10; const int max_window_duration_s = 300; + const int min_gpu_idx = 0; + const int max_gpu_idx = 10; if (rows < 0 || rows > max_rows || cols < 0 || cols > max_cols || window_duration < min_window_duration_s || - window_duration > max_window_duration_s) { + window_duration > max_window_duration_s || + gpu_idx < min_gpu_idx || gpu_idx > max_gpu_idx) { Log(transcribe_out_, "Rows not on [{},{}] or cols not on [{},{}] or " - "window_duration not on [{},{}]\n", + "window_duration not on [{},{}] or gpu_idx not on [{}, {}]\n", 0, max_rows, 0, max_cols, - min_window_duration_s, max_window_duration_s); + min_window_duration_s, max_window_duration_s, + min_gpu_idx, max_gpu_idx); return; } @@ -2150,6 +2174,7 @@ void Frame::OnAppStart(wxCommandEvent& event) { app_c_->enable_local_beep = enable_local_beep; app_c_->use_cpu = use_cpu; app_c_->use_builtin = use_builtin; + app_c_->gpu_idx = gpu_idx; app_c_->Serialize(AppConfig::kConfigPath); auto out_cb = [&](const std::string& out, const std::string& err) { diff --git a/GUI/GUI/GUI/Frame.h b/GUI/GUI/GUI/Frame.h index 8bb88ea..1252542 100644 --- a/GUI/GUI/GUI/Frame.h +++ b/GUI/GUI/GUI/Frame.h @@ -40,6 +40,7 @@ private: wxTextCtrl* py_app_rows_; wxTextCtrl* py_app_cols_; wxTextCtrl* py_app_window_duration_; + wxTextCtrl* py_app_gpu_idx_; wxTextCtrl* unity_rows_; wxTextCtrl* unity_cols_; wxTextCtrl* whisper_rows_; diff --git a/GUI/GUI/GUI/Logging.cpp 
b/GUI/GUI/GUI/Logging.cpp index 5741341..b5b9e55 100644 --- a/GUI/GUI/GUI/Logging.cpp +++ b/GUI/GUI/GUI/Logging.cpp @@ -69,11 +69,11 @@ void Logging::DrainAsyncOutput(wxProcess* proc, wxTextCtrl* frame) { while (proc->IsInputAvailable()) { wxTextInputStream iss(*(proc->GetInputStream())); - Log(frame, " {}\n", iss.ReadLine()); + Log(frame, " {}\n", iss.ReadLine().ToStdString()); } while (proc->IsErrorAvailable()) { wxTextInputStream iss(*(proc->GetErrorStream())); - Log(frame, " {}\n", iss.ReadLine()); + Log(frame, " {}\n", iss.ReadLine().ToStdString()); } } diff --git a/GUI/GUI/GUI/PythonWrapper.cpp b/GUI/GUI/GUI/PythonWrapper.cpp index e6b9a70..c5f355a 100644 --- a/GUI/GUI/GUI/PythonWrapper.cpp +++ b/GUI/GUI/GUI/PythonWrapper.cpp @@ -472,6 +472,7 @@ std::future PythonWrapper::StartApp( "--cpu", config.use_cpu ? "1" : "0", "--use_builtin", config.use_builtin ? "1" : "0", "--emotes_pickle", kEmotesPickle, + "--gpu_idx", std::to_string(config.gpu_idx), }, std::move(out_cb), std::move(in_cb), diff --git a/GUI/README.md b/GUI/README.md index dbbc9be..6d5049d 100644 --- a/GUI/README.md +++ b/GUI/README.md @@ -1,25 +1,50 @@ ## Build instructions -0. Install build dependencies: cmake, git, python3, Visual Studio 2022 +0. Install build dependencies: cmake, git, python3, Visual Studio Community + 2022 + 0.0. When installing Visual Studio, make sure `Desktop development with C++` + is selected. + 0.1. Make sure Windows is using Python 3.10.9. From Powershell, the command + `python.exe --version` should show that it's using 3.10.9. Direct link: + https://www.python.org/ftp/python/3.10.9/python-3.10.9-amd64.exe 1. Open Powershell. 2. Make sure you've downloaded submodules: ``` $ git submodule init $ git submodule update ``` -3. Execute Libraries/fetch.ps1. +3. Execute Libraries/fetch.ps1. This will take 10-20 minutes. + 3.0. If you can't run the script, run `Set-ExecutionPolicy + Unrestricted` in an admin instance of powershell.
Heed the warning, + this is a security risk! Never run code from someone you don't trust + unless you've carefully audited it. + 3.1. If you haven't built TaSTT-Whisper before, you'll see an error. Ignore + it. 4. Open `Libraries/wx/build/msw/wx_vc17.sln` with Visual Studio 2022. 5. Select every project in the Solution Explorer except for `_custom_build`. 6. Right click, select Properties, go to C/C++, Code Generation, and set Runtime Library to Multi-threaded (/MT). Make sure this applies to the - configuration x64/Release. + configuration x64/Release. Click Apply. 7. Build x64/Release. 1. The build configuration is in the top. By default it's probably Debug/x64. 2. To build: ctrl+shift+B -8. Follow TaSTT-Whisper README and build it as x64/Release. + 3. If you saw an error in 3.1, rerun Libraries/fetch.ps1. +8. Follow `Build instructions` section of TaSTT-Whisper/Readme.md and build it + as x64/Release. + 8.0. If you see a message like `Based on your solution... you might need to + install additional components`, do it. 9. Open GUI/GUI.sln with Visual Studio 2022. 10. Build x64/Release. 11. Run package.ps1 from powershell. + 11.0. If you're not creating a redistributable release, use this command + instead (it's way faster): `package.ps1 -skip_zip`. + 11.1. When PortableGit creates a window, wait for it to complete, then press + enter in Powershell. + 11.2. The first time you run this it'll take a long time since it has to + fetch a few large packages. Subsequent invocations will be much faster + since it won't reacquire anything already downloaded. On my connection, + it took 90 minutes to finish downloading, mostly because Google Drive + downloads are slower than dirt.
## High level design diff --git a/GUI/package.ps1 b/GUI/package.ps1 index e21d109..7bf032b 100644 --- a/GUI/package.ps1 +++ b/GUI/package.ps1 @@ -18,7 +18,9 @@ if (Test-Path $install_dir) { $py_dir = "Python" -rm -Recurse $py_dir +if (Test-Path $py_dir) { + rm -Recurse $py_dir +} if (-Not (Test-Path $py_dir)) { echo "Fetching python" @@ -39,6 +41,10 @@ if (-Not (Test-Path $py_dir)) { $pip_path = "$py_dir/get-pip.py" +if (Test-Path $pip_path) { + rm -Force $pip_path +} + if (-Not (Test-Path $pip_path)) { echo "Fetching pip" diff --git a/Scripts/transcribe.py b/Scripts/transcribe.py index 043ca0c..1d19543 100644 --- a/Scripts/transcribe.py +++ b/Scripts/transcribe.py @@ -357,7 +357,7 @@ def readControllerInput(audio_state, enable_local_beep: bool, def transcribeLoop(mic: str, language: str, model: str, enable_local_beep: bool, use_cpu: bool, use_builtin: bool, button: str, estate: EmotesState, - window_duration_s: int): + window_duration_s: int, gpu_idx: int): audio_state = getMicStream(mic) audio_state.language = langcodes.find(language).language audio_state.MAX_LENGTH_S = window_duration_s @@ -442,6 +442,7 @@ if __name__ == "__main__": parser.add_argument("--use_builtin", type=int, help="If set to 1, use the text box built into the game.") parser.add_argument("--button", type=str, help="The controller button used to start/stop transcription. E.g. \"left joystick\"") parser.add_argument("--emotes_pickle", type=str, help="The path to emotes pickle. See emotes_v2.py for details.") + parser.add_argument("--gpu_idx", type=str, help="The index of the GPU device to use. 
On single GPU systems, use 0.") args = parser.parse_args() if not args.mic: @@ -469,6 +470,11 @@ if __name__ == "__main__": print("--emotes_pickle required", file=sys.stderr) sys.exit(1) + if not args.gpu_idx: + print("--gpu_idx required", file=sys.stderr) + sys.exit(1) + args.gpu_idx = int(args.gpu_idx) + window_duration_s = 120 if args.window_duration_s: window_duration_s = int(args.window_duration_s) @@ -494,5 +500,6 @@ if __name__ == "__main__": print(f"PATH: {os.environ['PATH']}") transcribeLoop(args.mic, args.language, args.model, args.enable_local_beep, - args.cpu, args.use_builtin, args.button, estate, window_duration_s) + args.cpu, args.use_builtin, args.button, estate, window_duration_s, + args.gpu_idx) -- cgit v1.2.3