diff options
| author | yum <yum.food.vr@gmail.com> | 2023-05-21 21:27:50 -0700 |
|---|---|---|
| committer | yum <yum.food.vr@gmail.com> | 2023-05-21 21:27:50 -0700 |
| commit | 2d544407e64bed9328ee493d3daa095b20445b52 (patch) | |
| tree | c98ff700afcf040cfec9170c2d5d72d17e5bc7ac | |
| parent | aec43ecd11e8ef84fe4f5f5fcca9578dbde59469 (diff) | |
Enable selecting specific GPU when transcribing
Useful on devices with multiple GPUs, such as gaming laptops.
* Update GUI/README.md.
| -rw-r--r-- | GUI/GUI/GUI/Config.cpp | 3 | ||||
| -rw-r--r-- | GUI/GUI/GUI/Config.h | 1 | ||||
| -rw-r--r-- | GUI/GUI/GUI/Frame.cpp | 37 | ||||
| -rw-r--r-- | GUI/GUI/GUI/Frame.h | 1 | ||||
| -rw-r--r-- | GUI/GUI/GUI/Logging.cpp | 4 | ||||
| -rw-r--r-- | GUI/GUI/GUI/PythonWrapper.cpp | 1 | ||||
| -rw-r--r-- | GUI/README.md | 33 | ||||
| -rw-r--r-- | GUI/package.ps1 | 8 | ||||
| -rw-r--r-- | Scripts/transcribe.py | 11 |
9 files changed, 84 insertions, 15 deletions
diff --git a/GUI/GUI/GUI/Config.cpp b/GUI/GUI/GUI/Config.cpp index f45aa45..02646ab 100644 --- a/GUI/GUI/GUI/Config.cpp +++ b/GUI/GUI/GUI/Config.cpp @@ -71,6 +71,7 @@ AppConfig::AppConfig(wxTextCtrl* out) enable_local_beep(true),
use_cpu(false),
use_builtin(false),
+ gpu_idx(0),
chars_per_sync(8),
bytes_per_char(1),
@@ -113,6 +114,7 @@ bool AppConfig::Serialize(const std::filesystem::path& path) { cm.Set("enable_local_beep", enable_local_beep);
cm.Set("use_cpu", use_cpu);
cm.Set("use_builtin", use_builtin);
+ cm.Set("gpu_idx", gpu_idx);
cm.Set("chars_per_sync", chars_per_sync);
cm.Set("bytes_per_char", bytes_per_char);
@@ -168,6 +170,7 @@ bool AppConfig::Deserialize(const std::filesystem::path& path) { cm.Get("enable_local_beep", c.enable_local_beep);
cm.Get("use_cpu", c.use_cpu);
cm.Get("use_builtin", c.use_builtin);
+ cm.Get("gpu_idx", c.gpu_idx);
cm.Get("chars_per_sync", c.chars_per_sync);
cm.Get("bytes_per_char", c.bytes_per_char);
diff --git a/GUI/GUI/GUI/Config.h b/GUI/GUI/GUI/Config.h index f0babc2..d86c8d8 100644 --- a/GUI/GUI/GUI/Config.h +++ b/GUI/GUI/GUI/Config.h @@ -57,6 +57,7 @@ public: bool enable_local_beep;
bool use_cpu;
bool use_builtin;
+ int gpu_idx;
// Unity and transcription shared settings.
int chars_per_sync;
diff --git a/GUI/GUI/GUI/Frame.cpp b/GUI/GUI/GUI/Frame.cpp index 1195540..ae07ad9 100644 --- a/GUI/GUI/GUI/Frame.cpp +++ b/GUI/GUI/GUI/Frame.cpp @@ -42,6 +42,7 @@ namespace { ID_PY_APP_ROWS,
ID_PY_APP_COLS,
ID_PY_APP_WINDOW_DURATION,
+ ID_PY_APP_GPU_IDX,
ID_UNITY_PANEL,
ID_UNITY_CONFIG_PANEL,
ID_UNITY_OUT,
@@ -526,6 +527,16 @@ Frame::Frame() "but are far more accurate.");
py_app_window_duration_ = py_app_window_duration;
+ auto* py_app_gpu_idx = new wxTextCtrl(
+ py_app_config_panel_pairs, ID_PY_APP_GPU_IDX,
+ std::to_string(app_c_->gpu_idx), wxDefaultPosition,
+ wxDefaultSize, /*style=*/0);
+ py_app_gpu_idx->SetToolTip(
+ "The index of the GPU to use. 0 is usually your CPU's "
+ "onboard GPU (if you have one), 1 is usually your "
+ "discrete GPU.");
+ py_app_gpu_idx_ = py_app_gpu_idx;
+
auto* sizer = new wxFlexGridSizer(/*cols=*/2);
py_app_config_panel_pairs->SetSizer(sizer);
@@ -573,6 +584,11 @@ Frame::Frame() wxID_ANY, /*label=*/"Window duration (s):"));
sizer->Add(py_app_window_duration, /*proportion=*/0,
/*flags=*/wxEXPAND);
+
+ sizer->Add(new wxStaticText(py_app_config_panel_pairs,
+ wxID_ANY, /*label=*/"GPU index:"));
+ sizer->Add(py_app_gpu_idx, /*proportion=*/0,
+ /*flags=*/wxEXPAND);
}
auto* py_app_enable_local_beep = new wxCheckBox(py_config_panel,
@@ -2100,19 +2116,23 @@ void Frame::OnAppStart(wxCommandEvent& event) { kBytesPerChar[bytes_per_char_idx].ToStdString();
std::string window_duration_str =
py_app_window_duration_->GetValue().ToStdString();
- int rows, cols, chars_per_sync, bytes_per_char, window_duration;
+ std::string gpu_idx_str =
+ py_app_gpu_idx_->GetValue().ToStdString();
+ int rows, cols, chars_per_sync, bytes_per_char, window_duration, gpu_idx;
try {
rows = std::stoi(rows_str);
cols = std::stoi(cols_str);
chars_per_sync = std::stoi(chars_per_sync_str);
bytes_per_char = std::stoi(bytes_per_char_str);
window_duration = std::stoi(window_duration_str);
+ gpu_idx = std::stoi(gpu_idx_str);
}
catch (const std::invalid_argument&) {
Log(transcribe_out_, "Could not parse rows \"{}\", cols \"{}\", chars "
- "per sync \"{}\", bytes per char \"{}\" or window duration \"{}\" "
+ "per sync \"{}\", bytes per char \"{}\" window duration \"{}\" "
+ "or gpu_idx \"{}\""
"as an integer\n", rows_str, cols_str, chars_per_sync_str,
- bytes_per_char_str, window_duration_str);
+ bytes_per_char_str, window_duration_str, gpu_idx_str);
return;
}
catch (const std::out_of_range&) {
@@ -2126,15 +2146,19 @@ void Frame::OnAppStart(wxCommandEvent& event) { const int max_cols = 240;
const int min_window_duration_s = 10;
const int max_window_duration_s = 300;
+ const int min_gpu_idx = 0;
+ const int max_gpu_idx = 10;
if (rows < 0 || rows > max_rows ||
cols < 0 || cols > max_cols ||
window_duration < min_window_duration_s ||
- window_duration > max_window_duration_s) {
+ window_duration > max_window_duration_s ||
+ gpu_idx < min_gpu_idx || gpu_idx > max_gpu_idx) {
Log(transcribe_out_, "Rows not on [{},{}] or cols not on [{},{}] or "
- "window_duration not on [{},{}]\n",
+ "window_duration not on [{},{}] or gpu_idx not on [{}, {}]\n",
0, max_rows,
0, max_cols,
- min_window_duration_s, max_window_duration_s);
+ min_window_duration_s, max_window_duration_s,
+ min_gpu_idx, max_gpu_idx);
return;
}
@@ -2150,6 +2174,7 @@ void Frame::OnAppStart(wxCommandEvent& event) { app_c_->enable_local_beep = enable_local_beep;
app_c_->use_cpu = use_cpu;
app_c_->use_builtin = use_builtin;
+ app_c_->gpu_idx = gpu_idx;
app_c_->Serialize(AppConfig::kConfigPath);
auto out_cb = [&](const std::string& out, const std::string& err) {
diff --git a/GUI/GUI/GUI/Frame.h b/GUI/GUI/GUI/Frame.h index 8bb88ea..1252542 100644 --- a/GUI/GUI/GUI/Frame.h +++ b/GUI/GUI/GUI/Frame.h @@ -40,6 +40,7 @@ private: wxTextCtrl* py_app_rows_;
wxTextCtrl* py_app_cols_;
wxTextCtrl* py_app_window_duration_;
+ wxTextCtrl* py_app_gpu_idx_;
wxTextCtrl* unity_rows_;
wxTextCtrl* unity_cols_;
wxTextCtrl* whisper_rows_;
diff --git a/GUI/GUI/GUI/Logging.cpp b/GUI/GUI/GUI/Logging.cpp index 5741341..b5b9e55 100644 --- a/GUI/GUI/GUI/Logging.cpp +++ b/GUI/GUI/GUI/Logging.cpp @@ -69,11 +69,11 @@ void Logging::DrainAsyncOutput(wxProcess* proc, wxTextCtrl* frame) { while (proc->IsInputAvailable()) {
wxTextInputStream iss(*(proc->GetInputStream()));
- Log(frame, " {}\n", iss.ReadLine());
+ Log(frame, " {}\n", iss.ReadLine().ToStdString());
}
while (proc->IsErrorAvailable()) {
wxTextInputStream iss(*(proc->GetErrorStream()));
- Log(frame, " {}\n", iss.ReadLine());
+ Log(frame, " {}\n", iss.ReadLine().ToStdString());
}
}
diff --git a/GUI/GUI/GUI/PythonWrapper.cpp b/GUI/GUI/GUI/PythonWrapper.cpp index e6b9a70..c5f355a 100644 --- a/GUI/GUI/GUI/PythonWrapper.cpp +++ b/GUI/GUI/GUI/PythonWrapper.cpp @@ -472,6 +472,7 @@ std::future<bool> PythonWrapper::StartApp( "--cpu", config.use_cpu ? "1" : "0", "--use_builtin", config.use_builtin ? "1" : "0", "--emotes_pickle", kEmotesPickle, + "--gpu_idx", std::to_string(config.gpu_idx), }, std::move(out_cb), std::move(in_cb), diff --git a/GUI/README.md b/GUI/README.md index dbbc9be..6d5049d 100644 --- a/GUI/README.md +++ b/GUI/README.md @@ -1,25 +1,50 @@ ## Build instructions -0. Install build dependencies: cmake, git, python3, Visual Studio 2022 +0. Install build dependencies: cmake, git, python3, Visual Studio Community + 2022 + 0.0. When installing Visual Studio, make sure `Desktop development with C++` + is selected. + 0.1. Make sure Windows is using Python 3.10.9. From Powershell, the command + `python.exe --version` should show that it's using 3.10.9. Direct link: + https://www.python.org/ftp/python/3.10.9/python-3.10.9-amd64.exe 1. Open Powershell. 2. Make sure you've downloaded submodules: ``` $ git submodule init $ git submodule update ``` -3. Execute Libraries/fetch.ps1. +3. Execute Libraries/fetch.ps1. This will take 10-20 minutes. + 3.0. If you can't run the script, run `Set-ExecutionPolicy + Unrestricted` in an admin instance of powershell. Heed the warning, + this is a security risk! Never run code from someone you don't trust + unless you've carefully audited it. + 3.1. If you haven't built TaSTT-Whisper before, you'll see an error. Ignore + it. 4. Open `Libraries/wx/build/msw/wx_vc17.sln` with Visual Studio 2022. 5. Select every project in the Solution Explorer except for `_custom_build`. 6. Right click, select Properties, go to C/C++, Code Generation, and set Runtime Library to Multi-threaded (/MT). Make sure this applies to the - configuration x64/Release. + configuration x64/Release. Click Apply. 7. Build x64/Release. 1.
The build configuration is in the top. By default it's probably Debug/x64. 2. To build: ctrl+shift+B -8. Follow TaSTT-Whisper README and build it as x64/Release. + 3. If you saw an error in 3.1, rerun Libraries/fetch.ps1. +8. Follow `Build instructions` section of TaSTT-Whisper/Readme.md and build it + as x64/Release. + 8.0. If you see a message like `Based on your solution... you might need to + install additional components`, do it. 9. Open GUI/GUI.sln with Visual Studio 2022. 10. Build x64/Release. 11. Run package.ps1 from powershell. + 11.0. If you're not creating a redistributable release, use this command + instead (it's way faster): `package.ps1 -skip_zip`. + 11.1. When PortableGit creates a window, wait for it to complete, then + press enter in Powershell. + 11.2. The first time you run this it'll take a long time since it has to + fetch a few large packages. Subsequent invocations will be much faster + since it won't reacquire anything already downloaded. On my connection, + it took 90 minutes to finish downloading, mostly because Google Drive + downloads are slower than dirt. ## High level design diff --git a/GUI/package.ps1 b/GUI/package.ps1 index e21d109..7bf032b 100644 --- a/GUI/package.ps1 +++ b/GUI/package.ps1 @@ -18,7 +18,9 @@ if (Test-Path $install_dir) { $py_dir = "Python"
-rm -Recurse $py_dir
+if (Test-Path $py_dir) {
+ rm -Recurse $py_dir
+}
if (-Not (Test-Path $py_dir)) {
echo "Fetching python"
@@ -39,6 +41,10 @@ if (-Not (Test-Path $py_dir)) { $pip_path = "$py_dir/get-pip.py"
+if (Test-Path $pip_path) {
+ rm -Force $pip_path
+}
+
if (-Not (Test-Path $pip_path)) {
echo "Fetching pip"
diff --git a/Scripts/transcribe.py b/Scripts/transcribe.py index 043ca0c..1d19543 100644 --- a/Scripts/transcribe.py +++ b/Scripts/transcribe.py @@ -357,7 +357,7 @@ def readControllerInput(audio_state, enable_local_beep: bool, def transcribeLoop(mic: str, language: str, model: str, enable_local_beep: bool, use_cpu: bool, use_builtin: bool, button: str, estate: EmotesState, - window_duration_s: int): + window_duration_s: int, gpu_idx: int): audio_state = getMicStream(mic) audio_state.language = langcodes.find(language).language audio_state.MAX_LENGTH_S = window_duration_s @@ -442,6 +442,7 @@ if __name__ == "__main__": parser.add_argument("--use_builtin", type=int, help="If set to 1, use the text box built into the game.") parser.add_argument("--button", type=str, help="The controller button used to start/stop transcription. E.g. \"left joystick\"") parser.add_argument("--emotes_pickle", type=str, help="The path to emotes pickle. See emotes_v2.py for details.") + parser.add_argument("--gpu_idx", type=str, help="The index of the GPU device to use. On single GPU systems, use 0.") args = parser.parse_args() if not args.mic: @@ -469,6 +470,11 @@ if __name__ == "__main__": print("--emotes_pickle required", file=sys.stderr) sys.exit(1) + if not args.gpu_idx: + print("--gpu_idx required", file=sys.stderr) + sys.exit(1) + args.gpu_idx = int(args.gpu_idx) + window_duration_s = 120 if args.window_duration_s: window_duration_s = int(args.window_duration_s) @@ -494,5 +500,6 @@ if __name__ == "__main__": print(f"PATH: {os.environ['PATH']}") transcribeLoop(args.mic, args.language, args.model, args.enable_local_beep, - args.cpu, args.use_builtin, args.button, estate, window_duration_s) + args.cpu, args.use_builtin, args.button, estate, window_duration_s, + args.gpu_idx) |
