From 011cfdd4bab866a64b06406ceaa7563294af9225 Mon Sep 17 00:00:00 2001 From: yum Date: Mon, 26 Jun 2023 17:12:41 -0700 Subject: Add UI for browser src Add ability to toggle on/off browser src & configure port. --- GUI/GUI/GUI/Config.cpp | 9 ++++-- GUI/GUI/GUI/Config.h | 3 +- GUI/GUI/GUI/Frame.cpp | 76 ++++++++++++++++++++++++++++++++++++++++---------- GUI/GUI/GUI/Frame.h | 2 ++ Scripts/transcribe.py | 2 +- 5 files changed, 72 insertions(+), 20 deletions(-) diff --git a/GUI/GUI/GUI/Config.cpp b/GUI/GUI/GUI/Config.cpp index bfc55d6..c549843 100644 --- a/GUI/GUI/GUI/Config.cpp +++ b/GUI/GUI/GUI/Config.cpp @@ -70,6 +70,8 @@ AppConfig::AppConfig(wxTextCtrl* out) button("left joystick"), enable_local_beep(true), + enable_browser_src(false), + browser_src_port(8097), use_cpu(false), use_builtin(false), enable_uwu_filter(false), @@ -102,7 +104,6 @@ AppConfig::AppConfig(wxTextCtrl* out) whisper_vad_pause_duration(0.2), whisper_vad_retain_duration(0.2), - browser_src_port(9517), whisper_enable_builtin(false), whisper_enable_custom(false), whisper_enable_browser_src(true) @@ -119,6 +120,8 @@ bool AppConfig::Serialize(const std::filesystem::path& path) { cm.Set("button", button); cm.Set("enable_local_beep", enable_local_beep); + cm.Set("enable_browser_src", enable_browser_src); + cm.Set("browser_src_port", browser_src_port); cm.Set("use_cpu", use_cpu); cm.Set("use_builtin", use_builtin); cm.Set("enable_uwu_filter", enable_uwu_filter); @@ -151,7 +154,6 @@ bool AppConfig::Serialize(const std::filesystem::path& path) { cm.Set("whisper_vad_pause_duration", whisper_vad_pause_duration); cm.Set("whisper_vad_retain_duration", whisper_vad_retain_duration); - cm.Set("browser_src_port", browser_src_port); cm.Set("whisper_enable_builtin", whisper_enable_builtin); cm.Set("whisper_enable_custom", whisper_enable_custom); cm.Set("whisper_enable_browser_src", whisper_enable_browser_src); @@ -181,6 +183,8 @@ bool AppConfig::Deserialize(const std::filesystem::path& path) { cm.Get("button", c.button); cm.Get("enable_local_beep", c.enable_local_beep); + cm.Get("enable_browser_src", c.enable_browser_src); + cm.Get("browser_src_port", c.browser_src_port); cm.Get("use_cpu", c.use_cpu); cm.Get("use_builtin", c.use_builtin); cm.Get("enable_uwu_filter", c.enable_uwu_filter); @@ -213,7 +217,6 @@ bool AppConfig::Deserialize(const std::filesystem::path& path) { cm.Get("whisper_vad_pause_duration", c.whisper_vad_pause_duration); cm.Get("whisper_vad_retain_duration", c.whisper_vad_retain_duration); - cm.Get("browser_src_port", c.browser_src_port); cm.Get("whisper_enable_builtin", c.whisper_enable_builtin); cm.Get("whisper_enable_custom", c.whisper_enable_custom); cm.Get("whisper_enable_browser_src", c.whisper_enable_browser_src); diff --git a/GUI/GUI/GUI/Config.h b/GUI/GUI/GUI/Config.h index fc99935..dd7e47a 100644 --- a/GUI/GUI/GUI/Config.h +++ b/GUI/GUI/GUI/Config.h @@ -56,6 +56,8 @@ public: std::string button; bool enable_local_beep; + bool enable_browser_src; + int browser_src_port; bool use_cpu; bool use_builtin; bool enable_uwu_filter; @@ -92,7 +94,6 @@ public: float whisper_vad_retain_duration; // Browser source-specific settings. - int browser_src_port; bool whisper_enable_builtin; bool whisper_enable_custom; bool whisper_enable_browser_src; diff --git a/GUI/GUI/GUI/Frame.cpp b/GUI/GUI/GUI/Frame.cpp index d39222b..191aa82 100644 --- a/GUI/GUI/GUI/Frame.cpp +++ b/GUI/GUI/GUI/Frame.cpp @@ -41,6 +41,7 @@ namespace { ID_PY_APP_BUTTON, ID_PY_APP_MODEL_PANEL, ID_PY_APP_ENABLE_LOCAL_BEEP, + ID_PY_APP_ENABLE_BROWSER_SRC, ID_PY_APP_USE_CPU, ID_PY_APP_USE_BUILTIN, ID_PY_APP_ENABLE_UWU_FILTER, @@ -51,6 +52,7 @@ namespace { ID_PY_APP_COLS, ID_PY_APP_GPU_IDX, ID_PY_APP_KEYBIND, + ID_PY_APP_BROWSER_SRC_PORT, ID_UNITY_PANEL, ID_UNITY_CONFIG_PANEL, ID_UNITY_OUT, @@ -739,6 +741,17 @@ Frame::Frame() "quickly."); py_app_keybind_ = py_app_keybind; + auto* py_app_browser_src_port = new wxTextCtrl( + py_app_config_panel_pairs, ID_PY_APP_BROWSER_SRC_PORT, + std::to_string(app_c_->browser_src_port), wxDefaultPosition, + wxDefaultSize, /*style=*/0); + py_app_browser_src_port->SetToolTip( + "The port to send the transcript to when `Enable " + "browser source` is enabled. To preview, go to " + "localhost:$PORT in your browser, where $PORT is the " + "value you configure here."); + py_app_browser_src_port_ = py_app_browser_src_port; + auto* sizer = new wxFlexGridSizer(/*cols=*/2); py_app_config_panel_pairs->SetSizer(sizer); @@ -801,8 +814,21 @@ Frame::Frame() wxID_ANY, /*label=*/"GPU index:")); sizer->Add(py_app_gpu_idx, /*proportion=*/0, /*flags=*/wxEXPAND); + + sizer->Add(new wxStaticText(py_app_config_panel_pairs, + wxID_ANY, /*label=*/"Browser source port:")); + sizer->Add(py_app_browser_src_port, /*proportion=*/0, + /*flags=*/wxEXPAND); } + auto* py_app_enable_browser_src = new wxCheckBox(py_config_panel, + ID_PY_APP_ENABLE_BROWSER_SRC, "Enable browser source"); + py_app_enable_browser_src->SetValue(app_c_->enable_browser_src); + py_app_enable_browser_src->SetToolTip( + "Stream transcript to a browser source. To preview, go to " + "localhost:8097, or whatever port you configured."); + py_app_enable_browser_src_ = py_app_enable_browser_src; + auto* py_app_enable_local_beep = new wxCheckBox(py_config_panel, ID_PY_APP_ENABLE_LOCAL_BEEP, "Enable local beep"); py_app_enable_local_beep->SetValue(app_c_->enable_local_beep); @@ -879,6 +905,8 @@ Frame::Frame() /*flags=*/wxEXPAND); sizer->Add(py_app_config_panel_pairs, /*proportion=*/0, /*flags=*/wxEXPAND); + sizer->Add(py_app_enable_browser_src, /*proportion=*/0, + /*flags=*/wxEXPAND); sizer->Add(py_app_enable_local_beep, /*proportion=*/0, /*flags=*/wxEXPAND); sizer->Add(py_app_use_cpu, /*proportion=*/0, @@ -1367,6 +1395,10 @@ void Frame::ApplyConfigToInputFields() py_app_desktop_keybind->Clear(); py_app_desktop_keybind->AppendText(app_c_->keybind); + auto* py_app_desktop_browser_src_port = static_cast(FindWindowById(ID_PY_APP_BROWSER_SRC_PORT)); + py_app_desktop_browser_src_port->Clear(); + py_app_desktop_browser_src_port->AppendText(std::to_string(app_c_->browser_src_port)); + auto* py_app_rows = static_cast(FindWindowById(ID_PY_APP_ROWS)); py_app_rows->Clear(); py_app_rows->AppendText(std::to_string(app_c_->rows)); @@ -1382,6 +1414,9 @@ void Frame::ApplyConfigToInputFields() auto* py_app_enable_local_beep = static_cast(FindWindowById(ID_PY_APP_ENABLE_LOCAL_BEEP)); py_app_enable_local_beep->SetValue(app_c_->enable_local_beep); + auto* py_app_enable_browser_src = static_cast(FindWindowById(ID_PY_APP_ENABLE_BROWSER_SRC)); + py_app_enable_browser_src->SetValue(app_c_->enable_browser_src); + auto* py_app_use_cpu = static_cast(FindWindowById(ID_PY_APP_USE_CPU)); py_app_use_cpu->SetValue(app_c_->use_cpu); @@ -1978,6 +2013,7 @@ void Frame::OnAppStart(wxCommandEvent& event) { button_idx = kBytesDefault; } const bool enable_local_beep = py_app_enable_local_beep_->GetValue(); + const bool enable_browser_src = py_app_enable_browser_src_->GetValue(); const bool use_cpu = py_app_use_cpu_->GetValue(); const bool use_builtin = py_app_use_builtin_->GetValue(); const bool enable_uwu_filter = py_app_enable_uwu_filter_->GetValue(); @@ -1994,41 +2030,48 @@ void Frame::OnAppStart(wxCommandEvent& event) { py_app_gpu_idx_->GetValue().ToStdString(); std::string keybind = py_app_keybind_->GetValue().ToStdString(); - int rows, cols, chars_per_sync, bytes_per_char, gpu_idx; + std::string browser_src_port_str = + py_app_browser_src_port_->GetValue().ToStdString(); + int rows, cols, chars_per_sync, bytes_per_char, gpu_idx, browser_src_port; try { rows = std::stoi(rows_str); cols = std::stoi(cols_str); chars_per_sync = std::stoi(chars_per_sync_str); bytes_per_char = std::stoi(bytes_per_char_str); gpu_idx = std::stoi(gpu_idx_str); + browser_src_port = std::stoi(browser_src_port_str); } catch (const std::invalid_argument&) { Log(transcribe_out_, "Could not parse rows \"{}\", cols \"{}\", chars " - "per sync \"{}\", bytes per char \"{}\" " - "or gpu_idx \"{}\"" + "per sync \"{}\", bytes per char \"{}\", " + "gpu_idx \"{}\", or browser src port \"{}\"" "as an integer\n", rows_str, cols_str, chars_per_sync_str, - bytes_per_char_str, gpu_idx_str); + bytes_per_char_str, gpu_idx_str, browser_src_port_str); return; } catch (const std::out_of_range&) { Log(transcribe_out_, "Rows \"{}\", cols \"{}\", chars per sync " - "\"{}\", bytes per char \"{}\" or \"{}\" are out " - "of range\n", rows_str, cols_str, chars_per_sync_str, - bytes_per_char_str); + "\"{}\", bytes per char \"{}\", gpu idx \"{}\", or browser src " + "port \"{}\" are out of range\n", rows_str, cols_str, chars_per_sync_str, + bytes_per_char_str, gpu_idx, browser_src_port_str); return; } const int max_rows = 10; const int max_cols = 240; const int min_gpu_idx = 0; const int max_gpu_idx = 10; + const int min_browser_src_port = 1024; + const int max_browser_src_port = 65535; if (rows < 0 || rows > max_rows || cols < 0 || cols > max_cols || - gpu_idx < min_gpu_idx || gpu_idx > max_gpu_idx) { + gpu_idx < min_gpu_idx || gpu_idx > max_gpu_idx || + browser_src_port < min_browser_src_port || browser_src_port > max_browser_src_port) { Log(transcribe_out_, "Rows not on [{},{}] or cols not on [{},{}] or " - "gpu_idx not on [{}, {}]\n", + "gpu_idx not on [{}, {}] or browser src port not on [{}, {}]\n", 0, max_rows, 0, max_cols, - min_gpu_idx, max_gpu_idx); + min_gpu_idx, max_gpu_idx, + min_browser_src_port, max_browser_src_port); return; } @@ -2043,6 +2086,8 @@ void Frame::OnAppStart(wxCommandEvent& event) { app_c_->rows = rows; app_c_->cols = cols; app_c_->enable_local_beep = enable_local_beep; + app_c_->enable_browser_src = enable_browser_src; + app_c_->browser_src_port = browser_src_port; app_c_->use_cpu = use_cpu; app_c_->use_builtin = use_builtin; app_c_->enable_uwu_filter = enable_uwu_filter; @@ -2063,7 +2108,7 @@ void Frame::OnAppStart(wxCommandEvent& event) { filtered_transcript.erase(std::remove_if(filtered_transcript.begin(), filtered_transcript.end(), [](char c) { return c == '\n' || c == '\r'; }), filtered_transcript.end()); - Log(transcribe_out_, "Got transcription line! Transcript: \"{}\"", filtered_transcript); + //Log(transcribe_out_, "Got transcription line! Transcript: \"{}\"", filtered_transcript); transcript_.Set(std::move(filtered_transcript)); } }; @@ -2076,11 +2121,12 @@ void Frame::OnAppStart(wxCommandEvent& event) { EnsureVirtualEnv(/*block=*/true); }; - // TODO(yum) parameterize port obs_app_ = std::async(std::launch::async, - [&]() -> bool { - BrowserSource browser_src(8097, transcribe_out_, &transcript_); - browser_src.Run(&run_py_app_); + [this, enable_browser_src, browser_src_port]() -> bool { + if (enable_browser_src) { + BrowserSource browser_src(browser_src_port, transcribe_out_, &transcript_); + browser_src.Run(&run_py_app_); + } return true; }); py_app_ = std::move(PythonWrapper::StartApp(*app_c_, transcribe_out_, diff --git a/GUI/GUI/GUI/Frame.h b/GUI/GUI/GUI/Frame.h index 904df4f..5969cd8 100644 --- a/GUI/GUI/GUI/Frame.h +++ b/GUI/GUI/GUI/Frame.h @@ -41,6 +41,7 @@ private: wxTextCtrl* py_app_cols_; wxTextCtrl* py_app_gpu_idx_; wxTextCtrl* py_app_keybind_; + wxTextCtrl* py_app_browser_src_port_; wxTextCtrl* unity_rows_; wxTextCtrl* unity_cols_; wxTextCtrl* whisper_rows_; @@ -80,6 +81,7 @@ private: wxChoice* whisper_decode_method_; wxCheckBox* py_app_enable_local_beep_; + wxCheckBox* py_app_enable_browser_src_; wxCheckBox* py_app_use_cpu_; wxCheckBox* py_app_use_builtin_; wxCheckBox* py_app_enable_uwu_filter_; diff --git a/Scripts/transcribe.py b/Scripts/transcribe.py index 56c4515..8fe6190 100644 --- a/Scripts/transcribe.py +++ b/Scripts/transcribe.py @@ -249,7 +249,7 @@ def transcribe(audio_state, model, frames, use_cpu: bool) -> typing.Tuple[str,st for segment in ranges: first_segments.append(segment) break - if len(first_segments) >= 5: + if len(first_segments) >= 4: # Hack: require convergence across many frames to give the # algorithm a longer buffer to work with. c0 = first_segments[-1] -- cgit v1.2.3