From 4ee396584c348c11e0272f0c2842f6a5d3175586 Mon Sep 17 00:00:00 2001 From: yum Date: Sun, 10 Sep 2023 00:28:03 -0700 Subject: Add UI for transcription loop delay Allows users to directly modulate the performance-latency tradeoff. Also: * Bump up UI buffer to 1k lines. * Fix browser source reset. It now also resets preview text. --- GUI/GUI/GUI/Config.cpp | 3 +++ GUI/GUI/GUI/Config.h | 1 + GUI/GUI/GUI/Frame.cpp | 22 ++++++++++++++++++++++ GUI/GUI/GUI/Frame.h | 1 + GUI/GUI/GUI/Logging.cpp | 9 +++++---- GUI/GUI/GUI/Transcript.cpp | 1 + 6 files changed, 33 insertions(+), 4 deletions(-) (limited to 'GUI') diff --git a/GUI/GUI/GUI/Config.cpp b/GUI/GUI/GUI/Config.cpp index dd3d55c..91fd1e9 100644 --- a/GUI/GUI/GUI/Config.cpp +++ b/GUI/GUI/GUI/Config.cpp @@ -87,6 +87,7 @@ AppConfig::AppConfig(wxTextCtrl* out) gpu_idx(0), min_silence_duration_ms(250), max_speech_duration_s(5), + transcription_loop_delay_ms(100), keybind("ctrl+x"), chars_per_sync(8), @@ -131,6 +132,7 @@ bool AppConfig::Serialize(const std::filesystem::path& path) { cm.Set("gpu_idx", gpu_idx); cm.Set("min_silence_duration_ms", min_silence_duration_ms); cm.Set("max_speech_duration_s", max_speech_duration_s); + cm.Set("transcription_loop_delay_ms", transcription_loop_delay_ms); cm.Set("keybind", keybind); cm.Set("chars_per_sync", chars_per_sync); @@ -188,6 +190,7 @@ bool AppConfig::Deserialize(const std::filesystem::path& path) { cm.Get("gpu_idx", c.gpu_idx); cm.Get("min_silence_duration_ms", c.min_silence_duration_ms); cm.Get("max_speech_duration_s", c.max_speech_duration_s); + cm.Get("transcription_loop_delay_ms", c.transcription_loop_delay_ms); cm.Get("keybind", c.keybind); cm.Get("chars_per_sync", c.chars_per_sync); diff --git a/GUI/GUI/GUI/Config.h b/GUI/GUI/GUI/Config.h index a366090..762adc5 100644 --- a/GUI/GUI/GUI/Config.h +++ b/GUI/GUI/GUI/Config.h @@ -73,6 +73,7 @@ public: int gpu_idx; int min_silence_duration_ms; int max_speech_duration_s; + int transcription_loop_delay_ms; std::string keybind; // 
Unity and transcription shared settings. diff --git a/GUI/GUI/GUI/Frame.cpp b/GUI/GUI/GUI/Frame.cpp index 384f2a2..a61c821 100644 --- a/GUI/GUI/GUI/Frame.cpp +++ b/GUI/GUI/GUI/Frame.cpp @@ -77,6 +77,7 @@ namespace { ID_PY_APP_GPU_IDX, ID_PY_APP_MIN_SILENCE_DURATION_MS, ID_PY_APP_MAX_SPEECH_DURATION_S, + ID_PY_APP_TRANSCRIPTION_LOOP_DELAY_MS, ID_PY_APP_KEYBIND, ID_PY_APP_BROWSER_SRC_PORT, ID_PY_APP_COMMIT_FUZZ_THRESHOLD, @@ -775,6 +776,16 @@ Frame::Frame() "milliseconds."); py_app_max_speech_duration_s_ = py_app_max_speech_duration_s; + auto* py_app_transcription_loop_delay_ms = new wxTextCtrl( + py_app_config_panel_pairs, ID_PY_APP_TRANSCRIPTION_LOOP_DELAY_MS, + std::to_string(app_c_->transcription_loop_delay_ms), wxDefaultPosition, + wxDefaultSize, /*style=*/0); + py_app_transcription_loop_delay_ms->SetToolTip( + "The amount of time, in milliseconds, that the " + "application will sleep between every subsequent " + "transcription."); + py_app_transcription_loop_delay_ms_ = py_app_transcription_loop_delay_ms; + auto* py_app_keybind = new wxTextCtrl( py_app_config_panel_pairs, ID_PY_APP_KEYBIND, app_c_->keybind, wxDefaultPosition, @@ -869,6 +880,11 @@ Frame::Frame() sizer->Add(py_app_max_speech_duration_s, /*proportion=*/0, /*flags=*/wxEXPAND); + sizer->Add(new wxStaticText(py_app_config_panel_pairs, + wxID_ANY, /*label=*/"Transcription loop delay (ms):")); + sizer->Add(py_app_transcription_loop_delay_ms, /*proportion=*/0, + /*flags=*/wxEXPAND); + sizer->Add(new wxStaticText(py_app_config_panel_pairs, wxID_ANY, /*label=*/"Browser source port:")); sizer->Add(py_app_browser_src_port, /*proportion=*/0, @@ -1571,6 +1587,10 @@ void Frame::ApplyConfigToInputFields() py_app_max_speech_duration_s->Clear(); py_app_max_speech_duration_s->AppendText(std::to_string(app_c_->max_speech_duration_s)); + auto* py_app_transcription_loop_delay_ms = static_cast(FindWindowById(ID_PY_APP_TRANSCRIPTION_LOOP_DELAY_MS)); + py_app_transcription_loop_delay_ms->Clear(); + 
py_app_transcription_loop_delay_ms->AppendText(std::to_string(app_c_->transcription_loop_delay_ms)); + auto* py_app_enable_local_beep = static_cast(FindWindowById(ID_PY_APP_ENABLE_LOCAL_BEEP)); py_app_enable_local_beep->SetValue(app_c_->enable_local_beep); @@ -2326,6 +2346,7 @@ void Frame::OnAppStart(wxCommandEvent& event) { ASSIGN_OR_RETURN_VOID(int, gpu_idx, stoiInRange(transcribe_out_, py_app_gpu_idx_->GetValue().ToStdString(), "gpu_idx", 0, 10)); ASSIGN_OR_RETURN_VOID(int, min_silence_duration_ms, stoiInRange(transcribe_out_, py_app_min_silence_duration_ms_->GetValue().ToStdString(), "min_silence_duration_ms", 50, 5000)); ASSIGN_OR_RETURN_VOID(int, max_speech_duration_s, stoiInRange(transcribe_out_, py_app_max_speech_duration_s_->GetValue().ToStdString(), "max_speech_duration_s", 1, 30)); + ASSIGN_OR_RETURN_VOID(int, transcription_loop_delay_ms, stoiInRange(transcribe_out_, py_app_transcription_loop_delay_ms_->GetValue().ToStdString(), "transcription_loop_delay_ms", 0, 10000)); ASSIGN_OR_RETURN_VOID(int, browser_src_port, stoiInRange(transcribe_out_, py_app_browser_src_port_->GetValue().ToStdString(), "browser_src_port", 1024, 65535)); std::string keybind = py_app_keybind_->GetValue().ToStdString(); @@ -2357,6 +2378,7 @@ void Frame::OnAppStart(wxCommandEvent& event) { app_c_->gpu_idx = gpu_idx; app_c_->min_silence_duration_ms = min_silence_duration_ms; app_c_->max_speech_duration_s = max_speech_duration_s; + app_c_->transcription_loop_delay_ms = transcription_loop_delay_ms; app_c_->keybind = keybind; app_c_->Serialize(AppConfig::kConfigPath); diff --git a/GUI/GUI/GUI/Frame.h b/GUI/GUI/GUI/Frame.h index 615726a..21f1220 100644 --- a/GUI/GUI/GUI/Frame.h +++ b/GUI/GUI/GUI/Frame.h @@ -40,6 +40,7 @@ private: wxTextCtrl* py_app_gpu_idx_; wxTextCtrl* py_app_min_silence_duration_ms_; wxTextCtrl* py_app_max_speech_duration_s_; + wxTextCtrl* py_app_transcription_loop_delay_ms_; wxTextCtrl* py_app_keybind_; wxTextCtrl* py_app_browser_src_port_; wxTextCtrl* 
py_app_commit_fuzz_threshold_; diff --git a/GUI/GUI/GUI/Logging.cpp b/GUI/GUI/GUI/Logging.cpp index 5d0e23e..f6ad3ab 100644 --- a/GUI/GUI/GUI/Logging.cpp +++ b/GUI/GUI/GUI/Logging.cpp @@ -48,15 +48,16 @@ void Logging::ThreadLogger::Drain() log_ofs << message; } - // Constrain wxTextCtrl's to 100-200 lines to keep memory usage / + // Constrain wxTextCtrl's to 1000-2000 lines to keep memory usage / // general snappiness in check. if (frame) { wxString allText = frame->GetValue(); wxArrayString lines = wxStringTokenize(allText, "\n"); size_t count = lines.GetCount(); - if (count > 200) { - // Keep only the last 100 lines. - size_t linesToRemove = count - 100; + constexpr int kHalfMaxLines = 1000; + if (count > kHalfMaxLines * 2) { + // Keep only the last kHalfMaxLines lines. + size_t linesToRemove = count - kHalfMaxLines; // Remove lines from the beginning lines.RemoveAt(0, linesToRemove); diff --git a/GUI/GUI/GUI/Transcript.cpp b/GUI/GUI/GUI/Transcript.cpp index eb798d9..11bab31 100644 --- a/GUI/GUI/GUI/Transcript.cpp +++ b/GUI/GUI/GUI/Transcript.cpp @@ -20,6 +20,7 @@ void Transcript::SetPreview(std::string&& segment) { void Transcript::Clear() { std::scoped_lock l(mu_); segments_.clear(); + previews_.clear(); } std::vector Transcript::Get() { -- cgit v1.2.3