From ae866f553d3db67030e37ce315707d72982f4063 Mon Sep 17 00:00:00 2001 From: yum Date: Sat, 9 Sep 2023 22:26:09 -0700 Subject: Add UI for max speech duration Also fix bug when not using previews. Audio buffer no longer grows without bound while there's no speech. --- GUI/GUI/GUI/Config.cpp | 3 +++ GUI/GUI/GUI/Config.h | 1 + GUI/GUI/GUI/Frame.cpp | 23 +++++++++++++++++++++++ GUI/GUI/GUI/Frame.h | 1 + 4 files changed, 28 insertions(+) (limited to 'GUI') diff --git a/GUI/GUI/GUI/Config.cpp b/GUI/GUI/GUI/Config.cpp index 1fc1aee..dd3d55c 100644 --- a/GUI/GUI/GUI/Config.cpp +++ b/GUI/GUI/GUI/Config.cpp @@ -86,6 +86,7 @@ AppConfig::AppConfig(wxTextCtrl* out) enable_lock_at_spawn(true), gpu_idx(0), min_silence_duration_ms(250), + max_speech_duration_s(5), keybind("ctrl+x"), chars_per_sync(8), @@ -129,6 +130,7 @@ bool AppConfig::Serialize(const std::filesystem::path& path) { cm.Set("enable_lock_at_spawn", enable_lock_at_spawn); cm.Set("gpu_idx", gpu_idx); cm.Set("min_silence_duration_ms", min_silence_duration_ms); + cm.Set("max_speech_duration_s", max_speech_duration_s); cm.Set("keybind", keybind); cm.Set("chars_per_sync", chars_per_sync); @@ -185,6 +187,7 @@ bool AppConfig::Deserialize(const std::filesystem::path& path) { cm.Get("enable_lock_at_spawn", c.enable_lock_at_spawn); cm.Get("gpu_idx", c.gpu_idx); cm.Get("min_silence_duration_ms", c.min_silence_duration_ms); + cm.Get("max_speech_duration_s", c.max_speech_duration_s); cm.Get("keybind", c.keybind); cm.Get("chars_per_sync", c.chars_per_sync); diff --git a/GUI/GUI/GUI/Config.h b/GUI/GUI/GUI/Config.h index 808cf9e..a366090 100644 --- a/GUI/GUI/GUI/Config.h +++ b/GUI/GUI/GUI/Config.h @@ -72,6 +72,7 @@ public: bool enable_lock_at_spawn; int gpu_idx; int min_silence_duration_ms; + int max_speech_duration_s; std::string keybind; // Unity and transcription shared settings. diff --git a/GUI/GUI/GUI/Frame.cpp b/GUI/GUI/GUI/Frame.cpp index d110a0c..f2fb140 100644 --- a/GUI/GUI/GUI/Frame.cpp +++ b/GUI/GUI/GUI/Frame.cpp @@ -76,6 +76,7 @@ namespace { ID_PY_APP_COLS, ID_PY_APP_GPU_IDX, ID_PY_APP_MIN_SILENCE_DURATION_MS, + ID_PY_APP_MAX_SPEECH_DURATION_S, ID_PY_APP_KEYBIND, ID_PY_APP_BROWSER_SRC_PORT, ID_PY_APP_COMMIT_FUZZ_THRESHOLD, @@ -763,6 +764,17 @@ Frame::Frame() "used to segment speech."); py_app_min_silence_duration_ms_ = py_app_min_silence_duration_ms; + auto* py_app_max_speech_duration_s = new wxTextCtrl( + py_app_config_panel_pairs, ID_PY_APP_MAX_SPEECH_DURATION_S, + std::to_string(app_c_->max_speech_duration_s), wxDefaultPosition, + wxDefaultSize, /*style=*/0); + py_app_max_speech_duration_s->SetToolTip( + "The maximum duration, in seconds, of any segment of " + "speech. Continuous speech longer than this is split " + "at the last pause lasting longer than 100 " + "milliseconds."); + py_app_max_speech_duration_s_ = py_app_max_speech_duration_s; + auto* py_app_keybind = new wxTextCtrl( py_app_config_panel_pairs, ID_PY_APP_KEYBIND, app_c_->keybind, wxDefaultPosition, @@ -852,6 +864,11 @@ Frame::Frame() sizer->Add(py_app_min_silence_duration_ms, /*proportion=*/0, /*flags=*/wxEXPAND); + sizer->Add(new wxStaticText(py_app_config_panel_pairs, + wxID_ANY, /*label=*/"Maximum speech duration (s):")); + sizer->Add(py_app_max_speech_duration_s, /*proportion=*/0, + /*flags=*/wxEXPAND); + sizer->Add(new wxStaticText(py_app_config_panel_pairs, wxID_ANY, /*label=*/"Browser source port:")); sizer->Add(py_app_browser_src_port, /*proportion=*/0, @@ -1550,6 +1567,10 @@ void Frame::ApplyConfigToInputFields() py_app_min_silence_duration_ms->Clear(); py_app_min_silence_duration_ms->AppendText(std::to_string(app_c_->min_silence_duration_ms)); + auto* py_app_max_speech_duration_s = static_cast(FindWindowById(ID_PY_APP_MAX_SPEECH_DURATION_S)); + py_app_max_speech_duration_s->Clear(); + py_app_max_speech_duration_s->AppendText(std::to_string(app_c_->max_speech_duration_s)); + auto* py_app_enable_local_beep = static_cast(FindWindowById(ID_PY_APP_ENABLE_LOCAL_BEEP)); py_app_enable_local_beep->SetValue(app_c_->enable_local_beep); @@ -2304,6 +2325,7 @@ void Frame::OnAppStart(wxCommandEvent& event) { ASSIGN_OR_RETURN_VOID(int, bytes_per_char, stoiInRange(transcribe_out_, kBytesPerChar[bytes_per_char_idx].ToStdString(), "bytes_per_char", 1, 2)); ASSIGN_OR_RETURN_VOID(int, gpu_idx, stoiInRange(transcribe_out_, py_app_gpu_idx_->GetValue().ToStdString(), "gpu_idx", 0, 10)); ASSIGN_OR_RETURN_VOID(int, min_silence_duration_ms, stoiInRange(transcribe_out_, py_app_min_silence_duration_ms_->GetValue().ToStdString(), "min_silence_duration_ms", 50, 5000)); + ASSIGN_OR_RETURN_VOID(int, max_speech_duration_s, stoiInRange(transcribe_out_, py_app_max_speech_duration_s_->GetValue().ToStdString(), "max_speech_duration_s", 1, 30)); ASSIGN_OR_RETURN_VOID(int, browser_src_port, stoiInRange(transcribe_out_, py_app_browser_src_port_->GetValue().ToStdString(), "browser_src_port", 1024, 65535)); std::string keybind = py_app_keybind_->GetValue().ToStdString(); @@ -2334,6 +2356,7 @@ void Frame::OnAppStart(wxCommandEvent& event) { app_c_->enable_lock_at_spawn = enable_lock_at_spawn; app_c_->gpu_idx = gpu_idx; app_c_->min_silence_duration_ms = min_silence_duration_ms; + app_c_->max_speech_duration_s = max_speech_duration_s; app_c_->keybind = keybind; app_c_->Serialize(AppConfig::kConfigPath); diff --git a/GUI/GUI/GUI/Frame.h b/GUI/GUI/GUI/Frame.h index 72ba6c4..615726a 100644 --- a/GUI/GUI/GUI/Frame.h +++ b/GUI/GUI/GUI/Frame.h @@ -39,6 +39,7 @@ private: wxTextCtrl* py_app_cols_; wxTextCtrl* py_app_gpu_idx_; wxTextCtrl* py_app_min_silence_duration_ms_; + wxTextCtrl* py_app_max_speech_duration_s_; wxTextCtrl* py_app_keybind_; wxTextCtrl* py_app_browser_src_port_; wxTextCtrl* py_app_commit_fuzz_threshold_; -- cgit v1.2.3