summary refs log tree commit diff stats
path: root/GUI
diff options
context:
space:
mode:
author yum <yum.food.vr@gmail.com> 2023-09-09 22:26:09 -0700
committer yum <yum.food.vr@gmail.com> 2023-09-09 22:32:35 -0700
commit ae866f553d3db67030e37ce315707d72982f4063 (patch)
tree 8b8977cad5ff9c443a86868efcad3d5e4fb612ac /GUI
parent 286dcae5e087db817f3350cf442145107b25bc9c (diff)
Add UI for max speech duration
Also fix a bug that occurred when not using previews: the audio buffer no longer grows without bound while there's no speech.
Diffstat (limited to 'GUI')
-rw-r--r-- GUI/GUI/GUI/Config.cpp 3
-rw-r--r-- GUI/GUI/GUI/Config.h 1
-rw-r--r-- GUI/GUI/GUI/Frame.cpp 23
-rw-r--r-- GUI/GUI/GUI/Frame.h 1
4 files changed, 28 insertions, 0 deletions
diff --git a/GUI/GUI/GUI/Config.cpp b/GUI/GUI/GUI/Config.cpp
index 1fc1aee..dd3d55c 100644
--- a/GUI/GUI/GUI/Config.cpp
+++ b/GUI/GUI/GUI/Config.cpp
@@ -86,6 +86,7 @@ AppConfig::AppConfig(wxTextCtrl* out)
enable_lock_at_spawn(true),
gpu_idx(0),
min_silence_duration_ms(250),
+ max_speech_duration_s(5),
keybind("ctrl+x"),
chars_per_sync(8),
@@ -129,6 +130,7 @@ bool AppConfig::Serialize(const std::filesystem::path& path) {
cm.Set("enable_lock_at_spawn", enable_lock_at_spawn);
cm.Set("gpu_idx", gpu_idx);
cm.Set("min_silence_duration_ms", min_silence_duration_ms);
+ cm.Set("max_speech_duration_s", max_speech_duration_s);
cm.Set("keybind", keybind);
cm.Set("chars_per_sync", chars_per_sync);
@@ -185,6 +187,7 @@ bool AppConfig::Deserialize(const std::filesystem::path& path) {
cm.Get("enable_lock_at_spawn", c.enable_lock_at_spawn);
cm.Get("gpu_idx", c.gpu_idx);
cm.Get("min_silence_duration_ms", c.min_silence_duration_ms);
+ cm.Get("max_speech_duration_s", c.max_speech_duration_s);
cm.Get("keybind", c.keybind);
cm.Get("chars_per_sync", c.chars_per_sync);
diff --git a/GUI/GUI/GUI/Config.h b/GUI/GUI/GUI/Config.h
index 808cf9e..a366090 100644
--- a/GUI/GUI/GUI/Config.h
+++ b/GUI/GUI/GUI/Config.h
@@ -72,6 +72,7 @@ public:
bool enable_lock_at_spawn;
int gpu_idx;
int min_silence_duration_ms;
+ int max_speech_duration_s;
std::string keybind;
// Unity and transcription shared settings.
diff --git a/GUI/GUI/GUI/Frame.cpp b/GUI/GUI/GUI/Frame.cpp
index d110a0c..f2fb140 100644
--- a/GUI/GUI/GUI/Frame.cpp
+++ b/GUI/GUI/GUI/Frame.cpp
@@ -76,6 +76,7 @@ namespace {
ID_PY_APP_COLS,
ID_PY_APP_GPU_IDX,
ID_PY_APP_MIN_SILENCE_DURATION_MS,
+ ID_PY_APP_MAX_SPEECH_DURATION_S,
ID_PY_APP_KEYBIND,
ID_PY_APP_BROWSER_SRC_PORT,
ID_PY_APP_COMMIT_FUZZ_THRESHOLD,
@@ -763,6 +764,17 @@ Frame::Frame()
"used to segment speech.");
py_app_min_silence_duration_ms_ = py_app_min_silence_duration_ms;
+ auto* py_app_max_speech_duration_s = new wxTextCtrl(
+ py_app_config_panel_pairs, ID_PY_APP_MAX_SPEECH_DURATION_S,
+ std::to_string(app_c_->max_speech_duration_s), wxDefaultPosition,
+ wxDefaultSize, /*style=*/0);
+ py_app_max_speech_duration_s->SetToolTip(
+ "The maximum duration, in seconds, of any segment of "
+ "speech. Continuous speech longer than this is split "
+ "at the last pause lasting longer than 100 "
+ "milliseconds.");
+ py_app_max_speech_duration_s_ = py_app_max_speech_duration_s;
+
auto* py_app_keybind = new wxTextCtrl(
py_app_config_panel_pairs, ID_PY_APP_KEYBIND,
app_c_->keybind, wxDefaultPosition,
@@ -853,6 +865,11 @@ Frame::Frame()
/*flags=*/wxEXPAND);
sizer->Add(new wxStaticText(py_app_config_panel_pairs,
+ wxID_ANY, /*label=*/"Maximum speech duration (s):"));
+ sizer->Add(py_app_max_speech_duration_s, /*proportion=*/0,
+ /*flags=*/wxEXPAND);
+
+ sizer->Add(new wxStaticText(py_app_config_panel_pairs,
wxID_ANY, /*label=*/"Browser source port:"));
sizer->Add(py_app_browser_src_port, /*proportion=*/0,
/*flags=*/wxEXPAND);
@@ -1550,6 +1567,10 @@ void Frame::ApplyConfigToInputFields()
py_app_min_silence_duration_ms->Clear();
py_app_min_silence_duration_ms->AppendText(std::to_string(app_c_->min_silence_duration_ms));
+ auto* py_app_max_speech_duration_s = static_cast<wxTextCtrl*>(FindWindowById(ID_PY_APP_MAX_SPEECH_DURATION_S));
+ py_app_max_speech_duration_s->Clear();
+ py_app_max_speech_duration_s->AppendText(std::to_string(app_c_->max_speech_duration_s));
+
auto* py_app_enable_local_beep = static_cast<wxCheckBox*>(FindWindowById(ID_PY_APP_ENABLE_LOCAL_BEEP));
py_app_enable_local_beep->SetValue(app_c_->enable_local_beep);
@@ -2304,6 +2325,7 @@ void Frame::OnAppStart(wxCommandEvent& event) {
ASSIGN_OR_RETURN_VOID(int, bytes_per_char, stoiInRange(transcribe_out_, kBytesPerChar[bytes_per_char_idx].ToStdString(), "bytes_per_char", 1, 2));
ASSIGN_OR_RETURN_VOID(int, gpu_idx, stoiInRange(transcribe_out_, py_app_gpu_idx_->GetValue().ToStdString(), "gpu_idx", 0, 10));
ASSIGN_OR_RETURN_VOID(int, min_silence_duration_ms, stoiInRange(transcribe_out_, py_app_min_silence_duration_ms_->GetValue().ToStdString(), "min_silence_duration_ms", 50, 5000));
+ ASSIGN_OR_RETURN_VOID(int, max_speech_duration_s, stoiInRange(transcribe_out_, py_app_max_speech_duration_s_->GetValue().ToStdString(), "max_speech_duration_s", 1, 30));
ASSIGN_OR_RETURN_VOID(int, browser_src_port, stoiInRange(transcribe_out_, py_app_browser_src_port_->GetValue().ToStdString(), "browser_src_port", 1024, 65535));
std::string keybind = py_app_keybind_->GetValue().ToStdString();
@@ -2334,6 +2356,7 @@ void Frame::OnAppStart(wxCommandEvent& event) {
app_c_->enable_lock_at_spawn = enable_lock_at_spawn;
app_c_->gpu_idx = gpu_idx;
app_c_->min_silence_duration_ms = min_silence_duration_ms;
+ app_c_->max_speech_duration_s = max_speech_duration_s;
app_c_->keybind = keybind;
app_c_->Serialize(AppConfig::kConfigPath);
diff --git a/GUI/GUI/GUI/Frame.h b/GUI/GUI/GUI/Frame.h
index 72ba6c4..615726a 100644
--- a/GUI/GUI/GUI/Frame.h
+++ b/GUI/GUI/GUI/Frame.h
@@ -39,6 +39,7 @@ private:
wxTextCtrl* py_app_cols_;
wxTextCtrl* py_app_gpu_idx_;
wxTextCtrl* py_app_min_silence_duration_ms_;
+ wxTextCtrl* py_app_max_speech_duration_s_;
wxTextCtrl* py_app_keybind_;
wxTextCtrl* py_app_browser_src_port_;
wxTextCtrl* py_app_commit_fuzz_threshold_;