summaryrefslogtreecommitdiffstats
path: root/GUI
diff options
context:
space:
mode:
author: yum <yum.food.vr@gmail.com> 2023-10-05 18:22:55 -0700
committer: yum <yum.food.vr@gmail.com> 2023-10-05 18:28:42 -0700
commit: add7bd8ef86ec21cd1327eb45bcb739aa54f7db8 (patch)
tree: f342e37917c93073552854a125696e12afbd4c39 /GUI
parent: c2bc70c18d2fd1c3601b32f2a93b3b4a704786a5 (diff)
Transcripts preceding long pauses now drop (tag: v0.16.0)
When hot-miking into the built-in chatbox, there are sometimes long pauses in conversation. After these pauses, it's undesirable to show the transcript generated before the pause. This feature makes it so that those transcripts can be dropped.
Also:
* Limit number of segments sent to browser source to 10. Allow this to grow up to 10 segments before dropping the first 5 segments.
* Silence warnings generated by `install_in_venv`, used by e.g. the translation codepath.
* Enable audio normalization to improve accuracy when speaking softly, at the cost of some accuracy when speaking normally.
Credit: user endo0269 on Discord suggested this feature.
Diffstat (limited to 'GUI')
-rw-r--r--GUI/GUI/GUI/Config.cpp3
-rw-r--r--GUI/GUI/GUI/Config.h1
-rw-r--r--GUI/GUI/GUI/Frame.cpp22
-rw-r--r--GUI/GUI/GUI/Frame.h1
-rw-r--r--GUI/GUI/GUI/PythonWrapper.h4
5 files changed, 27 insertions, 4 deletions
diff --git a/GUI/GUI/GUI/Config.cpp b/GUI/GUI/GUI/Config.cpp
index 573238f..2abe5b2 100644
--- a/GUI/GUI/GUI/Config.cpp
+++ b/GUI/GUI/GUI/Config.cpp
@@ -88,6 +88,7 @@ AppConfig::AppConfig(wxTextCtrl* out)
gpu_idx(0),
min_silence_duration_ms(250),
max_speech_duration_s(5),
+ reset_after_silence_s(10),
transcription_loop_delay_ms(100),
keybind("ctrl+x"),
@@ -135,6 +136,7 @@ bool AppConfig::Serialize(const std::filesystem::path& path) {
cm.Set("gpu_idx", gpu_idx);
cm.Set("min_silence_duration_ms", min_silence_duration_ms);
cm.Set("max_speech_duration_s", max_speech_duration_s);
+ cm.Set("reset_after_silence_s", reset_after_silence_s);
cm.Set("transcription_loop_delay_ms", transcription_loop_delay_ms);
cm.Set("keybind", keybind);
@@ -195,6 +197,7 @@ bool AppConfig::Deserialize(const std::filesystem::path& path) {
cm.Get("gpu_idx", c.gpu_idx);
cm.Get("min_silence_duration_ms", c.min_silence_duration_ms);
cm.Get("max_speech_duration_s", c.max_speech_duration_s);
+ cm.Get("reset_after_silence_s", c.reset_after_silence_s);
cm.Get("transcription_loop_delay_ms", c.transcription_loop_delay_ms);
cm.Get("keybind", c.keybind);
diff --git a/GUI/GUI/GUI/Config.h b/GUI/GUI/GUI/Config.h
index ede21d6..0d0da66 100644
--- a/GUI/GUI/GUI/Config.h
+++ b/GUI/GUI/GUI/Config.h
@@ -74,6 +74,7 @@ public:
int gpu_idx;
int min_silence_duration_ms;
int max_speech_duration_s;
+ int reset_after_silence_s;
int transcription_loop_delay_ms;
std::string keybind;
diff --git a/GUI/GUI/GUI/Frame.cpp b/GUI/GUI/GUI/Frame.cpp
index 23ac38c..602bf6d 100644
--- a/GUI/GUI/GUI/Frame.cpp
+++ b/GUI/GUI/GUI/Frame.cpp
@@ -78,6 +78,7 @@ namespace {
ID_PY_APP_GPU_IDX,
ID_PY_APP_MIN_SILENCE_DURATION_MS,
ID_PY_APP_MAX_SPEECH_DURATION_S,
+ ID_PY_APP_RESET_AFTER_SILENCE_S,
ID_PY_APP_TRANSCRIPTION_LOOP_DELAY_MS,
ID_PY_APP_KEYBIND,
ID_PY_APP_BROWSER_SRC_PORT,
@@ -796,6 +797,16 @@ Frame::Frame()
"milliseconds.");
py_app_max_speech_duration_s_ = py_app_max_speech_duration_s;
+ auto* py_app_reset_after_silence_s = new wxTextCtrl(
+ py_app_config_panel_pairs, ID_PY_APP_RESET_AFTER_SILENCE_S,
+ std::to_string(app_c_->reset_after_silence_s), wxDefaultPosition,
+ wxDefaultSize, /*style=*/0);
+ py_app_reset_after_silence_s->SetToolTip(
+ "If you pause for at least this long between "
+ "sentences, the transcript before the pause will be "
+ "removed. To disable this feature, set it to -1.");
+ py_app_reset_after_silence_s_ = py_app_reset_after_silence_s;
+
auto* py_app_transcription_loop_delay_ms = new wxTextCtrl(
py_app_config_panel_pairs, ID_PY_APP_TRANSCRIPTION_LOOP_DELAY_MS,
std::to_string(app_c_->transcription_loop_delay_ms), wxDefaultPosition,
@@ -906,6 +917,11 @@ Frame::Frame()
/*flags=*/wxEXPAND);
sizer->Add(new wxStaticText(py_app_config_panel_pairs,
+ wxID_ANY, /*label=*/"Reset after silence (s):"));
+ sizer->Add(py_app_reset_after_silence_s, /*proportion=*/0,
+ /*flags=*/wxEXPAND);
+
+ sizer->Add(new wxStaticText(py_app_config_panel_pairs,
wxID_ANY, /*label=*/"Transcription loop delay (ms):"));
sizer->Add(py_app_transcription_loop_delay_ms, /*proportion=*/0,
/*flags=*/wxEXPAND);
@@ -1629,6 +1645,10 @@ void Frame::ApplyConfigToInputFields()
py_app_max_speech_duration_s->Clear();
py_app_max_speech_duration_s->AppendText(std::to_string(app_c_->max_speech_duration_s));
+ auto* py_app_reset_after_silence_s = static_cast<wxTextCtrl*>(FindWindowById(ID_PY_APP_RESET_AFTER_SILENCE_S));
+ py_app_reset_after_silence_s->Clear();
+ py_app_reset_after_silence_s->AppendText(std::to_string(app_c_->reset_after_silence_s));
+
auto* py_app_transcription_loop_delay_ms = static_cast<wxTextCtrl*>(FindWindowById(ID_PY_APP_TRANSCRIPTION_LOOP_DELAY_MS));
py_app_transcription_loop_delay_ms->Clear();
py_app_transcription_loop_delay_ms->AppendText(std::to_string(app_c_->transcription_loop_delay_ms));
@@ -2405,6 +2425,7 @@ void Frame::OnAppStart(wxCommandEvent& event) {
ASSIGN_OR_RETURN_VOID(int, gpu_idx, stoiInRange(transcribe_out_, py_app_gpu_idx_->GetValue().ToStdString(), "gpu_idx", 0, 10));
ASSIGN_OR_RETURN_VOID(int, min_silence_duration_ms, stoiInRange(transcribe_out_, py_app_min_silence_duration_ms_->GetValue().ToStdString(), "min_silence_duration_ms", 50, 5000));
ASSIGN_OR_RETURN_VOID(int, max_speech_duration_s, stoiInRange(transcribe_out_, py_app_max_speech_duration_s_->GetValue().ToStdString(), "max_speech_duration_s", 1, 30));
+ ASSIGN_OR_RETURN_VOID(int, reset_after_silence_s, stoiInRange(transcribe_out_, py_app_reset_after_silence_s_->GetValue().ToStdString(), "reset_after_silence_s", -1, 30));
ASSIGN_OR_RETURN_VOID(int, transcription_loop_delay_ms, stoiInRange(transcribe_out_, py_app_transcription_loop_delay_ms_->GetValue().ToStdString(), "transcription_loop_delay_ms", 0, 10000));
ASSIGN_OR_RETURN_VOID(int, browser_src_port, stoiInRange(transcribe_out_, py_app_browser_src_port_->GetValue().ToStdString(), "browser_src_port", 1024, 65535));
@@ -2438,6 +2459,7 @@ void Frame::OnAppStart(wxCommandEvent& event) {
app_c_->gpu_idx = gpu_idx;
app_c_->min_silence_duration_ms = min_silence_duration_ms;
app_c_->max_speech_duration_s = max_speech_duration_s;
+ app_c_->reset_after_silence_s = reset_after_silence_s;
app_c_->transcription_loop_delay_ms = transcription_loop_delay_ms;
app_c_->keybind = keybind;
app_c_->Serialize(AppConfig::kConfigPath);
diff --git a/GUI/GUI/GUI/Frame.h b/GUI/GUI/GUI/Frame.h
index 468a650..ee91e98 100644
--- a/GUI/GUI/GUI/Frame.h
+++ b/GUI/GUI/GUI/Frame.h
@@ -40,6 +40,7 @@ private:
wxTextCtrl* py_app_gpu_idx_;
wxTextCtrl* py_app_min_silence_duration_ms_;
wxTextCtrl* py_app_max_speech_duration_s_;
+ wxTextCtrl* py_app_reset_after_silence_s_;
wxTextCtrl* py_app_transcription_loop_delay_ms_;
wxTextCtrl* py_app_keybind_;
wxTextCtrl* py_app_browser_src_port_;
diff --git a/GUI/GUI/GUI/PythonWrapper.h b/GUI/GUI/GUI/PythonWrapper.h
index 31c571e..b5fe518 100644
--- a/GUI/GUI/GUI/PythonWrapper.h
+++ b/GUI/GUI/GUI/PythonWrapper.h
@@ -77,10 +77,6 @@ namespace PythonWrapper
const std::function<bool()>&& run_cb = []() { return true; });
bool InstallPip(std::string* out, std::string* err = nullptr);
- // TODO(yum) both StartApp and GenerateAnimator should be
- // parameterized with config files instead of these ever-growing lists of
- // parameters. We could persist those files so settings would persist across
- // app restarts.
std::future<bool> StartApp(
const AppConfig& app_c,
const std::string& config_path,