summary refs log tree commit diff stats
path: root/GUI
diff options
context:
space:
mode:
author yum <yum.food.vr@gmail.com> 2023-09-10 00:28:03 -0700
committer yum <yum.food.vr@gmail.com> 2023-09-10 00:29:04 -0700
commit 4ee396584c348c11e0272f0c2842f6a5d3175586 (patch)
tree 5f8854f1a2a67c5dd18bf7a7e29489172426b660 /GUI
parent 9924a141b0b1266671915be12e21df6c8f4c5366 (diff)
Add UI for transcription loop delay
Allows users to directly modulate the performance-latency tradeoff.

Also:
* Bump up UI buffer to 1k lines.
* Fix browser source reset. It now also resets preview text.
Diffstat (limited to 'GUI')
-rw-r--r-- GUI/GUI/GUI/Config.cpp 3
-rw-r--r-- GUI/GUI/GUI/Config.h 1
-rw-r--r-- GUI/GUI/GUI/Frame.cpp 22
-rw-r--r-- GUI/GUI/GUI/Frame.h 1
-rw-r--r-- GUI/GUI/GUI/Logging.cpp 9
-rw-r--r-- GUI/GUI/GUI/Transcript.cpp 1
6 files changed, 33 insertions, 4 deletions
diff --git a/GUI/GUI/GUI/Config.cpp b/GUI/GUI/GUI/Config.cpp
index dd3d55c..91fd1e9 100644
--- a/GUI/GUI/GUI/Config.cpp
+++ b/GUI/GUI/GUI/Config.cpp
@@ -87,6 +87,7 @@ AppConfig::AppConfig(wxTextCtrl* out)
gpu_idx(0),
min_silence_duration_ms(250),
max_speech_duration_s(5),
+ transcription_loop_delay_ms(100),
keybind("ctrl+x"),
chars_per_sync(8),
@@ -131,6 +132,7 @@ bool AppConfig::Serialize(const std::filesystem::path& path) {
cm.Set("gpu_idx", gpu_idx);
cm.Set("min_silence_duration_ms", min_silence_duration_ms);
cm.Set("max_speech_duration_s", max_speech_duration_s);
+ cm.Set("transcription_loop_delay_ms", transcription_loop_delay_ms);
cm.Set("keybind", keybind);
cm.Set("chars_per_sync", chars_per_sync);
@@ -188,6 +190,7 @@ bool AppConfig::Deserialize(const std::filesystem::path& path) {
cm.Get("gpu_idx", c.gpu_idx);
cm.Get("min_silence_duration_ms", c.min_silence_duration_ms);
cm.Get("max_speech_duration_s", c.max_speech_duration_s);
+ cm.Get("transcription_loop_delay_ms", c.transcription_loop_delay_ms);
cm.Get("keybind", c.keybind);
cm.Get("chars_per_sync", c.chars_per_sync);
diff --git a/GUI/GUI/GUI/Config.h b/GUI/GUI/GUI/Config.h
index a366090..762adc5 100644
--- a/GUI/GUI/GUI/Config.h
+++ b/GUI/GUI/GUI/Config.h
@@ -73,6 +73,7 @@ public:
int gpu_idx;
int min_silence_duration_ms;
int max_speech_duration_s;
+ int transcription_loop_delay_ms;
std::string keybind;
// Unity and transcription shared settings.
diff --git a/GUI/GUI/GUI/Frame.cpp b/GUI/GUI/GUI/Frame.cpp
index 384f2a2..a61c821 100644
--- a/GUI/GUI/GUI/Frame.cpp
+++ b/GUI/GUI/GUI/Frame.cpp
@@ -77,6 +77,7 @@ namespace {
ID_PY_APP_GPU_IDX,
ID_PY_APP_MIN_SILENCE_DURATION_MS,
ID_PY_APP_MAX_SPEECH_DURATION_S,
+ ID_PY_APP_TRANSCRIPTION_LOOP_DELAY_MS,
ID_PY_APP_KEYBIND,
ID_PY_APP_BROWSER_SRC_PORT,
ID_PY_APP_COMMIT_FUZZ_THRESHOLD,
@@ -775,6 +776,16 @@ Frame::Frame()
"milliseconds.");
py_app_max_speech_duration_s_ = py_app_max_speech_duration_s;
+ auto* py_app_transcription_loop_delay_ms = new wxTextCtrl(
+ py_app_config_panel_pairs, ID_PY_APP_TRANSCRIPTION_LOOP_DELAY_MS,
+ std::to_string(app_c_->transcription_loop_delay_ms), wxDefaultPosition,
+ wxDefaultSize, /*style=*/0);
+ py_app_transcription_loop_delay_ms->SetToolTip(
+ "The amount of time, in milliseconds, that the "
+ "application will sleep between every subsequent "
+ "transcription.");
+ py_app_transcription_loop_delay_ms_ = py_app_transcription_loop_delay_ms;
+
auto* py_app_keybind = new wxTextCtrl(
py_app_config_panel_pairs, ID_PY_APP_KEYBIND,
app_c_->keybind, wxDefaultPosition,
@@ -870,6 +881,11 @@ Frame::Frame()
/*flags=*/wxEXPAND);
sizer->Add(new wxStaticText(py_app_config_panel_pairs,
+ wxID_ANY, /*label=*/"Transcription loop delay (ms):"));
+ sizer->Add(py_app_transcription_loop_delay_ms, /*proportion=*/0,
+ /*flags=*/wxEXPAND);
+
+ sizer->Add(new wxStaticText(py_app_config_panel_pairs,
wxID_ANY, /*label=*/"Browser source port:"));
sizer->Add(py_app_browser_src_port, /*proportion=*/0,
/*flags=*/wxEXPAND);
@@ -1571,6 +1587,10 @@ void Frame::ApplyConfigToInputFields()
py_app_max_speech_duration_s->Clear();
py_app_max_speech_duration_s->AppendText(std::to_string(app_c_->max_speech_duration_s));
+ auto* py_app_transcription_loop_delay_ms = static_cast<wxTextCtrl*>(FindWindowById(ID_PY_APP_TRANSCRIPTION_LOOP_DELAY_MS));
+ py_app_transcription_loop_delay_ms->Clear();
+ py_app_transcription_loop_delay_ms->AppendText(std::to_string(app_c_->transcription_loop_delay_ms));
+
auto* py_app_enable_local_beep = static_cast<wxCheckBox*>(FindWindowById(ID_PY_APP_ENABLE_LOCAL_BEEP));
py_app_enable_local_beep->SetValue(app_c_->enable_local_beep);
@@ -2326,6 +2346,7 @@ void Frame::OnAppStart(wxCommandEvent& event) {
ASSIGN_OR_RETURN_VOID(int, gpu_idx, stoiInRange(transcribe_out_, py_app_gpu_idx_->GetValue().ToStdString(), "gpu_idx", 0, 10));
ASSIGN_OR_RETURN_VOID(int, min_silence_duration_ms, stoiInRange(transcribe_out_, py_app_min_silence_duration_ms_->GetValue().ToStdString(), "min_silence_duration_ms", 50, 5000));
ASSIGN_OR_RETURN_VOID(int, max_speech_duration_s, stoiInRange(transcribe_out_, py_app_max_speech_duration_s_->GetValue().ToStdString(), "max_speech_duration_s", 1, 30));
+ ASSIGN_OR_RETURN_VOID(int, transcription_loop_delay_ms, stoiInRange(transcribe_out_, py_app_transcription_loop_delay_ms_->GetValue().ToStdString(), "transcription_loop_delay_ms", 0, 10000));
ASSIGN_OR_RETURN_VOID(int, browser_src_port, stoiInRange(transcribe_out_, py_app_browser_src_port_->GetValue().ToStdString(), "browser_src_port", 1024, 65535));
std::string keybind = py_app_keybind_->GetValue().ToStdString();
@@ -2357,6 +2378,7 @@ void Frame::OnAppStart(wxCommandEvent& event) {
app_c_->gpu_idx = gpu_idx;
app_c_->min_silence_duration_ms = min_silence_duration_ms;
app_c_->max_speech_duration_s = max_speech_duration_s;
+ app_c_->transcription_loop_delay_ms = transcription_loop_delay_ms;
app_c_->keybind = keybind;
app_c_->Serialize(AppConfig::kConfigPath);
diff --git a/GUI/GUI/GUI/Frame.h b/GUI/GUI/GUI/Frame.h
index 615726a..21f1220 100644
--- a/GUI/GUI/GUI/Frame.h
+++ b/GUI/GUI/GUI/Frame.h
@@ -40,6 +40,7 @@ private:
wxTextCtrl* py_app_gpu_idx_;
wxTextCtrl* py_app_min_silence_duration_ms_;
wxTextCtrl* py_app_max_speech_duration_s_;
+ wxTextCtrl* py_app_transcription_loop_delay_ms_;
wxTextCtrl* py_app_keybind_;
wxTextCtrl* py_app_browser_src_port_;
wxTextCtrl* py_app_commit_fuzz_threshold_;
diff --git a/GUI/GUI/GUI/Logging.cpp b/GUI/GUI/GUI/Logging.cpp
index 5d0e23e..f6ad3ab 100644
--- a/GUI/GUI/GUI/Logging.cpp
+++ b/GUI/GUI/GUI/Logging.cpp
@@ -48,15 +48,16 @@ void Logging::ThreadLogger::Drain()
log_ofs << message;
}
- // Constrain wxTextCtrl's to 100-200 lines to keep memory usage /
+ // Constrain wxTextCtrl's to 1000-2000 lines to keep memory usage /
// general snappiness in check.
if (frame) {
wxString allText = frame->GetValue();
wxArrayString lines = wxStringTokenize(allText, "\n");
size_t count = lines.GetCount();
- if (count > 200) {
- // Keep only the last 100 lines.
- size_t linesToRemove = count - 100;
+ constexpr int kHalfMaxLines = 1000;
+ if (count > kHalfMaxLines * 2) {
+ // Keep only the last kHalfMaxLines lines.
+ size_t linesToRemove = count - kHalfMaxLines;
// Remove lines from the beginning
lines.RemoveAt(0, linesToRemove);
diff --git a/GUI/GUI/GUI/Transcript.cpp b/GUI/GUI/GUI/Transcript.cpp
index eb798d9..11bab31 100644
--- a/GUI/GUI/GUI/Transcript.cpp
+++ b/GUI/GUI/GUI/Transcript.cpp
@@ -20,6 +20,7 @@ void Transcript::SetPreview(std::string&& segment) {
void Transcript::Clear() {
std::scoped_lock l(mu_);
segments_.clear();
+ previews_.clear();
}
std::vector<std::string> Transcript::Get() {