diff options
| author | yum <yum.food.vr@gmail.com> | 2023-06-27 16:01:16 -0700 |
|---|---|---|
| committer | yum <yum.food.vr@gmail.com> | 2023-06-27 16:01:16 -0700 |
| commit | 6638993e313773ba6ca8bdb6d7690b798d41f0d4 (patch) | |
| tree | 75ce815ed4cca607ad237a39fd4ee5c313fb8504 /GUI | |
| parent | 241813a5af11093c6b86e70ada729788c1f0dee6 (diff) | |
Add UI for fuzzy commit threshold
Recap: In the speech-to-text (STT) code there's an algorithm that tries to
determine when a transcript is "stable" enough to commit. If that check is too
loose, then accuracy suffers; if too strict, then the audio buffer eventually fills.
To mitigate the problem, I check whether the last N transcripts are
within some Levenshtein edit distance of each other. The
fuzzy matching lets us forgive small instabilities, like differences in
uppercase/lowercase or punctuation, while rejecting large instabilities.
The default value of 8 seems to be in the sweet spot of accuracy &
performance, but it will likely be tuned in the future.
Diffstat (limited to 'GUI')
| -rw-r--r-- | GUI/GUI/GUI/Config.cpp | 3 | ||||
| -rw-r--r-- | GUI/GUI/GUI/Config.h | 1 | ||||
| -rw-r--r-- | GUI/GUI/GUI/Frame.cpp | 69 | ||||
| -rw-r--r-- | GUI/GUI/GUI/Frame.h | 1 | ||||
| -rw-r--r-- | GUI/GUI/GUI/PythonWrapper.cpp | 1 |
5 files changed, 64 insertions, 11 deletions
diff --git a/GUI/GUI/GUI/Config.cpp b/GUI/GUI/GUI/Config.cpp index c851983..4f730b5 100644 --- a/GUI/GUI/GUI/Config.cpp +++ b/GUI/GUI/GUI/Config.cpp @@ -72,6 +72,7 @@ AppConfig::AppConfig(wxTextCtrl* out) enable_local_beep(true),
enable_browser_src(false),
browser_src_port(8097),
+ commit_fuzz_threshold(8),
use_cpu(false),
use_builtin(false),
enable_uwu_filter(false),
@@ -123,6 +124,7 @@ bool AppConfig::Serialize(const std::filesystem::path& path) { cm.Set("enable_local_beep", enable_local_beep);
cm.Set("enable_browser_src", enable_browser_src);
cm.Set("browser_src_port", browser_src_port);
+ cm.Set("commit_fuzz_threshold", commit_fuzz_threshold);
cm.Set("use_cpu", use_cpu);
cm.Set("use_builtin", use_builtin);
cm.Set("enable_uwu_filter", enable_uwu_filter);
@@ -187,6 +189,7 @@ bool AppConfig::Deserialize(const std::filesystem::path& path) { cm.Get("enable_local_beep", c.enable_local_beep);
cm.Get("enable_browser_src", c.enable_browser_src);
cm.Get("browser_src_port", c.browser_src_port);
+ cm.Get("commit_fuzz_threshold", c.commit_fuzz_threshold);
cm.Get("use_cpu", c.use_cpu);
cm.Get("use_builtin", c.use_builtin);
cm.Get("enable_uwu_filter", c.enable_uwu_filter);
diff --git a/GUI/GUI/GUI/Config.h b/GUI/GUI/GUI/Config.h index d71aeb4..6711d79 100644 --- a/GUI/GUI/GUI/Config.h +++ b/GUI/GUI/GUI/Config.h @@ -58,6 +58,7 @@ public: bool enable_local_beep;
bool enable_browser_src;
int browser_src_port;
+ int commit_fuzz_threshold;
bool use_cpu;
bool use_builtin;
bool enable_uwu_filter;
diff --git a/GUI/GUI/GUI/Frame.cpp b/GUI/GUI/GUI/Frame.cpp index 8d4c868..1df60e7 100644 --- a/GUI/GUI/GUI/Frame.cpp +++ b/GUI/GUI/GUI/Frame.cpp @@ -53,6 +53,7 @@ namespace { ID_PY_APP_GPU_IDX,
ID_PY_APP_KEYBIND,
ID_PY_APP_BROWSER_SRC_PORT,
+ ID_PY_APP_COMMIT_FUZZ_THRESHOLD,
ID_UNITY_PANEL,
ID_UNITY_CONFIG_PANEL,
ID_UNITY_OUT,
@@ -721,6 +722,20 @@ Frame::Frame() "value you configure here.");
py_app_browser_src_port_ = py_app_browser_src_port;
+ auto* py_app_commit_fuzz_threshold = new wxTextCtrl(
+ py_app_config_panel_pairs, ID_PY_APP_COMMIT_FUZZ_THRESHOLD,
+ std::to_string(app_c_->commit_fuzz_threshold), wxDefaultPosition,
+ wxDefaultSize, /*style=*/0);
+ py_app_commit_fuzz_threshold->SetToolTip(
+ "The transcription app requires subsequent "
+ "transcripts to be within this edit distance of each "
+ "other before it commits them. Higher values make "
+ "transcripts commit more easily, making the app "
+ "faster but less accurate. Lower values make "
+ "transcripts commit less easily, making the app "
+ "slower but more accurate.");
+ py_app_commit_fuzz_threshold_ = py_app_commit_fuzz_threshold;
+
auto* sizer = new wxFlexGridSizer(/*cols=*/2);
py_app_config_panel_pairs->SetSizer(sizer);
@@ -785,6 +800,11 @@ Frame::Frame() /*flags=*/wxEXPAND);
sizer->Add(new wxStaticText(py_app_config_panel_pairs,
+ wxID_ANY, /*label=*/"Commit similarity threshold:"));
+ sizer->Add(py_app_commit_fuzz_threshold, /*proportion=*/0,
+ /*flags=*/wxEXPAND);
+
+ sizer->Add(new wxStaticText(py_app_config_panel_pairs,
wxID_ANY, /*label=*/"Browser source port:"));
sizer->Add(py_app_browser_src_port, /*proportion=*/0,
/*flags=*/wxEXPAND);
@@ -1380,6 +1400,10 @@ void Frame::ApplyConfigToInputFields() py_app_desktop_browser_src_port->Clear();
py_app_desktop_browser_src_port->AppendText(std::to_string(app_c_->browser_src_port));
+ auto* py_app_desktop_commit_fuzz_threshold = static_cast<wxTextCtrl*>(FindWindowById(ID_PY_APP_COMMIT_FUZZ_THRESHOLD));
+ py_app_desktop_commit_fuzz_threshold->Clear();
+ py_app_desktop_commit_fuzz_threshold->AppendText(std::to_string(app_c_->commit_fuzz_threshold));
+
auto* py_app_rows = static_cast<wxTextCtrl*>(FindWindowById(ID_PY_APP_ROWS));
py_app_rows->Clear();
py_app_rows->AppendText(std::to_string(app_c_->rows));
@@ -2017,7 +2041,9 @@ void Frame::OnAppStart(wxCommandEvent& event) { py_app_keybind_->GetValue().ToStdString();
std::string browser_src_port_str =
py_app_browser_src_port_->GetValue().ToStdString();
- int rows, cols, chars_per_sync, bytes_per_char, gpu_idx, browser_src_port;
+ std::string commit_fuzz_threshold_str =
+ py_app_commit_fuzz_threshold_->GetValue().ToStdString();
+ int rows, cols, chars_per_sync, bytes_per_char, gpu_idx, browser_src_port, commit_fuzz_threshold;
try {
rows = std::stoi(rows_str);
cols = std::stoi(cols_str);
@@ -2025,20 +2051,30 @@ void Frame::OnAppStart(wxCommandEvent& event) { bytes_per_char = std::stoi(bytes_per_char_str);
gpu_idx = std::stoi(gpu_idx_str);
browser_src_port = std::stoi(browser_src_port_str);
+ commit_fuzz_threshold = std::stoi(commit_fuzz_threshold_str);
}
catch (const std::invalid_argument&) {
Log(transcribe_out_, "Could not parse rows \"{}\", cols \"{}\", chars "
- "per sync \"{}\", bytes per char \"{}\", "
- "gpu_idx \"{}\", or browser src port \"{}\""
+ "per sync \"{}\", "
+ "bytes per char \"{}\", "
+ "gpu_idx \"{}\", "
+ "browser src port \"{}\"", ""
+ "or commit_fuzz_threshold \"{}\""
"as an integer\n", rows_str, cols_str, chars_per_sync_str,
- bytes_per_char_str, gpu_idx_str, browser_src_port_str);
+ bytes_per_char_str, gpu_idx_str, browser_src_port_str,
+ commit_fuzz_threshold_str);
return;
}
catch (const std::out_of_range&) {
- Log(transcribe_out_, "Rows \"{}\", cols \"{}\", chars per sync "
- "\"{}\", bytes per char \"{}\", gpu idx \"{}\", or browser src "
- "port \"{}\" are out of range\n", rows_str, cols_str, chars_per_sync_str,
- bytes_per_char_str, gpu_idx, browser_src_port_str);
+ Log(transcribe_out_, "Rows \"{}\", "
+ "cols \"{}\", "
+ "chars per sync \"{}\", "
+ "bytes per char \"{}\", "
+ "gpu idx \"{}\", "
+ "browser src port \"{}\", "
+ "or commit_fuzz_threshold \"{}\" "
+ "are out of range\n", rows_str, cols_str, chars_per_sync_str,
+ bytes_per_char_str, gpu_idx_str, browser_src_port_str, commit_fuzz_threshold_str);
return;
}
const int max_rows = 10;
@@ -2047,19 +2083,29 @@ void Frame::OnAppStart(wxCommandEvent& event) { const int max_gpu_idx = 10;
const int min_browser_src_port = 1024;
const int max_browser_src_port = 65535;
+ const int min_commit_fuzz_threshold = 0;
+ const int max_commit_fuzz_threshold = 100;
if (rows < 0 || rows > max_rows ||
cols < 0 || cols > max_cols ||
gpu_idx < min_gpu_idx || gpu_idx > max_gpu_idx ||
- browser_src_port < min_browser_src_port || browser_src_port > max_browser_src_port) {
+ browser_src_port < min_browser_src_port || browser_src_port > max_browser_src_port ||
+ commit_fuzz_threshold < min_commit_fuzz_threshold || commit_fuzz_threshold > max_commit_fuzz_threshold) {
Log(transcribe_out_, "Rows not on [{},{}] or cols not on [{},{}] or "
- "gpu_idx not on [{}, {}] or browser src port not on [{}, {}]\n",
+ "gpu_idx not on [{}, {}] or "
+ "browser src port not on [{}, {}] or "
+ "commit_fuzz_threshold not on [{}, {}] "
+ "\n",
0, max_rows,
0, max_cols,
min_gpu_idx, max_gpu_idx,
- min_browser_src_port, max_browser_src_port);
+ min_browser_src_port, max_browser_src_port,
+ min_commit_fuzz_threshold, max_commit_fuzz_threshold);
return;
}
+ Log(transcribe_out_, "Commit fuzz threshold str: {}\n", commit_fuzz_threshold_str);
+ Log(transcribe_out_, "Commit fuzz threshold: {}\n", commit_fuzz_threshold);
+
app_c_->microphone = kMicChoices[which_mic].ToStdString();
app_c_->language = kLangChoices[which_lang].ToStdString();
app_c_->language_target = kLangTargetChoices[which_translate_target].ToStdString();
@@ -2073,6 +2119,7 @@ void Frame::OnAppStart(wxCommandEvent& event) { app_c_->enable_local_beep = enable_local_beep;
app_c_->enable_browser_src = enable_browser_src;
app_c_->browser_src_port = browser_src_port;
+ app_c_->commit_fuzz_threshold = commit_fuzz_threshold;
app_c_->use_cpu = use_cpu;
app_c_->use_builtin = use_builtin;
app_c_->enable_uwu_filter = enable_uwu_filter;
diff --git a/GUI/GUI/GUI/Frame.h b/GUI/GUI/GUI/Frame.h index 1856e7d..7afc005 100644 --- a/GUI/GUI/GUI/Frame.h +++ b/GUI/GUI/GUI/Frame.h @@ -40,6 +40,7 @@ private: wxTextCtrl* py_app_gpu_idx_;
wxTextCtrl* py_app_keybind_;
wxTextCtrl* py_app_browser_src_port_;
+ wxTextCtrl* py_app_commit_fuzz_threshold_;
wxTextCtrl* unity_rows_;
wxTextCtrl* unity_cols_;
diff --git a/GUI/GUI/GUI/PythonWrapper.cpp b/GUI/GUI/GUI/PythonWrapper.cpp index 1402ed5..e6f10c2 100644 --- a/GUI/GUI/GUI/PythonWrapper.cpp +++ b/GUI/GUI/GUI/PythonWrapper.cpp @@ -497,6 +497,7 @@ std::future<bool> PythonWrapper::StartApp( "--gpu_idx", std::to_string(config.gpu_idx), "--keybind", Quote(config.keybind), "--reset_on_toggle", config.reset_on_toggle ? "1" : "0", + "--commit_fuzz_threshold", std::to_string(config.commit_fuzz_threshold), }, std::move(out_cb), std::move(in_cb), |
