From 6638993e313773ba6ca8bdb6d7690b798d41f0d4 Mon Sep 17 00:00:00 2001 From: yum Date: Tue, 27 Jun 2023 16:01:16 -0700 Subject: Add UI for fuzzy commit threshold Recap: In the STT there's an algorithm that tries to determine when a transcript is "stable" enough to commit. If that is too loose, then accuracy suffers; if too strict, then the audio buffer eventually fills. To mitigate the problem, I check whether the last N transcripts are within some edit distance (Levenshtein edit distance) of each other. The fuzzy matching lets us forgive small instabilities, like differences in uppercase/lowercase or punctuation, while rejecting large instabilities. The default value of 8 seems to be in the sweet spot of accuracy & performance, but it will likely be tuned in the future. --- GUI/GUI/GUI/Config.cpp | 3 ++ GUI/GUI/GUI/Config.h | 1 + GUI/GUI/GUI/Frame.cpp | 69 ++++++++++++++++++++++++++++++++++++------- GUI/GUI/GUI/Frame.h | 1 + GUI/GUI/GUI/PythonWrapper.cpp | 1 + 5 files changed, 64 insertions(+), 11 deletions(-) (limited to 'GUI') diff --git a/GUI/GUI/GUI/Config.cpp b/GUI/GUI/GUI/Config.cpp index c851983..4f730b5 100644 --- a/GUI/GUI/GUI/Config.cpp +++ b/GUI/GUI/GUI/Config.cpp @@ -72,6 +72,7 @@ AppConfig::AppConfig(wxTextCtrl* out) enable_local_beep(true), enable_browser_src(false), browser_src_port(8097), + commit_fuzz_threshold(8), use_cpu(false), use_builtin(false), enable_uwu_filter(false), @@ -123,6 +124,7 @@ bool AppConfig::Serialize(const std::filesystem::path& path) { cm.Set("enable_local_beep", enable_local_beep); cm.Set("enable_browser_src", enable_browser_src); cm.Set("browser_src_port", browser_src_port); + cm.Set("commit_fuzz_threshold", commit_fuzz_threshold); cm.Set("use_cpu", use_cpu); cm.Set("use_builtin", use_builtin); cm.Set("enable_uwu_filter", enable_uwu_filter); @@ -187,6 +189,7 @@ bool AppConfig::Deserialize(const std::filesystem::path& path) { cm.Get("enable_local_beep", c.enable_local_beep); cm.Get("enable_browser_src", c.enable_browser_src); cm.Get("browser_src_port", c.browser_src_port); + cm.Get("commit_fuzz_threshold", c.commit_fuzz_threshold); cm.Get("use_cpu", c.use_cpu); cm.Get("use_builtin", c.use_builtin); cm.Get("enable_uwu_filter", c.enable_uwu_filter); diff --git a/GUI/GUI/GUI/Config.h b/GUI/GUI/GUI/Config.h index d71aeb4..6711d79 100644 --- a/GUI/GUI/GUI/Config.h +++ b/GUI/GUI/GUI/Config.h @@ -58,6 +58,7 @@ public: bool enable_local_beep; bool enable_browser_src; int browser_src_port; + int commit_fuzz_threshold; bool use_cpu; bool use_builtin; bool enable_uwu_filter; diff --git a/GUI/GUI/GUI/Frame.cpp b/GUI/GUI/GUI/Frame.cpp index 8d4c868..1df60e7 100644 --- a/GUI/GUI/GUI/Frame.cpp +++ b/GUI/GUI/GUI/Frame.cpp @@ -53,6 +53,7 @@ namespace { ID_PY_APP_GPU_IDX, ID_PY_APP_KEYBIND, ID_PY_APP_BROWSER_SRC_PORT, + ID_PY_APP_COMMIT_FUZZ_THRESHOLD, ID_UNITY_PANEL, ID_UNITY_CONFIG_PANEL, ID_UNITY_OUT, @@ -721,6 +722,20 @@ Frame::Frame() "value you configure here."); py_app_browser_src_port_ = py_app_browser_src_port; + auto* py_app_commit_fuzz_threshold = new wxTextCtrl( + py_app_config_panel_pairs, ID_PY_APP_COMMIT_FUZZ_THRESHOLD, + std::to_string(app_c_->commit_fuzz_threshold), wxDefaultPosition, + wxDefaultSize, /*style=*/0); + py_app_commit_fuzz_threshold->SetToolTip( + "The transcription app requires subsequent " + "transcripts to be within this edit distance of each " + "other before it commits them. Higher values make " + "transcripts commit more easily, making the app " + "faster but less accurate. Lower values make " + "transcripts commit less easily, making the app " + "slower but more accurate."); + py_app_commit_fuzz_threshold_ = py_app_commit_fuzz_threshold; + auto* sizer = new wxFlexGridSizer(/*cols=*/2); py_app_config_panel_pairs->SetSizer(sizer); @@ -784,6 +799,11 @@ Frame::Frame() sizer->Add(py_app_gpu_idx, /*proportion=*/0, /*flags=*/wxEXPAND); + sizer->Add(new wxStaticText(py_app_config_panel_pairs, + wxID_ANY, /*label=*/"Commit similarity threshold:")); + sizer->Add(py_app_commit_fuzz_threshold, /*proportion=*/0, + /*flags=*/wxEXPAND); + sizer->Add(new wxStaticText(py_app_config_panel_pairs, wxID_ANY, /*label=*/"Browser source port:")); sizer->Add(py_app_browser_src_port, /*proportion=*/0, @@ -1380,6 +1400,10 @@ void Frame::ApplyConfigToInputFields() py_app_desktop_browser_src_port->Clear(); py_app_desktop_browser_src_port->AppendText(std::to_string(app_c_->browser_src_port)); + auto* py_app_desktop_commit_fuzz_threshold = static_cast(FindWindowById(ID_PY_APP_COMMIT_FUZZ_THRESHOLD)); + py_app_desktop_commit_fuzz_threshold->Clear(); + py_app_desktop_commit_fuzz_threshold->AppendText(std::to_string(app_c_->commit_fuzz_threshold)); + auto* py_app_rows = static_cast(FindWindowById(ID_PY_APP_ROWS)); py_app_rows->Clear(); py_app_rows->AppendText(std::to_string(app_c_->rows)); @@ -2017,7 +2041,9 @@ void Frame::OnAppStart(wxCommandEvent& event) { py_app_keybind_->GetValue().ToStdString(); std::string browser_src_port_str = py_app_browser_src_port_->GetValue().ToStdString(); - int rows, cols, chars_per_sync, bytes_per_char, gpu_idx, browser_src_port; + std::string commit_fuzz_threshold_str = + py_app_commit_fuzz_threshold_->GetValue().ToStdString(); + int rows, cols, chars_per_sync, bytes_per_char, gpu_idx, browser_src_port, commit_fuzz_threshold; try { rows = std::stoi(rows_str); cols = std::stoi(cols_str); @@ -2025,20 +2051,30 @@ void Frame::OnAppStart(wxCommandEvent& event) { bytes_per_char = std::stoi(bytes_per_char_str); gpu_idx = std::stoi(gpu_idx_str); browser_src_port = std::stoi(browser_src_port_str); + commit_fuzz_threshold = std::stoi(commit_fuzz_threshold_str); } catch (const std::invalid_argument&) { Log(transcribe_out_, "Could not parse rows \"{}\", cols \"{}\", chars " - "per sync \"{}\", bytes per char \"{}\", " - "gpu_idx \"{}\", or browser src port \"{}\"" + "per sync \"{}\", " + "bytes per char \"{}\", " + "gpu_idx \"{}\", " + "browser src port \"{}\"", "" + "or commit_fuzz_threshold \"{}\"" "as an integer\n", rows_str, cols_str, chars_per_sync_str, - bytes_per_char_str, gpu_idx_str, browser_src_port_str); + bytes_per_char_str, gpu_idx_str, browser_src_port_str, + commit_fuzz_threshold_str); return; } catch (const std::out_of_range&) { - Log(transcribe_out_, "Rows \"{}\", cols \"{}\", chars per sync " - "\"{}\", bytes per char \"{}\", gpu idx \"{}\", or browser src " - "port \"{}\" are out of range\n", rows_str, cols_str, chars_per_sync_str, - bytes_per_char_str, gpu_idx, browser_src_port_str); + Log(transcribe_out_, "Rows \"{}\", " + "cols \"{}\", " + "chars per sync \"{}\", " + "bytes per char \"{}\", " + "gpu idx \"{}\", " + "browser src port \"{}\", " + "or commit_fuzz_threshold \"{}\" " + "are out of range\n", rows_str, cols_str, chars_per_sync_str, + bytes_per_char_str, gpu_idx_str, browser_src_port_str, commit_fuzz_threshold_str); return; } const int max_rows = 10; @@ -2047,19 +2083,29 @@ void Frame::OnAppStart(wxCommandEvent& event) { const int max_gpu_idx = 10; const int min_browser_src_port = 1024; const int max_browser_src_port = 65535; + const int min_commit_fuzz_threshold = 0; + const int max_commit_fuzz_threshold = 100; if (rows < 0 || rows > max_rows || cols < 0 || cols > max_cols || gpu_idx < min_gpu_idx || gpu_idx > max_gpu_idx || - browser_src_port < min_browser_src_port || browser_src_port > max_browser_src_port) { + browser_src_port < min_browser_src_port || browser_src_port > max_browser_src_port || + commit_fuzz_threshold < min_commit_fuzz_threshold || commit_fuzz_threshold > max_commit_fuzz_threshold) { Log(transcribe_out_, "Rows not on [{},{}] or cols not on [{},{}] or " - "gpu_idx not on [{}, {}] or browser src port not on [{}, {}]\n", + "gpu_idx not on [{}, {}] or " + "browser src port not on [{}, {}] or " + "commit_fuzz_threshold not on [{}, {}] " + "\n", 0, max_rows, 0, max_cols, min_gpu_idx, max_gpu_idx, - min_browser_src_port, max_browser_src_port); + min_browser_src_port, max_browser_src_port, + min_commit_fuzz_threshold, max_commit_fuzz_threshold); return; } + Log(transcribe_out_, "Commit fuzz threshold str: {}\n", commit_fuzz_threshold_str); + Log(transcribe_out_, "Commit fuzz threshold: {}\n", commit_fuzz_threshold); + app_c_->microphone = kMicChoices[which_mic].ToStdString(); app_c_->language = kLangChoices[which_lang].ToStdString(); app_c_->language_target = kLangTargetChoices[which_translate_target].ToStdString(); @@ -2073,6 +2119,7 @@ void Frame::OnAppStart(wxCommandEvent& event) { app_c_->enable_local_beep = enable_local_beep; app_c_->enable_browser_src = enable_browser_src; app_c_->browser_src_port = browser_src_port; + app_c_->commit_fuzz_threshold = commit_fuzz_threshold; app_c_->use_cpu = use_cpu; app_c_->use_builtin = use_builtin; app_c_->enable_uwu_filter = enable_uwu_filter; diff --git a/GUI/GUI/GUI/Frame.h b/GUI/GUI/GUI/Frame.h index 1856e7d..7afc005 100644 --- a/GUI/GUI/GUI/Frame.h +++ b/GUI/GUI/GUI/Frame.h @@ -40,6 +40,7 @@ private: wxTextCtrl* py_app_gpu_idx_; wxTextCtrl* py_app_keybind_; wxTextCtrl* py_app_browser_src_port_; + wxTextCtrl* py_app_commit_fuzz_threshold_; wxTextCtrl* unity_rows_; wxTextCtrl* unity_cols_; diff --git a/GUI/GUI/GUI/PythonWrapper.cpp b/GUI/GUI/GUI/PythonWrapper.cpp index 1402ed5..e6f10c2 100644 --- a/GUI/GUI/GUI/PythonWrapper.cpp +++ b/GUI/GUI/GUI/PythonWrapper.cpp @@ -497,6 +497,7 @@ std::future PythonWrapper::StartApp( "--gpu_idx", std::to_string(config.gpu_idx), "--keybind", Quote(config.keybind), "--reset_on_toggle", config.reset_on_toggle ? "1" : "0", + "--commit_fuzz_threshold", std::to_string(config.commit_fuzz_threshold), }, std::move(out_cb), std::move(in_cb), -- cgit v1.2.3