From 0bda49279ec80187d49a922ff2a47141ffb2fd8f Mon Sep 17 00:00:00 2001 From: yum Date: Tue, 30 May 2023 19:01:56 -0700 Subject: Finish translation for Western European language speakers NLLB needs its input to be split up into sentences. I use the sentence_splitter Python package to do this. It supports ~20 Western European languages, but notably, no Asian languages. * Sort spoken language list. English is still at the top. * Remove 'Translation source' dropdown. Infer this from the spoken language. * Add lang_compat.py to map language codes between the various libraries (whisper, nllb, sentence_splitter). * Fix bug where old text would appear in textbox when you first bring it up. --- GUI/GUI/GUI/Config.cpp | 3 - GUI/GUI/GUI/Config.h | 1 - GUI/GUI/GUI/Frame.cpp | 235 +++++++++++++++++++----------------------- GUI/GUI/GUI/Frame.h | 1 - GUI/GUI/GUI/Logging.h | 3 +- GUI/GUI/GUI/PythonWrapper.cpp | 1 - 6 files changed, 108 insertions(+), 136 deletions(-) (limited to 'GUI') diff --git a/GUI/GUI/GUI/Config.cpp b/GUI/GUI/GUI/Config.cpp index bf0c1c6..2bf17cf 100644 --- a/GUI/GUI/GUI/Config.cpp +++ b/GUI/GUI/GUI/Config.cpp @@ -64,7 +64,6 @@ AppConfig::AppConfig(wxTextCtrl* out) microphone("index"), language("english"), - language_source("Do not translate"), language_target("Do not translate"), model("base.en"), model_translation("nllb-200-distilled-600M"), @@ -115,7 +114,6 @@ bool AppConfig::Serialize(const std::filesystem::path& path) { cm.Set("microphone", microphone); cm.Set("language", language); - cm.Set("language_source", language_source); cm.Set("language_target", language_target); cm.Set("model", model); cm.Set("model_translation", model_translation); @@ -179,7 +177,6 @@ bool AppConfig::Deserialize(const std::filesystem::path& path) { AppConfig c(out_); cm.Get("microphone", c.microphone); cm.Get("language", c.language); - cm.Get("language_source", c.language_source); cm.Get("language_target", c.language_target); cm.Get("model", c.model); 
cm.Get("model_translation", c.model_translation); diff --git a/GUI/GUI/GUI/Config.h b/GUI/GUI/GUI/Config.h index bc942d4..bf714bc 100644 --- a/GUI/GUI/GUI/Config.h +++ b/GUI/GUI/GUI/Config.h @@ -50,7 +50,6 @@ public: // Transcription-specific settings. std::string microphone; std::string language; - std::string language_source; std::string language_target; std::string model; std::string model_translation; diff --git a/GUI/GUI/GUI/Frame.cpp b/GUI/GUI/GUI/Frame.cpp index 8cebc2d..a2b128a 100644 --- a/GUI/GUI/GUI/Frame.cpp +++ b/GUI/GUI/GUI/Frame.cpp @@ -30,7 +30,6 @@ namespace { ID_PY_APP_MIC, ID_PY_APP_MIC_PANEL, ID_PY_APP_LANG, - ID_PY_APP_TRANSLATE_SOURCE, ID_PY_APP_TRANSLATE_TARGET, ID_PY_APP_LANG_PANEL, ID_PY_APP_MODEL, @@ -130,107 +129,107 @@ namespace { constexpr int kMicDefault = 0; // index // lifted from whisper/tokenizer.py - const wxString kLangChoices[] = { - "english", - "chinese", - "german", - "spanish", - "russian", - "korean", - "french", - "japanese", - "portuguese", - "turkish", - "polish", - "catalan", - "dutch", - "arabic", - "swedish", - "italian", - "indonesian", - "hindi", - "finnish", - "vietnamese", - "hebrew", - "ukrainian", - "greek", - "malay", - "czech", - "romanian", - "danish", - "hungarian", - "tamil", - "norwegian", - "thai", - "urdu", - "croatian", - "bulgarian", - "lithuanian", - "latin", - "maori", - "malayalam", - "welsh", - "slovak", - "telugu", - "persian", - "latvian", - "bengali", - "serbian", - "azerbaijani", - "slovenian", - "kannada", - "estonian", - "macedonian", - "breton", - "basque", - "icelandic", - "armenian", - "nepali", - "mongolian", - "bosnian", - "kazakh", - "albanian", - "swahili", - "galician", - "marathi", - "punjabi", - "sinhala", - "khmer", - "shona", - "yoruba", - "somali", - "afrikaans", - "occitan", - "georgian", - "belarusian", - "tajik", - "sindhi", - "gujarati", - "amharic", - "yiddish", - "lao", - "uzbek", - "faroese", - "haitian creole", - "pashto", - "turkmen", - "nynorsk", - "maltese", -
"sanskrit", - "luxembourgish", - "myanmar", - "tibetan", - "tagalog", - "malagasy", - "assamese", - "tatar", - "hawaiian", - "lingala", - "hausa", - "bashkir", - "javanese", - "sundanese" - }; + const wxString kLangChoices[] = { + "english", + "afrikaans", + "albanian", + "amharic", + "arabic", + "armenian", + "assamese", + "azerbaijani", + "bashkir", + "basque", + "belarusian", + "bengali", + "bosnian", + "breton", + "bulgarian", + "catalan", + "chinese", + "croatian", + "czech", + "danish", + "dutch", + "estonian", + "faroese", + "finnish", + "french", + "galician", + "georgian", + "german", + "greek", + "gujarati", + "haitian creole", + "hausa", + "hawaiian", + "hebrew", + "hindi", + "hungarian", + "icelandic", + "indonesian", + "italian", + "japanese", + "javanese", + "kannada", + "kazakh", + "khmer", + "korean", + "lao", + "latin", + "latvian", + "lingala", + "lithuanian", + "luxembourgish", + "macedonian", + "malagasy", + "malay", + "malayalam", + "maltese", + "maori", + "marathi", + "mongolian", + "myanmar", + "nepali", + "norwegian", + "nynorsk", + "occitan", + "pashto", + "persian", + "polish", + "portuguese", + "punjabi", + "romanian", + "russian", + "sanskrit", + "serbian", + "shona", + "sindhi", + "sinhala", + "slovak", + "slovenian", + "somali", + "spanish", + "sundanese", + "swahili", + "swedish", + "tagalog", + "tajik", + "tamil", + "tatar", + "telugu", + "thai", + "tibetan", + "turkish", + "turkmen", + "ukrainian", + "urdu", + "uzbek", + "vietnamese", + "welsh", + "yiddish", + "yoruba", + }; const size_t kNumLangChoices = sizeof(kLangChoices) / sizeof(kLangChoices[0]); constexpr int kLangDefault = 0; // english @@ -633,17 +632,9 @@ Frame::Frame() ID_PY_APP_LANG, wxDefaultPosition, wxDefaultSize, kNumLangChoices, kLangChoices); py_app_lang->SetToolTip("Select which language you will " - "speak in.
If using something other than English, " - "make sure you're not using a *.en model."); + "speak in."); py_app_lang_ = py_app_lang; - auto* py_app_translate_source = new wxChoice(py_app_config_panel_pairs, - ID_PY_APP_TRANSLATE_SOURCE, wxDefaultPosition, wxDefaultSize, - kNumLangTargetChoices, kLangTargetChoices); - py_app_translate_source->SetToolTip("Select which language to " - "translate from, in other words, the language you are transcribing into."); - py_app_translate_source_ = py_app_translate_source; - auto* py_app_translate_target = new wxChoice(py_app_config_panel_pairs, ID_PY_APP_TRANSLATE_TARGET, wxDefaultPosition, wxDefaultSize, kNumLangTargetChoices, kLangTargetChoices); @@ -674,7 +665,8 @@ Frame::Frame() py_app_model_translation->SetToolTip("Select which " "version of the translation model to use. 600M params " "uses 4.1 GB of memory, while 1.3B uses ~7GB of " - "memory."); + "memory. If 'Translate to' is set to 'Do not " + "translate', this does nothing."); py_app_model_translation_ = py_app_model_translation; auto* py_app_chars_per_sync = new wxChoice( @@ -767,8 +759,8 @@ Frame::Frame() /*flags=*/wxEXPAND); sizer->Add(new wxStaticText(py_app_config_panel_pairs, - wxID_ANY, /*label=*/"Translate from:")); - sizer->Add(py_app_translate_source, /*proportion=*/0, + wxID_ANY, /*label=*/"Transcription model:")); + sizer->Add(py_app_model, /*proportion=*/0, /*flags=*/wxEXPAND); sizer->Add(new wxStaticText(py_app_config_panel_pairs, @@ -776,11 +768,6 @@ Frame::Frame() sizer->Add(py_app_translate_target, /*proportion=*/0, /*flags=*/wxEXPAND); - sizer->Add(new wxStaticText(py_app_config_panel_pairs, - wxID_ANY, /*label=*/"Transcription model:")); - sizer->Add(py_app_model, /*proportion=*/0, - /*flags=*/wxEXPAND); - sizer->Add(new wxStaticText(py_app_config_panel_pairs, wxID_ANY, /*label=*/"Translation model:")); sizer->Add(py_app_model_translation, /*proportion=*/0, @@ -1344,11 +1331,6 @@ void Frame::ApplyConfigToInputFields() kNumLangChoices, 
app_c_->language, kLangDefault); py_app_lang->SetSelection(lang_idx); - auto* py_app_translate_source = static_cast(FindWindowById(ID_PY_APP_TRANSLATE_SOURCE)); - int translate_source_idx = GetDropdownChoiceIndex(kLangTargetChoices, - kNumLangTargetChoices, app_c_->language_source, kLangTargetDefault); - py_app_translate_source->SetSelection(translate_source_idx); - auto* py_app_translate_target = static_cast(FindWindowById(ID_PY_APP_TRANSLATE_TARGET)); int translate_target_idx = GetDropdownChoiceIndex(kLangTargetChoices, kNumLangTargetChoices, app_c_->language_target, kLangTargetDefault); @@ -1968,10 +1950,6 @@ void Frame::OnAppStart(wxCommandEvent& event) { if (which_lang == wxNOT_FOUND) { which_lang = kLangDefault; } - int which_translate_source = py_app_translate_source_->GetSelection(); - if (which_translate_source == wxNOT_FOUND) { - which_translate_source = kLangDefault; - } int which_translate_target = py_app_translate_target_->GetSelection(); if (which_translate_target == wxNOT_FOUND) { which_translate_target = kLangDefault; @@ -2061,7 +2039,6 @@ void Frame::OnAppStart(wxCommandEvent& event) { app_c_->microphone = kMicChoices[which_mic].ToStdString(); app_c_->language = kLangChoices[which_lang].ToStdString(); - app_c_->language_source = kLangTargetChoices[which_translate_source].ToStdString(); app_c_->language_target = kLangTargetChoices[which_translate_target].ToStdString(); app_c_->model = kModelChoices[which_model].ToStdString(); app_c_->model_translation = kModelTranslationChoices[which_model_translation].ToStdString(); diff --git a/GUI/GUI/GUI/Frame.h b/GUI/GUI/GUI/Frame.h index 24a0594..4176e83 100644 --- a/GUI/GUI/GUI/Frame.h +++ b/GUI/GUI/GUI/Frame.h @@ -62,7 +62,6 @@ private: wxChoice* py_app_mic_; wxChoice* py_app_lang_; - wxChoice* py_app_translate_source_; wxChoice* py_app_translate_target_; wxChoice* py_app_model_; wxChoice* py_app_model_translation_; diff --git a/GUI/GUI/GUI/Logging.h b/GUI/GUI/GUI/Logging.h index c85a376..193617a 100644 --- 
a/GUI/GUI/GUI/Logging.h +++ b/GUI/GUI/GUI/Logging.h @@ -40,8 +40,9 @@ namespace Logging { void Log(wxTextCtrl* frame, std::string_view format, Args&&... args) { const std::string raw = std::vformat(format, std::make_format_args(args...)); const std::string masked = HidePII(std::move(raw)); + std::string decoded = wxString::FromUTF8(masked).ToStdString(); - kThreadLogger.Append(frame, std::move(masked)); + kThreadLogger.Append(frame, std::move(decoded)); } void DrainAsyncOutput(wxProcess* proc, wxTextCtrl* frame); diff --git a/GUI/GUI/GUI/PythonWrapper.cpp b/GUI/GUI/GUI/PythonWrapper.cpp index b95d6b3..2172ac4 100644 --- a/GUI/GUI/GUI/PythonWrapper.cpp +++ b/GUI/GUI/GUI/PythonWrapper.cpp @@ -463,7 +463,6 @@ std::future PythonWrapper::StartApp( "Resources/Scripts/transcribe.py", "--mic", config.microphone, "--language", config.language, - "--language_source", Quote(config.language_source), "--language_target", Quote(config.language_target), "--model", config.model, "--model_translation", config.model_translation, -- cgit v1.2.3