From 79f1b48042cbb724892301afdee842fb33ab2b37 Mon Sep 17 00:00:00 2001 From: yum Date: Sun, 18 Dec 2022 14:46:53 -0800 Subject: GUI: Add mic, language selection Users can now select their mic & spoken language in the GUI. * pyaudio now samples at the mic rate, fixing an issue where frames would drop. We downsample in the callback by dropping frames. * add Sounds folder to package --- GUI/GUI/GUI/Frame.cpp | 174 +++++++++++++++++++++++++++++++++++++----- GUI/GUI/GUI/Frame.h | 2 + GUI/GUI/GUI/PythonWrapper.cpp | 10 ++- GUI/GUI/GUI/PythonWrapper.h | 4 +- GUI/package.ps1 | 1 + 5 files changed, 170 insertions(+), 21 deletions(-) (limited to 'GUI') diff --git a/GUI/GUI/GUI/Frame.cpp b/GUI/GUI/GUI/Frame.cpp index db2bdd7..55112db 100644 --- a/GUI/GUI/GUI/Frame.cpp +++ b/GUI/GUI/GUI/Frame.cpp @@ -13,20 +13,145 @@ namespace { ID_PY_APP_START_BUTTON, ID_PY_APP_STOP_BUTTON, ID_PY_OUT, + ID_PY_APP_MIC, + ID_PY_APP_LANG, }; -}; + + const wxString kMicChoices[] = { + "index", + "focusrite", + // ok now this is epic + "0", + "1", + "2", + "3", + "4", + "5", + "6", + "7", + "8", + "9", + }; + const size_t kNumMicChoices = sizeof(kMicChoices) / sizeof(kMicChoices[0]); + + // lifted from whisper/tokenizer.py + const wxString kLangChoices[] = { + "english", + "chinese", + "german", + "spanish", + "russian", + "korean", + "french", + "japanese", + "portuguese", + "turkish", + "polish", + "catalan", + "dutch", + "arabic", + "swedish", + "italian", + "indonesian", + "hindi", + "finnish", + "vietnamese", + "hebrew", + "ukrainian", + "greek", + "malay", + "czech", + "romanian", + "danish", + "hungarian", + "tamil", + "norwegian", + "thai", + "urdu", + "croatian", + "bulgarian", + "lithuanian", + "latin", + "maori", + "malayalam", + "welsh", + "slovak", + "telugu", + "persian", + "latvian", + "bengali", + "serbian", + "azerbaijani", + "slovenian", + "kannada", + "estonian", + "macedonian", + "breton", + "basque", + "icelandic", + "armenian", + "nepali", + "mongolian", + "bosnian", + "kazakh", + "albanian", + "swahili", + "galician", + "marathi", + "punjabi", + "sinhala", + "khmer", + "shona", + "yoruba", + "somali", + "afrikaans", + "occitan", + "georgian", + "belarusian", + "tajik", + "sindhi", + "gujarati", + "amharic", + "yiddish", + "lao", + "uzbek", + "faroese", + "haitian creole", + "pashto", + "turkmen", + "nynorsk", + "maltese", + "sanskrit", + "luxembourgish", + "myanmar", + "tibetan", + "tagalog", + "malagasy", + "assamese", + "tatar", + "hawaiian", + "lingala", + "hausa", + "bashkir", + "javanese", + "sundanese" + }; + const size_t kNumLangChoices = sizeof(kLangChoices) / sizeof(kLangChoices[0]); +} // namespace Frame::Frame() - : wxFrame(nullptr, wxID_ANY, "TaSTT"), - py_panel_(this, ID_PY_PANEL), - py_panel_sizer_(wxVERTICAL), - py_version_button_(&py_panel_, ID_PY_VERSION_BUTTON, "Check embedded Python version"), - py_setup_button_(&py_panel_, ID_PY_SETUP_BUTTON, "Set up Python virtual environment"), - py_app_start_button_(&py_panel_, ID_PY_APP_START_BUTTON, "Begin transcribing"), - py_app_stop_button_(&py_panel_, ID_PY_APP_STOP_BUTTON, "Stop transcribing"), - py_out_(&py_panel_, ID_PY_OUT, wxEmptyString, wxDefaultPosition, - wxSize(/*x_px=*/480, /*y_px=*/160), wxTE_MULTILINE), - py_app_(nullptr) + : wxFrame(nullptr, wxID_ANY, "TaSTT"), + py_panel_(this, ID_PY_PANEL), + py_panel_sizer_(wxVERTICAL), + py_version_button_(&py_panel_, ID_PY_VERSION_BUTTON, "Check embedded Python version"), + py_setup_button_(&py_panel_, ID_PY_SETUP_BUTTON, "Set up Python virtual environment"), + py_app_start_button_(&py_panel_, ID_PY_APP_START_BUTTON, "Begin transcribing"), + py_app_stop_button_(&py_panel_, ID_PY_APP_STOP_BUTTON, "Stop transcribing"), + py_out_(&py_panel_, ID_PY_OUT, wxEmptyString, wxDefaultPosition, + wxSize(/*x_px=*/480, /*y_px=*/160), wxTE_MULTILINE), + py_app_(nullptr), + py_app_mic_(&py_panel_, ID_PY_APP_MIC, wxDefaultPosition, wxDefaultSize, kNumMicChoices, kMicChoices), + py_app_lang_(&py_panel_, ID_PY_APP_LANG, wxDefaultPosition, wxDefaultSize, kNumLangChoices, kLangChoices) { Bind(wxEVT_MENU, &Frame::OnExit, this, wxID_EXIT); Bind(wxEVT_BUTTON, &Frame::OnGetPythonVersion, this, ID_PY_VERSION_BUTTON); @@ -41,10 +166,14 @@ Frame::Frame() wxSize py_out_size(/*x=*/80, /*y=*/20); py_out_.SetSize(py_out_size); + py_app_mic_.SetSelection(0); + py_app_lang_.SetSelection(0); py_panel_.SetSizer(&py_panel_sizer_); py_panel_sizer_.Add(&py_version_button_); py_panel_sizer_.Add(&py_setup_button_); + py_panel_sizer_.Add(&py_app_mic_); + py_panel_sizer_.Add(&py_app_lang_); py_panel_sizer_.Add(&py_app_start_button_); py_panel_sizer_.Add(&py_app_stop_button_); py_panel_sizer_.Add(&py_out_); @@ -75,7 +204,7 @@ void Frame::OnSetupPython(wxCommandEvent& event) { std::string py_out; std::ostringstream py_out_oss; - py_out_oss << "Installing pip" << std::endl; + py_out_oss << " Installing pip" << std::endl; py_out_.AppendText(py_out_oss.str()); if (!py.InstallPip(&py_out)) { std::ostringstream py_out_oss; @@ -85,22 +214,20 @@ void Frame::OnSetupPython(wxCommandEvent& event) } const std::vector pip_deps{ + "openvr", "pillow", - "pydub", "pyaudio", + "python-osc", "playsound==1.2.2", "torch --extra-index-url https://download.pytorch.org/whl/cu116", "git+https://github.com/openai/whisper.git", - "openvr", "editdistance", - "pydub", - "python-osc", }; for (const auto& pip_dep : pip_deps) { { std::ostringstream py_out_oss; - py_out_oss << "Installing " << pip_dep << std::endl; + py_out_oss << " Installing " << pip_dep << std::endl; py_out_.AppendText(py_out_oss.str()); } std::string py_out; @@ -137,7 +264,18 @@ void Frame::OnAppStart(wxCommandEvent& event) { return; }; - wxProcess* p = py.StartApp(std::move(cb)); + int which_mic = py_app_mic_.GetSelection(); + if (which_mic == wxNOT_FOUND) { + which_mic = 0; + } + int which_lang = py_app_lang_.GetSelection(); + if (which_lang == wxNOT_FOUND) { + which_lang = 0; + } + + wxProcess* p = py.StartApp(std::move(cb), + kMicChoices[which_mic].ToStdString(), + kLangChoices[which_lang].ToStdString()); if (!p) { py_out_.AppendText("Failed to launch transcription engine\n"); return; diff --git a/GUI/GUI/GUI/Frame.h b/GUI/GUI/GUI/Frame.h index 414d2b3..8132cce 100644 --- a/GUI/GUI/GUI/Frame.h +++ b/GUI/GUI/GUI/Frame.h @@ -22,6 +22,8 @@ private: wxButton py_app_start_button_; wxButton py_app_stop_button_; wxTextCtrl py_out_; + wxChoice py_app_mic_; + wxChoice py_app_lang_; wxProcess* py_app_; diff --git a/GUI/GUI/GUI/PythonWrapper.cpp b/GUI/GUI/GUI/PythonWrapper.cpp index 7270ab5..53fcc06 100644 --- a/GUI/GUI/GUI/PythonWrapper.cpp +++ b/GUI/GUI/GUI/PythonWrapper.cpp @@ -85,8 +85,14 @@ bool PythonWrapper::InstallPip(std::string* out) { return InvokeWithArgs({ pip_path }, out); } -wxProcess* PythonWrapper::StartApp(std::function&& exit_callback) { - return InvokeAsyncWithArgs({ "Resources/Scripts/transcribe.py" }, +wxProcess* PythonWrapper::StartApp( + std::function&& exit_callback, + const std::string& mic, const std::string& lang) { + return InvokeAsyncWithArgs({ + "Resources/Scripts/transcribe.py", + "--mic", mic, + "--lang", lang, + }, std::move(exit_callback)); } diff --git a/GUI/GUI/GUI/PythonWrapper.h b/GUI/GUI/GUI/PythonWrapper.h index 4407b5e..0fa3c94 100644 --- a/GUI/GUI/GUI/PythonWrapper.h +++ b/GUI/GUI/GUI/PythonWrapper.h @@ -34,6 +34,8 @@ public: // Execute get-pip.py. bool InstallPip(std::string* out); - wxProcess* StartApp(std::function&& exit_callback); + wxProcess* StartApp( + std::function&& exit_callback, + const std::string& mic, const std::string& lang); }; diff --git a/GUI/package.ps1 b/GUI/package.ps1 index 0c2cec0..0346e67 100644 --- a/GUI/package.ps1 +++ b/GUI/package.ps1 @@ -9,5 +9,6 @@ mkdir $install_dir/Resources > $null cp ../Images/logo.png TaSTT/Resources cp -Recurse ../Python TaSTT/Resources/Python cp -Recurse ../Scripts TaSTT/Resources/Scripts +cp -Recurse ../Sounds TaSTT/Resources/Sounds cp GUI/x64/Release/GUI.exe TaSTT/TaSTT.exe -- cgit v1.2.3