From 461714cc87549f3c0c3411bfe95f3936aca60f74 Mon Sep 17 00:00:00 2001 From: yum Date: Fri, 6 Jan 2023 12:31:13 -0800 Subject: GUI: Persist transcription app config The configuration of the transcription app, such as the number of rows and columns in the text box, now persists across app restarts. I found that I would have to change from the defaults to my preferred config every time I started up in VR, which was annoying. Now we just start with the config that was set last time. * Add dependency on rapidyaml (MIT) * Serialize transcription config to file under Resources/ * Add Config class to wrap serializing/deserializing * Update build instructions * Simplify StartApp() API, taking Config struct instead of a ton of arguments --- GUI/GUI/GUI/.gitignore | 2 + GUI/GUI/GUI/Config.cpp | 105 ++++++++++++++++++++++++++++++++++++++++ GUI/GUI/GUI/Config.h | 26 ++++++++++ GUI/GUI/GUI/Frame.cpp | 69 ++++++++++++++++++-------- GUI/GUI/GUI/GUI.vcxproj | 3 ++ GUI/GUI/GUI/GUI.vcxproj.filters | 9 ++++ GUI/GUI/GUI/PythonWrapper.cpp | 27 +++++------ GUI/GUI/GUI/PythonWrapper.h | 8 ++- GUI/Libraries/.gitignore | 1 + GUI/Libraries/fetch.ps1 | 18 ++++++- GUI/README.md | 21 ++++---- 11 files changed, 238 insertions(+), 51 deletions(-) create mode 100644 GUI/GUI/GUI/Config.cpp create mode 100644 GUI/GUI/GUI/Config.h (limited to 'GUI') diff --git a/GUI/GUI/GUI/.gitignore b/GUI/GUI/GUI/.gitignore index 41aaad2..e843fdb 100644 --- a/GUI/GUI/GUI/.gitignore +++ b/GUI/GUI/GUI/.gitignore @@ -3,3 +3,5 @@ x64 x86 # No .rc generated files GUI.APS +# No fetched files +ryml.h diff --git a/GUI/GUI/GUI/Config.cpp b/GUI/GUI/GUI/Config.cpp new file mode 100644 index 0000000..0436f3b --- /dev/null +++ b/GUI/GUI/GUI/Config.cpp @@ -0,0 +1,105 @@ +#include + +#ifndef WX_PRECOMP +#include +#endif + +#include "Config.h" + +#define RYML_SINGLE_HDR_DEFINE_NOW +#include "ryml.h" + +#include +#include +#include + +TranscriptionAppConfig::TranscriptionAppConfig() + : microphone("index"), + language("english"), 
+    model("base.en"),
+    chars_per_sync("20"),
+    bytes_per_char("1"),
+    rows("4"),
+    cols("48"),
+    window_duration("15"),
+    enable_local_beep(true),
+    use_cpu(false)
+{}
+
+// Saves this config to `path` as a YAML map with one key per field.
+//
+// The YAML is written to "<path>.tmp" first and then renamed over `path`, so
+// the previous config is never deleted before its replacement is in place.
+//
+// Returns true on success. On failure, logs through wxLogError and returns
+// false.
+bool TranscriptionAppConfig::Serialize(const std::filesystem::path& path) {
+    ryml::Tree t;
+    ryml::NodeRef root = t.rootref();
+    root |= ryml::MAP;
+    root["microphone"] << ryml::to_substr(microphone);
+    root["language"] << ryml::to_substr(language);
+    root["model"] << ryml::to_substr(model);
+    root["chars_per_sync"] << ryml::to_substr(chars_per_sync);
+    root["bytes_per_char"] << ryml::to_substr(bytes_per_char);
+    root["rows"] << ryml::to_substr(rows);
+    root["cols"] << ryml::to_substr(cols);
+    root["window_duration"] << ryml::to_substr(window_duration);
+    root["enable_local_beep"] << enable_local_beep;
+    root["use_cpu"] << use_cpu;
+
+    // Write the config to a tmp file. If we crash in the middle of this, it
+    // doesn't matter, since the next process will just overwrite it.
+    std::filesystem::path tmp_path = path;
+    tmp_path += ".tmp";
+    FILE* fp = fopen(tmp_path.string().c_str(), "wb");
+    if (!fp) {
+        // Report the file we actually tried to open, not the final path.
+        wxLogError("Failed to open %s: %s", tmp_path.string().c_str(),
+            strerror(errno));
+        return false;
+    }
+    ryml::emit_yaml(t, fp);
+    // A failed write shows up either in the stream's error flag or when
+    // fclose() flushes whatever is still buffered, so check both.
+    const bool write_failed = ferror(fp) != 0;
+    const bool close_failed = fclose(fp) != 0;
+    fp = nullptr;
+    if (write_failed || close_failed) {
+        wxLogError("Failed to write %s", tmp_path.string().c_str());
+        std::error_code cleanup_ec;  // Best effort; the result is ignored.
+        std::filesystem::remove(tmp_path, cleanup_ec);
+        return false;
+    }
+
+    // std::filesystem::rename() is specified to replace an existing regular
+    // file at the destination as part of the rename itself, so the old config
+    // does not have to be deleted first (which would leave a window with no
+    // config on disk at all).
+    std::error_code ec;
+    std::filesystem::rename(tmp_path, path, ec);
+    if (ec) {
+        wxLogError("Failed to save config to %s: %s", path.string().c_str(),
+            ec.message().c_str());
+        std::error_code cleanup_ec;  // Best effort; the result is ignored.
+        std::filesystem::remove(tmp_path, cleanup_ec);
+        return false;
+    }
+
+    return true;
+}
+
+// Loads a config previously written by Serialize() from `path`.
+//
+// Returns false and leaves *this untouched if the file cannot be opened or
+// read, is empty, or does not contain a YAML map at its root. Keys that are
+// absent from the file keep the constructor defaults.
+//
+// NOTE(review): malformed YAML is reported through ryml's error callback; the
+// library default is believed to abort the process - confirm, and consider
+// installing a callback if a corrupt config must not take the app down.
+bool TranscriptionAppConfig::Deserialize(const std::filesystem::path& path) {
+    std::ifstream file(path, std::ios::binary | std::ios::ate);
+    if (!file.is_open()) {
+        return false;
+    }
+    // tellg() returns -1 on failure; sizing the buffer from that would
+    // request an enormous allocation. An empty file has nothing to parse.
+    const std::streamsize size = file.tellg();
+    if (size <= 0) {
+        return false;
+    }
+    file.seekg(0, std::ios::beg);
+    std::vector<char> yaml_buf(static_cast<size_t>(size));
+    if (!file.read(yaml_buf.data(), size)) {
+        return false;
+    }
+
+    // Hand ryml the explicit length: the buffer is not NUL-terminated, so
+    // measuring it as a C string would read past its end.
+    ryml::Tree t = ryml::parse_in_place(
+        ryml::substr(yaml_buf.data(), yaml_buf.size()));
+    ryml::ConstNodeRef root = t.rootref();
+    if (!root.is_map()) {
+        return false;
+    }
+
+    TranscriptionAppConfig c;
+    // Only read keys that exist, so files written before a field was added
+    // still load and simply keep that field's default.
+    const auto load = [&root](ryml::csubstr key, auto& field) {
+        if (root.has_child(key)) {
+            root[key] >> field;
+        }
+    };
+    load("microphone", c.microphone);
+    load("language", c.language);
+    load("model", c.model);
+    load("chars_per_sync", c.chars_per_sync);
+    load("bytes_per_char", c.bytes_per_char);
+    load("rows", c.rows);
+    load("cols", c.cols);
+    load("window_duration", c.window_duration);
+    load("enable_local_beep", c.enable_local_beep);
+    // Serialize() writes this key too; it has to be read back or the
+    // "Use CPU" choice is lost on every restart.
+    load("use_cpu", c.use_cpu);
+
+    *this = std::move(c);
+    return true;
+}
diff --git a/GUI/GUI/GUI/Config.h b/GUI/GUI/GUI/Config.h
new file mode 100644
index 0000000..e142773
--- /dev/null
+++ b/GUI/GUI/GUI/Config.h
@@ -0,0 +1,26 @@
+#pragma once
+
+#include
+
+class TranscriptionAppConfig {
+public:
+    TranscriptionAppConfig();
+
+    bool Serialize(const std::filesystem::path& path);
+
+    bool Deserialize(const std::filesystem::path& path);
+
+    // The default path at which configs are serialized.
+    static constexpr char kConfigPath[] = "Resources/transcription_app_config.yml";
+
+    std::string microphone;
+    std::string language;
+    std::string model;
+    std::string chars_per_sync;
+    std::string bytes_per_char;
+    std::string rows;
+    std::string cols;
+    std::string window_duration;
+    bool enable_local_beep;
+    bool use_cpu;
+};
diff --git a/GUI/GUI/GUI/Frame.cpp b/GUI/GUI/GUI/Frame.cpp
index 5fb8dd9..6c1f356 100644
--- a/GUI/GUI/GUI/Frame.cpp
+++ b/GUI/GUI/GUI/Frame.cpp
@@ -2,6 +2,8 @@
 #include "Logging.h"
 #include "PythonWrapper.h"
 
+#include "Config.h"
+
 #include
 #include
 #include
@@ -226,6 +228,20 @@ namespace {
     const size_t kNumBytesPerChar = sizeof(kBytesPerChar) / sizeof(kBytesPerChar[0]);
     // Sorry international users. Optimize for English speakers, by default.
     constexpr int kBytesDefault = 0;
+
+    // Finds the position of a dropdown entry by its string value.
+    // Scans the first `num_menu_entries` elements of `menu` for one equal to
+    // `entry`; returns its index, or `default_index` when nothing matches.
+    int GetDropdownChoiceIndex(const wxString menu[],
+        const size_t num_menu_entries, const std::string& entry,
+        const int default_index) {
+        // Index with size_t so the bound check compares like-signed values.
+        for (size_t i = 0; i < num_menu_entries; i++) {
+            if (entry == menu[i]) return static_cast<int>(i);
+        }
+        return default_index;
+    }
+
 } // namespace
 
 using ::Logging::Log;
@@ -235,6 +251,9 @@ Frame::Frame()
     py_app_(nullptr),
     py_app_drain_(this, ID_PY_APP_DRAIN)
 {
+    TranscriptionAppConfig c;
+    c.Deserialize(TranscriptionAppConfig::kConfigPath);
+
     auto* main_panel = new wxPanel(this, ID_MAIN_PANEL);
     main_panel_ = main_panel;
     {
@@ -282,7 +301,8 @@ Frame::Frame()
     {
         auto* py_app_mic = new wxChoice(py_app_config_panel_pairs,
             ID_PY_APP_MIC, wxDefaultPosition, wxDefaultSize, kNumMicChoices, kMicChoices);
-        py_app_mic->SetSelection(kMicDefault);
+        int mic_idx = GetDropdownChoiceIndex(kMicChoices, kNumMicChoices, c.microphone, kMicDefault);
+        py_app_mic->SetSelection(mic_idx);
         py_app_mic->SetToolTip(
             "Select which microphone to listen to when "
             "transcribing. 
To get list microphones and get their " @@ -291,7 +311,8 @@ Frame::Frame() auto* py_app_lang = new wxChoice(py_app_config_panel_pairs, ID_PY_APP_LANG, wxDefaultPosition, wxDefaultSize, kNumLangChoices, kLangChoices); - py_app_lang->SetSelection(kLangDefault); + int lang_idx = GetDropdownChoiceIndex(kLangChoices, kNumLangChoices, c.language, kLangDefault); + py_app_lang->SetSelection(lang_idx); py_app_lang->SetToolTip("Select which language you will " "speak in. It will be transcribed into that language. " "If using a language with non-ASCII characters (i.e. " @@ -302,7 +323,8 @@ Frame::Frame() auto* py_app_model = new wxChoice(py_app_config_panel_pairs, ID_PY_APP_MODEL, wxDefaultPosition, wxDefaultSize, kNumModelChoices, kModelChoices); - py_app_model->SetSelection(kModelDefault); + int model_idx = GetDropdownChoiceIndex(kModelChoices, kNumModelChoices, c.model, kModelDefault); + py_app_model->SetSelection(model_idx); py_app_model->SetToolTip("Select which version of " "the transcription model to use. 'base' is a good " "choice for most users. 'small' is slightly more " @@ -314,7 +336,8 @@ Frame::Frame() auto* py_app_chars_per_sync = new wxChoice(py_app_config_panel_pairs, ID_PY_APP_CHARS_PER_SYNC, wxDefaultPosition, wxDefaultSize, kNumCharsPerSync, kCharsPerSync); - py_app_chars_per_sync->SetSelection(kCharsDefault); + int chars_idx = GetDropdownChoiceIndex(kCharsPerSync, kNumCharsPerSync, c.chars_per_sync, kCharsDefault); + py_app_chars_per_sync->SetSelection(chars_idx); py_app_chars_per_sync->SetToolTip( "VRChat syncs avatar parameters roughly 5 times per " "second. We use this to send text to the box. 
By " @@ -325,28 +348,29 @@ Frame::Frame() auto* py_app_bytes_per_char = new wxChoice(py_app_config_panel_pairs, ID_PY_APP_BYTES_PER_CHAR, wxDefaultPosition, wxDefaultSize, kNumBytesPerChar, kBytesPerChar); - py_app_bytes_per_char->SetSelection(kBytesDefault); + int bytes_idx = GetDropdownChoiceIndex(kBytesPerChar, kNumBytesPerChar, c.bytes_per_char, kBytesDefault); + py_app_bytes_per_char->SetSelection(bytes_idx); py_app_bytes_per_char->SetToolTip( "If you speak a language that uses non-ASCII " "characters (i.e. not English), set this to 2."); py_app_bytes_per_char_ = py_app_bytes_per_char; auto* py_app_rows = new wxTextCtrl(py_app_config_panel_pairs, - ID_PY_APP_ROWS, /*value=*/"4", + ID_PY_APP_ROWS, c.rows, wxDefaultPosition, wxDefaultSize, /*style=*/0); py_app_rows->SetToolTip( "The number of rows on the text box."); py_app_rows_ = py_app_rows; auto* py_app_cols = new wxTextCtrl(py_app_config_panel_pairs, - ID_PY_APP_COLS, /*value=*/"48", + ID_PY_APP_COLS, c.cols, wxDefaultPosition, wxDefaultSize, /*style=*/0); py_app_cols->SetToolTip( "The number of columns on the text box."); py_app_cols_ = py_app_cols; auto* py_app_window_duration = new wxTextCtrl(py_app_config_panel_pairs, - ID_PY_APP_WINDOW_DURATION, /*value=*/"15", + ID_PY_APP_WINDOW_DURATION, c.window_duration, wxDefaultPosition, wxDefaultSize, /*style=*/0); py_app_window_duration->SetToolTip( "This controls how long the slice of audio that " @@ -387,7 +411,7 @@ Frame::Frame() auto* py_app_enable_local_beep = new wxCheckBox(py_config_panel, ID_PY_APP_ENABLE_LOCAL_BEEP, "Enable local beep"); - py_app_enable_local_beep->SetValue(true); + py_app_enable_local_beep->SetValue(c.enable_local_beep); py_app_enable_local_beep->SetToolTip( "By default, TaSTT will play a sound (audible only to " "you) when it begins transcription and when it stops. 
" @@ -397,7 +421,7 @@ Frame::Frame() auto* py_app_use_cpu = new wxCheckBox(py_config_panel, ID_PY_APP_USE_CPU, "Use CPU"); - py_app_use_cpu->SetValue(false); + py_app_use_cpu->SetValue(c.use_cpu); py_app_use_cpu->SetToolTip( "If checked, the transcription engine will run on your " "CPU instead of your GPU. This is typically much slower " @@ -928,17 +952,20 @@ void Frame::OnAppStart(wxCommandEvent& event) { return; } - wxProcess* p = PythonWrapper::StartApp(std::move(cb), - kMicChoices[which_mic].ToStdString(), - kLangChoices[which_lang].ToStdString(), - kModelChoices[which_model].ToStdString(), - kCharsPerSync[chars_per_sync_idx].ToStdString(), - kBytesPerChar[bytes_per_char_idx].ToStdString(), - rows, - cols, - window_duration, - enable_local_beep, - use_cpu); + TranscriptionAppConfig c; + c.microphone = kMicChoices[which_mic].ToStdString(); + c.language = kLangChoices[which_lang].ToStdString(); + c.model = kModelChoices[which_model].ToStdString(); + c.chars_per_sync = kCharsPerSync[chars_per_sync_idx].ToStdString(); + c.bytes_per_char = kBytesPerChar[bytes_per_char_idx].ToStdString(); + c.rows = std::to_string(rows); + c.cols = std::to_string(cols); + c.window_duration = std::to_string(window_duration); + c.enable_local_beep = enable_local_beep; + c.use_cpu = use_cpu; + c.Serialize(TranscriptionAppConfig::kConfigPath); + + wxProcess* p = PythonWrapper::StartApp(std::move(cb), c); if (!p) { Log(transcribe_out_, "Failed to launch transcription engine\n"); return; diff --git a/GUI/GUI/GUI/GUI.vcxproj b/GUI/GUI/GUI/GUI.vcxproj index 976855d..cbe3a92 100644 --- a/GUI/GUI/GUI/GUI.vcxproj +++ b/GUI/GUI/GUI/GUI.vcxproj @@ -140,6 +140,7 @@ + @@ -147,10 +148,12 @@ + + diff --git a/GUI/GUI/GUI/GUI.vcxproj.filters b/GUI/GUI/GUI/GUI.vcxproj.filters index 348026a..3fa31c7 100644 --- a/GUI/GUI/GUI/GUI.vcxproj.filters +++ b/GUI/GUI/GUI/GUI.vcxproj.filters @@ -30,6 +30,9 @@ Source Files + + Source Files + @@ -50,6 +53,12 @@ Header Files + + Header Files + + + Header Files + diff 
--git a/GUI/GUI/GUI/PythonWrapper.cpp b/GUI/GUI/GUI/PythonWrapper.cpp index a38ee4a..0c43fa4 100644 --- a/GUI/GUI/GUI/PythonWrapper.cpp +++ b/GUI/GUI/GUI/PythonWrapper.cpp @@ -1,6 +1,8 @@ #include "Logging.h" #include "PythonWrapper.h" +#include "Config.h" + #include #include @@ -142,23 +144,20 @@ bool PythonWrapper::InstallPip(std::string* out) { wxProcess* PythonWrapper::StartApp( std::function&& exit_callback, - const std::string& mic, const std::string& lang, const std::string& model, - const std::string& chars_per_sync, const std::string& bytes_per_char, - int rows, int cols, int window_duration_s, bool enable_local_beep, - bool use_cpu) { + const TranscriptionAppConfig& config) { return InvokeAsyncWithArgs({ "-u", "Resources/Scripts/transcribe.py", - "--mic", mic, - "--lang", lang, - "--model", model, - "--chars_per_sync", chars_per_sync, - "--bytes_per_char", bytes_per_char, - "--enable_local_beep", enable_local_beep ? "1" : "0", - "--rows", std::to_string(rows), - "--cols", std::to_string(cols), - "--window_duration_s", std::to_string(window_duration_s), - "--cpu", use_cpu ? "1" : "0", + "--mic", config.microphone, + "--lang", config.language, + "--model", config.model, + "--chars_per_sync", config.chars_per_sync, + "--bytes_per_char", config.bytes_per_char, + "--enable_local_beep", config.enable_local_beep ? "1" : "0", + "--rows", config.rows, + "--cols", config.cols, + "--window_duration_s", config.window_duration, + "--cpu", config.use_cpu ? "1" : "0", }, std::move(exit_callback)); } diff --git a/GUI/GUI/GUI/PythonWrapper.h b/GUI/GUI/GUI/PythonWrapper.h index fed8e7b..38b35d4 100644 --- a/GUI/GUI/GUI/PythonWrapper.h +++ b/GUI/GUI/GUI/PythonWrapper.h @@ -8,6 +8,8 @@ #include +#include "Config.h" + #include #include #include @@ -50,11 +52,7 @@ namespace PythonWrapper // app restarts. 
wxProcess* StartApp(
         std::function&& exit_callback,
-        const std::string& mic, const std::string& lang, const std::string& model,
-        const std::string& chars_per_sync, const std::string& bytes_per_char,
-        int rows, int cols, int window_duration_s, bool enable_local_beep,
-        bool use_cpu
-        );
+        const TranscriptionAppConfig& config);
 
     bool GenerateAnimator(
         const std::filesystem::path& unity_assets_path,
diff --git a/GUI/Libraries/.gitignore b/GUI/Libraries/.gitignore
index 214bffb..fb46029 100644
--- a/GUI/Libraries/.gitignore
+++ b/GUI/Libraries/.gitignore
@@ -1,3 +1,4 @@
 # Don't check in anything we fetch
 wx
+rapidyaml
 
diff --git a/GUI/Libraries/fetch.ps1 b/GUI/Libraries/fetch.ps1
index 5f8d63e..78bf0d5 100644
--- a/GUI/Libraries/fetch.ps1
+++ b/GUI/Libraries/fetch.ps1
@@ -6,6 +6,7 @@ $WX_FILE = $(Split-Path -Path $WX_URL -Leaf)
 
 pushd $PSScriptRoot
 
+# WX
 if (Test-Path wx) {
   rm -Recurse wx
 }
@@ -16,5 +17,20 @@ Invoke-WebRequest $WX_URL -OutFile $WX_FILE
 Expand-Archive $WX_FILE -DestinationPath .
 popd > $null
 
-popd > $null
+# RAPIDYAML
+if (Test-Path rapidyaml) {
+  rm -Recurse -Force rapidyaml  # -Force: a git checkout leaves read-only files under .git
+}
+
+git clone https://github.com/biojppm/rapidyaml
+pushd rapidyaml > $null
+git checkout v0.5.0
+git submodule update --init --recursive
+
+python3 tools/amalgamate.py ryml.h
+cp ryml.h ../../GUI/GUI/ryml.h
+
+popd > $null # rapidyaml
+
+popd > $null # $PSScriptRoot
 
diff --git a/GUI/README.md b/GUI/README.md
index 15862b7..d2fa999 100644
--- a/GUI/README.md
+++ b/GUI/README.md
@@ -1,23 +1,24 @@
 ## Build instructions
 
-0. Open Powershell.
-1. Make sure you've downloaded submodules:
+0. Install build dependencies: git, python3, Visual Studio 2022
+1. Open Powershell.
+2. Make sure you've downloaded submodules:
 ```
 $ git submodule init
 $ git submodule update
 ```
-2. Execute Libraries/fetch.ps1.
-3. Open Libraries/wx/build/msw/wx\_vc17.sln with Visual Studio 2022.
-4. Select every project in the Solution Explorer except for _custom_build.
-5. 
Right click, select Properties, go to C/C++, Code Generation, and set +3. Execute Libraries/fetch.ps1. +4. Open Libraries/wx/build/msw/wx\_vc17.sln with Visual Studio 2022. +5. Select every project in the Solution Explorer except for _custom_build. +6. Right click, select Properties, go to C/C++, Code Generation, and set Runtime Library to Multi-threaded (/MT). Make sure this applies to the configuration x64/Release. -6. Build x64/Release. +7. Build x64/Release. 1. The build configuration is in the top. By default it's probably Debug/x64. 2. To build: ctrl+shift+B -7. Open GUI/GUI.sln with Visual Studio 2022. -8. Build x64/Release. -9. Run package.ps1 from powershell. +8. Open GUI/GUI.sln with Visual Studio 2022. +9. Build x64/Release. +10. Run package.ps1 from powershell. ## High level design -- cgit v1.2.3