diff options
| author | yum <yum.food.vr@gmail.com> | 2023-01-06 12:31:13 -0800 |
|---|---|---|
| committer | yum <yum.food.vr@gmail.com> | 2023-01-06 12:31:13 -0800 |
| commit | 461714cc87549f3c0c3411bfe95f3936aca60f74 (patch) | |
| tree | 3edb404fac0ed1ec370ed2bed0b543128d0ba925 | |
| parent | 66d311b3267620995e5c35b16f3fba18ed0c48f3 (diff) | |
GUI: Persist transcription app config (v0.2)
The configuration of the transcription app, such as the number of rows
and columns in the text box, now persists across app restarts. I found
that I would have to change from the defaults to my preferred config
every time I started up in VR, which was annoying. Now we just start
with the config that was set last time.
* Add dependency on rapidyaml (MIT)
* Serialize transcription config to file under Resources/
* Add Config class to wrap serializing/deserializing
* Update build instructions
* Simplify StartApp() API, taking Config struct instead of a ton of
arguments
| -rw-r--r-- | GUI/GUI/GUI/.gitignore | 2 | ||||
| -rw-r--r-- | GUI/GUI/GUI/Config.cpp | 105 | ||||
| -rw-r--r-- | GUI/GUI/GUI/Config.h | 26 | ||||
| -rw-r--r-- | GUI/GUI/GUI/Frame.cpp | 69 | ||||
| -rw-r--r-- | GUI/GUI/GUI/GUI.vcxproj | 3 | ||||
| -rw-r--r-- | GUI/GUI/GUI/GUI.vcxproj.filters | 9 | ||||
| -rw-r--r-- | GUI/GUI/GUI/PythonWrapper.cpp | 27 | ||||
| -rw-r--r-- | GUI/GUI/GUI/PythonWrapper.h | 8 | ||||
| -rw-r--r-- | GUI/Libraries/.gitignore | 1 | ||||
| -rw-r--r-- | GUI/Libraries/fetch.ps1 | 18 | ||||
| -rw-r--r-- | GUI/README.md | 21 |
11 files changed, 238 insertions, 51 deletions
diff --git a/GUI/GUI/GUI/.gitignore b/GUI/GUI/GUI/.gitignore index 41aaad2..e843fdb 100644 --- a/GUI/GUI/GUI/.gitignore +++ b/GUI/GUI/GUI/.gitignore @@ -3,3 +3,5 @@ x64 x86 # No .rc generated files GUI.APS +# No fetched files +ryml.h diff --git a/GUI/GUI/GUI/Config.cpp b/GUI/GUI/GUI/Config.cpp new file mode 100644 index 0000000..0436f3b --- /dev/null +++ b/GUI/GUI/GUI/Config.cpp @@ -0,0 +1,105 @@ +#include <wx/wxprec.h>
+
+#ifndef WX_PRECOMP
+#include <wx/wx.h>
+#endif
+
+#include "Config.h"
+
+#define RYML_SINGLE_HDR_DEFINE_NOW
+#include "ryml.h"
+
+#include <fstream>
+#include <memory>
+#include <string>
+
+// Builds a config holding the app's default settings. Numeric settings are
+// kept as strings, matching the member types.
+TranscriptionAppConfig::TranscriptionAppConfig()
+    : microphone{"index"},
+      language{"english"},
+      model{"base.en"},
+      chars_per_sync{"20"},
+      bytes_per_char{"1"},
+      rows{"4"},
+      cols{"48"},
+      window_duration{"15"},
+      enable_local_beep{true},
+      use_cpu{false} {
+}
+
+// Writes this config to `path` as a YAML map.
+//
+// The YAML is first written to `<path>.tmp`; any existing file at `path` is
+// then deleted and the temp file is renamed into place.
+//
+// Returns true on success. On failure, logs the reason with wxLogError and
+// returns false.
+bool TranscriptionAppConfig::Serialize(const std::filesystem::path& path) {
+  ryml::Tree t;
+  ryml::NodeRef root = t.rootref();
+  root |= ryml::MAP;
+  root["microphone"] << ryml::to_substr(microphone);
+  root["language"] << ryml::to_substr(language);
+  root["model"] << ryml::to_substr(model);
+  root["chars_per_sync"] << ryml::to_substr(chars_per_sync);
+  root["bytes_per_char"] << ryml::to_substr(bytes_per_char);
+  root["rows"] << ryml::to_substr(rows);
+  root["cols"] << ryml::to_substr(cols);
+  root["window_duration"] << ryml::to_substr(window_duration);
+  root["enable_local_beep"] << enable_local_beep;
+  root["use_cpu"] << use_cpu;
+
+  std::filesystem::path tmp_path = path;
+  tmp_path += ".tmp";
+  const std::string path_str = path.string();
+  const std::string tmp_str = tmp_path.string();
+
+  // Write the config to a tmp file. If we crash in the middle of this, it
+  // doesn't matter, since the next process will just overwrite it.
+  FILE* fp = fopen(tmp_str.c_str(), "wb");
+  if (!fp) {
+    // Report the file we actually tried to open, i.e. the temp file.
+    wxLogError("Failed to open %s: %s", tmp_str.c_str(), strerror(errno));
+    return false;
+  }
+  ryml::emit_yaml(t, fp);
+  // Buffered write errors only show up through ferror()/fclose(). Check both
+  // so a truncated temp file never replaces a good config.
+  const bool write_ok = (ferror(fp) == 0);
+  const bool close_ok = (fclose(fp) == 0);
+  fp = nullptr;
+  if (!write_ok || !close_ok) {
+    wxLogError("Failed to write %s", tmp_str.c_str());
+    return false;
+  }
+
+  // If there's an old config, delete it.
+  // NOTE(review): presumably needed because rename() here does not replace
+  // an existing destination - confirm against the CRT documentation.
+  struct stat tmpstat;
+  if (stat(path_str.c_str(), &tmpstat) == 0) {
+    if (::_unlink(path_str.c_str())) {
+      wxLogError("Failed to delete old config at %s: %s", path_str.c_str(),
+          strerror(errno));
+      return false;
+    }
+  }
+
+  // The rename publishes the new config. Because the old file was deleted
+  // above, `path` is either absent or fully written, never partially written.
+  if (rename(tmp_str.c_str(), path_str.c_str()) != 0) {
+    wxLogError("Failed to save config to %s: %s", path_str.c_str(),
+        strerror(errno));
+    return false;
+  }
+
+  return true;
+}
+
+// Loads the YAML config stored at `path` into this object.
+//
+// Returns false, leaving this object unchanged, if the file cannot be
+// opened, is empty, or cannot be read. On success every field is replaced
+// with the stored value and true is returned.
+//
+// NOTE(review): keys are looked up unconditionally; confirm how ryml reports
+// a key that is missing from a hand-edited file.
+bool TranscriptionAppConfig::Deserialize(const std::filesystem::path& path) {
+  // Open at the end so tellg() reports the file size.
+  std::ifstream file(path, std::ios::binary | std::ios::ate);
+  if (!file.is_open()) {
+    return false;
+  }
+  const std::streamsize size = file.tellg();
+  // tellg() returns -1 on failure, which must not be used as a vector size.
+  // An empty file has nothing to parse either.
+  if (size <= 0) {
+    return false;
+  }
+  file.seekg(0, std::ios::beg);
+  std::vector<char> yaml_buf(static_cast<size_t>(size));
+  if (!file.read(yaml_buf.data(), size)) {
+    return false;
+  }
+
+  // The buffer is not NUL-terminated, so pass its length explicitly instead
+  // of letting ryml measure a C string.
+  ryml::Tree t = ryml::parse_in_place(
+      ryml::substr(yaml_buf.data(), yaml_buf.size()));
+  ryml::ConstNodeRef root = t.rootref();
+
+  // Parse into a scratch object so `*this` is only replaced as a whole.
+  TranscriptionAppConfig c;
+  root["microphone"] >> c.microphone;
+  root["language"] >> c.language;
+  root["model"] >> c.model;
+  root["chars_per_sync"] >> c.chars_per_sync;
+  root["bytes_per_char"] >> c.bytes_per_char;
+  root["rows"] >> c.rows;
+  root["cols"] >> c.cols;
+  root["window_duration"] >> c.window_duration;
+  root["enable_local_beep"] >> c.enable_local_beep;
+  // Serialize() writes this key, so read it back as well.
+  root["use_cpu"] >> c.use_cpu;
+
+  *this = std::move(c);
+  return true;
+}
diff --git a/GUI/GUI/GUI/Config.h b/GUI/GUI/GUI/Config.h new file mode 100644 index 0000000..e142773 --- /dev/null +++ b/GUI/GUI/GUI/Config.h @@ -0,0 +1,26 @@ +#pragma once
+
+#include <filesystem>
+
+// Settings for the transcription app, plus helpers to save them to and load
+// them from a file on disk.
+class TranscriptionAppConfig {
+public:
+ // Initializes every field to its default value.
+ TranscriptionAppConfig();
+
+ // Writes the config to `path`. Returns true on success, false on failure.
+ bool Serialize(const std::filesystem::path& path);
+
+ // Reads the config stored at `path` into this object. Returns false if the
+ // file cannot be opened or read.
+ bool Deserialize(const std::filesystem::path& path);
+
+ // The default path at which configs are serialized.
+ static constexpr char kConfigPath[] = "Resources/transcription_app_config.yml";
+
+ // Settings that are kept as strings, including the numeric ones.
+ std::string microphone;
+ std::string language;
+ std::string model;
+ std::string chars_per_sync;
+ std::string bytes_per_char;
+ std::string rows;
+ std::string cols;
+ std::string window_duration;
+ // On/off settings.
+ bool enable_local_beep;
+ bool use_cpu;
+};
diff --git a/GUI/GUI/GUI/Frame.cpp b/GUI/GUI/GUI/Frame.cpp index 5fb8dd9..6c1f356 100644 --- a/GUI/GUI/GUI/Frame.cpp +++ b/GUI/GUI/GUI/Frame.cpp @@ -2,6 +2,8 @@ #include "Logging.h"
#include "PythonWrapper.h"
+#include "Config.h"
+
#include <filesystem>
#include <string>
#include <vector>
@@ -226,6 +228,20 @@ namespace { const size_t kNumBytesPerChar = sizeof(kBytesPerChar) / sizeof(kBytesPerChar[0]);
// Sorry international users. Optimize for English speakers, by default.
constexpr int kBytesDefault = 0;
+
+ // Given the string value of a dropdown menu's entry, find its index. If no
+ // entry matches, return `default_index`.
+ //
+ // `menu` points at `num_menu_entries` choices. The first entry that compares
+ // equal to `entry` wins.
+ int GetDropdownChoiceIndex(const wxString menu[],
+     const size_t num_menu_entries, const std::string& entry,
+     const int default_index) {
+   // Index with size_t so the bound check does not mix signed and unsigned.
+   for (size_t i = 0; i < num_menu_entries; ++i) {
+     if (entry == menu[i]) {
+       return static_cast<int>(i);
+     }
+   }
+   return default_index;
+ }
+
} // namespace
using ::Logging::Log;
@@ -235,6 +251,9 @@ Frame::Frame() py_app_(nullptr),
py_app_drain_(this, ID_PY_APP_DRAIN)
{
+ TranscriptionAppConfig c;
+ c.Deserialize(TranscriptionAppConfig::kConfigPath);
+
auto* main_panel = new wxPanel(this, ID_MAIN_PANEL);
main_panel_ = main_panel;
{
@@ -282,7 +301,8 @@ Frame::Frame() {
auto* py_app_mic = new wxChoice(py_app_config_panel_pairs, ID_PY_APP_MIC, wxDefaultPosition,
wxDefaultSize, kNumMicChoices, kMicChoices);
- py_app_mic->SetSelection(kMicDefault);
+ int mic_idx = GetDropdownChoiceIndex(kMicChoices, kNumMicChoices, c.microphone, kMicDefault);
+ py_app_mic->SetSelection(mic_idx);
py_app_mic->SetToolTip(
"Select which microphone to listen to when "
"transcribing. To get list microphones and get their "
@@ -291,7 +311,8 @@ Frame::Frame() auto* py_app_lang = new wxChoice(py_app_config_panel_pairs, ID_PY_APP_LANG, wxDefaultPosition,
wxDefaultSize, kNumLangChoices, kLangChoices);
- py_app_lang->SetSelection(kLangDefault);
+ int lang_idx = GetDropdownChoiceIndex(kLangChoices, kNumLangChoices, c.language, kLangDefault);
+ py_app_lang->SetSelection(lang_idx);
py_app_lang->SetToolTip("Select which language you will "
"speak in. It will be transcribed into that language. "
"If using a language with non-ASCII characters (i.e. "
@@ -302,7 +323,8 @@ Frame::Frame() auto* py_app_model = new wxChoice(py_app_config_panel_pairs, ID_PY_APP_MODEL, wxDefaultPosition,
wxDefaultSize, kNumModelChoices, kModelChoices);
- py_app_model->SetSelection(kModelDefault);
+ int model_idx = GetDropdownChoiceIndex(kModelChoices, kNumModelChoices, c.model, kModelDefault);
+ py_app_model->SetSelection(model_idx);
py_app_model->SetToolTip("Select which version of "
"the transcription model to use. 'base' is a good "
"choice for most users. 'small' is slightly more "
@@ -314,7 +336,8 @@ Frame::Frame() auto* py_app_chars_per_sync = new wxChoice(py_app_config_panel_pairs,
ID_PY_APP_CHARS_PER_SYNC, wxDefaultPosition,
wxDefaultSize, kNumCharsPerSync, kCharsPerSync);
- py_app_chars_per_sync->SetSelection(kCharsDefault);
+ int chars_idx = GetDropdownChoiceIndex(kCharsPerSync, kNumCharsPerSync, c.chars_per_sync, kCharsDefault);
+ py_app_chars_per_sync->SetSelection(chars_idx);
py_app_chars_per_sync->SetToolTip(
"VRChat syncs avatar parameters roughly 5 times per "
"second. We use this to send text to the box. By "
@@ -325,28 +348,29 @@ Frame::Frame() auto* py_app_bytes_per_char = new wxChoice(py_app_config_panel_pairs,
ID_PY_APP_BYTES_PER_CHAR, wxDefaultPosition,
wxDefaultSize, kNumBytesPerChar, kBytesPerChar);
- py_app_bytes_per_char->SetSelection(kBytesDefault);
+ int bytes_idx = GetDropdownChoiceIndex(kBytesPerChar, kNumBytesPerChar, c.bytes_per_char, kBytesDefault);
+ py_app_bytes_per_char->SetSelection(bytes_idx);
py_app_bytes_per_char->SetToolTip(
"If you speak a language that uses non-ASCII "
"characters (i.e. not English), set this to 2.");
py_app_bytes_per_char_ = py_app_bytes_per_char;
auto* py_app_rows = new wxTextCtrl(py_app_config_panel_pairs,
- ID_PY_APP_ROWS, /*value=*/"4",
+ ID_PY_APP_ROWS, c.rows,
wxDefaultPosition, wxDefaultSize, /*style=*/0);
py_app_rows->SetToolTip(
"The number of rows on the text box.");
py_app_rows_ = py_app_rows;
auto* py_app_cols = new wxTextCtrl(py_app_config_panel_pairs,
- ID_PY_APP_COLS, /*value=*/"48",
+ ID_PY_APP_COLS, c.cols,
wxDefaultPosition, wxDefaultSize, /*style=*/0);
py_app_cols->SetToolTip(
"The number of columns on the text box.");
py_app_cols_ = py_app_cols;
auto* py_app_window_duration = new wxTextCtrl(py_app_config_panel_pairs,
- ID_PY_APP_WINDOW_DURATION, /*value=*/"15",
+ ID_PY_APP_WINDOW_DURATION, c.window_duration,
wxDefaultPosition, wxDefaultSize, /*style=*/0);
py_app_window_duration->SetToolTip(
"This controls how long the slice of audio that "
@@ -387,7 +411,7 @@ Frame::Frame() auto* py_app_enable_local_beep = new wxCheckBox(py_config_panel,
ID_PY_APP_ENABLE_LOCAL_BEEP, "Enable local beep");
- py_app_enable_local_beep->SetValue(true);
+ py_app_enable_local_beep->SetValue(c.enable_local_beep);
py_app_enable_local_beep->SetToolTip(
"By default, TaSTT will play a sound (audible only to "
"you) when it begins transcription and when it stops. "
@@ -397,7 +421,7 @@ Frame::Frame() auto* py_app_use_cpu = new wxCheckBox(py_config_panel,
ID_PY_APP_USE_CPU, "Use CPU");
- py_app_use_cpu->SetValue(false);
+ py_app_use_cpu->SetValue(c.use_cpu);
py_app_use_cpu->SetToolTip(
"If checked, the transcription engine will run on your "
"CPU instead of your GPU. This is typically much slower "
@@ -928,17 +952,20 @@ void Frame::OnAppStart(wxCommandEvent& event) { return;
}
- wxProcess* p = PythonWrapper::StartApp(std::move(cb),
- kMicChoices[which_mic].ToStdString(),
- kLangChoices[which_lang].ToStdString(),
- kModelChoices[which_model].ToStdString(),
- kCharsPerSync[chars_per_sync_idx].ToStdString(),
- kBytesPerChar[bytes_per_char_idx].ToStdString(),
- rows,
- cols,
- window_duration,
- enable_local_beep,
- use_cpu);
+ TranscriptionAppConfig c;
+ c.microphone = kMicChoices[which_mic].ToStdString();
+ c.language = kLangChoices[which_lang].ToStdString();
+ c.model = kModelChoices[which_model].ToStdString();
+ c.chars_per_sync = kCharsPerSync[chars_per_sync_idx].ToStdString();
+ c.bytes_per_char = kBytesPerChar[bytes_per_char_idx].ToStdString();
+ c.rows = std::to_string(rows);
+ c.cols = std::to_string(cols);
+ c.window_duration = std::to_string(window_duration);
+ c.enable_local_beep = enable_local_beep;
+ c.use_cpu = use_cpu;
+ c.Serialize(TranscriptionAppConfig::kConfigPath);
+
+ wxProcess* p = PythonWrapper::StartApp(std::move(cb), c);
if (!p) {
Log(transcribe_out_, "Failed to launch transcription engine\n");
return;
diff --git a/GUI/GUI/GUI/GUI.vcxproj b/GUI/GUI/GUI/GUI.vcxproj index 976855d..cbe3a92 100644 --- a/GUI/GUI/GUI/GUI.vcxproj +++ b/GUI/GUI/GUI/GUI.vcxproj @@ -140,6 +140,7 @@ </ItemDefinitionGroup>
<ItemGroup>
<ClCompile Include="App.cpp" />
+ <ClCompile Include="Config.cpp" />
<ClCompile Include="Frame.cpp" />
<ClCompile Include="Logging.cpp" />
<ClCompile Include="main.cpp" />
@@ -147,10 +148,12 @@ </ItemGroup>
<ItemGroup>
<ClInclude Include="App.h" />
+ <ClInclude Include="Config.h" />
<ClInclude Include="Frame.h" />
<ClInclude Include="Logging.h" />
<ClInclude Include="PythonWrapper.h" />
<ClInclude Include="resource.h" />
+ <ClInclude Include="ryml.h" />
<ClInclude Include="ScopeGuard.h" />
</ItemGroup>
<ItemGroup>
diff --git a/GUI/GUI/GUI/GUI.vcxproj.filters b/GUI/GUI/GUI/GUI.vcxproj.filters index 348026a..3fa31c7 100644 --- a/GUI/GUI/GUI/GUI.vcxproj.filters +++ b/GUI/GUI/GUI/GUI.vcxproj.filters @@ -30,6 +30,9 @@ <ClCompile Include="Logging.cpp">
<Filter>Source Files</Filter>
</ClCompile>
+ <ClCompile Include="Config.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="PythonWrapper.h">
@@ -50,6 +53,12 @@ <ClInclude Include="Logging.h">
<Filter>Header Files</Filter>
</ClInclude>
+ <ClInclude Include="ryml.h">
+ <Filter>Header Files</Filter>
+ </ClInclude>
+ <ClInclude Include="Config.h">
+ <Filter>Header Files</Filter>
+ </ClInclude>
</ItemGroup>
<ItemGroup>
<ResourceCompile Include="GUI.rc">
diff --git a/GUI/GUI/GUI/PythonWrapper.cpp b/GUI/GUI/GUI/PythonWrapper.cpp index a38ee4a..0c43fa4 100644 --- a/GUI/GUI/GUI/PythonWrapper.cpp +++ b/GUI/GUI/GUI/PythonWrapper.cpp @@ -1,6 +1,8 @@ #include "Logging.h" #include "PythonWrapper.h" +#include "Config.h" + #include <stdio.h> #include <filesystem> @@ -142,23 +144,20 @@ bool PythonWrapper::InstallPip(std::string* out) { wxProcess* PythonWrapper::StartApp( std::function<void(wxProcess* proc, int ret)>&& exit_callback, - const std::string& mic, const std::string& lang, const std::string& model, - const std::string& chars_per_sync, const std::string& bytes_per_char, - int rows, int cols, int window_duration_s, bool enable_local_beep, - bool use_cpu) { + const TranscriptionAppConfig& config) { return InvokeAsyncWithArgs({ "-u", "Resources/Scripts/transcribe.py", - "--mic", mic, - "--lang", lang, - "--model", model, - "--chars_per_sync", chars_per_sync, - "--bytes_per_char", bytes_per_char, - "--enable_local_beep", enable_local_beep ? "1" : "0", - "--rows", std::to_string(rows), - "--cols", std::to_string(cols), - "--window_duration_s", std::to_string(window_duration_s), - "--cpu", use_cpu ? "1" : "0", + "--mic", config.microphone, + "--lang", config.language, + "--model", config.model, + "--chars_per_sync", config.chars_per_sync, + "--bytes_per_char", config.bytes_per_char, + "--enable_local_beep", config.enable_local_beep ? "1" : "0", + "--rows", config.rows, + "--cols", config.cols, + "--window_duration_s", config.window_duration, + "--cpu", config.use_cpu ? "1" : "0", }, std::move(exit_callback)); } diff --git a/GUI/GUI/GUI/PythonWrapper.h b/GUI/GUI/GUI/PythonWrapper.h index fed8e7b..38b35d4 100644 --- a/GUI/GUI/GUI/PythonWrapper.h +++ b/GUI/GUI/GUI/PythonWrapper.h @@ -8,6 +8,8 @@ #include <wx/process.h> +#include "Config.h" + #include <filesystem> #include <string> #include <vector> @@ -50,11 +52,7 @@ namespace PythonWrapper // app restarts. 
wxProcess* StartApp( std::function<void(wxProcess* proc, int ret)>&& exit_callback, - const std::string& mic, const std::string& lang, const std::string& model, - const std::string& chars_per_sync, const std::string& bytes_per_char, - int rows, int cols, int window_duration_s, bool enable_local_beep, - bool use_cpu - ); + const TranscriptionAppConfig& config); bool GenerateAnimator( const std::filesystem::path& unity_assets_path, diff --git a/GUI/Libraries/.gitignore b/GUI/Libraries/.gitignore index 214bffb..fb46029 100644 --- a/GUI/Libraries/.gitignore +++ b/GUI/Libraries/.gitignore @@ -1,3 +1,4 @@ # Don't check in anything we fetch wx +rapidyaml diff --git a/GUI/Libraries/fetch.ps1 b/GUI/Libraries/fetch.ps1 index 5f8d63e..78bf0d5 100644 --- a/GUI/Libraries/fetch.ps1 +++ b/GUI/Libraries/fetch.ps1 @@ -6,6 +6,7 @@ $WX_FILE = $(Split-Path -Path $WX_URL -Leaf) pushd $PSScriptRoot +# WX if (Test-Path wx) { rm -Recurse wx } @@ -16,5 +17,20 @@ Invoke-WebRequest $WX_URL -OutFile $WX_FILE Expand-Archive $WX_FILE -DestinationPath . popd > $null -popd > $null +# RAPIDYAML +if (Test-Path rapidyaml) { + rm -Recurse rapidyaml +} + +git clone https://github.com/biojppm/rapidyaml +pushd rapidyaml > $null +git checkout v0.5.0 +git submodule update --init --recursive + +python3 tools/amalgamate.py ryml.h +cp ryml.h ../../GUI/GUI/ryml.h + +popd > $null # rapidyaml + +popd > $null # $PSScriptRoot diff --git a/GUI/README.md b/GUI/README.md index 15862b7..d2fa999 100644 --- a/GUI/README.md +++ b/GUI/README.md @@ -1,23 +1,24 @@ ## Build instructions -0. Open Powershell. -1. Make sure you've downloaded submodules: +0. Install build dependencies: git, python3, Visual Studio 2022 +1. Open Powershell. +2. Make sure you've downloaded submodules: ``` $ git submodule init $ git submodule update ``` -2. Execute Libraries/fetch.ps1. -3. Open Libraries/wx/build/msw/wx\_vc17.sln with Visual Studio 2022. -4. Select every project in the Solution Explorer except for _custom_build. -5. 
Right click, select Properties, go to C/C++, Code Generation, and set +3. Execute Libraries/fetch.ps1. +4. Open Libraries/wx/build/msw/wx\_vc17.sln with Visual Studio 2022. +5. Select every project in the Solution Explorer except for _custom_build. +6. Right click, select Properties, go to C/C++, Code Generation, and set Runtime Library to Multi-threaded (/MT). Make sure this applies to the configuration x64/Release. -6. Build x64/Release. +7. Build x64/Release. 1. The build configuration is in the top. By default it's probably Debug/x64. 2. To build: ctrl+shift+B -7. Open GUI/GUI.sln with Visual Studio 2022. -8. Build x64/Release. -9. Run package.ps1 from powershell. +8. Open GUI/GUI.sln with Visual Studio 2022. +9. Build x64/Release. +10. Run package.ps1 from powershell. ## High level design |
