summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authoryum <yum.food.vr@gmail.com>2023-01-06 12:31:13 -0800
committeryum <yum.food.vr@gmail.com>2023-01-06 12:31:13 -0800
commit461714cc87549f3c0c3411bfe95f3936aca60f74 (patch)
tree3edb404fac0ed1ec370ed2bed0b543128d0ba925
parent66d311b3267620995e5c35b16f3fba18ed0c48f3 (diff)
GUI: Persist transcription app config v0.2
The configuration of the transcription app, such as the number of rows and columns in the text box, now persists across app restarts. I found that I had to change from the defaults to my preferred config every time I started up in VR, which was annoying. Now we just start with the config that was set last time.
* Add dependency on rapidyaml (MIT)
* Serialize transcription config to file under Resources/
* Add Config class to wrap serializing/deserializing
* Update build instructions
* Simplify StartApp() API, taking Config struct instead of a ton of arguments
-rw-r--r--GUI/GUI/GUI/.gitignore2
-rw-r--r--GUI/GUI/GUI/Config.cpp105
-rw-r--r--GUI/GUI/GUI/Config.h26
-rw-r--r--GUI/GUI/GUI/Frame.cpp69
-rw-r--r--GUI/GUI/GUI/GUI.vcxproj3
-rw-r--r--GUI/GUI/GUI/GUI.vcxproj.filters9
-rw-r--r--GUI/GUI/GUI/PythonWrapper.cpp27
-rw-r--r--GUI/GUI/GUI/PythonWrapper.h8
-rw-r--r--GUI/Libraries/.gitignore1
-rw-r--r--GUI/Libraries/fetch.ps118
-rw-r--r--GUI/README.md21
11 files changed, 238 insertions, 51 deletions
diff --git a/GUI/GUI/GUI/.gitignore b/GUI/GUI/GUI/.gitignore
index 41aaad2..e843fdb 100644
--- a/GUI/GUI/GUI/.gitignore
+++ b/GUI/GUI/GUI/.gitignore
@@ -3,3 +3,5 @@ x64
x86
# No .rc generated files
GUI.APS
+# No fetched files
+ryml.h
diff --git a/GUI/GUI/GUI/Config.cpp b/GUI/GUI/GUI/Config.cpp
new file mode 100644
index 0000000..0436f3b
--- /dev/null
+++ b/GUI/GUI/GUI/Config.cpp
@@ -0,0 +1,105 @@
+#include <wx/wxprec.h>
+
+#ifndef WX_PRECOMP
+#include <wx/wx.h>
+#endif
+
+#include "Config.h"
+
+#define RYML_SINGLE_HDR_DEFINE_NOW
+#include "ryml.h"
+
+#include <cerrno>
+#include <cstdio>
+#include <cstring>
+#include <fstream>
+#include <memory>
+#include <string>
+#include <system_error>
+#include <vector>
+
+TranscriptionAppConfig::TranscriptionAppConfig()
+ : microphone("index"),
+ language("english"),
+ model("base.en"),
+ chars_per_sync("20"),
+ bytes_per_char("1"),
+ rows("4"),
+ cols("48"),
+ window_duration("15"),
+ enable_local_beep(true),
+ use_cpu(false)
+{}
+
+// Writes this config to `path` as a YAML map with one key per setting.
+//
+// The YAML is first written to "<path>.tmp" and then renamed over `path`, so
+// an interrupted write never leaves a truncated config at `path`.
+//
+// Returns true on success. On failure, logs the reason with wxLogError and
+// returns false; any existing config at `path` is left untouched.
+bool TranscriptionAppConfig::Serialize(const std::filesystem::path& path) {
+  ryml::Tree t;
+  ryml::NodeRef root = t.rootref();
+  root |= ryml::MAP;
+  root["microphone"] << ryml::to_substr(microphone);
+  root["language"] << ryml::to_substr(language);
+  root["model"] << ryml::to_substr(model);
+  root["chars_per_sync"] << ryml::to_substr(chars_per_sync);
+  root["bytes_per_char"] << ryml::to_substr(bytes_per_char);
+  root["rows"] << ryml::to_substr(rows);
+  root["cols"] << ryml::to_substr(cols);
+  root["window_duration"] << ryml::to_substr(window_duration);
+  root["enable_local_beep"] << enable_local_beep;
+  root["use_cpu"] << use_cpu;
+
+  // Write the config to a tmp file. If we crash in the middle of this, it
+  // doesn't matter, since the next process will just overwrite it.
+  std::filesystem::path tmp_path = path;
+  tmp_path += ".tmp";
+  const std::string tmp_str = tmp_path.string();
+  const std::string path_str = path.string();
+
+  FILE* fp = fopen(tmp_str.c_str(), "wb");
+  if (!fp) {
+    // Report the file we actually tried to open, i.e. the tmp file.
+    wxLogError("Failed to open %s: %s", tmp_str.c_str(), strerror(errno));
+    return false;
+  }
+  ryml::emit_yaml(t, fp);
+  // A short write (e.g. disk full) shows up either as a stream error or as a
+  // failed flush inside fclose(). Always call fclose() so the handle is freed.
+  const bool write_failed = ferror(fp) != 0;
+  const bool close_failed = fclose(fp) != 0;
+  fp = nullptr;
+  if (write_failed || close_failed) {
+    wxLogError("Failed to write config to %s", tmp_str.c_str());
+    std::error_code ignored;
+    std::filesystem::remove(tmp_path, ignored);  // Best-effort cleanup.
+    return false;
+  }
+
+  // std::filesystem::rename() replaces an existing regular file at `path`, so
+  // the old config does not need to be deleted first. Deleting it up front
+  // would leave a window in which no config exists on disk at all.
+  std::error_code ec;
+  std::filesystem::rename(tmp_path, path, ec);
+  if (ec) {
+    wxLogError("Failed to save config to %s: %s", path_str.c_str(),
+        ec.message().c_str());
+    std::error_code ignored;
+    std::filesystem::remove(tmp_path, ignored);  // Best-effort cleanup.
+    return false;
+  }
+
+  return true;
+}
+
+// Loads the YAML config stored at `path` into this object.
+//
+// Keys that are absent from the file keep their default values, so a config
+// written by a build with fewer settings still loads.
+//
+// Returns true if the file was read and parsed as a YAML map. Returns false,
+// leaving this object unmodified, if the file cannot be opened or read, is
+// empty, or its root is not a map.
+//
+// NOTE(review): malformed YAML is reported through rapidyaml's error
+// callback; confirm what the configured callback does (the library default
+// is presumably to abort).
+bool TranscriptionAppConfig::Deserialize(const std::filesystem::path& path) {
+  std::ifstream file(path, std::ios::binary | std::ios::ate);
+  if (!file.is_open()) {
+    return false;
+  }
+  // tellg() returns -1 on failure; an empty file holds no config either.
+  const std::streamsize size = file.tellg();
+  if (size <= 0) {
+    return false;
+  }
+  file.seekg(0, std::ios::beg);
+  std::vector<char> yaml_buf(static_cast<size_t>(size));
+  if (!file.read(yaml_buf.data(), size)) {
+    return false;
+  }
+
+  // The buffer is not NUL-terminated, so pass its length explicitly instead
+  // of letting the parser scan for a terminator past the end of the vector.
+  ryml::Tree t = ryml::parse_in_place(
+      ryml::substr(yaml_buf.data(), yaml_buf.size()));
+  ryml::ConstNodeRef root = t.rootref();
+  if (!root.is_map()) {
+    return false;
+  }
+
+  // Start from the defaults and overwrite only the settings the file has.
+  TranscriptionAppConfig c;
+  const auto read_if_present = [&root](const char* key, auto& out) {
+    const ryml::csubstr k = ryml::to_csubstr(key);
+    if (!root.has_child(k)) {
+      return;
+    }
+    const ryml::ConstNodeRef child = root[k];
+    if (child.has_val()) {
+      child >> out;
+    }
+  };
+  read_if_present("microphone", c.microphone);
+  read_if_present("language", c.language);
+  read_if_present("model", c.model);
+  read_if_present("chars_per_sync", c.chars_per_sync);
+  read_if_present("bytes_per_char", c.bytes_per_char);
+  read_if_present("rows", c.rows);
+  read_if_present("cols", c.cols);
+  read_if_present("window_duration", c.window_duration);
+  read_if_present("enable_local_beep", c.enable_local_beep);
+  read_if_present("use_cpu", c.use_cpu);
+
+  *this = std::move(c);
+  return true;
+}
diff --git a/GUI/GUI/GUI/Config.h b/GUI/GUI/GUI/Config.h
new file mode 100644
index 0000000..e142773
--- /dev/null
+++ b/GUI/GUI/GUI/Config.h
@@ -0,0 +1,26 @@
+#pragma once
+
+#include <filesystem>
+
+// Settings for the transcription app, with helpers to save them to and load
+// them from a YAML file so they persist across app restarts.
+class TranscriptionAppConfig {
+public:
+ // Initializes every setting to its default value.
+ TranscriptionAppConfig();
+
+ // Writes the settings to `path` as YAML. Returns true on success, false if
+ // the file could not be written.
+ bool Serialize(const std::filesystem::path& path);
+
+ // Loads settings from the YAML file at `path` into this object. Returns
+ // false if the file could not be opened or read.
+ bool Deserialize(const std::filesystem::path& path);
+
+ // The default path at which configs are serialized.
+ static constexpr char kConfigPath[] = "Resources/transcription_app_config.yml";
+
+ // The settings below are forwarded to the transcription script as
+ // command-line arguments by PythonWrapper::StartApp(). Numeric settings are
+ // kept as strings.
+ std::string microphone;
+ std::string language;
+ std::string model;
+ std::string chars_per_sync;
+ std::string bytes_per_char;
+ std::string rows;
+ std::string cols;
+ std::string window_duration;
+ bool enable_local_beep;
+ bool use_cpu;
+};
diff --git a/GUI/GUI/GUI/Frame.cpp b/GUI/GUI/GUI/Frame.cpp
index 5fb8dd9..6c1f356 100644
--- a/GUI/GUI/GUI/Frame.cpp
+++ b/GUI/GUI/GUI/Frame.cpp
@@ -2,6 +2,8 @@
#include "Logging.h"
#include "PythonWrapper.h"
+#include "Config.h"
+
#include <filesystem>
#include <string>
#include <vector>
@@ -226,6 +228,20 @@ namespace {
const size_t kNumBytesPerChar = sizeof(kBytesPerChar) / sizeof(kBytesPerChar[0]);
// Sorry international users. Optimize for English speakers, by default.
constexpr int kBytesDefault = 0;
+
+ // Given the string value of a dropdown menu's entry, find its index. If no
+ // entry matches, return `default_index`.
+ //
+ // `menu` must point to at least `num_menu_entries` entries. When several
+ // entries match, the index of the first one is returned.
+ int GetDropdownChoiceIndex(const wxString menu[],
+     const size_t num_menu_entries, const std::string& entry,
+     const int default_index) {
+   // Index with size_t so the loop bound is compared without a
+   // signed/unsigned mismatch.
+   for (size_t i = 0; i < num_menu_entries; ++i) {
+     if (entry == menu[i]) {
+       return static_cast<int>(i);
+     }
+   }
+   return default_index;
+ }
+
} // namespace
using ::Logging::Log;
@@ -235,6 +251,9 @@ Frame::Frame()
py_app_(nullptr),
py_app_drain_(this, ID_PY_APP_DRAIN)
{
+ TranscriptionAppConfig c;
+ c.Deserialize(TranscriptionAppConfig::kConfigPath);
+
auto* main_panel = new wxPanel(this, ID_MAIN_PANEL);
main_panel_ = main_panel;
{
@@ -282,7 +301,8 @@ Frame::Frame()
{
auto* py_app_mic = new wxChoice(py_app_config_panel_pairs, ID_PY_APP_MIC, wxDefaultPosition,
wxDefaultSize, kNumMicChoices, kMicChoices);
- py_app_mic->SetSelection(kMicDefault);
+ int mic_idx = GetDropdownChoiceIndex(kMicChoices, kNumMicChoices, c.microphone, kMicDefault);
+ py_app_mic->SetSelection(mic_idx);
py_app_mic->SetToolTip(
"Select which microphone to listen to when "
"transcribing. To get list microphones and get their "
@@ -291,7 +311,8 @@ Frame::Frame()
auto* py_app_lang = new wxChoice(py_app_config_panel_pairs, ID_PY_APP_LANG, wxDefaultPosition,
wxDefaultSize, kNumLangChoices, kLangChoices);
- py_app_lang->SetSelection(kLangDefault);
+ int lang_idx = GetDropdownChoiceIndex(kLangChoices, kNumLangChoices, c.language, kLangDefault);
+ py_app_lang->SetSelection(lang_idx);
py_app_lang->SetToolTip("Select which language you will "
"speak in. It will be transcribed into that language. "
"If using a language with non-ASCII characters (i.e. "
@@ -302,7 +323,8 @@ Frame::Frame()
auto* py_app_model = new wxChoice(py_app_config_panel_pairs, ID_PY_APP_MODEL, wxDefaultPosition,
wxDefaultSize, kNumModelChoices, kModelChoices);
- py_app_model->SetSelection(kModelDefault);
+ int model_idx = GetDropdownChoiceIndex(kModelChoices, kNumModelChoices, c.model, kModelDefault);
+ py_app_model->SetSelection(model_idx);
py_app_model->SetToolTip("Select which version of "
"the transcription model to use. 'base' is a good "
"choice for most users. 'small' is slightly more "
@@ -314,7 +336,8 @@ Frame::Frame()
auto* py_app_chars_per_sync = new wxChoice(py_app_config_panel_pairs,
ID_PY_APP_CHARS_PER_SYNC, wxDefaultPosition,
wxDefaultSize, kNumCharsPerSync, kCharsPerSync);
- py_app_chars_per_sync->SetSelection(kCharsDefault);
+ int chars_idx = GetDropdownChoiceIndex(kCharsPerSync, kNumCharsPerSync, c.chars_per_sync, kCharsDefault);
+ py_app_chars_per_sync->SetSelection(chars_idx);
py_app_chars_per_sync->SetToolTip(
"VRChat syncs avatar parameters roughly 5 times per "
"second. We use this to send text to the box. By "
@@ -325,28 +348,29 @@ Frame::Frame()
auto* py_app_bytes_per_char = new wxChoice(py_app_config_panel_pairs,
ID_PY_APP_BYTES_PER_CHAR, wxDefaultPosition,
wxDefaultSize, kNumBytesPerChar, kBytesPerChar);
- py_app_bytes_per_char->SetSelection(kBytesDefault);
+ int bytes_idx = GetDropdownChoiceIndex(kBytesPerChar, kNumBytesPerChar, c.bytes_per_char, kBytesDefault);
+ py_app_bytes_per_char->SetSelection(bytes_idx);
py_app_bytes_per_char->SetToolTip(
"If you speak a language that uses non-ASCII "
"characters (i.e. not English), set this to 2.");
py_app_bytes_per_char_ = py_app_bytes_per_char;
auto* py_app_rows = new wxTextCtrl(py_app_config_panel_pairs,
- ID_PY_APP_ROWS, /*value=*/"4",
+ ID_PY_APP_ROWS, c.rows,
wxDefaultPosition, wxDefaultSize, /*style=*/0);
py_app_rows->SetToolTip(
"The number of rows on the text box.");
py_app_rows_ = py_app_rows;
auto* py_app_cols = new wxTextCtrl(py_app_config_panel_pairs,
- ID_PY_APP_COLS, /*value=*/"48",
+ ID_PY_APP_COLS, c.cols,
wxDefaultPosition, wxDefaultSize, /*style=*/0);
py_app_cols->SetToolTip(
"The number of columns on the text box.");
py_app_cols_ = py_app_cols;
auto* py_app_window_duration = new wxTextCtrl(py_app_config_panel_pairs,
- ID_PY_APP_WINDOW_DURATION, /*value=*/"15",
+ ID_PY_APP_WINDOW_DURATION, c.window_duration,
wxDefaultPosition, wxDefaultSize, /*style=*/0);
py_app_window_duration->SetToolTip(
"This controls how long the slice of audio that "
@@ -387,7 +411,7 @@ Frame::Frame()
auto* py_app_enable_local_beep = new wxCheckBox(py_config_panel,
ID_PY_APP_ENABLE_LOCAL_BEEP, "Enable local beep");
- py_app_enable_local_beep->SetValue(true);
+ py_app_enable_local_beep->SetValue(c.enable_local_beep);
py_app_enable_local_beep->SetToolTip(
"By default, TaSTT will play a sound (audible only to "
"you) when it begins transcription and when it stops. "
@@ -397,7 +421,7 @@ Frame::Frame()
auto* py_app_use_cpu = new wxCheckBox(py_config_panel,
ID_PY_APP_USE_CPU, "Use CPU");
- py_app_use_cpu->SetValue(false);
+ py_app_use_cpu->SetValue(c.use_cpu);
py_app_use_cpu->SetToolTip(
"If checked, the transcription engine will run on your "
"CPU instead of your GPU. This is typically much slower "
@@ -928,17 +952,20 @@ void Frame::OnAppStart(wxCommandEvent& event) {
return;
}
- wxProcess* p = PythonWrapper::StartApp(std::move(cb),
- kMicChoices[which_mic].ToStdString(),
- kLangChoices[which_lang].ToStdString(),
- kModelChoices[which_model].ToStdString(),
- kCharsPerSync[chars_per_sync_idx].ToStdString(),
- kBytesPerChar[bytes_per_char_idx].ToStdString(),
- rows,
- cols,
- window_duration,
- enable_local_beep,
- use_cpu);
+ TranscriptionAppConfig c;
+ c.microphone = kMicChoices[which_mic].ToStdString();
+ c.language = kLangChoices[which_lang].ToStdString();
+ c.model = kModelChoices[which_model].ToStdString();
+ c.chars_per_sync = kCharsPerSync[chars_per_sync_idx].ToStdString();
+ c.bytes_per_char = kBytesPerChar[bytes_per_char_idx].ToStdString();
+ c.rows = std::to_string(rows);
+ c.cols = std::to_string(cols);
+ c.window_duration = std::to_string(window_duration);
+ c.enable_local_beep = enable_local_beep;
+ c.use_cpu = use_cpu;
+ c.Serialize(TranscriptionAppConfig::kConfigPath);
+
+ wxProcess* p = PythonWrapper::StartApp(std::move(cb), c);
if (!p) {
Log(transcribe_out_, "Failed to launch transcription engine\n");
return;
diff --git a/GUI/GUI/GUI/GUI.vcxproj b/GUI/GUI/GUI/GUI.vcxproj
index 976855d..cbe3a92 100644
--- a/GUI/GUI/GUI/GUI.vcxproj
+++ b/GUI/GUI/GUI/GUI.vcxproj
@@ -140,6 +140,7 @@
</ItemDefinitionGroup>
<ItemGroup>
<ClCompile Include="App.cpp" />
+ <ClCompile Include="Config.cpp" />
<ClCompile Include="Frame.cpp" />
<ClCompile Include="Logging.cpp" />
<ClCompile Include="main.cpp" />
@@ -147,10 +148,12 @@
</ItemGroup>
<ItemGroup>
<ClInclude Include="App.h" />
+ <ClInclude Include="Config.h" />
<ClInclude Include="Frame.h" />
<ClInclude Include="Logging.h" />
<ClInclude Include="PythonWrapper.h" />
<ClInclude Include="resource.h" />
+ <ClInclude Include="ryml.h" />
<ClInclude Include="ScopeGuard.h" />
</ItemGroup>
<ItemGroup>
diff --git a/GUI/GUI/GUI/GUI.vcxproj.filters b/GUI/GUI/GUI/GUI.vcxproj.filters
index 348026a..3fa31c7 100644
--- a/GUI/GUI/GUI/GUI.vcxproj.filters
+++ b/GUI/GUI/GUI/GUI.vcxproj.filters
@@ -30,6 +30,9 @@
<ClCompile Include="Logging.cpp">
<Filter>Source Files</Filter>
</ClCompile>
+ <ClCompile Include="Config.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="PythonWrapper.h">
@@ -50,6 +53,12 @@
<ClInclude Include="Logging.h">
<Filter>Header Files</Filter>
</ClInclude>
+ <ClInclude Include="ryml.h">
+ <Filter>Header Files</Filter>
+ </ClInclude>
+ <ClInclude Include="Config.h">
+ <Filter>Header Files</Filter>
+ </ClInclude>
</ItemGroup>
<ItemGroup>
<ResourceCompile Include="GUI.rc">
diff --git a/GUI/GUI/GUI/PythonWrapper.cpp b/GUI/GUI/GUI/PythonWrapper.cpp
index a38ee4a..0c43fa4 100644
--- a/GUI/GUI/GUI/PythonWrapper.cpp
+++ b/GUI/GUI/GUI/PythonWrapper.cpp
@@ -1,6 +1,8 @@
#include "Logging.h"
#include "PythonWrapper.h"
+#include "Config.h"
+
#include <stdio.h>
#include <filesystem>
@@ -142,23 +144,20 @@ bool PythonWrapper::InstallPip(std::string* out) {
wxProcess* PythonWrapper::StartApp(
std::function<void(wxProcess* proc, int ret)>&& exit_callback,
- const std::string& mic, const std::string& lang, const std::string& model,
- const std::string& chars_per_sync, const std::string& bytes_per_char,
- int rows, int cols, int window_duration_s, bool enable_local_beep,
- bool use_cpu) {
+ const TranscriptionAppConfig& config) {
return InvokeAsyncWithArgs({
"-u",
"Resources/Scripts/transcribe.py",
- "--mic", mic,
- "--lang", lang,
- "--model", model,
- "--chars_per_sync", chars_per_sync,
- "--bytes_per_char", bytes_per_char,
- "--enable_local_beep", enable_local_beep ? "1" : "0",
- "--rows", std::to_string(rows),
- "--cols", std::to_string(cols),
- "--window_duration_s", std::to_string(window_duration_s),
- "--cpu", use_cpu ? "1" : "0",
+ "--mic", config.microphone,
+ "--lang", config.language,
+ "--model", config.model,
+ "--chars_per_sync", config.chars_per_sync,
+ "--bytes_per_char", config.bytes_per_char,
+ "--enable_local_beep", config.enable_local_beep ? "1" : "0",
+ "--rows", config.rows,
+ "--cols", config.cols,
+ "--window_duration_s", config.window_duration,
+ "--cpu", config.use_cpu ? "1" : "0",
},
std::move(exit_callback));
}
diff --git a/GUI/GUI/GUI/PythonWrapper.h b/GUI/GUI/GUI/PythonWrapper.h
index fed8e7b..38b35d4 100644
--- a/GUI/GUI/GUI/PythonWrapper.h
+++ b/GUI/GUI/GUI/PythonWrapper.h
@@ -8,6 +8,8 @@
#include <wx/process.h>
+#include "Config.h"
+
#include <filesystem>
#include <string>
#include <vector>
@@ -50,11 +52,7 @@ namespace PythonWrapper
// app restarts.
wxProcess* StartApp(
std::function<void(wxProcess* proc, int ret)>&& exit_callback,
- const std::string& mic, const std::string& lang, const std::string& model,
- const std::string& chars_per_sync, const std::string& bytes_per_char,
- int rows, int cols, int window_duration_s, bool enable_local_beep,
- bool use_cpu
- );
+ const TranscriptionAppConfig& config);
bool GenerateAnimator(
const std::filesystem::path& unity_assets_path,
diff --git a/GUI/Libraries/.gitignore b/GUI/Libraries/.gitignore
index 214bffb..fb46029 100644
--- a/GUI/Libraries/.gitignore
+++ b/GUI/Libraries/.gitignore
@@ -1,3 +1,4 @@
# Don't check in anything we fetch
wx
+rapidyaml
diff --git a/GUI/Libraries/fetch.ps1 b/GUI/Libraries/fetch.ps1
index 5f8d63e..78bf0d5 100644
--- a/GUI/Libraries/fetch.ps1
+++ b/GUI/Libraries/fetch.ps1
@@ -6,6 +6,7 @@ $WX_FILE = $(Split-Path -Path $WX_URL -Leaf)
pushd $PSScriptRoot
+# WX
if (Test-Path wx) {
rm -Recurse wx
}
@@ -16,5 +17,20 @@ Invoke-WebRequest $WX_URL -OutFile $WX_FILE
Expand-Archive $WX_FILE -DestinationPath .
popd > $null
-popd > $null
+# RAPIDYAML
+if (Test-Path rapidyaml) {
+ rm -Recurse rapidyaml
+}
+
+git clone https://github.com/biojppm/rapidyaml
+pushd rapidyaml > $null
+git checkout v0.5.0
+git submodule update --init --recursive
+
+python3 tools/amalgamate.py ryml.h
+cp ryml.h ../../GUI/GUI/ryml.h
+
+popd > $null # rapidyaml
+
+popd > $null # $PSScriptRoot
diff --git a/GUI/README.md b/GUI/README.md
index 15862b7..d2fa999 100644
--- a/GUI/README.md
+++ b/GUI/README.md
@@ -1,23 +1,24 @@
## Build instructions
-0. Open Powershell.
-1. Make sure you've downloaded submodules:
+0. Install build dependencies: git, python3, Visual Studio 2022
+1. Open Powershell.
+2. Make sure you've downloaded submodules:
```
$ git submodule init
$ git submodule update
```
-2. Execute Libraries/fetch.ps1.
-3. Open Libraries/wx/build/msw/wx\_vc17.sln with Visual Studio 2022.
-4. Select every project in the Solution Explorer except for _custom_build.
-5. Right click, select Properties, go to C/C++, Code Generation, and set
+3. Execute Libraries/fetch.ps1.
+4. Open Libraries/wx/build/msw/wx\_vc17.sln with Visual Studio 2022.
+5. Select every project in the Solution Explorer except for _custom_build.
+6. Right click, select Properties, go to C/C++, Code Generation, and set
Runtime Library to Multi-threaded (/MT). Make sure this applies to the
configuration x64/Release.
-6. Build x64/Release.
+7. Build x64/Release.
1. The build configuration is in the top. By default it's probably Debug/x64.
2. To build: ctrl+shift+B
-7. Open GUI/GUI.sln with Visual Studio 2022.
-8. Build x64/Release.
-9. Run package.ps1 from powershell.
+8. Open GUI/GUI.sln with Visual Studio 2022.
+9. Build x64/Release.
+10. Run package.ps1 from powershell.
## High level design