From cb44e4744ac82d1d35547d12254cfea09dc63fae Mon Sep 17 00:00:00 2001
From: yum <yum.food.vr@gmail.com>
Date: Fri, 1 Sep 2023 00:06:20 -0700
Subject: Add Unity panel toggle for phonemes (in-game audio indicator)

If the toggle is not set, the audio sources are removed from the prefab when avatar assets are generated.
---
 GUI/GUI/GUI/Config.cpp        |  5 ++++-
 GUI/GUI/GUI/Config.h          |  1 +
 GUI/GUI/GUI/Frame.cpp         | 34 ++++++++++++++++++++++++++++------
 GUI/GUI/GUI/Frame.h           |  1 +
 GUI/GUI/GUI/PythonWrapper.cpp | 11 +++++++++++
 Scripts/transcribe.py         | 10 ++++------
 6 files changed, 49 insertions(+), 13 deletions(-)

diff --git a/GUI/GUI/GUI/Config.cpp b/GUI/GUI/GUI/Config.cpp
index 40ac92a..456f5d4 100644
--- a/GUI/GUI/GUI/Config.cpp
+++ b/GUI/GUI/GUI/Config.cpp
@@ -95,7 +95,8 @@ AppConfig::AppConfig(wxTextCtrl* out)
 	params_path(),
 	menu_path(),
 	unity_generated_dir("TaSTT_Generated"),
-	clear_osc(true)
+	clear_osc(true),
+	enable_phonemes(false)
 {}
 
 bool AppConfig::Serialize(const std::filesystem::path& path) {
@@ -135,6 +136,7 @@ bool AppConfig::Serialize(const std::filesystem::path& path) {
 	cm.Set("menu_path", menu_path);
 	cm.Set("unity_generated_dir", unity_generated_dir);
 	cm.Set("clear_osc", clear_osc);
+	cm.Set("enable_phonemes", enable_phonemes);
 
 	return Config::Serialize(path, cm);
 }
@@ -187,6 +189,7 @@ bool AppConfig::Deserialize(const std::filesystem::path& path) {
 	cm.Get("menu_path", c.menu_path);
 	cm.Get("unity_generated_dir", c.unity_generated_dir);
 	cm.Get("clear_osc", c.clear_osc);
+	cm.Get("enable_phonemes", c.enable_phonemes);
 
 	*this = std::move(c);
 	return true;
diff --git a/GUI/GUI/GUI/Config.h b/GUI/GUI/GUI/Config.h
index 88d9b69..a6f83e2 100644
--- a/GUI/GUI/GUI/Config.h
+++ b/GUI/GUI/GUI/Config.h
@@ -84,5 +84,6 @@ public:
 	std::string menu_path;
 	std::string unity_generated_dir;
 	bool clear_osc;
+	bool enable_phonemes;
 };
 
diff --git a/GUI/GUI/GUI/Frame.cpp b/GUI/GUI/GUI/Frame.cpp
index bcf0a2a..432569a 100644
--- a/GUI/GUI/GUI/Frame.cpp
+++ b/GUI/GUI/GUI/Frame.cpp
@@ -96,6 +96,7 @@ namespace {
         ID_UNITY_ROWS,
         ID_UNITY_COLS,
         ID_UNITY_CLEAR_OSC,
+        ID_UNITY_ENABLE_PHONEMES,
 		ID_DEBUG_PANEL,
 		ID_DEBUG_OUT,
 		ID_DEBUG_CONFIG_PANEL,
@@ -1222,6 +1223,17 @@ Frame::Frame()
 					"an existing avatar.");
                 unity_clear_osc_ = clear_osc;
 
+				auto* enable_phonemes = new wxCheckBox(unity_config_panel,
+					ID_UNITY_ENABLE_PHONEMES, "Enable phonemes");
+				enable_phonemes->SetValue(app_c_->enable_phonemes);
+                enable_phonemes->SetToolTip(
+                    "If checked, the chatbox will be created with 5 audio "
+                    "sources for each English vowel sound: a, e, i, o, and u. "
+                    "Whenever a page of data is sent into the game, any "
+                    "vowels will have the corresponding audio source enabled. "
+                    "This uses 6 parameter bits.");
+                unity_enable_phonemes_ = enable_phonemes;
+
 				auto* unity_button_gen_fx = new wxButton(unity_config_panel,
                     ID_UNITY_BUTTON_GEN_ANIMATOR, "Generate avatar assets");
                 unity_button_gen_fx->SetWindowStyleFlag(wxBU_EXACTFIT);
@@ -1246,6 +1258,7 @@ Frame::Frame()
 				unity_config_panel->SetSizer(sizer);
 				sizer->Add(unity_config_panel_pairs);
                 sizer->Add(clear_osc);
+                sizer->Add(enable_phonemes);
 				sizer->Add(unity_button_gen_fx, /*proportion=*/0,
                     /*flags=*/wxEXPAND);
 				sizer->Add(unity_button_auto_refresh, /*proportion=*/0,
@@ -1407,6 +1420,8 @@ Frame::Frame()
         ID_UNITY_CHARS_PER_SYNC);
     Bind(wxEVT_CHOICE, &Frame::OnUnityParamChange, this,
         ID_UNITY_BYTES_PER_CHAR);
+    Bind(wxEVT_CHECKBOX, &Frame::OnUnityParamChange, this,
+        ID_UNITY_ENABLE_PHONEMES);
 
 	// wx needs this to be able to load PNGs.
 	wxImage::AddHandler(&png_handler_);
@@ -1786,6 +1801,7 @@ void Frame::OnGenerateFX(wxCommandEvent& event)
 		app_c_->rows = rows;
 		app_c_->cols = cols;
 		app_c_->clear_osc = unity_clear_osc_->GetValue();
+		app_c_->enable_phonemes = unity_enable_phonemes_->GetValue();
 		app_c_->Serialize(AppConfig::kConfigPath);
 
 		std::string out;
@@ -2134,15 +2150,21 @@ void Frame::OnUnityParamChangeImpl() {
     //   3. disable
     //   4. lock
     //   5. clear
-    //   6. audio indicator enable
-    //   7. audio indicator toggle
-    //   8. visual indicator 1
-    //   9. visual indicator 2
-    int misc_bits = 9;
-    int total_bits = select_bits + layer_bits + scale_bits + misc_bits;
+    int misc_bits = 5;
+
+    int phoneme_bits = 0;
+    if (unity_enable_phonemes_->GetValue()) {
+        phoneme_bits = 6;
+    }
+
+    int total_bits = select_bits + layer_bits + scale_bits + misc_bits + phoneme_bits;
+
     Log(unity_out_, "This configuration will use {} bits of avatar parameter space:\n", total_bits);
     Log(unity_out_, "  {} bits coming from ({} characters per sync) * ({} bytes per character)\n", layer_bits, chars_per_sync, bytes_per_char);
     Log(unity_out_, "  {} bits coming from fixed overheads\n", select_bits + scale_bits + misc_bits);
+    if (phoneme_bits > 0) {
+        Log(unity_out_, "  {} bits coming from phonemes\n", phoneme_bits);
+    }
 }
 
 void Frame::OnUnityParamChange(wxCommandEvent& event) {
diff --git a/GUI/GUI/GUI/Frame.h b/GUI/GUI/GUI/Frame.h
index 45ebf65..d8b13f4 100644
--- a/GUI/GUI/GUI/Frame.h
+++ b/GUI/GUI/GUI/Frame.h
@@ -73,6 +73,7 @@ private:
     wxCheckBox* py_app_enable_debug_mode_;
     wxCheckBox* py_app_reset_on_toggle_;
     wxCheckBox* unity_clear_osc_;
+    wxCheckBox* unity_enable_phonemes_;
 
     std::future<bool> py_app_;
     std::future<bool> obs_app_;
diff --git a/GUI/GUI/GUI/PythonWrapper.cpp b/GUI/GUI/GUI/PythonWrapper.cpp
index 4acb51d..1b041e3 100644
--- a/GUI/GUI/GUI/PythonWrapper.cpp
+++ b/GUI/GUI/GUI/PythonWrapper.cpp
@@ -499,6 +499,7 @@ bool PythonWrapper::GenerateAnimator(
 	const std::string& unity_menu_generated_name,
 	wxTextCtrl* out) {
 	// Python script locations
+	std::string remove_audio_srcs_path = "Resources/Scripts/remove_audio_sources.py";
 	std::string libunity_path = "Resources/Scripts/libunity.py";
 	std::string libtastt_path = "Resources/Scripts/libtastt.py";
 	std::string generate_emotes_path = "Resources/Scripts/emotes_v2.py";
@@ -634,6 +635,16 @@ bool PythonWrapper::GenerateAnimator(
 		}
 		Log(out, "success!\n");
 	}
+	if (!config.enable_phonemes) {  // Phonemes disabled: strip the audio sources from the prefab.
+		std::string prefab_path = Quote(std::filesystem::path(tastt_assets_path) / "World Constraint.prefab");
+		Log(out, "Remove audio sources from prefab at {}\n", prefab_path);
+		Log(out, "Removing audio sources from prefab... ");
+		if (!InvokeWithArgs({ remove_audio_srcs_path, prefab_path },
+			"Failed to remove audio sources from prefab", out)) {
+			return false;
+		}
+		Log(out, "success!\n");
+	}
 	{
 		Log(out, "Copying canned sounds... ");
 		auto opts = std::filesystem::copy_options();
diff --git a/Scripts/transcribe.py b/Scripts/transcribe.py
index 2605bd3..0eeb1de 100644
--- a/Scripts/transcribe.py
+++ b/Scripts/transcribe.py
@@ -475,7 +475,8 @@ def transcribeAudio(audio_state):
             audio_state.transcribe_no_change_count = 0
             audio_state.transcribe_sleep_duration = audio_state.transcribe_sleep_duration_min_s
 
-def sendAudio(audio_state, estate: EmotesState):
+def sendAudio(audio_state):
+    estate = EmotesState()
     while audio_state.run_app == True:
         text = audio_state.filtered_text
         if audio_state.cfg["use_builtin"]:
@@ -699,7 +700,6 @@ def readControllerInput(audio_state):
 # whisper/__init__.py. Examples: tiny, base, small, medium.
 def transcribeLoop(config_path: str):
     cfg = app_config.getConfig(config_path)
-    estate = EmotesState()
 
     generate_utils.config.BYTES_PER_CHAR = int(cfg["bytes_per_char"])
     generate_utils.config.CHARS_PER_SYNC = int(cfg["chars_per_sync"])
@@ -793,13 +793,11 @@ def transcribeLoop(config_path: str):
             download_root = model_root,
             local_files_only = download_it)
 
-    transcribe_audio_thd = threading.Thread(
-            target = transcribeAudio,
-            args = [audio_state])
+    transcribe_audio_thd = threading.Thread(target = transcribeAudio, args = [audio_state])
     transcribe_audio_thd.daemon = True
     transcribe_audio_thd.start()
 
-    send_audio_thd = threading.Thread(target = sendAudio, args = [audio_state, estate])
+    send_audio_thd = threading.Thread(target = sendAudio, args = [audio_state])
     send_audio_thd.daemon = True
     send_audio_thd.start()
 
-- 
cgit v1.2.3