From cb44e4744ac82d1d35547d12254cfea09dc63fae Mon Sep 17 00:00:00 2001 From: yum Date: Fri, 1 Sep 2023 00:06:20 -0700 Subject: Add Unity panel toggle for phonemes (in-game audio indicator) If not set, the prefab will have its audio sources removed. --- GUI/GUI/GUI/Config.cpp | 5 ++++- GUI/GUI/GUI/Config.h | 1 + GUI/GUI/GUI/Frame.cpp | 34 ++++++++++++++++++++++++++++------ GUI/GUI/GUI/Frame.h | 1 + GUI/GUI/GUI/PythonWrapper.cpp | 11 +++++++++++ Scripts/transcribe.py | 10 ++++------ 6 files changed, 49 insertions(+), 13 deletions(-) diff --git a/GUI/GUI/GUI/Config.cpp b/GUI/GUI/GUI/Config.cpp index 40ac92a..456f5d4 100644 --- a/GUI/GUI/GUI/Config.cpp +++ b/GUI/GUI/GUI/Config.cpp @@ -95,7 +95,8 @@ AppConfig::AppConfig(wxTextCtrl* out) params_path(), menu_path(), unity_generated_dir("TaSTT_Generated"), - clear_osc(true) + clear_osc(true), + enable_phonemes(false) {} bool AppConfig::Serialize(const std::filesystem::path& path) { @@ -135,6 +136,7 @@ bool AppConfig::Serialize(const std::filesystem::path& path) { cm.Set("menu_path", menu_path); cm.Set("unity_generated_dir", unity_generated_dir); cm.Set("clear_osc", clear_osc); + cm.Set("enable_phonemes", enable_phonemes); return Config::Serialize(path, cm); } @@ -187,6 +189,7 @@ bool AppConfig::Deserialize(const std::filesystem::path& path) { cm.Get("menu_path", c.menu_path); cm.Get("unity_generated_dir", c.unity_generated_dir); cm.Get("clear_osc", c.clear_osc); + cm.Get("enable_phonemes", c.enable_phonemes); *this = std::move(c); return true; diff --git a/GUI/GUI/GUI/Config.h b/GUI/GUI/GUI/Config.h index 88d9b69..a6f83e2 100644 --- a/GUI/GUI/GUI/Config.h +++ b/GUI/GUI/GUI/Config.h @@ -84,5 +84,6 @@ public: std::string menu_path; std::string unity_generated_dir; bool clear_osc; + bool enable_phonemes; }; diff --git a/GUI/GUI/GUI/Frame.cpp b/GUI/GUI/GUI/Frame.cpp index bcf0a2a..432569a 100644 --- a/GUI/GUI/GUI/Frame.cpp +++ b/GUI/GUI/GUI/Frame.cpp @@ -96,6 +96,7 @@ namespace { ID_UNITY_ROWS, ID_UNITY_COLS, 
ID_UNITY_CLEAR_OSC, + ID_UNITY_ENABLE_PHONEMES, ID_DEBUG_PANEL, ID_DEBUG_OUT, ID_DEBUG_CONFIG_PANEL, @@ -1222,6 +1223,17 @@ Frame::Frame() "an existing avatar."); unity_clear_osc_ = clear_osc; + auto* enable_phonemes = new wxCheckBox(unity_config_panel, + ID_UNITY_ENABLE_PHONEMES, "Enable phonemes"); + enable_phonemes->SetValue(app_c_->enable_phonemes); + enable_phonemes->SetToolTip( + "If checked, the chatbox will be created with 5 audio " + "sources for each English vowel sound: a, e, i, o, and u. " + "Whenever a page of data is sent into the game, any " + "vowels will have the corresponding audio source enabled. " + "This uses 6 parameter bits."); + unity_enable_phonemes_ = enable_phonemes; + auto* unity_button_gen_fx = new wxButton(unity_config_panel, ID_UNITY_BUTTON_GEN_ANIMATOR, "Generate avatar assets"); unity_button_gen_fx->SetWindowStyleFlag(wxBU_EXACTFIT); @@ -1246,6 +1258,7 @@ Frame::Frame() unity_config_panel->SetSizer(sizer); sizer->Add(unity_config_panel_pairs); sizer->Add(clear_osc); + sizer->Add(enable_phonemes); sizer->Add(unity_button_gen_fx, /*proportion=*/0, /*flags=*/wxEXPAND); sizer->Add(unity_button_auto_refresh, /*proportion=*/0, @@ -1407,6 +1420,8 @@ Frame::Frame() ID_UNITY_CHARS_PER_SYNC); Bind(wxEVT_CHOICE, &Frame::OnUnityParamChange, this, ID_UNITY_BYTES_PER_CHAR); + Bind(wxEVT_CHECKBOX, &Frame::OnUnityParamChange, this, + ID_UNITY_ENABLE_PHONEMES); // wx needs this to be able to load PNGs. wxImage::AddHandler(&png_handler_); @@ -1786,6 +1801,7 @@ void Frame::OnGenerateFX(wxCommandEvent& event) app_c_->rows = rows; app_c_->cols = cols; app_c_->clear_osc = unity_clear_osc_->GetValue(); + app_c_->enable_phonemes = unity_enable_phonemes_->GetValue(); app_c_->Serialize(AppConfig::kConfigPath); std::string out; @@ -2134,15 +2150,21 @@ void Frame::OnUnityParamChangeImpl() { // 3. disable // 4. lock // 5. clear - // 6. audio indicator enable - // 7. audio indicator toggle - // 8. visual indicator 1 - // 9. 
visual indicator 2 - int misc_bits = 9; - int total_bits = select_bits + layer_bits + scale_bits + misc_bits; + int misc_bits = 5; + + int phoneme_bits = 0; + if (unity_enable_phonemes_->GetValue()) { + phoneme_bits = 6; + } + + int total_bits = select_bits + layer_bits + scale_bits + misc_bits + phoneme_bits; + Log(unity_out_, "This configuration will use {} bits of avatar parameter space:\n", total_bits); Log(unity_out_, " {} bits coming from ({} characters per sync) * ({} bytes per character)\n", layer_bits, chars_per_sync, bytes_per_char); Log(unity_out_, " {} bits coming from fixed overheads\n", select_bits + scale_bits + misc_bits); + if (phoneme_bits > 0) { + Log(unity_out_, " {} bits coming from phonemes\n", phoneme_bits); + } } void Frame::OnUnityParamChange(wxCommandEvent& event) { diff --git a/GUI/GUI/GUI/Frame.h b/GUI/GUI/GUI/Frame.h index 45ebf65..d8b13f4 100644 --- a/GUI/GUI/GUI/Frame.h +++ b/GUI/GUI/GUI/Frame.h @@ -73,6 +73,7 @@ private: wxCheckBox* py_app_enable_debug_mode_; wxCheckBox* py_app_reset_on_toggle_; wxCheckBox* unity_clear_osc_; + wxCheckBox* unity_enable_phonemes_; std::future py_app_; std::future obs_app_; diff --git a/GUI/GUI/GUI/PythonWrapper.cpp b/GUI/GUI/GUI/PythonWrapper.cpp index 4acb51d..1b041e3 100644 --- a/GUI/GUI/GUI/PythonWrapper.cpp +++ b/GUI/GUI/GUI/PythonWrapper.cpp @@ -499,6 +499,7 @@ bool PythonWrapper::GenerateAnimator( const std::string& unity_menu_generated_name, wxTextCtrl* out) { // Python script locations + std::string remove_audio_srcs_path = "Resources/Scripts/remove_audio_sources.py"; std::string libunity_path = "Resources/Scripts/libunity.py"; std::string libtastt_path = "Resources/Scripts/libtastt.py"; std::string generate_emotes_path = "Resources/Scripts/emotes_v2.py"; @@ -634,6 +635,16 @@ bool PythonWrapper::GenerateAnimator( } Log(out, "success!\n"); } + if (!config.enable_phonemes) { + std::string prefab_path = Quote(std::filesystem::path(tastt_assets_path) / "World Constraint.prefab"); + Log(out, "Remove 
audio sources from prefab at {}\n", prefab_path); + Log(out, "Removing audio sources from prefab... "); + if (!InvokeWithArgs({ remove_audio_srcs_path, prefab_path }, + "Failed to remove audio sources from prefab", out)) { + return false; + } + Log(out, "success!\n"); + } { Log(out, "Copying canned sounds... "); auto opts = std::filesystem::copy_options(); diff --git a/Scripts/transcribe.py b/Scripts/transcribe.py index 2605bd3..0eeb1de 100644 --- a/Scripts/transcribe.py +++ b/Scripts/transcribe.py @@ -475,7 +475,8 @@ def transcribeAudio(audio_state): audio_state.transcribe_no_change_count = 0 audio_state.transcribe_sleep_duration = audio_state.transcribe_sleep_duration_min_s -def sendAudio(audio_state, estate: EmotesState): +def sendAudio(audio_state): + estate = EmotesState() while audio_state.run_app == True: text = audio_state.filtered_text if audio_state.cfg["use_builtin"]: @@ -699,7 +700,6 @@ def readControllerInput(audio_state): # whisper/__init__.py. Examples: tiny, base, small, medium. def transcribeLoop(config_path: str): cfg = app_config.getConfig(config_path) - estate = EmotesState() generate_utils.config.BYTES_PER_CHAR = int(cfg["bytes_per_char"]) generate_utils.config.CHARS_PER_SYNC = int(cfg["chars_per_sync"]) @@ -793,13 +793,11 @@ def transcribeLoop(config_path: str): download_root = model_root, local_files_only = download_it) - transcribe_audio_thd = threading.Thread( - target = transcribeAudio, - args = [audio_state]) + transcribe_audio_thd = threading.Thread(target = transcribeAudio, args = [audio_state]) transcribe_audio_thd.daemon = True transcribe_audio_thd.start() - send_audio_thd = threading.Thread(target = sendAudio, args = [audio_state, estate]) + send_audio_thd = threading.Thread(target = sendAudio, args = [audio_state]) send_audio_thd.daemon = True send_audio_thd.start() -- cgit v1.2.3