summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author yum <yum.food.vr@gmail.com> 2023-09-01 00:06:20 -0700
committer yum <yum.food.vr@gmail.com> 2023-09-01 00:07:06 -0700
commit cb44e4744ac82d1d35547d12254cfea09dc63fae (patch)
tree 8496d64bb441643287c1396e57b1e577b5e15b4a
parent 62b8dfa0e315f0d960afe20e116fb4ca3d59a08a (diff)
Add Unity panel toggle for phonemes (in-game audio indicator)
If not set, the prefab will have its audio sources removed.
-rw-r--r-- GUI/GUI/GUI/Config.cpp 5
-rw-r--r-- GUI/GUI/GUI/Config.h 1
-rw-r--r-- GUI/GUI/GUI/Frame.cpp 34
-rw-r--r-- GUI/GUI/GUI/Frame.h 1
-rw-r--r-- GUI/GUI/GUI/PythonWrapper.cpp 11
-rw-r--r-- Scripts/transcribe.py 10
6 files changed, 49 insertions, 13 deletions
diff --git a/GUI/GUI/GUI/Config.cpp b/GUI/GUI/GUI/Config.cpp
index 40ac92a..456f5d4 100644
--- a/GUI/GUI/GUI/Config.cpp
+++ b/GUI/GUI/GUI/Config.cpp
@@ -95,7 +95,8 @@ AppConfig::AppConfig(wxTextCtrl* out)
params_path(),
menu_path(),
unity_generated_dir("TaSTT_Generated"),
- clear_osc(true)
+ clear_osc(true),
+ enable_phonemes(false)
{}
bool AppConfig::Serialize(const std::filesystem::path& path) {
@@ -135,6 +136,7 @@ bool AppConfig::Serialize(const std::filesystem::path& path) {
cm.Set("menu_path", menu_path);
cm.Set("unity_generated_dir", unity_generated_dir);
cm.Set("clear_osc", clear_osc);
+ cm.Set("enable_phonemes", enable_phonemes);
return Config::Serialize(path, cm);
}
@@ -187,6 +189,7 @@ bool AppConfig::Deserialize(const std::filesystem::path& path) {
cm.Get("menu_path", c.menu_path);
cm.Get("unity_generated_dir", c.unity_generated_dir);
cm.Get("clear_osc", c.clear_osc);
+ cm.Get("enable_phonemes", c.enable_phonemes);
*this = std::move(c);
return true;
diff --git a/GUI/GUI/GUI/Config.h b/GUI/GUI/GUI/Config.h
index 88d9b69..a6f83e2 100644
--- a/GUI/GUI/GUI/Config.h
+++ b/GUI/GUI/GUI/Config.h
@@ -84,5 +84,6 @@ public:
std::string menu_path;
std::string unity_generated_dir;
bool clear_osc;
+ bool enable_phonemes;
};
diff --git a/GUI/GUI/GUI/Frame.cpp b/GUI/GUI/GUI/Frame.cpp
index bcf0a2a..432569a 100644
--- a/GUI/GUI/GUI/Frame.cpp
+++ b/GUI/GUI/GUI/Frame.cpp
@@ -96,6 +96,7 @@ namespace {
ID_UNITY_ROWS,
ID_UNITY_COLS,
ID_UNITY_CLEAR_OSC,
+ ID_UNITY_ENABLE_PHONEMES,
ID_DEBUG_PANEL,
ID_DEBUG_OUT,
ID_DEBUG_CONFIG_PANEL,
@@ -1222,6 +1223,17 @@ Frame::Frame()
"an existing avatar.");
unity_clear_osc_ = clear_osc;
+ auto* enable_phonemes = new wxCheckBox(unity_config_panel,
+ ID_UNITY_ENABLE_PHONEMES, "Enable phonemes");
+ enable_phonemes->SetValue(app_c_->enable_phonemes);
+ enable_phonemes->SetToolTip(
+ "If checked, the chatbox will be created with 5 audio "
+ "sources for each English vowel sound: a, e, i, o, and u. "
+ "Whenever a page of data is sent into the game, any "
+ "vowels will have the corresponding audio source enabled. "
+ "This uses 6 parameter bits.");
+ unity_enable_phonemes_ = enable_phonemes;
+
auto* unity_button_gen_fx = new wxButton(unity_config_panel,
ID_UNITY_BUTTON_GEN_ANIMATOR, "Generate avatar assets");
unity_button_gen_fx->SetWindowStyleFlag(wxBU_EXACTFIT);
@@ -1246,6 +1258,7 @@ Frame::Frame()
unity_config_panel->SetSizer(sizer);
sizer->Add(unity_config_panel_pairs);
sizer->Add(clear_osc);
+ sizer->Add(enable_phonemes);
sizer->Add(unity_button_gen_fx, /*proportion=*/0,
/*flags=*/wxEXPAND);
sizer->Add(unity_button_auto_refresh, /*proportion=*/0,
@@ -1407,6 +1420,8 @@ Frame::Frame()
ID_UNITY_CHARS_PER_SYNC);
Bind(wxEVT_CHOICE, &Frame::OnUnityParamChange, this,
ID_UNITY_BYTES_PER_CHAR);
+ Bind(wxEVT_CHECKBOX, &Frame::OnUnityParamChange, this,
+ ID_UNITY_ENABLE_PHONEMES);
// wx needs this to be able to load PNGs.
wxImage::AddHandler(&png_handler_);
@@ -1786,6 +1801,7 @@ void Frame::OnGenerateFX(wxCommandEvent& event)
app_c_->rows = rows;
app_c_->cols = cols;
app_c_->clear_osc = unity_clear_osc_->GetValue();
+ app_c_->enable_phonemes = unity_enable_phonemes_->GetValue();
app_c_->Serialize(AppConfig::kConfigPath);
std::string out;
@@ -2134,15 +2150,21 @@ void Frame::OnUnityParamChangeImpl() {
// 3. disable
// 4. lock
// 5. clear
- // 6. audio indicator enable
- // 7. audio indicator toggle
- // 8. visual indicator 1
- // 9. visual indicator 2
- int misc_bits = 9;
- int total_bits = select_bits + layer_bits + scale_bits + misc_bits;
+ int misc_bits = 5;
+
+ int phoneme_bits = 0;
+ if (unity_enable_phonemes_->GetValue()) {
+ phoneme_bits = 6;
+ }
+
+ int total_bits = select_bits + layer_bits + scale_bits + misc_bits + phoneme_bits;
+
Log(unity_out_, "This configuration will use {} bits of avatar parameter space:\n", total_bits);
Log(unity_out_, " {} bits coming from ({} characters per sync) * ({} bytes per character)\n", layer_bits, chars_per_sync, bytes_per_char);
Log(unity_out_, " {} bits coming from fixed overheads\n", select_bits + scale_bits + misc_bits);
+ if (phoneme_bits > 0) {
+ Log(unity_out_, " {} bits coming from phonemes\n", phoneme_bits);
+ }
}
void Frame::OnUnityParamChange(wxCommandEvent& event) {
diff --git a/GUI/GUI/GUI/Frame.h b/GUI/GUI/GUI/Frame.h
index 45ebf65..d8b13f4 100644
--- a/GUI/GUI/GUI/Frame.h
+++ b/GUI/GUI/GUI/Frame.h
@@ -73,6 +73,7 @@ private:
wxCheckBox* py_app_enable_debug_mode_;
wxCheckBox* py_app_reset_on_toggle_;
wxCheckBox* unity_clear_osc_;
+ wxCheckBox* unity_enable_phonemes_;
std::future<bool> py_app_;
std::future<bool> obs_app_;
diff --git a/GUI/GUI/GUI/PythonWrapper.cpp b/GUI/GUI/GUI/PythonWrapper.cpp
index 4acb51d..1b041e3 100644
--- a/GUI/GUI/GUI/PythonWrapper.cpp
+++ b/GUI/GUI/GUI/PythonWrapper.cpp
@@ -499,6 +499,7 @@ bool PythonWrapper::GenerateAnimator(
const std::string& unity_menu_generated_name,
wxTextCtrl* out) {
// Python script locations
+ std::string remove_audio_srcs_path = "Resources/Scripts/remove_audio_sources.py";
std::string libunity_path = "Resources/Scripts/libunity.py";
std::string libtastt_path = "Resources/Scripts/libtastt.py";
std::string generate_emotes_path = "Resources/Scripts/emotes_v2.py";
@@ -634,6 +635,16 @@ bool PythonWrapper::GenerateAnimator(
}
Log(out, "success!\n");
}
+ if (!config.enable_phonemes) {  // Phonemes are disabled, so strip the audio sources from the prefab.
+ std::string prefab_path = Quote(std::filesystem::path(tastt_assets_path) / "World Constraint.prefab");
+ Log(out, "Remove audio sources from prefab at {}\n", prefab_path);
+ Log(out, "Removing audio sources from prefab... ");
+ if (!InvokeWithArgs({ remove_audio_srcs_path, prefab_path },
+ "Failed to remove audio sources from prefab", out)) {
+ return false;
+ }
+ Log(out, "success!\n");
+ }
{
Log(out, "Copying canned sounds... ");
auto opts = std::filesystem::copy_options();
diff --git a/Scripts/transcribe.py b/Scripts/transcribe.py
index 2605bd3..0eeb1de 100644
--- a/Scripts/transcribe.py
+++ b/Scripts/transcribe.py
@@ -475,7 +475,8 @@ def transcribeAudio(audio_state):
audio_state.transcribe_no_change_count = 0
audio_state.transcribe_sleep_duration = audio_state.transcribe_sleep_duration_min_s
-def sendAudio(audio_state, estate: EmotesState):
+def sendAudio(audio_state):
+ estate = EmotesState()
while audio_state.run_app == True:
text = audio_state.filtered_text
if audio_state.cfg["use_builtin"]:
@@ -699,7 +700,6 @@ def readControllerInput(audio_state):
# whisper/__init__.py. Examples: tiny, base, small, medium.
def transcribeLoop(config_path: str):
cfg = app_config.getConfig(config_path)
- estate = EmotesState()
generate_utils.config.BYTES_PER_CHAR = int(cfg["bytes_per_char"])
generate_utils.config.CHARS_PER_SYNC = int(cfg["chars_per_sync"])
@@ -793,13 +793,11 @@ def transcribeLoop(config_path: str):
download_root = model_root,
local_files_only = download_it)
- transcribe_audio_thd = threading.Thread(
- target = transcribeAudio,
- args = [audio_state])
+ transcribe_audio_thd = threading.Thread(target = transcribeAudio, args = [audio_state])
transcribe_audio_thd.daemon = True
transcribe_audio_thd.start()
- send_audio_thd = threading.Thread(target = sendAudio, args = [audio_state, estate])
+ send_audio_thd = threading.Thread(target = sendAudio, args = [audio_state])
send_audio_thd.daemon = True
send_audio_thd.start()