From ebab21e22dea7931a8a7db05ff26d60cec4f5b5b Mon Sep 17 00:00:00 2001 From: yum Date: Wed, 24 May 2023 22:34:55 -0700 Subject: Add UI toggle for uwu filter UI now has a checkbox for the uwu filter. Does not materially affect resource usage or latency when enabled. --- GUI/GUI/GUI/Config.cpp | 3 +++ GUI/GUI/GUI/Config.h | 1 + GUI/GUI/GUI/Frame.cpp | 14 ++++++++++++ GUI/GUI/GUI/Frame.h | 1 + GUI/GUI/GUI/PythonWrapper.cpp | 1 + Scripts/transcribe.py | 50 +++++++++++++++++++++++++++++-------------- 6 files changed, 54 insertions(+), 16 deletions(-) diff --git a/GUI/GUI/GUI/Config.cpp b/GUI/GUI/GUI/Config.cpp index 0cf6d9d..0443278 100644 --- a/GUI/GUI/GUI/Config.cpp +++ b/GUI/GUI/GUI/Config.cpp @@ -71,6 +71,7 @@ AppConfig::AppConfig(wxTextCtrl* out) enable_local_beep(true), use_cpu(false), use_builtin(false), + enable_uwu_filter(false), gpu_idx(0), keybind("ctrl+x"), @@ -115,6 +116,7 @@ bool AppConfig::Serialize(const std::filesystem::path& path) { cm.Set("enable_local_beep", enable_local_beep); cm.Set("use_cpu", use_cpu); cm.Set("use_builtin", use_builtin); + cm.Set("enable_uwu_filter", enable_uwu_filter); cm.Set("gpu_idx", gpu_idx); cm.Set("keybind", keybind); @@ -172,6 +174,7 @@ bool AppConfig::Deserialize(const std::filesystem::path& path) { cm.Get("enable_local_beep", c.enable_local_beep); cm.Get("use_cpu", c.use_cpu); cm.Get("use_builtin", c.use_builtin); + cm.Get("enable_uwu_filter", c.enable_uwu_filter); cm.Get("gpu_idx", c.gpu_idx); cm.Get("keybind", c.keybind); diff --git a/GUI/GUI/GUI/Config.h b/GUI/GUI/GUI/Config.h index f53b700..be036ea 100644 --- a/GUI/GUI/GUI/Config.h +++ b/GUI/GUI/GUI/Config.h @@ -57,6 +57,7 @@ public: bool enable_local_beep; bool use_cpu; bool use_builtin; + bool enable_uwu_filter; int gpu_idx; std::string keybind; diff --git a/GUI/GUI/GUI/Frame.cpp b/GUI/GUI/GUI/Frame.cpp index 74a0233..f3895bc 100644 --- a/GUI/GUI/GUI/Frame.cpp +++ b/GUI/GUI/GUI/Frame.cpp @@ -39,6 +39,7 @@ namespace { ID_PY_APP_ENABLE_LOCAL_BEEP, ID_PY_APP_USE_CPU, ID_PY_APP_USE_BUILTIN, + ID_PY_APP_ENABLE_UWU_FILTER, ID_PY_APP_ROWS, ID_PY_APP_COLS, ID_PY_APP_WINDOW_DURATION, @@ -593,6 +594,15 @@ Frame::Frame() ); py_app_use_builtin_ = py_app_use_builtin; + auto* py_app_enable_uwu_filter = new wxCheckBox(py_config_panel, + ID_PY_APP_ENABLE_UWU_FILTER, "Enable uwu filter :3"); + py_app_enable_uwu_filter->SetValue(app_c_->enable_uwu_filter); + py_app_enable_uwu_filter->SetToolTip( + "If checked, transcribed text will be passed through an " + "uwu filter." + ); + py_app_enable_uwu_filter_ = py_app_enable_uwu_filter; + // Hack: Add newlines before and after the button text to make // the buttons bigger, and easier to click from inside VR. auto* py_app_start_button = new wxButton(py_config_panel, @@ -612,6 +622,8 @@ Frame::Frame() /*flags=*/wxEXPAND); sizer->Add(py_app_use_builtin, /*proportion=*/0, /*flags=*/wxEXPAND); + sizer->Add(py_app_enable_uwu_filter, /*proportion=*/0, + /*flags=*/wxEXPAND); sizer->Add(py_app_start_button, /*proportion=*/0, /*flags=*/wxEXPAND); sizer->Add(py_app_stop_button, /*proportion=*/0, @@ -1638,6 +1650,7 @@ void Frame::OnAppStart(wxCommandEvent& event) { const bool enable_local_beep = py_app_enable_local_beep_->GetValue(); const bool use_cpu = py_app_use_cpu_->GetValue(); const bool use_builtin = py_app_use_builtin_->GetValue(); + const bool enable_uwu_filter = py_app_enable_uwu_filter_->GetValue(); std::string rows_str = py_app_rows_->GetValue().ToStdString(); std::string cols_str = py_app_cols_->GetValue().ToStdString(); std::string chars_per_sync_str = @@ -1706,6 +1719,7 @@ void Frame::OnAppStart(wxCommandEvent& event) { app_c_->enable_local_beep = enable_local_beep; app_c_->use_cpu = use_cpu; app_c_->use_builtin = use_builtin; + app_c_->enable_uwu_filter = enable_uwu_filter; app_c_->gpu_idx = gpu_idx; app_c_->keybind = keybind; app_c_->Serialize(AppConfig::kConfigPath); diff --git a/GUI/GUI/GUI/Frame.h b/GUI/GUI/GUI/Frame.h index 082172e..130a716 100644 --- a/GUI/GUI/GUI/Frame.h +++ b/GUI/GUI/GUI/Frame.h @@ -80,6 +80,7 @@ private: wxCheckBox* py_app_enable_local_beep_; wxCheckBox* py_app_use_cpu_; wxCheckBox* py_app_use_builtin_; + wxCheckBox* py_app_enable_uwu_filter_; wxCheckBox* unity_clear_osc_; wxCheckBox* whisper_enable_local_beep_; wxCheckBox* whisper_use_cpu_; diff --git a/GUI/GUI/GUI/PythonWrapper.cpp b/GUI/GUI/GUI/PythonWrapper.cpp index 2347248..98a0c3e 100644 --- a/GUI/GUI/GUI/PythonWrapper.cpp +++ b/GUI/GUI/GUI/PythonWrapper.cpp @@ -473,6 +473,7 @@ std::future PythonWrapper::StartApp( "--window_duration_s", config.window_duration, "--cpu", config.use_cpu ? "1" : "0", "--use_builtin", config.use_builtin ? "1" : "0", + "--enable_uwu_filter", config.enable_uwu_filter ? "1" : "0", "--emotes_pickle", kEmotesPickle, "--gpu_idx", std::to_string(config.gpu_idx), "--keybind", Quote(config.keybind), diff --git a/Scripts/transcribe.py b/Scripts/transcribe.py index 26eb5ea..49dcc81 100644 --- a/Scripts/transcribe.py +++ b/Scripts/transcribe.py @@ -217,7 +217,7 @@ def transcribe(audio_state, model, frames, use_cpu: bool): return "".join(s.text for s in segments) -def transcribeAudio(audio_state, model, use_cpu: bool): +def transcribeAudio(audio_state, model, use_cpu: bool, enable_uwu_filter: bool): last_transcribe_time = time.time() while audio_state.run_app == True: # Pace this out @@ -257,15 +257,17 @@ def transcribeAudio(audio_state, model, use_cpu: bool): text, window_size = 25) # Apply filters to transcription - uwu_proc = subprocess.Popen(["Resources/Uwu/Uwwwu.exe", audio_state.text], - stdout=subprocess.PIPE, - stderr=subprocess.PIPE) - uwu_stdout, uwu_stderr = uwu_proc.communicate() - uwu_text = uwu_stdout.decode("utf-8") - uwu_text = uwu_text.replace("\n", "") - uwu_text = uwu_text.replace("\r", "") - - audio_state.filtered_text = uwu_text + filtered_text = audio_state.text + if enable_uwu_filter: + uwu_proc = subprocess.Popen(["Resources/Uwu/Uwwwu.exe", filtered_text], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + uwu_stdout, uwu_stderr = uwu_proc.communicate() + uwu_text = uwu_stdout.decode("utf-8") + uwu_text = uwu_text.replace("\n", "") + uwu_text = uwu_text.replace("\r", "") + filtered_text = uwu_text + audio_state.filtered_text = filtered_text now = time.time() print("Transcription ({} seconds): {}".format( @@ -475,10 +477,17 @@ def readControllerInput(audio_state, enable_local_beep: bool, # model should correspond to one of the Whisper models defined in # whisper/__init__.py. Examples: tiny, base, small, medium. -def transcribeLoop(mic: str, language: str, model: str, - enable_local_beep: bool, use_cpu: bool, use_builtin: bool, - button: str, estate: EmotesState, - window_duration_s: int, gpu_idx: int, +def transcribeLoop(mic: str, + language: str, + model: str, + enable_local_beep: bool, + use_cpu: bool, + use_builtin: bool, + enable_uwu_filter: bool, + button: str, + estate: EmotesState, + window_duration_s: int, + gpu_idx: int, keyboard_hotkey: str): audio_state = getMicStream(mic) audio_state.language = langcodes.find(language).language @@ -505,7 +514,9 @@ def transcribeLoop(mic: str, language: str, model: str, download_root = model_root, local_files_only = download_it) - transcribe_audio_thd = threading.Thread(target = transcribeAudio, args = [audio_state, model, use_cpu]) + transcribe_audio_thd = threading.Thread( + target = transcribeAudio, + args = [audio_state, model, use_cpu, enable_uwu_filter]) transcribe_audio_thd.daemon = True transcribe_audio_thd.start() @@ -569,6 +580,7 @@ if __name__ == "__main__": parser.add_argument("--window_duration_s", type=int, help="The length in seconds of the audio recording handed to the transcription algorithm") parser.add_argument("--cpu", type=int, help="If set to 1, use CPU instead of GPU") parser.add_argument("--use_builtin", type=int, help="If set to 1, use the text box built into the game.") + parser.add_argument("--enable_uwu_filter", type=int, help="If set to 1, transcribed text will be passed through an uwu filter :3.") parser.add_argument("--button", type=str, help="The controller button used to start/stop transcription. E.g. \"left joystick\"") parser.add_argument("--emotes_pickle", type=str, help="The path to emotes pickle. See emotes_v2.py for details.") parser.add_argument("--gpu_idx", type=str, help="The index of the GPU device to use. On single GPU systems, use 0.") @@ -619,6 +631,11 @@ if __name__ == "__main__": else: args.use_builtin = False + if args.enable_uwu_filter == 1: + args.enable_uwu_filter = True + else: + args.enable_uwu_filter = False + estate = EmotesState() estate.load(args.emotes_pickle) @@ -630,6 +647,7 @@ if __name__ == "__main__": print(f"PATH: {os.environ['PATH']}") transcribeLoop(args.mic, args.language, args.model, args.enable_local_beep, - args.cpu, args.use_builtin, args.button, estate, window_duration_s, + args.cpu, args.use_builtin, args.enable_uwu_filter, args.button, + estate, window_duration_s, args.gpu_idx, args.keybind) -- cgit v1.2.3