diff options
| -rw-r--r-- | GUI/GUI/GUI/Config.cpp | 3 | ||||
| -rw-r--r-- | GUI/GUI/GUI/Config.h | 1 | ||||
| -rw-r--r-- | GUI/GUI/GUI/Frame.cpp | 18 | ||||
| -rw-r--r-- | GUI/GUI/GUI/Frame.h | 1 | ||||
| -rw-r--r-- | GUI/GUI/GUI/PythonWrapper.cpp | 1 | ||||
| -rw-r--r-- | Scripts/transcribe.py | 55 |
6 files changed, 58 insertions, 21 deletions
diff --git a/GUI/GUI/GUI/Config.cpp b/GUI/GUI/GUI/Config.cpp index 4f730b5..d337f77 100644 --- a/GUI/GUI/GUI/Config.cpp +++ b/GUI/GUI/GUI/Config.cpp @@ -79,6 +79,7 @@ AppConfig::AppConfig(wxTextCtrl* out) remove_trailing_period(false),
enable_uppercase_filter(false),
enable_lowercase_filter(false),
+ enable_debug_mode(false),
reset_on_toggle(true),
gpu_idx(0),
keybind("ctrl+x"),
@@ -131,6 +132,7 @@ bool AppConfig::Serialize(const std::filesystem::path& path) { cm.Set("remove_trailing_period", remove_trailing_period);
cm.Set("enable_uppercase_filter", enable_uppercase_filter);
cm.Set("enable_lowercase_filter", enable_lowercase_filter);
+ cm.Set("enable_debug_mode", enable_debug_mode);
cm.Set("reset_on_toggle", reset_on_toggle);
cm.Set("gpu_idx", gpu_idx);
cm.Set("keybind", keybind);
@@ -196,6 +198,7 @@ bool AppConfig::Deserialize(const std::filesystem::path& path) { cm.Get("remove_trailing_period", c.remove_trailing_period);
cm.Get("enable_uppercase_filter", c.enable_uppercase_filter);
cm.Get("enable_lowercase_filter", c.enable_lowercase_filter);
+ cm.Get("enable_debug_mode", c.enable_debug_mode);
cm.Get("reset_on_toggle", c.reset_on_toggle);
cm.Get("gpu_idx", c.gpu_idx);
cm.Get("keybind", c.keybind);
diff --git a/GUI/GUI/GUI/Config.h b/GUI/GUI/GUI/Config.h index 6711d79..e4a9bf4 100644 --- a/GUI/GUI/GUI/Config.h +++ b/GUI/GUI/GUI/Config.h @@ -65,6 +65,7 @@ public: bool remove_trailing_period;
bool enable_uppercase_filter;
bool enable_lowercase_filter;
+ bool enable_debug_mode;
bool reset_on_toggle;
int gpu_idx;
std::string keybind;
diff --git a/GUI/GUI/GUI/Frame.cpp b/GUI/GUI/GUI/Frame.cpp index 1df60e7..706165b 100644 --- a/GUI/GUI/GUI/Frame.cpp +++ b/GUI/GUI/GUI/Frame.cpp @@ -47,6 +47,7 @@ namespace { ID_PY_APP_REMOVE_TRAILING_PERIOD,
ID_PY_APP_ENABLE_UPPERCASE_FILTER,
ID_PY_APP_ENABLE_LOWERCASE_FILTER,
+ ID_PY_APP_ENABLE_DEBUG_MODE,
ID_PY_APP_RESET_ON_TOGGLE,
ID_PY_APP_ROWS,
ID_PY_APP_COLS,
@@ -881,6 +882,16 @@ Frame::Frame() );
py_app_enable_lowercase_filter_ = py_app_enable_lowercase_filter;
+ auto* py_app_enable_debug_mode = new wxCheckBox(py_config_panel,
+ ID_PY_APP_ENABLE_DEBUG_MODE, "Enable debug mode");
+ py_app_enable_debug_mode->SetValue(app_c_->enable_debug_mode);
+ py_app_enable_debug_mode->SetToolTip(
+ "If checked, the transcription engine will print out "
+ "additional information. Use this if you're debugging a "
+ "technical issue."
+ );
+ py_app_enable_debug_mode_ = py_app_enable_debug_mode;
+
auto* py_app_reset_on_toggle = new wxCheckBox(py_config_panel,
ID_PY_APP_RESET_ON_TOGGLE, "Reset transcript on toggle");
py_app_reset_on_toggle->SetValue(app_c_->reset_on_toggle);
@@ -922,6 +933,8 @@ Frame::Frame() /*flags=*/wxEXPAND);
sizer->Add(py_app_enable_lowercase_filter, /*proportion=*/0,
/*flags=*/wxEXPAND);
+ sizer->Add(py_app_enable_debug_mode, /*proportion=*/0,
+ /*flags=*/wxEXPAND);
sizer->Add(py_app_start_button, /*proportion=*/0,
/*flags=*/wxEXPAND);
sizer->Add(py_app_stop_button, /*proportion=*/0,
@@ -1440,6 +1453,9 @@ void Frame::ApplyConfigToInputFields() auto* py_app_enable_lowercase_filter = static_cast<wxCheckBox*>(FindWindowById(ID_PY_APP_ENABLE_LOWERCASE_FILTER));
py_app_enable_lowercase_filter->SetValue(app_c_->enable_lowercase_filter);
+ auto* py_app_enable_debug_mode = static_cast<wxCheckBox*>(FindWindowById(ID_PY_APP_ENABLE_DEBUG_MODE));
+ py_app_enable_debug_mode->SetValue(app_c_->enable_debug_mode);
+
auto* py_app_reset_on_toggle = static_cast<wxCheckBox*>(FindWindowById(ID_PY_APP_RESET_ON_TOGGLE));
py_app_reset_on_toggle->SetValue(app_c_->reset_on_toggle);
@@ -2028,6 +2044,7 @@ void Frame::OnAppStart(wxCommandEvent& event) { const bool remove_trailing_period = py_app_remove_trailing_period_->GetValue();
const bool enable_uppercase_filter = py_app_enable_uppercase_filter_->GetValue();
const bool enable_lowercase_filter = py_app_enable_lowercase_filter_->GetValue();
+ const bool enable_debug_mode = py_app_enable_debug_mode_->GetValue();
const bool reset_on_toggle = py_app_reset_on_toggle_->GetValue();
std::string rows_str = py_app_rows_->GetValue().ToStdString();
std::string cols_str = py_app_cols_->GetValue().ToStdString();
@@ -2126,6 +2143,7 @@ void Frame::OnAppStart(wxCommandEvent& event) { app_c_->remove_trailing_period = remove_trailing_period;
app_c_->enable_uppercase_filter = enable_uppercase_filter;
app_c_->enable_lowercase_filter = enable_lowercase_filter;
+ app_c_->enable_debug_mode = enable_debug_mode;
app_c_->reset_on_toggle = reset_on_toggle;
app_c_->gpu_idx = gpu_idx;
app_c_->keybind = keybind;
diff --git a/GUI/GUI/GUI/Frame.h b/GUI/GUI/GUI/Frame.h index 7afc005..46f5bcd 100644 --- a/GUI/GUI/GUI/Frame.h +++ b/GUI/GUI/GUI/Frame.h @@ -69,6 +69,7 @@ private: wxCheckBox* py_app_remove_trailing_period_;
wxCheckBox* py_app_enable_uppercase_filter_;
wxCheckBox* py_app_enable_lowercase_filter_;
+ wxCheckBox* py_app_enable_debug_mode_;
wxCheckBox* py_app_reset_on_toggle_;
wxCheckBox* unity_clear_osc_;
diff --git a/GUI/GUI/GUI/PythonWrapper.cpp b/GUI/GUI/GUI/PythonWrapper.cpp index e6f10c2..c5421e8 100644 --- a/GUI/GUI/GUI/PythonWrapper.cpp +++ b/GUI/GUI/GUI/PythonWrapper.cpp @@ -493,6 +493,7 @@ std::future<bool> PythonWrapper::StartApp( "--remove_trailing_period", config.remove_trailing_period ? "1" : "0", "--enable_uppercase_filter", config.enable_uppercase_filter ? "1" : "0", "--enable_lowercase_filter", config.enable_lowercase_filter ? "1" : "0", + "--enable_debug_mode", config.enable_debug_mode ? "1" : "0", "--emotes_pickle", kEmotesPickle, "--gpu_idx", std::to_string(config.gpu_idx), "--keybind", Quote(config.keybind), diff --git a/Scripts/transcribe.py b/Scripts/transcribe.py index cc1944c..28b6ca0 100644 --- a/Scripts/transcribe.py +++ b/Scripts/transcribe.py @@ -38,6 +38,9 @@ class AudioState: # This matches the framerate expected by whisper. self.RATE = 16000 + # If set, print additional information to stdout while transcribing. + self.enable_debug_mode = False + # The maximum length that recordAudio() will put into frames before it # starts dropping from the start. self.MAX_LENGTH_S = 300 @@ -211,7 +214,8 @@ def resetAudioLocked(audio_state): audio_state.transcribe_sleep_duration_min_s if audio_state.reset_on_toggle: - print("resetAudioLocked resetting text") + if audio_state.enable_debug_mode: + print("resetAudioLocked resetting text") audio_state.text = "" audio_state.preview_text = "" audio_state.filtered_text = "" @@ -244,7 +248,8 @@ def transcribe(audio_state, model, frames, use_cpu: bool) -> typing.Tuple[str,st without_timestamps = False) ranges = [] for s in segments: - #print(f"Segment: {s}") + if audio_state.enable_debug_mode: + print(f"Segment: {s}") ranges.append((s.start, s.end, s.text)) audio_state.ranges_ls.append(ranges) @@ -270,13 +275,14 @@ def transcribe(audio_state, model, frames, use_cpu: bool) -> typing.Tuple[str,st max_edit = audio_state.commit_fuzz_threshold - #print(f"c0: {c0}, c1: {c1}, c2: {c2}") - #if c0 == c1 and c1 == c2 and c2 == c3: + if audio_state.enable_debug_mode: + print(f"c0: {c0}, c1: {c1}, c2: {c2}") if c0_c1_d < max_edit and c1_c2_d < max_edit and c2_c3_d < max_edit: # For simplicity, completely reset saved audio ranges. audio_state.ranges_ls = [] committed_text = c0[2] - print(f"Dropping frames until {c0[1]}") + if audio_state.enable_debug_mode: + print(f"Dropping frames until {c0[1]}") n_frames_to_drop = int(ceil(audio_state.RATE * c0[1])) audio_state.drop_frames_till_i = n_frames_to_drop @@ -320,7 +326,8 @@ def transcribeAudio(audio_state, text, preview_text = transcribe(audio_state, model, audio_state.frames, use_cpu) if len(text) == 0 and len(preview_text) == 0: - print("no transcription, spin ({} seconds)".format(time.time() - last_transcribe_time)) + if audio_state.enable_debug_mode: + print("no transcription, spin ({} seconds)".format(time.time() - last_transcribe_time)) last_transcribe_time = time.time() continue @@ -329,7 +336,8 @@ def transcribeAudio(audio_state, audio_state.text = "" audio_state.preview_text = "" audio_state.filtered_text = "" - print("drop transcription ({} seconds)".format(time.time() - last_transcribe_time)) + if audio_state.enable_debug_mode: + print("drop transcription ({} seconds)".format(time.time() - last_transcribe_time)) last_transcribe_time = time.time() continue @@ -436,13 +444,14 @@ def readKeyboardInput(audio_state, enable_local_beep: bool, if not use_builtin: osc_ctrl.indicateSpeech(audio_state.osc_state.client, False) osc_ctrl.toggleBoard(audio_state.osc_state.client, False) - #playsound(os.path.abspath("../Sounds/Noise_Off_Quiet.wav")) if audio_state.reset_on_toggle: - print("Toggle detected, dropping transcript (-2)") + if audio_state.enable_debug_mode: + print("Toggle detected, dropping transcript (1)") audio_state.drop_transcription = True else: - print("Toggle detected, committing preview text (2)") + if audio_state.enable_debug_mode: + print("Toggle detected, committing preview text (1)") audio_state.text += audio_state.preview_text audio_state.audio_paused = True resetAudioLocked(audio_state) @@ -458,7 +467,6 @@ def readKeyboardInput(audio_state, enable_local_beep: bool, audio_state.transcribe_sleep_duration = audio_state.transcribe_sleep_duration_min_s audio_state.audio_paused = True - resetAudioLocked(audio_state) if enable_local_beep == 1: playsound(os.path.abspath("Resources/Sounds/Noise_Off_Quiet.wav"), @@ -470,10 +478,12 @@ def readKeyboardInput(audio_state, enable_local_beep: bool, osc_ctrl.toggleBoard(audio_state.osc_state.client, True) osc_ctrl.lockWorld(audio_state.osc_state.client, False) if audio_state.reset_on_toggle: - print("Toggle detected, dropping transcript (2)") + if audio_state.enable_debug_mode: + print("Toggle detected, dropping transcript (2)") audio_state.drop_transcription = True else: - print("Toggle detected, committing preview text (2)") + if audio_state.enable_debug_mode: + print("Toggle detected, committing preview text (2)") audio_state.text += audio_state.preview_text audio_state.audio_paused = False @@ -492,7 +502,8 @@ def readControllerInput(audio_state, enable_local_beep: bool, try: session = steamvr.SessionState() except: - print("steamvr is off, no controller input") + if audio_state.enable_debug_mode: + print("steamvr is off, no controller input") session = None time.sleep(5) @@ -607,6 +618,7 @@ def transcribeLoop(mic: str, remove_trailing_period: bool, enable_uppercase_filter: bool, enable_lowercase_filter: bool, + enable_debug_mode: bool, button: str, estate: EmotesState, window_duration_s: int, @@ -620,6 +632,7 @@ def transcribeLoop(mic: str, audio_state.MAX_LENGTH_S = window_duration_s audio_state.reset_on_toggle = reset_on_toggle audio_state.commit_fuzz_threshold = commit_fuzz_threshold + audio_state.enable_debug_mode = enable_debug_mode lang_bits = language_target.split(" | ") if len(lang_bits) == 2: @@ -721,7 +734,6 @@ def transcribeLoop(mic: str, keyboard_input_thd.daemon = True keyboard_input_thd.start() - print("Press enter to start a new message.") for line in sys.stdin: audio_state.transcribe_lock.acquire() audio_state.audio_lock.acquire() @@ -745,12 +757,6 @@ if __name__ == "__main__": print("args: {}".format(" ".join(sys.argv))) - # Set cwd to TaSTT/ - abspath = os.path.abspath(__file__) - dname = os.path.dirname(abspath) - dname = os.path.dirname(dname) - dname = os.path.dirname(dname) - #os.chdir(dname) print(f"Set cwd to {os.getcwd()}") parser = argparse.ArgumentParser() @@ -780,6 +786,7 @@ if __name__ == "__main__": parser.add_argument("--keybind", type=str, help="The keyboard hotkey to use to toggle transcription. For example, ctrl+shift+s") parser.add_argument("--reset_on_toggle", type=int, help="Whether to reset (clear) the transcript every time that transcription is toggled on.") parser.add_argument("--commit_fuzz_threshold", type=int, help="The edit distance under which two consecutive transcripts are considered to match.") + parser.add_argument("--enable_debug_mode", type=int, help="If set to 1, print additional information to stdout while transcribing.") args = parser.parse_args() if not args.mic: @@ -863,6 +870,11 @@ if __name__ == "__main__": else: args.enable_lowercase_filter = False + if args.enable_debug_mode == 1: + args.enable_debug_mode = True + else: + args.enable_debug_mode = False + estate = EmotesState() estate.load(args.emotes_pickle) @@ -884,6 +896,7 @@ if __name__ == "__main__": args.remove_trailing_period, args.enable_uppercase_filter, args.enable_lowercase_filter, + args.enable_debug_mode, args.button, estate, window_duration_s, args.gpu_idx, |
