summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- GUI/GUI/GUI/Config.cpp 3
-rw-r--r-- GUI/GUI/GUI/Config.h 1
-rw-r--r-- GUI/GUI/GUI/Frame.cpp 18
-rw-r--r-- GUI/GUI/GUI/Frame.h 1
-rw-r--r-- GUI/GUI/GUI/PythonWrapper.cpp 1
-rw-r--r-- Scripts/transcribe.py 55
6 files changed, 58 insertions, 21 deletions
diff --git a/GUI/GUI/GUI/Config.cpp b/GUI/GUI/GUI/Config.cpp
index 4f730b5..d337f77 100644
--- a/GUI/GUI/GUI/Config.cpp
+++ b/GUI/GUI/GUI/Config.cpp
@@ -79,6 +79,7 @@ AppConfig::AppConfig(wxTextCtrl* out)
remove_trailing_period(false),
enable_uppercase_filter(false),
enable_lowercase_filter(false),
+ enable_debug_mode(false),
reset_on_toggle(true),
gpu_idx(0),
keybind("ctrl+x"),
@@ -131,6 +132,7 @@ bool AppConfig::Serialize(const std::filesystem::path& path) {
cm.Set("remove_trailing_period", remove_trailing_period);
cm.Set("enable_uppercase_filter", enable_uppercase_filter);
cm.Set("enable_lowercase_filter", enable_lowercase_filter);
+ cm.Set("enable_debug_mode", enable_debug_mode);
cm.Set("reset_on_toggle", reset_on_toggle);
cm.Set("gpu_idx", gpu_idx);
cm.Set("keybind", keybind);
@@ -196,6 +198,7 @@ bool AppConfig::Deserialize(const std::filesystem::path& path) {
cm.Get("remove_trailing_period", c.remove_trailing_period);
cm.Get("enable_uppercase_filter", c.enable_uppercase_filter);
cm.Get("enable_lowercase_filter", c.enable_lowercase_filter);
+ cm.Get("enable_debug_mode", c.enable_debug_mode);
cm.Get("reset_on_toggle", c.reset_on_toggle);
cm.Get("gpu_idx", c.gpu_idx);
cm.Get("keybind", c.keybind);
diff --git a/GUI/GUI/GUI/Config.h b/GUI/GUI/GUI/Config.h
index 6711d79..e4a9bf4 100644
--- a/GUI/GUI/GUI/Config.h
+++ b/GUI/GUI/GUI/Config.h
@@ -65,6 +65,7 @@ public:
bool remove_trailing_period;
bool enable_uppercase_filter;
bool enable_lowercase_filter;
+ bool enable_debug_mode;
bool reset_on_toggle;
int gpu_idx;
std::string keybind;
diff --git a/GUI/GUI/GUI/Frame.cpp b/GUI/GUI/GUI/Frame.cpp
index 1df60e7..706165b 100644
--- a/GUI/GUI/GUI/Frame.cpp
+++ b/GUI/GUI/GUI/Frame.cpp
@@ -47,6 +47,7 @@ namespace {
ID_PY_APP_REMOVE_TRAILING_PERIOD,
ID_PY_APP_ENABLE_UPPERCASE_FILTER,
ID_PY_APP_ENABLE_LOWERCASE_FILTER,
+ ID_PY_APP_ENABLE_DEBUG_MODE,
ID_PY_APP_RESET_ON_TOGGLE,
ID_PY_APP_ROWS,
ID_PY_APP_COLS,
@@ -881,6 +882,16 @@ Frame::Frame()
);
py_app_enable_lowercase_filter_ = py_app_enable_lowercase_filter;
+ auto* py_app_enable_debug_mode = new wxCheckBox(py_config_panel,
+ ID_PY_APP_ENABLE_DEBUG_MODE, "Enable debug mode");
+ py_app_enable_debug_mode->SetValue(app_c_->enable_debug_mode);
+ py_app_enable_debug_mode->SetToolTip(
+ "If checked, the transcription engine will print out "
+ "additional information. Use this if you're debugging a "
+ "technical issue."
+ );
+ py_app_enable_debug_mode_ = py_app_enable_debug_mode;
+
auto* py_app_reset_on_toggle = new wxCheckBox(py_config_panel,
ID_PY_APP_RESET_ON_TOGGLE, "Reset transcript on toggle");
py_app_reset_on_toggle->SetValue(app_c_->reset_on_toggle);
@@ -922,6 +933,8 @@ Frame::Frame()
/*flags=*/wxEXPAND);
sizer->Add(py_app_enable_lowercase_filter, /*proportion=*/0,
/*flags=*/wxEXPAND);
+ sizer->Add(py_app_enable_debug_mode, /*proportion=*/0,
+ /*flags=*/wxEXPAND);
sizer->Add(py_app_start_button, /*proportion=*/0,
/*flags=*/wxEXPAND);
sizer->Add(py_app_stop_button, /*proportion=*/0,
@@ -1440,6 +1453,9 @@ void Frame::ApplyConfigToInputFields()
auto* py_app_enable_lowercase_filter = static_cast<wxCheckBox*>(FindWindowById(ID_PY_APP_ENABLE_LOWERCASE_FILTER));
py_app_enable_lowercase_filter->SetValue(app_c_->enable_lowercase_filter);
+ auto* py_app_enable_debug_mode = static_cast<wxCheckBox*>(FindWindowById(ID_PY_APP_ENABLE_DEBUG_MODE));
+ py_app_enable_debug_mode->SetValue(app_c_->enable_debug_mode);
+
auto* py_app_reset_on_toggle = static_cast<wxCheckBox*>(FindWindowById(ID_PY_APP_RESET_ON_TOGGLE));
py_app_reset_on_toggle->SetValue(app_c_->reset_on_toggle);
@@ -2028,6 +2044,7 @@ void Frame::OnAppStart(wxCommandEvent& event) {
const bool remove_trailing_period = py_app_remove_trailing_period_->GetValue();
const bool enable_uppercase_filter = py_app_enable_uppercase_filter_->GetValue();
const bool enable_lowercase_filter = py_app_enable_lowercase_filter_->GetValue();
+ const bool enable_debug_mode = py_app_enable_debug_mode_->GetValue();
const bool reset_on_toggle = py_app_reset_on_toggle_->GetValue();
std::string rows_str = py_app_rows_->GetValue().ToStdString();
std::string cols_str = py_app_cols_->GetValue().ToStdString();
@@ -2126,6 +2143,7 @@ void Frame::OnAppStart(wxCommandEvent& event) {
app_c_->remove_trailing_period = remove_trailing_period;
app_c_->enable_uppercase_filter = enable_uppercase_filter;
app_c_->enable_lowercase_filter = enable_lowercase_filter;
+ app_c_->enable_debug_mode = enable_debug_mode;
app_c_->reset_on_toggle = reset_on_toggle;
app_c_->gpu_idx = gpu_idx;
app_c_->keybind = keybind;
diff --git a/GUI/GUI/GUI/Frame.h b/GUI/GUI/GUI/Frame.h
index 7afc005..46f5bcd 100644
--- a/GUI/GUI/GUI/Frame.h
+++ b/GUI/GUI/GUI/Frame.h
@@ -69,6 +69,7 @@ private:
wxCheckBox* py_app_remove_trailing_period_;
wxCheckBox* py_app_enable_uppercase_filter_;
wxCheckBox* py_app_enable_lowercase_filter_;
+ wxCheckBox* py_app_enable_debug_mode_;
wxCheckBox* py_app_reset_on_toggle_;
wxCheckBox* unity_clear_osc_;
diff --git a/GUI/GUI/GUI/PythonWrapper.cpp b/GUI/GUI/GUI/PythonWrapper.cpp
index e6f10c2..c5421e8 100644
--- a/GUI/GUI/GUI/PythonWrapper.cpp
+++ b/GUI/GUI/GUI/PythonWrapper.cpp
@@ -493,6 +493,7 @@ std::future<bool> PythonWrapper::StartApp(
"--remove_trailing_period", config.remove_trailing_period ? "1" : "0",
"--enable_uppercase_filter", config.enable_uppercase_filter ? "1" : "0",
"--enable_lowercase_filter", config.enable_lowercase_filter ? "1" : "0",
+ "--enable_debug_mode", config.enable_debug_mode ? "1" : "0",
"--emotes_pickle", kEmotesPickle,
"--gpu_idx", std::to_string(config.gpu_idx),
"--keybind", Quote(config.keybind),
diff --git a/Scripts/transcribe.py b/Scripts/transcribe.py
index cc1944c..28b6ca0 100644
--- a/Scripts/transcribe.py
+++ b/Scripts/transcribe.py
@@ -38,6 +38,9 @@ class AudioState:
# This matches the framerate expected by whisper.
self.RATE = 16000
+ # If set, print additional information to stdout while transcribing.
+ self.enable_debug_mode = False
+
# The maximum length that recordAudio() will put into frames before it
# starts dropping from the start.
self.MAX_LENGTH_S = 300
@@ -211,7 +214,8 @@ def resetAudioLocked(audio_state):
audio_state.transcribe_sleep_duration_min_s
if audio_state.reset_on_toggle:
- print("resetAudioLocked resetting text")
+ if audio_state.enable_debug_mode:
+ print("resetAudioLocked resetting text")
audio_state.text = ""
audio_state.preview_text = ""
audio_state.filtered_text = ""
@@ -244,7 +248,8 @@ def transcribe(audio_state, model, frames, use_cpu: bool) -> typing.Tuple[str,st
without_timestamps = False)
ranges = []
for s in segments:
- #print(f"Segment: {s}")
+ if audio_state.enable_debug_mode:
+ print(f"Segment: {s}")
ranges.append((s.start, s.end, s.text))
audio_state.ranges_ls.append(ranges)
@@ -270,13 +275,14 @@ def transcribe(audio_state, model, frames, use_cpu: bool) -> typing.Tuple[str,st
max_edit = audio_state.commit_fuzz_threshold
- #print(f"c0: {c0}, c1: {c1}, c2: {c2}")
- #if c0 == c1 and c1 == c2 and c2 == c3:
+ if audio_state.enable_debug_mode:
+ print(f"c0: {c0}, c1: {c1}, c2: {c2}")
if c0_c1_d < max_edit and c1_c2_d < max_edit and c2_c3_d < max_edit:
# For simplicity, completely reset saved audio ranges.
audio_state.ranges_ls = []
committed_text = c0[2]
- print(f"Dropping frames until {c0[1]}")
+ if audio_state.enable_debug_mode:
+ print(f"Dropping frames until {c0[1]}")
n_frames_to_drop = int(ceil(audio_state.RATE * c0[1]))
audio_state.drop_frames_till_i = n_frames_to_drop
@@ -320,7 +326,8 @@ def transcribeAudio(audio_state,
text, preview_text = transcribe(audio_state, model, audio_state.frames, use_cpu)
if len(text) == 0 and len(preview_text) == 0:
- print("no transcription, spin ({} seconds)".format(time.time() - last_transcribe_time))
+ if audio_state.enable_debug_mode:
+ print("no transcription, spin ({} seconds)".format(time.time() - last_transcribe_time))
last_transcribe_time = time.time()
continue
@@ -329,7 +336,8 @@ def transcribeAudio(audio_state,
audio_state.text = ""
audio_state.preview_text = ""
audio_state.filtered_text = ""
- print("drop transcription ({} seconds)".format(time.time() - last_transcribe_time))
+ if audio_state.enable_debug_mode:
+ print("drop transcription ({} seconds)".format(time.time() - last_transcribe_time))
last_transcribe_time = time.time()
continue
@@ -436,13 +444,14 @@ def readKeyboardInput(audio_state, enable_local_beep: bool,
if not use_builtin:
osc_ctrl.indicateSpeech(audio_state.osc_state.client, False)
osc_ctrl.toggleBoard(audio_state.osc_state.client, False)
- #playsound(os.path.abspath("../Sounds/Noise_Off_Quiet.wav"))
if audio_state.reset_on_toggle:
- print("Toggle detected, dropping transcript (-2)")
+ if audio_state.enable_debug_mode:
+ print("Toggle detected, dropping transcript (1)")
audio_state.drop_transcription = True
else:
- print("Toggle detected, committing preview text (2)")
+ if audio_state.enable_debug_mode:
+ print("Toggle detected, committing preview text (1)")
audio_state.text += audio_state.preview_text
audio_state.audio_paused = True
resetAudioLocked(audio_state)
@@ -458,7 +467,6 @@ def readKeyboardInput(audio_state, enable_local_beep: bool,
audio_state.transcribe_sleep_duration = audio_state.transcribe_sleep_duration_min_s
audio_state.audio_paused = True
- resetAudioLocked(audio_state)
if enable_local_beep == 1:
playsound(os.path.abspath("Resources/Sounds/Noise_Off_Quiet.wav"),
@@ -470,10 +478,12 @@ def readKeyboardInput(audio_state, enable_local_beep: bool,
osc_ctrl.toggleBoard(audio_state.osc_state.client, True)
osc_ctrl.lockWorld(audio_state.osc_state.client, False)
if audio_state.reset_on_toggle:
- print("Toggle detected, dropping transcript (2)")
+ if audio_state.enable_debug_mode:
+ print("Toggle detected, dropping transcript (2)")
audio_state.drop_transcription = True
else:
- print("Toggle detected, committing preview text (2)")
+ if audio_state.enable_debug_mode:
+ print("Toggle detected, committing preview text (2)")
audio_state.text += audio_state.preview_text
audio_state.audio_paused = False
@@ -492,7 +502,8 @@ def readControllerInput(audio_state, enable_local_beep: bool,
try:
session = steamvr.SessionState()
except:
- print("steamvr is off, no controller input")
+ if audio_state.enable_debug_mode:
+ print("steamvr is off, no controller input")
session = None
time.sleep(5)
@@ -607,6 +618,7 @@ def transcribeLoop(mic: str,
remove_trailing_period: bool,
enable_uppercase_filter: bool,
enable_lowercase_filter: bool,
+ enable_debug_mode: bool,
button: str,
estate: EmotesState,
window_duration_s: int,
@@ -620,6 +632,7 @@ def transcribeLoop(mic: str,
audio_state.MAX_LENGTH_S = window_duration_s
audio_state.reset_on_toggle = reset_on_toggle
audio_state.commit_fuzz_threshold = commit_fuzz_threshold
+ audio_state.enable_debug_mode = enable_debug_mode
lang_bits = language_target.split(" | ")
if len(lang_bits) == 2:
@@ -721,7 +734,6 @@ def transcribeLoop(mic: str,
keyboard_input_thd.daemon = True
keyboard_input_thd.start()
- print("Press enter to start a new message.")
for line in sys.stdin:
audio_state.transcribe_lock.acquire()
audio_state.audio_lock.acquire()
@@ -745,12 +757,6 @@ if __name__ == "__main__":
print("args: {}".format(" ".join(sys.argv)))
- # Set cwd to TaSTT/
- abspath = os.path.abspath(__file__)
- dname = os.path.dirname(abspath)
- dname = os.path.dirname(dname)
- dname = os.path.dirname(dname)
- #os.chdir(dname)
print(f"Set cwd to {os.getcwd()}")
parser = argparse.ArgumentParser()
@@ -780,6 +786,7 @@ if __name__ == "__main__":
parser.add_argument("--keybind", type=str, help="The keyboard hotkey to use to toggle transcription. For example, ctrl+shift+s")
parser.add_argument("--reset_on_toggle", type=int, help="Whether to reset (clear) the transcript every time that transcription is toggled on.")
parser.add_argument("--commit_fuzz_threshold", type=int, help="The edit distance under which two consecutive transcripts are considered to match.")
+ parser.add_argument("--enable_debug_mode", type=int, help="If set to 1, print additional information to stdout while transcribing.")
args = parser.parse_args()
if not args.mic:
@@ -863,6 +870,11 @@ if __name__ == "__main__":
else:
args.enable_lowercase_filter = False
+ if args.enable_debug_mode == 1:
+ args.enable_debug_mode = True
+ else:
+ args.enable_debug_mode = False
+
estate = EmotesState()
estate.load(args.emotes_pickle)
@@ -884,6 +896,7 @@ if __name__ == "__main__":
args.remove_trailing_period,
args.enable_uppercase_filter,
args.enable_lowercase_filter,
+ args.enable_debug_mode,
args.button,
estate, window_duration_s,
args.gpu_idx,