diff options
| -rw-r--r-- | GUI/GUI/GUI/Config.cpp | 3 | ||||
| -rw-r--r-- | GUI/GUI/GUI/Config.h | 1 | ||||
| -rw-r--r-- | GUI/GUI/GUI/Frame.cpp | 18 | ||||
| -rw-r--r-- | GUI/GUI/GUI/Frame.h | 1 | ||||
| -rw-r--r-- | GUI/GUI/GUI/PythonWrapper.cpp | 1 | ||||
| -rw-r--r-- | Scripts/transcribe.py | 51 |
6 files changed, 60 insertions, 15 deletions
diff --git a/GUI/GUI/GUI/Config.cpp b/GUI/GUI/GUI/Config.cpp index c549843..c851983 100644 --- a/GUI/GUI/GUI/Config.cpp +++ b/GUI/GUI/GUI/Config.cpp @@ -78,6 +78,7 @@ AppConfig::AppConfig(wxTextCtrl* out) remove_trailing_period(false),
enable_uppercase_filter(false),
enable_lowercase_filter(false),
+ reset_on_toggle(true),
gpu_idx(0),
keybind("ctrl+x"),
@@ -128,6 +129,7 @@ bool AppConfig::Serialize(const std::filesystem::path& path) { cm.Set("remove_trailing_period", remove_trailing_period);
cm.Set("enable_uppercase_filter", enable_uppercase_filter);
cm.Set("enable_lowercase_filter", enable_lowercase_filter);
+ cm.Set("reset_on_toggle", reset_on_toggle);
cm.Set("gpu_idx", gpu_idx);
cm.Set("keybind", keybind);
@@ -191,6 +193,7 @@ bool AppConfig::Deserialize(const std::filesystem::path& path) { cm.Get("remove_trailing_period", c.remove_trailing_period);
cm.Get("enable_uppercase_filter", c.enable_uppercase_filter);
cm.Get("enable_lowercase_filter", c.enable_lowercase_filter);
+ cm.Get("reset_on_toggle", c.reset_on_toggle);
cm.Get("gpu_idx", c.gpu_idx);
cm.Get("keybind", c.keybind);
diff --git a/GUI/GUI/GUI/Config.h b/GUI/GUI/GUI/Config.h index dd7e47a..d71aeb4 100644 --- a/GUI/GUI/GUI/Config.h +++ b/GUI/GUI/GUI/Config.h @@ -64,6 +64,7 @@ public: bool remove_trailing_period;
bool enable_uppercase_filter;
bool enable_lowercase_filter;
+ bool reset_on_toggle;
int gpu_idx;
std::string keybind;
diff --git a/GUI/GUI/GUI/Frame.cpp b/GUI/GUI/GUI/Frame.cpp index 76b85ae..f4e99b9 100644 --- a/GUI/GUI/GUI/Frame.cpp +++ b/GUI/GUI/GUI/Frame.cpp @@ -47,6 +47,7 @@ namespace { ID_PY_APP_REMOVE_TRAILING_PERIOD,
ID_PY_APP_ENABLE_UPPERCASE_FILTER,
ID_PY_APP_ENABLE_LOWERCASE_FILTER,
+ ID_PY_APP_RESET_ON_TOGGLE,
ID_PY_APP_ROWS,
ID_PY_APP_COLS,
ID_PY_APP_GPU_IDX,
@@ -860,6 +861,16 @@ Frame::Frame() );
py_app_enable_lowercase_filter_ = py_app_enable_lowercase_filter;
+ auto* py_app_reset_on_toggle = new wxCheckBox(py_config_panel,
+ ID_PY_APP_RESET_ON_TOGGLE, "Reset transcript on toggle");
+ py_app_reset_on_toggle->SetValue(app_c_->reset_on_toggle);
+ py_app_reset_on_toggle->SetToolTip(
+ "If checked, the transcript will be reset (cleared) every "
+ "time that transcription is toggled on. Only affects "
+ "keyboard controls, not the VR controls."
+ );
+ py_app_reset_on_toggle_ = py_app_reset_on_toggle;
+
// Hack: Add newlines before and after the button text to make
// the buttons bigger, and easier to click from inside VR.
auto* py_app_start_button = new wxButton(py_config_panel,
@@ -873,6 +884,8 @@ Frame::Frame() /*flags=*/wxEXPAND);
sizer->Add(py_app_config_panel_pairs, /*proportion=*/0,
/*flags=*/wxEXPAND);
+ sizer->Add(py_app_reset_on_toggle, /*proportion=*/0,
+ /*flags=*/wxEXPAND);
sizer->Add(py_app_enable_browser_src, /*proportion=*/0,
/*flags=*/wxEXPAND);
sizer->Add(py_app_enable_local_beep, /*proportion=*/0,
@@ -1403,6 +1416,9 @@ void Frame::ApplyConfigToInputFields() auto* py_app_enable_lowercase_filter = static_cast<wxCheckBox*>(FindWindowById(ID_PY_APP_ENABLE_LOWERCASE_FILTER));
py_app_enable_lowercase_filter->SetValue(app_c_->enable_lowercase_filter);
+ auto* py_app_reset_on_toggle = static_cast<wxCheckBox*>(FindWindowById(ID_PY_APP_RESET_ON_TOGGLE));
+ py_app_reset_on_toggle->SetValue(app_c_->reset_on_toggle);
+
// Unity panel
auto* unity_chars_per_sync = static_cast<wxChoice*>(FindWindowById(ID_UNITY_CHARS_PER_SYNC));
unity_chars_per_sync->SetSelection(chars_idx);
@@ -1988,6 +2004,7 @@ void Frame::OnAppStart(wxCommandEvent& event) { const bool remove_trailing_period = py_app_remove_trailing_period_->GetValue();
const bool enable_uppercase_filter = py_app_enable_uppercase_filter_->GetValue();
const bool enable_lowercase_filter = py_app_enable_lowercase_filter_->GetValue();
+ const bool reset_on_toggle = py_app_reset_on_toggle_->GetValue();
std::string rows_str = py_app_rows_->GetValue().ToStdString();
std::string cols_str = py_app_cols_->GetValue().ToStdString();
std::string chars_per_sync_str =
@@ -2062,6 +2079,7 @@ void Frame::OnAppStart(wxCommandEvent& event) { app_c_->remove_trailing_period = remove_trailing_period;
app_c_->enable_uppercase_filter = enable_uppercase_filter;
app_c_->enable_lowercase_filter = enable_lowercase_filter;
+ app_c_->reset_on_toggle = reset_on_toggle;
app_c_->gpu_idx = gpu_idx;
app_c_->keybind = keybind;
app_c_->Serialize(AppConfig::kConfigPath);
diff --git a/GUI/GUI/GUI/Frame.h b/GUI/GUI/GUI/Frame.h index ede2afc..1856e7d 100644 --- a/GUI/GUI/GUI/Frame.h +++ b/GUI/GUI/GUI/Frame.h @@ -68,6 +68,7 @@ private: wxCheckBox* py_app_remove_trailing_period_;
wxCheckBox* py_app_enable_uppercase_filter_;
wxCheckBox* py_app_enable_lowercase_filter_;
+ wxCheckBox* py_app_reset_on_toggle_;
wxCheckBox* unity_clear_osc_;
std::future<bool> py_app_;
diff --git a/GUI/GUI/GUI/PythonWrapper.cpp b/GUI/GUI/GUI/PythonWrapper.cpp index 9855b00..1402ed5 100644 --- a/GUI/GUI/GUI/PythonWrapper.cpp +++ b/GUI/GUI/GUI/PythonWrapper.cpp @@ -496,6 +496,7 @@ std::future<bool> PythonWrapper::StartApp( "--emotes_pickle", kEmotesPickle, "--gpu_idx", std::to_string(config.gpu_idx), "--keybind", Quote(config.keybind), + "--reset_on_toggle", config.reset_on_toggle ? "1" : "0", }, std::move(out_cb), std::move(in_cb), diff --git a/Scripts/transcribe.py b/Scripts/transcribe.py index 8fe6190..694fd0b 100644 --- a/Scripts/transcribe.py +++ b/Scripts/transcribe.py @@ -49,9 +49,15 @@ class AudioState: # PyAudio stream object self.stream = None - self.committed_text = "" + self.preview_text = "" self.text = "" self.filtered_text = "" + + # If set to true, then the transcript strings (`text` and friends) will + # be reset whenever transcription is toggled on. At time of writing, + # this only applies to keyboard controls. + self.reset_on_toggle = True + # List of: # List of tuples of: # Segment start time, end time, and text @@ -198,20 +204,15 @@ def resetAudioLocked(audio_state): audio_state.transcribe_sleep_duration = \ audio_state.transcribe_sleep_duration_min_s - audio_state.text = "" - audio_state.preview_text = "" - audio_state.filtered_text = "" + if audio_state.reset_on_toggle: + print("resetAudioLocked resetting text") + audio_state.text = "" + audio_state.preview_text = "" + audio_state.filtered_text = "" def resetDisplayLocked(audio_state): osc_ctrl.clear(audio_state.osc_state) -def resetAudio(audio_state): - audio_state.transcribe_lock.acquire() - audio_state.audio_lock.acquire() - resetAudioLocked(audio_state) - audio_state.audio_lock.release() - audio_state.transcribe_lock.release() - # Transcribe the audio recorded in a file. # Returns two strings: committed text, and preview text. # Committed text is temporally stable. Preview text is *not* temporally stable, @@ -422,7 +423,12 @@ def readKeyboardInput(audio_state, enable_local_beep: bool, osc_ctrl.toggleBoard(audio_state.osc_state.client, False) #playsound(os.path.abspath("../Sounds/Noise_Off_Quiet.wav")) - audio_state.drop_transcription = True + if audio_state.reset_on_toggle: + print("Toggle detected, dropping transcript (-2)") + audio_state.drop_transcription = True + else: + print("Toggle detected, committing preview text (2)") + audio_state.text += audio_state.preview_text audio_state.audio_paused = True resetAudioLocked(audio_state) resetDisplayLocked(audio_state) @@ -448,7 +454,12 @@ def readKeyboardInput(audio_state, enable_local_beep: bool, osc_ctrl.indicateSpeech(audio_state.osc_state.client, True) osc_ctrl.toggleBoard(audio_state.osc_state.client, True) osc_ctrl.lockWorld(audio_state.osc_state.client, False) - audio_state.drop_transcription = True + if audio_state.reset_on_toggle: + print("Toggle detected, dropping transcript (2)") + audio_state.drop_transcription = True + else: + print("Toggle detected, committing preview text (2)") + audio_state.text += audio_state.preview_text audio_state.audio_paused = False resetAudioLocked(audio_state) @@ -585,11 +596,13 @@ def transcribeLoop(mic: str, estate: EmotesState, window_duration_s: int, gpu_idx: int, - keyboard_hotkey: str): + keyboard_hotkey: str, + reset_on_toggle: bool): audio_state = getMicStream(mic) audio_state.whisper_language = language audio_state.language = langcodes.find(language).language audio_state.MAX_LENGTH_S = window_duration_s + audio_state.reset_on_toggle = reset_on_toggle lang_bits = language_target.split(" | ") if len(lang_bits) == 2: @@ -748,6 +761,7 @@ if __name__ == "__main__": parser.add_argument("--emotes_pickle", type=str, help="The path to emotes pickle. See emotes_v2.py for details.") parser.add_argument("--gpu_idx", type=str, help="The index of the GPU device to use. On single GPU systems, use 0.") parser.add_argument("--keybind", type=str, help="The keyboard hotkey to use to toggle transcription. For example, ctrl+shift+s") + parser.add_argument("--reset_on_toggle", type=int, help="Whether to reset (clear) the transcript every time that transcription is toggled on.") args = parser.parse_args() if not args.mic: @@ -796,6 +810,11 @@ if __name__ == "__main__": else: args.cpu = False + if args.reset_on_toggle == 1: + args.reset_on_toggle = True + else: + args.reset_on_toggle = False + if args.use_builtin == 1: args.use_builtin = True else: @@ -844,5 +863,7 @@ if __name__ == "__main__": args.enable_lowercase_filter, args.button, estate, window_duration_s, - args.gpu_idx, args.keybind) + args.gpu_idx, + args.keybind, + args.reset_on_toggle) |
