summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- GUI/GUI/GUI/Config.cpp 3
-rw-r--r-- GUI/GUI/GUI/Config.h 1
-rw-r--r-- GUI/GUI/GUI/Frame.cpp 18
-rw-r--r-- GUI/GUI/GUI/Frame.h 1
-rw-r--r-- GUI/GUI/GUI/PythonWrapper.cpp 1
-rw-r--r-- Scripts/transcribe.py 51
6 files changed, 60 insertions, 15 deletions
diff --git a/GUI/GUI/GUI/Config.cpp b/GUI/GUI/GUI/Config.cpp
index c549843..c851983 100644
--- a/GUI/GUI/GUI/Config.cpp
+++ b/GUI/GUI/GUI/Config.cpp
@@ -78,6 +78,7 @@ AppConfig::AppConfig(wxTextCtrl* out)
remove_trailing_period(false),
enable_uppercase_filter(false),
enable_lowercase_filter(false),
+ reset_on_toggle(true),
gpu_idx(0),
keybind("ctrl+x"),
@@ -128,6 +129,7 @@ bool AppConfig::Serialize(const std::filesystem::path& path) {
cm.Set("remove_trailing_period", remove_trailing_period);
cm.Set("enable_uppercase_filter", enable_uppercase_filter);
cm.Set("enable_lowercase_filter", enable_lowercase_filter);
+ cm.Set("reset_on_toggle", reset_on_toggle);
cm.Set("gpu_idx", gpu_idx);
cm.Set("keybind", keybind);
@@ -191,6 +193,7 @@ bool AppConfig::Deserialize(const std::filesystem::path& path) {
cm.Get("remove_trailing_period", c.remove_trailing_period);
cm.Get("enable_uppercase_filter", c.enable_uppercase_filter);
cm.Get("enable_lowercase_filter", c.enable_lowercase_filter);
+ cm.Get("reset_on_toggle", c.reset_on_toggle);
cm.Get("gpu_idx", c.gpu_idx);
cm.Get("keybind", c.keybind);
diff --git a/GUI/GUI/GUI/Config.h b/GUI/GUI/GUI/Config.h
index dd7e47a..d71aeb4 100644
--- a/GUI/GUI/GUI/Config.h
+++ b/GUI/GUI/GUI/Config.h
@@ -64,6 +64,7 @@ public:
bool remove_trailing_period;
bool enable_uppercase_filter;
bool enable_lowercase_filter;
+ bool reset_on_toggle;
int gpu_idx;
std::string keybind;
diff --git a/GUI/GUI/GUI/Frame.cpp b/GUI/GUI/GUI/Frame.cpp
index 76b85ae..f4e99b9 100644
--- a/GUI/GUI/GUI/Frame.cpp
+++ b/GUI/GUI/GUI/Frame.cpp
@@ -47,6 +47,7 @@ namespace {
ID_PY_APP_REMOVE_TRAILING_PERIOD,
ID_PY_APP_ENABLE_UPPERCASE_FILTER,
ID_PY_APP_ENABLE_LOWERCASE_FILTER,
+ ID_PY_APP_RESET_ON_TOGGLE,
ID_PY_APP_ROWS,
ID_PY_APP_COLS,
ID_PY_APP_GPU_IDX,
@@ -860,6 +861,16 @@ Frame::Frame()
);
py_app_enable_lowercase_filter_ = py_app_enable_lowercase_filter;
+ auto* py_app_reset_on_toggle = new wxCheckBox(py_config_panel,
+ ID_PY_APP_RESET_ON_TOGGLE, "Reset transcript on toggle");
+ py_app_reset_on_toggle->SetValue(app_c_->reset_on_toggle);
+ py_app_reset_on_toggle->SetToolTip(
+ "If checked, the transcript will be reset (cleared) every "
+ "time that transcription is toggled on. Only affects "
+ "keyboard controls, not the VR controls."
+ );
+ py_app_reset_on_toggle_ = py_app_reset_on_toggle;
+
// Hack: Add newlines before and after the button text to make
// the buttons bigger, and easier to click from inside VR.
auto* py_app_start_button = new wxButton(py_config_panel,
@@ -873,6 +884,8 @@ Frame::Frame()
/*flags=*/wxEXPAND);
sizer->Add(py_app_config_panel_pairs, /*proportion=*/0,
/*flags=*/wxEXPAND);
+ sizer->Add(py_app_reset_on_toggle, /*proportion=*/0,
+ /*flags=*/wxEXPAND);
sizer->Add(py_app_enable_browser_src, /*proportion=*/0,
/*flags=*/wxEXPAND);
sizer->Add(py_app_enable_local_beep, /*proportion=*/0,
@@ -1403,6 +1416,9 @@ void Frame::ApplyConfigToInputFields()
auto* py_app_enable_lowercase_filter = static_cast<wxCheckBox*>(FindWindowById(ID_PY_APP_ENABLE_LOWERCASE_FILTER));
py_app_enable_lowercase_filter->SetValue(app_c_->enable_lowercase_filter);
+ auto* py_app_reset_on_toggle = static_cast<wxCheckBox*>(FindWindowById(ID_PY_APP_RESET_ON_TOGGLE));
+ py_app_reset_on_toggle->SetValue(app_c_->reset_on_toggle);
+
// Unity panel
auto* unity_chars_per_sync = static_cast<wxChoice*>(FindWindowById(ID_UNITY_CHARS_PER_SYNC));
unity_chars_per_sync->SetSelection(chars_idx);
@@ -1988,6 +2004,7 @@ void Frame::OnAppStart(wxCommandEvent& event) {
const bool remove_trailing_period = py_app_remove_trailing_period_->GetValue();
const bool enable_uppercase_filter = py_app_enable_uppercase_filter_->GetValue();
const bool enable_lowercase_filter = py_app_enable_lowercase_filter_->GetValue();
+ const bool reset_on_toggle = py_app_reset_on_toggle_->GetValue();
std::string rows_str = py_app_rows_->GetValue().ToStdString();
std::string cols_str = py_app_cols_->GetValue().ToStdString();
std::string chars_per_sync_str =
@@ -2062,6 +2079,7 @@ void Frame::OnAppStart(wxCommandEvent& event) {
app_c_->remove_trailing_period = remove_trailing_period;
app_c_->enable_uppercase_filter = enable_uppercase_filter;
app_c_->enable_lowercase_filter = enable_lowercase_filter;
+ app_c_->reset_on_toggle = reset_on_toggle;
app_c_->gpu_idx = gpu_idx;
app_c_->keybind = keybind;
app_c_->Serialize(AppConfig::kConfigPath);
diff --git a/GUI/GUI/GUI/Frame.h b/GUI/GUI/GUI/Frame.h
index ede2afc..1856e7d 100644
--- a/GUI/GUI/GUI/Frame.h
+++ b/GUI/GUI/GUI/Frame.h
@@ -68,6 +68,7 @@ private:
wxCheckBox* py_app_remove_trailing_period_;
wxCheckBox* py_app_enable_uppercase_filter_;
wxCheckBox* py_app_enable_lowercase_filter_;
+ wxCheckBox* py_app_reset_on_toggle_;
wxCheckBox* unity_clear_osc_;
std::future<bool> py_app_;
diff --git a/GUI/GUI/GUI/PythonWrapper.cpp b/GUI/GUI/GUI/PythonWrapper.cpp
index 9855b00..1402ed5 100644
--- a/GUI/GUI/GUI/PythonWrapper.cpp
+++ b/GUI/GUI/GUI/PythonWrapper.cpp
@@ -496,6 +496,7 @@ std::future<bool> PythonWrapper::StartApp(
"--emotes_pickle", kEmotesPickle,
"--gpu_idx", std::to_string(config.gpu_idx),
"--keybind", Quote(config.keybind),
+ "--reset_on_toggle", config.reset_on_toggle ? "1" : "0",
},
std::move(out_cb),
std::move(in_cb),
diff --git a/Scripts/transcribe.py b/Scripts/transcribe.py
index 8fe6190..694fd0b 100644
--- a/Scripts/transcribe.py
+++ b/Scripts/transcribe.py
@@ -49,9 +49,15 @@ class AudioState:
# PyAudio stream object
self.stream = None
- self.committed_text = ""
+ self.preview_text = ""
self.text = ""
self.filtered_text = ""
+
+ # If set to true, then the transcript strings (`text` and friends) will
+ # be reset whenever transcription is toggled on. At time of writing,
+ # this only applies to keyboard controls.
+ self.reset_on_toggle = True
+
# List of:
# List of tuples of:
# Segment start time, end time, and text
@@ -198,20 +204,15 @@ def resetAudioLocked(audio_state):
audio_state.transcribe_sleep_duration = \
audio_state.transcribe_sleep_duration_min_s
- audio_state.text = ""
- audio_state.preview_text = ""
- audio_state.filtered_text = ""
+ if audio_state.reset_on_toggle:
+ print("resetAudioLocked resetting text")
+ audio_state.text = ""
+ audio_state.preview_text = ""
+ audio_state.filtered_text = ""
def resetDisplayLocked(audio_state):
osc_ctrl.clear(audio_state.osc_state)
-def resetAudio(audio_state):
- audio_state.transcribe_lock.acquire()
- audio_state.audio_lock.acquire()
- resetAudioLocked(audio_state)
- audio_state.audio_lock.release()
- audio_state.transcribe_lock.release()
-
# Transcribe the audio recorded in a file.
# Returns two strings: committed text, and preview text.
# Committed text is temporally stable. Preview text is *not* temporally stable,
@@ -422,7 +423,12 @@ def readKeyboardInput(audio_state, enable_local_beep: bool,
osc_ctrl.toggleBoard(audio_state.osc_state.client, False)
#playsound(os.path.abspath("../Sounds/Noise_Off_Quiet.wav"))
- audio_state.drop_transcription = True
+ if audio_state.reset_on_toggle:
+ print("Toggle detected, dropping transcript (-2)")
+ audio_state.drop_transcription = True
+ else:
+ print("Toggle detected, committing preview text (2)")
+ audio_state.text += audio_state.preview_text
audio_state.audio_paused = True
resetAudioLocked(audio_state)
resetDisplayLocked(audio_state)
@@ -448,7 +454,12 @@ def readKeyboardInput(audio_state, enable_local_beep: bool,
osc_ctrl.indicateSpeech(audio_state.osc_state.client, True)
osc_ctrl.toggleBoard(audio_state.osc_state.client, True)
osc_ctrl.lockWorld(audio_state.osc_state.client, False)
- audio_state.drop_transcription = True
+ if audio_state.reset_on_toggle:
+ print("Toggle detected, dropping transcript (2)")
+ audio_state.drop_transcription = True
+ else:
+ print("Toggle detected, committing preview text (2)")
+ audio_state.text += audio_state.preview_text
audio_state.audio_paused = False
resetAudioLocked(audio_state)
@@ -585,11 +596,13 @@ def transcribeLoop(mic: str,
estate: EmotesState,
window_duration_s: int,
gpu_idx: int,
- keyboard_hotkey: str):
+ keyboard_hotkey: str,
+ reset_on_toggle: bool):
audio_state = getMicStream(mic)
audio_state.whisper_language = language
audio_state.language = langcodes.find(language).language
audio_state.MAX_LENGTH_S = window_duration_s
+ audio_state.reset_on_toggle = reset_on_toggle
lang_bits = language_target.split(" | ")
if len(lang_bits) == 2:
@@ -748,6 +761,7 @@ if __name__ == "__main__":
parser.add_argument("--emotes_pickle", type=str, help="The path to emotes pickle. See emotes_v2.py for details.")
parser.add_argument("--gpu_idx", type=str, help="The index of the GPU device to use. On single GPU systems, use 0.")
parser.add_argument("--keybind", type=str, help="The keyboard hotkey to use to toggle transcription. For example, ctrl+shift+s")
+ parser.add_argument("--reset_on_toggle", type=int, help="Whether to reset (clear) the transcript every time that transcription is toggled on.")
args = parser.parse_args()
if not args.mic:
@@ -796,6 +810,11 @@ if __name__ == "__main__":
else:
args.cpu = False
+ if args.reset_on_toggle == 1:
+ args.reset_on_toggle = True
+ else:
+ args.reset_on_toggle = False
+
if args.use_builtin == 1:
args.use_builtin = True
else:
@@ -844,5 +863,7 @@ if __name__ == "__main__":
args.enable_lowercase_filter,
args.button,
estate, window_duration_s,
- args.gpu_idx, args.keybind)
+ args.gpu_idx,
+ args.keybind,
+ args.reset_on_toggle)