diff options
Diffstat (limited to 'Scripts/transcribe.py')
| -rw-r--r-- | Scripts/transcribe.py | 85 |
1 files changed, 82 insertions, 3 deletions
diff --git a/Scripts/transcribe.py b/Scripts/transcribe.py index c066957..366f3a2 100644 --- a/Scripts/transcribe.py +++ b/Scripts/transcribe.py @@ -11,6 +11,7 @@ import copy import os import osc_ctrl import generate_utils +import keybind_event_machine import langcodes import pyaudio import numpy as np @@ -280,6 +281,75 @@ def sendAudio(audio_state, use_builtin: bool, estate: EmotesState): # Pace this out time.sleep(0.01) +def readKeyboardInput(audio_state, enable_local_beep: bool, + use_builtin: bool, keybind: str): + machine = keybind_event_machine.KeybindEventMachine(keybind) + last_press_time = 0 + + # double pressing the keybind + double_press_timeout = 0.25 + + RECORD_STATE = 0 + PAUSE_STATE = 1 + state = PAUSE_STATE + + while audio_state.run_app == True: + time.sleep(0.05) + + cur_press_time = machine.getNextPressTime() + if cur_press_time == 0: + continue + + EVENT_SINGLE_PRESS = 0 + EVENT_DOUBLE_PRESS = 1 + if last_press_time == 0: + event = EVENT_SINGLE_PRESS + elif cur_press_time - last_press_time < double_press_timeout: + event = EVENT_DOUBLE_PRESS + else: + event = EVENT_SINGLE_PRESS + last_press_time = cur_press_time + + if event == EVENT_DOUBLE_PRESS: + state = PAUSE_STATE + if not use_builtin: + osc_ctrl.indicateSpeech(audio_state.osc_state.client, False) + osc_ctrl.toggleBoard(audio_state.osc_state.client, False) + #playsound(os.path.abspath("../Sounds/Noise_Off_Quiet.wav")) + + resetAudioLocked(audio_state) + resetDisplayLocked(audio_state) + audio_state.drop_transcription = True + audio_state.audio_paused = True + continue + + # Short hold + if state == RECORD_STATE: + state = PAUSE_STATE + if not use_builtin: + osc_ctrl.indicateSpeech(audio_state.osc_state.client, False) + osc_ctrl.lockWorld(audio_state.osc_state.client, True) + audio_state.transcribe_sleep_duration = audio_state.transcribe_sleep_duration_min_s + + audio_state.audio_paused = True + + if enable_local_beep == 1: + playsound(os.path.abspath("Resources/Sounds/Noise_Off_Quiet.wav")) + elif state == PAUSE_STATE: + state = RECORD_STATE + if not use_builtin: + osc_ctrl.indicateSpeech(audio_state.osc_state.client, True) + osc_ctrl.toggleBoard(audio_state.osc_state.client, True) + osc_ctrl.lockWorld(audio_state.osc_state.client, False) + resetAudioLocked(audio_state) + resetDisplayLocked(audio_state) + + audio_state.drop_transcription = True + audio_state.audio_paused = False + + if enable_local_beep == 1: + playsound(os.path.abspath("Resources/Sounds/Noise_On_Quiet.wav")) + def readControllerInput(audio_state, enable_local_beep: bool, use_builtin: bool, button: str): session = None @@ -357,7 +427,8 @@ def readControllerInput(audio_state, enable_local_beep: bool, def transcribeLoop(mic: str, language: str, model: str, enable_local_beep: bool, use_cpu: bool, use_builtin: bool, button: str, estate: EmotesState, - window_duration_s: int, gpu_idx: int): + window_duration_s: int, gpu_idx: int, + keyboard_hotkey: str): audio_state = getMicStream(mic) audio_state.language = langcodes.find(language).language audio_state.MAX_LENGTH_S = window_duration_s @@ -391,10 +462,16 @@ def transcribeLoop(mic: str, language: str, model: str, send_audio_thd.daemon = True send_audio_thd.start() - controller_input_thd = threading.Thread(target = readControllerInput, args = [audio_state, enable_local_beep, use_builtin, button]) + controller_input_thd = threading.Thread(target = readControllerInput, args + = [audio_state, enable_local_beep, use_builtin, button]) controller_input_thd.daemon = True controller_input_thd.start() + keyboard_input_thd = threading.Thread(target = readKeyboardInput, args + = [audio_state, enable_local_beep, use_builtin, keyboard_hotkey]) + keyboard_input_thd.daemon = True + keyboard_input_thd.start() + print("Press enter to start a new message.") for line in sys.stdin: audio_state.transcribe_lock.acquire() @@ -412,6 +489,7 @@ def transcribeLoop(mic: str, language: str, model: str, audio_state.run_app = False transcribe_audio_thd.join() controller_input_thd.join() + keyboard_input_thd.join() if __name__ == "__main__": sys.stdout.reconfigure(encoding="utf-8") @@ -443,6 +521,7 @@ if __name__ == "__main__": parser.add_argument("--button", type=str, help="The controller button used to start/stop transcription. E.g. \"left joystick\"") parser.add_argument("--emotes_pickle", type=str, help="The path to emotes pickle. See emotes_v2.py for details.") parser.add_argument("--gpu_idx", type=str, help="The index of the GPU device to use. On single GPU systems, use 0.") + parser.add_argument("--keybind", type=str, help="The keyboard hotkey to use to toggle transcription. For example, ctrl+shift+s") args = parser.parse_args() if not args.mic: @@ -501,5 +580,5 @@ if __name__ == "__main__": transcribeLoop(args.mic, args.language, args.model, args.enable_local_beep, args.cpu, args.use_builtin, args.button, estate, window_duration_s, - args.gpu_idx) + args.gpu_idx, args.keybind) |
