diff options
| author | yum <yum.food.vr@gmail.com> | 2023-08-25 12:50:59 -0700 |
|---|---|---|
| committer | yum <yum.food.vr@gmail.com> | 2023-08-25 12:50:59 -0700 |
| commit | 302f7ba09f2ee115d0ee4b8f0841f6ffcd50ec57 (patch) | |
| tree | 5c07175619a1e9d5e56a30f8d2fdd4e6bbde1623 /Scripts/transcribe.py | |
| parent | 9e43487c1bf62402e96cb6139b24cd8446515673 (diff) | |
Put audio feedback into its own thread
I think this improves the code structure of the controller input thread and
leads to some deduplication, so I'm going to keep it. However, the
intended purpose was to decrease lag when pressing buttons, and in that
regard it failed.
The lag goes all the way down to the input layer, implying that the
input thread is not able to consistently run at its intended 100 Hz
sample rate. I suspect that the Python global interpreter lock (GIL) is
at fault.
Since we can't realistically move all our functionality into one thread
in a non-blocking model, I think multiprocessing is the logical choice
going forward. Each thread in transcribe.py would become its own
process, and they would communicate via pub/sub through an intermediary
process sitting in the middle.
Diffstat (limited to 'Scripts/transcribe.py')
| -rw-r--r-- | Scripts/transcribe.py | 68 |
1 files changed, 52 insertions, 16 deletions
diff --git a/Scripts/transcribe.py b/Scripts/transcribe.py index e9873e9..4b00bd0 100644 --- a/Scripts/transcribe.py +++ b/Scripts/transcribe.py @@ -5,7 +5,6 @@ from emotes_v2 import EmotesState from faster_whisper import WhisperModel from functools import partial from math import ceil, floor -from playsound import playsound from profanity_filter import ProfanityFilter from sentence_splitter import split_text_into_sentences @@ -30,6 +29,7 @@ import time import transformers import typing import wave +import winsound class AudioState: def __init__(self): @@ -86,6 +86,14 @@ class AudioState: # Locks access to `frames`, and audio stored on disk. self.audio_lock = threading.Lock() + # Audio events that should play. Input thread appends to this list, + # audio feedback thread drains it. + self.audio_events = [] + self.AUDIO_EVENT_TOGGLE_ON = 1 + self.AUDIO_EVENT_TOGGLE_OFF = 2 + self.AUDIO_EVENT_DISMISS = 3 + self.AUDIO_EVENT_UPDATE = 4 + # Used to tell the threads when to stop. self.run_app = True @@ -532,8 +540,7 @@ def readKeyboardInput(audio_state, enable_local_beep: bool, audio_state.audio_paused = True if enable_local_beep == 1: - playsound(os.path.abspath("Resources/Sounds/Noise_Off_Quiet.wav"), - block=False) + audio_state.audio_events.append(audio_state.AUDIO_EVENT_TOGGLE_OFF) elif state == PAUSE_STATE: state = RECORD_STATE if not use_builtin: @@ -554,8 +561,37 @@ def readKeyboardInput(audio_state, enable_local_beep: bool, resetDisplayLocked(audio_state) if enable_local_beep == 1: - playsound(os.path.abspath("Resources/Sounds/Noise_On_Quiet.wav"), - block=False) + audio_state.audio_events.append(audio_state.AUDIO_EVENT_TOGGLE_ON) + +def audioFeedbackThread(audio_state, enable_local_beep: bool, + use_builtin: bool, button: str): + with open(os.path.abspath("Resources/Sounds/Noise_On_Quiet.wav"), "rb") as f: + waveform0 = f.read() + with open(os.path.abspath("Resources/Sounds/Noise_Off_Quiet.wav"), "rb") as f: + waveform1 = f.read() + with 
open(os.path.abspath("Resources/Sounds/Dismiss_Noise_Quiet.wav"), "rb") as f: + waveform2 = f.read() + with open(os.path.abspath("Resources/Sounds/KB_Noise_Off_Quiet.wav"), "rb") as f: + waveform3 = f.read() + while audio_state.run_app == True: + time.sleep(0.01) + + if len(audio_state.audio_events) == 0: + continue + + event = audio_state.audio_events[0] + audio_state.audio_events = audio_state.audio_events[1:] + + waveform = waveform0 + if event == audio_state.AUDIO_EVENT_TOGGLE_ON: + waveform = waveform0 + elif event == audio_state.AUDIO_EVENT_TOGGLE_OFF: + waveform = waveform1 + elif event == audio_state.AUDIO_EVENT_DISMISS: + waveform = waveform2 + elif event == audio_state.AUDIO_EVENT_UPDATE: + waveform = waveform3 + winsound.PlaySound(waveform, winsound.SND_MEMORY) def readControllerInput(audio_state, enable_local_beep: bool, use_builtin: bool, button: str): @@ -578,7 +614,6 @@ def readControllerInput(audio_state, enable_local_beep: bool, button_generator = steamvr.pollButtonPress(hand=hand_id, button=button_id) while audio_state.run_app == True: time.sleep(0.01) - event = next(button_generator) if event == steamvr.EVENT_RISING_EDGE: @@ -603,13 +638,11 @@ def readControllerInput(audio_state, enable_local_beep: bool, if last_rising - last_medium_press_end < 1.0: # Type transcription if enable_local_beep == 1: - playsound(os.path.abspath("Resources/Sounds/KB_Noise_Off_Quiet.wav"), - block=False) + audio_state.audio_events.append(audio_state.AUDIO_EVENT_UPDATE) keyboard.write(audio_state.filtered_text) else: if enable_local_beep == 1: - playsound(os.path.abspath("Resources/Sounds/Noise_Off_Quiet.wav"), - block=False) + audio_state.audio_events.append(audio_state.AUDIO_EVENT_TOGGLE_OFF) elif now - last_rising > 0.5: # Medium press @@ -617,8 +650,7 @@ def readControllerInput(audio_state, enable_local_beep: bool, state = PAUSE_STATE if enable_local_beep == 1: - playsound(os.path.abspath("Resources/Sounds/Dismiss_Noise_Quiet.wav"), - block=False) + 
audio_state.audio_events.append(audio_state.AUDIO_EVENT_DISMISS) if not use_builtin: osc_ctrl.toggleBoard(audio_state.osc_state.client, False) @@ -638,8 +670,7 @@ def readControllerInput(audio_state, enable_local_beep: bool, audio_state.audio_paused = True if enable_local_beep == 1: - playsound(os.path.abspath("Resources/Sounds/Noise_Off_Quiet.wav"), - block=False) + audio_state.audio_events.append(audio_state.AUDIO_EVENT_TOGGLE_OFF) elif state == PAUSE_STATE: state = RECORD_STATE if not use_builtin: @@ -659,8 +690,7 @@ def readControllerInput(audio_state, enable_local_beep: bool, resetDisplayLocked(audio_state) if enable_local_beep == 1: - playsound(os.path.abspath("Resources/Sounds/Noise_On_Quiet.wav"), - block=False) + audio_state.audio_events.append(audio_state.AUDIO_EVENT_TOGGLE_ON) # model should correspond to one of the Whisper models defined in # whisper/__init__.py. Examples: tiny, base, small, medium. @@ -794,6 +824,11 @@ def transcribeLoop(mic: str, controller_input_thd.daemon = True controller_input_thd.start() + audio_feedback_thd = threading.Thread(target = audioFeedbackThread, args + = [audio_state, enable_local_beep, use_builtin, button]) + audio_feedback_thd.daemon = True + audio_feedback_thd.start() + keyboard_input_thd = threading.Thread(target = readKeyboardInput, args = [audio_state, enable_local_beep, use_builtin, keyboard_hotkey]) keyboard_input_thd.daemon = True @@ -815,6 +850,7 @@ def transcribeLoop(mic: str, audio_state.run_app = False transcribe_audio_thd.join() controller_input_thd.join() + audio_feedback_thd.join() keyboard_input_thd.join() if __name__ == "__main__": |
