From 302f7ba09f2ee115d0ee4b8f0841f6ffcd50ec57 Mon Sep 17 00:00:00 2001 From: yum Date: Fri, 25 Aug 2023 12:50:59 -0700 Subject: Put audio feedback into its own thread I think this improves the code structure of the controller input thread and leads to some deduplication, so I'm going to keep it. However, the intended purpose was to decrease lag when pressing buttons, and in that regard it failed. The lag goes all the way down to the input layer, implying that the input thread is not able to consistently run at its intended 100 Hz sample rate. I suspect that the Python global interpreter lock (GIL) is at fault. Since we can't realistically move all our functionality into one thread in a non-blocking model, I think multiprocessing is the logical choice going forward. Each thread in transcribe.py would become its own process, and communicate via pub/sub through some intermediary process sitting in the middle. --- Scripts/requirements.txt | 1 - Scripts/transcribe.py | 68 ++++++++++++++++++++++++++++++++++++------------ 2 files changed, 52 insertions(+), 17 deletions(-) diff --git a/Scripts/requirements.txt b/Scripts/requirements.txt index bdc93a1..3a2cf42 100644 --- a/Scripts/requirements.txt +++ b/Scripts/requirements.txt @@ -9,7 +9,6 @@ pyopenxr pillow pyaudio python-osc -playsound==1.2.2 pyyaml sentence_splitter transformers>=4.21.0 diff --git a/Scripts/transcribe.py b/Scripts/transcribe.py index e9873e9..4b00bd0 100644 --- a/Scripts/transcribe.py +++ b/Scripts/transcribe.py @@ -5,7 +5,6 @@ from emotes_v2 import EmotesState from faster_whisper import WhisperModel from functools import partial from math import ceil, floor -from playsound import playsound from profanity_filter import ProfanityFilter from sentence_splitter import split_text_into_sentences @@ -30,6 +29,7 @@ import time import transformers import typing import wave +import winsound class AudioState: def __init__(self): @@ -86,6 +86,14 @@ class AudioState: # Locks access to `frames`, and audio stored on disk. 
self.audio_lock = threading.Lock() + # Audio events that should play. Input thread appends to this list, + # audio feedback thread drains it. + self.audio_events = [] + self.AUDIO_EVENT_TOGGLE_ON = 1 + self.AUDIO_EVENT_TOGGLE_OFF = 2 + self.AUDIO_EVENT_DISMISS = 3 + self.AUDIO_EVENT_UPDATE = 4 + # Used to tell the threads when to stop. self.run_app = True @@ -532,8 +540,7 @@ def readKeyboardInput(audio_state, enable_local_beep: bool, audio_state.audio_paused = True if enable_local_beep == 1: - playsound(os.path.abspath("Resources/Sounds/Noise_Off_Quiet.wav"), - block=False) + audio_state.audio_events.append(audio_state.AUDIO_EVENT_TOGGLE_OFF) elif state == PAUSE_STATE: state = RECORD_STATE if not use_builtin: @@ -554,8 +561,37 @@ def readKeyboardInput(audio_state, enable_local_beep: bool, resetDisplayLocked(audio_state) if enable_local_beep == 1: - playsound(os.path.abspath("Resources/Sounds/Noise_On_Quiet.wav"), - block=False) + audio_state.audio_events.append(audio_state.AUDIO_EVENT_TOGGLE_ON) + +def audioFeedbackThread(audio_state, enable_local_beep: bool, + use_builtin: bool, button: str): + with open(os.path.abspath("Resources/Sounds/Noise_On_Quiet.wav"), "rb") as f: + waveform0 = f.read() + with open(os.path.abspath("Resources/Sounds/Noise_Off_Quiet.wav"), "rb") as f: + waveform1 = f.read() + with open(os.path.abspath("Resources/Sounds/Dismiss_Noise_Quiet.wav"), "rb") as f: + waveform2 = f.read() + with open(os.path.abspath("Resources/Sounds/KB_Noise_Off_Quiet.wav"), "rb") as f: + waveform3 = f.read() + while audio_state.run_app == True: + time.sleep(0.01) + + if len(audio_state.audio_events) == 0: + continue + + event = audio_state.audio_events[0] + audio_state.audio_events = audio_state.audio_events[1:] + + waveform = waveform0 + if event == audio_state.AUDIO_EVENT_TOGGLE_ON: + waveform = waveform0 + elif event == audio_state.AUDIO_EVENT_TOGGLE_OFF: + waveform = waveform1 + elif event == audio_state.AUDIO_EVENT_DISMISS: + waveform = waveform2 + elif event 
== audio_state.AUDIO_EVENT_UPDATE: + waveform = waveform3 + winsound.PlaySound(waveform, winsound.SND_MEMORY) def readControllerInput(audio_state, enable_local_beep: bool, use_builtin: bool, button: str): @@ -578,7 +614,6 @@ def readControllerInput(audio_state, enable_local_beep: bool, button_generator = steamvr.pollButtonPress(hand=hand_id, button=button_id) while audio_state.run_app == True: time.sleep(0.01) - event = next(button_generator) if event == steamvr.EVENT_RISING_EDGE: @@ -603,13 +638,11 @@ def readControllerInput(audio_state, enable_local_beep: bool, if last_rising - last_medium_press_end < 1.0: # Type transcription if enable_local_beep == 1: - playsound(os.path.abspath("Resources/Sounds/KB_Noise_Off_Quiet.wav"), - block=False) + audio_state.audio_events.append(audio_state.AUDIO_EVENT_UPDATE) keyboard.write(audio_state.filtered_text) else: if enable_local_beep == 1: - playsound(os.path.abspath("Resources/Sounds/Noise_Off_Quiet.wav"), - block=False) + audio_state.audio_events.append(audio_state.AUDIO_EVENT_TOGGLE_OFF) elif now - last_rising > 0.5: # Medium press @@ -617,8 +650,7 @@ def readControllerInput(audio_state, enable_local_beep: bool, state = PAUSE_STATE if enable_local_beep == 1: - playsound(os.path.abspath("Resources/Sounds/Dismiss_Noise_Quiet.wav"), - block=False) + audio_state.audio_events.append(audio_state.AUDIO_EVENT_DISMISS) if not use_builtin: osc_ctrl.toggleBoard(audio_state.osc_state.client, False) @@ -638,8 +670,7 @@ def readControllerInput(audio_state, enable_local_beep: bool, audio_state.audio_paused = True if enable_local_beep == 1: - playsound(os.path.abspath("Resources/Sounds/Noise_Off_Quiet.wav"), - block=False) + audio_state.audio_events.append(audio_state.AUDIO_EVENT_TOGGLE_OFF) elif state == PAUSE_STATE: state = RECORD_STATE if not use_builtin: @@ -659,8 +690,7 @@ def readControllerInput(audio_state, enable_local_beep: bool, resetDisplayLocked(audio_state) if enable_local_beep == 1: - 
playsound(os.path.abspath("Resources/Sounds/Noise_On_Quiet.wav"), - block=False) + audio_state.audio_events.append(audio_state.AUDIO_EVENT_TOGGLE_ON) # model should correspond to one of the Whisper models defined in # whisper/__init__.py. Examples: tiny, base, small, medium. @@ -794,6 +824,11 @@ def transcribeLoop(mic: str, controller_input_thd.daemon = True controller_input_thd.start() + audio_feedback_thd = threading.Thread(target = audioFeedbackThread, args + = [audio_state, enable_local_beep, use_builtin, button]) + audio_feedback_thd.daemon = True + audio_feedback_thd.start() + keyboard_input_thd = threading.Thread(target = readKeyboardInput, args = [audio_state, enable_local_beep, use_builtin, keyboard_hotkey]) keyboard_input_thd.daemon = True @@ -815,6 +850,7 @@ def transcribeLoop(mic: str, audio_state.run_app = False transcribe_audio_thd.join() controller_input_thd.join() + audio_feedback_thd.join() keyboard_input_thd.join() if __name__ == "__main__": -- cgit v1.2.3