summaryrefslogtreecommitdiffstats
path: root/Scripts
diff options
context:
space:
mode:
authoryum <yum.food.vr@gmail.com>2023-08-25 12:50:59 -0700
committeryum <yum.food.vr@gmail.com>2023-08-25 12:50:59 -0700
commit302f7ba09f2ee115d0ee4b8f0841f6ffcd50ec57 (patch)
tree5c07175619a1e9d5e56a30f8d2fdd4e6bbde1623 /Scripts
parent9e43487c1bf62402e96cb6139b24cd8446515673 (diff)
Put audio feedback into its own thread
I think this improves the code structure of the controller input thread and leads to some deduplication, so I'm going to keep it. However, the intended purpose was to decrease lag when pressing buttons, and in that regard it failed. The lag goes all the way down to the input layer, implying that the input thread is not able to consistently run at its intended 100 Hz sample rate. I suspect that the Python global interpreter lock (GIL) is at fault. Since we can't realistically move all our functionality into one thread in a non-blocking model, I think multiprocessing is the logical choice going forward. Each thread in transcribe.py would become its own process, and the processes would communicate via pub/sub through some intermediary process sitting in the middle.
Diffstat (limited to 'Scripts')
-rw-r--r--Scripts/requirements.txt1
-rw-r--r--Scripts/transcribe.py68
2 files changed, 52 insertions, 17 deletions
diff --git a/Scripts/requirements.txt b/Scripts/requirements.txt
index bdc93a1..3a2cf42 100644
--- a/Scripts/requirements.txt
+++ b/Scripts/requirements.txt
@@ -9,7 +9,6 @@ pyopenxr
pillow
pyaudio
python-osc
-playsound==1.2.2
pyyaml
sentence_splitter
transformers>=4.21.0
diff --git a/Scripts/transcribe.py b/Scripts/transcribe.py
index e9873e9..4b00bd0 100644
--- a/Scripts/transcribe.py
+++ b/Scripts/transcribe.py
@@ -5,7 +5,6 @@ from emotes_v2 import EmotesState
from faster_whisper import WhisperModel
from functools import partial
from math import ceil, floor
-from playsound import playsound
from profanity_filter import ProfanityFilter
from sentence_splitter import split_text_into_sentences
@@ -30,6 +29,7 @@ import time
import transformers
import typing
import wave
+import winsound
class AudioState:
def __init__(self):
@@ -86,6 +86,14 @@ class AudioState:
# Locks access to `frames`, and audio stored on disk.
self.audio_lock = threading.Lock()
+ # Audio events that should play. Input thread appends to this list,
+ # audio feedback thread drains it.
+ self.audio_events = []
+ self.AUDIO_EVENT_TOGGLE_ON = 1
+ self.AUDIO_EVENT_TOGGLE_OFF = 2
+ self.AUDIO_EVENT_DISMISS = 3
+ self.AUDIO_EVENT_UPDATE = 4
+
# Used to tell the threads when to stop.
self.run_app = True
@@ -532,8 +540,7 @@ def readKeyboardInput(audio_state, enable_local_beep: bool,
audio_state.audio_paused = True
if enable_local_beep == 1:
- playsound(os.path.abspath("Resources/Sounds/Noise_Off_Quiet.wav"),
- block=False)
+ audio_state.audio_events.append(audio_state.AUDIO_EVENT_TOGGLE_OFF)
elif state == PAUSE_STATE:
state = RECORD_STATE
if not use_builtin:
@@ -554,8 +561,37 @@ def readKeyboardInput(audio_state, enable_local_beep: bool,
resetDisplayLocked(audio_state)
if enable_local_beep == 1:
- playsound(os.path.abspath("Resources/Sounds/Noise_On_Quiet.wav"),
- block=False)
+ audio_state.audio_events.append(audio_state.AUDIO_EVENT_TOGGLE_ON)
+
+def audioFeedbackThread(audio_state, enable_local_beep: bool,
+ use_builtin: bool, button: str):
+ with open(os.path.abspath("Resources/Sounds/Noise_On_Quiet.wav"), "rb") as f:
+ waveform0 = f.read()
+ with open(os.path.abspath("Resources/Sounds/Noise_Off_Quiet.wav"), "rb") as f:
+ waveform1 = f.read()
+ with open(os.path.abspath("Resources/Sounds/Dismiss_Noise_Quiet.wav"), "rb") as f:
+ waveform2 = f.read()
+ with open(os.path.abspath("Resources/Sounds/KB_Noise_Off_Quiet.wav"), "rb") as f:
+ waveform3 = f.read()
+ while audio_state.run_app == True:
+ time.sleep(0.01)
+
+ if len(audio_state.audio_events) == 0:
+ continue
+
+ event = audio_state.audio_events[0]
+ audio_state.audio_events = audio_state.audio_events[1:]
+
+ waveform = waveform0
+ if event == audio_state.AUDIO_EVENT_TOGGLE_ON:
+ waveform = waveform0
+ elif event == audio_state.AUDIO_EVENT_TOGGLE_OFF:
+ waveform = waveform1
+ elif event == audio_state.AUDIO_EVENT_DISMISS:
+ waveform = waveform2
+ elif event == audio_state.AUDIO_EVENT_UPDATE:
+ waveform = waveform3
+ winsound.PlaySound(waveform, winsound.SND_MEMORY)
def readControllerInput(audio_state, enable_local_beep: bool,
use_builtin: bool, button: str):
@@ -578,7 +614,6 @@ def readControllerInput(audio_state, enable_local_beep: bool,
button_generator = steamvr.pollButtonPress(hand=hand_id, button=button_id)
while audio_state.run_app == True:
time.sleep(0.01)
-
event = next(button_generator)
if event == steamvr.EVENT_RISING_EDGE:
@@ -603,13 +638,11 @@ def readControllerInput(audio_state, enable_local_beep: bool,
if last_rising - last_medium_press_end < 1.0:
# Type transcription
if enable_local_beep == 1:
- playsound(os.path.abspath("Resources/Sounds/KB_Noise_Off_Quiet.wav"),
- block=False)
+ audio_state.audio_events.append(audio_state.AUDIO_EVENT_UPDATE)
keyboard.write(audio_state.filtered_text)
else:
if enable_local_beep == 1:
- playsound(os.path.abspath("Resources/Sounds/Noise_Off_Quiet.wav"),
- block=False)
+ audio_state.audio_events.append(audio_state.AUDIO_EVENT_TOGGLE_OFF)
elif now - last_rising > 0.5:
# Medium press
@@ -617,8 +650,7 @@ def readControllerInput(audio_state, enable_local_beep: bool,
state = PAUSE_STATE
if enable_local_beep == 1:
- playsound(os.path.abspath("Resources/Sounds/Dismiss_Noise_Quiet.wav"),
- block=False)
+ audio_state.audio_events.append(audio_state.AUDIO_EVENT_DISMISS)
if not use_builtin:
osc_ctrl.toggleBoard(audio_state.osc_state.client, False)
@@ -638,8 +670,7 @@ def readControllerInput(audio_state, enable_local_beep: bool,
audio_state.audio_paused = True
if enable_local_beep == 1:
- playsound(os.path.abspath("Resources/Sounds/Noise_Off_Quiet.wav"),
- block=False)
+ audio_state.audio_events.append(audio_state.AUDIO_EVENT_TOGGLE_OFF)
elif state == PAUSE_STATE:
state = RECORD_STATE
if not use_builtin:
@@ -659,8 +690,7 @@ def readControllerInput(audio_state, enable_local_beep: bool,
resetDisplayLocked(audio_state)
if enable_local_beep == 1:
- playsound(os.path.abspath("Resources/Sounds/Noise_On_Quiet.wav"),
- block=False)
+ audio_state.audio_events.append(audio_state.AUDIO_EVENT_TOGGLE_ON)
# model should correspond to one of the Whisper models defined in
# whisper/__init__.py. Examples: tiny, base, small, medium.
@@ -794,6 +824,11 @@ def transcribeLoop(mic: str,
controller_input_thd.daemon = True
controller_input_thd.start()
+ audio_feedback_thd = threading.Thread(target = audioFeedbackThread, args
+ = [audio_state, enable_local_beep, use_builtin, button])
+ audio_feedback_thd.daemon = True
+ audio_feedback_thd.start()
+
keyboard_input_thd = threading.Thread(target = readKeyboardInput, args
= [audio_state, enable_local_beep, use_builtin, keyboard_hotkey])
keyboard_input_thd.daemon = True
@@ -815,6 +850,7 @@ def transcribeLoop(mic: str,
audio_state.run_app = False
transcribe_audio_thd.join()
controller_input_thd.join()
+ audio_feedback_thd.join()
keyboard_input_thd.join()
if __name__ == "__main__":