From 302f7ba09f2ee115d0ee4b8f0841f6ffcd50ec57 Mon Sep 17 00:00:00 2001
From: yum <yum.food.vr@gmail.com>
Date: Fri, 25 Aug 2023 12:50:59 -0700
Subject: Put audio feedback into its own thread

I think this improves the code structure of the controller input thread and
leads to some deduplication, so I'm going to keep it. However, the
intended purpose was to decrease lag when pressing buttons, and in that
regard it failed.

The lag goes all the way down to the input layer, implying that the
input thread is not able to consistently run at its intended 100 Hz
sample rate. I suspect that the Python global interpreter lock (GIL) is
at fault.

Since we can't realistically move all our functionality into one thread
in a non-blocking model, I think multiprocessing is the logical choice
going forward. Each thread in transcribe.py would become its own
process, communicating via pub/sub through some intermediary process
sitting in the middle.
---
 Scripts/requirements.txt |  1 -
 Scripts/transcribe.py    | 68 ++++++++++++++++++++++++++++++++++++------------
 2 files changed, 52 insertions(+), 17 deletions(-)

diff --git a/Scripts/requirements.txt b/Scripts/requirements.txt
index bdc93a1..3a2cf42 100644
--- a/Scripts/requirements.txt
+++ b/Scripts/requirements.txt
@@ -9,7 +9,6 @@ pyopenxr
 pillow
 pyaudio
 python-osc
-playsound==1.2.2
 pyyaml
 sentence_splitter
 transformers>=4.21.0
diff --git a/Scripts/transcribe.py b/Scripts/transcribe.py
index e9873e9..4b00bd0 100644
--- a/Scripts/transcribe.py
+++ b/Scripts/transcribe.py
@@ -5,7 +5,6 @@ from emotes_v2 import EmotesState
 from faster_whisper import WhisperModel
 from functools import partial
 from math import ceil, floor
-from playsound import playsound
 from profanity_filter import ProfanityFilter
 from sentence_splitter import split_text_into_sentences
 
@@ -30,6 +29,7 @@ import time
 import transformers
 import typing
 import wave
+import winsound
 
 class AudioState:
     def __init__(self):
@@ -86,6 +86,14 @@ class AudioState:
         # Locks access to `frames`, and audio stored on disk.
         self.audio_lock = threading.Lock()
 
+        # Audio events that should play. Input thread appends to this list,
+        # audio feedback thread drains it.
+        self.audio_events = []
+        self.AUDIO_EVENT_TOGGLE_ON = 1
+        self.AUDIO_EVENT_TOGGLE_OFF = 2
+        self.AUDIO_EVENT_DISMISS = 3
+        self.AUDIO_EVENT_UPDATE = 4
+
         # Used to tell the threads when to stop.
         self.run_app = True
 
@@ -532,8 +540,7 @@ def readKeyboardInput(audio_state, enable_local_beep: bool,
             audio_state.audio_paused = True
 
             if enable_local_beep == 1:
-                playsound(os.path.abspath("Resources/Sounds/Noise_Off_Quiet.wav"),
-                    block=False)
+                audio_state.audio_events.append(audio_state.AUDIO_EVENT_TOGGLE_OFF)
         elif state == PAUSE_STATE:
             state = RECORD_STATE
             if not use_builtin:
@@ -554,8 +561,37 @@ def readKeyboardInput(audio_state, enable_local_beep: bool,
             resetDisplayLocked(audio_state)
 
             if enable_local_beep == 1:
-                playsound(os.path.abspath("Resources/Sounds/Noise_On_Quiet.wav"),
-                    block=False)
+                audio_state.audio_events.append(audio_state.AUDIO_EVENT_TOGGLE_ON)
+
+def audioFeedbackThread(audio_state, enable_local_beep: bool,
+        use_builtin: bool, button: str):
+    with open(os.path.abspath("Resources/Sounds/Noise_On_Quiet.wav"), "rb") as f:
+        waveform0 = f.read()
+    with open(os.path.abspath("Resources/Sounds/Noise_Off_Quiet.wav"), "rb") as f:
+        waveform1 = f.read()
+    with open(os.path.abspath("Resources/Sounds/Dismiss_Noise_Quiet.wav"), "rb") as f:
+        waveform2 = f.read()
+    with open(os.path.abspath("Resources/Sounds/KB_Noise_Off_Quiet.wav"), "rb") as f:
+        waveform3 = f.read()
+    while audio_state.run_app == True:
+        time.sleep(0.01)
+
+        if len(audio_state.audio_events) == 0:
+            continue
+
+        event = audio_state.audio_events[0]
+        audio_state.audio_events = audio_state.audio_events[1:]
+
+        waveform = waveform0
+        if event == audio_state.AUDIO_EVENT_TOGGLE_ON:
+            waveform = waveform0
+        elif event == audio_state.AUDIO_EVENT_TOGGLE_OFF:
+            waveform = waveform1
+        elif event == audio_state.AUDIO_EVENT_DISMISS:
+            waveform = waveform2
+        elif event == audio_state.AUDIO_EVENT_UPDATE:
+            waveform = waveform3
+        winsound.PlaySound(waveform, winsound.SND_MEMORY)
 
 def readControllerInput(audio_state, enable_local_beep: bool,
         use_builtin: bool, button: str):
@@ -578,7 +614,6 @@ def readControllerInput(audio_state, enable_local_beep: bool,
     button_generator = steamvr.pollButtonPress(hand=hand_id, button=button_id)
     while audio_state.run_app == True:
         time.sleep(0.01)
-
         event = next(button_generator)
 
         if event == steamvr.EVENT_RISING_EDGE:
@@ -603,13 +638,11 @@ def readControllerInput(audio_state, enable_local_beep: bool,
                 if last_rising - last_medium_press_end < 1.0:
                     # Type transcription
                     if enable_local_beep == 1:
-                        playsound(os.path.abspath("Resources/Sounds/KB_Noise_Off_Quiet.wav"),
-                            block=False)
+                        audio_state.audio_events.append(audio_state.AUDIO_EVENT_UPDATE)
                     keyboard.write(audio_state.filtered_text)
                 else:
                     if enable_local_beep == 1:
-                        playsound(os.path.abspath("Resources/Sounds/Noise_Off_Quiet.wav"),
-                            block=False)
+                        audio_state.audio_events.append(audio_state.AUDIO_EVENT_TOGGLE_OFF)
 
             elif now - last_rising > 0.5:
                 # Medium press
@@ -617,8 +650,7 @@ def readControllerInput(audio_state, enable_local_beep: bool,
                 state = PAUSE_STATE
 
                 if enable_local_beep == 1:
-                    playsound(os.path.abspath("Resources/Sounds/Dismiss_Noise_Quiet.wav"),
-                        block=False)
+                    audio_state.audio_events.append(audio_state.AUDIO_EVENT_DISMISS)
 
                 if not use_builtin:
                     osc_ctrl.toggleBoard(audio_state.osc_state.client, False)
@@ -638,8 +670,7 @@ def readControllerInput(audio_state, enable_local_beep: bool,
                     audio_state.audio_paused = True
 
                     if enable_local_beep == 1:
-                        playsound(os.path.abspath("Resources/Sounds/Noise_Off_Quiet.wav"),
-                            block=False)
+                        audio_state.audio_events.append(audio_state.AUDIO_EVENT_TOGGLE_OFF)
                 elif state == PAUSE_STATE:
                     state = RECORD_STATE
                     if not use_builtin:
@@ -659,8 +690,7 @@ def readControllerInput(audio_state, enable_local_beep: bool,
                     resetDisplayLocked(audio_state)
 
                     if enable_local_beep == 1:
-                        playsound(os.path.abspath("Resources/Sounds/Noise_On_Quiet.wav"),
-                            block=False)
+                        audio_state.audio_events.append(audio_state.AUDIO_EVENT_TOGGLE_ON)
 
 # model should correspond to one of the Whisper models defined in
 # whisper/__init__.py. Examples: tiny, base, small, medium.
@@ -794,6 +824,11 @@ def transcribeLoop(mic: str,
     controller_input_thd.daemon = True
     controller_input_thd.start()
 
+    audio_feedback_thd = threading.Thread(target = audioFeedbackThread, args
+            = [audio_state, enable_local_beep, use_builtin, button])
+    audio_feedback_thd.daemon = True
+    audio_feedback_thd.start()
+
     keyboard_input_thd = threading.Thread(target = readKeyboardInput, args
             = [audio_state, enable_local_beep, use_builtin, keyboard_hotkey])
     keyboard_input_thd.daemon = True
@@ -815,6 +850,7 @@ def transcribeLoop(mic: str,
     audio_state.run_app = False
     transcribe_audio_thd.join()
     controller_input_thd.join()
+    audio_feedback_thd.join()
     keyboard_input_thd.join()
 
 if __name__ == "__main__":
-- 
cgit v1.2.3