Fix race condition around audio frames dropping

onAudioFramesAvailable used to return early whenever audio_state.audio_paused was set, which prevented frames from being dropped. This sometimes caused transcriptions to be repeated. The frame-dropping code now always runs; only the append of newly captured frames is skipped while audio is paused. Also adjust the code structure of the keyboard and VR input handlers so they are more similar to each other.
author: yum <yum.food.vr@gmail.com> 2023-06-28 23:50:34 -0700
committer: yum <yum.food.vr@gmail.com> 2023-06-28 23:50:34 -0700
commit: 3b10d3ab3073af2ed716d1607bb92394bb8817fc (patch)
tree: 47832ebee674f2e3c852548d3e05e2074202308f
parent: b1efbf5ce1ebd584796d4a57cf9c7b6517f91fac (diff)
1 files changed, 17 insertions, 9 deletions
diff --git a/Scripts/transcribe.py b/Scripts/transcribe.py
index 4a438b7..a6fbe38 100644
--- a/Scripts/transcribe.py
+++ b/Scripts/transcribe.py
@@ -132,9 +132,6 @@ def onAudioFramesAvailable(
         frame_count,
         time_info,
         status_flags):
-    if audio_state.audio_paused:
-        return (frames, pyaudio.paContinue)
-
     # Reduce sample rate from mic rate to Whisper rate by dropping frames.
     decimated = b''
     frame_len = int(len(frames) / frame_count)
@@ -147,7 +144,8 @@ def onAudioFramesAvailable(
             next_frame += keep_every
         i += 1
 
-    audio_state.frames.append(decimated)
+    if not audio_state.audio_paused:
+        audio_state.frames.append(decimated)
 
     max_frames = int(input_rate * audio_state.MAX_LENGTH_S /
             audio_state.CHUNK)
@@ -625,15 +623,25 @@ def readControllerInput(audio_state, enable_local_beep: bool,
                             block=False)
                 elif state == PAUSE_STATE:
                     state = RECORD_STATE
-
-                    if enable_local_beep == 1:
-                        playsound(os.path.abspath("Resources/Sounds/Noise_On_Quiet.wav"),
-                            block=False)
-
                     if not use_builtin:
                         osc_ctrl.indicateSpeech(audio_state.osc_state.client, True)
                         osc_ctrl.toggleBoard(audio_state.osc_state.client, True)
                         osc_ctrl.lockWorld(audio_state.osc_state.client, False)
+                    if audio_state.reset_on_toggle:
+                        if audio_state.enable_debug_mode:
+                            print("Toggle detected, dropping transcript (3)")
+                        audio_state.drop_transcription = True
+                    else:
+                        if audio_state.enable_debug_mode:
+                            print("Toggle detected, committing preview text (3)")
+                        audio_state.text += audio_state.preview_text
+
+                    resetAudioLocked(audio_state)
+                    resetDisplayLocked(audio_state)
+
+                    if enable_local_beep == 1:
+                        playsound(os.path.abspath("Resources/Sounds/Noise_On_Quiet.wav"),
+                            block=False)
 
 # model should correspond to one of the Whisper models defined in
 # whisper/__init__.py. Examples: tiny, base, small, medium.
author	yum <yum.food.vr@gmail.com>	2023-06-28 23:50:34 -0700
committer	yum <yum.food.vr@gmail.com>	2023-06-28 23:50:34 -0700
commit	3b10d3ab3073af2ed716d1607bb92394bb8817fc (patch)
tree	47832ebee674f2e3c852548d3e05e2074202308f
parent	b1efbf5ce1ebd584796d4a57cf9c7b6517f91fac (diff)