summary | refs | log | tree | commit | diff | stats
path: root/Scripts/transcribe.py
diff options
context:
space:
mode:
author: yum <yum.food.vr@gmail.com> 2023-06-28 23:50:34 -0700
committer: yum <yum.food.vr@gmail.com> 2023-06-28 23:50:34 -0700
commit: 3b10d3ab3073af2ed716d1607bb92394bb8817fc (patch)
tree: 47832ebee674f2e3c852548d3e05e2074202308f /Scripts/transcribe.py
parent: b1efbf5ce1ebd584796d4a57cf9c7b6517f91fac (diff)
Fix race condition around audio frames dropping
Previously, onAudioFramesAvailable returned early whenever audio_state.audio_paused was set, which prevented frames from being dropped and sometimes caused transcriptions to be repeated. The frame-dropping code now always runs; while paused, only the append of newly captured frames is skipped. Also adjust the code structure of the keyboard and VR input handlers so that they are more similar to each other.
Diffstat (limited to 'Scripts/transcribe.py')
-rw-r--r-- Scripts/transcribe.py 26
1 files changed, 17 insertions, 9 deletions
diff --git a/Scripts/transcribe.py b/Scripts/transcribe.py
index 4a438b7..a6fbe38 100644
--- a/Scripts/transcribe.py
+++ b/Scripts/transcribe.py
@@ -132,9 +132,6 @@ def onAudioFramesAvailable(
frame_count,
time_info,
status_flags):
- if audio_state.audio_paused:
- return (frames, pyaudio.paContinue)
-
# Reduce sample rate from mic rate to Whisper rate by dropping frames.
decimated = b''
frame_len = int(len(frames) / frame_count)
@@ -147,7 +144,8 @@ def onAudioFramesAvailable(
next_frame += keep_every
i += 1
- audio_state.frames.append(decimated)
+ if not audio_state.audio_paused:
+ audio_state.frames.append(decimated)
max_frames = int(input_rate * audio_state.MAX_LENGTH_S /
audio_state.CHUNK)
@@ -625,15 +623,25 @@ def readControllerInput(audio_state, enable_local_beep: bool,
block=False)
elif state == PAUSE_STATE:
state = RECORD_STATE
-
- if enable_local_beep == 1:
- playsound(os.path.abspath("Resources/Sounds/Noise_On_Quiet.wav"),
- block=False)
-
if not use_builtin:
osc_ctrl.indicateSpeech(audio_state.osc_state.client, True)
osc_ctrl.toggleBoard(audio_state.osc_state.client, True)
osc_ctrl.lockWorld(audio_state.osc_state.client, False)
+ if audio_state.reset_on_toggle:
+ if audio_state.enable_debug_mode:
+ print("Toggle detected, dropping transcript (3)")
+ audio_state.drop_transcription = True
+ else:
+ if audio_state.enable_debug_mode:
+ print("Toggle detected, committing preview text (3)")
+ audio_state.text += audio_state.preview_text
+
+ resetAudioLocked(audio_state)
+ resetDisplayLocked(audio_state)
+
+ if enable_local_beep == 1:
+ playsound(os.path.abspath("Resources/Sounds/Noise_On_Quiet.wav"),
+ block=False)
# model should correspond to one of the Whisper models defined in
# whisper/__init__.py. Examples: tiny, base, small, medium.