diff options
| author | yum <yum.food.vr@gmail.com> | 2023-06-28 23:50:34 -0700 |
|---|---|---|
| committer | yum <yum.food.vr@gmail.com> | 2023-06-28 23:50:34 -0700 |
| commit | 3b10d3ab3073af2ed716d1607bb92394bb8817fc (patch) | |
| tree | 47832ebee674f2e3c852548d3e05e2074202308f | |
| parent | b1efbf5ce1ebd584796d4a57cf9c7b6517f91fac (diff) | |
Fix race condition around audio frames dropping
onAudioFramesAvailable would bail out if audio_state.audio_paused is
set, preventing frames from being dropped. This would cause
transcriptions to get repeated sometimes.
Now the frame-dropping code always runs, whether or not audio is paused;
only appending new frames is skipped while paused.
Also adjust the code structure of the keyboard/VR input handlers to be
more similar.
| -rw-r--r-- | Scripts/transcribe.py | 26 |
1 file changed, 17 insertions, 9 deletions
diff --git a/Scripts/transcribe.py b/Scripts/transcribe.py index 4a438b7..a6fbe38 100644 --- a/Scripts/transcribe.py +++ b/Scripts/transcribe.py @@ -132,9 +132,6 @@ def onAudioFramesAvailable( frame_count, time_info, status_flags): - if audio_state.audio_paused: - return (frames, pyaudio.paContinue) - # Reduce sample rate from mic rate to Whisper rate by dropping frames. decimated = b'' frame_len = int(len(frames) / frame_count) @@ -147,7 +144,8 @@ def onAudioFramesAvailable( next_frame += keep_every i += 1 - audio_state.frames.append(decimated) + if not audio_state.audio_paused: + audio_state.frames.append(decimated) max_frames = int(input_rate * audio_state.MAX_LENGTH_S / audio_state.CHUNK) @@ -625,15 +623,25 @@ def readControllerInput(audio_state, enable_local_beep: bool, block=False) elif state == PAUSE_STATE: state = RECORD_STATE - - if enable_local_beep == 1: - playsound(os.path.abspath("Resources/Sounds/Noise_On_Quiet.wav"), - block=False) - if not use_builtin: osc_ctrl.indicateSpeech(audio_state.osc_state.client, True) osc_ctrl.toggleBoard(audio_state.osc_state.client, True) osc_ctrl.lockWorld(audio_state.osc_state.client, False) + if audio_state.reset_on_toggle: + if audio_state.enable_debug_mode: + print("Toggle detected, dropping transcript (3)") + audio_state.drop_transcription = True + else: + if audio_state.enable_debug_mode: + print("Toggle detected, committing preview text (3)") + audio_state.text += audio_state.preview_text + + resetAudioLocked(audio_state) + resetDisplayLocked(audio_state) + + if enable_local_beep == 1: + playsound(os.path.abspath("Resources/Sounds/Noise_On_Quiet.wav"), + block=False) # model should correspond to one of the Whisper models defined in # whisper/__init__.py. Examples: tiny, base, small, medium. |
