From 9f87674d1b484a2e61e87ad53d8ebcf9985dce6b Mon Sep 17 00:00:00 2001
From: yum
Date: Tue, 22 Nov 2022 19:01:01 -0800
Subject: Shorten audio window to 10 seconds

This helps with temporal stability in long-running transcriptions, and
lets us get rid of that hack where we refuse to update old pages.
---
 transcribe.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'transcribe.py')

diff --git a/transcribe.py b/transcribe.py
index 91fcd54..b316014 100644
--- a/transcribe.py
+++ b/transcribe.py
@@ -33,7 +33,8 @@ class AudioState:

     # The maximum length that recordAudio() will put into frames before it
     # starts dropping from the start.
-    MAX_LENGTH_S = 30
+    MAX_LENGTH_S = 10
+    MAX_LENGTH_S_WHISPER = 30

     # The minimum length that recordAudio() will wait for before saving audio.
     MIN_LENGTH_S = 1
@@ -202,7 +203,7 @@ def transcribe(audio_state, model, filename):
     audio_state.transcribe_lock.release()

     audio = whisper.pad_or_trim(audio, length = audio_state.RATE *
-            audio_state.MAX_LENGTH_S)
+            audio_state.MAX_LENGTH_S_WHISPER)
     mel = whisper.log_mel_spectrogram(audio).to(model.device)

     result = None
-- 
cgit v1.2.3