From fd7ea2f72a486888c30726a2482fcc1f7fce9378 Mon Sep 17 00:00:00 2001 From: yum Date: Thu, 27 Oct 2022 16:00:22 -0700 Subject: Add fast clear animation The old clear mechanism would write an empty cell in every layer, which would take (0.3 seconds) * (11 layers) == about 3 seconds. The new mechanism drives an animation which overwrites every character slot simultaneously, taking only 0.1 seconds. A nice ~30x speedup. * Fix the transcription exponential backoff logic. Saying new things will reset the delay to the minimum again. * Clearing the board will also reset the transcription delay back to the minimum. * Tune the no-speech probability cutoff to 0.2 instead of 0.1. Speaking softly into the mic seems to push no_speech_prob above the 0.1 cutoff pretty often, so soft speech was being discarded as silence. --- transcribe.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'transcribe.py') diff --git a/transcribe.py b/transcribe.py index 474cd59..0a31608 100644 --- a/transcribe.py +++ b/transcribe.py @@ -150,6 +150,9 @@ def resetDiskAudioLocked(audio_state, filename): def resetAudioLocked(audio_state): audio_state.frames = [] + audio_state.transcribe_no_change_count = 0 + audio_state.transcribe_sleep_duration = \ + audio_state.transcribe_sleep_duration_min_s def resetAudio(audio_state): audio_state.frames_lock.acquire() @@ -170,7 +173,7 @@ def transcribe(model, filename): options = whisper.DecodingOptions(language = "en") result = whisper.decode(model, mel, options) - if result.no_speech_prob > 0.1: + if result.no_speech_prob > 0.2: print("no speech prob: {}".format(result.no_speech_prob)) return "" @@ -251,6 +254,11 @@ def transcribeAudio(audio_state, model): old_words = audio_state.text.split() new_words = text.split() audio_state.text = string_matcher.matchStringList(old_words, new_words) + if old_text != audio_state.text: + # We think the user said something, so reset the amount of + # time we sleep between transcriptions to the minimum. 
+ audio_state.transcribe_no_change_count = 0 + audio_state.transcribe_sleep_duration = audio_state.transcribe_sleep_duration_min_s audio_state.text_candidate = text -- cgit v1.2.3