summaryrefslogtreecommitdiffstats
path: root/transcribe.py
diff options
context:
space:
mode:
authoryum <yum.food.vr@gmail.com>2022-10-27 16:00:22 -0700
committeryum <yum.food.vr@gmail.com>2022-10-27 16:00:22 -0700
commitfd7ea2f72a486888c30726a2482fcc1f7fce9378 (patch)
treecae0ee99fca59d65062074a546c3975404f7ad89 /transcribe.py
parenteefa14c431efa4e3bc16cafbcb004e41622c2411 (diff)
Add fast clear animation
The old clear mechanism would write an empty cell in every layer, which would take (0.3 seconds) * (11 layers) == about 3 seconds. The new mechanism drives an animation which overwrites every character slot simultaneously, taking only 0.1 seconds. A nice ~30x speedup.

* Fix the transcription exponential backoff logic. Saying new things will reset the delay to the minimum again.
* Clearing the board will also reset the transcription delay back to the minimum.
* Raise the no-speech probability threshold from 0.1 to 0.2. Speaking softly into the mic often produced a no-speech probability above 0.1, so quiet speech was being discarded.
Diffstat (limited to 'transcribe.py')
-rw-r--r--transcribe.py10
1 files changed, 9 insertions, 1 deletions
diff --git a/transcribe.py b/transcribe.py
index 474cd59..0a31608 100644
--- a/transcribe.py
+++ b/transcribe.py
@@ -150,6 +150,9 @@ def resetDiskAudioLocked(audio_state, filename):
def resetAudioLocked(audio_state):
audio_state.frames = []
+ audio_state.transcribe_no_change_count = 0
+ audio_state.transcribe_sleep_duration = \
+ audio_state.transcribe_sleep_duration_min_s
def resetAudio(audio_state):
audio_state.frames_lock.acquire()
@@ -170,7 +173,7 @@ def transcribe(model, filename):
options = whisper.DecodingOptions(language = "en")
result = whisper.decode(model, mel, options)
- if result.no_speech_prob > 0.1:
+ if result.no_speech_prob > 0.2:
print("no speech prob: {}".format(result.no_speech_prob))
return ""
@@ -251,6 +254,11 @@ def transcribeAudio(audio_state, model):
old_words = audio_state.text.split()
new_words = text.split()
audio_state.text = string_matcher.matchStringList(old_words, new_words)
+ if old_text != audio_state.text:
+ # We think the user said something, so reset the amount of
+ # time we sleep between transcriptions to the minimum.
+ audio_state.transcribe_no_change_count = 0
+ audio_state.transcribe_sleep_duration = audio_state.transcribe_sleep_duration_min_s
audio_state.text_candidate = text