From fd7ea2f72a486888c30726a2482fcc1f7fce9378 Mon Sep 17 00:00:00 2001
From: yum <yum.food.vr@gmail.com>
Date: Thu, 27 Oct 2022 16:00:22 -0700
Subject: Add fast clear animation

The old clear mechanism would write an empty cell in every layer,
which would take (0.3 seconds) * (11 layers) == about 3 seconds.

The new mechanism drives an animation which overwrites every character
slot simultaneously, taking only 0.1 seconds. A nice ~30x speedup.

* Fix the transcription exponential backoff logic. Saying new things
  will reset the delay to the minimum again.
* Clearing the board will also reset the transcription delay back to
  the minimum.
* Raise the no-speech probability cutoff from 0.1 to 0.2. Speaking
  softly into the mic often scores a no_speech_prob above 0.1, so the
  transcription was being discarded as silence pretty often.
---
 transcribe.py | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

(limited to 'transcribe.py')

diff --git a/transcribe.py b/transcribe.py
index 474cd59..0a31608 100644
--- a/transcribe.py
+++ b/transcribe.py
@@ -150,6 +150,9 @@ def resetDiskAudioLocked(audio_state, filename):
 
 def resetAudioLocked(audio_state):
     audio_state.frames = []
+    audio_state.transcribe_no_change_count = 0
+    audio_state.transcribe_sleep_duration = \
+            audio_state.transcribe_sleep_duration_min_s
 
 def resetAudio(audio_state):
     audio_state.frames_lock.acquire()
@@ -170,7 +173,7 @@ def transcribe(model, filename):
     options = whisper.DecodingOptions(language = "en")
     result = whisper.decode(model, mel, options)
 
-    if result.no_speech_prob > 0.1:
+    if result.no_speech_prob > 0.2:
         print("no speech prob: {}".format(result.no_speech_prob))
         return ""
 
@@ -251,6 +254,11 @@ def transcribeAudio(audio_state, model):
             old_words = audio_state.text.split()
             new_words = text.split()
             audio_state.text = string_matcher.matchStringList(old_words, new_words)
+            if old_text != audio_state.text:
+                # We think the user said something, so reset the amount of
+                # time we sleep between transcriptions to the minimum.
+                audio_state.transcribe_no_change_count = 0
+                audio_state.transcribe_sleep_duration = audio_state.transcribe_sleep_duration_min_s
 
         audio_state.text_candidate = text
 
-- 
cgit v1.2.3