1 files changed, 20 insertions, 2 deletions
diff --git a/Scripts/transcribe.py b/Scripts/transcribe.py
index 5301b0b..cea2da0 100644
--- a/Scripts/transcribe.py
+++ b/Scripts/transcribe.py
@@ -66,7 +66,7 @@ class AudioState:
         # The edit distance under which two consecutive transcripts are
         # considered to match. This affects how easily `preview_text`
         # gets appended to `text`.
-        self.commit_fuzz_threshold = 8
+        self.commit_fuzz_threshold = 1
 
         # If set, profanity in transcriptions will have their vowels replaced
         # with asterisks. Only works in English.
@@ -157,6 +157,19 @@ def onAudioFramesAvailable(
     if not audio_state.audio_paused:
         audio_state.frames.append(decimated)
 
+    # If buffer is getting long, tell the transcription loop to be more ready
+    # to accept transcripts.
+    fps = int(input_rate / audio_state.CHUNK)
+    cur_len_s = len(audio_state.frames) / fps
+    double_at_s = 3.0
+    double_every_s = 1.5
+    delta_s = cur_len_s - double_at_s
+    n_doubles = ceil(delta_s / double_every_s)
+    if n_doubles >= 1:
+        audio_state.commit_fuzz_threshold = 2 ** n_doubles
+    else:
+        audio_state.commit_fuzz_threshold = 1
+
     max_frames = int(input_rate * audio_state.MAX_LENGTH_S /
             audio_state.CHUNK)
     if len(audio_state.frames) > max_frames:
@@ -380,7 +393,12 @@ def transcribeAudio(audio_state,
             if audio_state.enable_debug_mode:
                 print("no transcription, spin ({} seconds)".format(time.time() - last_transcribe_time))
             last_transcribe_time = time.time()
+            # Prevent audio buffer from holding more than 1 second of silence
+            # before real speech.
+            audio_state.MAX_LENGTH_S = 1
             continue
+        else:
+            audio_state.MAX_LENGTH_S = 300
 
         if audio_state.drop_transcription:
             audio_state.drop_transcription = False
@@ -720,7 +738,7 @@ def transcribeLoop(mic: str,
     audio_state.language = langcodes.find(language).language
     audio_state.MAX_LENGTH_S = window_duration_s
     audio_state.reset_on_toggle = reset_on_toggle
-    audio_state.commit_fuzz_threshold = commit_fuzz_threshold
+    #audio_state.commit_fuzz_threshold = commit_fuzz_threshold
     audio_state.enable_debug_mode = enable_debug_mode
     audio_state.enable_profanity_filter = enable_profanity_filter