summary | refs | log | tree | commit | diff | stats
path: root/Scripts
diff options
context:
space:
mode:
Diffstat (limited to 'Scripts')
-rw-r--r-- Scripts/transcribe.py 22
1 files changed, 20 insertions, 2 deletions
diff --git a/Scripts/transcribe.py b/Scripts/transcribe.py
index 5301b0b..cea2da0 100644
--- a/Scripts/transcribe.py
+++ b/Scripts/transcribe.py
@@ -66,7 +66,7 @@ class AudioState:
# The edit distance under which two consecutive transcripts are
# considered to match. This affects how easily `preview_text`
# gets appended to `text`.
- self.commit_fuzz_threshold = 8
+ self.commit_fuzz_threshold = 1
# If set, profanity in transcriptions will have their vowels replaced
# with asterisks. Only works in English.
@@ -157,6 +157,19 @@ def onAudioFramesAvailable(
if not audio_state.audio_paused:
audio_state.frames.append(decimated)
+ # If buffer is getting long, tell the transcription loop to be more ready
+ # to accept transcripts.
+ fps = int(input_rate / audio_state.CHUNK)
+ cur_len_s = len(audio_state.frames) / fps
+ double_at_s = 3.0
+ double_every_s = 1.5
+ delta_s = cur_len_s - double_at_s
+ n_doubles = ceil(delta_s / double_every_s)
+ if n_doubles >= 1:
+ audio_state.commit_fuzz_threshold = 2 ** n_doubles
+ else:
+ audio_state.commit_fuzz_threshold = 1
+
max_frames = int(input_rate * audio_state.MAX_LENGTH_S /
audio_state.CHUNK)
if len(audio_state.frames) > max_frames:
@@ -380,7 +393,12 @@ def transcribeAudio(audio_state,
if audio_state.enable_debug_mode:
print("no transcription, spin ({} seconds)".format(time.time() - last_transcribe_time))
last_transcribe_time = time.time()
+ # Prevent audio buffer from holding more than 1 second of silence
+ # before real speech.
+ audio_state.MAX_LENGTH_S = 1
continue
+ else:
+ audio_state.MAX_LENGTH_S = 300
if audio_state.drop_transcription:
audio_state.drop_transcription = False
@@ -720,7 +738,7 @@ def transcribeLoop(mic: str,
audio_state.language = langcodes.find(language).language
audio_state.MAX_LENGTH_S = window_duration_s
audio_state.reset_on_toggle = reset_on_toggle
- audio_state.commit_fuzz_threshold = commit_fuzz_threshold
+ #audio_state.commit_fuzz_threshold = commit_fuzz_threshold
audio_state.enable_debug_mode = enable_debug_mode
audio_state.enable_profanity_filter = enable_profanity_filter