Filter out segments based on avg_log_prob & no_speech_prob

Surprisingly, passing the threshold arguments (e.g. log_prob_threshold) does not cause transcribe() to omit the offending segments from the result, so we have to filter them out manually. Hallucinated phrases generally have a low avg_log_prob, a high no_speech_prob, or both.
author: yum <yum.food.vr@gmail.com> 2023-07-07 01:57:56 -0700
committer: yum <yum.food.vr@gmail.com> 2023-07-07 01:58:45 -0700
commit: 2793ac9dd31059f2fc29f7978bcb688a7de664ed (patch)
tree: 4b76f8d7d797d0f15e52f7744f4bbe4614b4381f
parent: 742eb86d652d7689bbf3ae8b286bf0a6b1c2380d (diff)
1 file changed, 4 insertions, 1 deletion
diff --git a/Scripts/transcribe.py b/Scripts/transcribe.py
index 39c6563..cebd70d 100644
--- a/Scripts/transcribe.py
+++ b/Scripts/transcribe.py
@@ -278,12 +278,15 @@ def transcribe(audio_state, model, frames, use_cpu: bool) -> typing.Tuple[str,st
             audio,
             beam_size = 5,
             language = audio_state.language,
-            temperature = [0.0],
+            temperature = [0.0, 0.2, 0.4],
+            log_prob_threshold = -1.0,
             vad_filter = True,
             condition_on_previous_text = True,
             without_timestamps = False)
     ranges = []
     for s in segments:
+        if s.avg_log_prob < -1.0 or s.no_speech_prob > 0.6:
+            continue
         if audio_state.enable_debug_mode:
             print(f"Segment: {s}")
         ranges.append((s.start, s.end, s.text))
author	yum <yum.food.vr@gmail.com>	2023-07-07 01:57:56 -0700
committer	yum <yum.food.vr@gmail.com>	2023-07-07 01:58:45 -0700
commit	2793ac9dd31059f2fc29f7978bcb688a7de664ed (patch)
tree	4b76f8d7d797d0f15e52f7744f4bbe4614b4381f
parent	742eb86d652d7689bbf3ae8b286bf0a6b1c2380d (diff)