summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: yum <yum.food.vr@gmail.com> 2023-07-07 01:57:56 -0700
committer: yum <yum.food.vr@gmail.com> 2023-07-07 01:58:45 -0700
commit: 2793ac9dd31059f2fc29f7978bcb688a7de664ed (patch)
tree: 4b76f8d7d797d0f15e52f7744f4bbe4614b4381f
parent: 742eb86d652d7689bbf3ae8b286bf0a6b1c2380d (diff)
Filter out segments based on avg_log_prob & no_speech_prob
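Surprisingly, passing threshold arguments such as log_prob_threshold to transcribe() does not cause it to omit the offending segments from the result, so we have to filter them out manually. Hallucinated phrases generally have a low avg_log_prob, a high no_speech_prob, or both, so segments with avg_log_prob < -1.0 or no_speech_prob > 0.6 are skipped.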
-rw-r--r--Scripts/transcribe.py5
1 files changed, 4 insertions, 1 deletions
diff --git a/Scripts/transcribe.py b/Scripts/transcribe.py
index 39c6563..cebd70d 100644
--- a/Scripts/transcribe.py
+++ b/Scripts/transcribe.py
@@ -278,12 +278,15 @@ def transcribe(audio_state, model, frames, use_cpu: bool) -> typing.Tuple[str,st
audio,
beam_size = 5,
language = audio_state.language,
- temperature = [0.0],
+ temperature = [0.0, 0.2, 0.4],
+ log_prob_threshold = -1.0,
vad_filter = True,
condition_on_previous_text = True,
without_timestamps = False)
ranges = []
for s in segments:
+ if s.avg_log_prob < -1.0 or s.no_speech_prob > 0.6:
+ continue
if audio_state.enable_debug_mode:
print(f"Segment: {s}")
ranges.append((s.start, s.end, s.text))