From 2793ac9dd31059f2fc29f7978bcb688a7de664ed Mon Sep 17 00:00:00 2001 From: yum Date: Fri, 7 Jul 2023 01:57:56 -0700 Subject: Filter out segments based on avg_log_prob & no_speech_prob Surprisingly, passing threshold args such as log_prob_threshold to transcribe() does not cause it to omit the offending segments from the result, so we have to filter them out manually. Hallucinated phrases generally have a low avg_log_prob, a high no_speech_prob, or both. --- Scripts/transcribe.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'Scripts') diff --git a/Scripts/transcribe.py b/Scripts/transcribe.py index 39c6563..cebd70d 100644 --- a/Scripts/transcribe.py +++ b/Scripts/transcribe.py @@ -278,12 +278,15 @@ def transcribe(audio_state, model, frames, use_cpu: bool) -> typing.Tuple[str,st audio, beam_size = 5, language = audio_state.language, - temperature = [0.0], + temperature = [0.0, 0.2, 0.4], + log_prob_threshold = -1.0, vad_filter = True, condition_on_previous_text = True, without_timestamps = False) ranges = [] for s in segments: + if s.avg_log_prob < -1.0 or s.no_speech_prob > 0.6: + continue if audio_state.enable_debug_mode: print(f"Segment: {s}") ranges.append((s.start, s.end, s.text)) -- cgit v1.2.3