diff options
| author | yum <yum.food.vr@gmail.com> | 2023-07-07 01:57:56 -0700 |
|---|---|---|
| committer | yum <yum.food.vr@gmail.com> | 2023-07-07 01:58:45 -0700 |
| commit | 2793ac9dd31059f2fc29f7978bcb688a7de664ed (patch) | |
| tree | 4b76f8d7d797d0f15e52f7744f4bbe4614b4381f | |
| parent | 742eb86d652d7689bbf3ae8b286bf0a6b1c2380d (diff) | |
Filter out segments based on avg_log_prob & no_speech_prob
Surprisingly, passing threshold arguments such as log_prob_threshold to
transcribe() does not cause it to omit the offending segments from the
result, so we have to filter them out manually.
Hallucinated phrases generally have a low avg_log_prob, a high
no_speech_prob, or both.
| -rw-r--r-- | Scripts/transcribe.py | 5 |
1 file changed, 4 insertions, 1 deletion
diff --git a/Scripts/transcribe.py b/Scripts/transcribe.py index 39c6563..cebd70d 100644 --- a/Scripts/transcribe.py +++ b/Scripts/transcribe.py @@ -278,12 +278,15 @@ def transcribe(audio_state, model, frames, use_cpu: bool) -> typing.Tuple[str,st audio, beam_size = 5, language = audio_state.language, - temperature = [0.0], + temperature = [0.0, 0.2, 0.4], + log_prob_threshold = -1.0, vad_filter = True, condition_on_previous_text = True, without_timestamps = False) ranges = [] for s in segments: + if s.avg_log_prob < -1.0 or s.no_speech_prob > 0.6: + continue if audio_state.enable_debug_mode: print(f"Segment: {s}") ranges.append((s.start, s.end, s.text)) |
