summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: yum <yum.food.vr@gmail.com> 2023-07-07 02:30:18 -0700
committer: yum <yum.food.vr@gmail.com> 2023-07-07 02:35:51 -0700
commit: 7a576bcac1c37c3c5a59fadf172aa70b15ff83c8 (patch)
tree: e7fa6e499e3bc12ddf4096aad30f2705f34ce556
parent: 2793ac9dd31059f2fc29f7978bcb688a7de664ed (diff)
Enforce a stricter avg_logprob than default (tag: v0.13.1)
Common hallucinations sneak in around -0.9 avg_logprob.

Also:
* Limit temperatures to just 0.0. Multiple values cause latency to occasionally spike.
-rw-r--r--  Scripts/requirements.txt  2
-rw-r--r--  Scripts/transcribe.py     6
2 files changed, 4 insertions, 4 deletions
diff --git a/Scripts/requirements.txt b/Scripts/requirements.txt
index 647e942..80bf93d 100644
--- a/Scripts/requirements.txt
+++ b/Scripts/requirements.txt
@@ -1,6 +1,6 @@
ctranslate2
editdistance
-faster-whisper@https://github.com/guillaumekln/faster-whisper/archive/358d373691c95205021bd4bbf28cde7ce4d10030.tar.gz
+faster-whisper@https://github.com/guillaumekln/faster-whisper/archive/78d57d73c5b4a76b32d1d5a415e4e7aea760295c.tar.gz
future==0.18.2
keyboard
langcodes
diff --git a/Scripts/transcribe.py b/Scripts/transcribe.py
index cebd70d..25ce22b 100644
--- a/Scripts/transcribe.py
+++ b/Scripts/transcribe.py
@@ -278,14 +278,14 @@ def transcribe(audio_state, model, frames, use_cpu: bool) -> typing.Tuple[str,st
audio,
beam_size = 5,
language = audio_state.language,
- temperature = [0.0, 0.2, 0.4],
- log_prob_threshold = -1.0,
+ temperature = 0.0,
+ log_prob_threshold = -0.8,
vad_filter = True,
condition_on_previous_text = True,
without_timestamps = False)
ranges = []
for s in segments:
- if s.avg_log_prob < -1.0 or s.no_speech_prob > 0.6:
+ if s.avg_logprob < -0.8 or s.no_speech_prob > 0.6:
continue
if audio_state.enable_debug_mode:
print(f"Segment: {s}")