summaryrefslogtreecommitdiffstats
path: root/Scripts
diff options
context:
space:
mode:
authoryum <yum.food.vr@gmail.com>2024-07-12 15:41:23 -0700
committeryum <yum.food.vr@gmail.com>2024-07-12 15:41:23 -0700
commit426d0139273a3112ed18f1b7c39eb75b9278fbe1 (patch)
tree37e0d725fe6d41897b77e967038dc70ac0bfdddb /Scripts
parent75069522ffc8863a356d95e509c81612a3703458 (diff)
Translation shows original language by default
* Add checkbox to disable this feature if so desired. * Delete old optimization code; can get it back from git if needed. * Enforce that there's at least one space character ' ' between committed segments.
Diffstat (limited to 'Scripts')
-rw-r--r--Scripts/transcribe_v2.py125
1 files changed, 11 insertions, 114 deletions
diff --git a/Scripts/transcribe_v2.py b/Scripts/transcribe_v2.py
index 2a206fd..94b8257 100644
--- a/Scripts/transcribe_v2.py
+++ b/Scripts/transcribe_v2.py
@@ -546,8 +546,8 @@ class VadCommitter:
self.collector.keepLast(1.0)
return TranscriptCommit(
- delta,
- preview,
+ delta.strip(),
+ preview.strip(),
latency_s,
audio=audio,
duration_s=duration_s,
@@ -651,6 +651,8 @@ class TranslationPlugin(StreamingPlugin):
translated_sentence = self.tokenizer.decode(self.tokenizer.convert_tokens_to_ids(target))
translated_sentences.append(translated_sentence)
translated = " ".join(translated_sentences)
+ if cfg["enable_orig_lang"] and len(sentences) > 0:
+ translated += f" ({text})"
return translated
commit.delta = _translate_text(commit.delta)
@@ -783,118 +785,6 @@ class OscPager:
osc_ctrl.ellipsis(self.osc_state.client, state)
self.bumpSyncWindow()
-def evaluate(cfg,
- audio_path: str,
- control_path: str):
- stream = DiskStream(audio_path)
-
- collector = AudioCollector(stream)
- collector = CompressingAudioCollector(collector)
- whisper = Whisper(collector, cfg)
- segmenter = AudioSegmenter(min_silence_ms=250)
- committer = VadCommitter(cfg, collector, whisper, segmenter)
- transcript = ""
- commits = []
- last_commit_ts = None
-
- while True:
- time.sleep(.005)
-
- commit = committer.getDelta()
-
- if last_commit_ts != None and collector.now() - last_commit_ts > 30:
- break
-
- if len(commit.delta) > 0:
- print(f"Commit latency: {commit.latency_s}", file=sys.stderr)
- commits.append(commit)
- last_commit_ts = collector.now()
-
- transcript += commit.delta
- preview = commit.preview
-
- if False and len(commit.delta):
- print(f"transcript: {transcript}", file=sys.stderr)
- print(f"commit latency: {commit.latency_s}", file=sys.stderr)
- print(f"commit thresh: {commit.thresh_at_commit}", file=sys.stderr)
-
- with open(control_path, "r") as f:
- control = f.read()
- normalizer = EnglishTextNormalizer()
- control = normalizer(control)
- experiment = normalizer(transcript)
-
- sum_latency = 0
- for commit in commits:
- sum_latency += commit.latency_s
- avg_latency = sum_latency / len(commits)
-
- dist = editdistance.eval(control, experiment)
-
- print(f"RESULTS", file=sys.stderr)
- print(f"edit distance: {dist}", file=sys.stderr)
- print(f"avg latency: {avg_latency}", file=sys.stderr)
- print(f"num commits: {len(commits)}", file=sys.stderr)
- print(f"final transcript: {transcript}", file=sys.stderr)
-
- score = (3 + (dist/len(control)) * 100) * avg_latency
- print(f"score: {score}", file=sys.stderr)
- return score
-
-def optimize(cfg,
- experiments: typing.List[typing.Tuple[str, str]]):
-
- install_in_venv(["git+https://github.com/openai/whisper.git",
- "scipy"])
-
- from scipy.optimize import minimize
- from whisper.normalizers import EnglishTextNormalizer
-
- def wrapper_to_optimize(x):
- s = 0
- for audio_path, control_path in experiments:
- s += evaluate(
- cfg,
- audio_path,
- control_path,
- int(x[0]), # last_n_must_match
- 2**x[1], # edit_thresh_min
- (2**x[2])-1,# edit_thresh_grow_begin_s
- x[3], # edit_thresh_grow_halflife_s
- x[4] # min_segment_age_s
- )
- return s
-
- initial_guess = [2.3, 1, 1.75, 1.5, 0.5]
- bounds = [
- (2, 3), # last_n_must_match
- (1, 4), # edit_thresh_min
- (0, 2.5), # edit_thresh_grow_begin_s
- (0.1, 2), # edit_thresh_grow_halflife_s
- (0, 3) # min_segment_age_s
- ]
-
- result = minimize(
- wrapper_to_optimize,
- initial_guess,
- bounds=bounds,
- method='L-BFGS-B',
- options={"maxfun": int((60/.5)*12),
- "eps": 0.2},
- )
-
- optimized_params = result.x
-
- print("Optimized Parameters:", file=sys.stderr)
- print(f"last_n_must_match: {int(optimized_params[0])}", file=sys.stderr)
- print(f"edit_thresh_min: {optimized_params[1]}", file=sys.stderr)
- print(f"edit_thresh_grow_begin_s: {optimized_params[2]}", file=sys.stderr)
- print(f"edit_thresh_grow_halflife_s: {optimized_params[3]}",
- file=sys.stderr)
- print(f"min_segment_age_s: {optimized_params[4]}", file=sys.stderr)
-
- return optimized_params
-
def transcriptionThread(ctrl: ThreadControl):
last_stable_commit = None
@@ -951,6 +841,13 @@ def transcriptionThread(ctrl: ThreadControl):
print(f"commit thresh: {commit.thresh_at_commit}",
file=sys.stderr)
+ if not ctrl.transcript.endswith(' ') and not \
+ commit.delta.startswith(' '):
+ commit.delta = ' ' + commit.delta
+ if not commit.delta.endswith(' ') and not \
+ commit.preview.startswith(' '):
+ commit.preview = ' ' + commit.preview
+
ctrl.transcript += commit.delta
ctrl.preview = ctrl.transcript + commit.preview
for plugin in ctrl.plugins: