diff options
| author | yum <yum.food.vr@gmail.com> | 2024-07-12 15:41:23 -0700 |
|---|---|---|
| committer | yum <yum.food.vr@gmail.com> | 2024-07-12 15:41:23 -0700 |
| commit | 426d0139273a3112ed18f1b7c39eb75b9278fbe1 (patch) | |
| tree | 37e0d725fe6d41897b77e967038dc70ac0bfdddb | |
| parent | 75069522ffc8863a356d95e509c81612a3703458 (diff) | |
Translation output shows the original-language text by default
* Add a checkbox to disable this feature if so desired.
* Delete the old optimization code (`evaluate` and `optimize`); it can be recovered from git history if needed.
* Enforce that there's at least one space character ' ' between
committed segments.
| -rw-r--r-- | GUI/GUI/GUI/Config.cpp | 3 | ||||
| -rw-r--r-- | GUI/GUI/GUI/Config.h | 1 | ||||
| -rw-r--r-- | GUI/GUI/GUI/Frame.cpp | 18 | ||||
| -rw-r--r-- | GUI/GUI/GUI/Frame.h | 1 | ||||
| -rw-r--r-- | Scripts/transcribe_v2.py | 125 |
5 files changed, 34 insertions, 114 deletions
diff --git a/GUI/GUI/GUI/Config.cpp b/GUI/GUI/GUI/Config.cpp index 605b25a..2bf8750 100644 --- a/GUI/GUI/GUI/Config.cpp +++ b/GUI/GUI/GUI/Config.cpp @@ -72,6 +72,7 @@ AppConfig::AppConfig(wxTextCtrl* out) compute_type("float16"),
enable_local_beep(true),
+ enable_orig_lang(true),
enable_browser_src(false),
browser_src_port(8097),
commit_fuzz_threshold(4),
@@ -122,6 +123,7 @@ bool AppConfig::Serialize(const std::filesystem::path& path) { cm.Set("compute_type", compute_type);
cm.Set("enable_local_beep", enable_local_beep);
+ cm.Set("enable_orig_lang", enable_orig_lang);
cm.Set("enable_browser_src", enable_browser_src);
cm.Set("browser_src_port", browser_src_port);
cm.Set("commit_fuzz_threshold", commit_fuzz_threshold);
@@ -185,6 +187,7 @@ bool AppConfig::Deserialize(const std::filesystem::path& path) { cm.Get("compute_type", c.compute_type);
cm.Get("enable_local_beep", c.enable_local_beep);
+ cm.Get("enable_orig_lang", c.enable_orig_lang);
cm.Get("enable_browser_src", c.enable_browser_src);
cm.Get("browser_src_port", c.browser_src_port);
cm.Get("commit_fuzz_threshold", c.commit_fuzz_threshold);
diff --git a/GUI/GUI/GUI/Config.h b/GUI/GUI/GUI/Config.h index e75e4d5..48b49a2 100644 --- a/GUI/GUI/GUI/Config.h +++ b/GUI/GUI/GUI/Config.h @@ -58,6 +58,7 @@ public: std::string compute_type;
bool enable_local_beep;
+ bool enable_orig_lang;
bool enable_browser_src;
int browser_src_port;
int commit_fuzz_threshold;
diff --git a/GUI/GUI/GUI/Frame.cpp b/GUI/GUI/GUI/Frame.cpp index 9a69651..e3c1964 100644 --- a/GUI/GUI/GUI/Frame.cpp +++ b/GUI/GUI/GUI/Frame.cpp @@ -62,6 +62,7 @@ namespace { ID_PY_APP_COMPUTE_TYPE,
ID_PY_APP_MODEL_PANEL,
ID_PY_APP_ENABLE_LOCAL_BEEP,
+ ID_PY_APP_ENABLE_ORIG_LANG,
ID_PY_APP_ENABLE_BROWSER_SRC,
ID_PY_APP_USE_CPU,
ID_PY_APP_USE_FLASH_ATTENTION,
@@ -985,6 +986,16 @@ Frame::Frame() );
py_app_enable_local_beep_ = py_app_enable_local_beep;
+ auto* py_app_enable_orig_lang = new wxCheckBox(py_config_panel,
+ ID_PY_APP_ENABLE_ORIG_LANG, "Translation shows original language");
+ py_app_enable_orig_lang->SetValue(app_c_->enable_orig_lang);
+ py_app_enable_orig_lang->SetToolTip(
+ "When translation is enabled, this checkbox determines whether "
+ "the original language is shown in parentheses after the "
+ "translated text - c'est comme ça. ( like this)."
+ );
+ py_app_enable_orig_lang_ = py_app_enable_orig_lang;
+
auto* py_app_use_cpu = new wxCheckBox(py_config_panel,
ID_PY_APP_USE_CPU, "Use CPU");
py_app_use_cpu->SetValue(app_c_->use_cpu);
@@ -1121,6 +1132,8 @@ Frame::Frame() /*flags=*/wxEXPAND);
sizer->Add(py_app_enable_local_beep, /*proportion=*/0,
/*flags=*/wxEXPAND);
+ sizer->Add(py_app_enable_orig_lang, /*proportion=*/0,
+ /*flags=*/wxEXPAND);
sizer->Add(py_app_use_cpu, /*proportion=*/0,
/*flags=*/wxEXPAND);
sizer->Add(py_app_use_flash_attention, /*proportion=*/0,
@@ -1708,6 +1721,9 @@ void Frame::ApplyConfigToInputFields() auto* py_app_enable_local_beep = static_cast<wxCheckBox*>(FindWindowById(ID_PY_APP_ENABLE_LOCAL_BEEP));
py_app_enable_local_beep->SetValue(app_c_->enable_local_beep);
+ auto* py_app_enable_orig_lang = static_cast<wxCheckBox*>(FindWindowById(ID_PY_APP_ENABLE_ORIG_LANG));
+ py_app_enable_orig_lang->SetValue(app_c_->enable_orig_lang);
+
auto* py_app_enable_browser_src = static_cast<wxCheckBox*>(FindWindowById(ID_PY_APP_ENABLE_BROWSER_SRC));
py_app_enable_browser_src->SetValue(app_c_->enable_browser_src);
@@ -2464,6 +2480,7 @@ void Frame::OnAppStart(wxCommandEvent& event) { }
const bool enable_local_beep = py_app_enable_local_beep_->GetValue();
+ const bool enable_orig_lang = py_app_enable_orig_lang_->GetValue();
const bool enable_browser_src = py_app_enable_browser_src_->GetValue();
const bool use_cpu = py_app_use_cpu_->GetValue();
const bool use_flash_attention = py_app_use_flash_attention_->GetValue();
@@ -2504,6 +2521,7 @@ void Frame::OnAppStart(wxCommandEvent& event) { app_c_->rows = rows;
app_c_->cols = cols;
app_c_->enable_local_beep = enable_local_beep;
+ app_c_->enable_orig_lang = enable_orig_lang;
app_c_->enable_browser_src = enable_browser_src;
app_c_->browser_src_port = browser_src_port;
app_c_->use_cpu = use_cpu;
diff --git a/GUI/GUI/GUI/Frame.h b/GUI/GUI/GUI/Frame.h index fc8bac8..2d682a7 100644 --- a/GUI/GUI/GUI/Frame.h +++ b/GUI/GUI/GUI/Frame.h @@ -69,6 +69,7 @@ private: wxChoice* unity_bytes_per_char_;
wxCheckBox* py_app_enable_local_beep_;
+ wxCheckBox* py_app_enable_orig_lang_;
wxCheckBox* py_app_enable_browser_src_;
wxCheckBox* py_app_use_cpu_;
wxCheckBox* py_app_use_flash_attention_;
diff --git a/Scripts/transcribe_v2.py b/Scripts/transcribe_v2.py index 2a206fd..94b8257 100644 --- a/Scripts/transcribe_v2.py +++ b/Scripts/transcribe_v2.py @@ -546,8 +546,8 @@ class VadCommitter: self.collector.keepLast(1.0) return TranscriptCommit( - delta, - preview, + delta.strip(), + preview.strip(), latency_s, audio=audio, duration_s=duration_s, @@ -651,6 +651,8 @@ class TranslationPlugin(StreamingPlugin): translated_sentence = self.tokenizer.decode(self.tokenizer.convert_tokens_to_ids(target)) translated_sentences.append(translated_sentence) translated = " ".join(translated_sentences) + if cfg["enable_orig_lang"] and len(sentences) > 0: + translated += f" ({text})" return translated commit.delta = _translate_text(commit.delta) @@ -783,118 +785,6 @@ class OscPager: osc_ctrl.ellipsis(self.osc_state.client, state) self.bumpSyncWindow() -def evaluate(cfg, - audio_path: str, - control_path: str): - stream = DiskStream(audio_path) - - collector = AudioCollector(stream) - collector = CompressingAudioCollector(collector) - whisper = Whisper(collector, cfg) - segmenter = AudioSegmenter(min_silence_ms=250) - committer = VadCommitter(cfg, collector, whisper, segmenter) - transcript = "" - commits = [] - last_commit_ts = None - - while True: - time.sleep(.005) - - commit = committer.getDelta() - - if last_commit_ts != None and collector.now() - last_commit_ts > 30: - break - - if len(commit.delta) > 0: - print(f"Commit latency: {commit.latency_s}", file=sys.stderr) - commits.append(commit) - last_commit_ts = collector.now() - - transcript += commit.delta - preview = commit.preview - - if False and len(commit.delta): - print(f"transcript: {transcript}", file=sys.stderr) - print(f"commit latency: {commit.latency_s}", file=sys.stderr) - print(f"commit thresh: {commit.thresh_at_commit}", file=sys.stderr) - - with open(control_path, "r") as f: - control = f.read() - normalizer = EnglishTextNormalizer() - control = normalizer(control) - experiment = normalizer(transcript) - - 
sum_latency = 0 - for commit in commits: - sum_latency += commit.latency_s - avg_latency = sum_latency / len(commits) - - dist = editdistance.eval(control, experiment) - - print(f"RESULTS", file=sys.stderr) - print(f"edit distance: {dist}", file=sys.stderr) - print(f"avg latency: {avg_latency}", file=sys.stderr) - print(f"num commits: {len(commits)}", file=sys.stderr) - print(f"final transcript: {transcript}", file=sys.stderr) - - score = (3 + (dist/len(control)) * 100) * avg_latency - print(f"score: {score}", file=sys.stderr) - return score - -def optimize(cfg, - experiments: typing.List[typing.Tuple[str, str]]): - - install_in_venv(["git+https://github.com/openai/whisper.git", - "scipy"]) - - from scipy.optimize import minimize - from whisper.normalizers import EnglishTextNormalizer - - def wrapper_to_optimize(x): - s = 0 - for audio_path, control_path in experiments: - s += evaluate( - cfg, - audio_path, - control_path, - int(x[0]), # last_n_must_match - 2**x[1], # edit_thresh_min - (2**x[2])-1,# edit_thresh_grow_begin_s - x[3], # edit_thresh_grow_halflife_s - x[4] # min_segment_age_s - ) - return s - - initial_guess = [2.3, 1, 1.75, 1.5, 0.5] - bounds = [ - (2, 3), # last_n_must_match - (1, 4), # edit_thresh_min - (0, 2.5), # edit_thresh_grow_begin_s - (0.1, 2), # edit_thresh_grow_halflife_s - (0, 3) # min_segment_age_s - ] - - result = minimize( - wrapper_to_optimize, - initial_guess, - bounds=bounds, - method='L-BFGS-B', - options={"maxfun": int((60/.5)*12), - "eps": 0.2}, - ) - - optimized_params = result.x - - print("Optimized Parameters:", file=sys.stderr) - print(f"last_n_must_match: {int(optimized_params[0])}", file=sys.stderr) - print(f"edit_thresh_min: {optimized_params[1]}", file=sys.stderr) - print(f"edit_thresh_grow_begin_s: {optimized_params[2]}", file=sys.stderr) - print(f"edit_thresh_grow_halflife_s: {optimized_params[3]}", - file=sys.stderr) - print(f"min_segment_age_s: {optimized_params[4]}", file=sys.stderr) - - return optimized_params - def 
transcriptionThread(ctrl: ThreadControl): last_stable_commit = None @@ -951,6 +841,13 @@ def transcriptionThread(ctrl: ThreadControl): print(f"commit thresh: {commit.thresh_at_commit}", file=sys.stderr) + if not ctrl.transcript.endswith(' ') and not \ + commit.delta.startswith(' '): + commit.delta = ' ' + commit.delta + if not commit.delta.endswith(' ') and not \ + commit.preview.startswith(' '): + commit.preview = ' ' + commit.preview + ctrl.transcript += commit.delta ctrl.preview = ctrl.transcript + commit.preview for plugin in ctrl.plugins: |
