summary refs log tree commit diff stats
diff options
context:
space:
mode:
author yum <yum.food.vr@gmail.com> 2024-07-12 15:41:23 -0700
committer yum <yum.food.vr@gmail.com> 2024-07-12 15:41:23 -0700
commit 426d0139273a3112ed18f1b7c39eb75b9278fbe1 (patch)
tree 37e0d725fe6d41897b77e967038dc70ac0bfdddb
parent 75069522ffc8863a356d95e509c81612a3703458 (diff)
Translation shows original language by default
* Add checkbox to disable this feature if so desired.
* Delete old optimization code; can get it back from git if needed.
* Enforce that there's at least one space character ' ' between committed segments.
-rw-r--r-- GUI/GUI/GUI/Config.cpp 3
-rw-r--r-- GUI/GUI/GUI/Config.h 1
-rw-r--r-- GUI/GUI/GUI/Frame.cpp 18
-rw-r--r-- GUI/GUI/GUI/Frame.h 1
-rw-r--r-- Scripts/transcribe_v2.py 125
5 files changed, 34 insertions, 114 deletions
diff --git a/GUI/GUI/GUI/Config.cpp b/GUI/GUI/GUI/Config.cpp
index 605b25a..2bf8750 100644
--- a/GUI/GUI/GUI/Config.cpp
+++ b/GUI/GUI/GUI/Config.cpp
@@ -72,6 +72,7 @@ AppConfig::AppConfig(wxTextCtrl* out)
compute_type("float16"),
enable_local_beep(true),
+ enable_orig_lang(true),
enable_browser_src(false),
browser_src_port(8097),
commit_fuzz_threshold(4),
@@ -122,6 +123,7 @@ bool AppConfig::Serialize(const std::filesystem::path& path) {
cm.Set("compute_type", compute_type);
cm.Set("enable_local_beep", enable_local_beep);
+ cm.Set("enable_orig_lang", enable_orig_lang);
cm.Set("enable_browser_src", enable_browser_src);
cm.Set("browser_src_port", browser_src_port);
cm.Set("commit_fuzz_threshold", commit_fuzz_threshold);
@@ -185,6 +187,7 @@ bool AppConfig::Deserialize(const std::filesystem::path& path) {
cm.Get("compute_type", c.compute_type);
cm.Get("enable_local_beep", c.enable_local_beep);
+ cm.Get("enable_orig_lang", c.enable_orig_lang);
cm.Get("enable_browser_src", c.enable_browser_src);
cm.Get("browser_src_port", c.browser_src_port);
cm.Get("commit_fuzz_threshold", c.commit_fuzz_threshold);
diff --git a/GUI/GUI/GUI/Config.h b/GUI/GUI/GUI/Config.h
index e75e4d5..48b49a2 100644
--- a/GUI/GUI/GUI/Config.h
+++ b/GUI/GUI/GUI/Config.h
@@ -58,6 +58,7 @@ public:
std::string compute_type;
bool enable_local_beep;
+ bool enable_orig_lang;
bool enable_browser_src;
int browser_src_port;
int commit_fuzz_threshold;
diff --git a/GUI/GUI/GUI/Frame.cpp b/GUI/GUI/GUI/Frame.cpp
index 9a69651..e3c1964 100644
--- a/GUI/GUI/GUI/Frame.cpp
+++ b/GUI/GUI/GUI/Frame.cpp
@@ -62,6 +62,7 @@ namespace {
ID_PY_APP_COMPUTE_TYPE,
ID_PY_APP_MODEL_PANEL,
ID_PY_APP_ENABLE_LOCAL_BEEP,
+ ID_PY_APP_ENABLE_ORIG_LANG,
ID_PY_APP_ENABLE_BROWSER_SRC,
ID_PY_APP_USE_CPU,
ID_PY_APP_USE_FLASH_ATTENTION,
@@ -985,6 +986,16 @@ Frame::Frame()
);
py_app_enable_local_beep_ = py_app_enable_local_beep;
+ auto* py_app_enable_orig_lang = new wxCheckBox(py_config_panel,
+ ID_PY_APP_ENABLE_ORIG_LANG, "Translation shows original language");
+ py_app_enable_orig_lang->SetValue(app_c_->enable_orig_lang);
+ py_app_enable_orig_lang->SetToolTip(
+ "When translation is enabled, this checkbox determines whether "
+ "the original language is shown in parentheses after the "
+ "translated text - c'est comme ça. ( like this)."
+ );
+ py_app_enable_orig_lang_ = py_app_enable_orig_lang;
+
auto* py_app_use_cpu = new wxCheckBox(py_config_panel,
ID_PY_APP_USE_CPU, "Use CPU");
py_app_use_cpu->SetValue(app_c_->use_cpu);
@@ -1121,6 +1132,8 @@ Frame::Frame()
/*flags=*/wxEXPAND);
sizer->Add(py_app_enable_local_beep, /*proportion=*/0,
/*flags=*/wxEXPAND);
+ sizer->Add(py_app_enable_orig_lang, /*proportion=*/0,
+ /*flags=*/wxEXPAND);
sizer->Add(py_app_use_cpu, /*proportion=*/0,
/*flags=*/wxEXPAND);
sizer->Add(py_app_use_flash_attention, /*proportion=*/0,
@@ -1708,6 +1721,9 @@ void Frame::ApplyConfigToInputFields()
auto* py_app_enable_local_beep = static_cast<wxCheckBox*>(FindWindowById(ID_PY_APP_ENABLE_LOCAL_BEEP));
py_app_enable_local_beep->SetValue(app_c_->enable_local_beep);
+ auto* py_app_enable_orig_lang = static_cast<wxCheckBox*>(FindWindowById(ID_PY_APP_ENABLE_ORIG_LANG));
+ py_app_enable_orig_lang->SetValue(app_c_->enable_orig_lang);
+
auto* py_app_enable_browser_src = static_cast<wxCheckBox*>(FindWindowById(ID_PY_APP_ENABLE_BROWSER_SRC));
py_app_enable_browser_src->SetValue(app_c_->enable_browser_src);
@@ -2464,6 +2480,7 @@ void Frame::OnAppStart(wxCommandEvent& event) {
}
const bool enable_local_beep = py_app_enable_local_beep_->GetValue();
+ const bool enable_orig_lang = py_app_enable_orig_lang_->GetValue();
const bool enable_browser_src = py_app_enable_browser_src_->GetValue();
const bool use_cpu = py_app_use_cpu_->GetValue();
const bool use_flash_attention = py_app_use_flash_attention_->GetValue();
@@ -2504,6 +2521,7 @@ void Frame::OnAppStart(wxCommandEvent& event) {
app_c_->rows = rows;
app_c_->cols = cols;
app_c_->enable_local_beep = enable_local_beep;
+ app_c_->enable_orig_lang = enable_orig_lang;
app_c_->enable_browser_src = enable_browser_src;
app_c_->browser_src_port = browser_src_port;
app_c_->use_cpu = use_cpu;
diff --git a/GUI/GUI/GUI/Frame.h b/GUI/GUI/GUI/Frame.h
index fc8bac8..2d682a7 100644
--- a/GUI/GUI/GUI/Frame.h
+++ b/GUI/GUI/GUI/Frame.h
@@ -69,6 +69,7 @@ private:
wxChoice* unity_bytes_per_char_;
wxCheckBox* py_app_enable_local_beep_;
+ wxCheckBox* py_app_enable_orig_lang_;
wxCheckBox* py_app_enable_browser_src_;
wxCheckBox* py_app_use_cpu_;
wxCheckBox* py_app_use_flash_attention_;
diff --git a/Scripts/transcribe_v2.py b/Scripts/transcribe_v2.py
index 2a206fd..94b8257 100644
--- a/Scripts/transcribe_v2.py
+++ b/Scripts/transcribe_v2.py
@@ -546,8 +546,8 @@ class VadCommitter:
self.collector.keepLast(1.0)
return TranscriptCommit(
- delta,
- preview,
+ delta.strip(),
+ preview.strip(),
latency_s,
audio=audio,
duration_s=duration_s,
@@ -651,6 +651,8 @@ class TranslationPlugin(StreamingPlugin):
translated_sentence = self.tokenizer.decode(self.tokenizer.convert_tokens_to_ids(target))
translated_sentences.append(translated_sentence)
translated = " ".join(translated_sentences)
+ if cfg["enable_orig_lang"] and len(sentences) > 0:
+ translated += f" ({text})"
return translated
commit.delta = _translate_text(commit.delta)
@@ -783,118 +785,6 @@ class OscPager:
osc_ctrl.ellipsis(self.osc_state.client, state)
self.bumpSyncWindow()
-def evaluate(cfg,
- audio_path: str,
- control_path: str):
- stream = DiskStream(audio_path)
-
- collector = AudioCollector(stream)
- collector = CompressingAudioCollector(collector)
- whisper = Whisper(collector, cfg)
- segmenter = AudioSegmenter(min_silence_ms=250)
- committer = VadCommitter(cfg, collector, whisper, segmenter)
- transcript = ""
- commits = []
- last_commit_ts = None
-
- while True:
- time.sleep(.005)
-
- commit = committer.getDelta()
-
- if last_commit_ts != None and collector.now() - last_commit_ts > 30:
- break
-
- if len(commit.delta) > 0:
- print(f"Commit latency: {commit.latency_s}", file=sys.stderr)
- commits.append(commit)
- last_commit_ts = collector.now()
-
- transcript += commit.delta
- preview = commit.preview
-
- if False and len(commit.delta):
- print(f"transcript: {transcript}", file=sys.stderr)
- print(f"commit latency: {commit.latency_s}", file=sys.stderr)
- print(f"commit thresh: {commit.thresh_at_commit}", file=sys.stderr)
-
- with open(control_path, "r") as f:
- control = f.read()
- normalizer = EnglishTextNormalizer()
- control = normalizer(control)
- experiment = normalizer(transcript)
-
- sum_latency = 0
- for commit in commits:
- sum_latency += commit.latency_s
- avg_latency = sum_latency / len(commits)
-
- dist = editdistance.eval(control, experiment)
-
- print(f"RESULTS", file=sys.stderr)
- print(f"edit distance: {dist}", file=sys.stderr)
- print(f"avg latency: {avg_latency}", file=sys.stderr)
- print(f"num commits: {len(commits)}", file=sys.stderr)
- print(f"final transcript: {transcript}", file=sys.stderr)
-
- score = (3 + (dist/len(control)) * 100) * avg_latency
- print(f"score: {score}", file=sys.stderr)
- return score
-
-def optimize(cfg,
- experiments: typing.List[typing.Tuple[str, str]]):
-
- install_in_venv(["git+https://github.com/openai/whisper.git",
- "scipy"])
-
- from scipy.optimize import minimize
- from whisper.normalizers import EnglishTextNormalizer
-
- def wrapper_to_optimize(x):
- s = 0
- for audio_path, control_path in experiments:
- s += evaluate(
- cfg,
- audio_path,
- control_path,
- int(x[0]), # last_n_must_match
- 2**x[1], # edit_thresh_min
- (2**x[2])-1,# edit_thresh_grow_begin_s
- x[3], # edit_thresh_grow_halflife_s
- x[4] # min_segment_age_s
- )
- return s
-
- initial_guess = [2.3, 1, 1.75, 1.5, 0.5]
- bounds = [
- (2, 3), # last_n_must_match
- (1, 4), # edit_thresh_min
- (0, 2.5), # edit_thresh_grow_begin_s
- (0.1, 2), # edit_thresh_grow_halflife_s
- (0, 3) # min_segment_age_s
- ]
-
- result = minimize(
- wrapper_to_optimize,
- initial_guess,
- bounds=bounds,
- method='L-BFGS-B',
- options={"maxfun": int((60/.5)*12),
- "eps": 0.2},
- )
-
- optimized_params = result.x
-
- print("Optimized Parameters:", file=sys.stderr)
- print(f"last_n_must_match: {int(optimized_params[0])}", file=sys.stderr)
- print(f"edit_thresh_min: {optimized_params[1]}", file=sys.stderr)
- print(f"edit_thresh_grow_begin_s: {optimized_params[2]}", file=sys.stderr)
- print(f"edit_thresh_grow_halflife_s: {optimized_params[3]}",
- file=sys.stderr)
- print(f"min_segment_age_s: {optimized_params[4]}", file=sys.stderr)
-
- return optimized_params
-
def transcriptionThread(ctrl: ThreadControl):
last_stable_commit = None
@@ -951,6 +841,13 @@ def transcriptionThread(ctrl: ThreadControl):
print(f"commit thresh: {commit.thresh_at_commit}",
file=sys.stderr)
+ if not ctrl.transcript.endswith(' ') and not \
+ commit.delta.startswith(' '):
+ commit.delta = ' ' + commit.delta
+ if not commit.delta.endswith(' ') and not \
+ commit.preview.startswith(' '):
+ commit.preview = ' ' + commit.preview
+
ctrl.transcript += commit.delta
ctrl.preview = ctrl.transcript + commit.preview
for plugin in ctrl.plugins: