From 241813a5af11093c6b86e70ada729788c1f0dee6 Mon Sep 17 00:00:00 2001 From: yum Date: Tue, 27 Jun 2023 15:35:30 -0700 Subject: Adjust commit logic to use fuzzy string match threshold ... instead of simple equality. TODO: add UI for threshold. Bugfix: Frame::OnAppStop() now joins the OBS app thread even when the transcription engine has already stopped. --- GUI/GUI/GUI/Frame.cpp | 18 +++++++++++++----- Scripts/transcribe.py | 14 ++++++++++++-- 2 files changed, 25 insertions(+), 7 deletions(-) diff --git a/GUI/GUI/GUI/Frame.cpp b/GUI/GUI/GUI/Frame.cpp index f4e99b9..8d4c868 100644 --- a/GUI/GUI/GUI/Frame.cpp +++ b/GUI/GUI/GUI/Frame.cpp @@ -2122,16 +2122,24 @@ void Frame::OnAppStart(wxCommandEvent& event) { } void Frame::OnAppStop() { + run_py_app_ = false; auto status = py_app_.wait_for(std::chrono::seconds(0)); if (status == std::future_status::ready) { Log(transcribe_out_, "Transcription engine already stopped\n"); - return; } - run_py_app_ = false; - py_app_.wait(); - obs_app_.wait(); + else { + py_app_.wait(); + Log(transcribe_out_, "Stopped transcription engine\n"); + } + status = obs_app_.wait_for(std::chrono::seconds(0)); + if (status == std::future_status::ready) { + Log(transcribe_out_, "Browser source already stopped\n"); + } + else { + obs_app_.wait(); + Log(transcribe_out_, "Stopped browser source\n"); + } transcript_.Clear(); - Log(transcribe_out_, "Stopped transcription engine\n"); } void Frame::OnAppStop(wxCommandEvent& event) { diff --git a/Scripts/transcribe.py b/Scripts/transcribe.py index 694fd0b..6cb78cd 100644 --- a/Scripts/transcribe.py +++ b/Scripts/transcribe.py @@ -11,6 +11,7 @@ from sentence_splitter import split_text_into_sentences import argparse import copy import ctranslate2 +import editdistance import generate_utils import keybind_event_machine import keyboard @@ -39,7 +40,7 @@ class AudioState: # The maximum length that recordAudio() will put into frames before it # starts dropping from the start. 
- self.MAX_LENGTH_S = 30 + self.MAX_LENGTH_S = 300 # The minimum length that recordAudio() will wait for before saving audio. self.MIN_LENGTH_S = 1 @@ -257,11 +258,20 @@ def transcribe(audio_state, model, frames, use_cpu: bool) -> typing.Tuple[str,st c1 = first_segments[-2] c2 = first_segments[-3] c3 = first_segments[-4] + + c0_c1_d = editdistance.eval(c0[2], c1[2]) + c1_c2_d = editdistance.eval(c1[2], c2[2]) + c2_c3_d = editdistance.eval(c2[2], c3[2]) + + max_edit = 8 + #print(f"c0: {c0}, c1: {c1}, c2: {c2}") - if c0 == c1 and c1 == c2 and c2 == c3: + #if c0 == c1 and c1 == c2 and c2 == c3: + if c0_c1_d < max_edit and c1_c2_d < max_edit and c2_c3_d < max_edit: # For simplicity, completely reset saved audio ranges. audio_state.ranges_ls = [] committed_text = c0[2] + print(f"Dropping frames until {c0[1]}") n_frames_to_drop = int(ceil(audio_state.RATE * c0[1])) audio_state.drop_frames_till_i = n_frames_to_drop -- cgit v1.2.3