From 241813a5af11093c6b86e70ada729788c1f0dee6 Mon Sep 17 00:00:00 2001
From: yum <yum.food.vr@gmail.com>
Date: Tue, 27 Jun 2023 15:35:30 -0700
Subject: Adjust commit logic to use fuzzy string match threshold

... instead of simple equality: text is committed when each adjacent pair of the compared segments has an edit distance below 8.

TODO: add UI for threshold.

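Bugfix: Frame::OnAppStop() now waits for the OBS app to finish even when the transcription engine has already stopped (the early return used to skip it).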
---
 GUI/GUI/GUI/Frame.cpp | 18 +++++++++++++-----
 Scripts/transcribe.py | 14 ++++++++++++--
 2 files changed, 25 insertions(+), 7 deletions(-)

diff --git a/GUI/GUI/GUI/Frame.cpp b/GUI/GUI/GUI/Frame.cpp
index f4e99b9..8d4c868 100644
--- a/GUI/GUI/GUI/Frame.cpp
+++ b/GUI/GUI/GUI/Frame.cpp
@@ -2122,16 +2122,24 @@ void Frame::OnAppStart(wxCommandEvent& event) {
 }
 
 void Frame::OnAppStop() {
+    run_py_app_ = false;
     auto status = py_app_.wait_for(std::chrono::seconds(0));
     if (status == std::future_status::ready) {
 		Log(transcribe_out_, "Transcription engine already stopped\n");
-        return;
     }
-    run_py_app_ = false;
-    py_app_.wait();
-    obs_app_.wait();
+    else {
+		py_app_.wait();
+		Log(transcribe_out_, "Stopped transcription engine\n");
+    }
+    status = obs_app_.wait_for(std::chrono::seconds(0));
+    if (status == std::future_status::ready) {
+		Log(transcribe_out_, "Browser source already stopped\n");
+    }
+    else {
+		obs_app_.wait();
+		Log(transcribe_out_, "Stopped browser source\n");
+    }
     transcript_.Clear();
-	Log(transcribe_out_, "Stopped transcription engine\n");
 }
 
 void Frame::OnAppStop(wxCommandEvent& event) {
diff --git a/Scripts/transcribe.py b/Scripts/transcribe.py
index 694fd0b..6cb78cd 100644
--- a/Scripts/transcribe.py
+++ b/Scripts/transcribe.py
@@ -11,6 +11,7 @@ from sentence_splitter import split_text_into_sentences
 import argparse
 import copy
 import ctranslate2
+import editdistance
 import generate_utils
 import keybind_event_machine
 import keyboard
@@ -39,7 +40,7 @@ class AudioState:
 
         # The maximum length that recordAudio() will put into frames before it
         # starts dropping from the start.
-        self.MAX_LENGTH_S = 30
+        self.MAX_LENGTH_S = 300
         # The minimum length that recordAudio() will wait for before saving audio.
         self.MIN_LENGTH_S = 1
 
@@ -257,11 +258,20 @@ def transcribe(audio_state, model, frames, use_cpu: bool) -> typing.Tuple[str,st
             c1 = first_segments[-2]
             c2 = first_segments[-3]
             c3 = first_segments[-4]
+
+            c0_c1_d = editdistance.eval(c0[2], c1[2])
+            c1_c2_d = editdistance.eval(c1[2], c2[2])
+            c2_c3_d = editdistance.eval(c2[2], c3[2])
+
+            max_edit = 8
+
             #print(f"c0: {c0}, c1: {c1}, c2: {c2}")
-            if c0 == c1 and c1 == c2 and c2 == c3:
+            #if c0 == c1 and c1 == c2 and c2 == c3:
+            if c0_c1_d < max_edit and c1_c2_d < max_edit and c2_c3_d < max_edit:
                 # For simplicity, completely reset saved audio ranges.
                 audio_state.ranges_ls = []
                 committed_text = c0[2]
+                print(f"Dropping frames until {c0[1]}")
                 n_frames_to_drop = int(ceil(audio_state.RATE * c0[1]))
                 audio_state.drop_frames_till_i = n_frames_to_drop
 
-- 
cgit v1.2.3