From 9a18326aecd53c5619109dd2c5da1f26689f2f7b Mon Sep 17 00:00:00 2001
From: yum <yum.food.vr@gmail.com>
Date: Sat, 9 Sep 2023 17:00:36 -0700
Subject: Bugfix: fix process leak in PythonWrapper::InvokeCommandWithArgs

It now waits up to 10 seconds for a graceful exit and falls back on
the equivalent of a SIGKILL. The caller is assumed to have signaled to the
process through `in_cb` that an exit is desired.

Also:

* Fix graceful exit path of transcribe_v2.py.
* Add toggle to enable/disable preview text. It is enabled by default.
* Constrain transcription temperature to 0.0. This keeps latency more
  predictable at the cost of some accuracy.
---
 Scripts/steamvr.py       |  5 +++--
 Scripts/transcribe_v2.py | 38 ++++++++++++++++++++++++++------------
 2 files changed, 29 insertions(+), 14 deletions(-)

(limited to 'Scripts')

diff --git a/Scripts/steamvr.py b/Scripts/steamvr.py
index da07134..3e6c6c9 100644
--- a/Scripts/steamvr.py
+++ b/Scripts/steamvr.py
@@ -19,6 +19,7 @@ class InputEvent:
 def pollButtonPress(
         hand: str = "right",
         button: str = "b",
+        ctrl = None  # ThreadControl object
         ) -> int:
     hands = {}
     hands["left"] = vr.TrackedControllerRole_LeftHand
@@ -31,7 +32,7 @@ def pollButtonPress(
 
     system = None
     first = True
-    while not system:
+    while ctrl.run_app and not system:
         try:
             system = vr.init(vr.VRApplication_Background)
         except Exception as e:
@@ -42,7 +43,7 @@ def pollButtonPress(
     last_packet = 0
     event_high = False
 
-    while True:
+    while ctrl.run_app:
         time.sleep(0.01)
 
         lh_idx = system.getTrackedDeviceIndexForControllerRole(hands[hand])
diff --git a/Scripts/transcribe_v2.py b/Scripts/transcribe_v2.py
index 9b3a404..9e642e9 100644
--- a/Scripts/transcribe_v2.py
+++ b/Scripts/transcribe_v2.py
@@ -428,6 +428,7 @@ class Whisper:
                 audio,
                 language = langcodes.find(self.cfg["language"]).language,
                 vad_filter = True,
+                temperature=0.0,
                 without_timestamps = False)
         res = []
         for s in segments:
@@ -463,9 +464,11 @@ def saveAudio(audio: bytes, path: str):
 
 class VadCommitter:
     def __init__(self,
+            cfg: typing.Dict,
             collector: AudioCollector,
             whisper: Whisper,
             segmenter: AudioSegmenter):
+        self.cfg = cfg
         self.collector = collector
         self.whisper = whisper
         self.segmenter = segmenter
@@ -486,7 +489,7 @@ class VadCommitter:
             for s in segments:
                 print(f"commit segment: {s}")
             delta = ''.join(s.transcript for s in segments)
-            #print(f"delta get: {delta}")
+            print(f"delta get: {delta}")
             audio = self.collector.getAudio()
 
             #ts = datetime.fromtimestamp(self.collector.now() - latency_s)
@@ -494,12 +497,13 @@ class VadCommitter:
             #saveAudio(commit_audio, filename)
 
         preview = ""
-        if has_audio:
-            segments = self.whisper.transcribe(audio)
-            preview = "".join(s.transcript for s in segments)
-        else:
-            #print("VAD detects no audio, skip transcription")
-            self.collector.keepLast(1.0)
+        if self.cfg["enable_previews"]:
+            if has_audio:
+                segments = self.whisper.transcribe(audio)
+                preview = "".join(s.transcript for s in segments)
+            else:
+                #print("VAD detects no audio, skip transcription")
+                self.collector.keepLast(1.0)
 
         return TranscriptCommit(
                 delta,
@@ -556,7 +560,7 @@ def evaluate(cfg,
     collector = CompressingAudioCollector(collector)
     whisper = Whisper(collector, cfg)
     segmenter = AudioSegmenter(min_silence_ms=250)
-    committer = VadCommitter(collector, whisper, segmenter)
+    committer = VadCommitter(cfg, collector, whisper, segmenter)
     transcript = ""
     commits = []
     last_commit_ts = None
@@ -685,10 +689,14 @@ def vrInputThread(ctrl: ThreadControl):
     last_rising = time.time()
     last_medium_press_end = 0
 
-    button_generator = steamvr.pollButtonPress(hand=hand_id, button=button_id)
+    button_generator = steamvr.pollButtonPress(hand=hand_id, button=button_id,
+            ctrl=ctrl)
     while ctrl.run_app:
         time.sleep(0.01)
-        event = next(button_generator)
+        try:
+            event = next(button_generator)
+        except StopIteration:
+            break
 
         if event.opcode == steamvr.EVENT_RISING_EDGE:
             last_rising = time.time()
@@ -790,7 +798,7 @@ def kbInputThread(ctrl: ThreadControl):
     PAUSE_STATE = 1
     state = PAUSE_STATE
 
-    while ctrl.run_app == True:
+    while ctrl.run_app:
         time.sleep(0.01)
 
         cur_press_time = machine.getNextPressTime()
@@ -879,7 +887,7 @@ def run(cfg):
     collector = CompressingAudioCollector(collector)
     whisper = Whisper(collector, cfg)
     segmenter = AudioSegmenter(min_silence_ms=250)
-    committer = VadCommitter(collector, whisper, segmenter)
+    committer = VadCommitter(cfg, collector, whisper, segmenter)
     pager = OscPager(cfg)
 
     ctrl = ThreadControl(cfg)
@@ -909,13 +917,19 @@ def run(cfg):
 
     for line in sys.stdin:
         if "exit" in line or "quit" in line:
+            print("Exit requested", file=sys.stderr)
             break
 
     ctrl.run_app = False
+    print("Join transcription thread")
     transcribe_audio_thd.join()
+    print("Join vr input thread")
     vr_input_thd.join()
+    print("Join kb input thread")
     kb_input_thd.join()
+    print("Join osc thread")
     osc_thd.join()
+    print("Done")
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
-- 
cgit v1.2.3