From 7fb9c575aea4d318e9c14b82174d1b323171b62b Mon Sep 17 00:00:00 2001
From: yum <yum.food.vr@gmail.com>
Date: Fri, 30 May 2025 13:32:36 -0700
Subject: More stuff

- fix unicode output from python terminal
- fix cpu inference
- add filters
- add beam search params to UI
- DRY up config definition in UI
---
 ui/config-schema.js | 49 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 49 insertions(+)
 create mode 100644 ui/config-schema.js

(limited to 'ui/config-schema.js')

diff --git a/ui/config-schema.js b/ui/config-schema.js
new file mode 100644
index 0000000..b1108ff
--- /dev/null
+++ b/ui/config-schema.js
@@ -0,0 +1,49 @@
+// Shared configuration schema: maps each config key to its { type, default } pair
+const CONFIG_SCHEMA = {
+    // String fields (microphone is the exception: its default is numeric)
+    compute_type: { type: 'select', default: 'float16' },
+    language: { type: 'select', default: 'english' },
+    model: { type: 'select', default: 'turbo' },
+    microphone: { type: 'number', default: 0 },
+    user_prompt: { type: 'text', default: 'Use proper punctuation and grammar. Prefer spelled out numbers like one, eleven, twenty, etc. Mm.' },
+    
+    // Number fields
+    gpu_idx: { type: 'number', default: 0 },
+    max_speech_duration_s: { type: 'number', default: 10 },
+    min_silence_duration_ms: { type: 'number', default: 250 },
+    reset_after_silence_s: { type: 'number', default: 15 },
+    transcription_loop_delay_ms: { type: 'number', default: 100 },
+    block_width: { type: 'number', default: 2 },
+    num_blocks: { type: 'number', default: 40 },
+    rows: { type: 'number', default: 10 },
+    cols: { type: 'number', default: 24 },
+    beam_size: { type: 'number', default: 5 },
+    best_of: { type: 'number', default: 5 },
+    
+    // Boolean fields (stored as 1/0)
+    enable_debug_mode: { type: 'boolean', default: 0 },
+    enable_previews: { type: 'boolean', default: 1 },
+    save_audio: { type: 'boolean', default: 0 },
+    use_cpu: { type: 'boolean', default: 0 },
+    enable_lowercase_filter: { type: 'boolean', default: 0 },
+    enable_uppercase_filter: { type: 'boolean', default: 0 },
+    enable_profanity_filter: { type: 'boolean', default: 0 },
+    remove_trailing_period: { type: 'boolean', default: 0 }
+};
+
+// Helper to extract just the default values
+function getDefaultConfig() {
+    // Fold every schema entry down to its `default`, keyed by config name.
+    return Object.entries(CONFIG_SCHEMA).reduce((defaults, [name, spec]) => {
+        defaults[name] = spec.default;
+        return defaults;
+    }, {});
+}
+
+// Export via CommonJS when `module` exists (main process); otherwise publish
+// both names as window globals for a renderer that loads this as a script.
+if (typeof module !== 'undefined' && module.exports) {
+    module.exports = { CONFIG_SCHEMA, getDefaultConfig };
+} else if (typeof window !== 'undefined') {
+    Object.assign(window, { CONFIG_SCHEMA, getDefaultConfig });
+}
\ No newline at end of file
-- 
cgit v1.2.3


From 73de7cb2d8fb964e7f76ab55420e9bc331bf7bea Mon Sep 17 00:00:00 2001
From: yum <yum.food.vr@gmail.com>
Date: Fri, 30 May 2025 21:31:05 -0700
Subject: More stuff

- add desktop and vr input threads
- add audio feedback for input
- add volume control for audio feedback
- add UI for custom chatbox/built in chatbox
- add ability to dismiss built in chatbox (sync empty messages)
- limit lines in python console
- limit length of each transcript
---
 Sounds/Dismiss_Noise.wav       | Bin 0 -> 192078 bytes
 Sounds/Dismiss_Noise_Quiet.wav | Bin 0 -> 192078 bytes
 Sounds/KB_Noise_Off.wav        | Bin 0 -> 192078 bytes
 Sounds/KB_Noise_Off_Quiet.wav  | Bin 0 -> 192078 bytes
 Sounds/KB_Noise_On.wav         | Bin 0 -> 266318 bytes
 Sounds/KB_Noise_On_Quiet.wav   | Bin 0 -> 266318 bytes
 Sounds/Noise_Off.wav           | Bin 0 -> 67278 bytes
 Sounds/Noise_Off_Quiet.wav     | Bin 0 -> 67278 bytes
 Sounds/Noise_On.wav            | Bin 0 -> 67278 bytes
 Sounds/Noise_On_Quiet.wav      | Bin 0 -> 67278 bytes
 Sounds/speech_noise.wav        | Bin 0 -> 61518 bytes
 app/hi.py                      | 308 ++++++++++++++++++++++++++++++++++-------
 app/keybind_event_machine.py   |  21 +++
 app/requirements.txt           |   3 +
 app/shared_thread_data.py      |   7 +-
 app/steamvr.py                 |  87 ++++++++++++
 app/stt.py                     | 143 ++++++++++---------
 config.yaml                    |  15 +-
 ui/config-schema.js            |  11 +-
 ui/index.html                  |  50 +++++++
 ui/index.js                    |  16 ++-
 ui/preload.js                  |   1 +
 ui/renderer.js                 |  58 ++++++++
 23 files changed, 595 insertions(+), 125 deletions(-)
 create mode 100644 Sounds/Dismiss_Noise.wav
 create mode 100644 Sounds/Dismiss_Noise_Quiet.wav
 create mode 100644 Sounds/KB_Noise_Off.wav
 create mode 100644 Sounds/KB_Noise_Off_Quiet.wav
 create mode 100644 Sounds/KB_Noise_On.wav
 create mode 100644 Sounds/KB_Noise_On_Quiet.wav
 create mode 100644 Sounds/Noise_Off.wav
 create mode 100644 Sounds/Noise_Off_Quiet.wav
 create mode 100644 Sounds/Noise_On.wav
 create mode 100644 Sounds/Noise_On_Quiet.wav
 create mode 100644 Sounds/speech_noise.wav
 create mode 100644 app/keybind_event_machine.py
 create mode 100644 app/steamvr.py

(limited to 'ui/config-schema.js')

diff --git a/Sounds/Dismiss_Noise.wav b/Sounds/Dismiss_Noise.wav
new file mode 100644
index 0000000..fe60f21
Binary files /dev/null and b/Sounds/Dismiss_Noise.wav differ
diff --git a/Sounds/Dismiss_Noise_Quiet.wav b/Sounds/Dismiss_Noise_Quiet.wav
new file mode 100644
index 0000000..5c3b1cb
Binary files /dev/null and b/Sounds/Dismiss_Noise_Quiet.wav differ
diff --git a/Sounds/KB_Noise_Off.wav b/Sounds/KB_Noise_Off.wav
new file mode 100644
index 0000000..64d9c6f
Binary files /dev/null and b/Sounds/KB_Noise_Off.wav differ
diff --git a/Sounds/KB_Noise_Off_Quiet.wav b/Sounds/KB_Noise_Off_Quiet.wav
new file mode 100644
index 0000000..b965e6a
Binary files /dev/null and b/Sounds/KB_Noise_Off_Quiet.wav differ
diff --git a/Sounds/KB_Noise_On.wav b/Sounds/KB_Noise_On.wav
new file mode 100644
index 0000000..a959041
Binary files /dev/null and b/Sounds/KB_Noise_On.wav differ
diff --git a/Sounds/KB_Noise_On_Quiet.wav b/Sounds/KB_Noise_On_Quiet.wav
new file mode 100644
index 0000000..e49513e
Binary files /dev/null and b/Sounds/KB_Noise_On_Quiet.wav differ
diff --git a/Sounds/Noise_Off.wav b/Sounds/Noise_Off.wav
new file mode 100644
index 0000000..0d3843c
Binary files /dev/null and b/Sounds/Noise_Off.wav differ
diff --git a/Sounds/Noise_Off_Quiet.wav b/Sounds/Noise_Off_Quiet.wav
new file mode 100644
index 0000000..d5c6171
Binary files /dev/null and b/Sounds/Noise_Off_Quiet.wav differ
diff --git a/Sounds/Noise_On.wav b/Sounds/Noise_On.wav
new file mode 100644
index 0000000..28c8f6b
Binary files /dev/null and b/Sounds/Noise_On.wav differ
diff --git a/Sounds/Noise_On_Quiet.wav b/Sounds/Noise_On_Quiet.wav
new file mode 100644
index 0000000..79170f5
Binary files /dev/null and b/Sounds/Noise_On_Quiet.wav differ
diff --git a/Sounds/speech_noise.wav b/Sounds/speech_noise.wav
new file mode 100644
index 0000000..a6224ee
Binary files /dev/null and b/Sounds/speech_noise.wav differ
diff --git a/app/hi.py b/app/hi.py
index bab0fd4..1297b37 100644
--- a/app/hi.py
+++ b/app/hi.py
@@ -1,25 +1,34 @@
 import app_config
 import argparse
 import io
+import keybind_event_machine
 from math import floor, ceil
 import msvcrt
 import os
 from pythonosc import udp_client
 import sentencepiece as spm
+import steamvr
 from shared_thread_data import SharedThreadData
 import stt
 import sys
 import threading
 import time
+import pygame
 
 sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
 sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding='utf-8')
 
+# Initialize pygame mixer
+pygame.mixer.init()
+
 TESTS_ENABLED = True
 
 # 0 = quiet, 1 = verbose, 2 = very verbose
 LOG_LEVEL = 0
 
+# Global volume control (0.0 to 1.0)
+VOLUME = 0.3
+
 APP_ROOT = os.path.dirname(os.path.abspath(__file__))
 PROJECT_ROOT = os.path.dirname(APP_ROOT)
 
@@ -315,79 +324,276 @@ def handle_input(state: InputState, line: str, tokenizer, osc_client, cfg):
     send_data(osc_client, [indices[0]], [diff_blocks[0]], [diff_visual_pointers[0]])
 
 def osc_thread(shared_data: SharedThreadData):
+    """Forward transcript + preview over OSC until exit_event is set (built-in or custom chatbox)."""
-    tokenizer = get_tokenizer()
     osc_client = getOscClient()
 
-    # Prime the board
-    print("Priming the board")
-    input_state = InputState()
-    handle_input(input_state, "", tokenizer, osc_client, shared_data.cfg)
+    def join_segments(a, b):
+        if len(a) > 0 and a[-1] != ' ':
+            return a + ' ' + b
+        else:
+            return a + b
+
+    if shared_data.cfg["use_builtin"]:
+        last_change = time.time()
+        remote_word = ""
+        while not shared_data.exit_event.is_set():
+            time.sleep(0.1)
+            with shared_data.word_lock:
+                local_word = join_segments(shared_data.transcript,
+                                           shared_data.preview)
+            local_word = local_word[-140:]  # keep only the newest 140 characters
+            if local_word == remote_word:
+                continue
+            if time.time() - last_change < 1.5:  # send at most once per 1.5 s
+                continue
+            print(f"Send {local_word}", flush=True)
+            osc_client.send_message("/chatbox/input", (local_word, True, False))
+            last_change = time.time()
+            remote_word = local_word
+    else:
+        # Custom chatbox
+        tokenizer = get_tokenizer()
+
+        # Prime the board
+        print("Priming the board")
+        input_state = InputState()
+        handle_input(input_state, "", tokenizer, osc_client, shared_data.cfg)
+
+        while not shared_data.exit_event.is_set():
+            # SharedThreadData has no `word` attribute; build it from transcript + preview.
+            with shared_data.word_lock:
+                word_copy = join_segments(shared_data.transcript,
+                                          shared_data.preview)
+            handle_input(input_state, word_copy, tokenizer, osc_client, shared_data.cfg)
+            time.sleep(0.01)
+
+
+def vrInputThread(shared_data: SharedThreadData):
+    """Translate SteamVR controller button presses into record/pause/clear actions."""
+    RECORD_STATE = 0
+    PAUSE_STATE = 1
+    state = PAUSE_STATE
+
+    hand_id = shared_data.cfg["button_hand"]
+    button_id = shared_data.cfg["button_type"]
 
+    # Rough description of state machine:
+    #   Single short press: toggle transcription
+    #   Medium press: dismiss custom chatbox
+    #   Long press: update chatbox in place
+    #   Medium press + long press: type transcription
+
+    last_rising = time.time()
+    last_medium_press_end = 0
+
+    waveform0 =  os.path.join(PROJECT_ROOT, "Sounds/Noise_On_Quiet.wav")
+    waveform1 =  os.path.join(PROJECT_ROOT, "Sounds/Noise_Off_Quiet.wav")
+    waveform2 =  os.path.join(PROJECT_ROOT, "Sounds/Dismiss_Noise_Quiet.wav")
+    waveform3 =  os.path.join(PROJECT_ROOT, "Sounds/KB_Noise_Off_Quiet.wav")
+
+    button_generator = steamvr.pollButtonPress(hand=hand_id, button=button_id,
+            shared_data=shared_data)
     while not shared_data.exit_event.is_set():
-        word_copy = ""
+        time.sleep(0.01)
+        try:
+            event = next(button_generator)
+        except StopIteration:
+            break
+
         with shared_data.word_lock:
-            word_copy = shared_data.word
-        handle_input(input_state, word_copy, tokenizer, osc_client, shared_data.cfg)
+            if not shared_data.stream or not shared_data.collector:
+                continue
+
+            if event.opcode == steamvr.EVENT_RISING_EDGE:
+                last_rising = time.time()
+
+                if state == PAUSE_STATE:
+                    shared_data.stream.pause(False)
+                    shared_data.stream.getSamples()
+
+            elif event.opcode == steamvr.EVENT_FALLING_EDGE:
+                now = time.time()
+                if now - last_rising > 1.5:
+                    # Long press: treat as the end of transcription.
+                    state = PAUSE_STATE
+
+                    shared_data.stream.pause(True)
+
+                    if last_rising - last_medium_press_end < 1.0:
+                        # Type transcription
+                        if shared_data.cfg["enable_local_beep"]:
+                            play_sound_with_volume(waveform3)
+                    else:
+                        if shared_data.cfg["enable_local_beep"]:
+                            play_sound_with_volume(waveform1)
+                    continue  # handled; otherwise the short-press toggle below resumes recording
+
+                elif now - last_rising > 0.5:
+                    # Medium press
+                    print("CLEARING", file=sys.stderr)
+                    last_medium_press_end = now
+                    state = PAUSE_STATE
+
+                    if shared_data.cfg["enable_local_beep"]:
+                        play_sound_with_volume(waveform2)
+
+                    # Flush the *entire* pipeline.
+                    shared_data.stream.pause(True)
+                    shared_data.stream.getSamples()
+                    shared_data.collector.dropAudio()
+                    shared_data.transcript = ""
+                    shared_data.preview = ""
+                    continue
+
+                # Short hold
+                if state == RECORD_STATE:
+                    print("PAUSED", file=sys.stderr)
+                    state = PAUSE_STATE
+
+                    shared_data.stream.pause(True)
+
+                    if shared_data.cfg["enable_local_beep"]:
+                        play_sound_with_volume(waveform1)
+                elif state == PAUSE_STATE:
+                    print("RECORDING", file=sys.stderr)
+                    state = RECORD_STATE
+                    if shared_data.cfg["reset_on_toggle"]:
+                        if shared_data.cfg["enable_debug_mode"]:
+                            print("Toggle detected, dropping transcript (3)",
+                                    file=sys.stderr)
+                        shared_data.transcript = ""
+                        shared_data.preview = ""
+                    else:
+                        if shared_data.cfg["enable_debug_mode"]:
+                            print("Toggle detected, committing preview text (3)",
+                                  file=sys.stderr)
+
+                    shared_data.stream.pause(False)
+
+                    if shared_data.cfg["enable_local_beep"]:
+                        play_sound_with_volume(waveform0)
+
+def kbInputThread(shared_data: SharedThreadData):
+    """Turn presses of cfg["keybind"] into toggle (single) or clear (double press) actions."""
+    machine = keybind_event_machine.KeybindEventMachine(shared_data.cfg["keybind"])
+    last_press_time = 0
+
+    # Two presses closer together than this many seconds count as a double press.
+    double_press_timeout = 0.5
+
+    RECORD_STATE = 0
+    PAUSE_STATE = 1
+    state = PAUSE_STATE
+
+    waveform0 = os.path.join(PROJECT_ROOT, "Sounds/Noise_On_Quiet.wav")
+    waveform1 = os.path.join(PROJECT_ROOT, "Sounds/Noise_Off_Quiet.wav")
+    waveform2 = os.path.join(PROJECT_ROOT, "Sounds/Dismiss_Noise_Quiet.wav")
+    waveform3 = os.path.join(PROJECT_ROOT, "Sounds/KB_Noise_Off_Quiet.wav")
+
+    while not shared_data.exit_event.is_set():
         time.sleep(0.01)
 
+        cur_press_time = machine.getNextPressTime()
+        if cur_press_time == 0:
+            continue
+
+        with shared_data.word_lock:
+            if not shared_data.stream or not shared_data.collector:
+                continue  # stream/collector not set yet; this press is discarded
+
+            EVENT_SINGLE_PRESS = 0
+            EVENT_DOUBLE_PRESS = 1
+            if last_press_time == 0:
+                event = EVENT_SINGLE_PRESS
+            elif cur_press_time - last_press_time < double_press_timeout:
+                event = EVENT_DOUBLE_PRESS
+            else:
+                event = EVENT_SINGLE_PRESS
+            last_press_time = cur_press_time
+
+            if event == EVENT_DOUBLE_PRESS:
+                print("CLEARING", file=sys.stderr)
+                state = PAUSE_STATE
+
+                if shared_data.cfg["enable_local_beep"]:
+                    play_sound_with_volume(waveform2)
+
+                # Flush the *entire* pipeline.
+                shared_data.stream.pause(True)
+                shared_data.stream.getSamples()
+                shared_data.collector.dropAudio()
+                shared_data.transcript = ""
+                shared_data.preview = ""
+                continue
+
+            # Single press: toggle between recording and paused.
+            if state == RECORD_STATE:
+                print("PAUSED", file=sys.stderr)
+                state = PAUSE_STATE
+
+                shared_data.stream.pause(True)
+
+                if shared_data.cfg["enable_local_beep"]:
+                    play_sound_with_volume(waveform1)
+            elif state == PAUSE_STATE:
+                print("RECORDING", file=sys.stderr)
+                state = RECORD_STATE
+                if shared_data.cfg["reset_on_toggle"]:
+                    if shared_data.cfg["enable_debug_mode"]:
+                        print("Toggle detected, dropping transcript (2)",
+                                file=sys.stderr)
+                    shared_data.transcript = ""
+                    shared_data.preview = ""
+                else:
+                    if shared_data.cfg["enable_debug_mode"]:
+                        print("Toggle detected, committing preview text (2)",
+                                file=sys.stderr)
+
+                shared_data.stream.pause(False)
+
+                if shared_data.cfg["enable_local_beep"]:
+                    play_sound_with_volume(waveform0)
+
+def play_sound_with_volume(filepath):
+    """Play the sound at `filepath` at the global VOLUME.
+    Failures are logged to stderr rather than raised.
+    """
+    try:
+        clip = pygame.mixer.Sound(filepath)
+        clip.set_volume(VOLUME)
+        clip.play()
+    except Exception as err:
+        print(f"Error playing sound {filepath}: {err}", file=sys.stderr)
+
 if __name__ == "__main__":
     cli_args = parse_args()
     cfg = app_config.getConfig(cli_args.config)
     shared_data = SharedThreadData(cfg)
-    if False:
-        osc_thread = threading.Thread(
-                target=osc_thread,
-                args=(shared_data,))
-        osc_thread.start()
+    osc_thread = threading.Thread(  # NOTE: rebinds the osc_thread function name
+            target=osc_thread,
+            args=(shared_data,))
+    osc_thread.start()
 
     transcribe_thread = threading.Thread(
             target=stt.transcriptionThread,
             args=(shared_data,))
     transcribe_thread.start()
 
+    vr_input_thd = threading.Thread(target=vrInputThread, args=(shared_data,))
+    vr_input_thd.start()
+
+    kb_input_thd = threading.Thread(target=kbInputThread, args=(shared_data,))
+    kb_input_thd.start()
+
-    word_is_over = False
-    local_word = ""
-    while True:
-        char_bytes = msvcrt.getch()
-        if char_bytes == b'\x03':  # ctrl+C
-            break
-
-        time.sleep(0.1)
-        continue
-
-        try:
-            char = char_bytes.decode('utf-8')
-            if char == '\r' or char == '\n':
-                word_is_over = True
-                continue
-        except UnicodeDecodeError:
-            print(f"Unsupported character: {char_bytes}")
-            if char_bytes == b'\x00' or char_bytes == b'\xe0':
-                special_char = msvcrt.getch()
-            continue
-
-        if char_bytes == b'\x03':  # ctrl+C
-            break
-        elif char_bytes == b'\x08':  # backspace
-            with shared_data.word_lock:
-                shared_data.word = shared_data.word[:-1]
-                local_word = shared_data.word
-        elif char_bytes == b'\x0c':  # ctrl+L
-            with shared_data.word_lock:
-                shared_data.word = ""
-                local_word = shared_data.word
-        elif word_is_over:
-            with shared_data.word_lock:
-                shared_data.word = char
-                local_word = shared_data.word
-            word_is_over = False
-        else:
-            with shared_data.word_lock:
-                shared_data.word += char
-                local_word = shared_data.word
-        print(local_word + "_")
+    try:
+        while not shared_data.exit_event.is_set():
+            time.sleep(0.1)
+    except KeyboardInterrupt:
+        pass  # Ctrl+C: fall through to the orderly shutdown below
     shared_data.exit_event.set()
-    if False:
-        osc_thread.join()
+    osc_thread.join()
     transcribe_thread.join()
+    vr_input_thd.join()
+    kb_input_thd.join()
 
diff --git a/app/keybind_event_machine.py b/app/keybind_event_machine.py
new file mode 100644
index 0000000..3ce6794
--- /dev/null
+++ b/app/keybind_event_machine.py
@@ -0,0 +1,21 @@
+import keyboard
+import time
+
+class KeybindEventMachine:
+    """Queues a timestamp each time the global hotkey `keybind` fires."""
+    def __init__(self, keybind: str):
+        self.keybind = keybind
+        self.events = []
+        keyboard.add_hotkey(keybind, self.onPress)
+
+    def onPress(self) -> None:
+        self.events.append(time.time())
+
+    # Returns the timestamp when the keybind was pressed, or 0 if no keypresses
+    # are queued. Pops in a single step: reading events[0] and then re-slicing
+    # the list could drop a press appended concurrently by onPress.
+    def getNextPressTime(self) -> float:
+        if len(self.events) == 0:
+            return 0
+        return self.events.pop(0)
+
diff --git a/app/requirements.txt b/app/requirements.txt
index f8b7069..e68a16c 100644
--- a/app/requirements.txt
+++ b/app/requirements.txt
@@ -1,8 +1,11 @@
 faster-whisper
 hf-xet
+keyboard
 langcodes
 pyaudio
+pygame
 pydub
 python-osc
 sentencepiece
 silero-vad
+openvr
diff --git a/app/shared_thread_data.py b/app/shared_thread_data.py
index ba0a419..40885e8 100644
--- a/app/shared_thread_data.py
+++ b/app/shared_thread_data.py
@@ -2,7 +2,12 @@ import threading
 
 class SharedThreadData:
     def __init__(self, cfg):
-        self.word = ""
+        self.transcript = ""
+        self.preview = ""
+
+        self.stream = None
+        self.collector = None
+
         self.word_lock = threading.Lock()
         self.exit_event = threading.Event()
         self.cfg = cfg
diff --git a/app/steamvr.py b/app/steamvr.py
new file mode 100644
index 0000000..64f34f5
--- /dev/null
+++ b/app/steamvr.py
@@ -0,0 +1,87 @@
+#!/usr/bin/env python3
+
+import openvr as vr
+import sys
+import time
+
+EVENT_NONE = 0
+EVENT_RISING_EDGE = 1
+EVENT_FALLING_EDGE = 2
+
+class InputEvent:
+    """A button event; `opcode` is one of the EVENT_* constants above."""
+    def __init__(self, opcode: int):
+        self.opcode = opcode
+
+def pollButtonPress(
+        hand: str = "right",
+        button: str = "b",
+        shared_data = None  # SharedThreadData object
+        ):
+    """Generator: yield an InputEvent on each press (rising) and release (falling) edge.
+
+    hand is "left" or "right"; button is "a", "b" or "thumbstick". Polling stops
+    once shared_data.exit_event is set; with shared_data=None it never stops.
+    """
+    hands = {}
+    hands["left"] = vr.TrackedControllerRole_LeftHand
+    hands["right"] = vr.TrackedControllerRole_RightHand
+
+    buttons = {}
+    buttons["a"] = vr.k_EButton_IndexController_A
+    buttons["b"] = vr.k_EButton_IndexController_B
+    buttons["thumbstick"] = vr.k_EButton_Axis0
+
+    def should_exit():
+        return shared_data is not None and shared_data.exit_event.is_set()
+
+    system = None
+    first = True
+    while not should_exit() and not system:
+        try:
+            system = vr.init(vr.VRApplication_Background)
+        except Exception as e:
+            if first:
+                print(f"Failed to start steamVR input thread: {repr(e)}", file=sys.stderr)
+            first = False
+            time.sleep(1)
+    last_packet = 0  # NOTE(review): never updated, so the check below only skips packet 0
+    event_high = False
+
+    while not should_exit():
+        time.sleep(0.01)
+        device_idx = system.getTrackedDeviceIndexForControllerRole(hands[hand])
+        got_state, state = system.getControllerState(device_idx)
+        if not got_state:
+            continue
+
+        if state.unPacketNum == last_packet:
+            continue
+
+        # Clicking joysticks and moving joysticks fire the same events. To
+        # differentiate movement from clicking, we create a dead zone: if the event
+        # fires while the stick isn't moved far from center, we assume it's a
+        # click, not movement.
+        dead_zone_radius = 0.7
+
+        button_mask = (1 << buttons[button])
+        if (state.ulButtonPressed & button_mask) != 0 and\
+                (state.rAxis[0].x**2 + state.rAxis[0].y**2 < dead_zone_radius**2):
+            if not event_high:
+                yield InputEvent(EVENT_RISING_EDGE)
+            event_high = True
+        elif event_high:
+            event_high = False
+            yield InputEvent(EVENT_FALLING_EDGE)
+
+if __name__ == "__main__":
+    # Manual smoke test: print button edges until interrupted (Ctrl+C).
+    import threading
+    class _Stub:  # minimal stand-in for SharedThreadData
+        exit_event = threading.Event()
+    for event in pollButtonPress(shared_data=_Stub()):
+        if event.opcode == EVENT_RISING_EDGE:
+            print("rising edge")
+        elif event.opcode == EVENT_FALLING_EDGE:
+            print("falling edge")
+
diff --git a/app/stt.py b/app/stt.py
index a3988e1..c1f4836 100644
--- a/app/stt.py
+++ b/app/stt.py
@@ -299,9 +299,11 @@ class CompressingAudioCollector(AudioCollectorFilter):
 class AudioSegmenter:
     def __init__(self,
             min_silence_ms=250,
-            max_speech_s=5):
+            max_speech_s=5,
+            min_speech_duration_ms=100):
         self.min_silence_ms = min_silence_ms
         self.max_speech_s = max_speech_s
+        self.min_speech_duration_ms = min_speech_duration_ms
 
         # Load Silero VAD model
         self.model = load_silero_vad()
@@ -309,6 +311,7 @@ class AudioSegmenter:
         self.vad_threshold = 0.3
         self.min_silence_duration_ms = min_silence_ms
         self.max_speech_duration_s = max_speech_s
+        self.min_speech_duration_ms = min_speech_duration_ms
 
     def segmentAudio(self, audio: bytes):
         # Convert audio bytes to numpy array expected by silero-vad
@@ -324,6 +327,7 @@ class AudioSegmenter:
             threshold=self.vad_threshold,
             min_silence_duration_ms=self.min_silence_duration_ms,
             max_speech_duration_s=self.max_speech_duration_s,
+            min_speech_duration_ms=self.min_speech_duration_ms,
             return_seconds=False  # We want frame indices, not seconds
         )
 
@@ -698,7 +702,8 @@ def transcriptionThread(shared_data: SharedThreadData):
     collector = NormalizingAudioCollector(collector)
     whisper = Whisper(collector, shared_data.cfg)
     segmenter = AudioSegmenter(min_silence_ms=shared_data.cfg["min_silence_duration_ms"],
-            max_speech_s=shared_data.cfg["max_speech_duration_s"])
+            max_speech_s=shared_data.cfg["max_speech_duration_s"],
+            min_speech_duration_ms=shared_data.cfg["min_speech_duration_ms"])
     committer = VadCommitter(shared_data.cfg, collector, whisper, segmenter)
 
     plugins = []
@@ -715,6 +720,10 @@ def transcriptionThread(shared_data: SharedThreadData):
     transcript = ""
     preview = ""
 
+    with shared_data.word_lock:
+        shared_data.stream = stream
+        shared_data.collector = collector
+
     print(f"Ready to go!", flush=True)
 
     while not shared_data.exit_event.is_set():
@@ -724,70 +733,72 @@ def transcriptionThread(shared_data: SharedThreadData):
 
         commit = committer.getDelta()
 
-        for plugin in plugins:
-            commit = plugin.transform(commit)
-
-        if len(commit.delta) > 0 or len(commit.preview) > 0:
-            # Avoid re-sending text after long pauses
-            if shared_data.cfg["reset_after_silence_s"] > 0:
-                silence_duration = 0
-                if last_stable_commit:
-                    last_commit_end_ts = \
-                            last_stable_commit.start_ts + \
-                            last_stable_commit.duration_s
-                    silence_duration = commit.start_ts - last_commit_end_ts
-                if silence_duration > shared_data.cfg["reset_after_silence_s"]:
-                    if shared_data.cfg["enable_debug_mode"]:
-                        print(f"Resetting transcript after {silence_duration}-second "
-                                "silence", file=sys.stderr)
-                    transcript = ""
-                    preview = ""
-                    whisper.recent_context = ""  # Reset context too
-                if commit.delta:
-                    last_stable_commit = commit
-
-            # Hard-cap displayed transcript length at 4k characters to prevent
-            # runaway memory use in UI. Keep the full transcript to avoid
-            # breaking OSC pager.
-            transcript = transcript[-4096:]
-            def join_segments(a, b):
-                if len(a) > 0 and a[-1] != ' ':
-                    return a + ' ' + b
-                else:
-                    return a + b
-            transcript = join_segments(transcript, commit.delta)
-            preview = commit.preview
-
-            for filt in filters:
-                transcript, preview = filt.transform(transcript, preview)
-
-            try:
-                print(f"Transcript: {transcript}", flush=True)
-            except UnicodeEncodeError:
-                print("Failed to encode transcript - discarding delta",
-                        file=sys.stderr)
-                continue
-            try:
-                print(f"Preview: {preview}", flush=True)
-            except UnicodeEncodeError:
-                print("Failed to encode preview - discarding", file=sys.stderr)
-
-            with shared_data.word_lock:
-                shared_data.word = join_segments(transcript, preview)
-
-            if shared_data.cfg["enable_debug_mode"]:
-                print(f"commit latency: {commit.latency_s}", file=sys.stderr)
-                print(f"commit thresh: {commit.thresh_at_commit}",
-                        file=sys.stderr)
-
-        if len(transcript) > 0 and \
-                (not transcript.endswith(' ')) and \
-                (not commit.delta.startswith(' ')):
-            commit.delta = ' ' + commit.delta
-        if len(commit.delta) > 0 and \
-                (not commit.delta.endswith(' ')) and \
-                (not commit.preview.startswith(' ')):
-            commit.preview = ' ' + commit.preview
+        with shared_data.word_lock:
+            for plugin in plugins:
+                commit = plugin.transform(commit)
+
+            if len(commit.delta) > 0 or len(commit.preview) > 0:
+                # Avoid re-sending text after long pauses
+                if shared_data.cfg["reset_after_silence_s"] > 0:
+                    silence_duration = 0
+                    if last_stable_commit:
+                        last_commit_end_ts = \
+                                last_stable_commit.start_ts + \
+                                last_stable_commit.duration_s
+                        silence_duration = commit.start_ts - last_commit_end_ts
+                    if silence_duration > shared_data.cfg["reset_after_silence_s"]:
+                        if shared_data.cfg["enable_debug_mode"]:
+                            print(f"Resetting transcript after {silence_duration}-second "
+                                    "silence", file=sys.stderr)
+                        shared_data.transcript = ""
+                        shared_data.preview = ""
+                        whisper.recent_context = ""  # Reset context too
+                    if commit.delta:
+                        last_stable_commit = commit
+
+                # Hard-cap displayed transcript length to prevent
+                # runaway memory use in UI. Keep the full transcript to avoid
+                # breaking OSC pager.
+                if len(shared_data.transcript) >= 1024:
+                    shared_data.transcript = shared_data.transcript[-512:]
+                def join_segments(a, b):
+                    if len(a) > 0 and a[-1] != ' ':
+                        return a + ' ' + b
+                    else:
+                        return a + b
+                shared_data.transcript = \
+                        join_segments(shared_data.transcript, commit.delta)
+                shared_data.preview = commit.preview
+
+                for filt in filters:
+                    shared_data.transcript, shared_data.preview = \
+                            filt.transform(shared_data.transcript,
+                                           shared_data.preview)
+
+                try:
+                    print(f"Transcript: {shared_data.transcript}", flush=True)
+                except UnicodeEncodeError:
+                    print("Failed to encode transcript - discarding delta",
+                            file=sys.stderr)
+                    continue
+                try:
+                    print(f"Preview: {shared_data.preview}", flush=True)
+                except UnicodeEncodeError:
+                    print("Failed to encode preview - discarding", file=sys.stderr)
+
+                if shared_data.cfg["enable_debug_mode"]:
+                    print(f"commit latency: {commit.latency_s}", file=sys.stderr)
+                    print(f"commit thresh: {commit.thresh_at_commit}",
+                            file=sys.stderr)
+
+            if len(shared_data.transcript) > 0 and \
+                    (not shared_data.transcript.endswith(' ')) and \
+                    (not commit.delta.startswith(' ')):
+                commit.delta = ' ' + commit.delta
+            if len(commit.delta) > 0 and \
+                    (not commit.delta.endswith(' ')) and \
+                    (not commit.preview.startswith(' ')):
+                commit.preview = ' ' + commit.preview
     for plugin in plugins:
         plugin.stop()
     for filt in filters:
diff --git a/config.yaml b/config.yaml
index fea03bb..6f4b65b 100644
--- a/config.yaml
+++ b/config.yaml
@@ -1,11 +1,15 @@
 compute_type: float16
 language: english
 model: turbo
-microphone: 2
-user_prompt: Use proper punctuation and grammar. Prefer spelled out numbers like one, eleven, twenty, etc. Mm.
+microphone: 1
+user_prompt: Use proper punctuation and grammar. Prefer spelled out numbers like one, eleven, twenty, etc. Mm. Phi, NOPPERS, clearrainbow, Noia, Kuuderekitten.
+keybind: ctrl+alt+x
+button_hand: right
+button_type: b
 gpu_idx: 0
 max_speech_duration_s: 10
-min_silence_duration_ms: 250
+min_speech_duration_ms: 250
+min_silence_duration_ms: 100
 reset_after_silence_s: 15
 transcription_loop_delay_ms: 100
 block_width: 2
@@ -16,9 +20,12 @@ beam_size: 5
 best_of: 5
 enable_debug_mode: 0
 enable_previews: 1
-save_audio: 0
+save_audio: 1
 use_cpu: 0
 enable_lowercase_filter: 0
 enable_uppercase_filter: 0
 enable_profanity_filter: 0
 remove_trailing_period: 0
+reset_on_toggle: 0
+enable_local_beep: 1
+use_builtin: 1
diff --git a/ui/config-schema.js b/ui/config-schema.js
index b1108ff..6b11277 100644
--- a/ui/config-schema.js
+++ b/ui/config-schema.js
@@ -6,11 +6,15 @@ const CONFIG_SCHEMA = {
     model: { type: 'select', default: 'turbo' },
     microphone: { type: 'number', default: 0 },
     user_prompt: { type: 'text', default: 'Use proper punctuation and grammar. Prefer spelled out numbers like one, eleven, twenty, etc. Mm.' },
+    keybind: { type: 'text', default: 'ctrl+alt+x' },
+    button_hand: { type: 'select', default: 'right' },
+    button_type: { type: 'select', default: 'b' },
     
     // Number fields
     gpu_idx: { type: 'number', default: 0 },
     max_speech_duration_s: { type: 'number', default: 10 },
-    min_silence_duration_ms: { type: 'number', default: 250 },
+    min_speech_duration_ms: { type: 'number', default: 250 },
+    min_silence_duration_ms: { type: 'number', default: 100 },
     reset_after_silence_s: { type: 'number', default: 15 },
     transcription_loop_delay_ms: { type: 'number', default: 100 },
     block_width: { type: 'number', default: 2 },
@@ -28,7 +32,10 @@ const CONFIG_SCHEMA = {
     enable_lowercase_filter: { type: 'boolean', default: 0 },
     enable_uppercase_filter: { type: 'boolean', default: 0 },
     enable_profanity_filter: { type: 'boolean', default: 0 },
-    remove_trailing_period: { type: 'boolean', default: 0 }
+    remove_trailing_period: { type: 'boolean', default: 0 },
+    reset_on_toggle: { type: 'boolean', default: 0 },
+    enable_local_beep: { type: 'boolean', default: 1 },
+    use_builtin: { type: 'boolean', default: 1 }
 };
 
 // Helper to extract just the default values
diff --git a/ui/index.html b/ui/index.html
index 97da3d2..99e64dd 100644
--- a/ui/index.html
+++ b/ui/index.html
@@ -64,6 +64,31 @@
                                         </button>
                                     </div>
                                 </div>
+                                <div>
+                                    <label for="button_hand" class="form-label">
+                                        VR Hand
+                                    </label>
+                                    <select id="button_hand" class="form-input">
+                                        <option value="left">Left</option>
+                                        <option value="right">Right</option>
+                                    </select>
+                                </div>
+                                <div>
+                                    <label for="button_type" class="form-label">
+                                        VR Button
+                                    </label>
+                                    <select id="button_type" class="form-input">
+                                        <option value="a">A</option>
+                                        <option value="b">B</option>
+                                        <option value="thumbstick">Thumbstick</option>
+                                    </select>
+                                </div>
+                                <div class="col-span-2">
+                                    <label for="keybind" class="form-label">
+                                        Keyboard Binding
+                                    </label>
+                                    <input type="text" id="keybind" value="ctrl+alt+x" class="form-input" placeholder="ctrl+alt+x">
+                                </div>
                             </div>
                         </section>
 
@@ -110,6 +135,10 @@
                                         <label for="max_speech_duration_s" class="form-label">Max Speech Duration (seconds)</label>
                                         <input type="number" id="max_speech_duration_s" min="1" value="10" class="form-input">
                                     </div>
+                                    <div>
+                                        <label for="min_speech_duration_ms" class="form-label">Min Speech Duration (ms)</label>
+                                        <input type="number" id="min_speech_duration_ms" min="0" value="250" class="form-input">
+                                    </div>
                                     <div>
                                         <label for="min_silence_duration_ms" class="form-label">Min Silence Duration (ms)</label>
                                         <input type="number" id="min_silence_duration_ms" min="0" value="250" class="form-input">
@@ -211,9 +240,30 @@
                                 </div>
                             </section>
 
+                            <!-- Input Settings -->
+                            <section class="config-section">
+                                <h2 class="section-title">Input Settings</h2>
+                                <div class="space-y-4">
+                                    <label for="reset_on_toggle" class="checkbox-label">
+                                        <input type="checkbox" id="reset_on_toggle" class="mr-2">
+                                        <span class="checkbox-text">Reset transcript on toggle</span>
+                                    </label>
+                                    <label for="enable_local_beep" class="checkbox-label">
+                                        <input type="checkbox" id="enable_local_beep" checked class="mr-2">
+                                        <span class="checkbox-text">Enable local beep sounds</span>
+                                    </label>
+                                </div>
+                            </section>
+
                             <!-- Display Settings -->
                             <section class="config-section">
                                 <h2 class="section-title">Custom Chatbox Settings</h2>
+                                <div class="mb-4">
+                                    <label for="use_builtin" class="checkbox-label">
+                                        <input type="checkbox" id="use_builtin" class="mr-2">
+                                        <span class="checkbox-text">Use built-in VRChat chatbox</span>
+                                    </label>
+                                </div>
                                 <div class="grid grid-cols-2 gap-4">
                                     <div>
                                         <label for="block_width" class="form-label">Block Width</label>
diff --git a/ui/index.js b/ui/index.js
index 7717c92..24a7e13 100644
--- a/ui/index.js
+++ b/ui/index.js
@@ -246,6 +246,21 @@ ipcMain.handle('reset-config', async () => {
   }
 });
 
+// Remove the venv marker file so 'install-requirements' does not skip setup.
+ipcMain.handle('deleteVenvIndicatorFile', async () => {
+  const markerFile = path.join(APP_ROOT, '.venv_is_set_up');
+  try {
+    await fs.unlink(markerFile);
+    return { success: true, message: '.venv_is_set_up deleted successfully.' };
+  } catch (err) {
+    // A marker that is already absent is reported as success.
+    if (err.code === 'ENOENT') return { success: true, message: '.venv_is_set_up not found.' };
+    console.error('Error deleting .venv_is_set_up file:', err);
+    sendPythonOutput(`Error deleting .venv_is_set_up: ${err.message}`, 'stderr');
+    throw err;
+  }
+});
+
 // Generic function to ensure required files are present
 async function ensureRequiredFiles(config) {
   const { 
@@ -332,7 +347,6 @@ ipcMain.handle('install-requirements', async () => {
     // Check if venv is already set up
     try {
       await fs.access(venvMarkerPath);
-      sendPythonOutput('Virtual environment already set up, skipping installation', 'info');
       return { success: true, message: 'Virtual environment already set up' };
     } catch (error) {
       // Marker doesn't exist, proceed with setup
diff --git a/ui/preload.js b/ui/preload.js
index 35cc8d6..f2e0a81 100644
--- a/ui/preload.js
+++ b/ui/preload.js
@@ -6,6 +6,7 @@ contextBridge.exposeInMainWorld('electronAPI', {
     resetConfig: () => ipcRenderer.invoke('reset-config'),
     getMicrophones: () => ipcRenderer.invoke('get-microphones'),
     installRequirements: () => ipcRenderer.invoke('install-requirements'),
+    deleteVenvIndicatorFile: () => ipcRenderer.invoke('deleteVenvIndicatorFile'),
     resetVenv: () => ipcRenderer.invoke('reset-venv'),
     startProcess: () => ipcRenderer.invoke('start-process'),
     stopProcess: () => ipcRenderer.invoke('stop-process'),
diff --git a/ui/renderer.js b/ui/renderer.js
index 133a79b..2f4c8f1 100644
--- a/ui/renderer.js
+++ b/ui/renderer.js
@@ -162,11 +162,28 @@ function setFormValues(config) {
         }
     }
     
+    // Handle use_builtin toggle state
+    const useBuiltin = config.use_builtin === 1;
+    const customChatboxInputs = ['block_width', 'num_blocks', 'rows', 'cols'];
+    customChatboxInputs.forEach(inputId => {
+        const input = document.getElementById(inputId);
+        if (input) {
+            input.disabled = useBuiltin;
+            if (useBuiltin) {
+                input.classList.add('opacity-50', 'cursor-not-allowed');
+            } else {
+                input.classList.remove('opacity-50', 'cursor-not-allowed');
+            }
+        }
+    });
+    
     isSettingValues = false; // Re-enable auto-save
 }
 
 // Console management
 const consoleContent = document.getElementById('console-content');
+const MAX_CONSOLE_LINES = 512;
+let consoleLineCount = 0;
 
 function appendToConsole(message, type = 'stdout') {
     const timestamp = new Date().toLocaleTimeString();
@@ -183,6 +200,28 @@ function appendToConsole(message, type = 'stdout') {
     lineDiv.appendChild(messageSpan);
     
     consoleContent.appendChild(lineDiv);
+    consoleLineCount++;
+    
+    // Remove old lines if we exceed the limit
+    if (consoleLineCount > MAX_CONSOLE_LINES) {
+        // Calculate how many lines to remove (remove 10% to avoid frequent trimming)
+        const linesToRemove = Math.floor(MAX_CONSOLE_LINES * 0.1);
+        
+        // Remove the oldest lines
+        for (let i = 0; i < linesToRemove; i++) {
+            if (consoleContent.firstChild) {
+                consoleContent.removeChild(consoleContent.firstChild);
+            }
+        }
+        
+        consoleLineCount -= linesToRemove;
+        
+        // Add a notice that lines were trimmed
+        const trimNotice = document.createElement('div');
+        trimNotice.className = 'console-info';
+        trimNotice.innerHTML = '<span class="console-timestamp">[System] </span><span class="console-info">... older lines removed to maintain performance ...</span>';
+        consoleContent.insertBefore(trimNotice, consoleContent.firstChild);
+    }
     
     // Auto-scroll to bottom
     const pythonConsole = document.getElementById('python-console');
@@ -316,11 +355,30 @@ function setupEventHandlers() {
         }
     });
     
+    // Use builtin chatbox toggle
+    document.getElementById('use_builtin').addEventListener('change', (e) => {
+        const customChatboxInputs = ['block_width', 'num_blocks', 'rows', 'cols'];
+        const isBuiltin = e.target.checked;
+        
+        customChatboxInputs.forEach(inputId => {
+            const input = document.getElementById(inputId);
+            if (input) {
+                input.disabled = isBuiltin;
+                if (isBuiltin) {
+                    input.classList.add('opacity-50', 'cursor-not-allowed');
+                } else {
+                    input.classList.remove('opacity-50', 'cursor-not-allowed');
+                }
+            }
+        });
+    });
+    
     // Setup virtual environment
     document.getElementById('setup-venv').addEventListener('click', async () => {
         loadingOverlay.show('Setting up virtual environment - please wait...'); // Show overlay with custom message
         try {
             await buttonManager.withButtonLoading('setupVenv', async () => {
+                await window.electronAPI.deleteVenvIndicatorFile();
                 await handleAsyncAction('Install requirements', () => window.electronAPI.installRequirements());
             });
         } finally {
-- 
cgit v1.2.3


From 790c91d7ad515c3c0a22ca1341316265b8f0d779 Mon Sep 17 00:00:00 2001
From: yum <yum.food.vr@gmail.com>
Date: Wed, 23 Jul 2025 17:41:49 -0700
Subject: bugfixes

* fix model acquisition
* fix local beep sound
* fix volume control
---
 app/hi.py            |  45 ++++--------
 app/requirements.txt |   1 +
 app/stt.py           |  62 ++++++++++++----
 config.yaml          |   8 +--
 ui/config-schema.js  |   2 +-
 ui/index.html        |  13 ++--
 ui/index.js          |  17 +++--
 ui/preload.js        |   1 +
 ui/renderer.js       | 198 ++++++++++++++++++++++++++++-----------------------
 9 files changed, 196 insertions(+), 151 deletions(-)

(limited to 'ui/config-schema.js')

diff --git a/app/hi.py b/app/hi.py
index 1297b37..bb09418 100644
--- a/app/hi.py
+++ b/app/hi.py
@@ -26,9 +26,6 @@ TESTS_ENABLED = True
 # 0 = quiet, 1 = verbose, 2 = very verbose
 LOG_LEVEL = 0
 
-# Global volume control (0.0 to 1.0)
-VOLUME = 0.3
-
 APP_ROOT = os.path.dirname(os.path.abspath(__file__))
 PROJECT_ROOT = os.path.dirname(APP_ROOT)
 
@@ -347,7 +344,8 @@ def osc_thread(shared_data: SharedThreadData):
             if time.time() - last_change < 1.5:
                 continue
             addr = "/chatbox/input"
-            print(f"Send {local_word}", flush=True)
+            if shared_data.cfg["enable_debug_mode"]:
+                print(f"Send {local_word}", flush=True)
             osc_client.send_message(addr, (local_word, True, False))
             last_change = time.time()
             remote_word = local_word
@@ -420,20 +418,16 @@ def vrInputThread(shared_data: SharedThreadData):
 
                     if last_rising - last_medium_press_end < 1.0:
                         # Type transcription
-                        if shared_data.cfg["enable_local_beep"]:
-                            play_sound_with_volume(waveform3)
+                        play_sound_with_volume(waveform3, shared_data.cfg)
                     else:
-                        if shared_data.cfg["enable_local_beep"]:
-                            play_sound_with_volume(waveform1)
+                        play_sound_with_volume(waveform1, shared_data.cfg)
 
                 elif now - last_rising > 0.5:
                     # Medium press
                     print("CLEARING", file=sys.stderr)
                     last_medium_press_end = now
                     state = PAUSE_STATE
-
-                    if shared_data.cfg["enable_local_beep"]:
-                        play_sound_with_volume(waveform2)
+                    play_sound_with_volume(waveform2, shared_data.cfg)
 
                     # Flush the *entire* pipeline.
                     shared_data.stream.pause(True)
@@ -449,9 +443,7 @@ def vrInputThread(shared_data: SharedThreadData):
                     state = PAUSE_STATE
 
                     shared_data.stream.pause(True)
-
-                    if shared_data.cfg["enable_local_beep"]:
-                        play_sound_with_volume(waveform1)
+                    play_sound_with_volume(waveform1, shared_data.cfg)
                 elif state == PAUSE_STATE:
                     print("RECORDING", file=sys.stderr)
                     state = RECORD_STATE
@@ -469,9 +461,7 @@ def vrInputThread(shared_data: SharedThreadData):
                         #audio_state.text += audio_state.preview_text
 
                     shared_data.stream.pause(False)
-
-                    if shared_data.cfg["enable_local_beep"]:
-                        play_sound_with_volume(waveform0)
+                    play_sound_with_volume(waveform0, shared_data.cfg)
 
 
 def kbInputThread(shared_data: SharedThreadData):
@@ -514,9 +504,7 @@ def kbInputThread(shared_data: SharedThreadData):
             if event == EVENT_DOUBLE_PRESS:
                 print("CLEARING", file=sys.stderr)
                 state = PAUSE_STATE
-
-                if shared_data.cfg["enable_local_beep"]:
-                    play_sound_with_volume(waveform2)
+                play_sound_with_volume(waveform2, shared_data.cfg)
 
                 # Flush the *entire* pipeline.
                 shared_data.stream.pause(True)
@@ -530,11 +518,8 @@ def kbInputThread(shared_data: SharedThreadData):
             if state == RECORD_STATE:
                 print("PAUSED", file=sys.stderr)
                 state = PAUSE_STATE
-
                 shared_data.stream.pause(True)
-
-                if shared_data.cfg["enable_local_beep"]:
-                    play_sound_with_volume(waveform1)
+                play_sound_with_volume(waveform1, shared_data.cfg)
             elif state == PAUSE_STATE:
                 print("RECORDING", file=sys.stderr)
                 state = RECORD_STATE
@@ -548,20 +533,16 @@ def kbInputThread(shared_data: SharedThreadData):
                     if shared_data.cfg["enable_debug_mode"]:
                         print("Toggle detected, committing preview text (2)",
                                 file=sys.stderr)
-                    #audio_state.text += audio_state.preview_text
-
                 shared_data.stream.pause(False)
+                play_sound_with_volume(waveform0, shared_data.cfg)
 
-                if shared_data.cfg["enable_local_beep"]:
-                    play_sound_with_volume(waveform0)
-
-def play_sound_with_volume(filepath):
+def play_sound_with_volume(filepath, cfg):
     """Play a WAV file with adjusted volume"""
-    volume = VOLUME
+    volume = cfg.get("volume", 30)
     
     try:
         sound = pygame.mixer.Sound(filepath)
-        sound.set_volume(volume)
+        sound.set_volume(volume * 0.01)
         sound.play()
     except Exception as e:
         print(f"Error playing sound {filepath}: {e}", file=sys.stderr)
diff --git a/app/requirements.txt b/app/requirements.txt
index e68a16c..c8d69df 100644
--- a/app/requirements.txt
+++ b/app/requirements.txt
@@ -2,6 +2,7 @@ faster-whisper
 hf-xet
 keyboard
 langcodes
+noisereduce
 pyaudio
 pygame
 pydub
diff --git a/app/stt.py b/app/stt.py
index c1f4836..79ab0d1 100644
--- a/app/stt.py
+++ b/app/stt.py
@@ -3,6 +3,7 @@ from faster_whisper import WhisperModel
 import langcodes
 import numpy as np
 import os
+import noisereduce as nr
 try:
     from profanity_filter import ProfanityFilter
     PROFANITY_FILTER_AVAILABLE = True
@@ -260,9 +261,13 @@ class NormalizingAudioCollector(AudioCollectorFilter):
         return frames
 
 class BoostingAudioCollector(AudioCollectorFilter):
-    def __init__(self, parent: AudioCollector, target_dBFS: float, cfg: typing.Dict):
+    def __init__(self, parent: AudioCollector,
+                 target_dBFS: float,
+                 max_gain_dB: float,
+                 cfg: typing.Dict):
         AudioCollectorFilter.__init__(self, parent)
         self.target_dBFS = target_dBFS
+        self.max_gain_dB = max_gain_dB
         self.cfg = cfg
 
     def getAudio(self) -> bytes:
@@ -270,9 +275,10 @@ class BoostingAudioCollector(AudioCollectorFilter):
 
         audio = AudioSegment(audio, sample_width=AudioStream.FRAME_SZ,
                 frame_rate=AudioStream.FPS, channels=AudioStream.CHANNELS)
+        gain = min(self.target_dBFS - audio.dBFS, self.max_gain_dB)
         if self.cfg["enable_debug_mode"]:
-            print(f"Boosting audio from {audio.dBFS}dB to {self.target_dBFS}dB", file=sys.stderr)
-        audio = audio.apply_gain(self.target_dBFS - audio.dBFS)
+            print(f"Boosting audio by {gain} dB (from {audio.dBFS} to {audio.dBFS + gain})", flush=True)
+        audio = audio.apply_gain(gain)
 
         frames = np.array(audio.get_array_of_samples())
         frames = np.int16(frames).tobytes()
@@ -296,6 +302,26 @@ class CompressingAudioCollector(AudioCollectorFilter):
 
         return frames
 
+class NoiseReducingAudioCollector(AudioCollectorFilter):
+    """Filter that passes the parent collector's audio through noisereduce."""
+
+    def __init__(self, parent: AudioCollector, cfg: typing.Dict):
+        AudioCollectorFilter.__init__(self, parent)
+        self.cfg = cfg
+
+    def getAudio(self) -> bytes:
+        """Return the parent's audio, read as int16 samples, after noise reduction."""
+        raw = self.parent.getAudio()
+        # reduce_noise is given float32 samples and AudioStream.FPS as the rate.
+        samples = np.frombuffer(raw, dtype=np.int16).astype(np.float32)
+        denoised = nr.reduce_noise(y=samples, sr=AudioStream.FPS)
+
+        # Clamp to the int16 range before converting back to int16 bytes.
+        denoised = np.clip(denoised, -32768, 32767)
+        frames = np.int16(denoised).tobytes()
+
+        return frames
+
 class AudioSegmenter:
     def __init__(self,
             min_silence_ms=250,
@@ -398,6 +424,12 @@ class Segment:
         avg_logprob = f"(avg_logprob: {self.avg_logprob}) "
         return f"{self.transcript} " + ts + wall_ts + no_speech + avg_logprob
 
+def join_segments(a, b):
+    """Concatenate a and b, adding one space unless a is empty or ends with a space."""
+    if a and a[-1] != ' ':
+        return a + ' ' + b
+    return a + b
+
 class Whisper:
     def __init__(self,
             collector: AudioCollector,
@@ -421,6 +453,9 @@ class Whisper:
 
         already_downloaded = os.path.exists(model_root)
 
+        if not already_downloaded:
+            print(f"Model {model_str} not already downloaded, downloading now...", flush=True)
+
         self.model = WhisperModel(model_str,
                 device = model_device,
                 device_index = cfg["gpu_idx"],
@@ -433,10 +468,12 @@ class Whisper:
 
     def update_context(self, committed_text: str):
         """Update the context with recently committed text."""
-        self.recent_context = (self.recent_context + " " + committed_text).strip()
-        # Keep only the last N characters to avoid prompt getting too long
+        self.recent_context = join_segments(self.recent_context, committed_text).strip()
+        # Drop half of the context window.
         if len(self.recent_context) > self.context_window_chars:
-            self.recent_context = self.recent_context[-self.context_window_chars:]
+            words = self.recent_context.split()
+            words = words[len(words)//2:]
+            self.recent_context = ' '.join(words)
 
     def transcribe(self, frames: bytes = None) -> typing.List[Segment]:
         if frames is None:
@@ -449,6 +486,8 @@ class Whisper:
         # Build context-aware prompt
         prompt = self._build_prompt()
 
+        print(f"Prompt: {prompt}", flush=True)
+
         t0 = time.time()
         segments, info = self.model.transcribe(
                 audio,
@@ -698,8 +737,10 @@ def transcriptionThread(shared_data: SharedThreadData):
     stream = MicStream(shared_data.cfg)
     collector = AudioCollector(stream)
     collector = CompressingAudioCollector(collector)
-    collector = BoostingAudioCollector(collector, -12.0, shared_data.cfg)
-    collector = NormalizingAudioCollector(collector)
+    collector = BoostingAudioCollector(collector, -24.0, 24.0,
+                                       shared_data.cfg)
+    collector = NoiseReducingAudioCollector(collector, shared_data.cfg)
+    #collector = NormalizingAudioCollector(collector)
     whisper = Whisper(collector, shared_data.cfg)
     segmenter = AudioSegmenter(min_silence_ms=shared_data.cfg["min_silence_duration_ms"],
             max_speech_s=shared_data.cfg["max_speech_duration_s"],
@@ -761,11 +802,6 @@ def transcriptionThread(shared_data: SharedThreadData):
                 # breaking OSC pager.
                 if len(shared_data.transcript) >= 1024:
                     shared_data.transcript = shared_data.transcript[-512:]
-                def join_segments(a, b):
-                    if len(a) > 0 and a[-1] != ' ':
-                        return a + ' ' + b
-                    else:
-                        return a + b
                 shared_data.transcript = \
                         join_segments(shared_data.transcript, commit.delta)
                 shared_data.preview = commit.preview
diff --git a/config.yaml b/config.yaml
index 6f4b65b..dfa2e1f 100644
--- a/config.yaml
+++ b/config.yaml
@@ -1,8 +1,8 @@
 compute_type: float16
 language: english
 model: turbo
-microphone: 1
-user_prompt: Use proper punctuation and grammar. Prefer spelled out numbers like one, eleven, twenty, etc. Mm. Phi, NOPPERS, clearrainbow, Noia, Kuuderekitten.
+microphone: 4
+user_prompt: Use proper punctuation and grammar. Prefer spelled out numbers like one, eleven, twenty, etc. Mm.
 keybind: ctrl+alt+x
 button_hand: right
 button_type: b
@@ -18,6 +18,7 @@ rows: 10
 cols: 24
 beam_size: 5
 best_of: 5
+volume: 10
 enable_debug_mode: 0
 enable_previews: 1
 save_audio: 1
@@ -26,6 +27,5 @@ enable_lowercase_filter: 0
 enable_uppercase_filter: 0
 enable_profanity_filter: 0
 remove_trailing_period: 0
-reset_on_toggle: 0
-enable_local_beep: 1
+reset_on_toggle: 1
 use_builtin: 1
diff --git a/ui/config-schema.js b/ui/config-schema.js
index 6b11277..bf91fce 100644
--- a/ui/config-schema.js
+++ b/ui/config-schema.js
@@ -23,6 +23,7 @@ const CONFIG_SCHEMA = {
     cols: { type: 'number', default: 24 },
     beam_size: { type: 'number', default: 5 },
     best_of: { type: 'number', default: 5 },
+    volume: { type: 'number', default: 30 },
     
     // Boolean fields (stored as 1/0)
     enable_debug_mode: { type: 'boolean', default: 0 },
@@ -34,7 +35,6 @@ const CONFIG_SCHEMA = {
     enable_profanity_filter: { type: 'boolean', default: 0 },
     remove_trailing_period: { type: 'boolean', default: 0 },
     reset_on_toggle: { type: 'boolean', default: 0 },
-    enable_local_beep: { type: 'boolean', default: 1 },
     use_builtin: { type: 'boolean', default: 1 }
 };
 
diff --git a/ui/index.html b/ui/index.html
index 99e64dd..19c41ce 100644
--- a/ui/index.html
+++ b/ui/index.html
@@ -248,10 +248,13 @@
                                         <input type="checkbox" id="reset_on_toggle" class="mr-2">
                                         <span class="checkbox-text">Reset transcript on toggle</span>
                                     </label>
-                                    <label for="enable_local_beep" class="checkbox-label">
-                                        <input type="checkbox" id="enable_local_beep" checked class="mr-2">
-                                        <span class="checkbox-text">Enable local beep sounds</span>
-                                    </label>
+                                    <div>
+                                        <label for="volume" class="form-label">
+                                            Local Beep Volume
+                                            <span id="volume-display" class="text-gray-500 text-sm ml-2">30%</span>
+                                        </label>
+                                        <input type="range" id="volume" min="0" max="100" step="10" value="30" class="form-input w-full">
+                                    </div>
                                 </div>
                             </section>
 
@@ -314,7 +317,7 @@
                                 <button type="button" id="start-process" class="btn btn-green flex-1">
                                     Start
                                 </button>
-                                <button type="button" id="stop-process" class="btn btn-red flex-1" disabled>
+                                <button type="button" id="stop-process" class="btn btn-red flex-1">
                                     Stop
                                 </button>
                             </div>
diff --git a/ui/index.js b/ui/index.js
index 24a7e13..5a5d0a6 100644
--- a/ui/index.js
+++ b/ui/index.js
@@ -530,19 +530,20 @@ ipcMain.handle('start-process', async () => {
 });
 
 ipcMain.handle('stop-process', async () => {
+  if (!runningProcess) {
+    sendPythonOutput('No process to stop', 'info');
+    return { success: true, forcefullyKilled: false };
+  }
+
   return new Promise((resolve) => {
     let forcefullyKilled = false;
-
-    if (!runningProcess) {
-      resolve({ success: true, forcefullyKilled });
-    }
     
     // Set up a timeout to force kill after 10 seconds
     const killTimeout = setTimeout(() => {
       if (runningProcess) {
         sendPythonOutput('Process did not stop gracefully, forcing termination...', 'stderr');
         forcefullyKilled = true;
-        runningProcess.kill();
+        runningProcess.kill('SIGKILL');
       }
     }, 10000);
     
@@ -562,10 +563,14 @@ ipcMain.handle('stop-process', async () => {
     
     // Send termination signal
     sendPythonOutput('Stopping process gracefully...', 'info');
-    runningProcess.kill();
+    runningProcess.kill('SIGTERM');
   });
 });
 
+ipcMain.handle('get-process-state', () => {
+  return { isRunning: Boolean(runningProcess) }; // truthiness, like the stop handler's !runningProcess check
+});
+
 // Clean up on app quit
 app.on('before-quit', () => {
   if (runningProcess) {
diff --git a/ui/preload.js b/ui/preload.js
index f2e0a81..6f6e54f 100644
--- a/ui/preload.js
+++ b/ui/preload.js
@@ -10,6 +10,7 @@ contextBridge.exposeInMainWorld('electronAPI', {
     resetVenv: () => ipcRenderer.invoke('reset-venv'),
     startProcess: () => ipcRenderer.invoke('start-process'),
     stopProcess: () => ipcRenderer.invoke('stop-process'),
+    getProcessState: () => ipcRenderer.invoke('get-process-state'),
     onPythonOutput: (callback) => ipcRenderer.on('python-output', (event, data) => callback(data)),
     onProcessStopped: (callback) => ipcRenderer.on('process-stopped', () => callback())
 });
diff --git a/ui/renderer.js b/ui/renderer.js
index 2f4c8f1..008e0da 100644
--- a/ui/renderer.js
+++ b/ui/renderer.js
@@ -1,6 +1,21 @@
 // Import configuration schema
 const CONFIG_FIELDS = window.CONFIG_SCHEMA;
 
+// Process state tracking
+let isProcessRunning = false;
+let buttonManager;
+let loadingOverlay;
+
+// Auto-save functionality with debouncing
+let saveTimeout;
+const SAVE_DELAY = 500;
+let isSettingValues = false;
+
+// Console management
+const consoleContent = document.getElementById('console-content');
+const MAX_CONSOLE_LINES = 512;
+let consoleLineCount = 0;
+
 // Button management system
 class ButtonManager {
     constructor() {
@@ -11,33 +26,30 @@ class ButtonManager {
             resetVenv: document.getElementById('reset-venv'),
             refreshMicrophones: document.getElementById('refresh-microphones')
         };
-        
-        // Initialize button states on construction
+
+        // Initialize button states - process is not running at startup
         this.setProcessStopped();
     }
-    
+
     setState(buttonName, disabled) {
         const button = this.buttons[buttonName];
         if (!button) return;
-        
+
         button.disabled = disabled;
-        if (disabled) {
-            button.classList.add('opacity-50', 'cursor-not-allowed');
-        } else {
-            button.classList.remove('opacity-50', 'cursor-not-allowed');
-        }
     }
-    
+
     setProcessRunning() {
         this.setState('start', true);
         this.setState('stop', false);
+        isProcessRunning = true;
     }
-    
+
     setProcessStopped() {
         this.setState('start', false);
         this.setState('stop', true);
+        isProcessRunning = false;
     }
-    
+
     async withButtonLoading(buttonName, asyncFn) {
         this.setState(buttonName, true);
         try {
@@ -48,8 +60,6 @@ class ButtonManager {
     }
 }
 
-const buttonManager = new ButtonManager();
-
 // Add loading overlay management
 class LoadingOverlay {
     constructor() {
@@ -57,8 +67,9 @@ class LoadingOverlay {
         this.form = document.getElementById('config-form');
         this.messageElement = this.overlay.querySelector('p');
         this.defaultMessage = 'Environment setup underway - please wait.';
+        this.originalStates = new Map(); // Track original disabled states
     }
-    
+
     show(message = null) {
         this.messageElement.textContent = message || this.defaultMessage;
         this.overlay.classList.remove('hidden');
@@ -66,68 +77,69 @@ class LoadingOverlay {
         const leftPanel = this.overlay.parentElement;
         const inputs = leftPanel.querySelectorAll('input, select, textarea, button');
         inputs.forEach(input => {
+            // Store original disabled state before disabling
+            this.originalStates.set(input, input.disabled);
             input.disabled = true;
             input.classList.add('opacity-50');
         });
     }
-    
+
     hide() {
         this.overlay.classList.add('hidden');
-        // Re-enable all form inputs and buttons in the entire left panel
+        // Restore original states of form inputs and buttons
         const leftPanel = this.overlay.parentElement;
         const inputs = leftPanel.querySelectorAll('input, select, textarea, button');
         inputs.forEach(input => {
-            input.disabled = false;
+            // Restore original disabled state
+            input.disabled = this.originalStates.get(input) || false;
             input.classList.remove('opacity-50');
         });
+        // Clear the stored states
+        this.originalStates.clear();
         // Reset to default message
         this.messageElement.textContent = this.defaultMessage;
     }
 }
 
-const loadingOverlay = new LoadingOverlay();
-
-// Add a flag to prevent auto-save during programmatic updates
-let isSettingValues = false;
-
 // Handle status messages with better color management
 function showStatus(message, type = 'info') {
     const statusEl = document.getElementById('status-message');
     statusEl.textContent = message;
-    
+
     // Remove all status classes
     const statusClasses = ['hidden', 'bg-green-100', 'bg-red-100', 'bg-blue-100', 'text-green-800', 'text-red-800', 'text-blue-800'];
     statusEl.classList.remove(...statusClasses);
-    
+
     // Add appropriate classes based on type
     const typeMap = {
         success: ['bg-green-100', 'text-green-800'],
         error: ['bg-red-100', 'text-red-800'],
         info: ['bg-blue-100', 'text-blue-800']
     };
-    
+
     statusEl.classList.add(...(typeMap[type] || typeMap.info));
-    
+
     // Also log to console
     appendToConsole(message, type === 'error' ? 'stderr' : 'info');
-    
+
     setTimeout(() => statusEl.classList.add('hidden'), 5000);
 }
 
 // Get form values using field mappings
 function getFormValues() {
     const config = {};
-    
+
     for (const [fieldName, fieldConfig] of Object.entries(CONFIG_FIELDS)) {
         const element = document.getElementById(fieldName);
         if (!element) continue;
-        
+
         switch (fieldConfig.type) {
             case 'boolean':
                 config[fieldName] = element.checked ? 1 : 0;
                 break;
             case 'number':
-                config[fieldName] = parseInt(element.value) || fieldConfig.default;
+                const numValue = parseInt(element.value);
+                config[fieldName] = isNaN(numValue) ? fieldConfig.default : numValue;
                 break;
             case 'text':
                 config[fieldName] = element.value || fieldConfig.default;
@@ -136,20 +148,20 @@ function getFormValues() {
                 config[fieldName] = element.value || fieldConfig.default;
         }
     }
-    
+
     return config;
 }
 
 // Set form values using field mappings
 function setFormValues(config) {
     isSettingValues = true; // Disable auto-save temporarily
-    
+
     for (const [fieldName, fieldConfig] of Object.entries(CONFIG_FIELDS)) {
         const element = document.getElementById(fieldName);
         if (!element) continue;
-        
+
         const value = config[fieldName] ?? fieldConfig.default;
-        
+
         switch (fieldConfig.type) {
             case 'boolean':
                 element.checked = value === 1;
@@ -161,7 +173,7 @@ function setFormValues(config) {
                 element.value = value;
         }
     }
-    
+
     // Handle use_builtin toggle state
     const useBuiltin = config.use_builtin === 1;
     const customChatboxInputs = ['block_width', 'num_blocks', 'rows', 'cols'];
@@ -176,53 +188,54 @@ function setFormValues(config) {
             }
         }
     });
-    
+
+    // Update volume display
+    if (config.volume !== undefined) {
+        const volumePercent = Math.round(config.volume);
+        document.getElementById('volume-display').textContent = `${volumePercent}%`;
+    }
+
     isSettingValues = false; // Re-enable auto-save
 }
 
-// Console management
-const consoleContent = document.getElementById('console-content');
-const MAX_CONSOLE_LINES = 512;
-let consoleLineCount = 0;
-
 function appendToConsole(message, type = 'stdout') {
     const timestamp = new Date().toLocaleTimeString();
     const timestampSpan = document.createElement('span');
     timestampSpan.className = 'console-timestamp';
     timestampSpan.textContent = `[${timestamp}] `;
-    
+
     const messageSpan = document.createElement('span');
     messageSpan.className = `console-${type}`;
     messageSpan.textContent = message;
-    
+
     const lineDiv = document.createElement('div');
     lineDiv.appendChild(timestampSpan);
     lineDiv.appendChild(messageSpan);
-    
+
     consoleContent.appendChild(lineDiv);
     consoleLineCount++;
-    
+
     // Remove old lines if we exceed the limit
     if (consoleLineCount > MAX_CONSOLE_LINES) {
         // Calculate how many lines to remove (remove 10% to avoid frequent trimming)
         const linesToRemove = Math.floor(MAX_CONSOLE_LINES * 0.1);
-        
+
         // Remove the oldest lines
         for (let i = 0; i < linesToRemove; i++) {
             if (consoleContent.firstChild) {
                 consoleContent.removeChild(consoleContent.firstChild);
             }
         }
-        
+
         consoleLineCount -= linesToRemove;
-        
+
         // Add a notice that lines were trimmed
         const trimNotice = document.createElement('div');
         trimNotice.className = 'console-info';
         trimNotice.innerHTML = '<span class="console-timestamp">[System] </span><span class="console-info">... older lines removed to maintain performance ...</span>';
         consoleContent.insertBefore(trimNotice, consoleContent.firstChild);
     }
-    
+
     // Auto-scroll to bottom
     const pythonConsole = document.getElementById('python-console');
     pythonConsole.scrollTop = pythonConsole.scrollHeight;
@@ -242,24 +255,20 @@ async function handleAsyncAction(actionName, actionFn) {
     }
 }
 
-// Auto-save functionality with debouncing
-let saveTimeout;
-const SAVE_DELAY = 500;
-
 async function autoSaveConfig() {
     if (isSettingValues) return;
-    
+
     clearTimeout(saveTimeout);
     saveTimeout = setTimeout(async () => {
         try {
             const config = getFormValues();
             await window.electronAPI.saveConfig(config);
             showStatus('Configuration saved', 'success');
-            
+
             // Restart process if running
-            if (!buttonManager.buttons.stop.disabled) {
+            if (isProcessRunning) {
                 appendToConsole('Restarting process with new configuration...', 'info');
-                
+
                 try {
                     await window.electronAPI.stopProcess();
                     await new Promise(resolve => setTimeout(resolve, 1000));
@@ -281,9 +290,9 @@ async function autoSaveConfig() {
 function setupAutoSave() {
     const form = document.getElementById('config-form');
     const inputs = form.querySelectorAll('input, select, textarea');
-    
+
     inputs.forEach(input => {
-        const eventType = input.type === 'checkbox' ? 'change' : 
+        const eventType = input.type === 'checkbox' ? 'change' :
                          (input.type === 'number' || input.type === 'text' || input.tagName === 'TEXTAREA') ? 'input' : 'change';
         input.addEventListener(eventType, autoSaveConfig);
     });
@@ -292,7 +301,7 @@ function setupAutoSave() {
 // Microphone loading
 async function loadMicrophones() {
     const microphoneSelect = document.getElementById('microphone');
-    
+
     try {
         // Check/install requirements during startup
         appendToConsole('Checking virtual environment and requirements...', 'info');
@@ -305,15 +314,15 @@ async function loadMicrophones() {
 
         appendToConsole('Loading available microphones...', 'info');
         const microphones = await window.electronAPI.getMicrophones();
-        
+
         microphoneSelect.innerHTML = '';
-        
+
         if (microphones.length === 0) {
             microphoneSelect.innerHTML = '<option value="" disabled>No microphones found</option>';
             appendToConsole('No microphones found', 'stderr');
             return;
         }
-        
+
         appendToConsole(`Found ${microphones.length} microphone(s)`, 'info');
         microphones.forEach(mic => {
             const option = document.createElement('option');
@@ -322,7 +331,7 @@ async function loadMicrophones() {
             microphoneSelect.appendChild(option);
             appendToConsole(`  - ${mic.name} (Device ${mic.index})`, 'stdout');
         });
-        
+
         // Restore previously selected microphone
         try {
             const config = await window.electronAPI.loadConfig();
@@ -332,7 +341,7 @@ async function loadMicrophones() {
         } catch (error) {
             // Ignore config load errors here
         }
-        
+
     } catch (error) {
         appendToConsole(`Failed to load microphones: ${error.message}`, 'stderr');
         microphoneSelect.innerHTML = '<option value="" disabled>Error loading microphones</option>';
@@ -345,7 +354,7 @@ function setupEventHandlers() {
     document.getElementById('toggle-advanced').addEventListener('click', () => {
         const advancedSettings = document.getElementById('advanced-settings');
         const chevron = document.getElementById('chevron');
-        
+
         if (advancedSettings.classList.contains('hidden')) {
             advancedSettings.classList.remove('hidden');
             chevron.classList.add('rotate-90');
@@ -354,12 +363,12 @@ function setupEventHandlers() {
             chevron.classList.remove('rotate-90');
         }
     });
-    
+
     // Use builtin chatbox toggle
     document.getElementById('use_builtin').addEventListener('change', (e) => {
         const customChatboxInputs = ['block_width', 'num_blocks', 'rows', 'cols'];
         const isBuiltin = e.target.checked;
-        
+
         customChatboxInputs.forEach(inputId => {
             const input = document.getElementById(inputId);
             if (input) {
@@ -372,7 +381,13 @@ function setupEventHandlers() {
             }
         });
     });
-    
+
+    // Volume slider update
+    document.getElementById('volume').addEventListener('input', (e) => {
+        const volumePercent = Math.round(e.target.value);
+        document.getElementById('volume-display').textContent = `${volumePercent}%`;
+    });
+
     // Setup virtual environment
     document.getElementById('setup-venv').addEventListener('click', async () => {
         loadingOverlay.show('Setting up virtual environment - please wait...'); // Show overlay with custom message
@@ -385,7 +400,7 @@ function setupEventHandlers() {
             loadingOverlay.hide(); // Always hide overlay when done
         }
     });
-    
+
     // Reset virtual environment
     document.getElementById('reset-venv').addEventListener('click', async () => {
         loadingOverlay.show('Resetting virtual environment - please wait...'); // Show overlay with custom message
@@ -397,33 +412,33 @@ function setupEventHandlers() {
             loadingOverlay.hide(); // Always hide overlay when done
         }
     });
-    
+
     // Reset configuration
     document.getElementById('reset-config').addEventListener('click', async () => {
         const confirmReset = confirm('Are you sure you want to reset all settings to defaults? This cannot be undone.');
         if (!confirmReset) return;
-        
+
         try {
             // Stop process if running
-            const wasRunning = !buttonManager.buttons.stop.disabled;
+            const wasRunning = isProcessRunning;
             if (wasRunning) {
                 appendToConsole('Stopping process before resetting configuration...', 'info');
                 await window.electronAPI.stopProcess();
                 buttonManager.setProcessStopped();
                 await new Promise(resolve => setTimeout(resolve, 500));
             }
-            
+
             // Reset configuration
             appendToConsole('Resetting configuration to defaults...', 'info');
             const result = await window.electronAPI.resetConfig();
-            
+
             // Reload configuration with defaults
             const config = await window.electronAPI.loadConfig();
             setFormValues(config);
-            
+
             showStatus(result.message, 'success');
             appendToConsole('Configuration reset successfully', 'info');
-            
+
             // Restart process if it was running
             if (wasRunning) {
                 appendToConsole('Restarting process with default configuration...', 'info');
@@ -436,18 +451,18 @@ function setupEventHandlers() {
             appendToConsole(`Failed to reset configuration: ${error.message}`, 'stderr');
         }
     });
-    
+
     // Refresh microphones
     document.getElementById('refresh-microphones').addEventListener('click', async () => {
         await buttonManager.withButtonLoading('refreshMicrophones', async () => {
             await loadMicrophones();
         });
     });
-    
+
     // Start process
     document.getElementById('start-process').addEventListener('click', async () => {
         buttonManager.setState('start', true);
-        
+
         try {
             // The installRequirements function will now check if venv is set up.
             loadingOverlay.show('Verifying environment setup - please wait...'); // Show overlay with custom message
@@ -457,7 +472,7 @@ function setupEventHandlers() {
             } finally {
                 loadingOverlay.hide(); // Always hide overlay when done
             }
-            
+
             await window.electronAPI.startProcess();
             buttonManager.setProcessRunning();
             appendToConsole('Process started successfully', 'info');
@@ -466,11 +481,11 @@ function setupEventHandlers() {
             buttonManager.setState('start', false);
         }
     });
-    
+
     // Stop process
     document.getElementById('stop-process').addEventListener('click', async () => {
         buttonManager.setState('stop', true);
-        
+
         try {
             await window.electronAPI.stopProcess();
             appendToConsole('Process stop initiated', 'info');
@@ -479,7 +494,7 @@ function setupEventHandlers() {
             buttonManager.setState('stop', false);
         }
     });
-    
+
     // Listen for process stopped event
     window.electronAPI.onProcessStopped(() => {
         buttonManager.setProcessStopped();
@@ -489,12 +504,15 @@ function setupEventHandlers() {
 // Initialize application
 window.addEventListener('load', async () => {
     appendToConsole('TaSTT Configuration UI initialized', 'info');
-    
+
+    loadingOverlay = new LoadingOverlay();
+    buttonManager = new ButtonManager();
+
     // Set up Python output listener first so we capture all output
     window.electronAPI.onPythonOutput((data) => {
         appendToConsole(data.message, data.type);
     });
-    
+
     // Load configuration
     try {
         const config = await window.electronAPI.loadConfig();
@@ -503,11 +521,11 @@ window.addEventListener('load', async () => {
     } catch (error) {
         appendToConsole(`Failed to load configuration: ${error.message}`, 'stderr');
     }
-    
+
     // Load microphones
     await loadMicrophones();
-    
+
     // Setup event handlers and auto-save
     setupEventHandlers();
     setupAutoSave();
-}); 
\ No newline at end of file
+});
-- 
cgit v1.2.3


From 9bf33a4cad8196bfe7253c841ab5c35ffdbc0173 Mon Sep 17 00:00:00 2001
From: yum <yum.food.vr@gmail.com>
Date: Wed, 23 Jul 2025 19:05:15 -0700
Subject: add segment metadata logging feature

Segment metadata can now be logged to a JSON file as the app runs. The
goal is to identify which parameters correlate strongly with
hallucinations.

Also:

* use 7zip for compression in build, speeding things up
* log dll download progress every few seconds
* shrink package
---
 app/stt.py          | 72 ++++++++++++++++++++++++++++++++++++++++++++++++++---
 config.yaml         |  1 +
 ui/.gitignore       |  2 ++
 ui/config-schema.js |  1 +
 ui/index.html       |  6 ++++-
 ui/index.js         | 28 +++++++++++++++++++--
 ui/package.json     | 35 ++++++++++++++++----------
 7 files changed, 126 insertions(+), 19 deletions(-)

(limited to 'ui/config-schema.js')

diff --git a/app/stt.py b/app/stt.py
index 79ab0d1..f36de97 100644
--- a/app/stt.py
+++ b/app/stt.py
@@ -1,5 +1,6 @@
 from datetime import datetime
 from faster_whisper import WhisperModel
+import json
 import langcodes
 import numpy as np
 import os
@@ -486,7 +487,8 @@ class Whisper:
         # Build context-aware prompt
         prompt = self._build_prompt()
 
-        print(f"Prompt: {prompt}", flush=True)
+        if self.cfg["enable_debug_mode"]:
+            print(f"Prompt: {prompt}", flush=True)
 
         t0 = time.time()
         segments, info = self.model.transcribe(
@@ -578,16 +580,69 @@ def saveAudio(audio: bytes, path: str, cfg: typing.Dict):
         wf.writeframes(audio)
 
 
+class SegmentLogger:
+    def __init__(self, cfg: typing.Dict):
+        self.cfg = cfg
+        self.enabled = cfg.get("enable_segment_logging", False)
+        self.session_data = []
+        self.log_file = None
+
+        if self.enabled:
+            log_dir = os.path.join(PROJECT_ROOT, "logs")
+            if not os.path.exists(log_dir):
+                os.makedirs(log_dir)
+
+            # Create file
+            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+            self.log_file = os.path.join(log_dir, f"session_debug_{timestamp}.json")
+            print(f"Segment logging enabled. Logging to: {self.log_file}", file=sys.stderr)
+
+    def log_segment(self, segment: Segment, commit_type: str = "commit"):
+        if not self.enabled:
+            return
+
+        segment_data = {
+            "timestamp": datetime.now().isoformat(),
+            "type": commit_type,
+            "text": segment.transcript,
+            "start_ts": segment.start_ts,
+            "end_ts": segment.end_ts,
+            "wall_ts": segment.wall_ts,
+            "avg_logprob": segment.avg_logprob,
+            "no_speech_prob": segment.no_speech_prob,
+            "compression_ratio": segment.compression_ratio,
+            "duration": segment.end_ts - segment.start_ts
+        }
+
+        self.session_data.append(segment_data)
+
+        # Write to file incrementally
+        try:
+            with open(self.log_file, 'w') as f:
+                json.dump({
+                    "session_start": self.session_data[0]["timestamp"] if self.session_data else None,
+                    "segments": self.session_data
+                }, f, indent=2)
+        except Exception as e:
+            print(f"Error writing segment log: {e}", file=sys.stderr)
+
+    def close(self):
+        if self.enabled and self.session_data:
+            print(f"Session complete. Logged {len(self.session_data)} segments to {self.log_file}", file=sys.stderr)
+
+
 class VadCommitter:
     def __init__(self,
             cfg: typing.Dict,
             collector: AudioCollector,
             whisper: Whisper,
-            segmenter: AudioSegmenter):
+            segmenter: AudioSegmenter,
+            segment_logger: SegmentLogger = None):
         self.cfg = cfg
         self.collector = collector
         self.whisper = whisper
         self.segmenter = segmenter
+        self.segment_logger = segment_logger
 
     def getDelta(self) -> TranscriptCommit:
         audio = self.collector.getAudio()
@@ -618,6 +673,10 @@ class VadCommitter:
             if delta.strip():
                 self.whisper.update_context(delta.strip())
 
+            if self.segment_logger:
+                for s in segments:
+                    self.segment_logger.log_segment(s, "commit")
+
             audio = self.collector.getAudio()
             if self.cfg["enable_debug_mode"]:
                 for s in segments:
@@ -638,6 +697,10 @@ class VadCommitter:
             segments = self.whisper.transcribe(audio)
             preview = "".join(s.transcript for s in segments)
 
+            if self.segment_logger:
+                for s in segments:
+                    self.segment_logger.log_segment(s, "preview")
+
         if not has_audio:
             self.collector.keepLast(1.0)
 
@@ -745,7 +808,9 @@ def transcriptionThread(shared_data: SharedThreadData):
     segmenter = AudioSegmenter(min_silence_ms=shared_data.cfg["min_silence_duration_ms"],
             max_speech_s=shared_data.cfg["max_speech_duration_s"],
             min_speech_duration_ms=shared_data.cfg["min_speech_duration_ms"])
-    committer = VadCommitter(shared_data.cfg, collector, whisper, segmenter)
+
+    segment_logger = SegmentLogger(shared_data.cfg)
+    committer = VadCommitter(shared_data.cfg, collector, whisper, segmenter, segment_logger)
 
     plugins = []
     # plugins.append(TranslationPlugin(shared_data.cfg))  # Not implemented yet
@@ -839,4 +904,5 @@ def transcriptionThread(shared_data: SharedThreadData):
         plugin.stop()
     for filt in filters:
         filt.stop()
+    segment_logger.close()
 
diff --git a/config.yaml b/config.yaml
index dfa2e1f..db25405 100644
--- a/config.yaml
+++ b/config.yaml
@@ -22,6 +22,7 @@ volume: 10
 enable_debug_mode: 0
 enable_previews: 1
 save_audio: 1
+enable_segment_logging: 0
 use_cpu: 0
 enable_lowercase_filter: 0
 enable_uppercase_filter: 0
diff --git a/ui/.gitignore b/ui/.gitignore
index 2109e19..c1dbe3c 100644
--- a/ui/.gitignore
+++ b/ui/.gitignore
@@ -1,3 +1,5 @@
 build
 node_modules
 package-lock.json
+output.css
+dist
diff --git a/ui/config-schema.js b/ui/config-schema.js
index bf91fce..fb90f3f 100644
--- a/ui/config-schema.js
+++ b/ui/config-schema.js
@@ -29,6 +29,7 @@ const CONFIG_SCHEMA = {
     enable_debug_mode: { type: 'boolean', default: 0 },
     enable_previews: { type: 'boolean', default: 1 },
     save_audio: { type: 'boolean', default: 0 },
+    enable_segment_logging: { type: 'boolean', default: 0 },
     use_cpu: { type: 'boolean', default: 0 },
     enable_lowercase_filter: { type: 'boolean', default: 0 },
     enable_uppercase_filter: { type: 'boolean', default: 0 },
diff --git a/ui/index.html b/ui/index.html
index 19c41ce..29d4a78 100644
--- a/ui/index.html
+++ b/ui/index.html
@@ -4,7 +4,7 @@
     <meta charset="UTF-8">
     <meta http-equiv="Content-Security-Policy" content="default-src 'self'; script-src 'self'; style-src 'self' 'unsafe-inline'">
     <title>TaSTT</title>
-    <link rel="stylesheet" href="build/output.css">
+    <link rel="stylesheet" href="output.css">
 </head>
 <body class="bg-gray-100">
     <div class="container-fluid px-6 py-6 h-screen flex flex-col">
@@ -214,6 +214,10 @@
                                         <input type="checkbox" id="save_audio" class="mr-2">
                                         <span class="checkbox-text">Save Audio Segments</span>
                                     </label>
+                                    <label for="enable_segment_logging" class="checkbox-label">
+                                        <input type="checkbox" id="enable_segment_logging" class="mr-2">
+                                        <span class="checkbox-text">Log Segment Metadata (Debug)</span>
+                                    </label>
                                 </div>
                             </section>
 
diff --git a/ui/index.js b/ui/index.js
index 5a5d0a6..afaaf7f 100644
--- a/ui/index.js
+++ b/ui/index.js
@@ -6,7 +6,12 @@ const { spawn } = require('child_process');
 const https = require('https');
 const { CONFIG_SCHEMA, getDefaultConfig } = require('./config-schema.js');
 
-const APP_ROOT = path.join(__dirname, '..');
+// Detect if we're running in development or production
+const isDev = !app.isPackaged;
+const APP_ROOT = isDev 
+  ? path.join(__dirname, '..')  // Development: go up from ui/ to project root
+  : process.resourcesPath;       // Production: use Electron's resource path
+
 const CONFIG_PATH = path.join(APP_ROOT, 'config.yaml');
 
 let mainWindow;
@@ -50,13 +55,32 @@ function createPythonEnvironment() {
   return env;
 }
 
-// Helper function to download a file from URL
+// Helper function to download a file from URL with progress
 function downloadFile(url, outputPath) {
   return new Promise((resolve, reject) => {
     const file = require('fs').createWriteStream(outputPath);
+    const fileName = path.basename(outputPath);
     
     const request = https.get(url, (response) => {
       if (response.statusCode === 200) {
+        const totalSize = parseInt(response.headers['content-length'], 10);
+        let downloadedSize = 0;
+        let lastProgressTime = Date.now();
+        
+        response.on('data', (chunk) => {
+          downloadedSize += chunk.length;
+          
+          // Log progress every 5 seconds
+          const now = Date.now();
+          if (totalSize && (now - lastProgressTime >= 5000)) {
+            const progress = Math.round((downloadedSize / totalSize) * 100);
+            const mb = (downloadedSize / 1024 / 1024).toFixed(1);
+            const totalMb = (totalSize / 1024 / 1024).toFixed(1);
+            sendPythonOutput(`Downloading ${fileName}: ${mb}/${totalMb} MB (${progress}%)`, 'info');
+            lastProgressTime = now;
+          }
+        });
+        
         response.pipe(file);
         
         file.on('finish', () => {
diff --git a/ui/package.json b/ui/package.json
index 3a58298..4742cd7 100644
--- a/ui/package.json
+++ b/ui/package.json
@@ -6,14 +6,16 @@
   "homepage": "./",
   "scripts": {
     "start": "npm run build:css && electron .",
-    "build:css": "tailwindcss -i ./src/components.css -o ./build/output.css",
-    "watch:css": "tailwindcss -i ./src/components.css -o ./build/output.css --watch",
+    "build:css": "tailwindcss -i ./src/components.css -o ./output.css",
+    "watch:css": "tailwindcss -i ./src/components.css -o ./output.css --watch",
     "dev": "concurrently \"npm run watch:css\" \"electron .\"",
     "test": "echo \"Error: no test specified\" && exit 1",
-    "dist": "npm run build:css && electron-builder",
-    "dist:win": "npm run build:css && electron-builder --win",
-    "dist:portable": "npm run build:css && electron-builder --win portable",
-    "dist:zip": "npm run build:css && electron-builder --win zip"
+    "clean:meta": "node -e \"const fs=require('fs');const path=require('path');function deleteMeta(dir){fs.readdirSync(dir).forEach(f=>{const p=path.join(dir,f);if(f.endsWith('.meta'))fs.unlinkSync(p);else if(fs.statSync(p).isDirectory()&&!f.startsWith('.'))deleteMeta(p);})}deleteMeta('./node_modules')\"",
+    "prebuild": "node build_scripts/setup-empty-venv.js",
+    "dist": "npm run prebuild && npm run clean:meta && npm run build:css && electron-builder",
+    "dist:win": "npm run prebuild && npm run clean:meta && npm run build:css && electron-builder --win",
+    "dist:portable": "npm run prebuild && npm run clean:meta && npm run build:css && electron-builder --win portable",
+    "dist:zip": "npm run prebuild && npm run clean:meta && npm run build:css && electron-builder --win zip"
   },
   "build": {
     "appId": "com.yum_food.tastt",
@@ -46,11 +48,6 @@
         "from": "../config.yaml",
         "to": "config.yaml"
       },
-      {
-        "from": "../dll",
-        "to": "dll",
-        "filter": ["**/*"]
-      },
       {
         "from": "../Images",
         "to": "Images",
@@ -60,10 +57,20 @@
         "from": "../bin",
         "to": "bin",
         "filter": ["**/*"]
+      },
+      {
+        "from": "../venv_clean",
+        "to": "venv",
+        "filter": ["**/*"]
+      },
+      {
+        "from": "../dll_empty",
+        "to": "dll",
+        "filter": ["**/*"]
       }
     ],
     "win": {
-      "icon": "../Images/logo.png",
+      "icon": "../Images/favicon.ico",
       "target": [
         {
           "target": "portable",
@@ -81,7 +88,9 @@
     "nsis": {
       "oneClick": false,
       "allowToChangeInstallationDirectory": true
-    }
+    },
+    "compression": "maximum",
+    "artifactName": "${productName}-${version}-${arch}.${ext}"
   },
   "keywords": [],
   "author": "yum_food",
-- 
cgit v1.2.3