summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--.cursorignore2
-rw-r--r--.gitignore2
-rw-r--r--Images/favicon.icobin0 -> 92015 bytes
-rw-r--r--app/hi.py12
-rw-r--r--app/requirements.txt2
-rw-r--r--app/stt.py128
-rw-r--r--app/vad.py314
-rw-r--r--config.yaml1
-rw-r--r--ui/index.html336
-rw-r--r--ui/index.js382
-rw-r--r--ui/package.json76
-rw-r--r--ui/preload.js7
-rw-r--r--ui/renderer.js564
-rw-r--r--ui/src/components.css8
-rw-r--r--ui_design.md9
15 files changed, 1085 insertions, 758 deletions
diff --git a/.cursorignore b/.cursorignore
new file mode 100644
index 0000000..a8f4624
--- /dev/null
+++ b/.cursorignore
@@ -0,0 +1,2 @@
+**/node_modules
+**/site-packages \ No newline at end of file
diff --git a/.gitignore b/.gitignore
index a102cf0..d3886ca 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,3 @@
.*.sw[po]
*.meta
-
+.venv_is_set_up
diff --git a/Images/favicon.ico b/Images/favicon.ico
new file mode 100644
index 0000000..25ea9ac
--- /dev/null
+++ b/Images/favicon.ico
Binary files differ
diff --git a/app/hi.py b/app/hi.py
index 0d80b9d..e6877ff 100644
--- a/app/hi.py
+++ b/app/hi.py
@@ -330,10 +330,11 @@ if __name__ == "__main__":
cli_args = parse_args()
cfg = app_config.getConfig(cli_args.config)
shared_data = SharedThreadData(cfg)
- osc_thread = threading.Thread(
- target=osc_thread,
- args=(shared_data,))
- osc_thread.start()
+ if False:
+ osc_thread = threading.Thread(
+ target=osc_thread,
+ args=(shared_data,))
+ osc_thread.start()
transcribe_thread = threading.Thread(
target=stt.transcriptionThread,
@@ -382,6 +383,7 @@ if __name__ == "__main__":
local_word = shared_data.word
print(local_word + "_")
shared_data.exit_event.set()
- osc_thread.join()
+ if False:
+ osc_thread.join()
transcribe_thread.join()
diff --git a/app/requirements.txt b/app/requirements.txt
index 07f94cd..f8b7069 100644
--- a/app/requirements.txt
+++ b/app/requirements.txt
@@ -5,4 +5,4 @@ pyaudio
pydub
python-osc
sentencepiece
-wave
+silero-vad
diff --git a/app/stt.py b/app/stt.py
index c157f6d..7d76333 100644
--- a/app/stt.py
+++ b/app/stt.py
@@ -6,10 +6,10 @@ import os
import pyaudio
from pydub import AudioSegment
from shared_thread_data import SharedThreadData
+from silero_vad import load_silero_vad, get_speech_timestamps
import sys
import time
import typing
-import vad
import wave
@@ -33,7 +33,7 @@ class AudioStream():
class MicStream(AudioStream):
CHUNK_SZ = 1024
- def __init__(self, which_mic: str):
+ def __init__(self, cfg: typing.Dict):
self.p = pyaudio.PyAudio()
self.stream = None
self.sample_rate = None
@@ -45,8 +45,11 @@ class MicStream(AudioStream):
# If set, incoming frames are simply discarded.
self.paused = False
- print(f"Finding mic {which_mic}", file=sys.stderr)
- self.dumpMicDevices()
+ which_mic = cfg["microphone"]
+
+ if cfg["enable_debug_mode"]:
+ print(f"Finding mic {which_mic}", file=sys.stderr)
+ self.dumpMicDevices()
got_match = False
device_index = -1
@@ -59,8 +62,9 @@ class MicStream(AudioStream):
elif which_mic == "beyond":
target_str = "Microphone (Beyond)"
else:
- print(f"Mic {which_mic} requested, treating it as a numerical " +
- "device ID", file=sys.stderr)
+ if cfg["enable_debug_mode"]:
+ print(f"Mic {which_mic} requested, treating it as a numerical " +
+ "device ID", file=sys.stderr)
device_index = int(which_mic)
got_match = True
if not got_match:
@@ -79,9 +83,11 @@ class MicStream(AudioStream):
raise KeyError(f"Mic {which_mic} not found")
info = self.p.get_device_info_by_host_api_device_index(0, device_index)
- print(f"Found mic {which_mic}: {info['name']}", file=sys.stderr)
+ if cfg["enable_debug_mode"]:
+ print(f"Found mic {which_mic}: {info['name']}", file=sys.stderr)
self.sample_rate = int(info['defaultSampleRate'])
- print(f"Mic sample rate: {self.sample_rate}", file=sys.stderr)
+ if cfg["enable_debug_mode"]:
+ print(f"Mic sample rate: {self.sample_rate}", file=sys.stderr)
self.stream = self.p.open(
rate=self.sample_rate,
@@ -289,19 +295,40 @@ class AudioSegmenter:
def __init__(self,
min_silence_ms=250,
max_speech_s=5):
- self.vad_options = vad.VadOptions(
- min_silence_duration_ms=min_silence_ms,
- max_speech_duration_s=max_speech_s)
- pass
+ self.min_silence_ms = min_silence_ms
+ self.max_speech_s = max_speech_s
+
+ # Load Silero VAD model
+ self.model = load_silero_vad()
+
+ self.vad_threshold = 0.3
+ self.min_silence_duration_ms = min_silence_ms
+ self.max_speech_duration_s = max_speech_s
+
+ self.speech_pad_ms = 300
def segmentAudio(self, audio: bytes):
- audio = np.frombuffer(audio,
+ # Convert audio bytes to numpy array expected by silero-vad
+ audio_array = np.frombuffer(audio,
dtype=np.int16).flatten().astype(np.float32) / 32768.0
- return vad.get_speech_timestamps(audio, vad_options=self.vad_options)
+
+ # Get speech timestamps using silero-vad
+ # Note: silero-vad expects sample rate of 16000 Hz which matches AudioStream.FPS
+ speech_timestamps = get_speech_timestamps(
+ audio_array,
+ self.model,
+ sampling_rate=AudioStream.FPS,
+ threshold=self.vad_threshold,
+ min_silence_duration_ms=self.min_silence_duration_ms,
+ max_speech_duration_s=self.max_speech_duration_s,
+ return_seconds=False # We want frame indices, not seconds
+ )
+
+ return speech_timestamps
# Returns the stable cutoff (if any) and whether there are any segments.
def getStableCutoff(self, audio: bytes) -> typing.Tuple[int, bool]:
- min_delta_frames = int((self.vad_options.min_silence_duration_ms *
+ min_delta_frames = int((self.min_silence_duration_ms *
AudioStream.FPS) / 1000.0)
cutoff = None
@@ -379,8 +406,9 @@ class Whisper:
model_str = cfg["model"]
model_root = os.path.join(parent_dir, "Models",
os.path.normpath(model_str))
- print(f"Model {cfg['model']} will be saved to {model_root}",
- file=sys.stderr)
+ if cfg["enable_debug_mode"]:
+ print(f"Model {cfg['model']} will be saved to {model_root}",
+ file=sys.stderr)
model_device = "cuda"
if cfg["use_cpu"]:
@@ -395,21 +423,42 @@ class Whisper:
download_root = model_root,
local_files_only = already_downloaded)
+ self.context_window_chars = 200 # Keep last 200 chars of context
+ self.recent_context = "" # Store recent committed text
+
+ def update_context(self, committed_text: str):
+ """Update the context with recently committed text."""
+ self.recent_context = (self.recent_context + " " + committed_text).strip()
+ # Keep only the last N characters to avoid prompt getting too long
+ if len(self.recent_context) > self.context_window_chars:
+ self.recent_context = self.recent_context[-self.context_window_chars:]
+
def transcribe(self, frames: bytes = None) -> typing.List[Segment]:
if frames is None:
frames = self.collector.getAudio()
- # Convert from signed 16-bit int [-32768, 32767] to signed 32-bit float on
- # [-1, 1].
+
+ # Convert audio to float32
audio = np.frombuffer(frames,
dtype=np.int16).flatten().astype(np.float32) / 32768.0
+ # Build context-aware prompt
+ prompt = self._build_prompt()
+
t0 = time.time()
segments, info = self.model.transcribe(
audio,
language = langcodes.find(self.cfg["language"]).language,
vad_filter = True,
temperature=0.0,
- without_timestamps = False)
+ without_timestamps = False,
+ initial_prompt=prompt,
+ beam_size=5,
+ best_of=5,
+ condition_on_previous_text=True,
+ compression_ratio_threshold=2.4,
+ log_prob_threshold=-1.0,
+ no_speech_threshold=0.6
+ )
res = []
for s in segments:
# Manual touchup. I see a decent number of hallucinations sneaking
@@ -445,6 +494,17 @@ class Whisper:
print(f"Transcription latency (s): {t1 - t0}")
return res
+ def _build_prompt(self) -> str:
+ """Build a context-aware prompt for Whisper."""
+ user_prompt = self.cfg["user_prompt"]
+ context_prompt = ""
+ if self.recent_context and len(self.recent_context) > 0:
+ context_prompt = f"Here is the context so far: {self.recent_context}"
+
+ prompts = [user_prompt, context_prompt]
+ prompts = [p for p in prompts if p and len(p) > 0]
+ return " ".join(prompts)
+
class TranscriptCommit:
def __init__(self,
delta: str,
@@ -502,10 +562,21 @@ class VadCommitter:
latency_s = self.collector.now() - self.collector.begin()
duration_s = stable_cutoff / AudioStream.FPS
start_ts = self.collector.begin()
- commit_audio = self.collector.dropAudioPrefixByFrames(stable_cutoff)
+
+ # Get the filtered audio first, then extract the portion we need
+ filtered_audio = self.collector.getAudio()
+ commit_audio = filtered_audio[:stable_cutoff * AudioStream.FRAME_SZ]
+
+ # Now drop the prefix from the collector
+ self.collector.dropAudioPrefixByFrames(stable_cutoff)
segments = self.whisper.transcribe(commit_audio)
delta = ''.join(s.transcript for s in segments)
+
+ # Update whisper's context with the committed text
+ if delta.strip():
+ self.whisper.update_context(delta.strip())
+
audio = self.collector.getAudio()
if self.cfg["enable_debug_mode"]:
for s in segments:
@@ -540,11 +611,11 @@ class VadCommitter:
def transcriptionThread(shared_data: SharedThreadData):
last_stable_commit = None
- stream = MicStream(shared_data.cfg["microphone"])
+ stream = MicStream(shared_data.cfg)
collector = AudioCollector(stream)
collector = CompressingAudioCollector(collector)
+ collector = BoostingAudioCollector(collector, -12.0, shared_data.cfg)
collector = NormalizingAudioCollector(collector)
- collector = BoostingAudioCollector(collector, 0.0, shared_data.cfg)
whisper = Whisper(collector, shared_data.cfg)
segmenter = AudioSegmenter(min_silence_ms=shared_data.cfg["min_silence_duration_ms"],
max_speech_s=shared_data.cfg["max_speech_duration_s"])
@@ -553,6 +624,8 @@ def transcriptionThread(shared_data: SharedThreadData):
transcript = ""
preview = ""
+ print(f"Ready to go!", flush=True)
+
while not shared_data.exit_event.is_set():
time.sleep(shared_data.cfg["transcription_loop_delay_ms"] / 1000.0);
@@ -561,8 +634,7 @@ def transcriptionThread(shared_data: SharedThreadData):
commit = committer.getDelta()
if len(commit.delta) > 0 or len(commit.preview) > 0:
- # Avoid re-sending text after long pauses. User controls the length
- # of the pause in the UI.
+ # Avoid re-sending text after long pauses
if shared_data.cfg["reset_after_silence_s"] > 0:
silence_duration = 0
if last_stable_commit:
@@ -571,10 +643,12 @@ def transcriptionThread(shared_data: SharedThreadData):
last_stable_commit.duration_s
silence_duration = commit.start_ts - last_commit_end_ts
if silence_duration > shared_data.cfg["reset_after_silence_s"]:
- print(f"Resetting transcript after {silence_duration}-second "
- "silence", file=sys.stderr)
+ if shared_data.cfg["enable_debug_mode"]:
+ print(f"Resetting transcript after {silence_duration}-second "
+ "silence", file=sys.stderr)
transcript = ""
preview = ""
+ whisper.recent_context = "" # Reset context too
if commit.delta:
last_stable_commit = commit
diff --git a/app/vad.py b/app/vad.py
deleted file mode 100644
index 1dea765..0000000
--- a/app/vad.py
+++ /dev/null
@@ -1,314 +0,0 @@
-# MIT License
-#
-# Copyright (c) 2023 Guillaume Klein
-#
-# Permission is hereby granted, free of charge, to any person obtaining a copy
-# of this software and associated documentation files (the "Software"), to deal
-# in the Software without restriction, including without limitation the rights
-# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the Software is
-# furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included in all
-# copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-# SOFTWARE.
-
-import bisect
-import functools
-import os
-import warnings
-
-from typing import List, NamedTuple, Optional
-
-import numpy as np
-
-
-# The code below is adapted from https://github.com/snakers4/silero-vad.
-class VadOptions(NamedTuple):
- """VAD options.
-
- Attributes:
- threshold: Speech threshold. Silero VAD outputs speech probabilities for each audio chunk,
- probabilities ABOVE this value are considered as SPEECH. It is better to tune this
- parameter for each dataset separately, but "lazy" 0.5 is pretty good for most datasets.
- min_speech_duration_ms: Final speech chunks shorter min_speech_duration_ms are thrown out.
- max_speech_duration_s: Maximum duration of speech chunks in seconds. Chunks longer
- than max_speech_duration_s will be split at the timestamp of the last silence that
- lasts more than 100ms (if any), to prevent aggressive cutting. Otherwise, they will be
- split aggressively just before max_speech_duration_s.
- min_silence_duration_ms: In the end of each speech chunk wait for min_silence_duration_ms
- before separating it
- window_size_samples: Audio chunks of window_size_samples size are fed to the silero VAD model.
- WARNING! Silero VAD models were trained using 512, 1024, 1536 samples for 16000 sample rate.
- Values other than these may affect model performance!!
- speech_pad_ms: Final speech chunks are padded by speech_pad_ms each side
- """
-
- threshold: float = 0.5
- min_speech_duration_ms: int = 250
- max_speech_duration_s: float = float("inf")
- min_silence_duration_ms: int = 2000
- window_size_samples: int = 1024
- speech_pad_ms: int = 400
-
-
-def get_speech_timestamps(
- audio: np.ndarray,
- vad_options: Optional[VadOptions] = None,
- **kwargs,
-) -> List[dict]:
- """This method is used for splitting long audios into speech chunks using silero VAD.
-
- Args:
- audio: One dimensional float array.
- vad_options: Options for VAD processing.
- kwargs: VAD options passed as keyword arguments for backward compatibility.
-
- Returns:
- List of dicts containing begin and end samples of each speech chunk.
- """
- if vad_options is None:
- vad_options = VadOptions(**kwargs)
-
- threshold = vad_options.threshold
- min_speech_duration_ms = vad_options.min_speech_duration_ms
- max_speech_duration_s = vad_options.max_speech_duration_s
- min_silence_duration_ms = vad_options.min_silence_duration_ms
- window_size_samples = vad_options.window_size_samples
- speech_pad_ms = vad_options.speech_pad_ms
-
- if window_size_samples not in [512, 1024, 1536]:
- warnings.warn(
- "Unusual window_size_samples! Supported window_size_samples:\n"
- " - [512, 1024, 1536] for 16000 sampling_rate"
- )
-
- sampling_rate = 16000
- min_speech_samples = sampling_rate * min_speech_duration_ms / 1000
- speech_pad_samples = sampling_rate * speech_pad_ms / 1000
- max_speech_samples = (
- sampling_rate * max_speech_duration_s
- - window_size_samples
- - 2 * speech_pad_samples
- )
- min_silence_samples = sampling_rate * min_silence_duration_ms / 1000
- min_silence_samples_at_max_speech = sampling_rate * 98 / 1000
-
- audio_length_samples = len(audio)
-
- model = get_vad_model()
- state = model.get_initial_state(batch_size=1)
-
- speech_probs = []
- for current_start_sample in range(0, audio_length_samples, window_size_samples):
- chunk = audio[current_start_sample : current_start_sample + window_size_samples]
- if len(chunk) < window_size_samples:
- chunk = np.pad(chunk, (0, int(window_size_samples - len(chunk))))
- speech_prob, state = model(chunk, state, sampling_rate)
- speech_probs.append(speech_prob)
-
- triggered = False
- speeches = []
- current_speech = {}
- neg_threshold = threshold - 0.15
-
- # to save potential segment end (and tolerate some silence)
- temp_end = 0
- # to save potential segment limits in case of maximum segment size reached
- prev_end = next_start = 0
-
- for i, speech_prob in enumerate(speech_probs):
- if (speech_prob >= threshold) and temp_end:
- temp_end = 0
- if next_start < prev_end:
- next_start = window_size_samples * i
-
- if (speech_prob >= threshold) and not triggered:
- triggered = True
- current_speech["start"] = window_size_samples * i
- continue
-
- if (
- triggered
- and (window_size_samples * i) - current_speech["start"] > max_speech_samples
- ):
- if prev_end:
- current_speech["end"] = prev_end
- speeches.append(current_speech)
- current_speech = {}
- # previously reached silence (< neg_thres) and is still not speech (< thres)
- if next_start < prev_end:
- triggered = False
- else:
- current_speech["start"] = next_start
- prev_end = next_start = temp_end = 0
- else:
- current_speech["end"] = window_size_samples * i
- speeches.append(current_speech)
- current_speech = {}
- prev_end = next_start = temp_end = 0
- triggered = False
- continue
-
- if (speech_prob < neg_threshold) and triggered:
- if not temp_end:
- temp_end = window_size_samples * i
- # condition to avoid cutting in very short silence
- if (window_size_samples * i) - temp_end > min_silence_samples_at_max_speech:
- prev_end = temp_end
- if (window_size_samples * i) - temp_end < min_silence_samples:
- continue
- else:
- current_speech["end"] = temp_end
- if (
- current_speech["end"] - current_speech["start"]
- ) > min_speech_samples:
- speeches.append(current_speech)
- current_speech = {}
- prev_end = next_start = temp_end = 0
- triggered = False
- continue
-
- if (
- current_speech
- and (audio_length_samples - current_speech["start"]) > min_speech_samples
- ):
- current_speech["end"] = audio_length_samples
- speeches.append(current_speech)
-
- for i, speech in enumerate(speeches):
- if i == 0:
- speech["start"] = int(max(0, speech["start"] - speech_pad_samples))
- if i != len(speeches) - 1:
- silence_duration = speeches[i + 1]["start"] - speech["end"]
- if silence_duration < 2 * speech_pad_samples:
- speech["end"] += int(silence_duration // 2)
- speeches[i + 1]["start"] = int(
- max(0, speeches[i + 1]["start"] - silence_duration // 2)
- )
- else:
- speech["end"] = int(
- min(audio_length_samples, speech["end"] + speech_pad_samples)
- )
- speeches[i + 1]["start"] = int(
- max(0, speeches[i + 1]["start"] - speech_pad_samples)
- )
- else:
- speech["end"] = int(
- min(audio_length_samples, speech["end"] + speech_pad_samples)
- )
-
- return speeches
-
-
-def collect_chunks(audio: np.ndarray, chunks: List[dict]) -> np.ndarray:
- """Collects and concatenates audio chunks."""
- if not chunks:
- return np.array([], dtype=np.float32)
-
- return np.concatenate([audio[chunk["start"] : chunk["end"]] for chunk in chunks])
-
-
-class SpeechTimestampsMap:
- """Helper class to restore original speech timestamps."""
-
- def __init__(self, chunks: List[dict], sampling_rate: int, time_precision: int = 2):
- self.sampling_rate = sampling_rate
- self.time_precision = time_precision
- self.chunk_end_sample = []
- self.total_silence_before = []
-
- previous_end = 0
- silent_samples = 0
-
- for chunk in chunks:
- silent_samples += chunk["start"] - previous_end
- previous_end = chunk["end"]
-
- self.chunk_end_sample.append(chunk["end"] - silent_samples)
- self.total_silence_before.append(silent_samples / sampling_rate)
-
- def get_original_time(
- self,
- time: float,
- chunk_index: Optional[int] = None,
- ) -> float:
- if chunk_index is None:
- chunk_index = self.get_chunk_index(time)
-
- total_silence_before = self.total_silence_before[chunk_index]
- return round(total_silence_before + time, self.time_precision)
-
- def get_chunk_index(self, time: float) -> int:
- sample = int(time * self.sampling_rate)
- return min(
- bisect.bisect(self.chunk_end_sample, sample),
- len(self.chunk_end_sample) - 1,
- )
-
-
-@functools.lru_cache
-def get_vad_model():
- """Returns the VAD model instance."""
- abspath = os.path.abspath(__file__)
- my_dir = os.path.dirname(abspath)
- parent_dir = os.path.dirname(my_dir)
- path = os.path.join(parent_dir, "Models", "silero_vad.onnx")
- return SileroVADModel(path)
-
-
-class SileroVADModel:
- def __init__(self, path):
- try:
- import onnxruntime
- except ImportError as e:
- raise RuntimeError(
- "Applying the VAD filter requires the onnxruntime package"
- ) from e
-
- opts = onnxruntime.SessionOptions()
- opts.inter_op_num_threads = 1
- opts.intra_op_num_threads = 1
- opts.log_severity_level = 4
-
- self.session = onnxruntime.InferenceSession(
- path,
- providers=["CPUExecutionProvider"],
- sess_options=opts,
- )
-
- def get_initial_state(self, batch_size: int):
- h = np.zeros((2, batch_size, 64), dtype=np.float32)
- c = np.zeros((2, batch_size, 64), dtype=np.float32)
- return h, c
-
- def __call__(self, x, state, sr: int):
- if len(x.shape) == 1:
- x = np.expand_dims(x, 0)
- if len(x.shape) > 2:
- raise ValueError(
- f"Too many dimensions for input audio chunk {len(x.shape)}"
- )
- if sr / x.shape[1] > 31.25:
- raise ValueError("Input audio chunk is too short")
-
- h, c = state
-
- ort_inputs = {
- "input": x,
- "h": h,
- "c": c,
- "sr": np.array(sr, dtype="int64"),
- }
-
- out, h, c = self.session.run(None, ort_inputs)
- state = (h, c)
-
- return out, state
diff --git a/config.yaml b/config.yaml
index 34d88f1..5eec7a2 100644
--- a/config.yaml
+++ b/config.yaml
@@ -1,6 +1,7 @@
compute_type: float16
enable_debug_mode: 0
enable_previews: 1
+user_prompt: Use proper punctuation and grammar. Prefer spelled out numbers like one, eleven, twenty, etc.
save_audio: 0
language: english
gpu_idx: 0
diff --git a/ui/index.html b/ui/index.html
index b06e56b..90f78c1 100644
--- a/ui/index.html
+++ b/ui/index.html
@@ -10,179 +10,229 @@
<div class="container-fluid px-6 py-6 h-screen flex flex-col">
<div class="flex flex-1 gap-6 overflow-hidden">
<!-- Left Panel: Configuration Form -->
- <div class="max-w-96 overflow-y-auto">
- <form id="config-form" class="space-y-6 pr-3">
- <!-- Basic settings (Always Visible) -->
- <section class="config-section">
- <div class="grid grid-cols-2 gap-4">
- <div>
- <label for="model" class="form-label">Model</label>
- <select id="model" class="form-input">
- <option value="tiny">tiny</option>
- <option value="base">base</option>
- <option value="small">small</option>
- <option value="medium">medium</option>
- <option value="large">large</option>
- <option value="turbo">turbo</option>
- </select>
- </div>
- <div>
- <label for="language" class="form-label">Language</label>
- <select id="language" class="form-input">
- <option value="english">English</option>
- <option value="spanish">Spanish</option>
- <option value="french">French</option>
- <option value="german">German</option>
- <option value="italian">Italian</option>
- <option value="portuguese">Portuguese</option>
- <option value="russian">Russian</option>
- <option value="chinese">Chinese</option>
- <option value="japanese">Japanese</option>
- <option value="korean">Korean</option>
- </select>
- </div>
- <div class="col-span-2">
- <label for="microphone" class="form-label">Microphone</label>
- <select id="microphone" class="form-input">
- <option value="">Loading microphones...</option>
- </select>
- </div>
- </div>
- </section>
-
- <!-- Advanced settings toggle -->
- <button type="button" id="toggle-advanced" class="flex items-center gap-2 text-gray-600 hover:text-gray-800 font-medium">
- <svg id="chevron" class="w-5 h-5 transform transition-transform duration-200" fill="none" stroke="currentColor" viewBox="0 0 24 24">
- <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M9 5l7 7-7 7"/>
- </svg>
- Advanced Settings
- </button>
-
- <!-- Advanced settings (initially hidden) -->
- <div id="advanced-settings" class="hidden space-y-6">
- <!-- Compute Settings -->
+ <div class="max-w-96 relative flex flex-col">
+ <!-- Loading Overlay -->
+ <div id="loading-overlay" class="absolute inset-0 bg-white bg-opacity-75 backdrop-blur-sm z-50 hidden flex items-center justify-center rounded-lg">
+ <div class="text-center p-6">
+ <div class="animate-spin rounded-full h-12 w-12 border-b-2 border-blue-600 mx-auto mb-4"></div>
+ <p class="text-gray-700 font-medium"></p>
+ </div>
+ </div>
+
+ <!-- Scrollable form container -->
+ <div class="overflow-y-auto flex-1">
+ <form id="config-form" class="space-y-6 pr-3">
+ <!-- Basic settings (Always Visible) -->
<section class="config-section">
- <h2 class="section-title">Compute Settings</h2>
<div class="grid grid-cols-2 gap-4">
<div>
- <label for="compute_type" class="form-label">Compute Type</label>
- <select id="compute_type" class="form-input">
- <option value="int8">int8</option>
- <option value="float16">float16</option>
- <option value="float32">float32</option>
+ <label for="model" class="form-label">Model</label>
+ <select id="model" class="form-input">
+ <option value="tiny">tiny</option>
+ <option value="base">base</option>
+ <option value="small">small</option>
+ <option value="medium">medium</option>
+ <option value="large">large</option>
+ <option value="turbo">turbo</option>
</select>
</div>
<div>
- <label for="gpu_idx" class="form-label">GPU Index</label>
- <input type="number" id="gpu_idx" min="0" value="0" class="form-input">
+ <label for="language" class="form-label">Language</label>
+ <select id="language" class="form-input">
+ <option value="english">English</option>
+ <option value="spanish">Spanish</option>
+ <option value="french">French</option>
+ <option value="german">German</option>
+ <option value="italian">Italian</option>
+ <option value="portuguese">Portuguese</option>
+ <option value="russian">Russian</option>
+ <option value="chinese">Chinese</option>
+ <option value="japanese">Japanese</option>
+ <option value="korean">Korean</option>
+ </select>
</div>
<div class="col-span-2">
- <label for="use_cpu" class="checkbox-label">
- <input type="checkbox" id="use_cpu" class="mr-2">
- <span class="checkbox-text">Use CPU</span>
- </label>
+ <label for="microphone" class="form-label">Microphone</label>
+ <div class="flex gap-2">
+ <select id="microphone" class="form-input flex-1">
+ <option value="">Loading microphones...</option>
+ </select>
+ <button type="button" id="refresh-microphones" class="btn btn-gray px-3 py-2 flex items-center" title="Refresh microphone list">
+ <svg class="w-4 h-4" fill="none" stroke="currentColor" viewBox="0 0 24 24">
+ <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M4 4v5h.582m15.356 2A8.001 8.001 0 004.582 9m0 0H9m11 11v-5h-.581m0 0a8.003 8.003 0 01-15.357-2m15.357 2H15"/>
+ </svg>
+ </button>
+ </div>
</div>
</div>
</section>
- <!-- Audio Settings -->
- <section class="config-section">
- <h2 class="section-title">Audio Settings</h2>
- <div class="grid grid-cols-2 gap-4">
- <div>
- <label for="max_speech_duration_s" class="form-label">Max Speech Duration (seconds)</label>
- <input type="number" id="max_speech_duration_s" min="1" value="10" class="form-input">
+ <!-- Advanced settings toggle -->
+ <button type="button" id="toggle-advanced" class="flex items-center gap-2 text-gray-600 hover:text-gray-800 font-medium">
+ <svg id="chevron" class="w-5 h-5 transform transition-transform duration-200" fill="none" stroke="currentColor" viewBox="0 0 24 24">
+ <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M9 5l7 7-7 7"/>
+ </svg>
+ Advanced Settings
+ </button>
+
+ <!-- Advanced settings (initially hidden) -->
+ <div id="advanced-settings" class="hidden space-y-6">
+ <!-- Compute Settings -->
+ <section class="config-section">
+ <h2 class="section-title">Compute Settings</h2>
+ <div class="grid grid-cols-2 gap-4">
+ <div>
+ <label for="compute_type" class="form-label">Compute Type</label>
+ <select id="compute_type" class="form-input">
+ <option value="int8">int8</option>
+ <option value="float16">float16</option>
+ <option value="float32">float32</option>
+ </select>
+ </div>
+ <div>
+ <label for="gpu_idx" class="form-label">GPU Index</label>
+ <input type="number" id="gpu_idx" min="0" value="0" class="form-input">
+ </div>
+ <div class="col-span-2">
+ <label for="use_cpu" class="checkbox-label">
+ <input type="checkbox" id="use_cpu" class="mr-2">
+ <span class="checkbox-text">Use CPU</span>
+ </label>
+ </div>
</div>
- <div>
- <label for="min_silence_duration_ms" class="form-label">Min Silence Duration (ms)</label>
- <input type="number" id="min_silence_duration_ms" min="0" value="250" class="form-input">
+ </section>
+
+ <!-- Audio Settings -->
+ <section class="config-section">
+ <h2 class="section-title">Voice Activity Detection</h2>
+ <div class="grid grid-cols-2 gap-4">
+ <div>
+ <label for="max_speech_duration_s" class="form-label">Max Speech Duration (seconds)</label>
+ <input type="number" id="max_speech_duration_s" min="1" value="10" class="form-input">
+ </div>
+ <div>
+ <label for="min_silence_duration_ms" class="form-label">Min Silence Duration (ms)</label>
+ <input type="number" id="min_silence_duration_ms" min="0" value="250" class="form-input">
+ </div>
+ <div>
+ <label for="reset_after_silence_s" class="form-label">Reset After Silence (seconds)</label>
+ <input type="number" id="reset_after_silence_s" min="1" value="15" class="form-input">
+ </div>
</div>
+ </section>
+
+ <!-- Transcription Settings -->
+ <section class="config-section">
+ <h2 class="section-title">Transcription Settings</h2>
<div>
- <label for="reset_after_silence_s" class="form-label">Reset After Silence (seconds)</label>
- <input type="number" id="reset_after_silence_s" min="1" value="15" class="form-input">
+ <label for="user_prompt" class="form-label">
+ Custom Prompt
+ <span class="text-gray-500 text-xs block mt-1"
+ title="Whisper is given this prompt before transcribing. It helps guide the transcription style. For example, you could improve the spelling of your friends' names with: 'My friends' names are Saoirse, Azariah, and Caoimhe.'">
+ (Hover for details)
+ </span>
+ </label>
+ <textarea id="user_prompt"
+ class="form-input h-20 resize-none"
+ placeholder="My friends' names are Saoirse, Azariah, and Caoimhe."></textarea>
</div>
- </div>
- </section>
-
- <!-- Performance Settings -->
- <section class="config-section">
- <h2 class="section-title">Performance Settings</h2>
- <div>
- <label for="transcription_loop_delay_ms" class="form-label">Transcription Loop Delay (ms)</label>
- <input type="number" id="transcription_loop_delay_ms" min="0" value="100" class="form-input">
- </div>
- </section>
+ </section>
- <!-- Debug/Preview Settings -->
- <section class="config-section">
- <h2 class="section-title">Debug/Preview Settings</h2>
- <div class="space-y-3">
- <label for="enable_debug_mode" class="checkbox-label">
- <input type="checkbox" id="enable_debug_mode" class="mr-2">
- <span class="checkbox-text">Enable Debug Mode</span>
- </label>
- <label for="enable_previews" class="checkbox-label">
- <input type="checkbox" id="enable_previews" checked class="mr-2">
- <span class="checkbox-text">Enable Previews</span>
- </label>
- <label for="save_audio" class="checkbox-label">
- <input type="checkbox" id="save_audio" class="mr-2">
- <span class="checkbox-text">Save Audio Segments</span>
- </label>
- </div>
- </section>
-
- <!-- Display Settings -->
- <section class="config-section">
- <h2 class="section-title">Display Settings</h2>
- <div class="grid grid-cols-2 gap-4">
+ <!-- Performance Settings -->
+ <section class="config-section">
+ <h2 class="section-title">Performance Settings</h2>
<div>
- <label for="block_width" class="form-label">Block Width</label>
- <input type="number" id="block_width" min="1" value="2" class="form-input">
+ <label for="transcription_loop_delay_ms" class="form-label">Transcription Loop Delay (ms)</label>
+ <input type="number" id="transcription_loop_delay_ms" min="0" value="100" class="form-input">
</div>
- <div>
- <label for="num_blocks" class="form-label">Number of Blocks</label>
- <input type="number" id="num_blocks" min="1" value="40" class="form-input">
+ </section>
+
+ <!-- Debug/Preview Settings -->
+ <section class="config-section">
+ <h2 class="section-title">Debug/Preview Settings</h2>
+ <div class="space-y-3">
+ <label for="enable_debug_mode" class="checkbox-label">
+ <input type="checkbox" id="enable_debug_mode" class="mr-2">
+ <span class="checkbox-text">Enable Debug Mode</span>
+ </label>
+ <label for="enable_previews" class="checkbox-label">
+ <input type="checkbox" id="enable_previews" checked class="mr-2">
+ <span class="checkbox-text">Enable Previews</span>
+ </label>
+ <label for="save_audio" class="checkbox-label">
+ <input type="checkbox" id="save_audio" class="mr-2">
+ <span class="checkbox-text">Save Audio Segments</span>
+ </label>
</div>
- <div>
- <label for="rows" class="form-label">Rows</label>
- <input type="number" id="rows" min="1" value="10" class="form-input">
+ </section>
+
+ <!-- Display Settings -->
+ <section class="config-section">
+ <h2 class="section-title">Custom Chatbox Settings</h2>
+ <div class="grid grid-cols-2 gap-4">
+ <div>
+ <label for="block_width" class="form-label">Block Width</label>
+ <input type="number" id="block_width" min="1" value="2" class="form-input">
+ </div>
+ <div>
+ <label for="num_blocks" class="form-label">Number of Blocks</label>
+ <input type="number" id="num_blocks" min="1" value="40" class="form-input">
+ </div>
+ <div>
+ <label for="rows" class="form-label">Rows</label>
+ <input type="number" id="rows" min="1" value="10" class="form-input">
+ </div>
+ <div>
+ <label for="cols" class="form-label">Columns</label>
+ <input type="number" id="cols" min="1" value="24" class="form-input">
+ </div>
</div>
+ </section>
+
+ <!-- Configuration Settings -->
+ <section class="config-section">
+ <h2 class="section-title">Configuration</h2>
<div>
- <label for="cols" class="form-label">Columns</label>
- <input type="number" id="cols" min="1" value="24" class="form-input">
+ <button type="button" id="reset-config" class="btn btn-blue w-full">
+ Reset Config to Defaults
+ </button>
</div>
- </div>
- </section>
- </div>
+ </section>
- <!-- Action Buttons -->
- <div class="flex justify-between pb-6">
- <div class="space-x-3">
- <button type="button" id="setup-venv" class="btn btn-blue">
- Set up virtual environment
- </button>
- <button type="button" id="start-process" class="btn btn-green">
- Start
- </button>
- <button type="button" id="stop-process" class="btn btn-red" disabled>
- Stop
- </button>
+ <!-- Virtual Environment Settings -->
+ <section class="config-section">
+ <h2 class="section-title">Virtual Environment</h2>
+ <div class="flex space-x-3">
+ <button type="button" id="setup-venv" class="btn btn-blue flex-1">
+ Setup venv
+ </button>
+ <button type="button" id="reset-venv" class="btn btn-blue flex-1">
+ Reset venv
+ </button>
+ </div>
+ </section>
</div>
- </div>
- </form>
- <!-- Status Message -->
- <div id="status-message" class="mt-6 p-4 rounded-md hidden"></div>
+ <!-- Action Buttons -->
+ <div class="pb-6">
+ <div class="flex space-x-3">
+ <button type="button" id="start-process" class="btn btn-green flex-1">
+ Start
+ </button>
+ <button type="button" id="stop-process" class="btn btn-red flex-1" disabled>
+ Stop
+ </button>
+ </div>
+ </div>
+ </form>
+
+ <!-- Status Message -->
+ <div id="status-message" class="mt-6 p-4 rounded-md hidden"></div>
+ </div>
</div>
<!-- Right Panel: Python Console -->
<div class="flex-1 flex flex-col bg-gray-900 rounded-lg overflow-hidden">
- <div class="bg-gray-800 px-4 py-2 flex justify-between items-center">
- <button id="clear-console" class="text-gray-400 hover:text-white text-sm">
- Clear
- </button>
- </div>
<div id="python-console" class="flex-1 overflow-y-auto p-4 font-mono text-sm">
<div id="console-content" class="text-gray-300 whitespace-pre-wrap"></div>
</div>
diff --git a/ui/index.js b/ui/index.js
index a056156..2420ece 100644
--- a/ui/index.js
+++ b/ui/index.js
@@ -3,6 +3,7 @@ const path = require('node:path');
const fs = require('node:fs').promises;
const yaml = require('js-yaml');
const { spawn } = require('child_process');
+const https = require('https');
const APP_ROOT = path.join(__dirname, '..');
const CONFIG_PATH = path.join(APP_ROOT, 'config.yaml');
@@ -10,6 +11,20 @@ const CONFIG_PATH = path.join(APP_ROOT, 'config.yaml');
let mainWindow;
let runningProcess = null; // Track the running Python process
+// Required DLL files for CUDA/cuDNN support; the install-requirements handler passes this list to ensureRequiredFiles(), which downloads any that are missing from the app's `dll` directory
+const REQUIRED_DLLS = [
+ 'cublas64_12.dll',
+ 'cublasLt64_12.dll',
+ 'cudnn64_9.dll',
+ 'cudnn_adv64_9.dll',
+ 'cudnn_cnn64_9.dll',
+ 'cudnn_engines_precompiled64_9.dll',
+ 'cudnn_engines_runtime_compiled64_9.dll',
+ 'cudnn_graph64_9.dll',
+ 'cudnn_heuristic64_9.dll',
+ 'cudnn_ops64_9.dll'
+];
+
// Helper function to get the correct Python executable from venv
function getVenvPython() {
const venvPath = path.join(APP_ROOT, 'venv');
@@ -24,6 +39,78 @@ function sendPythonOutput(message, type = 'stdout') {
}
}
+// Helper function to create environment with DLL path
+//
+// Returns a copy of process.env for spawned Python processes, with the app's
+// `dll` and `bin` directories prepended to the executable search path and the
+// Hugging Face symlink warning disabled.
+function createPythonEnvironment() {
+  const env = { ...process.env };
+
+  // The spread copy is a plain object, so it does not inherit process.env's
+  // case-insensitive lookup on Windows, where the variable is commonly spelled
+  // "Path". Reuse whichever spelling is present instead of adding a second
+  // "PATH" key whose value would end in the literal string "undefined".
+  const pathKey = Object.keys(env).find((key) => key.toUpperCase() === 'PATH') || 'PATH';
+
+  const searchDirs = [path.join(APP_ROOT, 'dll'), path.join(APP_ROOT, 'bin')];
+  if (env[pathKey]) {
+    searchDirs.push(env[pathKey]);
+  }
+
+  // path.delimiter is ';' on Windows (unchanged behavior) and ':' elsewhere.
+  env[pathKey] = searchDirs.join(path.delimiter);
+  env.HF_HUB_DISABLE_SYMLINKS_WARNING = '1';
+  return env;
+}
+
+// Helper function to download a file from URL
+//
+// Resolves once the body has been written to outputPath and the file handle is
+// closed. Rejects on a request error, a stream error, or any status other than
+// 200 (redirects are not followed). A partially written file is removed on
+// failure (best effort).
+function downloadFile(url, outputPath) {
+  return new Promise((resolve, reject) => {
+    const request = https.get(url, (response) => {
+      if (response.statusCode !== 200) {
+        // Drain the body so the socket is released, and do not create an
+        // output file for a failed download.
+        response.resume();
+        reject(new Error(`Failed to download: HTTP ${response.statusCode}`));
+        return;
+      }
+
+      // `fs` in this file is the promise API, so the stream factory comes from
+      // the callback-style module.
+      const file = require('fs').createWriteStream(outputPath);
+
+      const fail = (err) => {
+        request.destroy();
+        file.destroy();
+        fs.unlink(outputPath).catch(() => {}); // Clean up on error
+        reject(err);
+      };
+
+      // pipe() does not forward source errors to the destination, so both
+      // ends need their own handler.
+      response.on('error', fail);
+      file.on('error', fail);
+
+      // Resolve only after the descriptor is actually closed.
+      file.on('finish', () => {
+        file.close((closeError) => (closeError ? fail(closeError) : resolve()));
+      });
+
+      response.pipe(file);
+    });
+
+    // Nothing has been written yet if the request itself fails.
+    request.on('error', reject);
+  });
+}
+
+// Helper function to setup process event handlers
+//
+// Forwards the child's stdout/stderr to the renderer console and, when the
+// child reports an error or closes, clears `runningProcess` and notifies the
+// renderer with 'process-stopped'.
+// Note: the `process` parameter shadows Node's global `process` inside this
+// function.
+function setupProcessHandlers(process) {
+  // Relay one output stream, tagging each chunk with the given console type.
+  const relay = (stream, type) => {
+    stream.on('data', (chunk) => {
+      sendPythonOutput(chunk.toString().trimEnd(), type);
+    });
+  };
+
+  // Shared teardown for both the 'error' and 'close' events.
+  const markStopped = () => {
+    runningProcess = null;
+    if (!mainWindow || mainWindow.isDestroyed()) {
+      return;
+    }
+    mainWindow.webContents.send('process-stopped');
+  };
+
+  relay(process.stdout, 'stdout');
+  relay(process.stderr, 'stderr');
+
+  process.on('error', (error) => {
+    sendPythonOutput(`Process error: ${error.message}`, 'stderr');
+    markStopped();
+  });
+
+  process.on('close', (code) => {
+    sendPythonOutput(`Process exited with code ${code}`, 'info');
+    markStopped();
+  });
+}
+
// Helper function to execute Python commands using venv
function executePythonCommand(args, options = {}) {
return new Promise((resolve, reject) => {
@@ -31,14 +118,9 @@ function executePythonCommand(args, options = {}) {
const commandStr = `${path.basename(pythonPath)} ${args.join(' ')}`;
sendPythonOutput(`> ${commandStr}`, 'info');
- // Add dll directory to PATH for Windows DLL loading
- const dllPath = path.join(APP_ROOT, 'dll');
- const env = { ...process.env };
- env.PATH = `${dllPath};${env.PATH}`;
-
const spawnOptions = {
...options,
- env
+ env: createPythonEnvironment()
};
const pythonProcess = spawn(pythonPath, args, spawnOptions);
@@ -78,6 +160,7 @@ function createWindow () {
mainWindow = new BrowserWindow({
width: 1000,
height: 800,
+ icon: path.join(APP_ROOT, 'Images', 'favicon.ico'),
webPreferences: {
preload: path.join(__dirname, 'preload.js'),
contextIsolation: true,
@@ -93,6 +176,7 @@ const DEFAULT_CONFIG = {
compute_type: 'float16',
enable_debug_mode: 0,
enable_previews: 1,
+ user_prompt: 'Use proper punctuation and grammar. Prefer spelled out numbers like one, eleven, twenty, etc.',
save_audio: 0,
language: 'english',
gpu_idx: 0,
@@ -117,11 +201,11 @@ ipcMain.handle('load-config', async () => {
} catch (error) {
if (error.code === 'ENOENT') {
// Config file doesn't exist, create it with defaults
- console.log('Config file not found, creating with defaults...');
+ console.error('Config file not found, creating with defaults...');
try {
const yamlContent = yaml.dump(DEFAULT_CONFIG, { lineWidth: -1 });
await fs.writeFile(CONFIG_PATH, yamlContent, 'utf8');
- console.log('Created config.yaml with default values');
+ console.error('Created config.yaml with default values');
return DEFAULT_CONFIG;
} catch (writeError) {
console.error('Error creating default config:', writeError);
@@ -145,21 +229,138 @@ ipcMain.handle('save-config', async (event, config) => {
}
});
-ipcMain.handle('restart-app', () => {
- app.relaunch();
- app.exit();
+// IPC handler: reset the configuration by deleting config.yaml. The
+// load-config handler recreates the file from DEFAULT_CONFIG when it is missing.
+// Resolves to { success, message }; rejects if the file exists but cannot be deleted.
+ipcMain.handle('reset-config', async () => {
+  try {
+    // Unlink directly instead of probing with fs.access() first: a missing
+    // file surfaces as ENOENT either way, and skipping the probe removes the
+    // gap between the check and the delete.
+    await fs.unlink(CONFIG_PATH);
+    console.error('Config file deleted successfully');
+    return { success: true, message: 'Configuration reset to defaults' };
+  } catch (error) {
+    if (error.code === 'ENOENT') {
+      // Config file doesn't exist, that's fine
+      return { success: true, message: 'Configuration already at defaults' };
+    }
+    console.error('Error resetting config:', error);
+    throw new Error(`Failed to reset configuration: ${error.message}`);
+  }
});
-ipcMain.handle('install-requirements', async (event) => {
+// Generic function to ensure required files are present
+//
+// config fields:
+//   directoryName   - directory under APP_ROOT that must contain the files
+//   requiredFiles   - file names expected in that directory
+//   downloadBaseUrl - a missing file is fetched from `${downloadBaseUrl}/${fileName}`
+//   resourceType    - label used in console and result messages (e.g. 'DLL')
+//
+// Resolves to { success, message, downloadedFiles }. Rejects with an Error
+// prefixed `${resourceType} setup failed:` when a directory/file check fails
+// for a reason other than ENOENT or when a download fails; downloading stops
+// at the first failure.
+async function ensureRequiredFiles(config) {
+  const { directoryName, requiredFiles, downloadBaseUrl, resourceType } = config;
+  const targetPath = path.join(APP_ROOT, directoryName);
+
+  // "1 file" but "0 files" / "2 files". The summary message previously
+  // pluralised on `> 1`, which produced "0 file downloaded."
+  const fileNoun = (count) => `file${count === 1 ? '' : 's'}`;
+
+  try {
+    // Check if target directory exists, create it if not
+    try {
+      await fs.access(targetPath);
+      sendPythonOutput(`${resourceType} directory exists`, 'info');
+    } catch (error) {
+      if (error.code !== 'ENOENT') {
+        throw error;
+      }
+      sendPythonOutput(`Creating ${resourceType} directory...`, 'info');
+      await fs.mkdir(targetPath, { recursive: true });
+      sendPythonOutput(`${resourceType} directory created`, 'info');
+    }
+
+    // Check each required file
+    const missingFiles = [];
+    for (const fileName of requiredFiles) {
+      try {
+        await fs.access(path.join(targetPath, fileName));
+        sendPythonOutput(`✓ ${fileName} exists`, 'info');
+      } catch (error) {
+        if (error.code !== 'ENOENT') {
+          throw error;
+        }
+        missingFiles.push(fileName);
+        sendPythonOutput(`✗ ${fileName} missing`, 'info');
+      }
+    }
+
+    // Download missing files
+    if (missingFiles.length > 0) {
+      sendPythonOutput(`Downloading ${missingFiles.length} missing ${resourceType} ${fileNoun(missingFiles.length)}...`, 'info');
+
+      for (const fileName of missingFiles) {
+        const filePath = path.join(targetPath, fileName);
+        const downloadUrl = `${downloadBaseUrl}/${fileName}`;
+
+        try {
+          sendPythonOutput(`Downloading ${fileName}...`, 'info');
+          await downloadFile(downloadUrl, filePath);
+          sendPythonOutput(`✓ Downloaded ${fileName}`, 'info');
+        } catch (downloadError) {
+          sendPythonOutput(`✗ Failed to download ${fileName}: ${downloadError.message}`, 'stderr');
+          throw new Error(`Failed to download ${fileName}: ${downloadError.message}`);
+        }
+      }
+
+      sendPythonOutput(`All missing ${resourceType} files downloaded successfully`, 'info');
+    } else {
+      sendPythonOutput(`All required ${resourceType} files are present`, 'info');
+    }
+
+    return {
+      success: true,
+      message: `${resourceType} setup complete. ${missingFiles.length} ${fileNoun(missingFiles.length)} downloaded.`,
+      downloadedFiles: missingFiles
+    };
+  } catch (error) {
+    console.error(`Error setting up ${resourceType} files:`, error);
+    throw new Error(`${resourceType} setup failed: ${error.message}`);
+  }
+}
+
+// Update the install-requirements handler
+ipcMain.handle('install-requirements', async () => {
const requirementsPath = path.join(APP_ROOT, 'app', 'requirements.txt');
+ const venvMarkerPath = path.join(APP_ROOT, '.venv_is_set_up');
try {
+ // Check if venv is already set up
+ try {
+ await fs.access(venvMarkerPath);
+ sendPythonOutput('Virtual environment already set up, skipping installation', 'info');
+ return { success: true, message: 'Virtual environment already set up' };
+ } catch (error) {
+ // Marker doesn't exist, proceed with setup
+ }
+
// Check if requirements.txt exists
await fs.access(requirementsPath);
- const result = await executePythonCommand(['-m', 'pip', 'install', '-r', requirementsPath]);
+ await executePythonCommand(['-m', 'pip', 'install', '-r', requirementsPath]);
+
+ await ensureRequiredFiles({
+ directoryName: 'dll',
+ requiredFiles: REQUIRED_DLLS,
+ downloadBaseUrl: 'https://yummers.dev/tastt/dll',
+ resourceType: 'DLL'
+ });
+
+ await fs.mkdir(path.join(APP_ROOT, 'Models'), { recursive: true });
+
+ await fs.writeFile(venvMarkerPath, new Date().toISOString(), 'utf8');
+ sendPythonOutput('Created .venv_is_set_up marker file', 'info');
- return { success: true, message: 'Requirements installed successfully' };
+ return { success: true, message: 'Requirements and dependencies installed successfully' };
} catch (error) {
console.error('Error installing requirements:', error);
if (error.code === 'ENOENT') {
@@ -175,7 +376,6 @@ ipcMain.handle('get-microphones', async () => {
try {
const result = await executePythonCommand([scriptPath]);
const microphones = JSON.parse(result.stdout.trim());
- console.log('Successfully retrieved microphones:', microphones);
return microphones;
} catch (error) {
console.error('Failed to get microphones:', error);
@@ -183,53 +383,135 @@ ipcMain.handle('get-microphones', async () => {
}
});
-// Add handlers for starting and stopping the process
-ipcMain.handle('start-process', async () => {
- if (runningProcess) {
- throw new Error('Process is already running');
+// Helper function to safely delete directory contents
+//
+// Removes every entry inside dirPath (the directory itself is kept) and
+// resolves to the number of entries removed. A missing directory resolves to
+// 0; an entry that cannot be removed is reported and skipped; any other error
+// is reported and rethrown. dirName is only used in console messages.
+async function clearDirectory(dirPath, dirName) {
+  try {
+    await fs.access(dirPath);
+    sendPythonOutput(`Clearing ${dirName} directory...`, 'info');
+
+    let removed = 0;
+    const entries = await fs.readdir(dirPath);
+
+    for (const entry of entries) {
+      try {
+        await fs.rm(path.join(dirPath, entry), { recursive: true, force: true });
+        sendPythonOutput(`✗ Deleted file ${entry}`, 'info');
+        removed += 1;
+      } catch (deleteError) {
+        // Continue with other files even if one fails
+        sendPythonOutput(`Warning: Could not delete ${entry}: ${deleteError.message}`, 'stderr');
+      }
+    }
+
+    sendPythonOutput(`${dirName} directory cleared`, 'info');
+    return removed;
+  } catch (error) {
+    if (error.code !== 'ENOENT') {
+      sendPythonOutput(`Error clearing ${dirName} directory: ${error.message}`, 'stderr');
+      throw error;
+    }
+    sendPythonOutput(`${dirName} directory doesn't exist, skipping`, 'info');
+    return 0;
+  }
+}
- const scriptPath = path.join(APP_ROOT, 'app', 'hi.py');
- const configPath = CONFIG_PATH;
+// IPC handler: undo the environment setup. Deletes the .venv_is_set_up marker,
+// uninstalls every non-core package reported by `pip freeze`, and empties the
+// dll, Models and bin directories. Resolves to a summary object; rejects with
+// "Virtual environment reset failed: ..." if any step other than the marker
+// deletion fails.
+ipcMain.handle('reset-venv', async () => {
+ const venvMarkerPath = path.join(APP_ROOT, '.venv_is_set_up');
try {
- const pythonPath = getVenvPython();
- const args = [scriptPath, '--config', configPath];
+ sendPythonOutput('Starting virtual environment reset...', 'info');
- sendPythonOutput(`Starting process: ${path.basename(pythonPath)} ${args.join(' ')}`, 'info');
+ // Delete the venv marker file first
+ try {
+ await fs.unlink(venvMarkerPath);
+ sendPythonOutput('Deleted .venv_is_set_up marker file', 'info');
+ } catch (error) {
+ if (error.code !== 'ENOENT') {
+ sendPythonOutput(`Warning: Could not delete marker file: ${error.message}`, 'stderr');
+ }
+ }
+
+ // Get list of installed packages
+ sendPythonOutput('Getting list of installed packages...', 'info');
+ const freezeResult = await executePythonCommand(['-m', 'pip', 'freeze']);
+ const installedPackages = freezeResult.stdout.trim();
+
+ let uninstalledPackages = [];
+
+ if (!installedPackages) {
+ sendPythonOutput('No packages found to uninstall', 'info');
+ } else {
+ // Parse package names and filter out core packages.
+ // `pip freeze` emits more than `name==version`: direct-URL installs appear as
+ // `name @ url` and editable installs as `-e ...`. Splitting only on `==` left
+ // such lines intact, so `pip @ file:///...` slipped past the core-package
+ // filter and `-e ...` was handed to `pip uninstall` as if it were a package.
+ // Reduce `name @ url` to the bare name and skip option-style lines.
+ const packageLines = installedPackages.split('\n').map(line => line.trim()).filter(Boolean);
+ const packageNames = packageLines
+ .filter(line => !line.startsWith('#') && !line.startsWith('-'))
+ .map(line => line.split(/==| @ /)[0].trim())
+ .filter(Boolean);
+
+ const corePackages = ['pip', 'setuptools', 'wheel'];
+ const packagesToUninstall = packageNames.filter(name => !corePackages.includes(name.toLowerCase()));
+
+ if (packagesToUninstall.length === 0) {
+ sendPythonOutput('Only core packages found, nothing to uninstall', 'info');
+ } else {
+ sendPythonOutput(`Uninstalling ${packagesToUninstall.length} packages...`, 'info');
+
+ const uninstallArgs = ['-m', 'pip', 'uninstall', '-y', ...packagesToUninstall];
+ await executePythonCommand(uninstallArgs);
+ uninstalledPackages = packagesToUninstall;
+ }
+ }
+
+ // Clear downloaded files
+ sendPythonOutput('Clearing downloaded files...', 'info');
- // Add dll directory to PATH for Windows DLL loading
const dllPath = path.join(APP_ROOT, 'dll');
- const env = { ...process.env };
- env.PATH = `${dllPath};${env.PATH}`;
+ const modelsPath = path.join(APP_ROOT, 'Models');
+ const binPath = path.join(APP_ROOT, 'bin');
- runningProcess = spawn(pythonPath, args, { env });
+ const deletedDlls = await clearDirectory(dllPath, 'DLL');
+ const deletedModels = await clearDirectory(modelsPath, 'Models');
+ const deletedBins = await clearDirectory(binPath, 'Binary');
- runningProcess.stdout.on('data', (data) => {
- const text = data.toString();
- sendPythonOutput(text.trimEnd(), 'stdout');
- });
+ const totalDeletedFiles = deletedDlls + deletedModels + deletedBins;
- runningProcess.stderr.on('data', (data) => {
- const text = data.toString();
- sendPythonOutput(text.trimEnd(), 'stderr');
- });
+ sendPythonOutput('Virtual environment reset successfully!', 'info');
- runningProcess.on('error', (error) => {
- sendPythonOutput(`Process error: ${error.message}`, 'stderr');
- runningProcess = null;
- if (mainWindow && !mainWindow.isDestroyed()) {
- mainWindow.webContents.send('process-stopped');
+ return {
+ success: true,
+ message: `Virtual environment reset complete. Uninstalled ${uninstalledPackages.length} packages and deleted ${totalDeletedFiles} downloaded files.`,
+ uninstalledPackages,
+ deletedFiles: {
+ dlls: deletedDlls,
+ models: deletedModels,
+ binaries: deletedBins,
+ total: totalDeletedFiles
}
- });
+ };
+ } catch (error) {
+ console.error('Error resetting virtual environment:', error);
+ throw new Error(`Virtual environment reset failed: ${error.message}`);
+ }
+});
+
+// Add handlers for starting and stopping the process
+ipcMain.handle('start-process', async () => {
+ if (runningProcess) {
+ throw new Error('Process is already running');
+ }
+
+ const scriptPath = path.join(APP_ROOT, 'app', 'hi.py');
+ const args = [scriptPath, '--config', CONFIG_PATH];
+
+ try {
+ const pythonPath = getVenvPython();
+ sendPythonOutput(`Starting process: ${path.basename(pythonPath)} ${args.join(' ')}`, 'info');
- runningProcess.on('close', (code) => {
- sendPythonOutput(`Process exited with code ${code}`, 'info');
- runningProcess = null;
- if (mainWindow && !mainWindow.isDestroyed()) {
- mainWindow.webContents.send('process-stopped');
- }
- });
+ runningProcess = spawn(pythonPath, args, { env: createPythonEnvironment() });
+ setupProcessHandlers(runningProcess);
return { success: true };
} catch (error) {
@@ -243,7 +525,7 @@ ipcMain.handle('stop-process', async () => {
throw new Error('No process is running');
}
- return new Promise((resolve, reject) => {
+ return new Promise((resolve) => {
let forcefullyKilled = false;
// Set up a timeout to force kill after 10 seconds
diff --git a/ui/package.json b/ui/package.json
index fee2d67..3a58298 100644
--- a/ui/package.json
+++ b/ui/package.json
@@ -3,12 +3,85 @@
"version": "1.0.0",
"description": "Speech-to-text tool for VRChat",
"main": "index.js",
+ "homepage": "./",
"scripts": {
"start": "npm run build:css && electron .",
"build:css": "tailwindcss -i ./src/components.css -o ./build/output.css",
"watch:css": "tailwindcss -i ./src/components.css -o ./build/output.css --watch",
"dev": "concurrently \"npm run watch:css\" \"electron .\"",
- "test": "echo \"Error: no test specified\" && exit 1"
+ "test": "echo \"Error: no test specified\" && exit 1",
+ "dist": "npm run build:css && electron-builder",
+ "dist:win": "npm run build:css && electron-builder --win",
+ "dist:portable": "npm run build:css && electron-builder --win portable",
+ "dist:zip": "npm run build:css && electron-builder --win zip"
+ },
+ "build": {
+ "appId": "com.yum_food.tastt",
+ "productName": "TaSTT",
+ "directories": {
+ "output": "dist"
+ },
+ "files": [
+ "**/*",
+ "!dist/**/*",
+ "!src/**/*",
+ "!node_modules/**/{CHANGELOG.md,README.md,README,readme.md,readme}",
+ "!node_modules/**/{test,__tests__,tests,powered-test,example,examples}",
+ "!node_modules/**/*.d.ts",
+ "!node_modules/.bin",
+ "!.git/**/*",
+ "!.gitignore"
+ ],
+ "extraResources": [
+ {
+ "from": "../app",
+ "to": "app",
+ "filter": [
+ "**/*.py",
+ "requirements.txt",
+ "!**/__pycache__/**/*"
+ ]
+ },
+ {
+ "from": "../config.yaml",
+ "to": "config.yaml"
+ },
+ {
+ "from": "../dll",
+ "to": "dll",
+ "filter": ["**/*"]
+ },
+ {
+ "from": "../Images",
+ "to": "Images",
+ "filter": ["**/*"]
+ },
+ {
+ "from": "../bin",
+ "to": "bin",
+ "filter": ["**/*"]
+ }
+ ],
+ "win": {
+ "icon": "../Images/logo.png",
+ "target": [
+ {
+ "target": "portable",
+ "arch": ["x64"]
+ },
+ {
+ "target": "zip",
+ "arch": ["x64"]
+ }
+ ]
+ },
+ "portable": {
+ "artifactName": "${productName}-${version}-portable.exe"
+ },
+ "nsis": {
+ "oneClick": false,
+ "allowToChangeInstallationDirectory": true
+ }
},
"keywords": [],
"author": "yum_food",
@@ -22,6 +95,7 @@
"concurrently": "^9.1.2",
"cross-env": "^7.0.3",
"electron": "^36.3.2",
+ "electron-builder": "^25.1.8",
"postcss": "^8.5.4",
"tailwindcss": "^3.4.17",
"vite": "^6.3.5",
diff --git a/ui/preload.js b/ui/preload.js
index e6c0623..35cc8d6 100644
--- a/ui/preload.js
+++ b/ui/preload.js
@@ -3,14 +3,13 @@ const { contextBridge, ipcRenderer } = require('electron');
contextBridge.exposeInMainWorld('electronAPI', {
loadConfig: () => ipcRenderer.invoke('load-config'),
saveConfig: (config) => ipcRenderer.invoke('save-config', config),
- restartApp: () => ipcRenderer.invoke('restart-app'),
+ resetConfig: () => ipcRenderer.invoke('reset-config'),
getMicrophones: () => ipcRenderer.invoke('get-microphones'),
installRequirements: () => ipcRenderer.invoke('install-requirements'),
+ resetVenv: () => ipcRenderer.invoke('reset-venv'),
startProcess: () => ipcRenderer.invoke('start-process'),
stopProcess: () => ipcRenderer.invoke('stop-process'),
onPythonOutput: (callback) => ipcRenderer.on('python-output', (event, data) => callback(data)),
- onProcessStopped: (callback) => ipcRenderer.on('process-stopped', (event) => callback())
+ onProcessStopped: (callback) => ipcRenderer.on('process-stopped', () => callback())
});
-console.log('Preload script loaded.');
-
diff --git a/ui/renderer.js b/ui/renderer.js
index b3f05a6..201eef6 100644
--- a/ui/renderer.js
+++ b/ui/renderer.js
@@ -1,99 +1,220 @@
-// Handle status messages
+// Configuration and form field mappings: each key is both a config key and the id of its form element; `type` selects how getFormValues()/setFormValues() convert the value and `default` is the fallback when the value is missing or empty
+const CONFIG_FIELDS = {
+ // String fields (NOTE(review): microphone is declared 'number', so getFormValues() turns a non-numeric value into 0 - confirm it is always a device index)
+ compute_type: { type: 'select', default: 'float16' },
+ language: { type: 'select', default: 'english' },
+ model: { type: 'select', default: 'turbo' },
+ microphone: { type: 'number', default: 0 },
+ user_prompt: { type: 'text', default: '' },
+
+ // Number fields (read with parseInt in getFormValues())
+ gpu_idx: { type: 'number', default: 0 },
+ max_speech_duration_s: { type: 'number', default: 10 },
+ min_silence_duration_ms: { type: 'number', default: 250 },
+ reset_after_silence_s: { type: 'number', default: 15 },
+ transcription_loop_delay_ms: { type: 'number', default: 100 },
+ block_width: { type: 'number', default: 2 },
+ num_blocks: { type: 'number', default: 40 },
+ rows: { type: 'number', default: 10 },
+ cols: { type: 'number', default: 24 },
+
+ // Boolean fields (stored as 1/0; bound to each element's `checked` state)
+ enable_debug_mode: { type: 'boolean', default: 0 },
+ enable_previews: { type: 'boolean', default: 1 },
+ save_audio: { type: 'boolean', default: 0 },
+ use_cpu: { type: 'boolean', default: 0 }
+};
+
+// Button management system
+//
+// Looks up the action buttons once and centralises enabling/disabling them
+// together with the CSS classes that dim a disabled button.
+class ButtonManager {
+  constructor() {
+    const byId = (id) => document.getElementById(id);
+    this.buttons = {
+      start: byId('start-process'),
+      stop: byId('stop-process'),
+      setupVenv: byId('setup-venv'),
+      resetVenv: byId('reset-venv'),
+      refreshMicrophones: byId('refresh-microphones')
+    };
+  }
+
+  // Enable or disable the named button; names with no matching element are ignored.
+  setState(buttonName, disabled) {
+    const target = this.buttons[buttonName];
+    if (!target) {
+      return;
+    }
+
+    target.disabled = disabled;
+    const dimClasses = ['opacity-50', 'cursor-not-allowed'];
+    if (disabled) {
+      target.classList.add(...dimClasses);
+    } else {
+      target.classList.remove(...dimClasses);
+    }
+  }
+
+  // Start is locked and Stop is available while the process runs.
+  setProcessRunning() {
+    this.setState('start', true);
+    this.setState('stop', false);
+  }
+
+  // Inverse of setProcessRunning().
+  setProcessStopped() {
+    this.setState('start', false);
+    this.setState('stop', true);
+  }
+
+  // Disable the named button while asyncFn runs, re-enabling it afterwards
+  // whether asyncFn resolves or rejects. Resolves to asyncFn's result.
+  async withButtonLoading(buttonName, asyncFn) {
+    this.setState(buttonName, true);
+    try {
+      const outcome = await asyncFn();
+      return outcome;
+    } finally {
+      this.setState(buttonName, false);
+    }
+  }
+}
+
+const buttonManager = new ButtonManager();
+
+// Add loading overlay management
+//
+// Shows a blocking overlay with a message and disables every control in the
+// overlay's parent element while it is visible. hide() puts each control back
+// into the state it had when show() was called. Previously hide() enabled
+// everything, which also enabled the initially disabled Stop button, and
+// autoSaveConfig() treats an enabled Stop button as "process is running".
+class LoadingOverlay {
+  constructor() {
+    this.overlay = document.getElementById('loading-overlay');
+    this.form = document.getElementById('config-form');
+    this.messageElement = this.overlay.querySelector('p');
+    this.defaultMessage = 'Environment setup underway - please wait.';
+    // Map of control -> { disabled, dimmed } captured by show(); null while hidden.
+    this.savedStates = null;
+  }
+
+  // All form inputs and buttons in the overlay's parent (the left panel).
+  getControls() {
+    const leftPanel = this.overlay.parentElement;
+    return leftPanel.querySelectorAll('input, select, textarea, button');
+  }
+
+  // Show the overlay with `message` (or the default) and disable the controls.
+  show(message = null) {
+    this.messageElement.textContent = message || this.defaultMessage;
+    this.overlay.classList.remove('hidden');
+
+    // Keep the first snapshot if show() is called again before hide(), so the
+    // overlay's own "disabled" state is never recorded as the original one.
+    if (!this.savedStates) {
+      this.savedStates = new Map();
+    }
+
+    this.getControls().forEach(input => {
+      if (!this.savedStates.has(input)) {
+        this.savedStates.set(input, {
+          disabled: input.disabled,
+          dimmed: input.classList.contains('opacity-50')
+        });
+      }
+      input.disabled = true;
+      input.classList.add('opacity-50');
+    });
+  }
+
+  // Hide the overlay and restore the controls.
+  hide() {
+    this.overlay.classList.add('hidden');
+
+    const saved = this.savedStates || new Map();
+    this.savedStates = null;
+
+    this.getControls().forEach(input => {
+      const previous = saved.get(input);
+      // Controls with no snapshot (added meanwhile, or hide() without show())
+      // keep the earlier behavior: enabled and not dimmed.
+      input.disabled = previous ? previous.disabled : false;
+      if (!(previous && previous.dimmed)) {
+        input.classList.remove('opacity-50');
+      }
+    });
+
+    // Reset to default message
+    this.messageElement.textContent = this.defaultMessage;
+  }
+}
+
+const loadingOverlay = new LoadingOverlay();
+
+// Add a flag to prevent auto-save during programmatic updates
+let isSettingValues = false;
+
+// Handle status messages with better color management
+// Shows `message` in the status banner styled by `type` ('success', 'error' or
+// 'info'; anything else is styled as 'info'), mirrors it to the console pane,
+// and hides the banner five seconds after the most recent message.
function showStatus(message, type = 'info') {
const statusEl = document.getElementById('status-message');
statusEl.textContent = message;
- statusEl.classList.remove('hidden', 'bg-green-100', 'bg-red-100', 'bg-blue-100', 'text-green-800', 'text-red-800', 'text-blue-800');
-
- if (type === 'success') {
- statusEl.classList.add('bg-green-100', 'text-green-800');
- } else if (type === 'error') {
- statusEl.classList.add('bg-red-100', 'text-red-800');
- } else {
- statusEl.classList.add('bg-blue-100', 'text-blue-800');
- }
+
+ // Remove all status classes
+ const statusClasses = ['hidden', 'bg-green-100', 'bg-red-100', 'bg-blue-100', 'text-green-800', 'text-red-800', 'text-blue-800'];
+ statusEl.classList.remove(...statusClasses);
+
+ // Add appropriate classes based on type
+ const typeMap = {
+ success: ['bg-green-100', 'text-green-800'],
+ error: ['bg-red-100', 'text-red-800'],
+ info: ['bg-blue-100', 'text-blue-800']
+ };
+
+ statusEl.classList.add(...(typeMap[type] || typeMap.info));
// Also log to console
appendToConsole(message, type === 'error' ? 'stderr' : 'info');
- setTimeout(() => {
- statusEl.classList.add('hidden');
- }, 5000);
+ // Cancel the previous auto-hide timer; otherwise an older message's timeout
+ // hides a newer message before its own five seconds are up.
+ clearTimeout(showStatus.hideTimer);
+ showStatus.hideTimer = setTimeout(() => statusEl.classList.add('hidden'), 5000);
}
-// Get form values
+// Get form values using field mappings
+// Builds a config object from the form: one entry per CONFIG_FIELDS key whose
+// element exists. Booleans become 1/0, numbers are parsed as base-10 integers,
+// and empty text/select values fall back to the field's default.
function getFormValues() {
- const microphoneValue = document.getElementById('microphone').value;
- // Convert to number if it's a numeric string (device index)
- const microphoneForConfig = /^\d+$/.test(microphoneValue) ? parseInt(microphoneValue) : microphoneValue;
-
- return {
- compute_type: document.getElementById('compute_type').value,
- enable_debug_mode: document.getElementById('enable_debug_mode').checked ? 1 : 0,
- enable_previews: document.getElementById('enable_previews').checked ? 1 : 0,
- save_audio: document.getElementById('save_audio').checked ? 1 : 0,
- language: document.getElementById('language').value,
- gpu_idx: parseInt(document.getElementById('gpu_idx').value),
- max_speech_duration_s: parseInt(document.getElementById('max_speech_duration_s').value),
- min_silence_duration_ms: parseInt(document.getElementById('min_silence_duration_ms').value),
- microphone: microphoneForConfig,
- model: document.getElementById('model').value,
- reset_after_silence_s: parseInt(document.getElementById('reset_after_silence_s').value),
- transcription_loop_delay_ms: parseInt(document.getElementById('transcription_loop_delay_ms').value),
- use_cpu: document.getElementById('use_cpu').checked ? 1 : 0,
- block_width: parseInt(document.getElementById('block_width').value),
- num_blocks: parseInt(document.getElementById('num_blocks').value),
- rows: parseInt(document.getElementById('rows').value),
- cols: parseInt(document.getElementById('cols').value)
- };
+ const config = {};
+
+ for (const [fieldName, fieldConfig] of Object.entries(CONFIG_FIELDS)) {
+ const element = document.getElementById(fieldName);
+ if (!element) continue;
+
+ switch (fieldConfig.type) {
+ case 'boolean':
+ config[fieldName] = element.checked ? 1 : 0;
+ break;
+ case 'number': {
+ // Fall back to the default only when the input is not a number. The
+ // previous `parseInt(...) || default` also replaced a legitimate 0
+ // with the default for fields whose default is non-zero.
+ const parsed = parseInt(element.value, 10);
+ config[fieldName] = Number.isNaN(parsed) ? fieldConfig.default : parsed;
+ break;
+ }
+ case 'text':
+ config[fieldName] = element.value || fieldConfig.default;
+ break;
+ default:
+ config[fieldName] = element.value || fieldConfig.default;
+ }
+ }
+
+ return config;
}
-// Add a flag to prevent auto-save during programmatic updates
-let isSettingValues = false;
-
-// Set form values
+// Set form values using field mappings
+// Writes `config` into the form, using each field's default for keys that are
+// null or undefined. Auto-save is suppressed while the values are being set.
function setFormValues(config) {
isSettingValues = true; // Disable auto-save temporarily
- document.getElementById('compute_type').value = config.compute_type || 'int8';
- document.getElementById('enable_debug_mode').checked = config.enable_debug_mode === 1;
- document.getElementById('enable_previews').checked = config.enable_previews === 1;
- document.getElementById('save_audio').checked = config.save_audio === 1;
- document.getElementById('language').value = config.language || 'english';
- document.getElementById('gpu_idx').value = config.gpu_idx || 0;
- document.getElementById('max_speech_duration_s').value = config.max_speech_duration_s || 10;
- document.getElementById('min_silence_duration_ms').value = config.min_silence_duration_ms || 250;
- document.getElementById('microphone').value = config.microphone || 'motu';
- document.getElementById('model').value = config.model || 'turbo';
- document.getElementById('reset_after_silence_s').value = config.reset_after_silence_s || 15;
- document.getElementById('transcription_loop_delay_ms').value = config.transcription_loop_delay_ms || 100;
- document.getElementById('use_cpu').checked = config.use_cpu === 1;
- document.getElementById('block_width').value = config.block_width || 2;
- document.getElementById('num_blocks').value = config.num_blocks || 40;
- document.getElementById('rows').value = config.rows || 10;
- document.getElementById('cols').value = config.cols || 24;
+ // try/finally guarantees the flag is cleared even if the loop throws (for
+ // example when `config` is undefined); otherwise auto-save would stay
+ // silently disabled for the rest of the session.
+ try {
+ for (const [fieldName, fieldConfig] of Object.entries(CONFIG_FIELDS)) {
+ const element = document.getElementById(fieldName);
+ if (!element) continue;
+
+ const value = config[fieldName] ?? fieldConfig.default;
+
+ switch (fieldConfig.type) {
+ case 'boolean':
+ element.checked = value === 1;
+ break;
+ case 'text':
+ element.value = value || '';
+ break;
+ default:
+ element.value = value;
+ }
+ }
+ } finally {
isSettingValues = false; // Re-enable auto-save
+ }
}
-// Toggle advanced settings
-document.getElementById('toggle-advanced').addEventListener('click', () => {
- const advancedSettings = document.getElementById('advanced-settings');
- const chevron = document.getElementById('chevron');
-
- if (advancedSettings.classList.contains('hidden')) {
- advancedSettings.classList.remove('hidden');
- chevron.classList.add('rotate-90');
- } else {
- advancedSettings.classList.add('hidden');
- chevron.classList.remove('rotate-90');
- }
-});
+// Console management
+const consoleContent = document.getElementById('console-content');
+
+// Append one line to the console pane: a "[time] " prefix followed by the
+// message, styled with the `console-<type>` class, then scroll to the bottom.
+function appendToConsole(message, type = 'stdout') {
+  // Build a <span> with the given class and text.
+  const makeSpan = (className, text) => {
+    const span = document.createElement('span');
+    span.className = className;
+    span.textContent = text;
+    return span;
+  };
+
+  const now = new Date().toLocaleTimeString();
+  const row = document.createElement('div');
+  row.appendChild(makeSpan('console-timestamp', `[${now}] `));
+  row.appendChild(makeSpan(`console-${type}`, message));
+  consoleContent.appendChild(row);
+
+  // Auto-scroll to bottom
+  const scroller = document.getElementById('python-console');
+  scroller.scrollTop = scroller.scrollHeight;
+}
-// Simplify button handlers by extracting common patterns
+// Async action handler with better error handling
async function handleAsyncAction(actionName, actionFn) {
try {
const result = await actionFn();
- if (result && result.message) {
+ if (result?.message) {
showStatus(result.message, 'success');
}
return result;
@@ -103,36 +224,12 @@ async function handleAsyncAction(actionName, actionFn) {
}
}
-// Process control buttons
-const startButton = document.getElementById('start-process');
-const stopButton = document.getElementById('stop-process');
-
-// Helper functions for button state management
-function setButtonState(button, disabled) {
- button.disabled = disabled;
- if (disabled) {
- button.classList.add('opacity-50', 'cursor-not-allowed');
- } else {
- button.classList.remove('opacity-50', 'cursor-not-allowed');
- }
-}
-
-function setProcessRunningState() {
- setButtonState(startButton, true);
- setButtonState(stopButton, false);
-}
-
-function setProcessStoppedState() {
- setButtonState(startButton, false);
- setButtonState(stopButton, true);
-}
-
// Auto-save functionality with debouncing
let saveTimeout;
-const SAVE_DELAY = 500; // milliseconds
+const SAVE_DELAY = 500;
async function autoSaveConfig() {
- if (isSettingValues) return; // Don't save during programmatic updates
+ if (isSettingValues) return;
clearTimeout(saveTimeout);
saveTimeout = setTimeout(async () => {
@@ -141,28 +238,19 @@ async function autoSaveConfig() {
await window.electronAPI.saveConfig(config);
showStatus('Configuration saved', 'success');
- // Check if process is running (stop button is enabled means process is running)
- const stopButton = document.getElementById('stop-process');
-
- if (!stopButton.disabled) {
- // Process is running, restart it with new config
+ // Restart process if running
+ if (!buttonManager.buttons.stop.disabled) {
appendToConsole('Restarting process with new configuration...', 'info');
try {
await window.electronAPI.stopProcess();
-
await new Promise(resolve => setTimeout(resolve, 1000));
-
await window.electronAPI.startProcess();
-
- // Update button states to reflect running process
- setProcessRunningState();
-
+ buttonManager.setProcessRunning();
appendToConsole('Process restarted with new configuration', 'info');
} catch (error) {
appendToConsole(`Failed to restart process: ${error.message}`, 'stderr');
- // Process is stopped, update button states
- setProcessStoppedState();
+ buttonManager.setProcessStopped();
}
}
} catch (error) {
@@ -171,47 +259,32 @@ async function autoSaveConfig() {
}, SAVE_DELAY);
}
-// Add event listeners to all form inputs for auto-save
+// Auto-save setup
function setupAutoSave() {
- // Get all form inputs
const form = document.getElementById('config-form');
- const inputs = form.querySelectorAll('input, select');
+ const inputs = form.querySelectorAll('input, select, textarea');
- // Add change listener to each input
inputs.forEach(input => {
- if (input.type === 'checkbox') {
- input.addEventListener('change', autoSaveConfig);
- } else if (input.type === 'number' || input.type === 'text') {
- input.addEventListener('input', autoSaveConfig);
- } else if (input.tagName === 'SELECT') {
- input.addEventListener('change', autoSaveConfig);
- }
+ const eventType = input.type === 'checkbox' ? 'change' :
+ (input.type === 'number' || input.type === 'text' || input.tagName === 'TEXTAREA') ? 'input' : 'change';
+ input.addEventListener(eventType, autoSaveConfig);
});
}
-// Update the setup-venv handler
-document.getElementById('setup-venv').addEventListener('click', async () => {
- const setupButton = document.getElementById('setup-venv');
- setupButton.disabled = true;
- setupButton.classList.add('opacity-50', 'cursor-not-allowed');
-
- try {
- await handleAsyncAction('Install requirements', async () => {
- return await window.electronAPI.installRequirements();
- });
- // Reload microphones after successful installation
- await loadMicrophones();
- } finally {
- setupButton.disabled = false;
- setupButton.classList.remove('opacity-50', 'cursor-not-allowed');
- }
-});
-
-// Simplified microphone loading
+// Microphone loading
async function loadMicrophones() {
const microphoneSelect = document.getElementById('microphone');
try {
+ // Check/install requirements during startup
+ appendToConsole('Checking virtual environment and requirements...', 'info');
+ loadingOverlay.show('Setting up environment - this can take several minutes.');
+ try {
+ await handleAsyncAction('Install requirements', () => window.electronAPI.installRequirements());
+ } finally {
+ loadingOverlay.hide(); // Always hide overlay when done
+ }
+
appendToConsole('Loading available microphones...', 'info');
const microphones = await window.electronAPI.getMicrophones();
@@ -232,7 +305,7 @@ async function loadMicrophones() {
appendToConsole(` - ${mic.name} (Device ${mic.index})`, 'stdout');
});
- // Restore previously selected microphone if possible
+ // Restore previously selected microphone
try {
const config = await window.electronAPI.loadConfig();
if (config.microphone) {
@@ -248,11 +321,144 @@ async function loadMicrophones() {
}
}
-// Update window load to include auto-save setup
+// Event handlers setup
+function setupEventHandlers() {
+ // Advanced settings toggle
+ document.getElementById('toggle-advanced').addEventListener('click', () => {
+ const advancedSettings = document.getElementById('advanced-settings');
+ const chevron = document.getElementById('chevron');
+
+ if (advancedSettings.classList.contains('hidden')) {
+ advancedSettings.classList.remove('hidden');
+ chevron.classList.add('rotate-90');
+ } else {
+ advancedSettings.classList.add('hidden');
+ chevron.classList.remove('rotate-90');
+ }
+ });
+
+ // Setup virtual environment
+ document.getElementById('setup-venv').addEventListener('click', async () => {
+ loadingOverlay.show('Setting up virtual environment - please wait...'); // Show overlay with custom message
+ try {
+ await buttonManager.withButtonLoading('setupVenv', async () => {
+ await handleAsyncAction('Install requirements', () => window.electronAPI.installRequirements());
+ });
+ } finally {
+ loadingOverlay.hide(); // Always hide overlay when done
+ }
+ });
+
+ // Reset virtual environment
+ document.getElementById('reset-venv').addEventListener('click', async () => {
+ loadingOverlay.show('Resetting virtual environment - please wait...'); // Show overlay with custom message
+ try {
+ await buttonManager.withButtonLoading('resetVenv', async () => {
+ await handleAsyncAction('Reset virtual environment', () => window.electronAPI.resetVenv());
+ });
+ } finally {
+ loadingOverlay.hide(); // Always hide overlay when done
+ }
+ });
+
+ // Reset configuration
+ document.getElementById('reset-config').addEventListener('click', async () => {
+ const confirmReset = confirm('Are you sure you want to reset all settings to defaults? This cannot be undone.');
+ if (!confirmReset) return;
+
+ try {
+ // Stop process if running
+ const wasRunning = !buttonManager.buttons.stop.disabled;
+ if (wasRunning) {
+ appendToConsole('Stopping process before resetting configuration...', 'info');
+ await window.electronAPI.stopProcess();
+ buttonManager.setProcessStopped();
+ await new Promise(resolve => setTimeout(resolve, 500));
+ }
+
+ // Reset configuration
+ appendToConsole('Resetting configuration to defaults...', 'info');
+ const result = await window.electronAPI.resetConfig();
+
+ // Reload configuration with defaults
+ const config = await window.electronAPI.loadConfig();
+ setFormValues(config);
+
+ showStatus(result.message, 'success');
+ appendToConsole('Configuration reset successfully', 'info');
+
+ // Restart process if it was running
+ if (wasRunning) {
+ appendToConsole('Restarting process with default configuration...', 'info');
+ await window.electronAPI.startProcess();
+ buttonManager.setProcessRunning();
+ appendToConsole('Process restarted with default configuration', 'info');
+ }
+ } catch (error) {
+ showStatus(`Failed to reset configuration: ${error.message}`, 'error');
+ appendToConsole(`Failed to reset configuration: ${error.message}`, 'stderr');
+ }
+ });
+
+ // Refresh microphones
+ document.getElementById('refresh-microphones').addEventListener('click', async () => {
+ await buttonManager.withButtonLoading('refreshMicrophones', async () => {
+ await loadMicrophones();
+ });
+ });
+
+ // Start process
+ document.getElementById('start-process').addEventListener('click', async () => {
+ buttonManager.setState('start', true);
+
+ try {
+ // The installRequirements function will now check if venv is set up.
+ loadingOverlay.show('Verifying environment setup - please wait...'); // Show overlay with custom message
+ try {
+ await window.electronAPI.installRequirements();
+ appendToConsole('Virtual environment setup checked/completed', 'info');
+ } finally {
+ loadingOverlay.hide(); // Always hide overlay when done
+ }
+
+ await window.electronAPI.startProcess();
+ buttonManager.setProcessRunning();
+ appendToConsole('Process started successfully', 'info');
+ } catch (error) {
+ appendToConsole(`Failed to start process: ${error.message}`, 'stderr');
+ buttonManager.setState('start', false);
+ }
+ });
+
+ // Stop process
+ document.getElementById('stop-process').addEventListener('click', async () => {
+ buttonManager.setState('stop', true);
+
+ try {
+ await window.electronAPI.stopProcess();
+ appendToConsole('Process stop initiated', 'info');
+ } catch (error) {
+ appendToConsole(`Failed to stop process: ${error.message}`, 'stderr');
+ buttonManager.setState('stop', false);
+ }
+ });
+
+ // Listen for process stopped event
+ window.electronAPI.onProcessStopped(() => {
+ buttonManager.setProcessStopped();
+ });
+}
+
+// Initialize application
window.addEventListener('load', async () => {
appendToConsole('TaSTT Configuration UI initialized', 'info');
- // Load config first
+ // Set up Python output listener first so we capture all output
+ window.electronAPI.onPythonOutput((data) => {
+ appendToConsole(data.message, data.type);
+ });
+
+ // Load configuration
try {
const config = await window.electronAPI.loadConfig();
setFormValues(config);
@@ -264,71 +470,7 @@ window.addEventListener('load', async () => {
// Load microphones
await loadMicrophones();
- // Set up auto-save after everything is loaded
+ // Setup event handlers and auto-save
+ setupEventHandlers();
setupAutoSave();
-});
-
-// Console management
-const consoleContent = document.getElementById('console-content');
-
-function appendToConsole(message, type = 'stdout') {
- const timestamp = new Date().toLocaleTimeString();
- const timestampSpan = document.createElement('span');
- timestampSpan.className = 'console-timestamp';
- timestampSpan.textContent = `[${timestamp}] `;
-
- const messageSpan = document.createElement('span');
- messageSpan.className = `console-${type}`;
- messageSpan.textContent = message;
-
- const lineDiv = document.createElement('div');
- lineDiv.appendChild(timestampSpan);
- lineDiv.appendChild(messageSpan);
-
- consoleContent.appendChild(lineDiv);
-
- // Auto-scroll to bottom
- const pythonConsole = document.getElementById('python-console');
- pythonConsole.scrollTop = pythonConsole.scrollHeight;
-}
-
-// Clear console button
-document.getElementById('clear-console').addEventListener('click', () => {
- consoleContent.innerHTML = '';
- appendToConsole('Console cleared', 'info');
-});
-
-// Listen for Python output
-window.electronAPI.onPythonOutput((data) => {
- appendToConsole(data.message, data.type);
-});
-
-document.getElementById('start-process').addEventListener('click', async () => {
- setButtonState(startButton, true);
-
- try {
- await window.electronAPI.startProcess();
- setProcessRunningState();
- appendToConsole('Process started successfully', 'info');
- } catch (error) {
- appendToConsole(`Failed to start process: ${error.message}`, 'stderr');
- setButtonState(startButton, false);
- }
-});
-
-document.getElementById('stop-process').addEventListener('click', async () => {
- setButtonState(stopButton, true);
-
- try {
- const result = await window.electronAPI.stopProcess();
- appendToConsole('Process stop initiated', 'info');
- } catch (error) {
- appendToConsole(`Failed to stop process: ${error.message}`, 'stderr');
- setButtonState(stopButton, false);
- }
-});
-
-// Listen for process stopped event
-window.electronAPI.onProcessStopped(() => {
- setProcessStoppedState();
}); \ No newline at end of file
diff --git a/ui/src/components.css b/ui/src/components.css
index d8d909d..2832e12 100644
--- a/ui/src/components.css
+++ b/ui/src/components.css
@@ -46,6 +46,14 @@
.btn-red {
@apply bg-red-600 text-white hover:bg-red-700 focus:ring-red-500;
}
+
+ .btn-purple {
+ @apply bg-purple-600 text-white hover:bg-purple-700 focus:ring-purple-500;
+ }
+
+ .btn-orange {
+ @apply bg-orange-600 text-white hover:bg-orange-700 focus:ring-orange-500;
+ }
}
/* Console styling */
diff --git a/ui_design.md b/ui_design.md
index 06eee65..e1ff095 100644
--- a/ui_design.md
+++ b/ui_design.md
@@ -10,7 +10,13 @@ $ choco uninstall nodejs -y
$ choco install nodejs-lts -y
```
-Now open a non-admin PowerShell terminal:
+To build the app:
+```
+$ npm install
+$ npm run dev
+```
+
+For posterity, this is how I set up the ui directory initially. In a non-admin PowerShell window:
```bash
# Check your node and npm versions.
@@ -30,3 +36,4 @@ npx tailwindcss init -p
npm install --save-dev vue@3 @vitejs/plugin-vue vite yaml
npm install --save-dev js-yaml
```
+