diff options
| -rw-r--r-- | .cursorignore | 2 | ||||
| -rw-r--r-- | .gitignore | 2 | ||||
| -rw-r--r-- | Images/favicon.ico | bin | 0 -> 92015 bytes | |||
| -rw-r--r-- | app/hi.py | 12 | ||||
| -rw-r--r-- | app/requirements.txt | 2 | ||||
| -rw-r--r-- | app/stt.py | 128 | ||||
| -rw-r--r-- | app/vad.py | 314 | ||||
| -rw-r--r-- | config.yaml | 1 | ||||
| -rw-r--r-- | ui/index.html | 336 | ||||
| -rw-r--r-- | ui/index.js | 382 | ||||
| -rw-r--r-- | ui/package.json | 76 | ||||
| -rw-r--r-- | ui/preload.js | 7 | ||||
| -rw-r--r-- | ui/renderer.js | 564 | ||||
| -rw-r--r-- | ui/src/components.css | 8 | ||||
| -rw-r--r-- | ui_design.md | 9 |
15 files changed, 1085 insertions, 758 deletions
diff --git a/.cursorignore b/.cursorignore new file mode 100644 index 0000000..a8f4624 --- /dev/null +++ b/.cursorignore @@ -0,0 +1,2 @@ +**/node_modules +**/site-packages
\ No newline at end of file @@ -1,3 +1,3 @@ .*.sw[po] *.meta - +.venv_is_set_up diff --git a/Images/favicon.ico b/Images/favicon.ico Binary files differnew file mode 100644 index 0000000..25ea9ac --- /dev/null +++ b/Images/favicon.ico @@ -330,10 +330,11 @@ if __name__ == "__main__": cli_args = parse_args() cfg = app_config.getConfig(cli_args.config) shared_data = SharedThreadData(cfg) - osc_thread = threading.Thread( - target=osc_thread, - args=(shared_data,)) - osc_thread.start() + if False: + osc_thread = threading.Thread( + target=osc_thread, + args=(shared_data,)) + osc_thread.start() transcribe_thread = threading.Thread( target=stt.transcriptionThread, @@ -382,6 +383,7 @@ if __name__ == "__main__": local_word = shared_data.word print(local_word + "_") shared_data.exit_event.set() - osc_thread.join() + if False: + osc_thread.join() transcribe_thread.join() diff --git a/app/requirements.txt b/app/requirements.txt index 07f94cd..f8b7069 100644 --- a/app/requirements.txt +++ b/app/requirements.txt @@ -5,4 +5,4 @@ pyaudio pydub python-osc sentencepiece -wave +silero-vad @@ -6,10 +6,10 @@ import os import pyaudio from pydub import AudioSegment from shared_thread_data import SharedThreadData +from silero_vad import load_silero_vad, get_speech_timestamps import sys import time import typing -import vad import wave @@ -33,7 +33,7 @@ class AudioStream(): class MicStream(AudioStream): CHUNK_SZ = 1024 - def __init__(self, which_mic: str): + def __init__(self, cfg: typing.Dict): self.p = pyaudio.PyAudio() self.stream = None self.sample_rate = None @@ -45,8 +45,11 @@ class MicStream(AudioStream): # If set, incoming frames are simply discarded. self.paused = False - print(f"Finding mic {which_mic}", file=sys.stderr) - self.dumpMicDevices() + which_mic = cfg["microphone"] + + if cfg["enable_debug_mode"]: + print(f"Finding mic {which_mic}", file=sys.stderr) + self.dumpMicDevices() got_match = False device_index = -1 @@ -59,8 +62,9 @@ class MicStream(AudioStream): elif which_mic == "beyond": target_str = "Microphone (Beyond)" else: - print(f"Mic {which_mic} requested, treating it as a numerical " + - "device ID", file=sys.stderr) + if cfg["enable_debug_mode"]: + print(f"Mic {which_mic} requested, treating it as a numerical " + + "device ID", file=sys.stderr) device_index = int(which_mic) got_match = True if not got_match: @@ -79,9 +83,11 @@ class MicStream(AudioStream): raise KeyError(f"Mic {which_mic} not found") info = self.p.get_device_info_by_host_api_device_index(0, device_index) - print(f"Found mic {which_mic}: {info['name']}", file=sys.stderr) + if cfg["enable_debug_mode"]: + print(f"Found mic {which_mic}: {info['name']}", file=sys.stderr) self.sample_rate = int(info['defaultSampleRate']) - print(f"Mic sample rate: {self.sample_rate}", file=sys.stderr) + if cfg["enable_debug_mode"]: + print(f"Mic sample rate: {self.sample_rate}", file=sys.stderr) self.stream = self.p.open( rate=self.sample_rate, @@ -289,19 +295,40 @@ class AudioSegmenter: def __init__(self, min_silence_ms=250, max_speech_s=5): - self.vad_options = vad.VadOptions( - min_silence_duration_ms=min_silence_ms, - max_speech_duration_s=max_speech_s) - pass + self.min_silence_ms = min_silence_ms + self.max_speech_s = max_speech_s + + # Load Silero VAD model + self.model = load_silero_vad() + + self.vad_threshold = 0.3 + self.min_silence_duration_ms = min_silence_ms + self.max_speech_duration_s = max_speech_s + + self.speech_pad_ms = 300 def segmentAudio(self, audio: bytes): - audio = np.frombuffer(audio, + # Convert audio bytes to numpy array expected by silero-vad + audio_array = np.frombuffer(audio, dtype=np.int16).flatten().astype(np.float32) / 32768.0 - return vad.get_speech_timestamps(audio, vad_options=self.vad_options) + + # Get speech timestamps using silero-vad + # Note: silero-vad expects sample rate of 16000 Hz which matches AudioStream.FPS + speech_timestamps = get_speech_timestamps( + audio_array, + self.model, + sampling_rate=AudioStream.FPS, + threshold=self.vad_threshold, + min_silence_duration_ms=self.min_silence_duration_ms, + max_speech_duration_s=self.max_speech_duration_s, + return_seconds=False # We want frame indices, not seconds + ) + + return speech_timestamps # Returns the stable cutoff (if any) and whether there are any segments. def getStableCutoff(self, audio: bytes) -> typing.Tuple[int, bool]: - min_delta_frames = int((self.vad_options.min_silence_duration_ms * + min_delta_frames = int((self.min_silence_duration_ms * AudioStream.FPS) / 1000.0) cutoff = None @@ -379,8 +406,9 @@ class Whisper: model_str = cfg["model"] model_root = os.path.join(parent_dir, "Models", os.path.normpath(model_str)) - print(f"Model {cfg['model']} will be saved to {model_root}", - file=sys.stderr) + if cfg["enable_debug_mode"]: + print(f"Model {cfg['model']} will be saved to {model_root}", + file=sys.stderr) model_device = "cuda" if cfg["use_cpu"]: @@ -395,21 +423,42 @@ class Whisper: download_root = model_root, local_files_only = already_downloaded) + self.context_window_chars = 200 # Keep last 200 chars of context + self.recent_context = "" # Store recent committed text + + def update_context(self, committed_text: str): + """Update the context with recently committed text.""" + self.recent_context = (self.recent_context + " " + committed_text).strip() + # Keep only the last N characters to avoid prompt getting too long + if len(self.recent_context) > self.context_window_chars: + self.recent_context = self.recent_context[-self.context_window_chars:] + def transcribe(self, frames: bytes = None) -> typing.List[Segment]: if frames is None: frames = self.collector.getAudio() - # Convert from signed 16-bit int [-32768, 32767] to signed 32-bit float on - # [-1, 1]. + + # Convert audio to float32 audio = np.frombuffer(frames, dtype=np.int16).flatten().astype(np.float32) / 32768.0 + # Build context-aware prompt + prompt = self._build_prompt() + t0 = time.time() segments, info = self.model.transcribe( audio, language = langcodes.find(self.cfg["language"]).language, vad_filter = True, temperature=0.0, - without_timestamps = False) + without_timestamps = False, + initial_prompt=prompt, + beam_size=5, + best_of=5, + condition_on_previous_text=True, + compression_ratio_threshold=2.4, + log_prob_threshold=-1.0, + no_speech_threshold=0.6 + ) res = [] for s in segments: # Manual touchup. I see a decent number of hallucinations sneaking @@ -445,6 +494,17 @@ class Whisper: print(f"Transcription latency (s): {t1 - t0}") return res + def _build_prompt(self) -> str: + """Build a context-aware prompt for Whisper.""" + user_prompt = self.cfg["user_prompt"] + context_prompt = "" + if self.recent_context and len(self.recent_context) > 0: + context_prompt = f"Here is the context so far: {self.recent_context}" + + prompts = [user_prompt, context_prompt] + prompts = [p for p in prompts if p and len(p) > 0] + return " ".join(prompts) + class TranscriptCommit: def __init__(self, delta: str, @@ -502,10 +562,21 @@ class VadCommitter: latency_s = self.collector.now() - self.collector.begin() duration_s = stable_cutoff / AudioStream.FPS start_ts = self.collector.begin() - commit_audio = self.collector.dropAudioPrefixByFrames(stable_cutoff) + + # Get the filtered audio first, then extract the portion we need + filtered_audio = self.collector.getAudio() + commit_audio = filtered_audio[:stable_cutoff * AudioStream.FRAME_SZ] + + # Now drop the prefix from the collector + self.collector.dropAudioPrefixByFrames(stable_cutoff) segments = self.whisper.transcribe(commit_audio) delta = ''.join(s.transcript for s in segments) + + # Update whisper's context with the committed text + if delta.strip(): + self.whisper.update_context(delta.strip()) + audio = self.collector.getAudio() if self.cfg["enable_debug_mode"]: for s in segments: @@ -540,11 +611,11 @@ class VadCommitter: def transcriptionThread(shared_data: SharedThreadData): last_stable_commit = None - stream = MicStream(shared_data.cfg["microphone"]) + stream = MicStream(shared_data.cfg) collector = AudioCollector(stream) collector = CompressingAudioCollector(collector) + collector = BoostingAudioCollector(collector, -12.0, shared_data.cfg) collector = NormalizingAudioCollector(collector) - collector = BoostingAudioCollector(collector, 0.0, shared_data.cfg) whisper = Whisper(collector, shared_data.cfg) segmenter = AudioSegmenter(min_silence_ms=shared_data.cfg["min_silence_duration_ms"], max_speech_s=shared_data.cfg["max_speech_duration_s"]) @@ -553,6 +624,8 @@ def transcriptionThread(shared_data: SharedThreadData): transcript = "" preview = "" + print(f"Ready to go!", flush=True) + while not shared_data.exit_event.is_set(): time.sleep(shared_data.cfg["transcription_loop_delay_ms"] / 1000.0); @@ -561,8 +634,7 @@ def transcriptionThread(shared_data: SharedThreadData): commit = committer.getDelta() if len(commit.delta) > 0 or len(commit.preview) > 0: - # Avoid re-sending text after long pauses. User controls the length - # of the pause in the UI. + # Avoid re-sending text after long pauses if shared_data.cfg["reset_after_silence_s"] > 0: silence_duration = 0 if last_stable_commit: @@ -571,10 +643,12 @@ def transcriptionThread(shared_data: SharedThreadData): last_stable_commit.duration_s silence_duration = commit.start_ts - last_commit_end_ts if silence_duration > shared_data.cfg["reset_after_silence_s"]: - print(f"Resetting transcript after {silence_duration}-second " - "silence", file=sys.stderr) + if shared_data.cfg["enable_debug_mode"]: + print(f"Resetting transcript after {silence_duration}-second " + "silence", file=sys.stderr) transcript = "" preview = "" + whisper.recent_context = "" # Reset context too if commit.delta: last_stable_commit = commit diff --git a/app/vad.py b/app/vad.py deleted file mode 100644 index 1dea765..0000000 --- a/app/vad.py +++ /dev/null @@ -1,314 +0,0 @@ -# MIT License -# -# Copyright (c) 2023 Guillaume Klein -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. - -import bisect -import functools -import os -import warnings - -from typing import List, NamedTuple, Optional - -import numpy as np - - -# The code below is adapted from https://github.com/snakers4/silero-vad. -class VadOptions(NamedTuple): - """VAD options. - - Attributes: - threshold: Speech threshold. Silero VAD outputs speech probabilities for each audio chunk, - probabilities ABOVE this value are considered as SPEECH. It is better to tune this - parameter for each dataset separately, but "lazy" 0.5 is pretty good for most datasets. - min_speech_duration_ms: Final speech chunks shorter min_speech_duration_ms are thrown out. - max_speech_duration_s: Maximum duration of speech chunks in seconds. Chunks longer - than max_speech_duration_s will be split at the timestamp of the last silence that - lasts more than 100ms (if any), to prevent aggressive cutting. Otherwise, they will be - split aggressively just before max_speech_duration_s. - min_silence_duration_ms: In the end of each speech chunk wait for min_silence_duration_ms - before separating it - window_size_samples: Audio chunks of window_size_samples size are fed to the silero VAD model. - WARNING! Silero VAD models were trained using 512, 1024, 1536 samples for 16000 sample rate. - Values other than these may affect model performance!! - speech_pad_ms: Final speech chunks are padded by speech_pad_ms each side - """ - - threshold: float = 0.5 - min_speech_duration_ms: int = 250 - max_speech_duration_s: float = float("inf") - min_silence_duration_ms: int = 2000 - window_size_samples: int = 1024 - speech_pad_ms: int = 400 - - -def get_speech_timestamps( - audio: np.ndarray, - vad_options: Optional[VadOptions] = None, - **kwargs, -) -> List[dict]: - """This method is used for splitting long audios into speech chunks using silero VAD. - - Args: - audio: One dimensional float array. - vad_options: Options for VAD processing. - kwargs: VAD options passed as keyword arguments for backward compatibility. - - Returns: - List of dicts containing begin and end samples of each speech chunk. - """ - if vad_options is None: - vad_options = VadOptions(**kwargs) - - threshold = vad_options.threshold - min_speech_duration_ms = vad_options.min_speech_duration_ms - max_speech_duration_s = vad_options.max_speech_duration_s - min_silence_duration_ms = vad_options.min_silence_duration_ms - window_size_samples = vad_options.window_size_samples - speech_pad_ms = vad_options.speech_pad_ms - - if window_size_samples not in [512, 1024, 1536]: - warnings.warn( - "Unusual window_size_samples! Supported window_size_samples:\n" - " - [512, 1024, 1536] for 16000 sampling_rate" - ) - - sampling_rate = 16000 - min_speech_samples = sampling_rate * min_speech_duration_ms / 1000 - speech_pad_samples = sampling_rate * speech_pad_ms / 1000 - max_speech_samples = ( - sampling_rate * max_speech_duration_s - - window_size_samples - - 2 * speech_pad_samples - ) - min_silence_samples = sampling_rate * min_silence_duration_ms / 1000 - min_silence_samples_at_max_speech = sampling_rate * 98 / 1000 - - audio_length_samples = len(audio) - - model = get_vad_model() - state = model.get_initial_state(batch_size=1) - - speech_probs = [] - for current_start_sample in range(0, audio_length_samples, window_size_samples): - chunk = audio[current_start_sample : current_start_sample + window_size_samples] - if len(chunk) < window_size_samples: - chunk = np.pad(chunk, (0, int(window_size_samples - len(chunk)))) - speech_prob, state = model(chunk, state, sampling_rate) - speech_probs.append(speech_prob) - - triggered = False - speeches = [] - current_speech = {} - neg_threshold = threshold - 0.15 - - # to save potential segment end (and tolerate some silence) - temp_end = 0 - # to save potential segment limits in case of maximum segment size reached - prev_end = next_start = 0 - - for i, speech_prob in enumerate(speech_probs): - if (speech_prob >= threshold) and temp_end: - temp_end = 0 - if next_start < prev_end: - next_start = window_size_samples * i - - if (speech_prob >= threshold) and not triggered: - triggered = True - current_speech["start"] = window_size_samples * i - continue - - if ( - triggered - and (window_size_samples * i) - current_speech["start"] > max_speech_samples - ): - if prev_end: - current_speech["end"] = prev_end - speeches.append(current_speech) - current_speech = {} - # previously reached silence (< neg_thres) and is still not speech (< thres) - if next_start < prev_end: - triggered = False - else: - current_speech["start"] = next_start - prev_end = next_start = temp_end = 0 - else: - current_speech["end"] = window_size_samples * i - speeches.append(current_speech) - current_speech = {} - prev_end = next_start = temp_end = 0 - triggered = False - continue - - if (speech_prob < neg_threshold) and triggered: - if not temp_end: - temp_end = window_size_samples * i - # condition to avoid cutting in very short silence - if (window_size_samples * i) - temp_end > min_silence_samples_at_max_speech: - prev_end = temp_end - if (window_size_samples * i) - temp_end < min_silence_samples: - continue - else: - current_speech["end"] = temp_end - if ( - current_speech["end"] - current_speech["start"] - ) > min_speech_samples: - speeches.append(current_speech) - current_speech = {} - prev_end = next_start = temp_end = 0 - triggered = False - continue - - if ( - current_speech - and (audio_length_samples - current_speech["start"]) > min_speech_samples - ): - current_speech["end"] = audio_length_samples - speeches.append(current_speech) - - for i, speech in enumerate(speeches): - if i == 0: - speech["start"] = int(max(0, speech["start"] - speech_pad_samples)) - if i != len(speeches) - 1: - silence_duration = speeches[i + 1]["start"] - speech["end"] - if silence_duration < 2 * speech_pad_samples: - speech["end"] += int(silence_duration // 2) - speeches[i + 1]["start"] = int( - max(0, speeches[i + 1]["start"] - silence_duration // 2) - ) - else: - speech["end"] = int( - min(audio_length_samples, speech["end"] + speech_pad_samples) - ) - speeches[i + 1]["start"] = int( - max(0, speeches[i + 1]["start"] - speech_pad_samples) - ) - else: - speech["end"] = int( - min(audio_length_samples, speech["end"] + speech_pad_samples) - ) - - return speeches - - -def collect_chunks(audio: np.ndarray, chunks: List[dict]) -> np.ndarray: - """Collects and concatenates audio chunks.""" - if not chunks: - return np.array([], dtype=np.float32) - - return np.concatenate([audio[chunk["start"] : chunk["end"]] for chunk in chunks]) - - -class SpeechTimestampsMap: - """Helper class to restore original speech timestamps.""" - - def __init__(self, chunks: List[dict], sampling_rate: int, time_precision: int = 2): - self.sampling_rate = sampling_rate - self.time_precision = time_precision - self.chunk_end_sample = [] - self.total_silence_before = [] - - previous_end = 0 - silent_samples = 0 - - for chunk in chunks: - silent_samples += chunk["start"] - previous_end - previous_end = chunk["end"] - - self.chunk_end_sample.append(chunk["end"] - silent_samples) - self.total_silence_before.append(silent_samples / sampling_rate) - - def get_original_time( - self, - time: float, - chunk_index: Optional[int] = None, - ) -> float: - if chunk_index is None: - chunk_index = self.get_chunk_index(time) - - total_silence_before = self.total_silence_before[chunk_index] - return round(total_silence_before + time, self.time_precision) - - def get_chunk_index(self, time: float) -> int: - sample = int(time * self.sampling_rate) - return min( - bisect.bisect(self.chunk_end_sample, sample), - len(self.chunk_end_sample) - 1, - ) - - -@functools.lru_cache -def get_vad_model(): - """Returns the VAD model instance.""" - abspath = os.path.abspath(__file__) - my_dir = os.path.dirname(abspath) - parent_dir = os.path.dirname(my_dir) - path = os.path.join(parent_dir, "Models", "silero_vad.onnx") - return SileroVADModel(path) - - -class SileroVADModel: - def __init__(self, path): - try: - import onnxruntime - except ImportError as e: - raise RuntimeError( - "Applying the VAD filter requires the onnxruntime package" - ) from e - - opts = onnxruntime.SessionOptions() - opts.inter_op_num_threads = 1 - opts.intra_op_num_threads = 1 - opts.log_severity_level = 4 - - self.session = onnxruntime.InferenceSession( - path, - providers=["CPUExecutionProvider"], - sess_options=opts, - ) - - def get_initial_state(self, batch_size: int): - h = np.zeros((2, batch_size, 64), dtype=np.float32) - c = np.zeros((2, batch_size, 64), dtype=np.float32) - return h, c - - def __call__(self, x, state, sr: int): - if len(x.shape) == 1: - x = np.expand_dims(x, 0) - if len(x.shape) > 2: - raise ValueError( - f"Too many dimensions for input audio chunk {len(x.shape)}" - ) - if sr / x.shape[1] > 31.25: - raise ValueError("Input audio chunk is too short") - - h, c = state - - ort_inputs = { - "input": x, - "h": h, - "c": c, - "sr": np.array(sr, dtype="int64"), - } - - out, h, c = self.session.run(None, ort_inputs) - state = (h, c) - - return out, state diff --git a/config.yaml b/config.yaml index 34d88f1..5eec7a2 100644 --- a/config.yaml +++ b/config.yaml @@ -1,6 +1,7 @@ compute_type: float16 enable_debug_mode: 0 enable_previews: 1 +user_prompt: Use proper punctuation and grammar. Prefer spelled out numbers like one, eleven, twenty, etc. save_audio: 0 language: english gpu_idx: 0 diff --git a/ui/index.html b/ui/index.html index b06e56b..90f78c1 100644 --- a/ui/index.html +++ b/ui/index.html @@ -10,179 +10,229 @@ <div class="container-fluid px-6 py-6 h-screen flex flex-col"> <div class="flex flex-1 gap-6 overflow-hidden"> <!-- Left Panel: Configuration Form --> - <div class="max-w-96 overflow-y-auto"> - <form id="config-form" class="space-y-6 pr-3"> - <!-- Basic settings (Always Visible) --> - <section class="config-section"> - <div class="grid grid-cols-2 gap-4"> - <div> - <label for="model" class="form-label">Model</label> - <select id="model" class="form-input"> - <option value="tiny">tiny</option> - <option value="base">base</option> - <option value="small">small</option> - <option value="medium">medium</option> - <option value="large">large</option> - <option value="turbo">turbo</option> - </select> - </div> - <div> - <label for="language" class="form-label">Language</label> - <select id="language" class="form-input"> - <option value="english">English</option> - <option value="spanish">Spanish</option> - <option value="french">French</option> - <option value="german">German</option> - <option value="italian">Italian</option> - <option value="portuguese">Portuguese</option> - <option value="russian">Russian</option> - <option value="chinese">Chinese</option> - <option value="japanese">Japanese</option> - <option value="korean">Korean</option> - </select> - </div> - <div class="col-span-2"> - <label for="microphone" class="form-label">Microphone</label> - <select id="microphone" class="form-input"> - <option value="">Loading microphones...</option> - </select> - </div> - </div> - </section> - - <!-- Advanced settings toggle --> - <button type="button" id="toggle-advanced" class="flex items-center gap-2 text-gray-600 hover:text-gray-800 font-medium"> - <svg id="chevron" class="w-5 h-5 transform transition-transform duration-200" fill="none" stroke="currentColor" viewBox="0 0 24 24"> - <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M9 5l7 7-7 7"/> - </svg> - Advanced Settings - </button> - - <!-- Advanced settings (initially hidden) --> - <div id="advanced-settings" class="hidden space-y-6"> - <!-- Compute Settings --> + <div class="max-w-96 relative flex flex-col"> + <!-- Loading Overlay --> + <div id="loading-overlay" class="absolute inset-0 bg-white bg-opacity-75 backdrop-blur-sm z-50 hidden flex items-center justify-center rounded-lg"> + <div class="text-center p-6"> + <div class="animate-spin rounded-full h-12 w-12 border-b-2 border-blue-600 mx-auto mb-4"></div> + <p class="text-gray-700 font-medium"></p> + </div> + </div> + + <!-- Scrollable form container --> + <div class="overflow-y-auto flex-1"> + <form id="config-form" class="space-y-6 pr-3"> + <!-- Basic settings (Always Visible) --> <section class="config-section"> - <h2 class="section-title">Compute Settings</h2> <div class="grid grid-cols-2 gap-4"> <div> - <label for="compute_type" class="form-label">Compute Type</label> - <select id="compute_type" class="form-input"> - <option value="int8">int8</option> - <option value="float16">float16</option> - <option value="float32">float32</option> + <label for="model" class="form-label">Model</label> + <select id="model" class="form-input"> + <option value="tiny">tiny</option> + <option value="base">base</option> + <option value="small">small</option> + <option value="medium">medium</option> + <option value="large">large</option> + <option value="turbo">turbo</option> </select> </div> <div> - <label for="gpu_idx" class="form-label">GPU Index</label> - <input type="number" id="gpu_idx" min="0" value="0" class="form-input"> + <label for="language" class="form-label">Language</label> + <select id="language" class="form-input"> + <option value="english">English</option> + <option value="spanish">Spanish</option> + <option value="french">French</option> + <option value="german">German</option> + <option value="italian">Italian</option> + <option value="portuguese">Portuguese</option> + <option value="russian">Russian</option> + <option value="chinese">Chinese</option> + <option value="japanese">Japanese</option> + <option value="korean">Korean</option> + </select> </div> <div class="col-span-2"> - <label for="use_cpu" class="checkbox-label"> - <input type="checkbox" id="use_cpu" class="mr-2"> - <span class="checkbox-text">Use CPU</span> - </label> + <label for="microphone" class="form-label">Microphone</label> + <div class="flex gap-2"> + <select id="microphone" class="form-input flex-1"> + <option value="">Loading microphones...</option> + </select> + <button type="button" id="refresh-microphones" class="btn btn-gray px-3 py-2 flex items-center" title="Refresh microphone list"> + <svg class="w-4 h-4" fill="none" stroke="currentColor" viewBox="0 0 24 24"> + <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M4 4v5h.582m15.356 2A8.001 8.001 0 004.582 9m0 0H9m11 11v-5h-.581m0 0a8.003 8.003 0 01-15.357-2m15.357 2H15"/> + </svg> + </button> + </div> </div> </div> </section> - <!-- Audio Settings --> - <section class="config-section"> - <h2 class="section-title">Audio Settings</h2> - <div class="grid grid-cols-2 gap-4"> - <div> - <label for="max_speech_duration_s" class="form-label">Max Speech Duration (seconds)</label> - <input type="number" id="max_speech_duration_s" min="1" value="10" class="form-input"> + <!-- Advanced settings toggle --> + <button type="button" id="toggle-advanced" class="flex items-center gap-2 text-gray-600 hover:text-gray-800 font-medium"> + <svg id="chevron" class="w-5 h-5 transform transition-transform duration-200" fill="none" stroke="currentColor" viewBox="0 0 24 24"> + <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M9 5l7 7-7 7"/> + </svg> + Advanced Settings + </button> + + <!-- Advanced settings (initially hidden) --> + <div id="advanced-settings" class="hidden space-y-6"> + <!-- Compute Settings --> + <section class="config-section"> + <h2 class="section-title">Compute Settings</h2> + <div class="grid grid-cols-2 gap-4"> + <div> + <label for="compute_type" class="form-label">Compute Type</label> + <select id="compute_type" class="form-input"> + <option value="int8">int8</option> + <option value="float16">float16</option> + <option value="float32">float32</option> + </select> + </div> + <div> + <label for="gpu_idx" class="form-label">GPU Index</label> + <input type="number" id="gpu_idx" min="0" value="0" class="form-input"> + </div> + <div class="col-span-2"> + <label for="use_cpu" class="checkbox-label"> + <input type="checkbox" id="use_cpu" class="mr-2"> + <span class="checkbox-text">Use CPU</span> + </label> + </div> </div> - <div> - <label for="min_silence_duration_ms" class="form-label">Min Silence Duration (ms)</label> - <input type="number" id="min_silence_duration_ms" min="0" value="250" class="form-input"> + </section> + + <!-- Audio Settings --> + <section class="config-section"> + <h2 class="section-title">Voice Activity Detection</h2> + <div class="grid grid-cols-2 gap-4"> + <div> + <label for="max_speech_duration_s" class="form-label">Max Speech Duration (seconds)</label> + <input type="number" id="max_speech_duration_s" min="1" value="10" class="form-input"> + </div> + <div> + <label for="min_silence_duration_ms" class="form-label">Min Silence Duration (ms)</label> + <input type="number" id="min_silence_duration_ms" min="0" value="250" class="form-input"> + </div> + <div> + <label for="reset_after_silence_s" class="form-label">Reset After Silence (seconds)</label> + <input type="number" id="reset_after_silence_s" min="1" value="15" class="form-input"> + </div> </div> + </section> + + <!-- Transcription Settings --> + <section class="config-section"> + <h2 class="section-title">Transcription Settings</h2> <div> - <label for="reset_after_silence_s" class="form-label">Reset After Silence (seconds)</label> - <input type="number" id="reset_after_silence_s" min="1" value="15" class="form-input"> + <label for="user_prompt" class="form-label"> + Custom Prompt + <span class="text-gray-500 text-xs block mt-1" + title="Whisper is given this prompt before transcribing. It helps guide the transcription style. For example, you could improve the spelling of your friends' names with: 'My friends' names are Saoirse, Azariah, and Caoimhe.'"> + (Hover for details) + </span> + </label> + <textarea id="user_prompt" + class="form-input h-20 resize-none" + placeholder="My friends' names are Saoirse, Azariah, and Caoimhe."></textarea> </div> - </div> - </section> - - <!-- Performance Settings --> - <section class="config-section"> - <h2 class="section-title">Performance Settings</h2> - <div> - <label for="transcription_loop_delay_ms" class="form-label">Transcription Loop Delay (ms)</label> - <input type="number" id="transcription_loop_delay_ms" min="0" value="100" class="form-input"> - </div> - </section> + </section> - <!-- Debug/Preview Settings --> - <section class="config-section"> - <h2 class="section-title">Debug/Preview Settings</h2> - <div class="space-y-3"> - <label for="enable_debug_mode" class="checkbox-label"> - <input type="checkbox" id="enable_debug_mode" class="mr-2"> - <span class="checkbox-text">Enable Debug Mode</span> - </label> - <label for="enable_previews" class="checkbox-label"> - <input type="checkbox" id="enable_previews" checked class="mr-2"> - <span class="checkbox-text">Enable Previews</span> - </label> - <label for="save_audio" class="checkbox-label"> - <input type="checkbox" id="save_audio" class="mr-2"> - <span class="checkbox-text">Save Audio Segments</span> - </label> - </div> - </section> - - <!-- Display Settings --> - <section class="config-section"> - <h2 class="section-title">Display Settings</h2> - <div class="grid grid-cols-2 gap-4"> + <!-- Performance Settings --> + <section class="config-section"> + <h2 class="section-title">Performance Settings</h2> <div> - <label for="block_width" class="form-label">Block Width</label> - <input type="number" id="block_width" min="1" value="2" class="form-input"> + <label for="transcription_loop_delay_ms" class="form-label">Transcription Loop Delay (ms)</label> + <input type="number" id="transcription_loop_delay_ms" min="0" value="100" class="form-input"> </div> - <div> - <label for="num_blocks" class="form-label">Number of Blocks</label> - <input type="number" id="num_blocks" min="1" value="40" class="form-input"> + </section> + + <!-- Debug/Preview Settings --> + <section class="config-section"> + <h2 class="section-title">Debug/Preview Settings</h2> + <div class="space-y-3"> + <label for="enable_debug_mode" class="checkbox-label"> + <input type="checkbox" id="enable_debug_mode" class="mr-2"> + <span class="checkbox-text">Enable Debug Mode</span> + </label> + <label for="enable_previews" class="checkbox-label"> + <input type="checkbox" id="enable_previews" checked class="mr-2"> + <span class="checkbox-text">Enable Previews</span> + </label> + <label for="save_audio" class="checkbox-label"> + <input type="checkbox" id="save_audio" class="mr-2"> + <span class="checkbox-text">Save Audio Segments</span> + </label> </div> - <div> - <label for="rows" class="form-label">Rows</label> - <input type="number" id="rows" min="1" value="10" class="form-input"> + </section> + + <!-- Display Settings --> + <section class="config-section"> + <h2 class="section-title">Custom Chatbox Settings</h2> + <div class="grid grid-cols-2 gap-4"> + <div> + <label for="block_width" class="form-label">Block Width</label> + <input type="number" id="block_width" min="1" value="2" class="form-input"> + </div> + <div> + <label for="num_blocks" class="form-label">Number of Blocks</label> + <input type="number" id="num_blocks" min="1" value="40" class="form-input"> + </div> + <div> + <label for="rows" class="form-label">Rows</label> + <input type="number" id="rows" min="1" value="10" class="form-input"> + </div> + <div> + <label for="cols" class="form-label">Columns</label> + <input type="number" id="cols" min="1" value="24" class="form-input"> + </div> </div> + </section> + + <!-- Configuration Settings --> + <section class="config-section"> + <h2 class="section-title">Configuration</h2> <div> - <label for="cols" class="form-label">Columns</label> - <input type="number" id="cols" min="1" value="24" class="form-input"> + <button type="button" id="reset-config" class="btn btn-blue w-full"> + Reset Config to Defaults + </button> </div> - </div> - </section> - </div> + </section> - <!-- Action Buttons --> - <div class="flex justify-between pb-6"> - <div class="space-x-3"> - <button type="button" id="setup-venv" class="btn btn-blue"> - Set up virtual environment - </button> - <button type="button" id="start-process" class="btn btn-green"> - Start - </button> - <button type="button" id="stop-process" class="btn btn-red" disabled> - Stop - </button> + <!-- Virtual Environment Settings --> + <section class="config-section"> + <h2 class="section-title">Virtual Environment</h2> + <div class="flex space-x-3"> + <button type="button" id="setup-venv" class="btn btn-blue flex-1"> + Setup venv + </button> + <button type="button" id="reset-venv" class="btn btn-blue flex-1"> + Reset venv + </button> + </div> + </section> </div> - </div> - </form> - <!-- Status Message --> - <div id="status-message" class="mt-6 p-4 rounded-md hidden"></div> + <!-- Action Buttons --> + <div class="pb-6"> + <div class="flex space-x-3"> + <button type="button" id="start-process" class="btn btn-green flex-1"> + Start + </button> + <button type="button" id="stop-process" class="btn btn-red flex-1" disabled> + Stop + </button> + </div> + </div> + </form> + + <!-- Status Message --> + <div id="status-message" class="mt-6 p-4 rounded-md hidden"></div> + </div> </div> <!-- Right Panel: Python Console --> <div class="flex-1 flex flex-col bg-gray-900 rounded-lg overflow-hidden"> - <div class="bg-gray-800 px-4 py-2 flex justify-between items-center"> - <button id="clear-console" class="text-gray-400 hover:text-white text-sm"> - Clear - </button> - </div> <div id="python-console" class="flex-1 overflow-y-auto p-4 font-mono text-sm"> <div id="console-content" class="text-gray-300 whitespace-pre-wrap"></div> </div> diff --git a/ui/index.js b/ui/index.js index a056156..2420ece 100644 --- a/ui/index.js +++ b/ui/index.js @@ -3,6 +3,7 @@ const path = require('node:path'); const fs = require('node:fs').promises; const yaml = require('js-yaml'); const { spawn } = require('child_process'); +const https = require('https'); const APP_ROOT = path.join(__dirname, '..'); const CONFIG_PATH = path.join(APP_ROOT, 'config.yaml'); @@ -10,6 +11,20 @@ const CONFIG_PATH = path.join(APP_ROOT, 'config.yaml'); let mainWindow; let runningProcess = null; // Track the running Python process +// Required DLL files for CUDA/cuDNN support +const REQUIRED_DLLS = [ + 'cublas64_12.dll', + 'cublasLt64_12.dll', + 'cudnn64_9.dll', + 'cudnn_adv64_9.dll', + 'cudnn_cnn64_9.dll', + 'cudnn_engines_precompiled64_9.dll', + 'cudnn_engines_runtime_compiled64_9.dll', + 'cudnn_graph64_9.dll', + 'cudnn_heuristic64_9.dll', + 'cudnn_ops64_9.dll' +]; + // Helper function to get the correct Python executable from venv function getVenvPython() { const venvPath = path.join(APP_ROOT, 'venv'); @@ -24,6 +39,78 @@ function sendPythonOutput(message, type = 'stdout') { } } +// Helper function to create environment with DLL path +function createPythonEnvironment() { + const dllPath = path.join(APP_ROOT, 'dll'); + const binPath = path.join(APP_ROOT, 'bin'); + const env = { ...process.env }; + env.PATH = `${dllPath};${binPath};${env.PATH}`; + env.HF_HUB_DISABLE_SYMLINKS_WARNING = '1'; + return env; +} + +// Helper function to download a file from URL +function downloadFile(url, outputPath) { + return new Promise((resolve, reject) => { + const file = require('fs').createWriteStream(outputPath); + + const request = https.get(url, (response) => { + if (response.statusCode === 200) { + response.pipe(file); + + file.on('finish', () => { + file.close(); + resolve(); + }); + + file.on('error', (err) => { + fs.unlink(outputPath).catch(() => {}); // Clean up on error + reject(err); + }); + } else { + file.close(); + fs.unlink(outputPath).catch(() => {}); // Clean up on error + reject(new Error(`Failed to download: HTTP ${response.statusCode}`)); + } + }); + + request.on('error', (err) => { + file.close(); + fs.unlink(outputPath).catch(() => {}); // Clean up on error + reject(err); + }); + }); +} + +// Helper function to setup process event handlers +function setupProcessHandlers(process) { + process.stdout.on('data', (data) => { + const text = data.toString(); + sendPythonOutput(text.trimEnd(), 'stdout'); + }); + + process.stderr.on('data', (data) => { + const text = data.toString(); + sendPythonOutput(text.trimEnd(), 'stderr'); + }); + + process.on('error', (error) => { + sendPythonOutput(`Process error: ${error.message}`, 'stderr'); + runningProcess = null; + if (mainWindow && !mainWindow.isDestroyed()) { + mainWindow.webContents.send('process-stopped'); + } + }); + + process.on('close', (code) => { + sendPythonOutput(`Process exited with code ${code}`, 'info'); + runningProcess = null; + if (mainWindow && !mainWindow.isDestroyed()) { + mainWindow.webContents.send('process-stopped'); + } + }); +} + // Helper function to execute Python commands using venv function executePythonCommand(args, options = {}) { return new Promise((resolve, reject) => { @@ -31,14 +118,9 @@ function executePythonCommand(args, options = {}) { const commandStr = `${path.basename(pythonPath)} ${args.join(' ')}`; sendPythonOutput(`> ${commandStr}`, 'info'); - // Add dll directory to PATH for Windows DLL loading - const dllPath = path.join(APP_ROOT, 'dll'); - const env = { ...process.env }; - env.PATH = `${dllPath};${env.PATH}`; - const spawnOptions = { ...options, - env + env: createPythonEnvironment() }; const pythonProcess = spawn(pythonPath, args, spawnOptions); @@ -78,6 +160,7 @@ function createWindow () { mainWindow = new BrowserWindow({ width: 1000, height: 800, + icon: path.join(APP_ROOT, 'Images', 'favicon.ico'), webPreferences: { preload: path.join(__dirname, 'preload.js'), contextIsolation: true, @@ -93,6 +176,7 @@ const DEFAULT_CONFIG = { compute_type: 'float16', enable_debug_mode: 0, enable_previews: 1, + user_prompt: 'Use proper punctuation and grammar. Prefer spelled out numbers like one, eleven, twenty, etc.', save_audio: 0, language: 'english', gpu_idx: 0, @@ -117,11 +201,11 @@ ipcMain.handle('load-config', async () => { } catch (error) { if (error.code === 'ENOENT') { // Config file doesn't exist, create it with defaults - console.log('Config file not found, creating with defaults...'); + console.error('Config file not found, creating with defaults...'); try { const yamlContent = yaml.dump(DEFAULT_CONFIG, { lineWidth: -1 }); await fs.writeFile(CONFIG_PATH, yamlContent, 'utf8'); - console.log('Created config.yaml with default values'); + console.error('Created config.yaml with default values'); return DEFAULT_CONFIG; } catch (writeError) { console.error('Error creating default config:', writeError); @@ -145,21 +229,138 @@ ipcMain.handle('save-config', async (event, config) => { } }); -ipcMain.handle('restart-app', () => { - app.relaunch(); - app.exit(); +ipcMain.handle('reset-config', async () => { + try { + // Check if the file exists first + try { + await fs.access(CONFIG_PATH); + // File exists, delete it + await fs.unlink(CONFIG_PATH); + console.error('Config file deleted successfully'); + return { success: true, message: 'Configuration reset to defaults' }; + } catch (error) { + if (error.code === 'ENOENT') { + // Config file doesn't exist, that's fine + return { success: true, message: 'Configuration already at defaults' }; + } + throw error; + } + } catch (error) { + console.error('Error resetting config:', error); + throw new Error(`Failed to reset configuration: ${error.message}`); + } }); -ipcMain.handle('install-requirements', async (event) => { +// Generic function to ensure required files are present +async function ensureRequiredFiles(config) { + const { + directoryName, + requiredFiles, + downloadBaseUrl, + resourceType + } = config; + + const targetPath = path.join(APP_ROOT, directoryName); + + try { + // Check if target directory exists, create it if not + try { + await fs.access(targetPath); + sendPythonOutput(`${resourceType} directory exists`, 'info'); + } catch (error) { + if (error.code === 'ENOENT') { + sendPythonOutput(`Creating ${resourceType} directory...`, 'info'); + await fs.mkdir(targetPath, { recursive: true }); + sendPythonOutput(`${resourceType} directory created`, 'info'); + } else { + throw error; + } + } + + // Check each required file + const missingFiles = []; + for (const fileName of requiredFiles) { + const filePath = path.join(targetPath, fileName); + try { + await fs.access(filePath); + sendPythonOutput(`✓ ${fileName} exists`, 'info'); + } catch (error) { + if (error.code === 'ENOENT') { + missingFiles.push(fileName); + sendPythonOutput(`✗ ${fileName} missing`, 'info'); + } else { + throw error; + } + } + } + + // Download missing files + if (missingFiles.length > 0) { + sendPythonOutput(`Downloading ${missingFiles.length} missing ${resourceType} file${missingFiles.length > 1 ? 's' : ''}...`, 'info'); + + for (const fileName of missingFiles) { + const filePath = path.join(targetPath, fileName); + const downloadUrl = `${downloadBaseUrl}/${fileName}`; + + try { + sendPythonOutput(`Downloading ${fileName}...`, 'info'); + await downloadFile(downloadUrl, filePath); + sendPythonOutput(`✓ Downloaded ${fileName}`, 'info'); + } catch (downloadError) { + sendPythonOutput(`✗ Failed to download ${fileName}: ${downloadError.message}`, 'stderr'); + throw new Error(`Failed to download ${fileName}: ${downloadError.message}`); + } + } + + sendPythonOutput(`All missing ${resourceType} files downloaded successfully`, 'info'); + } else { + sendPythonOutput(`All required ${resourceType} files are present`, 'info'); + } + + return { + success: true, + message: `${resourceType} setup complete. ${missingFiles.length} file${missingFiles.length > 1 ? 's' : ''} downloaded.`, + downloadedFiles: missingFiles + }; + } catch (error) { + console.error(`Error setting up ${resourceType} files:`, error); + throw new Error(`${resourceType} setup failed: ${error.message}`); + } +} + +// Update the install-requirements handler +ipcMain.handle('install-requirements', async () => { const requirementsPath = path.join(APP_ROOT, 'app', 'requirements.txt'); + const venvMarkerPath = path.join(APP_ROOT, '.venv_is_set_up'); try { + // Check if venv is already set up + try { + await fs.access(venvMarkerPath); + sendPythonOutput('Virtual environment already set up, skipping installation', 'info'); + return { success: true, message: 'Virtual environment already set up' }; + } catch (error) { + // Marker doesn't exist, proceed with setup + } + // Check if requirements.txt exists await fs.access(requirementsPath); - const result = await executePythonCommand(['-m', 'pip', 'install', '-r', requirementsPath]); + await executePythonCommand(['-m', 'pip', 'install', '-r', requirementsPath]); + + await ensureRequiredFiles({ + directoryName: 'dll', + requiredFiles: REQUIRED_DLLS, + downloadBaseUrl: 'https://yummers.dev/tastt/dll', + resourceType: 'DLL' + }); + + await fs.mkdir(path.join(APP_ROOT, 'Models'), { recursive: true }); + + await fs.writeFile(venvMarkerPath, new Date().toISOString(), 'utf8'); + sendPythonOutput('Created .venv_is_set_up marker file', 'info'); - return { success: true, message: 'Requirements installed successfully' }; + return { success: true, message: 'Requirements and dependencies installed successfully' }; } catch (error) { console.error('Error installing requirements:', error); if (error.code === 'ENOENT') { @@ -175,7 +376,6 @@ ipcMain.handle('get-microphones', async () => { try { const result = await executePythonCommand([scriptPath]); const microphones = JSON.parse(result.stdout.trim()); - console.log('Successfully retrieved microphones:', microphones); return microphones; } catch (error) { console.error('Failed to get microphones:', error); @@ -183,53 +383,135 @@ ipcMain.handle('get-microphones', async () => { } }); -// Add handlers for starting and stopping the process -ipcMain.handle('start-process', async () => { - if (runningProcess) { - throw new Error('Process is already running'); +// Helper function to safely delete directory contents +async function clearDirectory(dirPath, dirName) { + try { + await fs.access(dirPath); + sendPythonOutput(`Clearing ${dirName} directory...`, 'info'); + + const files = await fs.readdir(dirPath); + let deletedCount = 0; + + for (const file of files) { + const filePath = path.join(dirPath, file); + + try { + await fs.rm(filePath, { recursive: true, force: true }); + sendPythonOutput(`✗ Deleted file ${file}`, 'info'); + + deletedCount++; + } catch (deleteError) { + sendPythonOutput(`Warning: Could not delete ${file}: ${deleteError.message}`, 'stderr'); + // Continue with other files even if one fails + } + } + + sendPythonOutput(`${dirName} directory cleared`, 'info'); + return deletedCount; + } catch (error) { + if (error.code === 'ENOENT') { + sendPythonOutput(`${dirName} directory doesn't exist, skipping`, 'info'); + return 0; + } else { + sendPythonOutput(`Error clearing ${dirName} directory: ${error.message}`, 'stderr'); + throw error; + } } +} - const scriptPath = path.join(APP_ROOT, 'app', 'hi.py'); - const configPath = CONFIG_PATH; +ipcMain.handle('reset-venv', async () => { + const venvMarkerPath = path.join(APP_ROOT, '.venv_is_set_up'); try { - const pythonPath = getVenvPython(); - const args = [scriptPath, '--config', configPath]; + sendPythonOutput('Starting virtual environment reset...', 'info'); - sendPythonOutput(`Starting process: ${path.basename(pythonPath)} ${args.join(' ')}`, 'info'); + // Delete the venv marker file first + try { + await fs.unlink(venvMarkerPath); + sendPythonOutput('Deleted .venv_is_set_up marker file', 'info'); + } catch (error) { + if (error.code !== 'ENOENT') { + sendPythonOutput(`Warning: Could not delete marker file: ${error.message}`, 'stderr'); + } + } + + // Get list of installed packages + sendPythonOutput('Getting list of installed packages...', 'info'); + const freezeResult = await executePythonCommand(['-m', 'pip', 'freeze']); + const installedPackages = freezeResult.stdout.trim(); + + let uninstalledPackages = []; + + if (!installedPackages) { + sendPythonOutput('No packages found to uninstall', 'info'); + } else { + // Parse package names and filter out core packages + const packageLines = installedPackages.split('\n').filter(line => line.trim()); + const packageNames = packageLines + .map(line => line.split('==')[0].trim()) + .filter(name => name && !name.startsWith('#')); + + const corePackages = ['pip', 'setuptools', 'wheel']; + const packagesToUninstall = packageNames.filter(name => !corePackages.includes(name.toLowerCase())); + + if (packagesToUninstall.length === 0) { + sendPythonOutput('Only core packages found, nothing to uninstall', 'info'); + } else { + sendPythonOutput(`Uninstalling ${packagesToUninstall.length} packages...`, 'info'); + + const uninstallArgs = ['-m', 'pip', 'uninstall', '-y', ...packagesToUninstall]; + await executePythonCommand(uninstallArgs); + uninstalledPackages = packagesToUninstall; + } + } + + // Clear downloaded files + sendPythonOutput('Clearing downloaded files...', 'info'); - // Add dll directory to PATH for Windows DLL loading const dllPath = path.join(APP_ROOT, 'dll'); - const env = { ...process.env }; - env.PATH = `${dllPath};${env.PATH}`; + const modelsPath = path.join(APP_ROOT, 'Models'); + const binPath = path.join(APP_ROOT, 'bin'); - runningProcess = spawn(pythonPath, args, { env }); + const deletedDlls = await clearDirectory(dllPath, 'DLL'); + const deletedModels = await clearDirectory(modelsPath, 'Models'); + const deletedBins = await clearDirectory(binPath, 'Binary'); - runningProcess.stdout.on('data', (data) => { - const text = data.toString(); - sendPythonOutput(text.trimEnd(), 'stdout'); - }); + const totalDeletedFiles = deletedDlls + deletedModels + deletedBins; - runningProcess.stderr.on('data', (data) => { - const text = data.toString(); - sendPythonOutput(text.trimEnd(), 'stderr'); - }); + sendPythonOutput('Virtual environment reset successfully!', 'info'); - runningProcess.on('error', (error) => { - sendPythonOutput(`Process error: ${error.message}`, 'stderr'); - runningProcess = null; - if (mainWindow && !mainWindow.isDestroyed()) { - mainWindow.webContents.send('process-stopped'); + return { + success: true, + message: `Virtual environment reset complete. Uninstalled ${uninstalledPackages.length} packages and deleted ${totalDeletedFiles} downloaded files.`, + uninstalledPackages, + deletedFiles: { + dlls: deletedDlls, + models: deletedModels, + binaries: deletedBins, + total: totalDeletedFiles } - }); + }; + } catch (error) { + console.error('Error resetting virtual environment:', error); + throw new Error(`Virtual environment reset failed: ${error.message}`); + } +}); + +// Add handlers for starting and stopping the process +ipcMain.handle('start-process', async () => { + if (runningProcess) { + throw new Error('Process is already running'); + } + + const scriptPath = path.join(APP_ROOT, 'app', 'hi.py'); + const args = [scriptPath, '--config', CONFIG_PATH]; + + try { + const pythonPath = getVenvPython(); + sendPythonOutput(`Starting process: ${path.basename(pythonPath)} ${args.join(' ')}`, 'info'); - runningProcess.on('close', (code) => { - sendPythonOutput(`Process exited with code ${code}`, 'info'); - runningProcess = null; - if (mainWindow && !mainWindow.isDestroyed()) { - mainWindow.webContents.send('process-stopped'); - } - }); + runningProcess = spawn(pythonPath, args, { env: createPythonEnvironment() }); + setupProcessHandlers(runningProcess); return { success: true }; } catch (error) { @@ -243,7 +525,7 @@ ipcMain.handle('stop-process', async () => { throw new Error('No process is running'); } - return new Promise((resolve, reject) => { + return new Promise((resolve) => { let forcefullyKilled = false; // Set up a timeout to force kill after 10 seconds diff --git a/ui/package.json b/ui/package.json index fee2d67..3a58298 100644 --- a/ui/package.json +++ b/ui/package.json @@ -3,12 +3,85 @@ "version": "1.0.0", "description": "Speech-to-text tool for VRChat", "main": "index.js", + "homepage": "./", "scripts": { "start": "npm run build:css && electron .", "build:css": "tailwindcss -i ./src/components.css -o ./build/output.css", "watch:css": "tailwindcss -i ./src/components.css -o ./build/output.css --watch", "dev": "concurrently \"npm run watch:css\" \"electron .\"", - "test": "echo \"Error: no test specified\" && exit 1" + "test": "echo \"Error: no test specified\" && exit 1", + "dist": "npm run build:css && electron-builder", + "dist:win": "npm run build:css && electron-builder --win", + "dist:portable": "npm run build:css && electron-builder --win portable", + "dist:zip": "npm run build:css && electron-builder --win zip" + }, + "build": { + "appId": "com.yum_food.tastt", + "productName": "TaSTT", + "directories": { + "output": "dist" + }, + "files": [ + "**/*", + "!dist/**/*", + "!src/**/*", + "!node_modules/**/{CHANGELOG.md,README.md,README,readme.md,readme}", + "!node_modules/**/{test,__tests__,tests,powered-test,example,examples}", + "!node_modules/**/*.d.ts", + "!node_modules/.bin", + "!.git/**/*", + "!.gitignore" + ], + "extraResources": [ + { + "from": "../app", + "to": "app", + "filter": [ + "**/*.py", + "requirements.txt", + "!**/__pycache__/**/*" + ] + }, + { + "from": "../config.yaml", + "to": "config.yaml" + }, + { + "from": "../dll", + "to": "dll", + "filter": ["**/*"] + }, + { + "from": "../Images", + "to": "Images", + "filter": ["**/*"] + }, + { + "from": "../bin", + "to": "bin", + "filter": ["**/*"] + } + ], + "win": { + "icon": "../Images/logo.png", + "target": [ + { + "target": "portable", + "arch": ["x64"] + }, + { + "target": "zip", + "arch": ["x64"] + } + ] + }, + "portable": { + "artifactName": "${productName}-${version}-portable.exe" + }, + "nsis": { + "oneClick": false, + "allowToChangeInstallationDirectory": true + } }, "keywords": [], "author": "yum_food", @@ -22,6 +95,7 @@ "concurrently": "^9.1.2", "cross-env": "^7.0.3", "electron": "^36.3.2", + "electron-builder": "^25.1.8", "postcss": "^8.5.4", "tailwindcss": "^3.4.17", "vite": "^6.3.5", diff --git a/ui/preload.js b/ui/preload.js index e6c0623..35cc8d6 100644 --- a/ui/preload.js +++ b/ui/preload.js @@ -3,14 +3,13 @@ const { contextBridge, ipcRenderer } = require('electron'); contextBridge.exposeInMainWorld('electronAPI', { loadConfig: () => ipcRenderer.invoke('load-config'), saveConfig: (config) => ipcRenderer.invoke('save-config', config), - restartApp: () => ipcRenderer.invoke('restart-app'), + resetConfig: () => ipcRenderer.invoke('reset-config'), getMicrophones: () => ipcRenderer.invoke('get-microphones'), installRequirements: () => ipcRenderer.invoke('install-requirements'), + resetVenv: () => ipcRenderer.invoke('reset-venv'), startProcess: () => ipcRenderer.invoke('start-process'), stopProcess: () => ipcRenderer.invoke('stop-process'), onPythonOutput: (callback) => ipcRenderer.on('python-output', (event, data) => callback(data)), - onProcessStopped: (callback) => ipcRenderer.on('process-stopped', (event) => callback()) + onProcessStopped: (callback) => ipcRenderer.on('process-stopped', () => callback()) }); -console.log('Preload script loaded.'); - diff --git a/ui/renderer.js b/ui/renderer.js index b3f05a6..201eef6 100644 --- a/ui/renderer.js +++ b/ui/renderer.js @@ -1,99 +1,220 @@ -// Handle status messages +// Configuration and form field mappings +const CONFIG_FIELDS = { + // String fields + compute_type: { type: 'select', default: 'float16' }, + language: { type: 'select', default: 'english' }, + model: { type: 'select', default: 'turbo' }, + microphone: { type: 'number', default: 0 }, + user_prompt: { type: 'text', default: '' }, + + // Number fields + gpu_idx: { type: 'number', default: 0 }, + max_speech_duration_s: { type: 'number', default: 10 }, + min_silence_duration_ms: { type: 'number', default: 250 }, + reset_after_silence_s: { type: 'number', default: 15 }, + transcription_loop_delay_ms: { type: 'number', default: 100 }, + block_width: { type: 'number', default: 2 }, + num_blocks: { type: 'number', default: 40 }, + rows: { type: 'number', default: 10 }, + cols: { type: 'number', default: 24 }, + + // Boolean fields (stored as 1/0) + enable_debug_mode: { type: 'boolean', default: 0 }, + enable_previews: { type: 'boolean', default: 1 }, + save_audio: { type: 'boolean', default: 0 }, + use_cpu: { type: 'boolean', default: 0 } +}; + +// Button management system +class ButtonManager { + constructor() { + this.buttons = { + start: document.getElementById('start-process'), + stop: document.getElementById('stop-process'), + setupVenv: document.getElementById('setup-venv'), + resetVenv: document.getElementById('reset-venv'), + refreshMicrophones: document.getElementById('refresh-microphones') + }; + } + + setState(buttonName, disabled) { + const button = this.buttons[buttonName]; + if (!button) return; + + button.disabled = disabled; + if (disabled) { + button.classList.add('opacity-50', 'cursor-not-allowed'); + } else { + button.classList.remove('opacity-50', 'cursor-not-allowed'); + } + } + + setProcessRunning() { + this.setState('start', true); + this.setState('stop', false); + } + + setProcessStopped() { + this.setState('start', false); + this.setState('stop', true); + } + + async withButtonLoading(buttonName, asyncFn) { + this.setState(buttonName, true); + try { + return await asyncFn(); + } finally { + this.setState(buttonName, false); + } + } +} + +const buttonManager = new ButtonManager(); + +// Add loading overlay management +class LoadingOverlay { + constructor() { + this.overlay = document.getElementById('loading-overlay'); + this.form = document.getElementById('config-form'); + this.messageElement = this.overlay.querySelector('p'); + this.defaultMessage = 'Environment setup underway - please wait.'; + } + + show(message = null) { + this.messageElement.textContent = message || this.defaultMessage; + this.overlay.classList.remove('hidden'); + // Disable all form inputs and buttons in the entire left panel + const leftPanel = this.overlay.parentElement; + const inputs = leftPanel.querySelectorAll('input, select, textarea, button'); + inputs.forEach(input => { + input.disabled = true; + input.classList.add('opacity-50'); + }); + } + + hide() { + this.overlay.classList.add('hidden'); + // Re-enable all form inputs and buttons in the entire left panel + const leftPanel = this.overlay.parentElement; + const inputs = leftPanel.querySelectorAll('input, select, textarea, button'); + inputs.forEach(input => { + input.disabled = false; + input.classList.remove('opacity-50'); + }); + // Reset to default message + this.messageElement.textContent = this.defaultMessage; + } +} + +const loadingOverlay = new LoadingOverlay(); + +// Add a flag to prevent auto-save during programmatic updates +let isSettingValues = false; + +// Handle status messages with better color management function showStatus(message, type = 'info') { const statusEl = document.getElementById('status-message'); statusEl.textContent = message; - statusEl.classList.remove('hidden', 'bg-green-100', 'bg-red-100', 'bg-blue-100', 'text-green-800', 'text-red-800', 'text-blue-800'); - - if (type === 'success') { - statusEl.classList.add('bg-green-100', 'text-green-800'); - } else if (type === 'error') { - statusEl.classList.add('bg-red-100', 'text-red-800'); - } else { - statusEl.classList.add('bg-blue-100', 'text-blue-800'); - } + + // Remove all status classes + const statusClasses = ['hidden', 'bg-green-100', 'bg-red-100', 'bg-blue-100', 'text-green-800', 'text-red-800', 'text-blue-800']; + statusEl.classList.remove(...statusClasses); + + // Add appropriate classes based on type + const typeMap = { + success: ['bg-green-100', 'text-green-800'], + error: ['bg-red-100', 'text-red-800'], + info: ['bg-blue-100', 'text-blue-800'] + }; + + statusEl.classList.add(...(typeMap[type] || typeMap.info)); // Also log to console appendToConsole(message, type === 'error' ? 'stderr' : 'info'); - setTimeout(() => { - statusEl.classList.add('hidden'); - }, 5000); + setTimeout(() => statusEl.classList.add('hidden'), 5000); } -// Get form values +// Get form values using field mappings function getFormValues() { - const microphoneValue = document.getElementById('microphone').value; - // Convert to number if it's a numeric string (device index) - const microphoneForConfig = /^\d+$/.test(microphoneValue) ? parseInt(microphoneValue) : microphoneValue; - - return { - compute_type: document.getElementById('compute_type').value, - enable_debug_mode: document.getElementById('enable_debug_mode').checked ? 1 : 0, - enable_previews: document.getElementById('enable_previews').checked ? 1 : 0, - save_audio: document.getElementById('save_audio').checked ? 1 : 0, - language: document.getElementById('language').value, - gpu_idx: parseInt(document.getElementById('gpu_idx').value), - max_speech_duration_s: parseInt(document.getElementById('max_speech_duration_s').value), - min_silence_duration_ms: parseInt(document.getElementById('min_silence_duration_ms').value), - microphone: microphoneForConfig, - model: document.getElementById('model').value, - reset_after_silence_s: parseInt(document.getElementById('reset_after_silence_s').value), - transcription_loop_delay_ms: parseInt(document.getElementById('transcription_loop_delay_ms').value), - use_cpu: document.getElementById('use_cpu').checked ? 1 : 0, - block_width: parseInt(document.getElementById('block_width').value), - num_blocks: parseInt(document.getElementById('num_blocks').value), - rows: parseInt(document.getElementById('rows').value), - cols: parseInt(document.getElementById('cols').value) - }; + const config = {}; + + for (const [fieldName, fieldConfig] of Object.entries(CONFIG_FIELDS)) { + const element = document.getElementById(fieldName); + if (!element) continue; + + switch (fieldConfig.type) { + case 'boolean': + config[fieldName] = element.checked ? 1 : 0; + break; + case 'number': + config[fieldName] = parseInt(element.value) || fieldConfig.default; + break; + case 'text': + config[fieldName] = element.value || fieldConfig.default; + break; + default: + config[fieldName] = element.value || fieldConfig.default; + } + } + + return config; } -// Add a flag to prevent auto-save during programmatic updates -let isSettingValues = false; - -// Set form values +// Set form values using field mappings function setFormValues(config) { isSettingValues = true; // Disable auto-save temporarily - document.getElementById('compute_type').value = config.compute_type || 'int8'; - document.getElementById('enable_debug_mode').checked = config.enable_debug_mode === 1; - document.getElementById('enable_previews').checked = config.enable_previews === 1; - document.getElementById('save_audio').checked = config.save_audio === 1; - document.getElementById('language').value = config.language || 'english'; - document.getElementById('gpu_idx').value = config.gpu_idx || 0; - document.getElementById('max_speech_duration_s').value = config.max_speech_duration_s || 10; - document.getElementById('min_silence_duration_ms').value = config.min_silence_duration_ms || 250; - document.getElementById('microphone').value = config.microphone || 'motu'; - document.getElementById('model').value = config.model || 'turbo'; - document.getElementById('reset_after_silence_s').value = config.reset_after_silence_s || 15; - document.getElementById('transcription_loop_delay_ms').value = config.transcription_loop_delay_ms || 100; - document.getElementById('use_cpu').checked = config.use_cpu === 1; - document.getElementById('block_width').value = config.block_width || 2; - document.getElementById('num_blocks').value = config.num_blocks || 40; - document.getElementById('rows').value = config.rows || 10; - document.getElementById('cols').value = config.cols || 24; + for (const [fieldName, fieldConfig] of Object.entries(CONFIG_FIELDS)) { + const element = document.getElementById(fieldName); + if (!element) continue; + + const value = config[fieldName] ?? fieldConfig.default; + + switch (fieldConfig.type) { + case 'boolean': + element.checked = value === 1; + break; + case 'text': + element.value = value || ''; + break; + default: + element.value = value; + } + } isSettingValues = false; // Re-enable auto-save } -// Toggle advanced settings -document.getElementById('toggle-advanced').addEventListener('click', () => { - const advancedSettings = document.getElementById('advanced-settings'); - const chevron = document.getElementById('chevron'); - - if (advancedSettings.classList.contains('hidden')) { - advancedSettings.classList.remove('hidden'); - chevron.classList.add('rotate-90'); - } else { - advancedSettings.classList.add('hidden'); - chevron.classList.remove('rotate-90'); - } -}); +// Console management +const consoleContent = document.getElementById('console-content'); + +function appendToConsole(message, type = 'stdout') { + const timestamp = new Date().toLocaleTimeString(); + const timestampSpan = document.createElement('span'); + timestampSpan.className = 'console-timestamp'; + timestampSpan.textContent = `[${timestamp}] `; + + const messageSpan = document.createElement('span'); + messageSpan.className = `console-${type}`; + messageSpan.textContent = message; + + const lineDiv = document.createElement('div'); + lineDiv.appendChild(timestampSpan); + lineDiv.appendChild(messageSpan); + + consoleContent.appendChild(lineDiv); + + // Auto-scroll to bottom + const pythonConsole = document.getElementById('python-console'); + pythonConsole.scrollTop = pythonConsole.scrollHeight; +} -// Simplify button handlers by extracting common patterns +// Async action handler with better error handling async function handleAsyncAction(actionName, actionFn) { try { const result = await actionFn(); - if (result && result.message) { + if (result?.message) { showStatus(result.message, 'success'); } return result; @@ -103,36 +224,12 @@ async function handleAsyncAction(actionName, actionFn) { } } -// Process control buttons -const startButton = document.getElementById('start-process'); -const stopButton = document.getElementById('stop-process'); - -// Helper functions for button state management -function setButtonState(button, disabled) { - button.disabled = disabled; - if (disabled) { - button.classList.add('opacity-50', 'cursor-not-allowed'); - } else { - button.classList.remove('opacity-50', 'cursor-not-allowed'); - } -} - -function setProcessRunningState() { - setButtonState(startButton, true); - setButtonState(stopButton, false); -} - -function setProcessStoppedState() { - setButtonState(startButton, false); - setButtonState(stopButton, true); -} - // Auto-save functionality with debouncing let saveTimeout; -const SAVE_DELAY = 500; // milliseconds +const SAVE_DELAY = 500; async function autoSaveConfig() { - if (isSettingValues) return; // Don't save during programmatic updates + if (isSettingValues) return; clearTimeout(saveTimeout); saveTimeout = setTimeout(async () => { @@ -141,28 +238,19 @@ async function autoSaveConfig() { await window.electronAPI.saveConfig(config); showStatus('Configuration saved', 'success'); - // Check if process is running (stop button is enabled means process is running) - const stopButton = document.getElementById('stop-process'); - - if (!stopButton.disabled) { - // Process is running, restart it with new config + // Restart process if running + if (!buttonManager.buttons.stop.disabled) { appendToConsole('Restarting process with new configuration...', 'info'); try { await window.electronAPI.stopProcess(); - await new Promise(resolve => setTimeout(resolve, 1000)); - await window.electronAPI.startProcess(); - - // Update button states to reflect running process - setProcessRunningState(); - + buttonManager.setProcessRunning(); appendToConsole('Process restarted with new configuration', 'info'); } catch (error) { appendToConsole(`Failed to restart process: ${error.message}`, 'stderr'); - // Process is stopped, update button states - setProcessStoppedState(); + buttonManager.setProcessStopped(); } } } catch (error) { @@ -171,47 +259,32 @@ async function autoSaveConfig() { }, SAVE_DELAY); } -// Add event listeners to all form inputs for auto-save +// Auto-save setup function setupAutoSave() { - // Get all form inputs const form = document.getElementById('config-form'); - const inputs = form.querySelectorAll('input, select'); + const inputs = form.querySelectorAll('input, select, textarea'); - // Add change listener to each input inputs.forEach(input => { - if (input.type === 'checkbox') { - input.addEventListener('change', autoSaveConfig); - } else if (input.type === 'number' || input.type === 'text') { - input.addEventListener('input', autoSaveConfig); - } else if (input.tagName === 'SELECT') { - input.addEventListener('change', autoSaveConfig); - } + const eventType = input.type === 'checkbox' ? 'change' : + (input.type === 'number' || input.type === 'text' || input.tagName === 'TEXTAREA') ? 'input' : 'change'; + input.addEventListener(eventType, autoSaveConfig); }); } -// Update the setup-venv handler -document.getElementById('setup-venv').addEventListener('click', async () => { - const setupButton = document.getElementById('setup-venv'); - setupButton.disabled = true; - setupButton.classList.add('opacity-50', 'cursor-not-allowed'); - - try { - await handleAsyncAction('Install requirements', async () => { - return await window.electronAPI.installRequirements(); - }); - // Reload microphones after successful installation - await loadMicrophones(); - } finally { - setupButton.disabled = false; - setupButton.classList.remove('opacity-50', 'cursor-not-allowed'); - } -}); - -// Simplified microphone loading +// Microphone loading async function loadMicrophones() { const microphoneSelect = document.getElementById('microphone'); try { + // Check/install requirements during startup + appendToConsole('Checking virtual environment and requirements...', 'info'); + loadingOverlay.show('Setting up environment - this can take several minutes.'); + try { + await handleAsyncAction('Install requirements', () => window.electronAPI.installRequirements()); + } finally { + loadingOverlay.hide(); // Always hide overlay when done + } + appendToConsole('Loading available microphones...', 'info'); const microphones = await window.electronAPI.getMicrophones(); @@ -232,7 +305,7 @@ async function loadMicrophones() { appendToConsole(` - ${mic.name} (Device ${mic.index})`, 'stdout'); }); - // Restore previously selected microphone if possible + // Restore previously selected microphone try { const config = await window.electronAPI.loadConfig(); if (config.microphone) { @@ -248,11 +321,144 @@ async function loadMicrophones() { } } -// Update window load to include auto-save setup +// Event handlers setup +function setupEventHandlers() { + // Advanced settings toggle + document.getElementById('toggle-advanced').addEventListener('click', () => { + const advancedSettings = document.getElementById('advanced-settings'); + const chevron = document.getElementById('chevron'); + + if (advancedSettings.classList.contains('hidden')) { + advancedSettings.classList.remove('hidden'); + chevron.classList.add('rotate-90'); + } else { + advancedSettings.classList.add('hidden'); + chevron.classList.remove('rotate-90'); + } + }); + + // Setup virtual environment + document.getElementById('setup-venv').addEventListener('click', async () => { + loadingOverlay.show('Setting up virtual environment - please wait...'); // Show overlay with custom message + try { + await buttonManager.withButtonLoading('setupVenv', async () => { + await handleAsyncAction('Install requirements', () => window.electronAPI.installRequirements()); + }); + } finally { + loadingOverlay.hide(); // Always hide overlay when done + } + }); + + // Reset virtual environment + document.getElementById('reset-venv').addEventListener('click', async () => { + loadingOverlay.show('Resetting virtual environment - please wait...'); // Show overlay with custom message + try { + await buttonManager.withButtonLoading('resetVenv', async () => { + await handleAsyncAction('Reset virtual environment', () => window.electronAPI.resetVenv()); + }); + } finally { + loadingOverlay.hide(); // Always hide overlay when done + } + }); + + // Reset configuration + document.getElementById('reset-config').addEventListener('click', async () => { + const confirmReset = confirm('Are you sure you want to reset all settings to defaults? This cannot be undone.'); + if (!confirmReset) return; + + try { + // Stop process if running + const wasRunning = !buttonManager.buttons.stop.disabled; + if (wasRunning) { + appendToConsole('Stopping process before resetting configuration...', 'info'); + await window.electronAPI.stopProcess(); + buttonManager.setProcessStopped(); + await new Promise(resolve => setTimeout(resolve, 500)); + } + + // Reset configuration + appendToConsole('Resetting configuration to defaults...', 'info'); + const result = await window.electronAPI.resetConfig(); + + // Reload configuration with defaults + const config = await window.electronAPI.loadConfig(); + setFormValues(config); + + showStatus(result.message, 'success'); + appendToConsole('Configuration reset successfully', 'info'); + + // Restart process if it was running + if (wasRunning) { + appendToConsole('Restarting process with default configuration...', 'info'); + await window.electronAPI.startProcess(); + buttonManager.setProcessRunning(); + appendToConsole('Process restarted with default configuration', 'info'); + } + } catch (error) { + showStatus(`Failed to reset configuration: ${error.message}`, 'error'); + appendToConsole(`Failed to reset configuration: ${error.message}`, 'stderr'); + } + }); + + // Refresh microphones + document.getElementById('refresh-microphones').addEventListener('click', async () => { + await buttonManager.withButtonLoading('refreshMicrophones', async () => { + await loadMicrophones(); + }); + }); + + // Start process + document.getElementById('start-process').addEventListener('click', async () => { + buttonManager.setState('start', true); + + try { + // The installRequirements function will now check if venv is set up. + loadingOverlay.show('Verifying environment setup - please wait...'); // Show overlay with custom message + try { + await window.electronAPI.installRequirements(); + appendToConsole('Virtual environment setup checked/completed', 'info'); + } finally { + loadingOverlay.hide(); // Always hide overlay when done + } + + await window.electronAPI.startProcess(); + buttonManager.setProcessRunning(); + appendToConsole('Process started successfully', 'info'); + } catch (error) { + appendToConsole(`Failed to start process: ${error.message}`, 'stderr'); + buttonManager.setState('start', false); + } + }); + + // Stop process + document.getElementById('stop-process').addEventListener('click', async () => { + buttonManager.setState('stop', true); + + try { + await window.electronAPI.stopProcess(); + appendToConsole('Process stop initiated', 'info'); + } catch (error) { + appendToConsole(`Failed to stop process: ${error.message}`, 'stderr'); + buttonManager.setState('stop', false); + } + }); + + // Listen for process stopped event + window.electronAPI.onProcessStopped(() => { + buttonManager.setProcessStopped(); + }); +} + +// Initialize application window.addEventListener('load', async () => { appendToConsole('TaSTT Configuration UI initialized', 'info'); - // Load config first + // Set up Python output listener first so we capture all output + window.electronAPI.onPythonOutput((data) => { + appendToConsole(data.message, data.type); + }); + + // Load configuration try { const config = await window.electronAPI.loadConfig(); setFormValues(config); @@ -264,71 +470,7 @@ window.addEventListener('load', async () => { // Load microphones await loadMicrophones(); - // Set up auto-save after everything is loaded + // Setup event handlers and auto-save + setupEventHandlers(); setupAutoSave(); -}); - -// Console management -const consoleContent = document.getElementById('console-content'); - -function appendToConsole(message, type = 'stdout') { - const timestamp = new Date().toLocaleTimeString(); - const timestampSpan = document.createElement('span'); - timestampSpan.className = 'console-timestamp'; - timestampSpan.textContent = `[${timestamp}] `; - - const messageSpan = document.createElement('span'); - messageSpan.className = `console-${type}`; - messageSpan.textContent = message; - - const lineDiv = document.createElement('div'); - lineDiv.appendChild(timestampSpan); - lineDiv.appendChild(messageSpan); - - consoleContent.appendChild(lineDiv); - - // Auto-scroll to bottom - const pythonConsole = document.getElementById('python-console'); - pythonConsole.scrollTop = pythonConsole.scrollHeight; -} - -// Clear console button -document.getElementById('clear-console').addEventListener('click', () => { - consoleContent.innerHTML = ''; - appendToConsole('Console cleared', 'info'); -}); - -// Listen for Python output -window.electronAPI.onPythonOutput((data) => { - appendToConsole(data.message, data.type); -}); - -document.getElementById('start-process').addEventListener('click', async () => { - setButtonState(startButton, true); - - try { - await window.electronAPI.startProcess(); - setProcessRunningState(); - appendToConsole('Process started successfully', 'info'); - } catch (error) { - appendToConsole(`Failed to start process: ${error.message}`, 'stderr'); - setButtonState(startButton, false); - } -}); - -document.getElementById('stop-process').addEventListener('click', async () => { - setButtonState(stopButton, true); - - try { - const result = await window.electronAPI.stopProcess(); - appendToConsole('Process stop initiated', 'info'); - } catch (error) { - appendToConsole(`Failed to stop process: ${error.message}`, 'stderr'); - setButtonState(stopButton, false); - } -}); - -// Listen for process stopped event -window.electronAPI.onProcessStopped(() => { - setProcessStoppedState(); });
\ No newline at end of file diff --git a/ui/src/components.css b/ui/src/components.css index d8d909d..2832e12 100644 --- a/ui/src/components.css +++ b/ui/src/components.css @@ -46,6 +46,14 @@ .btn-red { @apply bg-red-600 text-white hover:bg-red-700 focus:ring-red-500; } + + .btn-purple { + @apply bg-purple-600 text-white hover:bg-purple-700 focus:ring-purple-500; + } + + .btn-orange { + @apply bg-orange-600 text-white hover:bg-orange-700 focus:ring-orange-500; + } } /* Console styling */ diff --git a/ui_design.md b/ui_design.md index 06eee65..e1ff095 100644 --- a/ui_design.md +++ b/ui_design.md @@ -10,7 +10,13 @@ $ choco uninstall nodejs -y $ choco install nodejs-lts -y ``` -Now open a non-admin PowerShell terminal: +To build the app: +``` +$ npm install +$ npm run dev +``` + +For posterity, this is how I set up the ui directory initially. In a non-admin PowerShell window: ```bash # Check your node and npm versions. @@ -30,3 +36,4 @@ npx tailwindcss init -p npm install --save-dev vue@3 @vitejs/plugin-vue vite yaml npm install --save-dev js-yaml ``` + |
