From 9bf33a4cad8196bfe7253c841ab5c35ffdbc0173 Mon Sep 17 00:00:00 2001 From: yum Date: Wed, 23 Jul 2025 19:05:15 -0700 Subject: add segment metadata logging feature Segment metadata can now be logged to a json as the app runs. The goal is to identify the params that heavily correlate with hallucinations. Also: * use 7zip for compression in build, speeding things up * log dll download progress every few seconds * shrink package --- app/stt.py | 72 ++++++++++++++++++++++++++++++++++++++++++++++++++--- config.yaml | 1 + ui/.gitignore | 2 ++ ui/config-schema.js | 1 + ui/index.html | 6 ++++- ui/index.js | 28 +++++++++++++++++++-- ui/package.json | 35 ++++++++++++++++---------- 7 files changed, 126 insertions(+), 19 deletions(-) diff --git a/app/stt.py b/app/stt.py index 79ab0d1..f36de97 100644 --- a/app/stt.py +++ b/app/stt.py @@ -1,5 +1,6 @@ from datetime import datetime from faster_whisper import WhisperModel +import json import langcodes import numpy as np import os @@ -486,7 +487,8 @@ class Whisper: # Build context-aware prompt prompt = self._build_prompt() - print(f"Prompt: {prompt}", flush=True) + if self.cfg["enable_debug_mode"]: + print(f"Prompt: {prompt}", flush=True) t0 = time.time() segments, info = self.model.transcribe( @@ -578,16 +580,69 @@ def saveAudio(audio: bytes, path: str, cfg: typing.Dict): wf.writeframes(audio) +class SegmentLogger: + def __init__(self, cfg: typing.Dict): + self.cfg = cfg + self.enabled = cfg.get("enable_segment_logging", False) + self.session_data = [] + self.log_file = None + + if self.enabled: + log_dir = os.path.join(PROJECT_ROOT, "logs") + if not os.path.exists(log_dir): + os.makedirs(log_dir) + + # Create file + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + self.log_file = os.path.join(log_dir, f"session_debug_{timestamp}.json") + print(f"Segment logging enabled. Logging to: {self.log_file}", file=sys.stderr) + + def log_segment(self, segment: Segment, commit_type: str = "commit"): + if not self.enabled: + return + + segment_data = { + "timestamp": datetime.now().isoformat(), + "type": commit_type, + "text": segment.transcript, + "start_ts": segment.start_ts, + "end_ts": segment.end_ts, + "wall_ts": segment.wall_ts, + "avg_logprob": segment.avg_logprob, + "no_speech_prob": segment.no_speech_prob, + "compression_ratio": segment.compression_ratio, + "duration": segment.end_ts - segment.start_ts + } + + self.session_data.append(segment_data) + + # Write to file incrementally + try: + with open(self.log_file, 'w') as f: + json.dump({ + "session_start": self.session_data[0]["timestamp"] if self.session_data else None, + "segments": self.session_data + }, f, indent=2) + except Exception as e: + print(f"Error writing segment log: {e}", file=sys.stderr) + + def close(self): + if self.enabled and self.session_data: + print(f"Session complete. Logged {len(self.session_data)} segments to {self.log_file}", file=sys.stderr) + + class VadCommitter: def __init__(self, cfg: typing.Dict, collector: AudioCollector, whisper: Whisper, - segmenter: AudioSegmenter): + segmenter: AudioSegmenter, + segment_logger: SegmentLogger = None): self.cfg = cfg self.collector = collector self.whisper = whisper self.segmenter = segmenter + self.segment_logger = segment_logger def getDelta(self) -> TranscriptCommit: audio = self.collector.getAudio() @@ -618,6 +673,10 @@ class VadCommitter: if delta.strip(): self.whisper.update_context(delta.strip()) + if self.segment_logger: + for s in segments: + self.segment_logger.log_segment(s, "commit") + audio = self.collector.getAudio() if self.cfg["enable_debug_mode"]: for s in segments: @@ -638,6 +697,10 @@ class VadCommitter: segments = self.whisper.transcribe(audio) preview = "".join(s.transcript for s in segments) + if self.segment_logger: + for s in segments: + self.segment_logger.log_segment(s, "preview") + if not has_audio: self.collector.keepLast(1.0) @@ -745,7 +808,9 @@ def transcriptionThread(shared_data: SharedThreadData): segmenter = AudioSegmenter(min_silence_ms=shared_data.cfg["min_silence_duration_ms"], max_speech_s=shared_data.cfg["max_speech_duration_s"], min_speech_duration_ms=shared_data.cfg["min_speech_duration_ms"]) - committer = VadCommitter(shared_data.cfg, collector, whisper, segmenter) + + segment_logger = SegmentLogger(shared_data.cfg) + committer = VadCommitter(shared_data.cfg, collector, whisper, segmenter, segment_logger) plugins = [] # plugins.append(TranslationPlugin(shared_data.cfg)) # Not implemented yet @@ -839,4 +904,5 @@ def transcriptionThread(shared_data: SharedThreadData): plugin.stop() for filt in filters: filt.stop() + segment_logger.close() diff --git a/config.yaml b/config.yaml index dfa2e1f..db25405 100644 --- a/config.yaml +++ b/config.yaml @@ -22,6 +22,7 @@ volume: 10 enable_debug_mode: 0 enable_previews: 1 save_audio: 1 +enable_segment_logging: 0 use_cpu: 0 enable_lowercase_filter: 0 enable_uppercase_filter: 0 diff --git a/ui/.gitignore b/ui/.gitignore index 2109e19..c1dbe3c 100644 --- a/ui/.gitignore +++ b/ui/.gitignore @@ -1,3 +1,5 @@ build node_modules package-lock.json +output.css +dist diff --git a/ui/config-schema.js b/ui/config-schema.js index bf91fce..fb90f3f 100644 --- a/ui/config-schema.js +++ b/ui/config-schema.js @@ -29,6 +29,7 @@ const CONFIG_SCHEMA = { enable_debug_mode: { type: 'boolean', default: 0 }, enable_previews: { type: 'boolean', default: 1 }, save_audio: { type: 'boolean', default: 0 }, + enable_segment_logging: { type: 'boolean', default: 0 }, use_cpu: { type: 'boolean', default: 0 }, enable_lowercase_filter: { type: 'boolean', default: 0 }, enable_uppercase_filter: { type: 'boolean', default: 0 }, diff --git a/ui/index.html b/ui/index.html index 19c41ce..29d4a78 100644 --- a/ui/index.html +++ b/ui/index.html @@ -4,7 +4,7 @@ TaSTT - +
@@ -214,6 +214,10 @@ Save Audio Segments +
diff --git a/ui/index.js b/ui/index.js index 5a5d0a6..afaaf7f 100644 --- a/ui/index.js +++ b/ui/index.js @@ -6,7 +6,12 @@ const { spawn } = require('child_process'); const https = require('https'); const { CONFIG_SCHEMA, getDefaultConfig } = require('./config-schema.js'); -const APP_ROOT = path.join(__dirname, '..'); +// Detect if we're running in development or production +const isDev = !app.isPackaged; +const APP_ROOT = isDev + ? path.join(__dirname, '..') // Development: go up from ui/ to project root + : process.resourcesPath; // Production: use Electron's resource path + const CONFIG_PATH = path.join(APP_ROOT, 'config.yaml'); let mainWindow; @@ -50,13 +55,32 @@ function createPythonEnvironment() { return env; } -// Helper function to download a file from URL +// Helper function to download a file from URL with progress function downloadFile(url, outputPath) { return new Promise((resolve, reject) => { const file = require('fs').createWriteStream(outputPath); + const fileName = path.basename(outputPath); const request = https.get(url, (response) => { if (response.statusCode === 200) { + const totalSize = parseInt(response.headers['content-length'], 10); + let downloadedSize = 0; + let lastProgressTime = Date.now(); + + response.on('data', (chunk) => { + downloadedSize += chunk.length; + + // Log progress every 5 seconds + const now = Date.now(); + if (totalSize && (now - lastProgressTime >= 5000)) { + const progress = Math.round((downloadedSize / totalSize) * 100); + const mb = (downloadedSize / 1024 / 1024).toFixed(1); + const totalMb = (totalSize / 1024 / 1024).toFixed(1); + sendPythonOutput(`Downloading ${fileName}: ${mb}/${totalMb} MB (${progress}%)`, 'info'); + lastProgressTime = now; + } + }); + response.pipe(file); file.on('finish', () => { diff --git a/ui/package.json b/ui/package.json index 3a58298..4742cd7 100644 --- a/ui/package.json +++ b/ui/package.json @@ -6,14 +6,16 @@ "homepage": "./", "scripts": { "start": "npm run build:css && electron .", - "build:css": "tailwindcss -i ./src/components.css -o ./build/output.css", - "watch:css": "tailwindcss -i ./src/components.css -o ./build/output.css --watch", + "build:css": "tailwindcss -i ./src/components.css -o ./output.css", + "watch:css": "tailwindcss -i ./src/components.css -o ./output.css --watch", "dev": "concurrently \"npm run watch:css\" \"electron .\"", "test": "echo \"Error: no test specified\" && exit 1", - "dist": "npm run build:css && electron-builder", - "dist:win": "npm run build:css && electron-builder --win", - "dist:portable": "npm run build:css && electron-builder --win portable", - "dist:zip": "npm run build:css && electron-builder --win zip" + "clean:meta": "node -e \"const fs=require('fs');const path=require('path');function deleteMeta(dir){fs.readdirSync(dir).forEach(f=>{const p=path.join(dir,f);if(f.endsWith('.meta'))fs.unlinkSync(p);else if(fs.statSync(p).isDirectory()&&!f.startsWith('.'))deleteMeta(p);})}deleteMeta('./node_modules')\"", + "prebuild": "node build_scripts/setup-empty-venv.js", + "dist": "npm run prebuild && npm run clean:meta && npm run build:css && electron-builder", + "dist:win": "npm run prebuild && npm run clean:meta && npm run build:css && electron-builder --win", + "dist:portable": "npm run prebuild && npm run clean:meta && npm run build:css && electron-builder --win portable", + "dist:zip": "npm run prebuild && npm run clean:meta && npm run build:css && electron-builder --win zip" }, "build": { "appId": "com.yum_food.tastt", @@ -46,11 +48,6 @@ "from": "../config.yaml", "to": "config.yaml" }, - { - "from": "../dll", - "to": "dll", - "filter": ["**/*"] - }, { "from": "../Images", "to": "Images", @@ -60,10 +57,20 @@ "from": "../bin", "to": "bin", "filter": ["**/*"] + }, + { + "from": "../venv_clean", + "to": "venv", + "filter": ["**/*"] + }, + { + "from": "../dll_empty", + "to": "dll", + "filter": ["**/*"] } ], "win": { - "icon": "../Images/logo.png", + "icon": "../Images/favicon.ico", "target": [ { "target": "portable", @@ -81,7 +88,9 @@ "nsis": { "oneClick": false, "allowToChangeInstallationDirectory": true - } + }, + "compression": "maximum", + "artifactName": "${productName}-${version}-${arch}.${ext}" }, "keywords": [], "author": "yum_food", -- cgit v1.2.3