summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r--  app/stt.py           72
-rw-r--r--  config.yaml           1
-rw-r--r--  ui/.gitignore         2
-rw-r--r--  ui/config-schema.js   1
-rw-r--r--  ui/index.html         6
-rw-r--r--  ui/index.js          28
-rw-r--r--  ui/package.json      35
7 files changed, 126 insertions, 19 deletions
diff --git a/app/stt.py b/app/stt.py
index 79ab0d1..f36de97 100644
--- a/app/stt.py
+++ b/app/stt.py
@@ -1,5 +1,6 @@
from datetime import datetime
from faster_whisper import WhisperModel
+import json
import langcodes
import numpy as np
import os
@@ -486,7 +487,8 @@ class Whisper:
# Build context-aware prompt
prompt = self._build_prompt()
- print(f"Prompt: {prompt}", flush=True)
+ if self.cfg["enable_debug_mode"]:
+ print(f"Prompt: {prompt}", flush=True)
t0 = time.time()
segments, info = self.model.transcribe(
@@ -578,16 +580,69 @@ def saveAudio(audio: bytes, path: str, cfg: typing.Dict):
wf.writeframes(audio)
+class SegmentLogger:
+    """Optionally record per-segment transcription metadata to a JSON file.
+
+    Logging is active only when ``cfg["enable_segment_logging"]`` is truthy;
+    otherwise ``log_segment`` and ``close`` do nothing. When active, every
+    logged segment is kept in ``session_data`` and the complete list is
+    rewritten to ``<PROJECT_ROOT>/logs/session_debug_<timestamp>.json`` after
+    each call, so the file on disk is always a complete JSON document.
+    """
+
+    def __init__(self, cfg: typing.Dict):
+        """Read the logging flag from *cfg* and, if set, choose the log path.
+
+        Args:
+            cfg: Application config. Only ``enable_segment_logging`` is read
+                here; a missing key means logging is disabled.
+        """
+        self.cfg = cfg
+        self.enabled = cfg.get("enable_segment_logging", False)
+        self.session_data = []
+        self.log_file = None
+
+        # Captured at construction so the "session_start" field reflects when
+        # the logger was created rather than when the first segment arrived.
+        started = datetime.now()
+        self.session_start = started.isoformat()
+
+        if self.enabled:
+            log_dir = os.path.join(PROJECT_ROOT, "logs")
+            # exist_ok avoids the check-then-create race of a separate
+            # os.path.exists() test.
+            os.makedirs(log_dir, exist_ok=True)
+
+            # Only the path is chosen here; the file itself is first written
+            # by log_segment().
+            timestamp = started.strftime("%Y%m%d_%H%M%S")
+            self.log_file = os.path.join(log_dir, f"session_debug_{timestamp}.json")
+            print(f"Segment logging enabled. Logging to: {self.log_file}", file=sys.stderr)
+
+    def log_segment(self, segment: Segment, commit_type: str = "commit"):
+        """Append one segment's metadata and rewrite the log file.
+
+        Args:
+            segment: Segment whose ``transcript``, ``start_ts``, ``end_ts``,
+                ``wall_ts``, ``avg_logprob``, ``no_speech_prob`` and
+                ``compression_ratio`` attributes are recorded.
+            commit_type: Label stored under the ``"type"`` key of the entry.
+
+        Write or serialization failures are reported on stderr and otherwise
+        ignored; the entry stays in ``session_data`` either way.
+        """
+        if not self.enabled:
+            return
+
+        self.session_data.append({
+            "timestamp": datetime.now().isoformat(),
+            "type": commit_type,
+            "text": segment.transcript,
+            "start_ts": segment.start_ts,
+            "end_ts": segment.end_ts,
+            "wall_ts": segment.wall_ts,
+            "avg_logprob": segment.avg_logprob,
+            "no_speech_prob": segment.no_speech_prob,
+            "compression_ratio": segment.compression_ratio,
+            "duration": segment.end_ts - segment.start_ts,
+        })
+
+        try:
+            # Serialize before opening the file: opening with "w" truncates,
+            # so a serialization error after that point would destroy the
+            # entries already on disk.
+            payload = json.dumps(
+                {
+                    "session_start": self.session_start,
+                    "segments": self.session_data,
+                },
+                indent=2,
+                ensure_ascii=False,  # keep non-ASCII transcripts readable
+            )
+            with open(self.log_file, "w", encoding="utf-8") as f:
+                f.write(payload)
+        except (OSError, TypeError, ValueError) as e:
+            print(f"Error writing segment log: {e}", file=sys.stderr)
+
+    def close(self):
+        """Print a one-line summary to stderr if anything was logged."""
+        if self.enabled and self.session_data:
+            print(f"Session complete. Logged {len(self.session_data)} segments to {self.log_file}", file=sys.stderr)
+
+
class VadCommitter:
def __init__(self,
cfg: typing.Dict,
collector: AudioCollector,
whisper: Whisper,
- segmenter: AudioSegmenter):
+ segmenter: AudioSegmenter,
+ segment_logger: SegmentLogger = None):
self.cfg = cfg
self.collector = collector
self.whisper = whisper
self.segmenter = segmenter
+ self.segment_logger = segment_logger
def getDelta(self) -> TranscriptCommit:
audio = self.collector.getAudio()
@@ -618,6 +673,10 @@ class VadCommitter:
if delta.strip():
self.whisper.update_context(delta.strip())
+ if self.segment_logger:
+ for s in segments:
+ self.segment_logger.log_segment(s, "commit")
+
audio = self.collector.getAudio()
if self.cfg["enable_debug_mode"]:
for s in segments:
@@ -638,6 +697,10 @@ class VadCommitter:
segments = self.whisper.transcribe(audio)
preview = "".join(s.transcript for s in segments)
+ if self.segment_logger:
+ for s in segments:
+ self.segment_logger.log_segment(s, "preview")
+
if not has_audio:
self.collector.keepLast(1.0)
@@ -745,7 +808,9 @@ def transcriptionThread(shared_data: SharedThreadData):
segmenter = AudioSegmenter(min_silence_ms=shared_data.cfg["min_silence_duration_ms"],
max_speech_s=shared_data.cfg["max_speech_duration_s"],
min_speech_duration_ms=shared_data.cfg["min_speech_duration_ms"])
- committer = VadCommitter(shared_data.cfg, collector, whisper, segmenter)
+
+ segment_logger = SegmentLogger(shared_data.cfg)
+ committer = VadCommitter(shared_data.cfg, collector, whisper, segmenter, segment_logger)
plugins = []
# plugins.append(TranslationPlugin(shared_data.cfg)) # Not implemented yet
@@ -839,4 +904,5 @@ def transcriptionThread(shared_data: SharedThreadData):
plugin.stop()
for filt in filters:
filt.stop()
+ segment_logger.close()
diff --git a/config.yaml b/config.yaml
index dfa2e1f..db25405 100644
--- a/config.yaml
+++ b/config.yaml
@@ -22,6 +22,7 @@ volume: 10
enable_debug_mode: 0
enable_previews: 1
save_audio: 1
+enable_segment_logging: 0
use_cpu: 0
enable_lowercase_filter: 0
enable_uppercase_filter: 0
diff --git a/ui/.gitignore b/ui/.gitignore
index 2109e19..c1dbe3c 100644
--- a/ui/.gitignore
+++ b/ui/.gitignore
@@ -1,3 +1,5 @@
build
node_modules
package-lock.json
+output.css
+dist
diff --git a/ui/config-schema.js b/ui/config-schema.js
index bf91fce..fb90f3f 100644
--- a/ui/config-schema.js
+++ b/ui/config-schema.js
@@ -29,6 +29,7 @@ const CONFIG_SCHEMA = {
enable_debug_mode: { type: 'boolean', default: 0 },
enable_previews: { type: 'boolean', default: 1 },
save_audio: { type: 'boolean', default: 0 },
+ enable_segment_logging: { type: 'boolean', default: 0 },
use_cpu: { type: 'boolean', default: 0 },
enable_lowercase_filter: { type: 'boolean', default: 0 },
enable_uppercase_filter: { type: 'boolean', default: 0 },
diff --git a/ui/index.html b/ui/index.html
index 19c41ce..29d4a78 100644
--- a/ui/index.html
+++ b/ui/index.html
@@ -4,7 +4,7 @@
<meta charset="UTF-8">
<meta http-equiv="Content-Security-Policy" content="default-src 'self'; script-src 'self'; style-src 'self' 'unsafe-inline'">
<title>TaSTT</title>
- <link rel="stylesheet" href="build/output.css">
+ <link rel="stylesheet" href="output.css">
</head>
<body class="bg-gray-100">
<div class="container-fluid px-6 py-6 h-screen flex flex-col">
@@ -214,6 +214,10 @@
<input type="checkbox" id="save_audio" class="mr-2">
<span class="checkbox-text">Save Audio Segments</span>
</label>
+ <label for="enable_segment_logging" class="checkbox-label">
+ <input type="checkbox" id="enable_segment_logging" class="mr-2">
+ <span class="checkbox-text">Log Segment Metadata (Debug)</span>
+ </label>
</div>
</section>
diff --git a/ui/index.js b/ui/index.js
index 5a5d0a6..afaaf7f 100644
--- a/ui/index.js
+++ b/ui/index.js
@@ -6,7 +6,12 @@ const { spawn } = require('child_process');
const https = require('https');
const { CONFIG_SCHEMA, getDefaultConfig } = require('./config-schema.js');
-const APP_ROOT = path.join(__dirname, '..');
+// Detect if we're running in development or production
+const isDev = !app.isPackaged;
+const APP_ROOT = isDev
+ ? path.join(__dirname, '..') // Development: go up from ui/ to project root
+ : process.resourcesPath; // Production: use Electron's resource path
+
const CONFIG_PATH = path.join(APP_ROOT, 'config.yaml');
let mainWindow;
@@ -50,13 +55,32 @@ function createPythonEnvironment() {
return env;
}
-// Helper function to download a file from URL
+// Helper function to download a file from URL with progress
function downloadFile(url, outputPath) {
return new Promise((resolve, reject) => {
const file = require('fs').createWriteStream(outputPath);
+ const fileName = path.basename(outputPath);
const request = https.get(url, (response) => {
if (response.statusCode === 200) {
+ const totalSize = parseInt(response.headers['content-length'], 10);
+ let downloadedSize = 0;
+ let lastProgressTime = Date.now();
+
+ response.on('data', (chunk) => {
+ downloadedSize += chunk.length;
+
+ // Log progress every 5 seconds
+ const now = Date.now();
+ if (totalSize && (now - lastProgressTime >= 5000)) {
+ const progress = Math.round((downloadedSize / totalSize) * 100);
+ const mb = (downloadedSize / 1024 / 1024).toFixed(1);
+ const totalMb = (totalSize / 1024 / 1024).toFixed(1);
+ sendPythonOutput(`Downloading ${fileName}: ${mb}/${totalMb} MB (${progress}%)`, 'info');
+ lastProgressTime = now;
+ }
+ });
+
response.pipe(file);
file.on('finish', () => {
diff --git a/ui/package.json b/ui/package.json
index 3a58298..4742cd7 100644
--- a/ui/package.json
+++ b/ui/package.json
@@ -6,14 +6,16 @@
"homepage": "./",
"scripts": {
"start": "npm run build:css && electron .",
- "build:css": "tailwindcss -i ./src/components.css -o ./build/output.css",
- "watch:css": "tailwindcss -i ./src/components.css -o ./build/output.css --watch",
+ "build:css": "tailwindcss -i ./src/components.css -o ./output.css",
+ "watch:css": "tailwindcss -i ./src/components.css -o ./output.css --watch",
"dev": "concurrently \"npm run watch:css\" \"electron .\"",
"test": "echo \"Error: no test specified\" && exit 1",
- "dist": "npm run build:css && electron-builder",
- "dist:win": "npm run build:css && electron-builder --win",
- "dist:portable": "npm run build:css && electron-builder --win portable",
- "dist:zip": "npm run build:css && electron-builder --win zip"
+ "clean:meta": "node -e \"const fs=require('fs');const path=require('path');function deleteMeta(dir){fs.readdirSync(dir).forEach(f=>{const p=path.join(dir,f);if(f.endsWith('.meta'))fs.unlinkSync(p);else if(fs.statSync(p).isDirectory()&&!f.startsWith('.'))deleteMeta(p);})}deleteMeta('./node_modules')\"",
+ "prebuild": "node build_scripts/setup-empty-venv.js",
+ "dist": "npm run prebuild && npm run clean:meta && npm run build:css && electron-builder",
+ "dist:win": "npm run prebuild && npm run clean:meta && npm run build:css && electron-builder --win",
+ "dist:portable": "npm run prebuild && npm run clean:meta && npm run build:css && electron-builder --win portable",
+ "dist:zip": "npm run prebuild && npm run clean:meta && npm run build:css && electron-builder --win zip"
},
"build": {
"appId": "com.yum_food.tastt",
@@ -47,11 +49,6 @@
"to": "config.yaml"
},
{
- "from": "../dll",
- "to": "dll",
- "filter": ["**/*"]
- },
- {
"from": "../Images",
"to": "Images",
"filter": ["**/*"]
@@ -60,10 +57,20 @@
"from": "../bin",
"to": "bin",
"filter": ["**/*"]
+ },
+ {
+ "from": "../venv_clean",
+ "to": "venv",
+ "filter": ["**/*"]
+ },
+ {
+ "from": "../dll_empty",
+ "to": "dll",
+ "filter": ["**/*"]
}
],
"win": {
- "icon": "../Images/logo.png",
+ "icon": "../Images/favicon.ico",
"target": [
{
"target": "portable",
@@ -81,7 +88,9 @@
"nsis": {
"oneClick": false,
"allowToChangeInstallationDirectory": true
- }
+ },
+ "compression": "maximum",
+ "artifactName": "${productName}-${version}-${arch}.${ext}"
},
"keywords": [],
"author": "yum_food",