From 1ede199387c072a85e8757a6aaec04d2c7cdeba4 Mon Sep 17 00:00:00 2001 From: yum Date: Thu, 29 May 2025 15:56:51 -0700 Subject: Add basic electron+tailwind hello world --- ui/.gitignore | 3 +++ ui/index.html | 20 ++++++++++++++++++++ ui/index.js | 29 +++++++++++++++++++++++++++++ ui/package.json | 24 ++++++++++++++++++++++++ ui/postcss.config.js | 6 ++++++ ui/preload.js | 7 +++++++ ui/src/input.css | 3 +++ ui/tailwind.config.js | 12 ++++++++++++ 8 files changed, 104 insertions(+) create mode 100644 ui/.gitignore create mode 100644 ui/index.html create mode 100644 ui/index.js create mode 100644 ui/package.json create mode 100644 ui/postcss.config.js create mode 100644 ui/preload.js create mode 100644 ui/src/input.css create mode 100644 ui/tailwind.config.js (limited to 'ui') diff --git a/ui/.gitignore b/ui/.gitignore new file mode 100644 index 0000000..2109e19 --- /dev/null +++ b/ui/.gitignore @@ -0,0 +1,3 @@ +build +node_modules +package-lock.json diff --git a/ui/index.html b/ui/index.html new file mode 100644 index 0000000..240e6ca --- /dev/null +++ b/ui/index.html @@ -0,0 +1,20 @@ + + + + + + Hello World! + + +
+

+ Hello World! +

+

+ Welcome to your Electron app with Tailwind CSS! +

+
+ + + + diff --git a/ui/index.js b/ui/index.js new file mode 100644 index 0000000..9751fb2 --- /dev/null +++ b/ui/index.js @@ -0,0 +1,29 @@ +const { app, BrowserWindow, ipcMain } = require('electron'); +const path = require('node:path'); + +function createWindow () { + const mainWindow = new BrowserWindow({ + width: 800, + height: 600, + webPreferences: { + preload: path.join(__dirname, 'preload.js'), + contextIsolation: true, + nodeIntegration: false + } + }); + + mainWindow.loadFile('index.html'); +} + +app.whenReady().then(() => { + createWindow(); + + app.on('activate', function () { + if (BrowserWindow.getAllWindows().length === 0) createWindow(); + }); +}); + +app.on('window-all-closed', function () { + if (process.platform !== 'darwin') app.quit(); +}); + diff --git a/ui/package.json b/ui/package.json new file mode 100644 index 0000000..1c56341 --- /dev/null +++ b/ui/package.json @@ -0,0 +1,24 @@ +{ + "name": "TaSTT", + "version": "1.0.0", + "description": "Speech-to-text tool for VRChat", + "main": "index.js", + "scripts": { + "start": "npm run build:css && electron .", + "build:css": "tailwindcss -i ./src/input.css -o ./build/output.css", + "watch:css": "tailwindcss -i ./src/input.css -o ./build/output.css --watch", + "dev": "npm run watch:css & electron .", + "test": "echo \"Error: no test specified\" && exit 1" + }, + "keywords": [], + "author": "yum_food", + "license": "MIT", + "devDependencies": { + "autoprefixer": "^10.4.21", + "concurrently": "^9.1.2", + "cross-env": "^7.0.3", + "electron": "^36.3.2", + "postcss": "^8.5.4", + "tailwindcss": "^3.4.17" + } +} diff --git a/ui/postcss.config.js b/ui/postcss.config.js new file mode 100644 index 0000000..33ad091 --- /dev/null +++ b/ui/postcss.config.js @@ -0,0 +1,6 @@ +module.exports = { + plugins: { + tailwindcss: {}, + autoprefixer: {}, + }, +} diff --git a/ui/preload.js b/ui/preload.js new file mode 100644 index 0000000..9f87d19 --- /dev/null +++ b/ui/preload.js @@ -0,0 +1,7 @@ +const { contextBridge, ipcRenderer } = require('electron'); + +contextBridge.exposeInMainWorld('electronAPI', { +}); + +console.log('Preload script loaded.'); + diff --git a/ui/src/input.css b/ui/src/input.css new file mode 100644 index 0000000..b5c61c9 --- /dev/null +++ b/ui/src/input.css @@ -0,0 +1,3 @@ +@tailwind base; +@tailwind components; +@tailwind utilities; diff --git a/ui/tailwind.config.js b/ui/tailwind.config.js new file mode 100644 index 0000000..fa93053 --- /dev/null +++ b/ui/tailwind.config.js @@ -0,0 +1,12 @@ +/** @type {import('tailwindcss').Config} */ +module.exports = { + content: [ + "./index.html", + "./src/**/*.{html,js}", + ], + theme: { + extend: {}, + }, + plugins: [], +} + -- cgit v1.2.3 From 82a5b3805b2a54faea501ee362419330664c277a Mon Sep 17 00:00:00 2001 From: yum Date: Thu, 29 May 2025 17:23:09 -0700 Subject: Begin roughing out STT UI HEAVILY VIBE CODED! --- ui/index.html | 189 +++++++++++++++++++++++++++++++++++--- ui/index.js | 155 ++++++++++++++++++++++++++++++- ui/package.json | 14 ++- ui/preload.js | 6 ++ ui/renderer.js | 249 ++++++++++++++++++++++++++++++++++++++++++++++++++ ui/src/components.css | 110 ++++++++++++++++++++++ ui/tailwind.config.js | 5 +- 7 files changed, 708 insertions(+), 20 deletions(-) create mode 100644 ui/renderer.js create mode 100644 ui/src/components.css (limited to 'ui') diff --git a/ui/index.html b/ui/index.html index 240e6ca..14cc354 100644 --- a/ui/index.html +++ b/ui/index.html @@ -3,18 +3,185 @@ - Hello World! - - -
-

- Hello World! -

-

- Welcome to your Electron app with Tailwind CSS! -

+ TaSTT + + + +
+

TaSTT

+ +
+ +
+
+ +
+
+
+ + +
+
+ + +
+
+ + +
+
+
+ + + + + + + + +
+ +
+
+ + + +
+ + +
+
+

Python Output

+ +
+
+
+
+
+
- + + diff --git a/ui/index.js b/ui/index.js index 9751fb2..0a7fdf9 100644 --- a/ui/index.js +++ b/ui/index.js @@ -1,10 +1,71 @@ const { app, BrowserWindow, ipcMain } = require('electron'); const path = require('node:path'); +const fs = require('node:fs').promises; +const yaml = require('js-yaml'); +const { spawn } = require('child_process'); + +let mainWindow; + +// Helper function to get the correct Python executable from venv +function getVenvPython() { + const venvPath = path.join(__dirname, '..', 'venv'); + const isWindows = process.platform === 'win32'; + const pythonExecutable = isWindows ? 'python.exe' : 'python'; + const pythonPath = path.join(venvPath, isWindows ? 'Scripts' : 'bin', pythonExecutable); + return pythonPath; +} + +// Helper function to send Python output to renderer +function sendPythonOutput(message, type = 'stdout') { + if (mainWindow && !mainWindow.isDestroyed()) { + mainWindow.webContents.send('python-output', { message, type }); + } +} + +// Helper function to execute Python commands using venv +function executePythonCommand(args, options = {}) { + return new Promise((resolve, reject) => { + const pythonPath = getVenvPython(); + const commandStr = `${path.basename(pythonPath)} ${args.join(' ')}`; + sendPythonOutput(`> ${commandStr}`, 'info'); + + const pythonProcess = spawn(pythonPath, args, options); + + let stdout = ''; + let stderr = ''; + + pythonProcess.stdout.on('data', (data) => { + const text = data.toString(); + stdout += text; + sendPythonOutput(text.trimEnd(), 'stdout'); + }); + + pythonProcess.stderr.on('data', (data) => { + const text = data.toString(); + stderr += text; + sendPythonOutput(text.trimEnd(), 'stderr'); + }); + + pythonProcess.on('error', (error) => { + sendPythonOutput(`Failed to start Python process: ${error.message}`, 'stderr'); + reject({ error: error.message, stdout, stderr }); + }); + + pythonProcess.on('close', (code) => { + if (code !== 0) { + sendPythonOutput(`Process exited with code ${code}`, 'stderr'); + reject({ code, stdout, stderr }); + } else { + resolve({ stdout, stderr }); + } + }); + }); +} function createWindow () { - const mainWindow = new BrowserWindow({ - width: 800, - height: 600, + mainWindow = new BrowserWindow({ + width: 1000, + height: 800, webPreferences: { preload: path.join(__dirname, 'preload.js'), contextIsolation: true, @@ -15,6 +76,94 @@ function createWindow () { mainWindow.loadFile('index.html'); } +// Path to config.yaml (one level up from ui directory) +const configPath = path.join(__dirname, '..', 'config.yaml'); + +// IPC handlers +ipcMain.handle('load-config', async () => { + try { + const fileContent = await fs.readFile(configPath, 'utf8'); + return yaml.load(fileContent); + } catch (error) { + console.error('Error loading config:', error); + throw error; + } +}); + +ipcMain.handle('save-config', async (event, config) => { + try { + const yamlContent = yaml.dump(config, { lineWidth: -1 }); + await fs.writeFile(configPath, yamlContent, 'utf8'); + return { success: true }; + } catch (error) { + console.error('Error saving config:', error); + throw error; + } +}); + +ipcMain.handle('restart-app', () => { + app.relaunch(); + app.exit(); +}); + +ipcMain.handle('install-requirements', async (event) => { + const requirementsPath = path.join(__dirname, '..', 'app', 'requirements.txt'); + + try { + // Check if requirements.txt exists + await fs.access(requirementsPath); + + const result = await executePythonCommand(['-m', 'pip', 'install', '-r', requirementsPath]); + + return { success: true, message: 'Requirements installed successfully' }; + } catch (error) { + console.error('Error installing requirements:', error); + if (error.code === 'ENOENT') { + throw new Error('requirements.txt not found'); + } + throw new Error(`Installation failed: ${error.stderr || error.error || 'Unknown error'}`); + } +}); + +ipcMain.handle('get-microphones', async () => { + const pythonScript = ` +import pyaudio +import json +import sys + +try: + p = pyaudio.PyAudio() + info = p.get_host_api_info_by_index(0) + numdevices = info.get('deviceCount') + + microphones = [] + for i in range(0, numdevices): + device_info = p.get_device_info_by_host_api_device_index(0, i) + if device_info.get('maxInputChannels') > 0: + microphones.append({ + 'index': i, + 'name': device_info.get('name'), + 'defaultSampleRate': device_info.get('defaultSampleRate') + }) + + print(json.dumps(microphones)) + p.terminate() +except Exception as e: + print(json.dumps({'error': str(e)}), file=sys.stderr) + sys.exit(1) +`; + + try { + const result = await executePythonCommand(['-c', pythonScript]); + const microphones = JSON.parse(result.stdout.trim()); + console.log('Successfully retrieved microphones:', microphones); + return microphones; + } catch (error) { + console.error('Failed to get microphones:', error); + throw new Error(`Failed to get microphones: ${error.stderr || error.error || 'Unknown error'}`); + } +}); + app.whenReady().then(() => { createWindow(); diff --git a/ui/package.json b/ui/package.json index 1c56341..fee2d67 100644 --- a/ui/package.json +++ b/ui/package.json @@ -5,20 +5,26 @@ "main": "index.js", "scripts": { "start": "npm run build:css && electron .", - "build:css": "tailwindcss -i ./src/input.css -o ./build/output.css", - "watch:css": "tailwindcss -i ./src/input.css -o ./build/output.css --watch", - "dev": "npm run watch:css & electron .", + "build:css": "tailwindcss -i ./src/components.css -o ./build/output.css", + "watch:css": "tailwindcss -i ./src/components.css -o ./build/output.css --watch", + "dev": "concurrently \"npm run watch:css\" \"electron .\"", "test": "echo \"Error: no test specified\" && exit 1" }, "keywords": [], "author": "yum_food", "license": "MIT", + "dependencies": { + "js-yaml": "^4.1.0" + }, "devDependencies": { + "@vitejs/plugin-vue": "^5.2.4", "autoprefixer": "^10.4.21", "concurrently": "^9.1.2", "cross-env": "^7.0.3", "electron": "^36.3.2", "postcss": "^8.5.4", - "tailwindcss": "^3.4.17" + "tailwindcss": "^3.4.17", + "vite": "^6.3.5", + "vue": "^3.5.16" } } diff --git a/ui/preload.js b/ui/preload.js index 9f87d19..108bffe 100644 --- a/ui/preload.js +++ b/ui/preload.js @@ -1,6 +1,12 @@ const { contextBridge, ipcRenderer } = require('electron'); contextBridge.exposeInMainWorld('electronAPI', { + loadConfig: () => ipcRenderer.invoke('load-config'), + saveConfig: (config) => ipcRenderer.invoke('save-config', config), + restartApp: () => ipcRenderer.invoke('restart-app'), + getMicrophones: () => ipcRenderer.invoke('get-microphones'), + installRequirements: () => ipcRenderer.invoke('install-requirements'), + onPythonOutput: (callback) => ipcRenderer.on('python-output', (event, data) => callback(data)) }); console.log('Preload script loaded.'); diff --git a/ui/renderer.js b/ui/renderer.js new file mode 100644 index 0000000..83c652c --- /dev/null +++ b/ui/renderer.js @@ -0,0 +1,249 @@ +// Handle status messages +function showStatus(message, type = 'info') { + const statusEl = document.getElementById('status-message'); + statusEl.textContent = message; + statusEl.classList.remove('hidden', 'bg-green-100', 'bg-red-100', 'bg-blue-100', 'text-green-800', 'text-red-800', 'text-blue-800'); + + if (type === 'success') { + statusEl.classList.add('bg-green-100', 'text-green-800'); + } else if (type === 'error') { + statusEl.classList.add('bg-red-100', 'text-red-800'); + } else { + statusEl.classList.add('bg-blue-100', 'text-blue-800'); + } + + // Also log to console + appendToConsole(message, type === 'error' ? 'stderr' : 'info'); + + setTimeout(() => { + statusEl.classList.add('hidden'); + }, 5000); +} + +// Get form values +function getFormValues() { + return { + compute_type: document.getElementById('compute_type').value, + enable_debug_mode: document.getElementById('enable_debug_mode').checked ? 1 : 0, + enable_previews: document.getElementById('enable_previews').checked ? 1 : 0, + language: document.getElementById('language').value, + gpu_idx: parseInt(document.getElementById('gpu_idx').value), + max_speech_duration_s: parseInt(document.getElementById('max_speech_duration_s').value), + min_silence_duration_ms: parseInt(document.getElementById('min_silence_duration_ms').value), + microphone: document.getElementById('microphone').value, + model: document.getElementById('model').value, + reset_after_silence_s: parseInt(document.getElementById('reset_after_silence_s').value), + transcription_loop_delay_ms: parseInt(document.getElementById('transcription_loop_delay_ms').value), + use_cpu: document.getElementById('use_cpu').checked ? 1 : 0, + block_width: parseInt(document.getElementById('block_width').value), + num_blocks: parseInt(document.getElementById('num_blocks').value), + rows: parseInt(document.getElementById('rows').value), + cols: parseInt(document.getElementById('cols').value) + }; +} + +// Add a flag to prevent auto-save during programmatic updates +let isSettingValues = false; + +// Set form values +function setFormValues(config) { + isSettingValues = true; // Disable auto-save temporarily + + document.getElementById('compute_type').value = config.compute_type || 'int8'; + document.getElementById('enable_debug_mode').checked = config.enable_debug_mode === 1; + document.getElementById('enable_previews').checked = config.enable_previews === 1; + document.getElementById('language').value = config.language || 'english'; + document.getElementById('gpu_idx').value = config.gpu_idx || 0; + document.getElementById('max_speech_duration_s').value = config.max_speech_duration_s || 10; + document.getElementById('min_silence_duration_ms').value = config.min_silence_duration_ms || 250; + document.getElementById('microphone').value = config.microphone || 'motu'; + document.getElementById('model').value = config.model || 'turbo'; + document.getElementById('reset_after_silence_s').value = config.reset_after_silence_s || 15; + document.getElementById('transcription_loop_delay_ms').value = config.transcription_loop_delay_ms || 100; + document.getElementById('use_cpu').checked = config.use_cpu === 1; + document.getElementById('block_width').value = config.block_width || 2; + document.getElementById('num_blocks').value = config.num_blocks || 40; + document.getElementById('rows').value = config.rows || 10; + document.getElementById('cols').value = config.cols || 24; + + isSettingValues = false; // Re-enable auto-save +} + +// Toggle advanced settings +document.getElementById('toggle-advanced').addEventListener('click', () => { + const advancedSettings = document.getElementById('advanced-settings'); + const chevron = document.getElementById('chevron'); + + if (advancedSettings.classList.contains('hidden')) { + advancedSettings.classList.remove('hidden'); + chevron.classList.add('rotate-90'); + } else { + advancedSettings.classList.add('hidden'); + chevron.classList.remove('rotate-90'); + } +}); + +// Simplify button handlers by extracting common patterns +async function handleAsyncAction(actionName, actionFn) { + try { + const result = await actionFn(); + if (result && result.message) { + showStatus(result.message, 'success'); + } + return result; + } catch (error) { + showStatus(`${actionName} failed: ${error.message}`, 'error'); + throw error; + } +} + +// Auto-save functionality with debouncing +let saveTimeout; +const SAVE_DELAY = 500; // milliseconds + +async function autoSaveConfig() { + if (isSettingValues) return; // Don't save during programmatic updates + + clearTimeout(saveTimeout); + saveTimeout = setTimeout(async () => { + try { + const config = getFormValues(); + await window.electronAPI.saveConfig(config); + showStatus('Configuration saved', 'success'); + } catch (error) { + showStatus(`Failed to save configuration: ${error.message}`, 'error'); + } + }, SAVE_DELAY); +} + +// Add event listeners to all form inputs for auto-save +function setupAutoSave() { + // Get all form inputs + const form = document.getElementById('config-form'); + const inputs = form.querySelectorAll('input, select'); + + // Add change listener to each input + inputs.forEach(input => { + if (input.type === 'checkbox') { + input.addEventListener('change', autoSaveConfig); + } else if (input.type === 'number' || input.type === 'text') { + input.addEventListener('input', autoSaveConfig); + } else if (input.tagName === 'SELECT') { + input.addEventListener('change', autoSaveConfig); + } + }); +} + +// Update the setup-venv handler +document.getElementById('setup-venv').addEventListener('click', async () => { + const setupButton = document.getElementById('setup-venv'); + setupButton.disabled = true; + setupButton.classList.add('opacity-50', 'cursor-not-allowed'); + + try { + await handleAsyncAction('Install requirements', async () => { + return await window.electronAPI.installRequirements(); + }); + // Reload microphones after successful installation + await loadMicrophones(); + } finally { + setupButton.disabled = false; + setupButton.classList.remove('opacity-50', 'cursor-not-allowed'); + } +}); + +// Simplified microphone loading +async function loadMicrophones() { + const microphoneSelect = document.getElementById('microphone'); + + try { + appendToConsole('Loading available microphones...', 'info'); + const microphones = await window.electronAPI.getMicrophones(); + + microphoneSelect.innerHTML = ''; + + if (microphones.length === 0) { + microphoneSelect.innerHTML = ''; + appendToConsole('No microphones found', 'stderr'); + return; + } + + appendToConsole(`Found ${microphones.length} microphone(s)`, 'info'); + microphones.forEach(mic => { + const option = document.createElement('option'); + option.value = mic.index.toString(); + option.textContent = mic.name; + microphoneSelect.appendChild(option); + appendToConsole(` - ${mic.name} (Device ${mic.index})`, 'stdout'); + }); + + // Restore previously selected microphone if possible + try { + const config = await window.electronAPI.loadConfig(); + if (config.microphone) { + microphoneSelect.value = config.microphone; + } + } catch (error) { + // Ignore config load errors here + } + + } catch (error) { + appendToConsole(`Failed to load microphones: ${error.message}`, 'stderr'); + microphoneSelect.innerHTML = ''; + } +} + +// Update window load to include auto-save setup +window.addEventListener('load', async () => { + appendToConsole('TaSTT Configuration UI initialized', 'info'); + + // Load config first + try { + const config = await window.electronAPI.loadConfig(); + setFormValues(config); + appendToConsole('Configuration loaded', 'info'); + } catch (error) { + appendToConsole(`Failed to load configuration: ${error.message}`, 'stderr'); + } + + // Load microphones + await loadMicrophones(); + + // Set up auto-save after everything is loaded + setupAutoSave(); +}); + +// Console management +const consoleContent = document.getElementById('console-content'); + +function appendToConsole(message, type = 'stdout') { + const timestamp = new Date().toLocaleTimeString(); + const timestampSpan = document.createElement('span'); + timestampSpan.className = 'console-timestamp'; + timestampSpan.textContent = `[${timestamp}] `; + + const messageSpan = document.createElement('span'); + messageSpan.className = `console-${type}`; + messageSpan.textContent = message; + + const lineDiv = document.createElement('div'); + lineDiv.appendChild(timestampSpan); + lineDiv.appendChild(messageSpan); + + consoleContent.appendChild(lineDiv); + + // Auto-scroll to bottom + const pythonConsole = document.getElementById('python-console'); + pythonConsole.scrollTop = pythonConsole.scrollHeight; +} + +// Clear console button +document.getElementById('clear-console').addEventListener('click', () => { + consoleContent.innerHTML = ''; + appendToConsole('Console cleared', 'info'); +}); + +// Listen for Python output +window.electronAPI.onPythonOutput((data) => { + appendToConsole(data.message, data.type); +}); \ No newline at end of file diff --git a/ui/src/components.css b/ui/src/components.css new file mode 100644 index 0000000..be046ea --- /dev/null +++ b/ui/src/components.css @@ -0,0 +1,110 @@ +@tailwind base; +@tailwind components; +@tailwind utilities; + +@layer components { + .config-section { + @apply bg-white rounded-lg shadow-md p-6; + } + + .section-title { + @apply text-xl font-semibold text-gray-700 mb-4; + } + + .form-label { + @apply block text-sm font-medium text-gray-700 mb-2; + } + + .form-input { + @apply w-full px-3 py-2 border border-gray-300 rounded-md shadow-sm focus:outline-none focus:ring-blue-500 focus:border-blue-500 sm:text-sm; + } + + .checkbox-label { + @apply flex items-center cursor-pointer hover:bg-gray-50 p-2 rounded; + } + + .checkbox-text { + @apply text-sm text-gray-700; + } + + .btn { + @apply px-4 py-2 font-medium text-sm rounded-md transition-colors focus:outline-none focus:ring-2 focus:ring-offset-2; + } + + .btn-blue { + @apply bg-blue-600 text-white hover:bg-blue-700 focus:ring-blue-500; + } + + .btn-green { + @apply bg-green-600 text-white hover:bg-green-700 focus:ring-green-500; + } + + .btn-gray { + @apply bg-gray-600 text-white hover:bg-gray-700 focus:ring-gray-500; + } +} + +/* Console styling */ +#python-console { + background-color: #1a1a1a; + font-family: 'Consolas', 'Monaco', 'Courier New', monospace; + line-height: 1.4; +} + +#console-content { + word-wrap: break-word; +} + +/* Console text colors */ +.console-stdout { + color: #a8cc8c; +} + +.console-stderr { + color: #e88388; +} + +.console-info { + color: #66c2cd; +} + +.console-timestamp { + color: #6c7986; + font-size: 0.875rem; +} + +/* Ensure full height layout */ +html, body { + height: 100%; + margin: 0; + padding: 0; +} + +.container-fluid { + max-width: 100%; + height: 100vh; +} + +/* Scrollbar styling for console */ +#python-console::-webkit-scrollbar { + width: 8px; +} + +#python-console::-webkit-scrollbar-track { + background: #2a2a2a; +} + +#python-console::-webkit-scrollbar-thumb { + background: #4a4a4a; + border-radius: 4px; +} + +#python-console::-webkit-scrollbar-thumb:hover { + background: #5a5a5a; +} + +/* Ensure buttons have proper disabled states */ +button:disabled { + cursor: not-allowed; + opacity: 0.5; +} diff --git a/ui/tailwind.config.js b/ui/tailwind.config.js index fa93053..804b7f0 100644 --- a/ui/tailwind.config.js +++ b/ui/tailwind.config.js @@ -1,8 +1,9 @@ /** @type {import('tailwindcss').Config} */ module.exports = { content: [ - "./index.html", - "./src/**/*.{html,js}", + "./*.html", + "./*.js", + "./src/**/*.{html,js}" ], theme: { extend: {}, -- cgit v1.2.3 From f97cef182de55b6dbae8d2bc0477acfca6cc1f66 Mon Sep 17 00:00:00 2001 From: yum Date: Thu, 29 May 2025 19:45:48 -0700 Subject: More UI work 1. main STT app works in new project structure 2. UI dumps mics on startup to populate mic list 3. add missing deps (hf-xet, wave) 4. normalize audio volume when transcribing. Probably still wrong tbqh. 5. add checkbox to save audio segments & improve logic so only segments with speech get saved. 6. add default config settings --- app/hi.py | 7 +- app/list_microphones.py | 24 ++++++ app/requirements.txt | 3 +- app/stt.py | 55 ++++++++++++-- app/vad.py | 3 +- config.yaml | 7 +- ui/index.html | 27 ++++--- ui/index.js | 196 ++++++++++++++++++++++++++++++++++++++---------- ui/preload.js | 5 +- ui/renderer.js | 87 ++++++++++++++++++++- ui/src/components.css | 4 + ui_design.md | 3 + 12 files changed, 355 insertions(+), 66 deletions(-) create mode 100644 app/list_microphones.py (limited to 'ui') diff --git a/app/hi.py b/app/hi.py index 0129958..0d80b9d 100644 --- a/app/hi.py +++ b/app/hi.py @@ -2,6 +2,7 @@ import app_config import argparse from math import floor, ceil import msvcrt +import os from pythonosc import udp_client import sentencepiece as spm from shared_thread_data import SharedThreadData @@ -15,8 +16,11 @@ TESTS_ENABLED = True # 0 = quiet, 1 = verbose, 2 = very verbose LOG_LEVEL = 0 +APP_ROOT = os.path.dirname(os.path.abspath(__file__)) +PROJECT_ROOT = os.path.dirname(APP_ROOT) + def get_tokenizer(): - model_path = "./custom_unigram_tokenizer_65k/unigram.model" + model_path = os.path.join(PROJECT_ROOT, "custom_unigram_tokenizer_65k", "unigram.model") print(f"Loading SentencePiece tokenizer from: {model_path}") sp = spm.SentencePieceProcessor() sp.load(model_path) @@ -346,7 +350,6 @@ if __name__ == "__main__": time.sleep(0.1) continue - try: char = char_bytes.decode('utf-8') if char == '\r' or char == '\n': diff --git a/app/list_microphones.py b/app/list_microphones.py new file mode 100644 index 0000000..a6b1f36 --- /dev/null +++ b/app/list_microphones.py @@ -0,0 +1,24 @@ +import pyaudio +import json +import sys + +try: + p = pyaudio.PyAudio() + info = p.get_host_api_info_by_index(0) + numdevices = info.get('deviceCount') + + microphones = [] + for i in range(0, numdevices): + device_info = p.get_device_info_by_host_api_device_index(0, i) + if device_info.get('maxInputChannels') > 0: + microphones.append({ + 'index': i, + 'name': device_info.get('name'), + 'defaultSampleRate': device_info.get('defaultSampleRate') + }) + + print(json.dumps(microphones)) + p.terminate() +except Exception as e: + print(json.dumps({'error': str(e)}), file=sys.stderr) + sys.exit(1) \ No newline at end of file diff --git a/app/requirements.txt b/app/requirements.txt index 4e79312..07f94cd 100644 --- a/app/requirements.txt +++ b/app/requirements.txt @@ -1,7 +1,8 @@ faster-whisper +hf-xet langcodes pyaudio pydub python-osc sentencepiece - +wave diff --git a/app/stt.py b/app/stt.py index 34ef2e9..c157f6d 100644 --- a/app/stt.py +++ b/app/stt.py @@ -1,3 +1,4 @@ +from datetime import datetime from faster_whisper import WhisperModel import langcodes import numpy as np @@ -9,6 +10,11 @@ import sys import time import typing import vad +import wave + + +APP_ROOT = os.path.dirname(os.path.abspath(__file__)) +PROJECT_ROOT = os.path.dirname(APP_ROOT) class AudioStream(): FORMAT = pyaudio.paInt16 @@ -242,6 +248,26 @@ class NormalizingAudioCollector(AudioCollectorFilter): return frames +class BoostingAudioCollector(AudioCollectorFilter): + def __init__(self, parent: AudioCollector, target_dBFS: float, cfg: typing.Dict): + AudioCollectorFilter.__init__(self, parent) + self.target_dBFS = target_dBFS + self.cfg = cfg + + def getAudio(self) -> bytes: + audio = self.parent.getAudio() + + audio = AudioSegment(audio, sample_width=AudioStream.FRAME_SZ, + frame_rate=AudioStream.FPS, channels=AudioStream.CHANNELS) + if self.cfg["enable_debug_mode"]: + print(f"Boosting audio from {audio.dBFS}dB to {self.target_dBFS}dB", file=sys.stderr) + audio = audio.apply_gain(self.target_dBFS - audio.dBFS) + + frames = np.array(audio.get_array_of_samples()) + frames = np.int16(frames).tobytes() + + return frames + class CompressingAudioCollector(AudioCollectorFilter): def __init__(self, parent: AudioCollector): AudioCollectorFilter.__init__(self, parent) @@ -441,6 +467,16 @@ class TranscriptCommit: self.duration_s = duration_s +def saveAudio(audio: bytes, path: str, cfg: typing.Dict): + with wave.open(path, 'wb') as wf: + if cfg["enable_debug_mode"]: + print(f"Saving audio to {path}", file=sys.stderr) + wf.setnchannels(AudioStream.CHANNELS) + wf.setsampwidth(AudioStream.FRAME_SZ) + wf.setframerate(AudioStream.FPS) + wf.writeframes(audio) + + class VadCommitter: def __init__(self, cfg: typing.Dict, @@ -463,7 +499,6 @@ class VadCommitter: start_ts = self.collector.begin() if has_audio and stable_cutoff: - #print(f"stable cutoff get: {stable_cutoff}", file=sys.stderr) latency_s = self.collector.now() - self.collector.begin() duration_s = stable_cutoff / AudioStream.FPS start_ts = self.collector.begin() @@ -475,12 +510,16 @@ class VadCommitter: if self.cfg["enable_debug_mode"]: for s in segments: print(f"commit segment: {s}", file=sys.stderr) - print(f"delta get: {delta}", file=sys.stderr) + if len(delta) > 0: + print(f"delta get: {delta}", file=sys.stderr) - if False: + if self.cfg["save_audio"] and len(delta) > 0: ts = datetime.fromtimestamp(self.collector.now() - latency_s) filename = str(ts.strftime('%Y_%m_%d__%H-%M-%S')) + ".wav" - saveAudio(commit_audio, filename) + audio_dir = os.path.join(PROJECT_ROOT, "audio") + if not os.path.exists(audio_dir): + os.makedirs(audio_dir) + saveAudio(commit_audio, os.path.join(audio_dir, filename), self.cfg) preview = "" if self.cfg["enable_previews"] and has_audio: @@ -488,7 +527,6 @@ class VadCommitter: preview = "".join(s.transcript for s in segments) if not has_audio: - #print("VAD detects no audio, skip transcription", file=sys.stderr) self.collector.keepLast(1.0) return TranscriptCommit( @@ -504,8 +542,9 @@ def transcriptionThread(shared_data: SharedThreadData): stream = MicStream(shared_data.cfg["microphone"]) collector = AudioCollector(stream) - collector = NormalizingAudioCollector(collector) collector = CompressingAudioCollector(collector) + collector = NormalizingAudioCollector(collector) + collector = BoostingAudioCollector(collector, 0.0, shared_data.cfg) whisper = Whisper(collector, shared_data.cfg) segmenter = AudioSegmenter(min_silence_ms=shared_data.cfg["min_silence_duration_ms"], max_speech_s=shared_data.cfg["max_speech_duration_s"]) @@ -552,13 +591,13 @@ def transcriptionThread(shared_data: SharedThreadData): preview = commit.preview try: - print(f"Transcript: {transcript}") + print(f"Transcript: {transcript}", flush=True) except UnicodeEncodeError: print("Failed to encode transcript - discarding delta", file=sys.stderr) continue try: - print(f"Preview: {preview}") + print(f"Preview: {preview}", flush=True) except UnicodeEncodeError: print("Failed to encode preview - discarding", file=sys.stderr) diff --git a/app/vad.py b/app/vad.py index 10a72d3..1dea765 100644 --- a/app/vad.py +++ b/app/vad.py @@ -259,7 +259,8 @@ def get_vad_model(): """Returns the VAD model instance.""" abspath = os.path.abspath(__file__) my_dir = os.path.dirname(abspath) - path = os.path.join(my_dir, "Models/silero_vad.onnx") + parent_dir = os.path.dirname(my_dir) + path = os.path.join(parent_dir, "Models", "silero_vad.onnx") return SileroVADModel(path) diff --git a/config.yaml b/config.yaml index 164b4e6..34d88f1 100644 --- a/config.yaml +++ b/config.yaml @@ -1,18 +1,17 @@ -compute_type: int8 +compute_type: float16 enable_debug_mode: 0 enable_previews: 1 +save_audio: 0 language: english gpu_idx: 0 max_speech_duration_s: 10 min_silence_duration_ms: 250 -microphone: motu +microphone: 0 model: turbo reset_after_silence_s: 15 transcription_loop_delay_ms: 100 use_cpu: 0 - block_width: 2 num_blocks: 40 rows: 10 cols: 24 - diff --git a/ui/index.html b/ui/index.html index 14cc354..b06e56b 100644 --- a/ui/index.html +++ b/ui/index.html @@ -8,11 +8,9 @@
-

TaSTT

-
- -
+ +
@@ -127,6 +125,10 @@ Enable Previews +
@@ -156,9 +158,17 @@
- +
+ + + +
@@ -167,9 +177,8 @@
-
+
-

Python Output

diff --git a/ui/index.js b/ui/index.js index 0a7fdf9..a056156 100644 --- a/ui/index.js +++ b/ui/index.js @@ -4,14 +4,16 @@ const fs = require('node:fs').promises; const yaml = require('js-yaml'); const { spawn } = require('child_process'); +const APP_ROOT = path.join(__dirname, '..'); +const CONFIG_PATH = path.join(APP_ROOT, 'config.yaml'); + let mainWindow; +let runningProcess = null; // Track the running Python process // Helper function to get the correct Python executable from venv function getVenvPython() { - const venvPath = path.join(__dirname, '..', 'venv'); - const isWindows = process.platform === 'win32'; - const pythonExecutable = isWindows ? 'python.exe' : 'python'; - const pythonPath = path.join(venvPath, isWindows ? 'Scripts' : 'bin', pythonExecutable); + const venvPath = path.join(APP_ROOT, 'venv'); + const pythonPath = path.join(venvPath, 'Scripts', 'python.exe'); return pythonPath; } @@ -29,7 +31,17 @@ function executePythonCommand(args, options = {}) { const commandStr = `${path.basename(pythonPath)} ${args.join(' ')}`; sendPythonOutput(`> ${commandStr}`, 'info'); - const pythonProcess = spawn(pythonPath, args, options); + // Add dll directory to PATH for Windows DLL loading + const dllPath = path.join(APP_ROOT, 'dll'); + const env = { ...process.env }; + env.PATH = `${dllPath};${env.PATH}`; + + const spawnOptions = { + ...options, + env + }; + + const pythonProcess = spawn(pythonPath, args, spawnOptions); let stdout = ''; let stderr = ''; @@ -76,15 +88,47 @@ function createWindow () { mainWindow.loadFile('index.html'); } -// Path to config.yaml (one level up from ui directory) -const configPath = path.join(__dirname, '..', 'config.yaml'); +// Default configuration based on user's current config.yaml +const DEFAULT_CONFIG = { + compute_type: 'float16', + enable_debug_mode: 0, + enable_previews: 1, + save_audio: 0, + language: 'english', + gpu_idx: 0, + max_speech_duration_s: 10, + min_silence_duration_ms: 250, + microphone: 0, + model: 'turbo', + reset_after_silence_s: 15, + transcription_loop_delay_ms: 100, + use_cpu: 0, + block_width: 2, + num_blocks: 40, + rows: 10, + cols: 24 +}; // IPC handlers ipcMain.handle('load-config', async () => { try { - const fileContent = await fs.readFile(configPath, 'utf8'); + const fileContent = await fs.readFile(CONFIG_PATH, 'utf8'); return yaml.load(fileContent); } catch (error) { + if (error.code === 'ENOENT') { + // Config file doesn't exist, create it with defaults + console.log('Config file not found, creating with defaults...'); + try { + const yamlContent = yaml.dump(DEFAULT_CONFIG, { lineWidth: -1 }); + await fs.writeFile(CONFIG_PATH, yamlContent, 'utf8'); + console.log('Created config.yaml with default values'); + return DEFAULT_CONFIG; + } catch (writeError) { + console.error('Error creating default config:', writeError); + // Return defaults even if we can't write the file + return DEFAULT_CONFIG; + } + } console.error('Error loading config:', error); throw error; } @@ -93,7 +137,7 @@ ipcMain.handle('load-config', async () => { ipcMain.handle('save-config', async (event, config) => { try { const yamlContent = yaml.dump(config, { lineWidth: -1 }); - await fs.writeFile(configPath, yamlContent, 'utf8'); + await fs.writeFile(CONFIG_PATH, yamlContent, 'utf8'); return { success: true }; } catch (error) { console.error('Error saving config:', error); @@ -107,7 +151,7 @@ ipcMain.handle('restart-app', () => { }); ipcMain.handle('install-requirements', async (event) => { - const requirementsPath = path.join(__dirname, '..', 'app', 'requirements.txt'); + const requirementsPath = path.join(APP_ROOT, 'app', 'requirements.txt'); try { // Check if requirements.txt exists @@ -126,35 +170,10 @@ ipcMain.handle('install-requirements', async (event) => { }); ipcMain.handle('get-microphones', async () => { - const pythonScript = ` -import pyaudio -import json -import sys - -try: - p = pyaudio.PyAudio() - info = p.get_host_api_info_by_index(0) - numdevices = info.get('deviceCount') - - microphones = [] - for i in range(0, numdevices): - device_info = p.get_device_info_by_host_api_device_index(0, i) - if device_info.get('maxInputChannels') > 0: - microphones.append({ - 'index': i, - 'name': device_info.get('name'), - 'defaultSampleRate': device_info.get('defaultSampleRate') - }) - - print(json.dumps(microphones)) - p.terminate() -except Exception as e: - print(json.dumps({'error': str(e)}), file=sys.stderr) - sys.exit(1) -`; - + const scriptPath = path.join(APP_ROOT, 'app', 'list_microphones.py'); + try { - const result = await executePythonCommand(['-c', pythonScript]); + const result = await executePythonCommand([scriptPath]); const microphones = JSON.parse(result.stdout.trim()); console.log('Successfully retrieved microphones:', microphones); return microphones; @@ -164,6 +183,105 @@ except Exception as e: } }); +// Add handlers for starting and stopping the process +ipcMain.handle('start-process', async () => { + if (runningProcess) { + throw new Error('Process is already running'); + } + + const scriptPath = path.join(APP_ROOT, 'app', 'hi.py'); + const configPath = CONFIG_PATH; + + try { + const pythonPath = getVenvPython(); + const args = [scriptPath, '--config', configPath]; + + sendPythonOutput(`Starting process: ${path.basename(pythonPath)} ${args.join(' ')}`, 'info'); + + // Add dll directory to PATH for Windows DLL loading + const dllPath = path.join(APP_ROOT, 'dll'); + const env = { ...process.env }; + env.PATH = `${dllPath};${env.PATH}`; + + runningProcess = spawn(pythonPath, args, { env }); + + runningProcess.stdout.on('data', (data) => { + const text = data.toString(); + sendPythonOutput(text.trimEnd(), 'stdout'); + }); + + runningProcess.stderr.on('data', (data) => { + const text = data.toString(); + sendPythonOutput(text.trimEnd(), 'stderr'); + }); + + runningProcess.on('error', (error) => { + sendPythonOutput(`Process error: ${error.message}`, 'stderr'); + runningProcess = null; + if (mainWindow && !mainWindow.isDestroyed()) { + mainWindow.webContents.send('process-stopped'); + } + }); + + runningProcess.on('close', (code) => { + sendPythonOutput(`Process exited with code ${code}`, 'info'); + runningProcess = null; + if (mainWindow && !mainWindow.isDestroyed()) { + mainWindow.webContents.send('process-stopped'); + } + }); + + return { success: true }; + } catch (error) { + runningProcess = null; + throw error; + } +}); + +ipcMain.handle('stop-process', async () => { + if (!runningProcess) { + throw new Error('No process is running'); + } + + return new Promise((resolve, reject) => { + let forcefullyKilled = false; + + // Set up a timeout to force kill after 10 seconds + const killTimeout = setTimeout(() => { + if (runningProcess) { + sendPythonOutput('Process did not stop gracefully, forcing termination...', 'stderr'); + forcefullyKilled = true; + runningProcess.kill(); + } + }, 10000); + + // Listen for the process to exit + runningProcess.once('exit', (code, signal) => { + clearTimeout(killTimeout); + runningProcess = null; + + if (forcefullyKilled) { + sendPythonOutput('Process forcefully terminated', 'info'); + } else { + sendPythonOutput('Process stopped gracefully', 'info'); + } + + resolve({ success: true, forcefullyKilled }); + }); + + // Send termination signal + sendPythonOutput('Stopping process gracefully...', 'info'); + runningProcess.kill(); + }); +}); + +// Clean up on app quit +app.on('before-quit', () => { + if (runningProcess) { + runningProcess.kill(); + } +}); + app.whenReady().then(() => { createWindow(); @@ -173,6 +291,6 @@ app.whenReady().then(() => { }); app.on('window-all-closed', function () { - if (process.platform !== 'darwin') app.quit(); + app.quit(); }); diff --git a/ui/preload.js b/ui/preload.js index 108bffe..e6c0623 100644 --- a/ui/preload.js +++ b/ui/preload.js @@ -6,7 +6,10 @@ contextBridge.exposeInMainWorld('electronAPI', { restartApp: () => ipcRenderer.invoke('restart-app'), getMicrophones: () => ipcRenderer.invoke('get-microphones'), installRequirements: () => ipcRenderer.invoke('install-requirements'), - onPythonOutput: (callback) => ipcRenderer.on('python-output', (event, data) => callback(data)) + startProcess: () => ipcRenderer.invoke('start-process'), + stopProcess: () => ipcRenderer.invoke('stop-process'), + onPythonOutput: (callback) => ipcRenderer.on('python-output', (event, data) => callback(data)), + onProcessStopped: (callback) => ipcRenderer.on('process-stopped', (event) => callback()) }); console.log('Preload script loaded.'); diff --git a/ui/renderer.js b/ui/renderer.js index 83c652c..b3f05a6 100644 --- a/ui/renderer.js +++ b/ui/renderer.js @@ -22,15 +22,20 @@ function showStatus(message, type = 'info') { // Get form values function getFormValues() { + const microphoneValue = document.getElementById('microphone').value; + // Convert to number if it's a numeric string (device index) + const microphoneForConfig = /^\d+$/.test(microphoneValue) ? parseInt(microphoneValue) : microphoneValue; + return { compute_type: document.getElementById('compute_type').value, enable_debug_mode: document.getElementById('enable_debug_mode').checked ? 1 : 0, enable_previews: document.getElementById('enable_previews').checked ? 1 : 0, + save_audio: document.getElementById('save_audio').checked ? 1 : 0, language: document.getElementById('language').value, gpu_idx: parseInt(document.getElementById('gpu_idx').value), max_speech_duration_s: parseInt(document.getElementById('max_speech_duration_s').value), min_silence_duration_ms: parseInt(document.getElementById('min_silence_duration_ms').value), - microphone: document.getElementById('microphone').value, + microphone: microphoneForConfig, model: document.getElementById('model').value, reset_after_silence_s: parseInt(document.getElementById('reset_after_silence_s').value), transcription_loop_delay_ms: parseInt(document.getElementById('transcription_loop_delay_ms').value), @@ -52,6 +57,7 @@ function setFormValues(config) { document.getElementById('compute_type').value = config.compute_type || 'int8'; document.getElementById('enable_debug_mode').checked = config.enable_debug_mode === 1; document.getElementById('enable_previews').checked = config.enable_previews === 1; + document.getElementById('save_audio').checked = config.save_audio === 1; document.getElementById('language').value = config.language || 'english'; document.getElementById('gpu_idx').value = config.gpu_idx || 0; document.getElementById('max_speech_duration_s').value = config.max_speech_duration_s || 10; @@ -97,6 +103,30 @@ async function handleAsyncAction(actionName, actionFn) { } } +// Process control buttons +const startButton = document.getElementById('start-process'); +const stopButton = document.getElementById('stop-process'); + +// Helper functions for button state management +function setButtonState(button, disabled) { + button.disabled = disabled; + if (disabled) { + button.classList.add('opacity-50', 'cursor-not-allowed'); + } else { + button.classList.remove('opacity-50', 'cursor-not-allowed'); + } +} + +function setProcessRunningState() { + setButtonState(startButton, true); + setButtonState(stopButton, false); +} + +function setProcessStoppedState() { + setButtonState(startButton, false); + setButtonState(stopButton, true); +} + // Auto-save functionality with debouncing let saveTimeout; const SAVE_DELAY = 500; // milliseconds @@ -110,6 +140,31 @@ async function autoSaveConfig() { const config = getFormValues(); await window.electronAPI.saveConfig(config); showStatus('Configuration saved', 'success'); + + // Check if process is running (stop button is enabled means process is running) + const stopButton = document.getElementById('stop-process'); + + if (!stopButton.disabled) { + // Process is running, restart it with new config + appendToConsole('Restarting process with new configuration...', 'info'); + + try { + await window.electronAPI.stopProcess(); + + await new Promise(resolve => setTimeout(resolve, 1000)); + + await window.electronAPI.startProcess(); + + // Update button states to reflect running process + setProcessRunningState(); + + appendToConsole('Process restarted with new configuration', 'info'); + } catch (error) { + appendToConsole(`Failed to restart process: ${error.message}`, 'stderr'); + // Process is stopped, update button states + setProcessStoppedState(); + } + } } catch (error) { showStatus(`Failed to save configuration: ${error.message}`, 'error'); } @@ -246,4 +301,34 @@ document.getElementById('clear-console').addEventListener('click', () => { // Listen for Python output window.electronAPI.onPythonOutput((data) => { appendToConsole(data.message, data.type); +}); + +document.getElementById('start-process').addEventListener('click', async () => { + setButtonState(startButton, true); + + try { + await window.electronAPI.startProcess(); + setProcessRunningState(); + appendToConsole('Process started successfully', 'info'); + } catch (error) { + appendToConsole(`Failed to start process: ${error.message}`, 'stderr'); + setButtonState(startButton, false); + } +}); + +document.getElementById('stop-process').addEventListener('click', async () => { + setButtonState(stopButton, true); + + try { + const result = await window.electronAPI.stopProcess(); + appendToConsole('Process stop initiated', 'info'); + } catch (error) { + appendToConsole(`Failed to stop process: ${error.message}`, 'stderr'); + setButtonState(stopButton, false); + } +}); + +// Listen for process stopped event +window.electronAPI.onProcessStopped(() => { + setProcessStoppedState(); }); \ No newline at end of file diff --git a/ui/src/components.css b/ui/src/components.css index be046ea..d8d909d 100644 --- a/ui/src/components.css +++ b/ui/src/components.css @@ -42,6 +42,10 @@ .btn-gray { @apply bg-gray-600 text-white hover:bg-gray-700 focus:ring-gray-500; } + + .btn-red { + @apply bg-red-600 text-white hover:bg-red-700 focus:ring-red-500; + } } /* Console styling */ diff --git a/ui_design.md b/ui_design.md index e38c632..06eee65 100644 --- a/ui_design.md +++ b/ui_design.md @@ -26,4 +26,7 @@ npm install --save-dev electron # Get tailwind and deps npm install --save-dev tailwindcss@3 postcss autoprefixer concurrently cross-env npx tailwindcss init -p +# Install vue.js +npm install --save-dev vue@3 @vitejs/plugin-vue vite yaml +npm install --save-dev js-yaml ``` -- cgit v1.2.3 From e1b3f638a1ea448de9691f69eb62ebf4c3944c9f Mon Sep 17 00:00:00 2001 From: yum Date: Fri, 30 May 2025 02:50:55 -0700 Subject: More polish - Filters actually get applied now, huge accuracy boost - Use silero-vad python library instead of rolling our own - Expose prompt parameter - Auto setup venv on launch - Clean up python output - Auto acquire all dependencies on launch - Add icon --- .cursorignore | 2 + .gitignore | 2 +- Images/favicon.ico | Bin 0 -> 92015 bytes app/hi.py | 12 +- app/requirements.txt | 2 +- app/stt.py | 128 +++++++++--- app/vad.py | 314 ---------------------------- config.yaml | 1 + ui/index.html | 336 +++++++++++++++++------------- ui/index.js | 382 +++++++++++++++++++++++++++++----- ui/package.json | 76 ++++++- ui/preload.js | 7 +- ui/renderer.js | 564 +++++++++++++++++++++++++++++++------------------- ui/src/components.css | 8 + ui_design.md | 9 +- 15 files changed, 1085 insertions(+), 758 deletions(-) create mode 100644 .cursorignore create mode 100644 Images/favicon.ico delete mode 100644 app/vad.py (limited to 'ui') diff --git a/.cursorignore b/.cursorignore new file mode 100644 index 0000000..a8f4624 --- /dev/null +++ b/.cursorignore @@ -0,0 +1,2 @@ +**/node_modules +**/site-packages \ No newline at end of file diff --git a/.gitignore b/.gitignore index a102cf0..d3886ca 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,3 @@ .*.sw[po] *.meta - +.venv_is_set_up diff --git a/Images/favicon.ico b/Images/favicon.ico new file mode 100644 index 0000000..25ea9ac Binary files /dev/null and b/Images/favicon.ico differ diff --git a/app/hi.py b/app/hi.py index 0d80b9d..e6877ff 100644 --- a/app/hi.py +++ b/app/hi.py @@ -330,10 +330,11 @@ if __name__ == "__main__": cli_args = parse_args() cfg = app_config.getConfig(cli_args.config) shared_data = SharedThreadData(cfg) - osc_thread = threading.Thread( - target=osc_thread, - args=(shared_data,)) - osc_thread.start() + if False: + osc_thread = threading.Thread( + target=osc_thread, + args=(shared_data,)) + osc_thread.start() transcribe_thread = threading.Thread( target=stt.transcriptionThread, @@ -382,6 +383,7 @@ if __name__ == "__main__": local_word = shared_data.word print(local_word + "_") shared_data.exit_event.set() - osc_thread.join() + if False: + osc_thread.join() transcribe_thread.join() diff --git a/app/requirements.txt b/app/requirements.txt index 07f94cd..f8b7069 100644 --- a/app/requirements.txt +++ b/app/requirements.txt @@ -5,4 +5,4 @@ pyaudio pydub python-osc sentencepiece -wave +silero-vad diff --git a/app/stt.py b/app/stt.py index c157f6d..7d76333 100644 --- a/app/stt.py +++ b/app/stt.py @@ -6,10 +6,10 @@ import os import pyaudio from pydub import AudioSegment from shared_thread_data import SharedThreadData +from silero_vad import load_silero_vad, get_speech_timestamps import sys import time import typing -import vad import wave @@ -33,7 +33,7 @@ class AudioStream(): class MicStream(AudioStream): CHUNK_SZ = 1024 - def __init__(self, which_mic: str): + def __init__(self, cfg: typing.Dict): self.p = pyaudio.PyAudio() self.stream = None self.sample_rate = None @@ -45,8 +45,11 @@ class MicStream(AudioStream): # If set, incoming frames are simply discarded. self.paused = False - print(f"Finding mic {which_mic}", file=sys.stderr) - self.dumpMicDevices() + which_mic = cfg["microphone"] + + if cfg["enable_debug_mode"]: + print(f"Finding mic {which_mic}", file=sys.stderr) + self.dumpMicDevices() got_match = False device_index = -1 @@ -59,8 +62,9 @@ class MicStream(AudioStream): elif which_mic == "beyond": target_str = "Microphone (Beyond)" else: - print(f"Mic {which_mic} requested, treating it as a numerical " + - "device ID", file=sys.stderr) + if cfg["enable_debug_mode"]: + print(f"Mic {which_mic} requested, treating it as a numerical " + + "device ID", file=sys.stderr) device_index = int(which_mic) got_match = True if not got_match: @@ -79,9 +83,11 @@ class MicStream(AudioStream): raise KeyError(f"Mic {which_mic} not found") info = self.p.get_device_info_by_host_api_device_index(0, device_index) - print(f"Found mic {which_mic}: {info['name']}", file=sys.stderr) + if cfg["enable_debug_mode"]: + print(f"Found mic {which_mic}: {info['name']}", file=sys.stderr) self.sample_rate = int(info['defaultSampleRate']) - print(f"Mic sample rate: {self.sample_rate}", file=sys.stderr) + if cfg["enable_debug_mode"]: + print(f"Mic sample rate: {self.sample_rate}", file=sys.stderr) self.stream = self.p.open( rate=self.sample_rate, @@ -289,19 +295,40 @@ class AudioSegmenter: def __init__(self, min_silence_ms=250, max_speech_s=5): - self.vad_options = vad.VadOptions( - min_silence_duration_ms=min_silence_ms, - max_speech_duration_s=max_speech_s) - pass + self.min_silence_ms = min_silence_ms + self.max_speech_s = max_speech_s + + # Load Silero VAD model + self.model = load_silero_vad() + + self.vad_threshold = 0.3 + self.min_silence_duration_ms = min_silence_ms + self.max_speech_duration_s = max_speech_s + + self.speech_pad_ms = 300 def segmentAudio(self, audio: bytes): - audio = np.frombuffer(audio, + # Convert audio bytes to numpy array expected by silero-vad + audio_array = np.frombuffer(audio, dtype=np.int16).flatten().astype(np.float32) / 32768.0 - return vad.get_speech_timestamps(audio, vad_options=self.vad_options) + + # Get speech timestamps using silero-vad + # Note: silero-vad expects sample rate of 16000 Hz which matches AudioStream.FPS + speech_timestamps = get_speech_timestamps( + audio_array, + self.model, + sampling_rate=AudioStream.FPS, + threshold=self.vad_threshold, + min_silence_duration_ms=self.min_silence_duration_ms, + max_speech_duration_s=self.max_speech_duration_s, + return_seconds=False # We want frame indices, not seconds + ) + + return speech_timestamps # Returns the stable cutoff (if any) and whether there are any segments. def getStableCutoff(self, audio: bytes) -> typing.Tuple[int, bool]: - min_delta_frames = int((self.vad_options.min_silence_duration_ms * + min_delta_frames = int((self.min_silence_duration_ms * AudioStream.FPS) / 1000.0) cutoff = None @@ -379,8 +406,9 @@ class Whisper: model_str = cfg["model"] model_root = os.path.join(parent_dir, "Models", os.path.normpath(model_str)) - print(f"Model {cfg['model']} will be saved to {model_root}", - file=sys.stderr) + if cfg["enable_debug_mode"]: + print(f"Model {cfg['model']} will be saved to {model_root}", + file=sys.stderr) model_device = "cuda" if cfg["use_cpu"]: @@ -395,21 +423,42 @@ class Whisper: download_root = model_root, local_files_only = already_downloaded) + self.context_window_chars = 200 # Keep last 200 chars of context + self.recent_context = "" # Store recent committed text + + def update_context(self, committed_text: str): + """Update the context with recently committed text.""" + self.recent_context = (self.recent_context + " " + committed_text).strip() + # Keep only the last N characters to avoid prompt getting too long + if len(self.recent_context) > self.context_window_chars: + self.recent_context = self.recent_context[-self.context_window_chars:] + def transcribe(self, frames: bytes = None) -> typing.List[Segment]: if frames is None: frames = self.collector.getAudio() - # Convert from signed 16-bit int [-32768, 32767] to signed 32-bit float on - # [-1, 1]. + + # Convert audio to float32 audio = np.frombuffer(frames, dtype=np.int16).flatten().astype(np.float32) / 32768.0 + # Build context-aware prompt + prompt = self._build_prompt() + t0 = time.time() segments, info = self.model.transcribe( audio, language = langcodes.find(self.cfg["language"]).language, vad_filter = True, temperature=0.0, - without_timestamps = False) + without_timestamps = False, + initial_prompt=prompt, + beam_size=5, + best_of=5, + condition_on_previous_text=True, + compression_ratio_threshold=2.4, + log_prob_threshold=-1.0, + no_speech_threshold=0.6 + ) res = [] for s in segments: # Manual touchup. I see a decent number of hallucinations sneaking @@ -445,6 +494,17 @@ class Whisper: print(f"Transcription latency (s): {t1 - t0}") return res + def _build_prompt(self) -> str: + """Build a context-aware prompt for Whisper.""" + user_prompt = self.cfg["user_prompt"] + context_prompt = "" + if self.recent_context and len(self.recent_context) > 0: + context_prompt = f"Here is the context so far: {self.recent_context}" + + prompts = [user_prompt, context_prompt] + prompts = [p for p in prompts if p and len(p) > 0] + return " ".join(prompts) + class TranscriptCommit: def __init__(self, delta: str, @@ -502,10 +562,21 @@ class VadCommitter: latency_s = self.collector.now() - self.collector.begin() duration_s = stable_cutoff / AudioStream.FPS start_ts = self.collector.begin() - commit_audio = self.collector.dropAudioPrefixByFrames(stable_cutoff) + + # Get the filtered audio first, then extract the portion we need + filtered_audio = self.collector.getAudio() + commit_audio = filtered_audio[:stable_cutoff * AudioStream.FRAME_SZ] + + # Now drop the prefix from the collector + self.collector.dropAudioPrefixByFrames(stable_cutoff) segments = self.whisper.transcribe(commit_audio) delta = ''.join(s.transcript for s in segments) + + # Update whisper's context with the committed text + if delta.strip(): + self.whisper.update_context(delta.strip()) + audio = self.collector.getAudio() if self.cfg["enable_debug_mode"]: for s in segments: @@ -540,11 +611,11 @@ class VadCommitter: def transcriptionThread(shared_data: SharedThreadData): last_stable_commit = None - stream = MicStream(shared_data.cfg["microphone"]) + stream = MicStream(shared_data.cfg) collector = AudioCollector(stream) collector = CompressingAudioCollector(collector) + collector = BoostingAudioCollector(collector, -12.0, shared_data.cfg) collector = NormalizingAudioCollector(collector) - collector = BoostingAudioCollector(collector, 0.0, shared_data.cfg) whisper = Whisper(collector, shared_data.cfg) segmenter = AudioSegmenter(min_silence_ms=shared_data.cfg["min_silence_duration_ms"], max_speech_s=shared_data.cfg["max_speech_duration_s"]) @@ -553,6 +624,8 @@ def transcriptionThread(shared_data: SharedThreadData): transcript = "" preview = "" + print(f"Ready to go!", flush=True) + while not shared_data.exit_event.is_set(): time.sleep(shared_data.cfg["transcription_loop_delay_ms"] / 1000.0); @@ -561,8 +634,7 @@ def transcriptionThread(shared_data: SharedThreadData): commit = committer.getDelta() if len(commit.delta) > 0 or len(commit.preview) > 0: - # Avoid re-sending text after long pauses. User controls the length - # of the pause in the UI. + # Avoid re-sending text after long pauses if shared_data.cfg["reset_after_silence_s"] > 0: silence_duration = 0 if last_stable_commit: @@ -571,10 +643,12 @@ def transcriptionThread(shared_data: SharedThreadData): last_stable_commit.duration_s silence_duration = commit.start_ts - last_commit_end_ts if silence_duration > shared_data.cfg["reset_after_silence_s"]: - print(f"Resetting transcript after {silence_duration}-second " - "silence", file=sys.stderr) + if shared_data.cfg["enable_debug_mode"]: + print(f"Resetting transcript after {silence_duration}-second " + "silence", file=sys.stderr) transcript = "" preview = "" + whisper.recent_context = "" # Reset context too if commit.delta: last_stable_commit = commit diff --git a/app/vad.py b/app/vad.py deleted file mode 100644 index 1dea765..0000000 --- a/app/vad.py +++ /dev/null @@ -1,314 +0,0 @@ -# MIT License -# -# Copyright (c) 2023 Guillaume Klein -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. - -import bisect -import functools -import os -import warnings - -from typing import List, NamedTuple, Optional - -import numpy as np - - -# The code below is adapted from https://github.com/snakers4/silero-vad. -class VadOptions(NamedTuple): - """VAD options. - - Attributes: - threshold: Speech threshold. Silero VAD outputs speech probabilities for each audio chunk, - probabilities ABOVE this value are considered as SPEECH. It is better to tune this - parameter for each dataset separately, but "lazy" 0.5 is pretty good for most datasets. - min_speech_duration_ms: Final speech chunks shorter min_speech_duration_ms are thrown out. - max_speech_duration_s: Maximum duration of speech chunks in seconds. Chunks longer - than max_speech_duration_s will be split at the timestamp of the last silence that - lasts more than 100ms (if any), to prevent aggressive cutting. Otherwise, they will be - split aggressively just before max_speech_duration_s. - min_silence_duration_ms: In the end of each speech chunk wait for min_silence_duration_ms - before separating it - window_size_samples: Audio chunks of window_size_samples size are fed to the silero VAD model. - WARNING! Silero VAD models were trained using 512, 1024, 1536 samples for 16000 sample rate. - Values other than these may affect model performance!! - speech_pad_ms: Final speech chunks are padded by speech_pad_ms each side - """ - - threshold: float = 0.5 - min_speech_duration_ms: int = 250 - max_speech_duration_s: float = float("inf") - min_silence_duration_ms: int = 2000 - window_size_samples: int = 1024 - speech_pad_ms: int = 400 - - -def get_speech_timestamps( - audio: np.ndarray, - vad_options: Optional[VadOptions] = None, - **kwargs, -) -> List[dict]: - """This method is used for splitting long audios into speech chunks using silero VAD. - - Args: - audio: One dimensional float array. - vad_options: Options for VAD processing. - kwargs: VAD options passed as keyword arguments for backward compatibility. - - Returns: - List of dicts containing begin and end samples of each speech chunk. - """ - if vad_options is None: - vad_options = VadOptions(**kwargs) - - threshold = vad_options.threshold - min_speech_duration_ms = vad_options.min_speech_duration_ms - max_speech_duration_s = vad_options.max_speech_duration_s - min_silence_duration_ms = vad_options.min_silence_duration_ms - window_size_samples = vad_options.window_size_samples - speech_pad_ms = vad_options.speech_pad_ms - - if window_size_samples not in [512, 1024, 1536]: - warnings.warn( - "Unusual window_size_samples! Supported window_size_samples:\n" - " - [512, 1024, 1536] for 16000 sampling_rate" - ) - - sampling_rate = 16000 - min_speech_samples = sampling_rate * min_speech_duration_ms / 1000 - speech_pad_samples = sampling_rate * speech_pad_ms / 1000 - max_speech_samples = ( - sampling_rate * max_speech_duration_s - - window_size_samples - - 2 * speech_pad_samples - ) - min_silence_samples = sampling_rate * min_silence_duration_ms / 1000 - min_silence_samples_at_max_speech = sampling_rate * 98 / 1000 - - audio_length_samples = len(audio) - - model = get_vad_model() - state = model.get_initial_state(batch_size=1) - - speech_probs = [] - for current_start_sample in range(0, audio_length_samples, window_size_samples): - chunk = audio[current_start_sample : current_start_sample + window_size_samples] - if len(chunk) < window_size_samples: - chunk = np.pad(chunk, (0, int(window_size_samples - len(chunk)))) - speech_prob, state = model(chunk, state, sampling_rate) - speech_probs.append(speech_prob) - - triggered = False - speeches = [] - current_speech = {} - neg_threshold = threshold - 0.15 - - # to save potential segment end (and tolerate some silence) - temp_end = 0 - # to save potential segment limits in case of maximum segment size reached - prev_end = next_start = 0 - - for i, speech_prob in enumerate(speech_probs): - if (speech_prob >= threshold) and temp_end: - temp_end = 0 - if next_start < prev_end: - next_start = window_size_samples * i - - if (speech_prob >= threshold) and not triggered: - triggered = True - current_speech["start"] = window_size_samples * i - continue - - if ( - triggered - and (window_size_samples * i) - current_speech["start"] > max_speech_samples - ): - if prev_end: - current_speech["end"] = prev_end - speeches.append(current_speech) - current_speech = {} - # previously reached silence (< neg_thres) and is still not speech (< thres) - if next_start < prev_end: - triggered = False - else: - current_speech["start"] = next_start - prev_end = next_start = temp_end = 0 - else: - current_speech["end"] = window_size_samples * i - speeches.append(current_speech) - current_speech = {} - prev_end = next_start = temp_end = 0 - triggered = False - continue - - if (speech_prob < neg_threshold) and triggered: - if not temp_end: - temp_end = window_size_samples * i - # condition to avoid cutting in very short silence - if (window_size_samples * i) - temp_end > min_silence_samples_at_max_speech: - prev_end = temp_end - if (window_size_samples * i) - temp_end < min_silence_samples: - continue - else: - current_speech["end"] = temp_end - if ( - current_speech["end"] - current_speech["start"] - ) > min_speech_samples: - speeches.append(current_speech) - current_speech = {} - prev_end = next_start = temp_end = 0 - triggered = False - continue - - if ( - current_speech - and (audio_length_samples - current_speech["start"]) > min_speech_samples - ): - current_speech["end"] = audio_length_samples - speeches.append(current_speech) - - for i, speech in enumerate(speeches): - if i == 0: - speech["start"] = int(max(0, speech["start"] - speech_pad_samples)) - if i != len(speeches) - 1: - silence_duration = speeches[i + 1]["start"] - speech["end"] - if silence_duration < 2 * speech_pad_samples: - speech["end"] += int(silence_duration // 2) - speeches[i + 1]["start"] = int( - max(0, speeches[i + 1]["start"] - silence_duration // 2) - ) - else: - speech["end"] = int( - min(audio_length_samples, speech["end"] + speech_pad_samples) - ) - speeches[i + 1]["start"] = int( - max(0, speeches[i + 1]["start"] - speech_pad_samples) - ) - else: - speech["end"] = int( - min(audio_length_samples, speech["end"] + speech_pad_samples) - ) - - return speeches - - -def collect_chunks(audio: np.ndarray, chunks: List[dict]) -> np.ndarray: - """Collects and concatenates audio chunks.""" - if not chunks: - return np.array([], dtype=np.float32) - - return np.concatenate([audio[chunk["start"] : chunk["end"]] for chunk in chunks]) - - -class SpeechTimestampsMap: - """Helper class to restore original speech timestamps.""" - - def __init__(self, chunks: List[dict], sampling_rate: int, time_precision: int = 2): - self.sampling_rate = sampling_rate - self.time_precision = time_precision - self.chunk_end_sample = [] - self.total_silence_before = [] - - previous_end = 0 - silent_samples = 0 - - for chunk in chunks: - silent_samples += chunk["start"] - previous_end - previous_end = chunk["end"] - - self.chunk_end_sample.append(chunk["end"] - silent_samples) - self.total_silence_before.append(silent_samples / sampling_rate) - - def get_original_time( - self, - time: float, - chunk_index: Optional[int] = None, - ) -> float: - if chunk_index is None: - chunk_index = self.get_chunk_index(time) - - total_silence_before = self.total_silence_before[chunk_index] - return round(total_silence_before + time, self.time_precision) - - def get_chunk_index(self, time: float) -> int: - sample = int(time * self.sampling_rate) - return min( - bisect.bisect(self.chunk_end_sample, sample), - len(self.chunk_end_sample) - 1, - ) - - -@functools.lru_cache -def get_vad_model(): - """Returns the VAD model instance.""" - abspath = os.path.abspath(__file__) - my_dir = os.path.dirname(abspath) - parent_dir = os.path.dirname(my_dir) - path = os.path.join(parent_dir, "Models", "silero_vad.onnx") - return SileroVADModel(path) - - -class SileroVADModel: - def __init__(self, path): - try: - import onnxruntime - except ImportError as e: - raise RuntimeError( - "Applying the VAD filter requires the onnxruntime package" - ) from e - - opts = onnxruntime.SessionOptions() - opts.inter_op_num_threads = 1 - opts.intra_op_num_threads = 1 - opts.log_severity_level = 4 - - self.session = onnxruntime.InferenceSession( - path, - providers=["CPUExecutionProvider"], - sess_options=opts, - ) - - def get_initial_state(self, batch_size: int): - h = np.zeros((2, batch_size, 64), dtype=np.float32) - c = np.zeros((2, batch_size, 64), dtype=np.float32) - return h, c - - def __call__(self, x, state, sr: int): - if len(x.shape) == 1: - x = np.expand_dims(x, 0) - if len(x.shape) > 2: - raise ValueError( - f"Too many dimensions for input audio chunk {len(x.shape)}" - ) - if sr / x.shape[1] > 31.25: - raise ValueError("Input audio chunk is too short") - - h, c = state - - ort_inputs = { - "input": x, - "h": h, - "c": c, - "sr": np.array(sr, dtype="int64"), - } - - out, h, c = self.session.run(None, ort_inputs) - state = (h, c) - - return out, state diff --git a/config.yaml b/config.yaml index 34d88f1..5eec7a2 100644 --- a/config.yaml +++ b/config.yaml @@ -1,6 +1,7 @@ compute_type: float16 enable_debug_mode: 0 enable_previews: 1 +user_prompt: Use proper punctuation and grammar. Prefer spelled out numbers like one, eleven, twenty, etc. save_audio: 0 language: english gpu_idx: 0 diff --git a/ui/index.html b/ui/index.html index b06e56b..90f78c1 100644 --- a/ui/index.html +++ b/ui/index.html @@ -10,179 +10,229 @@
-
-
- -
-
-
- - -
-
- - -
-
- - -
-
-
- - - - - -
-
- -
diff --git a/ui/index.js b/ui/index.js index a056156..2420ece 100644 --- a/ui/index.js +++ b/ui/index.js @@ -3,6 +3,7 @@ const path = require('node:path'); const fs = require('node:fs').promises; const yaml = require('js-yaml'); const { spawn } = require('child_process'); +const https = require('https'); const APP_ROOT = path.join(__dirname, '..'); const CONFIG_PATH = path.join(APP_ROOT, 'config.yaml'); @@ -10,6 +11,20 @@ const CONFIG_PATH = path.join(APP_ROOT, 'config.yaml'); let mainWindow; let runningProcess = null; // Track the running Python process +// Required DLL files for CUDA/cuDNN support +const REQUIRED_DLLS = [ + 'cublas64_12.dll', + 'cublasLt64_12.dll', + 'cudnn64_9.dll', + 'cudnn_adv64_9.dll', + 'cudnn_cnn64_9.dll', + 'cudnn_engines_precompiled64_9.dll', + 'cudnn_engines_runtime_compiled64_9.dll', + 'cudnn_graph64_9.dll', + 'cudnn_heuristic64_9.dll', + 'cudnn_ops64_9.dll' +]; + // Helper function to get the correct Python executable from venv function getVenvPython() { const venvPath = path.join(APP_ROOT, 'venv'); @@ -24,6 +39,78 @@ function sendPythonOutput(message, type = 'stdout') { } } +// Helper function to create environment with DLL path +function createPythonEnvironment() { + const dllPath = path.join(APP_ROOT, 'dll'); + const binPath = path.join(APP_ROOT, 'bin'); + const env = { ...process.env }; + env.PATH = `${dllPath};${binPath};${env.PATH}`; + env.HF_HUB_DISABLE_SYMLINKS_WARNING = '1'; + return env; +} + +// Helper function to download a file from URL +function downloadFile(url, outputPath) { + return new Promise((resolve, reject) => { + const file = require('fs').createWriteStream(outputPath); + + const request = https.get(url, (response) => { + if (response.statusCode === 200) { + response.pipe(file); + + file.on('finish', () => { + file.close(); + resolve(); + }); + + file.on('error', (err) => { + fs.unlink(outputPath).catch(() => {}); // Clean up on error + reject(err); + }); + } else { + file.close(); + fs.unlink(outputPath).catch(() => {}); // Clean up on error + reject(new Error(`Failed to download: HTTP ${response.statusCode}`)); + } + }); + + request.on('error', (err) => { + file.close(); + fs.unlink(outputPath).catch(() => {}); // Clean up on error + reject(err); + }); + }); +} + +// Helper function to setup process event handlers +function setupProcessHandlers(process) { + process.stdout.on('data', (data) => { + const text = data.toString(); + sendPythonOutput(text.trimEnd(), 'stdout'); + }); + + process.stderr.on('data', (data) => { + const text = data.toString(); + sendPythonOutput(text.trimEnd(), 'stderr'); + }); + + process.on('error', (error) => { + sendPythonOutput(`Process error: ${error.message}`, 'stderr'); + runningProcess = null; + if (mainWindow && !mainWindow.isDestroyed()) { + mainWindow.webContents.send('process-stopped'); + } + }); + + process.on('close', (code) => { + sendPythonOutput(`Process exited with code ${code}`, 'info'); + runningProcess = null; + if (mainWindow && !mainWindow.isDestroyed()) { + mainWindow.webContents.send('process-stopped'); + } + }); +} + // Helper function to execute Python commands using venv function executePythonCommand(args, options = {}) { return new Promise((resolve, reject) => { @@ -31,14 +118,9 @@ function executePythonCommand(args, options = {}) { const commandStr = `${path.basename(pythonPath)} ${args.join(' ')}`; sendPythonOutput(`> ${commandStr}`, 'info'); - // Add dll directory to PATH for Windows DLL loading - const dllPath = path.join(APP_ROOT, 'dll'); - const env = { ...process.env }; - env.PATH = `${dllPath};${env.PATH}`; - const spawnOptions = { ...options, - env + env: createPythonEnvironment() }; const pythonProcess = spawn(pythonPath, args, spawnOptions); @@ -78,6 +160,7 @@ function createWindow () { mainWindow = new BrowserWindow({ width: 1000, height: 800, + icon: path.join(APP_ROOT, 'Images', 'favicon.ico'), webPreferences: { preload: path.join(__dirname, 'preload.js'), contextIsolation: true, @@ -93,6 +176,7 @@ const DEFAULT_CONFIG = { compute_type: 'float16', enable_debug_mode: 0, enable_previews: 1, + user_prompt: 'Use proper punctuation and grammar. Prefer spelled out numbers like one, eleven, twenty, etc.', save_audio: 0, language: 'english', gpu_idx: 0, @@ -117,11 +201,11 @@ ipcMain.handle('load-config', async () => { } catch (error) { if (error.code === 'ENOENT') { // Config file doesn't exist, create it with defaults - console.log('Config file not found, creating with defaults...'); + console.error('Config file not found, creating with defaults...'); try { const yamlContent = yaml.dump(DEFAULT_CONFIG, { lineWidth: -1 }); await fs.writeFile(CONFIG_PATH, yamlContent, 'utf8'); - console.log('Created config.yaml with default values'); + console.error('Created config.yaml with default values'); return DEFAULT_CONFIG; } catch (writeError) { console.error('Error creating default config:', writeError); @@ -145,21 +229,138 @@ ipcMain.handle('save-config', async (event, config) => { } }); -ipcMain.handle('restart-app', () => { - app.relaunch(); - app.exit(); +ipcMain.handle('reset-config', async () => { + try { + // Check if the file exists first + try { + await fs.access(CONFIG_PATH); + // File exists, delete it + await fs.unlink(CONFIG_PATH); + console.error('Config file deleted successfully'); + return { success: true, message: 'Configuration reset to defaults' }; + } catch (error) { + if (error.code === 'ENOENT') { + // Config file doesn't exist, that's fine + return { success: true, message: 'Configuration already at defaults' }; + } + throw error; + } + } catch (error) { + console.error('Error resetting config:', error); + throw new Error(`Failed to reset configuration: ${error.message}`); + } }); -ipcMain.handle('install-requirements', async (event) => { +// Generic function to ensure required files are present +async function ensureRequiredFiles(config) { + const { + directoryName, + requiredFiles, + downloadBaseUrl, + resourceType + } = config; + + const targetPath = path.join(APP_ROOT, directoryName); + + try { + // Check if target directory exists, create it if not + try { + await fs.access(targetPath); + sendPythonOutput(`${resourceType} directory exists`, 'info'); + } catch (error) { + if (error.code === 'ENOENT') { + sendPythonOutput(`Creating ${resourceType} directory...`, 'info'); + await fs.mkdir(targetPath, { recursive: true }); + sendPythonOutput(`${resourceType} directory created`, 'info'); + } else { + throw error; + } + } + + // Check each required file + const missingFiles = []; + for (const fileName of requiredFiles) { + const filePath = path.join(targetPath, fileName); + try { + await fs.access(filePath); + sendPythonOutput(`✓ ${fileName} exists`, 'info'); + } catch (error) { + if (error.code === 'ENOENT') { + missingFiles.push(fileName); + sendPythonOutput(`✗ ${fileName} missing`, 'info'); + } else { + throw error; + } + } + } + + // Download missing files + if (missingFiles.length > 0) { + sendPythonOutput(`Downloading ${missingFiles.length} missing ${resourceType} file${missingFiles.length > 1 ? 's' : ''}...`, 'info'); + + for (const fileName of missingFiles) { + const filePath = path.join(targetPath, fileName); + const downloadUrl = `${downloadBaseUrl}/${fileName}`; + + try { + sendPythonOutput(`Downloading ${fileName}...`, 'info'); + await downloadFile(downloadUrl, filePath); + sendPythonOutput(`✓ Downloaded ${fileName}`, 'info'); + } catch (downloadError) { + sendPythonOutput(`✗ Failed to download ${fileName}: ${downloadError.message}`, 'stderr'); + throw new Error(`Failed to download ${fileName}: ${downloadError.message}`); + } + } + + sendPythonOutput(`All missing ${resourceType} files downloaded successfully`, 'info'); + } else { + sendPythonOutput(`All required ${resourceType} files are present`, 'info'); + } + + return { + success: true, + message: `${resourceType} setup complete. ${missingFiles.length} file${missingFiles.length > 1 ? 's' : ''} downloaded.`, + downloadedFiles: missingFiles + }; + } catch (error) { + console.error(`Error setting up ${resourceType} files:`, error); + throw new Error(`${resourceType} setup failed: ${error.message}`); + } +} + +// Update the install-requirements handler +ipcMain.handle('install-requirements', async () => { const requirementsPath = path.join(APP_ROOT, 'app', 'requirements.txt'); + const venvMarkerPath = path.join(APP_ROOT, '.venv_is_set_up'); try { + // Check if venv is already set up + try { + await fs.access(venvMarkerPath); + sendPythonOutput('Virtual environment already set up, skipping installation', 'info'); + return { success: true, message: 'Virtual environment already set up' }; + } catch (error) { + // Marker doesn't exist, proceed with setup + } + // Check if requirements.txt exists await fs.access(requirementsPath); - const result = await executePythonCommand(['-m', 'pip', 'install', '-r', requirementsPath]); + await executePythonCommand(['-m', 'pip', 'install', '-r', requirementsPath]); + + await ensureRequiredFiles({ + directoryName: 'dll', + requiredFiles: REQUIRED_DLLS, + downloadBaseUrl: 'https://yummers.dev/tastt/dll', + resourceType: 'DLL' + }); + + await fs.mkdir(path.join(APP_ROOT, 'Models'), { recursive: true }); + + await fs.writeFile(venvMarkerPath, new Date().toISOString(), 'utf8'); + sendPythonOutput('Created .venv_is_set_up marker file', 'info'); - return { success: true, message: 'Requirements installed successfully' }; + return { success: true, message: 'Requirements and dependencies installed successfully' }; } catch (error) { console.error('Error installing requirements:', error); if (error.code === 'ENOENT') { @@ -175,7 +376,6 @@ ipcMain.handle('get-microphones', async () => { try { const result = await executePythonCommand([scriptPath]); const microphones = JSON.parse(result.stdout.trim()); - console.log('Successfully retrieved microphones:', microphones); return microphones; } catch (error) { console.error('Failed to get microphones:', error); @@ -183,53 +383,135 @@ ipcMain.handle('get-microphones', async () => { } }); -// Add handlers for starting and stopping the process -ipcMain.handle('start-process', async () => { - if (runningProcess) { - throw new Error('Process is already running'); +// Helper function to safely delete directory contents +async function clearDirectory(dirPath, dirName) { + try { + await fs.access(dirPath); + sendPythonOutput(`Clearing ${dirName} directory...`, 'info'); + + const files = await fs.readdir(dirPath); + let deletedCount = 0; + + for (const file of files) { + const filePath = path.join(dirPath, file); + + try { + await fs.rm(filePath, { recursive: true, force: true }); + sendPythonOutput(`✗ Deleted file ${file}`, 'info'); + + deletedCount++; + } catch (deleteError) { + sendPythonOutput(`Warning: Could not delete ${file}: ${deleteError.message}`, 'stderr'); + // Continue with other files even if one fails + } + } + + sendPythonOutput(`${dirName} directory cleared`, 'info'); + return deletedCount; + } catch (error) { + if (error.code === 'ENOENT') { + sendPythonOutput(`${dirName} directory doesn't exist, skipping`, 'info'); + return 0; + } else { + sendPythonOutput(`Error clearing ${dirName} directory: ${error.message}`, 'stderr'); + throw error; + } } +} - const scriptPath = path.join(APP_ROOT, 'app', 'hi.py'); - const configPath = CONFIG_PATH; +ipcMain.handle('reset-venv', async () => { + const venvMarkerPath = path.join(APP_ROOT, '.venv_is_set_up'); try { - const pythonPath = getVenvPython(); - const args = [scriptPath, '--config', configPath]; + sendPythonOutput('Starting virtual environment reset...', 'info'); - sendPythonOutput(`Starting process: ${path.basename(pythonPath)} ${args.join(' ')}`, 'info'); + // Delete the venv marker file first + try { + await fs.unlink(venvMarkerPath); + sendPythonOutput('Deleted .venv_is_set_up marker file', 'info'); + } catch (error) { + if (error.code !== 'ENOENT') { + sendPythonOutput(`Warning: Could not delete marker file: ${error.message}`, 'stderr'); + } + } + + // Get list of installed packages + sendPythonOutput('Getting list of installed packages...', 'info'); + const freezeResult = await executePythonCommand(['-m', 'pip', 'freeze']); + const installedPackages = freezeResult.stdout.trim(); + + let uninstalledPackages = []; + + if (!installedPackages) { + sendPythonOutput('No packages found to uninstall', 'info'); + } else { + // Parse package names and filter out core packages + const packageLines = installedPackages.split('\n').filter(line => line.trim()); + const packageNames = packageLines + .map(line => line.split('==')[0].trim()) + .filter(name => name && !name.startsWith('#')); + + const corePackages = ['pip', 'setuptools', 'wheel']; + const packagesToUninstall = packageNames.filter(name => !corePackages.includes(name.toLowerCase())); + + if (packagesToUninstall.length === 0) { + sendPythonOutput('Only core packages found, nothing to uninstall', 'info'); + } else { + sendPythonOutput(`Uninstalling ${packagesToUninstall.length} packages...`, 'info'); + + const uninstallArgs = ['-m', 'pip', 'uninstall', '-y', ...packagesToUninstall]; + await executePythonCommand(uninstallArgs); + uninstalledPackages = packagesToUninstall; + } + } + + // Clear downloaded files + sendPythonOutput('Clearing downloaded files...', 'info'); - // Add dll directory to PATH for Windows DLL loading const dllPath = path.join(APP_ROOT, 'dll'); - const env = { ...process.env }; - env.PATH = `${dllPath};${env.PATH}`; + const modelsPath = path.join(APP_ROOT, 'Models'); + const binPath = path.join(APP_ROOT, 'bin'); - runningProcess = spawn(pythonPath, args, { env }); + const deletedDlls = await clearDirectory(dllPath, 'DLL'); + const deletedModels = await clearDirectory(modelsPath, 'Models'); + const deletedBins = await clearDirectory(binPath, 'Binary'); - runningProcess.stdout.on('data', (data) => { - const text = data.toString(); - sendPythonOutput(text.trimEnd(), 'stdout'); - }); + const totalDeletedFiles = deletedDlls + deletedModels + deletedBins; - runningProcess.stderr.on('data', (data) => { - const text = data.toString(); - sendPythonOutput(text.trimEnd(), 'stderr'); - }); + sendPythonOutput('Virtual environment reset successfully!', 'info'); - runningProcess.on('error', (error) => { - sendPythonOutput(`Process error: ${error.message}`, 'stderr'); - runningProcess = null; - if (mainWindow && !mainWindow.isDestroyed()) { - mainWindow.webContents.send('process-stopped'); + return { + success: true, + message: `Virtual environment reset complete. Uninstalled ${uninstalledPackages.length} packages and deleted ${totalDeletedFiles} downloaded files.`, + uninstalledPackages, + deletedFiles: { + dlls: deletedDlls, + models: deletedModels, + binaries: deletedBins, + total: totalDeletedFiles } - }); + }; + } catch (error) { + console.error('Error resetting virtual environment:', error); + throw new Error(`Virtual environment reset failed: ${error.message}`); + } +}); + +// Add handlers for starting and stopping the process +ipcMain.handle('start-process', async () => { + if (runningProcess) { + throw new Error('Process is already running'); + } + + const scriptPath = path.join(APP_ROOT, 'app', 'hi.py'); + const args = [scriptPath, '--config', CONFIG_PATH]; + + try { + const pythonPath = getVenvPython(); + sendPythonOutput(`Starting process: ${path.basename(pythonPath)} ${args.join(' ')}`, 'info'); - runningProcess.on('close', (code) => { - sendPythonOutput(`Process exited with code ${code}`, 'info'); - runningProcess = null; - if (mainWindow && !mainWindow.isDestroyed()) { - mainWindow.webContents.send('process-stopped'); - } - }); + runningProcess = spawn(pythonPath, args, { env: createPythonEnvironment() }); + setupProcessHandlers(runningProcess); return { success: true }; } catch (error) { @@ -243,7 +525,7 @@ ipcMain.handle('stop-process', async () => { throw new Error('No process is running'); } - return new Promise((resolve, reject) => { + return new Promise((resolve) => { let forcefullyKilled = false; // Set up a timeout to force kill after 10 seconds diff --git a/ui/package.json b/ui/package.json index fee2d67..3a58298 100644 --- a/ui/package.json +++ b/ui/package.json @@ -3,12 +3,85 @@ "version": "1.0.0", "description": "Speech-to-text tool for VRChat", "main": "index.js", + "homepage": "./", "scripts": { "start": "npm run build:css && electron .", "build:css": "tailwindcss -i ./src/components.css -o ./build/output.css", "watch:css": "tailwindcss -i ./src/components.css -o ./build/output.css --watch", "dev": "concurrently \"npm run watch:css\" \"electron .\"", - "test": "echo \"Error: no test specified\" && exit 1" + "test": "echo \"Error: no test specified\" && exit 1", + "dist": "npm run build:css && electron-builder", + "dist:win": "npm run build:css && electron-builder --win", + "dist:portable": "npm run build:css && electron-builder --win portable", + "dist:zip": "npm run build:css && electron-builder --win zip" + }, + "build": { + "appId": "com.yum_food.tastt", + "productName": "TaSTT", + "directories": { + "output": "dist" + }, + "files": [ + "**/*", + "!dist/**/*", + "!src/**/*", + "!node_modules/**/{CHANGELOG.md,README.md,README,readme.md,readme}", + "!node_modules/**/{test,__tests__,tests,powered-test,example,examples}", + "!node_modules/**/*.d.ts", + "!node_modules/.bin", + "!.git/**/*", + "!.gitignore" + ], + "extraResources": [ + { + "from": "../app", + "to": "app", + "filter": [ + "**/*.py", + "requirements.txt", + "!**/__pycache__/**/*" + ] + }, + { + "from": "../config.yaml", + "to": "config.yaml" + }, + { + "from": "../dll", + "to": "dll", + "filter": ["**/*"] + }, + { + "from": "../Images", + "to": "Images", + "filter": ["**/*"] + }, + { + "from": "../bin", + "to": "bin", + "filter": ["**/*"] + } + ], + "win": { + "icon": "../Images/logo.png", + "target": [ + { + "target": "portable", + "arch": ["x64"] + }, + { + "target": "zip", + "arch": ["x64"] + } + ] + }, + "portable": { + "artifactName": "${productName}-${version}-portable.exe" + }, + "nsis": { + "oneClick": false, + "allowToChangeInstallationDirectory": true + } }, "keywords": [], "author": "yum_food", @@ -22,6 +95,7 @@ "concurrently": "^9.1.2", "cross-env": "^7.0.3", "electron": "^36.3.2", + "electron-builder": "^25.1.8", "postcss": "^8.5.4", "tailwindcss": "^3.4.17", "vite": "^6.3.5", diff --git a/ui/preload.js b/ui/preload.js index e6c0623..35cc8d6 100644 --- a/ui/preload.js +++ b/ui/preload.js @@ -3,14 +3,13 @@ const { contextBridge, ipcRenderer } = require('electron'); contextBridge.exposeInMainWorld('electronAPI', { loadConfig: () => ipcRenderer.invoke('load-config'), saveConfig: (config) => ipcRenderer.invoke('save-config', config), - restartApp: () => ipcRenderer.invoke('restart-app'), + resetConfig: () => ipcRenderer.invoke('reset-config'), getMicrophones: () => ipcRenderer.invoke('get-microphones'), installRequirements: () => ipcRenderer.invoke('install-requirements'), + resetVenv: () => ipcRenderer.invoke('reset-venv'), startProcess: () => ipcRenderer.invoke('start-process'), stopProcess: () => ipcRenderer.invoke('stop-process'), onPythonOutput: (callback) => ipcRenderer.on('python-output', (event, data) => callback(data)), - onProcessStopped: (callback) => ipcRenderer.on('process-stopped', (event) => callback()) + onProcessStopped: (callback) => ipcRenderer.on('process-stopped', () => callback()) }); -console.log('Preload script loaded.'); - diff --git a/ui/renderer.js b/ui/renderer.js index b3f05a6..201eef6 100644 --- a/ui/renderer.js +++ b/ui/renderer.js @@ -1,99 +1,220 @@ -// Handle status messages +// Configuration and form field mappings +const CONFIG_FIELDS = { + // String fields + compute_type: { type: 'select', default: 'float16' }, + language: { type: 'select', default: 'english' }, + model: { type: 'select', default: 'turbo' }, + microphone: { type: 'number', default: 0 }, + user_prompt: { type: 'text', default: '' }, + + // Number fields + gpu_idx: { type: 'number', default: 0 }, + max_speech_duration_s: { type: 'number', default: 10 }, + min_silence_duration_ms: { type: 'number', default: 250 }, + reset_after_silence_s: { type: 'number', default: 15 }, + transcription_loop_delay_ms: { type: 'number', default: 100 }, + block_width: { type: 'number', default: 2 }, + num_blocks: { type: 'number', default: 40 }, + rows: { type: 'number', default: 10 }, + cols: { type: 'number', default: 24 }, + + // Boolean fields (stored as 1/0) + enable_debug_mode: { type: 'boolean', default: 0 }, + enable_previews: { type: 'boolean', default: 1 }, + save_audio: { type: 'boolean', default: 0 }, + use_cpu: { type: 'boolean', default: 0 } +}; + +// Button management system +class ButtonManager { + constructor() { + this.buttons = { + start: document.getElementById('start-process'), + stop: document.getElementById('stop-process'), + setupVenv: document.getElementById('setup-venv'), + resetVenv: document.getElementById('reset-venv'), + refreshMicrophones: document.getElementById('refresh-microphones') + }; + } + + setState(buttonName, disabled) { + const button = this.buttons[buttonName]; + if (!button) return; + + button.disabled = disabled; + if (disabled) { + button.classList.add('opacity-50', 'cursor-not-allowed'); + } else { + button.classList.remove('opacity-50', 'cursor-not-allowed'); + } + } + + setProcessRunning() { + this.setState('start', true); + this.setState('stop', false); + } + + setProcessStopped() { + this.setState('start', false); + this.setState('stop', true); + } + + async withButtonLoading(buttonName, asyncFn) { + this.setState(buttonName, true); + try { + return await asyncFn(); + } finally { + this.setState(buttonName, false); + } + } +} + +const buttonManager = new ButtonManager(); + +// Add loading overlay management +class LoadingOverlay { + constructor() { + this.overlay = document.getElementById('loading-overlay'); + this.form = document.getElementById('config-form'); + this.messageElement = this.overlay.querySelector('p'); + this.defaultMessage = 'Environment setup underway - please wait.'; + } + + show(message = null) { + this.messageElement.textContent = message || this.defaultMessage; + this.overlay.classList.remove('hidden'); + // Disable all form inputs and buttons in the entire left panel + const leftPanel = this.overlay.parentElement; + const inputs = leftPanel.querySelectorAll('input, select, textarea, button'); + inputs.forEach(input => { + input.disabled = true; + input.classList.add('opacity-50'); + }); + } + + hide() { + this.overlay.classList.add('hidden'); + // Re-enable all form inputs and buttons in the entire left panel + const leftPanel = this.overlay.parentElement; + const inputs = leftPanel.querySelectorAll('input, select, textarea, button'); + inputs.forEach(input => { + input.disabled = false; + input.classList.remove('opacity-50'); + }); + // Reset to default message + this.messageElement.textContent = this.defaultMessage; + } +} + +const loadingOverlay = new LoadingOverlay(); + +// Add a flag to prevent auto-save during programmatic updates +let isSettingValues = false; + +// Handle status messages with better color management function showStatus(message, type = 'info') { const statusEl = document.getElementById('status-message'); statusEl.textContent = message; - statusEl.classList.remove('hidden', 'bg-green-100', 'bg-red-100', 'bg-blue-100', 'text-green-800', 'text-red-800', 'text-blue-800'); - - if (type === 'success') { - statusEl.classList.add('bg-green-100', 'text-green-800'); - } else if (type === 'error') { - statusEl.classList.add('bg-red-100', 'text-red-800'); - } else { - statusEl.classList.add('bg-blue-100', 'text-blue-800'); - } + + // Remove all status classes + const statusClasses = ['hidden', 'bg-green-100', 'bg-red-100', 'bg-blue-100', 'text-green-800', 'text-red-800', 'text-blue-800']; + statusEl.classList.remove(...statusClasses); + + // Add appropriate classes based on type + const typeMap = { + success: ['bg-green-100', 'text-green-800'], + error: ['bg-red-100', 'text-red-800'], + info: ['bg-blue-100', 'text-blue-800'] + }; + + statusEl.classList.add(...(typeMap[type] || typeMap.info)); // Also log to console appendToConsole(message, type === 'error' ? 'stderr' : 'info'); - setTimeout(() => { - statusEl.classList.add('hidden'); - }, 5000); + setTimeout(() => statusEl.classList.add('hidden'), 5000); } -// Get form values +// Get form values using field mappings function getFormValues() { - const microphoneValue = document.getElementById('microphone').value; - // Convert to number if it's a numeric string (device index) - const microphoneForConfig = /^\d+$/.test(microphoneValue) ? parseInt(microphoneValue) : microphoneValue; - - return { - compute_type: document.getElementById('compute_type').value, - enable_debug_mode: document.getElementById('enable_debug_mode').checked ? 1 : 0, - enable_previews: document.getElementById('enable_previews').checked ? 1 : 0, - save_audio: document.getElementById('save_audio').checked ? 1 : 0, - language: document.getElementById('language').value, - gpu_idx: parseInt(document.getElementById('gpu_idx').value), - max_speech_duration_s: parseInt(document.getElementById('max_speech_duration_s').value), - min_silence_duration_ms: parseInt(document.getElementById('min_silence_duration_ms').value), - microphone: microphoneForConfig, - model: document.getElementById('model').value, - reset_after_silence_s: parseInt(document.getElementById('reset_after_silence_s').value), - transcription_loop_delay_ms: parseInt(document.getElementById('transcription_loop_delay_ms').value), - use_cpu: document.getElementById('use_cpu').checked ? 1 : 0, - block_width: parseInt(document.getElementById('block_width').value), - num_blocks: parseInt(document.getElementById('num_blocks').value), - rows: parseInt(document.getElementById('rows').value), - cols: parseInt(document.getElementById('cols').value) - }; + const config = {}; + + for (const [fieldName, fieldConfig] of Object.entries(CONFIG_FIELDS)) { + const element = document.getElementById(fieldName); + if (!element) continue; + + switch (fieldConfig.type) { + case 'boolean': + config[fieldName] = element.checked ? 1 : 0; + break; + case 'number': + config[fieldName] = parseInt(element.value) || fieldConfig.default; + break; + case 'text': + config[fieldName] = element.value || fieldConfig.default; + break; + default: + config[fieldName] = element.value || fieldConfig.default; + } + } + + return config; } -// Add a flag to prevent auto-save during programmatic updates -let isSettingValues = false; - -// Set form values +// Set form values using field mappings function setFormValues(config) { isSettingValues = true; // Disable auto-save temporarily - document.getElementById('compute_type').value = config.compute_type || 'int8'; - document.getElementById('enable_debug_mode').checked = config.enable_debug_mode === 1; - document.getElementById('enable_previews').checked = config.enable_previews === 1; - document.getElementById('save_audio').checked = config.save_audio === 1; - document.getElementById('language').value = config.language || 'english'; - document.getElementById('gpu_idx').value = config.gpu_idx || 0; - document.getElementById('max_speech_duration_s').value = config.max_speech_duration_s || 10; - document.getElementById('min_silence_duration_ms').value = config.min_silence_duration_ms || 250; - document.getElementById('microphone').value = config.microphone || 'motu'; - document.getElementById('model').value = config.model || 'turbo'; - document.getElementById('reset_after_silence_s').value = config.reset_after_silence_s || 15; - document.getElementById('transcription_loop_delay_ms').value = config.transcription_loop_delay_ms || 100; - document.getElementById('use_cpu').checked = config.use_cpu === 1; - document.getElementById('block_width').value = config.block_width || 2; - document.getElementById('num_blocks').value = config.num_blocks || 40; - document.getElementById('rows').value = config.rows || 10; - document.getElementById('cols').value = config.cols || 24; + for (const [fieldName, fieldConfig] of Object.entries(CONFIG_FIELDS)) { + const element = document.getElementById(fieldName); + if (!element) continue; + + const value = config[fieldName] ?? fieldConfig.default; + + switch (fieldConfig.type) { + case 'boolean': + element.checked = value === 1; + break; + case 'text': + element.value = value || ''; + break; + default: + element.value = value; + } + } isSettingValues = false; // Re-enable auto-save } -// Toggle advanced settings -document.getElementById('toggle-advanced').addEventListener('click', () => { - const advancedSettings = document.getElementById('advanced-settings'); - const chevron = document.getElementById('chevron'); - - if (advancedSettings.classList.contains('hidden')) { - advancedSettings.classList.remove('hidden'); - chevron.classList.add('rotate-90'); - } else { - advancedSettings.classList.add('hidden'); - chevron.classList.remove('rotate-90'); - } -}); +// Console management +const consoleContent = document.getElementById('console-content'); + +function appendToConsole(message, type = 'stdout') { + const timestamp = new Date().toLocaleTimeString(); + const timestampSpan = document.createElement('span'); + timestampSpan.className = 'console-timestamp'; + timestampSpan.textContent = `[${timestamp}] `; + + const messageSpan = document.createElement('span'); + messageSpan.className = `console-${type}`; + messageSpan.textContent = message; + + const lineDiv = document.createElement('div'); + lineDiv.appendChild(timestampSpan); + lineDiv.appendChild(messageSpan); + + consoleContent.appendChild(lineDiv); + + // Auto-scroll to bottom + const pythonConsole = document.getElementById('python-console'); + pythonConsole.scrollTop = pythonConsole.scrollHeight; +} -// Simplify button handlers by extracting common patterns +// Async action handler with better error handling async function handleAsyncAction(actionName, actionFn) { try { const result = await actionFn(); - if (result && result.message) { + if (result?.message) { showStatus(result.message, 'success'); } return result; @@ -103,36 +224,12 @@ async function handleAsyncAction(actionName, actionFn) { } } -// Process control buttons -const startButton = document.getElementById('start-process'); -const stopButton = document.getElementById('stop-process'); - -// Helper functions for button state management -function setButtonState(button, disabled) { - button.disabled = disabled; - if (disabled) { - button.classList.add('opacity-50', 'cursor-not-allowed'); - } else { - button.classList.remove('opacity-50', 'cursor-not-allowed'); - } -} - -function setProcessRunningState() { - setButtonState(startButton, true); - setButtonState(stopButton, false); -} - -function setProcessStoppedState() { - setButtonState(startButton, false); - setButtonState(stopButton, true); -} - // Auto-save functionality with debouncing let saveTimeout; -const SAVE_DELAY = 500; // milliseconds +const SAVE_DELAY = 500; async function autoSaveConfig() { - if (isSettingValues) return; // Don't save during programmatic updates + if (isSettingValues) return; clearTimeout(saveTimeout); saveTimeout = setTimeout(async () => { @@ -141,28 +238,19 @@ async function autoSaveConfig() { await window.electronAPI.saveConfig(config); showStatus('Configuration saved', 'success'); - // Check if process is running (stop button is enabled means process is running) - const stopButton = document.getElementById('stop-process'); - - if (!stopButton.disabled) { - // Process is running, restart it with new config + // Restart process if running + if (!buttonManager.buttons.stop.disabled) { appendToConsole('Restarting process with new configuration...', 'info'); try { await window.electronAPI.stopProcess(); - await new Promise(resolve => setTimeout(resolve, 1000)); - await window.electronAPI.startProcess(); - - // Update button states to reflect running process - setProcessRunningState(); - + buttonManager.setProcessRunning(); appendToConsole('Process restarted with new configuration', 'info'); } catch (error) { appendToConsole(`Failed to restart process: ${error.message}`, 'stderr'); - // Process is stopped, update button states - setProcessStoppedState(); + buttonManager.setProcessStopped(); } } } catch (error) { @@ -171,47 +259,32 @@ async function autoSaveConfig() { }, SAVE_DELAY); } -// Add event listeners to all form inputs for auto-save +// Auto-save setup function setupAutoSave() { - // Get all form inputs const form = document.getElementById('config-form'); - const inputs = form.querySelectorAll('input, select'); + const inputs = form.querySelectorAll('input, select, textarea'); - // Add change listener to each input inputs.forEach(input => { - if (input.type === 'checkbox') { - input.addEventListener('change', autoSaveConfig); - } else if (input.type === 'number' || input.type === 'text') { - input.addEventListener('input', autoSaveConfig); - } else if (input.tagName === 'SELECT') { - input.addEventListener('change', autoSaveConfig); - } + const eventType = input.type === 'checkbox' ? 'change' : + (input.type === 'number' || input.type === 'text' || input.tagName === 'TEXTAREA') ? 'input' : 'change'; + input.addEventListener(eventType, autoSaveConfig); }); } -// Update the setup-venv handler -document.getElementById('setup-venv').addEventListener('click', async () => { - const setupButton = document.getElementById('setup-venv'); - setupButton.disabled = true; - setupButton.classList.add('opacity-50', 'cursor-not-allowed'); - - try { - await handleAsyncAction('Install requirements', async () => { - return await window.electronAPI.installRequirements(); - }); - // Reload microphones after successful installation - await loadMicrophones(); - } finally { - setupButton.disabled = false; - setupButton.classList.remove('opacity-50', 'cursor-not-allowed'); - } -}); - -// Simplified microphone loading +// Microphone loading async function loadMicrophones() { const microphoneSelect = document.getElementById('microphone'); try { + // Check/install requirements during startup + appendToConsole('Checking virtual environment and requirements...', 'info'); + loadingOverlay.show('Setting up environment - this can take several minutes.'); + try { + await handleAsyncAction('Install requirements', () => window.electronAPI.installRequirements()); + } finally { + loadingOverlay.hide(); // Always hide overlay when done + } + appendToConsole('Loading available microphones...', 'info'); const microphones = await window.electronAPI.getMicrophones(); @@ -232,7 +305,7 @@ async function loadMicrophones() { appendToConsole(` - ${mic.name} (Device ${mic.index})`, 'stdout'); }); - // Restore previously selected microphone if possible + // Restore previously selected microphone try { const config = await window.electronAPI.loadConfig(); if (config.microphone) { @@ -248,11 +321,144 @@ async function loadMicrophones() { } } -// Update window load to include auto-save setup +// Event handlers setup +function setupEventHandlers() { + // Advanced settings toggle + document.getElementById('toggle-advanced').addEventListener('click', () => { + const advancedSettings = document.getElementById('advanced-settings'); + const chevron = document.getElementById('chevron'); + + if (advancedSettings.classList.contains('hidden')) { + advancedSettings.classList.remove('hidden'); + chevron.classList.add('rotate-90'); + } else { + advancedSettings.classList.add('hidden'); + chevron.classList.remove('rotate-90'); + } + }); + + // Setup virtual environment + document.getElementById('setup-venv').addEventListener('click', async () => { + loadingOverlay.show('Setting up virtual environment - please wait...'); // Show overlay with custom message + try { + await buttonManager.withButtonLoading('setupVenv', async () => { + await handleAsyncAction('Install requirements', () => window.electronAPI.installRequirements()); + }); + } finally { + loadingOverlay.hide(); // Always hide overlay when done + } + }); + + // Reset virtual environment + document.getElementById('reset-venv').addEventListener('click', async () => { + loadingOverlay.show('Resetting virtual environment - please wait...'); // Show overlay with custom message + try { + await buttonManager.withButtonLoading('resetVenv', async () => { + await handleAsyncAction('Reset virtual environment', () => window.electronAPI.resetVenv()); + }); + } finally { + loadingOverlay.hide(); // Always hide overlay when done + } + }); + + // Reset configuration + document.getElementById('reset-config').addEventListener('click', async () => { + const confirmReset = confirm('Are you sure you want to reset all settings to defaults? This cannot be undone.'); + if (!confirmReset) return; + + try { + // Stop process if running + const wasRunning = !buttonManager.buttons.stop.disabled; + if (wasRunning) { + appendToConsole('Stopping process before resetting configuration...', 'info'); + await window.electronAPI.stopProcess(); + buttonManager.setProcessStopped(); + await new Promise(resolve => setTimeout(resolve, 500)); + } + + // Reset configuration + appendToConsole('Resetting configuration to defaults...', 'info'); + const result = await window.electronAPI.resetConfig(); + + // Reload configuration with defaults + const config = await window.electronAPI.loadConfig(); + setFormValues(config); + + showStatus(result.message, 'success'); + appendToConsole('Configuration reset successfully', 'info'); + + // Restart process if it was running + if (wasRunning) { + appendToConsole('Restarting process with default configuration...', 'info'); + await window.electronAPI.startProcess(); + buttonManager.setProcessRunning(); + appendToConsole('Process restarted with default configuration', 'info'); + } + } catch (error) { + showStatus(`Failed to reset configuration: ${error.message}`, 'error'); + appendToConsole(`Failed to reset configuration: ${error.message}`, 'stderr'); + } + }); + + // Refresh microphones + document.getElementById('refresh-microphones').addEventListener('click', async () => { + await buttonManager.withButtonLoading('refreshMicrophones', async () => { + await loadMicrophones(); + }); + }); + + // Start process + document.getElementById('start-process').addEventListener('click', async () => { + buttonManager.setState('start', true); + + try { + // The installRequirements function will now check if venv is set up. + loadingOverlay.show('Verifying environment setup - please wait...'); // Show overlay with custom message + try { + await window.electronAPI.installRequirements(); + appendToConsole('Virtual environment setup checked/completed', 'info'); + } finally { + loadingOverlay.hide(); // Always hide overlay when done + } + + await window.electronAPI.startProcess(); + buttonManager.setProcessRunning(); + appendToConsole('Process started successfully', 'info'); + } catch (error) { + appendToConsole(`Failed to start process: ${error.message}`, 'stderr'); + buttonManager.setState('start', false); + } + }); + + // Stop process + document.getElementById('stop-process').addEventListener('click', async () => { + buttonManager.setState('stop', true); + + try { + await window.electronAPI.stopProcess(); + appendToConsole('Process stop initiated', 'info'); + } catch (error) { + appendToConsole(`Failed to stop process: ${error.message}`, 'stderr'); + buttonManager.setState('stop', false); + } + }); + + // Listen for process stopped event + window.electronAPI.onProcessStopped(() => { + buttonManager.setProcessStopped(); + }); +} + +// Initialize application window.addEventListener('load', async () => { appendToConsole('TaSTT Configuration UI initialized', 'info'); - // Load config first + // Set up Python output listener first so we capture all output + window.electronAPI.onPythonOutput((data) => { + appendToConsole(data.message, data.type); + }); + + // Load configuration try { const config = await window.electronAPI.loadConfig(); setFormValues(config); @@ -264,71 +470,7 @@ window.addEventListener('load', async () => { // Load microphones await loadMicrophones(); - // Set up auto-save after everything is loaded + // Setup event handlers and auto-save + setupEventHandlers(); setupAutoSave(); -}); - -// Console management -const consoleContent = document.getElementById('console-content'); - -function appendToConsole(message, type = 'stdout') { - const timestamp = new Date().toLocaleTimeString(); - const timestampSpan = document.createElement('span'); - timestampSpan.className = 'console-timestamp'; - timestampSpan.textContent = `[${timestamp}] `; - - const messageSpan = document.createElement('span'); - messageSpan.className = `console-${type}`; - messageSpan.textContent = message; - - const lineDiv = document.createElement('div'); - lineDiv.appendChild(timestampSpan); - lineDiv.appendChild(messageSpan); - - consoleContent.appendChild(lineDiv); - - // Auto-scroll to bottom - const pythonConsole = document.getElementById('python-console'); - pythonConsole.scrollTop = pythonConsole.scrollHeight; -} - -// Clear console button -document.getElementById('clear-console').addEventListener('click', () => { - consoleContent.innerHTML = ''; - appendToConsole('Console cleared', 'info'); -}); - -// Listen for Python output -window.electronAPI.onPythonOutput((data) => { - appendToConsole(data.message, data.type); -}); - -document.getElementById('start-process').addEventListener('click', async () => { - setButtonState(startButton, true); - - try { - await window.electronAPI.startProcess(); - setProcessRunningState(); - appendToConsole('Process started successfully', 'info'); - } catch (error) { - appendToConsole(`Failed to start process: ${error.message}`, 'stderr'); - setButtonState(startButton, false); - } -}); - -document.getElementById('stop-process').addEventListener('click', async () => { - setButtonState(stopButton, true); - - try { - const result = await window.electronAPI.stopProcess(); - appendToConsole('Process stop initiated', 'info'); - } catch (error) { - appendToConsole(`Failed to stop process: ${error.message}`, 'stderr'); - setButtonState(stopButton, false); - } -}); - -// Listen for process stopped event -window.electronAPI.onProcessStopped(() => { - setProcessStoppedState(); }); \ No newline at end of file diff --git a/ui/src/components.css b/ui/src/components.css index d8d909d..2832e12 100644 --- a/ui/src/components.css +++ b/ui/src/components.css @@ -46,6 +46,14 @@ .btn-red { @apply bg-red-600 text-white hover:bg-red-700 focus:ring-red-500; } + + .btn-purple { + @apply bg-purple-600 text-white hover:bg-purple-700 focus:ring-purple-500; + } + + .btn-orange { + @apply bg-orange-600 text-white hover:bg-orange-700 focus:ring-orange-500; + } } /* Console styling */ diff --git a/ui_design.md b/ui_design.md index 06eee65..e1ff095 100644 --- a/ui_design.md +++ b/ui_design.md @@ -10,7 +10,13 @@ $ choco uninstall nodejs -y $ choco install nodejs-lts -y ``` -Now open a non-admin PowerShell terminal: +To build the app: +``` +$ npm install +$ npm run dev +``` + +For posterity, this is how I set up the ui directory initially. In a non-admin PowerShell window: ```bash # Check your node and npm versions. @@ -30,3 +36,4 @@ npx tailwindcss init -p npm install --save-dev vue@3 @vitejs/plugin-vue vite yaml npm install --save-dev js-yaml ``` + -- cgit v1.2.3 From 7fb9c575aea4d318e9c14b82174d1b323171b62b Mon Sep 17 00:00:00 2001 From: yum Date: Fri, 30 May 2025 13:32:36 -0700 Subject: More stuff - fix unicode output from python terminal - fix cpu inference - add filters - add beam search params to UI - DRY up config definition in UI --- Third_Party/Profanity | 1 + app/hi.py | 4 ++ app/profanity_filter.py | 43 ++++++++++++++ app/stt.py | 151 ++++++++++++++++++++++++++++++++++++++++-------- config.yaml | 20 ++++--- ui/config-schema.js | 49 ++++++++++++++++ ui/index.html | 52 ++++++++++++++++- ui/index.js | 49 +++++++--------- ui/renderer.js | 31 ++-------- 9 files changed, 312 insertions(+), 88 deletions(-) create mode 160000 Third_Party/Profanity create mode 100644 app/profanity_filter.py create mode 100644 ui/config-schema.js (limited to 'ui') diff --git a/Third_Party/Profanity b/Third_Party/Profanity new file mode 160000 index 0000000..5faf2ba --- /dev/null +++ b/Third_Party/Profanity @@ -0,0 +1 @@ +Subproject commit 5faf2ba42d7b1c0977169ec3611df25a3c08eb13 diff --git a/app/hi.py b/app/hi.py index e6877ff..bab0fd4 100644 --- a/app/hi.py +++ b/app/hi.py @@ -1,5 +1,6 @@ import app_config import argparse +import io from math import floor, ceil import msvcrt import os @@ -11,6 +12,9 @@ import sys import threading import time +sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8') +sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding='utf-8') + TESTS_ENABLED = True # 0 = quiet, 1 = verbose, 2 = very verbose diff --git a/app/profanity_filter.py b/app/profanity_filter.py new file mode 100644 index 0000000..b8c84ed --- /dev/null +++ b/app/profanity_filter.py @@ -0,0 +1,43 @@ +#!/usr/bin/env python3 + +class ProfanityFilter: + def __init__(self, en_path: str): + self.en_path = en_path + self.en_profanity = set() + + def load(self): + with open(self.en_path, 'r') as f: + for line in f: + self.en_profanity.add(line.strip()) + + def filter(self, line: str, language_code: str = "en") -> str: + filtered = "" + + if language_code not in {"en"}: + raise ValueError(f"Language code \"{language_code}\" is " + + "unsupported by the profanity filter") + + # Translation table converting vowels to asterisks. + vowel_to_asterisk = str.maketrans('aeiouAEIOU', '**********') + + result = [] + for word in line.split(): + word_clean = word.lower() + # Filter out non-alphabet characters from the word. + word_clean = ''.join([char for char in word_clean if char.isalpha()]) + if word_clean in self.en_profanity: + result.append(word.translate(vowel_to_asterisk)) + else: + result.append(word) + + return " ".join(result) + +if __name__ == "__main__": + en_path = "/mnt/d/vrc/TaSTT/GUI/Profanity/Profanity/en" + p = ProfanityFilter(en_path) + p.load() + assert(p.filter("fuck") == "f*ck") + assert(p.filter("fuck!") == "f*ck!") + assert(p.filter("fuck shit") == "f*ck sh*t") + assert(p.filter("fuck shit this should not be filtered") == "f*ck sh*t this should not be filtered") + assert(p.filter("ASS") == "*SS") diff --git a/app/stt.py b/app/stt.py index 7d76333..a3988e1 100644 --- a/app/stt.py +++ b/app/stt.py @@ -3,6 +3,12 @@ from faster_whisper import WhisperModel import langcodes import numpy as np import os +try: + from profanity_filter import ProfanityFilter + PROFANITY_FILTER_AVAILABLE = True +except ImportError: + PROFANITY_FILTER_AVAILABLE = False + print("Warning: profanity_filter module not available", file=sys.stderr) import pyaudio from pydub import AudioSegment from shared_thread_data import SharedThreadData @@ -12,7 +18,6 @@ import time import typing import wave - APP_ROOT = os.path.dirname(os.path.abspath(__file__)) PROJECT_ROOT = os.path.dirname(APP_ROOT) @@ -297,21 +302,19 @@ class AudioSegmenter: max_speech_s=5): self.min_silence_ms = min_silence_ms self.max_speech_s = max_speech_s - + # Load Silero VAD model self.model = load_silero_vad() - + self.vad_threshold = 0.3 self.min_silence_duration_ms = min_silence_ms self.max_speech_duration_s = max_speech_s - - self.speech_pad_ms = 300 def segmentAudio(self, audio: bytes): # Convert audio bytes to numpy array expected by silero-vad audio_array = np.frombuffer(audio, dtype=np.int16).flatten().astype(np.float32) / 32768.0 - + # Get speech timestamps using silero-vad # Note: silero-vad expects sample rate of 16000 Hz which matches AudioStream.FPS speech_timestamps = get_speech_timestamps( @@ -323,7 +326,7 @@ class AudioSegmenter: max_speech_duration_s=self.max_speech_duration_s, return_seconds=False # We want frame indices, not seconds ) - + return speech_timestamps # Returns the stable cutoff (if any) and whether there are any segments. @@ -399,27 +402,25 @@ class Whisper: self.model = None self.cfg = cfg - abspath = os.path.abspath(__file__) - my_dir = os.path.dirname(abspath) - parent_dir = os.path.dirname(my_dir) - model_str = cfg["model"] - model_root = os.path.join(parent_dir, "Models", + model_root = os.path.join(PROJECT_ROOT, "Models", os.path.normpath(model_str)) if cfg["enable_debug_mode"]: print(f"Model {cfg['model']} will be saved to {model_root}", file=sys.stderr) model_device = "cuda" + compute_type = cfg["compute_type"] if cfg["use_cpu"]: model_device = "cpu" + compute_type = "int8" already_downloaded = os.path.exists(model_root) self.model = WhisperModel(model_str, device = model_device, device_index = cfg["gpu_idx"], - compute_type = cfg["compute_type"], + compute_type = compute_type, download_root = model_root, local_files_only = already_downloaded) @@ -436,14 +437,14 @@ class Whisper: def transcribe(self, frames: bytes = None) -> typing.List[Segment]: if frames is None: frames = self.collector.getAudio() - + # Convert audio to float32 audio = np.frombuffer(frames, dtype=np.int16).flatten().astype(np.float32) / 32768.0 # Build context-aware prompt prompt = self._build_prompt() - + t0 = time.time() segments, info = self.model.transcribe( audio, @@ -452,12 +453,9 @@ class Whisper: temperature=0.0, without_timestamps = False, initial_prompt=prompt, - beam_size=5, - best_of=5, - condition_on_previous_text=True, - compression_ratio_threshold=2.4, - log_prob_threshold=-1.0, - no_speech_threshold=0.6 + beam_size=self.cfg.get("beam_size", 5), + best_of=self.cfg.get("best_of", 5), + condition_on_previous_text=True ) res = [] for s in segments: @@ -562,21 +560,21 @@ class VadCommitter: latency_s = self.collector.now() - self.collector.begin() duration_s = stable_cutoff / AudioStream.FPS start_ts = self.collector.begin() - + # Get the filtered audio first, then extract the portion we need filtered_audio = self.collector.getAudio() commit_audio = filtered_audio[:stable_cutoff * AudioStream.FRAME_SZ] - + # Now drop the prefix from the collector self.collector.dropAudioPrefixByFrames(stable_cutoff) segments = self.whisper.transcribe(commit_audio) delta = ''.join(s.transcript for s in segments) - + # Update whisper's context with the committed text if delta.strip(): self.whisper.update_context(delta.strip()) - + audio = self.collector.getAudio() if self.cfg["enable_debug_mode"]: for s in segments: @@ -608,6 +606,88 @@ class VadCommitter: duration_s=duration_s, start_ts=start_ts) + +class StreamingPlugin: + def __init__(self): + pass + + def transform(self, commit: TranscriptCommit) -> TranscriptCommit: + return commit + + def stop(self): + pass + + +class LowercasePlugin(StreamingPlugin): + def __init__(self, cfg): + self.cfg = cfg + + def transform(self, commit: TranscriptCommit) -> TranscriptCommit: + if self.cfg["enable_lowercase_filter"]: + commit.delta = commit.delta.lower() + commit.preview = commit.preview.lower() + return commit + + +class UppercasePlugin(StreamingPlugin): + def __init__(self, cfg): + self.cfg = cfg + + def transform(self, commit: TranscriptCommit) -> TranscriptCommit: + if self.cfg["enable_uppercase_filter"]: + commit.delta = commit.delta.upper() + commit.preview = commit.preview.upper() + return commit + + +class ProfanityPlugin(StreamingPlugin): + def __init__(self, cfg): + self.cfg = cfg + self.filter = None + if PROFANITY_FILTER_AVAILABLE and cfg["enable_profanity_filter"]: + en_profanity_path = os.path.join(PROJECT_ROOT, "Third_Party/Profanity/en") + try: + self.filter = ProfanityFilter(en_profanity_path) + self.filter.load() + except Exception as e: + print(f"Warning: Could not load profanity filter: {e}", file=sys.stderr) + self.filter = None + + def transform(self, commit: TranscriptCommit) -> TranscriptCommit: + if self.cfg["enable_profanity_filter"] and self.filter: + commit.delta = self.filter.filter(commit.delta) + commit.preview = self.filter.filter(commit.preview) + return commit + + +class PresentationFilter: + def __init__(self): + pass + + def transform(self, transcript: str, preview: str) -> typing.Tuple[str, str]: + return transcript, preview + + def stop(self): + pass + + +class TrailingPeriodFilter(PresentationFilter): + def __init__(self, cfg): + self.cfg = cfg + + def transform(self, transcript: str, preview: str) -> typing.Tuple[str, str]: + if self.cfg["remove_trailing_period"]: + def _remove_trailing_period(s: str) -> str: + if len(s) > 0 and s[-1] == '.' and not s.endswith("..."): + s = s[0:len(s)-1] + return s + if len(preview) == 0: + transcript = _remove_trailing_period(transcript) + else: + preview = _remove_trailing_period(preview) + return transcript, preview + + def transcriptionThread(shared_data: SharedThreadData): last_stable_commit = None @@ -621,6 +701,17 @@ def transcriptionThread(shared_data: SharedThreadData): max_speech_s=shared_data.cfg["max_speech_duration_s"]) committer = VadCommitter(shared_data.cfg, collector, whisper, segmenter) + plugins = [] + # plugins.append(TranslationPlugin(shared_data.cfg)) # Not implemented yet + plugins.append(UppercasePlugin(shared_data.cfg)) + plugins.append(LowercasePlugin(shared_data.cfg)) + plugins.append(ProfanityPlugin(shared_data.cfg)) + # plugins.append(UwuPlugin(shared_data.cfg)) # Not implemented yet + # plugins.append(BrowserSource(shared_data.cfg)) # Not implemented yet + + filters = [] + filters.append(TrailingPeriodFilter(shared_data.cfg)) + transcript = "" preview = "" @@ -633,6 +724,9 @@ def transcriptionThread(shared_data: SharedThreadData): commit = committer.getDelta() + for plugin in plugins: + commit = plugin.transform(commit) + if len(commit.delta) > 0 or len(commit.preview) > 0: # Avoid re-sending text after long pauses if shared_data.cfg["reset_after_silence_s"] > 0: @@ -664,6 +758,9 @@ def transcriptionThread(shared_data: SharedThreadData): transcript = join_segments(transcript, commit.delta) preview = commit.preview + for filt in filters: + transcript, preview = filt.transform(transcript, preview) + try: print(f"Transcript: {transcript}", flush=True) except UnicodeEncodeError: @@ -691,4 +788,8 @@ def transcriptionThread(shared_data: SharedThreadData): (not commit.delta.endswith(' ')) and \ (not commit.preview.startswith(' ')): commit.preview = ' ' + commit.preview + for plugin in plugins: + plugin.stop() + for filt in filters: + filt.stop() diff --git a/config.yaml b/config.yaml index 5eec7a2..fea03bb 100644 --- a/config.yaml +++ b/config.yaml @@ -1,18 +1,24 @@ compute_type: float16 -enable_debug_mode: 0 -enable_previews: 1 -user_prompt: Use proper punctuation and grammar. Prefer spelled out numbers like one, eleven, twenty, etc. -save_audio: 0 language: english +model: turbo +microphone: 2 +user_prompt: Use proper punctuation and grammar. Prefer spelled out numbers like one, eleven, twenty, etc. Mm. gpu_idx: 0 max_speech_duration_s: 10 min_silence_duration_ms: 250 -microphone: 0 -model: turbo reset_after_silence_s: 15 transcription_loop_delay_ms: 100 -use_cpu: 0 block_width: 2 num_blocks: 40 rows: 10 cols: 24 +beam_size: 5 +best_of: 5 +enable_debug_mode: 0 +enable_previews: 1 +save_audio: 0 +use_cpu: 0 +enable_lowercase_filter: 0 +enable_uppercase_filter: 0 +enable_profanity_filter: 0 +remove_trailing_period: 0 diff --git a/ui/config-schema.js b/ui/config-schema.js new file mode 100644 index 0000000..b1108ff --- /dev/null +++ b/ui/config-schema.js @@ -0,0 +1,49 @@ +// Shared configuration schema with types and defaults +const CONFIG_SCHEMA = { + // String fields + compute_type: { type: 'select', default: 'float16' }, + language: { type: 'select', default: 'english' }, + model: { type: 'select', default: 'turbo' }, + microphone: { type: 'number', default: 0 }, + user_prompt: { type: 'text', default: 'Use proper punctuation and grammar. Prefer spelled out numbers like one, eleven, twenty, etc. Mm.' }, + + // Number fields + gpu_idx: { type: 'number', default: 0 }, + max_speech_duration_s: { type: 'number', default: 10 }, + min_silence_duration_ms: { type: 'number', default: 250 }, + reset_after_silence_s: { type: 'number', default: 15 }, + transcription_loop_delay_ms: { type: 'number', default: 100 }, + block_width: { type: 'number', default: 2 }, + num_blocks: { type: 'number', default: 40 }, + rows: { type: 'number', default: 10 }, + cols: { type: 'number', default: 24 }, + beam_size: { type: 'number', default: 5 }, + best_of: { type: 'number', default: 5 }, + + // Boolean fields (stored as 1/0) + enable_debug_mode: { type: 'boolean', default: 0 }, + enable_previews: { type: 'boolean', default: 1 }, + save_audio: { type: 'boolean', default: 0 }, + use_cpu: { type: 'boolean', default: 0 }, + enable_lowercase_filter: { type: 'boolean', default: 0 }, + enable_uppercase_filter: { type: 'boolean', default: 0 }, + enable_profanity_filter: { type: 'boolean', default: 0 }, + remove_trailing_period: { type: 'boolean', default: 0 } +}; + +// Helper to extract just the default values +function getDefaultConfig() { + const defaults = {}; + for (const [key, schema] of Object.entries(CONFIG_SCHEMA)) { + defaults[key] = schema.default; + } + return defaults; +} + +// Export for both CommonJS (main process) and ES modules (renderer) +if (typeof module !== 'undefined' && module.exports) { + module.exports = { CONFIG_SCHEMA, getDefaultConfig }; +} else { + window.CONFIG_SCHEMA = CONFIG_SCHEMA; + window.getDefaultConfig = getDefaultConfig; +} \ No newline at end of file diff --git a/ui/index.html b/ui/index.html index 90f78c1..97da3d2 100644 --- a/ui/index.html +++ b/ui/index.html @@ -10,9 +10,9 @@
-
+
- + diff --git a/ui/index.js b/ui/index.js index 2420ece..7717c92 100644 --- a/ui/index.js +++ b/ui/index.js @@ -4,6 +4,7 @@ const fs = require('node:fs').promises; const yaml = require('js-yaml'); const { spawn } = require('child_process'); const https = require('https'); +const { CONFIG_SCHEMA, getDefaultConfig } = require('./config-schema.js'); const APP_ROOT = path.join(__dirname, '..'); const CONFIG_PATH = path.join(APP_ROOT, 'config.yaml'); @@ -82,6 +83,14 @@ function downloadFile(url, outputPath) { }); } +function shouldFilterMessage(message) { + // Filter out pydub ffmpeg/avconv warning. It does not actually matter. + if (message.includes("Couldn't find ffmpeg or avconv - defaulting to ffmpeg, but may not work")) { + return true; + } + return false; +} + // Helper function to setup process event handlers function setupProcessHandlers(process) { process.stdout.on('data', (data) => { @@ -91,7 +100,9 @@ function setupProcessHandlers(process) { process.stderr.on('data', (data) => { const text = data.toString(); - sendPythonOutput(text.trimEnd(), 'stderr'); + if (!shouldFilterMessage(text)) { + sendPythonOutput(text.trimEnd(), 'stderr'); + } }); process.on('error', (error) => { @@ -137,7 +148,10 @@ function executePythonCommand(args, options = {}) { pythonProcess.stderr.on('data', (data) => { const text = data.toString(); stderr += text; - sendPythonOutput(text.trimEnd(), 'stderr'); + // Filter out specific warning messages + if (!shouldFilterMessage(text)) { + sendPythonOutput(text.trimEnd(), 'stderr'); + } }); pythonProcess.on('error', (error) => { @@ -171,27 +185,8 @@ function createWindow () { mainWindow.loadFile('index.html'); } -// Default configuration based on user's current config.yaml -const DEFAULT_CONFIG = { - compute_type: 'float16', - enable_debug_mode: 0, - enable_previews: 1, - user_prompt: 'Use proper punctuation and grammar. Prefer spelled out numbers like one, eleven, twenty, etc.', - save_audio: 0, - language: 'english', - gpu_idx: 0, - max_speech_duration_s: 10, - min_silence_duration_ms: 250, - microphone: 0, - model: 'turbo', - reset_after_silence_s: 15, - transcription_loop_delay_ms: 100, - use_cpu: 0, - block_width: 2, - num_blocks: 40, - rows: 10, - cols: 24 -}; +// Replace the DEFAULT_CONFIG constant with: +const DEFAULT_CONFIG = getDefaultConfig(); // IPC handlers ipcMain.handle('load-config', async () => { @@ -521,12 +516,12 @@ ipcMain.handle('start-process', async () => { }); ipcMain.handle('stop-process', async () => { - if (!runningProcess) { - throw new Error('No process is running'); - } - return new Promise((resolve) => { let forcefullyKilled = false; + + if (!runningProcess) { + resolve({ success: true, forcefullyKilled }); + } // Set up a timeout to force kill after 10 seconds const killTimeout = setTimeout(() => { diff --git a/ui/renderer.js b/ui/renderer.js index 201eef6..133a79b 100644 --- a/ui/renderer.js +++ b/ui/renderer.js @@ -1,29 +1,5 @@ -// Configuration and form field mappings -const CONFIG_FIELDS = { - // String fields - compute_type: { type: 'select', default: 'float16' }, - language: { type: 'select', default: 'english' }, - model: { type: 'select', default: 'turbo' }, - microphone: { type: 'number', default: 0 }, - user_prompt: { type: 'text', default: '' }, - - // Number fields - gpu_idx: { type: 'number', default: 0 }, - max_speech_duration_s: { type: 'number', default: 10 }, - min_silence_duration_ms: { type: 'number', default: 250 }, - reset_after_silence_s: { type: 'number', default: 15 }, - transcription_loop_delay_ms: { type: 'number', default: 100 }, - block_width: { type: 'number', default: 2 }, - num_blocks: { type: 'number', default: 40 }, - rows: { type: 'number', default: 10 }, - cols: { type: 'number', default: 24 }, - - // Boolean fields (stored as 1/0) - enable_debug_mode: { type: 'boolean', default: 0 }, - enable_previews: { type: 'boolean', default: 1 }, - save_audio: { type: 'boolean', default: 0 }, - use_cpu: { type: 'boolean', default: 0 } -}; +// Import configuration schema +const CONFIG_FIELDS = window.CONFIG_SCHEMA; // Button management system class ButtonManager { @@ -35,6 +11,9 @@ class ButtonManager { resetVenv: document.getElementById('reset-venv'), refreshMicrophones: document.getElementById('refresh-microphones') }; + + // Initialize button states on construction + this.setProcessStopped(); } setState(buttonName, disabled) { -- cgit v1.2.3 From 73de7cb2d8fb964e7f76ab55420e9bc331bf7bea Mon Sep 17 00:00:00 2001 From: yum Date: Fri, 30 May 2025 21:31:05 -0700 Subject: More stuff - add desktop and vr input threads - add audio feedback for input - add volume control for audio feedback - add UI for custom chatbox/built in chatbox - add ability to dismiss built in chatbox (sync empty messages) - limit lines in python console - limit length of each transcript --- Sounds/Dismiss_Noise.wav | Bin 0 -> 192078 bytes Sounds/Dismiss_Noise_Quiet.wav | Bin 0 -> 192078 bytes Sounds/KB_Noise_Off.wav | Bin 0 -> 192078 bytes Sounds/KB_Noise_Off_Quiet.wav | Bin 0 -> 192078 bytes Sounds/KB_Noise_On.wav | Bin 0 -> 266318 bytes Sounds/KB_Noise_On_Quiet.wav | Bin 0 -> 266318 bytes Sounds/Noise_Off.wav | Bin 0 -> 67278 bytes Sounds/Noise_Off_Quiet.wav | Bin 0 -> 67278 bytes Sounds/Noise_On.wav | Bin 0 -> 67278 bytes Sounds/Noise_On_Quiet.wav | Bin 0 -> 67278 bytes Sounds/speech_noise.wav | Bin 0 -> 61518 bytes app/hi.py | 308 ++++++++++++++++++++++++++++++++++------- app/keybind_event_machine.py | 21 +++ app/requirements.txt | 3 + app/shared_thread_data.py | 7 +- app/steamvr.py | 87 ++++++++++++ app/stt.py | 143 ++++++++++--------- config.yaml | 15 +- ui/config-schema.js | 11 +- ui/index.html | 50 +++++++ ui/index.js | 16 ++- ui/preload.js | 1 + ui/renderer.js | 58 ++++++++ 23 files changed, 595 insertions(+), 125 deletions(-) create mode 100644 Sounds/Dismiss_Noise.wav create mode 100644 Sounds/Dismiss_Noise_Quiet.wav create mode 100644 Sounds/KB_Noise_Off.wav create mode 100644 Sounds/KB_Noise_Off_Quiet.wav create mode 100644 Sounds/KB_Noise_On.wav create mode 100644 Sounds/KB_Noise_On_Quiet.wav create mode 100644 Sounds/Noise_Off.wav create mode 100644 Sounds/Noise_Off_Quiet.wav create mode 100644 Sounds/Noise_On.wav create mode 100644 Sounds/Noise_On_Quiet.wav create mode 100644 Sounds/speech_noise.wav create mode 100644 app/keybind_event_machine.py create mode 100644 app/steamvr.py (limited to 'ui') diff --git a/Sounds/Dismiss_Noise.wav b/Sounds/Dismiss_Noise.wav new file mode 100644 index 0000000..fe60f21 Binary files /dev/null and b/Sounds/Dismiss_Noise.wav differ diff --git a/Sounds/Dismiss_Noise_Quiet.wav b/Sounds/Dismiss_Noise_Quiet.wav new file mode 100644 index 0000000..5c3b1cb Binary files /dev/null and b/Sounds/Dismiss_Noise_Quiet.wav differ diff --git a/Sounds/KB_Noise_Off.wav b/Sounds/KB_Noise_Off.wav new file mode 100644 index 0000000..64d9c6f Binary files /dev/null and b/Sounds/KB_Noise_Off.wav differ diff --git a/Sounds/KB_Noise_Off_Quiet.wav b/Sounds/KB_Noise_Off_Quiet.wav new file mode 100644 index 0000000..b965e6a Binary files /dev/null and b/Sounds/KB_Noise_Off_Quiet.wav differ diff --git a/Sounds/KB_Noise_On.wav b/Sounds/KB_Noise_On.wav new file mode 100644 index 0000000..a959041 Binary files /dev/null and b/Sounds/KB_Noise_On.wav differ diff --git a/Sounds/KB_Noise_On_Quiet.wav b/Sounds/KB_Noise_On_Quiet.wav new file mode 100644 index 0000000..e49513e Binary files /dev/null and b/Sounds/KB_Noise_On_Quiet.wav differ diff --git a/Sounds/Noise_Off.wav b/Sounds/Noise_Off.wav new file mode 100644 index 0000000..0d3843c Binary files /dev/null and b/Sounds/Noise_Off.wav differ diff --git a/Sounds/Noise_Off_Quiet.wav b/Sounds/Noise_Off_Quiet.wav new file mode 100644 index 0000000..d5c6171 Binary files /dev/null and b/Sounds/Noise_Off_Quiet.wav differ diff --git a/Sounds/Noise_On.wav b/Sounds/Noise_On.wav new file mode 100644 index 0000000..28c8f6b Binary files /dev/null and b/Sounds/Noise_On.wav differ diff --git a/Sounds/Noise_On_Quiet.wav b/Sounds/Noise_On_Quiet.wav new file mode 100644 index 0000000..79170f5 Binary files /dev/null and b/Sounds/Noise_On_Quiet.wav differ diff --git a/Sounds/speech_noise.wav b/Sounds/speech_noise.wav new file mode 100644 index 0000000..a6224ee Binary files /dev/null and b/Sounds/speech_noise.wav differ diff --git a/app/hi.py b/app/hi.py index bab0fd4..1297b37 100644 --- a/app/hi.py +++ b/app/hi.py @@ -1,25 +1,34 @@ import app_config import argparse import io +import keybind_event_machine from math import floor, ceil import msvcrt import os from pythonosc import udp_client import sentencepiece as spm +import steamvr from shared_thread_data import SharedThreadData import stt import sys import threading import time +import pygame sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8') sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding='utf-8') +# Initialize pygame mixer +pygame.mixer.init() + TESTS_ENABLED = True # 0 = quiet, 1 = verbose, 2 = very verbose LOG_LEVEL = 0 +# Global volume control (0.0 to 1.0) +VOLUME = 0.3 + APP_ROOT = os.path.dirname(os.path.abspath(__file__)) PROJECT_ROOT = os.path.dirname(APP_ROOT) @@ -315,79 +324,276 @@ def handle_input(state: InputState, line: str, tokenizer, osc_client, cfg): send_data(osc_client, [indices[0]], [diff_blocks[0]], [diff_visual_pointers[0]]) def osc_thread(shared_data: SharedThreadData): - tokenizer = get_tokenizer() osc_client = getOscClient() - # Prime the board - print("Priming the board") - input_state = InputState() - handle_input(input_state, "", tokenizer, osc_client, shared_data.cfg) + def join_segments(a, b): + if len(a) > 0 and a[-1] != ' ': + return a + ' ' + b + else: + return a + b + + if shared_data.cfg["use_builtin"]: + last_change = time.time() + remote_word = "" + while not shared_data.exit_event.is_set(): + time.sleep(0.1) + local_word = "" + with shared_data.word_lock: + local_word = join_segments(shared_data.transcript, + shared_data.preview) + local_word = local_word[-140:] + if local_word == remote_word: + continue + if time.time() - last_change < 1.5: + continue + addr = "/chatbox/input" + print(f"Send {local_word}", flush=True) + osc_client.send_message(addr, (local_word, True, False)) + last_change = time.time() + remote_word = local_word + else: + # Custom chatbox + tokenizer = get_tokenizer() + + # Prime the board + print("Priming the board") + input_state = InputState() + handle_input(input_state, "", tokenizer, osc_client, shared_data.cfg) + + while not shared_data.exit_event.is_set(): + word_copy = "" + with shared_data.word_lock: + word_copy = shared_data.word + handle_input(input_state, word_copy, tokenizer, osc_client, shared_data.cfg) + time.sleep(0.01) + + +def vrInputThread(shared_data: SharedThreadData): + RECORD_STATE = 0 + PAUSE_STATE = 1 + state = PAUSE_STATE + + hand_id = shared_data.cfg["button_hand"] + button_id = shared_data.cfg["button_type"] + # Rough description of state machine: + # Single short press: toggle transcription + # Medium press: dismiss custom chatbox + # Long press: update chatbox in place + # Medium press + long press: type transcription + + last_rising = time.time() + last_medium_press_end = 0 + + waveform0 = os.path.join(PROJECT_ROOT, "Sounds/Noise_On_Quiet.wav") + waveform1 = os.path.join(PROJECT_ROOT, "Sounds/Noise_Off_Quiet.wav") + waveform2 = os.path.join(PROJECT_ROOT, "Sounds/Dismiss_Noise_Quiet.wav") + waveform3 = os.path.join(PROJECT_ROOT, "Sounds/KB_Noise_Off_Quiet.wav") + + button_generator = steamvr.pollButtonPress(hand=hand_id, button=button_id, + shared_data=shared_data) while not shared_data.exit_event.is_set(): - word_copy = "" + time.sleep(0.01) + try: + event = next(button_generator) + except StopIteration: + break + with shared_data.word_lock: - word_copy = shared_data.word - handle_input(input_state, word_copy, tokenizer, osc_client, shared_data.cfg) + if not shared_data.stream or not shared_data.collector: + continue + + if event.opcode == steamvr.EVENT_RISING_EDGE: + last_rising = time.time() + + if state == PAUSE_STATE: + shared_data.stream.pause(False) + shared_data.stream.getSamples() + + elif event.opcode == steamvr.EVENT_FALLING_EDGE: + now = time.time() + if now - last_rising > 1.5: + # Long press: treat as the end of transcription. + state = PAUSE_STATE + + shared_data.stream.pause(True) + + if last_rising - last_medium_press_end < 1.0: + # Type transcription + if shared_data.cfg["enable_local_beep"]: + play_sound_with_volume(waveform3) + else: + if shared_data.cfg["enable_local_beep"]: + play_sound_with_volume(waveform1) + + elif now - last_rising > 0.5: + # Medium press + print("CLEARING", file=sys.stderr) + last_medium_press_end = now + state = PAUSE_STATE + + if shared_data.cfg["enable_local_beep"]: + play_sound_with_volume(waveform2) + + # Flush the *entire* pipeline. + shared_data.stream.pause(True) + shared_data.stream.getSamples() + shared_data.collector.dropAudio() + shared_data.transcript = "" + shared_data.preview = "" + continue + + # Short hold + if state == RECORD_STATE: + print("PAUSED", file=sys.stderr) + state = PAUSE_STATE + + shared_data.stream.pause(True) + + if shared_data.cfg["enable_local_beep"]: + play_sound_with_volume(waveform1) + elif state == PAUSE_STATE: + print("RECORDING", file=sys.stderr) + state = RECORD_STATE + if shared_data.cfg["reset_on_toggle"]: + if shared_data.cfg["enable_debug_mode"]: + print("Toggle detected, dropping transcript (3)", + file=sys.stderr) + shared_data.transcript = "" + shared_data.preview = "" + #audio_state.drop_transcription = True + else: + if shared_data.cfg["enable_debug_mode"]: + print("Toggle detected, committing preview text (3)", + file=sys.stderr) + #audio_state.text += audio_state.preview_text + + shared_data.stream.pause(False) + + if shared_data.cfg["enable_local_beep"]: + play_sound_with_volume(waveform0) + + +def kbInputThread(shared_data: SharedThreadData): + machine = keybind_event_machine.KeybindEventMachine(shared_data.cfg["keybind"]) + last_press_time = 0 + + # double pressing the keybind + double_press_timeout = 0.5 + + RECORD_STATE = 0 + PAUSE_STATE = 1 + state = PAUSE_STATE + + waveform0 = os.path.join(PROJECT_ROOT, "Sounds/Noise_On_Quiet.wav") + waveform1 = os.path.join(PROJECT_ROOT, "Sounds/Noise_Off_Quiet.wav") + waveform2 = os.path.join(PROJECT_ROOT, "Sounds/Dismiss_Noise_Quiet.wav") + waveform3 = os.path.join(PROJECT_ROOT, "Sounds/KB_Noise_Off_Quiet.wav") + + while not shared_data.exit_event.is_set(): time.sleep(0.01) + cur_press_time = machine.getNextPressTime() + if cur_press_time == 0: + continue + + with shared_data.word_lock: + if not shared_data.stream or not shared_data.collector: + continue + + EVENT_SINGLE_PRESS = 0 + EVENT_DOUBLE_PRESS = 1 + if last_press_time == 0: + event = EVENT_SINGLE_PRESS + elif cur_press_time - last_press_time < double_press_timeout: + event = EVENT_DOUBLE_PRESS + else: + event = EVENT_SINGLE_PRESS + last_press_time = cur_press_time + + if event == EVENT_DOUBLE_PRESS: + print("CLEARING", file=sys.stderr) + state = PAUSE_STATE + + if shared_data.cfg["enable_local_beep"]: + play_sound_with_volume(waveform2) + + # Flush the *entire* pipeline. + shared_data.stream.pause(True) + shared_data.stream.getSamples() + shared_data.collector.dropAudio() + shared_data.transcript = "" + shared_data.preview = "" + continue + + # Short hold + if state == RECORD_STATE: + print("PAUSED", file=sys.stderr) + state = PAUSE_STATE + + shared_data.stream.pause(True) + + if shared_data.cfg["enable_local_beep"]: + play_sound_with_volume(waveform1) + elif state == PAUSE_STATE: + print("RECORDING", file=sys.stderr) + state = RECORD_STATE + if shared_data.cfg["reset_on_toggle"]: + if shared_data.cfg["enable_debug_mode"]: + print("Toggle detected, dropping transcript (2)", + file=sys.stderr) + shared_data.transcript = "" + shared_data.preview = "" + else: + if shared_data.cfg["enable_debug_mode"]: + print("Toggle detected, committing preview text (2)", + file=sys.stderr) + #audio_state.text += audio_state.preview_text + + shared_data.stream.pause(False) + + if shared_data.cfg["enable_local_beep"]: + play_sound_with_volume(waveform0) + +def play_sound_with_volume(filepath): + """Play a WAV file with adjusted volume""" + volume = VOLUME + + try: + sound = pygame.mixer.Sound(filepath) + sound.set_volume(volume) + sound.play() + except Exception as e: + print(f"Error playing sound {filepath}: {e}", file=sys.stderr) + if __name__ == "__main__": cli_args = parse_args() cfg = app_config.getConfig(cli_args.config) shared_data = SharedThreadData(cfg) - if False: - osc_thread = threading.Thread( - target=osc_thread, - args=(shared_data,)) - osc_thread.start() + osc_thread = threading.Thread( + target=osc_thread, + args=(shared_data,)) + osc_thread.start() transcribe_thread = threading.Thread( target=stt.transcriptionThread, args=(shared_data,)) transcribe_thread.start() + vr_input_thd = threading.Thread(target=vrInputThread, args=(shared_data,)) + vr_input_thd.start() + + kb_input_thd = threading.Thread(target=kbInputThread, args=(shared_data,)) + kb_input_thd.start() + word_is_over = False local_word = "" while True: - char_bytes = msvcrt.getch() - if char_bytes == b'\x03': # ctrl+C - break - time.sleep(0.1) continue - - try: - char = char_bytes.decode('utf-8') - if char == '\r' or char == '\n': - word_is_over = True - continue - except UnicodeDecodeError: - print(f"Unsupported character: {char_bytes}") - if char_bytes == b'\x00' or char_bytes == b'\xe0': - special_char = msvcrt.getch() - continue - - if char_bytes == b'\x03': # ctrl+C - break - elif char_bytes == b'\x08': # backspace - with shared_data.word_lock: - shared_data.word = shared_data.word[:-1] - local_word = shared_data.word - elif char_bytes == b'\x0c': # ctrl+L - with shared_data.word_lock: - shared_data.word = "" - local_word = shared_data.word - elif word_is_over: - with shared_data.word_lock: - shared_data.word = char - local_word = shared_data.word - word_is_over = False - else: - with shared_data.word_lock: - shared_data.word += char - local_word = shared_data.word - print(local_word + "_") shared_data.exit_event.set() - if False: - osc_thread.join() + osc_thread.join() transcribe_thread.join() + vr_input_thd.join() + kb_input_thd.join() diff --git a/app/keybind_event_machine.py b/app/keybind_event_machine.py new file mode 100644 index 0000000..3ce6794 --- /dev/null +++ b/app/keybind_event_machine.py @@ -0,0 +1,21 @@ +import keyboard +import time + +class KeybindEventMachine: + def __init__(self, keybind: str): + self.keybind = keybind + self.events = [] + keyboard.add_hotkey(keybind, self.onPress) + + def onPress(self) -> None: + self.events.append(time.time()) + + # Returns the timestamp when the keybind was pressed, or 0 if no keypresses + # are queued. + def getNextPressTime(self) -> int: + if len(self.events) == 0: + return 0 + ret = self.events[0] + self.events = self.events[1:] + return ret + diff --git a/app/requirements.txt b/app/requirements.txt index f8b7069..e68a16c 100644 --- a/app/requirements.txt +++ b/app/requirements.txt @@ -1,8 +1,11 @@ faster-whisper hf-xet +keyboard langcodes pyaudio +pygame pydub python-osc sentencepiece silero-vad +openvr diff --git a/app/shared_thread_data.py b/app/shared_thread_data.py index ba0a419..40885e8 100644 --- a/app/shared_thread_data.py +++ b/app/shared_thread_data.py @@ -2,7 +2,12 @@ import threading class SharedThreadData: def __init__(self, cfg): - self.word = "" + self.transcript = "" + self.preview = "" + + self.stream = None + self.collector = None + self.word_lock = threading.Lock() self.exit_event = threading.Event() self.cfg = cfg diff --git a/app/steamvr.py b/app/steamvr.py new file mode 100644 index 0000000..64f34f5 --- /dev/null +++ b/app/steamvr.py @@ -0,0 +1,87 @@ +#!/usr/bin/env python3 + +import openvr as vr +import sys +import time + +EVENT_NONE = 0 +EVENT_RISING_EDGE = 1 +EVENT_FALLING_EDGE = 2 + +class InputEvent: + def __init__(self, + opcode: int): + self.opcode = opcode + +# Checks if the given button on the given controller is pressed. +def pollButtonPress( + hand: str = "right", + button: str = "b", + shared_data = None # SharedThreadData object + ) -> int: + hands = {} + hands["left"] = vr.TrackedControllerRole_LeftHand + hands["right"] = vr.TrackedControllerRole_RightHand + + buttons = {} + buttons["a"] = vr.k_EButton_IndexController_A + buttons["b"] = vr.k_EButton_IndexController_B + buttons["thumbstick"] = vr.k_EButton_Axis0 + + system = None + first = True + while not shared_data.exit_event.is_set() and not system: + try: + system = vr.init(vr.VRApplication_Background) + except Exception as e: + if first: + print(f"Failed to start steamVR input thread: {repr(e)}", file=sys.stderr) + first = False + time.sleep(1) + last_packet = 0 + event_high = False + + while not shared_data.exit_event.is_set(): + time.sleep(0.01) + + lh_idx = system.getTrackedDeviceIndexForControllerRole(hands[hand]) + #print("left hand device idx: {}".format(lh_idx)) + + got_state, state = system.getControllerState(lh_idx) + if not got_state: + continue + + if state.unPacketNum == last_packet: + continue + + # Clicking joysticks and moving joysticks fire the same events. To + # differentiate movement from clicking, we create a dead zone: if the event + # fires while the stick isn't moved far from center, we assume it's a + # click, not movement. + dead_zone_radius = 0.7 + + button_mask = (1 << buttons[button]) + ret = EVENT_NONE + if (state.ulButtonPressed & button_mask) != 0 and\ + (state.rAxis[0].x**2 + state.rAxis[0].y**2 < dead_zone_radius**2): + #print("button pressed: %016x" % state.ulButtonPressed) + #for i in range(0, 5): + # print("axis {} x: {} y: {}".format(i, state.rAxis[i].x, state.rAxis[i].y)) + if not event_high: + yield InputEvent(EVENT_RISING_EDGE) + event_high = True + elif event_high: + event_high = False + yield InputEvent(EVENT_FALLING_EDGE) + +if __name__ == "__main__": + gen = pollButtonPress() + while True: + time.sleep(0.1) + + event = pollButtonPress(session_state) + if event == EVENT_RISING_EDGE: + print("rising edge") + elif event == EVENT_FALLING_EDGE: + print("falling edge") + diff --git a/app/stt.py b/app/stt.py index a3988e1..c1f4836 100644 --- a/app/stt.py +++ b/app/stt.py @@ -299,9 +299,11 @@ class CompressingAudioCollector(AudioCollectorFilter): class AudioSegmenter: def __init__(self, min_silence_ms=250, - max_speech_s=5): + max_speech_s=5, + min_speech_duration_ms=100): self.min_silence_ms = min_silence_ms self.max_speech_s = max_speech_s + self.min_speech_duration_ms = min_speech_duration_ms # Load Silero VAD model self.model = load_silero_vad() @@ -309,6 +311,7 @@ class AudioSegmenter: self.vad_threshold = 0.3 self.min_silence_duration_ms = min_silence_ms self.max_speech_duration_s = max_speech_s + self.min_speech_duration_ms = min_speech_duration_ms def segmentAudio(self, audio: bytes): # Convert audio bytes to numpy array expected by silero-vad @@ -324,6 +327,7 @@ class AudioSegmenter: threshold=self.vad_threshold, min_silence_duration_ms=self.min_silence_duration_ms, max_speech_duration_s=self.max_speech_duration_s, + min_speech_duration_ms=self.min_speech_duration_ms, return_seconds=False # We want frame indices, not seconds ) @@ -698,7 +702,8 @@ def transcriptionThread(shared_data: SharedThreadData): collector = NormalizingAudioCollector(collector) whisper = Whisper(collector, shared_data.cfg) segmenter = AudioSegmenter(min_silence_ms=shared_data.cfg["min_silence_duration_ms"], - max_speech_s=shared_data.cfg["max_speech_duration_s"]) + max_speech_s=shared_data.cfg["max_speech_duration_s"], + min_speech_duration_ms=shared_data.cfg["min_speech_duration_ms"]) committer = VadCommitter(shared_data.cfg, collector, whisper, segmenter) plugins = [] @@ -715,6 +720,10 @@ def transcriptionThread(shared_data: SharedThreadData): transcript = "" preview = "" + with shared_data.word_lock: + shared_data.stream = stream + shared_data.collector = collector + print(f"Ready to go!", flush=True) while not shared_data.exit_event.is_set(): @@ -724,70 +733,72 @@ def transcriptionThread(shared_data: SharedThreadData): commit = committer.getDelta() - for plugin in plugins: - commit = plugin.transform(commit) - - if len(commit.delta) > 0 or len(commit.preview) > 0: - # Avoid re-sending text after long pauses - if shared_data.cfg["reset_after_silence_s"] > 0: - silence_duration = 0 - if last_stable_commit: - last_commit_end_ts = \ - last_stable_commit.start_ts + \ - last_stable_commit.duration_s - silence_duration = commit.start_ts - last_commit_end_ts - if silence_duration > shared_data.cfg["reset_after_silence_s"]: - if shared_data.cfg["enable_debug_mode"]: - print(f"Resetting transcript after {silence_duration}-second " - "silence", file=sys.stderr) - transcript = "" - preview = "" - whisper.recent_context = "" # Reset context too - if commit.delta: - last_stable_commit = commit - - # Hard-cap displayed transcript length at 4k characters to prevent - # runaway memory use in UI. Keep the full transcript to avoid - # breaking OSC pager. - transcript = transcript[-4096:] - def join_segments(a, b): - if len(a) > 0 and a[-1] != ' ': - return a + ' ' + b - else: - return a + b - transcript = join_segments(transcript, commit.delta) - preview = commit.preview - - for filt in filters: - transcript, preview = filt.transform(transcript, preview) - - try: - print(f"Transcript: {transcript}", flush=True) - except UnicodeEncodeError: - print("Failed to encode transcript - discarding delta", - file=sys.stderr) - continue - try: - print(f"Preview: {preview}", flush=True) - except UnicodeEncodeError: - print("Failed to encode preview - discarding", file=sys.stderr) - - with shared_data.word_lock: - shared_data.word = join_segments(transcript, preview) - - if shared_data.cfg["enable_debug_mode"]: - print(f"commit latency: {commit.latency_s}", file=sys.stderr) - print(f"commit thresh: {commit.thresh_at_commit}", - file=sys.stderr) - - if len(transcript) > 0 and \ - (not transcript.endswith(' ')) and \ - (not commit.delta.startswith(' ')): - commit.delta = ' ' + commit.delta - if len(commit.delta) > 0 and \ - (not commit.delta.endswith(' ')) and \ - (not commit.preview.startswith(' ')): - commit.preview = ' ' + commit.preview + with shared_data.word_lock: + for plugin in plugins: + commit = plugin.transform(commit) + + if len(commit.delta) > 0 or len(commit.preview) > 0: + # Avoid re-sending text after long pauses + if shared_data.cfg["reset_after_silence_s"] > 0: + silence_duration = 0 + if last_stable_commit: + last_commit_end_ts = \ + last_stable_commit.start_ts + \ + last_stable_commit.duration_s + silence_duration = commit.start_ts - last_commit_end_ts + if silence_duration > shared_data.cfg["reset_after_silence_s"]: + if shared_data.cfg["enable_debug_mode"]: + print(f"Resetting transcript after {silence_duration}-second " + "silence", file=sys.stderr) + shared_data.transcript = "" + shared_data.preview = "" + whisper.recent_context = "" # Reset context too + if commit.delta: + last_stable_commit = commit + + # Hard-cap displayed transcript length to prevent + # runaway memory use in UI. Keep the full transcript to avoid + # breaking OSC pager. + if len(shared_data.transcript) >= 1024: + shared_data.transcript = shared_data.transcript[-512:] + def join_segments(a, b): + if len(a) > 0 and a[-1] != ' ': + return a + ' ' + b + else: + return a + b + shared_data.transcript = \ + join_segments(shared_data.transcript, commit.delta) + shared_data.preview = commit.preview + + for filt in filters: + shared_data.transcript, shared_data.preview = \ + filt.transform(shared_data.transcript, + shared_data.preview) + + try: + print(f"Transcript: {shared_data.transcript}", flush=True) + except UnicodeEncodeError: + print("Failed to encode transcript - discarding delta", + file=sys.stderr) + continue + try: + print(f"Preview: {shared_data.preview}", flush=True) + except UnicodeEncodeError: + print("Failed to encode preview - discarding", file=sys.stderr) + + if shared_data.cfg["enable_debug_mode"]: + print(f"commit latency: {commit.latency_s}", file=sys.stderr) + print(f"commit thresh: {commit.thresh_at_commit}", + file=sys.stderr) + + if len(shared_data.transcript) > 0 and \ + (not shared_data.transcript.endswith(' ')) and \ + (not commit.delta.startswith(' ')): + commit.delta = ' ' + commit.delta + if len(commit.delta) > 0 and \ + (not commit.delta.endswith(' ')) and \ + (not commit.preview.startswith(' ')): + commit.preview = ' ' + commit.preview for plugin in plugins: plugin.stop() for filt in filters: diff --git a/config.yaml b/config.yaml index fea03bb..6f4b65b 100644 --- a/config.yaml +++ b/config.yaml @@ -1,11 +1,15 @@ compute_type: float16 language: english model: turbo -microphone: 2 -user_prompt: Use proper punctuation and grammar. Prefer spelled out numbers like one, eleven, twenty, etc. Mm. +microphone: 1 +user_prompt: Use proper punctuation and grammar. Prefer spelled out numbers like one, eleven, twenty, etc. Mm. Phi, NOPPERS, clearrainbow, Noia, Kuuderekitten. +keybind: ctrl+alt+x +button_hand: right +button_type: b gpu_idx: 0 max_speech_duration_s: 10 -min_silence_duration_ms: 250 +min_speech_duration_ms: 250 +min_silence_duration_ms: 100 reset_after_silence_s: 15 transcription_loop_delay_ms: 100 block_width: 2 @@ -16,9 +20,12 @@ beam_size: 5 best_of: 5 enable_debug_mode: 0 enable_previews: 1 -save_audio: 0 +save_audio: 1 use_cpu: 0 enable_lowercase_filter: 0 enable_uppercase_filter: 0 enable_profanity_filter: 0 remove_trailing_period: 0 +reset_on_toggle: 0 +enable_local_beep: 1 +use_builtin: 1 diff --git a/ui/config-schema.js b/ui/config-schema.js index b1108ff..6b11277 100644 --- a/ui/config-schema.js +++ b/ui/config-schema.js @@ -6,11 +6,15 @@ const CONFIG_SCHEMA = { model: { type: 'select', default: 'turbo' }, microphone: { type: 'number', default: 0 }, user_prompt: { type: 'text', default: 'Use proper punctuation and grammar. Prefer spelled out numbers like one, eleven, twenty, etc. Mm.' }, + keybind: { type: 'text', default: 'ctrl+alt+x' }, + button_hand: { type: 'select', default: 'right' }, + button_type: { type: 'select', default: 'b' }, // Number fields gpu_idx: { type: 'number', default: 0 }, max_speech_duration_s: { type: 'number', default: 10 }, - min_silence_duration_ms: { type: 'number', default: 250 }, + min_speech_duration_ms: { type: 'number', default: 250 }, + min_silence_duration_ms: { type: 'number', default: 100 }, reset_after_silence_s: { type: 'number', default: 15 }, transcription_loop_delay_ms: { type: 'number', default: 100 }, block_width: { type: 'number', default: 2 }, @@ -28,7 +32,10 @@ const CONFIG_SCHEMA = { enable_lowercase_filter: { type: 'boolean', default: 0 }, enable_uppercase_filter: { type: 'boolean', default: 0 }, enable_profanity_filter: { type: 'boolean', default: 0 }, - remove_trailing_period: { type: 'boolean', default: 0 } + remove_trailing_period: { type: 'boolean', default: 0 }, + reset_on_toggle: { type: 'boolean', default: 0 }, + enable_local_beep: { type: 'boolean', default: 1 }, + use_builtin: { type: 'boolean', default: 1 } }; // Helper to extract just the default values diff --git a/ui/index.html b/ui/index.html index 97da3d2..99e64dd 100644 --- a/ui/index.html +++ b/ui/index.html @@ -64,6 +64,31 @@
+
+ + +
+
+ + +
+
+ + +
@@ -110,6 +135,10 @@
+
+ + +
@@ -211,9 +240,30 @@
+ +
+

Input Settings

+
+ + +
+
+

Custom Chatbox Settings

+
+ +
diff --git a/ui/index.js b/ui/index.js index 7717c92..24a7e13 100644 --- a/ui/index.js +++ b/ui/index.js @@ -246,6 +246,21 @@ ipcMain.handle('reset-config', async () => { } }); +ipcMain.handle('deleteVenvIndicatorFile', async () => { + const venvMarkerPath = path.join(APP_ROOT, '.venv_is_set_up'); + try { + await fs.unlink(venvMarkerPath); + return { success: true, message: '.venv_is_set_up deleted successfully.' }; + } catch (error) { + if (error.code === 'ENOENT') { + return { success: true, message: '.venv_is_set_up not found.' }; + } + console.error('Error deleting .venv_is_set_up file:', error); + sendPythonOutput(`Error deleting .venv_is_set_up: ${error.message}`, 'stderr'); + throw error; + } +}); + // Generic function to ensure required files are present async function ensureRequiredFiles(config) { const { @@ -332,7 +347,6 @@ ipcMain.handle('install-requirements', async () => { // Check if venv is already set up try { await fs.access(venvMarkerPath); - sendPythonOutput('Virtual environment already set up, skipping installation', 'info'); return { success: true, message: 'Virtual environment already set up' }; } catch (error) { // Marker doesn't exist, proceed with setup diff --git a/ui/preload.js b/ui/preload.js index 35cc8d6..f2e0a81 100644 --- a/ui/preload.js +++ b/ui/preload.js @@ -6,6 +6,7 @@ contextBridge.exposeInMainWorld('electronAPI', { resetConfig: () => ipcRenderer.invoke('reset-config'), getMicrophones: () => ipcRenderer.invoke('get-microphones'), installRequirements: () => ipcRenderer.invoke('install-requirements'), + deleteVenvIndicatorFile: () => ipcRenderer.invoke('deleteVenvIndicatorFile'), resetVenv: () => ipcRenderer.invoke('reset-venv'), startProcess: () => ipcRenderer.invoke('start-process'), stopProcess: () => ipcRenderer.invoke('stop-process'), diff --git a/ui/renderer.js b/ui/renderer.js index 133a79b..2f4c8f1 100644 --- a/ui/renderer.js +++ b/ui/renderer.js @@ -162,11 +162,28 @@ function setFormValues(config) { } } + // Handle use_builtin toggle state + const useBuiltin = config.use_builtin === 1; + const customChatboxInputs = ['block_width', 'num_blocks', 'rows', 'cols']; + customChatboxInputs.forEach(inputId => { + const input = document.getElementById(inputId); + if (input) { + input.disabled = useBuiltin; + if (useBuiltin) { + input.classList.add('opacity-50', 'cursor-not-allowed'); + } else { + input.classList.remove('opacity-50', 'cursor-not-allowed'); + } + } + }); + isSettingValues = false; // Re-enable auto-save } // Console management const consoleContent = document.getElementById('console-content'); +const MAX_CONSOLE_LINES = 512; +let consoleLineCount = 0; function appendToConsole(message, type = 'stdout') { const timestamp = new Date().toLocaleTimeString(); @@ -183,6 +200,28 @@ function appendToConsole(message, type = 'stdout') { lineDiv.appendChild(messageSpan); consoleContent.appendChild(lineDiv); + consoleLineCount++; + + // Remove old lines if we exceed the limit + if (consoleLineCount > MAX_CONSOLE_LINES) { + // Calculate how many lines to remove (remove 10% to avoid frequent trimming) + const linesToRemove = Math.floor(MAX_CONSOLE_LINES * 0.1); + + // Remove the oldest lines + for (let i = 0; i < linesToRemove; i++) { + if (consoleContent.firstChild) { + consoleContent.removeChild(consoleContent.firstChild); + } + } + + consoleLineCount -= linesToRemove; + + // Add a notice that lines were trimmed + const trimNotice = document.createElement('div'); + trimNotice.className = 'console-info'; + trimNotice.innerHTML = '[System] ... older lines removed to maintain performance ...'; + consoleContent.insertBefore(trimNotice, consoleContent.firstChild); + } // Auto-scroll to bottom const pythonConsole = document.getElementById('python-console'); @@ -316,11 +355,30 @@ function setupEventHandlers() { } }); + // Use builtin chatbox toggle + document.getElementById('use_builtin').addEventListener('change', (e) => { + const customChatboxInputs = ['block_width', 'num_blocks', 'rows', 'cols']; + const isBuiltin = e.target.checked; + + customChatboxInputs.forEach(inputId => { + const input = document.getElementById(inputId); + if (input) { + input.disabled = isBuiltin; + if (isBuiltin) { + input.classList.add('opacity-50', 'cursor-not-allowed'); + } else { + input.classList.remove('opacity-50', 'cursor-not-allowed'); + } + } + }); + }); + // Setup virtual environment document.getElementById('setup-venv').addEventListener('click', async () => { loadingOverlay.show('Setting up virtual environment - please wait...'); // Show overlay with custom message try { await buttonManager.withButtonLoading('setupVenv', async () => { + await window.electronAPI.deleteVenvIndicatorFile(); await handleAsyncAction('Install requirements', () => window.electronAPI.installRequirements()); }); } finally { -- cgit v1.2.3 From 790c91d7ad515c3c0a22ca1341316265b8f0d779 Mon Sep 17 00:00:00 2001 From: yum Date: Wed, 23 Jul 2025 17:41:49 -0700 Subject: bugfixes * fix model acquisition * fix local beepsnd * fix volume control --- app/hi.py | 45 ++++-------- app/requirements.txt | 1 + app/stt.py | 62 ++++++++++++---- config.yaml | 8 +-- ui/config-schema.js | 2 +- ui/index.html | 13 ++-- ui/index.js | 17 +++-- ui/preload.js | 1 + ui/renderer.js | 198 ++++++++++++++++++++++++++++----------------------- 9 files changed, 196 insertions(+), 151 deletions(-) (limited to 'ui') diff --git a/app/hi.py b/app/hi.py index 1297b37..bb09418 100644 --- a/app/hi.py +++ b/app/hi.py @@ -26,9 +26,6 @@ TESTS_ENABLED = True # 0 = quiet, 1 = verbose, 2 = very verbose LOG_LEVEL = 0 -# Global volume control (0.0 to 1.0) -VOLUME = 0.3 - APP_ROOT = os.path.dirname(os.path.abspath(__file__)) PROJECT_ROOT = os.path.dirname(APP_ROOT) @@ -347,7 +344,8 @@ def osc_thread(shared_data: SharedThreadData): if time.time() - last_change < 1.5: continue addr = "/chatbox/input" - print(f"Send {local_word}", flush=True) + if shared_data.cfg["enable_debug_mode"]: + print(f"Send {local_word}", flush=True) osc_client.send_message(addr, (local_word, True, False)) last_change = time.time() remote_word = local_word @@ -420,20 +418,16 @@ def vrInputThread(shared_data: SharedThreadData): if last_rising - last_medium_press_end < 1.0: # Type transcription - if shared_data.cfg["enable_local_beep"]: - play_sound_with_volume(waveform3) + play_sound_with_volume(waveform3, shared_data.cfg) else: - if shared_data.cfg["enable_local_beep"]: - play_sound_with_volume(waveform1) + play_sound_with_volume(waveform1, shared_data.cfg) elif now - last_rising > 0.5: # Medium press print("CLEARING", file=sys.stderr) last_medium_press_end = now state = PAUSE_STATE - - if shared_data.cfg["enable_local_beep"]: - play_sound_with_volume(waveform2) + play_sound_with_volume(waveform2, shared_data.cfg) # Flush the *entire* pipeline. shared_data.stream.pause(True) @@ -449,9 +443,7 @@ def vrInputThread(shared_data: SharedThreadData): state = PAUSE_STATE shared_data.stream.pause(True) - - if shared_data.cfg["enable_local_beep"]: - play_sound_with_volume(waveform1) + play_sound_with_volume(waveform1, shared_data.cfg) elif state == PAUSE_STATE: print("RECORDING", file=sys.stderr) state = RECORD_STATE @@ -469,9 +461,7 @@ def vrInputThread(shared_data: SharedThreadData): #audio_state.text += audio_state.preview_text shared_data.stream.pause(False) - - if shared_data.cfg["enable_local_beep"]: - play_sound_with_volume(waveform0) + play_sound_with_volume(waveform0, shared_data.cfg) def kbInputThread(shared_data: SharedThreadData): @@ -514,9 +504,7 @@ def kbInputThread(shared_data: SharedThreadData): if event == EVENT_DOUBLE_PRESS: print("CLEARING", file=sys.stderr) state = PAUSE_STATE - - if shared_data.cfg["enable_local_beep"]: - play_sound_with_volume(waveform2) + play_sound_with_volume(waveform2, shared_data.cfg) # Flush the *entire* pipeline. shared_data.stream.pause(True) @@ -530,11 +518,8 @@ def kbInputThread(shared_data: SharedThreadData): if state == RECORD_STATE: print("PAUSED", file=sys.stderr) state = PAUSE_STATE - shared_data.stream.pause(True) - - if shared_data.cfg["enable_local_beep"]: - play_sound_with_volume(waveform1) + play_sound_with_volume(waveform1, shared_data.cfg) elif state == PAUSE_STATE: print("RECORDING", file=sys.stderr) state = RECORD_STATE @@ -548,20 +533,16 @@ def kbInputThread(shared_data: SharedThreadData): if shared_data.cfg["enable_debug_mode"]: print("Toggle detected, committing preview text (2)", file=sys.stderr) - #audio_state.text += audio_state.preview_text - shared_data.stream.pause(False) + play_sound_with_volume(waveform0, shared_data.cfg) - if shared_data.cfg["enable_local_beep"]: - play_sound_with_volume(waveform0) - -def play_sound_with_volume(filepath): +def play_sound_with_volume(filepath, cfg): """Play a WAV file with adjusted volume""" - volume = VOLUME + volume = cfg.get("volume", 30) try: sound = pygame.mixer.Sound(filepath) - sound.set_volume(volume) + sound.set_volume(volume * 0.01) sound.play() except Exception as e: print(f"Error playing sound {filepath}: {e}", file=sys.stderr) diff --git a/app/requirements.txt b/app/requirements.txt index e68a16c..c8d69df 100644 --- a/app/requirements.txt +++ b/app/requirements.txt @@ -2,6 +2,7 @@ faster-whisper hf-xet keyboard langcodes +noisereduce pyaudio pygame pydub diff --git a/app/stt.py b/app/stt.py index c1f4836..79ab0d1 100644 --- a/app/stt.py +++ b/app/stt.py @@ -3,6 +3,7 @@ from faster_whisper import WhisperModel import langcodes import numpy as np import os +import noisereduce as nr try: from profanity_filter import ProfanityFilter PROFANITY_FILTER_AVAILABLE = True @@ -260,9 +261,13 @@ class NormalizingAudioCollector(AudioCollectorFilter): return frames class BoostingAudioCollector(AudioCollectorFilter): - def __init__(self, parent: AudioCollector, target_dBFS: float, cfg: typing.Dict): + def __init__(self, parent: AudioCollector, + target_dBFS: float, + max_gain_dB: float, + cfg: typing.Dict): AudioCollectorFilter.__init__(self, parent) self.target_dBFS = target_dBFS + self.max_gain_dB = max_gain_dB self.cfg = cfg def getAudio(self) -> bytes: @@ -270,9 +275,10 @@ class BoostingAudioCollector(AudioCollectorFilter): audio = AudioSegment(audio, sample_width=AudioStream.FRAME_SZ, frame_rate=AudioStream.FPS, channels=AudioStream.CHANNELS) + gain = min(self.target_dBFS - audio.dBFS, self.max_gain_dB) if self.cfg["enable_debug_mode"]: - print(f"Boosting audio from {audio.dBFS}dB to {self.target_dBFS}dB", file=sys.stderr) - audio = audio.apply_gain(self.target_dBFS - audio.dBFS) + print(f"Boosting audio by {gain} dB (from {audio.dBFS} to {audio.dBFS + gain})", flush=True) + audio = audio.apply_gain(gain) frames = np.array(audio.get_array_of_samples()) frames = np.int16(frames).tobytes() @@ -296,6 +302,26 @@ class CompressingAudioCollector(AudioCollectorFilter): return frames +class NoiseReducingAudioCollector(AudioCollectorFilter): + def __init__(self, parent: AudioCollector, cfg: typing.Dict): + AudioCollectorFilter.__init__(self, parent) + self.cfg = cfg + + def getAudio(self) -> bytes: + audio = self.parent.getAudio() + audio_array = np.frombuffer(audio, dtype=np.int16).astype(np.float32) + + reduced_audio = nr.reduce_noise( + y=audio_array, + sr=AudioStream.FPS, + ) + + # Convert back to int16 + reduced_audio = np.clip(reduced_audio, -32768, 32767) + frames = np.int16(reduced_audio).tobytes() + + return frames + class AudioSegmenter: def __init__(self, min_silence_ms=250, @@ -398,6 +424,12 @@ class Segment: avg_logprob = f"(avg_logprob: {self.avg_logprob}) " return f"{self.transcript} " + ts + wall_ts + no_speech + avg_logprob +def join_segments(a, b): + if len(a) > 0 and a[-1] != ' ': + return a + ' ' + b + else: + return a + b + class Whisper: def __init__(self, collector: AudioCollector, @@ -421,6 +453,9 @@ class Whisper: already_downloaded = os.path.exists(model_root) + if not already_downloaded: + print(f"Model {model_str} not already downloaded, downloading now...", flush=True) + self.model = WhisperModel(model_str, device = model_device, device_index = cfg["gpu_idx"], @@ -433,10 +468,12 @@ class Whisper: def update_context(self, committed_text: str): """Update the context with recently committed text.""" - self.recent_context = (self.recent_context + " " + committed_text).strip() - # Keep only the last N characters to avoid prompt getting too long + self.recent_context = join_segments(self.recent_context, committed_text).strip() + # Drop half of the context window. if len(self.recent_context) > self.context_window_chars: - self.recent_context = self.recent_context[-self.context_window_chars:] + words = self.recent_context.split() + words = words[len(words)//2:] + self.recent_context = ' '.join(words) def transcribe(self, frames: bytes = None) -> typing.List[Segment]: if frames is None: @@ -449,6 +486,8 @@ class Whisper: # Build context-aware prompt prompt = self._build_prompt() + print(f"Prompt: {prompt}", flush=True) + t0 = time.time() segments, info = self.model.transcribe( audio, @@ -698,8 +737,10 @@ def transcriptionThread(shared_data: SharedThreadData): stream = MicStream(shared_data.cfg) collector = AudioCollector(stream) collector = CompressingAudioCollector(collector) - collector = BoostingAudioCollector(collector, -12.0, shared_data.cfg) - collector = NormalizingAudioCollector(collector) + collector = BoostingAudioCollector(collector, -24.0, 24.0, + shared_data.cfg) + collector = NoiseReducingAudioCollector(collector, shared_data.cfg) + #collector = NormalizingAudioCollector(collector) whisper = Whisper(collector, shared_data.cfg) segmenter = AudioSegmenter(min_silence_ms=shared_data.cfg["min_silence_duration_ms"], max_speech_s=shared_data.cfg["max_speech_duration_s"], @@ -761,11 +802,6 @@ def transcriptionThread(shared_data: SharedThreadData): # breaking OSC pager. if len(shared_data.transcript) >= 1024: shared_data.transcript = shared_data.transcript[-512:] - def join_segments(a, b): - if len(a) > 0 and a[-1] != ' ': - return a + ' ' + b - else: - return a + b shared_data.transcript = \ join_segments(shared_data.transcript, commit.delta) shared_data.preview = commit.preview diff --git a/config.yaml b/config.yaml index 6f4b65b..dfa2e1f 100644 --- a/config.yaml +++ b/config.yaml @@ -1,8 +1,8 @@ compute_type: float16 language: english model: turbo -microphone: 1 -user_prompt: Use proper punctuation and grammar. Prefer spelled out numbers like one, eleven, twenty, etc. Mm. Phi, NOPPERS, clearrainbow, Noia, Kuuderekitten. +microphone: 4 +user_prompt: Use proper punctuation and grammar. Prefer spelled out numbers like one, eleven, twenty, etc. Mm. keybind: ctrl+alt+x button_hand: right button_type: b @@ -18,6 +18,7 @@ rows: 10 cols: 24 beam_size: 5 best_of: 5 +volume: 10 enable_debug_mode: 0 enable_previews: 1 save_audio: 1 @@ -26,6 +27,5 @@ enable_lowercase_filter: 0 enable_uppercase_filter: 0 enable_profanity_filter: 0 remove_trailing_period: 0 -reset_on_toggle: 0 -enable_local_beep: 1 +reset_on_toggle: 1 use_builtin: 1 diff --git a/ui/config-schema.js b/ui/config-schema.js index 6b11277..bf91fce 100644 --- a/ui/config-schema.js +++ b/ui/config-schema.js @@ -23,6 +23,7 @@ const CONFIG_SCHEMA = { cols: { type: 'number', default: 24 }, beam_size: { type: 'number', default: 5 }, best_of: { type: 'number', default: 5 }, + volume: { type: 'number', default: 30 }, // Boolean fields (stored as 1/0) enable_debug_mode: { type: 'boolean', default: 0 }, @@ -34,7 +35,6 @@ const CONFIG_SCHEMA = { enable_profanity_filter: { type: 'boolean', default: 0 }, remove_trailing_period: { type: 'boolean', default: 0 }, reset_on_toggle: { type: 'boolean', default: 0 }, - enable_local_beep: { type: 'boolean', default: 1 }, use_builtin: { type: 'boolean', default: 1 } }; diff --git a/ui/index.html b/ui/index.html index 99e64dd..19c41ce 100644 --- a/ui/index.html +++ b/ui/index.html @@ -248,10 +248,13 @@ Reset transcript on toggle - +
+ + +
@@ -314,7 +317,7 @@ -
diff --git a/ui/index.js b/ui/index.js index 24a7e13..5a5d0a6 100644 --- a/ui/index.js +++ b/ui/index.js @@ -530,19 +530,20 @@ ipcMain.handle('start-process', async () => { }); ipcMain.handle('stop-process', async () => { + if (!runningProcess) { + sendPythonOutput('No process to stop', 'info'); + return { success: true, forcefullyKilled: false }; + } + return new Promise((resolve) => { let forcefullyKilled = false; - - if (!runningProcess) { - resolve({ success: true, forcefullyKilled }); - } // Set up a timeout to force kill after 10 seconds const killTimeout = setTimeout(() => { if (runningProcess) { sendPythonOutput('Process did not stop gracefully, forcing termination...', 'stderr'); forcefullyKilled = true; - runningProcess.kill(); + runningProcess.kill('SIGKILL'); } }, 10000); @@ -562,10 +563,14 @@ ipcMain.handle('stop-process', async () => { // Send termination signal sendPythonOutput('Stopping process gracefully...', 'info'); - runningProcess.kill(); + runningProcess.kill('SIGTERM'); }); }); +ipcMain.handle('get-process-state', () => { + return { isRunning: runningProcess !== null }; +}); + // Clean up on app quit app.on('before-quit', () => { if (runningProcess) { diff --git a/ui/preload.js b/ui/preload.js index f2e0a81..6f6e54f 100644 --- a/ui/preload.js +++ b/ui/preload.js @@ -10,6 +10,7 @@ contextBridge.exposeInMainWorld('electronAPI', { resetVenv: () => ipcRenderer.invoke('reset-venv'), startProcess: () => ipcRenderer.invoke('start-process'), stopProcess: () => ipcRenderer.invoke('stop-process'), + getProcessState: () => ipcRenderer.invoke('get-process-state'), onPythonOutput: (callback) => ipcRenderer.on('python-output', (event, data) => callback(data)), onProcessStopped: (callback) => ipcRenderer.on('process-stopped', () => callback()) }); diff --git a/ui/renderer.js b/ui/renderer.js index 2f4c8f1..008e0da 100644 --- a/ui/renderer.js +++ b/ui/renderer.js @@ -1,6 +1,21 @@ // Import configuration schema const CONFIG_FIELDS = window.CONFIG_SCHEMA; +// Process state tracking +let isProcessRunning = false; +let buttonManager; +let loadingOverlay; + +// Auto-save functionality with debouncing +let saveTimeout; +const SAVE_DELAY = 500; +let isSettingValues = false; + +// Console management +const consoleContent = document.getElementById('console-content'); +const MAX_CONSOLE_LINES = 512; +let consoleLineCount = 0; + // Button management system class ButtonManager { constructor() { @@ -11,33 +26,30 @@ class ButtonManager { resetVenv: document.getElementById('reset-venv'), refreshMicrophones: document.getElementById('refresh-microphones') }; - - // Initialize button states on construction + + // Initialize button states - process is not running at startup this.setProcessStopped(); } - + setState(buttonName, disabled) { const button = this.buttons[buttonName]; if (!button) return; - + button.disabled = disabled; - if (disabled) { - button.classList.add('opacity-50', 'cursor-not-allowed'); - } else { - button.classList.remove('opacity-50', 'cursor-not-allowed'); - } } - + setProcessRunning() { this.setState('start', true); this.setState('stop', false); + isProcessRunning = true; } - + setProcessStopped() { this.setState('start', false); this.setState('stop', true); + isProcessRunning = false; } - + async withButtonLoading(buttonName, asyncFn) { this.setState(buttonName, true); try { @@ -48,8 +60,6 @@ class ButtonManager { } } -const buttonManager = new ButtonManager(); - // Add loading overlay management class LoadingOverlay { constructor() { @@ -57,8 +67,9 @@ class LoadingOverlay { this.form = document.getElementById('config-form'); this.messageElement = this.overlay.querySelector('p'); this.defaultMessage = 'Environment setup underway - please wait.'; + this.originalStates = new Map(); // Track original disabled states } - + show(message = null) { this.messageElement.textContent = message || this.defaultMessage; this.overlay.classList.remove('hidden'); @@ -66,68 +77,69 @@ class LoadingOverlay { const leftPanel = this.overlay.parentElement; const inputs = leftPanel.querySelectorAll('input, select, textarea, button'); inputs.forEach(input => { + // Store original disabled state before disabling + this.originalStates.set(input, input.disabled); input.disabled = true; input.classList.add('opacity-50'); }); } - + hide() { this.overlay.classList.add('hidden'); - // Re-enable all form inputs and buttons in the entire left panel + // Restore original states of form inputs and buttons const leftPanel = this.overlay.parentElement; const inputs = leftPanel.querySelectorAll('input, select, textarea, button'); inputs.forEach(input => { - input.disabled = false; + // Restore original disabled state + input.disabled = this.originalStates.get(input) || false; input.classList.remove('opacity-50'); }); + // Clear the stored states + this.originalStates.clear(); // Reset to default message this.messageElement.textContent = this.defaultMessage; } } -const loadingOverlay = new LoadingOverlay(); - -// Add a flag to prevent auto-save during programmatic updates -let isSettingValues = false; - // Handle status messages with better color management function showStatus(message, type = 'info') { const statusEl = document.getElementById('status-message'); statusEl.textContent = message; - + // Remove all status classes const statusClasses = ['hidden', 'bg-green-100', 'bg-red-100', 'bg-blue-100', 'text-green-800', 'text-red-800', 'text-blue-800']; statusEl.classList.remove(...statusClasses); - + // Add appropriate classes based on type const typeMap = { success: ['bg-green-100', 'text-green-800'], error: ['bg-red-100', 'text-red-800'], info: ['bg-blue-100', 'text-blue-800'] }; - + statusEl.classList.add(...(typeMap[type] || typeMap.info)); - + // Also log to console appendToConsole(message, type === 'error' ? 'stderr' : 'info'); - + setTimeout(() => statusEl.classList.add('hidden'), 5000); } // Get form values using field mappings function getFormValues() { const config = {}; - + for (const [fieldName, fieldConfig] of Object.entries(CONFIG_FIELDS)) { const element = document.getElementById(fieldName); if (!element) continue; - + switch (fieldConfig.type) { case 'boolean': config[fieldName] = element.checked ? 1 : 0; break; case 'number': - config[fieldName] = parseInt(element.value) || fieldConfig.default; + const numValue = parseInt(element.value); + config[fieldName] = isNaN(numValue) ? fieldConfig.default : numValue; break; case 'text': config[fieldName] = element.value || fieldConfig.default; @@ -136,20 +148,20 @@ function getFormValues() { config[fieldName] = element.value || fieldConfig.default; } } - + return config; } // Set form values using field mappings function setFormValues(config) { isSettingValues = true; // Disable auto-save temporarily - + for (const [fieldName, fieldConfig] of Object.entries(CONFIG_FIELDS)) { const element = document.getElementById(fieldName); if (!element) continue; - + const value = config[fieldName] ?? fieldConfig.default; - + switch (fieldConfig.type) { case 'boolean': element.checked = value === 1; @@ -161,7 +173,7 @@ function setFormValues(config) { element.value = value; } } - + // Handle use_builtin toggle state const useBuiltin = config.use_builtin === 1; const customChatboxInputs = ['block_width', 'num_blocks', 'rows', 'cols']; @@ -176,53 +188,54 @@ function setFormValues(config) { } } }); - + + // Update volume display + if (config.volume !== undefined) { + const volumePercent = Math.round(config.volume); + document.getElementById('volume-display').textContent = `${volumePercent}%`; + } + isSettingValues = false; // Re-enable auto-save } -// Console management -const consoleContent = document.getElementById('console-content'); -const MAX_CONSOLE_LINES = 512; -let consoleLineCount = 0; - function appendToConsole(message, type = 'stdout') { const timestamp = new Date().toLocaleTimeString(); const timestampSpan = document.createElement('span'); timestampSpan.className = 'console-timestamp'; timestampSpan.textContent = `[${timestamp}] `; - + const messageSpan = document.createElement('span'); messageSpan.className = `console-${type}`; messageSpan.textContent = message; - + const lineDiv = document.createElement('div'); lineDiv.appendChild(timestampSpan); lineDiv.appendChild(messageSpan); - + consoleContent.appendChild(lineDiv); consoleLineCount++; - + // Remove old lines if we exceed the limit if (consoleLineCount > MAX_CONSOLE_LINES) { // Calculate how many lines to remove (remove 10% to avoid frequent trimming) const linesToRemove = Math.floor(MAX_CONSOLE_LINES * 0.1); - + // Remove the oldest lines for (let i = 0; i < linesToRemove; i++) { if (consoleContent.firstChild) { consoleContent.removeChild(consoleContent.firstChild); } } - + consoleLineCount -= linesToRemove; - + // Add a notice that lines were trimmed const trimNotice = document.createElement('div'); trimNotice.className = 'console-info'; trimNotice.innerHTML = '[System] ... older lines removed to maintain performance ...'; consoleContent.insertBefore(trimNotice, consoleContent.firstChild); } - + // Auto-scroll to bottom const pythonConsole = document.getElementById('python-console'); pythonConsole.scrollTop = pythonConsole.scrollHeight; @@ -242,24 +255,20 @@ async function handleAsyncAction(actionName, actionFn) { } } -// Auto-save functionality with debouncing -let saveTimeout; -const SAVE_DELAY = 500; - async function autoSaveConfig() { if (isSettingValues) return; - + clearTimeout(saveTimeout); saveTimeout = setTimeout(async () => { try { const config = getFormValues(); await window.electronAPI.saveConfig(config); showStatus('Configuration saved', 'success'); - + // Restart process if running - if (!buttonManager.buttons.stop.disabled) { + if (isProcessRunning) { appendToConsole('Restarting process with new configuration...', 'info'); - + try { await window.electronAPI.stopProcess(); await new Promise(resolve => setTimeout(resolve, 1000)); @@ -281,9 +290,9 @@ async function autoSaveConfig() { function setupAutoSave() { const form = document.getElementById('config-form'); const inputs = form.querySelectorAll('input, select, textarea'); - + inputs.forEach(input => { - const eventType = input.type === 'checkbox' ? 'change' : + const eventType = input.type === 'checkbox' ? 'change' : (input.type === 'number' || input.type === 'text' || input.tagName === 'TEXTAREA') ? 'input' : 'change'; input.addEventListener(eventType, autoSaveConfig); }); @@ -292,7 +301,7 @@ function setupAutoSave() { // Microphone loading async function loadMicrophones() { const microphoneSelect = document.getElementById('microphone'); - + try { // Check/install requirements during startup appendToConsole('Checking virtual environment and requirements...', 'info'); @@ -305,15 +314,15 @@ async function loadMicrophones() { appendToConsole('Loading available microphones...', 'info'); const microphones = await window.electronAPI.getMicrophones(); - + microphoneSelect.innerHTML = ''; - + if (microphones.length === 0) { microphoneSelect.innerHTML = ''; appendToConsole('No microphones found', 'stderr'); return; } - + appendToConsole(`Found ${microphones.length} microphone(s)`, 'info'); microphones.forEach(mic => { const option = document.createElement('option'); @@ -322,7 +331,7 @@ async function loadMicrophones() { microphoneSelect.appendChild(option); appendToConsole(` - ${mic.name} (Device ${mic.index})`, 'stdout'); }); - + // Restore previously selected microphone try { const config = await window.electronAPI.loadConfig(); @@ -332,7 +341,7 @@ async function loadMicrophones() { } catch (error) { // Ignore config load errors here } - + } catch (error) { appendToConsole(`Failed to load microphones: ${error.message}`, 'stderr'); microphoneSelect.innerHTML = ''; @@ -345,7 +354,7 @@ function setupEventHandlers() { document.getElementById('toggle-advanced').addEventListener('click', () => { const advancedSettings = document.getElementById('advanced-settings'); const chevron = document.getElementById('chevron'); - + if (advancedSettings.classList.contains('hidden')) { advancedSettings.classList.remove('hidden'); chevron.classList.add('rotate-90'); @@ -354,12 +363,12 @@ function setupEventHandlers() { chevron.classList.remove('rotate-90'); } }); - + // Use builtin chatbox toggle document.getElementById('use_builtin').addEventListener('change', (e) => { const customChatboxInputs = ['block_width', 'num_blocks', 'rows', 'cols']; const isBuiltin = e.target.checked; - + customChatboxInputs.forEach(inputId => { const input = document.getElementById(inputId); if (input) { @@ -372,7 +381,13 @@ function setupEventHandlers() { } }); }); - + + // Volume slider update + document.getElementById('volume').addEventListener('input', (e) => { + const volumePercent = Math.round(e.target.value); + document.getElementById('volume-display').textContent = `${volumePercent}%`; + }); + // Setup virtual environment document.getElementById('setup-venv').addEventListener('click', async () => { loadingOverlay.show('Setting up virtual environment - please wait...'); // Show overlay with custom message @@ -385,7 +400,7 @@ function setupEventHandlers() { loadingOverlay.hide(); // Always hide overlay when done } }); - + // Reset virtual environment document.getElementById('reset-venv').addEventListener('click', async () => { loadingOverlay.show('Resetting virtual environment - please wait...'); // Show overlay with custom message @@ -397,33 +412,33 @@ function setupEventHandlers() { loadingOverlay.hide(); // Always hide overlay when done } }); - + // Reset configuration document.getElementById('reset-config').addEventListener('click', async () => { const confirmReset = confirm('Are you sure you want to reset all settings to defaults? This cannot be undone.'); if (!confirmReset) return; - + try { // Stop process if running - const wasRunning = !buttonManager.buttons.stop.disabled; + const wasRunning = isProcessRunning; if (wasRunning) { appendToConsole('Stopping process before resetting configuration...', 'info'); await window.electronAPI.stopProcess(); buttonManager.setProcessStopped(); await new Promise(resolve => setTimeout(resolve, 500)); } - + // Reset configuration appendToConsole('Resetting configuration to defaults...', 'info'); const result = await window.electronAPI.resetConfig(); - + // Reload configuration with defaults const config = await window.electronAPI.loadConfig(); setFormValues(config); - + showStatus(result.message, 'success'); appendToConsole('Configuration reset successfully', 'info'); - + // Restart process if it was running if (wasRunning) { appendToConsole('Restarting process with default configuration...', 'info'); @@ -436,18 +451,18 @@ function setupEventHandlers() { appendToConsole(`Failed to reset configuration: ${error.message}`, 'stderr'); } }); - + // Refresh microphones document.getElementById('refresh-microphones').addEventListener('click', async () => { await buttonManager.withButtonLoading('refreshMicrophones', async () => { await loadMicrophones(); }); }); - + // Start process document.getElementById('start-process').addEventListener('click', async () => { buttonManager.setState('start', true); - + try { // The installRequirements function will now check if venv is set up. loadingOverlay.show('Verifying environment setup - please wait...'); // Show overlay with custom message @@ -457,7 +472,7 @@ function setupEventHandlers() { } finally { loadingOverlay.hide(); // Always hide overlay when done } - + await window.electronAPI.startProcess(); buttonManager.setProcessRunning(); appendToConsole('Process started successfully', 'info'); @@ -466,11 +481,11 @@ function setupEventHandlers() { buttonManager.setState('start', false); } }); - + // Stop process document.getElementById('stop-process').addEventListener('click', async () => { buttonManager.setState('stop', true); - + try { await window.electronAPI.stopProcess(); appendToConsole('Process stop initiated', 'info'); @@ -479,7 +494,7 @@ function setupEventHandlers() { buttonManager.setState('stop', false); } }); - + // Listen for process stopped event window.electronAPI.onProcessStopped(() => { buttonManager.setProcessStopped(); @@ -489,12 +504,15 @@ function setupEventHandlers() { // Initialize application window.addEventListener('load', async () => { appendToConsole('TaSTT Configuration UI initialized', 'info'); - + + loadingOverlay = new LoadingOverlay(); + buttonManager = new ButtonManager(); + // Set up Python output listener first so we capture all output window.electronAPI.onPythonOutput((data) => { appendToConsole(data.message, data.type); }); - + // Load configuration try { const config = await window.electronAPI.loadConfig(); @@ -503,11 +521,11 @@ window.addEventListener('load', async () => { } catch (error) { appendToConsole(`Failed to load configuration: ${error.message}`, 'stderr'); } - + // Load microphones await loadMicrophones(); - + // Setup event handlers and auto-save setupEventHandlers(); setupAutoSave(); -}); \ No newline at end of file +}); -- cgit v1.2.3 From 9bf33a4cad8196bfe7253c841ab5c35ffdbc0173 Mon Sep 17 00:00:00 2001 From: yum Date: Wed, 23 Jul 2025 19:05:15 -0700 Subject: add segment metadata logging feature Segment metadata can now be logged to a json as the app runs. The goal is to identify the params that heavily correlate with hallucinations. Also: * use 7zip for compression in build, speeding things up * log dll download progress every few seconds * shrink package --- app/stt.py | 72 ++++++++++++++++++++++++++++++++++++++++++++++++++--- config.yaml | 1 + ui/.gitignore | 2 ++ ui/config-schema.js | 1 + ui/index.html | 6 ++++- ui/index.js | 28 +++++++++++++++++++-- ui/package.json | 35 ++++++++++++++++---------- 7 files changed, 126 insertions(+), 19 deletions(-) (limited to 'ui') diff --git a/app/stt.py b/app/stt.py index 79ab0d1..f36de97 100644 --- a/app/stt.py +++ b/app/stt.py @@ -1,5 +1,6 @@ from datetime import datetime from faster_whisper import WhisperModel +import json import langcodes import numpy as np import os @@ -486,7 +487,8 @@ class Whisper: # Build context-aware prompt prompt = self._build_prompt() - print(f"Prompt: {prompt}", flush=True) + if self.cfg["enable_debug_mode"]: + print(f"Prompt: {prompt}", flush=True) t0 = time.time() segments, info = self.model.transcribe( @@ -578,16 +580,69 @@ def saveAudio(audio: bytes, path: str, cfg: typing.Dict): wf.writeframes(audio) +class SegmentLogger: + def __init__(self, cfg: typing.Dict): + self.cfg = cfg + self.enabled = cfg.get("enable_segment_logging", False) + self.session_data = [] + self.log_file = None + + if self.enabled: + log_dir = os.path.join(PROJECT_ROOT, "logs") + if not os.path.exists(log_dir): + os.makedirs(log_dir) + + # Create file + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + self.log_file = os.path.join(log_dir, f"session_debug_{timestamp}.json") + print(f"Segment logging enabled. Logging to: {self.log_file}", file=sys.stderr) + + def log_segment(self, segment: Segment, commit_type: str = "commit"): + if not self.enabled: + return + + segment_data = { + "timestamp": datetime.now().isoformat(), + "type": commit_type, + "text": segment.transcript, + "start_ts": segment.start_ts, + "end_ts": segment.end_ts, + "wall_ts": segment.wall_ts, + "avg_logprob": segment.avg_logprob, + "no_speech_prob": segment.no_speech_prob, + "compression_ratio": segment.compression_ratio, + "duration": segment.end_ts - segment.start_ts + } + + self.session_data.append(segment_data) + + # Write to file incrementally + try: + with open(self.log_file, 'w') as f: + json.dump({ + "session_start": self.session_data[0]["timestamp"] if self.session_data else None, + "segments": self.session_data + }, f, indent=2) + except Exception as e: + print(f"Error writing segment log: {e}", file=sys.stderr) + + def close(self): + if self.enabled and self.session_data: + print(f"Session complete. Logged {len(self.session_data)} segments to {self.log_file}", file=sys.stderr) + + class VadCommitter: def __init__(self, cfg: typing.Dict, collector: AudioCollector, whisper: Whisper, - segmenter: AudioSegmenter): + segmenter: AudioSegmenter, + segment_logger: SegmentLogger = None): self.cfg = cfg self.collector = collector self.whisper = whisper self.segmenter = segmenter + self.segment_logger = segment_logger def getDelta(self) -> TranscriptCommit: audio = self.collector.getAudio() @@ -618,6 +673,10 @@ class VadCommitter: if delta.strip(): self.whisper.update_context(delta.strip()) + if self.segment_logger: + for s in segments: + self.segment_logger.log_segment(s, "commit") + audio = self.collector.getAudio() if self.cfg["enable_debug_mode"]: for s in segments: @@ -638,6 +697,10 @@ class VadCommitter: segments = self.whisper.transcribe(audio) preview = "".join(s.transcript for s in segments) + if self.segment_logger: + for s in segments: + self.segment_logger.log_segment(s, "preview") + if not has_audio: self.collector.keepLast(1.0) @@ -745,7 +808,9 @@ def transcriptionThread(shared_data: SharedThreadData): segmenter = AudioSegmenter(min_silence_ms=shared_data.cfg["min_silence_duration_ms"], max_speech_s=shared_data.cfg["max_speech_duration_s"], min_speech_duration_ms=shared_data.cfg["min_speech_duration_ms"]) - committer = VadCommitter(shared_data.cfg, collector, whisper, segmenter) + + segment_logger = SegmentLogger(shared_data.cfg) + committer = VadCommitter(shared_data.cfg, collector, whisper, segmenter, segment_logger) plugins = [] # plugins.append(TranslationPlugin(shared_data.cfg)) # Not implemented yet @@ -839,4 +904,5 @@ def transcriptionThread(shared_data: SharedThreadData): plugin.stop() for filt in filters: filt.stop() + segment_logger.close() diff --git a/config.yaml b/config.yaml index dfa2e1f..db25405 100644 --- a/config.yaml +++ b/config.yaml @@ -22,6 +22,7 @@ volume: 10 enable_debug_mode: 0 enable_previews: 1 save_audio: 1 +enable_segment_logging: 0 use_cpu: 0 enable_lowercase_filter: 0 enable_uppercase_filter: 0 diff --git a/ui/.gitignore b/ui/.gitignore index 2109e19..c1dbe3c 100644 --- a/ui/.gitignore +++ b/ui/.gitignore @@ -1,3 +1,5 @@ build node_modules package-lock.json +output.css +dist diff --git a/ui/config-schema.js b/ui/config-schema.js index bf91fce..fb90f3f 100644 --- a/ui/config-schema.js +++ b/ui/config-schema.js @@ -29,6 +29,7 @@ const CONFIG_SCHEMA = { enable_debug_mode: { type: 'boolean', default: 0 }, enable_previews: { type: 'boolean', default: 1 }, save_audio: { type: 'boolean', default: 0 }, + enable_segment_logging: { type: 'boolean', default: 0 }, use_cpu: { type: 'boolean', default: 0 }, enable_lowercase_filter: { type: 'boolean', default: 0 }, enable_uppercase_filter: { type: 'boolean', default: 0 }, diff --git a/ui/index.html b/ui/index.html index 19c41ce..29d4a78 100644 --- a/ui/index.html +++ b/ui/index.html @@ -4,7 +4,7 @@ TaSTT - +
@@ -214,6 +214,10 @@ Save Audio Segments +
diff --git a/ui/index.js b/ui/index.js index 5a5d0a6..afaaf7f 100644 --- a/ui/index.js +++ b/ui/index.js @@ -6,7 +6,12 @@ const { spawn } = require('child_process'); const https = require('https'); const { CONFIG_SCHEMA, getDefaultConfig } = require('./config-schema.js'); -const APP_ROOT = path.join(__dirname, '..'); +// Detect if we're running in development or production +const isDev = !app.isPackaged; +const APP_ROOT = isDev + ? path.join(__dirname, '..') // Development: go up from ui/ to project root + : process.resourcesPath; // Production: use Electron's resource path + const CONFIG_PATH = path.join(APP_ROOT, 'config.yaml'); let mainWindow; @@ -50,13 +55,32 @@ function createPythonEnvironment() { return env; } -// Helper function to download a file from URL +// Helper function to download a file from URL with progress function downloadFile(url, outputPath) { return new Promise((resolve, reject) => { const file = require('fs').createWriteStream(outputPath); + const fileName = path.basename(outputPath); const request = https.get(url, (response) => { if (response.statusCode === 200) { + const totalSize = parseInt(response.headers['content-length'], 10); + let downloadedSize = 0; + let lastProgressTime = Date.now(); + + response.on('data', (chunk) => { + downloadedSize += chunk.length; + + // Log progress every 5 seconds + const now = Date.now(); + if (totalSize && (now - lastProgressTime >= 5000)) { + const progress = Math.round((downloadedSize / totalSize) * 100); + const mb = (downloadedSize / 1024 / 1024).toFixed(1); + const totalMb = (totalSize / 1024 / 1024).toFixed(1); + sendPythonOutput(`Downloading ${fileName}: ${mb}/${totalMb} MB (${progress}%)`, 'info'); + lastProgressTime = now; + } + }); + response.pipe(file); file.on('finish', () => { diff --git a/ui/package.json b/ui/package.json index 3a58298..4742cd7 100644 --- a/ui/package.json +++ b/ui/package.json @@ -6,14 +6,16 @@ "homepage": "./", "scripts": { "start": "npm run build:css && electron .", - "build:css": "tailwindcss -i ./src/components.css -o ./build/output.css", - "watch:css": "tailwindcss -i ./src/components.css -o ./build/output.css --watch", + "build:css": "tailwindcss -i ./src/components.css -o ./output.css", + "watch:css": "tailwindcss -i ./src/components.css -o ./output.css --watch", "dev": "concurrently \"npm run watch:css\" \"electron .\"", "test": "echo \"Error: no test specified\" && exit 1", - "dist": "npm run build:css && electron-builder", - "dist:win": "npm run build:css && electron-builder --win", - "dist:portable": "npm run build:css && electron-builder --win portable", - "dist:zip": "npm run build:css && electron-builder --win zip" + "clean:meta": "node -e \"const fs=require('fs');const path=require('path');function deleteMeta(dir){fs.readdirSync(dir).forEach(f=>{const p=path.join(dir,f);if(f.endsWith('.meta'))fs.unlinkSync(p);else if(fs.statSync(p).isDirectory()&&!f.startsWith('.'))deleteMeta(p);})}deleteMeta('./node_modules')\"", + "prebuild": "node build_scripts/setup-empty-venv.js", + "dist": "npm run prebuild && npm run clean:meta && npm run build:css && electron-builder", + "dist:win": "npm run prebuild && npm run clean:meta && npm run build:css && electron-builder --win", + "dist:portable": "npm run prebuild && npm run clean:meta && npm run build:css && electron-builder --win portable", + "dist:zip": "npm run prebuild && npm run clean:meta && npm run build:css && electron-builder --win zip" }, "build": { "appId": "com.yum_food.tastt", @@ -46,11 +48,6 @@ "from": "../config.yaml", "to": "config.yaml" }, - { - "from": "../dll", - "to": "dll", - "filter": ["**/*"] - }, { "from": "../Images", "to": "Images", @@ -60,10 +57,20 @@ "from": "../bin", "to": "bin", "filter": ["**/*"] + }, + { + "from": "../venv_clean", + "to": "venv", + "filter": ["**/*"] + }, + { + "from": "../dll_empty", + "to": "dll", + "filter": ["**/*"] } ], "win": { - "icon": "../Images/logo.png", + "icon": "../Images/favicon.ico", "target": [ { "target": "portable", @@ -81,7 +88,9 @@ "nsis": { "oneClick": false, "allowToChangeInstallationDirectory": true - } + }, + "compression": "maximum", + "artifactName": "${productName}-${version}-${arch}.${ext}" }, "keywords": [], "author": "yum_food", -- cgit v1.2.3 From e1730a63538d2b1a23c948d25580612303733eba Mon Sep 17 00:00:00 2001 From: yum Date: Wed, 23 Jul 2025 19:51:35 -0700 Subject: Update avg_logprob cutoff, fix sounds, fix electron build --- app/stt.py | 9 ++- ui/build_scripts/setup-empty-venv.js | 25 +++++++ ui/index.html | 2 +- ui/index.js | 136 +++++++++++++++++------------------ ui/package.json | 7 +- 5 files changed, 108 insertions(+), 71 deletions(-) create mode 100644 ui/build_scripts/setup-empty-venv.js (limited to 'ui') diff --git a/app/stt.py b/app/stt.py index f36de97..b476ac0 100644 --- a/app/stt.py +++ b/app/stt.py @@ -523,6 +523,13 @@ class Whisper: f"no_speech_prob={s.no_speech_prob}, " + f"avg_logprob={s.avg_logprob})", file=sys.stderr) continue + if s.avg_logprob < -0.75: + if self.cfg["enable_debug_mode"]: + print(f"Drop probable hallucination (case 3) " + + f"(text='{s.text}', " + + f"no_speech_prob={s.no_speech_prob}, " + + f"avg_logprob={s.avg_logprob})", file=sys.stderr) + continue if self.cfg["enable_debug_mode"]: print(f"s get: {s}") if s.avg_logprob < -1.0: @@ -686,7 +693,7 @@ class VadCommitter: if self.cfg["save_audio"] and len(delta) > 0: ts = datetime.fromtimestamp(self.collector.now() - latency_s) - filename = str(ts.strftime('%Y_%m_%d__%H-%M-%S')) + ".wav" + filename = str(ts.strftime('%Y_%m_%d__%H-%M-%S')) + delta.strip() + ".wav" audio_dir = os.path.join(PROJECT_ROOT, "audio") if not os.path.exists(audio_dir): os.makedirs(audio_dir) diff --git a/ui/build_scripts/setup-empty-venv.js b/ui/build_scripts/setup-empty-venv.js new file mode 100644 index 0000000..0691a51 --- /dev/null +++ b/ui/build_scripts/setup-empty-venv.js @@ -0,0 +1,25 @@ +const { execSync } = require('child_process'); +const path = require('path'); +const fs = require('fs'); + +const projectRoot = path.join(__dirname, '..', '..'); +const venvPath = path.join(projectRoot, 'venv_clean'); +const dllPath = path.join(projectRoot, 'dll_empty'); + +console.log('Creating empty virtual environment and dll directory...'); + +// Create empty dll directory +if (!fs.existsSync(dllPath)) { + fs.mkdirSync(dllPath, { recursive: true }); + console.log('Created empty dll directory'); +} + +try { + console.log('Creating new venv...'); + execSync(`python -m venv "${venvPath}"`, { stdio: 'inherit' }); + console.log('Empty venv created successfully!'); +} catch (error) { + console.error('Failed to create venv:', error); + process.exit(1); +} + diff --git a/ui/index.html b/ui/index.html index 29d4a78..70eaa68 100644 --- a/ui/index.html +++ b/ui/index.html @@ -216,7 +216,7 @@
diff --git a/ui/index.js b/ui/index.js index afaaf7f..63c633a 100644 --- a/ui/index.js +++ b/ui/index.js @@ -8,7 +8,7 @@ const { CONFIG_SCHEMA, getDefaultConfig } = require('./config-schema.js'); // Detect if we're running in development or production const isDev = !app.isPackaged; -const APP_ROOT = isDev +const APP_ROOT = isDev ? path.join(__dirname, '..') // Development: go up from ui/ to project root : process.resourcesPath; // Production: use Electron's resource path @@ -60,16 +60,16 @@ function downloadFile(url, outputPath) { return new Promise((resolve, reject) => { const file = require('fs').createWriteStream(outputPath); const fileName = path.basename(outputPath); - + const request = https.get(url, (response) => { if (response.statusCode === 200) { const totalSize = parseInt(response.headers['content-length'], 10); let downloadedSize = 0; let lastProgressTime = Date.now(); - + response.on('data', (chunk) => { downloadedSize += chunk.length; - + // Log progress every 5 seconds const now = Date.now(); if (totalSize && (now - lastProgressTime >= 5000)) { @@ -80,14 +80,14 @@ function downloadFile(url, outputPath) { lastProgressTime = now; } }); - + response.pipe(file); - + file.on('finish', () => { file.close(); resolve(); }); - + file.on('error', (err) => { fs.unlink(outputPath).catch(() => {}); // Clean up on error reject(err); @@ -98,7 +98,7 @@ function downloadFile(url, outputPath) { reject(new Error(`Failed to download: HTTP ${response.statusCode}`)); } }); - + request.on('error', (err) => { file.close(); fs.unlink(outputPath).catch(() => {}); // Clean up on error @@ -121,14 +121,14 @@ function setupProcessHandlers(process) { const text = data.toString(); sendPythonOutput(text.trimEnd(), 'stdout'); }); - + process.stderr.on('data', (data) => { const text = data.toString(); if (!shouldFilterMessage(text)) { sendPythonOutput(text.trimEnd(), 'stderr'); } }); - + process.on('error', (error) => { sendPythonOutput(`Process error: ${error.message}`, 'stderr'); runningProcess = null; @@ -136,7 +136,7 @@ function setupProcessHandlers(process) { mainWindow.webContents.send('process-stopped'); } }); - + process.on('close', (code) => { sendPythonOutput(`Process exited with code ${code}`, 'info'); runningProcess = null; @@ -152,23 +152,23 @@ function executePythonCommand(args, options = {}) { const pythonPath = getVenvPython(); const commandStr = `${path.basename(pythonPath)} ${args.join(' ')}`; sendPythonOutput(`> ${commandStr}`, 'info'); - + const spawnOptions = { ...options, env: createPythonEnvironment() }; - + const pythonProcess = spawn(pythonPath, args, spawnOptions); - + let stdout = ''; let stderr = ''; - + pythonProcess.stdout.on('data', (data) => { const text = data.toString(); stdout += text; sendPythonOutput(text.trimEnd(), 'stdout'); }); - + pythonProcess.stderr.on('data', (data) => { const text = data.toString(); stderr += text; @@ -177,12 +177,12 @@ function executePythonCommand(args, options = {}) { sendPythonOutput(text.trimEnd(), 'stderr'); } }); - + pythonProcess.on('error', (error) => { sendPythonOutput(`Failed to start Python process: ${error.message}`, 'stderr'); reject({ error: error.message, stdout, stderr }); }); - + pythonProcess.on('close', (code) => { if (code !== 0) { sendPythonOutput(`Process exited with code ${code}`, 'stderr'); @@ -287,15 +287,15 @@ ipcMain.handle('deleteVenvIndicatorFile', async () => { // Generic function to ensure required files are present async function ensureRequiredFiles(config) { - const { - directoryName, - requiredFiles, - downloadBaseUrl, - resourceType + const { + directoryName, + requiredFiles, + downloadBaseUrl, + resourceType } = config; - + const targetPath = path.join(APP_ROOT, directoryName); - + try { // Check if target directory exists, create it if not try { @@ -310,7 +310,7 @@ async function ensureRequiredFiles(config) { throw error; } } - + // Check each required file const missingFiles = []; for (const fileName of requiredFiles) { @@ -327,15 +327,15 @@ async function ensureRequiredFiles(config) { } } } - + // Download missing files if (missingFiles.length > 0) { sendPythonOutput(`Downloading ${missingFiles.length} missing ${resourceType} file${missingFiles.length > 1 ? 's' : ''}...`, 'info'); - + for (const fileName of missingFiles) { const filePath = path.join(targetPath, fileName); const downloadUrl = `${downloadBaseUrl}/${fileName}`; - + try { sendPythonOutput(`Downloading ${fileName}...`, 'info'); await downloadFile(downloadUrl, filePath); @@ -345,14 +345,14 @@ async function ensureRequiredFiles(config) { throw new Error(`Failed to download ${fileName}: ${downloadError.message}`); } } - + sendPythonOutput(`All missing ${resourceType} files downloaded successfully`, 'info'); } else { sendPythonOutput(`All required ${resourceType} files are present`, 'info'); } - - return { - success: true, + + return { + success: true, message: `${resourceType} setup complete. ${missingFiles.length} file${missingFiles.length > 1 ? 's' : ''} downloaded.`, downloadedFiles: missingFiles }; @@ -366,7 +366,7 @@ async function ensureRequiredFiles(config) { ipcMain.handle('install-requirements', async () => { const requirementsPath = path.join(APP_ROOT, 'app', 'requirements.txt'); const venvMarkerPath = path.join(APP_ROOT, '.venv_is_set_up'); - + try { // Check if venv is already set up try { @@ -375,10 +375,10 @@ ipcMain.handle('install-requirements', async () => { } catch (error) { // Marker doesn't exist, proceed with setup } - + // Check if requirements.txt exists await fs.access(requirementsPath); - + await executePythonCommand(['-m', 'pip', 'install', '-r', requirementsPath]); await ensureRequiredFiles({ @@ -389,10 +389,10 @@ ipcMain.handle('install-requirements', async () => { }); await fs.mkdir(path.join(APP_ROOT, 'Models'), { recursive: true }); - + await fs.writeFile(venvMarkerPath, new Date().toISOString(), 'utf8'); sendPythonOutput('Created .venv_is_set_up marker file', 'info'); - + return { success: true, message: 'Requirements and dependencies installed successfully' }; } catch (error) { console.error('Error installing requirements:', error); @@ -405,7 +405,7 @@ ipcMain.handle('install-requirements', async () => { ipcMain.handle('get-microphones', async () => { const scriptPath = path.join(APP_ROOT, 'app', 'list_microphones.py'); - + try { const result = await executePythonCommand([scriptPath]); const microphones = JSON.parse(result.stdout.trim()); @@ -421,24 +421,24 @@ async function clearDirectory(dirPath, dirName) { try { await fs.access(dirPath); sendPythonOutput(`Clearing ${dirName} directory...`, 'info'); - + const files = await fs.readdir(dirPath); let deletedCount = 0; - + for (const file of files) { const filePath = path.join(dirPath, file); - + try { await fs.rm(filePath, { recursive: true, force: true }); sendPythonOutput(`✗ Deleted file ${file}`, 'info'); - + deletedCount++; } catch (deleteError) { sendPythonOutput(`Warning: Could not delete ${file}: ${deleteError.message}`, 'stderr'); // Continue with other files even if one fails } } - + sendPythonOutput(`${dirName} directory cleared`, 'info'); return deletedCount; } catch (error) { @@ -454,10 +454,10 @@ async function clearDirectory(dirPath, dirName) { ipcMain.handle('reset-venv', async () => { const venvMarkerPath = path.join(APP_ROOT, '.venv_is_set_up'); - + try { sendPythonOutput('Starting virtual environment reset...', 'info'); - + // Delete the venv marker file first try { await fs.unlink(venvMarkerPath); @@ -467,14 +467,14 @@ ipcMain.handle('reset-venv', async () => { sendPythonOutput(`Warning: Could not delete marker file: ${error.message}`, 'stderr'); } } - + // Get list of installed packages sendPythonOutput('Getting list of installed packages...', 'info'); const freezeResult = await executePythonCommand(['-m', 'pip', 'freeze']); const installedPackages = freezeResult.stdout.trim(); - + let uninstalledPackages = []; - + if (!installedPackages) { sendPythonOutput('No packages found to uninstall', 'info'); } else { @@ -483,38 +483,38 @@ ipcMain.handle('reset-venv', async () => { const packageNames = packageLines .map(line => line.split('==')[0].trim()) .filter(name => name && !name.startsWith('#')); - + const corePackages = ['pip', 'setuptools', 'wheel']; const packagesToUninstall = packageNames.filter(name => !corePackages.includes(name.toLowerCase())); - + if (packagesToUninstall.length === 0) { sendPythonOutput('Only core packages found, nothing to uninstall', 'info'); } else { sendPythonOutput(`Uninstalling ${packagesToUninstall.length} packages...`, 'info'); - + const uninstallArgs = ['-m', 'pip', 'uninstall', '-y', ...packagesToUninstall]; await executePythonCommand(uninstallArgs); uninstalledPackages = packagesToUninstall; } } - + // Clear downloaded files sendPythonOutput('Clearing downloaded files...', 'info'); - + const dllPath = path.join(APP_ROOT, 'dll'); const modelsPath = path.join(APP_ROOT, 'Models'); const binPath = path.join(APP_ROOT, 'bin'); - + const deletedDlls = await clearDirectory(dllPath, 'DLL'); const deletedModels = await clearDirectory(modelsPath, 'Models'); const deletedBins = await clearDirectory(binPath, 'Binary'); - + const totalDeletedFiles = deletedDlls + deletedModels + deletedBins; - + sendPythonOutput('Virtual environment reset successfully!', 'info'); - - return { - success: true, + + return { + success: true, message: `Virtual environment reset complete. Uninstalled ${uninstalledPackages.length} packages and deleted ${totalDeletedFiles} downloaded files.`, uninstalledPackages, deletedFiles: { @@ -538,14 +538,14 @@ ipcMain.handle('start-process', async () => { const scriptPath = path.join(APP_ROOT, 'app', 'hi.py'); const args = [scriptPath, '--config', CONFIG_PATH]; - + try { const pythonPath = getVenvPython(); sendPythonOutput(`Starting process: ${path.basename(pythonPath)} ${args.join(' ')}`, 'info'); - + runningProcess = spawn(pythonPath, args, { env: createPythonEnvironment() }); setupProcessHandlers(runningProcess); - + return { success: true }; } catch (error) { runningProcess = null; @@ -561,7 +561,7 @@ ipcMain.handle('stop-process', async () => { return new Promise((resolve) => { let forcefullyKilled = false; - + // Set up a timeout to force kill after 10 seconds const killTimeout = setTimeout(() => { if (runningProcess) { @@ -570,21 +570,21 @@ ipcMain.handle('stop-process', async () => { runningProcess.kill('SIGKILL'); } }, 10000); - + // Listen for the process to exit runningProcess.once('exit', (code, signal) => { clearTimeout(killTimeout); runningProcess = null; - + if (forcefullyKilled) { sendPythonOutput('Process forcefully terminated', 'info'); } else { sendPythonOutput('Process stopped gracefully', 'info'); } - + resolve({ success: true, forcefullyKilled }); }); - + // Send termination signal sendPythonOutput('Stopping process gracefully...', 'info'); runningProcess.kill('SIGTERM'); diff --git a/ui/package.json b/ui/package.json index 4742cd7..d99424c 100644 --- a/ui/package.json +++ b/ui/package.json @@ -67,6 +67,11 @@ "from": "../dll_empty", "to": "dll", "filter": ["**/*"] + }, + { + "from": "../Sounds", + "to": "Sounds", + "filter": ["*.wav"] } ], "win": { @@ -89,7 +94,7 @@ "oneClick": false, "allowToChangeInstallationDirectory": true }, - "compression": "maximum", + "compression": "normal", "artifactName": "${productName}-${version}-${arch}.${ext}" }, "keywords": [], -- cgit v1.2.3