summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: yum <yum.food.vr@gmail.com> 2023-05-23 15:12:04 -0700
committer: yum <yum.food.vr@gmail.com> 2023-05-23 15:15:39 -0700
commit: 78de7f02bc364606b0939d66903f02b2f91c141b (patch)
tree: e90e441b45ccf777098d8e581bea082855f9e510
parent: 0f08da58a59a126f5307395e822fd140f15f8b36 (diff)
Add ability to type using STT
To use it, do a medium hold followed by a long hold. Keep the long hold depressed until you're done speaking. The transcription will be typed into the currently selected input field.

* Add more audio feedback.
* Make audio feedback play asynchronously so it doesn't slow down the controller input state machine as much.
-rw-r--r-- README.md 6
-rw-r--r-- Scripts/transcribe.py 53
-rw-r--r-- Sounds/Dismiss_Noise.wav bin 0 -> 192078 bytes
-rw-r--r-- Sounds/Dismiss_Noise_Quiet.wav bin 0 -> 192078 bytes
-rw-r--r-- Sounds/KB_Noise_Off.wav bin 0 -> 192078 bytes
-rw-r--r-- Sounds/KB_Noise_Off_Quiet.wav bin 0 -> 192078 bytes
-rw-r--r-- Sounds/KB_Noise_On.wav bin 0 -> 266318 bytes
-rw-r--r-- Sounds/KB_Noise_On_Quiet.wav bin 0 -> 266318 bytes
8 files changed, 45 insertions, 14 deletions
diff --git a/README.md b/README.md
index 057acb4..6b75998 100644
--- a/README.md
+++ b/README.md
@@ -32,8 +32,10 @@ To build your own package from source, see GUI/README.md.
Basic controls:
* Short click to toggle transcription.
-* Long click to hide the text box.
-* Scale it up/down in the radial menu.
+* Medium click to hide the text box.
+* Hold to update text box without unlocking from worldspace.
+* Medium click + hold to type using STT.
+* Scale up/down in the radial menu.
## Design philosophy
diff --git a/Scripts/transcribe.py b/Scripts/transcribe.py
index 422e9c0..b4b7198 100644
--- a/Scripts/transcribe.py
+++ b/Scripts/transcribe.py
@@ -8,13 +8,14 @@ from playsound import playsound
import argparse
import copy
-import os
-import osc_ctrl
import generate_utils
import keybind_event_machine
+import keyboard
import langcodes
-import pyaudio
import numpy as np
+import os
+import osc_ctrl
+import pyaudio
import steamvr
import string_matcher
import sys
@@ -334,7 +335,8 @@ def readKeyboardInput(audio_state, enable_local_beep: bool,
audio_state.audio_paused = True
if enable_local_beep == 1:
- playsound(os.path.abspath("Resources/Sounds/Noise_Off_Quiet.wav"))
+ playsound(os.path.abspath("Resources/Sounds/Noise_Off_Quiet.wav"),
+ block=False)
elif state == PAUSE_STATE:
state = RECORD_STATE
if not use_builtin:
@@ -348,7 +350,8 @@ def readKeyboardInput(audio_state, enable_local_beep: bool,
audio_state.audio_paused = False
if enable_local_beep == 1:
- playsound(os.path.abspath("Resources/Sounds/Noise_On_Quiet.wav"))
+ playsound(os.path.abspath("Resources/Sounds/Noise_On_Quiet.wav"),
+ block=False)
def readControllerInput(audio_state, enable_local_beep: bool,
use_builtin: bool, button: str):
@@ -371,7 +374,14 @@ def readControllerInput(audio_state, enable_local_beep: bool,
hand_id = steamvr.hands[button.split()[0]]
button_id = steamvr.buttons[button.split()[1]]
+ # Rough description of state machine:
+ # Single short press: toggle transcription
+ # Medium press: dismiss custom chatbox
+ # Long press: update chatbox in place
+ # Medium press + long press: type transcription
+
last_rising = time.time()
+ last_medium_press_end = 0
while audio_state.run_app == True:
time.sleep(0.05)
@@ -390,7 +400,7 @@ def readControllerInput(audio_state, enable_local_beep: bool,
elif event == steamvr.EVENT_FALLING_EDGE:
now = time.time()
if now - last_rising > 1.5:
- # Very long hold: treat as the end of transcription.
+ # Long press: treat as the end of transcription.
state = PAUSE_STATE
if not use_builtin:
osc_ctrl.indicateSpeech(audio_state.osc_state.client, False)
@@ -398,13 +408,29 @@ def readControllerInput(audio_state, enable_local_beep: bool,
audio_state.transcribe_sleep_duration = audio_state.transcribe_sleep_duration_min_s
audio_state.audio_paused = True
+ if last_rising - last_medium_press_end < 1.0:
+ # Type transcription
+ if enable_local_beep == 1:
+ playsound(os.path.abspath("Resources/Sounds/KB_Noise_Off_Quiet.wav"),
+ block=False)
+ keyboard.write(audio_state.text)
+ else:
+ if enable_local_beep == 1:
+ playsound(os.path.abspath("Resources/Sounds/Noise_Off_Quiet.wav"),
+ block=False)
+
elif now - last_rising > 0.5:
- # Long hold
+ # Medium press
+ last_medium_press_end = now
state = PAUSE_STATE
+
+ if enable_local_beep == 1:
+ playsound(os.path.abspath("Resources/Sounds/Dismiss_Noise_Quiet.wav"),
+ block=False)
+
if not use_builtin:
osc_ctrl.indicateSpeech(audio_state.osc_state.client, False)
osc_ctrl.toggleBoard(audio_state.osc_state.client, False)
- #playsound(os.path.abspath("../Sounds/Noise_Off_Quiet.wav"))
resetAudioLocked(audio_state)
resetDisplayLocked(audio_state)
@@ -422,17 +448,20 @@ def readControllerInput(audio_state, enable_local_beep: bool,
audio_state.audio_paused = True
if enable_local_beep == 1:
- playsound(os.path.abspath("Resources/Sounds/Noise_Off_Quiet.wav"))
+ playsound(os.path.abspath("Resources/Sounds/Noise_Off_Quiet.wav"),
+ block=False)
elif state == PAUSE_STATE:
state = RECORD_STATE
+
+ if enable_local_beep == 1:
+ playsound(os.path.abspath("Resources/Sounds/Noise_On_Quiet.wav"),
+ block=False)
+
if not use_builtin:
osc_ctrl.indicateSpeech(audio_state.osc_state.client, True)
osc_ctrl.toggleBoard(audio_state.osc_state.client, True)
osc_ctrl.lockWorld(audio_state.osc_state.client, False)
- if enable_local_beep == 1:
- playsound(os.path.abspath("Resources/Sounds/Noise_On_Quiet.wav"))
-
# model should correspond to one of the Whisper models defined in
# whisper/__init__.py. Examples: tiny, base, small, medium.
def transcribeLoop(mic: str, language: str, model: str,
diff --git a/Sounds/Dismiss_Noise.wav b/Sounds/Dismiss_Noise.wav
new file mode 100644
index 0000000..fe60f21
--- /dev/null
+++ b/Sounds/Dismiss_Noise.wav
Binary files differ
diff --git a/Sounds/Dismiss_Noise_Quiet.wav b/Sounds/Dismiss_Noise_Quiet.wav
new file mode 100644
index 0000000..5c3b1cb
--- /dev/null
+++ b/Sounds/Dismiss_Noise_Quiet.wav
Binary files differ
diff --git a/Sounds/KB_Noise_Off.wav b/Sounds/KB_Noise_Off.wav
new file mode 100644
index 0000000..64d9c6f
--- /dev/null
+++ b/Sounds/KB_Noise_Off.wav
Binary files differ
diff --git a/Sounds/KB_Noise_Off_Quiet.wav b/Sounds/KB_Noise_Off_Quiet.wav
new file mode 100644
index 0000000..b965e6a
--- /dev/null
+++ b/Sounds/KB_Noise_Off_Quiet.wav
Binary files differ
diff --git a/Sounds/KB_Noise_On.wav b/Sounds/KB_Noise_On.wav
new file mode 100644
index 0000000..a959041
--- /dev/null
+++ b/Sounds/KB_Noise_On.wav
Binary files differ
diff --git a/Sounds/KB_Noise_On_Quiet.wav b/Sounds/KB_Noise_On_Quiet.wav
new file mode 100644
index 0000000..e49513e
--- /dev/null
+++ b/Sounds/KB_Noise_On_Quiet.wav
Binary files differ