diff options
| author | yum <yum.food.vr@gmail.com> | 2023-05-23 15:12:04 -0700 |
|---|---|---|
| committer | yum <yum.food.vr@gmail.com> | 2023-05-23 15:15:39 -0700 |
| commit | 78de7f02bc364606b0939d66903f02b2f91c141b (patch) | |
| tree | e90e441b45ccf777098d8e581bea082855f9e510 | |
| parent | 0f08da58a59a126f5307395e822fd140f15f8b36 (diff) | |
Add ability to type using STT
To use it, do a medium press followed by a long press. Keep the button
held down during the long press until you're done speaking. The
transcription will be typed into the currently selected input field.
* Add more audio feedback
* Make audio feedback play asynchronously so it doesn't slow down the
controller input state machine as much.
| -rw-r--r-- | README.md | 6 | ||||
| -rw-r--r-- | Scripts/transcribe.py | 53 | ||||
| -rw-r--r-- | Sounds/Dismiss_Noise.wav | bin | 0 -> 192078 bytes | |||
| -rw-r--r-- | Sounds/Dismiss_Noise_Quiet.wav | bin | 0 -> 192078 bytes | |||
| -rw-r--r-- | Sounds/KB_Noise_Off.wav | bin | 0 -> 192078 bytes | |||
| -rw-r--r-- | Sounds/KB_Noise_Off_Quiet.wav | bin | 0 -> 192078 bytes | |||
| -rw-r--r-- | Sounds/KB_Noise_On.wav | bin | 0 -> 266318 bytes | |||
| -rw-r--r-- | Sounds/KB_Noise_On_Quiet.wav | bin | 0 -> 266318 bytes |
8 files changed, 45 insertions, 14 deletions
@@ -32,8 +32,10 @@ To build your own package from source, see GUI/README.md. Basic controls: * Short click to toggle transcription. -* Long click to hide the text box. -* Scale it up/down in the radial menu. +* Medium click to hide the text box. +* Hold to update text box without unlocking from worldspace. +* Medium click + hold to type using STT. +* Scale up/down in the radial menu. ## Design philosophy diff --git a/Scripts/transcribe.py b/Scripts/transcribe.py index 422e9c0..b4b7198 100644 --- a/Scripts/transcribe.py +++ b/Scripts/transcribe.py @@ -8,13 +8,14 @@ from playsound import playsound import argparse import copy -import os -import osc_ctrl import generate_utils import keybind_event_machine +import keyboard import langcodes -import pyaudio import numpy as np +import os +import osc_ctrl +import pyaudio import steamvr import string_matcher import sys @@ -334,7 +335,8 @@ def readKeyboardInput(audio_state, enable_local_beep: bool, audio_state.audio_paused = True if enable_local_beep == 1: - playsound(os.path.abspath("Resources/Sounds/Noise_Off_Quiet.wav")) + playsound(os.path.abspath("Resources/Sounds/Noise_Off_Quiet.wav"), + block=False) elif state == PAUSE_STATE: state = RECORD_STATE if not use_builtin: @@ -348,7 +350,8 @@ def readKeyboardInput(audio_state, enable_local_beep: bool, audio_state.audio_paused = False if enable_local_beep == 1: - playsound(os.path.abspath("Resources/Sounds/Noise_On_Quiet.wav")) + playsound(os.path.abspath("Resources/Sounds/Noise_On_Quiet.wav"), + block=False) def readControllerInput(audio_state, enable_local_beep: bool, use_builtin: bool, button: str): @@ -371,7 +374,14 @@ def readControllerInput(audio_state, enable_local_beep: bool, hand_id = steamvr.hands[button.split()[0]] button_id = steamvr.buttons[button.split()[1]] + # Rough description of state machine: + # Single short press: toggle transcription + # Medium press: dismiss custom chatbox + # Long press: update chatbox in place + # Medium press + long press: type 
transcription + last_rising = time.time() + last_medium_press_end = 0 while audio_state.run_app == True: time.sleep(0.05) @@ -390,7 +400,7 @@ def readControllerInput(audio_state, enable_local_beep: bool, elif event == steamvr.EVENT_FALLING_EDGE: now = time.time() if now - last_rising > 1.5: - # Very long hold: treat as the end of transcription. + # Long press: treat as the end of transcription. state = PAUSE_STATE if not use_builtin: osc_ctrl.indicateSpeech(audio_state.osc_state.client, False) @@ -398,13 +408,29 @@ def readControllerInput(audio_state, enable_local_beep: bool, audio_state.transcribe_sleep_duration = audio_state.transcribe_sleep_duration_min_s audio_state.audio_paused = True + if last_rising - last_medium_press_end < 1.0: + # Type transcription + if enable_local_beep == 1: + playsound(os.path.abspath("Resources/Sounds/KB_Noise_Off_Quiet.wav"), + block=False) + keyboard.write(audio_state.text) + else: + if enable_local_beep == 1: + playsound(os.path.abspath("Resources/Sounds/Noise_Off_Quiet.wav"), + block=False) + elif now - last_rising > 0.5: - # Long hold + # Medium press + last_medium_press_end = now state = PAUSE_STATE + + if enable_local_beep == 1: + playsound(os.path.abspath("Resources/Sounds/Dismiss_Noise_Quiet.wav"), + block=False) + if not use_builtin: osc_ctrl.indicateSpeech(audio_state.osc_state.client, False) osc_ctrl.toggleBoard(audio_state.osc_state.client, False) - #playsound(os.path.abspath("../Sounds/Noise_Off_Quiet.wav")) resetAudioLocked(audio_state) resetDisplayLocked(audio_state) @@ -422,17 +448,20 @@ def readControllerInput(audio_state, enable_local_beep: bool, audio_state.audio_paused = True if enable_local_beep == 1: - playsound(os.path.abspath("Resources/Sounds/Noise_Off_Quiet.wav")) + playsound(os.path.abspath("Resources/Sounds/Noise_Off_Quiet.wav"), + block=False) elif state == PAUSE_STATE: state = RECORD_STATE + + if enable_local_beep == 1: + playsound(os.path.abspath("Resources/Sounds/Noise_On_Quiet.wav"), + block=False) + 
if not use_builtin: osc_ctrl.indicateSpeech(audio_state.osc_state.client, True) osc_ctrl.toggleBoard(audio_state.osc_state.client, True) osc_ctrl.lockWorld(audio_state.osc_state.client, False) - if enable_local_beep == 1: - playsound(os.path.abspath("Resources/Sounds/Noise_On_Quiet.wav")) - # model should correspond to one of the Whisper models defined in # whisper/__init__.py. Examples: tiny, base, small, medium. def transcribeLoop(mic: str, language: str, model: str, diff --git a/Sounds/Dismiss_Noise.wav b/Sounds/Dismiss_Noise.wav Binary files differnew file mode 100644 index 0000000..fe60f21 --- /dev/null +++ b/Sounds/Dismiss_Noise.wav diff --git a/Sounds/Dismiss_Noise_Quiet.wav b/Sounds/Dismiss_Noise_Quiet.wav Binary files differnew file mode 100644 index 0000000..5c3b1cb --- /dev/null +++ b/Sounds/Dismiss_Noise_Quiet.wav diff --git a/Sounds/KB_Noise_Off.wav b/Sounds/KB_Noise_Off.wav Binary files differnew file mode 100644 index 0000000..64d9c6f --- /dev/null +++ b/Sounds/KB_Noise_Off.wav diff --git a/Sounds/KB_Noise_Off_Quiet.wav b/Sounds/KB_Noise_Off_Quiet.wav Binary files differnew file mode 100644 index 0000000..b965e6a --- /dev/null +++ b/Sounds/KB_Noise_Off_Quiet.wav diff --git a/Sounds/KB_Noise_On.wav b/Sounds/KB_Noise_On.wav Binary files differnew file mode 100644 index 0000000..a959041 --- /dev/null +++ b/Sounds/KB_Noise_On.wav diff --git a/Sounds/KB_Noise_On_Quiet.wav b/Sounds/KB_Noise_On_Quiet.wav Binary files differnew file mode 100644 index 0000000..e49513e --- /dev/null +++ b/Sounds/KB_Noise_On_Quiet.wav |
