summary | refs | log | tree | commit | diff | stats
path: root/Scripts/transcribe.py
diff options
context:
space:
mode:
author: yum <yum.food.vr@gmail.com> 2022-12-29 14:47:49 -0800
committer: yum <yum.food.vr@gmail.com> 2022-12-29 14:47:49 -0800
commit: 0b5c3841b5c2ed99b718c880bf0f161ac4db0788 (patch)
tree: d4870df92d716159f242e1b538877a1eb1f72e50 /Scripts/transcribe.py
parent: f48ae0fffcd06f3cddd6cfc99b4c3d3a18c20038 (diff)
Users can disable local beep
The transcription engine beeps when you start/stop transcribing so you know that it's listening. Users can now disable this.

* Add help text to all input fields in the GUI.
* Make the TaSTT-generated file TextCtrls read-only, since I haven't tested reassigning them.
* Document the idea of configuring the Unity and transcription apps with config files.
* The controller input thread no longer crashes if SteamVR isn't running; it just slowly spins and waits.
* When you stop transcribing, the transcription engine re-transcribes a few times. I think this should improve end-of-transcription tail latencies.
* transcribe.py now prints out its args.
Diffstat (limited to 'Scripts/transcribe.py')
-rw-r--r-- Scripts/transcribe.py 30
1 files changed, 23 insertions, 7 deletions
diff --git a/Scripts/transcribe.py b/Scripts/transcribe.py
index 2729331..678ec59 100644
--- a/Scripts/transcribe.py
+++ b/Scripts/transcribe.py
@@ -300,8 +300,17 @@ def sendAudio(audio_state):
# Pace this out
time.sleep(0.01)
-def readControllerInput(audio_state):
- session = steamvr.SessionState()
+def readControllerInput(audio_state, enable_local_beep):
+ session = None
+ first = True
+ while session == None and audio_state.run_app == True:
+ try:
+ session = steamvr.SessionState()
+ except:
+ print("steamvr is off, no controller input")
+ session = None
+ time.sleep(5)
+
RECORD_STATE = 0
PAUSE_STATE = 1
state = PAUSE_STATE
@@ -335,10 +344,12 @@ def readControllerInput(audio_state):
state = PAUSE_STATE
osc_ctrl.indicateSpeech(audio_state.osc_state.client, False)
osc_ctrl.lockWorld(audio_state.osc_state.client, True)
+ audio_state.transcribe_sleep_duration = audio_state.transcribe_sleep_duration_min_s
audio_state.audio_paused = True
- playsound(os.path.abspath("../Sounds/Noise_Off.wav"))
+ if enable_local_beep == 1:
+ playsound(os.path.abspath("../Sounds/Noise_Off.wav"))
elif state == PAUSE_STATE:
state = RECORD_STATE
osc_ctrl.indicateSpeech(audio_state.osc_state.client, True)
@@ -350,11 +361,12 @@ def readControllerInput(audio_state):
audio_state.drop_transcription = True
audio_state.audio_paused = False
- playsound(os.path.abspath("../Sounds/Noise_On.wav"))
+ if enable_local_beep == 1:
+ playsound(os.path.abspath("../Sounds/Noise_On.wav"))
# model should correspond to one of the Whisper models defined in
# whisper/__init__.py. Examples: tiny, base, small, medium.
-def transcribeLoop(mic: str, language: str, model: str):
+def transcribeLoop(mic: str, language: str, model: str, enable_local_beep: bool):
audio_state = getMicStream(mic)
audio_state.language = whisper.tokenizer.TO_LANGUAGE_CODE[language]
@@ -375,7 +387,7 @@ def transcribeLoop(mic: str, language: str, model: str):
send_audio_thd.daemon = True
send_audio_thd.start()
- controller_input_thd = threading.Thread(target = readControllerInput, args = [audio_state])
+ controller_input_thd = threading.Thread(target = readControllerInput, args = [audio_state, enable_local_beep])
controller_input_thd.daemon = True
controller_input_thd.start()
@@ -400,6 +412,8 @@ def transcribeLoop(mic: str, language: str, model: str):
if __name__ == "__main__":
sys.stdout.reconfigure(encoding="utf-8")
+ print("args: {}".format(" ".join(sys.argv)))
+
# Set cwd to the directory holding the script
abspath = os.path.abspath(__file__)
dname = os.path.dirname(abspath)
@@ -411,6 +425,7 @@ if __name__ == "__main__":
parser.add_argument("--model", type=str, help="Which AI model to use. Ex: tiny, base, small, medium")
parser.add_argument("--bytes_per_char", type=str, help="The number of bytes to use to represent each character")
parser.add_argument("--chars_per_sync", type=str, help="The number of characters to send on each sync event")
+ parser.add_argument("--enable_local_beep", type=int, help="Whether to play a local auditory indicator when transcription starts/stops.");
args = parser.parse_args()
if not args.mic:
@@ -425,8 +440,9 @@ if __name__ == "__main__":
if not args.bytes_per_char or not args.chars_per_sync:
print("--bytes_per_char and --chars_per_sync required", file=sys.stderr)
sys.exit(1)
+
generate_utils.config.BYTES_PER_CHAR = int(args.bytes_per_char)
generate_utils.config.CHARS_PER_SYNC = int(args.chars_per_sync)
- transcribeLoop(args.mic, args.language, args.model)
+ transcribeLoop(args.mic, args.language, args.model, args.enable_local_beep)