diff options
| author | yum <yum.food.vr@gmail.com> | 2023-02-02 01:02:03 -0800 |
|---|---|---|
| committer | yum <yum.food.vr@gmail.com> | 2023-02-13 14:36:20 -0800 |
| commit | 7c6894614dcc3ebc5d4c8839b64f4da761b5ccf0 (patch) | |
| tree | 6232b86b09190fd162aeb67229da359971b2e517 /Scripts/transcribe.py | |
| parent | 2fc3b1b978b6e24814e9de7200865b912108bd34 (diff) | |
Begin work adding emotes
Done:
* Users can add images to Fonts/Emotes/
* The basename of that image ('clueless.png' becomes 'clueless') is the
keyword to make the image show up in game.
* Fix a bug in the shader where letters on the 2nd texture and later
would have UV outside of [0.0, 1.0]
Not yet implemented:
* transcribed words are encoded using emotes mapping
Diffstat (limited to 'Scripts/transcribe.py')
| -rw-r--r-- | Scripts/transcribe.py | 33 |
1 files changed, 18 insertions, 15 deletions
diff --git a/Scripts/transcribe.py b/Scripts/transcribe.py index ee76a0a..3171336 100644 --- a/Scripts/transcribe.py +++ b/Scripts/transcribe.py @@ -1,28 +1,23 @@ #!/usr/bin/env python3 +from datetime import datetime +from emotes_v2 import EmotesState +from functools import partial +from playsound import playsound + import argparse import copy -from datetime import datetime import os import osc_ctrl -from functools import partial import generate_utils -# python3 -m pip install pyaudio -# License: MIT. import pyaudio import numpy as np -# python3 -m pip install playsound==1.2.2 -# License: MIT. -from playsound import playsound import steamvr import string_matcher import sys import threading import time import wave -# python3 -m pip install git+https://github.com/openai/whisper.git -# python3 -m pip install torch -f https://download.pytorch.org/whl/torch_stable.html -# License: MIT. import whisper class Config: @@ -303,14 +298,14 @@ def transcribeAudio(audio_state, model, use_cpu: bool): audio_state.transcribe_no_change_count = 0 audio_state.transcribe_sleep_duration = audio_state.transcribe_sleep_duration_min_s -def sendAudio(audio_state, use_builtin: bool): +def sendAudio(audio_state, use_builtin: bool, estate: EmotesState): while audio_state.run_app == True: text = audio_state.committed_text + " " + audio_state.text if use_builtin: ret = osc_ctrl.pageMessageBuiltin(audio_state.osc_state, text) time.sleep(1.5) else: - ret = osc_ctrl.pageMessage(audio_state.osc_state, text) + ret = osc_ctrl.pageMessage(audio_state.osc_state, text, estate) is_paging = (ret == False) osc_ctrl.indicatePaging(audio_state.osc_state.client, is_paging) @@ -393,7 +388,7 @@ def readControllerInput(audio_state, enable_local_beep: bool, # whisper/__init__.py. Examples: tiny, base, small, medium. def transcribeLoop(mic: str, language: str, model: str, enable_local_beep: bool, use_cpu: bool, use_builtin: bool, - button: str): + button: str, estate: EmotesState): audio_state = getMicStream(mic) audio_state.language = whisper.tokenizer.TO_LANGUAGE_CODE[language] @@ -410,7 +405,7 @@ def transcribeLoop(mic: str, language: str, model: str, transcribe_audio_thd.daemon = True transcribe_audio_thd.start() - send_audio_thd = threading.Thread(target = sendAudio, args = [audio_state, use_builtin]) + send_audio_thd = threading.Thread(target = sendAudio, args = [audio_state, use_builtin, estate]) send_audio_thd.daemon = True send_audio_thd.start() @@ -459,6 +454,7 @@ if __name__ == "__main__": parser.add_argument("--cpu", type=int, help="If set to 1, use CPU instead of GPU") parser.add_argument("--use_builtin", type=int, help="If set to 1, use the text box built into the game.") parser.add_argument("--button", type=str, help="The controller button used to start/stop transcription. E.g. \"left joystick\"") + parser.add_argument("--emotes_pickle", type=str, help="The path to emotes pickle. See emotes_v2.py for details.") args = parser.parse_args() if not args.mic: @@ -482,6 +478,10 @@ if __name__ == "__main__": print("--button required", file=sys.stderr) sys.exit(1) + if not args.emotes_pickle: + print("--emotes_pickle required", file=sys.stderr) + sys.exit(1) + if args.window_duration_s: config.MAX_LENGTH_S = int(args.window_duration_s) @@ -495,11 +495,14 @@ if __name__ == "__main__": else: args.use_builtin = False + estate = EmotesState() + estate.load(args.emotes_pickle) + generate_utils.config.BYTES_PER_CHAR = int(args.bytes_per_char) generate_utils.config.CHARS_PER_SYNC = int(args.chars_per_sync) generate_utils.config.BOARD_ROWS = int(args.rows) generate_utils.config.BOARD_COLS = int(args.cols) transcribeLoop(args.mic, args.language, args.model, args.enable_local_beep, - args.cpu, args.use_builtin, args.button) + args.cpu, args.use_builtin, args.button, estate) |
