diff options
| author | yum <yum.food.vr@gmail.com> | 2023-01-22 15:05:54 -0800 |
|---|---|---|
| committer | yum <yum.food.vr@gmail.com> | 2023-01-22 15:35:00 -0800 |
| commit | 1c056bf385d2c48f6e4f77da513060c04415252c (patch) | |
| tree | 81091edb39e9e2b18652141dd3723751284c82ce /Scripts/transcribe.py | |
| parent | 06160c37acb26cfac9bab568bd3759c2386fb175 (diff) | |
Enable using built-in chatbox (v0.3)
VRChat exposes a built-in chatbox which can be seen by anyone who has
it enabled. This was not the case when I started this project: the
chatbox would only be visible to friends. Since this is clearly useful
(it enables STT on public models), let's enable sending data to it.
Caveats:
* The built-in chatbox has anti-spam tech which limits us to updating
about once every 2 seconds. The custom chatbox has no such limitation
and is thus typically much faster.
Diffstat (limited to 'Scripts/transcribe.py')
| -rw-r--r-- | Scripts/transcribe.py | 50 |
1 files changed, 32 insertions, 18 deletions
diff --git a/Scripts/transcribe.py b/Scripts/transcribe.py index 7f07efe..1237334 100644 --- a/Scripts/transcribe.py +++ b/Scripts/transcribe.py @@ -303,17 +303,21 @@ def transcribeAudio(audio_state, model, use_cpu: bool): audio_state.transcribe_no_change_count = 0 audio_state.transcribe_sleep_duration = audio_state.transcribe_sleep_duration_min_s -def sendAudio(audio_state): +def sendAudio(audio_state, use_builtin: bool): while audio_state.run_app == True: text = audio_state.committed_text + " " + audio_state.text - ret = osc_ctrl.pageMessage(audio_state.osc_state, text) - is_paging = (ret == False) - osc_ctrl.indicatePaging(audio_state.osc_state.client, is_paging) + if use_builtin: + ret = osc_ctrl.pageMessageBuiltin(audio_state.osc_state, text) + time.sleep(1.5) + else: + ret = osc_ctrl.pageMessage(audio_state.osc_state, text) + is_paging = (ret == False) + osc_ctrl.indicatePaging(audio_state.osc_state.client, is_paging) - # Pace this out - time.sleep(0.01) + # Pace this out + time.sleep(0.01) -def readControllerInput(audio_state, enable_local_beep): +def readControllerInput(audio_state, enable_local_beep, use_builtin): session = None first = True while session == None and audio_state.run_app == True: @@ -343,8 +347,9 @@ def readControllerInput(audio_state, enable_local_beep): if now - last_rising > 0.5: # Long hold state = PAUSE_STATE - osc_ctrl.indicateSpeech(audio_state.osc_state.client, False) - osc_ctrl.toggleBoard(audio_state.osc_state.client, False) + if not use_builtin: + osc_ctrl.indicateSpeech(audio_state.osc_state.client, False) + osc_ctrl.toggleBoard(audio_state.osc_state.client, False) #playsound(os.path.abspath("../Sounds/Noise_Off.wav")) resetAudioLocked(audio_state) @@ -355,8 +360,9 @@ def readControllerInput(audio_state, enable_local_beep): # Short hold if state == RECORD_STATE: state = PAUSE_STATE - osc_ctrl.indicateSpeech(audio_state.osc_state.client, False) - osc_ctrl.lockWorld(audio_state.osc_state.client, True) + if not use_builtin: + 
osc_ctrl.indicateSpeech(audio_state.osc_state.client, False) + osc_ctrl.lockWorld(audio_state.osc_state.client, True) audio_state.transcribe_sleep_duration = audio_state.transcribe_sleep_duration_min_s audio_state.audio_paused = True @@ -365,9 +371,10 @@ def readControllerInput(audio_state, enable_local_beep): playsound(os.path.abspath("../Sounds/Noise_Off.wav")) elif state == PAUSE_STATE: state = RECORD_STATE - osc_ctrl.indicateSpeech(audio_state.osc_state.client, True) - osc_ctrl.toggleBoard(audio_state.osc_state.client, True) - osc_ctrl.lockWorld(audio_state.osc_state.client, False) + if not use_builtin: + osc_ctrl.indicateSpeech(audio_state.osc_state.client, True) + osc_ctrl.toggleBoard(audio_state.osc_state.client, True) + osc_ctrl.lockWorld(audio_state.osc_state.client, False) resetAudioLocked(audio_state) resetDisplayLocked(audio_state) @@ -379,7 +386,8 @@ def readControllerInput(audio_state, enable_local_beep): # model should correspond to one of the Whisper models defined in # whisper/__init__.py. Examples: tiny, base, small, medium. 
-def transcribeLoop(mic: str, language: str, model: str, enable_local_beep: bool, use_cpu: bool): +def transcribeLoop(mic: str, language: str, model: str, + enable_local_beep: bool, use_cpu: bool, use_builtin: bool): audio_state = getMicStream(mic) audio_state.language = whisper.tokenizer.TO_LANGUAGE_CODE[language] @@ -396,11 +404,11 @@ def transcribeLoop(mic: str, language: str, model: str, enable_local_beep: bool, transcribe_audio_thd.daemon = True transcribe_audio_thd.start() - send_audio_thd = threading.Thread(target = sendAudio, args = [audio_state]) + send_audio_thd = threading.Thread(target = sendAudio, args = [audio_state, use_builtin]) send_audio_thd.daemon = True send_audio_thd.start() - controller_input_thd = threading.Thread(target = readControllerInput, args = [audio_state, enable_local_beep]) + controller_input_thd = threading.Thread(target = readControllerInput, args = [audio_state, enable_local_beep, use_builtin]) controller_input_thd.daemon = True controller_input_thd.start() @@ -443,6 +451,7 @@ if __name__ == "__main__": parser.add_argument("--cols", type=int, help="The number of columns on the board") parser.add_argument("--window_duration_s", type=int, help="The length in seconds of the audio recording handed to the transcription algorithm") parser.add_argument("--cpu", type=int, help="If set to 1, use CPU instead of GPU") + parser.add_argument("--use_builtin", type=int, help="If set to 1, use the text box built into the game.") args = parser.parse_args() if not args.mic: @@ -470,11 +479,16 @@ if __name__ == "__main__": else: args.cpu = False + if args.use_builtin == 1: + args.use_builtin = True + else: + args.use_builtin = False + generate_utils.config.BYTES_PER_CHAR = int(args.bytes_per_char) generate_utils.config.CHARS_PER_SYNC = int(args.chars_per_sync) generate_utils.config.BOARD_ROWS = int(args.rows) generate_utils.config.BOARD_COLS = int(args.cols) transcribeLoop(args.mic, args.language, args.model, args.enable_local_beep, - args.cpu) + 
args.cpu, args.use_builtin) |
