diff options
| author | yum <yum.food.vr@gmail.com> | 2022-12-18 19:11:47 -0800 |
|---|---|---|
| committer | yum <yum.food.vr@gmail.com> | 2022-12-18 19:11:47 -0800 |
| commit | b7b2b112a106138d99dda3f259620b350c896f1a (patch) | |
| tree | ec30bc9daeb0fc4b75c488f675cf70d55f0a1f85 /Scripts | |
| parent | 79f1b48042cbb724892301afdee842fb33ab2b37 (diff) | |
Add ability to select model
* icon now works when pinned to taskbar
* add model selection
* add script to dump mic devices
* whisper models now download into the virtual environment
Diffstat (limited to 'Scripts')
| -rw-r--r-- | Scripts/dump_mic_devices.py | 7 | ||||
| -rw-r--r-- | Scripts/emotes.py | 1 | ||||
| -rw-r--r-- | Scripts/transcribe.py | 31 |
3 files changed, 28 insertions, 11 deletions
diff --git a/Scripts/dump_mic_devices.py b/Scripts/dump_mic_devices.py new file mode 100644 index 0000000..2b712cf --- /dev/null +++ b/Scripts/dump_mic_devices.py @@ -0,0 +1,7 @@ +#!/usr/bin/env python3 + +from transcribe import dumpMicDevices + +if __name__ == "__main__": + dumpMicDevices() + diff --git a/Scripts/emotes.py b/Scripts/emotes.py index b922fdf..0a4ed01 100644 --- a/Scripts/emotes.py +++ b/Scripts/emotes.py @@ -31,6 +31,7 @@ IMG_TEX_DATA.append(("Images/Emotes/based.png", "based")) IMG_TEX_DATA.append(("Images/Emotes/chad.png", "chad")) IMG_TEX_DATA.append(("Images/Emotes/aware.png", "aware")) IMG_TEX_DATA.append(("Images/Emotes/girl.png", "girl")) +IMG_TEX_DATA = [] IMG_TEX_KEYWORD_TO_COORD = {} for i in range(0, len(IMG_TEX_DATA)): diff --git a/Scripts/transcribe.py b/Scripts/transcribe.py index 48426e2..0f7ae37 100644 --- a/Scripts/transcribe.py +++ b/Scripts/transcribe.py @@ -77,13 +77,14 @@ class AudioState: osc_client = osc_ctrl.getClient() -def dumpMicDevices(audio_state): - info = audio_state.p.get_host_api_info_by_index(0) +def dumpMicDevices(): + p = pyaudio.PyAudio() + info = p.get_host_api_info_by_index(0) numdevices = info.get('deviceCount') for i in range(0, numdevices): - if (audio_state.p.get_device_info_by_host_api_device_index(0, i).get('maxInputChannels')) > 0: - device_name = audio_state.p.get_device_info_by_host_api_device_index(0, i).get('name') + if (p.get_device_info_by_host_api_device_index(0, i).get('maxInputChannels')) > 0: + device_name = p.get_device_info_by_host_api_device_index(0, i).get('name') print("Input Device id ", i, " - ", device_name) def onAudioFramesAvailable( @@ -119,7 +120,7 @@ def getMicStream(which_mic): audio_state.p = pyaudio.PyAudio() print("Finding mic {}...".format(which_mic)) - dumpMicDevices(audio_state) + dumpMicDevices() got_match = False device_index = -1 focusrite_str = "Focusrite" @@ -318,16 +319,20 @@ def readControllerInput(audio_state): audio_state.drop_transcription = True audio_state.audio_paused = False -def transcribeLoop(mic: str, language: str): +# model should correspond to one of the Whisper models defined in +# whisper/__init__.py. Examples: tiny, base, small, medium. +def transcribeLoop(mic: str, language: str, model: str): audio_state = getMicStream(mic) audio_state.language = whisper.tokenizer.TO_LANGUAGE_CODE[language] print("Safe to start talking") - #model = whisper.load_model("tiny") - #model = whisper.load_model("base") - model = whisper.load_model("small") - #model = whisper.load_model("medium") + abspath = os.path.abspath(__file__) + dname = os.path.dirname(abspath) + model_root = os.path.join(dname, "Models") + + print("Model {} will be saved to {}".format(model, model_root)) + model = whisper.load_model(model, download_root=model_root) transcribe_audio_thd = threading.Thread(target = transcribeAudio, args = [audio_state, model]) transcribe_audio_thd.daemon = True @@ -369,6 +374,7 @@ if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("--mic", type=str, help="Which mic to use. Options: index, focusrite. Default: index") parser.add_argument("--language", type=str, help="Which language to use. Ex: english, japanese, chinese, french, german.") + parser.add_argument("--model", type=str, help="Which AI model to use. Ex: tiny, base, small, medium") args = parser.parse_args() if not args.mic: @@ -377,5 +383,8 @@ if __name__ == "__main__": if not args.language: args.language = "english" - transcribeLoop(args.mic, args.language) + if not args.model: + args.language = "base" + + transcribeLoop(args.mic, args.language, args.model) |
