diff options
Diffstat (limited to 'Scripts/transcribe.py')
| -rw-r--r-- | Scripts/transcribe.py | 31 |
1 file changed, 20 insertions, 11 deletions
diff --git a/Scripts/transcribe.py b/Scripts/transcribe.py index 48426e2..0f7ae37 100644 --- a/Scripts/transcribe.py +++ b/Scripts/transcribe.py @@ -77,13 +77,14 @@ class AudioState: osc_client = osc_ctrl.getClient() -def dumpMicDevices(audio_state): - info = audio_state.p.get_host_api_info_by_index(0) +def dumpMicDevices(): + p = pyaudio.PyAudio() + info = p.get_host_api_info_by_index(0) numdevices = info.get('deviceCount') for i in range(0, numdevices): - if (audio_state.p.get_device_info_by_host_api_device_index(0, i).get('maxInputChannels')) > 0: - device_name = audio_state.p.get_device_info_by_host_api_device_index(0, i).get('name') + if (p.get_device_info_by_host_api_device_index(0, i).get('maxInputChannels')) > 0: + device_name = p.get_device_info_by_host_api_device_index(0, i).get('name') print("Input Device id ", i, " - ", device_name) def onAudioFramesAvailable( @@ -119,7 +120,7 @@ def getMicStream(which_mic): audio_state.p = pyaudio.PyAudio() print("Finding mic {}...".format(which_mic)) - dumpMicDevices(audio_state) + dumpMicDevices() got_match = False device_index = -1 focusrite_str = "Focusrite" @@ -318,16 +319,20 @@ def readControllerInput(audio_state): audio_state.drop_transcription = True audio_state.audio_paused = False -def transcribeLoop(mic: str, language: str): +# model should correspond to one of the Whisper models defined in +# whisper/__init__.py. Examples: tiny, base, small, medium. 
+def transcribeLoop(mic: str, language: str, model: str): audio_state = getMicStream(mic) audio_state.language = whisper.tokenizer.TO_LANGUAGE_CODE[language] print("Safe to start talking") - #model = whisper.load_model("tiny") - #model = whisper.load_model("base") - model = whisper.load_model("small") - #model = whisper.load_model("medium") + abspath = os.path.abspath(__file__) + dname = os.path.dirname(abspath) + model_root = os.path.join(dname, "Models") + + print("Model {} will be saved to {}".format(model, model_root)) + model = whisper.load_model(model, download_root=model_root) transcribe_audio_thd = threading.Thread(target = transcribeAudio, args = [audio_state, model]) transcribe_audio_thd.daemon = True @@ -369,6 +374,7 @@ if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("--mic", type=str, help="Which mic to use. Options: index, focusrite. Default: index") parser.add_argument("--language", type=str, help="Which language to use. Ex: english, japanese, chinese, french, german.") + parser.add_argument("--model", type=str, help="Which AI model to use. Ex: tiny, base, small, medium") args = parser.parse_args() if not args.mic: @@ -377,5 +383,8 @@ if __name__ == "__main__": if not args.language: args.language = "english" - transcribeLoop(args.mic, args.language) + if not args.model: + args.model = "base" + + transcribeLoop(args.mic, args.language, args.model) |
