diff options
| -rw-r--r-- | Animations/TaSTT_Backplate_Resize_00_to_50.anim | 8 | ||||
| -rw-r--r-- | Animations/TaSTT_Backplate_Resize_100_to_50.anim | 8 | ||||
| -rw-r--r-- | Animations/TaSTT_Backplate_Resize_50_to_00.anim | 8 | ||||
| -rw-r--r-- | Animations/TaSTT_Backplate_Resize_50_to_100.anim | 8 | ||||
| -rw-r--r-- | Animations/TaSTT_Toggle_Off.anim | 98 | ||||
| -rw-r--r-- | Animations/TaSTT_Toggle_On.anim | 98 | ||||
| -rw-r--r-- | generate_params.py | 3 | ||||
| -rw-r--r-- | generate_utils.py | 3 | ||||
| -rw-r--r-- | libunity.py | 2 | ||||
| -rw-r--r-- | osc_ctrl.py | 45 | ||||
| -rw-r--r-- | template.anim.txt | 4 | ||||
| -rw-r--r-- | transcribe.py | 390 |
12 files changed, 448 insertions, 227 deletions
diff --git a/Animations/TaSTT_Backplate_Resize_00_to_50.anim b/Animations/TaSTT_Backplate_Resize_00_to_50.anim index d3dd68a..561c237 100644 --- a/Animations/TaSTT_Backplate_Resize_00_to_50.anim +++ b/Animations/TaSTT_Backplate_Resize_00_to_50.anim @@ -33,7 +33,7 @@ AnimationClip: m_PostInfinity: 2 m_RotationOrder: 4 attribute: m_IsActive - path: CustomSTT + path: TaSTT classID: 1 script: {fileID: 0} - curve: @@ -70,7 +70,7 @@ AnimationClip: m_PostInfinity: 2 m_RotationOrder: 4 attribute: blendShape.TaSTT_Hide_Backplate - path: CustomSTT + path: TaSTT classID: 137 script: {fileID: 0} m_PPtrCurves: [] @@ -133,7 +133,7 @@ AnimationClip: m_PostInfinity: 2 m_RotationOrder: 4 attribute: m_IsActive - path: CustomSTT + path: TaSTT classID: 1 script: {fileID: 0} - curve: @@ -170,7 +170,7 @@ AnimationClip: m_PostInfinity: 2 m_RotationOrder: 4 attribute: blendShape.TaSTT_Hide_Backplate - path: CustomSTT + path: TaSTT classID: 137 script: {fileID: 0} m_EulerEditorCurves: [] diff --git a/Animations/TaSTT_Backplate_Resize_100_to_50.anim b/Animations/TaSTT_Backplate_Resize_100_to_50.anim index e2cc887..b2bd44d 100644 --- a/Animations/TaSTT_Backplate_Resize_100_to_50.anim +++ b/Animations/TaSTT_Backplate_Resize_100_to_50.anim @@ -51,7 +51,7 @@ AnimationClip: m_PostInfinity: 2 m_RotationOrder: 4 attribute: m_IsActive - path: CustomSTT + path: TaSTT classID: 1 script: {fileID: 0} - curve: @@ -88,7 +88,7 @@ AnimationClip: m_PostInfinity: 2 m_RotationOrder: 4 attribute: blendShape.TaSTT_Hide_Backplate - path: CustomSTT + path: TaSTT classID: 137 script: {fileID: 0} m_PPtrCurves: [] @@ -169,7 +169,7 @@ AnimationClip: m_PostInfinity: 2 m_RotationOrder: 4 attribute: m_IsActive - path: CustomSTT + path: TaSTT classID: 1 script: {fileID: 0} - curve: @@ -206,7 +206,7 @@ AnimationClip: m_PostInfinity: 2 m_RotationOrder: 4 attribute: blendShape.TaSTT_Hide_Backplate - path: CustomSTT + path: TaSTT classID: 137 script: {fileID: 0} m_EulerEditorCurves: [] diff --git a/Animations/TaSTT_Backplate_Resize_50_to_00.anim b/Animations/TaSTT_Backplate_Resize_50_to_00.anim index 920f6b2..a38a9bb 100644 --- a/Animations/TaSTT_Backplate_Resize_50_to_00.anim +++ b/Animations/TaSTT_Backplate_Resize_50_to_00.anim @@ -51,7 +51,7 @@ AnimationClip: m_PostInfinity: 2 m_RotationOrder: 4 attribute: blendShape.TaSTT_Hide_Backplate - path: CustomSTT + path: TaSTT classID: 137 script: {fileID: 0} - curve: @@ -97,7 +97,7 @@ AnimationClip: m_PostInfinity: 2 m_RotationOrder: 4 attribute: m_IsActive - path: CustomSTT + path: TaSTT classID: 1 script: {fileID: 0} m_PPtrCurves: [] @@ -178,7 +178,7 @@ AnimationClip: m_PostInfinity: 2 m_RotationOrder: 4 attribute: blendShape.TaSTT_Hide_Backplate - path: CustomSTT + path: TaSTT classID: 137 script: {fileID: 0} - curve: @@ -224,7 +224,7 @@ AnimationClip: m_PostInfinity: 2 m_RotationOrder: 4 attribute: m_IsActive - path: CustomSTT + path: TaSTT classID: 1 script: {fileID: 0} m_EulerEditorCurves: [] diff --git a/Animations/TaSTT_Backplate_Resize_50_to_100.anim b/Animations/TaSTT_Backplate_Resize_50_to_100.anim index 8160721..3406b7d 100644 --- a/Animations/TaSTT_Backplate_Resize_50_to_100.anim +++ b/Animations/TaSTT_Backplate_Resize_50_to_100.anim @@ -51,7 +51,7 @@ AnimationClip: m_PostInfinity: 2 m_RotationOrder: 4 attribute: blendShape.TaSTT_Hide_Backplate - path: CustomSTT + path: TaSTT classID: 137 script: {fileID: 0} - curve: @@ -88,7 +88,7 @@ AnimationClip: m_PostInfinity: 2 m_RotationOrder: 4 attribute: m_IsActive - path: CustomSTT + path: TaSTT classID: 1 script: {fileID: 0} m_PPtrCurves: [] @@ -169,7 +169,7 @@ AnimationClip: m_PostInfinity: 2 m_RotationOrder: 4 attribute: blendShape.TaSTT_Hide_Backplate - path: CustomSTT + path: TaSTT classID: 137 script: {fileID: 0} - curve: @@ -206,7 +206,7 @@ AnimationClip: m_PostInfinity: 2 m_RotationOrder: 4 attribute: m_IsActive - path: CustomSTT + path: TaSTT classID: 1 script: {fileID: 0} m_EulerEditorCurves: [] diff --git a/Animations/TaSTT_Toggle_Off.anim b/Animations/TaSTT_Toggle_Off.anim new file mode 100644 index 0000000..69b487a --- /dev/null +++ b/Animations/TaSTT_Toggle_Off.anim @@ -0,0 +1,98 @@ +%YAML 1.1 +%TAG !u! tag:unity3d.com,2011: +--- !u!74 &7400000 +AnimationClip: + m_ObjectHideFlags: 0 + m_CorrespondingSourceObject: {fileID: 0} + m_PrefabInstance: {fileID: 0} + m_PrefabAsset: {fileID: 0} + m_Name: TaSTT_Toggle_Off + serializedVersion: 6 + m_Legacy: 0 + m_Compressed: 0 + m_UseHighQualityCurve: 1 + m_RotationCurves: [] + m_CompressedRotationCurves: [] + m_EulerCurves: [] + m_PositionCurves: [] + m_ScaleCurves: [] + m_FloatCurves: + - curve: + serializedVersion: 2 + m_Curve: + - serializedVersion: 3 + time: 0 + value: 0 + inSlope: Infinity + outSlope: Infinity + tangentMode: 103 + weightedMode: 0 + inWeight: 0 + outWeight: 0 + m_PreInfinity: 2 + m_PostInfinity: 2 + m_RotationOrder: 4 + attribute: m_IsActive + path: TaSTT + classID: 1 + script: {fileID: 0} + m_PPtrCurves: [] + m_SampleRate: 60 + m_WrapMode: 0 + m_Bounds: + m_Center: {x: 0, y: 0, z: 0} + m_Extent: {x: 0, y: 0, z: 0} + m_ClipBindingConstant: + genericBindings: + - serializedVersion: 2 + path: 1409323812 + attribute: 2086281974 + script: {fileID: 0} + typeID: 1 + customType: 0 + isPPtrCurve: 0 + pptrCurveMapping: [] + m_AnimationClipSettings: + serializedVersion: 2 + m_AdditiveReferencePoseClip: {fileID: 0} + m_AdditiveReferencePoseTime: 0 + m_StartTime: 0 + m_StopTime: 0 + m_OrientationOffsetY: 0 + m_Level: 0 + m_CycleOffset: 0 + m_HasAdditiveReferencePose: 0 + m_LoopTime: 1 + m_LoopBlend: 0 + m_LoopBlendOrientation: 0 + m_LoopBlendPositionY: 0 + m_LoopBlendPositionXZ: 0 + m_KeepOriginalOrientation: 0 + m_KeepOriginalPositionY: 1 + m_KeepOriginalPositionXZ: 0 + m_HeightFromFeet: 0 + m_Mirror: 0 + m_EditorCurves: + - curve: + serializedVersion: 2 + m_Curve: + - serializedVersion: 3 + time: 0 + value: 0 + inSlope: Infinity + outSlope: Infinity + tangentMode: 103 + weightedMode: 0 + inWeight: 0 + outWeight: 0 + m_PreInfinity: 2 + m_PostInfinity: 2 + m_RotationOrder: 4 + attribute: m_IsActive + path: TaSTT + classID: 1 + script: {fileID: 0} + m_EulerEditorCurves: [] + m_HasGenericRootTransform: 0 + m_HasMotionFloatCurves: 0 + m_Events: [] diff --git a/Animations/TaSTT_Toggle_On.anim b/Animations/TaSTT_Toggle_On.anim new file mode 100644 index 0000000..874daf1 --- /dev/null +++ b/Animations/TaSTT_Toggle_On.anim @@ -0,0 +1,98 @@ +%YAML 1.1 +%TAG !u! tag:unity3d.com,2011: +--- !u!74 &7400000 +AnimationClip: + m_ObjectHideFlags: 0 + m_CorrespondingSourceObject: {fileID: 0} + m_PrefabInstance: {fileID: 0} + m_PrefabAsset: {fileID: 0} + m_Name: TaSTT_Toggle_On + serializedVersion: 6 + m_Legacy: 0 + m_Compressed: 0 + m_UseHighQualityCurve: 1 + m_RotationCurves: [] + m_CompressedRotationCurves: [] + m_EulerCurves: [] + m_PositionCurves: [] + m_ScaleCurves: [] + m_FloatCurves: + - curve: + serializedVersion: 2 + m_Curve: + - serializedVersion: 3 + time: 0 + value: 1 + inSlope: Infinity + outSlope: Infinity + tangentMode: 103 + weightedMode: 0 + inWeight: 0 + outWeight: 0 + m_PreInfinity: 2 + m_PostInfinity: 2 + m_RotationOrder: 4 + attribute: m_IsActive + path: TaSTT + classID: 1 + script: {fileID: 0} + m_PPtrCurves: [] + m_SampleRate: 60 + m_WrapMode: 0 + m_Bounds: + m_Center: {x: 0, y: 0, z: 0} + m_Extent: {x: 0, y: 0, z: 0} + m_ClipBindingConstant: + genericBindings: + - serializedVersion: 2 + path: 1409323812 + attribute: 2086281974 + script: {fileID: 0} + typeID: 1 + customType: 0 + isPPtrCurve: 0 + pptrCurveMapping: [] + m_AnimationClipSettings: + serializedVersion: 2 + m_AdditiveReferencePoseClip: {fileID: 0} + m_AdditiveReferencePoseTime: 0 + m_StartTime: 0 + m_StopTime: 0 + m_OrientationOffsetY: 0 + m_Level: 0 + m_CycleOffset: 0 + m_HasAdditiveReferencePose: 0 + m_LoopTime: 1 + m_LoopBlend: 0 + m_LoopBlendOrientation: 0 + m_LoopBlendPositionY: 0 + m_LoopBlendPositionXZ: 0 + m_KeepOriginalOrientation: 0 + m_KeepOriginalPositionY: 1 + m_KeepOriginalPositionXZ: 0 + m_HeightFromFeet: 0 + m_Mirror: 0 + m_EditorCurves: + - curve: + serializedVersion: 2 + m_Curve: + - serializedVersion: 3 + time: 0 + value: 1 + inSlope: Infinity + outSlope: Infinity + tangentMode: 103 + weightedMode: 0 + inWeight: 0 + outWeight: 0 + m_PreInfinity: 2 + m_PostInfinity: 2 + m_RotationOrder: 4 + attribute: m_IsActive + path: TaSTT + classID: 1 + script: {fileID: 0} + m_EulerEditorCurves: [] + m_HasGenericRootTransform: 0 + m_HasMotionFloatCurves: 0 + m_Events: [] diff --git a/generate_params.py b/generate_params.py index 665272b..d91e801 100644 --- a/generate_params.py +++ b/generate_params.py @@ -60,6 +60,9 @@ print(generate_utils.replaceMacros(BOOL_PARAM, params)) params["PARAM_NAME"] = generate_utils.getHipToggleParam() print(generate_utils.replaceMacros(BOOL_PARAM, params)) +params["PARAM_NAME"] = generate_utils.getToggleParam() +print(generate_utils.replaceMacros(BOOL_PARAM, params)) + for i in range(0, generate_utils.NUM_LAYERS): params["PARAM_NAME"] = generate_utils.getLayerParam(i) print(generate_utils.replaceMacros(INT_PARAM, params)) diff --git a/generate_utils.py b/generate_utils.py index 68301fa..87e8e4f 100644 --- a/generate_utils.py +++ b/generate_utils.py @@ -42,6 +42,9 @@ def getHipToggleParam(): def getHandToggleParam(): return "TaSTT_Hand_Toggle" +def getToggleParam(): + return "TaSTT_Toggle" + # Each layer controls a group of cells. There's only one letter per layer, thus # this is also the name of the parameter which sets the letter for a layer. def getLayerParam(which_layer): diff --git a/libunity.py b/libunity.py index 1ae4f9b..9d7fac0 100644 --- a/libunity.py +++ b/libunity.py @@ -421,7 +421,7 @@ class UnityAnimator(): if anchor in self.id_mapping.keys(): return self.id_mapping[anchor] - new_id = allocateId(classId(anchor)) + new_id = self.allocateId(classId(anchor)) self.id_mapping[anchor] = new_id return new_id diff --git a/osc_ctrl.py b/osc_ctrl.py index ec3515d..40cb7d1 100644 --- a/osc_ctrl.py +++ b/osc_ctrl.py @@ -25,12 +25,8 @@ def usage(): print("python3 -m pip install python-osc") print("python3 ./osc_ctrl.py") -parser = argparse.ArgumentParser() -parser.add_argument("-i", default="127.0.0.1", help="OSC server IP") -parser.add_argument("-p", type=int, default=9000, help="OSC server port") -args = parser.parse_args() - -client = udp_client.SimpleUDPClient(args.i, args.p) +def getClient(ip = "127.0.0.1", port = 9000): + return udp_client.SimpleUDPClient(ip, port) class EvilGlobalState(): # Mapping from ascii char to encoded byte. @@ -73,7 +69,7 @@ def encodeMessage(lines): result += [state.encoding[' ']] * (BOARD_COLS - len(line)) return result -def updateCell(cell_idx, letter_encoded, s0, s1, s2, s3): +def updateCell(client, cell_idx, letter_encoded, s0, s1, s2, s3): addr="/avatar/parameters/" + getLayerParam(cell_idx) client.send_message(addr, letter_encoded) @@ -89,19 +85,19 @@ def updateCell(cell_idx, letter_encoded, s0, s1, s2, s3): addr="/avatar/parameters/" + getSelectParam(cell_idx, 3) client.send_message(addr, s3) -def enable(): +def enable(client): addr="/avatar/parameters/" + getEnableParam() client.send_message(addr, True) -def disable(): +def disable(client): addr="/avatar/parameters/" + getEnableParam() client.send_message(addr, False) # Send a cell all at once. # `which_cell` is an integer in the range [0,2**INDEX_BITS). -def sendMessageCellDiscrete(msg_cell, which_cell): +def sendMessageCellDiscrete(client, msg_cell, which_cell): # Disable each layer. - disable() + disable(client) time.sleep(CELL_TX_TIME_S / 3.0) @@ -115,13 +111,13 @@ def sendMessageCellDiscrete(msg_cell, which_cell): # Seek each layer to the current cell. for i in range(0, len(msg_cell)): - updateCell(i, msg_cell[i], s0, s1, s2, s3) + updateCell(client, i, msg_cell[i], s0, s1, s2, s3) # Wait for convergence. time.sleep(CELL_TX_TIME_S / 3.0) # Enable each layer. - enable() + enable(client) # Wait for convergence. time.sleep(CELL_TX_TIME_S / 3.0) @@ -152,7 +148,7 @@ def splitMessage(msg): line += " " + word continue - print("append line {}".format(line)) + #print("append line {}".format(line)) lines.append(line) line = word @@ -242,7 +238,7 @@ def resizeBoard(num_lines, tx_state, shrink_only): # Send a message to the board, but only overwrite cells that we know need to # change. # This may take multiple calls to complete. Returns True once it's done. -def sendMessageLazy(msg, tx_state): +def sendMessageLazy(client, msg, tx_state): lines = splitMessage(msg) #resizeBoard(len(lines), tx_state, shrink_only=False) @@ -277,13 +273,13 @@ def sendMessageLazy(msg, tx_state): return False empty_cells_sent += 1 - sendMessageCellDiscrete(cell_msg, cell) + sendMessageCellDiscrete(client, cell_msg, cell) tx_state.last_msg_encoded = msg_encoded #resizeBoard(len(lines), tx_state, shrink_only=True) return True -def sendMessage(msg, page_sleep_s): +def sendMessage(client, msg, page_sleep_s): lines = splitMessage(msg) msg = encodeMessage(lines) msg_len = len(msg) @@ -303,28 +299,35 @@ def sendMessage(msg, page_sleep_s): cell_end = (cell + 1) * NUM_LAYERS cell_msg = msg[cell_begin:cell_end] print("Send cell {}".format(cell)) - sendMessageCellDiscrete(cell_msg, cell) + sendMessageCellDiscrete(client, cell_msg, cell) #closeBoard() #clear() -def sendRawMessage(msg): +def sendRawMessage(client, msg): n_cells = ceil(len(msg) / NUM_LAYERS) for cell in range(0, n_cells): cell_begin = cell * NUM_LAYERS cell_end = (cell + 1) * NUM_LAYERS cell_msg = msg[cell_begin:cell_end] #print("Send cell {}".format(cell)) - sendMessageCellDiscrete(cell_msg, cell) + sendMessageCellDiscrete(client, cell_msg, cell) def clear(): sendRawMessage([state.encoding[' ']] * BOARD_ROWS * BOARD_COLS) if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("-i", default="127.0.0.1", help="OSC server IP") + parser.add_argument("-p", type=int, default=9000, help="OSC server port") + args = parser.parse_args() + + client = getClient(args.i, args.p) + generateEncoding(state) tx_state = OscTxState() for line in fileinput.input(): - while not sendMessageLazy(line, tx_state): + while not sendMessageLazy(client, line, tx_state): continue clear() diff --git a/template.anim.txt b/template.anim.txt index 8f06fe1..6dccbd5 100644 --- a/template.anim.txt +++ b/template.anim.txt @@ -42,7 +42,7 @@ AnimationClip: m_PostInfinity: 2 m_RotationOrder: 4 attribute: material.%LETTER_SHADER_PARAM% - path: CustomSTT + path: TaSTT classID: 137 script: {fileID: 0} m_PPtrCurves: [] @@ -107,7 +107,7 @@ AnimationClip: m_PostInfinity: 2 m_RotationOrder: 4 attribute: material.%LETTER_SHADER_PARAM% - path: CustomSTT + path: TaSTT classID: 137 script: {fileID: 0} m_EulerEditorCurves: [] diff --git a/transcribe.py b/transcribe.py index d0e3574..dc36541 100644 --- a/transcribe.py +++ b/transcribe.py @@ -1,187 +1,203 @@ -#!/usr/bin/env python3
-
-import copy
-import fileinput
-import os
-import osc_ctrl
-# python3 -m pip install pyaudio
-import pyaudio
-import threading
-import time
-import wave
-# python3 -m pip install git+https://github.com/openai/whisper.git
-# python3 -m pip install torch -f https://download.pytorch.org/whl/torch_stable.html
-import whisper
-
-class AudioState:
- CHUNK = 1024
- FORMAT = pyaudio.paInt16
- CHANNELS = 1
- # This matches the framerate expected by whisper.
- RATE = 16000
-
- # The maximum length that recordAudio() will put into frames before it
- # starts dropping from the start.
- MAX_LENGTH_S = 90
- # The minimum length that recordAudio() will wait for before saving audio.
- MIN_LENGTH_S = 3
-
- # PyAudio object
- p = None
-
- # PyAudio stream object
- stream = None
-
- frames = []
- frames_lock = threading.Lock()
-
- text = ""
- text_lock = threading.Lock()
-
- record_audio = True
- transcribe_audio = True
- send_audio = True
-
-def getMicStream():
- audio_state = AudioState()
- audio_state.p = pyaudio.PyAudio()
-
- info = audio_state.p.get_host_api_info_by_index(0)
- numdevices = info.get('deviceCount')
-
- print("Finding index mic...")
- got_match = False
- device_index = -1
- mic_str = "Focusrite"
- index_str = "Digital Audio Interface"
- target_str = mic_str
- while got_match == False:
- for i in range(0, numdevices):
- if (audio_state.p.get_device_info_by_host_api_device_index(0, i).get('maxInputChannels')) > 0:
- device_name = audio_state.p.get_device_info_by_host_api_device_index(0, i).get('name')
- print("Input Device id ", i, " - ", device_name)
- if target_str in device_name:
- print("Got match: {}".format(device_name))
- device_index = i
- got_match = True
- break
- if got_match == False:
- print("No match, sleeping")
- time.sleep(3)
-
- audio_state.stream = audio_state.p.open(format=audio_state.FORMAT,
- channels=audio_state.CHANNELS, rate=audio_state.RATE,
- input=True, frames_per_buffer=audio_state.CHUNK,
- input_device_index=device_index)
-
- return audio_state
-
-# Continuously records audio as long as audio_state.record_audio is set.
-def recordAudio(audio_state):
- print("Recording audio")
- while audio_state.record_audio:
- data = audio_state.stream.read(audio_state.CHUNK)
-
- audio_state.frames_lock.acquire()
- audio_state.frames.append(data)
- max_frames = int(audio_state.RATE * audio_state.MAX_LENGTH_S / audio_state.CHUNK)
- if len(audio_state.frames) > max_frames:
- audio_state.frames = audio_state.frames[-1 * max_frames :]
- audio_state.frames_lock.release()
-
- print("Done recording")
-
-# Saves audio. recordAudio() may continue running while this takes place.
-def saveAudio(audio_state, filename):
- min_frames = int(audio_state.RATE * audio_state.MIN_LENGTH_S / audio_state.CHUNK)
- if len(audio_state.frames) < min_frames:
- return
-
- wf = wave.open(filename, 'wb')
- wf.setnchannels(audio_state.CHANNELS)
- wf.setsampwidth(audio_state.p.get_sample_size(audio_state.FORMAT))
- wf.setframerate(audio_state.RATE)
-
- audio_state.frames_lock.acquire()
- frames = copy.deepcopy(audio_state.frames)
- audio_state.frames_lock.release()
-
- wf.writeframes(b''.join(frames))
- wf.close()
-
-def resetAudio(audio_state):
- audio_state.frames_lock.acquire()
- audio_state.frames = []
- audio_state.frames_lock.release()
-
-# Transcribe the audio recorded in a file.
-def transcribe(model, filename):
- result = whisper.transcribe(model=model, audio=filename, language="en")
- return result["text"]
-
-def transcribeAudio(audio_state, model):
- while audio_state.transcribe_audio == True:
- saveAudio(audio_state, "audio.wav")
-
- if not os.path.isfile("audio.wav"):
- time.sleep(0.1)
- continue
-
- print("Beginning transcription")
- text = transcribe(model, "audio.wav")
-
- audio_state.text_lock.acquire()
- audio_state.text = text
- audio_state.text_lock.release()
-
- print("Transcription: {}".format(audio_state.text))
-
- # Pace this out
- time.sleep(0.2)
-
-def sendAudio(audio_state):
- tx_state = osc_ctrl.OscTxState()
- while audio_state.send_audio == True:
- audio_state.text_lock.acquire()
- text = copy.deepcopy(audio_state.text)
- audio_state.text_lock.release()
-
- osc_ctrl.sendMessageLazy(text, tx_state)
-
- # Pace this out
- time.sleep(0.05)
-
-if __name__ == "__main__":
- if os.path.isfile("audio.wav"):
- os.remove("audio.wav")
-
- audio_state = getMicStream()
-
- record_audio_thd = threading.Thread(target = recordAudio, args = [audio_state])
- record_audio_thd.daemon = True
- record_audio_thd.start()
-
- print("Safe to start talking")
-
- model = whisper.load_model("base")
-
- transcribe_audio_thd = threading.Thread(target = transcribeAudio, args = [audio_state, model])
- transcribe_audio_thd.daemon = True
- transcribe_audio_thd.start()
-
- #send_audio_thd = threading.Thread(target = sendAudio, args = [audio_state])
- #send_audio_thd.daemon = True
- #send_audio_thd.start()
-
- print("Press enter to start a new message")
- for line in fileinput.input():
- resetAudio(audio_state)
- if "exit" in line or "quit" in line:
- break
-
- print("Joining threads")
- audio_state.record_audio = False
- audio_state.transcribe_audio = False
- record_audio_thd.join()
- transcribe_audio_thd.join()
-
+#!/usr/bin/env python3 + +import argparse +import copy +import os +import osc_ctrl +# python3 -m pip install pyaudio +import pyaudio +import sys +import threading +import time +import wave +# python3 -m pip install git+https://github.com/openai/whisper.git +# python3 -m pip install torch -f https://download.pytorch.org/whl/torch_stable.html +from whisper import transcribe as whisper_transcribe +from whisper import load_model as whisper_load_model + +class AudioState: + CHUNK = 1024 + FORMAT = pyaudio.paInt16 + CHANNELS = 1 + # This matches the framerate expected by whisper. + RATE = 16000 + + # The maximum length that recordAudio() will put into frames before it + # starts dropping from the start. + MAX_LENGTH_S = 90 + # The minimum length that recordAudio() will wait for before saving audio. + MIN_LENGTH_S = 3 + + # PyAudio object + p = None + + # PyAudio stream object + stream = None + + frames = [] + frames_lock = threading.Lock() + + text = "" + text_lock = threading.Lock() + + record_audio = True + transcribe_audio = True + send_audio = True + + osc_client = osc_ctrl.getClient() + +def getMicStream(which_mic): + audio_state = AudioState() + audio_state.p = pyaudio.PyAudio() + + print("Finding index mic...") + got_match = False + device_index = -1 + focusrite_str = "Focusrite" + index_str = "Digital Audio Interface" + if which_mic == "index": + target_str = index_str + elif which_mic == "focusrite": + target_str = focusrite_str + else: + raise Exception("Unrecognized mic requested: {}".format(which_mic)) + while got_match == False: + info = audio_state.p.get_host_api_info_by_index(0) + numdevices = info.get('deviceCount') + + for i in range(0, numdevices): + if (audio_state.p.get_device_info_by_host_api_device_index(0, i).get('maxInputChannels')) > 0: + device_name = audio_state.p.get_device_info_by_host_api_device_index(0, i).get('name') + print("Input Device id ", i, " - ", device_name) + if target_str in device_name: + print("Got match: {}".format(device_name)) + device_index = i + got_match = True + break + if got_match == False: + print("No match, sleeping") + time.sleep(3) + + audio_state.stream = audio_state.p.open(format=audio_state.FORMAT, + channels=audio_state.CHANNELS, rate=audio_state.RATE, + input=True, frames_per_buffer=audio_state.CHUNK, + input_device_index=device_index) + + return audio_state + +# Continuously records audio as long as audio_state.record_audio is set. +def recordAudio(audio_state): + print("Recording audio") + while audio_state.record_audio: + data = audio_state.stream.read(audio_state.CHUNK) + + audio_state.frames_lock.acquire() + audio_state.frames.append(data) + max_frames = int(audio_state.RATE * audio_state.MAX_LENGTH_S / audio_state.CHUNK) + if len(audio_state.frames) > max_frames: + audio_state.frames = audio_state.frames[-1 * max_frames :] + audio_state.frames_lock.release() + + print("Done recording") + +# Saves audio. recordAudio() may continue running while this takes place. +def saveAudio(audio_state, filename): + min_frames = int(audio_state.RATE * audio_state.MIN_LENGTH_S / audio_state.CHUNK) + if len(audio_state.frames) < min_frames: + return + + wf = wave.open(filename, 'wb') + wf.setnchannels(audio_state.CHANNELS) + wf.setsampwidth(audio_state.p.get_sample_size(audio_state.FORMAT)) + wf.setframerate(audio_state.RATE) + + audio_state.frames_lock.acquire() + frames = copy.deepcopy(audio_state.frames) + audio_state.frames_lock.release() + + wf.writeframes(b''.join(frames)) + wf.close() + +def resetAudio(audio_state): + audio_state.frames_lock.acquire() + audio_state.frames = [] + audio_state.frames_lock.release() + +# Transcribe the audio recorded in a file. +def transcribe(model, filename): + result = whisper_transcribe(model=model, audio=filename, language="en") + return result["text"] + +def transcribeAudio(audio_state, model): + while audio_state.transcribe_audio == True: + saveAudio(audio_state, "audio.wav") + + if not os.path.isfile("audio.wav"): + time.sleep(0.1) + continue + + print("Beginning transcription") + text = transcribe(model, "audio.wav") + + audio_state.text_lock.acquire() + audio_state.text = text + audio_state.text_lock.release() + + print("Transcription: {}".format(audio_state.text)) + + # Pace this out + time.sleep(0.2) + +def sendAudio(audio_state): + tx_state = osc_ctrl.OscTxState() + while audio_state.send_audio == True: + audio_state.text_lock.acquire() + text = copy.deepcopy(audio_state.text) + audio_state.text_lock.release() + + osc_ctrl.sendMessageLazy(audio_state.osc_client, text, tx_state) + + # Pace this out + time.sleep(0.05) + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--mic", type=str, help="Which mic to use. Options: index, focusrite. Default: index") + args = parser.parse_args() + + if not args.mic: + args.mic = "index" + + if os.path.isfile("audio.wav"): + os.remove("audio.wav") + + audio_state = getMicStream(args.mic) + + record_audio_thd = threading.Thread(target = recordAudio, args = [audio_state]) + record_audio_thd.daemon = True + record_audio_thd.start() + + print("Safe to start talking") + + model = whisper_load_model("base") + + transcribe_audio_thd = threading.Thread(target = transcribeAudio, args = [audio_state, model]) + transcribe_audio_thd.daemon = True + transcribe_audio_thd.start() + + send_audio_thd = threading.Thread(target = sendAudio, args = [audio_state]) + send_audio_thd.daemon = True + send_audio_thd.start() + + print("Press enter to start a new message") + for line in sys.stdin: + resetAudio(audio_state) + if "exit" in line or "quit" in line: + break + + print("Joining threads") + audio_state.record_audio = False + audio_state.transcribe_audio = False + record_audio_thd.join() + transcribe_audio_thd.join() + |
