diff options
| author | yum <yum.food.vr@gmail.com> | 2022-10-27 16:00:22 -0700 |
|---|---|---|
| committer | yum <yum.food.vr@gmail.com> | 2022-10-27 16:00:22 -0700 |
| commit | fd7ea2f72a486888c30726a2482fcc1f7fce9378 (patch) | |
| tree | cae0ee99fca59d65062074a546c3975404f7ad89 | |
| parent | eefa14c431efa4e3bc16cafbcb004e41622c2411 (diff) | |
Add fast clear animation
The old clear mechanism would write an empty cell in every layer,
which would take (0.3 seconds) * (11 layers) == about 3 seconds.
The new mechanism drives an animation which overwrites every character
slot simultaneously, taking only 0.1 seconds. A nice ~30x speedup.
* Fix the transcription exponential backoff logic. Saying new things
will reset the delay to the minimum again.
* Clearing the board will also reset the transcription delay back to
the minimum.
* Raise the no-speech probability cutoff from 0.1 to 0.2. Speaking
softly into the mic often yields a no-speech probability above 0.1, so
quiet speech was being discarded as silence.
| -rw-r--r-- | TaSTT_Menu.asset | 3 | ||||
| -rw-r--r-- | dev_cheatsheat.txt | 5 | ||||
| -rw-r--r-- | generate_params.py | 3 | ||||
| -rw-r--r-- | generate_utils.py | 12 | ||||
| -rw-r--r-- | libtastt.py | 90 | ||||
| -rw-r--r-- | osc_ctrl.py | 44 | ||||
| -rw-r--r-- | transcribe.py | 10 |
7 files changed, 128 insertions, 39 deletions
diff --git a/TaSTT_Menu.asset b/TaSTT_Menu.asset index bd38fc5..3f8eeb5 100644 --- a/TaSTT_Menu.asset +++ b/TaSTT_Menu.asset @@ -10,7 +10,7 @@ MonoBehaviour: m_Enabled: 1 m_EditorHideFlags: 0 m_Script: {fileID: -340790334, guid: 67cc4cb7839cd3741b63733d5adf0442, type: 3} - m_Name: Menu + m_Name: TaSTT_Menu m_EditorClassIdentifier: controls: - name: Show @@ -43,4 +43,3 @@ MonoBehaviour: subMenu: {fileID: 0} subParameters: [] labels: [] - diff --git a/dev_cheatsheat.txt b/dev_cheatsheat.txt index db9697f..cf9f39a 100644 --- a/dev_cheatsheat.txt +++ b/dev_cheatsheat.txt @@ -10,9 +10,10 @@ Combining TaSTT_fx with a normal animator date rm -rf generated/animations ./libunity.py guid_map --project_root=$(cd ..; pwd) --save_to guid.map -#./libtastt.py gen_anims +./libtastt.py gen_anims ./libtastt.py gen_fx > TaSTT_fx.controller -./libunity.py merge --fx0 ../FXGeneric.controller --fx1 ../FXSpecific.controller > FX0.controller +#./libunity.py merge --fx0 ../FXGeneric.controller --fx1 ../FXSpecific.controller > FX0.controller +cp ../FXGeneric.controller FX0.controller ./libunity.py add_toggle --fx0 FX0.controller > FX1.controller ./libunity.py merge --fx0 FX1.controller --fx1 TaSTT_fx.controller > FX2.controller ./libunity.py set_noop_anim --fx0 FX2.controller --guid_map guid.map > FX3.controller diff --git a/generate_params.py b/generate_params.py index 23a2513..d09be63 100644 --- a/generate_params.py +++ b/generate_params.py @@ -69,6 +69,9 @@ params["SAVED"] = "0" params["PARAM_NAME"] = generate_utils.getLockWorldParam() print(generate_utils.replaceMacros(BOOL_PARAM, params)) +params["PARAM_NAME"] = generate_utils.getClearBoardParam() +print(generate_utils.replaceMacros(BOOL_PARAM, params)) + for i in range(0, generate_utils.NUM_LAYERS): params["PARAM_NAME"] = generate_utils.getLayerParam(i) print(generate_utils.replaceMacros(INT_PARAM, params)) diff --git a/generate_utils.py b/generate_utils.py index 68a455b..bf7e6c7 100644 --- a/generate_utils.py +++ b/generate_utils.py 
@@ -54,6 +54,10 @@ def getSpeechNoiseToggleParam(): def getSpeechNoiseEnableParam(): return "TaSTT_Speech_Noise_Enable" +# When this is set to true, the board clears. +def getClearBoardParam(): + return "TaSTT_Clear_Board" + def getLockWorldParam(): return "TaSTT_Lock_World" @@ -112,9 +116,15 @@ def getShaderParam(which_layer, s0, s1, s2, s3): return getShaderParamByRowCol(row, col) -def getAnimationName(row, col, letter): +# The name of the animation which writes `letter` at a specific position in the +# display. +def getLetterAnimationName(row, col, letter): return "R%02dC%02dL%02d" % (row, col, letter) +# The name of the animation which clears the entire board. +def getClearAnimationName(): + return "TaSTT_Clear_Board" + def getAnimationNameByLayerAndIndex(which_layer, s0, s1, s2, s3, letter): index = getBoardIndex(which_layer, s0, s1, s2, s3) diff --git a/libtastt.py b/libtastt.py index 34ac61a..a9b9fe8 100644 --- a/libtastt.py +++ b/libtastt.py @@ -8,6 +8,10 @@ import pickle import sys import typing +# TODO(yum) we're getting the encoding scheme from here, but I think it should +# be in a different layer. +import osc_ctrl + LETTER_ANIMATION_TEMPLATE = """ %YAML 1.1 %TAG !u! tag:unity3d.com,2011: @@ -140,7 +144,57 @@ AnimatorController: m_AnimatorLayers: [] """ +# For whatever reason, running unrelated animations s.a. +# facial expressions can have a slight effect on supposedly +# unrelated parameters, causing letter to flip. Add a +# little buffer to reduce the odds that this effect causes +# a letter to change after it has been written. 
+UNITY_ANIMATION_FUDGE_MARGIN = 0.1 + +def generateClearAnimation(anim_dir, guid_map): + print("Generating board clearing animation", file=sys.stderr) + + parser = libunity.UnityParser() + parser.parse(LETTER_ANIMATION_TEMPLATE) + + anim_node = parser.nodes[0] + anim_clip = anim_node.mapping['AnimationClip'] + curve_template = anim_clip.mapping['m_FloatCurves'].sequence[0] + anim_clip.mapping['m_FloatCurves'].sequence = [] + anim_clip.mapping['m_EditorCurves'].sequence = [] + + encoding = osc_ctrl.generateEncoding() + letter = encoding[' '] + + for row in range(0, generate_utils.BOARD_ROWS): + for col in range(0, generate_utils.BOARD_COLS): + curve = curve_template.copy() + for keyframe in curve.mapping['curve'].mapping['m_Curve'].sequence: + keyframe.mapping['value'] = str(letter + + UNITY_ANIMATION_FUDGE_MARGIN) + curve.mapping['attribute'] = "material.{}".format(generate_utils.getShaderParamByRowCol(row, col)) + curve.mapping['path'] = "World Constraint/Container/TaSTT" + # Add curve to animation + anim_clip.mapping['m_FloatCurves'].sequence.append(curve) + anim_clip.mapping['m_EditorCurves'].sequence.append(curve) + # Serialize animation to file + anim_name = generate_utils.getClearAnimationName() + anim_path = anim_dir + anim_name + ".anim" + with open(anim_path, "w") as f: + f.write(libunity.unityYamlToString([anim_node])) + # Generate metadata + meta = libunity.Metadata() + with open(anim_path + ".meta", "w") as f: + f.write(str(meta)) + # Add metadata to guid map + guid_map[anim_path] = meta.guid + guid_map[meta.guid] = anim_path + def generateAnimations(anim_dir, guid_map): + generateClearAnimation(args.gen_anim_dir, guid_map) + + print("Generating letter animations", file=sys.stderr) + parser = libunity.UnityParser() parser.parse(LETTER_ANIMATION_TEMPLATE) @@ -151,6 +205,8 @@ def generateAnimations(anim_dir, guid_map): anim_clip.mapping['m_EditorCurves'].sequence = [] for row in range(0, generate_utils.BOARD_ROWS): + print("Generating letter animations 
(row {}/{})".format(row, + generate_utils.BOARD_ROWS), file=sys.stderr) for col in range(0, generate_utils.BOARD_COLS): for letter in range(0, generate_utils.CHARS_PER_CELL): # Make a deep copy of the templates @@ -158,16 +214,11 @@ def generateAnimations(anim_dir, guid_map): curve = curve_template.copy() clip = node.mapping['AnimationClip'] # Populate animation name - anim_name = generate_utils.getAnimationName(row, col, letter) + anim_name = generate_utils.getLetterAnimationName(row, col, letter) clip.mapping['m_Name'] = anim_name # Populate letter value for keyframe in curve.mapping['curve'].mapping['m_Curve'].sequence: - # For whatever reason, running unrelated animations s.a. - # facial expressions can have a slight effect on supposedly - # unrelated parameters, causing letter to flip. Add a - # little buffer to reduce the odds that this effect causes - # a letter to change after it has been written. - keyframe.mapping['value'] = str(letter + 0.1) + keyframe.mapping['value'] = str(letter + UNITY_ANIMATION_FUDGE_MARGIN) # Populate path to letter parameter curve.mapping['attribute'] = "material.{}".format(generate_utils.getShaderParamByRowCol(row, col)) curve.mapping['path'] = "World Constraint/Container/TaSTT" @@ -200,6 +251,7 @@ def generateFXController(anim: libunity.UnityAnimator) -> typing.Dict[int, libun anim.addParameter(generate_utils.getHandToggleParam(), bool) anim.addParameter(generate_utils.getToggleParam(), bool) anim.addParameter(generate_utils.getSpeechNoiseEnableParam(), bool) + anim.addParameter(generate_utils.getClearBoardParam(), bool) layers = {} for i in range(0, generate_utils.NUM_LAYERS): @@ -356,15 +408,17 @@ def generateToggle(layer_name: str, is_default_state = True) on_state = anim.addAnimatorState(layer, layer_name + "_On", dy=100) - off_anim_path = gen_anim_dir + off_anim_basename - off_anim_meta = libunity.Metadata() - off_anim_meta.load(off_anim_path) - on_anim_path = gen_anim_dir + on_anim_basename - on_anim_meta = 
libunity.Metadata() - on_anim_meta.load(on_anim_path) + if off_anim_basename: + off_anim_path = gen_anim_dir + off_anim_basename + off_anim_meta = libunity.Metadata() + off_anim_meta.load(off_anim_path) + anim.setAnimatorStateAnimation(off_state, off_anim_meta.guid) - anim.setAnimatorStateAnimation(off_state, off_anim_meta.guid) - anim.setAnimatorStateAnimation(on_state, on_anim_meta.guid) + if on_anim_basename: + on_anim_path = gen_anim_dir + on_anim_basename + on_anim_meta = libunity.Metadata() + on_anim_meta.load(on_anim_path) + anim.setAnimatorStateAnimation(on_state, on_anim_meta.guid) off_to_on_trans = anim.addTransition(on_state) anim.addTransitionBooleanCondition(off_state, @@ -415,6 +469,12 @@ def generateFX(guid_map, gen_anim_dir): "TaSTT_Lock_World_Disable.anim", "TaSTT_Lock_World_Enable.anim", anim) + generateToggle( + generate_utils.getClearBoardParam(), + gen_anim_dir, + None, # No animation in the `off` state. + generate_utils.getClearAnimationName() + ".anim", + anim) return anim diff --git a/osc_ctrl.py b/osc_ctrl.py index 328b55c..2fede06 100644 --- a/osc_ctrl.py +++ b/osc_ctrl.py @@ -35,25 +35,27 @@ state = EvilGlobalState() # The characters in the TaSTT are all numbered from top left to bottom right. # This function provides a mapping from letter ('a') to index (26). 
-def generateEncoding(state): +def generateEncoding(): + encoding = {} for i in range(0, 26): - state.encoding[chr(ord('A') + i)] = i + encoding[chr(ord('A') + i)] = i for i in range(26, 52): - state.encoding[chr(ord('a') + i - 26)] = i + encoding[chr(ord('a') + i - 26)] = i for i in range(52, 62): - state.encoding[chr(ord('0') + i - 52)] = i - state.encoding[','] = 62 - state.encoding['.'] = 63 - state.encoding[' '] = 64 - state.encoding['?'] = 65 - state.encoding['!'] = 66 - state.encoding[';'] = 67 - state.encoding[':'] = 68 - state.encoding['-'] = 69 - state.encoding['_'] = 70 - state.encoding["'"] = 71 - state.encoding['"'] = 72 -generateEncoding(state) + encoding[chr(ord('0') + i - 52)] = i + encoding[','] = 62 + encoding['.'] = 63 + encoding[' '] = 64 + encoding['?'] = 65 + encoding['!'] = 66 + encoding[';'] = 67 + encoding[':'] = 68 + encoding['-'] = 69 + encoding['_'] = 70 + encoding["'"] = 71 + encoding['"'] = 72 + return encoding +state.encoding = generateEncoding() # Encodes a list of lines into the character set used by the board. 
# Pads lines with spaces and adds lines so that the total number of @@ -329,7 +331,13 @@ def sendRawMessage(client, msg): sendMessageCellDiscrete(client, cell_msg, cell) def clear(client): - sendRawMessage(client, [state.encoding[' ']] * BOARD_ROWS * BOARD_COLS) + addr="/avatar/parameters/" + generate_utils.getClearBoardParam() + client.send_message(addr, True) + + time.sleep(CELL_TX_TIME_S / 3.0) + + addr="/avatar/parameters/" + generate_utils.getClearBoardParam() + client.send_message(addr, False) if __name__ == "__main__": parser = argparse.ArgumentParser() @@ -339,7 +347,7 @@ if __name__ == "__main__": client = getClient(args.i, args.p) - generateEncoding(state) + state.encoding = generateEncoding() tx_state = OscTxState() for line in fileinput.input(): diff --git a/transcribe.py b/transcribe.py index 474cd59..0a31608 100644 --- a/transcribe.py +++ b/transcribe.py @@ -150,6 +150,9 @@ def resetDiskAudioLocked(audio_state, filename): def resetAudioLocked(audio_state): audio_state.frames = [] + audio_state.transcribe_no_change_count = 0 + audio_state.transcribe_sleep_duration = \ + audio_state.transcribe_sleep_duration_min_s def resetAudio(audio_state): audio_state.frames_lock.acquire() @@ -170,7 +173,7 @@ def transcribe(model, filename): options = whisper.DecodingOptions(language = "en") result = whisper.decode(model, mel, options) - if result.no_speech_prob > 0.1: + if result.no_speech_prob > 0.2: print("no speech prob: {}".format(result.no_speech_prob)) return "" @@ -251,6 +254,11 @@ def transcribeAudio(audio_state, model): old_words = audio_state.text.split() new_words = text.split() audio_state.text = string_matcher.matchStringList(old_words, new_words) + if old_text != audio_state.text: + # We think the user said something, so reset the amount of + # time we sleep between transcriptions to the minimum. 
+ audio_state.transcribe_no_change_count = 0 + audio_state.transcribe_sleep_duration = audio_state.transcribe_sleep_duration_min_s audio_state.text_candidate = text |
