summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author yum <yum.food.vr@gmail.com> 2022-10-27 16:00:22 -0700
committer yum <yum.food.vr@gmail.com> 2022-10-27 16:00:22 -0700
commit fd7ea2f72a486888c30726a2482fcc1f7fce9378 (patch)
tree cae0ee99fca59d65062074a546c3975404f7ad89
parent eefa14c431efa4e3bc16cafbcb004e41622c2411 (diff)
Add fast clear animation
The old clear mechanism would write an empty cell in every layer, which would take (0.3 seconds) * (11 layers) == about 3 seconds. The new mechanism drives an animation which overwrites every character slot simultaneously, taking only 0.1 seconds. A nice ~30x speedup.

* Fix the transcription exponential backoff logic. Saying new things will reset the delay to the minimum again.
* Clearing the board will also reset the transcription delay back to the minimum.
* Tune the noise detection minimum to 0.2 instead of 0.1. Speaking softly into the mic seems to fail to exceed the 0.1 threshold pretty often.
-rw-r--r-- TaSTT_Menu.asset 3
-rw-r--r-- dev_cheatsheat.txt 5
-rw-r--r-- generate_params.py 3
-rw-r--r-- generate_utils.py 12
-rw-r--r-- libtastt.py 90
-rw-r--r-- osc_ctrl.py 44
-rw-r--r-- transcribe.py 10
7 files changed, 128 insertions, 39 deletions
diff --git a/TaSTT_Menu.asset b/TaSTT_Menu.asset
index bd38fc5..3f8eeb5 100644
--- a/TaSTT_Menu.asset
+++ b/TaSTT_Menu.asset
@@ -10,7 +10,7 @@ MonoBehaviour:
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: -340790334, guid: 67cc4cb7839cd3741b63733d5adf0442, type: 3}
- m_Name: Menu
+ m_Name: TaSTT_Menu
m_EditorClassIdentifier:
controls:
- name: Show
@@ -43,4 +43,3 @@ MonoBehaviour:
subMenu: {fileID: 0}
subParameters: []
labels: []
-
diff --git a/dev_cheatsheat.txt b/dev_cheatsheat.txt
index db9697f..cf9f39a 100644
--- a/dev_cheatsheat.txt
+++ b/dev_cheatsheat.txt
@@ -10,9 +10,10 @@ Combining TaSTT_fx with a normal animator
date
rm -rf generated/animations
./libunity.py guid_map --project_root=$(cd ..; pwd) --save_to guid.map
-#./libtastt.py gen_anims
+./libtastt.py gen_anims
./libtastt.py gen_fx > TaSTT_fx.controller
-./libunity.py merge --fx0 ../FXGeneric.controller --fx1 ../FXSpecific.controller > FX0.controller
+#./libunity.py merge --fx0 ../FXGeneric.controller --fx1 ../FXSpecific.controller > FX0.controller
+cp ../FXGeneric.controller FX0.controller
./libunity.py add_toggle --fx0 FX0.controller > FX1.controller
./libunity.py merge --fx0 FX1.controller --fx1 TaSTT_fx.controller > FX2.controller
./libunity.py set_noop_anim --fx0 FX2.controller --guid_map guid.map > FX3.controller
diff --git a/generate_params.py b/generate_params.py
index 23a2513..d09be63 100644
--- a/generate_params.py
+++ b/generate_params.py
@@ -69,6 +69,9 @@ params["SAVED"] = "0"
params["PARAM_NAME"] = generate_utils.getLockWorldParam()
print(generate_utils.replaceMacros(BOOL_PARAM, params))
+params["PARAM_NAME"] = generate_utils.getClearBoardParam()
+print(generate_utils.replaceMacros(BOOL_PARAM, params))
+
for i in range(0, generate_utils.NUM_LAYERS):
params["PARAM_NAME"] = generate_utils.getLayerParam(i)
print(generate_utils.replaceMacros(INT_PARAM, params))
diff --git a/generate_utils.py b/generate_utils.py
index 68a455b..bf7e6c7 100644
--- a/generate_utils.py
+++ b/generate_utils.py
@@ -54,6 +54,10 @@ def getSpeechNoiseToggleParam():
def getSpeechNoiseEnableParam():
return "TaSTT_Speech_Noise_Enable"
+# When this is set to true, the board clears.
+def getClearBoardParam():
+ return "TaSTT_Clear_Board"
+
def getLockWorldParam():
return "TaSTT_Lock_World"
@@ -112,9 +116,15 @@ def getShaderParam(which_layer, s0, s1, s2, s3):
return getShaderParamByRowCol(row, col)
-def getAnimationName(row, col, letter):
+# The name of the animation which writes `letter` at a specific position in the
+# display.
+def getLetterAnimationName(row, col, letter):
return "R%02dC%02dL%02d" % (row, col, letter)
+# The name of the animation which clears the entire board.
+def getClearAnimationName():
+ return "TaSTT_Clear_Board"
+
def getAnimationNameByLayerAndIndex(which_layer, s0, s1, s2, s3, letter):
index = getBoardIndex(which_layer, s0, s1, s2, s3)
diff --git a/libtastt.py b/libtastt.py
index 34ac61a..a9b9fe8 100644
--- a/libtastt.py
+++ b/libtastt.py
@@ -8,6 +8,10 @@ import pickle
import sys
import typing
+# TODO(yum) we're getting the encoding scheme from here, but I think it should
+# be in a different layer.
+import osc_ctrl
+
LETTER_ANIMATION_TEMPLATE = """
%YAML 1.1
%TAG !u! tag:unity3d.com,2011:
@@ -140,7 +144,57 @@ AnimatorController:
m_AnimatorLayers: []
"""
+# For whatever reason, running unrelated animations s.a.
+# facial expressions can have a slight effect on supposedly
+# unrelated parameters, causing letter to flip. Add a
+# little buffer to reduce the odds that this effect causes
+# a letter to change after it has been written.
+UNITY_ANIMATION_FUDGE_MARGIN = 0.1
+
+def generateClearAnimation(anim_dir, guid_map):
+ print("Generating board clearing animation", file=sys.stderr)
+
+ parser = libunity.UnityParser()
+ parser.parse(LETTER_ANIMATION_TEMPLATE)
+
+ anim_node = parser.nodes[0]
+ anim_clip = anim_node.mapping['AnimationClip']
+ curve_template = anim_clip.mapping['m_FloatCurves'].sequence[0]
+ anim_clip.mapping['m_FloatCurves'].sequence = []
+ anim_clip.mapping['m_EditorCurves'].sequence = []
+
+ encoding = osc_ctrl.generateEncoding()
+ letter = encoding[' ']
+
+ for row in range(0, generate_utils.BOARD_ROWS):
+ for col in range(0, generate_utils.BOARD_COLS):
+ curve = curve_template.copy()
+ for keyframe in curve.mapping['curve'].mapping['m_Curve'].sequence:
+ keyframe.mapping['value'] = str(letter +
+ UNITY_ANIMATION_FUDGE_MARGIN)
+ curve.mapping['attribute'] = "material.{}".format(generate_utils.getShaderParamByRowCol(row, col))
+ curve.mapping['path'] = "World Constraint/Container/TaSTT"
+ # Add curve to animation
+ anim_clip.mapping['m_FloatCurves'].sequence.append(curve)
+ anim_clip.mapping['m_EditorCurves'].sequence.append(curve)
+ # Serialize animation to file
+ anim_name = generate_utils.getClearAnimationName()
+ anim_path = anim_dir + anim_name + ".anim"
+ with open(anim_path, "w") as f:
+ f.write(libunity.unityYamlToString([anim_node]))
+ # Generate metadata
+ meta = libunity.Metadata()
+ with open(anim_path + ".meta", "w") as f:
+ f.write(str(meta))
+ # Add metadata to guid map
+ guid_map[anim_path] = meta.guid
+ guid_map[meta.guid] = anim_path
+
def generateAnimations(anim_dir, guid_map):
+ generateClearAnimation(args.gen_anim_dir, guid_map)
+
+ print("Generating letter animations", file=sys.stderr)
+
parser = libunity.UnityParser()
parser.parse(LETTER_ANIMATION_TEMPLATE)
@@ -151,6 +205,8 @@ def generateAnimations(anim_dir, guid_map):
anim_clip.mapping['m_EditorCurves'].sequence = []
for row in range(0, generate_utils.BOARD_ROWS):
+ print("Generating letter animations (row {}/{})".format(row,
+ generate_utils.BOARD_ROWS), file=sys.stderr)
for col in range(0, generate_utils.BOARD_COLS):
for letter in range(0, generate_utils.CHARS_PER_CELL):
# Make a deep copy of the templates
@@ -158,16 +214,11 @@ def generateAnimations(anim_dir, guid_map):
curve = curve_template.copy()
clip = node.mapping['AnimationClip']
# Populate animation name
- anim_name = generate_utils.getAnimationName(row, col, letter)
+ anim_name = generate_utils.getLetterAnimationName(row, col, letter)
clip.mapping['m_Name'] = anim_name
# Populate letter value
for keyframe in curve.mapping['curve'].mapping['m_Curve'].sequence:
- # For whatever reason, running unrelated animations s.a.
- # facial expressions can have a slight effect on supposedly
- # unrelated parameters, causing letter to flip. Add a
- # little buffer to reduce the odds that this effect causes
- # a letter to change after it has been written.
- keyframe.mapping['value'] = str(letter + 0.1)
+ keyframe.mapping['value'] = str(letter + UNITY_ANIMATION_FUDGE_MARGIN)
# Populate path to letter parameter
curve.mapping['attribute'] = "material.{}".format(generate_utils.getShaderParamByRowCol(row, col))
curve.mapping['path'] = "World Constraint/Container/TaSTT"
@@ -200,6 +251,7 @@ def generateFXController(anim: libunity.UnityAnimator) -> typing.Dict[int, libun
anim.addParameter(generate_utils.getHandToggleParam(), bool)
anim.addParameter(generate_utils.getToggleParam(), bool)
anim.addParameter(generate_utils.getSpeechNoiseEnableParam(), bool)
+ anim.addParameter(generate_utils.getClearBoardParam(), bool)
layers = {}
for i in range(0, generate_utils.NUM_LAYERS):
@@ -356,15 +408,17 @@ def generateToggle(layer_name: str,
is_default_state = True)
on_state = anim.addAnimatorState(layer, layer_name + "_On", dy=100)
- off_anim_path = gen_anim_dir + off_anim_basename
- off_anim_meta = libunity.Metadata()
- off_anim_meta.load(off_anim_path)
- on_anim_path = gen_anim_dir + on_anim_basename
- on_anim_meta = libunity.Metadata()
- on_anim_meta.load(on_anim_path)
+ if off_anim_basename:
+ off_anim_path = gen_anim_dir + off_anim_basename
+ off_anim_meta = libunity.Metadata()
+ off_anim_meta.load(off_anim_path)
+ anim.setAnimatorStateAnimation(off_state, off_anim_meta.guid)
- anim.setAnimatorStateAnimation(off_state, off_anim_meta.guid)
- anim.setAnimatorStateAnimation(on_state, on_anim_meta.guid)
+ if on_anim_basename:
+ on_anim_path = gen_anim_dir + on_anim_basename
+ on_anim_meta = libunity.Metadata()
+ on_anim_meta.load(on_anim_path)
+ anim.setAnimatorStateAnimation(on_state, on_anim_meta.guid)
off_to_on_trans = anim.addTransition(on_state)
anim.addTransitionBooleanCondition(off_state,
@@ -415,6 +469,12 @@ def generateFX(guid_map, gen_anim_dir):
"TaSTT_Lock_World_Disable.anim",
"TaSTT_Lock_World_Enable.anim",
anim)
+ generateToggle(
+ generate_utils.getClearBoardParam(),
+ gen_anim_dir,
+ None, # No animation in the `off` state.
+ generate_utils.getClearAnimationName() + ".anim",
+ anim)
return anim
diff --git a/osc_ctrl.py b/osc_ctrl.py
index 328b55c..2fede06 100644
--- a/osc_ctrl.py
+++ b/osc_ctrl.py
@@ -35,25 +35,27 @@ state = EvilGlobalState()
# The characters in the TaSTT are all numbered from top left to bottom right.
# This function provides a mapping from letter ('a') to index (26).
-def generateEncoding(state):
+def generateEncoding():
+ encoding = {}
for i in range(0, 26):
- state.encoding[chr(ord('A') + i)] = i
+ encoding[chr(ord('A') + i)] = i
for i in range(26, 52):
- state.encoding[chr(ord('a') + i - 26)] = i
+ encoding[chr(ord('a') + i - 26)] = i
for i in range(52, 62):
- state.encoding[chr(ord('0') + i - 52)] = i
- state.encoding[','] = 62
- state.encoding['.'] = 63
- state.encoding[' '] = 64
- state.encoding['?'] = 65
- state.encoding['!'] = 66
- state.encoding[';'] = 67
- state.encoding[':'] = 68
- state.encoding['-'] = 69
- state.encoding['_'] = 70
- state.encoding["'"] = 71
- state.encoding['"'] = 72
-generateEncoding(state)
+ encoding[chr(ord('0') + i - 52)] = i
+ encoding[','] = 62
+ encoding['.'] = 63
+ encoding[' '] = 64
+ encoding['?'] = 65
+ encoding['!'] = 66
+ encoding[';'] = 67
+ encoding[':'] = 68
+ encoding['-'] = 69
+ encoding['_'] = 70
+ encoding["'"] = 71
+ encoding['"'] = 72
+ return encoding
+state.encoding = generateEncoding()
# Encodes a list of lines into the character set used by the board.
# Pads lines with spaces and adds lines so that the total number of
@@ -329,7 +331,13 @@ def sendRawMessage(client, msg):
sendMessageCellDiscrete(client, cell_msg, cell)
def clear(client):
- sendRawMessage(client, [state.encoding[' ']] * BOARD_ROWS * BOARD_COLS)
+ addr="/avatar/parameters/" + generate_utils.getClearBoardParam()
+ client.send_message(addr, True)
+
+ time.sleep(CELL_TX_TIME_S / 3.0)
+
+ addr="/avatar/parameters/" + generate_utils.getClearBoardParam()
+ client.send_message(addr, False)
if __name__ == "__main__":
parser = argparse.ArgumentParser()
@@ -339,7 +347,7 @@ if __name__ == "__main__":
client = getClient(args.i, args.p)
- generateEncoding(state)
+ state.encoding = generateEncoding()
tx_state = OscTxState()
for line in fileinput.input():
diff --git a/transcribe.py b/transcribe.py
index 474cd59..0a31608 100644
--- a/transcribe.py
+++ b/transcribe.py
@@ -150,6 +150,9 @@ def resetDiskAudioLocked(audio_state, filename):
def resetAudioLocked(audio_state):
audio_state.frames = []
+ audio_state.transcribe_no_change_count = 0
+ audio_state.transcribe_sleep_duration = \
+ audio_state.transcribe_sleep_duration_min_s
def resetAudio(audio_state):
audio_state.frames_lock.acquire()
@@ -170,7 +173,7 @@ def transcribe(model, filename):
options = whisper.DecodingOptions(language = "en")
result = whisper.decode(model, mel, options)
- if result.no_speech_prob > 0.1:
+ if result.no_speech_prob > 0.2:
print("no speech prob: {}".format(result.no_speech_prob))
return ""
@@ -251,6 +254,11 @@ def transcribeAudio(audio_state, model):
old_words = audio_state.text.split()
new_words = text.split()
audio_state.text = string_matcher.matchStringList(old_words, new_words)
+ if old_text != audio_state.text:
+ # We think the user said something, so reset the amount of
+ # time we sleep between transcriptions to the minimum.
+ audio_state.transcribe_no_change_count = 0
+ audio_state.transcribe_sleep_duration = audio_state.transcribe_sleep_duration_min_s
audio_state.text_candidate = text