summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: yum <yum.food.vr@gmail.com> 2022-11-22 15:36:19 -0800
committer: yum <yum.food.vr@gmail.com> 2022-11-22 18:13:18 -0800
commit: bd8b63a357bb374f5875f0fedf2d677589419810 (patch)
tree: 0bb804459ebb7ad50e2a817bd842afd946339b30
parent: 27a67666c320bed3b4a18e415eb9702b03f8f0b5 (diff)
Rework input controls
Press joystick once to start recording, again to stop. When you start recording, any previous text on the board is cleared. Add 2 visual indicators: one to indicate speech, another to indicate that audio is paging.
-rw-r--r-- TaSTT.shader 139
-rw-r--r-- generate_params.py 6
-rw-r--r-- generate_utils.py 6
-rw-r--r-- libtastt.py 57
-rw-r--r-- libunity.py 5
-rw-r--r-- osc_ctrl.py 8
-rw-r--r-- string_matcher.py 2
-rw-r--r-- transcribe.py 55
8 files changed, 225 insertions, 53 deletions
diff --git a/TaSTT.shader b/TaSTT.shader
index b722d63..272f08b 100644
--- a/TaSTT.shader
+++ b/TaSTT.shader
@@ -10,6 +10,11 @@
_Font_0xA000_0xBFFF ("Font 5 (unicode 0xA000 - 0xBFFFF)", 2D) = "white" {}
_Font_0xC000_0xDFFF ("Font 6 (unicode 0xC000 - 0xDFFFF)", 2D) = "white" {}
+ TaSTT_Backplate("TaSTT_Backplate", 2D) = "black" {}
+
+ TaSTT_Indicator_0("TaSTT_Indicator_0", float) = 0
+ TaSTT_Indicator_1("TaSTT_Indicator_1", float) = 0
+
// software "engineering" LULW
_Letter_Row00_Col00_Byte0("_Letter_Row00_Col00_Byte0", float) = 0
_Letter_Row00_Col01_Byte0("_Letter_Row00_Col01_Byte0", float) = 0
@@ -403,6 +408,15 @@
Texture2D _Font_0xA000_0xBFFF;
Texture2D _Font_0xC000_0xDFFF;
+ float TaSTT_Indicator_0;
+ static const fixed4 TaSTT_Indicator_0_Off_Color = fixed4(0.0, 1.0, 0.0, 2) * 0.7;
+ static const fixed4 TaSTT_Indicator_0_On_Color = fixed4(0.8, 0.2, 0.0, 2) * 0.9;
+ float TaSTT_Indicator_1;
+ static const fixed4 TaSTT_Indicator_1_Off_Color = fixed4(0.0, 1.0, 0.0, 2) * 0.7;
+ static const fixed4 TaSTT_Indicator_1_On_Color = fixed4(0.8, 0.2, 0.0, 2) * 0.9;
+
+ Texture2D TaSTT_Backplate;
+
float _Letter_Row00_Col00_Byte0;
float _Letter_Row00_Col01_Byte0;
float _Letter_Row00_Col02_Byte0;
@@ -764,14 +778,29 @@
return o;
}
- float2 AddMarginToUV(float2 uv, float x_frac, float y_frac)
+ float2 AddMarginToUV(float2 uv, float2 margin)
{
- float2 lo = float2(-x_frac / 2, -y_frac / 2);
- float2 hi = float2(1.0 + x_frac / 2, 1.0 + y_frac / 2);
+ float2 lo = float2(-margin.x / 2, -margin.y / 2);
+ float2 hi = float2(1.0 + margin.x / 2, 1.0 + margin.y / 2);
return clamp(lerp(lo, hi, uv), 0.0, 1.0);
}
+ bool InMargin(float2 uv, float2 margin)
+ {
+ return uv.x < margin.x / 2 ||
+ uv.x > 1 - margin.x / 2 ||
+ uv.y < margin.y / 2 ||
+ uv.y > 1 - margin.y / 2;
+ }
+
+ // dist^2 = dx^2 + dy^2 = dot(<dx,dy>, <dx,dy>), so compare against radius^2 and skip the sqrt.
+ bool InRadius2(float2 uv, float2 pos, float radius2)
+ {
+ float2 delta = uv - pos;
+ return dot(delta, delta) < radius2;
+ }
+
// Write the nth letter in the current cell and return the value of the
// pixel.
float2 GetLetter(float2 uv, int nth_letter)
@@ -1238,34 +1267,88 @@
uv.x = 1.0 - uv.x;
}
- float uv_x_margin = 0.03;
- float uv_y_margin = 0.03;
- uv = AddMarginToUV(uv, uv_x_margin, uv_y_margin);
+ float2 uv_margin = float2(0.03, 0.06);
+ if (InMargin(uv, uv_margin)) {
+ // Margin is uv_margin/2 wide/tall.
+ // We want a circle whose radius is ~95% of the margin's width.
+ float radius_factor = 0.95;
+ float radius = (uv_margin.x / 2) * radius_factor;
+ // We want this circle to be centered halfway through the margin
+ // vertically, and at one margin width plus one radius horizontally.
+ float2 indicator_center = float2(
+ uv_margin.x * 0.5 + radius,
+ uv_margin.y * 0.5 * 0.5
+ );
+ // Finally, translate it to the top of the board instead of the
+ // bottom.
+ indicator_center.y = 1.0 - indicator_center.y;
- int2 letter_bytes = (int2) floor(GetLetterParameter(uv));
- int letter = letter_bytes[0] | (letter_bytes[1] << 8);
+ if (InRadius2(uv, indicator_center, radius * radius)) {
+ if (floor(TaSTT_Indicator_0) == 0.0) {
+ return TaSTT_Indicator_0_Off_Color;
+ } else {
+ return TaSTT_Indicator_0_On_Color;
+ }
+ }
- uv = GetLetter(uv, letter);
+ // Next, draw the second indicator. Same size as before, just shifted
+ // over a little.
+ indicator_center.x += radius * 2.5;
+ if (InRadius2(uv, indicator_center, radius * radius)) {
+ if (floor(TaSTT_Indicator_1) == 0.0) {
+ return TaSTT_Indicator_1_Off_Color;
+ } else {
+ return TaSTT_Indicator_1_On_Color;
+ }
+ }
- int which_texture = (int) floor(letter / (64 * 128));
- [forcecase] switch (which_texture)
- {
- case 0:
- return _Font_0x0000_0x1FFF.Sample(sampler_linear_repeat, uv);
- case 1:
- return _Font_0x2000_0x3FFF.Sample(sampler_linear_repeat, uv);
- case 2:
- return _Font_0x4000_0x5FFF.Sample(sampler_linear_repeat, uv);
- case 3:
- return _Font_0x6000_0x7FFF.Sample(sampler_linear_repeat, uv);
- case 4:
- return _Font_0x8000_0x9FFF.Sample(sampler_linear_repeat, uv);
- case 5:
- return _Font_0xA000_0xBFFF.Sample(sampler_linear_repeat, uv);
- case 6:
- return _Font_0xC000_0xDFFF.Sample(sampler_linear_repeat, uv);
- default:
- return _Font_0x0000_0x1FFF.Sample(sampler_linear_repeat, uv);
+ return fixed4(1,1,1,1);
+ } else {
+ uv_margin *= 2;
+ uv = AddMarginToUV(uv, uv_margin);
+
+ int2 letter_bytes = (int2) floor(GetLetterParameter(uv));
+ int letter = letter_bytes[0] | (letter_bytes[1] << 8);
+
+ uv = GetLetter(uv, letter);
+
+ fixed4 background = TaSTT_Backplate.Sample(sampler_linear_repeat, uv);
+ fixed4 text;
+
+ int which_texture = (int) floor(letter / (64 * 128));
+ [forcecase] switch (which_texture)
+ {
+ case 0:
+ text = _Font_0x0000_0x1FFF.Sample(sampler_linear_repeat, uv);
+ break;
+ case 1:
+ text = _Font_0x2000_0x3FFF.Sample(sampler_linear_repeat, uv);
+ break;
+ case 2:
+ text = _Font_0x4000_0x5FFF.Sample(sampler_linear_repeat, uv);
+ break;
+ case 3:
+ text = _Font_0x6000_0x7FFF.Sample(sampler_linear_repeat, uv);
+ break;
+ case 4:
+ text = _Font_0x8000_0x9FFF.Sample(sampler_linear_repeat, uv);
+ break;
+ case 5:
+ text = _Font_0xA000_0xBFFF.Sample(sampler_linear_repeat, uv);
+ break;
+ case 6:
+ text = _Font_0xC000_0xDFFF.Sample(sampler_linear_repeat, uv);
+ break;
+ default:
+ text = _Font_0x0000_0x1FFF.Sample(sampler_linear_repeat, uv);
+ break;
+ }
+ fixed4 black = fixed4(0,0,0,0);
+ if (text.r == black.r && text.g == black.g && text.b == black.b && text.a == black.a) {
+ return background;
+ } else {
+ return text;
+ }
}
}
ENDCG
diff --git a/generate_params.py b/generate_params.py
index 63203d0..1146ee5 100644
--- a/generate_params.py
+++ b/generate_params.py
@@ -62,6 +62,12 @@ print(generate_utils.replaceMacros(BOOL_PARAM, params))
params["PARAM_NAME"] = generate_utils.getEnableParam()
print(generate_utils.replaceMacros(BOOL_PARAM, params))
+params["PARAM_NAME"] = generate_utils.getIndicator0Param()
+print(generate_utils.replaceMacros(BOOL_PARAM, params))
+
+params["PARAM_NAME"] = generate_utils.getIndicator1Param()
+print(generate_utils.replaceMacros(BOOL_PARAM, params))
+
params["PARAM_NAME"] = generate_utils.getToggleParam()
print(generate_utils.replaceMacros(BOOL_PARAM, params))
diff --git a/generate_utils.py b/generate_utils.py
index 119714d..c4cbf4c 100644
--- a/generate_utils.py
+++ b/generate_utils.py
@@ -94,6 +94,12 @@ def getSelectParam() -> str:
def getEnableParam():
return "TaSTT_Enable"
+def getIndicator0Param():
+ return "TaSTT_Indicator_0"
+
+def getIndicator1Param():
+ return "TaSTT_Indicator_1"
+
def getBoardIndex(which_layer, select):
# Because we divide the board into a multiple of 8 cells, some cells may
# describe animations which don't exist, depending on the size of the board.
diff --git a/libtastt.py b/libtastt.py
index f580c1e..658e9ff 100644
--- a/libtastt.py
+++ b/libtastt.py
@@ -190,9 +190,54 @@ def generateClearAnimation(anim_dir, guid_map):
guid_map[anim_path] = meta.guid
guid_map[meta.guid] = anim_path
+# Generate a pair of toggle animations (`<param>_Off.anim` and `<param>_On.anim`) for a shader parameter.
+def generateToggleAnimations(anim_dir, shader_param, guid_map):
+ print("Generating shader toggle animation", file=sys.stderr)
+
+ parser = libunity.UnityParser()
+ parser.parse(LETTER_ANIMATION_TEMPLATE)
+
+ # 0.0 represents false, 1.0 represents true. Don't forget that we add
+ # `UNITY_ANIMATION_FUDGE_MARGIN` to everything.
+ for shader_value in range(0, 2):
+ anim_node = parser.nodes[0]
+ anim_clip = anim_node.mapping['AnimationClip']
+ curve_template = anim_clip.mapping['m_FloatCurves'].sequence[0]
+ anim_clip.mapping['m_FloatCurves'].sequence = []
+ anim_clip.mapping['m_EditorCurves'].sequence = []
+
+ curve = curve_template.copy()
+ for keyframe in curve.mapping['curve'].mapping['m_Curve'].sequence:
+ keyframe.mapping['value'] = str(float(shader_value) +
+ UNITY_ANIMATION_FUDGE_MARGIN)
+ curve.mapping['attribute'] = "material.{}".format(shader_param)
+ curve.mapping['path'] = "World Constraint/Container/TaSTT"
+ # Add curve to animation
+ anim_clip.mapping['m_FloatCurves'].sequence.append(curve)
+ anim_clip.mapping['m_EditorCurves'].sequence.append(curve)
+
+ # Serialize animation to file
+ anim_name = generate_utils.getClearAnimationName()
+ anim_suffix = "_Off"
+ if shader_value == 1:
+ anim_suffix = "_On"
+ anim_path = anim_dir + shader_param + anim_suffix + ".anim"
+ with open(anim_path, "w") as f:
+ f.write(libunity.unityYamlToString([anim_node]))
+ # Generate metadata
+ meta = libunity.Metadata()
+ with open(anim_path + ".meta", "w") as f:
+ f.write(str(meta))
+ # Add metadata to guid map
+ guid_map[anim_path] = meta.guid
+ guid_map[meta.guid] = anim_path
+
def generateAnimations(anim_dir, guid_map):
generateClearAnimation(args.gen_anim_dir, guid_map)
+ generateToggleAnimations(args.gen_anim_dir, generate_utils.getIndicator0Param(), guid_map)
+ generateToggleAnimations(args.gen_anim_dir, generate_utils.getIndicator1Param(), guid_map)
+
print("Generating letter animations", file=sys.stderr)
parser = libunity.UnityParser()
@@ -257,6 +302,8 @@ def generateFXController(anim: libunity.UnityAnimator) -> typing.Dict[int, libun
anim.addParameter(generate_utils.getToggleParam(), bool)
anim.addParameter(generate_utils.getSpeechNoiseEnableParam(), bool)
anim.addParameter(generate_utils.getClearBoardParam(), bool)
+ anim.addParameter(generate_utils.getIndicator0Param(), bool)
+ anim.addParameter(generate_utils.getIndicator1Param(), bool)
layers = {}
for byte in range(0, generate_utils.BYTES_PER_CHAR):
@@ -410,6 +457,16 @@ def generateFX(guid_map, gen_anim_dir):
None, # No animation in the `off` state.
generate_utils.getClearAnimationName() + ".anim",
anim)
+ generateToggle(generate_utils.getIndicator0Param(),
+ gen_anim_dir,
+ generate_utils.getIndicator0Param() + "_Off.anim",
+ generate_utils.getIndicator0Param() + "_On.anim",
+ anim)
+ generateToggle(generate_utils.getIndicator1Param(),
+ gen_anim_dir,
+ generate_utils.getIndicator1Param() + "_Off.anim",
+ generate_utils.getIndicator1Param() + "_On.anim",
+ anim)
return anim
diff --git a/libunity.py b/libunity.py
index 822c238..9380a6c 100644
--- a/libunity.py
+++ b/libunity.py
@@ -521,6 +521,11 @@ class UnityAnimator():
p0.sequence += p1.sequence
a0.sequence += a1.sequence
+ for elm in p0.sequence:
+ elm.mapping['m_Controller'].mapping['fileID'] = ctrl0.anchor
+ for elm in a0.sequence:
+ elm.mapping['m_Controller'].mapping['fileID'] = ctrl0.anchor
+
return ctrl0
def merge(self, other):
diff --git a/osc_ctrl.py b/osc_ctrl.py
index be853dc..ea0c145 100644
--- a/osc_ctrl.py
+++ b/osc_ctrl.py
@@ -292,6 +292,14 @@ def clear(client, tx_state):
tx_state.last_msg_encoded = []
+def indicateSpeech(client, is_speaking: bool):
+ addr = "/avatar/parameters/" + generate_utils.getIndicator0Param()
+ client.send_message(addr, is_speaking)
+
+def indicatePaging(client, is_paging: bool):
+ addr = "/avatar/parameters/" + generate_utils.getIndicator1Param()
+ client.send_message(addr, is_paging)
+
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("-i", default="127.0.0.1", help="OSC server IP")
diff --git a/string_matcher.py b/string_matcher.py
index 543b18f..461f180 100644
--- a/string_matcher.py
+++ b/string_matcher.py
@@ -78,7 +78,7 @@ def matchStrings(old_text: str, new_text: str, window_size = 3) -> str:
for j in range(0, 1 + len(new_text) - window_size):
new_slice = new_text[j:j + window_size]
cur_d = editdistance.eval(old_slice, new_slice)
- if cur_d <= best_match_d:
+ if cur_d < best_match_d:
best_match_i = i
best_match_j = j
best_match_d = cur_d
diff --git a/transcribe.py b/transcribe.py
index 1aabf6f..9290bdc 100644
--- a/transcribe.py
+++ b/transcribe.py
@@ -70,9 +70,7 @@ class AudioState:
# this to whatever they want.
language = whisper.tokenizer.TO_LANGUAGE_CODE["english"]
- # When the user says `over`, we stop displaying new transcriptions until
- # they clear the board again.
- display_paused = False
+ audio_paused = False
osc_client = osc_ctrl.getClient()
@@ -121,6 +119,10 @@ def recordAudio(audio_state):
while audio_state.run_app:
data = audio_state.stream.read(audio_state.CHUNK)
+ if audio_state.audio_paused:
+ time.sleep(0.1)
+ continue
+
audio_state.lock.acquire()
audio_state.frames.append(data)
max_frames = int(audio_state.RATE * audio_state.MAX_LENGTH_S / audio_state.CHUNK)
@@ -199,7 +201,8 @@ def transcribe(audio_state, model, filename):
result = None
#for temp in (0.00, 0.05, 0.10, 0.15, 0.20):
- for temp in (0.00, 0.05):
+ #for temp in (0.00, 0.05):
+ for temp in (0.00,):
print("temp: {}".format(temp))
options = whisper.DecodingOptions(language = audio_state.language,
beam_size = 5, temperature = temp)
@@ -256,11 +259,6 @@ def transcribeAudio(audio_state, model):
words = ''.join(c for c in text.lower() if (c.isalpha() or c == " ")).split()
- if len(words) > 0:
- if words[-1] == "over":
- words = words[0:-1]
- audio_state.display_paused = True
-
print("Transcription: {}".format(audio_state.text))
old_text = audio_state.text
@@ -280,14 +278,12 @@ def transcribeAudio(audio_state, model):
def sendAudio(audio_state):
while audio_state.run_app == True:
- if audio_state.display_paused:
- time.sleep(0.1)
- continue
-
audio_state.lock.acquire()
text = audio_state.committed_text + " " + audio_state.text
- osc_ctrl.sendMessageLazy(audio_state.osc_client, text, audio_state.tx_state)
+ is_paging = not osc_ctrl.sendMessageLazy(audio_state.osc_client, text,
+ audio_state.tx_state)
+ osc_ctrl.indicatePaging(audio_state.osc_client, is_paging)
audio_state.lock.release()
# Pace this out
@@ -295,19 +291,31 @@ def sendAudio(audio_state):
def readControllerInput(audio_state):
session = steamvr.SessionState()
+ RECORD_STATE = 0
+ PAUSE_STATE = 1
+ state = PAUSE_STATE
while audio_state.run_app == True:
time.sleep(0.05)
event = steamvr.pollButtonPress(session)
if event == steamvr.EVENT_RISING_EDGE:
- print("event get")
- audio_state.lock.acquire()
- resetAudioLocked(audio_state)
- resetDisplayLocked(audio_state)
- audio_state.drop_transcription = True
- audio_state.display_paused = False
- audio_state.lock.release()
+ if state == RECORD_STATE:
+ state = PAUSE_STATE
+ osc_ctrl.indicateSpeech(audio_state.osc_client, False)
+
+ audio_state.audio_paused = True
+ elif state == PAUSE_STATE:
+ state = RECORD_STATE
+ osc_ctrl.indicateSpeech(audio_state.osc_client, True)
+
+ audio_state.lock.acquire()
+ resetAudioLocked(audio_state)
+ resetDisplayLocked(audio_state)
+ audio_state.drop_transcription = True
+ audio_state.audio_paused = False
+ audio_state.lock.release()
+
def transcribeLoop(mic: str, language: str):
audio_state = getMicStream(mic)
@@ -337,14 +345,13 @@ def transcribeLoop(mic: str, language: str):
controller_input_thd.daemon = True
controller_input_thd.start()
- print("Press enter or say 'Clear' to start a new message. Say 'Over' to " +
- "pause the display (saying 'Clear' resets it again).")
+ print("Press enter to start a new message.")
for line in sys.stdin:
audio_state.lock.acquire()
resetAudioLocked(audio_state)
resetDisplayLocked(audio_state)
audio_state.drop_transcription = True
- audio_state.display_paused = False
+ audio_state.audio_paused = False
audio_state.lock.release()
if "exit" in line or "quit" in line:
break