summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--TaSTT.shader139
-rw-r--r--generate_params.py6
-rw-r--r--generate_utils.py6
-rw-r--r--libtastt.py57
-rw-r--r--libunity.py5
-rw-r--r--osc_ctrl.py8
-rw-r--r--string_matcher.py2
-rw-r--r--transcribe.py55
8 files changed, 225 insertions, 53 deletions
diff --git a/TaSTT.shader b/TaSTT.shader
index b722d63..272f08b 100644
--- a/TaSTT.shader
+++ b/TaSTT.shader
@@ -10,6 +10,11 @@
_Font_0xA000_0xBFFF ("Font 5 (unicode 0xA000 - 0xBFFFF)", 2D) = "white" {}
_Font_0xC000_0xDFFF ("Font 6 (unicode 0xC000 - 0xDFFFF)", 2D) = "white" {}
+ TaSTT_Backplate("TaSTT_Backplate", 2D) = "black" {}
+
+ TaSTT_Indicator_0("TaSTT_Indicator_0", float) = 0
+ TaSTT_Indicator_1("TaSTT_Indicator_1", float) = 0
+
// software "engineering" LULW
_Letter_Row00_Col00_Byte0("_Letter_Row00_Col00_Byte0", float) = 0
_Letter_Row00_Col01_Byte0("_Letter_Row00_Col01_Byte0", float) = 0
@@ -403,6 +408,15 @@
Texture2D _Font_0xA000_0xBFFF;
Texture2D _Font_0xC000_0xDFFF;
+ float TaSTT_Indicator_0;
+ static const fixed4 TaSTT_Indicator_0_Off_Color = fixed4(0.0, 1.0, 0.0, 2) * 0.7;
+ static const fixed4 TaSTT_Indicator_0_On_Color = fixed4(0.8, 0.2, 0.0, 2) * 0.9;
+ float TaSTT_Indicator_1;
+ static const fixed4 TaSTT_Indicator_1_Off_Color = fixed4(0.0, 1.0, 0.0, 2) * 0.7;
+ static const fixed4 TaSTT_Indicator_1_On_Color = fixed4(0.8, 0.2, 0.0, 2) * 0.9;
+
+ Texture2D TaSTT_Backplate;
+
float _Letter_Row00_Col00_Byte0;
float _Letter_Row00_Col01_Byte0;
float _Letter_Row00_Col02_Byte0;
@@ -764,14 +778,29 @@
return o;
}
- float2 AddMarginToUV(float2 uv, float x_frac, float y_frac)
+ float2 AddMarginToUV(float2 uv, float2 margin)
{
- float2 lo = float2(-x_frac / 2, -y_frac / 2);
- float2 hi = float2(1.0 + x_frac / 2, 1.0 + y_frac / 2);
+ float2 lo = float2(-margin.x / 2, -margin.y / 2);
+ float2 hi = float2(1.0 + margin.x / 2, 1.0 + margin.y / 2);
return clamp(lerp(lo, hi, uv), 0.0, 1.0);
}
+ bool InMargin(float2 uv, float2 margin)
+ {
+ return uv.x < margin.x / 2 ||
+ uv.x > 1 - margin.x / 2 ||
+ uv.y < margin.y / 2 ||
+ uv.y > 1 - margin.y / 2;
+ }
+
+ // dist = sqrt(dx^2 + dy^2) = sqrt(<dx,dy> * <dx,dy>)
+ bool InRadius2(float2 uv, float2 pos, float radius2)
+ {
+ float2 delta = uv - pos;
+ return dot(delta, delta) < radius2;
+ }
+
// Write the nth letter in the current cell and return the value of the
// pixel.
float2 GetLetter(float2 uv, int nth_letter)
@@ -1238,34 +1267,88 @@
uv.x = 1.0 - uv.x;
}
- float uv_x_margin = 0.03;
- float uv_y_margin = 0.03;
- uv = AddMarginToUV(uv, uv_x_margin, uv_y_margin);
+ float2 uv_margin = float2(0.03, 0.06);
+ if (InMargin(uv, uv_margin)) {
+ // Margin is uv_margin/2 wide/tall.
+ // We want a circle whose radius is ~95% of that (see radius_factor).
+ float radius_factor = 0.95;
+ float radius = (uv_margin.x / 2) * radius_factor;
+ // We want this circle to be centered halfway through the margin
+ // vertically, and at one margin width plus one radius horizontally.
+ float2 indicator_center = float2(
+ uv_margin.x * 0.5 + radius,
+ uv_margin.y * 0.5 * 0.5
+ );
+ // Finally, translate it to the top of the board instead of the
+ // bottom.
+ indicator_center.y = 1.0 - indicator_center.y;
- int2 letter_bytes = (int2) floor(GetLetterParameter(uv));
- int letter = letter_bytes[0] | (letter_bytes[1] << 8);
+ if (InRadius2(uv, indicator_center, radius * radius)) {
+ if (floor(TaSTT_Indicator_0) == 0.0) {
+ return TaSTT_Indicator_0_Off_Color;
+ } else {
+ return TaSTT_Indicator_0_On_Color;
+ }
+ }
- uv = GetLetter(uv, letter);
+ // Next, draw the second indicator. Same size as before, just shifted
+ // over a little.
+ indicator_center.x += radius * 2.5;
+ if (InRadius2(uv, indicator_center, radius * radius)) {
+ if (floor(TaSTT_Indicator_1) == 0.0) {
+ return TaSTT_Indicator_1_Off_Color;
+ } else {
+ return TaSTT_Indicator_1_On_Color;
+ }
+ }
- int which_texture = (int) floor(letter / (64 * 128));
- [forcecase] switch (which_texture)
- {
- case 0:
- return _Font_0x0000_0x1FFF.Sample(sampler_linear_repeat, uv);
- case 1:
- return _Font_0x2000_0x3FFF.Sample(sampler_linear_repeat, uv);
- case 2:
- return _Font_0x4000_0x5FFF.Sample(sampler_linear_repeat, uv);
- case 3:
- return _Font_0x6000_0x7FFF.Sample(sampler_linear_repeat, uv);
- case 4:
- return _Font_0x8000_0x9FFF.Sample(sampler_linear_repeat, uv);
- case 5:
- return _Font_0xA000_0xBFFF.Sample(sampler_linear_repeat, uv);
- case 6:
- return _Font_0xC000_0xDFFF.Sample(sampler_linear_repeat, uv);
- default:
- return _Font_0x0000_0x1FFF.Sample(sampler_linear_repeat, uv);
+ return fixed4(1,1,1,1);
+ } else {
+ uv_margin *= 2;
+ uv = AddMarginToUV(uv, uv_margin);
+
+ int2 letter_bytes = (int2) floor(GetLetterParameter(uv));
+ int letter = letter_bytes[0] | (letter_bytes[1] << 8);
+
+ uv = GetLetter(uv, letter);
+
+ fixed4 background = TaSTT_Backplate.Sample(sampler_linear_repeat, uv);
+ fixed4 text;
+
+ int which_texture = (int) floor(letter / (64 * 128));
+ [forcecase] switch (which_texture)
+ {
+ case 0:
+ text = _Font_0x0000_0x1FFF.Sample(sampler_linear_repeat, uv);
+ break;
+ case 1:
+ text = _Font_0x2000_0x3FFF.Sample(sampler_linear_repeat, uv);
+ break;
+ case 2:
+ text = _Font_0x4000_0x5FFF.Sample(sampler_linear_repeat, uv);
+ break;
+ case 3:
+ text = _Font_0x6000_0x7FFF.Sample(sampler_linear_repeat, uv);
+ break;
+ case 4:
+ text = _Font_0x8000_0x9FFF.Sample(sampler_linear_repeat, uv);
+ break;
+ case 5:
+ text = _Font_0xA000_0xBFFF.Sample(sampler_linear_repeat, uv);
+ break;
+ case 6:
+ text = _Font_0xC000_0xDFFF.Sample(sampler_linear_repeat, uv);
+ break;
+ default:
+ text = _Font_0x0000_0x1FFF.Sample(sampler_linear_repeat, uv);
+ break;
+ }
+ fixed4 black = fixed4(0,0,0,0);
+ if (text.r == black.r && text.g == black.g && text.b == black.b && text.a == black.a) {
+ return background;
+ } else {
+ return text;
+ }
}
}
ENDCG
diff --git a/generate_params.py b/generate_params.py
index 63203d0..1146ee5 100644
--- a/generate_params.py
+++ b/generate_params.py
@@ -62,6 +62,12 @@ print(generate_utils.replaceMacros(BOOL_PARAM, params))
params["PARAM_NAME"] = generate_utils.getEnableParam()
print(generate_utils.replaceMacros(BOOL_PARAM, params))
+params["PARAM_NAME"] = generate_utils.getIndicator0Param()
+print(generate_utils.replaceMacros(BOOL_PARAM, params))
+
+params["PARAM_NAME"] = generate_utils.getIndicator1Param()
+print(generate_utils.replaceMacros(BOOL_PARAM, params))
+
params["PARAM_NAME"] = generate_utils.getToggleParam()
print(generate_utils.replaceMacros(BOOL_PARAM, params))
diff --git a/generate_utils.py b/generate_utils.py
index 119714d..c4cbf4c 100644
--- a/generate_utils.py
+++ b/generate_utils.py
@@ -94,6 +94,12 @@ def getSelectParam() -> str:
def getEnableParam():
return "TaSTT_Enable"
+def getIndicator0Param():
+ return "TaSTT_Indicator_0"
+
+def getIndicator1Param():
+ return "TaSTT_Indicator_1"
+
def getBoardIndex(which_layer, select):
# Because we divide the board into a multiple of 8 cells, some cells may
# describe animations which don't exist, depending on the size of the board.
diff --git a/libtastt.py b/libtastt.py
index f580c1e..658e9ff 100644
--- a/libtastt.py
+++ b/libtastt.py
@@ -190,9 +190,54 @@ def generateClearAnimation(anim_dir, guid_map):
guid_map[anim_path] = meta.guid
guid_map[meta.guid] = anim_path
+# Generate a pair of toggle animations (`_Off` and `_On`) for a shader parameter.
+def generateToggleAnimations(anim_dir, shader_param, guid_map):
+ print("Generating shader toggle animation", file=sys.stderr)
+
+ parser = libunity.UnityParser()
+ parser.parse(LETTER_ANIMATION_TEMPLATE)
+
+ # 0.0 represents false, 1.0 represents true. Don't forget that we add
+ # `UNITY_ANIMATION_FUDGE_MARGIN` to everything.
+ for shader_value in range(0, 2):
+ anim_node = parser.nodes[0]
+ anim_clip = anim_node.mapping['AnimationClip']
+ curve_template = anim_clip.mapping['m_FloatCurves'].sequence[0]
+ anim_clip.mapping['m_FloatCurves'].sequence = []
+ anim_clip.mapping['m_EditorCurves'].sequence = []
+
+ curve = curve_template.copy()
+ for keyframe in curve.mapping['curve'].mapping['m_Curve'].sequence:
+ keyframe.mapping['value'] = str(float(shader_value) +
+ UNITY_ANIMATION_FUDGE_MARGIN)
+ curve.mapping['attribute'] = "material.{}".format(shader_param)
+ curve.mapping['path'] = "World Constraint/Container/TaSTT"
+ # Add curve to animation
+ anim_clip.mapping['m_FloatCurves'].sequence.append(curve)
+ anim_clip.mapping['m_EditorCurves'].sequence.append(curve)
+
+ # Serialize animation to file
+ anim_name = generate_utils.getClearAnimationName()
+ anim_suffix = "_Off"
+ if shader_value == 1:
+ anim_suffix = "_On"
+ anim_path = anim_dir + shader_param + anim_suffix + ".anim"
+ with open(anim_path, "w") as f:
+ f.write(libunity.unityYamlToString([anim_node]))
+ # Generate metadata
+ meta = libunity.Metadata()
+ with open(anim_path + ".meta", "w") as f:
+ f.write(str(meta))
+ # Add metadata to guid map
+ guid_map[anim_path] = meta.guid
+ guid_map[meta.guid] = anim_path
+
def generateAnimations(anim_dir, guid_map):
generateClearAnimation(args.gen_anim_dir, guid_map)
+ generateToggleAnimations(args.gen_anim_dir, generate_utils.getIndicator0Param(), guid_map)
+ generateToggleAnimations(args.gen_anim_dir, generate_utils.getIndicator1Param(), guid_map)
+
print("Generating letter animations", file=sys.stderr)
parser = libunity.UnityParser()
@@ -257,6 +302,8 @@ def generateFXController(anim: libunity.UnityAnimator) -> typing.Dict[int, libun
anim.addParameter(generate_utils.getToggleParam(), bool)
anim.addParameter(generate_utils.getSpeechNoiseEnableParam(), bool)
anim.addParameter(generate_utils.getClearBoardParam(), bool)
+ anim.addParameter(generate_utils.getIndicator0Param(), bool)
+ anim.addParameter(generate_utils.getIndicator1Param(), bool)
layers = {}
for byte in range(0, generate_utils.BYTES_PER_CHAR):
@@ -410,6 +457,16 @@ def generateFX(guid_map, gen_anim_dir):
None, # No animation in the `off` state.
generate_utils.getClearAnimationName() + ".anim",
anim)
+ generateToggle(generate_utils.getIndicator0Param(),
+ gen_anim_dir,
+ generate_utils.getIndicator0Param() + "_Off.anim",
+ generate_utils.getIndicator0Param() + "_On.anim",
+ anim)
+ generateToggle(generate_utils.getIndicator1Param(),
+ gen_anim_dir,
+ generate_utils.getIndicator1Param() + "_Off.anim",
+ generate_utils.getIndicator1Param() + "_On.anim",
+ anim)
return anim
diff --git a/libunity.py b/libunity.py
index 822c238..9380a6c 100644
--- a/libunity.py
+++ b/libunity.py
@@ -521,6 +521,11 @@ class UnityAnimator():
p0.sequence += p1.sequence
a0.sequence += a1.sequence
+ for elm in p0.sequence:
+ elm.mapping['m_Controller'].mapping['fileID'] = ctrl0.anchor
+ for elm in a0.sequence:
+ elm.mapping['m_Controller'].mapping['fileID'] = ctrl0.anchor
+
return ctrl0
def merge(self, other):
diff --git a/osc_ctrl.py b/osc_ctrl.py
index be853dc..ea0c145 100644
--- a/osc_ctrl.py
+++ b/osc_ctrl.py
@@ -292,6 +292,14 @@ def clear(client, tx_state):
tx_state.last_msg_encoded = []
+def indicateSpeech(client, is_speaking: bool):
+ addr = "/avatar/parameters/" + generate_utils.getIndicator0Param()
+ client.send_message(addr, is_speaking)
+
+def indicatePaging(client, is_paging: bool):
+ addr = "/avatar/parameters/" + generate_utils.getIndicator1Param()
+ client.send_message(addr, is_paging)
+
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("-i", default="127.0.0.1", help="OSC server IP")
diff --git a/string_matcher.py b/string_matcher.py
index 543b18f..461f180 100644
--- a/string_matcher.py
+++ b/string_matcher.py
@@ -78,7 +78,7 @@ def matchStrings(old_text: str, new_text: str, window_size = 3) -> str:
for j in range(0, 1 + len(new_text) - window_size):
new_slice = new_text[j:j + window_size]
cur_d = editdistance.eval(old_slice, new_slice)
- if cur_d <= best_match_d:
+ if cur_d < best_match_d:
best_match_i = i
best_match_j = j
best_match_d = cur_d
diff --git a/transcribe.py b/transcribe.py
index 1aabf6f..9290bdc 100644
--- a/transcribe.py
+++ b/transcribe.py
@@ -70,9 +70,7 @@ class AudioState:
# this to whatever they want.
language = whisper.tokenizer.TO_LANGUAGE_CODE["english"]
- # When the user says `over`, we stop displaying new transcriptions until
- # they clear the board again.
- display_paused = False
+ audio_paused = False
osc_client = osc_ctrl.getClient()
@@ -121,6 +119,10 @@ def recordAudio(audio_state):
while audio_state.run_app:
data = audio_state.stream.read(audio_state.CHUNK)
+ if audio_state.audio_paused:
+ time.sleep(0.1)
+ continue
+
audio_state.lock.acquire()
audio_state.frames.append(data)
max_frames = int(audio_state.RATE * audio_state.MAX_LENGTH_S / audio_state.CHUNK)
@@ -199,7 +201,8 @@ def transcribe(audio_state, model, filename):
result = None
#for temp in (0.00, 0.05, 0.10, 0.15, 0.20):
- for temp in (0.00, 0.05):
+ #for temp in (0.00, 0.05):
+ for temp in (0.00,):
print("temp: {}".format(temp))
options = whisper.DecodingOptions(language = audio_state.language,
beam_size = 5, temperature = temp)
@@ -256,11 +259,6 @@ def transcribeAudio(audio_state, model):
words = ''.join(c for c in text.lower() if (c.isalpha() or c == " ")).split()
- if len(words) > 0:
- if words[-1] == "over":
- words = words[0:-1]
- audio_state.display_paused = True
-
print("Transcription: {}".format(audio_state.text))
old_text = audio_state.text
@@ -280,14 +278,12 @@ def transcribeAudio(audio_state, model):
def sendAudio(audio_state):
while audio_state.run_app == True:
- if audio_state.display_paused:
- time.sleep(0.1)
- continue
-
audio_state.lock.acquire()
text = audio_state.committed_text + " " + audio_state.text
- osc_ctrl.sendMessageLazy(audio_state.osc_client, text, audio_state.tx_state)
+ is_paging = not osc_ctrl.sendMessageLazy(audio_state.osc_client, text,
+ audio_state.tx_state)
+ osc_ctrl.indicatePaging(audio_state.osc_client, is_paging)
audio_state.lock.release()
# Pace this out
@@ -295,19 +291,31 @@ def sendAudio(audio_state):
def readControllerInput(audio_state):
session = steamvr.SessionState()
+ RECORD_STATE = 0
+ PAUSE_STATE = 1
+ state = PAUSE_STATE
while audio_state.run_app == True:
time.sleep(0.05)
event = steamvr.pollButtonPress(session)
if event == steamvr.EVENT_RISING_EDGE:
- print("event get")
- audio_state.lock.acquire()
- resetAudioLocked(audio_state)
- resetDisplayLocked(audio_state)
- audio_state.drop_transcription = True
- audio_state.display_paused = False
- audio_state.lock.release()
+ if state == RECORD_STATE:
+ state = PAUSE_STATE
+ osc_ctrl.indicateSpeech(audio_state.osc_client, False)
+
+ audio_state.audio_paused = True
+ elif state == PAUSE_STATE:
+ state = RECORD_STATE
+ osc_ctrl.indicateSpeech(audio_state.osc_client, True)
+
+ audio_state.lock.acquire()
+ resetAudioLocked(audio_state)
+ resetDisplayLocked(audio_state)
+ audio_state.drop_transcription = True
+ audio_state.audio_paused = False
+ audio_state.lock.release()
+
def transcribeLoop(mic: str, language: str):
audio_state = getMicStream(mic)
@@ -337,14 +345,13 @@ def transcribeLoop(mic: str, language: str):
controller_input_thd.daemon = True
controller_input_thd.start()
- print("Press enter or say 'Clear' to start a new message. Say 'Over' to " +
- "pause the display (saying 'Clear' resets it again).")
+ print("Press enter to start a new message.")
for line in sys.stdin:
audio_state.lock.acquire()
resetAudioLocked(audio_state)
resetDisplayLocked(audio_state)
audio_state.drop_transcription = True
- audio_state.display_paused = False
+ audio_state.audio_paused = False
audio_state.lock.release()
if "exit" in line or "quit" in line:
break