summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Scripts/emotes_v2.py53
-rw-r--r--Scripts/generate_shader.py10
-rw-r--r--Scripts/osc_ctrl.py2
-rw-r--r--Scripts/transcribe.py9
-rw-r--r--Shaders/TaSTT_lighting_template.cginc21
5 files changed, 66 insertions, 29 deletions
diff --git a/Scripts/emotes_v2.py b/Scripts/emotes_v2.py
index 195e116..165db7c 100644
--- a/Scripts/emotes_v2.py
+++ b/Scripts/emotes_v2.py
@@ -12,6 +12,8 @@ from typing import Any, Dict, List, Tuple
# The character range [0x0000, 0xDFFF] is reserved for text.
# The range [0xE000, infinity) is left over for emotes.
EMOTES_LETTER_OFFSET = 0xE000
+EMOTES_HEIGHT = 512
+EMOTES_TEX_SZ = 4096
def superimpose_image(base_img: Image, overlay_img: Image, position: Tuple[int, int]) -> Image:
base_img.paste(overlay_img, position, overlay_img)
@@ -28,7 +30,7 @@ def get_images_from_directory(directory_path: str) -> List[Tuple[Any, str]]:
for filename in os.listdir(directory_path):
file_path = os.path.join(directory_path, filename)
if os.path.isfile(file_path) and file_path.endswith(".png"):
- image = Image.open(file_path)
+ image = Image.open(file_path).convert("RGBA")
name = os.path.basename(filename).split('.')[0]
images.append((image, name))
return images
@@ -69,6 +71,45 @@ class EmotesState:
with open(pickle_path, 'rb') as f:
self.bits = pickle.load(f)
+ # This is quite slow: for each of the m keywords we try each of its k
+ # spelling variants, and every attempt is an O(n) search-and-replace over
+ # the message. Total complexity is therefore O(m*n*k). All three numbers
+ # are typically small: m and k typically < 10, n typically < 200.
+ #
+ # Naively one might split the input into words, but this only works for
+ # English-like languages. East Asian languages like Japanese don't
+ # really divide into words AFAIK, so this wouldn't work for them.
+ #
+ # Unless the performance becomes a user-reported problem, stick with this
+ # inefficient but reliable method.
+ def encode_emotes(self, msg: str) -> str:
+     """Replace emote keywords in `msg` with their emote characters.
+
+     For every keyword in `self.bits`, each supported spelling variant
+     (UPPER, lower, Capitalized, "K-E-Y", "K E Y", "k e y", "K, E, Y")
+     is replaced by the string formed from that keyword's code points.
+
+     Args:
+         msg: The text to scan.
+
+     Returns:
+         `msg` with every recognized keyword variant substituted.
+     """
+     # Longest keywords first: otherwise a keyword that is a substring of
+     # another one (e.g. "kek" inside "kekw") could be replaced first and
+     # make the longer keyword impossible to match.
+     for keyword in sorted(self.bits, key=len, reverse=True):
+         if not keyword:
+             # str.replace("", x) would insert x between every character.
+             continue
+         bits_str = "".join(chr(bit) for bit in self.bits[keyword])
+         upper = keyword.upper()
+         lower = keyword.lower()
+         # Order matters: plain spellings are tried before spaced-out ones.
+         variants = (
+             upper,               # ALL CAPS
+             lower,               # lowercase
+             lower.capitalize(),  # Capitalized
+             '-'.join(upper),     # dashes inserted
+             ' '.join(upper),     # uppercase, spaces inserted
+             ' '.join(lower),     # lowercase, spaces inserted
+             ', '.join(upper),    # uppercase, commas and spaces inserted
+         )
+         for variant in variants:
+             msg = msg.replace(variant, bits_str)
+     return msg
+
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("dir", type=str, help="directory to get images from")
@@ -82,21 +123,21 @@ if __name__ == "__main__":
board_aspect_ratio = int(args.board_aspect_ratio)
texture_aspect_ratio = int(args.texture_aspect_ratio)
- base_img = Image.new("RGBA", (4096, 4096), (0, 0, 0, 0))
+ base_img = Image.new("RGBA", (EMOTES_TEX_SZ, EMOTES_TEX_SZ), (0, 0, 0, 0))
images_and_filenames = get_images_from_directory(directory_path)
i = 0
bits = {} # Dict[str, List[int]]
for img, filename in images_and_filenames:
print(f"Adding {filename}")
img = resize_image_with_aspect_ratio(img, board_aspect_ratio)
- img = resize_image_to_height(img, 1024)
- img_fragments = split_resized_image(img, int(1024 / texture_aspect_ratio), 1024)
+ img = resize_image_to_height(img, EMOTES_HEIGHT)
+ img_fragments = split_resized_image(img, int(EMOTES_HEIGHT / texture_aspect_ratio), EMOTES_HEIGHT)
img_bits = [] # List[int]
for img_fragment in img_fragments:
i = i + 1
img_pos = i_to_pos(i,
- 1024 / texture_aspect_ratio, 1024,
- 4096, 4096)
+ EMOTES_HEIGHT / texture_aspect_ratio, EMOTES_HEIGHT,
+ EMOTES_TEX_SZ, EMOTES_TEX_SZ)
print(f"{img_pos}")
superimpose_image(base_img, img_fragment, img_pos)
img_bits.append(EMOTES_LETTER_OFFSET + i)
diff --git a/Scripts/generate_shader.py b/Scripts/generate_shader.py
index cf113ec..15dc9b9 100644
--- a/Scripts/generate_shader.py
+++ b/Scripts/generate_shader.py
@@ -48,11 +48,11 @@ def generateCgConstants(nbytes: int, nrows: int, ncols: int, prefix: str = "") -
# This is the basic idea of what we're generating:
# // Get the value of the parameter for the cell we're in.
-# int GetLetterParameter(float2 uv)
+# uint GetLetterParameter(float2 uv)
# {
# float CHAR_COL = floor(uv.x * Cols);
# float CHAR_ROW = floor(uv.y * Rows);
-# int res = 0;
+# uint res = 0;
#
# [forcecase] switch(CHAR_ROW) {
# case n:
@@ -64,8 +64,8 @@ def generateCgConstants(nbytes: int, nrows: int, ncols: int, prefix: str = "") -
# case 1:
# ...
#
-# res |= ((int) round(_Letter_Row00_Col00_Byte0)) << (0 * 8);
-# res |= ((int) round(_Letter_Row00_Col00_Byte1)) << (1 * 8);
+# res |= ((uint) round(_Letter_Row00_Col00_Byte0)) << (0 * 8);
+# res |= ((uint) round(_Letter_Row00_Col00_Byte1)) << (1 * 8);
# continue;
# }
# }
@@ -84,7 +84,7 @@ def generateLetterAccessor(nbytes: int, nrows: int, ncols: int, prefix: str = ""
lines.append(prefix + " case {}:".format(col))
for byte in range(0, nbytes):
param_name = generate_utils.getShaderParamByRowColByte(row, col, byte)
- lines.append(prefix + " res |= ((int) round({})) << ({} * 8);".format(param_name, byte))
+ lines.append(prefix + " res |= ((uint) round({})) << ({} * 8);".format(param_name, byte))
lines.append(prefix + " return res;")
lines.append(prefix + " default:")
lines.append(prefix + " return 0;")
diff --git a/Scripts/osc_ctrl.py b/Scripts/osc_ctrl.py
index 750059f..3ff56ca 100644
--- a/Scripts/osc_ctrl.py
+++ b/Scripts/osc_ctrl.py
@@ -90,6 +90,8 @@ def updateRegion(client, region_idx, letter_encoded):
# in FIFO order; e.g., the most recently spoken words are sent last.
# Returns True if done paging, False otherwise.
def pageMessage(osc_state: OscState, msg: str, estate: EmotesState) -> bool:
+ msg = estate.encode_emotes(msg)
+
msg_slice, slice_idx = osc_state.pager.getNextSlice(msg)
if slice_idx == -1:
return True
diff --git a/Scripts/transcribe.py b/Scripts/transcribe.py
index 3171336..d67edb6 100644
--- a/Scripts/transcribe.py
+++ b/Scripts/transcribe.py
@@ -368,7 +368,7 @@ def readControllerInput(audio_state, enable_local_beep: bool,
audio_state.audio_paused = True
if enable_local_beep == 1:
- playsound(os.path.abspath("../Sounds/Noise_Off_Quiet.wav"))
+ playsound(os.path.abspath("Resources/Sounds/Noise_Off_Quiet.wav"))
elif state == PAUSE_STATE:
state = RECORD_STATE
if not use_builtin:
@@ -382,7 +382,7 @@ def readControllerInput(audio_state, enable_local_beep: bool,
audio_state.audio_paused = False
if enable_local_beep == 1:
- playsound(os.path.abspath("../Sounds/Noise_On_Quiet.wav"))
+ playsound(os.path.abspath("Resources/Sounds/Noise_On_Quiet.wav"))
# model should correspond to one of the Whisper models defined in
# whisper/__init__.py. Examples: tiny, base, small, medium.
@@ -436,10 +436,13 @@ if __name__ == "__main__":
print("args: {}".format(" ".join(sys.argv)))
- # Set cwd to the directory holding the script
+ # Set cwd to TaSTT/
abspath = os.path.abspath(__file__)
dname = os.path.dirname(abspath)
+ dname = os.path.dirname(dname)
+ dname = os.path.dirname(dname)
os.chdir(dname)
+ print(f"Set cwd to {os.getcwd()}")
parser = argparse.ArgumentParser()
parser.add_argument("--mic", type=str, help="Which mic to use. Options: index, focusrite. Default: index")
diff --git a/Shaders/TaSTT_lighting_template.cginc b/Shaders/TaSTT_lighting_template.cginc
index 15b4e41..aa20751 100644
--- a/Shaders/TaSTT_lighting_template.cginc
+++ b/Shaders/TaSTT_lighting_template.cginc
@@ -273,7 +273,7 @@ float2 GetLetter(float2 uv, int nth_letter,
}
// Get the value of the parameter for the cell we're in.
-int GetLetterParameter(float2 uv)
+uint GetLetterParameter(float2 uv)
{
float CHAR_COL = floor(uv.x * NCOLS);
float CHAR_ROW = floor(uv.y * NROWS);
@@ -575,7 +575,7 @@ fixed4 frag(v2f i) : SV_Target
fixed4 text = fixed4(0, 0, 0, 0);
bool discard_text = false;
- int letter = GetLetterParameter(uv_with_margin);
+ uint letter = GetLetterParameter(uv_with_margin);
float texture_cols;
float texture_rows;
@@ -587,8 +587,8 @@ fixed4 frag(v2f i) : SV_Target
letter_uv = GetLetter(uv_with_margin, letter % 0x2000, texture_cols, texture_rows, NCOLS, NROWS, /*margin=*/0.02);
} else {
is_emote = true;
- texture_cols = 8.0;
- texture_rows = 4.0;
+ texture_cols = 16.0;
+ texture_rows = 8.0;
// This will need to be updated if we create multiple emote textures.
letter_uv = GetLetter(uv_with_margin, letter % 0x2000, texture_cols, texture_rows, NCOLS, NROWS, /*margin=*/0);
}
@@ -604,7 +604,7 @@ fixed4 frag(v2f i) : SV_Target
const float iddx = ddx(letter_uv.x);
const float iddy = ddy(letter_uv.y);
- if (Enable_Dithering) {
+ if (Enable_Dithering && !is_emote) {
// Add noise to UV.
// Here, iddx and iddy tell us how big the current UV cell is with respect to
// screen space (i.e. how many pixels wide it is).
@@ -635,17 +635,8 @@ fixed4 frag(v2f i) : SV_Target
//float2 cur_letter_uv = letter_uv + float2(aa_region_x, aa_region_y);
float2 cur_letter_uv = letter_uv;
-
- if (is_emote) {
- // Emotes are broken up into several pieces and packed tightly. Thus one
- // emote may wrap around the edge of the texture. Clamping near the edge
- // of the texture avoids a small line from appearing in the middle of
- // these textures.
- float epsilon = 0.002;
- cur_letter_uv.x = clamp(cur_letter_uv.x, epsilon, 1.0 - epsilon);
- }
- int which_texture = (int) floor(letter / (64 * 128));
+ int which_texture = (int) floor(letter / (uint) (64 * 128));
[forcecase] switch (which_texture)
{
case 0: