summary refs log tree commit diff stats
path: root/Scripts
diff options
context:
space:
mode:
author yum <yum.food.vr@gmail.com> 2023-02-02 18:00:18 -0800
committer yum <yum.food.vr@gmail.com> 2023-02-13 14:36:25 -0800
commit 1cb5bdfe8cba6fe4647448cd3cf0c63ecbd7dfc2 (patch)
tree e338264fbf6f75911246ca61c934110e00f144c8 /Scripts
parent 7c6894614dcc3ebc5d4c8839b64f4da761b5ccf0 (diff)
Finish emotes
Emotes require 2 bytes per char. They're encoded into the region [0xE000, infinity). The texture is 4k, and uses 1k vertical pixels per emote segment, for a maximum of 32 segments.
* Reduce volume of noise indicator by 90%. Quiet is probably better. Might want to add a volume slider idk.
* Bugfix: emotes without a transparency channel now work
* Address a couple Unity performance complaints about the shader
Diffstat (limited to 'Scripts')
-rw-r--r-- Scripts/emotes_v2.py 53
-rw-r--r-- Scripts/generate_shader.py 10
-rw-r--r-- Scripts/osc_ctrl.py 2
-rw-r--r-- Scripts/transcribe.py 9
4 files changed, 60 insertions, 14 deletions
diff --git a/Scripts/emotes_v2.py b/Scripts/emotes_v2.py
index 195e116..165db7c 100644
--- a/Scripts/emotes_v2.py
+++ b/Scripts/emotes_v2.py
@@ -12,6 +12,8 @@ from typing import Any, Dict, List, Tuple
# The character range [0x0000, 0xDFFF] is reserved for text.
# The range [0xE000, infinity) is left over for emotes.
EMOTES_LETTER_OFFSET = 0xE000
+EMOTES_HEIGHT = 512
+EMOTES_TEX_SZ = 4096
def superimpose_image(base_img: Image, overlay_img: Image, position: Tuple[int, int]) -> Image:
base_img.paste(overlay_img, position, overlay_img)
@@ -28,7 +30,7 @@ def get_images_from_directory(directory_path: str) -> List[Tuple[Any, str]]:
for filename in os.listdir(directory_path):
file_path = os.path.join(directory_path, filename)
if os.path.isfile(file_path) and file_path.endswith(".png"):
- image = Image.open(file_path)
+ image = Image.open(file_path).convert("RGBA")
name = os.path.basename(filename).split('.')[0]
images.append((image, name))
return images
@@ -69,6 +71,45 @@ class EmotesState:
with open(pickle_path, 'rb') as f:
self.bits = pickle.load(f)
+ # This is quite slow since we do a search and replace (O(n))
+ # for each keyword O(m) times each variant of said keyword (O(k)).
+ # Thus total complexity is O(m*n*k). All three of these numbers are
+ # typically small: m and k typically < 10, n typically < 200.
+ #
+ # Naively one might split the input into words, but this only works for
+ # English-like languages. Eastern Asian languages like Japanese don't
+ # really divide into words AFAIK so this wouldn't work for them.
+ #
+ # Unless the performance becomes a user-reported problem, stick with this
+ # inefficient but reliable method.
+ def encode_emotes(self, msg: str):
+ for keyword, bits in self.bits.items():
+ bits_str = ""
+ for bit in bits:
+ bits_str += chr(bit)
+ # ALL CAPS
+ tmp = keyword.upper()
+ msg = msg.replace(tmp, bits_str)
+ # lowercase
+ tmp = keyword.lower()
+ msg = msg.replace(tmp, bits_str)
+ # Capitalized
+ tmp = keyword.lower().capitalize()
+ msg = msg.replace(tmp, bits_str)
+ # dashes inserted
+ tmp = '-'.join(keyword.upper())
+ msg = msg.replace(tmp, bits_str)
+ # uppercase, spaces inserted
+ tmp = ' '.join(keyword.upper())
+ msg = msg.replace(tmp, bits_str)
+ # lowercase, spaces inserted
+ tmp = ' '.join(keyword.lower())
+ msg = msg.replace(tmp, bits_str)
+ # uppercase, commas and spaces inserted
+ tmp = ', '.join(keyword.upper())
+ msg = msg.replace(tmp, bits_str)
+ return msg
+
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("dir", type=str, help="directory to get images from")
@@ -82,21 +123,21 @@ if __name__ == "__main__":
board_aspect_ratio = int(args.board_aspect_ratio)
texture_aspect_ratio = int(args.texture_aspect_ratio)
- base_img = Image.new("RGBA", (4096, 4096), (0, 0, 0, 0))
+ base_img = Image.new("RGBA", (EMOTES_TEX_SZ, EMOTES_TEX_SZ), (0, 0, 0, 0))
images_and_filenames = get_images_from_directory(directory_path)
i = 0
bits = {} # Dict[str, List[int]]
for img, filename in images_and_filenames:
print(f"Adding {filename}")
img = resize_image_with_aspect_ratio(img, board_aspect_ratio)
- img = resize_image_to_height(img, 1024)
- img_fragments = split_resized_image(img, int(1024 / texture_aspect_ratio), 1024)
+ img = resize_image_to_height(img, EMOTES_HEIGHT)
+ img_fragments = split_resized_image(img, int(EMOTES_HEIGHT / texture_aspect_ratio), EMOTES_HEIGHT)
img_bits = [] # List[int]
for img_fragment in img_fragments:
i = i + 1
img_pos = i_to_pos(i,
- 1024 / texture_aspect_ratio, 1024,
- 4096, 4096)
+ EMOTES_HEIGHT / texture_aspect_ratio, EMOTES_HEIGHT,
+ EMOTES_TEX_SZ, EMOTES_TEX_SZ)
print(f"{img_pos}")
superimpose_image(base_img, img_fragment, img_pos)
img_bits.append(EMOTES_LETTER_OFFSET + i)
diff --git a/Scripts/generate_shader.py b/Scripts/generate_shader.py
index cf113ec..15dc9b9 100644
--- a/Scripts/generate_shader.py
+++ b/Scripts/generate_shader.py
@@ -48,11 +48,11 @@ def generateCgConstants(nbytes: int, nrows: int, ncols: int, prefix: str = "") -
# This is the basic idea of what we're generating:
# // Get the value of the parameter for the cell we're in.
-# int GetLetterParameter(float2 uv)
+# uint GetLetterParameter(float2 uv)
# {
# float CHAR_COL = floor(uv.x * Cols);
# float CHAR_ROW = floor(uv.y * Rows);
-# int res = 0;
+# uint res = 0;
#
# [forcecase] switch(CHAR_ROW) {
# case n:
@@ -64,8 +64,8 @@ def generateCgConstants(nbytes: int, nrows: int, ncols: int, prefix: str = "") -
# case 1:
# ...
#
-# res |= ((int) round(_Letter_Row00_Col00_Byte0)) << (0 * 8);
-# res |= ((int) round(_Letter_Row00_Col00_Byte1)) << (1 * 8);
+# res |= ((uint) round(_Letter_Row00_Col00_Byte0)) << (0 * 8);
+# res |= ((uint) round(_Letter_Row00_Col00_Byte1)) << (1 * 8);
# continue;
# }
# }
@@ -84,7 +84,7 @@ def generateLetterAccessor(nbytes: int, nrows: int, ncols: int, prefix: str = ""
lines.append(prefix + " case {}:".format(col))
for byte in range(0, nbytes):
param_name = generate_utils.getShaderParamByRowColByte(row, col, byte)
- lines.append(prefix + " res |= ((int) round({})) << ({} * 8);".format(param_name, byte))
+ lines.append(prefix + " res |= ((uint) round({})) << ({} * 8);".format(param_name, byte))
lines.append(prefix + " return res;")
lines.append(prefix + " default:")
lines.append(prefix + " return 0;")
diff --git a/Scripts/osc_ctrl.py b/Scripts/osc_ctrl.py
index 750059f..3ff56ca 100644
--- a/Scripts/osc_ctrl.py
+++ b/Scripts/osc_ctrl.py
@@ -90,6 +90,8 @@ def updateRegion(client, region_idx, letter_encoded):
# in FIFO order; e.g., the most recently spoken words are sent last.
# Returns True if done paging, False otherwise.
def pageMessage(osc_state: OscState, msg: str, estate: EmotesState) -> bool:
+ msg = estate.encode_emotes(msg)
+
msg_slice, slice_idx = osc_state.pager.getNextSlice(msg)
if slice_idx == -1:
return True
diff --git a/Scripts/transcribe.py b/Scripts/transcribe.py
index 3171336..d67edb6 100644
--- a/Scripts/transcribe.py
+++ b/Scripts/transcribe.py
@@ -368,7 +368,7 @@ def readControllerInput(audio_state, enable_local_beep: bool,
audio_state.audio_paused = True
if enable_local_beep == 1:
- playsound(os.path.abspath("../Sounds/Noise_Off_Quiet.wav"))
+ playsound(os.path.abspath("Resources/Sounds/Noise_Off_Quiet.wav"))
elif state == PAUSE_STATE:
state = RECORD_STATE
if not use_builtin:
@@ -382,7 +382,7 @@ def readControllerInput(audio_state, enable_local_beep: bool,
audio_state.audio_paused = False
if enable_local_beep == 1:
- playsound(os.path.abspath("../Sounds/Noise_On_Quiet.wav"))
+ playsound(os.path.abspath("Resources/Sounds/Noise_On_Quiet.wav"))
# model should correspond to one of the Whisper models defined in
# whisper/__init__.py. Examples: tiny, base, small, medium.
@@ -436,10 +436,13 @@ if __name__ == "__main__":
print("args: {}".format(" ".join(sys.argv)))
- # Set cwd to the directory holding the script
+ # Set cwd to TaSTT/
abspath = os.path.abspath(__file__)
dname = os.path.dirname(abspath)
+ dname = os.path.dirname(dname)
+ dname = os.path.dirname(dname)
os.chdir(dname)
+ print(f"Set cwd to {os.getcwd()}")
parser = argparse.ArgumentParser()
parser.add_argument("--mic", type=str, help="Which mic to use. Options: index, focusrite. Default: index")