Finish emotes

Emotes require 2 bytes per char. They're encoded into the region [0xE000, infinity). The texture is 4k, and uses 1k vertical pixels per emote segment, for a maximum of 32 segments.

* Reduce volume of noise indicator by 90%. Quiet is probably better. Might want to add a volume slider.
* Bugfix: emotes without a transparency channel now work.
* Address a couple of Unity performance complaints about the shader.
author: yum <yum.food.vr@gmail.com> 2023-02-02 18:00:18 -0800
committer: yum <yum.food.vr@gmail.com> 2023-02-13 14:36:25 -0800
commit: 1cb5bdfe8cba6fe4647448cd3cf0c63ecbd7dfc2 (patch)
tree: e338264fbf6f75911246ca61c934110e00f144c8 /Scripts
parent: 7c6894614dcc3ebc5d4c8839b64f4da761b5ccf0 (diff)
4 files changed, 60 insertions, 14 deletions
diff --git a/Scripts/emotes_v2.py b/Scripts/emotes_v2.py
index 195e116..165db7c 100644
--- a/Scripts/emotes_v2.py
+++ b/Scripts/emotes_v2.py
@@ -12,6 +12,8 @@ from typing import Any, Dict, List, Tuple
 # The character range [0x0000, 0xDFFF] is reserved for text.
 # The range [0xE000, infinity) is left over for emotes.
 EMOTES_LETTER_OFFSET = 0xE000
+EMOTES_HEIGHT = 512
+EMOTES_TEX_SZ = 4096
 
 def superimpose_image(base_img: Image, overlay_img: Image, position: Tuple[int, int]) -> Image:
     base_img.paste(overlay_img, position, overlay_img)
@@ -28,7 +30,7 @@ def get_images_from_directory(directory_path: str) -> List[Tuple[Any, str]]:
     for filename in os.listdir(directory_path):
         file_path = os.path.join(directory_path, filename)
         if os.path.isfile(file_path) and file_path.endswith(".png"):
-            image = Image.open(file_path)
+            image = Image.open(file_path).convert("RGBA")
             name = os.path.basename(filename).split('.')[0]
             images.append((image, name))
     return images
@@ -69,6 +71,45 @@ class EmotesState:
         with open(pickle_path, 'rb') as f:
             self.bits = pickle.load(f)
 
+    # This is quite slow: we do a search-and-replace over the message (O(n))
+    # for each of the m keywords, times each of the k variants of a keyword.
+    # Thus total complexity is O(m*n*k). All three of these numbers are
+    # typically small: m and k typically < 10, n typically < 200.
+    #
+    # Naively one might split the input into words, but this only works for
+    # English-like languages. East Asian languages like Japanese don't
+    # really divide into words AFAIK so this wouldn't work for them.
+    #
+    # Unless the performance becomes a user-reported problem, stick with this
+    # inefficient but reliable method.
+    def encode_emotes(self, msg: str):
+        for keyword, bits in self.bits.items():
+            bits_str = ""
+            for bit in bits:
+                bits_str += chr(bit)
+            # ALL CAPS
+            tmp = keyword.upper()
+            msg = msg.replace(tmp, bits_str)
+            # lowercase
+            tmp = keyword.lower()
+            msg = msg.replace(tmp, bits_str)
+            # Capitalized
+            tmp = keyword.lower().capitalize()
+            msg = msg.replace(tmp, bits_str)
+            # uppercase, dashes inserted
+            tmp = '-'.join(keyword.upper())
+            msg = msg.replace(tmp, bits_str)
+            # uppercase, spaces inserted
+            tmp = ' '.join(keyword.upper())
+            msg = msg.replace(tmp, bits_str)
+            # lowercase, spaces inserted
+            tmp = ' '.join(keyword.lower())
+            msg = msg.replace(tmp, bits_str)
+            # uppercase, commas and spaces inserted
+            tmp = ', '.join(keyword.upper())
+            msg = msg.replace(tmp, bits_str)
+        return msg
+
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
     parser.add_argument("dir", type=str, help="directory to get images from")
@@ -82,21 +123,21 @@ if __name__ == "__main__":
     board_aspect_ratio = int(args.board_aspect_ratio)
     texture_aspect_ratio = int(args.texture_aspect_ratio)
 
-    base_img = Image.new("RGBA", (4096, 4096), (0, 0, 0, 0))
+    base_img = Image.new("RGBA", (EMOTES_TEX_SZ, EMOTES_TEX_SZ), (0, 0, 0, 0))
     images_and_filenames = get_images_from_directory(directory_path)
     i = 0
     bits = {}  # Dict[str, List[int]]
     for img, filename in images_and_filenames:
         print(f"Adding {filename}")
         img = resize_image_with_aspect_ratio(img, board_aspect_ratio)
-        img = resize_image_to_height(img, 1024)
-        img_fragments = split_resized_image(img, int(1024 / texture_aspect_ratio), 1024)
+        img = resize_image_to_height(img, EMOTES_HEIGHT)
+        img_fragments = split_resized_image(img, int(EMOTES_HEIGHT / texture_aspect_ratio), EMOTES_HEIGHT)
         img_bits = []  # List[int]
         for img_fragment in img_fragments:
             i = i + 1
             img_pos = i_to_pos(i, 
-                    1024 / texture_aspect_ratio, 1024,
-                    4096, 4096)
+                    EMOTES_HEIGHT / texture_aspect_ratio, EMOTES_HEIGHT,
+                    EMOTES_TEX_SZ, EMOTES_TEX_SZ)
             print(f"{img_pos}")
             superimpose_image(base_img, img_fragment, img_pos)
             img_bits.append(EMOTES_LETTER_OFFSET + i)
diff --git a/Scripts/generate_shader.py b/Scripts/generate_shader.py
index cf113ec..15dc9b9 100644
--- a/Scripts/generate_shader.py
+++ b/Scripts/generate_shader.py
@@ -48,11 +48,11 @@ def generateCgConstants(nbytes: int, nrows: int, ncols: int, prefix: str = "") -
 
 # This is the basic idea of what we're generating:
 #      // Get the value of the parameter for the cell we're in.
-#      int GetLetterParameter(float2 uv)
+#      uint GetLetterParameter(float2 uv)
 #      {
 #        float CHAR_COL = floor(uv.x * Cols);
 #        float CHAR_ROW = floor(uv.y * Rows);
-#        int res = 0;
+#        uint res = 0;
 #
 #          [forcecase] switch(CHAR_ROW) {
 #            case n:
@@ -64,8 +64,8 @@ def generateCgConstants(nbytes: int, nrows: int, ncols: int, prefix: str = "") -
 #              case 1:
 #              ...
 #
-#                res |= ((int) round(_Letter_Row00_Col00_Byte0)) << (0 * 8);
-#                res |= ((int) round(_Letter_Row00_Col00_Byte1)) << (1 * 8);
+#                res |= ((uint) round(_Letter_Row00_Col00_Byte0)) << (0 * 8);
+#                res |= ((uint) round(_Letter_Row00_Col00_Byte1)) << (1 * 8);
 #                continue;
 #              }
 #        }
@@ -84,7 +84,7 @@ def generateLetterAccessor(nbytes: int, nrows: int, ncols: int, prefix: str = ""
             lines.append(prefix + "      case {}:".format(col))
             for byte in range(0, nbytes):
                 param_name = generate_utils.getShaderParamByRowColByte(row, col, byte)
-                lines.append(prefix + "        res |= ((int) round({})) << ({} * 8);".format(param_name, byte))
+                lines.append(prefix + "        res |= ((uint) round({})) << ({} * 8);".format(param_name, byte))
             lines.append(prefix + "        return res;")
         lines.append(prefix + "      default:")
         lines.append(prefix + "        return 0;")
diff --git a/Scripts/osc_ctrl.py b/Scripts/osc_ctrl.py
index 750059f..3ff56ca 100644
--- a/Scripts/osc_ctrl.py
+++ b/Scripts/osc_ctrl.py
@@ -90,6 +90,8 @@ def updateRegion(client, region_idx, letter_encoded):
 # in FIFO order; e.g., the most recently spoken words are sent last.
 # Returns True if done paging, False otherwise.
 def pageMessage(osc_state: OscState, msg: str, estate: EmotesState) -> bool:
+    msg = estate.encode_emotes(msg)
+
     msg_slice, slice_idx = osc_state.pager.getNextSlice(msg)
     if slice_idx == -1:
         return True
diff --git a/Scripts/transcribe.py b/Scripts/transcribe.py
index 3171336..d67edb6 100644
--- a/Scripts/transcribe.py
+++ b/Scripts/transcribe.py
@@ -368,7 +368,7 @@ def readControllerInput(audio_state, enable_local_beep: bool,
                     audio_state.audio_paused = True
 
                     if enable_local_beep == 1:
-                        playsound(os.path.abspath("../Sounds/Noise_Off_Quiet.wav"))
+                        playsound(os.path.abspath("Resources/Sounds/Noise_Off_Quiet.wav"))
                 elif state == PAUSE_STATE:
                     state = RECORD_STATE
                     if not use_builtin:
@@ -382,7 +382,7 @@ def readControllerInput(audio_state, enable_local_beep: bool,
                     audio_state.audio_paused = False
 
                     if enable_local_beep == 1:
-                        playsound(os.path.abspath("../Sounds/Noise_On_Quiet.wav"))
+                        playsound(os.path.abspath("Resources/Sounds/Noise_On_Quiet.wav"))
 
 # model should correspond to one of the Whisper models defined in
 # whisper/__init__.py. Examples: tiny, base, small, medium.
@@ -436,10 +436,13 @@ if __name__ == "__main__":
 
     print("args: {}".format(" ".join(sys.argv)))
 
-    # Set cwd to the directory holding the script
+    # Set cwd to TaSTT/
     abspath = os.path.abspath(__file__)
     dname = os.path.dirname(abspath)
+    dname = os.path.dirname(dname)
+    dname = os.path.dirname(dname)
     os.chdir(dname)
+    print(f"Set cwd to {os.getcwd()}")
 
     parser = argparse.ArgumentParser()
     parser.add_argument("--mic", type=str, help="Which mic to use. Options: index, focusrite. Default: index")
author	yum <yum.food.vr@gmail.com>	2023-02-02 18:00:18 -0800
committer	yum <yum.food.vr@gmail.com>	2023-02-13 14:36:25 -0800
commit	1cb5bdfe8cba6fe4647448cd3cf0c63ecbd7dfc2 (patch)
tree	e338264fbf6f75911246ca61c934110e00f144c8 /Scripts
parent	7c6894614dcc3ebc5d4c8839b64f4da761b5ccf0 (diff)