diff options
| author | yum <yum.food.vr@gmail.com> | 2023-02-02 18:00:18 -0800 |
|---|---|---|
| committer | yum <yum.food.vr@gmail.com> | 2023-02-13 14:36:25 -0800 |
| commit | 1cb5bdfe8cba6fe4647448cd3cf0c63ecbd7dfc2 (patch) | |
| tree | e338264fbf6f75911246ca61c934110e00f144c8 | |
| parent | 7c6894614dcc3ebc5d4c8839b64f4da761b5ccf0 (diff) | |
Finish emotes
Emotes require 2 bytes per char. They're encoded into the region
[0xE000, infinity). The texture is 4k, and uses 1k vertical pixels
per emote segment, for a maximum of 32 segments.
* Reduce volume of noise indicator by 90%. Quiet is probably better.
Might want to add a volume slider at some point.
* Bugfix: emotes without a transparency channel now work
* Address a couple of Unity performance complaints about the shader
| -rw-r--r-- | Scripts/emotes_v2.py | 53 | ||||
| -rw-r--r-- | Scripts/generate_shader.py | 10 | ||||
| -rw-r--r-- | Scripts/osc_ctrl.py | 2 | ||||
| -rw-r--r-- | Scripts/transcribe.py | 9 | ||||
| -rw-r--r-- | Shaders/TaSTT_lighting_template.cginc | 21 |
5 files changed, 66 insertions, 29 deletions
diff --git a/Scripts/emotes_v2.py b/Scripts/emotes_v2.py index 195e116..165db7c 100644 --- a/Scripts/emotes_v2.py +++ b/Scripts/emotes_v2.py @@ -12,6 +12,8 @@ from typing import Any, Dict, List, Tuple # The character range [0x0000, 0xDFFF] is reserved for text. # The range [0xE000, infinity) is left over for emotes. EMOTES_LETTER_OFFSET = 0xE000 +EMOTES_HEIGHT = 512 +EMOTES_TEX_SZ = 4096 def superimpose_image(base_img: Image, overlay_img: Image, position: Tuple[int, int]) -> Image: base_img.paste(overlay_img, position, overlay_img) @@ -28,7 +30,7 @@ def get_images_from_directory(directory_path: str) -> List[Tuple[Any, str]]: for filename in os.listdir(directory_path): file_path = os.path.join(directory_path, filename) if os.path.isfile(file_path) and file_path.endswith(".png"): - image = Image.open(file_path) + image = Image.open(file_path).convert("RGBA") name = os.path.basename(filename).split('.')[0] images.append((image, name)) return images @@ -69,6 +71,45 @@ class EmotesState: with open(pickle_path, 'rb') as f: self.bits = pickle.load(f) + # This is quite slow since we do a search and replace (O(n)) + # for each keyword O(m) times each variant of said keyword (O(k)). + # Thus total complexity is O(m*n*k). All three of these numbers are + # typically small: m and k typically < 10, n typically < 200. + # + # Naively one might split the input into words, but this only works for + # English-like languages. Eastern Asian languages like Japanese don't + # really divide into words AFAIK so this wouldn't work for them. + # + # Unless the performance becomes a user-reported problem, stick with this + # inefficient but reliable method. 
+ def encode_emotes(self, msg: str): + for keyword, bits in self.bits.items(): + bits_str = "" + for bit in bits: + bits_str += chr(bit) + # ALL CAPS + tmp = keyword.upper() + msg = msg.replace(tmp, bits_str) + # lowercase + tmp = keyword.lower() + msg = msg.replace(tmp, bits_str) + # Capitalized + tmp = keyword.lower().capitalize() + msg = msg.replace(tmp, bits_str) + # dashes inserted + tmp = '-'.join(keyword.upper()) + msg = msg.replace(tmp, bits_str) + # uppercase, spaces inserted + tmp = ' '.join(keyword.upper()) + msg = msg.replace(tmp, bits_str) + # lowercase, spaces inserted + tmp = ' '.join(keyword.lower()) + msg = msg.replace(tmp, bits_str) + # uppercase, commas and spaces inserted + tmp = ', '.join(keyword.upper()) + msg = msg.replace(tmp, bits_str) + return msg + if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("dir", type=str, help="directory to get images from") @@ -82,21 +123,21 @@ if __name__ == "__main__": board_aspect_ratio = int(args.board_aspect_ratio) texture_aspect_ratio = int(args.texture_aspect_ratio) - base_img = Image.new("RGBA", (4096, 4096), (0, 0, 0, 0)) + base_img = Image.new("RGBA", (EMOTES_TEX_SZ, EMOTES_TEX_SZ), (0, 0, 0, 0)) images_and_filenames = get_images_from_directory(directory_path) i = 0 bits = {} # Dict[str, List[int]] for img, filename in images_and_filenames: print(f"Adding {filename}") img = resize_image_with_aspect_ratio(img, board_aspect_ratio) - img = resize_image_to_height(img, 1024) - img_fragments = split_resized_image(img, int(1024 / texture_aspect_ratio), 1024) + img = resize_image_to_height(img, EMOTES_HEIGHT) + img_fragments = split_resized_image(img, int(EMOTES_HEIGHT / texture_aspect_ratio), EMOTES_HEIGHT) img_bits = [] # List[int] for img_fragment in img_fragments: i = i + 1 img_pos = i_to_pos(i, - 1024 / texture_aspect_ratio, 1024, - 4096, 4096) + EMOTES_HEIGHT / texture_aspect_ratio, EMOTES_HEIGHT, + EMOTES_TEX_SZ, EMOTES_TEX_SZ) print(f"{img_pos}") 
superimpose_image(base_img, img_fragment, img_pos) img_bits.append(EMOTES_LETTER_OFFSET + i) diff --git a/Scripts/generate_shader.py b/Scripts/generate_shader.py index cf113ec..15dc9b9 100644 --- a/Scripts/generate_shader.py +++ b/Scripts/generate_shader.py @@ -48,11 +48,11 @@ def generateCgConstants(nbytes: int, nrows: int, ncols: int, prefix: str = "") - # This is the basic idea of what we're generating: # // Get the value of the parameter for the cell we're in. -# int GetLetterParameter(float2 uv) +# uint GetLetterParameter(float2 uv) # { # float CHAR_COL = floor(uv.x * Cols); # float CHAR_ROW = floor(uv.y * Rows); -# int res = 0; +# uint res = 0; # # [forcecase] switch(CHAR_ROW) { # case n: @@ -64,8 +64,8 @@ def generateCgConstants(nbytes: int, nrows: int, ncols: int, prefix: str = "") - # case 1: # ... # -# res |= ((int) round(_Letter_Row00_Col00_Byte0)) << (0 * 8); -# res |= ((int) round(_Letter_Row00_Col00_Byte1)) << (1 * 8); +# res |= ((uint) round(_Letter_Row00_Col00_Byte0)) << (0 * 8); +# res |= ((uint) round(_Letter_Row00_Col00_Byte1)) << (1 * 8); # continue; # } # } @@ -84,7 +84,7 @@ def generateLetterAccessor(nbytes: int, nrows: int, ncols: int, prefix: str = "" lines.append(prefix + " case {}:".format(col)) for byte in range(0, nbytes): param_name = generate_utils.getShaderParamByRowColByte(row, col, byte) - lines.append(prefix + " res |= ((int) round({})) << ({} * 8);".format(param_name, byte)) + lines.append(prefix + " res |= ((uint) round({})) << ({} * 8);".format(param_name, byte)) lines.append(prefix + " return res;") lines.append(prefix + " default:") lines.append(prefix + " return 0;") diff --git a/Scripts/osc_ctrl.py b/Scripts/osc_ctrl.py index 750059f..3ff56ca 100644 --- a/Scripts/osc_ctrl.py +++ b/Scripts/osc_ctrl.py @@ -90,6 +90,8 @@ def updateRegion(client, region_idx, letter_encoded): # in FIFO order; e.g., the most recently spoken words are sent last. # Returns True if done paging, False otherwise. 
def pageMessage(osc_state: OscState, msg: str, estate: EmotesState) -> bool: + msg = estate.encode_emotes(msg) + msg_slice, slice_idx = osc_state.pager.getNextSlice(msg) if slice_idx == -1: return True diff --git a/Scripts/transcribe.py b/Scripts/transcribe.py index 3171336..d67edb6 100644 --- a/Scripts/transcribe.py +++ b/Scripts/transcribe.py @@ -368,7 +368,7 @@ def readControllerInput(audio_state, enable_local_beep: bool, audio_state.audio_paused = True if enable_local_beep == 1: - playsound(os.path.abspath("../Sounds/Noise_Off_Quiet.wav")) + playsound(os.path.abspath("Resources/Sounds/Noise_Off_Quiet.wav")) elif state == PAUSE_STATE: state = RECORD_STATE if not use_builtin: @@ -382,7 +382,7 @@ def readControllerInput(audio_state, enable_local_beep: bool, audio_state.audio_paused = False if enable_local_beep == 1: - playsound(os.path.abspath("../Sounds/Noise_On_Quiet.wav")) + playsound(os.path.abspath("Resources/Sounds/Noise_On_Quiet.wav")) # model should correspond to one of the Whisper models defined in # whisper/__init__.py. Examples: tiny, base, small, medium. @@ -436,10 +436,13 @@ if __name__ == "__main__": print("args: {}".format(" ".join(sys.argv))) - # Set cwd to the directory holding the script + # Set cwd to TaSTT/ abspath = os.path.abspath(__file__) dname = os.path.dirname(abspath) + dname = os.path.dirname(dname) + dname = os.path.dirname(dname) os.chdir(dname) + print(f"Set cwd to {os.getcwd()}") parser = argparse.ArgumentParser() parser.add_argument("--mic", type=str, help="Which mic to use. Options: index, focusrite. Default: index") diff --git a/Shaders/TaSTT_lighting_template.cginc b/Shaders/TaSTT_lighting_template.cginc index 15b4e41..aa20751 100644 --- a/Shaders/TaSTT_lighting_template.cginc +++ b/Shaders/TaSTT_lighting_template.cginc @@ -273,7 +273,7 @@ float2 GetLetter(float2 uv, int nth_letter, } // Get the value of the parameter for the cell we're in. 
-int GetLetterParameter(float2 uv) +uint GetLetterParameter(float2 uv) { float CHAR_COL = floor(uv.x * NCOLS); float CHAR_ROW = floor(uv.y * NROWS); @@ -575,7 +575,7 @@ fixed4 frag(v2f i) : SV_Target fixed4 text = fixed4(0, 0, 0, 0); bool discard_text = false; - int letter = GetLetterParameter(uv_with_margin); + uint letter = GetLetterParameter(uv_with_margin); float texture_cols; float texture_rows; @@ -587,8 +587,8 @@ fixed4 frag(v2f i) : SV_Target letter_uv = GetLetter(uv_with_margin, letter % 0x2000, texture_cols, texture_rows, NCOLS, NROWS, /*margin=*/0.02); } else { is_emote = true; - texture_cols = 8.0; - texture_rows = 4.0; + texture_cols = 16.0; + texture_rows = 8.0; // This will need to be updated if we create multiple emote textures. letter_uv = GetLetter(uv_with_margin, letter % 0x2000, texture_cols, texture_rows, NCOLS, NROWS, /*margin=*/0); } @@ -604,7 +604,7 @@ fixed4 frag(v2f i) : SV_Target const float iddx = ddx(letter_uv.x); const float iddy = ddy(letter_uv.y); - if (Enable_Dithering) { + if (Enable_Dithering && !is_emote) { // Add noise to UV. // Here, iddx and iddy tell us how big the current UV cell is with respect to // screen space (i.e. how many pixels wide it is). @@ -635,17 +635,8 @@ fixed4 frag(v2f i) : SV_Target //float2 cur_letter_uv = letter_uv + float2(aa_region_x, aa_region_y); float2 cur_letter_uv = letter_uv; - - if (is_emote) { - // Emotes are broken up into several pieces and packed tightly. Thus one - // emote may wrap around the edge of the texture. Clamping near the edge - // of the texture avoids a small line from appearing in the middle of - // these textures. - float epsilon = 0.002; - cur_letter_uv.x = clamp(cur_letter_uv.x, epsilon, 1.0 - epsilon); - } - int which_texture = (int) floor(letter / (64 * 128)); + int which_texture = (int) floor(letter / (uint) (64 * 128)); [forcecase] switch (which_texture) { case 0: |
