diff options
| author | yum <yum.food.vr@gmail.com> | 2023-02-02 01:02:03 -0800 |
|---|---|---|
| committer | yum <yum.food.vr@gmail.com> | 2023-02-13 14:36:20 -0800 |
| commit | 7c6894614dcc3ebc5d4c8839b64f4da761b5ccf0 (patch) | |
| tree | 6232b86b09190fd162aeb67229da359971b2e517 /Scripts | |
| parent | 2fc3b1b978b6e24814e9de7200865b912108bd34 (diff) | |
Begin work adding emotes
Done:
* Users can add images to Fonts/Emotes/
* The filename of that image without its extension ('clueless.png' becomes
'clueless') is the keyword that makes the image show up in game.
* Fix a bug in the shader where letters on the 2nd texture and later
would have UV outside of [0.0, 1.0]
Not yet implemented:
* Transcribed words are encoded using the emotes mapping
Diffstat (limited to 'Scripts')
| -rw-r--r-- | Scripts/emotes.py | 16 | ||||
| -rw-r--r-- | Scripts/emotes_v2.py | 109 | ||||
| -rw-r--r-- | Scripts/generate_shader.py | 6 | ||||
| -rw-r--r-- | Scripts/osc_ctrl.py | 10 | ||||
| -rw-r--r-- | Scripts/transcribe.py | 33 |
5 files changed, 150 insertions, 24 deletions
diff --git a/Scripts/emotes.py b/Scripts/emotes.py index 0a4ed01..6ae0930 100644 --- a/Scripts/emotes.py +++ b/Scripts/emotes.py @@ -111,16 +111,28 @@ def addImageToTexture(tex: Image, img_path: str, x: int, y:int): def parseArgs(): parser = argparse.ArgumentParser() parser.add_argument("--texture_path", type=str, help="Path to save the generated texture.") + parser.add_argument("--rows", type=str, help="The number of rows on the board") + parser.add_argument("--cols", type=str, help="The number of columns on the board") args = parser.parse_args() - if not args.texture_path: - args.texture_path = "img_texture.png" + if not args.texture_path or not args.rows or not args.cols: + print("--texture_path, --rows, --cols required", file=sys.stderr) + sys.exit(1) return args if __name__ == "__main__": args = parseArgs() + rows = int(args.rows) + cols = int(args.cols) + # board is this much wider than tall + board_aspect_ratio = 2 + # each cell a square divided into `rows`x`cols` is this much wider than tall + cell_aspect_ratio = rows / cols + # each cell is this much wider than tall + board_cell_aspect_ratio = board_aspect_ratio * cell_aspect_ratio + tex = openTexture(args.texture_path) for i in range(0, len(IMG_TEX_DATA)): filename = IMG_TEX_DATA[i][0] diff --git a/Scripts/emotes_v2.py b/Scripts/emotes_v2.py new file mode 100644 index 0000000..195e116 --- /dev/null +++ b/Scripts/emotes_v2.py @@ -0,0 +1,109 @@ +#!/usr/bin/env python3 + +import argparse +import os +import pickle +import sys + +from math import floor +from PIL import Image +from typing import Any, Dict, List, Tuple + +# The character range [0x0000, 0xDFFF] is reserved for text. +# The range [0xE000, infinity) is left over for emotes. 
+EMOTES_LETTER_OFFSET = 0xE000 + +def superimpose_image(base_img: Image, overlay_img: Image, position: Tuple[int, int]) -> Image: + base_img.paste(overlay_img, position, overlay_img) + return base_img + +def i_to_pos(i, sm_wd, sm_ht, big_wd, big_ht) -> Tuple[int, int]: + x = i * sm_wd % big_wd + row = floor((i * sm_wd) / big_wd) + y = row * sm_ht + return int(x), int(y) + +def get_images_from_directory(directory_path: str) -> List[Tuple[Any, str]]: + images = [] + for filename in os.listdir(directory_path): + file_path = os.path.join(directory_path, filename) + if os.path.isfile(file_path) and file_path.endswith(".png"): + image = Image.open(file_path) + name = os.path.basename(filename).split('.')[0] + images.append((image, name)) + return images + +def split_resized_image(img, wd: int, ht: int) -> List[Any]: + aspect_ratio = img.width / img.height + width = int(ht * aspect_ratio) + img = img.resize((width, ht)) + + split_images = [] + for i in range(0, img.width, wd): + split_image = img.crop((i, 0, i + wd, ht)) + split_images.append(split_image) + + return split_images + +print(i_to_pos(0, 5, 10, 10, 20)) +print(i_to_pos(1, 5, 10, 10, 20)) +print(i_to_pos(2, 5, 10, 10, 20)) +print(i_to_pos(3, 5, 10, 10, 20)) + +def resize_image_with_aspect_ratio(img: Image, aspect_ratio: float) -> Image: + original_width, original_height = img.size + new_width = int(original_height * aspect_ratio) + new_height = original_height + return img.resize((new_width, new_height)) + +def resize_image_to_height(img: Image, height: int) -> Image: + aspect_ratio = img.width / img.height + new_width = int(height * aspect_ratio) + return img.resize((new_width, height)) + +class EmotesState: + def __init__(self): + self.bits = {} + + def load(self, pickle_path): + with open(pickle_path, 'rb') as f: + self.bits = pickle.load(f) + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("dir", type=str, help="directory to get images from") + 
parser.add_argument("board_aspect_ratio", help="aspect ratio of a cell in the board") + parser.add_argument("texture_aspect_ratio", help="aspect ratio of a cell in the texture") + parser.add_argument("tex_path", type=str, help="path to save the texture to") + parser.add_argument("pickle_path", type=str, help="path to save the texture index to") + args = parser.parse_args() + + directory_path = args.dir + board_aspect_ratio = int(args.board_aspect_ratio) + texture_aspect_ratio = int(args.texture_aspect_ratio) + + base_img = Image.new("RGBA", (4096, 4096), (0, 0, 0, 0)) + images_and_filenames = get_images_from_directory(directory_path) + i = 0 + bits = {} # Dict[str, List[int]] + for img, filename in images_and_filenames: + print(f"Adding {filename}") + img = resize_image_with_aspect_ratio(img, board_aspect_ratio) + img = resize_image_to_height(img, 1024) + img_fragments = split_resized_image(img, int(1024 / texture_aspect_ratio), 1024) + img_bits = [] # List[int] + for img_fragment in img_fragments: + i = i + 1 + img_pos = i_to_pos(i, + 1024 / texture_aspect_ratio, 1024, + 4096, 4096) + print(f"{img_pos}") + superimpose_image(base_img, img_fragment, img_pos) + img_bits.append(EMOTES_LETTER_OFFSET + i) + emote_name = os.path.basename(filename).split('.')[0] + print(f"{emote_name} -> {img_bits}") + bits[emote_name] = img_bits + base_img.save(args.tex_path) + with open(args.pickle_path, 'wb') as f: + pickle.dump(bits, f) + diff --git a/Scripts/generate_shader.py b/Scripts/generate_shader.py index 9ff0bc3..cf113ec 100644 --- a/Scripts/generate_shader.py +++ b/Scripts/generate_shader.py @@ -64,8 +64,8 @@ def generateCgConstants(nbytes: int, nrows: int, ncols: int, prefix: str = "") - # case 1: # ... 
# -# res |= ((int) _Letter_Row00_Col00_Byte0) << (0 * 8); -# res |= ((int) _Letter_Row00_Col00_Byte1) << (1 * 8); +# res |= ((int) round(_Letter_Row00_Col00_Byte0)) << (0 * 8); +# res |= ((int) round(_Letter_Row00_Col00_Byte1)) << (1 * 8); # continue; # } # } @@ -84,7 +84,7 @@ def generateLetterAccessor(nbytes: int, nrows: int, ncols: int, prefix: str = "" lines.append(prefix + " case {}:".format(col)) for byte in range(0, nbytes): param_name = generate_utils.getShaderParamByRowColByte(row, col, byte) - lines.append(prefix + " res |= ((int) {}) << ({} * 8);".format(param_name, byte)) + lines.append(prefix + " res |= ((int) round({})) << ({} * 8);".format(param_name, byte)) lines.append(prefix + " return res;") lines.append(prefix + " default:") lines.append(prefix + " return 0;") diff --git a/Scripts/osc_ctrl.py b/Scripts/osc_ctrl.py index 93b236b..750059f 100644 --- a/Scripts/osc_ctrl.py +++ b/Scripts/osc_ctrl.py @@ -1,11 +1,13 @@ #!/usr/bin/env python3 -import argparse +from emotes_v2 import EmotesState from generate_utils import config -import generate_utils +from math import ceil from paging import MultiLinePager from pythonosc import udp_client -from math import ceil + +import argparse +import generate_utils import time # Based on a couple experiments, this seems like about as fast as we can go @@ -87,7 +89,7 @@ def updateRegion(client, region_idx, letter_encoded): # Sends one slice of `msg` to the board then returns. Slices are sent # in FIFO order; e.g., the most recently spoken words are sent last. # Returns True if done paging, False otherwise. 
-def pageMessage(osc_state: OscState, msg: str) -> bool: +def pageMessage(osc_state: OscState, msg: str, estate: EmotesState) -> bool: msg_slice, slice_idx = osc_state.pager.getNextSlice(msg) if slice_idx == -1: return True diff --git a/Scripts/transcribe.py b/Scripts/transcribe.py index ee76a0a..3171336 100644 --- a/Scripts/transcribe.py +++ b/Scripts/transcribe.py @@ -1,28 +1,23 @@ #!/usr/bin/env python3 +from datetime import datetime +from emotes_v2 import EmotesState +from functools import partial +from playsound import playsound + import argparse import copy -from datetime import datetime import os import osc_ctrl -from functools import partial import generate_utils -# python3 -m pip install pyaudio -# License: MIT. import pyaudio import numpy as np -# python3 -m pip install playsound==1.2.2 -# License: MIT. -from playsound import playsound import steamvr import string_matcher import sys import threading import time import wave -# python3 -m pip install git+https://github.com/openai/whisper.git -# python3 -m pip install torch -f https://download.pytorch.org/whl/torch_stable.html -# License: MIT. import whisper class Config: @@ -303,14 +298,14 @@ def transcribeAudio(audio_state, model, use_cpu: bool): audio_state.transcribe_no_change_count = 0 audio_state.transcribe_sleep_duration = audio_state.transcribe_sleep_duration_min_s -def sendAudio(audio_state, use_builtin: bool): +def sendAudio(audio_state, use_builtin: bool, estate: EmotesState): while audio_state.run_app == True: text = audio_state.committed_text + " " + audio_state.text if use_builtin: ret = osc_ctrl.pageMessageBuiltin(audio_state.osc_state, text) time.sleep(1.5) else: - ret = osc_ctrl.pageMessage(audio_state.osc_state, text) + ret = osc_ctrl.pageMessage(audio_state.osc_state, text, estate) is_paging = (ret == False) osc_ctrl.indicatePaging(audio_state.osc_state.client, is_paging) @@ -393,7 +388,7 @@ def readControllerInput(audio_state, enable_local_beep: bool, # whisper/__init__.py. 
Examples: tiny, base, small, medium. def transcribeLoop(mic: str, language: str, model: str, enable_local_beep: bool, use_cpu: bool, use_builtin: bool, - button: str): + button: str, estate: EmotesState): audio_state = getMicStream(mic) audio_state.language = whisper.tokenizer.TO_LANGUAGE_CODE[language] @@ -410,7 +405,7 @@ def transcribeLoop(mic: str, language: str, model: str, transcribe_audio_thd.daemon = True transcribe_audio_thd.start() - send_audio_thd = threading.Thread(target = sendAudio, args = [audio_state, use_builtin]) + send_audio_thd = threading.Thread(target = sendAudio, args = [audio_state, use_builtin, estate]) send_audio_thd.daemon = True send_audio_thd.start() @@ -459,6 +454,7 @@ if __name__ == "__main__": parser.add_argument("--cpu", type=int, help="If set to 1, use CPU instead of GPU") parser.add_argument("--use_builtin", type=int, help="If set to 1, use the text box built into the game.") parser.add_argument("--button", type=str, help="The controller button used to start/stop transcription. E.g. \"left joystick\"") + parser.add_argument("--emotes_pickle", type=str, help="The path to emotes pickle. 
See emotes_v2.py for details.") args = parser.parse_args() if not args.mic: @@ -482,6 +478,10 @@ if __name__ == "__main__": print("--button required", file=sys.stderr) sys.exit(1) + if not args.emotes_pickle: + print("--emotes_pickle required", file=sys.stderr) + sys.exit(1) + if args.window_duration_s: config.MAX_LENGTH_S = int(args.window_duration_s) @@ -495,11 +495,14 @@ if __name__ == "__main__": else: args.use_builtin = False + estate = EmotesState() + estate.load(args.emotes_pickle) + generate_utils.config.BYTES_PER_CHAR = int(args.bytes_per_char) generate_utils.config.CHARS_PER_SYNC = int(args.chars_per_sync) generate_utils.config.BOARD_ROWS = int(args.rows) generate_utils.config.BOARD_COLS = int(args.cols) transcribeLoop(args.mic, args.language, args.model, args.enable_local_beep, - args.cpu, args.use_builtin, args.button) + args.cpu, args.use_builtin, args.button, estate) |
