diff options
| author | yum <yum.food.vr@gmail.com> | 2022-12-22 23:10:37 -0800 |
|---|---|---|
| committer | yum <yum.food.vr@gmail.com> | 2022-12-24 12:13:07 -0800 |
| commit | 50d327b83b496085ec91e31100d12f5f60c7d4ac (patch) | |
| tree | 239431b51e578f2188e6cf4c70ca8905390e72be | |
| parent | 6f2c1dace46a68620bc61a732a2f43252bd5d3ba (diff) | |
GUI: expose chars per sync, bytes per char
Users can now control how many characters they send per sync event, as
well as the number of bytes used to represent each character.
This gives them the power to pick between faster paging and fewer sync
params.
International users must use 2 bytes per char (at least for now).
* package.ps1: don't distribute the gigantic TTF files, just the bitmaps
| -rw-r--r-- | GUI/GUI/GUI/Frame.cpp | 140 | ||||
| -rw-r--r-- | GUI/GUI/GUI/Frame.h | 5 | ||||
| -rw-r--r-- | GUI/GUI/GUI/PythonWrapper.cpp | 25 | ||||
| -rw-r--r-- | GUI/GUI/GUI/PythonWrapper.h | 6 | ||||
| -rw-r--r-- | GUI/package.ps1 | 3 | ||||
| -rw-r--r-- | Scripts/generate_params.py | 13 | ||||
| -rw-r--r-- | Scripts/generate_utils.py | 38 | ||||
| -rw-r--r-- | Scripts/libtastt.py | 42 | ||||
| -rw-r--r-- | Scripts/osc_ctrl.py | 40 | ||||
| -rw-r--r-- | Scripts/transcribe.py | 9 |
10 files changed, 237 insertions, 84 deletions
diff --git a/GUI/GUI/GUI/Frame.cpp b/GUI/GUI/GUI/Frame.cpp index 2ecc255..92d02ed 100644 --- a/GUI/GUI/GUI/Frame.cpp +++ b/GUI/GUI/GUI/Frame.cpp @@ -16,7 +16,7 @@ namespace { ID_NAVBAR_BUTTON_UNITY,
ID_PY_PANEL,
ID_PY_CONFIG_PANEL,
- ID_PY_CONFIG_DROPDOWN_PANEL,
+ ID_PY_APP_CONFIG_PANEL_PAIRS,
ID_PY_SETUP_BUTTON,
ID_PY_DUMP_MICS_BUTTON,
ID_PY_APP_DRAIN,
@@ -28,6 +28,8 @@ namespace { ID_PY_APP_LANG,
ID_PY_APP_LANG_PANEL,
ID_PY_APP_MODEL,
+ ID_PY_APP_CHARS_PER_SYNC,
+ ID_PY_APP_BYTES_PER_CHAR,
ID_PY_APP_MODEL_PANEL,
ID_UNITY_PANEL,
ID_UNITY_CONFIG_PANEL,
@@ -42,6 +44,8 @@ namespace { ID_UNITY_PARAMETERS_GENERATED_NAME,
ID_UNITY_MENU_GENERATED_NAME,
ID_UNITY_BUTTON_GEN_ANIMATOR,
+ ID_UNITY_chars_per_sync,
+ ID_UNITY_BYTES_PER_CHAR,
};
const wxString kMicChoices[] = {
@@ -180,6 +184,40 @@ namespace { };
const size_t kNumModelChoices = sizeof(kModelChoices) / sizeof(kModelChoices[0]);
constexpr int kModelDefault = 2; // base.en
+
+ const wxString kCharsPerSync[] = {
+ "5",
+ "6",
+ "7",
+ "8",
+ "9",
+ "10",
+ "11",
+ "12",
+ "13",
+ "14",
+ "15",
+ "16",
+ "17",
+ "18",
+ "19",
+ "20",
+ "21",
+ "22",
+ "23",
+ "24",
+ };
+ const size_t kNumCharsPerSync = sizeof(kCharsPerSync) / sizeof(kCharsPerSync[0]);
+ // By default, use the fastest option.
+ constexpr int kCharsDefault = kNumCharsPerSync - 1;
+
+ const wxString kBytesPerChar[] = {
+ "1",
+ "2",
+ };
+ const size_t kNumBytesPerChar = sizeof(kBytesPerChar) / sizeof(kBytesPerChar[0]);
+ // Sorry international users. Optimize for English speakers, by default.
+ constexpr int kBytesDefault = 0;
} // namespace
using ::Logging::Log;
@@ -222,34 +260,52 @@ Frame::Frame() auto* py_setup_button = new wxButton(py_config_panel, ID_PY_SETUP_BUTTON, "Set up Python virtual environment");
auto* py_dump_mics_button = new wxButton(py_config_panel, ID_PY_DUMP_MICS_BUTTON, "List input devices");
- auto* py_config_dropdown_panel = new wxPanel(py_config_panel, ID_PY_CONFIG_DROPDOWN_PANEL);
+ auto* py_app_config_panel_pairs = new wxPanel(py_config_panel, ID_PY_APP_CONFIG_PANEL_PAIRS);
{
- auto* py_app_mic = new wxChoice(py_config_dropdown_panel, ID_PY_APP_MIC, wxDefaultPosition,
+ auto* py_app_mic = new wxChoice(py_app_config_panel_pairs, ID_PY_APP_MIC, wxDefaultPosition,
wxDefaultSize, kNumMicChoices, kMicChoices);
py_app_mic->SetSelection(kMicDefault);
py_app_mic_ = py_app_mic;
- auto* py_app_lang = new wxChoice(py_config_dropdown_panel, ID_PY_APP_LANG, wxDefaultPosition,
+ auto* py_app_lang = new wxChoice(py_app_config_panel_pairs, ID_PY_APP_LANG, wxDefaultPosition,
wxDefaultSize, kNumLangChoices, kLangChoices);
py_app_lang->SetSelection(kLangDefault);
py_app_lang_ = py_app_lang;
- auto* py_app_model = new wxChoice(py_config_dropdown_panel, ID_PY_APP_MODEL, wxDefaultPosition,
+ auto* py_app_model = new wxChoice(py_app_config_panel_pairs, ID_PY_APP_MODEL, wxDefaultPosition,
wxDefaultSize, kNumModelChoices, kModelChoices);
py_app_model->SetSelection(kModelDefault);
py_app_model_ = py_app_model;
+ auto* py_app_chars_per_sync = new wxChoice(py_app_config_panel_pairs,
+ ID_PY_APP_CHARS_PER_SYNC, wxDefaultPosition,
+ wxDefaultSize, kNumCharsPerSync, kCharsPerSync);
+ py_app_chars_per_sync->SetSelection(kCharsDefault);
+ py_app_chars_per_sync_ = py_app_chars_per_sync;
+
+ auto* py_app_bytes_per_char = new wxChoice(py_app_config_panel_pairs,
+ ID_PY_APP_BYTES_PER_CHAR, wxDefaultPosition,
+ wxDefaultSize, kNumBytesPerChar, kBytesPerChar);
+ py_app_bytes_per_char->SetSelection(kBytesDefault);
+ py_app_bytes_per_char_ = py_app_bytes_per_char;
+
auto* sizer = new wxFlexGridSizer(/*cols=*/2);
- py_config_dropdown_panel->SetSizer(sizer);
+ py_app_config_panel_pairs->SetSizer(sizer);
- sizer->Add(new wxStaticText(py_config_dropdown_panel, wxID_ANY, /*label=*/"Microphone:"));
+ sizer->Add(new wxStaticText(py_app_config_panel_pairs, wxID_ANY, /*label=*/"Microphone:"));
sizer->Add(py_app_mic, /*proportion=*/0, /*flags=*/wxEXPAND);
- sizer->Add(new wxStaticText(py_config_dropdown_panel, wxID_ANY, /*label=*/"Language:"));
+ sizer->Add(new wxStaticText(py_app_config_panel_pairs, wxID_ANY, /*label=*/"Language:"));
sizer->Add(py_app_lang, /*proportion=*/0, /*flags=*/wxEXPAND);
- sizer->Add(new wxStaticText(py_config_dropdown_panel, wxID_ANY, /*label=*/"Model:"));
+ sizer->Add(new wxStaticText(py_app_config_panel_pairs, wxID_ANY, /*label=*/"Model:"));
sizer->Add(py_app_model, /*proportion=*/0, /*flags=*/wxEXPAND);
+
+ sizer->Add(new wxStaticText(py_app_config_panel_pairs, wxID_ANY, /*label=*/"Characters per sync:"));
+ sizer->Add(py_app_chars_per_sync, /*proportion=*/0, /*flags=*/wxEXPAND);
+
+ sizer->Add(new wxStaticText(py_app_config_panel_pairs, wxID_ANY, /*label=*/"Bytes per character:"));
+ sizer->Add(py_app_bytes_per_char, /*proportion=*/0, /*flags=*/wxEXPAND);
}
auto* py_app_start_button = new wxButton(py_config_panel, ID_PY_APP_START_BUTTON, "Begin transcribing");
@@ -259,7 +315,7 @@ Frame::Frame() py_config_panel->SetSizer(sizer);
sizer->Add(py_setup_button, /*proportion=*/0, /*flags=*/wxEXPAND);
sizer->Add(py_dump_mics_button, /*proportion=*/0, /*flags=*/wxEXPAND);
- sizer->Add(py_config_dropdown_panel, /*proportion=*/0, /*flags=*/wxEXPAND);
+ sizer->Add(py_app_config_panel_pairs, /*proportion=*/0, /*flags=*/wxEXPAND);
sizer->Add(py_app_start_button, /*proportion=*/0, /*flags=*/wxEXPAND);
sizer->Add(py_app_stop_button, /*proportion=*/0, /*flags=*/wxEXPAND);
}
@@ -348,6 +404,19 @@ Frame::Frame() unity_menu_generated_name->AppendText("TaSTT_Menu.asset");
unity_menu_generated_name_ = unity_menu_generated_name;
+ auto* unity_chars_per_sync = new wxChoice(unity_config_panel_pairs,
+ ID_UNITY_chars_per_sync, wxDefaultPosition,
+ wxDefaultSize, kNumCharsPerSync, kCharsPerSync);
+ unity_chars_per_sync->SetSelection(kCharsDefault);
+ unity_chars_per_sync_ = unity_chars_per_sync;
+
+ auto* unity_bytes_per_char = new wxChoice(unity_config_panel_pairs,
+ ID_UNITY_BYTES_PER_CHAR, wxDefaultPosition,
+ wxDefaultSize, kNumBytesPerChar, kBytesPerChar);
+ unity_bytes_per_char->SetSelection(kBytesDefault);
+ unity_bytes_per_char_ = unity_bytes_per_char;
+
+
auto* sizer = new wxFlexGridSizer(/*cols=*/2);
unity_config_panel_pairs->SetSizer(sizer);
@@ -374,6 +443,12 @@ Frame::Frame() sizer->Add(new wxStaticText(unity_config_panel_pairs, wxID_ANY, /*label=*/"Generated menu:"));
sizer->Add(unity_menu_generated_name);
+
+ sizer->Add(new wxStaticText(unity_config_panel_pairs, wxID_ANY, /*label=*/"Characters per sync:"));
+ sizer->Add(unity_chars_per_sync, /*proportion=*/0, /*flags=*/wxEXPAND);
+
+ sizer->Add(new wxStaticText(unity_config_panel_pairs, wxID_ANY, /*label=*/"Bytes per character:"));
+ sizer->Add(unity_bytes_per_char, /*proportion=*/0, /*flags=*/wxEXPAND);
}
auto* unity_button_gen_fx = new wxButton(unity_config_panel, ID_UNITY_BUTTON_GEN_ANIMATOR, "Generate avatar assets");
@@ -506,27 +581,38 @@ void Frame::OnGenerateFX(wxCommandEvent& event) #endif
std::filesystem::path unity_parameters_path = unity_parameters_file_picker_->GetPath().ToStdString();
#ifndef DEBUG
- if (!std::filesystem::exists(unity_parameters_path)) {
- std::ostringstream oss;
- oss << "Cannot generate FX layer: parameters do not exist at " << unity_parameters_path << std::endl;
- wxLogError(oss.str().c_str());
+ if (!std::filesystem::exists(unity_parameters_path)) {
+ std::ostringstream oss;
+ oss << "Cannot generate FX layer: parameters do not exist at " << unity_parameters_path << std::endl;
+ wxLogError(oss.str().c_str());
return;
- }
+ }
#endif
std::filesystem::path unity_menu_path = unity_menu_file_picker_->GetPath().ToStdString();
#ifndef DEBUG
- if (!std::filesystem::exists(unity_menu_path)) {
- std::ostringstream oss;
- oss << "Cannot generate FX layer: menu does not exist at " << unity_menu_path << std::endl;
- wxLogError(oss.str().c_str());
+ if (!std::filesystem::exists(unity_menu_path)) {
+ std::ostringstream oss;
+ oss << "Cannot generate FX layer: menu does not exist at " << unity_menu_path << std::endl;
+ wxLogError(oss.str().c_str());
return;
- }
+ }
#endif
std::string unity_animator_generated_dir = unity_animator_generated_dir_->GetLineText(0).ToStdString();
std::string unity_animator_generated_name = unity_animator_generated_name_->GetLineText(0).ToStdString();
std::string unity_parameters_generated_name = unity_parameters_generated_name_->GetLineText(0).ToStdString();
std::string unity_menu_generated_name = unity_menu_generated_name_->GetLineText(0).ToStdString();
+ int chars_per_sync_idx = unity_chars_per_sync_->GetSelection();
+ if (chars_per_sync_idx == wxNOT_FOUND) {
+ chars_per_sync_idx = kCharsDefault;
+ }
+ std::string chars_per_sync = kCharsPerSync[chars_per_sync_idx].ToStdString();
+ int bytes_per_char_idx = unity_bytes_per_char_->GetSelection();
+ if (bytes_per_char_idx == wxNOT_FOUND) {
+ bytes_per_char_idx = kBytesDefault;
+ }
+ std::string bytes_per_char = kBytesPerChar[bytes_per_char_idx].ToStdString();
+
std::string out;
if (!PythonWrapper::GenerateAnimator(
unity_assets_path.string(),
@@ -537,6 +623,8 @@ void Frame::OnGenerateFX(wxCommandEvent& event) unity_animator_generated_name,
unity_parameters_generated_name,
unity_menu_generated_name,
+ chars_per_sync,
+ bytes_per_char,
unity_out_)) {
wxLogError("Failed to generate animator:\n%s\n", out.c_str());
}
@@ -572,11 +660,21 @@ void Frame::OnAppStart(wxCommandEvent& event) { if (which_model == wxNOT_FOUND) {
which_model = kModelDefault;
}
+ int chars_per_sync_idx = py_app_chars_per_sync_->GetSelection();  // Read the transcription panel's own dropdown; unity_* belongs to the asset-generation panel.
+ if (chars_per_sync_idx == wxNOT_FOUND) {  // Nothing selected: fall back to the default index.
+ chars_per_sync_idx = kCharsDefault;
+ }
+ int bytes_per_char_idx = py_app_bytes_per_char_->GetSelection();  // Same reasoning: use the py_app_* control shown next to mic/lang/model.
+ if (bytes_per_char_idx == wxNOT_FOUND) {  // Nothing selected: fall back to the default index.
+ bytes_per_char_idx = kBytesDefault;
+ }
wxProcess* p = PythonWrapper::StartApp(std::move(cb),
kMicChoices[which_mic].ToStdString(),
kLangChoices[which_lang].ToStdString(),
- kModelChoices[which_model].ToStdString());
+ kModelChoices[which_model].ToStdString(),
+ kCharsPerSync[chars_per_sync_idx].ToStdString(),
+ kBytesPerChar[bytes_per_char_idx].ToStdString());
if (!p) {
Log(transcribe_out_, "Failed to launch transcription engine\n");
return;
diff --git a/GUI/GUI/GUI/Frame.h b/GUI/GUI/GUI/Frame.h index 9b94036..aebdd93 100644 --- a/GUI/GUI/GUI/Frame.h +++ b/GUI/GUI/GUI/Frame.h @@ -37,6 +37,11 @@ private: wxChoice* py_app_mic_;
wxChoice* py_app_lang_;
wxChoice* py_app_model_;
+ // TODO(yum) figure out how to deduplicate these objects
+ wxChoice* py_app_chars_per_sync_;
+ wxChoice* py_app_bytes_per_char_;
+ wxChoice* unity_chars_per_sync_;
+ wxChoice* unity_bytes_per_char_;
wxProcess* py_app_;
wxTimer py_app_drain_;
diff --git a/GUI/GUI/GUI/PythonWrapper.cpp b/GUI/GUI/GUI/PythonWrapper.cpp index 31849f5..4dda098 100644 --- a/GUI/GUI/GUI/PythonWrapper.cpp +++ b/GUI/GUI/GUI/PythonWrapper.cpp @@ -123,13 +123,16 @@ bool PythonWrapper::InstallPip(std::string* out) { wxProcess* PythonWrapper::StartApp( std::function<void(wxProcess* proc, int ret)>&& exit_callback, - const std::string& mic, const std::string& lang, const std::string& model) { + const std::string& mic, const std::string& lang, const std::string& model, + const std::string& chars_per_sync, const std::string& bytes_per_char) { return InvokeAsyncWithArgs({ "-u", "Resources/Scripts/transcribe.py", "--mic", mic, "--lang", lang, "--model", model, + "--chars_per_sync", chars_per_sync, + "--bytes_per_char", bytes_per_char, }, std::move(exit_callback)); } @@ -143,6 +146,8 @@ bool PythonWrapper::GenerateAnimator( const std::string& unity_animator_generated_name, const std::string& unity_parameters_generated_name, const std::string& unity_menu_generated_name, + const std::string& chars_per_sync, + const std::string& bytes_per_char, wxTextCtrl* out) { // Python script locations std::string libunity_path = "Resources/Scripts/libunity.py"; @@ -180,12 +185,6 @@ bool PythonWrapper::GenerateAnimator( tastt_generated_dir_path / unity_animator_generated_name; { - /* - if (std::filesystem::exists(tastt_generated_dir_path)) { - Log(out, "Erasing {}\n", tastt_generated_dir_path.string()); - std::filesystem::remove_all(tastt_generated_dir_path); - } - */ Log(out, "Creating {}\n", tastt_generated_dir_path.string()); std::filesystem::create_directories(tastt_generated_dir_path); } @@ -273,7 +272,9 @@ bool PythonWrapper::GenerateAnimator( std::string py_stdout, py_stderr; if (InvokeWithArgs({ libtastt_path, "gen_anims", "--gen_anim_dir", tastt_animations_path.string(), - "--guid_map", guid_map_path.string() }, + "--guid_map", guid_map_path.string(), + "--chars_per_sync", chars_per_sync, + "--bytes_per_char", bytes_per_char }, &py_stdout, 
&py_stderr)) { Log(out, "success!\n"); Log(out, py_stdout.c_str()); @@ -297,7 +298,9 @@ bool PythonWrapper::GenerateAnimator( if (InvokeWithArgs({ libtastt_path, "gen_fx", "--fx_dest", tastt_fx0_path.string(), "--gen_anim_dir", tastt_animations_path.string(), - "--guid_map", guid_map_path.string() }, + "--guid_map", guid_map_path.string(), + "--chars_per_sync", chars_per_sync, + "--bytes_per_char", bytes_per_char }, &py_stdout, &py_stderr)) { Log(out, "success!\n"); Log(out, py_stdout.c_str()); @@ -394,7 +397,9 @@ bool PythonWrapper::GenerateAnimator( std::string py_stdout, py_stderr; if (InvokeWithArgs({ generate_params_path, "--old_params", unity_parameters_path, - "--new_params", tastt_params_path.string()}, + "--new_params", tastt_params_path.string(), + "--chars_per_sync", chars_per_sync, + "--bytes_per_char", bytes_per_char }, &py_stdout, &py_stderr)) { Log(out, "success!\n"); Log(out, py_stdout.c_str()); diff --git a/GUI/GUI/GUI/PythonWrapper.h b/GUI/GUI/GUI/PythonWrapper.h index de5a2e4..5ce0113 100644 --- a/GUI/GUI/GUI/PythonWrapper.h +++ b/GUI/GUI/GUI/PythonWrapper.h @@ -38,7 +38,9 @@ namespace PythonWrapper wxProcess* StartApp( std::function<void(wxProcess* proc, int ret)>&& exit_callback, - const std::string& mic, const std::string& lang, const std::string& model); + const std::string& mic, const std::string& lang, const std::string& model, + const std::string& chars_per_sync, const std::string& bytes_per_char + ); bool GenerateAnimator( const std::string& unity_assets_path, @@ -49,6 +51,8 @@ namespace PythonWrapper const std::string& unity_animator_generated_name, const std::string& unity_parameters_generated_name, const std::string& unity_menu_generated_name, + const std::string& chars_per_sync, + const std::string& bytes_per_char, wxTextCtrl* out); }; diff --git a/GUI/package.ps1 b/GUI/package.ps1 index e4f8f3e..0941196 100644 --- a/GUI/package.ps1 +++ b/GUI/package.ps1 @@ -7,7 +7,8 @@ if (Test-Path $install_dir) { mkdir $install_dir > $null
mkdir $install_dir/Resources > $null
cp -Recurse ../Animations TaSTT/Resources/Animations
-cp -Recurse ../Fonts TaSTT/Resources/Fonts
+mkdir TaSTT/Resources/Fonts
+cp -Recurse ../Fonts/Bitmaps TaSTT/Resources/Fonts/Bitmaps
cp -Recurse ../Images TaSTT/Resources/Images
cp -Recurse ../Python TaSTT/Resources/Python
cp -Recurse ../Scripts TaSTT/Resources/Scripts
diff --git a/Scripts/generate_params.py b/Scripts/generate_params.py index 6c189a1..5deb17d 100644 --- a/Scripts/generate_params.py +++ b/Scripts/generate_params.py @@ -89,8 +89,8 @@ def generate(): params["PARAM_NAME"] = generate_utils.getSelectParam() result += generate_utils.replaceMacros(INT_PARAM, params) - for byte in range(0, generate_utils.BYTES_PER_CHAR): - for i in range(0, generate_utils.NUM_LAYERS): + for byte in range(0, generate_utils.config.BYTES_PER_CHAR): + for i in range(0, generate_utils.config.CHARS_PER_SYNC): params["PARAM_NAME"] = generate_utils.getBlendParam(i, byte) result += generate_utils.replaceMacros(FLOAT_PARAM, params) @@ -109,6 +109,8 @@ if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("--old_params", type=str, help="The parameters to append to") parser.add_argument("--new_params", type=str, help="The parameters to create") + parser.add_argument("--bytes_per_char", type=str, help="The number of bytes to use to represent each character") + parser.add_argument("--chars_per_sync", type=str, help="The number of characters to send on each sync event") args = parser.parse_args() if not args.old_params or not args.new_params: @@ -117,5 +119,12 @@ if __name__ == "__main__": parser.print_help() parser.exit(1) + if not args.bytes_per_char or not args.chars_per_sync: + print("--bytes_per_char and --chars_per_sync required", file=sys.stderr) + parser.print_help() + parser.exit(1) + generate_utils.config.BYTES_PER_CHAR = int(args.bytes_per_char) + generate_utils.config.CHARS_PER_SYNC = int(args.chars_per_sync) + append(args.old_params, generate(), args.new_params) diff --git a/Scripts/generate_utils.py b/Scripts/generate_utils.py index e8fcc8b..6930782 100644 --- a/Scripts/generate_utils.py +++ b/Scripts/generate_utils.py @@ -6,16 +6,24 @@ def replaceMacros(lines, macro_defs): lines = lines.replace("%" + k + "%", v) return lines -# Note, (BOARD_ROWS * BOARD_COLS % NUM_LAYERS) must equal 0. 
If not, writing to -# the last cell will (with the current implementation) wrap around to the front -# of the board. -BOARD_ROWS=4 -BOARD_COLS=48 -NUM_REGIONS = 24 -CHARS_PER_CELL=256 -BYTES_PER_CHAR=2 - -NUM_LAYERS=ceil((BOARD_ROWS * BOARD_COLS) / NUM_REGIONS) +class Config(): + def __init__(self): + self.BOARD_ROWS=4 + self.BOARD_COLS=48 + self.CHARS_PER_CELL=256 + self.BYTES_PER_CHAR=2 + self.CHARS_PER_SYNC=10 + + def numRegions(self, which_layer): + num_cells = self.BOARD_ROWS * self.BOARD_COLS + layers_in_last_region = num_cells % self.CHARS_PER_SYNC + float_result = num_cells / self.CHARS_PER_SYNC + if which_layer > layers_in_last_region: + return floor(float_result) + else: + return ceil(float_result) + +config = Config() # Implementation detail. We use this parameter to return from the terminal # state of the FX layer to the starting state. @@ -94,7 +102,7 @@ def getBoardIndex(which_layer, select): # We work around this by simply wrapping those animations back to the top # of the board, and rely on the OSC controller to simply not reference # those cells. 
- return (select * NUM_LAYERS + which_layer) % (BOARD_ROWS * BOARD_COLS) + return (select * config.CHARS_PER_SYNC + which_layer) % (config.BOARD_ROWS * config.BOARD_COLS) def getShaderParamByRowColByte(row, col, byte): return "_Letter_Row%02d_Col%02d_Byte%01d" % (row, col, byte) @@ -103,8 +111,8 @@ def getShaderParamByRowColByte(row, col, byte): def getShaderParam(which_layer, select, byte): index = getBoardIndex(which_layer, select) - col = index % BOARD_COLS - row = floor(index / BOARD_COLS) + col = index % config.BOARD_COLS + row = floor(index / config.BOARD_COLS) return getShaderParamByRowCol(row, col, byte) @@ -120,8 +128,8 @@ def getClearAnimationName(): def getAnimationNameByLayerAndIndex(which_layer, select, letter, nth_byte): index = getBoardIndex(which_layer, select) - col = index % BOARD_COLS - row = floor(index / BOARD_COLS) + col = index % config.BOARD_COLS + row = floor(index / config.BOARD_COLS) return "R%02dC%02dL%02dB%01d" % (row, col, letter, nth_byte) diff --git a/Scripts/libtastt.py b/Scripts/libtastt.py index 9efd0e9..3168517 100644 --- a/Scripts/libtastt.py +++ b/Scripts/libtastt.py @@ -165,9 +165,9 @@ def generateClearAnimation(anim_dir, guid_map): letter = 0 - for byte in range(0, generate_utils.BYTES_PER_CHAR): - for row in range(0, generate_utils.BOARD_ROWS): - for col in range(0, generate_utils.BOARD_COLS): + for byte in range(0, generate_utils.config.BYTES_PER_CHAR): + for row in range(0, generate_utils.config.BOARD_ROWS): + for col in range(0, generate_utils.config.BOARD_COLS): curve = curve_template.copy() for keyframe in curve.mapping['curve'].mapping['m_Curve'].sequence: keyframe.mapping['value'] = str(letter + @@ -294,14 +294,14 @@ def generateAnimations(anim_dir, guid_map): anim_clip.mapping['m_EditorCurves'].sequence = [] # To support more languages, we use 2 bytes per character, giving us a 64K character set. 
- for byte in range(0, generate_utils.BYTES_PER_CHAR): - for row in range(0, generate_utils.BOARD_ROWS): + for byte in range(0, generate_utils.config.BYTES_PER_CHAR): + for row in range(0, generate_utils.config.BOARD_ROWS): print("Generating letter animations (row {}/{}) (byte {}/2)".format(row, - generate_utils.BOARD_ROWS, byte), file=sys.stderr) - for col in range(0, generate_utils.BOARD_COLS): + generate_utils.config.BOARD_ROWS, byte), file=sys.stderr) + for col in range(0, generate_utils.config.BOARD_COLS): for letter in range(0, 2): if letter == 1: - letter = generate_utils.CHARS_PER_CELL - 1 + letter = generate_utils.config.CHARS_PER_CELL - 1 # Make a deep copy of the templates node = anim_node.copy() @@ -348,9 +348,9 @@ def generateFXController(anim: libunity.UnityAnimator) -> typing.Dict[int, libun anim.addParameter(generate_utils.getScaleParam(), float) layers = {} - for byte in range(0, generate_utils.BYTES_PER_CHAR): + for byte in range(0, generate_utils.config.BYTES_PER_CHAR): layers[byte] = {} - for i in range(0, generate_utils.NUM_LAYERS): + for i in range(0, generate_utils.config.CHARS_PER_SYNC): anim.addParameter(generate_utils.getBlendParam(i, byte), float) layer = anim.addLayer(generate_utils.getLayerName(i, byte)) @@ -375,7 +375,7 @@ def generateFXLayer(which_layer: int, anim: libunity.UnityAnimator, layer: enable_param, True) select_states = {} - for i in range(0, generate_utils.NUM_REGIONS): + for i in range(0, generate_utils.config.numRegions(which_layer)): dx = i * 200 dy = 200 @@ -387,7 +387,7 @@ def generateFXLayer(which_layer: int, anim: libunity.UnityAnimator, layer: guid_lo = guid_map[anim_lo_path] anim_hi_path = os.path.join(gen_anim_dir, generate_utils.getAnimationNameByLayerAndIndex( - which_layer, i, generate_utils.CHARS_PER_CELL - 1, byte) + \ + which_layer, i, generate_utils.config.CHARS_PER_CELL - 1, byte) + \ ".anim") guid_hi = guid_map[anim_hi_path] @@ -490,7 +490,7 @@ def generateFX(guid_map, gen_anim_dir): layers = 
generateFXController(anim) # TODO(yum) parallelize - for byte in range(0, generate_utils.BYTES_PER_CHAR): + for byte in range(0, generate_utils.config.BYTES_PER_CHAR): for which_layer, layer in layers[byte].items(): print("Generating layer {}/{}".format(which_layer, len(layers[byte].items())), file=sys.stderr) generateFXLayer(which_layer, anim, layer, gen_anim_dir, byte) @@ -547,6 +547,8 @@ def parseArgs(): "which all generated animations are placed.") parser.add_argument("--guid_map", type=str, help="The path to a file which will store guids") parser.add_argument("--fx_dest", type=str, help="The path at which to save the generated FX controller") + parser.add_argument("--bytes_per_char", type=str, help="The number of bytes to use to represent each character") + parser.add_argument("--chars_per_sync", type=str, help="The number of characters to send on each sync event") args = parser.parse_args() if not args.gen_dir: @@ -569,6 +571,13 @@ if __name__ == "__main__": args = parseArgs() if args.cmd == "gen_anims": + if not args.bytes_per_char or not args.chars_per_sync: + print("--bytes_per_char and --chars_per_sync required", file=sys.stderr) + sys.exit(1) + + generate_utils.config.BYTES_PER_CHAR = int(args.bytes_per_char) + generate_utils.config.CHARS_PER_SYNC = int(args.chars_per_sync) + guid_map = {} with open(args.guid_map, 'rb') as f: guid_map = pickle.load(f) @@ -579,6 +588,13 @@ if __name__ == "__main__": with open(args.guid_map, 'wb') as f: pickle.dump(guid_map, f) elif args.cmd == "gen_fx": + if not args.bytes_per_char or not args.chars_per_sync: + print("--bytes_per_char and --chars_per_sync required", file=sys.stderr) + sys.exit(1) + + generate_utils.config.BYTES_PER_CHAR = int(args.bytes_per_char) + generate_utils.config.CHARS_PER_SYNC = int(args.chars_per_sync) + guid_map = {} with open(args.guid_map, 'rb') as f: guid_map = pickle.load(f) diff --git a/Scripts/osc_ctrl.py b/Scripts/osc_ctrl.py index a7dcc2b..21c6348 100644 --- a/Scripts/osc_ctrl.py +++ 
b/Scripts/osc_ctrl.py @@ -16,9 +16,7 @@ from generate_utils import getLayerParam from generate_utils import getSelectParam from generate_utils import getEnableParam from generate_utils import getBoardIndex -from generate_utils import NUM_LAYERS -from generate_utils import BOARD_ROWS -from generate_utils import BOARD_COLS +from generate_utils import config import emotes @@ -53,7 +51,7 @@ state.encoding = generateEncoding() # lines sent is a multiple of the number of rows in the board. def encodeMessage(lines): result = [] - lines_tmp = lines + [" "] * ((BOARD_ROWS - len(lines)) % BOARD_ROWS) + lines_tmp = lines + [" "] * ((config.BOARD_ROWS - len(lines)) % config.BOARD_ROWS) for line in lines_tmp: first_word = True for word in line.split(): @@ -82,11 +80,11 @@ def encodeMessage(lines): print("skip unrecognized char {}".format(char)) continue result.append(state.encoding[char]) - result += [state.encoding[' ']] * (BOARD_COLS - len(line)) + result += [state.encoding[' ']] * (config.BOARD_COLS - len(line)) return result def updateCell(client, cell_idx, letter_encoded): - for byte in range(0, generate_utils.BYTES_PER_CHAR): + for byte in range(0, generate_utils.config.BYTES_PER_CHAR): addr="/avatar/parameters/" + generate_utils.getBlendParam(cell_idx, byte) letter_remapped = (-127.5 + letter_encoded[byte]) / 127.5 client.send_message(addr, letter_remapped) @@ -102,14 +100,14 @@ def disable(client): # Send a cell all at once. # `which_cell` is an integer in the range [0,NUM_REGIONS) def sendMessageCellDiscrete(client, msg_cell, which_cell): - empty_cell = [state.encoding[' ']] * NUM_LAYERS + empty_cell = [state.encoding[' ']] * generate_utils.config.CHARS_PER_SYNC if msg_cell != empty_cell: addr="/avatar/parameters/" + generate_utils.getSpeechNoiseToggleParam() client.send_message(addr, True) # Really long messages just wrap back around. 
- which_cell = (which_cell % generate_utils.NUM_REGIONS) + which_cell = (which_cell % generate_utils.config.numRegions(0)) enable(client) @@ -129,7 +127,7 @@ def sendMessageCellDiscrete(client, msg_cell, which_cell): client.send_message(addr, False) # The board is broken down into contiguous collections of characters called -# cells. Each cell contains `NUM_LAYERS` characters. We can update one cell +# cells. Each cell contains `CHARS_PER_SYNC` characters. We can update one cell # every ~1.0 seconds. Going faster causes the board to display garbage to # remote players. def splitMessage(msg): @@ -151,13 +149,13 @@ def splitMessage(msg): print("word align: {}".format(word_align)) word = ' ' * word_align + word - while len(word) > BOARD_COLS: + while len(word) > config.BOARD_COLS: if len(line) != 0: lines.append(line) line = "" - word_prefix = word[0:BOARD_COLS-1] + "-" - word_suffix = word[BOARD_COLS-1:] + word_prefix = word[0:config.BOARD_COLS-1] + "-" + word_suffix = word[config.BOARD_COLS-1:] #print("append prefix {}".format(word_prefix)) lines.append(word_prefix) word = word_suffix @@ -166,7 +164,7 @@ def splitMessage(msg): line = word continue - if len(line) + len(" ") + len(word) <= BOARD_COLS: + if len(line) + len(" ") + len(word) <= config.BOARD_COLS: line += " " + word continue @@ -195,7 +193,7 @@ def resizeBoard(num_lines, tx_state, shrink_only): resize_param0 = None resize_param1 = None - if num_lines > BOARD_ROWS / 2: + if num_lines > config.BOARD_ROWS / 2: # Board must be expanded to full size. 
if shrink_only: return @@ -275,10 +273,10 @@ def sendMessageLazy(client, msg, tx_state): empty_cells_sent = 0 nonempty_cells_sent = 0 - n_cells = ceil(msg_encoded_len / NUM_LAYERS) + n_cells = floor(msg_encoded_len / config.CHARS_PER_SYNC) for cell in range(0, n_cells): - cell_begin = cell * NUM_LAYERS - cell_end = (cell + 1) * NUM_LAYERS + cell_begin = cell * config.CHARS_PER_SYNC + cell_end = (cell + 1) * config.CHARS_PER_SYNC cell_msg = msg_encoded[cell_begin:cell_end] last_cell_msg = [] @@ -289,7 +287,7 @@ def sendMessageLazy(client, msg, tx_state): if cell_msg == last_cell_msg: continue - if cell_msg == [state.encoding[' ']] * NUM_LAYERS: + if cell_msg == [state.encoding[' ']] * config.CHARS_PER_SYNC: if empty_cells_sent >= tx_state.empty_cells_to_send_per_call: return SEND_MSG_LAZY_SENT_EMPTY empty_cells_sent += 1 @@ -308,10 +306,10 @@ def sendMessageLazy(client, msg, tx_state): return SEND_MSG_LAZY_DONE def sendRawMessage(client, msg): - n_cells = ceil(len(msg) / NUM_LAYERS) + n_cells = ceil(len(msg) / config.CHARS_PER_SYNC) for cell in range(0, n_cells): - cell_begin = cell * NUM_LAYERS - cell_end = (cell + 1) * NUM_LAYERS + cell_begin = cell * config.CHARS_PER_SYNC + cell_end = (cell + 1) * config.CHARS_PER_SYNC cell_msg = msg[cell_begin:cell_end] #print("Send cell {}".format(cell)) sendMessageCellDiscrete(client, cell_msg, cell) diff --git a/Scripts/transcribe.py b/Scripts/transcribe.py index e883704..00ab82f 100644 --- a/Scripts/transcribe.py +++ b/Scripts/transcribe.py @@ -6,6 +6,7 @@ from datetime import datetime import os import osc_ctrl from functools import partial +import generate_utils # python3 -m pip install pyaudio # License: MIT. import pyaudio @@ -400,6 +401,8 @@ if __name__ == "__main__": parser.add_argument("--mic", type=str, help="Which mic to use. Options: index, focusrite. Default: index") parser.add_argument("--language", type=str, help="Which language to use. 
Ex: english, japanese, chinese, french, german.") parser.add_argument("--model", type=str, help="Which AI model to use. Ex: tiny, base, small, medium") + parser.add_argument("--bytes_per_char", type=str, help="The number of bytes to use to represent each character") + parser.add_argument("--chars_per_sync", type=str, help="The number of characters to send on each sync event") args = parser.parse_args() if not args.mic: @@ -411,5 +414,11 @@ if __name__ == "__main__": if not args.model: args.language = "base" + if not args.bytes_per_char or not args.chars_per_sync: + print("--bytes_per_char and --chars_per_sync required", file=sys.stderr) + sys.exit(1) + generate_utils.config.BYTES_PER_CHAR = int(args.bytes_per_char) + generate_utils.config.CHARS_PER_SYNC = int(args.chars_per_sync) + transcribeLoop(args.mic, args.language, args.model) |
