diff options
| -rw-r--r-- | GUI/GUI/GUI/Frame.cpp | 39 | ||||
| -rw-r--r-- | GUI/GUI/GUI/Frame.h | 1 | ||||
| -rw-r--r-- | GUI/GUI/GUI/PythonWrapper.cpp | 3 | ||||
| -rw-r--r-- | GUI/GUI/GUI/PythonWrapper.h | 20 | ||||
| -rw-r--r-- | Scripts/transcribe.py | 14 |
5 files changed, 61 insertions, 16 deletions
diff --git a/GUI/GUI/GUI/Frame.cpp b/GUI/GUI/GUI/Frame.cpp index b708953..28c0e4f 100644 --- a/GUI/GUI/GUI/Frame.cpp +++ b/GUI/GUI/GUI/Frame.cpp @@ -34,6 +34,7 @@ namespace { ID_PY_APP_ENABLE_LOCAL_BEEP,
ID_PY_APP_ROWS,
ID_PY_APP_COLS,
+ ID_PY_APP_WINDOW_DURATION,
ID_UNITY_PANEL,
ID_UNITY_CONFIG_PANEL,
ID_UNITY_OUT,
@@ -342,6 +343,18 @@ Frame::Frame() "The number of columns on the text box.");
py_app_cols_ = py_app_cols;
+ auto* py_app_window_duration = new wxTextCtrl(py_app_config_panel_pairs,
+ ID_PY_APP_WINDOW_DURATION, /*value=*/"15",
+ wxDefaultPosition, wxDefaultSize, /*style=*/0);
+ py_app_window_duration->SetToolTip(
+ "This controls how long the slice of audio that "
+ "we feed the transcription algorithm is, in seconds. "
+ "Shorter values (as low as 10 seconds) can be transcribed "
+ "more quickly, but are less accurate. Longer values "
+ "(as high as 28 seconds) take longer to transcribe, "
+ "but are far more accurate.");
+ py_app_window_duration_ = py_app_window_duration;
+
auto* sizer = new wxFlexGridSizer(/*cols=*/2);
py_app_config_panel_pairs->SetSizer(sizer);
@@ -365,6 +378,9 @@ Frame::Frame() sizer->Add(new wxStaticText(py_app_config_panel_pairs, wxID_ANY, /*label=*/"Text box columns:"));
sizer->Add(py_app_cols, /*proportion=*/0, /*flags=*/wxEXPAND);
+
+ sizer->Add(new wxStaticText(py_app_config_panel_pairs, wxID_ANY, /*label=*/"Window duration (s):"));
+ sizer->Add(py_app_window_duration, /*proportion=*/0, /*flags=*/wxEXPAND);
}
auto* py_app_enable_local_beep = new wxCheckBox(py_config_panel,
@@ -873,17 +889,33 @@ void Frame::OnAppStart(wxCommandEvent& event) { const bool enable_local_beep = py_app_enable_local_beep_->GetValue();
std::string rows_str = py_app_rows_->GetValue().ToStdString();
std::string cols_str = py_app_cols_->GetValue().ToStdString();
- int rows, cols;
+ std::string window_duration_str = py_app_window_duration_->GetValue().ToStdString();
+ int rows, cols, window_duration;
try {
rows = std::stoi(rows_str);
cols = std::stoi(cols_str);
+ window_duration = std::stoi(window_duration_str);
}
catch (const std::invalid_argument& e) {
- Log(transcribe_out_, "Could not parse rows \"{}\" or cols \"{}\" as an integer\n", rows_str, cols_str);
+ Log(transcribe_out_, "Could not parse rows \"{}\", cols \"{}\", or window duration \"{}\" as an integer\n", rows_str, cols_str, window_duration_str);
return;
}
catch (const std::out_of_range& e) {
- Log(transcribe_out_, "Rows \"{}\" or cols \"{}\" are out of range\n", rows_str, cols_str);
+ Log(transcribe_out_, "Rows \"{}\", cols \"{}\", or window duration \"{}\" are out of range\n", rows_str, cols_str, window_duration_str);
+ return;
+ }
+ const int max_rows = 10;
+ const int max_cols = 240;
+ const int min_window_duration_s = 10;
+ const int max_window_duration_s = 28;
+ if (rows < 0 || rows > max_rows ||
+ cols < 0 || cols > max_cols ||
+ window_duration < min_window_duration_s || window_duration > max_window_duration_s) {
+ Log(transcribe_out_, "Rows not on [{},{}] or cols not on [{},{}] or "
+ "window_duration not on [{},{}]\n",
+ 0, max_rows,
+ 0, max_cols,
+ min_window_duration_s, max_window_duration_s);
return;
}
@@ -895,6 +927,7 @@ void Frame::OnAppStart(wxCommandEvent& event) { kBytesPerChar[bytes_per_char_idx].ToStdString(),
rows,
cols,
+ window_duration,
enable_local_beep);
if (!p) {
Log(transcribe_out_, "Failed to launch transcription engine\n");
diff --git a/GUI/GUI/GUI/Frame.h b/GUI/GUI/GUI/Frame.h index 3ce6cb9..2a2760a 100644 --- a/GUI/GUI/GUI/Frame.h +++ b/GUI/GUI/GUI/Frame.h @@ -31,6 +31,7 @@ private: wxTextCtrl* py_app_rows_;
wxTextCtrl* py_app_cols_;
+ wxTextCtrl* py_app_window_duration_;
wxTextCtrl* unity_rows_;
wxTextCtrl* unity_cols_;
diff --git a/GUI/GUI/GUI/PythonWrapper.cpp b/GUI/GUI/GUI/PythonWrapper.cpp index 4162a4e..da63a4a 100644 --- a/GUI/GUI/GUI/PythonWrapper.cpp +++ b/GUI/GUI/GUI/PythonWrapper.cpp @@ -125,7 +125,7 @@ wxProcess* PythonWrapper::StartApp( std::function<void(wxProcess* proc, int ret)>&& exit_callback, const std::string& mic, const std::string& lang, const std::string& model, const std::string& chars_per_sync, const std::string& bytes_per_char, - int rows, int cols, bool enable_local_beep) { + int rows, int cols, int window_duration_s, bool enable_local_beep) { return InvokeAsyncWithArgs({ "-u", "Resources/Scripts/transcribe.py", @@ -137,6 +137,7 @@ wxProcess* PythonWrapper::StartApp( "--enable_local_beep", enable_local_beep ? "1" : "0", "--rows", std::to_string(rows), "--cols", std::to_string(cols), + "--window_duration_s", std::to_string(window_duration_s), }, std::move(exit_callback)); } diff --git a/GUI/GUI/GUI/PythonWrapper.h b/GUI/GUI/GUI/PythonWrapper.h index 95195b9..25855a4 100644 --- a/GUI/GUI/GUI/PythonWrapper.h +++ b/GUI/GUI/GUI/PythonWrapper.h @@ -44,20 +44,20 @@ namespace PythonWrapper std::function<void(wxProcess* proc, int ret)>&& exit_callback, const std::string& mic, const std::string& lang, const std::string& model, const std::string& chars_per_sync, const std::string& bytes_per_char, - int rows, int cols, bool enable_local_beep + int rows, int cols, int window_duration_s, bool enable_local_beep ); bool GenerateAnimator( const std::string& unity_assets_path, - const std::string& unity_animator_path, - const std::string& unity_parameters_path, - const std::string& unity_menu_path, - const std::string& unity_animator_generated_dir, - const std::string& unity_animator_generated_name, - const std::string& unity_parameters_generated_name, - const std::string& unity_menu_generated_name, - const std::string& chars_per_sync, - const std::string& bytes_per_char, + const std::string& unity_animator_path, + const std::string& unity_parameters_path, + const 
std::string& unity_menu_path, + const std::string& unity_animator_generated_dir, + const std::string& unity_animator_generated_name, + const std::string& unity_parameters_generated_name, + const std::string& unity_menu_generated_name, + const std::string& chars_per_sync, + const std::string& bytes_per_char, int rows, int cols, wxTextCtrl* out); diff --git a/Scripts/transcribe.py b/Scripts/transcribe.py index 0530946..f90867a 100644 --- a/Scripts/transcribe.py +++ b/Scripts/transcribe.py @@ -25,6 +25,13 @@ import wave # License: MIT. import whisper +class Config: + def __init__(self): + # The maximum length that recordAudio() will put into frames before it + # starts dropping from the start. + self.MAX_LENGTH_S = 10 +config = Config() + class AudioState: def __init__(self): self.CHUNK = 1024 @@ -35,7 +42,6 @@ class AudioState: # The maximum length that recordAudio() will put into frames before it # starts dropping from the start. - self.MAX_LENGTH_S = 10 self.MAX_LENGTH_S_WHISPER = 30 # The minimum length that recordAudio() will wait for before saving audio. 
self.MIN_LENGTH_S = 1 @@ -119,7 +125,7 @@ def onAudioFramesAvailable( audio_state.frames.append(decimated) - max_frames = int(input_rate * audio_state.MAX_LENGTH_S / audio_state.CHUNK) + max_frames = int(input_rate * config.MAX_LENGTH_S / audio_state.CHUNK) if len(audio_state.frames) > max_frames: audio_state.frames = audio_state.frames[-1 * max_frames :] @@ -428,6 +434,7 @@ if __name__ == "__main__": parser.add_argument("--enable_local_beep", type=int, help="Whether to play a local auditory indicator when transcription starts/stops."); parser.add_argument("--rows", type=int, help="The number of rows on the board") parser.add_argument("--cols", type=int, help="The number of columns on the board") + parser.add_argument("--window_duration_s", type=int, help="The length in seconds of the audio recording handed to the transcription algorithm"); args = parser.parse_args() if not args.mic: @@ -447,6 +454,9 @@ if __name__ == "__main__": print("--rows and --cols required", file=sys.stderr) sys.exit(1) + if args.window_duration_s: + config.MAX_LENGTH_S = int(args.window_duration_s) + generate_utils.config.BYTES_PER_CHAR = int(args.bytes_per_char) generate_utils.config.CHARS_PER_SYNC = int(args.chars_per_sync) generate_utils.config.BOARD_ROWS = int(args.rows) |
