summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--GUI/GUI/GUI/Frame.cpp39
-rw-r--r--GUI/GUI/GUI/Frame.h1
-rw-r--r--GUI/GUI/GUI/PythonWrapper.cpp3
-rw-r--r--GUI/GUI/GUI/PythonWrapper.h20
-rw-r--r--Scripts/transcribe.py14
5 files changed, 61 insertions, 16 deletions
diff --git a/GUI/GUI/GUI/Frame.cpp b/GUI/GUI/GUI/Frame.cpp
index b708953..28c0e4f 100644
--- a/GUI/GUI/GUI/Frame.cpp
+++ b/GUI/GUI/GUI/Frame.cpp
@@ -34,6 +34,7 @@ namespace {
ID_PY_APP_ENABLE_LOCAL_BEEP,
ID_PY_APP_ROWS,
ID_PY_APP_COLS,
+ ID_PY_APP_WINDOW_DURATION,
ID_UNITY_PANEL,
ID_UNITY_CONFIG_PANEL,
ID_UNITY_OUT,
@@ -342,6 +343,18 @@ Frame::Frame()
"The number of columns on the text box.");
py_app_cols_ = py_app_cols;
+ auto* py_app_window_duration = new wxTextCtrl(py_app_config_panel_pairs,
+ ID_PY_APP_WINDOW_DURATION, /*value=*/"15",
+ wxDefaultPosition, wxDefaultSize, /*style=*/0);
+ py_app_window_duration->SetToolTip(
+ "This controls how long the slice of audio that "
+ "we feed the transcription algorithm is, in seconds. "
+ "Shorter values (as low as 10 seconds) can be transcribed "
+ "more quickly, but are less accurate. Longer values "
+ "(as high as 28 seconds) take longer to transcribe, "
+ "but are far more accurate.");
+ py_app_window_duration_ = py_app_window_duration;
+
auto* sizer = new wxFlexGridSizer(/*cols=*/2);
py_app_config_panel_pairs->SetSizer(sizer);
@@ -365,6 +378,9 @@ Frame::Frame()
sizer->Add(new wxStaticText(py_app_config_panel_pairs, wxID_ANY, /*label=*/"Text box columns:"));
sizer->Add(py_app_cols, /*proportion=*/0, /*flags=*/wxEXPAND);
+
+ sizer->Add(new wxStaticText(py_app_config_panel_pairs, wxID_ANY, /*label=*/"Window duration (s):"));
+ sizer->Add(py_app_window_duration, /*proportion=*/0, /*flags=*/wxEXPAND);
}
auto* py_app_enable_local_beep = new wxCheckBox(py_config_panel,
@@ -873,17 +889,33 @@ void Frame::OnAppStart(wxCommandEvent& event) {
const bool enable_local_beep = py_app_enable_local_beep_->GetValue();
std::string rows_str = py_app_rows_->GetValue().ToStdString();
std::string cols_str = py_app_cols_->GetValue().ToStdString();
- int rows, cols;
+ std::string window_duration_str = py_app_window_duration_->GetValue().ToStdString();
+ int rows, cols, window_duration;
try {
rows = std::stoi(rows_str);
cols = std::stoi(cols_str);
+ window_duration = std::stoi(window_duration_str);  // throws std::invalid_argument / std::out_of_range on bad input
}
catch (const std::invalid_argument& e) {
- Log(transcribe_out_, "Could not parse rows \"{}\" or cols \"{}\" as an integer\n", rows_str, cols_str);
+ Log(transcribe_out_, "Could not parse rows \"{}\", cols \"{}\", or window duration \"{}\" as an integer\n", rows_str, cols_str, window_duration_str);  // one argument per {} field
return;
}
catch (const std::out_of_range& e) {
- Log(transcribe_out_, "Rows \"{}\" or cols \"{}\" are out of range\n", rows_str, cols_str);
+ Log(transcribe_out_, "Rows \"{}\", cols \"{}\", or window duration \"{}\" are out of range\n", rows_str, cols_str, window_duration_str);  // log the raw text: the int is unassigned when stoi throws
+ return;
+ }
+ const int max_rows = 10;
+ const int max_cols = 240;
+ const int min_window_duration_s = 10;
+ const int max_window_duration_s = 28;
+ if (rows < 0 || rows > max_rows ||
+ cols < 0 || cols > max_cols ||
+ window_duration < min_window_duration_s || window_duration > max_window_duration_s) {
+ Log(transcribe_out_, "Rows not on [{},{}] or cols not on [{},{}] or "
+ "window_duration not on [{},{}]\n",
+ 0, max_rows,
+ 0, max_cols,
+ min_window_duration_s, max_window_duration_s);
return;
}
@@ -895,6 +927,7 @@ void Frame::OnAppStart(wxCommandEvent& event) {
kBytesPerChar[bytes_per_char_idx].ToStdString(),
rows,
cols,
+ window_duration,
enable_local_beep);
if (!p) {
Log(transcribe_out_, "Failed to launch transcription engine\n");
diff --git a/GUI/GUI/GUI/Frame.h b/GUI/GUI/GUI/Frame.h
index 3ce6cb9..2a2760a 100644
--- a/GUI/GUI/GUI/Frame.h
+++ b/GUI/GUI/GUI/Frame.h
@@ -31,6 +31,7 @@ private:
wxTextCtrl* py_app_rows_;
wxTextCtrl* py_app_cols_;
+ wxTextCtrl* py_app_window_duration_;
wxTextCtrl* unity_rows_;
wxTextCtrl* unity_cols_;
diff --git a/GUI/GUI/GUI/PythonWrapper.cpp b/GUI/GUI/GUI/PythonWrapper.cpp
index 4162a4e..da63a4a 100644
--- a/GUI/GUI/GUI/PythonWrapper.cpp
+++ b/GUI/GUI/GUI/PythonWrapper.cpp
@@ -125,7 +125,7 @@ wxProcess* PythonWrapper::StartApp(
std::function<void(wxProcess* proc, int ret)>&& exit_callback,
const std::string& mic, const std::string& lang, const std::string& model,
const std::string& chars_per_sync, const std::string& bytes_per_char,
- int rows, int cols, bool enable_local_beep) {
+ int rows, int cols, int window_duration_s, bool enable_local_beep) {
return InvokeAsyncWithArgs({
"-u",
"Resources/Scripts/transcribe.py",
@@ -137,6 +137,7 @@ wxProcess* PythonWrapper::StartApp(
"--enable_local_beep", enable_local_beep ? "1" : "0",
"--rows", std::to_string(rows),
"--cols", std::to_string(cols),
+ "--window_duration_s", std::to_string(window_duration_s),
},
std::move(exit_callback));
}
diff --git a/GUI/GUI/GUI/PythonWrapper.h b/GUI/GUI/GUI/PythonWrapper.h
index 95195b9..25855a4 100644
--- a/GUI/GUI/GUI/PythonWrapper.h
+++ b/GUI/GUI/GUI/PythonWrapper.h
@@ -44,20 +44,20 @@ namespace PythonWrapper
std::function<void(wxProcess* proc, int ret)>&& exit_callback,
const std::string& mic, const std::string& lang, const std::string& model,
const std::string& chars_per_sync, const std::string& bytes_per_char,
- int rows, int cols, bool enable_local_beep
+ int rows, int cols, int window_duration_s, bool enable_local_beep
);
bool GenerateAnimator(
const std::string& unity_assets_path,
- const std::string& unity_animator_path,
- const std::string& unity_parameters_path,
- const std::string& unity_menu_path,
- const std::string& unity_animator_generated_dir,
- const std::string& unity_animator_generated_name,
- const std::string& unity_parameters_generated_name,
- const std::string& unity_menu_generated_name,
- const std::string& chars_per_sync,
- const std::string& bytes_per_char,
+ const std::string& unity_animator_path,
+ const std::string& unity_parameters_path,
+ const std::string& unity_menu_path,
+ const std::string& unity_animator_generated_dir,
+ const std::string& unity_animator_generated_name,
+ const std::string& unity_parameters_generated_name,
+ const std::string& unity_menu_generated_name,
+ const std::string& chars_per_sync,
+ const std::string& bytes_per_char,
int rows,
int cols,
wxTextCtrl* out);
diff --git a/Scripts/transcribe.py b/Scripts/transcribe.py
index 0530946..f90867a 100644
--- a/Scripts/transcribe.py
+++ b/Scripts/transcribe.py
@@ -25,6 +25,13 @@ import wave
# License: MIT.
import whisper
+class Config:
+ def __init__(self):
+ # The maximum length that recordAudio() will put into frames before it
+ # starts dropping from the start.
+ self.MAX_LENGTH_S = 10
+config = Config()
+
class AudioState:
def __init__(self):
self.CHUNK = 1024
@@ -35,7 +42,6 @@ class AudioState:
# The maximum length that recordAudio() will put into frames before it
# starts dropping from the start.
- self.MAX_LENGTH_S = 10
self.MAX_LENGTH_S_WHISPER = 30
# The minimum length that recordAudio() will wait for before saving audio.
self.MIN_LENGTH_S = 1
@@ -119,7 +125,7 @@ def onAudioFramesAvailable(
audio_state.frames.append(decimated)
- max_frames = int(input_rate * audio_state.MAX_LENGTH_S / audio_state.CHUNK)
+ max_frames = int(input_rate * config.MAX_LENGTH_S / audio_state.CHUNK)
if len(audio_state.frames) > max_frames:
audio_state.frames = audio_state.frames[-1 * max_frames :]
@@ -428,6 +434,7 @@ if __name__ == "__main__":
parser.add_argument("--enable_local_beep", type=int, help="Whether to play a local auditory indicator when transcription starts/stops.");
parser.add_argument("--rows", type=int, help="The number of rows on the board")
parser.add_argument("--cols", type=int, help="The number of columns on the board")
+ parser.add_argument("--window_duration_s", type=int, help="The length in seconds of the audio recording handed to the transcription algorithm");
args = parser.parse_args()
if not args.mic:
@@ -447,6 +454,9 @@ if __name__ == "__main__":
print("--rows and --cols required", file=sys.stderr)
sys.exit(1)
+ if args.window_duration_s:
+ config.MAX_LENGTH_S = int(args.window_duration_s)
+
generate_utils.config.BYTES_PER_CHAR = int(args.bytes_per_char)
generate_utils.config.CHARS_PER_SYNC = int(args.chars_per_sync)
generate_utils.config.BOARD_ROWS = int(args.rows)