diff options
| -rw-r--r-- | GUI/GUI/GUI/Config.cpp | 3 | ||||
| -rw-r--r-- | GUI/GUI/GUI/Config.h | 1 | ||||
| -rw-r--r-- | GUI/GUI/GUI/Frame.cpp | 31 | ||||
| -rw-r--r-- | GUI/GUI/GUI/Frame.h | 1 | ||||
| -rw-r--r-- | GUI/GUI/GUI/PythonWrapper.cpp | 1 | ||||
| -rw-r--r-- | Scripts/steamvr.py | 27 | ||||
| -rw-r--r-- | Scripts/transcribe.py | 21 |
7 files changed, 70 insertions, 15 deletions
diff --git a/GUI/GUI/GUI/Config.cpp b/GUI/GUI/GUI/Config.cpp index 4e6eb48..3994e62 100644 --- a/GUI/GUI/GUI/Config.cpp +++ b/GUI/GUI/GUI/Config.cpp @@ -73,6 +73,7 @@ TranscriptionAppConfig::TranscriptionAppConfig() model("base.en"),
chars_per_sync("20"),
bytes_per_char("1"),
+ button("left joystick"),
rows("4"),
cols("48"),
window_duration("15"),
@@ -90,6 +91,7 @@ bool TranscriptionAppConfig::Serialize(const std::filesystem::path& path) { root["model"] << ryml::to_substr(model);
root["chars_per_sync"] << ryml::to_substr(chars_per_sync);
root["bytes_per_char"] << ryml::to_substr(bytes_per_char);
+ root["button"] << ryml::to_substr(button);
root["rows"] << ryml::to_substr(rows);
root["cols"] << ryml::to_substr(cols);
root["window_duration"] << ryml::to_substr(window_duration);
@@ -119,6 +121,7 @@ bool TranscriptionAppConfig::Deserialize(const std::filesystem::path& path) { root.get_if("model", &c.model);
root.get_if("chars_per_sync", &c.chars_per_sync);
root.get_if("bytes_per_char", &c.bytes_per_char);
+ root.get_if("button", &c.button);
root.get_if("rows", &c.rows);
root.get_if("cols", &c.cols);
root.get_if("window_duration", &c.window_duration);
diff --git a/GUI/GUI/GUI/Config.h b/GUI/GUI/GUI/Config.h index fe7b862..511ba01 100644 --- a/GUI/GUI/GUI/Config.h +++ b/GUI/GUI/GUI/Config.h @@ -41,6 +41,7 @@ public: std::string model;
std::string chars_per_sync;
std::string bytes_per_char;
+ std::string button;
std::string rows;
std::string cols;
std::string window_duration;
diff --git a/GUI/GUI/GUI/Frame.cpp b/GUI/GUI/GUI/Frame.cpp index 5c73bf6..e0663c5 100644 --- a/GUI/GUI/GUI/Frame.cpp +++ b/GUI/GUI/GUI/Frame.cpp @@ -32,6 +32,7 @@ namespace { ID_PY_APP_MODEL,
ID_PY_APP_CHARS_PER_SYNC,
ID_PY_APP_BYTES_PER_CHAR,
+ ID_PY_APP_BUTTON,
ID_PY_APP_MODEL_PANEL,
ID_PY_APP_ENABLE_LOCAL_BEEP,
ID_PY_APP_USE_CPU,
@@ -230,6 +231,17 @@ namespace { // Sorry international users. Optimize for English speakers, by default.
constexpr int kBytesDefault = 0;
+ const wxString kButton[] = {  // Entries of the "Button:" dropdown; each is "<hand> <button>".
+ "left joystick",
+ "left a",
+ "left b",
+ "right joystick",
+ "right a",
+ "right b",
+ };
+ const size_t kNumButtons = sizeof(kButton) / sizeof(kButton[0]);  // Number of entries in kButton.
+ constexpr int kButtonDefault = 0;  // Index into kButton used when no entry matches ("left joystick").
+
// Given the string value of a dropdown menu's entry, find its index. If no
// entry matches, return `default_index`.
int GetDropdownChoiceIndex(const wxString menu[],
@@ -360,6 +372,17 @@ Frame::Frame() "characters (i.e. not English), set this to 2.");
py_app_bytes_per_char_ = py_app_bytes_per_char;
+ auto* py_app_button = new wxChoice(py_app_config_panel_pairs,
+ ID_PY_APP_BUTTON, wxDefaultPosition,
+ wxDefaultSize, kNumButtons, kButton);
+ int button_idx = GetDropdownChoiceIndex(kButton, kNumButtons, py_c.button, kButtonDefault);
+ py_app_button->SetSelection(button_idx);
+ py_app_button->SetToolTip(
+ "You will use this button in game to start and stop "
+ "transcription. Set it to a button you're not using "
+ "for anything else!");
+ py_app_button_ = py_app_button;
+
auto* py_app_rows = new wxTextCtrl(py_app_config_panel_pairs,
ID_PY_APP_ROWS, py_c.rows,
wxDefaultPosition, wxDefaultSize, /*style=*/0);
@@ -404,6 +427,9 @@ Frame::Frame() sizer->Add(new wxStaticText(py_app_config_panel_pairs, wxID_ANY, /*label=*/"Bytes per character:"));
sizer->Add(py_app_bytes_per_char, /*proportion=*/0, /*flags=*/wxEXPAND);
+ sizer->Add(new wxStaticText(py_app_config_panel_pairs, wxID_ANY, /*label=*/"Button:"));
+ sizer->Add(py_app_button, /*proportion=*/0, /*flags=*/wxEXPAND);
+
sizer->Add(new wxStaticText(py_app_config_panel_pairs, wxID_ANY, /*label=*/"Text box rows:"));
sizer->Add(py_app_rows, /*proportion=*/0, /*flags=*/wxEXPAND);
@@ -946,6 +972,10 @@ void Frame::OnAppStart(wxCommandEvent& event) { if (bytes_per_char_idx == wxNOT_FOUND) {
bytes_per_char_idx = kBytesDefault;
}
+ int button_idx = py_app_button_->GetSelection();  // Index into kButton, or wxNOT_FOUND if nothing is selected.
+ if (button_idx == wxNOT_FOUND) {
+ button_idx = kButtonDefault;  // Fall back to the button dropdown's own default, not kBytesDefault.
+ }
const bool enable_local_beep = py_app_enable_local_beep_->GetValue();
const bool use_cpu = py_app_use_cpu_->GetValue();
const bool use_builtin = py_app_use_builtin_->GetValue();
@@ -987,6 +1017,7 @@ void Frame::OnAppStart(wxCommandEvent& event) { py_c.model = kModelChoices[which_model].ToStdString();
py_c.chars_per_sync = kCharsPerSync[chars_per_sync_idx].ToStdString();
py_c.bytes_per_char = kBytesPerChar[bytes_per_char_idx].ToStdString();
+ py_c.button = kButton[button_idx].ToStdString();
py_c.rows = std::to_string(rows);
py_c.cols = std::to_string(cols);
py_c.window_duration = std::to_string(window_duration);
diff --git a/GUI/GUI/GUI/Frame.h b/GUI/GUI/GUI/Frame.h index c55830e..28c8f09 100644 --- a/GUI/GUI/GUI/Frame.h +++ b/GUI/GUI/GUI/Frame.h @@ -46,6 +46,7 @@ private: // TODO(yum) figure out how to deduplicate these objects
wxChoice* py_app_chars_per_sync_;
wxChoice* py_app_bytes_per_char_;
+ wxChoice* py_app_button_;
wxChoice* unity_chars_per_sync_;
wxChoice* unity_bytes_per_char_;
diff --git a/GUI/GUI/GUI/PythonWrapper.cpp b/GUI/GUI/GUI/PythonWrapper.cpp index 7571833..cad9395 100644 --- a/GUI/GUI/GUI/PythonWrapper.cpp +++ b/GUI/GUI/GUI/PythonWrapper.cpp @@ -154,6 +154,7 @@ wxProcess* PythonWrapper::StartApp( "--model", config.model, "--chars_per_sync", config.chars_per_sync, "--bytes_per_char", config.bytes_per_char, + "--button", Quote(config.button), "--enable_local_beep", config.enable_local_beep ? "1" : "0", "--rows", config.rows, "--cols", config.cols, diff --git a/Scripts/steamvr.py b/Scripts/steamvr.py index ed4150c..c231184 100644 --- a/Scripts/steamvr.py +++ b/Scripts/steamvr.py @@ -9,6 +9,15 @@ EVENT_NONE = 0 EVENT_RISING_EDGE = 1 EVENT_FALLING_EDGE = 2 +hands = {} +hands["left"] = vr.TrackedControllerRole_LeftHand +hands["right"] = vr.TrackedControllerRole_RightHand + +buttons = {} +buttons["a"] = vr.k_EButton_IndexController_A +buttons["b"] = vr.k_EButton_IndexController_B +buttons["joystick"] = vr.k_EButton_Axis0 + class SessionState: def __init__(self): self.system = vr.init(vr.VRApplication_Background) @@ -24,11 +33,11 @@ class SessionState: # 2 - button falling edge def pollButtonPress( session_state: SessionState, - controller: vr.ETrackedControllerRole = vr.TrackedControllerRole_LeftHand, - button: vr.EVRButtonId = vr.k_EButton_Axis0 + hand_id: vr.ETrackedControllerRole = hands["left"], + button_id: vr.EVRButtonId = buttons["joystick"], ) -> int: - lh_idx = session_state.system.getTrackedDeviceIndexForControllerRole(vr.TrackedControllerRole_LeftHand) - #print("Left hand device idx: {}".format(lh_idx)) + lh_idx = session_state.system.getTrackedDeviceIndexForControllerRole(hand_id) + #print("left hand device idx: {}".format(lh_idx)) got_state, state = session_state.system.getControllerState(lh_idx) if not got_state: @@ -41,13 +50,11 @@ def pollButtonPress( # differentiate movement from clicking, we create a dead zone: if the event # fires while the stick isn't moved far from center, we assume it's a # click, not movement. 
- dead_zone_radius = 0.5 + dead_zone_radius = 0.7 - # This is the ID of event for the joystick being clicked. - joy_click = vr.k_EButton_Axis0 - joy_click_mask = (1 << joy_click) + button_mask = (1 << button_id) ret = EVENT_NONE - if (state.ulButtonPressed & joy_click_mask) != 0 and\ + if (state.ulButtonPressed & button_mask) != 0 and\ (state.rAxis[0].x**2 + state.rAxis[0].y**2 < dead_zone_radius**2): #print("button pressed: %016x" % state.ulButtonPressed) #for i in range(0, 5): @@ -65,7 +72,7 @@ if __name__ == "__main__": while True: time.sleep(0.1) - event = pollButtonPress(session_state) + event = pollButtonPress(session_state, hand_id = hands["left"], button_id = buttons["joystick"]) if event == EVENT_RISING_EDGE: print("rising edge") elif event == EVENT_FALLING_EDGE: diff --git a/Scripts/transcribe.py b/Scripts/transcribe.py index 1237334..f452b2c 100644 --- a/Scripts/transcribe.py +++ b/Scripts/transcribe.py @@ -317,7 +317,8 @@ def sendAudio(audio_state, use_builtin: bool): # Pace this out time.sleep(0.01) -def readControllerInput(audio_state, enable_local_beep, use_builtin): +def readControllerInput(audio_state, enable_local_beep: bool, + use_builtin: bool, button: str): session = None first = True while session == None and audio_state.run_app == True: @@ -334,11 +335,15 @@ def readControllerInput(audio_state, enable_local_beep, use_builtin): osc_ctrl.indicateSpeech(audio_state.osc_state.client, False) osc_ctrl.indicatePaging(audio_state.osc_state.client, False) + hand_id = steamvr.hands[button.split()[0]] + button_id = steamvr.buttons[button.split()[1]] + last_rising = time.time() while audio_state.run_app == True: time.sleep(0.05) - event = steamvr.pollButtonPress(session) + event = steamvr.pollButtonPress(session, hand_id=hand_id, + button_id=button_id) if event == steamvr.EVENT_RISING_EDGE: last_rising = time.time() @@ -387,7 +392,8 @@ def readControllerInput(audio_state, enable_local_beep, use_builtin): # model should correspond to one of the Whisper 
models defined in # whisper/__init__.py. Examples: tiny, base, small, medium. def transcribeLoop(mic: str, language: str, model: str, - enable_local_beep: bool, use_cpu: bool, use_builtin: bool): + enable_local_beep: bool, use_cpu: bool, use_builtin: bool, + button: str): audio_state = getMicStream(mic) audio_state.language = whisper.tokenizer.TO_LANGUAGE_CODE[language] @@ -408,7 +414,7 @@ def transcribeLoop(mic: str, language: str, model: str, send_audio_thd.daemon = True send_audio_thd.start() - controller_input_thd = threading.Thread(target = readControllerInput, args = [audio_state, enable_local_beep, use_builtin]) + controller_input_thd = threading.Thread(target = readControllerInput, args = [audio_state, enable_local_beep, use_builtin, button]) controller_input_thd.daemon = True controller_input_thd.start() @@ -452,6 +458,7 @@ if __name__ == "__main__": parser.add_argument("--window_duration_s", type=int, help="The length in seconds of the audio recording handed to the transcription algorithm") parser.add_argument("--cpu", type=int, help="If set to 1, use CPU instead of GPU") parser.add_argument("--use_builtin", type=int, help="If set to 1, use the text box built into the game.") + parser.add_argument("--button", type=str, help="The controller button used to start/stop transcription. E.g. \"left joystick\"") args = parser.parse_args() if not args.mic: @@ -471,6 +478,10 @@ if __name__ == "__main__": print("--rows and --cols required", file=sys.stderr) sys.exit(1) + if not args.button: + print("--button required", file=sys.stderr) + sys.exit(1) + if args.window_duration_s: config.MAX_LENGTH_S = int(args.window_duration_s) @@ -490,5 +501,5 @@ if __name__ == "__main__": generate_utils.config.BOARD_COLS = int(args.cols) transcribeLoop(args.mic, args.language, args.model, args.enable_local_beep, - args.cpu, args.use_builtin) + args.cpu, args.use_builtin, args.button) |
