diff options
| author | yum <yum.food.vr@gmail.com> | 2023-09-09 17:00:36 -0700 |
|---|---|---|
| committer | yum <yum.food.vr@gmail.com> | 2023-09-09 17:00:36 -0700 |
| commit | 9a18326aecd53c5619109dd2c5da1f26689f2f7b (patch) | |
| tree | 73cf1235a2b216f272931c54dd072397d4f89fd9 | |
| parent | 7d809c1779a46a11609e64e6c52b1851e86bee36 (diff) | |
Bugfix: fix process leak in PythonWrapper::InvokeCommandWithArgs
It now waits up to 10 seconds for a graceful exit and, if that times out,
falls back to the equivalent of a SIGKILL (TerminateProcess). The caller is
assumed to have signaled to the process through `in_cb` that an exit is desired.
Also:
* Fix graceful exit path of transcribe_v2.py.
* Add toggle to enable/disable preview text. It is enabled by default.
* Constrain transcription temperature to 0.0. This keeps latency more
predictable at the cost of some accuracy.
| -rw-r--r-- | GUI/GUI/GUI/Config.cpp | 3 | ||||
| -rw-r--r-- | GUI/GUI/GUI/Config.h | 1 | ||||
| -rw-r--r-- | GUI/GUI/GUI/Frame.cpp | 32 | ||||
| -rw-r--r-- | GUI/GUI/GUI/Frame.h | 1 | ||||
| -rw-r--r-- | GUI/GUI/GUI/PythonWrapper.cpp | 11 | ||||
| -rw-r--r-- | Scripts/steamvr.py | 5 | ||||
| -rw-r--r-- | Scripts/transcribe_v2.py | 38 |
7 files changed, 72 insertions, 19 deletions
diff --git a/GUI/GUI/GUI/Config.cpp b/GUI/GUI/GUI/Config.cpp index 456f5d4..db4b184 100644 --- a/GUI/GUI/GUI/Config.cpp +++ b/GUI/GUI/GUI/Config.cpp @@ -82,6 +82,7 @@ AppConfig::AppConfig(wxTextCtrl* out) enable_profanity_filter(false),
enable_debug_mode(false),
reset_on_toggle(true),
+ enable_previews(true),
gpu_idx(0),
keybind("ctrl+x"),
@@ -122,6 +123,7 @@ bool AppConfig::Serialize(const std::filesystem::path& path) { cm.Set("enable_profanity_filter", enable_profanity_filter);
cm.Set("enable_debug_mode", enable_debug_mode);
cm.Set("reset_on_toggle", reset_on_toggle);
+ cm.Set("enable_previews", enable_previews);
cm.Set("gpu_idx", gpu_idx);
cm.Set("keybind", keybind);
@@ -175,6 +177,7 @@ bool AppConfig::Deserialize(const std::filesystem::path& path) { cm.Get("enable_profanity_filter", c.enable_profanity_filter);
cm.Get("enable_debug_mode", c.enable_debug_mode);
cm.Get("reset_on_toggle", c.reset_on_toggle);
+ cm.Get("enable_previews", c.enable_previews);
cm.Get("gpu_idx", c.gpu_idx);
cm.Get("keybind", c.keybind);
diff --git a/GUI/GUI/GUI/Config.h b/GUI/GUI/GUI/Config.h index a6f83e2..4137c59 100644 --- a/GUI/GUI/GUI/Config.h +++ b/GUI/GUI/GUI/Config.h @@ -68,6 +68,7 @@ public: bool enable_profanity_filter;
bool enable_debug_mode;
bool reset_on_toggle;
+ bool enable_previews;
int gpu_idx;
std::string keybind;
diff --git a/GUI/GUI/GUI/Frame.cpp b/GUI/GUI/GUI/Frame.cpp index 9a781e8..2823437 100644 --- a/GUI/GUI/GUI/Frame.cpp +++ b/GUI/GUI/GUI/Frame.cpp @@ -70,6 +70,7 @@ namespace { ID_PY_APP_ENABLE_PROFANITY_FILTER,
ID_PY_APP_ENABLE_DEBUG_MODE,
ID_PY_APP_RESET_ON_TOGGLE,
+ ID_PY_APP_ENABLE_PREVIEWS,
ID_PY_APP_ROWS,
ID_PY_APP_COLS,
ID_PY_APP_GPU_IDX,
@@ -942,6 +943,17 @@ Frame::Frame() );
py_app_reset_on_toggle_ = py_app_reset_on_toggle;
+ auto* py_app_enable_previews = new wxCheckBox(py_config_panel,
+ ID_PY_APP_ENABLE_PREVIEWS, "Enable previews");
+ py_app_enable_previews->SetValue(app_c_->enable_previews);
+ py_app_enable_previews->SetToolTip(
+ "If checked, audio that has not yet stabilized will also "
+ "be transcribed and shown. Turn this off if you're on a "
+ "resource-constrained system or if transcription is "
+ "running slowly."
+ );
+ py_app_enable_previews_ = py_app_enable_previews;
+
// Hack: Add newlines before and after the button text to make
// the buttons bigger, and easier to click from inside VR.
auto* py_app_start_button = new wxButton(py_config_panel,
@@ -957,6 +969,8 @@ Frame::Frame() /*flags=*/wxEXPAND);
sizer->Add(py_app_reset_on_toggle, /*proportion=*/0,
/*flags=*/wxEXPAND);
+ sizer->Add(py_app_enable_previews, /*proportion=*/0,
+ /*flags=*/wxEXPAND);
sizer->Add(py_app_enable_browser_src, /*proportion=*/0,
/*flags=*/wxEXPAND);
sizer->Add(py_app_enable_local_beep, /*proportion=*/0,
@@ -1538,6 +1552,9 @@ void Frame::ApplyConfigToInputFields() auto* py_app_reset_on_toggle = static_cast<wxCheckBox*>(FindWindowById(ID_PY_APP_RESET_ON_TOGGLE));
py_app_reset_on_toggle->SetValue(app_c_->reset_on_toggle);
+ auto* py_app_enable_previews = static_cast<wxCheckBox*>(FindWindowById(ID_PY_APP_ENABLE_PREVIEWS));
+ py_app_enable_previews->SetValue(app_c_->enable_previews);
+
// Unity panel
auto* unity_assets_path = static_cast<wxDirPickerCtrl*>(FindWindowById(ID_UNITY_ASSETS_FILE_PICKER));
unity_assets_path->SetPath(app_c_->assets_path);
@@ -1574,7 +1591,6 @@ void Frame::OnExit(wxCloseEvent& event) {
OnAppStop();
OnUnityAutoRefreshStop();
- // Allow default close processing to continue.
event.Skip();
}
@@ -2245,6 +2261,7 @@ void Frame::OnAppStart(wxCommandEvent& event) { const bool enable_profanity_filter = py_app_enable_profanity_filter_->GetValue();
const bool enable_debug_mode = py_app_enable_debug_mode_->GetValue();
const bool reset_on_toggle = py_app_reset_on_toggle_->GetValue();
+ const bool enable_previews = py_app_enable_previews_->GetValue();
ASSIGN_OR_RETURN_VOID(int, rows, stoiInRange(transcribe_out_, py_app_rows_->GetValue().ToStdString(), "rows", 1, 10));
ASSIGN_OR_RETURN_VOID(int, cols, stoiInRange(transcribe_out_, py_app_cols_->GetValue().ToStdString(), "cols", 1, 120));
@@ -2277,6 +2294,7 @@ void Frame::OnAppStart(wxCommandEvent& event) { app_c_->enable_profanity_filter = enable_profanity_filter;
app_c_->enable_debug_mode = enable_debug_mode;
app_c_->reset_on_toggle = reset_on_toggle;
+ app_c_->enable_previews = enable_previews;
app_c_->gpu_idx = gpu_idx;
app_c_->keybind = keybind;
app_c_->Serialize(AppConfig::kConfigPath);
@@ -2306,7 +2324,13 @@ void Frame::OnAppStart(wxCommandEvent& event) { }
}
};
- auto in_cb = [&](std::string& in) {};
+ auto in_cb = [&](std::string& in) {
+ if (!run_py_app_) {
+ std::ostringstream oss;
+ oss << "exit" << std::endl;
+ in = oss.str();
+ }
+ };
auto run_cb = [&]() {
return run_py_app_;
};
@@ -2338,7 +2362,7 @@ void Frame::OnAppStop() { }
else {
py_app_.wait();
- Log(transcribe_out_, "Stopped transcription engine\n");
+ Log(transcribe_out_, "Stopped transcription engine\n");
}
status = obs_app_.wait_for(std::chrono::seconds(0));
if (status == std::future_status::ready) {
@@ -2346,7 +2370,7 @@ void Frame::OnAppStop() { }
else {
obs_app_.wait();
- Log(transcribe_out_, "Stopped browser source\n");
+ Log(transcribe_out_, "Stopped browser source\n");
}
transcript_.Clear();
}
diff --git a/GUI/GUI/GUI/Frame.h b/GUI/GUI/GUI/Frame.h index 2a538b4..7baf77f 100644 --- a/GUI/GUI/GUI/Frame.h +++ b/GUI/GUI/GUI/Frame.h @@ -72,6 +72,7 @@ private: wxCheckBox* py_app_enable_profanity_filter_;
wxCheckBox* py_app_enable_debug_mode_;
wxCheckBox* py_app_reset_on_toggle_;
+ wxCheckBox* py_app_enable_previews_;
wxCheckBox* unity_clear_osc_;
wxCheckBox* unity_enable_phonemes_;
diff --git a/GUI/GUI/GUI/PythonWrapper.cpp b/GUI/GUI/GUI/PythonWrapper.cpp index 7141037..29b7d75 100644 --- a/GUI/GUI/GUI/PythonWrapper.cpp +++ b/GUI/GUI/GUI/PythonWrapper.cpp @@ -320,7 +320,16 @@ bool PythonWrapper::InvokeCommandWithArgs(const std::string& cmd, } } if (!run_cb()) { - return false; + DWORD timeout_ms = 1000 * 10; + DWORD ret = WaitForSingleObject(pi.hProcess, timeout_ms); + + if (ret == WAIT_TIMEOUT) { + std::ostringstream stderr_oss; + stderr_oss << "Timed out waiting for graceful exit, killing process"; + out_cb("", stderr_oss.str()); + + TerminateProcess(pi.hProcess, 0); + } } std::ostringstream stdout_oss, stderr_oss; diff --git a/Scripts/steamvr.py b/Scripts/steamvr.py index da07134..3e6c6c9 100644 --- a/Scripts/steamvr.py +++ b/Scripts/steamvr.py @@ -19,6 +19,7 @@ class InputEvent: def pollButtonPress( hand: str = "right", button: str = "b", + ctrl = None # ThreadControl object ) -> int: hands = {} hands["left"] = vr.TrackedControllerRole_LeftHand @@ -31,7 +32,7 @@ def pollButtonPress( system = None first = True - while not system: + while ctrl.run_app and not system: try: system = vr.init(vr.VRApplication_Background) except Exception as e: @@ -42,7 +43,7 @@ def pollButtonPress( last_packet = 0 event_high = False - while True: + while ctrl.run_app: time.sleep(0.01) lh_idx = system.getTrackedDeviceIndexForControllerRole(hands[hand]) diff --git a/Scripts/transcribe_v2.py b/Scripts/transcribe_v2.py index 9b3a404..9e642e9 100644 --- a/Scripts/transcribe_v2.py +++ b/Scripts/transcribe_v2.py @@ -428,6 +428,7 @@ class Whisper: audio, language = langcodes.find(self.cfg["language"]).language, vad_filter = True, + temperature=0.0, without_timestamps = False) res = [] for s in segments: @@ -463,9 +464,11 @@ def saveAudio(audio: bytes, path: str): class VadCommitter: def __init__(self, + cfg: typing.Dict, collector: AudioCollector, whisper: Whisper, segmenter: AudioSegmenter): + self.cfg = cfg self.collector = collector self.whisper = whisper 
self.segmenter = segmenter @@ -486,7 +489,7 @@ class VadCommitter: for s in segments: print(f"commit segment: {s}") delta = ''.join(s.transcript for s in segments) - #print(f"delta get: {delta}") + print(f"delta get: {delta}") audio = self.collector.getAudio() #ts = datetime.fromtimestamp(self.collector.now() - latency_s) @@ -494,12 +497,13 @@ class VadCommitter: #saveAudio(commit_audio, filename) preview = "" - if has_audio: - segments = self.whisper.transcribe(audio) - preview = "".join(s.transcript for s in segments) - else: - #print("VAD detects no audio, skip transcription") - self.collector.keepLast(1.0) + if self.cfg["enable_previews"]: + if has_audio: + segments = self.whisper.transcribe(audio) + preview = "".join(s.transcript for s in segments) + else: + #print("VAD detects no audio, skip transcription") + self.collector.keepLast(1.0) return TranscriptCommit( delta, @@ -556,7 +560,7 @@ def evaluate(cfg, collector = CompressingAudioCollector(collector) whisper = Whisper(collector, cfg) segmenter = AudioSegmenter(min_silence_ms=250) - committer = VadCommitter(collector, whisper, segmenter) + committer = VadCommitter(cfg, collector, whisper, segmenter) transcript = "" commits = [] last_commit_ts = None @@ -685,10 +689,14 @@ def vrInputThread(ctrl: ThreadControl): last_rising = time.time() last_medium_press_end = 0 - button_generator = steamvr.pollButtonPress(hand=hand_id, button=button_id) + button_generator = steamvr.pollButtonPress(hand=hand_id, button=button_id, + ctrl=ctrl) while ctrl.run_app: time.sleep(0.01) - event = next(button_generator) + try: + event = next(button_generator) + except StopIteration: + break if event.opcode == steamvr.EVENT_RISING_EDGE: last_rising = time.time() @@ -790,7 +798,7 @@ def kbInputThread(ctrl: ThreadControl): PAUSE_STATE = 1 state = PAUSE_STATE - while ctrl.run_app == True: + while ctrl.run_app: time.sleep(0.01) cur_press_time = machine.getNextPressTime() @@ -879,7 +887,7 @@ def run(cfg): collector = 
CompressingAudioCollector(collector) whisper = Whisper(collector, cfg) segmenter = AudioSegmenter(min_silence_ms=250) - committer = VadCommitter(collector, whisper, segmenter) + committer = VadCommitter(cfg, collector, whisper, segmenter) pager = OscPager(cfg) ctrl = ThreadControl(cfg) @@ -909,13 +917,19 @@ def run(cfg): for line in sys.stdin: if "exit" in line or "quit" in line: + print("Exit requested", file=sys.stderr) break ctrl.run_app = False + print("Join transcription thread") transcribe_audio_thd.join() + print("Join vr input thread") vr_input_thd.join() + print("Join kb input thread") kb_input_thd.join() + print("Join osc thread") osc_thd.join() + print("Done") if __name__ == "__main__": parser = argparse.ArgumentParser() |
