summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--GUI/GUI/GUI/Config.cpp3
-rw-r--r--GUI/GUI/GUI/Config.h1
-rw-r--r--GUI/GUI/GUI/Frame.cpp32
-rw-r--r--GUI/GUI/GUI/Frame.h1
-rw-r--r--GUI/GUI/GUI/PythonWrapper.cpp11
-rw-r--r--Scripts/steamvr.py5
-rw-r--r--Scripts/transcribe_v2.py38
7 files changed, 72 insertions, 19 deletions
diff --git a/GUI/GUI/GUI/Config.cpp b/GUI/GUI/GUI/Config.cpp
index 456f5d4..db4b184 100644
--- a/GUI/GUI/GUI/Config.cpp
+++ b/GUI/GUI/GUI/Config.cpp
@@ -82,6 +82,7 @@ AppConfig::AppConfig(wxTextCtrl* out)
enable_profanity_filter(false),
enable_debug_mode(false),
reset_on_toggle(true),
+ enable_previews(true),
gpu_idx(0),
keybind("ctrl+x"),
@@ -122,6 +123,7 @@ bool AppConfig::Serialize(const std::filesystem::path& path) {
cm.Set("enable_profanity_filter", enable_profanity_filter);
cm.Set("enable_debug_mode", enable_debug_mode);
cm.Set("reset_on_toggle", reset_on_toggle);
+ cm.Set("enable_previews", enable_previews);
cm.Set("gpu_idx", gpu_idx);
cm.Set("keybind", keybind);
@@ -175,6 +177,7 @@ bool AppConfig::Deserialize(const std::filesystem::path& path) {
cm.Get("enable_profanity_filter", c.enable_profanity_filter);
cm.Get("enable_debug_mode", c.enable_debug_mode);
cm.Get("reset_on_toggle", c.reset_on_toggle);
+ cm.Get("enable_previews", c.enable_previews);
cm.Get("gpu_idx", c.gpu_idx);
cm.Get("keybind", c.keybind);
diff --git a/GUI/GUI/GUI/Config.h b/GUI/GUI/GUI/Config.h
index a6f83e2..4137c59 100644
--- a/GUI/GUI/GUI/Config.h
+++ b/GUI/GUI/GUI/Config.h
@@ -68,6 +68,7 @@ public:
bool enable_profanity_filter;
bool enable_debug_mode;
bool reset_on_toggle;
+ bool enable_previews;
int gpu_idx;
std::string keybind;
diff --git a/GUI/GUI/GUI/Frame.cpp b/GUI/GUI/GUI/Frame.cpp
index 9a781e8..2823437 100644
--- a/GUI/GUI/GUI/Frame.cpp
+++ b/GUI/GUI/GUI/Frame.cpp
@@ -70,6 +70,7 @@ namespace {
ID_PY_APP_ENABLE_PROFANITY_FILTER,
ID_PY_APP_ENABLE_DEBUG_MODE,
ID_PY_APP_RESET_ON_TOGGLE,
+ ID_PY_APP_ENABLE_PREVIEWS,
ID_PY_APP_ROWS,
ID_PY_APP_COLS,
ID_PY_APP_GPU_IDX,
@@ -942,6 +943,17 @@ Frame::Frame()
);
py_app_reset_on_toggle_ = py_app_reset_on_toggle;
+ auto* py_app_enable_previews = new wxCheckBox(py_config_panel,
+ ID_PY_APP_ENABLE_PREVIEWS, "Enable previews");
+ py_app_enable_previews->SetValue(app_c_->enable_previews);
+ py_app_enable_previews->SetToolTip(
+ "If checked, audio that has not yet stabilized will also "
+ "be transcribed and shown. Turn this off if you're on a "
+ "resource-constrained system or if transcription is "
+ "running slowly."
+ );
+ py_app_enable_previews_ = py_app_enable_previews;
+
// Hack: Add newlines before and after the button text to make
// the buttons bigger, and easier to click from inside VR.
auto* py_app_start_button = new wxButton(py_config_panel,
@@ -957,6 +969,8 @@ Frame::Frame()
/*flags=*/wxEXPAND);
sizer->Add(py_app_reset_on_toggle, /*proportion=*/0,
/*flags=*/wxEXPAND);
+ sizer->Add(py_app_enable_previews, /*proportion=*/0,
+ /*flags=*/wxEXPAND);
sizer->Add(py_app_enable_browser_src, /*proportion=*/0,
/*flags=*/wxEXPAND);
sizer->Add(py_app_enable_local_beep, /*proportion=*/0,
@@ -1538,6 +1552,9 @@ void Frame::ApplyConfigToInputFields()
auto* py_app_reset_on_toggle = static_cast<wxCheckBox*>(FindWindowById(ID_PY_APP_RESET_ON_TOGGLE));
py_app_reset_on_toggle->SetValue(app_c_->reset_on_toggle);
+ auto* py_app_enable_previews = static_cast<wxCheckBox*>(FindWindowById(ID_PY_APP_ENABLE_PREVIEWS));
+ py_app_enable_previews->SetValue(app_c_->enable_previews);
+
// Unity panel
auto* unity_assets_path = static_cast<wxDirPickerCtrl*>(FindWindowById(ID_UNITY_ASSETS_FILE_PICKER));
unity_assets_path->SetPath(app_c_->assets_path);
@@ -1574,7 +1591,6 @@ void Frame::OnExit(wxCloseEvent& event)
{
OnAppStop();
OnUnityAutoRefreshStop();
- // Allow default close processing to continue.
event.Skip();
}
@@ -2245,6 +2261,7 @@ void Frame::OnAppStart(wxCommandEvent& event) {
const bool enable_profanity_filter = py_app_enable_profanity_filter_->GetValue();
const bool enable_debug_mode = py_app_enable_debug_mode_->GetValue();
const bool reset_on_toggle = py_app_reset_on_toggle_->GetValue();
+ const bool enable_previews = py_app_enable_previews_->GetValue();
ASSIGN_OR_RETURN_VOID(int, rows, stoiInRange(transcribe_out_, py_app_rows_->GetValue().ToStdString(), "rows", 1, 10));
ASSIGN_OR_RETURN_VOID(int, cols, stoiInRange(transcribe_out_, py_app_cols_->GetValue().ToStdString(), "cols", 1, 120));
@@ -2277,6 +2294,7 @@ void Frame::OnAppStart(wxCommandEvent& event) {
app_c_->enable_profanity_filter = enable_profanity_filter;
app_c_->enable_debug_mode = enable_debug_mode;
app_c_->reset_on_toggle = reset_on_toggle;
+ app_c_->enable_previews = enable_previews;
app_c_->gpu_idx = gpu_idx;
app_c_->keybind = keybind;
app_c_->Serialize(AppConfig::kConfigPath);
@@ -2306,7 +2324,13 @@ void Frame::OnAppStart(wxCommandEvent& event) {
}
}
};
- auto in_cb = [&](std::string& in) {};
+ auto in_cb = [&](std::string& in) {
+ if (!run_py_app_) {
+ std::ostringstream oss;
+ oss << "exit" << std::endl;
+ in = oss.str();
+ }
+ };
auto run_cb = [&]() {
return run_py_app_;
};
@@ -2338,7 +2362,7 @@ void Frame::OnAppStop() {
}
else {
py_app_.wait();
- Log(transcribe_out_, "Stopped transcription engine\n");
+ Log(transcribe_out_, "Stopped transcription engine\n");
}
status = obs_app_.wait_for(std::chrono::seconds(0));
if (status == std::future_status::ready) {
@@ -2346,7 +2370,7 @@ void Frame::OnAppStop() {
}
else {
obs_app_.wait();
- Log(transcribe_out_, "Stopped browser source\n");
+ Log(transcribe_out_, "Stopped browser source\n");
}
transcript_.Clear();
}
diff --git a/GUI/GUI/GUI/Frame.h b/GUI/GUI/GUI/Frame.h
index 2a538b4..7baf77f 100644
--- a/GUI/GUI/GUI/Frame.h
+++ b/GUI/GUI/GUI/Frame.h
@@ -72,6 +72,7 @@ private:
wxCheckBox* py_app_enable_profanity_filter_;
wxCheckBox* py_app_enable_debug_mode_;
wxCheckBox* py_app_reset_on_toggle_;
+ wxCheckBox* py_app_enable_previews_;
wxCheckBox* unity_clear_osc_;
wxCheckBox* unity_enable_phonemes_;
diff --git a/GUI/GUI/GUI/PythonWrapper.cpp b/GUI/GUI/GUI/PythonWrapper.cpp
index 7141037..29b7d75 100644
--- a/GUI/GUI/GUI/PythonWrapper.cpp
+++ b/GUI/GUI/GUI/PythonWrapper.cpp
@@ -320,7 +320,16 @@ bool PythonWrapper::InvokeCommandWithArgs(const std::string& cmd,
}
}
if (!run_cb()) {
- return false;
+ DWORD timeout_ms = 1000 * 10;
+ DWORD ret = WaitForSingleObject(pi.hProcess, timeout_ms);
+
+ if (ret == WAIT_TIMEOUT) {
+ std::ostringstream stderr_oss;
+ stderr_oss << "Timed out waiting for graceful exit, killing process";
+ out_cb("", stderr_oss.str());
+
+ TerminateProcess(pi.hProcess, 0);
+ }
}
std::ostringstream stdout_oss, stderr_oss;
diff --git a/Scripts/steamvr.py b/Scripts/steamvr.py
index da07134..3e6c6c9 100644
--- a/Scripts/steamvr.py
+++ b/Scripts/steamvr.py
@@ -19,6 +19,7 @@ class InputEvent:
def pollButtonPress(
hand: str = "right",
button: str = "b",
+ ctrl = None # ThreadControl object
) -> int:
hands = {}
hands["left"] = vr.TrackedControllerRole_LeftHand
@@ -31,7 +32,7 @@ def pollButtonPress(
system = None
first = True
- while not system:
+ while ctrl.run_app and not system:
try:
system = vr.init(vr.VRApplication_Background)
except Exception as e:
@@ -42,7 +43,7 @@ def pollButtonPress(
last_packet = 0
event_high = False
- while True:
+ while ctrl.run_app:
time.sleep(0.01)
lh_idx = system.getTrackedDeviceIndexForControllerRole(hands[hand])
diff --git a/Scripts/transcribe_v2.py b/Scripts/transcribe_v2.py
index 9b3a404..9e642e9 100644
--- a/Scripts/transcribe_v2.py
+++ b/Scripts/transcribe_v2.py
@@ -428,6 +428,7 @@ class Whisper:
audio,
language = langcodes.find(self.cfg["language"]).language,
vad_filter = True,
+ temperature=0.0,
without_timestamps = False)
res = []
for s in segments:
@@ -463,9 +464,11 @@ def saveAudio(audio: bytes, path: str):
class VadCommitter:
def __init__(self,
+ cfg: typing.Dict,
collector: AudioCollector,
whisper: Whisper,
segmenter: AudioSegmenter):
+ self.cfg = cfg
self.collector = collector
self.whisper = whisper
self.segmenter = segmenter
@@ -486,7 +489,7 @@ class VadCommitter:
for s in segments:
print(f"commit segment: {s}")
delta = ''.join(s.transcript for s in segments)
- #print(f"delta get: {delta}")
+ print(f"delta get: {delta}")
audio = self.collector.getAudio()
#ts = datetime.fromtimestamp(self.collector.now() - latency_s)
@@ -494,12 +497,13 @@ class VadCommitter:
#saveAudio(commit_audio, filename)
preview = ""
- if has_audio:
- segments = self.whisper.transcribe(audio)
- preview = "".join(s.transcript for s in segments)
- else:
- #print("VAD detects no audio, skip transcription")
- self.collector.keepLast(1.0)
+ if self.cfg["enable_previews"]:
+ if has_audio:
+ segments = self.whisper.transcribe(audio)
+ preview = "".join(s.transcript for s in segments)
+ else:
+ #print("VAD detects no audio, skip transcription")
+ self.collector.keepLast(1.0)
return TranscriptCommit(
delta,
@@ -556,7 +560,7 @@ def evaluate(cfg,
collector = CompressingAudioCollector(collector)
whisper = Whisper(collector, cfg)
segmenter = AudioSegmenter(min_silence_ms=250)
- committer = VadCommitter(collector, whisper, segmenter)
+ committer = VadCommitter(cfg, collector, whisper, segmenter)
transcript = ""
commits = []
last_commit_ts = None
@@ -685,10 +689,14 @@ def vrInputThread(ctrl: ThreadControl):
last_rising = time.time()
last_medium_press_end = 0
- button_generator = steamvr.pollButtonPress(hand=hand_id, button=button_id)
+ button_generator = steamvr.pollButtonPress(hand=hand_id, button=button_id,
+ ctrl=ctrl)
while ctrl.run_app:
time.sleep(0.01)
- event = next(button_generator)
+ try:
+ event = next(button_generator)
+ except StopIteration:
+ break
if event.opcode == steamvr.EVENT_RISING_EDGE:
last_rising = time.time()
@@ -790,7 +798,7 @@ def kbInputThread(ctrl: ThreadControl):
PAUSE_STATE = 1
state = PAUSE_STATE
- while ctrl.run_app == True:
+ while ctrl.run_app:
time.sleep(0.01)
cur_press_time = machine.getNextPressTime()
@@ -879,7 +887,7 @@ def run(cfg):
collector = CompressingAudioCollector(collector)
whisper = Whisper(collector, cfg)
segmenter = AudioSegmenter(min_silence_ms=250)
- committer = VadCommitter(collector, whisper, segmenter)
+ committer = VadCommitter(cfg, collector, whisper, segmenter)
pager = OscPager(cfg)
ctrl = ThreadControl(cfg)
@@ -909,13 +917,19 @@ def run(cfg):
for line in sys.stdin:
if "exit" in line or "quit" in line:
+ print("Exit requested", file=sys.stderr)
break
ctrl.run_app = False
+ print("Join transcription thread")
transcribe_audio_thd.join()
+ print("Join vr input thread")
vr_input_thd.join()
+ print("Join kb input thread")
kb_input_thd.join()
+ print("Join osc thread")
osc_thd.join()
+ print("Done")
if __name__ == "__main__":
parser = argparse.ArgumentParser()