From f7d7858a9ff270380f5407e48d6afaf6a3a97de3 Mon Sep 17 00:00:00 2001 From: yum Date: Sun, 26 Feb 2023 14:21:18 -0800 Subject: Begin work on C++ custom chatbox Sort of a misnomer. The idea is to use C++ for transcription and Python for steamvr and OSC. Having issues getting output from multithreaded Python code. Not in the mood to figure this out today. * Hide unimplemented parts of C++ panel. --- GUI/GUI/GUI/Frame.cpp | 25 ++++++++++- GUI/GUI/GUI/PythonWrapper.cpp | 56 ++++++++++++++++++++--- GUI/GUI/GUI/PythonWrapper.h | 8 +++- GUI/GUI/GUI/WhisperCPP.cpp | 100 ++++++++++++++++++++++++++++++++++++++++++ GUI/GUI/GUI/WhisperCPP.h | 6 +++ 5 files changed, 185 insertions(+), 10 deletions(-) (limited to 'GUI') diff --git a/GUI/GUI/GUI/Frame.cpp b/GUI/GUI/GUI/Frame.cpp index de99bdc..34bc6d3 100644 --- a/GUI/GUI/GUI/Frame.cpp +++ b/GUI/GUI/GUI/Frame.cpp @@ -965,6 +965,8 @@ Frame::Frame() sizer->Add(whisper_model, /*proportion=*/0, /*flags=*/wxEXPAND); +#if 0 + // Not implemented. sizer->Add(new wxStaticText(whisper_config_panel_pairs, wxID_ANY, /*label=*/"Characters per sync:")); sizer->Add(whisper_chars_per_sync, /*proportion=*/0, @@ -989,6 +991,13 @@ Frame::Frame() wxID_ANY, /*label=*/"Text box columns:")); sizer->Add(whisper_cols, /*proportion=*/0, /*flags=*/wxEXPAND); +#else + whisper_chars_per_sync->Hide(); + whisper_bytes_per_char->Hide(); + whisper_button->Hide(); + whisper_rows->Hide(); + whisper_cols->Hide(); +#endif sizer->Add(new wxStaticText(whisper_config_panel_pairs, wxID_ANY, /*label=*/"Browser source port:")); @@ -1052,14 +1061,22 @@ Frame::Frame() whisper_config_panel->SetSizer(sizer); sizer->Add(whisper_config_panel_pairs, /*proportion=*/0, /*flags=*/wxEXPAND); +#if 0 sizer->Add(whisper_enable_local_beep, /*proportion=*/0, /*flags=*/wxEXPAND); + // Not yet implemented. sizer->Add(whisper_use_cpu, /*proportion=*/0, /*flags=*/wxEXPAND); sizer->Add(whisper_enable_builtin, /*proportion=*/0, /*flags=*/wxEXPAND); sizer->Add(whisper_enable_custom, /*proportion=*/0, /*flags=*/wxEXPAND); +#else + whisper_enable_local_beep->Hide(); + whisper_use_cpu->Hide(); + whisper_enable_builtin->Hide(); + whisper_enable_custom->Hide(); +#endif sizer->Add(whisper_enable_browser_src, /*proportion=*/0, /*flags=*/wxEXPAND); sizer->Add(whisper_start_button, /*proportion=*/0, @@ -1182,7 +1199,6 @@ Frame::Frame() // Now that transcribe_out_ has been created, we can deserialize. app_c_ = std::make_unique(transcribe_out_); - Log(transcribe_out_, "Deserializing config\n"); app_c_->Deserialize(AppConfig::kConfigPath); Bind(wxEVT_CLOSE_WINDOW, &Frame::OnExit, this, wxID_EXIT); @@ -2032,9 +2048,11 @@ void Frame::OnWhisperStart(wxCommandEvent& event) { whisper_->Start(*app_c_); if (whisper_enable_browser_src_->GetValue()) { - Log(whisper_out_, "Frame launching browser src\n"); whisper_->StartBrowserSource(*app_c_); } + if (whisper_enable_custom_->GetValue()) { + whisper_->StartCustomChatbox(*app_c_); + } } void Frame::OnWhisperStop() { @@ -2042,6 +2060,9 @@ void Frame::OnWhisperStop() { if (whisper_enable_browser_src_->GetValue()) { whisper_->StopBrowserSource(); } + if (whisper_enable_custom_->GetValue()) { + whisper_->StopCustomChatbox(); + } } void Frame::OnWhisperStop(wxCommandEvent& event) { diff --git a/GUI/GUI/GUI/PythonWrapper.cpp b/GUI/GUI/GUI/PythonWrapper.cpp index 51907b5..6c08fbe 100644 --- a/GUI/GUI/GUI/PythonWrapper.cpp +++ b/GUI/GUI/GUI/PythonWrapper.cpp @@ -94,7 +94,9 @@ std::string DrainWin32Pipe(const HANDLE pipe) { bool PythonWrapper::InvokeCommandWithArgs(const std::string& cmd, std::vector&& args, - const std::function&& out_cb) { + const std::function&& out_cb, + const std::function&& in_cb, + const std::function&& run_cb) { std::ostringstream cmd_oss; cmd_oss << cmd; for (const auto& arg : args) { @@ -146,10 +148,34 @@ bool PythonWrapper::InvokeCommandWithArgs(const std::string& cmd, }); SetHandleInformation(stderr_read, HANDLE_FLAG_INHERIT, 0); + HANDLE stdin_read{}; + HANDLE stdin_write{}; + SECURITY_ATTRIBUTES stdin_sec_attr{}; + stdin_sec_attr.nLength = sizeof(stdin_sec_attr); + stdin_sec_attr.bInheritHandle = TRUE; + + if (!CreatePipe(&stdin_read, &stdin_write, &stdin_sec_attr, 0)) { + std::ostringstream err_oss; + err_oss << "Error while executing python command \"" << cmd_oss.str() + << "\": Failed to create stdin pipe: " << GetWin32ErrMsg() << std::endl; + out_cb("", err_oss.str()); + return false; + } + ScopeGuard stdin_cleanup([&]() { + if (stdin_read) { + CloseHandle(stdin_read); + } + if (stdin_write) { + CloseHandle(stdin_write); + } + }); + SetHandleInformation(stdin_write, HANDLE_FLAG_INHERIT, 0); + STARTUPINFOA si{}; si.cb = sizeof(si); si.hStdOutput = stdout_write; si.hStdError = stderr_write; + si.hStdInput = stdin_read; si.dwFlags |= STARTF_USESTDHANDLES; si.dwFlags |= STARTF_USESHOWWINDOW; si.wShowWindow = SW_HIDE; @@ -195,10 +221,10 @@ bool PythonWrapper::InvokeCommandWithArgs(const std::string& cmd, CloseHandle(pi.hThread); }); - // While the process is running, drain output every 10 ms. + // While the process is running, drain output and send input every 10 ms. DWORD timeout_ms = 10; DWORD ret = WAIT_TIMEOUT; - while (ret == WAIT_TIMEOUT) { + while (run_cb() && ret == WAIT_TIMEOUT) { DWORD ret = WaitForSingleObject(pi.hProcess, timeout_ms); if (ret != WAIT_TIMEOUT) { break; @@ -207,9 +233,25 @@ bool PythonWrapper::InvokeCommandWithArgs(const std::string& cmd, stdout_oss << DrainWin32Pipe(stdout_read); stderr_oss << DrainWin32Pipe(stderr_read); out_cb(stdout_oss.str(), stderr_oss.str()); + + std::string input; + in_cb(input); + if (input.size()) { + DWORD cur_bytes_write = 0; + DWORD sum_bytes_write = 0; + std::vector buf(4096, 0); + while (sum_bytes_write < input.size() && + WriteFile(stdin_write, input.data() + sum_bytes_write, + input.size() - sum_bytes_write, &cur_bytes_write, NULL)) { + sum_bytes_write += cur_bytes_write; + } + } + } + if (!run_cb()) { + return true; } - std::ostringstream stdout_oss, stderr_oss; + std::ostringstream stdout_oss, stderr_oss; DWORD exit_code; if (!GetExitCodeProcess(pi.hProcess, &exit_code)) { stderr_oss << "Failed to get exit code: " << GetWin32ErrMsg(); @@ -280,9 +322,11 @@ bool PythonWrapper::InvokeWithArgs(std::vector&& args, } bool PythonWrapper::InvokeWithArgs(std::vector&& args, - const std::function&& out_cb) { + const std::function&& out_cb, + const std::function&& in_cb, + const std::function&& run_cb) { return InvokeCommandWithArgs("Resources/Python/python.exe", - std::move(args), std::move(out_cb)); + std::move(args), std::move(out_cb), std::move(in_cb), std::move(run_cb)); } diff --git a/GUI/GUI/GUI/PythonWrapper.h b/GUI/GUI/GUI/PythonWrapper.h index 12f56cc..f0591c1 100644 --- a/GUI/GUI/GUI/PythonWrapper.h +++ b/GUI/GUI/GUI/PythonWrapper.h @@ -36,7 +36,9 @@ namespace PythonWrapper // On error, sets `out` to an error message and returns false. bool InvokeCommandWithArgs(const std::string& cmd, std::vector&& args, - const std::function&& out_cb); + const std::function&& out_cb, + const std::function&& in_cb = [](std::string&) {}, + const std::function&& run_cb = []() { return true; }); // Invoke the interpreter with arguments. // On error, sets `out` to an error message and returns false. @@ -47,7 +49,9 @@ namespace PythonWrapper const std::string&& err_msg, wxTextCtrl* out); bool InvokeWithArgs(std::vector&& args, - const std::function&& out_cb); + const std::function&& out_cb, + const std::function&& in_cb = [](std::string&) {}, + const std::function&& run_cb = []() { return true; }); // Execute python --version. std::string GetVersion(); diff --git a/GUI/GUI/GUI/WhisperCPP.cpp b/GUI/GUI/GUI/WhisperCPP.cpp index a82dc59..b969494 100644 --- a/GUI/GUI/GUI/WhisperCPP.cpp +++ b/GUI/GUI/GUI/WhisperCPP.cpp @@ -79,6 +79,11 @@ WhisperCPP::WhisperCPP(wxTextCtrl* out) browser_src_thd_ = p.get_future(); p.set_value(); } + { + auto p = std::promise(); + custom_chatbox_thd_ = p.get_future(); + p.set_value(); + } } WhisperCPP::~WhisperCPP() { @@ -418,6 +423,101 @@ void WhisperCPP::StopBrowserSource() { Log(out_, "Done!\n"); } +// TODO(yum) we should have a thread which simply tells us when to +// start/stop transcription. +void WhisperCPP::StartCustomChatbox(const AppConfig& c) { + if (!custom_chatbox_thd_.valid()) { + Log(out_, "Custom chatbox already running\n"); + return; + } + + custom_chatbox_thd_ = std::async(std::launch::async, [&]() -> void { + run_custom_chatbox_ = true; + Log(out_, "Launching custom chatbox OSC layer\n"); + + while (run_custom_chatbox_) { + bool send_transcript = false; + auto out_cb = [&](const std::string& out, const std::string& err) { + std::string delim = "\r\n"; + size_t begin = 0; + size_t end = out.size(); + while (begin < out.size()) { + end = out.find(delim, begin); + if (end == std::string::npos) { + end = out.size(); + } + ScopeGuard advance_begin([&]() { begin = end + delim.size(); }); + std::string line = out.substr(begin, end - begin); + if (line == "1") { + Log(out_, "Control message get: send transcript\n"); + transcript_.Clear(); + send_transcript = true; + } + else if (line == "0") { + // TODO pause transcription loop? + Log(out_, "Control message get: stop transcript\n"); + send_transcript = false; + } + else { + Log(out_, " custom chatbox: Unrecognized control sequence: {}\n", line); + } + } + + begin = 0; + end = err.size(); + while (begin < err.size()) { + end = err.find(delim, begin); + if (end == std::string::npos) { + end = err.size(); + } + ScopeGuard advance_begin([&]() { begin = end + delim.size(); }); + std::string line = err.substr(begin, end - begin); + Log(out_, " {}\n", line); + } + }; + auto in_cb = [&](std::string& in) { + if (!send_transcript) { + return; + } + // TODO(yum) use a streaming interface for this. As written, we + // have to copy a ton of redundant text every time. + const std::vector segments = transcript_.Get(); + std::ostringstream oss; + for (const auto& segment : segments) { + oss << segment; + } + oss << std::endl; + in = oss.str(); + }; + auto run_cb = [&]() { + return run_custom_chatbox_; + }; + if (!PythonWrapper::InvokeWithArgs({ + "Resources/Scripts/cpp_transcribe.py", + "--bytes_per_char", std::to_string(c.bytes_per_char), + "--chars_per_sync", std::to_string(c.chars_per_sync), + "--rows", std::to_string(c.rows), + "--cols", std::to_string(c.cols), + "--button", Quote(c.button), + "--enable_local_beep", c.enable_local_beep ? "1" : "0", + "--use_builtin", "0", + }, out_cb, in_cb, run_cb)) { + Log(out_, "Failed to launch custom chatbox OSC layer!\n"); + break; + } + } + + Log(out_, "Custom chatbox thread exit\n"); + }); +} + +void WhisperCPP::StopCustomChatbox() { + Log(out_, "Stopping custom chatbox...\n"); + run_custom_chatbox_ = false; + custom_chatbox_thd_.wait(); + Log(out_, "Done!\n"); +} + bool WhisperCPP::GetMicsImpl(std::vector& mics) { pfnFoundCaptureDevices dev_cb = [](int len, const sCaptureDevice* buf, void* pv)->HRESULT __stdcall { std::vector* mics = static_cast*>(pv); diff --git a/GUI/GUI/GUI/WhisperCPP.h b/GUI/GUI/GUI/WhisperCPP.h index fbaab46..1390e97 100644 --- a/GUI/GUI/GUI/WhisperCPP.h +++ b/GUI/GUI/GUI/WhisperCPP.h @@ -41,6 +41,9 @@ public: void StartBrowserSource(const AppConfig& c); void StopBrowserSource(); + void StartCustomChatbox(const AppConfig& c); + void StopCustomChatbox(); + private: bool GetMicsImpl(std::vector& mics); @@ -54,5 +57,8 @@ private: std::future browser_src_thd_; volatile bool run_browser_src_; + std::future custom_chatbox_thd_; + volatile bool run_custom_chatbox_; + Transcript transcript_; }; -- cgit v1.2.3