From 64c158c549f6f5136846a0f546e8a204843e1ef8 Mon Sep 17 00:00:00 2001 From: yum Date: Tue, 28 Feb 2023 20:25:10 -0800 Subject: Implement thread affinity optimization for Python transcription engine A user pointed out that constraining the Python implementation to a single core does not affect visible latency. This seems true on my PC as well. * Reimplement Python transcription wxProcess as a std::async. App shutdown is much faster now. --- GUI/GUI/GUI/Frame.cpp | 88 +++++++++++------------------- GUI/GUI/GUI/Frame.h | 5 +- GUI/GUI/GUI/PythonWrapper.cpp | 122 +++++++++++++++++++++++++++++++++--------- GUI/GUI/GUI/PythonWrapper.h | 9 ++-- 4 files changed, 139 insertions(+), 85 deletions(-) (limited to 'GUI') diff --git a/GUI/GUI/GUI/Frame.cpp b/GUI/GUI/GUI/Frame.cpp index 34bc6d3..e4b1e13 100644 --- a/GUI/GUI/GUI/Frame.cpp +++ b/GUI/GUI/GUI/Frame.cpp @@ -326,12 +326,20 @@ using ::Logging::Log; Frame::Frame() : wxFrame(nullptr, wxID_ANY, "TaSTT"), - py_app_(nullptr), + run_py_app_(false), env_proc_(nullptr), py_app_drain_(this, ID_PY_APP_DRAIN) { app_c_ = std::make_unique(nullptr); + // Initialize futures so that valid() returns true. We use this as a proxy + // to tell whether they're still executing. 
+ { + auto p = std::promise(); + py_app_ = p.get_future(); + p.set_value(true); + } + auto* main_panel = new wxPanel(this, ID_MAIN_PANEL); main_panel_ = main_panel; { @@ -1797,14 +1805,11 @@ void Frame::OnUnityParamChange(wxCommandEvent& event) { } void Frame::OnAppStart(wxCommandEvent& event) { - if (py_app_) { - if (wxProcess::Exists(py_app_->GetPid())) { - Log(transcribe_out_, "Transcription engine already running\n"); - return; - } - delete py_app_; - py_app_ = nullptr; - } + auto status = py_app_.wait_for(std::chrono::seconds(0)); + if (status != std::future_status::ready) { + Log(transcribe_out_, "Transcription engine already running\n"); + return; + } Log(transcribe_out_, "Launching transcription engine\n"); @@ -1895,58 +1900,28 @@ void Frame::OnAppStart(wxCommandEvent& event) { app_c_->use_builtin = use_builtin; app_c_->Serialize(AppConfig::kConfigPath); - auto cb = [&](wxProcess* proc, int ret) -> void { - Log(transcribe_out_, "Transcription engine exited with code {}\n", ret); - DrainAsyncOutput(proc, transcribe_out_); - return; + auto out_cb = [&](const std::string& out, const std::string& err) { + Log(transcribe_out_, out); + Log(transcribe_out_, err); }; - wxProcess* p = PythonWrapper::StartApp(std::move(cb), *app_c_); - if (!p) { - Log(transcribe_out_, "Failed to launch transcription engine\n"); - return; - } - - py_app_ = p; + auto in_cb = [&](std::string& in) {}; + auto run_cb = [&]() { + return run_py_app_; + }; + run_py_app_ = true; + py_app_ = std::move(PythonWrapper::StartApp(*app_c_, std::move(out_cb), std::move(in_cb), std::move(run_cb))); + Log(transcribe_out_, "py app valid: {}\n", py_app_.valid()); } void Frame::OnAppStop() { - if (py_app_) { - const long pid = py_app_->GetPid(); - - Log(transcribe_out_, "Stopping transcription engine...\n"); - - // Closing stdout causes the app to exit. It takes it quite a while - // to exit gracefully; be patient. 
- py_app_->CloseOutput(); - - int timeout_s = 10; - for (int i = 0; i < 100 * timeout_s; i++) { - if (!wxProcess::Exists(pid)) { - break; - } - wxMilliSleep(10); - } - - DrainAsyncOutput(py_app_, transcribe_out_); - - // Now shut it down. - bool first = true; - int loop_cnt = 0; - while (wxProcess::Exists(pid)) { - wxProcess::Kill(pid, wxSIGKILL); - if (++loop_cnt % 100 == 0) { - Log(transcribe_out_, "Waiting for transcription engine to exit\n"); - } - wxMilliSleep(10); - } - - // Since we don't process the termination event, py_app_ deletes itself! - py_app_ = nullptr; - Log(transcribe_out_, "Stopped transcription engine\n"); - } - else { - Log(transcribe_out_, "Transcription engine already stopped\n"); + auto status = py_app_.wait_for(std::chrono::seconds(0)); + if (status == std::future_status::ready) { + Log(transcribe_out_, "Transcription engine already stopped\n"); + return; } + run_py_app_ = false; + py_app_.wait(); + Log(transcribe_out_, "Stopped transcription engine\n"); } void Frame::OnAppStop(wxCommandEvent& event) { @@ -2070,7 +2045,6 @@ void Frame::OnWhisperStop(wxCommandEvent& event) { } void Frame::OnAppDrain(wxTimerEvent& event) { - DrainAsyncOutput(py_app_, transcribe_out_); DrainAsyncOutput(env_proc_, transcribe_out_); Logging::kThreadLogger.Drain(); } diff --git a/GUI/GUI/GUI/Frame.h b/GUI/GUI/GUI/Frame.h index 4aa6a72..39988ef 100644 --- a/GUI/GUI/GUI/Frame.h +++ b/GUI/GUI/GUI/Frame.h @@ -10,6 +10,7 @@ #include "Config.h" #include "WhisperCPP.h" +#include #include class Frame : public wxFrame @@ -76,7 +77,9 @@ private: wxCheckBox* whisper_enable_custom_; wxCheckBox* whisper_enable_browser_src_; - wxProcess* py_app_; + std::future py_app_; + bool run_py_app_; + wxProcess* env_proc_; wxTimer py_app_drain_; diff --git a/GUI/GUI/GUI/PythonWrapper.cpp b/GUI/GUI/GUI/PythonWrapper.cpp index 55c7626..d5636ce 100644 --- a/GUI/GUI/GUI/PythonWrapper.cpp +++ b/GUI/GUI/GUI/PythonWrapper.cpp @@ -1,8 +1,14 @@ +// Import rand_s() WIN32 API. 
+#define _CRT_RAND_S +// Silence security warnings caused by importing stdlib.h before wx. +#define _CRT_SECURE_NO_WARNINGS + +#include + #include "Logging.h" #include "PythonWrapper.h" #include "ScopeGuard.h" #include "Util.h" - #include "Config.h" #include @@ -92,11 +98,66 @@ std::string DrainWin32Pipe(const HANDLE pipe) { return oss.str(); } +bool SetAffinityMask( + HANDLE hProcess, + const std::function out_cb) +{ + // Set process affinity mask. This is an simple optimization pointed out by + // a user. Constraining any of the processes used by the STT to a reduced + // number of processors does not affect user-visible performance. + { + // Query processor information. + SYSTEM_INFO sysinfo{}; + GetSystemInfo(&sysinfo); + //sysinfo.dwNumberOfProcessors + + // Pick a random processor. + unsigned int rand_num; + auto err = rand_s(&rand_num); + if (err) { + std::ostringstream err_oss; + err_oss << "Failed to get random number: " << err << std::endl; + out_cb("", err_oss.str()); + return false; + } + // Constrain the processor ID to [1, num_processors). + // We don't want to run on processor 0 since it receives system interrupts + int processor_id = rand_num; + switch (sysinfo.dwNumberOfProcessors) { + // case 0 can never happen. + case 1: + processor_id = 0; + case 2: + processor_id = rand_num % 2; + default: + processor_id = (processor_id % (sysinfo.dwNumberOfProcessors - 1)) + 1; + } + DWORD_PTR affinity_mask = 0; + processor_id = std::min(processor_id, + static_cast(sizeof(affinity_mask))); + affinity_mask = 1LL << processor_id; + + if (!SetProcessAffinityMask(hProcess, affinity_mask)) { + std::ostringstream err_oss; + err_oss << "Failed to set affinity mask: " << GetWin32ErrMsg(); + out_cb("", err_oss.str()); + return false; + } + + { + std::ostringstream oss; + oss << "Set affinity mask to " << affinity_mask << + ", i.e. 
processor " << processor_id << std::endl; + out_cb(oss.str(), ""); + } + } +} + bool PythonWrapper::InvokeCommandWithArgs(const std::string& cmd, std::vector&& args, const std::function&& out_cb, - const std::function&& in_cb, - const std::function&& run_cb) { + const std::function&& in_cb, + const std::function&& run_cb) { std::ostringstream cmd_oss; cmd_oss << cmd; for (const auto& arg : args) { @@ -237,6 +298,9 @@ bool PythonWrapper::InvokeCommandWithArgs(const std::string& cmd, CloseHandle(pi.hThread); }); + // Set affinity mask (best effort) + SetAffinityMask(pi.hProcess, out_cb); + // While the process is running, drain output and send input every 10 ms. DWORD timeout_ms = 10; DWORD ret = WAIT_TIMEOUT; @@ -401,27 +465,37 @@ bool PythonWrapper::InstallPip( return true; } -wxProcess* PythonWrapper::StartApp( - std::function&& exit_callback, - const AppConfig& config) { - return InvokeAsyncWithArgs({ - "-u", // Unbuffered output - "Resources/Scripts/transcribe.py", - "--mic", config.microphone, - "--lang", config.language, - "--model", config.model, - "--chars_per_sync", std::to_string(config.chars_per_sync), - "--bytes_per_char", std::to_string(config.bytes_per_char), - "--button", Quote(config.button), - "--enable_local_beep", config.enable_local_beep ? "1" : "0", - "--rows", std::to_string(config.rows), - "--cols", std::to_string(config.cols), - "--window_duration_s", config.window_duration, - "--cpu", config.use_cpu ? "1" : "0", - "--use_builtin", config.use_builtin ? 
"1" : "0", - "--emotes_pickle", kEmotesPickle, - }, - std::move(exit_callback)); +std::future PythonWrapper::StartApp( + const AppConfig& config, + const std::function&& out_cb, + const std::function&& in_cb, + const std::function&& run_cb) { + return std::move(std::async(std::launch::async, + [&]( + const std::function&& out_cb, + const std::function&& in_cb, + const std::function&& run_cb) -> bool { + return InvokeWithArgs({ + "-u", // Unbuffered output + "Resources/Scripts/transcribe.py", + "--mic", config.microphone, + "--lang", config.language, + "--model", config.model, + "--chars_per_sync", std::to_string(config.chars_per_sync), + "--bytes_per_char", std::to_string(config.bytes_per_char), + "--button", Quote(config.button), + "--enable_local_beep", config.enable_local_beep ? "1" : "0", + "--rows", std::to_string(config.rows), + "--cols", std::to_string(config.cols), + "--window_duration_s", config.window_duration, + "--cpu", config.use_cpu ? "1" : "0", + "--use_builtin", config.use_builtin ? "1" : "0", + "--emotes_pickle", kEmotesPickle, + }, + std::move(out_cb), + std::move(in_cb), + std::move(run_cb)); + }, std::move(out_cb), std::move(in_cb), std::move(run_cb))); } bool PythonWrapper::GenerateAnimator( diff --git a/GUI/GUI/GUI/PythonWrapper.h b/GUI/GUI/GUI/PythonWrapper.h index edea0df..3c8f53a 100644 --- a/GUI/GUI/GUI/PythonWrapper.h +++ b/GUI/GUI/GUI/PythonWrapper.h @@ -11,6 +11,7 @@ #include "Config.h" #include +#include #include #include @@ -70,9 +71,11 @@ namespace PythonWrapper // parameterized with config files instead of these ever-growing lists of // parameters. We could persist those files so settings would persist across // app restarts. 
- wxProcess* StartApp( - std::function&& exit_callback, - const AppConfig& config); + std::future StartApp( + const AppConfig& config, + const std::function&& out_cb, + const std::function&& in_cb = [](std::string&) {}, + const std::function&& run_cb = []() { return true; }); bool GenerateAnimator( const AppConfig& config, -- cgit v1.2.3