From f65b93aa6f0a5b7ddd069cd1f50f142029e4a5e5 Mon Sep 17 00:00:00 2001 From: yum Date: Tue, 28 Feb 2023 00:34:10 -0800 Subject: Bugfix: fix use-after-free in GetMicsImpl * Plumb beam search params into whisper cpp implementation (currently broken) --- GUI/GUI/GUI/GUI.vcxproj | 1 + GUI/GUI/GUI/WhisperCPP.cpp | 22 ++++++++++++---------- GUI/GUI/GUI/WhisperCPP.h | 10 +++++++++- GUI/README.md | 11 ++++++----- 4 files changed, 28 insertions(+), 16 deletions(-) (limited to 'GUI') diff --git a/GUI/GUI/GUI/GUI.vcxproj b/GUI/GUI/GUI/GUI.vcxproj index 6976c31..2d08d30 100644 --- a/GUI/GUI/GUI/GUI.vcxproj +++ b/GUI/GUI/GUI/GUI.vcxproj @@ -44,6 +44,7 @@ true v143 Unicode + false Application diff --git a/GUI/GUI/GUI/WhisperCPP.cpp b/GUI/GUI/GUI/WhisperCPP.cpp index ec8a6c2..6fd38b8 100644 --- a/GUI/GUI/GUI/WhisperCPP.cpp +++ b/GUI/GUI/GUI/WhisperCPP.cpp @@ -113,14 +113,14 @@ bool WhisperCPP::GetMics(std::vector& mics) { return false; } - std::vector> mics_raw; + std::vector> mics_raw; if (!GetMicsImpl(mics_raw)) { return false; } mics.clear(); for (const auto& raw_mic : mics_raw) { - mics.push_back(wcharToAsciiString(raw_mic->displayName)); + mics.push_back(wcharToAsciiString(raw_mic->name.c_str())); } return true; @@ -132,7 +132,7 @@ bool WhisperCPP::OpenMic(const int idx, Whisper::iAudioCapture*& stream) { return false; } - std::vector> mics_raw; + std::vector> mics_raw; if (!GetMicsImpl(mics_raw)) { return false; } @@ -149,11 +149,11 @@ bool WhisperCPP::OpenMic(const int idx, Whisper::iAudioCapture*& stream) { params.maxDuration = 3.0; params.retainDuration = 1.5; stream = nullptr; - HRESULT err = f_->openCaptureDevice(mics_raw[idx]->endpoint, params, - &stream); + HRESULT err = f_->openCaptureDevice(mics_raw[idx]->endpoint.c_str(), + params, &stream); if (FAILED(err)) { Log(out_, "Failed to open mic with idx {} ({}): {}\n", idx, - wcharToAsciiString(mics_raw[idx]->displayName), + wcharToAsciiString(mics_raw[idx]->name.c_str()), hresultToString(err)); return false; } @@ -318,7 +318,9 @@ void WhisperCPP::Start(const AppConfig& c) { ScopeGuard context_cleanup([context]() { context->Release(); }); Whisper::sFullParams wparams{}; - context->fullDefaultParams(eSamplingStrategy::Greedy, &wparams); + context->fullDefaultParams(eSamplingStrategy::BeamSearch, &wparams); + wparams.beam_search.beam_width = 5; + wparams.beam_search.n_best = 5; wparams.language = Whisper::makeLanguageKey("en"); // TODO(yum) use config // This must be set to keep memory usage from growing without bound. wparams.n_max_text_ctx = 100; @@ -538,11 +540,11 @@ void WhisperCPP::StopCustomChatbox() { Log(out_, "Done!\n"); } -bool WhisperCPP::GetMicsImpl(std::vector>& mics) { +bool WhisperCPP::GetMicsImpl(std::vector>& mics) { pfnFoundCaptureDevices dev_cb = [](int len, const sCaptureDevice* buf, void* pv)->HRESULT __stdcall { - auto mics = static_cast>*>(pv); + auto mics = static_cast>*>(pv); for (int i = 0; i < len; i++) { - mics->push_back(std::make_unique(buf[i])); + mics->push_back(std::make_unique(buf[i].displayName, buf[i].endpoint)); } return S_OK; }; diff --git a/GUI/GUI/GUI/WhisperCPP.h b/GUI/GUI/GUI/WhisperCPP.h index 7b3f17d..d58a671 100644 --- a/GUI/GUI/GUI/WhisperCPP.h +++ b/GUI/GUI/GUI/WhisperCPP.h @@ -46,8 +46,16 @@ public: void StopCustomChatbox(); private: + struct MicInfo { + MicInfo(const wchar_t* n, const wchar_t* e) + : name(n), endpoint(e) + {} + + std::wstring name; + std::wstring endpoint; + }; bool GetMicsImpl( - std::vector>& mics); + std::vector>& mics); wxTextCtrl* out_; Whisper::iMediaFoundation* f_; diff --git a/GUI/README.md b/GUI/README.md index ea28124..dbbc9be 100644 --- a/GUI/README.md +++ b/GUI/README.md @@ -8,17 +8,18 @@ $ git submodule init $ git submodule update ``` 3. Execute Libraries/fetch.ps1. -4. Open Libraries/wx/build/msw/wx\_vc17.sln with Visual Studio 2022. -5. Select every project in the Solution Explorer except for _custom_build. +4. Open `Libraries/wx/build/msw/wx_vc17.sln` with Visual Studio 2022. +5. Select every project in the Solution Explorer except for `_custom_build`. 6. Right click, select Properties, go to C/C++, Code Generation, and set Runtime Library to Multi-threaded (/MT). Make sure this applies to the configuration x64/Release. 7. Build x64/Release. 1. The build configuration is in the top. By default it's probably Debug/x64. 2. To build: ctrl+shift+B -8. Open GUI/GUI.sln with Visual Studio 2022. -9. Build x64/Release. -10. Run package.ps1 from powershell. +8. Follow TaSTT-Whisper README and build it as x64/Release. +9. Open GUI/GUI.sln with Visual Studio 2022. +10. Build x64/Release. +11. Run package.ps1 from powershell. ## High level design -- cgit v1.2.3