diff options
| author | yum <yum.food.vr@gmail.com> | 2023-02-28 00:34:10 -0800 |
|---|---|---|
| committer | yum <yum.food.vr@gmail.com> | 2023-02-28 00:34:10 -0800 |
| commit | f65b93aa6f0a5b7ddd069cd1f50f142029e4a5e5 (patch) | |
| tree | 12f1440988247e35c20fedf951f43db302ed5ab1 | |
| parent | b70628c047404c82793c80c4a2caf25e77d0b257 (diff) | |
Bugfix: fix use-after-free in GetMicsImpl
* Plumb beam search params into whisper cpp implementation
(currently broken)
| -rw-r--r-- | GUI/GUI/GUI/GUI.vcxproj | 1 | ||||
| -rw-r--r-- | GUI/GUI/GUI/WhisperCPP.cpp | 22 | ||||
| -rw-r--r-- | GUI/GUI/GUI/WhisperCPP.h | 10 | ||||
| -rw-r--r-- | GUI/README.md | 11 | ||||
| m--------- | TaSTT-Whisper | 0 |
5 files changed, 28 insertions, 16 deletions
diff --git a/GUI/GUI/GUI/GUI.vcxproj b/GUI/GUI/GUI/GUI.vcxproj index 6976c31..2d08d30 100644 --- a/GUI/GUI/GUI/GUI.vcxproj +++ b/GUI/GUI/GUI/GUI.vcxproj @@ -44,6 +44,7 @@ <UseDebugLibraries>true</UseDebugLibraries>
<PlatformToolset>v143</PlatformToolset>
<CharacterSet>Unicode</CharacterSet>
+ <EnableASAN>false</EnableASAN>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
diff --git a/GUI/GUI/GUI/WhisperCPP.cpp b/GUI/GUI/GUI/WhisperCPP.cpp index ec8a6c2..6fd38b8 100644 --- a/GUI/GUI/GUI/WhisperCPP.cpp +++ b/GUI/GUI/GUI/WhisperCPP.cpp @@ -113,14 +113,14 @@ bool WhisperCPP::GetMics(std::vector<std::string>& mics) { return false;
}
- std::vector<std::unique_ptr<sCaptureDevice>> mics_raw;
+ std::vector<std::unique_ptr<MicInfo>> mics_raw;
if (!GetMicsImpl(mics_raw)) {
return false;
}
mics.clear();
for (const auto& raw_mic : mics_raw) {
- mics.push_back(wcharToAsciiString(raw_mic->displayName));
+ mics.push_back(wcharToAsciiString(raw_mic->name.c_str()));
}
return true;
@@ -132,7 +132,7 @@ bool WhisperCPP::OpenMic(const int idx, Whisper::iAudioCapture*& stream) { return false;
}
- std::vector<std::unique_ptr<sCaptureDevice>> mics_raw;
+ std::vector<std::unique_ptr<MicInfo>> mics_raw;
if (!GetMicsImpl(mics_raw)) {
return false;
}
@@ -149,11 +149,11 @@ bool WhisperCPP::OpenMic(const int idx, Whisper::iAudioCapture*& stream) { params.maxDuration = 3.0;
params.retainDuration = 1.5;
stream = nullptr;
- HRESULT err = f_->openCaptureDevice(mics_raw[idx]->endpoint, params,
- &stream);
+ HRESULT err = f_->openCaptureDevice(mics_raw[idx]->endpoint.c_str(),
+ params, &stream);
if (FAILED(err)) {
Log(out_, "Failed to open mic with idx {} ({}): {}\n", idx,
- wcharToAsciiString(mics_raw[idx]->displayName),
+ wcharToAsciiString(mics_raw[idx]->name.c_str()),
hresultToString(err));
return false;
}
@@ -318,7 +318,9 @@ void WhisperCPP::Start(const AppConfig& c) { ScopeGuard context_cleanup([context]() { context->Release(); });
Whisper::sFullParams wparams{};
- context->fullDefaultParams(eSamplingStrategy::Greedy, &wparams);
+ context->fullDefaultParams(eSamplingStrategy::BeamSearch, &wparams);
+ wparams.beam_search.beam_width = 5;
+ wparams.beam_search.n_best = 5;
wparams.language = Whisper::makeLanguageKey("en"); // TODO(yum) use config
// This must be set to keep memory usage from growing without bound.
wparams.n_max_text_ctx = 100;
@@ -538,11 +540,11 @@ void WhisperCPP::StopCustomChatbox() { Log(out_, "Done!\n");
}
-bool WhisperCPP::GetMicsImpl(std::vector<std::unique_ptr<sCaptureDevice>>& mics) {
+bool WhisperCPP::GetMicsImpl(std::vector<std::unique_ptr<MicInfo>>& mics) {
pfnFoundCaptureDevices dev_cb = [](int len, const sCaptureDevice* buf, void* pv)->HRESULT __stdcall {
- auto mics = static_cast<std::vector<std::unique_ptr<sCaptureDevice>>*>(pv);
+ auto mics = static_cast<std::vector<std::unique_ptr<MicInfo>>*>(pv);
for (int i = 0; i < len; i++) {
- mics->push_back(std::make_unique<sCaptureDevice>(buf[i]));
+ mics->push_back(std::make_unique<MicInfo>(buf[i].displayName, buf[i].endpoint));
}
return S_OK;
};
diff --git a/GUI/GUI/GUI/WhisperCPP.h b/GUI/GUI/GUI/WhisperCPP.h index 7b3f17d..d58a671 100644 --- a/GUI/GUI/GUI/WhisperCPP.h +++ b/GUI/GUI/GUI/WhisperCPP.h @@ -46,8 +46,16 @@ public: void StopCustomChatbox();
private:
+ struct MicInfo { // Name and endpoint of one microphone, held as std::wstring members.
+ MicInfo(const wchar_t* n, const wchar_t* e) // n -> name, e -> endpoint; NOTE(review): std::wstring requires non-null, NUL-terminated input - confirm callers guarantee this.
+ : name(n), endpoint(e) // Each std::wstring is constructed from the raw wide C string.
+ {}
+
+ std::wstring name; // GetMicsImpl fills this from sCaptureDevice::displayName.
+ std::wstring endpoint; // GetMicsImpl fills this from sCaptureDevice::endpoint; OpenMic passes endpoint.c_str() to openCaptureDevice.
+ };
bool GetMicsImpl(
- std::vector<std::unique_ptr<Whisper::sCaptureDevice>>& mics);
+ std::vector<std::unique_ptr<MicInfo>>& mics);
wxTextCtrl* out_;
Whisper::iMediaFoundation* f_;
diff --git a/GUI/README.md b/GUI/README.md index ea28124..dbbc9be 100644 --- a/GUI/README.md +++ b/GUI/README.md @@ -8,17 +8,18 @@ $ git submodule init $ git submodule update ``` 3. Execute Libraries/fetch.ps1. -4. Open Libraries/wx/build/msw/wx\_vc17.sln with Visual Studio 2022. -5. Select every project in the Solution Explorer except for _custom_build. +4. Open `Libraries/wx/build/msw/wx_vc17.sln` with Visual Studio 2022. +5. Select every project in the Solution Explorer except for `_custom_build`. 6. Right click, select Properties, go to C/C++, Code Generation, and set Runtime Library to Multi-threaded (/MT). Make sure this applies to the configuration x64/Release. 7. Build x64/Release. 1. The build configuration is in the top. By default it's probably Debug/x64. 2. To build: ctrl+shift+B -8. Open GUI/GUI.sln with Visual Studio 2022. -9. Build x64/Release. -10. Run package.ps1 from powershell. +8. Follow TaSTT-Whisper README and build it as x64/Release. +9. Open GUI/GUI.sln with Visual Studio 2022. +10. Build x64/Release. +11. Run package.ps1 from powershell. ## High level design diff --git a/TaSTT-Whisper b/TaSTT-Whisper -Subproject 1136acfc365f357d2df13a263714e8ae0614c4f +Subproject 8050fba80e08dc2d107944fb20da7028dc73d05 |
