summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author yum <yum.food.vr@gmail.com> 2023-02-28 00:34:10 -0800
committer yum <yum.food.vr@gmail.com> 2023-02-28 00:34:10 -0800
commit f65b93aa6f0a5b7ddd069cd1f50f142029e4a5e5 (patch)
tree 12f1440988247e35c20fedf951f43db302ed5ab1
parent b70628c047404c82793c80c4a2caf25e77d0b257 (diff)
Bugfix: fix use-after-free in GetMicsImpl
* Plumb beam search params into whisper cpp implementation (currently broken)
-rw-r--r-- GUI/GUI/GUI/GUI.vcxproj 1
-rw-r--r-- GUI/GUI/GUI/WhisperCPP.cpp 22
-rw-r--r-- GUI/GUI/GUI/WhisperCPP.h 10
-rw-r--r-- GUI/README.md 11
m--------- TaSTT-Whisper 0
5 files changed, 28 insertions, 16 deletions
diff --git a/GUI/GUI/GUI/GUI.vcxproj b/GUI/GUI/GUI/GUI.vcxproj
index 6976c31..2d08d30 100644
--- a/GUI/GUI/GUI/GUI.vcxproj
+++ b/GUI/GUI/GUI/GUI.vcxproj
@@ -44,6 +44,7 @@
<UseDebugLibraries>true</UseDebugLibraries>
<PlatformToolset>v143</PlatformToolset>
<CharacterSet>Unicode</CharacterSet>
+ <EnableASAN>false</EnableASAN>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
diff --git a/GUI/GUI/GUI/WhisperCPP.cpp b/GUI/GUI/GUI/WhisperCPP.cpp
index ec8a6c2..6fd38b8 100644
--- a/GUI/GUI/GUI/WhisperCPP.cpp
+++ b/GUI/GUI/GUI/WhisperCPP.cpp
@@ -113,14 +113,14 @@ bool WhisperCPP::GetMics(std::vector<std::string>& mics) {
return false;
}
- std::vector<std::unique_ptr<sCaptureDevice>> mics_raw;
+ std::vector<std::unique_ptr<MicInfo>> mics_raw;
if (!GetMicsImpl(mics_raw)) {
return false;
}
mics.clear();
for (const auto& raw_mic : mics_raw) {
- mics.push_back(wcharToAsciiString(raw_mic->displayName));
+ mics.push_back(wcharToAsciiString(raw_mic->name.c_str()));
}
return true;
@@ -132,7 +132,7 @@ bool WhisperCPP::OpenMic(const int idx, Whisper::iAudioCapture*& stream) {
return false;
}
- std::vector<std::unique_ptr<sCaptureDevice>> mics_raw;
+ std::vector<std::unique_ptr<MicInfo>> mics_raw;
if (!GetMicsImpl(mics_raw)) {
return false;
}
@@ -149,11 +149,11 @@ bool WhisperCPP::OpenMic(const int idx, Whisper::iAudioCapture*& stream) {
params.maxDuration = 3.0;
params.retainDuration = 1.5;
stream = nullptr;
- HRESULT err = f_->openCaptureDevice(mics_raw[idx]->endpoint, params,
- &stream);
+ HRESULT err = f_->openCaptureDevice(mics_raw[idx]->endpoint.c_str(),
+ params, &stream);
if (FAILED(err)) {
Log(out_, "Failed to open mic with idx {} ({}): {}\n", idx,
- wcharToAsciiString(mics_raw[idx]->displayName),
+ wcharToAsciiString(mics_raw[idx]->name.c_str()),
hresultToString(err));
return false;
}
@@ -318,7 +318,9 @@ void WhisperCPP::Start(const AppConfig& c) {
ScopeGuard context_cleanup([context]() { context->Release(); });
Whisper::sFullParams wparams{};
- context->fullDefaultParams(eSamplingStrategy::Greedy, &wparams);
+ context->fullDefaultParams(eSamplingStrategy::BeamSearch, &wparams);
+ wparams.beam_search.beam_width = 5;
+ wparams.beam_search.n_best = 5;
wparams.language = Whisper::makeLanguageKey("en"); // TODO(yum) use config
// This must be set to keep memory usage from growing without bound.
wparams.n_max_text_ctx = 100;
@@ -538,11 +540,11 @@ void WhisperCPP::StopCustomChatbox() {
Log(out_, "Done!\n");
}
-bool WhisperCPP::GetMicsImpl(std::vector<std::unique_ptr<sCaptureDevice>>& mics) {
+bool WhisperCPP::GetMicsImpl(std::vector<std::unique_ptr<MicInfo>>& mics) {
pfnFoundCaptureDevices dev_cb = [](int len, const sCaptureDevice* buf, void* pv)->HRESULT __stdcall {
- auto mics = static_cast<std::vector<std::unique_ptr<sCaptureDevice>>*>(pv);
+ auto mics = static_cast<std::vector<std::unique_ptr<MicInfo>>*>(pv);
for (int i = 0; i < len; i++) {
- mics->push_back(std::make_unique<sCaptureDevice>(buf[i]));
+ mics->push_back(std::make_unique<MicInfo>(buf[i].displayName, buf[i].endpoint));
}
return S_OK;
};
diff --git a/GUI/GUI/GUI/WhisperCPP.h b/GUI/GUI/GUI/WhisperCPP.h
index 7b3f17d..d58a671 100644
--- a/GUI/GUI/GUI/WhisperCPP.h
+++ b/GUI/GUI/GUI/WhisperCPP.h
@@ -46,8 +46,16 @@ public:
void StopCustomChatbox();
private:
+ struct MicInfo {
+ MicInfo(const wchar_t* n, const wchar_t* e)
+ : name(n), endpoint(e)
+ {}
+
+ std::wstring name;
+ std::wstring endpoint;
+ };
bool GetMicsImpl(
- std::vector<std::unique_ptr<Whisper::sCaptureDevice>>& mics);
+ std::vector<std::unique_ptr<MicInfo>>& mics);
wxTextCtrl* out_;
Whisper::iMediaFoundation* f_;
diff --git a/GUI/README.md b/GUI/README.md
index ea28124..dbbc9be 100644
--- a/GUI/README.md
+++ b/GUI/README.md
@@ -8,17 +8,18 @@ $ git submodule init
$ git submodule update
```
3. Execute Libraries/fetch.ps1.
-4. Open Libraries/wx/build/msw/wx\_vc17.sln with Visual Studio 2022.
-5. Select every project in the Solution Explorer except for _custom_build.
+4. Open `Libraries/wx/build/msw/wx_vc17.sln` with Visual Studio 2022.
+5. Select every project in the Solution Explorer except for `_custom_build`.
6. Right click, select Properties, go to C/C++, Code Generation, and set
Runtime Library to Multi-threaded (/MT). Make sure this applies to the
configuration x64/Release.
7. Build x64/Release.
1. The build configuration is in the top. By default it's probably Debug/x64.
2. To build: ctrl+shift+B
-8. Open GUI/GUI.sln with Visual Studio 2022.
-9. Build x64/Release.
-10. Run package.ps1 from powershell.
+8. Follow TaSTT-Whisper README and build it as x64/Release.
+9. Open GUI/GUI.sln with Visual Studio 2022.
+10. Build x64/Release.
+11. Run package.ps1 from powershell.
## High level design
diff --git a/TaSTT-Whisper b/TaSTT-Whisper
-Subproject 1136acfc365f357d2df13a263714e8ae0614c4f
+Subproject 8050fba80e08dc2d107944fb20da7028dc73d05