From d96851f716b9e18d827ba7795a343dbf3cf529c4 Mon Sep 17 00:00:00 2001 From: yum Date: Sun, 26 Feb 2023 20:12:36 -0800 Subject: Improve behavior around VAD segmentation events Use forked Whisper implementation which has tweaks to reduce dropped words around the beginning of VAD segments. * Retain audio after VAD segmentation events --- GUI/GUI/GUI/WhisperCPP.cpp | 7 ++++++- GUI/Libraries/fetch.ps1 | 26 ++++++-------------------- GUI/package.ps1 | 8 +------- 3 files changed, 13 insertions(+), 28 deletions(-) (limited to 'GUI') diff --git a/GUI/GUI/GUI/WhisperCPP.cpp b/GUI/GUI/GUI/WhisperCPP.cpp index 3610901..0495b2e 100644 --- a/GUI/GUI/GUI/WhisperCPP.cpp +++ b/GUI/GUI/GUI/WhisperCPP.cpp @@ -143,6 +143,11 @@ bool WhisperCPP::OpenMic(const int idx, Whisper::iAudioCapture*& stream) { } Whisper::sCaptureParams params{}; + params.dropStartSilence = 1.0; + params.pauseDuration = 1.0; + params.minDuration = 2.0; + params.maxDuration = 3.0; + params.retainDuration = 1.5; stream = nullptr; HRESULT err = f_->openCaptureDevice(mics_raw[idx]->endpoint, params, &stream); @@ -340,6 +345,7 @@ void WhisperCPP::Start(const AppConfig& c) { static const std::vector banned_words{ " -", " (static)", + " *no audio*", }; const sSegment* const segments = results->getSegments(); @@ -404,7 +410,6 @@ void WhisperCPP::Start(const AppConfig& c) { Log(out_, "Exit transcription engine\n"); }); - Log(out_, "Success!\n"); return; } diff --git a/GUI/Libraries/fetch.ps1 b/GUI/Libraries/fetch.ps1 index ac9ccd5..d71ce44 100644 --- a/GUI/Libraries/fetch.ps1 +++ b/GUI/Libraries/fetch.ps1 @@ -12,10 +12,6 @@ $WX_3_2_1_URL = "https://github.com/wxWidgets/wxWidgets/releases/download/v3.2.1 $WX_URL = $WX_3_2_1_URL $WX_FILE = $(Split-Path -Path $WX_URL -Leaf) -$WHISPER_1_7_0_URL = "https://github.com/Const-me/Whisper/releases/download/1.7.0/Library.zip" -$WHISPER_URL = $WHISPER_1_7_0_URL -$WHISPER_FILE = $(Split-Path -Path $WHISPER_URL -Leaf) - pushd $PSScriptRoot # WX @@ -31,24 +27,14 @@ if (-Not (Test-Path 
wx)) { popd > $null } -if ((Test-Path whisper) -And ($overwrite)) { - rm -Recurse whisper -} -if (-Not (Test-Path whisper)) { - mkdir whisper - pushd whisper > $null - Invoke-WebRequest $WHISPER_URL -OutFile $WHISPER_FILE - Expand-Archive $WHISPER_FILE -DestinationPath . - if (Test-Path ../../GUI/GUI/whisper/) { - rm -Recurse ../../GUI/GUI/whisper/ - } - mkdir ../../GUI/GUI/whisper/ - cp Include/*.h ../../GUI/GUI/whisper/ - cp Linker/*.lib ../../GUI/GUI/whisper/Whisper.lib - cp Binary/*.dll ../../GUI/GUI/whisper/Whisper.dll - popd > $null +if (Test-Path ../GUI/GUI/whisper/) { + rm -Recurse ../GUI/GUI/whisper/ } +mkdir ../GUI/GUI/whisper/ +cp ../../TaSTT-Whisper/Whisper/API/*.h ../GUI/GUI/whisper/ +cp ../../TaSTT-Whisper/x64/Release/Whisper.lib ../GUI/GUI/whisper/ + popd > $null # $PSScriptRoot diff --git a/GUI/package.ps1 b/GUI/package.ps1 index bac7147..fa5e162 100644 --- a/GUI/package.ps1 +++ b/GUI/package.ps1 @@ -65,12 +65,6 @@ if (-Not (Test-Path $git_dir)) { Read-Host -Prompt "Press enter once PortableGit is installed at $pwd\PortableGit" } -#$WHISPER_CHECKPOINT_URL = "https://huggingface.co/datasets/ggerganov/whisper.cpp/resolve/main/ggml-base.bin" -#$WHISPER_CHECKPOINT_FILE = $(Split-Path -Path $WHISPER_CHECKPOINT_URL -Leaf) -#if (-Not (Test-Path $WHISPER_CHECKPOINT_FILE)) { -# Invoke-WebRequest $WHISPER_CHECKPOINT_URL -OutFile $WHISPER_CHECKPOINT_FILE -#} - mkdir $install_dir > $null mkdir $install_dir/Resources > $null cp -Recurse ../Animations TaSTT/Resources/Animations @@ -86,7 +80,7 @@ cp -Recurse ../Sounds TaSTT/Resources/Sounds cp -Recurse ../UnityAssets TaSTT/Resources/UnityAssets cp -Recurse ../BrowserSource TaSTT/Resources/BrowserSource cp GUI/x64/$release/GUI.exe TaSTT/TaSTT.exe -cp GUI/GUI/Whisper/Whisper.dll TaSTT/Whisper.dll +cp ../"TaSTT-Whisper"/x64/Release/Whisper.dll TaSTT/Whisper.dll mkdir TaSTT/Resources/Models #cp $WHISPER_CHECKPOINT_FILE TaSTT/Resources/Models/ -- cgit v1.2.3