diff options
| author | yum <yum.food.vr@gmail.com> | 2023-02-26 19:12:01 -0800 |
|---|---|---|
| committer | yum <yum.food.vr@gmail.com> | 2023-02-26 20:08:45 -0800 |
| commit | 02c2605454288f7c86023ae700366acf08cd2206 (patch) | |
| tree | a169c65349a15a208966c62c363252dc0fb93142 | |
| parent | 00a0350a0218cf4b03d14acac84110bc1e882bee (diff) | |
Normalize audio before sending to transcription layer
Helps in cases where the speaker is speaking softly, or their mic gain
is set low.
| -rw-r--r-- | Whisper/MF/AudioBuffer.h | 13 |
| -rw-r--r-- | Whisper/Whisper/ContextImpl.capture.cpp | 5 |

2 files changed, 16 insertions, 2 deletions
diff --git a/Whisper/MF/AudioBuffer.h b/Whisper/MF/AudioBuffer.h
index b12dff4..77be1e0 100644
--- a/Whisper/MF/AudioBuffer.h
+++ b/Whisper/MF/AudioBuffer.h
@@ -1,4 +1,5 @@
 #pragma once
+#include <algorithm>
 #include <vector>
 
 namespace Whisper
@@ -52,5 +53,17 @@ namespace Whisper
 			memcpy(tmp.data(), mono.data() + len, remainder);
 			mono = std::move(tmp);
 		}
+
+		// Rescales `mono` in place: subtract the minimum, divide by (range + 1), multiply by 255.
+		void normalize()
+		{
+			if( mono.empty() )
+				return;	// min_element/max_element return end() for an empty range; never dereference it
+			// Copy the extrema by value: references into `mono` would be overwritten mid-loop.
+			const auto min = *std::min_element( mono.begin(), mono.end() );
+			const auto max = *std::max_element( mono.begin(), mono.end() );
+			for( auto& elm : mono )
+				elm = ( elm - min ) / ( ( max - min ) + 1 ) * 255.0;	// +1 keeps the divisor non-zero for a constant signal
+		}
 	};
 }
\ No newline at end of file diff --git a/Whisper/Whisper/ContextImpl.capture.cpp b/Whisper/Whisper/ContextImpl.capture.cpp index 0b213b8..bc88249 100644 --- a/Whisper/Whisper/ContextImpl.capture.cpp +++ b/Whisper/Whisper/ContextImpl.capture.cpp @@ -138,7 +138,8 @@ namespace workStatus = S_FALSE; buffer.currentOffset = pcmStartTime; - pcm.swap( buffer.pcm ); + buffer.pcm = pcm; + buffer.pcm.normalize(); SubmitThreadpoolWork( work ); pcmStartTime = nextSampleTime; pcm.clear(); @@ -419,4 +420,4 @@ HRESULT COMLIGHTCALL ContextImpl::runCapture( const sFullParams& params, const s return S_OK; CHECK( capture.run() ); } -}
\ No newline at end of file +} |
