diff options
| author | yum <yum.food.vr@gmail.com> | 2023-04-04 17:40:59 -0700 |
|---|---|---|
| committer | yum <yum.food.vr@gmail.com> | 2023-04-04 17:41:30 -0700 |
| commit | 59297502afb8f61c1216c6d57d6cc18ab5b9f467 (patch) | |
| tree | 0f1a8f374934696063fb564e61397a9644bc5358 /Whisper | |
| parent | aaa0188da81056748ef8ffcd5ad86d6f4bffa6bd (diff) | |
Normalization was scaling audio onto the range [0, 255], when it should
have been on the range [0, 1].
* Add AudioBuffer::save() to enable debugging audio issues.
Diffstat (limited to 'Whisper')
| -rw-r--r-- | Whisper/MF/AudioBuffer.h | 35 | ||||
| -rw-r--r-- | Whisper/Whisper/ContextImpl.capture.cpp | 14 |
2 files changed, 48 insertions, 1 deletions
```diff
diff --git a/Whisper/MF/AudioBuffer.h b/Whisper/MF/AudioBuffer.h
index 11b5ead..6b5c264 100644
--- a/Whisper/MF/AudioBuffer.h
+++ b/Whisper/MF/AudioBuffer.h
@@ -1,5 +1,6 @@
 #pragma once
 #include <algorithm>
+#include <fstream>
 #include <vector>
 
 namespace Whisper
@@ -76,8 +77,40 @@ namespace Whisper
             for (auto& elm : mono) {
                 elm -= min;
                 elm /= (max - min) + 1;
-                elm *= 255.0;
             }
         }
+
+        void save(const char* path, const int sample_rate) {
+            const int n_samples = mono.size();
+            const int bits_per_sample = sizeof(mono[0]) * 8;
+            const int n_channels = 1;
+            const int byte_rate = sample_rate * n_channels * bits_per_sample / 8;
+            const int block_align = n_channels * bits_per_sample / 8;
+            const int data_chunk_size = n_samples * n_channels * bits_per_sample / 8;
+            const int file_size = 36 + data_chunk_size;
+
+            std::ofstream ofs(path, std::ios::out | std::ios::binary);
+            ofs.write("RIFF", 4);
+            ofs.write((char*)&file_size, 4);
+            ofs.write("WAVE", 4);
+
+            ofs.write("fmt ", 4);
+            const int fmt_chunk_size = 16;
+            ofs.write((char*)&fmt_chunk_size, 4);
+            const short audio_format = 1; // PCM
+            ofs.write((char*)&audio_format, 2);
+            ofs.write((char*)&n_channels, 2);
+            ofs.write((char*)&sample_rate, 4);
+            ofs.write((char*)&byte_rate, 4);
+            ofs.write((char*)&block_align, 2);
+            ofs.write((char*)&bits_per_sample, 2);
+
+            ofs.write("data", 4);
+            ofs.write((char*)&data_chunk_size, 4);
+            for (int i = 0; i < n_samples; i++) {
+                short sample = (short)(mono[i] * 32767.0f);
+                ofs.write((char*)&sample, 2);
+            }
+        };
     };
 }
```
```diff
\ No newline at end of file
diff --git a/Whisper/Whisper/ContextImpl.capture.cpp b/Whisper/Whisper/ContextImpl.capture.cpp
index ce51393..2f29934 100644
--- a/Whisper/Whisper/ContextImpl.capture.cpp
+++ b/Whisper/Whisper/ContextImpl.capture.cpp
@@ -142,7 +142,21 @@ namespace
             workStatus = S_FALSE;
             buffer.currentOffset = pcmStartTime;
             buffer.pcm = pcm;
+#if 0
+            {
+                static int i = 0;
+                std::string filename = "buf_" + std::to_string(i++) + "_raw.wav";
+                buffer.pcm.save(filename.c_str(), SAMPLE_RATE / 2);
+            }
+#endif
             buffer.pcm.normalize();
+#if 0
+            {
+                static int i = 0;
+                std::string filename = "buf_" + std::to_string(i++) + "_normalized.wav";
+                buffer.pcm.save(filename.c_str(), SAMPLE_RATE / 2);
+            }
+#endif
             SubmitThreadpoolWork( work );
             pcmStartTime = nextSampleTime;
             pcm.retainLast(captureParams.retainDuration);
```
