summary | refs | log | tree | commit | diff | stats
path: root/Whisper/MF
diff options
context:
space:
mode:
author: yum <yum.food.vr@gmail.com> 2023-04-04 17:40:59 -0700
committer: yum <yum.food.vr@gmail.com> 2023-04-04 17:41:30 -0700
commit: 59297502afb8f61c1216c6d57d6cc18ab5b9f467 (patch)
tree: 0f1a8f374934696063fb564e61397a9644bc5358 /Whisper/MF
parent: aaa0188da81056748ef8ffcd5ad86d6f4bffa6bd (diff)
Fix audio normalization (HEAD, master)
Normalization was scaling audio onto the range [0, 255], while it should have been on the range [0, 1].
* Add AudioBuffer::save() to enable debugging audio issues.
Diffstat (limited to 'Whisper/MF')
-rw-r--r-- Whisper/MF/AudioBuffer.h | 35
1 files changed, 34 insertions, 1 deletions
diff --git a/Whisper/MF/AudioBuffer.h b/Whisper/MF/AudioBuffer.h
index 11b5ead..6b5c264 100644
--- a/Whisper/MF/AudioBuffer.h
+++ b/Whisper/MF/AudioBuffer.h
@@ -1,5 +1,6 @@
#pragma once
#include <algorithm>
+#include <fstream>
#include <vector>
namespace Whisper
@@ -76,8 +77,40 @@ namespace Whisper
for (auto& elm : mono) {
elm -= min;
elm /= (max - min) + 1;
- elm *= 255.0;
}
}
+
+ /// Writes the samples in `mono` to `path` as a single-channel, 16-bit PCM WAV file.
+ /// Added as a debugging aid for inspecting audio.
+ ///
+ /// @param path         Destination file, opened for binary output.
+ /// @param sample_rate  Sample rate in Hz stored in the WAV header.
+ ///
+ /// Each sample is scaled by 32767, so values outside [-1, 1] are clamped first
+ /// to keep the conversion to 16 bits well defined.
+ /// Nothing is written, and no error is reported, if the file cannot be opened.
+ void save(const char* path, const int sample_rate) {
+     // Each sample is written below as a 16-bit integer, so the header must
+     // describe 16-bit data. Deriving this from sizeof(mono[0]) advertised the
+     // in-memory element width instead, so the header disagreed with the payload.
+     const int bits_per_sample = 16;
+     const int n_channels = 1;
+     const int n_samples = static_cast<int>(mono.size());
+     const int block_align = n_channels * bits_per_sample / 8;
+     const int byte_rate = sample_rate * block_align;
+     const int data_chunk_size = n_samples * block_align;
+     // RIFF chunk size: "WAVE" (4) + fmt chunk (8 + 16) + data chunk header (8) + payload.
+     const int file_size = 36 + data_chunk_size;
+
+     std::ofstream ofs(path, std::ios::out | std::ios::binary);
+     if (!ofs) {
+         return;
+     }
+
+     // WAV header fields are little-endian; emit them byte by byte so the
+     // output does not depend on the host byte order.
+     auto write_le = [&ofs](const unsigned int value, const int n_bytes) {
+         for (int i = 0; i < n_bytes; i++) {
+             ofs.put(static_cast<char>((value >> (8 * i)) & 0xFFu));
+         }
+     };
+
+     ofs.write("RIFF", 4);
+     write_le(static_cast<unsigned int>(file_size), 4);
+     ofs.write("WAVE", 4);
+
+     ofs.write("fmt ", 4);
+     write_le(16u, 4);  // fmt chunk size for plain PCM
+     write_le(1u, 2);   // audio format: 1 = PCM
+     write_le(static_cast<unsigned int>(n_channels), 2);
+     write_le(static_cast<unsigned int>(sample_rate), 4);
+     write_le(static_cast<unsigned int>(byte_rate), 4);
+     write_le(static_cast<unsigned int>(block_align), 2);
+     write_le(static_cast<unsigned int>(bits_per_sample), 2);
+
+     ofs.write("data", 4);
+     write_le(static_cast<unsigned int>(data_chunk_size), 4);
+     for (int i = 0; i < n_samples; i++) {
+         // Clamp before scaling: converting an out-of-range float to short is undefined.
+         const float clamped = std::max(-1.0f, std::min(1.0f, static_cast<float>(mono[i])));
+         const short sample = static_cast<short>(clamped * 32767.0f);
+         write_le(static_cast<unsigned short>(sample), 2);
+     }
+ }
};
} \ No newline at end of file