diff options
| author | yum <yum.food.vr@gmail.com> | 2023-02-26 20:57:33 -0800 |
|---|---|---|
| committer | yum <yum.food.vr@gmail.com> | 2023-02-26 20:57:33 -0800 |
| commit | b70628c047404c82793c80c4a2caf25e77d0b257 (patch) | |
| tree | 03823b47211eaf8d20b05dbb28b5da2f3fffad84 /GUI | |
| parent | d96851f716b9e18d827ba7795a343dbf3cf529c4 (diff) | |
Filter out more transcription noise (tag: v0.8.2)
Things like " (static)" and " *explosions*" were showing up a lot with
ggml-medium.bin. Filter them out.
Diffstat (limited to 'GUI')
| -rw-r--r-- | GUI/GUI/GUI/WhisperCPP.cpp | 6 |
1 file changed, 4 insertions, 2 deletions
diff --git a/GUI/GUI/GUI/WhisperCPP.cpp b/GUI/GUI/GUI/WhisperCPP.cpp
index 0495b2e..ec8a6c2 100644
--- a/GUI/GUI/GUI/WhisperCPP.cpp
+++ b/GUI/GUI/GUI/WhisperCPP.cpp
@@ -344,7 +344,7 @@ void WhisperCPP::Start(const AppConfig& c) {
// entries (source: I heard it from someone once).
static const std::vector<std::string> banned_words{
" -",
- " (static)",
+ " *fades out*",
" *no audio*",
};
@@ -359,7 +359,9 @@ void WhisperCPP::Start(const AppConfig& c) {
const sToken& tok = tokens[seg.firstToken + j];
std::string_view tok_str(tok.text);
if (tok_str.starts_with("[") ||
- tok_str.starts_with(" [")) {
+ tok_str.starts_with("(") ||
+ tok_str.starts_with(" [") ||
+ tok_str.starts_with(" (")) {
is_metadata = true;
}
if (is_metadata) {
|
