From b70628c047404c82793c80c4a2caf25e77d0b257 Mon Sep 17 00:00:00 2001
From: yum
Date: Sun, 26 Feb 2023 20:57:33 -0800
Subject: Filter out more transcription noise

Things like " (static)" and " *explosions*" were showing up a lot with
ggml-medium.bin. Filter them out.
---
 GUI/GUI/GUI/WhisperCPP.cpp | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'GUI')

diff --git a/GUI/GUI/GUI/WhisperCPP.cpp b/GUI/GUI/GUI/WhisperCPP.cpp
index 0495b2e..ec8a6c2 100644
--- a/GUI/GUI/GUI/WhisperCPP.cpp
+++ b/GUI/GUI/GUI/WhisperCPP.cpp
@@ -344,7 +344,7 @@ void WhisperCPP::Start(const AppConfig& c) {
  // entries (source: I heard it from someone once).
  static const std::vector banned_words{
  " -",
- " (static)",
+ " *fades out*",
  " *no audio*",
  };
 
@@ -359,7 +359,9 @@ void WhisperCPP::Start(const AppConfig& c) {
  const sToken& tok = tokens[seg.firstToken + j];
  std::string_view tok_str(tok.text);
  if (tok_str.starts_with("[") ||
- tok_str.starts_with(" [")) {
+ tok_str.starts_with("(") ||
+ tok_str.starts_with(" [") ||
+ tok_str.starts_with(" (")) {
  is_metadata = true;
  }
  if (is_metadata) {
--
cgit v1.2.3