diff options
| author | yum <yum.food.vr@gmail.com> | 2025-05-30 13:32:36 -0700 |
|---|---|---|
| committer | yum <yum.food.vr@gmail.com> | 2025-05-30 13:34:23 -0700 |
| commit | 7fb9c575aea4d318e9c14b82174d1b323171b62b (patch) | |
| tree | 8f924a32def3bdc963be40e67879887cbac68f08 /app/profanity_filter.py | |
| parent | e1b3f638a1ea448de9691f69eb62ebf4c3944c9f (diff) | |
More stuff
- fix unicode output from python terminal
- fix cpu inference
- add filters
- add beam search params to UI
- DRY up config definition in UI
Diffstat (limited to 'app/profanity_filter.py')
| -rw-r--r-- | app/profanity_filter.py | 43 |
1 files changed, 43 insertions, 0 deletions
#!/usr/bin/env python3
"""Word-list based profanity filter that masks vowels with asterisks."""


class ProfanityFilter:
    """Mask the vowels of words that appear in a profanity word list.

    The word list is a text file with one entry per line. Call `load()`
    before `filter()`; until then the list is empty and nothing is masked.
    """

    # Translation table converting vowels to asterisks. Built once at class
    # creation because it never changes between calls.
    _VOWEL_TO_ASTERISK = str.maketrans('aeiouAEIOU', '**********')

    # Language codes accepted by `filter()`.
    _SUPPORTED_LANGUAGES = frozenset({"en"})

    def __init__(self, en_path: str):
        """Remember the word-list path; the file is not read until `load()`.

        Args:
            en_path: Path to the English word list, one entry per line.
        """
        self.en_path = en_path
        self.en_profanity = set()

    def load(self):
        """Read the word list at `self.en_path` into `self.en_profanity`.

        Entries are stripped of surrounding whitespace and lowercased so
        they compare equal to the lowercased words that `filter()` looks
        up. Blank lines are skipped. Calling `load()` again adds to the
        existing set rather than replacing it.

        Raises:
            OSError: If the file cannot be opened.
            UnicodeDecodeError: If the file is not valid UTF-8.
        """
        # An explicit encoding keeps decoding independent of the platform
        # locale; "utf-8-sig" also tolerates a leading byte-order mark.
        # NOTE(review): assumes the word list is UTF-8 encoded -- confirm.
        with open(self.en_path, 'r', encoding='utf-8-sig') as f:
            for line in f:
                entry = line.strip().lower()
                if entry:
                    self.en_profanity.add(entry)

    def filter(self, line: str, language_code: str = "en") -> str:
        """Return `line` with the vowels of listed words replaced by '*'.

        The line is split on whitespace. Each word is lowercased and
        stripped of non-alphabetic characters before being looked up, so
        attached punctuation and capitalisation do not prevent a match.
        When a word matches, the vowels of the original word (punctuation
        and case intact) are masked.

        Args:
            line: Text to filter.
            language_code: Language of `line`; only "en" is supported.

        Returns:
            The processed words joined by single spaces. Runs of
            whitespace in the input are therefore collapsed.

        Raises:
            ValueError: If `language_code` is not supported.
        """
        if language_code not in self._SUPPORTED_LANGUAGES:
            raise ValueError(
                f'Language code "{language_code}" is '
                'unsupported by the profanity filter')

        result = []
        for word in line.split():
            # Keep only the letters so that e.g. "word!" matches "word".
            word_clean = ''.join(
                char for char in word.lower() if char.isalpha())
            if word_clean in self.en_profanity:
                result.append(word.translate(self._VOWEL_TO_ASTERISK))
            else:
                result.append(word)

        return " ".join(result)


if __name__ == "__main__":
    import sys

    # Self-test. The word-list path may be passed as the first command-line
    # argument; otherwise the original hard-coded location is used.
    en_path = (sys.argv[1] if len(sys.argv) > 1
               else "/mnt/d/vrc/TaSTT/GUI/Profanity/Profanity/en")
    p = ProfanityFilter(en_path)
    p.load()
    assert p.filter("fuck") == "f*ck"
    assert p.filter("fuck!") == "f*ck!"
    assert p.filter("fuck shit") == "f*ck sh*t"
    assert p.filter("fuck shit this should not be filtered") == "f*ck sh*t this should not be filtered"
    assert p.filter("ASS") == "*SS"
