summary | refs | log | tree | commit | diff | stats
path: root/app/profanity_filter.py
diff options
context:
space:
mode:
author: yum <yum.food.vr@gmail.com> 2025-05-30 13:32:36 -0700
committer: yum <yum.food.vr@gmail.com> 2025-05-30 13:34:23 -0700
commit: 7fb9c575aea4d318e9c14b82174d1b323171b62b (patch)
tree: 8f924a32def3bdc963be40e67879887cbac68f08 /app/profanity_filter.py
parent: e1b3f638a1ea448de9691f69eb62ebf4c3944c9f (diff)
More stuff
- fix unicode output from python terminal
- fix cpu inference
- add filters
- add beam search params to UI
- DRY up config definition in UI
Diffstat (limited to 'app/profanity_filter.py')
-rw-r--r-- app/profanity_filter.py 43
1 files changed, 43 insertions, 0 deletions
diff --git a/app/profanity_filter.py b/app/profanity_filter.py
new file mode 100644
index 0000000..b8c84ed
--- /dev/null
+++ b/app/profanity_filter.py
@@ -0,0 +1,43 @@
+#!/usr/bin/env python3
+
+class ProfanityFilter:
+    """Mask profane words in a line of text by starring out their vowels.
+
+    The word list is read from a text file with one entry per line. Only the
+    language code "en" is supported.
+    """
+
+    # Translation table converting ASCII vowels to asterisks. Built once here
+    # rather than on every filter() call.
+    _VOWEL_TO_ASTERISK = str.maketrans('aeiouAEIOU', '**********')
+
+    # Language codes accepted by filter().
+    _SUPPORTED_LANGUAGES = frozenset({"en"})
+
+    def __init__(self, en_path: str):
+        """Remember the word-list path; nothing is read until load().
+
+        Args:
+            en_path: Path to the English word list, one entry per line.
+        """
+        self.en_path = en_path
+        self.en_profanity = set()
+
+    def load(self):
+        """Read the English word list into ``self.en_profanity``.
+
+        Entries are stripped of surrounding whitespace and lowercased so that
+        they can match the lowercased keys filter() looks up. Blank lines are
+        skipped.
+
+        Raises:
+            OSError: If the word list cannot be opened.
+        """
+        # Explicit encoding so the result does not depend on the platform
+        # default; "utf-8-sig" also drops a leading BOM, which would otherwise
+        # become part of the first entry and stop it from ever matching.
+        with open(self.en_path, 'r', encoding='utf-8-sig') as f:
+            for line in f:
+                entry = line.strip().lower()
+                if entry:
+                    self.en_profanity.add(entry)
+
+    def filter(self, line: str, language_code: str = "en") -> str:
+        """Return ``line`` with the vowels of listed words replaced by ``*``.
+
+        The line is split on whitespace. Each word is lowercased and stripped
+        of non-alphabetic characters before the lookup, so "Word!" matches the
+        entry "word"; the punctuation and the case of the remaining letters
+        are kept in the output. Words are re-joined with single spaces, so
+        runs of whitespace in the input are collapsed.
+
+        Args:
+            line: Text to filter.
+            language_code: Language of ``line``; only "en" is accepted.
+
+        Returns:
+            The filtered text.
+
+        Raises:
+            ValueError: If ``language_code`` is not supported.
+        """
+        if language_code not in self._SUPPORTED_LANGUAGES:
+            raise ValueError(f"Language code \"{language_code}\" is " +
+                    "unsupported by the profanity filter")
+
+        result = []
+        for word in line.split():
+            # Lookup key: the word's letters only, lowercased.
+            key = ''.join(char for char in word.lower() if char.isalpha())
+            if key in self.en_profanity:
+                result.append(word.translate(self._VOWEL_TO_ASTERISK))
+            else:
+                result.append(word)
+
+        return " ".join(result)
+
+if __name__ == "__main__":
+    # Self-test: load the word list from a fixed local path and check a few
+    # known inputs against their expected filtered output.
+    en_path = "/mnt/d/vrc/TaSTT/GUI/Profanity/Profanity/en"
+    checker = ProfanityFilter(en_path)
+    checker.load()
+
+    # (input, expected output) pairs, checked in order.
+    cases = (
+        ("fuck", "f*ck"),
+        ("fuck!", "f*ck!"),
+        ("fuck shit", "f*ck sh*t"),
+        ("fuck shit this should not be filtered",
+         "f*ck sh*t this should not be filtered"),
+        ("ASS", "*SS"),
+    )
+    for raw, expected in cases:
+        assert checker.filter(raw) == expected