From 9a97fbc3c583ccd518d838faaaa36ed9aa5558e1 Mon Sep 17 00:00:00 2001 From: yum Date: Tue, 21 Feb 2023 13:19:43 -0800 Subject: Begin work on C++ implementation Use Const-me/Whisper to perform transcription. This implementation is vastly more efficient: CPU usage, memory usage, and VRAM usage are all dramatically reduced. It's slightly less accurate when comparing the same model (due to the lack of beam search decoding), but since you can use larger models, the impact is largely a wash. --- Scripts/string_matcher.py | 4 ++++ Scripts/whisper_requirements.txt | 8 ++++++++ 2 files changed, 12 insertions(+) create mode 100644 Scripts/whisper_requirements.txt (limited to 'Scripts') diff --git a/Scripts/string_matcher.py b/Scripts/string_matcher.py index 26241f2..a56308a 100644 --- a/Scripts/string_matcher.py +++ b/Scripts/string_matcher.py @@ -55,6 +55,10 @@ def matchStrings(old_text: str, new_text: str, window_size = 3) -> str: if DEBUG: print("STRING MATCH exception path 1") return old_text + elif len(new_text) == 0: + return old_text + elif len(old_text) == 0: + return new_text elif len(old_text) >= window_size and len(new_text) >= window_size: # Find the window where the cumulative string distance # between the text in that window in the old/new transcription diff --git a/Scripts/whisper_requirements.txt b/Scripts/whisper_requirements.txt new file mode 100644 index 0000000..e99fe9e --- /dev/null +++ b/Scripts/whisper_requirements.txt @@ -0,0 +1,8 @@ +editdistance +future==0.18.2 +openvr +pillow +playsound==1.2.2 +pyaudio +python-osc +wget -- cgit v1.2.3