summary | refs | log | tree | commit | diff | stats
path: root/Scripts
diff options
context:
space:
mode:
author yum <yum.food.vr@gmail.com> 2023-02-21 13:19:43 -0800
committer yum <yum.food.vr@gmail.com> 2023-02-22 21:49:29 -0800
commit 9a97fbc3c583ccd518d838faaaa36ed9aa5558e1 (patch)
tree 92646de70fdd4971092de3d5cf76dce79978cd8e /Scripts
parent cece1ee8f1b985c2a89adb661dd02c6d44787f67 (diff)
Begin work on C++ implementation
Use Const-me/Whisper to perform transcription. This implementation is vastly more efficient: CPU usage, memory usage, and VRAM usage are all dramatically reduced. It's slightly less accurate when comparing the same model (due to the lack of beam search decoding), but since you can use larger models, the impact is largely a wash.
Diffstat (limited to 'Scripts')
-rw-r--r-- Scripts/string_matcher.py 4
-rw-r--r-- Scripts/whisper_requirements.txt 8
2 files changed, 12 insertions, 0 deletions
diff --git a/Scripts/string_matcher.py b/Scripts/string_matcher.py
index 26241f2..a56308a 100644
--- a/Scripts/string_matcher.py
+++ b/Scripts/string_matcher.py
@@ -55,6 +55,10 @@ def matchStrings(old_text: str, new_text: str, window_size = 3) -> str:
if DEBUG:
print("STRING MATCH exception path 1")
return old_text
+ elif len(new_text) == 0:
+ return old_text
+ elif len(old_text) == 0:
+ return new_text
elif len(old_text) >= window_size and len(new_text) >= window_size:
# Find the window where the cumulative string distance
# between the text in that window in the old/new transcription
diff --git a/Scripts/whisper_requirements.txt b/Scripts/whisper_requirements.txt
new file mode 100644
index 0000000..e99fe9e
--- /dev/null
+++ b/Scripts/whisper_requirements.txt
@@ -0,0 +1,8 @@
+editdistance
+future==0.18.2
+openvr
+pillow
+playsound==1.2.2
+pyaudio
+python-osc
+wget