summary | refs | log | tree | commit | diff | stats
path: root/Scripts
diff options
context:
space:
mode:
author: yum <yum.food.vr@gmail.com>, 2024-06-05 18:15:47 -0700
committer: yum <yum.food.vr@gmail.com>, 2024-06-05 18:15:47 -0700
commit: 4f0fb5b17de990517e3c1de7ffee5d0f3c9a8961 (patch)
tree: 2a1d1b33af881a651037af040b4ba8e7860a58db /Scripts
parent: 5638d86c97041de31217e058e411034143e9c882 (diff)
Upgrade faster-whisper with flash-attention2
This should be significantly more efficient than prior versions.

* add large-v3 & distilled variant
* simplify model acquisition code now that distilled models are part of faster-whisper.
Diffstat (limited to 'Scripts')
-rw-r--r-- Scripts/requirements.txt | 4
-rw-r--r-- Scripts/requirements_frozen.txt | 6
-rw-r--r-- Scripts/transcribe_v2.py | 18
3 files changed, 11 insertions, 17 deletions
diff --git a/Scripts/requirements.txt b/Scripts/requirements.txt
index 9224ba8..949b82c 100644
--- a/Scripts/requirements.txt
+++ b/Scripts/requirements.txt
@@ -1,6 +1,6 @@
-ctranslate2
+ctranslate2==4.2.1
editdistance
-faster-whisper@https://github.com/guillaumekln/faster-whisper/archive/78d57d73c5b4a76b32d1d5a415e4e7aea760295c.tar.gz
+faster-whisper@https://github.com/guillaumekln/faster-whisper/archive/2f6913efc85306fc4f900da6c67f9a06a7d54a3d.tar.gz
future==0.18.2
huggingface_hub==0.16.4
keyboard
diff --git a/Scripts/requirements_frozen.txt b/Scripts/requirements_frozen.txt
index dd56b9d..1913457 100644
--- a/Scripts/requirements_frozen.txt
+++ b/Scripts/requirements_frozen.txt
@@ -1,11 +1,11 @@
-av==10.0.0
+av==12.0.0
certifi==2023.7.22
charset-normalizer==3.2.0
colorama==0.4.6
coloredlogs==15.0.1
-ctranslate2==3.19.0
+ctranslate2==4.2.1
editdistance==0.6.2
-faster-whisper @ https://github.com/guillaumekln/faster-whisper/archive/78d57d73c5b4a76b32d1d5a415e4e7aea760295c.tar.gz#sha256=824b441278d59fb7daff99f98823d492762e279980922308a2e0833ac8e49c5c
+faster-whisper @ https://github.com/guillaumekln/faster-whisper/archive/2f6913efc85306fc4f900da6c67f9a06a7d54a3d.tar.gz#sha256=c389ad787c8cdafcb13d31f8bae788083eb4e490819aad6b49d76c82e490a388
filelock==3.12.3
flatbuffers==23.5.26
fsspec==2023.9.0
diff --git a/Scripts/transcribe_v2.py b/Scripts/transcribe_v2.py
index 32652df..32deb42 100644
--- a/Scripts/transcribe_v2.py
+++ b/Scripts/transcribe_v2.py
@@ -13,6 +13,7 @@ import app_config
import argparse
import ctranslate2
import editdistance
+import glob
import keybind_event_machine
import keyboard
import langcodes
@@ -414,7 +415,8 @@ class Whisper:
parent_dir = os.path.dirname(my_dir)
model_str = cfg["model"]
- model_root = os.path.join(parent_dir, "Models", model_str)
+ model_root = os.path.join(parent_dir, "Models",
+ os.path.normpath(model_str))
print(f"Model {cfg['model']} will be saved to {model_root}",
file=sys.stderr)
@@ -423,22 +425,14 @@ class Whisper:
model_device = "cpu"
already_downloaded = os.path.exists(model_root)
- if '/' in model_str:
- hf_hub_download(repo_id=model_str, filename='model.bin',
- local_dir=model_root)
- hf_hub_download(repo_id=model_str, filename='vocabulary.json',
- local_dir=model_root)
- hf_hub_download(repo_id=model_str, filename='config.json',
- local_dir=model_root)
- already_downloaded = True
- if already_downloaded:
- model_str = model_root
+
self.model = WhisperModel(model_str,
device = model_device,
device_index = cfg["gpu_idx"],
compute_type = cfg["compute_type"],
download_root = model_root,
- local_files_only = already_downloaded)
+ local_files_only = already_downloaded,
+ flash_attention = True)
def transcribe(self, frames: bytes = None) -> typing.List[Segment]:
if frames is None: