From 4fec36c3cc00bd649dfb3c9d7e9079b5c8685a0e Mon Sep 17 00:00:00 2001
From: yum
Date: Sun, 9 Jun 2024 16:43:34 -0700
Subject: Bump CUDNN to v8.9.7

Also disable flash-attention when CPU mode is selected
---
 GUI/package.ps1          | 20 ++++++--------------
 Scripts/transcribe_v2.py |  5 +++--
 2 files changed, 9 insertions(+), 16 deletions(-)

diff --git a/GUI/package.ps1 b/GUI/package.ps1
index a9be563..849e35c 100644
--- a/GUI/package.ps1
+++ b/GUI/package.ps1
@@ -99,21 +99,13 @@ if (-Not (Test-Path $nvidia_dir)) {
     # NVIDIA locks these files behind a fucking login making it a massive
     # pain in the dick for end users to download, so I rehosted them.
     # TODO check hashes.
-    echo "Fetching NVIDIA dll 1/4 (90MB)"
-    $CUDNN_1_URL = "https://www.dropbox.com/scl/fi/7vgnqz732lmrdkbktc0tm/cublas64_11.dll?rlkey=ju4fjpf8v9aoopdeqddfkcrfj&dl=1"
-    Invoke-WebRequest $CUDNN_1_URL -OutFile "cublas64_11.dll"
+    echo "Fetching NVIDIA dll 1/2 (90 MB)"
+    $CUDNN_1_URL = "https://www.dropbox.com/scl/fi/d21dsoa982ce7wigng510/cudnn_ops_infer64_8.dll?rlkey=xflxyux0ekhr0fs11m4gs58md&st=0wff5fyn&dl=1"
+    Invoke-WebRequest $CUDNN_1_URL -OutFile "cudnn_ops_infer64_8.dll"
 
-    echo "Fetching NVIDIA dll 2/4 (160MB)"
-    $CUDNN_2_URL = "https://www.dropbox.com/scl/fi/xgz2rbp5gxy1v6o0t5e4r/cublasLt64_11.dll?rlkey=l6uk24q82anmjtlr39m8pbftb&dl=1"
-    Invoke-WebRequest $CUDNN_2_URL -OutFile "cublasLt64_11.dll"
-
-    echo "Fetching NVIDIA dll 3/4 (600MB)"
-    $CUDNN_3_URL = "https://www.dropbox.com/scl/fi/ffy0i5l5asrfkiesr3t1u/cudnn_cnn_infer64_8.dll?rlkey=t2v0v6y78tq2h2i5y0bmm7n0z&dl=1"
-    Invoke-WebRequest $CUDNN_3_URL -OutFile "cudnn_cnn_infer64_8.dll"
-
-    echo "Fetching NVIDIA dll 4/4 (90MB)"
-    $CUDNN_4_URL = "https://www.dropbox.com/scl/fi/dd1t62wc5gtz5bmhji01h/cudnn_ops_infer64_8.dll?rlkey=b11q8h3f97pxsamak04z4f84d&dl=1"
-    Invoke-WebRequest $CUDNN_4_URL -OutFile "cudnn_ops_infer64_8.dll"
+    echo "Fetching NVIDIA dll 2/2 (570 MB)"
+    $CUDNN_2_URL = "https://www.dropbox.com/scl/fi/uqccevwk9h2q84dt9vr6u/cudnn_cnn_infer64_8.dll?rlkey=sik7xd0ozg06nr4eayzdym4la&st=031bb8pa&dl=1"
+    Invoke-WebRequest $CUDNN_2_URL -OutFile "cudnn_cnn_infer64_8.dll"
 
     popd > $null
 }
diff --git a/Scripts/transcribe_v2.py b/Scripts/transcribe_v2.py
index 2f37945..1bdc487 100644
--- a/Scripts/transcribe_v2.py
+++ b/Scripts/transcribe_v2.py
@@ -423,11 +423,12 @@ class Whisper:
         model_device = "cuda"
         if cfg["use_cpu"]:
             model_device = "cpu"
+            if cfg["use_flash_attention"]:
+                print(f"Flash attention disabled on CPU", file=sys.stderr)
+                cfg["use_flash_attention"] = False
 
         already_downloaded = os.path.exists(model_root)
 
-        print(f"Use flash attention {cfg['use_flash_attention']}")
-
         self.model = WhisperModel(model_str,
                 device = model_device,
                 device_index = cfg["gpu_idx"],
-- 
cgit v1.2.3