From 4fec36c3cc00bd649dfb3c9d7e9079b5c8685a0e Mon Sep 17 00:00:00 2001
From: yum <yum.food.vr@gmail.com>
Date: Sun, 9 Jun 2024 16:43:34 -0700
Subject: Bump CUDNN to v8.9.7

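Also disable flash-attention when CPU mode is selected (with a warning on
stderr), and stop fetching the cuBLAS DLLs in the packaging script, so only
the two cuDNN DLLs are downloaded.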
---
 GUI/package.ps1          | 20 ++++++--------------
 Scripts/transcribe_v2.py |  5 +++--
 2 files changed, 9 insertions(+), 16 deletions(-)

diff --git a/GUI/package.ps1 b/GUI/package.ps1
index a9be563..849e35c 100644
--- a/GUI/package.ps1
+++ b/GUI/package.ps1
@@ -99,21 +99,13 @@ if (-Not (Test-Path $nvidia_dir)) {
   # NVIDIA locks these files behind a fucking login making it a massive
   # pain in the dick for end users to download, so I rehosted them.
   # TODO check hashes.
-  echo "Fetching NVIDIA dll 1/4 (90MB)"
-  $CUDNN_1_URL = "https://www.dropbox.com/scl/fi/7vgnqz732lmrdkbktc0tm/cublas64_11.dll?rlkey=ju4fjpf8v9aoopdeqddfkcrfj&dl=1"
-  Invoke-WebRequest $CUDNN_1_URL -OutFile "cublas64_11.dll"
+  echo "Fetching NVIDIA dll 1/2 (90 MB)"
+  $CUDNN_1_URL = "https://www.dropbox.com/scl/fi/d21dsoa982ce7wigng510/cudnn_ops_infer64_8.dll?rlkey=xflxyux0ekhr0fs11m4gs58md&st=0wff5fyn&dl=1"
+  Invoke-WebRequest $CUDNN_1_URL -OutFile "cudnn_ops_infer64_8.dll"
 
-  echo "Fetching NVIDIA dll 2/4 (160MB)"
-  $CUDNN_2_URL = "https://www.dropbox.com/scl/fi/xgz2rbp5gxy1v6o0t5e4r/cublasLt64_11.dll?rlkey=l6uk24q82anmjtlr39m8pbftb&dl=1"
-  Invoke-WebRequest $CUDNN_2_URL -OutFile "cublasLt64_11.dll"
-
-  echo "Fetching NVIDIA dll 3/4 (600MB)"
-  $CUDNN_3_URL = "https://www.dropbox.com/scl/fi/ffy0i5l5asrfkiesr3t1u/cudnn_cnn_infer64_8.dll?rlkey=t2v0v6y78tq2h2i5y0bmm7n0z&dl=1"
-  Invoke-WebRequest $CUDNN_3_URL -OutFile "cudnn_cnn_infer64_8.dll"
-
-  echo "Fetching NVIDIA dll 4/4 (90MB)"
-  $CUDNN_4_URL = "https://www.dropbox.com/scl/fi/dd1t62wc5gtz5bmhji01h/cudnn_ops_infer64_8.dll?rlkey=b11q8h3f97pxsamak04z4f84d&dl=1"
-  Invoke-WebRequest $CUDNN_4_URL -OutFile "cudnn_ops_infer64_8.dll"
+  echo "Fetching NVIDIA dll 2/2 (570 MB)"
+  $CUDNN_2_URL = "https://www.dropbox.com/scl/fi/uqccevwk9h2q84dt9vr6u/cudnn_cnn_infer64_8.dll?rlkey=sik7xd0ozg06nr4eayzdym4la&st=031bb8pa&dl=1"
+  Invoke-WebRequest $CUDNN_2_URL -OutFile "cudnn_cnn_infer64_8.dll"
 
   popd > $null
 }
diff --git a/Scripts/transcribe_v2.py b/Scripts/transcribe_v2.py
index 2f37945..1bdc487 100644
--- a/Scripts/transcribe_v2.py
+++ b/Scripts/transcribe_v2.py
@@ -423,11 +423,12 @@ class Whisper:
         model_device = "cuda"
         if cfg["use_cpu"]:
             model_device = "cpu"
+            if cfg["use_flash_attention"]:
+                print(f"Flash attention disabled on CPU", file=sys.stderr)
+                cfg["use_flash_attention"] = False
 
         already_downloaded = os.path.exists(model_root)
 
-        print(f"Use flash attention {cfg['use_flash_attention']}")
-
         self.model = WhisperModel(model_str,
                 device = model_device,
                 device_index = cfg["gpu_idx"],
-- 
cgit v1.2.3