summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: yum <yum.food.vr@gmail.com> 2024-03-14 18:03:54 -0700
committer: yum <yum.food.vr@gmail.com> 2024-03-14 18:03:54 -0700
commit: 5638d86c97041de31217e058e411034143e9c882 (patch)
tree: dc90d20f28437e97fded97aeb05b724ff75c594c
parent: cdc079fb59832fce46708df36ac80ede6d2bd046 (diff)
Fix distilled models
These were broken due to logic errors in the code path that acquires models from Hugging Face. Distilled large-v2 seems promising as a new default model.
-rw-r--r-- GUI/GUI/GUI/Frame.cpp 4
-rw-r--r-- Scripts/text_to_text_demo.py 2
-rw-r--r-- Scripts/transcribe_v2.py 7
3 files changed, 6 insertions, 7 deletions
diff --git a/GUI/GUI/GUI/Frame.cpp b/GUI/GUI/GUI/Frame.cpp
index 66c3d4e..908a54c 100644
--- a/GUI/GUI/GUI/Frame.cpp
+++ b/GUI/GUI/GUI/Frame.cpp
@@ -461,12 +461,12 @@ namespace {
"base",
"small.en",
"small",
- "yumfood/whisper_distil_medium_en_ct2",
+ "Systran/faster-distil-whisper-medium.en",
"medium.en",
"medium",
+ "Systran/faster-distil-whisper-large-v2",
"large-v1",
"large-v2",
- "yumfood/whisper_distil_large_v2_ct2",
};
const size_t kNumModelChoices = sizeof(kModelChoices) / sizeof(kModelChoices[0]);
constexpr int kModelDefault = 2; // base.en
diff --git a/Scripts/text_to_text_demo.py b/Scripts/text_to_text_demo.py
index d5f0ada..4810361 100644
--- a/Scripts/text_to_text_demo.py
+++ b/Scripts/text_to_text_demo.py
@@ -7,8 +7,6 @@ from pythonosc import udp_client
import generate_utils
import osc_ctrl
-import paging
-import pythonosc
import time
class AppConfig:
diff --git a/Scripts/transcribe_v2.py b/Scripts/transcribe_v2.py
index 7655d4b..32652df 100644
--- a/Scripts/transcribe_v2.py
+++ b/Scripts/transcribe_v2.py
@@ -422,7 +422,7 @@ class Whisper:
if cfg["use_cpu"]:
model_device = "cpu"
- download_it = os.path.exists(model_root)
+ already_downloaded = os.path.exists(model_root)
if '/' in model_str:
hf_hub_download(repo_id=model_str, filename='model.bin',
local_dir=model_root)
@@ -430,14 +430,15 @@ class Whisper:
local_dir=model_root)
hf_hub_download(repo_id=model_str, filename='config.json',
local_dir=model_root)
- if download_it:
+ already_downloaded = True
+ if already_downloaded:
model_str = model_root
self.model = WhisperModel(model_str,
device = model_device,
device_index = cfg["gpu_idx"],
compute_type = cfg["compute_type"],
download_root = model_root,
- local_files_only = download_it)
+ local_files_only = already_downloaded)
def transcribe(self, frames: bytes = None) -> typing.List[Segment]:
if frames is None: