Add ability to select model

* icon now works when pinned to taskbar * add model selection * add script to dump mic devices * whisper models now download into the virtual environment
author: yum <yum.food.vr@gmail.com> 2022-12-18 19:11:47 -0800
committer: yum <yum.food.vr@gmail.com> 2022-12-18 19:11:47 -0800
commit: b7b2b112a106138d99dda3f259620b350c896f1a (patch)
tree: ec30bc9daeb0fc4b75c488f675cf70d55f0a1f85
parent: 79f1b48042cbb724892301afdee842fb33ab2b37 (diff)
16 files changed, 243 insertions, 99 deletions
diff --git a/GUI/GUI/GUI/.gitignore b/GUI/GUI/GUI/.gitignore
index 86c78ae..92618b8 100644
--- a/GUI/GUI/GUI/.gitignore
+++ b/GUI/GUI/GUI/.gitignore
@@ -1,3 +1,6 @@
 # Don't check in build artifacts
 x64
 x86
+# No .rc generated files
+GUI.APS
+resource.h
diff --git a/GUI/GUI/GUI/App.cpp b/GUI/GUI/GUI/App.cpp
index 8456447..94a01a4 100644
--- a/GUI/GUI/GUI/App.cpp
+++ b/GUI/GUI/GUI/App.cpp
@@ -4,6 +4,7 @@
 bool MyApp::OnInit()
 {
     Frame* frame = new Frame();
+
     frame->Show(true);
 
     return true;
diff --git a/GUI/GUI/GUI/Frame.cpp b/GUI/GUI/GUI/Frame.cpp
index 55112db..4f23beb 100644
--- a/GUI/GUI/GUI/Frame.cpp
+++ b/GUI/GUI/GUI/Frame.cpp
@@ -8,13 +8,19 @@
 namespace {
     enum FrameIds {
         ID_PY_PANEL,
-        ID_PY_VERSION_BUTTON,
+        ID_PY_CONFIG_PANEL,
+        ID_PY_CONFIG_DROPDOWN_PANEL,
         ID_PY_SETUP_BUTTON,
+        ID_PY_DUMP_MICS_BUTTON,
         ID_PY_APP_START_BUTTON,
         ID_PY_APP_STOP_BUTTON,
         ID_PY_OUT,
         ID_PY_APP_MIC,
+        ID_PY_APP_MIC_PANEL,
         ID_PY_APP_LANG,
+        ID_PY_APP_LANG_PANEL,
+        ID_PY_APP_MODEL,
+        ID_PY_APP_MODEL_PANEL,
     };
 
     const wxString kMicChoices[] = {
@@ -33,6 +39,7 @@ namespace {
         "9",
     };
     const size_t kNumMicChoices = sizeof(kMicChoices) / sizeof(kMicChoices[0]);
+    constexpr int kMicDefault = 0;  // index
 
     // lifted from whisper/tokenizer.py
 	const wxString kLangChoices[] = {
@@ -137,79 +144,134 @@ namespace {
 		"sundanese"
 	};
     const size_t kNumLangChoices = sizeof(kLangChoices) / sizeof(kLangChoices[0]);
+    constexpr int kLangDefault = 0;  // english
+
+    // lifted from whisper/__init__.py
+    const wxString kModelChoices[] = {
+        "tiny.en",
+        "tiny",
+        "base.en",
+        "base",
+        "small.en",
+        "small",
+        "medium.en",
+        "medium",
+    };
+    const size_t kNumModelChoices = sizeof(kModelChoices) / sizeof(kModelChoices[0]);
+    constexpr int kModelDefault = 2;  // base.en
 }  // namespace
 
 Frame::Frame()
     : wxFrame(nullptr, wxID_ANY, "TaSTT"),
-    py_panel_(this, ID_PY_PANEL),
-    py_panel_sizer_(wxVERTICAL),
-    py_version_button_(&py_panel_, ID_PY_VERSION_BUTTON, "Check embedded Python version"),
-    py_setup_button_(&py_panel_, ID_PY_SETUP_BUTTON, "Set up Python virtual environment"),
-    py_app_start_button_(&py_panel_, ID_PY_APP_START_BUTTON, "Begin transcribing"),
-    py_app_stop_button_(&py_panel_, ID_PY_APP_STOP_BUTTON, "Stop transcribing"),
-    py_out_(&py_panel_, ID_PY_OUT, wxEmptyString, wxDefaultPosition,
-        wxSize(/*x_px=*/480, /*y_px=*/160), wxTE_MULTILINE),
-    py_app_(nullptr),
-    py_app_mic_(&py_panel_, ID_PY_APP_MIC, wxDefaultPosition, wxDefaultSize, kNumMicChoices, kMicChoices),
-    py_app_lang_(&py_panel_, ID_PY_APP_LANG, wxDefaultPosition, wxDefaultSize, kNumLangChoices, kLangChoices)
+    py_app_(nullptr)
 {
+    auto* py_panel = new wxPanel(this, ID_PY_PANEL);
+    {
+        const auto py_out_sz = wxSize(/*x_px=*/320, /*y_px=*/160);
+		auto* py_out = new wxTextCtrl(py_panel, ID_PY_OUT,
+            wxEmptyString,
+            wxDefaultPosition,
+			py_out_sz, wxTE_MULTILINE | wxTE_READONLY);
+        py_out->SetMinSize(py_out_sz);
+        py_out_ = py_out;
+
+        py_out_->AppendText(PythonWrapper::GetVersion() + "\n");
+
+		auto* py_config_panel = new wxPanel(py_panel, ID_PY_CONFIG_PANEL);
+        {
+            auto* py_setup_button = new wxButton(py_config_panel, ID_PY_SETUP_BUTTON, "Set up Python virtual environment");
+            auto* py_dump_mics_button = new wxButton(py_config_panel, ID_PY_DUMP_MICS_BUTTON, "List input devices");
+
+            auto* py_config_dropdown_panel = new wxPanel(py_config_panel, ID_PY_CONFIG_DROPDOWN_PANEL);
+            {
+                auto* py_app_mic = new wxChoice(py_config_dropdown_panel, ID_PY_APP_MIC, wxDefaultPosition,
+                    wxDefaultSize, kNumMicChoices, kMicChoices);
+				py_app_mic->SetSelection(kMicDefault);
+				py_app_mic_ = py_app_mic;
+
+                auto* py_app_lang = new wxChoice(py_config_dropdown_panel, ID_PY_APP_LANG, wxDefaultPosition,
+                    wxDefaultSize, kNumLangChoices, kLangChoices);
+                py_app_lang->SetSelection(kLangDefault);
+				py_app_lang_ = py_app_lang;
+
+                auto* py_app_model = new wxChoice(py_config_dropdown_panel, ID_PY_APP_MODEL, wxDefaultPosition,
+                    wxDefaultSize, kNumModelChoices, kModelChoices);
+                py_app_model->SetSelection(kModelDefault);
+                py_app_model_ = py_app_model;
+
+                auto* sizer = new wxGridSizer(/*cols=*/2);
+                py_config_dropdown_panel->SetSizer(sizer);
+
+				sizer->Add(new wxStaticText(py_config_dropdown_panel, wxID_ANY, /*label=*/"Microphone:"));
+                sizer->Add(py_app_mic);
+
+				sizer->Add(new wxStaticText(py_config_dropdown_panel, wxID_ANY, /*label=*/"Language:"));
+                sizer->Add(py_app_lang);
+
+				sizer->Add(new wxStaticText(py_config_dropdown_panel, wxID_ANY, /*label=*/"Model:"));
+                sizer->Add(py_app_model);
+            }
+
+            auto* py_app_start_button = new wxButton(py_config_panel, ID_PY_APP_START_BUTTON, "Begin transcribing");
+            auto* py_app_stop_button = new wxButton(py_config_panel, ID_PY_APP_STOP_BUTTON, "Stop transcribing");
+
+            auto* sizer = new wxBoxSizer(wxVERTICAL);
+			py_config_panel->SetSizer(sizer);
+			sizer->Add(py_setup_button);
+			sizer->Add(py_dump_mics_button);
+			sizer->Add(py_config_dropdown_panel);
+			sizer->Add(py_app_start_button);
+			sizer->Add(py_app_stop_button);
+        }
+
+		auto* sizer = new wxBoxSizer(wxHORIZONTAL);
+        py_panel->SetSizer(sizer);
+        sizer->Add(py_config_panel);
+        sizer->Add(py_out);
+    }
+
 	Bind(wxEVT_MENU, &Frame::OnExit, this, wxID_EXIT);
-	Bind(wxEVT_BUTTON, &Frame::OnGetPythonVersion, this, ID_PY_VERSION_BUTTON);
 	Bind(wxEVT_BUTTON, &Frame::OnAppStart, this, ID_PY_APP_START_BUTTON);
 	Bind(wxEVT_BUTTON, &Frame::OnAppStop, this, ID_PY_APP_STOP_BUTTON);
 	Bind(wxEVT_BUTTON, &Frame::OnSetupPython, this, ID_PY_SETUP_BUTTON);
+	Bind(wxEVT_BUTTON, &Frame::OnDumpMics, this, ID_PY_DUMP_MICS_BUTTON);
 
 	// wx needs this to be able to load PNGs.
 	wxImage::AddHandler(&png_handler_);
-	const std::string icon_path = "Resources/logo.png";
-	LoadAndSetIcon(icon_path);
-
-    wxSize py_out_size(/*x=*/80, /*y=*/20);
-    py_out_.SetSize(py_out_size);
-    py_app_mic_.SetSelection(0);
-    py_app_lang_.SetSelection(0);
-
-	py_panel_.SetSizer(&py_panel_sizer_);
-    py_panel_sizer_.Add(&py_version_button_);
-    py_panel_sizer_.Add(&py_setup_button_);
-    py_panel_sizer_.Add(&py_app_mic_);
-    py_panel_sizer_.Add(&py_app_lang_);
-    py_panel_sizer_.Add(&py_app_start_button_);
-    py_panel_sizer_.Add(&py_app_stop_button_);
-    py_panel_sizer_.Add(&py_out_);
+	LoadAndSetIcons();
+
+	{
+        auto frame_sz = GetBestSize();
+		auto panel_sz = py_panel->GetBestSize();
+
+        auto ideal_sz = panel_sz;
+        ideal_sz.y += frame_sz.y;
+
+        this->SetSize(ideal_sz);
+	}
 }
 
 void Frame::OnExit(wxCommandEvent& event)
 {
+    OnAppStop(event);
     Close(true);
 }
 
-void Frame::OnGetPythonVersion(wxCommandEvent& event)
-{
-    PythonWrapper py;
-    std::string py_version = py.GetVersion();
-    py_out_.AppendText(py_version + "\n");
-}
-
 void Frame::OnSetupPython(wxCommandEvent& event)
 {
-    PythonWrapper py;
-
-    py_out_.AppendText("Setting up Python virtual environment\n");
-    py_out_.AppendText("This could take several minutes, please be patient!\n");
-    py_out_.AppendText("This will download ~5GB of dependencies.\n");
-    py_out_.AppendText("Dependencies are installed in the GUI's folder, "
-        "so deleting the folder is all that's needed to uninstall.\n");
+    py_out_->AppendText("Setting up Python virtual environment\n");
+    py_out_->AppendText("This could take several minutes, please be patient!\n");
+    py_out_->AppendText("This will download ~5GB of dependencies.\n");
 
     {
         std::string py_out;
         std::ostringstream py_out_oss;
         py_out_oss << "  Installing pip" << std::endl;
-        py_out_.AppendText(py_out_oss.str());
-        if (!py.InstallPip(&py_out)) {
+        py_out_->AppendText(py_out_oss.str());
+        if (!PythonWrapper::InstallPip(&py_out)) {
             std::ostringstream py_out_oss;
             py_out_oss << "Failed to install pip: " << py_out;
-            py_out_.AppendText(py_out_oss.str());
+            py_out_->AppendText(py_out_oss.str());
         }
     }
 
@@ -228,56 +290,65 @@ void Frame::OnSetupPython(wxCommandEvent& event)
         {
             std::ostringstream py_out_oss;
             py_out_oss << "  Installing " << pip_dep << std::endl;
-            py_out_.AppendText(py_out_oss.str());
+            py_out_->AppendText(py_out_oss.str());
         }
         std::string py_out;
-        bool res = py.InvokeWithArgs({ "-m", "pip", "install", pip_dep }, &py_out);
+        bool res = PythonWrapper::InvokeWithArgs({ "-m", "pip", "install", pip_dep }, &py_out);
         if (!res) {
             std::ostringstream py_out_oss;
             py_out_oss << "Failed to install " << pip_dep << ": " << py_out << std::endl;
-            py_out_.AppendText(py_out_oss.str());
+            py_out_->AppendText(py_out_oss.str());
             return;
         }
     }
 
-    py_out_.AppendText("Python virtual environment successfully set up!\n");
+    py_out_->AppendText("Python virtual environment successfully set up!\n");
+}
+
+void Frame::OnDumpMics(wxCommandEvent& event)
+{
+    py_out_->AppendText(PythonWrapper::DumpMics());
 }
 
 void Frame::OnAppStart(wxCommandEvent& event) {
     if (py_app_) {
         if (wxProcess::Exists(py_app_->GetPid())) {
-            py_out_.AppendText("Transcription engine already running\n");
+            py_out_->AppendText("Transcription engine already running\n");
             return;
         }
         delete py_app_;
         py_app_ = nullptr;
     }
 
-	py_out_.AppendText("Launching transcription engine\n");
+	py_out_->AppendText("Launching transcription engine\n");
 
-    PythonWrapper py;
     auto cb = [&](wxProcess* proc, int ret) -> void {
 		std::ostringstream py_out_oss;
         py_out_oss << "Transcription engine exited with code " << ret << std::endl;
 
-		py_out_.AppendText(py_out_oss.str());
+		py_out_->AppendText(py_out_oss.str());
 		return;
     };
 
-    int which_mic = py_app_mic_.GetSelection();
+    int which_mic = py_app_mic_->GetSelection();
     if (which_mic == wxNOT_FOUND) {
-        which_mic = 0;
+        which_mic = kMicDefault;
     }
-    int which_lang = py_app_lang_.GetSelection();
+    int which_lang = py_app_lang_->GetSelection();
     if (which_lang == wxNOT_FOUND) {
-        which_lang = 0;
+        which_lang = kLangDefault;
+    }
+    int which_model = py_app_model_->GetSelection();
+    if (which_model == wxNOT_FOUND) {
+        which_model = kModelDefault;
     }
 
-    wxProcess* p = py.StartApp(std::move(cb),
+    wxProcess* p = PythonWrapper::StartApp(std::move(cb),
         kMicChoices[which_mic].ToStdString(),
-        kLangChoices[which_lang].ToStdString());
+        kLangChoices[which_lang].ToStdString(),
+        kModelChoices[which_model].ToStdString());
     if (!p) {
-        py_out_.AppendText("Failed to launch transcription engine\n");
+        py_out_->AppendText("Failed to launch transcription engine\n");
         return;
     }
 
@@ -303,11 +374,11 @@ void Frame::OnAppStop(wxCommandEvent& event) {
 		while (wxProcess::Exists(pid)) {
 			if (first) {
 				first = false;
-				py_out_.AppendText("Timed out trying to stop transcription engine "
+				py_out_->AppendText("Timed out trying to stop transcription engine "
 					"cleanly, sending SIGKILL\n");
 			}
 			else if (++loop_cnt % 100 == 0) {
-                    py_out_.AppendText("Waiting for transcription engine to exit");
+                    py_out_->AppendText("Waiting for transcription engine to exit");
 			}
 			wxProcess::Kill(pid, wxSIGKILL);
 			wxMilliSleep(10);
@@ -315,20 +386,26 @@ void Frame::OnAppStop(wxCommandEvent& event) {
 
         // Since we don't process the termination event, py_app_ deletes itself!
         py_app_ = nullptr;
-        py_out_.AppendText("Stopped transcription engine\n");
+        py_out_->AppendText("Stopped transcription engine\n");
     }
     else {
-        py_out_.AppendText("Transcription engine already stopped\n");
+        py_out_->AppendText("Transcription engine already stopped\n");
     }
 }
 
-void Frame::LoadAndSetIcon(const std::string& icon_path) {
-    if (!std::filesystem::exists(icon_path)) {
-        wxLogFatalError("Logo is missing from %s", icon_path.c_str());
+void Frame::LoadAndSetIcons() {
+    const char* icons[] = {
+        "Resources/Images/logo.png",
+        "Resources/Images/logo_16x16.png",
+        "Resources/Images/logo_32x32.png",
+    };
+    wxIconBundle icon_bundle;
+    for (const auto& icon_path : icons) {
+        if (!std::filesystem::exists(icon_path)) {
+            wxLogFatalError("Logo is missing from %s", icon_path);
+        }
+        icon_bundle.AddIcon(icon_path, wxBITMAP_TYPE_PNG);
     }
-    wxBitmap icon_img(icon_path, wxBITMAP_TYPE_PNG);
-    wxIcon icon;
-    icon.CopyFromBitmap(icon_img);
-    SetIcon(icon);
+    SetIcons(icon_bundle);
 }
 
diff --git a/GUI/GUI/GUI/Frame.h b/GUI/GUI/GUI/Frame.h
index 8132cce..e5b3ae3 100644
--- a/GUI/GUI/GUI/Frame.h
+++ b/GUI/GUI/GUI/Frame.h
@@ -15,23 +15,19 @@ public:
 
 private:
     wxPNGHandler png_handler_;
-    wxPanel py_panel_;
-    wxBoxSizer py_panel_sizer_;
-    wxButton py_version_button_;
-    wxButton py_setup_button_;
-    wxButton py_app_start_button_;
-    wxButton py_app_stop_button_;
-    wxTextCtrl py_out_;
-    wxChoice py_app_mic_;
-    wxChoice py_app_lang_;
 
+    wxTextCtrl* py_out_;
+    wxChoice* py_app_mic_;
+    wxChoice* py_app_lang_;
+    wxChoice* py_app_model_;
     wxProcess* py_app_;
 
     void OnExit(wxCommandEvent& event);
     void OnGetPythonVersion(wxCommandEvent& event);
     void OnSetupPython(wxCommandEvent& event);
+    void OnDumpMics(wxCommandEvent& event);
     void OnAppStart(wxCommandEvent& event);
     void OnAppStop(wxCommandEvent& event);
 
-    void LoadAndSetIcon(const std::string& icon_path);
+    void LoadAndSetIcons();
 };
diff --git a/GUI/GUI/GUI/GUI.rc b/GUI/GUI/GUI/GUI.rc
new file mode 100644
index 0000000..01c922a
--- /dev/null
+++ b/GUI/GUI/GUI/GUI.rc
diff --git a/GUI/GUI/GUI/GUI.vcxproj b/GUI/GUI/GUI/GUI.vcxproj
index 223f47e..cd0e5f0 100644
--- a/GUI/GUI/GUI/GUI.vcxproj
+++ b/GUI/GUI/GUI/GUI.vcxproj
@@ -144,8 +144,19 @@
     <ClInclude Include="App.h" />
     <ClInclude Include="Frame.h" />
     <ClInclude Include="PythonWrapper.h" />
+    <ClInclude Include="resource.h" />
     <ClInclude Include="ScopeGuard.h" />
   </ItemGroup>
+  <ItemGroup>
+    <ResourceCompile Include="GUI.rc" />
+  </ItemGroup>
+  <ItemGroup>
+    <Image Include="..\..\..\Images\logo.png" />
+    <Image Include="..\..\..\Images\logo_16x16.png" />
+    <Image Include="..\..\..\Images\logo_32x32.png" />
+    <Image Include="icon1.ico" />
+    <Image Include="Resources\logo.png" />
+  </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
   </ImportGroup>
diff --git a/GUI/GUI/GUI/GUI.vcxproj.filters b/GUI/GUI/GUI/GUI.vcxproj.filters
index 74e4659..5118c26 100644
--- a/GUI/GUI/GUI/GUI.vcxproj.filters
+++ b/GUI/GUI/GUI/GUI.vcxproj.filters
@@ -41,5 +41,30 @@
     <ClInclude Include="ScopeGuard.h">
       <Filter>Header Files</Filter>
     </ClInclude>
+    <ClInclude Include="resource.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+  </ItemGroup>
+  <ItemGroup>
+    <ResourceCompile Include="GUI.rc">
+      <Filter>Resource Files</Filter>
+    </ResourceCompile>
+  </ItemGroup>
+  <ItemGroup>
+    <Image Include="..\..\..\Images\logo.png">
+      <Filter>Resource Files</Filter>
+    </Image>
+    <Image Include="icon1.ico">
+      <Filter>Resource Files</Filter>
+    </Image>
+    <Image Include="..\..\..\Images\logo_16x16.png">
+      <Filter>Resource Files</Filter>
+    </Image>
+    <Image Include="..\..\..\Images\logo_32x32.png">
+      <Filter>Resource Files</Filter>
+    </Image>
+    <Image Include="Resources\logo.png">
+      <Filter>Resource Files</Filter>
+    </Image>
   </ItemGroup>
 </Project>
 \ No newline at end of file
diff --git a/GUI/GUI/GUI/PythonWrapper.cpp b/GUI/GUI/GUI/PythonWrapper.cpp
index 53fcc06..6e9e0f1 100644
--- a/GUI/GUI/GUI/PythonWrapper.cpp
+++ b/GUI/GUI/GUI/PythonWrapper.cpp
@@ -78,6 +78,16 @@ std::string PythonWrapper::GetVersion() {
 	return result;
 }
 
+std::string PythonWrapper::DumpMics() {  // Runs dump_mic_devices.py and returns the output InvokeWithArgs collected.
+	std::string result;
+	const std::string dump_mics_path = "Resources/Scripts/dump_mic_devices.py";
+	bool ok = InvokeWithArgs({ dump_mics_path }, &result);
+	if (!ok) {
+		wxLogError("Failed to dump mic devices: %s", result.c_str());  // Non-fatal: wxLogFatalError would terminate the GUI over a recoverable script failure.
+	}
+	return result;
+}
+
 bool PythonWrapper::InstallPip(std::string* out) {
 	std::string result;
 
@@ -87,11 +97,12 @@ bool PythonWrapper::InstallPip(std::string* out) {
 
 wxProcess* PythonWrapper::StartApp(
 	std::function<void(wxProcess* proc, int ret)>&& exit_callback,
-	const std::string& mic, const std::string& lang) {
+	const std::string& mic, const std::string& lang, const std::string& model) {
 	return InvokeAsyncWithArgs({
 		"Resources/Scripts/transcribe.py",
 		"--mic", mic,
 		"--lang", lang,
+		"--model", model,
 		},
 		std::move(exit_callback));
 }
diff --git a/GUI/GUI/GUI/PythonWrapper.h b/GUI/GUI/GUI/PythonWrapper.h
index 0fa3c94..f6a739e 100644
--- a/GUI/GUI/GUI/PythonWrapper.h
+++ b/GUI/GUI/GUI/PythonWrapper.h
@@ -14,10 +14,8 @@
 /*
  * This class wraps interactions with the embedded Python interpreter.
 */
-class PythonWrapper
+namespace PythonWrapper
 {
-public:
-
 	// Invoke the interpreter asynchronously with the given arguments.
 	// When the process exits, `exit_callback` runs.
 	// The caller is responsible for deleting wxProcess.
@@ -31,11 +29,14 @@ public:
 	// Execute python --version.
 	std::string GetVersion();
 
+	// Executes dump_mic_devices.py.
+	std::string DumpMics();
+
 	// Execute get-pip.py.
 	bool InstallPip(std::string* out);
 
 	wxProcess* StartApp(
 		std::function<void(wxProcess* proc, int ret)>&& exit_callback,
-		const std::string& mic, const std::string& lang);
+		const std::string& mic, const std::string& lang, const std::string& model);
 };
 
diff --git a/GUI/GUI/GUI/Resources/logo.ico b/GUI/GUI/GUI/Resources/logo.ico
new file mode 100644
index 0000000..aca1b5a
--- /dev/null
+++ b/GUI/GUI/GUI/Resources/logo.ico
diff --git a/GUI/package.ps1 b/GUI/package.ps1
index 0346e67..36049b2 100644
--- a/GUI/package.ps1
+++ b/GUI/package.ps1
@@ -6,9 +6,11 @@ if (Test-Path $install_dir) {
 
 mkdir $install_dir > $null
 mkdir $install_dir/Resources > $null
-cp ../Images/logo.png TaSTT/Resources
+cp -Recurse ../Images TaSTT/Resources/Images
 cp -Recurse ../Python TaSTT/Resources/Python
 cp -Recurse ../Scripts TaSTT/Resources/Scripts
 cp -Recurse ../Sounds TaSTT/Resources/Sounds
 cp GUI/x64/Release/GUI.exe TaSTT/TaSTT.exe
 
+#Compress-Archive -Path "$install_dir" -DestinationPath "$install_dir.zip" -Force
+
diff --git a/Images/logo_16x16.png b/Images/logo_16x16.png
new file mode 100644
index 0000000..abdbe8a
--- /dev/null
+++ b/Images/logo_16x16.png
diff --git a/Images/logo_32x32.png b/Images/logo_32x32.png
new file mode 100644
index 0000000..d30f6fe
--- /dev/null
+++ b/Images/logo_32x32.png
diff --git a/Scripts/dump_mic_devices.py b/Scripts/dump_mic_devices.py
new file mode 100644
index 0000000..2b712cf
--- /dev/null
+++ b/Scripts/dump_mic_devices.py
@@ -0,0 +1,7 @@
+#!/usr/bin/env python3
+
+from transcribe import dumpMicDevices
+
+if __name__ == "__main__":
+    dumpMicDevices()
+
diff --git a/Scripts/emotes.py b/Scripts/emotes.py
index b922fdf..0a4ed01 100644
--- a/Scripts/emotes.py
+++ b/Scripts/emotes.py
@@ -31,6 +31,7 @@ IMG_TEX_DATA.append(("Images/Emotes/based.png", "based"))
 IMG_TEX_DATA.append(("Images/Emotes/chad.png", "chad"))
 IMG_TEX_DATA.append(("Images/Emotes/aware.png", "aware"))
 IMG_TEX_DATA.append(("Images/Emotes/girl.png", "girl"))
+IMG_TEX_DATA = []  # NOTE(review): rebinding to an empty list drops every entry appended above, so the loop below iterates zero times — confirm this is intentional.
 
 IMG_TEX_KEYWORD_TO_COORD = {}
 for i in range(0, len(IMG_TEX_DATA)):
diff --git a/Scripts/transcribe.py b/Scripts/transcribe.py
index 48426e2..0f7ae37 100644
--- a/Scripts/transcribe.py
+++ b/Scripts/transcribe.py
@@ -77,13 +77,14 @@ class AudioState:
 
     osc_client = osc_ctrl.getClient()
 
-def dumpMicDevices(audio_state):
-    info = audio_state.p.get_host_api_info_by_index(0)
+def dumpMicDevices():  # Prints the id and name of each device on host API 0 that reports input channels.
+    p = pyaudio.PyAudio()  # NOTE(review): this instance is never passed to terminate() in this function — confirm PortAudio resources get released.
+    info = p.get_host_api_info_by_index(0)
     numdevices = info.get('deviceCount')
 
     for i in range(0, numdevices):
-        if (audio_state.p.get_device_info_by_host_api_device_index(0, i).get('maxInputChannels')) > 0:
-            device_name = audio_state.p.get_device_info_by_host_api_device_index(0, i).get('name')
+        if (p.get_device_info_by_host_api_device_index(0, i).get('maxInputChannels')) > 0:
+            device_name = p.get_device_info_by_host_api_device_index(0, i).get('name')
             print("Input Device id ", i, " - ", device_name)
 
 def onAudioFramesAvailable(
@@ -119,7 +120,7 @@ def getMicStream(which_mic):
     audio_state.p = pyaudio.PyAudio()
 
     print("Finding mic {}...".format(which_mic))
-    dumpMicDevices(audio_state)
+    dumpMicDevices()
     got_match = False
     device_index = -1
     focusrite_str = "Focusrite"
@@ -318,16 +319,20 @@ def readControllerInput(audio_state):
                 audio_state.drop_transcription = True
                 audio_state.audio_paused = False
 
-def transcribeLoop(mic: str, language: str):
+# model should correspond to one of the Whisper models defined in
+# whisper/__init__.py. Examples: tiny, base, small, medium.
+def transcribeLoop(mic: str, language: str, model: str):
     audio_state = getMicStream(mic)
     audio_state.language = whisper.tokenizer.TO_LANGUAGE_CODE[language]
 
     print("Safe to start talking")
 
-    #model = whisper.load_model("tiny")
-    #model = whisper.load_model("base")
-    model = whisper.load_model("small")
-    #model = whisper.load_model("medium")
+    abspath = os.path.abspath(__file__)
+    dname = os.path.dirname(abspath)
+    model_root = os.path.join(dname, "Models")
+
+    print("Model {} will be saved to {}".format(model, model_root))
+    model = whisper.load_model(model, download_root=model_root)
 
     transcribe_audio_thd = threading.Thread(target = transcribeAudio, args = [audio_state, model])
     transcribe_audio_thd.daemon = True
@@ -369,6 +374,7 @@ if __name__ == "__main__":
     parser = argparse.ArgumentParser()
     parser.add_argument("--mic", type=str, help="Which mic to use. Options: index, focusrite. Default: index")
     parser.add_argument("--language", type=str, help="Which language to use. Ex: english, japanese, chinese, french, german.")
+    parser.add_argument("--model", type=str, help="Which AI model to use. Ex: tiny, base, small, medium")
     args = parser.parse_args()
 
     if not args.mic:
@@ -377,5 +383,8 @@ if __name__ == "__main__":
     if not args.language:
         args.language = "english"
 
-    transcribeLoop(args.mic, args.language)
+    if not args.model:
+        args.language = "base"
+
+    transcribeLoop(args.mic, args.language, args.model)
author	yum <yum.food.vr@gmail.com>	2022-12-18 19:11:47 -0800
committer	yum <yum.food.vr@gmail.com>	2022-12-18 19:11:47 -0800
commit	b7b2b112a106138d99dda3f259620b350c896f1a (patch)
tree	ec30bc9daeb0fc4b75c488f675cf70d55f0a1f85
parent	79f1b48042cbb724892301afdee842fb33ab2b37 (diff)