summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author yum <yum.food.vr@gmail.com> 2022-12-18 19:11:47 -0800
committer yum <yum.food.vr@gmail.com> 2022-12-18 19:11:47 -0800
commit b7b2b112a106138d99dda3f259620b350c896f1a (patch)
tree ec30bc9daeb0fc4b75c488f675cf70d55f0a1f85
parent 79f1b48042cbb724892301afdee842fb33ab2b37 (diff)
Add ability to select model
* icon now works when pinned to taskbar
* add model selection
* add script to dump mic devices
* whisper models now download into the virtual environment
-rw-r--r--GUI/GUI/GUI/.gitignore3
-rw-r--r--GUI/GUI/GUI/App.cpp1
-rw-r--r--GUI/GUI/GUI/Frame.cpp221
-rw-r--r--GUI/GUI/GUI/Frame.h16
-rw-r--r--GUI/GUI/GUI/GUI.rcbin0 -> 2798 bytes
-rw-r--r--GUI/GUI/GUI/GUI.vcxproj11
-rw-r--r--GUI/GUI/GUI/GUI.vcxproj.filters25
-rw-r--r--GUI/GUI/GUI/PythonWrapper.cpp13
-rw-r--r--GUI/GUI/GUI/PythonWrapper.h9
-rw-r--r--GUI/GUI/GUI/Resources/logo.icobin0 -> 4314 bytes
-rw-r--r--GUI/package.ps14
-rw-r--r--Images/logo_16x16.pngbin0 -> 1135 bytes
-rw-r--r--Images/logo_32x32.pngbin0 -> 2245 bytes
-rw-r--r--Scripts/dump_mic_devices.py7
-rw-r--r--Scripts/emotes.py1
-rw-r--r--Scripts/transcribe.py31
16 files changed, 243 insertions, 99 deletions
diff --git a/GUI/GUI/GUI/.gitignore b/GUI/GUI/GUI/.gitignore
index 86c78ae..92618b8 100644
--- a/GUI/GUI/GUI/.gitignore
+++ b/GUI/GUI/GUI/.gitignore
@@ -1,3 +1,6 @@
# Don't check in build artifacts
x64
x86
+# No .rc generated files
+GUI.APS
+resource.h
diff --git a/GUI/GUI/GUI/App.cpp b/GUI/GUI/GUI/App.cpp
index 8456447..94a01a4 100644
--- a/GUI/GUI/GUI/App.cpp
+++ b/GUI/GUI/GUI/App.cpp
@@ -4,6 +4,7 @@
bool MyApp::OnInit()
{
Frame* frame = new Frame();
+
frame->Show(true);
return true;
diff --git a/GUI/GUI/GUI/Frame.cpp b/GUI/GUI/GUI/Frame.cpp
index 55112db..4f23beb 100644
--- a/GUI/GUI/GUI/Frame.cpp
+++ b/GUI/GUI/GUI/Frame.cpp
@@ -8,13 +8,19 @@
namespace {
enum FrameIds {
ID_PY_PANEL,
- ID_PY_VERSION_BUTTON,
+ ID_PY_CONFIG_PANEL,
+ ID_PY_CONFIG_DROPDOWN_PANEL,
ID_PY_SETUP_BUTTON,
+ ID_PY_DUMP_MICS_BUTTON,
ID_PY_APP_START_BUTTON,
ID_PY_APP_STOP_BUTTON,
ID_PY_OUT,
ID_PY_APP_MIC,
+ ID_PY_APP_MIC_PANEL,
ID_PY_APP_LANG,
+ ID_PY_APP_LANG_PANEL,
+ ID_PY_APP_MODEL,
+ ID_PY_APP_MODEL_PANEL,
};
const wxString kMicChoices[] = {
@@ -33,6 +39,7 @@ namespace {
"9",
};
const size_t kNumMicChoices = sizeof(kMicChoices) / sizeof(kMicChoices[0]);
+ constexpr int kMicDefault = 0; // index
// lifted from whisper/tokenizer.py
const wxString kLangChoices[] = {
@@ -137,79 +144,134 @@ namespace {
"sundanese"
};
const size_t kNumLangChoices = sizeof(kLangChoices) / sizeof(kLangChoices[0]);
+ constexpr int kLangDefault = 0; // english
+
+ // lifted from whisper/__init__.py
+ const wxString kModelChoices[] = {
+ "tiny.en",
+ "tiny",
+ "base.en",
+ "base",
+ "small.en",
+ "small",
+ "medium.en",
+ "medium",
+ };
+ const size_t kNumModelChoices = sizeof(kModelChoices) / sizeof(kModelChoices[0]);
+ constexpr int kModelDefault = 2; // base.en
} // namespace
Frame::Frame()
: wxFrame(nullptr, wxID_ANY, "TaSTT"),
- py_panel_(this, ID_PY_PANEL),
- py_panel_sizer_(wxVERTICAL),
- py_version_button_(&py_panel_, ID_PY_VERSION_BUTTON, "Check embedded Python version"),
- py_setup_button_(&py_panel_, ID_PY_SETUP_BUTTON, "Set up Python virtual environment"),
- py_app_start_button_(&py_panel_, ID_PY_APP_START_BUTTON, "Begin transcribing"),
- py_app_stop_button_(&py_panel_, ID_PY_APP_STOP_BUTTON, "Stop transcribing"),
- py_out_(&py_panel_, ID_PY_OUT, wxEmptyString, wxDefaultPosition,
- wxSize(/*x_px=*/480, /*y_px=*/160), wxTE_MULTILINE),
- py_app_(nullptr),
- py_app_mic_(&py_panel_, ID_PY_APP_MIC, wxDefaultPosition, wxDefaultSize, kNumMicChoices, kMicChoices),
- py_app_lang_(&py_panel_, ID_PY_APP_LANG, wxDefaultPosition, wxDefaultSize, kNumLangChoices, kLangChoices)
+ py_app_(nullptr)
{
+ auto* py_panel = new wxPanel(this, ID_PY_PANEL);
+ {
+ const auto py_out_sz = wxSize(/*x_px=*/320, /*y_px=*/160);
+ auto* py_out = new wxTextCtrl(py_panel, ID_PY_OUT,
+ wxEmptyString,
+ wxDefaultPosition,
+ py_out_sz, wxTE_MULTILINE | wxTE_READONLY);
+ py_out->SetMinSize(py_out_sz);
+ py_out_ = py_out;
+
+ py_out_->AppendText(PythonWrapper::GetVersion() + "\n");
+
+ auto* py_config_panel = new wxPanel(py_panel, ID_PY_CONFIG_PANEL);
+ {
+ auto* py_setup_button = new wxButton(py_config_panel, ID_PY_SETUP_BUTTON, "Set up Python virtual environment");
+ auto* py_dump_mics_button = new wxButton(py_config_panel, ID_PY_DUMP_MICS_BUTTON, "List input devices");
+
+ auto* py_config_dropdown_panel = new wxPanel(py_config_panel, ID_PY_CONFIG_DROPDOWN_PANEL);
+ {
+ auto* py_app_mic = new wxChoice(py_config_dropdown_panel, ID_PY_APP_MIC, wxDefaultPosition,
+ wxDefaultSize, kNumMicChoices, kMicChoices);
+ py_app_mic->SetSelection(kMicDefault);
+ py_app_mic_ = py_app_mic;
+
+ auto* py_app_lang = new wxChoice(py_config_dropdown_panel, ID_PY_APP_LANG, wxDefaultPosition,
+ wxDefaultSize, kNumLangChoices, kLangChoices);
+ py_app_lang->SetSelection(kLangDefault);
+ py_app_lang_ = py_app_lang;
+
+ auto* py_app_model = new wxChoice(py_config_dropdown_panel, ID_PY_APP_MODEL, wxDefaultPosition,
+ wxDefaultSize, kNumModelChoices, kModelChoices);
+ py_app_model->SetSelection(kModelDefault);
+ py_app_model_ = py_app_model;
+
+ auto* sizer = new wxGridSizer(/*cols=*/2);
+ py_config_dropdown_panel->SetSizer(sizer);
+
+ sizer->Add(new wxStaticText(py_config_dropdown_panel, wxID_ANY, /*label=*/"Microphone:"));
+ sizer->Add(py_app_mic);
+
+ sizer->Add(new wxStaticText(py_config_dropdown_panel, wxID_ANY, /*label=*/"Language:"));
+ sizer->Add(py_app_lang);
+
+ sizer->Add(new wxStaticText(py_config_dropdown_panel, wxID_ANY, /*label=*/"Model:"));
+ sizer->Add(py_app_model);
+ }
+
+ auto* py_app_start_button = new wxButton(py_config_panel, ID_PY_APP_START_BUTTON, "Begin transcribing");
+ auto* py_app_stop_button = new wxButton(py_config_panel, ID_PY_APP_STOP_BUTTON, "Stop transcribing");
+
+ auto* sizer = new wxBoxSizer(wxVERTICAL);
+ py_config_panel->SetSizer(sizer);
+ sizer->Add(py_setup_button);
+ sizer->Add(py_dump_mics_button);
+ sizer->Add(py_config_dropdown_panel);
+ sizer->Add(py_app_start_button);
+ sizer->Add(py_app_stop_button);
+ }
+
+ auto* sizer = new wxBoxSizer(wxHORIZONTAL);
+ py_panel->SetSizer(sizer);
+ sizer->Add(py_config_panel);
+ sizer->Add(py_out);
+ }
+
Bind(wxEVT_MENU, &Frame::OnExit, this, wxID_EXIT);
- Bind(wxEVT_BUTTON, &Frame::OnGetPythonVersion, this, ID_PY_VERSION_BUTTON);
Bind(wxEVT_BUTTON, &Frame::OnAppStart, this, ID_PY_APP_START_BUTTON);
Bind(wxEVT_BUTTON, &Frame::OnAppStop, this, ID_PY_APP_STOP_BUTTON);
Bind(wxEVT_BUTTON, &Frame::OnSetupPython, this, ID_PY_SETUP_BUTTON);
+ Bind(wxEVT_BUTTON, &Frame::OnDumpMics, this, ID_PY_DUMP_MICS_BUTTON);
// wx needs this to be able to load PNGs.
wxImage::AddHandler(&png_handler_);
- const std::string icon_path = "Resources/logo.png";
- LoadAndSetIcon(icon_path);
-
- wxSize py_out_size(/*x=*/80, /*y=*/20);
- py_out_.SetSize(py_out_size);
- py_app_mic_.SetSelection(0);
- py_app_lang_.SetSelection(0);
-
- py_panel_.SetSizer(&py_panel_sizer_);
- py_panel_sizer_.Add(&py_version_button_);
- py_panel_sizer_.Add(&py_setup_button_);
- py_panel_sizer_.Add(&py_app_mic_);
- py_panel_sizer_.Add(&py_app_lang_);
- py_panel_sizer_.Add(&py_app_start_button_);
- py_panel_sizer_.Add(&py_app_stop_button_);
- py_panel_sizer_.Add(&py_out_);
+ LoadAndSetIcons();
+
+ {
+ auto frame_sz = GetBestSize();
+ auto panel_sz = py_panel->GetBestSize();
+
+ auto ideal_sz = panel_sz;
+ ideal_sz.y += frame_sz.y;
+
+ this->SetSize(ideal_sz);
+ }
}
void Frame::OnExit(wxCommandEvent& event)
{
+ OnAppStop(event);
Close(true);
}
-void Frame::OnGetPythonVersion(wxCommandEvent& event)
-{
- PythonWrapper py;
- std::string py_version = py.GetVersion();
- py_out_.AppendText(py_version + "\n");
-}
-
void Frame::OnSetupPython(wxCommandEvent& event)
{
- PythonWrapper py;
-
- py_out_.AppendText("Setting up Python virtual environment\n");
- py_out_.AppendText("This could take several minutes, please be patient!\n");
- py_out_.AppendText("This will download ~5GB of dependencies.\n");
- py_out_.AppendText("Dependencies are installed in the GUI's folder, "
- "so deleting the folder is all that's needed to uninstall.\n");
+ py_out_->AppendText("Setting up Python virtual environment\n");
+ py_out_->AppendText("This could take several minutes, please be patient!\n");
+ py_out_->AppendText("This will download ~5GB of dependencies.\n");
{
std::string py_out;
std::ostringstream py_out_oss;
py_out_oss << " Installing pip" << std::endl;
- py_out_.AppendText(py_out_oss.str());
- if (!py.InstallPip(&py_out)) {
+ py_out_->AppendText(py_out_oss.str());
+ if (!PythonWrapper::InstallPip(&py_out)) {
std::ostringstream py_out_oss;
py_out_oss << "Failed to install pip: " << py_out;
- py_out_.AppendText(py_out_oss.str());
+ py_out_->AppendText(py_out_oss.str());
}
}
@@ -228,56 +290,65 @@ void Frame::OnSetupPython(wxCommandEvent& event)
{
std::ostringstream py_out_oss;
py_out_oss << " Installing " << pip_dep << std::endl;
- py_out_.AppendText(py_out_oss.str());
+ py_out_->AppendText(py_out_oss.str());
}
std::string py_out;
- bool res = py.InvokeWithArgs({ "-m", "pip", "install", pip_dep }, &py_out);
+ bool res = PythonWrapper::InvokeWithArgs({ "-m", "pip", "install", pip_dep }, &py_out);
if (!res) {
std::ostringstream py_out_oss;
py_out_oss << "Failed to install " << pip_dep << ": " << py_out << std::endl;
- py_out_.AppendText(py_out_oss.str());
+ py_out_->AppendText(py_out_oss.str());
return;
}
}
- py_out_.AppendText("Python virtual environment successfully set up!\n");
+ py_out_->AppendText("Python virtual environment successfully set up!\n");
+}
+
+void Frame::OnDumpMics(wxCommandEvent& event)
+{
+ py_out_->AppendText(PythonWrapper::DumpMics());
}
void Frame::OnAppStart(wxCommandEvent& event) {
if (py_app_) {
if (wxProcess::Exists(py_app_->GetPid())) {
- py_out_.AppendText("Transcription engine already running\n");
+ py_out_->AppendText("Transcription engine already running\n");
return;
}
delete py_app_;
py_app_ = nullptr;
}
- py_out_.AppendText("Launching transcription engine\n");
+ py_out_->AppendText("Launching transcription engine\n");
- PythonWrapper py;
auto cb = [&](wxProcess* proc, int ret) -> void {
std::ostringstream py_out_oss;
py_out_oss << "Transcription engine exited with code " << ret << std::endl;
- py_out_.AppendText(py_out_oss.str());
+ py_out_->AppendText(py_out_oss.str());
return;
};
- int which_mic = py_app_mic_.GetSelection();
+ int which_mic = py_app_mic_->GetSelection();
if (which_mic == wxNOT_FOUND) {
- which_mic = 0;
+ which_mic = kMicDefault;
}
- int which_lang = py_app_lang_.GetSelection();
+ int which_lang = py_app_lang_->GetSelection();
if (which_lang == wxNOT_FOUND) {
- which_lang = 0;
+ which_lang = kLangDefault;
+ }
+ int which_model = py_app_model_->GetSelection();
+ if (which_model == wxNOT_FOUND) {
+ which_model = kModelDefault;
}
- wxProcess* p = py.StartApp(std::move(cb),
+ wxProcess* p = PythonWrapper::StartApp(std::move(cb),
kMicChoices[which_mic].ToStdString(),
- kLangChoices[which_lang].ToStdString());
+ kLangChoices[which_lang].ToStdString(),
+ kModelChoices[which_model].ToStdString());
if (!p) {
- py_out_.AppendText("Failed to launch transcription engine\n");
+ py_out_->AppendText("Failed to launch transcription engine\n");
return;
}
@@ -303,11 +374,11 @@ void Frame::OnAppStop(wxCommandEvent& event) {
while (wxProcess::Exists(pid)) {
if (first) {
first = false;
- py_out_.AppendText("Timed out trying to stop transcription engine "
+ py_out_->AppendText("Timed out trying to stop transcription engine "
"cleanly, sending SIGKILL\n");
}
else if (++loop_cnt % 100 == 0) {
- py_out_.AppendText("Waiting for transcription engine to exit");
+ py_out_->AppendText("Waiting for transcription engine to exit");
}
wxProcess::Kill(pid, wxSIGKILL);
wxMilliSleep(10);
@@ -315,20 +386,26 @@ void Frame::OnAppStop(wxCommandEvent& event) {
// Since we don't process the termination event, py_app_ deletes itself!
py_app_ = nullptr;
- py_out_.AppendText("Stopped transcription engine\n");
+ py_out_->AppendText("Stopped transcription engine\n");
}
else {
- py_out_.AppendText("Transcription engine already stopped\n");
+ py_out_->AppendText("Transcription engine already stopped\n");
}
}
-void Frame::LoadAndSetIcon(const std::string& icon_path) {
- if (!std::filesystem::exists(icon_path)) {
- wxLogFatalError("Logo is missing from %s", icon_path.c_str());
+void Frame::LoadAndSetIcons() {
+ const char* icons[] = {
+ "Resources/Images/logo.png",
+ "Resources/Images/logo_16x16.png",
+ "Resources/Images/logo_32x32.png",
+ };
+ wxIconBundle icon_bundle;
+ for (const auto& icon_path : icons) {
+ if (!std::filesystem::exists(icon_path)) {
+ wxLogFatalError("Logo is missing from %s", icon_path);
+ }
+ icon_bundle.AddIcon(icon_path, wxBITMAP_TYPE_PNG);
}
- wxBitmap icon_img(icon_path, wxBITMAP_TYPE_PNG);
- wxIcon icon;
- icon.CopyFromBitmap(icon_img);
- SetIcon(icon);
+ SetIcons(icon_bundle);
}
diff --git a/GUI/GUI/GUI/Frame.h b/GUI/GUI/GUI/Frame.h
index 8132cce..e5b3ae3 100644
--- a/GUI/GUI/GUI/Frame.h
+++ b/GUI/GUI/GUI/Frame.h
@@ -15,23 +15,19 @@ public:
private:
wxPNGHandler png_handler_;
- wxPanel py_panel_;
- wxBoxSizer py_panel_sizer_;
- wxButton py_version_button_;
- wxButton py_setup_button_;
- wxButton py_app_start_button_;
- wxButton py_app_stop_button_;
- wxTextCtrl py_out_;
- wxChoice py_app_mic_;
- wxChoice py_app_lang_;
+ wxTextCtrl* py_out_;
+ wxChoice* py_app_mic_;
+ wxChoice* py_app_lang_;
+ wxChoice* py_app_model_;
wxProcess* py_app_;
void OnExit(wxCommandEvent& event);
void OnGetPythonVersion(wxCommandEvent& event);
void OnSetupPython(wxCommandEvent& event);
+ void OnDumpMics(wxCommandEvent& event);
void OnAppStart(wxCommandEvent& event);
void OnAppStop(wxCommandEvent& event);
- void LoadAndSetIcon(const std::string& icon_path);
+ void LoadAndSetIcons();
};
diff --git a/GUI/GUI/GUI/GUI.rc b/GUI/GUI/GUI/GUI.rc
new file mode 100644
index 0000000..01c922a
--- /dev/null
+++ b/GUI/GUI/GUI/GUI.rc
Binary files differ
diff --git a/GUI/GUI/GUI/GUI.vcxproj b/GUI/GUI/GUI/GUI.vcxproj
index 223f47e..cd0e5f0 100644
--- a/GUI/GUI/GUI/GUI.vcxproj
+++ b/GUI/GUI/GUI/GUI.vcxproj
@@ -144,8 +144,19 @@
<ClInclude Include="App.h" />
<ClInclude Include="Frame.h" />
<ClInclude Include="PythonWrapper.h" />
+ <ClInclude Include="resource.h" />
<ClInclude Include="ScopeGuard.h" />
</ItemGroup>
+ <ItemGroup>
+ <ResourceCompile Include="GUI.rc" />
+ </ItemGroup>
+ <ItemGroup>
+ <Image Include="..\..\..\Images\logo.png" />
+ <Image Include="..\..\..\Images\logo_16x16.png" />
+ <Image Include="..\..\..\Images\logo_32x32.png" />
+ <Image Include="icon1.ico" />
+ <Image Include="Resources\logo.png" />
+ </ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
</ImportGroup>
diff --git a/GUI/GUI/GUI/GUI.vcxproj.filters b/GUI/GUI/GUI/GUI.vcxproj.filters
index 74e4659..5118c26 100644
--- a/GUI/GUI/GUI/GUI.vcxproj.filters
+++ b/GUI/GUI/GUI/GUI.vcxproj.filters
@@ -41,5 +41,30 @@
<ClInclude Include="ScopeGuard.h">
<Filter>Header Files</Filter>
</ClInclude>
+ <ClInclude Include="resource.h">
+ <Filter>Header Files</Filter>
+ </ClInclude>
+ </ItemGroup>
+ <ItemGroup>
+ <ResourceCompile Include="GUI.rc">
+ <Filter>Resource Files</Filter>
+ </ResourceCompile>
+ </ItemGroup>
+ <ItemGroup>
+ <Image Include="..\..\..\Images\logo.png">
+ <Filter>Resource Files</Filter>
+ </Image>
+ <Image Include="icon1.ico">
+ <Filter>Resource Files</Filter>
+ </Image>
+ <Image Include="..\..\..\Images\logo_16x16.png">
+ <Filter>Resource Files</Filter>
+ </Image>
+ <Image Include="..\..\..\Images\logo_32x32.png">
+ <Filter>Resource Files</Filter>
+ </Image>
+ <Image Include="Resources\logo.png">
+ <Filter>Resource Files</Filter>
+ </Image>
</ItemGroup>
</Project> \ No newline at end of file
diff --git a/GUI/GUI/GUI/PythonWrapper.cpp b/GUI/GUI/GUI/PythonWrapper.cpp
index 53fcc06..6e9e0f1 100644
--- a/GUI/GUI/GUI/PythonWrapper.cpp
+++ b/GUI/GUI/GUI/PythonWrapper.cpp
@@ -78,6 +78,16 @@ std::string PythonWrapper::GetVersion() {
return result;
}
+// Runs Resources/Scripts/dump_mic_devices.py through InvokeWithArgs and
+// returns the text that call stored in its output argument.
+//
+// If the invocation reports failure, a "Failed to dump mic devices: ..."
+// message is returned instead, so the caller can show it in the output box
+// like any other result.
+std::string PythonWrapper::DumpMics() {
+    std::string result;
+    const std::string dump_mics_path = "Resources/Scripts/dump_mic_devices.py";
+    if (!InvokeWithArgs({ dump_mics_path }, &result)) {
+        // wxLogFatalError() would terminate the whole application. A failed
+        // device listing is recoverable, so hand the diagnostic back instead.
+        return "Failed to dump mic devices: " + result + "\n";
+    }
+    return result;
+}
+
bool PythonWrapper::InstallPip(std::string* out) {
std::string result;
@@ -87,11 +97,12 @@ bool PythonWrapper::InstallPip(std::string* out) {
wxProcess* PythonWrapper::StartApp(
std::function<void(wxProcess* proc, int ret)>&& exit_callback,
- const std::string& mic, const std::string& lang) {
+ const std::string& mic, const std::string& lang, const std::string& model) {
return InvokeAsyncWithArgs({
"Resources/Scripts/transcribe.py",
"--mic", mic,
"--lang", lang,
+ "--model", model,
},
std::move(exit_callback));
}
diff --git a/GUI/GUI/GUI/PythonWrapper.h b/GUI/GUI/GUI/PythonWrapper.h
index 0fa3c94..f6a739e 100644
--- a/GUI/GUI/GUI/PythonWrapper.h
+++ b/GUI/GUI/GUI/PythonWrapper.h
@@ -14,10 +14,8 @@
/*
* This class wraps interactions with the embedded Python interpreter.
*/
-class PythonWrapper
+namespace PythonWrapper
{
-public:
-
// Invoke the interpreter asynchronously with the given arguments.
// When the process exits, `exit_callback` runs.
// The caller is responsible for deleting wxProcess.
@@ -31,11 +29,14 @@ public:
// Execute python --version.
std::string GetVersion();
+ // Executes dump_mic_devices.py.
+ std::string DumpMics();
+
// Execute get-pip.py.
bool InstallPip(std::string* out);
wxProcess* StartApp(
std::function<void(wxProcess* proc, int ret)>&& exit_callback,
- const std::string& mic, const std::string& lang);
+ const std::string& mic, const std::string& lang, const std::string& model);
};
diff --git a/GUI/GUI/GUI/Resources/logo.ico b/GUI/GUI/GUI/Resources/logo.ico
new file mode 100644
index 0000000..aca1b5a
--- /dev/null
+++ b/GUI/GUI/GUI/Resources/logo.ico
Binary files differ
diff --git a/GUI/package.ps1 b/GUI/package.ps1
index 0346e67..36049b2 100644
--- a/GUI/package.ps1
+++ b/GUI/package.ps1
@@ -6,9 +6,11 @@ if (Test-Path $install_dir) {
mkdir $install_dir > $null
mkdir $install_dir/Resources > $null
-cp ../Images/logo.png TaSTT/Resources
+cp -Recurse ../Images TaSTT/Resources/Images
cp -Recurse ../Python TaSTT/Resources/Python
cp -Recurse ../Scripts TaSTT/Resources/Scripts
cp -Recurse ../Sounds TaSTT/Resources/Sounds
cp GUI/x64/Release/GUI.exe TaSTT/TaSTT.exe
+#Compress-Archive -Path "$install_dir" -DestinationPath "$install_dir.zip" -Force
+
diff --git a/Images/logo_16x16.png b/Images/logo_16x16.png
new file mode 100644
index 0000000..abdbe8a
--- /dev/null
+++ b/Images/logo_16x16.png
Binary files differ
diff --git a/Images/logo_32x32.png b/Images/logo_32x32.png
new file mode 100644
index 0000000..d30f6fe
--- /dev/null
+++ b/Images/logo_32x32.png
Binary files differ
diff --git a/Scripts/dump_mic_devices.py b/Scripts/dump_mic_devices.py
new file mode 100644
index 0000000..2b712cf
--- /dev/null
+++ b/Scripts/dump_mic_devices.py
@@ -0,0 +1,7 @@
+#!/usr/bin/env python3
+
+from transcribe import dumpMicDevices
+
+if __name__ == "__main__":
+ dumpMicDevices()
+
diff --git a/Scripts/emotes.py b/Scripts/emotes.py
index b922fdf..0a4ed01 100644
--- a/Scripts/emotes.py
+++ b/Scripts/emotes.py
@@ -31,6 +31,7 @@ IMG_TEX_DATA.append(("Images/Emotes/based.png", "based"))
IMG_TEX_DATA.append(("Images/Emotes/chad.png", "chad"))
IMG_TEX_DATA.append(("Images/Emotes/aware.png", "aware"))
IMG_TEX_DATA.append(("Images/Emotes/girl.png", "girl"))
+IMG_TEX_DATA = []  # NOTE(review): discards every entry appended above, so the loop below runs zero times - confirm this is intentional
IMG_TEX_KEYWORD_TO_COORD = {}
for i in range(0, len(IMG_TEX_DATA)):
diff --git a/Scripts/transcribe.py b/Scripts/transcribe.py
index 48426e2..0f7ae37 100644
--- a/Scripts/transcribe.py
+++ b/Scripts/transcribe.py
@@ -77,13 +77,14 @@ class AudioState:
osc_client = osc_ctrl.getClient()
-def dumpMicDevices(audio_state):
-    info = audio_state.p.get_host_api_info_by_index(0)
-    numdevices = info.get('deviceCount')
-    for i in range(0, numdevices):
-        if (audio_state.p.get_device_info_by_host_api_device_index(0, i).get('maxInputChannels')) > 0:
-            device_name = audio_state.p.get_device_info_by_host_api_device_index(0, i).get('name')
-            print("Input Device id ", i, " - ", device_name)
+def dumpMicDevices():
+    """Print the id and name of every input-capable device on host API 0.
+
+    Opens a private PyAudio instance for the query and terminates it before
+    returning, so the PortAudio resources it holds are released even if a
+    query raises.
+    """
+    p = pyaudio.PyAudio()
+    try:
+        info = p.get_host_api_info_by_index(0)
+        numdevices = info.get('deviceCount')
+        for i in range(0, numdevices):
+            # Query each device once and reuse the result for both fields.
+            device = p.get_device_info_by_host_api_device_index(0, i)
+            if device.get('maxInputChannels') > 0:
+                print("Input Device id ", i, " - ", device.get('name'))
+    finally:
+        p.terminate()
def onAudioFramesAvailable(
@@ -119,7 +120,7 @@ def getMicStream(which_mic):
audio_state.p = pyaudio.PyAudio()
print("Finding mic {}...".format(which_mic))
- dumpMicDevices(audio_state)
+ dumpMicDevices()
got_match = False
device_index = -1
focusrite_str = "Focusrite"
@@ -318,16 +319,20 @@ def readControllerInput(audio_state):
audio_state.drop_transcription = True
audio_state.audio_paused = False
-def transcribeLoop(mic: str, language: str):
+# model should correspond to one of the Whisper models defined in
+# whisper/__init__.py. Examples: tiny, base, small, medium.
+def transcribeLoop(mic: str, language: str, model: str):
audio_state = getMicStream(mic)
audio_state.language = whisper.tokenizer.TO_LANGUAGE_CODE[language]
print("Safe to start talking")
- #model = whisper.load_model("tiny")
- #model = whisper.load_model("base")
- model = whisper.load_model("small")
- #model = whisper.load_model("medium")
+ abspath = os.path.abspath(__file__)
+ dname = os.path.dirname(abspath)
+ model_root = os.path.join(dname, "Models")
+
+ print("Model {} will be saved to {}".format(model, model_root))
+ model = whisper.load_model(model, download_root=model_root)
transcribe_audio_thd = threading.Thread(target = transcribeAudio, args = [audio_state, model])
transcribe_audio_thd.daemon = True
@@ -369,6 +374,7 @@ if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("--mic", type=str, help="Which mic to use. Options: index, focusrite. Default: index")
parser.add_argument("--language", type=str, help="Which language to use. Ex: english, japanese, chinese, french, german.")
+ parser.add_argument("--model", type=str, help="Which AI model to use. Ex: tiny, base, small, medium")
args = parser.parse_args()
if not args.mic:
@@ -377,5 +383,8 @@ if __name__ == "__main__":
if not args.language:
args.language = "english"
- transcribeLoop(args.mic, args.language)
+    if not args.model:
+        args.model = "base"  # default Whisper model when --model is not given
+
+ transcribeLoop(args.mic, args.language, args.model)