diff options
| author | yum <yum.food.vr@gmail.com> | 2023-02-21 15:20:38 -0800 |
|---|---|---|
| committer | yum <yum.food.vr@gmail.com> | 2023-02-22 21:49:29 -0800 |
| commit | d701f80119ecb51366a45f429ec7006926527a40 (patch) | |
| tree | dcbf6445742a5acbdc2d21f26861d3c0b0b644b3 /GUI | |
| parent | 9a97fbc3c583ccd518d838faaaa36ed9aa5558e1 (diff) | |
Various refinements
* Filter out transcriptions like " (music)"
* Whisper mic choice auto-populates with queried values
* No more manually lining up numbers!
* Persist whisper mic in config
* Remove setup and dump mics button from Whisper page
* Redesign makes these unnecessary
Diffstat (limited to 'GUI')
| -rw-r--r-- | GUI/GUI/GUI/Config.cpp | 5 | ||||
| -rw-r--r-- | GUI/GUI/GUI/Config.h | 1 | ||||
| -rw-r--r-- | GUI/GUI/GUI/Frame.cpp | 85 | ||||
| -rw-r--r-- | GUI/GUI/GUI/Frame.h | 3 | ||||
| -rw-r--r-- | GUI/GUI/GUI/WhisperCPP.cpp | 31 |
5 files changed, 74 insertions, 51 deletions
diff --git a/GUI/GUI/GUI/Config.cpp b/GUI/GUI/GUI/Config.cpp index 1287619..50f0dca 100644 --- a/GUI/GUI/GUI/Config.cpp +++ b/GUI/GUI/GUI/Config.cpp @@ -89,7 +89,8 @@ AppConfig::AppConfig() menu_path(),
clear_osc(false),
- whisper_model("ggml-base.en.bin")
+ whisper_model("ggml-base.en.bin"),
+ whisper_mic(0)
{}
bool AppConfig::Serialize(const std::filesystem::path& path) {
@@ -118,6 +119,7 @@ bool AppConfig::Serialize(const std::filesystem::path& path) { root["clear_osc"] << clear_osc;
root["whisper_model"] << whisper_model;
+ root["whisper_mic"] << whisper_mic;
return Config::Serialize(path, &t);
}
@@ -159,6 +161,7 @@ bool AppConfig::Deserialize(const std::filesystem::path& path) { root.get_if("clear_osc", &c.clear_osc);
root.get_if("whisper_model", &c.whisper_model);
+ root.get_if("whisper_mic", &c.whisper_mic);
*this = std::move(c);
return true;
diff --git a/GUI/GUI/GUI/Config.h b/GUI/GUI/GUI/Config.h index 07a4d8c..b27dcc5 100644 --- a/GUI/GUI/GUI/Config.h +++ b/GUI/GUI/GUI/Config.h @@ -63,5 +63,6 @@ public: // WhisperCPP-specific settings.
std::string whisper_model;
+ int whisper_mic;
};
diff --git a/GUI/GUI/GUI/Frame.cpp b/GUI/GUI/GUI/Frame.cpp index e0713c1..5332490 100644 --- a/GUI/GUI/GUI/Frame.cpp +++ b/GUI/GUI/GUI/Frame.cpp @@ -72,7 +72,7 @@ namespace { ID_WHISPER_OUT,
ID_WHISPER_CONFIG_PANEL,
ID_WHISPER_SETUP_BUTTON,
- ID_WHISPER_DUMP_MICS_BUTTON,
+ //ID_WHISPER_DUMP_MICS_BUTTON,
ID_WHISPER_CONFIG_PANEL_PAIRS,
ID_WHISPER_MIC,
ID_WHISPER_LANG,
@@ -108,6 +108,21 @@ namespace { const size_t kNumMicChoices = sizeof(kMicChoices) / sizeof(kMicChoices[0]);
constexpr int kMicDefault = 0; // index
+ // Initial entries for the Whisper microphone dropdown (ID_WHISPER_MIC).
+ // The control is constructed with these index labels;
+ // PopulateDynamicInputFields() replaces them with the queried device
+ // names when the query succeeds.
+ // Declared const, like kLangChoices: the table is only ever read.
+ const wxString kWhisperMicChoices[] = {
+ "0",
+ "1",
+ "2",
+ "3",
+ "4",
+ "5",
+ "6",
+ "7",
+ "8",
+ "9",
+ };
+ // Number of entries in kWhisperMicChoices.
+ const size_t kNumWhisperMicChoices = sizeof(kWhisperMicChoices) / sizeof(kWhisperMicChoices[0]);
+ // Dropdown index selected when no usable choice is available.
+ constexpr int kWhisperMicDefault = 0; // index
+
// lifted from whisper/tokenizer.py
const wxString kLangChoices[] = {
"english",
@@ -322,25 +337,25 @@ Frame::Frame() auto* navbar = new wxPanel(main_panel, ID_NAVBAR);
{
auto* navbar_button_transcribe = new wxButton(navbar,
- ID_NAVBAR_BUTTON_TRANSCRIBE, "Transcription");
+ ID_NAVBAR_BUTTON_TRANSCRIBE, "Transcription (PY)");
auto* navbar_button_unity = new wxButton(navbar,
ID_NAVBAR_BUTTON_UNITY, "Unity");
auto* navbar_button_debug = new wxButton(navbar,
ID_NAVBAR_BUTTON_DEBUG, "Debug");
auto* navbar_button_whisper = new wxButton(navbar,
- ID_NAVBAR_BUTTON_WHISPER, "WhisperCPP");
+ ID_NAVBAR_BUTTON_WHISPER, "Transcription (CPP)");
auto* sizer = new wxBoxSizer(wxVERTICAL);
navbar->SetSizer(sizer);
sizer->Add(navbar_button_transcribe, /*proportion=*/0,
/*flags=*/wxEXPAND);
+ sizer->Add(navbar_button_whisper, /*proportion=*/0,
+ /*flags=*/wxEXPAND);
sizer->Add(navbar_button_unity, /*proportion=*/0,
/*flags=*/wxEXPAND);
sizer->Add(navbar_button_debug, /*proportion=*/0,
/*flags=*/wxEXPAND);
- sizer->Add(navbar_button_whisper, /*proportion=*/0,
- /*flags=*/wxEXPAND);
}
auto* transcribe_panel = new wxPanel(main_panel, ID_PY_PANEL);
@@ -843,27 +858,12 @@ Frame::Frame() auto* whisper_config_panel = new wxPanel(whisper_panel,
ID_WHISPER_CONFIG_PANEL);
{
- auto* whisper_setup_button = new wxButton(whisper_config_panel,
- ID_WHISPER_SETUP_BUTTON, "Set up Python virtual environment");
- whisper_setup_button->SetToolTip(
- "TaSTT uses the Python programming language to provide both "
- "transcription services and to interface with Unity. "
- "It installs its dependencies into an isolated folder "
- "called a 'virtual environment'. Click this button to "
- "install those dependencies. This only has to be done "
- "once when you install a new version of TaSTT.");
- auto* whisper_dump_mics_button = new wxButton(whisper_config_panel,
- ID_WHISPER_DUMP_MICS_BUTTON, "List input devices");
- whisper_dump_mics_button->SetToolTip(
- "List the microphones (and input devices) attached to "
- "your computer. To use a microphone, enter the number "
- "to its left in the 'Microphone' dropdown.");
auto* whisper_config_panel_pairs = new wxPanel(whisper_config_panel,
ID_WHISPER_CONFIG_PANEL_PAIRS);
{
auto* whisper_mic = new wxChoice(whisper_config_panel_pairs,
ID_WHISPER_MIC, wxDefaultPosition,
- wxDefaultSize, kNumMicChoices, kMicChoices);
+ wxDefaultSize, kNumWhisperMicChoices, kWhisperMicChoices);
whisper_mic->SetToolTip(
"Select which microphone to listen to when "
"transcribing. To get list microphones and get their "
@@ -1037,10 +1037,6 @@ Frame::Frame() auto* sizer = new wxBoxSizer(wxVERTICAL);
whisper_config_panel->SetSizer(sizer);
- sizer->Add(whisper_setup_button, /*proportion=*/0,
- /*flags=*/wxEXPAND);
- sizer->Add(whisper_dump_mics_button, /*proportion=*/0,
- /*flags=*/wxEXPAND);
sizer->Add(whisper_config_panel_pairs, /*proportion=*/0,
/*flags=*/wxEXPAND);
sizer->Add(whisper_enable_local_beep, /*proportion=*/0,
@@ -1060,8 +1056,8 @@ Frame::Frame() sizer->Add(whisper_config_panel, /*proportion=*/0, /*flags=*/wxEXPAND);
sizer->Add(whisper_out, /*proportion=*/1, /*flags=*/wxEXPAND);
}
- whisper_panel_->Hide();
whisper_ = std::make_unique<WhisperCPP>(whisper_out_);
+ whisper_panel_->Hide();
auto* debug_panel = new wxPanel(main_panel, ID_DEBUG_PANEL);
debug_panel_ = debug_panel;
@@ -1180,7 +1176,7 @@ Frame::Frame() Bind(wxEVT_TIMER, &Frame::OnAppDrain, this, ID_PY_APP_DRAIN);
Bind(wxEVT_BUTTON, &Frame::OnSetupPython, this, ID_PY_SETUP_BUTTON);
Bind(wxEVT_BUTTON, &Frame::OnDumpMics, this, ID_PY_DUMP_MICS_BUTTON);
- Bind(wxEVT_BUTTON, &Frame::OnWhisperDumpMics, this, ID_WHISPER_DUMP_MICS_BUTTON);
+ //Bind(wxEVT_BUTTON, &Frame::OnWhisperDumpMics, this, ID_WHISPER_DUMP_MICS_BUTTON);
Bind(wxEVT_BUTTON, &Frame::OnGenerateFX, this,
ID_UNITY_BUTTON_GEN_ANIMATOR);
Bind(wxEVT_BUTTON, &Frame::OnListPip, this, ID_DEBUG_BUTTON_LIST_PIP);
@@ -1205,6 +1201,7 @@ Frame::Frame() // Initialize input fields using AppConfig.
ApplyConfigToInputFields();
+ PopulateDynamicInputFields();
Resize();
OnUnityParamChangeImpl();
@@ -1258,7 +1255,8 @@ void Frame::ApplyConfigToInputFields() // Whisper panel
auto* whisper_mic = static_cast<wxChoice*>(FindWindowById(ID_WHISPER_MIC));
- whisper_mic->SetSelection(mic_idx);
+ int whisper_mic_idx = app_c_.whisper_mic;
+ whisper_mic->SetSelection(whisper_mic_idx);
auto* whisper_lang = static_cast<wxChoice*>(FindWindowById(ID_WHISPER_LANG));
whisper_lang->SetSelection(lang_idx);
@@ -1266,7 +1264,7 @@ void Frame::ApplyConfigToInputFields() auto* whisper_model = static_cast<wxChoice*>(FindWindowById(ID_WHISPER_MODEL));
int whisper_model_idx = GetDropdownChoiceIndex(kWhisperModelChoices,
kNumWhisperModelChoices, app_c_.whisper_model, kWhisperModelDefault);
- whisper_model->SetSelection(model_idx);
+ whisper_model->SetSelection(whisper_model_idx);
auto* whisper_button = static_cast<wxChoice*>(FindWindowById(ID_WHISPER_BUTTON));
whisper_button->SetSelection(button_idx);
@@ -1301,6 +1299,20 @@ void Frame::ApplyConfigToInputFields() unity_cols->AppendText(std::to_string(app_c_.cols));
}
+// Fills the Whisper microphone dropdown (ID_WHISPER_MIC) with the device
+// names returned by whisper_->GetMics(). The dropdown is left unchanged if
+// GetMics() returns false.
+void Frame::PopulateDynamicInputFields()
+{
+    whisper_->Init();
+    std::vector<std::string> mics;
+    if (!whisper_->GetMics(mics)) {
+        return;
+    }
+
+    // List every reported device. Previously the list was sized to
+    // mics.size() but only the first kNumWhisperMicChoices entries were
+    // filled, which left blank rows when more devices were present.
+    std::vector<wxString> contents(mics.size());
+    for (size_t i = 0; i < mics.size(); ++i) {
+        contents[i] = mics[i];
+    }
+
+    auto* whisper_mic = static_cast<wxChoice*>(FindWindowById(ID_WHISPER_MIC));
+
+    // Set() rebuilds the item list and does not keep the current selection.
+    // The constructor applies the configured selection before calling this
+    // function, so remember it here and restore it afterwards.
+    const int prev_selection = whisper_mic->GetSelection();
+    whisper_mic->Set(contents);
+    if (contents.empty()) {
+        return;
+    }
+    const bool prev_valid = prev_selection >= 0 &&
+        static_cast<size_t>(prev_selection) < contents.size();
+    whisper_mic->SetSelection(prev_valid ? prev_selection : kWhisperMicDefault);
+}
+
void Frame::OnExit(wxCommandEvent& event)
{
OnAppStop(event);
@@ -1422,17 +1434,6 @@ void Frame::OnDumpMics(wxCommandEvent& event) Log(transcribe_out_, "{}\n", PythonWrapper::DumpMics());
}
-void Frame::OnWhisperDumpMics(wxCommandEvent& event)
-{
- whisper_->Init();
- std::vector<std::string> mics;
- whisper_->GetMics(mics);
- Log(whisper_out_, "Microphones:\n");
- for (int i = 0; i < mics.size(); i++) {
- Log(whisper_out_, " {}: {}\n", i, mics[i]);
- }
-}
-
bool GetUserPath(const std::string& raw, std::filesystem::path& clean,
const std::string& err_prefix = "", bool must_exist = true) {
clean = raw;
@@ -1902,7 +1903,7 @@ void Frame::OnWhisperStart(wxCommandEvent& event) { }
int which_model = whisper_model_->GetSelection();
if (which_model == wxNOT_FOUND) {
- which_model = kModelDefault;
+ which_model = kWhisperModelDefault;
}
int chars_per_sync_idx = whisper_chars_per_sync_->GetSelection();
if (chars_per_sync_idx == wxNOT_FOUND) {
@@ -1965,7 +1966,7 @@ void Frame::OnWhisperStart(wxCommandEvent& event) { return;
}
- app_c_.microphone = kMicChoices[which_mic].ToStdString();
+ app_c_.whisper_mic = which_mic;
app_c_.language = kLangChoices[which_lang].ToStdString();
app_c_.whisper_model = kWhisperModelChoices[which_model].ToStdString();
app_c_.chars_per_sync = chars_per_sync;
diff --git a/GUI/GUI/GUI/Frame.h b/GUI/GUI/GUI/Frame.h index ef65d7f..5d69679 100644 --- a/GUI/GUI/GUI/Frame.h +++ b/GUI/GUI/GUI/Frame.h @@ -87,6 +87,8 @@ private: // Initialize GUI input fields using `app_c_`.
void ApplyConfigToInputFields();
+ // Populate dynamically-generated input fields, such as microphone lists.
+ void PopulateDynamicInputFields();
void OnExit(wxCommandEvent& event);
void OnNavbarTranscribe(wxCommandEvent& event);
@@ -95,7 +97,6 @@ private: void OnNavbarWhisper(wxCommandEvent& event);
void OnSetupPython(wxCommandEvent& event);
void OnDumpMics(wxCommandEvent& event);
- void OnWhisperDumpMics(wxCommandEvent& event);
void OnAppStart(wxCommandEvent& event);
void OnAppStop(wxCommandEvent& event);
void OnWhisperStart(wxCommandEvent& event);
diff --git a/GUI/GUI/GUI/WhisperCPP.cpp b/GUI/GUI/GUI/WhisperCPP.cpp index 742f0a4..809415e 100644 --- a/GUI/GUI/GUI/WhisperCPP.cpp +++ b/GUI/GUI/GUI/WhisperCPP.cpp @@ -251,13 +251,12 @@ void WhisperCPP::Start(const AppConfig& c) { return;
}
- // TODO(yum) use app config to select mic
proc_ = new AppThread([&](AppThread* thd) {
Log(out_, "Transcription thread top\n");
run_ = true;
Whisper::iAudioCapture* mic_stream;
- if (!OpenMic(1, mic_stream)) {
+ if (!OpenMic(c.whisper_mic, mic_stream)) {
return;
}
ScopeGuard mic_stream_cleanup([mic_stream]() { mic_stream->Release(); });
@@ -322,7 +321,8 @@ void WhisperCPP::Start(const AppConfig& c) { Whisper::sFullParams wparams{};
context->fullDefaultParams(eSamplingStrategy::BeamSearch, &wparams);
wparams.language = Whisper::makeLanguageKey("en"); // TODO(yum) use config
- wparams.n_max_text_ctx = 20;
+ // This must be set to keep memory usage from growing without bound.
+ wparams.n_max_text_ctx = 100;
wparams.new_segment_callback = [](iContext* context, uint32_t n_new, void* user_data) noexcept -> HRESULT {
wxTextCtrl* out = static_cast<wxTextCtrl*>(user_data);
@@ -346,12 +346,28 @@ void WhisperCPP::Start(const AppConfig& c) { const int s0 = length.countSegments - n_new;
for (int i = s0; i < length.countSegments; i++) {
const sSegment& seg = segments[i];
- Log(out, "{} ", seg.text);
+ // Print the segment token by token, dropping non-speech annotations such
+ // as "[...]" timestamps/markers and " (music)"-style tags.
+ // is_metadata is true while we are between an annotation's opening and
+ // closing bracket; it is reset for every segment.
+ bool is_metadata = false;
for (int j = 0; j < seg.countTokens; j++) {
const sToken& tok = tokens[seg.firstToken + j];
- if (*tok.text == 0 || tok.text[0] == '[') {
+ // Skip empty tokens as well as "[...]" tokens. An empty token prints
+ // nothing, and the last-character checks below would otherwise index
+ // tok.text[strlen(tok.text) - 1] out of bounds.
+ if (tok.text[0] == '\0' || tok.text[0] == '[') {
continue;
}
+ if (tok.text[0] == ' ' && (
+ tok.text[1] == '[' ||
+ tok.text[1] == '(')) {
+ // A token that both opens and closes the annotation (for example
+ // " (music)" or " [music]") is dropped on its own; otherwise we
+ // enter annotation mode until the closing bracket shows up.
+ const char last = tok.text[strlen(tok.text) - 1];
+ if (last == ']' || last == ')') {
+ continue;
+ }
+ is_metadata = true;
+ continue;
+ }
+ if (is_metadata) {
+ // Inside an annotation every token is dropped, not only the one
+ // carrying the closing bracket; that token ends annotation mode.
+ const char last = tok.text[strlen(tok.text) - 1];
+ if (last == ']' || last == ')') {
+ is_metadata = false;
+ }
+ continue;
+ }
+ Log(out, "{}", tok.text);
}
}
if (n_new) {
@@ -369,11 +385,12 @@ void WhisperCPP::Start(const AppConfig& c) { Log(app->out_, "Exit transcription loop\n");
return S_FALSE;
}
+ // Sleeping here prevents the GUI from hanging.
+ // For some reason, printing is also required to prevent hanging.
static int i = 0;
- if (++i % 10 == 0) {
+ if (++i % 20 == 0) {
Log(app->out_, "Spin {}\n", i);
}
- // Sleeping here prevents the GUI from hanging.
wxThread::Sleep(10);
return S_OK;
};
|
