4 files changed, 100 insertions, 291 deletions
diff --git a/GUI/GUI/GUI/Frame.cpp b/GUI/GUI/GUI/Frame.cpp
index bc8e3ee..5416abe 100644
--- a/GUI/GUI/GUI/Frame.cpp
+++ b/GUI/GUI/GUI/Frame.cpp
@@ -2285,7 +2285,9 @@ void Frame::OnAppStart(wxCommandEvent& event) {
             }
             return true;
         });
-    py_app_ = std::move(PythonWrapper::StartApp(*app_c_, transcribe_out_,
+    Log(transcribe_out_, "DEBUG::{}:: AppConfig::kConfigPath: {}\n", __func__, AppConfig::kConfigPath);
+    const std::string config_path(AppConfig::kConfigPath);
+    py_app_ = std::move(PythonWrapper::StartApp(config_path, transcribe_out_,
         std::move(out_cb), std::move(in_cb), std::move(run_cb),
         std::move(prestart_cb)));
     Log(transcribe_out_, "py app valid: {}\n", py_app_.valid());
diff --git a/GUI/GUI/GUI/PythonWrapper.cpp b/GUI/GUI/GUI/PythonWrapper.cpp
index 71df5c5..a855b4c 100644
--- a/GUI/GUI/GUI/PythonWrapper.cpp
+++ b/GUI/GUI/GUI/PythonWrapper.cpp
@@ -459,7 +459,7 @@ bool PythonWrapper::InstallPip(
 }
 
 std::future<bool> PythonWrapper::StartApp(
-		const AppConfig& config,
+		const std::string& config_path,
 		wxTextCtrl *out,
 		const std::function<void(const std::string& out, const std::string& err)>&& out_cb,
 		const std::function<void(std::string& in)>&& in_cb,
@@ -467,46 +467,27 @@ std::future<bool> PythonWrapper::StartApp(
 		const std::function<void()>&& prestart_cb) {
 
 	return std::move(std::async(std::launch::async,
-		[&](
+		[](
+			const std::string config_path,
+			wxTextCtrl *out,
 			const std::function<void(const std::string& out, const std::string& err)>&& out_cb,
 			const std::function<void(std::string& in)>&& in_cb,
-			const std::function<bool()>&& run_cb) -> bool {
+			const std::function<bool()>&& run_cb,
+			const std::function<void()>&& prestart_cb) -> bool {
 				prestart_cb();
 
+				Log(out, "DEBUG::{}:: config_path: {}\n", __func__, config_path);
+
 				return InvokeWithArgs({
 					"-u",  // Unbuffered output
 					"Resources/Scripts/transcribe.py",
-					"--mic", config.microphone,
-					"--language", config.language,
-					"--language_target", Quote(config.language_target),
-					"--model", config.model,
-					"--model_translation", config.model_translation,
-					"--chars_per_sync", std::to_string(config.chars_per_sync),
-					"--bytes_per_char", std::to_string(config.bytes_per_char),
-					"--button", Quote(config.button),
-					"--enable_local_beep", config.enable_local_beep ? "1" : "0",
-					"--rows", std::to_string(config.rows),
-					"--cols", std::to_string(config.cols),
-					// this is the max length of the audio buffer. 5 minutes
-					// is a reasonable approximation of infinity.
-					"--window_duration_s", "300",
-					"--cpu", config.use_cpu ? "1" : "0",
-					"--use_builtin", config.use_builtin ? "1" : "0",
-					"--enable_uwu_filter", config.enable_uwu_filter ? "1" : "0",
-					"--remove_trailing_period", config.remove_trailing_period ? "1" : "0",
-					"--enable_uppercase_filter", config.enable_uppercase_filter ? "1" : "0",
-					"--enable_lowercase_filter", config.enable_lowercase_filter ? "1" : "0",
-					"--enable_profanity_filter", config.enable_profanity_filter ? "1" : "0",
-					"--enable_debug_mode", config.enable_debug_mode ? "1" : "0",
-					"--emotes_pickle", kEmotesPickle,
-					"--gpu_idx", std::to_string(config.gpu_idx),
-					"--keybind", Quote(config.keybind),
-					"--reset_on_toggle", config.reset_on_toggle ? "1" : "0",
+					"--config", config_path,
 					},
 					std::move(out_cb),
 					std::move(in_cb),
 					std::move(run_cb));
-		}, std::move(out_cb), std::move(in_cb), std::move(run_cb)));
+		}, config_path, out, std::move(out_cb), std::move(in_cb),
+			std::move(run_cb), std::move(prestart_cb)));
 }
 
 bool PythonWrapper::GenerateAnimator(
diff --git a/GUI/GUI/GUI/PythonWrapper.h b/GUI/GUI/GUI/PythonWrapper.h
index 6366247..477224d 100644
--- a/GUI/GUI/GUI/PythonWrapper.h
+++ b/GUI/GUI/GUI/PythonWrapper.h
@@ -72,7 +72,7 @@ namespace PythonWrapper
 	// parameters. We could persist those files so settings would persist across
 	// app restarts.
 	std::future<bool> StartApp(
-		const AppConfig& config,
+		const std::string& config_path,
 		wxTextCtrl *out,
 		const std::function<void(const std::string& out, const std::string& err)>&& out_cb,
 		const std::function<void(std::string& in)>&& in_cb = [](std::string&) {},
diff --git a/Scripts/transcribe.py b/Scripts/transcribe.py
index c718f73..2605bd3 100644
--- a/Scripts/transcribe.py
+++ b/Scripts/transcribe.py
@@ -9,6 +9,7 @@ from profanity_filter import ProfanityFilter
 from sentence_splitter import split_text_into_sentences
 
 import argparse
+import app_config
 import copy
 import ctranslate2
 import editdistance
@@ -39,9 +40,6 @@ class AudioState:
         # This matches the framerate expected by whisper.
         self.RATE = 16000
 
-        # If set, print additional information to stdout while transcribing.
-        self.enable_debug_mode = False
-
         # The maximum length that recordAudio() will put into frames before it
         # starts dropping from the start.
         self.MAX_LENGTH_S = 300
@@ -58,11 +56,6 @@ class AudioState:
         self.text = ""
         self.filtered_text = ""
 
-        # If set to true, then the transcript strings (`text` and friends) will
-        # be reset whenever transcription is toggled on. At time of writing,
-        # this only applies to keyboard controls.
-        self.reset_on_toggle = True
-
         # The edit distance under which two consecutive transcripts are
         # considered to match. This affects how easily `preview_text`
         # gets appended to `text`.
@@ -70,7 +63,6 @@ class AudioState:
 
         # If set, profanity in transcriptions will have their vowels replaced
         # with asterisks. Only works in English.
-        self.enable_profanity_filter = False
         self.profanity_filter: ProfanityFilter = None
 
         # List of:
@@ -180,14 +172,14 @@ def onAudioFramesAvailable(
         n_frames_to_drop = float(audio_state.drop_samples_till_i) / audio_state.CHUNK
         n_frames_to_drop *= keep_every
         n_frames_to_drop_int = int(floor(n_frames_to_drop))
-        if audio_state.enable_debug_mode:
+        if audio_state.cfg["enable_debug_mode"]:
             print(f"Dropping {n_frames_to_drop_int} frames, buffer has {len(audio_state.frames)} frames total")
         # First drop every whole chunk
         audio_state.frames = audio_state.frames[n_frames_to_drop_int:]
         # Then drop the part of the most recent chunk we no longer want
         if len(audio_state.frames) > 0:
             n_samples_to_drop = int(ceil((n_frames_to_drop % 1.0) * audio_state.CHUNK / keep_every))
-            if audio_state.enable_debug_mode:
+            if audio_state.cfg["enable_debug_mode"]:
                 print(f"Zeroing {n_samples_to_drop} samples in frame 0")
                 print(f"Frame 0 has length {len(audio_state.frames[0])}")
             bytes_per_sample = 2
@@ -209,7 +201,7 @@ def onAudioFramesAvailable(
     delta_duration_s = desired_min_duration_s - cur_duration_s
     if delta_duration_s > 0:
         delta_chunks = int(ceil(delta_duration_s / chunk_duration_s))
-        if audio_state.enable_debug_mode:
+        if audio_state.cfg["enable_debug_mode"]:
             print(f"Padding with {delta_duration_s} seconds ({delta_chunks} chunks) of silence")
             print(f"Each chunk has {len(empty_chunk)} samples")
         audio_state.frames = [empty_chunk] * delta_chunks + audio_state.frames
@@ -275,8 +267,8 @@ def resetAudioLocked(audio_state):
     audio_state.transcribe_sleep_duration = \
             audio_state.transcribe_sleep_duration_min_s
 
-    if audio_state.reset_on_toggle:
-        if audio_state.enable_debug_mode:
+    if audio_state.cfg["reset_on_toggle"]:
+        if audio_state.cfg["enable_debug_mode"]:
             print("resetAudioLocked resetting text")
         audio_state.text = ""
         audio_state.preview_text = ""
@@ -303,7 +295,7 @@ def transcribe(audio_state, model, frames, use_cpu: bool) -> typing.Tuple[str,st
     segments, info = model.transcribe(
             audio,
             beam_size = 5,
-            language = audio_state.language,
+            language = langcodes.find(audio_state.cfg["language"]).language,
             temperature = 0.0,
             log_prob_threshold = -0.8,
             vad_filter = True,
@@ -313,7 +305,7 @@ def transcribe(audio_state, model, frames, use_cpu: bool) -> typing.Tuple[str,st
     for s in segments:
         if s.avg_logprob < -0.8 or s.no_speech_prob > 0.6:
             continue
-        if audio_state.enable_debug_mode:
+        if audio_state.cfg["enable_debug_mode"]:
             print(f"Segment: {s}")
         ranges.append((s.start, s.end, s.text))
     audio_state.ranges_ls.append(ranges)
@@ -340,13 +332,13 @@ def transcribe(audio_state, model, frames, use_cpu: bool) -> typing.Tuple[str,st
 
             max_edit = audio_state.commit_fuzz_threshold
 
-            if audio_state.enable_debug_mode:
+            if audio_state.cfg["enable_debug_mode"]:
                 print(f"c0: {c0}, c1: {c1}, c2: {c2}, c3: {c3}")
             if c0_c1_d < max_edit and c1_c2_d < max_edit and c2_c3_d < max_edit:
                 # For simplicity, completely reset saved audio ranges.
                 audio_state.ranges_ls = []
                 committed_text = c0[2]
-                if audio_state.enable_debug_mode:
+                if audio_state.cfg["enable_debug_mode"]:
                     print(f"Dropping frames until {c0[1]}")
                 n_samples_to_drop = int(ceil(audio_state.RATE * c0[1]))
                 audio_state.drop_samples_till_i = n_samples_to_drop
@@ -363,14 +355,7 @@ def transcribe(audio_state, model, frames, use_cpu: bool) -> typing.Tuple[str,st
 
     return (committed_text, preview_text)
 
-def transcribeAudio(audio_state,
-        model,
-        use_cpu: bool,
-        enable_uwu_filter: bool,
-        remove_trailing_period: bool,
-        enable_uppercase_filter: bool,
-        enable_lowercase_filter: bool,
-        ):
+def transcribeAudio(audio_state):
     print("Ready!")
     last_transcribe_time = time.time()
     while audio_state.run_app == True:
@@ -389,9 +374,10 @@ def transcribeAudio(audio_state,
                     1000 * 1000,
                     longer_sleep_dur)
 
-        text, preview_text = transcribe(audio_state, model, audio_state.frames, use_cpu)
+        text, preview_text = transcribe(audio_state, audio_state.cfg["model"], audio_state.frames,
+                audio_state.cfg["use_cpu"])
         if len(text) == 0 and len(preview_text) == 0:
-            if audio_state.enable_debug_mode:
+            if audio_state.cfg["enable_debug_mode"]:
                 print("no transcription, spin ({} seconds)".format(time.time() - last_transcribe_time))
             last_transcribe_time = time.time()
             # Prevent audio buffer from holding more than a few seconds of silence
@@ -406,7 +392,7 @@ def transcribeAudio(audio_state,
             audio_state.text = ""
             audio_state.preview_text = ""
             audio_state.filtered_text = ""
-            if audio_state.enable_debug_mode:
+            if audio_state.cfg["enable_debug_mode"]:
                 print("drop transcription ({} seconds)".format(time.time() - last_transcribe_time))
             last_transcribe_time = time.time()
             continue
@@ -428,7 +414,7 @@ def transcribeAudio(audio_state,
         audio_state.text = audio_state.text[-4096:]
 
         now = time.time()
-        if audio_state.enable_debug_mode:
+        if audio_state.cfg["enable_debug_mode"]:
             print("Raw transcription ({} seconds): {}".format(
                 now - last_transcribe_time,
                 audio_state.text + audio_state.preview_text))
@@ -439,7 +425,7 @@ def transcribeAudio(audio_state,
         # Translate if requested.
         translated = audio_state.text + audio_state.preview_text
         if audio_state.language_target:
-            whisper_lang = audio_state.whisper_language
+            whisper_lang = audio_state.cfg["language"]
             nllb_lang = lang_compat.whisper_to_nllb[whisper_lang]
             ss_lang = lang_compat.nllb_to_ss[nllb_lang]
             sentences = split_text_into_sentences(translated, language=ss_lang)
@@ -457,7 +443,7 @@ def transcribeAudio(audio_state,
 
         # Apply filters to transcription
         filtered_text = translated
-        if enable_uwu_filter:
+        if audio_state.cfg["enable_uwu_filter"]:
             uwu_proc = subprocess.Popen(["Resources/Uwu/Uwwwu.exe", filtered_text],
                     stdout=subprocess.PIPE,
                     stderr=subprocess.PIPE)
@@ -466,14 +452,14 @@ def transcribeAudio(audio_state,
             uwu_text = uwu_text.replace("\n", "")
             uwu_text = uwu_text.replace("\r", "")
             filtered_text = uwu_text
-        if remove_trailing_period:
+        if audio_state.cfg["remove_trailing_period"]:
             if len(filtered_text) > 0 and filtered_text[-1] == '.' and not filtered_text.endswith("..."):
                 filtered_text = filtered_text[0:len(filtered_text)-1]
-        if enable_uppercase_filter:
+        if audio_state.cfg["enable_uppercase_filter"]:
             filtered_text = filtered_text.upper()
-        if enable_lowercase_filter:
+        if audio_state.cfg["enable_lowercase_filter"]:
             filtered_text = filtered_text.lower()
-        if audio_state.enable_profanity_filter:
+        if audio_state.cfg["enable_profanity_filter"]:
             filtered_text = audio_state.profanity_filter.filter(filtered_text)
         audio_state.filtered_text = filtered_text
 
@@ -489,10 +475,10 @@ def transcribeAudio(audio_state,
             audio_state.transcribe_no_change_count = 0
             audio_state.transcribe_sleep_duration = audio_state.transcribe_sleep_duration_min_s
 
-def sendAudio(audio_state, use_builtin: bool, estate: EmotesState):
+def sendAudio(audio_state, estate: EmotesState):
     while audio_state.run_app == True:
         text = audio_state.filtered_text
-        if use_builtin:
+        if audio_state.cfg["use_builtin"]:
             ret = osc_ctrl.pageMessageBuiltin(audio_state.osc_state, text)
             time.sleep(1.5)
         else:
@@ -502,9 +488,8 @@ def sendAudio(audio_state, use_builtin: bool, estate: EmotesState):
             # Pace this out
             time.sleep(0.01)
 
-def readKeyboardInput(audio_state, enable_local_beep: bool,
-        use_builtin: bool, keybind: str):
-    machine = keybind_event_machine.KeybindEventMachine(keybind)
+def readKeyboardInput(audio_state):
+    machine = keybind_event_machine.KeybindEventMachine(audio_state.cfg["keybind"])
     last_press_time = 0
 
     # double pressing the keybind
@@ -533,15 +518,15 @@ def readKeyboardInput(audio_state, enable_local_beep: bool,
 
         if event == EVENT_DOUBLE_PRESS:
             state = PAUSE_STATE
-            if not use_builtin:
+            if not audio_state.cfg["use_builtin"]:
                 osc_ctrl.toggleBoard(audio_state.osc_state.client, False)
 
-            if audio_state.reset_on_toggle:
-                if audio_state.enable_debug_mode:
+            if audio_state.cfg["reset_on_toggle"]:
+                if audio_state.cfg["enable_debug_mode"]:
                     print("Toggle detected, dropping transcript (1)")
                 audio_state.drop_transcription = True
             else:
-                if audio_state.enable_debug_mode:
+                if audio_state.cfg["enable_debug_mode"]:
                     print("Toggle detected, committing preview text (1)")
                 audio_state.text += audio_state.preview_text
             audio_state.audio_paused = True
@@ -552,26 +537,26 @@ def readKeyboardInput(audio_state, enable_local_beep: bool,
         # Short hold
         if state == RECORD_STATE:
             state = PAUSE_STATE
-            if not use_builtin:
+            if not audio_state.cfg["use_builtin"]:
                 osc_ctrl.lockWorld(audio_state.osc_state.client, True)
             audio_state.transcribe_sleep_duration = audio_state.transcribe_sleep_duration_min_s
 
             audio_state.audio_paused = True
 
-            if enable_local_beep == 1:
+            if audio_state.cfg["enable_local_beep"]:
                 audio_state.audio_events.append(audio_state.AUDIO_EVENT_TOGGLE_OFF)
         elif state == PAUSE_STATE:
             state = RECORD_STATE
-            if not use_builtin:
+            if not audio_state.cfg["use_builtin"]:
                 osc_ctrl.toggleBoard(audio_state.osc_state.client, True)
                 osc_ctrl.lockWorld(audio_state.osc_state.client, False)
                 osc_ctrl.ellipsis(audio_state.osc_state.client, True)
-            if audio_state.reset_on_toggle:
-                if audio_state.enable_debug_mode:
+            if audio_state.cfg["reset_on_toggle"]:
+                if audio_state.cfg["enable_debug_mode"]:
                     print("Toggle detected, dropping transcript (2)")
                 audio_state.drop_transcription = True
             else:
-                if audio_state.enable_debug_mode:
+                if audio_state.cfg["enable_debug_mode"]:
                     print("Toggle detected, committing preview text (2)")
                 audio_state.text += audio_state.preview_text
             audio_state.audio_paused = False
@@ -579,11 +564,11 @@ def readKeyboardInput(audio_state, enable_local_beep: bool,
             resetAudioLocked(audio_state)
             resetDisplayLocked(audio_state)
 
-            if enable_local_beep == 1:
+            if audio_state.cfg["enable_local_beep"]:
                 audio_state.audio_events.append(audio_state.AUDIO_EVENT_TOGGLE_ON)
 
-def audioFeedbackThread(audio_state, enable_local_beep: bool,
-        use_builtin: bool, button: str):
+
+def audioFeedbackThread(audio_state):
     with open(os.path.abspath("Resources/Sounds/Noise_On_Quiet.wav"), "rb") as f:
         waveform0 = f.read()
     with open(os.path.abspath("Resources/Sounds/Noise_Off_Quiet.wav"), "rb") as f:
@@ -612,14 +597,13 @@ def audioFeedbackThread(audio_state, enable_local_beep: bool,
             waveform = waveform3
         winsound.PlaySound(waveform, winsound.SND_MEMORY)
 
-def readControllerInput(audio_state, enable_local_beep: bool,
-        use_builtin: bool, button: str):
+def readControllerInput(audio_state):
     RECORD_STATE = 0
     PAUSE_STATE = 1
     state = PAUSE_STATE
 
-    hand_id = button.split()[0]
-    button_id = button.split()[1]
+    hand_id = audio_state.cfg["button"].split()[0]
+    button_id = audio_state.cfg["button"].split()[1]
 
     # Rough description of state machine:
     #   Single short press: toggle transcription
@@ -649,18 +633,18 @@ def readControllerInput(audio_state, enable_local_beep: bool,
             if now - last_rising > 1.5:
                 # Long press: treat as the end of transcription.
                 state = PAUSE_STATE
-                if not use_builtin:
+                if not audio_state.cfg["use_builtin"]:
                     osc_ctrl.lockWorld(audio_state.osc_state.client, True)
                 audio_state.transcribe_sleep_duration = audio_state.transcribe_sleep_duration_min_s
                 audio_state.audio_paused = True
 
                 if last_rising - last_medium_press_end < 1.0:
                     # Type transcription
-                    if enable_local_beep == 1:
+                    if audio_state.cfg["enable_local_beep"]:
                         audio_state.audio_events.append(audio_state.AUDIO_EVENT_UPDATE)
                     keyboard.write(audio_state.filtered_text)
                 else:
-                    if enable_local_beep == 1:
+                    if audio_state.cfg["enable_local_beep"]:
                         audio_state.audio_events.append(audio_state.AUDIO_EVENT_TOGGLE_OFF)
 
             elif now - last_rising > 0.5:
@@ -668,10 +652,10 @@ def readControllerInput(audio_state, enable_local_beep: bool,
                 last_medium_press_end = now
                 state = PAUSE_STATE
 
-                if enable_local_beep == 1:
+                if audio_state.cfg["enable_local_beep"]:
                     audio_state.audio_events.append(audio_state.AUDIO_EVENT_DISMISS)
 
-                if not use_builtin:
+                if not audio_state.cfg["use_builtin"]:
                     osc_ctrl.toggleBoard(audio_state.osc_state.client, False)
 
                 resetAudioLocked(audio_state)
@@ -682,72 +666,56 @@ def readControllerInput(audio_state, enable_local_beep: bool,
                 # Short hold
                 if state == RECORD_STATE:
                     state = PAUSE_STATE
-                    if not use_builtin:
+                    if not audio_state.cfg["use_builtin"]:
                         osc_ctrl.lockWorld(audio_state.osc_state.client, True)
                     audio_state.transcribe_sleep_duration = audio_state.transcribe_sleep_duration_min_s
 
                     audio_state.audio_paused = True
 
-                    if enable_local_beep == 1:
+                    if audio_state.cfg["enable_local_beep"]:
                         audio_state.audio_events.append(audio_state.AUDIO_EVENT_TOGGLE_OFF)
                 elif state == PAUSE_STATE:
                     state = RECORD_STATE
-                    if not use_builtin:
+                    if not audio_state.cfg["use_builtin"]:
                         osc_ctrl.toggleBoard(audio_state.osc_state.client, True)
                         osc_ctrl.lockWorld(audio_state.osc_state.client, False)
                         osc_ctrl.ellipsis(audio_state.osc_state.client, True)
                     if audio_state.reset_on_toggle:
-                        if audio_state.enable_debug_mode:
+                        if audio_state.cfg["enable_debug_mode"]:
                             print("Toggle detected, dropping transcript (3)")
                         audio_state.drop_transcription = True
                     else:
-                        if audio_state.enable_debug_mode:
+                        if audio_state.cfg["enable_debug_mode"]:
                             print("Toggle detected, committing preview text (3)")
                         audio_state.text += audio_state.preview_text
 
                     resetAudioLocked(audio_state)
                     resetDisplayLocked(audio_state)
 
-                    if enable_local_beep == 1:
+                    if audio_state.cfg["enable_local_beep"]:
                         audio_state.audio_events.append(audio_state.AUDIO_EVENT_TOGGLE_ON)
 
 # model should correspond to one of the Whisper models defined in
 # whisper/__init__.py. Examples: tiny, base, small, medium.
-def transcribeLoop(mic: str,
-        language: str,
-        language_target: str,
-        model: str,
-        model_translation: str,
-        enable_local_beep: bool,
-        use_cpu: bool,
-        use_builtin: bool,
-        enable_uwu_filter: bool,
-        remove_trailing_period: bool,
-        enable_uppercase_filter: bool,
-        enable_lowercase_filter: bool,
-        enable_profanity_filter: bool,
-        enable_debug_mode: bool,
-        button: str,
-        estate: EmotesState,
-        window_duration_s: int,
-        gpu_idx: int,
-        keyboard_hotkey: str,
-        reset_on_toggle: bool):
-    audio_state = getMicStream(mic)
-    audio_state.whisper_language = language
-    audio_state.language = langcodes.find(language).language
-    audio_state.MAX_LENGTH_S = window_duration_s
-    audio_state.reset_on_toggle = reset_on_toggle
-    audio_state.enable_debug_mode = enable_debug_mode
-    audio_state.enable_profanity_filter = enable_profanity_filter
+def transcribeLoop(config_path: str):
+    cfg = app_config.getConfig(config_path)
+    estate = EmotesState()
+
+    generate_utils.config.BYTES_PER_CHAR = int(cfg["bytes_per_char"])
+    generate_utils.config.CHARS_PER_SYNC = int(cfg["chars_per_sync"])
+    generate_utils.config.BOARD_ROWS = int(cfg["rows"])
+    generate_utils.config.BOARD_COLS = int(cfg["cols"])
+
+    audio_state = getMicStream(cfg["microphone"])
+    audio_state.cfg = cfg
 
     # Set up profanity filter
     en_profanity_path = os.path.abspath("Resources/Profanity/en")
     audio_state.profanity_filter = ProfanityFilter(en_profanity_path)
-    if enable_profanity_filter:
+    if cfg["enable_profanity_filter"]:
         audio_state.profanity_filter.load()
 
-    lang_bits = language_target.split(" | ")
+    lang_bits = cfg["language_target"].split(" | ")
     if len(lang_bits) == 2:
         lang_code = lang_bits[1]
         audio_state.language_target = lang_code
@@ -773,11 +741,12 @@ def transcribeLoop(mic: str,
             print(f"Failed to set up for translation: `pip install torch` "
                     "exited with {pip_proc.returncode}")
 
-        output_dir = "Resources/" + model_translation
+        output_dir = "Resources/" + cfg["model_translation"]
         # Provided by ctranslate2 Python package
-        cmd = "ct2-transformers-converter.exe --model facebook/" + model_translation + " --output_dir " + output_dir
+        cmd = "ct2-transformers-converter.exe --model facebook/" + \
+                cfg["model_translation"] + " --output_dir " + output_dir
 
-        print(f"Fetching translation algorithm ({model_translation})")
+        print(f"Fetching translation algorithm ({cfg['model_translation']})")
         if not os.path.exists(output_dir):
             ct2_proc = subprocess.Popen(
                     cmd.split(),
@@ -795,59 +764,54 @@ def transcribeLoop(mic: str,
 
         audio_state.translator = ctranslate2.Translator(output_dir)
 
-        whisper_lang = audio_state.whisper_language
+        whisper_lang = cfg["language"]
         nllb_lang = lang_compat.whisper_to_nllb[whisper_lang]
 
         audio_state.tokenizer = transformers.AutoTokenizer.from_pretrained(
-                "facebook/" + model_translation,
+                "facebook/" + cfg["model_translation"],
                 src_lang=nllb_lang)
 
         print(f"Translation ready to go")
 
     abspath = os.path.abspath(__file__)
     dname = os.path.dirname(abspath)
-    model_root = os.path.join(dname, "Models", model)
+    model_root = os.path.join(dname, "Models", cfg["model"])
 
-    print("Model {} will be saved to {}".format(model, model_root))
+    print("Model {} will be saved to {}".format(cfg["model"], model_root))
 
     model_device = "cuda"
-    if use_cpu:
+    if cfg["use_cpu"]:
         model_device = "cpu"
 
     download_it = os.path.exists(model_root)
     if download_it:
-        model = model_root
-    model = WhisperModel(model,
+        cfg["model"] = model_root
+    cfg["model"] = WhisperModel(cfg["model"],
             device = model_device,
-            device_index = gpu_idx,
+            device_index = cfg["gpu_idx"],
             compute_type = "int8",
             download_root = model_root,
             local_files_only = download_it)
 
     transcribe_audio_thd = threading.Thread(
             target = transcribeAudio,
-            args = [audio_state, model, use_cpu, enable_uwu_filter,
-                remove_trailing_period, enable_uppercase_filter,
-                enable_lowercase_filter])
+            args = [audio_state])
     transcribe_audio_thd.daemon = True
     transcribe_audio_thd.start()
 
-    send_audio_thd = threading.Thread(target = sendAudio, args = [audio_state, use_builtin, estate])
+    send_audio_thd = threading.Thread(target = sendAudio, args = [audio_state, estate])
     send_audio_thd.daemon = True
     send_audio_thd.start()
 
-    controller_input_thd = threading.Thread(target = readControllerInput, args
-            = [audio_state, enable_local_beep, use_builtin, button])
+    controller_input_thd = threading.Thread(target = readControllerInput, args = [audio_state])
     controller_input_thd.daemon = True
     controller_input_thd.start()
 
-    audio_feedback_thd = threading.Thread(target = audioFeedbackThread, args
-            = [audio_state, enable_local_beep, use_builtin, button])
+    audio_feedback_thd = threading.Thread(target = audioFeedbackThread, args = [audio_state])
     audio_feedback_thd.daemon = True
     audio_feedback_thd.start()
 
-    keyboard_input_thd = threading.Thread(target = readKeyboardInput, args
-            = [audio_state, enable_local_beep, use_builtin, keyboard_hotkey])
+    keyboard_input_thd = threading.Thread(target = readKeyboardInput, args = [audio_state])
     keyboard_input_thd.daemon = True
     keyboard_input_thd.start()
 
@@ -878,148 +842,10 @@ if __name__ == "__main__":
     print(f"Set cwd to {os.getcwd()}")
 
     parser = argparse.ArgumentParser()
-    parser.add_argument("--mic", type=str, help="Which mic to use. Options: index, focusrite. Default: index")
-    parser.add_argument("--language", type=str, help="Which language to use. Ex: english, japanese, chinese, french, german.")
-    parser.add_argument("--language_target", type=str, help="Which language to translate into. See kLangTargetChoices in Frame.cpp for valid choices")
-    parser.add_argument("--model", type=str, help="Which transcription model to use. " \
-            "Options: tiny, tiny.en, base, base.en, small, small.en, " \
-            "medium, medium.en, large-v1, large-v2")
-    parser.add_argument("--model_translation", type=str, help="Which translation model to use. " \
-            "Options: nllb-200-distilled-600M, nllb-200-distilled-1.3B.")
-    parser.add_argument("--bytes_per_char", type=str, help="The number of bytes to use to represent each character")
-    parser.add_argument("--chars_per_sync", type=str, help="The number of characters to send on each sync event")
-    parser.add_argument("--enable_local_beep", type=int, help="Whether to play a local auditory indicator when transcription starts/stops.")
-    parser.add_argument("--rows", type=int, help="The number of rows on the board")
-    parser.add_argument("--cols", type=int, help="The number of columns on the board")
-    parser.add_argument("--window_duration_s", type=int, help="The length in seconds of the audio recording handed to the transcription algorithm")
-    parser.add_argument("--cpu", type=int, help="If set to 1, use CPU instead of GPU")
-    parser.add_argument("--use_builtin", type=int, help="If set to 1, use the text box built into the game.")
-    parser.add_argument("--enable_uwu_filter", type=int, help="If set to 1, transcribed text will be passed through an uwu filter :3.")
-    parser.add_argument("--remove_trailing_period", type=int, help="If set to 1, trailing period will be removed.")
-    parser.add_argument("--enable_uppercase_filter", type=int, help="If set to 1, transcriptions will be converted to UPPERCASE.")
-    parser.add_argument("--enable_lowercase_filter", type=int, help="If set to 1, transcriptions will be converted to lowercase.")
-    parser.add_argument("--enable_profanity_filter", type=int, help="If set to 1, profanity in transcriptions will have their vowels replaced with asterisks. Only works in English.")
-    parser.add_argument("--button", type=str, help="The controller button used to start/stop transcription. E.g. \"left joystick\"")
-    parser.add_argument("--emotes_pickle", type=str, help="The path to emotes pickle. See emotes_v2.py for details.")
-    parser.add_argument("--gpu_idx", type=str, help="The index of the GPU device to use. On single GPU systems, use 0.")
-    parser.add_argument("--keybind", type=str, help="The keyboard hotkey to use to toggle transcription. For example, ctrl+shift+s")
-    parser.add_argument("--reset_on_toggle", type=int, help="Whether to reset (clear) the transcript every time that transcription is toggled on.")
-    parser.add_argument("--enable_debug_mode", type=int, help="If set to 1, print additional information to stdout while transcribing.")
+    parser.add_argument("--config", type=str, help="Path to app config YAML file.")
     args = parser.parse_args()
 
-    if not args.mic:
-        args.mic = "index"
-
-    if not args.language:
-        args.language = "english"
-
-    if not args.language_target:
-        print("--language_target required", file=sys.stderr)
-
-    if not args.model:
-        args.model = "base"
-
-    if not args.model_translation:
-        print("--model_translation required.", file=sys.stderr)
-        sys.exit(1)
-
-    if not args.bytes_per_char or not args.chars_per_sync:
-        print("--bytes_per_char and --chars_per_sync required", file=sys.stderr)
-        sys.exit(1)
-
-    if not args.rows or not args.cols:
-        print("--rows and --cols required", file=sys.stderr)
-        sys.exit(1)
-
-    if not args.button:
-        print("--button required", file=sys.stderr)
-        sys.exit(1)
-
-    if not args.emotes_pickle:
-        print("--emotes_pickle required", file=sys.stderr)
-        sys.exit(1)
-
-    if not args.gpu_idx:
-        print("--gpu_idx required", file=sys.stderr)
-        sys.exit(1)
-
-    args.gpu_idx = int(args.gpu_idx)
-
-    window_duration_s = 120
-    if args.window_duration_s:
-        window_duration_s = int(args.window_duration_s)
-
-    if args.cpu == 1:
-        args.cpu = True
-    else:
-        args.cpu = False
-
-    if args.reset_on_toggle == 1:
-        args.reset_on_toggle = True
-    else:
-        args.reset_on_toggle = False
-
-    if args.use_builtin == 1:
-        args.use_builtin = True
-    else:
-        args.use_builtin = False
-
-    if args.enable_uwu_filter == 1:
-        args.enable_uwu_filter = True
-    else:
-        args.enable_uwu_filter = False
-
-    if args.remove_trailing_period == 1:
-        args.remove_trailing_period = True
-    else:
-        args.remove_trailing_period = False
-
-    if args.enable_uppercase_filter == 1:
-        args.enable_uppercase_filter = True
-    else:
-        args.enable_uppercase_filter = False
-
-    if args.enable_lowercase_filter == 1:
-        args.enable_lowercase_filter = True
-    else:
-        args.enable_lowercase_filter = False
-
-    if args.enable_profanity_filter == 1:
-        args.enable_profanity_filter = True
-    else:
-        args.enable_profanity_filter = False
-
-    if args.enable_debug_mode == 1:
-        args.enable_debug_mode = True
-    else:
-        args.enable_debug_mode = False
-
-    estate = EmotesState()
-    estate.load(args.emotes_pickle)
-
-    generate_utils.config.BYTES_PER_CHAR = int(args.bytes_per_char)
-    generate_utils.config.CHARS_PER_SYNC = int(args.chars_per_sync)
-    generate_utils.config.BOARD_ROWS = int(args.rows)
-    generate_utils.config.BOARD_COLS = int(args.cols)
-
     print(f"PATH: {os.environ['PATH']}")
 
-    transcribeLoop(args.mic,
-            args.language,
-            args.language_target,
-            args.model,
-            args.model_translation,
-            args.enable_local_beep,
-            args.cpu, args.use_builtin,
-            args.enable_uwu_filter,
-            args.remove_trailing_period,
-            args.enable_uppercase_filter,
-            args.enable_lowercase_filter,
-            args.enable_profanity_filter,
-            args.enable_debug_mode,
-            args.button,
-            estate, window_duration_s,
-            args.gpu_idx,
-            args.keybind,
-            args.reset_on_toggle)
+    transcribeLoop(args.config)