diff options
| author | yum <yum.food.vr@gmail.com> | 2022-10-25 16:05:25 -0700 |
|---|---|---|
| committer | yum <yum.food.vr@gmail.com> | 2022-10-25 16:05:25 -0700 |
| commit | 98284cc0ca42c75ef8cf22d9435821c62a12c592 (patch) | |
| tree | a013ab37e7ddf109496028cc50f4312a74a27bda /transcribe.py | |
| parent | de99d88a1c15c64812d16231e1b58e6e32e1588a (diff) | |
Add exponentially longer sleeps to transcribe loop
When the user pauses their speech for an extended period of time, the
transcription engine will sleep for progressively longer intervals,
up to 1.5 seconds between transcriptions. This allows us to reduce
idle resource consumption.
To enable responsive transcription while the user is speaking actively,
we reset the sleep duration to the minimum whenever a change is
detected.
Diffstat (limited to 'transcribe.py')
| -rw-r--r-- | transcribe.py | 27 |
1 file changed, 25 insertions, 2 deletions
diff --git a/transcribe.py b/transcribe.py index fa3b166..520d63e 100644 --- a/transcribe.py +++ b/transcribe.py @@ -55,6 +55,11 @@ class AudioState: send_audio = True run_control_thread = True + transcribe_sleep_duration_min_s = 0.05 + transcribe_sleep_duration_max_s = 1.50 + transcribe_no_change_count = 0 + transcribe_sleep_duration = transcribe_sleep_duration_min_s + osc_client = osc_ctrl.getClient() def getMicStream(which_mic): @@ -175,7 +180,19 @@ def transcribe(model, filename): def transcribeAudio(audio_state, model): while audio_state.transcribe_audio == True: # Pace this out - time.sleep(0.05) + print("sleep duration: {}".format(audio_state.transcribe_sleep_duration)) + time.sleep(audio_state.transcribe_sleep_duration) + + # Increase sleep time. Code below will set sleep time back to minimum + # if a change is detected. + if audio_state.transcribe_no_change_count < 10: + audio_state.transcribe_no_change_count += 1 + longer_sleep_dur = audio_state.transcribe_sleep_duration + longer_sleep_dur += audio_state.transcribe_sleep_duration_min_s * (1.3**audio_state.transcribe_no_change_count) + audio_state.transcribe_sleep_duration = min( + audio_state.transcribe_sleep_duration_max_s, + longer_sleep_dur) + print("next sleep duration: {}".format(audio_state.transcribe_sleep_duration)) saveAudio(audio_state, audio_state.VOICE_AUDIO_FILENAME) @@ -250,6 +267,12 @@ def transcribeAudio(audio_state, model): else: audio_state.text = text + if audio_state.text != old_text: + # We think the user said something, so reset the amount of + # time we sleep between transcriptions to the minimum. 
+ audio_state.transcribe_no_change_count = 0 + audio_state.transcribe_sleep_duration = audio_state.transcribe_sleep_duration_min_s + audio_state.text_candidate = text audio_state.text_lock.release() @@ -328,7 +351,7 @@ if __name__ == "__main__": control_thd.daemon = True control_thd.start() - print("Press enter to start a new message") + print("Press enter or say 'Clear' to start a new message") for line in sys.stdin: resetAudio(audio_state) if "exit" in line or "quit" in line: |
