Add exponentially longer sleeps to transcribe loop

When the user pauses their speech for an extended period of time, the transcription engine will sleep for progressively longer intervals, up to 1.5 seconds between transcriptions. This allows us to reduce idle resource consumption. To keep transcription responsive while the user is actively speaking, we reset the sleep duration to the minimum (0.05 seconds) whenever the transcribed text changes.
author: yum <yum.food.vr@gmail.com> 2022-10-25 16:05:25 -0700
committer: yum <yum.food.vr@gmail.com> 2022-10-25 16:05:25 -0700
commit: 98284cc0ca42c75ef8cf22d9435821c62a12c592 (patch)
tree: a013ab37e7ddf109496028cc50f4312a74a27bda
parent: de99d88a1c15c64812d16231e1b58e6e32e1588a (diff)
2 files changed, 27 insertions, 3 deletions
diff --git a/osc_ctrl.py b/osc_ctrl.py
index 4353939..328b55c 100644
--- a/osc_ctrl.py
+++ b/osc_ctrl.py
@@ -33,6 +33,8 @@ class EvilGlobalState():
     encoding = {}
 state = EvilGlobalState()
 
+# The characters in the TaSTT are all numbered from top left to bottom right.
+# This function provides a mapping from letter ('a') to index (26).
 def generateEncoding(state):
     for i in range(0, 26):
         state.encoding[chr(ord('A') + i)] = i
@@ -103,7 +105,6 @@ def sendMessageCellDiscrete(client, msg_cell, which_cell):
     if msg_cell != state.encoding[' '] * BOARD_COLS:
         addr="/avatar/parameters/" + generate_utils.getSpeechNoiseToggleParam()
         client.send_message(addr, False)
-        print("beep")
 
     time.sleep(CELL_TX_TIME_S / 3.0)
 
diff --git a/transcribe.py b/transcribe.py
index fa3b166..520d63e 100644
--- a/transcribe.py
+++ b/transcribe.py
@@ -55,6 +55,11 @@ class AudioState:
     send_audio = True
     run_control_thread = True
 
+    transcribe_sleep_duration_min_s = 0.05
+    transcribe_sleep_duration_max_s = 1.50
+    transcribe_no_change_count = 0
+    transcribe_sleep_duration = transcribe_sleep_duration_min_s
+
     osc_client = osc_ctrl.getClient()
 
 def getMicStream(which_mic):
@@ -175,7 +180,19 @@ def transcribe(model, filename):
 def transcribeAudio(audio_state, model):
     while audio_state.transcribe_audio == True:
         # Pace this out
-        time.sleep(0.05)
+        print("sleep duration: {}".format(audio_state.transcribe_sleep_duration))
+        time.sleep(audio_state.transcribe_sleep_duration)
+
+        # Increase sleep time. Code below will set sleep time back to minimum
+        # if a change is detected.
+        if audio_state.transcribe_no_change_count < 10:
+            audio_state.transcribe_no_change_count += 1
+        longer_sleep_dur = audio_state.transcribe_sleep_duration
+        longer_sleep_dur += audio_state.transcribe_sleep_duration_min_s * (1.3**audio_state.transcribe_no_change_count)
+        audio_state.transcribe_sleep_duration = min(
+                audio_state.transcribe_sleep_duration_max_s,
+                longer_sleep_dur)
+        print("next sleep duration: {}".format(audio_state.transcribe_sleep_duration))
 
         saveAudio(audio_state, audio_state.VOICE_AUDIO_FILENAME)
 
@@ -250,6 +267,12 @@ def transcribeAudio(audio_state, model):
             else:
                 audio_state.text = text
 
+            if audio_state.text != old_text:
+                # We think the user said something, so reset the amount of
+                # time we sleep between transcriptions to the minimum.
+                audio_state.transcribe_no_change_count = 0
+                audio_state.transcribe_sleep_duration = audio_state.transcribe_sleep_duration_min_s
+
         audio_state.text_candidate = text
 
         audio_state.text_lock.release()
@@ -328,7 +351,7 @@ if __name__ == "__main__":
     control_thd.daemon = True
     control_thd.start()
 
-    print("Press enter to start a new message")
+    print("Press enter or say 'Clear' to start a new message")
     for line in sys.stdin:
         resetAudio(audio_state)
         if "exit" in line or "quit" in line:
author	yum <yum.food.vr@gmail.com>	2022-10-25 16:05:25 -0700
committer	yum <yum.food.vr@gmail.com>	2022-10-25 16:05:25 -0700
commit	98284cc0ca42c75ef8cf22d9435821c62a12c592 (patch)
tree	a013ab37e7ddf109496028cc50f4312a74a27bda
parent	de99d88a1c15c64812d16231e1b58e6e32e1588a (diff)