diff options
| -rw-r--r-- | osc_ctrl.py | 2 | ||||
| -rw-r--r-- | transcribe.py | 77 |
2 files changed, 29 insertions, 50 deletions
diff --git a/osc_ctrl.py b/osc_ctrl.py index 2fede06..259a8c2 100644 --- a/osc_ctrl.py +++ b/osc_ctrl.py @@ -331,6 +331,8 @@ def sendRawMessage(client, msg): sendMessageCellDiscrete(client, cell_msg, cell) def clear(client): + disable(client) + addr="/avatar/parameters/" + generate_utils.getClearBoardParam() client.send_message(addr, True) diff --git a/transcribe.py b/transcribe.py index 0a31608..a2b1486 100644 --- a/transcribe.py +++ b/transcribe.py @@ -47,17 +47,18 @@ class AudioState: # transcriptions before "committing" to a transcription. text_candidate = "" text_lock = threading.Lock() - clear_requested = False record_audio = True transcribe_audio = True send_audio = True - run_control_thread = True transcribe_sleep_duration_min_s = 0.05 transcribe_sleep_duration_max_s = 1.50 transcribe_no_change_count = 0 transcribe_sleep_duration = transcribe_sleep_duration_min_s + # When the user says `over`, we stop displaying new transcriptions until + # they clear the board again. + display_paused = False osc_client = osc_ctrl.getClient() @@ -140,6 +141,10 @@ def saveAudio(audio_state, filename): normalized.export(filename, format="wav") def resetDiskAudioLocked(audio_state, filename): + if os.path.isfile(audio_state.VOICE_AUDIO_FILENAME): + # empty out the voice file + open(audio_state.VOICE_AUDIO_FILENAME, "w").close() + wf = wave.open(filename, 'wb') wf.setnchannels(audio_state.CHANNELS) wf.setsampwidth(audio_state.p.get_sample_size(audio_state.FORMAT)) @@ -154,6 +159,8 @@ def resetAudioLocked(audio_state): audio_state.transcribe_sleep_duration = \ audio_state.transcribe_sleep_duration_min_s + resetDiskAudioLocked(audio_state, audio_state.VOICE_AUDIO_FILENAME) + def resetAudio(audio_state): audio_state.frames_lock.acquire() resetAudioLocked(audio_state) @@ -206,21 +213,20 @@ def transcribeAudio(audio_state, model): audio_state.text_lock.acquire() - if audio_state.clear_requested: - audio_state.text = "" - audio_state.text_candidate = "" - audio_state.clear_requested = False - audio_state.text_lock.release() - continue - words = ''.join(c for c in text.lower() if (c.isalpha() or c == " ")).split() - #print("words: {}".format(words)) - if len(words) > 0 and words[-1] == "clear": - audio_state.text = "" - audio_state.text_candidate = "" - audio_state.clear_requested = True - audio_state.text_lock.release() - continue + + if len(words) > 0: + if words[-1] == "clear": + resetAudio(audio_state) + osc_ctrl.clear(audio_state.osc_client) + audio_state.text = "" + audio_state.text_candidate = "" + audio_state.text_lock.release() + audio_state.display_paused = False + continue + elif words[-1] == "over": + words = words[0:-1] + audio_state.display_paused = True # We use a few heuristics to handle spurious mistranscriptions and to # handle events where we trim off the start of the audio clip. @@ -266,7 +272,12 @@ def transcribeAudio(audio_state, model): def sendAudio(audio_state): tx_state = osc_ctrl.OscTxState() + while audio_state.send_audio == True: + if audio_state.display_paused: + time.sleep(0.1) + continue + audio_state.text_lock.acquire() text = copy.deepcopy(audio_state.text) audio_state.text_lock.release() @@ -276,34 +287,6 @@ def sendAudio(audio_state): # Pace this out time.sleep(0.01) -def controlThread(audio_state): - while audio_state.run_control_thread: - time.sleep(0.1) - if audio_state.clear_requested: - print("here a") - audio_state.text_lock.acquire() - audio_state.frames_lock.acquire() - - if os.path.isfile(audio_state.VOICE_AUDIO_FILENAME): - # empty out the voice file - open(audio_state.VOICE_AUDIO_FILENAME, "w").close() - resetAudioLocked(audio_state) - resetDiskAudioLocked(audio_state, audio_state.VOICE_AUDIO_FILENAME) - audio_state.clear_requested = False - - # Allow audio collection to resume now. If we don't do this, then - # any audio spoken while the board is slowly clearing will be lost. - audio_state.frames_lock.release() - - # Clearing can take a while, and the user might be talking in the - # meantime. So we drop audio state before clearing so the other - # threads can continue saving to it. - osc_ctrl.clear(audio_state.osc_client) - - audio_state.text_lock.release() - - print("here b") - if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("--mic", type=str, help="Which mic to use. Options: index, focusrite. Default: index") @@ -334,10 +317,6 @@ if __name__ == "__main__": send_audio_thd.daemon = True send_audio_thd.start() - control_thd = threading.Thread(target = controlThread, args = [audio_state]) - control_thd.daemon = True - control_thd.start() - print("Press enter or say 'Clear' to start a new message") for line in sys.stdin: resetAudio(audio_state) @@ -347,8 +326,6 @@ if __name__ == "__main__": print("Joining threads") audio_state.record_audio = False audio_state.transcribe_audio = False - audio_state.run_control_thread = False record_audio_thd.join() transcribe_audio_thd.join() - control_thd.join() |
