diff options
| author | yum <yum.food.vr@gmail.com> | 2022-11-22 19:01:01 -0800 |
|---|---|---|
| committer | yum <yum.food.vr@gmail.com> | 2022-11-22 19:01:01 -0800 |
| commit | 9f87674d1b484a2e61e87ad53d8ebcf9985dce6b (patch) | |
| tree | b3e8958a5a28901f1ecb9409db22ba15dba00a77 | |
| parent | a7a52b884061b154eaae6b0a8c0d5b443cbb8abe (diff) | |
Shorten audio window to 10 seconds
This helps with temporal stability in long-running transcriptions, and
lets us get rid of that hack where we refuse to update old pages.
| -rw-r--r-- | Images/speech_to_text_demo.gif | bin | 6505549 -> 5410888 bytes | |||
| -rw-r--r-- | osc_ctrl.py | 2 | ||||
| -rw-r--r-- | transcribe.py | 5 |
3 files changed, 4 insertions, 3 deletions
```diff
diff --git a/Images/speech_to_text_demo.gif b/Images/speech_to_text_demo.gif
Binary files differ
index 509f26b..661d80b 100644
--- a/Images/speech_to_text_demo.gif
+++ b/Images/speech_to_text_demo.gif
diff --git a/osc_ctrl.py b/osc_ctrl.py
index ea0c145..c72aed6 100644
--- a/osc_ctrl.py
+++ b/osc_ctrl.py
@@ -250,7 +250,7 @@ def sendMessageLazy(client, msg, tx_state):
         last_cell = (len(tx_state.last_msg_encoded) / NUM_LAYERS) - 1
         last_page = floor(last_cell / (2 ** generate_utils.INDEX_BITS))
         if page < last_page:
-            continue
+            pass

         if cell_msg == [state.encoding[' ']] * NUM_LAYERS:
             if empty_cells_sent >= tx_state.empty_cells_to_send_per_call:
diff --git a/transcribe.py b/transcribe.py
index 91fcd54..b316014 100644
--- a/transcribe.py
+++ b/transcribe.py
@@ -33,7 +33,8 @@ class AudioState:

     # The maximum length that recordAudio() will put into frames before it
     # starts dropping from the start.
-    MAX_LENGTH_S = 30
+    MAX_LENGTH_S = 10
+    MAX_LENGTH_S_WHISPER = 30

     # The minimum length that recordAudio() will wait for before saving audio.
     MIN_LENGTH_S = 1
@@ -202,7 +203,7 @@ def transcribe(audio_state, model, filename):
     audio_state.transcribe_lock.release()

     audio = whisper.pad_or_trim(audio, length = audio_state.RATE *
-            audio_state.MAX_LENGTH_S)
+            audio_state.MAX_LENGTH_S_WHISPER)
     mel = whisper.log_mel_spectrogram(audio).to(model.device)

     result = None
```
