diff options
| author | yum <yum.food.vr@gmail.com> | 2022-12-30 01:35:11 -0800 |
|---|---|---|
| committer | yum <yum.food.vr@gmail.com> | 2022-12-30 01:35:11 -0800 |
| commit | 9a7190a04bbe6feba2abe4a1590eb8114c04f683 (patch) | |
| tree | 44dab8ebf587f520abe1214f0136990efdecc63c /Scripts/transcribe.py | |
| parent | d1024fef1b216af5d3d991228c6b83311a71bb42 (diff) | |
GUI: Expose transcription window duration
Users can pick longer transcription durations for accuracy-critical
tasks, or shorter durations for latency-critical tasks.
Diffstat (limited to 'Scripts/transcribe.py')
| -rw-r--r-- | Scripts/transcribe.py | 14 |
1 file changed, 12 insertions, 2 deletions
diff --git a/Scripts/transcribe.py b/Scripts/transcribe.py index 0530946..f90867a 100644 --- a/Scripts/transcribe.py +++ b/Scripts/transcribe.py @@ -25,6 +25,13 @@ import wave # License: MIT. import whisper +class Config: + def __init__(self): + # The maximum length that recordAudio() will put into frames before it + # starts dropping from the start. + self.MAX_LENGTH_S = 10 +config = Config() + class AudioState: def __init__(self): self.CHUNK = 1024 @@ -35,7 +42,6 @@ class AudioState: # The maximum length that recordAudio() will put into frames before it # starts dropping from the start. - self.MAX_LENGTH_S = 10 self.MAX_LENGTH_S_WHISPER = 30 # The minimum length that recordAudio() will wait for before saving audio. self.MIN_LENGTH_S = 1 @@ -119,7 +125,7 @@ def onAudioFramesAvailable( audio_state.frames.append(decimated) - max_frames = int(input_rate * audio_state.MAX_LENGTH_S / audio_state.CHUNK) + max_frames = int(input_rate * config.MAX_LENGTH_S / audio_state.CHUNK) if len(audio_state.frames) > max_frames: audio_state.frames = audio_state.frames[-1 * max_frames :] @@ -428,6 +434,7 @@ if __name__ == "__main__": parser.add_argument("--enable_local_beep", type=int, help="Whether to play a local auditory indicator when transcription starts/stops."); parser.add_argument("--rows", type=int, help="The number of rows on the board") parser.add_argument("--cols", type=int, help="The number of columns on the board") + parser.add_argument("--window_duration_s", type=int, help="The length in seconds of the audio recording handed to the transcription algorithm"); args = parser.parse_args() if not args.mic: @@ -447,6 +454,9 @@ if __name__ == "__main__": print("--rows and --cols required", file=sys.stderr) sys.exit(1) + if args.window_duration_s: + config.MAX_LENGTH_S = int(args.window_duration_s) + generate_utils.config.BYTES_PER_CHAR = int(args.bytes_per_char) generate_utils.config.CHARS_PER_SYNC = int(args.chars_per_sync) generate_utils.config.BOARD_ROWS = int(args.rows) |
