From da360bcd1870650848daa034aa31b8a337caecc7 Mon Sep 17 00:00:00 2001 From: yum Date: Sun, 10 Sep 2023 03:50:46 -0700 Subject: Bugfix: only cap display of transcript at 4K chars Actually retain the whole transcript to avoid breaking the OSC pager. Also constrain the UI buffer size by characters instead of lines. Since some lines can be massive and others short, characters are a better way of consistently keeping the UI memory in check. --- GUI/GUI/GUI/Logging.cpp | 13 +++++++------ Scripts/transcribe_v2.py | 19 ++++++++++++------- 2 files changed, 19 insertions(+), 13 deletions(-) diff --git a/GUI/GUI/GUI/Logging.cpp b/GUI/GUI/GUI/Logging.cpp index f6ad3ab..6983a40 100644 --- a/GUI/GUI/GUI/Logging.cpp +++ b/GUI/GUI/GUI/Logging.cpp @@ -51,13 +51,14 @@ void Logging::ThreadLogger::Drain() // Constrain wxTextCtrl's to a few hundred lines to keep memory usage / // general snappiness in check. if (frame) { - wxString allText = frame->GetValue(); - wxArrayString lines = wxStringTokenize(allText, "\n"); - size_t count = lines.GetCount(); - constexpr int kHalfMaxLines = 1000; - if (count > kHalfMaxLines * 2) { + constexpr int kHalfMaxChars = 50 * 1000; + int nchars; + while ((nchars = frame->GetLastPosition()) > kHalfMaxChars) { + wxString allText = frame->GetValue(); + wxArrayString lines = wxStringTokenize(allText, "\n"); // Keep only the last kHalfMaxLines lines. - size_t linesToRemove = count - kHalfMaxLines; + size_t nlines = lines.GetCount(); + size_t linesToRemove = nlines / 2; // Remove lines from the beginning lines.RemoveAt(0, linesToRemove); diff --git a/Scripts/transcribe_v2.py b/Scripts/transcribe_v2.py index f0e994f..c73c13a 100644 --- a/Scripts/transcribe_v2.py +++ b/Scripts/transcribe_v2.py @@ -592,8 +592,6 @@ def evaluate(cfg, last_commit_ts = collector.now() transcript += commit.delta - # Hard-cap transcript length at 4k. - transcript = transcript[-4096:] preview = commit.preview if False and len(commit.delta): @@ -681,8 +679,18 @@ def transcriptionThread(ctrl: ThreadControl): commit = ctrl.committer.getDelta() if len(commit.delta) > 0 or len(commit.preview) > 0: - print(f"Transcript: {ctrl.transcript}{commit.delta}") - print(f"Preview: {commit.preview}") + # Hard-cap displayed transcript length at 4k characters to prevent + # runaway memory use in UI. Keep the full transcript to avoid + # breaking OSC pager. + try: + print(f"Transcript: {ctrl.transcript[-4096:]}{commit.delta}") + except UnicodeEncodeError: + print("Failed to encode transcript - discarding delta") + continue + try: + print(f"Preview: {commit.preview}") + except UnicodeEncodeError: + print("Failed to encode preview - discarding") if cfg["enable_debug_mode"]: print(f"commit latency: {commit.latency_s}", file=sys.stderr) print(f"commit thresh: {commit.thresh_at_commit}", @@ -693,9 +701,6 @@ def transcriptionThread(ctrl: ThreadControl): print("Finalized: 1") ctrl.transcript += commit.delta - # Hard-cap transcript length at 4k characters to prevent runaway memory - # use. - ctrl.transcript = ctrl.transcript[-4096:] ctrl.preview = ctrl.transcript + commit.preview def vrInputThread(ctrl: ThreadControl): -- cgit v1.2.3