diff options
| -rw-r--r-- | BrowserSource/index.html | 28 | ||||
| -rw-r--r-- | GUI/GUI/GUI/BrowserSource.cpp | 5 | ||||
| -rw-r--r-- | GUI/GUI/GUI/Frame.cpp | 40 | ||||
| -rw-r--r-- | GUI/GUI/GUI/Transcript.cpp | 10 | ||||
| -rw-r--r-- | GUI/GUI/GUI/Transcript.h | 7 | ||||
| -rw-r--r-- | Scripts/transcribe.py | 14 |
6 files changed, 86 insertions, 18 deletions
diff --git a/BrowserSource/index.html b/BrowserSource/index.html index 000f366..96692cf 100644 --- a/BrowserSource/index.html +++ b/BrowserSource/index.html @@ -17,6 +17,24 @@ color: #89CFF0; -webkit-text-stroke: 3.0px #000; } + .red_circle { + height: 50px; + width: 50px; + background-color: red; + border-radius: 50%; + display: inline-block; + vertical-align: middle; + margin: 20px; + } + .grey_circle { + height: 50px; + width: 50px; + background-color: grey; + border-radius: 50%; + display: inline-block; + vertical-align: middle; + margin: 20px; + } </style> <body> <div id="transcript"></div> @@ -31,7 +49,15 @@ dataType: 'json', success: function(data) { var transcript = data.transcript; - $('#transcript').html(transcript); + var red_circle = '<span class="red_circle"></span>'; + var grey_circle = '<span class="grey_circle"></span>'; + var transcript_w_circle = transcript; + if (data.is_final == 1) { + transcript_w_circle += grey_circle; + } else { + transcript_w_circle += red_circle; + } + $('#transcript').html(transcript_w_circle); $('#transcript').css("background-color", "#00000080"); }, error: function(jqXHR, textStatus, errorThrown) { diff --git a/GUI/GUI/GUI/BrowserSource.cpp b/GUI/GUI/GUI/BrowserSource.cpp index 62e3e43..45ca7f9 100644 --- a/GUI/GUI/GUI/BrowserSource.cpp +++ b/GUI/GUI/GUI/BrowserSource.cpp @@ -55,9 +55,12 @@ void BrowserSource::Run(volatile bool* run) transcript_oss << segment;
}
+ bool is_final = transcript_->IsFinalized();
+
std::ostringstream resp_oss;
resp_oss << "{";
- resp_oss << "\"transcript\":\"" << transcript_oss.str() << "\"";
+ resp_oss << "\"transcript\":\"" << transcript_oss.str() << "\",";
+ resp_oss << "\"is_final\":" << std::to_string(is_final ? 1 : 0) << "";
resp_oss << "}";
payload = resp_oss.str();
type = WebServer::JSON;
diff --git a/GUI/GUI/GUI/Frame.cpp b/GUI/GUI/GUI/Frame.cpp index 236d375..e6506bd 100644 --- a/GUI/GUI/GUI/Frame.cpp +++ b/GUI/GUI/GUI/Frame.cpp @@ -8,6 +8,7 @@ #include <filesystem>
#include <fstream>
#include <regex>
+#include <sstream>
#include <string>
#include <vector>
#include <wx/filepicker.h>
@@ -2164,20 +2165,31 @@ void Frame::OnAppStart(wxCommandEvent& event) { app_c_->keybind = keybind;
app_c_->Serialize(AppConfig::kConfigPath);
- auto out_cb = [&](const std::string& out, const std::string& err) {
- Log(transcribe_out_, "{}", out);
- Log(transcribe_out_, "{}", err);
-
- std::regex pattern("^Transcription \\(([0-9]*\\.[0-9]+) seconds\\):");
- if (std::regex_search(out, pattern)) {
- std::string filtered_transcript = std::regex_replace(out, pattern, "");
- filtered_transcript.erase(std::remove_if(filtered_transcript.begin(), filtered_transcript.end(), [](char c) {
- return c == '\n' || c == '\r';
- }), filtered_transcript.end());
- //Log(transcribe_out_, "Got transcription line! Transcript: \"{}\"", filtered_transcript);
- transcript_.Set(std::move(filtered_transcript));
- }
- };
+ auto out_cb = [&](const std::string& out, const std::string& err) {
+   // Forward both streams of the transcription process to the log unchanged.
+   Log(transcribe_out_, "{}", out);
+   Log(transcribe_out_, "{}", err);
+
+   // Built once and reused: the pattern is constant, and compiling a
+   // std::regex for every line of output is needless work.
+   static const std::regex pattern("^Transcription \\(([0-9]*\\.[0-9]+) seconds\\):");
+
+   // `out` can carry several lines at once, so each line is examined on its
+   // own for the two markers handled below.
+   std::istringstream out_iss(out);
+   std::string out_line;
+   while (std::getline(out_iss, out_line)) {
+     // "Finalized: 1" / "Finalized: 0" lines toggle the finalized flag.
+     if (out_line.starts_with("Finalized: 1")) {
+       transcript_.SetFinalized(true);
+     }
+     else if (out_line.starts_with("Finalized: 0")) {
+       transcript_.SetFinalized(false);
+     }
+
+     // A "Transcription (<seconds> seconds):" line carries the transcript
+     // text after the prefix.
+     if (std::regex_search(out_line, pattern)) {
+       std::string filtered_transcript = std::regex_replace(out_line, pattern, "");
+       // getline already removed '\n'; this also drops any '\r' left behind.
+       filtered_transcript.erase(std::remove_if(filtered_transcript.begin(), filtered_transcript.end(), [](char c) {
+         return c == '\n' || c == '\r';
+       }), filtered_transcript.end());
+       //Log(transcribe_out_, "Got transcription line! Transcript: \"{}\"", filtered_transcript);
+       transcript_.Set(std::move(filtered_transcript));
+     }
+   }
+ };
auto in_cb = [&](std::string& in) {};
auto run_cb = [&]() {
return run_py_app_;
diff --git a/GUI/GUI/GUI/Transcript.cpp b/GUI/GUI/GUI/Transcript.cpp index 9ef607f..e635343 100644 --- a/GUI/GUI/GUI/Transcript.cpp +++ b/GUI/GUI/GUI/Transcript.cpp @@ -20,3 +20,13 @@ std::vector<std::string> Transcript::Get() { std::scoped_lock l(mu_);
return segments_;
}
+
+void Transcript::SetFinalized(bool is_finalized) {
+  // Hold mu_ while writing, the same way Get() guards segments_. A plain
+  // bool is not atomic in C++ regardless of its size: an unsynchronized
+  // write that may be read from another thread is a data race.
+  std::scoped_lock l(mu_);
+  is_finalized_ = is_finalized;
+}
+
+bool Transcript::IsFinalized() {
+  // Hold mu_ while reading, the same way Get() guards segments_. A plain
+  // bool is not atomic in C++ regardless of its size: an unsynchronized
+  // read that may overlap a write from another thread is a data race.
+  std::scoped_lock l(mu_);
+  return is_finalized_;
+}
diff --git a/GUI/GUI/GUI/Transcript.h b/GUI/GUI/GUI/Transcript.h index fae2bad..07cf6c0 100644 --- a/GUI/GUI/GUI/Transcript.h +++ b/GUI/GUI/GUI/Transcript.h @@ -13,9 +13,16 @@ public: void Set(std::string&& segment);
void Clear();
+ // Indicate whether the transcript is "finalized", i.e. the transcription
+ // engine has committed the entirety of the transcript and will no longer
+ // change it.
+ void SetFinalized(bool is_finalized);
+
std::vector<std::string> Get();
+ bool IsFinalized();
private:
std::mutex mu_;
std::vector<std::string> segments_;
+ bool is_finalized_{ false };
};
diff --git a/Scripts/transcribe.py b/Scripts/transcribe.py index c249e43..039811c 100644 --- a/Scripts/transcribe.py +++ b/Scripts/transcribe.py @@ -364,6 +364,18 @@ def transcribeAudio(audio_state, audio_state.text += text audio_state.preview_text = audio_state.text + preview_text + if len(preview_text) == 0: + print("Finalized: 1") + else: + print("Finalized: 0") + + # Hard cap transcript at 4096 chars. Letting it grow longer than this + # eventually causes lag. This happens routinely when streaming. Capping + # like this does not affect the visible portion of the transcript in + # OBS, but it might affect the visible portion in-game. (Don't make + # your friends read more than 4k characters on a fucking chatbox.) + audio_state.text = audio_state.text[-4096:] + now = time.time() if audio_state.enable_debug_mode: print("Raw transcription ({} seconds): {}".format( @@ -739,8 +751,6 @@ def transcribeLoop(mic: str, print(f"Translation ready to go") - print("Safe to start talking") - abspath = os.path.abspath(__file__) dname = os.path.dirname(abspath) model_root = os.path.join(dname, "Models", model) |
