summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--BrowserSource/index.html28
-rw-r--r--GUI/GUI/GUI/BrowserSource.cpp5
-rw-r--r--GUI/GUI/GUI/Frame.cpp40
-rw-r--r--GUI/GUI/GUI/Transcript.cpp10
-rw-r--r--GUI/GUI/GUI/Transcript.h7
-rw-r--r--Scripts/transcribe.py14
6 files changed, 86 insertions, 18 deletions
diff --git a/BrowserSource/index.html b/BrowserSource/index.html
index 000f366..96692cf 100644
--- a/BrowserSource/index.html
+++ b/BrowserSource/index.html
@@ -17,6 +17,24 @@
color: #89CFF0;
-webkit-text-stroke: 3.0px #000;
}
+ .red_circle {
+ height: 50px;
+ width: 50px;
+ background-color: red;
+ border-radius: 50%;
+ display: inline-block;
+ vertical-align: middle;
+ margin: 20px;
+ }
+ .grey_circle {
+ height: 50px;
+ width: 50px;
+ background-color: grey;
+ border-radius: 50%;
+ display: inline-block;
+ vertical-align: middle;
+ margin: 20px;
+ }
</style>
<body>
<div id="transcript"></div>
@@ -31,7 +49,15 @@
dataType: 'json',
success: function(data) {
var transcript = data.transcript;
- $('#transcript').html(transcript);
+ var red_circle = '<span class="red_circle"></span>';
+ var grey_circle = '<span class="grey_circle"></span>';
+ var transcript_w_circle = transcript;
+ if (data.is_final == 1) {
+ transcript_w_circle += grey_circle;
+ } else {
+ transcript_w_circle += red_circle;
+ }
+ $('#transcript').html(transcript_w_circle);
$('#transcript').css("background-color", "#00000080");
},
error: function(jqXHR, textStatus, errorThrown) {
diff --git a/GUI/GUI/GUI/BrowserSource.cpp b/GUI/GUI/GUI/BrowserSource.cpp
index 62e3e43..45ca7f9 100644
--- a/GUI/GUI/GUI/BrowserSource.cpp
+++ b/GUI/GUI/GUI/BrowserSource.cpp
@@ -55,9 +55,12 @@ void BrowserSource::Run(volatile bool* run)
transcript_oss << segment;
}
+ bool is_final = transcript_->IsFinalized();
+
std::ostringstream resp_oss;
resp_oss << "{";
- resp_oss << "\"transcript\":\"" << transcript_oss.str() << "\"";
+ resp_oss << "\"transcript\":\"" << transcript_oss.str() << "\",";
+ resp_oss << "\"is_final\":" << std::to_string(is_final ? 1 : 0) << "";
resp_oss << "}";
payload = resp_oss.str();
type = WebServer::JSON;
diff --git a/GUI/GUI/GUI/Frame.cpp b/GUI/GUI/GUI/Frame.cpp
index 236d375..e6506bd 100644
--- a/GUI/GUI/GUI/Frame.cpp
+++ b/GUI/GUI/GUI/Frame.cpp
@@ -8,6 +8,7 @@
#include <filesystem>
#include <fstream>
#include <regex>
+#include <sstream>
#include <string>
#include <vector>
#include <wx/filepicker.h>
@@ -2164,20 +2165,31 @@ void Frame::OnAppStart(wxCommandEvent& event) {
app_c_->keybind = keybind;
app_c_->Serialize(AppConfig::kConfigPath);
- auto out_cb = [&](const std::string& out, const std::string& err) {
- Log(transcribe_out_, "{}", out);
- Log(transcribe_out_, "{}", err);
-
- std::regex pattern("^Transcription \\(([0-9]*\\.[0-9]+) seconds\\):");
- if (std::regex_search(out, pattern)) {
- std::string filtered_transcript = std::regex_replace(out, pattern, "");
- filtered_transcript.erase(std::remove_if(filtered_transcript.begin(), filtered_transcript.end(), [](char c) {
- return c == '\n' || c == '\r';
- }), filtered_transcript.end());
- //Log(transcribe_out_, "Got transcription line! Transcript: \"{}\"", filtered_transcript);
- transcript_.Set(std::move(filtered_transcript));
- }
- };
+ auto out_cb = [&](const std::string& out, const std::string& err) { // Logs both streams, then parses `out` line by line.
+ Log(transcribe_out_, "{}", out);
+ Log(transcribe_out_, "{}", err);
+
+ static const std::regex pattern("^Transcription \\(([0-9]*\\.[0-9]+) seconds\\):"); // Compiled once; std::regex construction is expensive.
+ std::istringstream out_iss(out);
+ std::string out_line;
+ while (std::getline(out_iss, out_line)) { // One chunk of output may carry several lines.
+ if (out_line.starts_with("Finalized: 1")) {
+ transcript_.SetFinalized(true);
+ }
+ else if (out_line.starts_with("Finalized: 0")) {
+ transcript_.SetFinalized(false);
+ }
+
+ if (std::regex_search(out_line, pattern)) {
+ std::string filtered_transcript = std::regex_replace(out_line, pattern, ""); // Drop the "Transcription (N seconds):" prefix.
+ filtered_transcript.erase(std::remove_if(filtered_transcript.begin(), filtered_transcript.end(), [](char c) {
+ return c == '\n' || c == '\r'; // getline strips '\n' but leaves any '\r'.
+ }), filtered_transcript.end());
+ //Log(transcribe_out_, "Got transcription line! Transcript: \"{}\"", filtered_transcript);
+ transcript_.Set(std::move(filtered_transcript));
+ }
+ }
+ };
auto in_cb = [&](std::string& in) {};
auto run_cb = [&]() {
return run_py_app_;
diff --git a/GUI/GUI/GUI/Transcript.cpp b/GUI/GUI/GUI/Transcript.cpp
index 9ef607f..e635343 100644
--- a/GUI/GUI/GUI/Transcript.cpp
+++ b/GUI/GUI/GUI/Transcript.cpp
@@ -20,3 +20,13 @@ std::vector<std::string> Transcript::Get() {
std::scoped_lock l(mu_);
return segments_;
}
+
+void Transcript::SetFinalized(bool is_finalized) {
+ std::scoped_lock l(mu_); // Unsynchronized access to a plain bool from several threads is a data race in C++; guard it with mu_.
+ is_finalized_ = is_finalized;
+}
+
+bool Transcript::IsFinalized() {
+ std::scoped_lock l(mu_); // Take the same mutex as SetFinalized() so the read is synchronized with the write.
+ return is_finalized_;
+}
diff --git a/GUI/GUI/GUI/Transcript.h b/GUI/GUI/GUI/Transcript.h
index fae2bad..07cf6c0 100644
--- a/GUI/GUI/GUI/Transcript.h
+++ b/GUI/GUI/GUI/Transcript.h
@@ -13,9 +13,16 @@ public:
void Set(std::string&& segment);
void Clear();
+ // Indicate whether the transcript is "finalized", i.e. the transcription
+ // engine has committed the entirety of the transcript and will no longer
+ // change it.
+ void SetFinalized(bool is_finalized);
+
std::vector<std::string> Get();
+ bool IsFinalized();
private:
std::mutex mu_;
std::vector<std::string> segments_;
+ bool is_finalized_{ false };
};
diff --git a/Scripts/transcribe.py b/Scripts/transcribe.py
index c249e43..039811c 100644
--- a/Scripts/transcribe.py
+++ b/Scripts/transcribe.py
@@ -364,6 +364,18 @@ def transcribeAudio(audio_state,
audio_state.text += text
audio_state.preview_text = audio_state.text + preview_text
+ if len(preview_text) == 0:
+ print("Finalized: 1")
+ else:
+ print("Finalized: 0")
+
+ # Hard cap transcript at 4096 chars. Letting it grow longer than this
+ # eventually causes lag. This happens routinely when streaming. Capping
+ # like this does not affect the visible portion of the transcript in
+ # OBS, but it might affect the visible portion in-game. (Don't make
+ # your friends read more than 4k characters on a fucking chatbox.)
+ audio_state.text = audio_state.text[-4096:]
+
now = time.time()
if audio_state.enable_debug_mode:
print("Raw transcription ({} seconds): {}".format(
@@ -739,8 +751,6 @@ def transcribeLoop(mic: str,
print(f"Translation ready to go")
- print("Safe to start talking")
-
abspath = os.path.abspath(__file__)
dname = os.path.dirname(abspath)
model_root = os.path.join(dname, "Models", model)