summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authoryum <yum.food.vr@gmail.com>2023-06-30 19:44:27 -0700
committeryum <yum.food.vr@gmail.com>2023-06-30 19:46:17 -0700
commit4f3131b4a36d8e1557edb31d3754a431717dab7b (patch)
treeea3151841f8d2d2abc38c71e87ccdffaac2be2dc
parent9ab500036bdfa87215e9a05fc167c4d9dea8e437 (diff)
Add visual commit indicator to OBS browser source
Circle goes red when speaking, grey when done. Ideally it would be in the top right portion of the browser source, but this is a good start. Also, hard-cap transcripts to 4096 chars. This prevents the STT from lagging during long sessions.
-rw-r--r--BrowserSource/index.html28
-rw-r--r--GUI/GUI/GUI/BrowserSource.cpp5
-rw-r--r--GUI/GUI/GUI/Frame.cpp40
-rw-r--r--GUI/GUI/GUI/Transcript.cpp10
-rw-r--r--GUI/GUI/GUI/Transcript.h7
-rw-r--r--Scripts/transcribe.py14
6 files changed, 86 insertions, 18 deletions
diff --git a/BrowserSource/index.html b/BrowserSource/index.html
index 000f366..96692cf 100644
--- a/BrowserSource/index.html
+++ b/BrowserSource/index.html
@@ -17,6 +17,24 @@
color: #89CFF0;
-webkit-text-stroke: 3.0px #000;
}
+ .red_circle {
+ height: 50px;
+ width: 50px;
+ background-color: red;
+ border-radius: 50%;
+ display: inline-block;
+ vertical-align: middle;
+ margin: 20px;
+ }
+ .grey_circle {
+ height: 50px;
+ width: 50px;
+ background-color: grey;
+ border-radius: 50%;
+ display: inline-block;
+ vertical-align: middle;
+ margin: 20px;
+ }
</style>
<body>
<div id="transcript"></div>
@@ -31,7 +49,15 @@
dataType: 'json',
success: function(data) {
var transcript = data.transcript;
- $('#transcript').html(transcript);
+ var red_circle = '<span class="red_circle"></span>';
+ var grey_circle = '<span class="grey_circle"></span>';
+ var transcript_w_circle = transcript;
+ if (data.is_final == 1) {
+ transcript_w_circle += grey_circle;
+ } else {
+ transcript_w_circle += red_circle;
+ }
+ $('#transcript').html(transcript_w_circle);
$('#transcript').css("background-color", "#00000080");
},
error: function(jqXHR, textStatus, errorThrown) {
diff --git a/GUI/GUI/GUI/BrowserSource.cpp b/GUI/GUI/GUI/BrowserSource.cpp
index 62e3e43..45ca7f9 100644
--- a/GUI/GUI/GUI/BrowserSource.cpp
+++ b/GUI/GUI/GUI/BrowserSource.cpp
@@ -55,9 +55,12 @@ void BrowserSource::Run(volatile bool* run)
transcript_oss << segment;
}
+ bool is_final = transcript_->IsFinalized();
+
std::ostringstream resp_oss;
resp_oss << "{";
- resp_oss << "\"transcript\":\"" << transcript_oss.str() << "\"";
+ resp_oss << "\"transcript\":\"" << transcript_oss.str() << "\",";
+ resp_oss << "\"is_final\":" << std::to_string(is_final ? 1 : 0) << "";
resp_oss << "}";
payload = resp_oss.str();
type = WebServer::JSON;
diff --git a/GUI/GUI/GUI/Frame.cpp b/GUI/GUI/GUI/Frame.cpp
index 236d375..e6506bd 100644
--- a/GUI/GUI/GUI/Frame.cpp
+++ b/GUI/GUI/GUI/Frame.cpp
@@ -8,6 +8,7 @@
#include <filesystem>
#include <fstream>
#include <regex>
+#include <sstream>
#include <string>
#include <vector>
#include <wx/filepicker.h>
@@ -2164,20 +2165,31 @@ void Frame::OnAppStart(wxCommandEvent& event) {
app_c_->keybind = keybind;
app_c_->Serialize(AppConfig::kConfigPath);
- auto out_cb = [&](const std::string& out, const std::string& err) {
- Log(transcribe_out_, "{}", out);
- Log(transcribe_out_, "{}", err);
-
- std::regex pattern("^Transcription \\(([0-9]*\\.[0-9]+) seconds\\):");
- if (std::regex_search(out, pattern)) {
- std::string filtered_transcript = std::regex_replace(out, pattern, "");
- filtered_transcript.erase(std::remove_if(filtered_transcript.begin(), filtered_transcript.end(), [](char c) {
- return c == '\n' || c == '\r';
- }), filtered_transcript.end());
- //Log(transcribe_out_, "Got transcription line! Transcript: \"{}\"", filtered_transcript);
- transcript_.Set(std::move(filtered_transcript));
- }
- };
+ // Output callback for the transcription subprocess. Logs both captured
+ // streams, then scans `out` line by line for two kinds of lines:
+ //   "Finalized: 1" / "Finalized: 0"   -> updates the transcript's finalized flag
+ //   "Transcription (<secs> seconds):" -> the remainder of the line becomes the
+ //                                        new transcript text
+ auto out_cb = [&](const std::string& out, const std::string& err) {
+   Log(transcribe_out_, "{}", out);
+   Log(transcribe_out_, "{}", err);
+
+   // Compiling a std::regex is expensive, so build it once instead of once per
+   // line of output. The pattern itself is unchanged.
+   static const std::regex pattern("^Transcription \\(([0-9]*\\.[0-9]+) seconds\\):");
+
+   std::istringstream out_iss(out);
+   std::string out_line;
+   while (std::getline(out_iss, out_line)) {
+     if (out_line.starts_with("Finalized: 1")) {
+       transcript_.SetFinalized(true);
+     }
+     else if (out_line.starts_with("Finalized: 0")) {
+       transcript_.SetFinalized(false);
+     }
+
+     if (std::regex_search(out_line, pattern)) {
+       // Drop the "Transcription (...):" prefix, keeping only the text.
+       std::string filtered_transcript = std::regex_replace(out_line, pattern, "");
+       // std::getline already consumed '\n'; a '\r' can remain if the line ended
+       // in "\r\n". Strip both to match the previous behavior.
+       std::erase_if(filtered_transcript, [](char c) {
+         return c == '\n' || c == '\r';
+       });
+       //Log(transcribe_out_, "Got transcription line! Transcript: \"{}\"", filtered_transcript);
+       transcript_.Set(std::move(filtered_transcript));
+     }
+   }
+ };
auto in_cb = [&](std::string& in) {};
auto run_cb = [&]() {
return run_py_app_;
diff --git a/GUI/GUI/GUI/Transcript.cpp b/GUI/GUI/GUI/Transcript.cpp
index 9ef607f..e635343 100644
--- a/GUI/GUI/GUI/Transcript.cpp
+++ b/GUI/GUI/GUI/Transcript.cpp
@@ -20,3 +20,13 @@ std::vector<std::string> Transcript::Get() {
std::scoped_lock l(mu_);
return segments_;
}
+
+// Records whether the transcription engine has committed the entire
+// transcript (true) or may still revise it (false).
+void Transcript::SetFinalized(bool is_finalized) {
+  // is_finalized_ is a plain bool. If it is written and read from different
+  // threads without synchronization, that is a data race (undefined behavior)
+  // in C++ no matter how small the object is, so take mu_ the same way Get()
+  // does.
+  std::scoped_lock l(mu_);
+  is_finalized_ = is_finalized;
+}
+
+// Returns the most recently recorded finalized flag.
+bool Transcript::IsFinalized() {
+  // is_finalized_ is a plain bool. Reading it while another thread may write
+  // it is a data race (undefined behavior) in C++ regardless of object size,
+  // so read it under mu_ the same way Get() reads segments_.
+  std::scoped_lock l(mu_);
+  return is_finalized_;
+}
diff --git a/GUI/GUI/GUI/Transcript.h b/GUI/GUI/GUI/Transcript.h
index fae2bad..07cf6c0 100644
--- a/GUI/GUI/GUI/Transcript.h
+++ b/GUI/GUI/GUI/Transcript.h
@@ -13,9 +13,16 @@ public:
void Set(std::string&& segment);
void Clear();
+ // Indicate whether the transcript is "finalized", i.e. the transcription
+ // engine has committed the entirety of the transcript and will no longer
+ // change it.
+ void SetFinalized(bool is_finalized);
+
std::vector<std::string> Get();
+ bool IsFinalized();
private:
std::mutex mu_;
std::vector<std::string> segments_;
+ bool is_finalized_{ false };
};
diff --git a/Scripts/transcribe.py b/Scripts/transcribe.py
index c249e43..039811c 100644
--- a/Scripts/transcribe.py
+++ b/Scripts/transcribe.py
@@ -364,6 +364,18 @@ def transcribeAudio(audio_state,
audio_state.text += text
audio_state.preview_text = audio_state.text + preview_text
+ if len(preview_text) == 0:
+ print("Finalized: 1")
+ else:
+ print("Finalized: 0")
+
+ # Hard cap transcript at 4096 chars. Letting it grow longer than this
+ # eventually causes lag. This happens routinely when streaming. Capping
+ # like this does not affect the visible portion of the transcript in
+ # OBS, but it might affect the visible portion in-game. (Don't make
+ # your friends read more than 4k characters on a fucking chatbox.)
+ audio_state.text = audio_state.text[-4096:]
+
now = time.time()
if audio_state.enable_debug_mode:
print("Raw transcription ({} seconds): {}".format(
@@ -739,8 +751,6 @@ def transcribeLoop(mic: str,
print(f"Translation ready to go")
- print("Safe to start talking")
-
abspath = os.path.abspath(__file__)
dname = os.path.dirname(abspath)
model_root = os.path.join(dname, "Models", model)