diff options
| author | yum <yum.food.vr@gmail.com> | 2023-02-24 14:35:02 -0800 |
|---|---|---|
| committer | yum <yum.food.vr@gmail.com> | 2023-02-24 14:35:02 -0800 |
| commit | 8170273eb786d52aa9a9c1cdbef1357c32d95471 (patch) | |
| tree | 0a19c8632beec6e51e51582a004bed21b0e04041 | |
| parent | c18b6842757e1a40f27bd940a810e32f3a0c485c (diff) | |
Finish browser source proof-of-concept
It's a crashy mess, but it sort of works.
* Add Transcript class to send transcription segments between layers
| -rw-r--r-- | BrowserSource/index.html | 13 | ||||
| -rw-r--r-- | GUI/GUI/GUI/BrowserSource.cpp | 6 | ||||
| -rw-r--r-- | GUI/GUI/GUI/BrowserSource.h | 21 | ||||
| -rw-r--r-- | GUI/GUI/GUI/GUI.vcxproj | 2 | ||||
| -rw-r--r-- | GUI/GUI/GUI/GUI.vcxproj.filters | 6 | ||||
| -rw-r--r-- | GUI/GUI/GUI/Transcript.cpp | 16 | ||||
| -rw-r--r-- | GUI/GUI/GUI/Transcript.h | 20 | ||||
| -rw-r--r-- | GUI/GUI/GUI/WhisperCPP.cpp | 16 | ||||
| -rw-r--r-- | GUI/GUI/GUI/WhisperCPP.h | 3 |
9 files changed, 88 insertions, 15 deletions
diff --git a/BrowserSource/index.html b/BrowserSource/index.html index decdba2..29cffbf 100644 --- a/BrowserSource/index.html +++ b/BrowserSource/index.html @@ -3,10 +3,22 @@ <head> <title>TaSTT</title> <script src="https://code.jquery.com/jquery-3.6.3.min.js"></script> + <link rel="stylesheet" + href="https://fonts.googleapis.com/css?family=Noto+Sans+Mono&display=swap"> </head> + <style> + body { + font-family: 'Noto Sans Mono', monospace; + font-size: 48px; + font-weight: 700; + } + </style> <body> <div id="transcript"></div> <script> + function scrollToBottom() { + window.scrollTo(0,document.body.scrollHeight); + } function getTranscript() { $.ajax({ // TODO(yum) parameterize the port @@ -21,6 +33,7 @@ console.error('Error getting transcript: ', textStatus, errorThrown); } }); + scrollToBottom(); } setInterval(getTranscript, /*interval_ms=*/100); </script> diff --git a/GUI/GUI/GUI/BrowserSource.cpp b/GUI/GUI/GUI/BrowserSource.cpp index 9bf19cb..2403411 100644 --- a/GUI/GUI/GUI/BrowserSource.cpp +++ b/GUI/GUI/GUI/BrowserSource.cpp @@ -6,8 +6,8 @@ using ::Logging::Log;
-BrowserSource::BrowserSource(uint16_t port, wxTextCtrl *out)
- : port_(port), out_(out)
+BrowserSource::BrowserSource(uint16_t port, wxTextCtrl *out, Transcript *transcript)  // transcript is borrowed, not owned; Run() hands it to AppController
+ : port_(port), out_(out), transcript_(transcript)  // only the pointers are stored; no Transcript copy is made
{}
void BrowserSource::Run(volatile bool* run)
@@ -37,7 +37,7 @@ void BrowserSource::Run(volatile bool* run) OATPP_CREATE_COMPONENT(std::shared_ptr<oatpp::web::server::HttpRouter>, httpRouter)([] {
return oatpp::web::server::HttpRouter::createShared();
}());
- httpRouter.getObject()->addController(std::make_shared<AppController>(apiObjectMapper.getObject()));
+ httpRouter.getObject()->addController(std::make_shared<AppController>(apiObjectMapper.getObject(), transcript_));
OATPP_CREATE_COMPONENT(std::shared_ptr<oatpp::network::ConnectionHandler>, serverConnectionHandler)([&] {
return oatpp::web::server::HttpConnectionHandler::createShared(httpRouter.getObject());
diff --git a/GUI/GUI/GUI/BrowserSource.h b/GUI/GUI/GUI/BrowserSource.h index 6387d1c..25b9aa5 100644 --- a/GUI/GUI/GUI/BrowserSource.h +++ b/GUI/GUI/GUI/BrowserSource.h @@ -15,6 +15,8 @@ #include "oatpp/web/server/HttpConnectionHandler.hpp"
#include "oatpp/web/protocol/http/incoming/Request.hpp"
+#include "Transcript.h"
+
#include <stdint.h>
#include <filesystem>
@@ -37,15 +39,20 @@ class AppDto : public oatpp::DTO class AppController : public oatpp::web::server::api::ApiController
{
public:
- AppController(std::shared_ptr<ObjectMapper> objectMapper)
- : oatpp::web::server::api::ApiController(objectMapper)
+ AppController(std::shared_ptr<ObjectMapper> objectMapper, Transcript* transcript)  // transcript: borrowed pointer read by the /api/transcript endpoint
+ : oatpp::web::server::api::ApiController(objectMapper), transcript_(transcript)  // NOTE(review): nothing here rejects a null transcript - confirm callers never pass one
{}
-public:
ENDPOINT("GET", "/api/transcript", transcription) {  // GET /api/transcript: replies with the whole transcript as one string inside an AppDto
auto dto = AppDto::createShared();
dto->statusCode = 200;
- dto->transcript = "Hello World!";
+
+ std::ostringstream oss;
+ std::vector<std::string> segments = transcript_->Get();  // Get() returns a copy made under the Transcript mutex; transcript_ is dereferenced without a null check
+ for (const auto& seg : segments) {
+ oss << seg;  // segments are concatenated in stored order with no separator added
+ }
+ dto->transcript = oss.str();
return createDtoResponse(Status::CODE_200, dto);
}
@@ -57,6 +64,9 @@ public: html_ifs.read(resp.data(), resp.size());
return createResponse(Status::CODE_200, resp.data());
}
+
+private:
+ Transcript* const transcript_;
};
#include OATPP_CODEGEN_END(ApiController)
@@ -64,12 +74,13 @@ public: class BrowserSource
{
public:
- BrowserSource(uint16_t port, wxTextCtrl *out);
+ BrowserSource(uint16_t port, wxTextCtrl *out, Transcript *transcript);  // transcript is borrowed and must outlive this object
void Run(volatile bool* run);  // NOTE(review): presumably *run is polled as a stop flag - confirm; volatile does not synchronize threads, std::atomic<bool> would
private:
const uint16_t port_;
wxTextCtrl* const out_;
+ Transcript* const transcript_;  // non-owning; passed on to AppController when the router is set up
};
diff --git a/GUI/GUI/GUI/GUI.vcxproj b/GUI/GUI/GUI/GUI.vcxproj index e3fac34..568b06f 100644 --- a/GUI/GUI/GUI/GUI.vcxproj +++ b/GUI/GUI/GUI/GUI.vcxproj @@ -157,6 +157,7 @@ <ClCompile Include="Logging.cpp" />
<ClCompile Include="main.cpp" />
<ClCompile Include="PythonWrapper.cpp" />
+ <ClCompile Include="Transcript.cpp" />
<ClCompile Include="WhisperCPP.cpp" />
</ItemGroup>
<ItemGroup>
@@ -169,6 +170,7 @@ <ClInclude Include="resource.h" />
<ClInclude Include="ryml.h" />
<ClInclude Include="ScopeGuard.h" />
+ <ClInclude Include="Transcript.h" />
<ClInclude Include="Util.h" />
<ClInclude Include="WhisperCPP.h" />
</ItemGroup>
diff --git a/GUI/GUI/GUI/GUI.vcxproj.filters b/GUI/GUI/GUI/GUI.vcxproj.filters index 2d87bd3..aa2e6d1 100644 --- a/GUI/GUI/GUI/GUI.vcxproj.filters +++ b/GUI/GUI/GUI/GUI.vcxproj.filters @@ -39,6 +39,9 @@ <ClCompile Include="BrowserSource.cpp">
<Filter>Source Files</Filter>
</ClCompile>
+ <ClCompile Include="Transcript.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="PythonWrapper.h">
@@ -74,6 +77,9 @@ <ClInclude Include="BrowserSource.h">
<Filter>Header Files</Filter>
</ClInclude>
+ <ClInclude Include="Transcript.h">
+ <Filter>Header Files</Filter>
+ </ClInclude>
</ItemGroup>
<ItemGroup>
<ResourceCompile Include="GUI.rc">
diff --git a/GUI/GUI/GUI/Transcript.cpp b/GUI/GUI/GUI/Transcript.cpp new file mode 100644 index 0000000..30f1f76 --- /dev/null +++ b/GUI/GUI/GUI/Transcript.cpp @@ -0,0 +1,16 @@ +#include "Transcript.h"
+
+void Transcript::Append(std::string&& segment) {  // Adds one segment to the end of the transcript, taking over the caller's string.
+ std::scoped_lock l(mu_);  // held until return, so this cannot interleave with Clear() or Get()
+ segments_.push_back(std::move(segment));  // moved, not copied; the caller's string is left in a moved-from state
+}
+
+void Transcript::Clear() {  // Discards every stored segment.
+ std::scoped_lock l(mu_);  // same mutex as Append()/Get(), so readers never observe a half-cleared vector
+ segments_.clear();
+}
+
+std::vector<std::string> Transcript::Get() {  // Returns a snapshot of all segments, in the order they were appended.
+ std::scoped_lock l(mu_);
+ return segments_;  // copied while mu_ is still held; later Append()/Clear() calls do not affect the returned vector
+}
diff --git a/GUI/GUI/GUI/Transcript.h b/GUI/GUI/GUI/Transcript.h new file mode 100644 index 0000000..09858b0 --- /dev/null +++ b/GUI/GUI/GUI/Transcript.h @@ -0,0 +1,20 @@ +#pragma once
+
+#include <mutex>
+#include <string>
+#include <vector>
+
+// Simple thread-safe class to share transcription data between layers.
+class Transcript {  // the std::mutex member makes this type non-copyable and non-movable, so it is shared by pointer
+public:
+ Transcript() = default;
+
+ void Append(std::string&& segment);  // appends one segment, moving from the argument
+ void Clear();  // removes all segments
+
+ std::vector<std::string> Get();  // returns a copy of the segments, oldest first
+
+private:
+ std::mutex mu_;  // guards segments_; locked by Append(), Clear() and Get()
+ std::vector<std::string> segments_;  // transcript pieces in append order
+};
diff --git a/GUI/GUI/GUI/WhisperCPP.cpp b/GUI/GUI/GUI/WhisperCPP.cpp index 2e872f9..e8ed4ef 100644 --- a/GUI/GUI/GUI/WhisperCPP.cpp +++ b/GUI/GUI/GUI/WhisperCPP.cpp @@ -238,6 +238,7 @@ bool WhisperCPP::CreateContext(Whisper::iModel* model, Whisper::iContext*& conte void WhisperCPP::Start(const AppConfig& c) {
Init();
+ transcript_.Clear();
if (!transcription_thd_.valid()) {
Log(out_, "Transcription engine already running\n");
@@ -301,11 +302,11 @@ void WhisperCPP::Start(const AppConfig& c) { wparams.n_max_text_ctx = 100;
wparams.new_segment_callback = [](iContext* context, uint32_t n_new, void* user_data) noexcept -> HRESULT {
- wxTextCtrl* out = static_cast<wxTextCtrl*>(user_data);
+ WhisperCPP* app = static_cast<WhisperCPP*>(user_data);
iTranscribeResult* results = nullptr;
HRESULT err = context->getResults(eResultFlags::Timestamps | eResultFlags::Tokens, &results);
if (FAILED(err)) {
- Log(out, "Failed to get transcription: {}\n", hresultToString(err));
+ Log(app->out_, "Failed to get transcription: {}\n", hresultToString(err));
return S_OK;
}
ScopeGuard results_cleanup([results]() { results->Release(); });
@@ -313,7 +314,7 @@ void WhisperCPP::Start(const AppConfig& c) { sTranscribeLength length;
err = results->getSize(length);
if (FAILED(err)) {
- Log(out, "Failed to get transcription size: {}\n", hresultToString(err));
+ Log(app->out_, "Failed to get transcription size: {}\n", hresultToString(err));
return S_OK;
}
@@ -366,16 +367,17 @@ void WhisperCPP::Start(const AppConfig& c) { continue;
}
#endif
- Log(out, "{}", tok.text);
+ Log(app->out_, "{}", tok.text);
+ app->transcript_.Append(tok.text);
}
}
if (n_new) {
- Log(out, "\n");
+ Log(app->out_, "\n");
}
return S_OK;
};
- wparams.new_segment_callback_user_data = out_;
+ wparams.new_segment_callback_user_data = this;
sCaptureCallbacks callbacks{};
@@ -418,7 +420,7 @@ void WhisperCPP::StartBrowserSource(const AppConfig& c) { browser_src_thd_ = std::async(std::launch::async, [&]() -> void {
run_browser_src_ = true;
- BrowserSource src(c.browser_src_port, out_);
+ BrowserSource src(c.browser_src_port, out_, &transcript_);
src.Run(&run_browser_src_);
Log(out_, "Browser source thread exit\n");
});
diff --git a/GUI/GUI/GUI/WhisperCPP.h b/GUI/GUI/GUI/WhisperCPP.h index c6e1011..fbaab46 100644 --- a/GUI/GUI/GUI/WhisperCPP.h +++ b/GUI/GUI/GUI/WhisperCPP.h @@ -13,6 +13,7 @@ #include "whisper/whisperWindows.h"
#include "Config.h"
+#include "Transcript.h"
#include <filesystem>
#include <functional>
@@ -52,4 +53,6 @@ private: std::future<void> browser_src_thd_;
volatile bool run_browser_src_;
+
+ Transcript transcript_;
};
|
