summaryrefslogtreecommitdiffstats
path: root/BrowserSource/Proxy/HTTPParser.cpp
diff options
context:
space:
mode:
author yum <yum.food.vr@gmail.com> 2023-07-03 18:44:43 -0700
committer yum <yum.food.vr@gmail.com> 2023-07-03 19:36:13 -0700
commit 76ae7c28ea6224b2c919122d5dc71bcc00a0ecaa (patch)
tree 9723fd02715d747cfc439d1f66d36821a56069e9 /BrowserSource/Proxy/HTTPParser.cpp
parent 7888ccc96d001512dd3bdc01f299856e86c876f5 (diff)
Begin work on proxy server
Create a simple server with 3 endpoints:
* /create_session: Create a session and return its identifier.
* /set_transcript: Update a session's transcript.
* /get_transcript: Fetch a session's transcript.

Right now the session ID provides authentication *and* authorization. There is no public/private ID, so you have to trust whoever you share your ID with. IDs are long and generated by the server, so it should be somewhat secure against low-effort hacking.

Other updates:
* Drop whisper_requirements.txt - no longer needed.
* Vendor curl to make it easier to interact with the server.

TODO:
* Fuzz test the server.
Diffstat (limited to 'BrowserSource/Proxy/HTTPParser.cpp')
-rw-r--r--BrowserSource/Proxy/HTTPParser.cpp229
1 files changed, 229 insertions, 0 deletions
diff --git a/BrowserSource/Proxy/HTTPParser.cpp b/BrowserSource/Proxy/HTTPParser.cpp
new file mode 100644
index 0000000..4f6c850
--- /dev/null
+++ b/BrowserSource/Proxy/HTTPParser.cpp
@@ -0,0 +1,229 @@
+#include "HTTPParser.h"
+#include "Logging.h"
+#include "ScopeGuard.h"
+
+#include <sstream>
+#include <string.h>
+#include <string_view>
+
+using ::Logging::Log;
+
+namespace WebServer {
+ // Default construction: no explicit initialisation is performed here.
+ HTTPParser::HTTPParser() = default;
+
+ namespace {
+   // CRLF: ends the request line and separates individual header lines.
+   constexpr char kLineDelim[] = "\r\n";
+   // CRLF CRLF: the blank line that ends the header block.
+   constexpr char kHeadersDelim[] = "\r\n\r\n";
+   // Characters trimmed from both ends of header names and values.
+   constexpr char kRfcLWS[] = " \t\r\n";
+ }  // namespace
+
+ // Parses a complete raw HTTP request.
+ //
+ // The request is consumed as three consecutive segments, each handed to
+ // ParseSegment(), which also advances `state`:
+ //   1. the request line, up to the first CRLF;
+ //   2. the header block, up to the blank line (CRLF CRLF);
+ //   3. the payload, i.e. everything that remains.
+ //
+ // After each segment the cursor moves to one byte past the *start* of the
+ // delimiter that ended it. The header segment therefore begins with the LF
+ // of the request line's CRLF, and the payload segment begins with "\n\r\n".
+ // ParseHeaders() and ParsePayload() rely on exactly these shapes.
+ //
+ // Returns true on success. On failure returns false and appends a
+ // description of the problem to `err`.
+ bool HTTPParser::Parse(const std::string& raw_http, std::string& err) {
+   ParserState state = PARSER_STATE_START_LINE;
+   size_t pos = 0;
+   while (pos < raw_http.length()) {
+     // Initialised so an unexpected `state` is reported as "No delimiter"
+     // instead of reading an indeterminate value.
+     size_t end = std::string::npos;
+     switch (state) {
+       case PARSER_STATE_START_LINE:
+         end = raw_http.find(kLineDelim, pos);
+         break;
+       case PARSER_STATE_HEADERS:
+         // `pos` sits on the LF of the request line's CRLF (so pos >= 1).
+         // Search from the CR before it, so a blank line directly after the
+         // request line (a request without headers) is recognised as the
+         // end of the header block instead of being skipped over.
+         end = raw_http.find(kHeadersDelim, pos - 1);
+         break;
+       case PARSER_STATE_PAYLOAD:
+         end = raw_http.length();
+         break;
+     }
+     if (end == std::string::npos) {
+       std::ostringstream err_oss;
+       err_oss << "Failed to parse HTTP in state " << state << ": No delimiter!" << std::endl;
+       err += err_oss.str();
+       return false;
+     }
+
+     // `end` precedes `pos` only in the header-less case above; the header
+     // segment is then empty.
+     const size_t segment_len = end > pos ? end - pos : 0;
+     const std::string_view segment(raw_http.data() + pos, segment_len);
+     if (!ParseSegment(segment, state, err)) {
+       return false;
+     }
+     pos = end + 1;
+   }
+
+   // Only an empty input leaves the parser in its initial state; that is not
+   // a valid request.
+   if (state == PARSER_STATE_START_LINE) {
+     err += "Failed to parse HTTP: Empty request!\n";
+     return false;
+   }
+   return true;
+ }
+
+ const std::string& HTTPParser::GetMethod() const {  // Request method: the first space-delimited token of the request line, as stored by ParseStartLine().
+ return method_;
+ }
+
+ const std::string& HTTPParser::GetPath() const {  // Request path: the second space-delimited token of the request line, as stored by ParseStartLine().
+ return path_;
+ }
+
+ // Looks up `header` among the parsed headers. On a hit, copies the stored
+ // value into `value` and returns true; on a miss, leaves `value` untouched
+ // and returns false.
+ bool HTTPParser::GetHeader(const std::string& header, std::string& value) const {
+   if (const auto found = headers_.find(header); found != headers_.end()) {
+     value = found->second;
+     return true;
+   }
+   return false;
+ }
+
+ const std::map<std::string, std::string>& HTTPParser::GetHeaders() const {  // Read-only access to every header stored by ParseHeaders(), keyed by field name.
+ return headers_;
+ }
+
+ const std::string& HTTPParser::GetPayload() const {  // Request body as stored by ParsePayload(); stays as it was if that function found no body prefix.
+ return payload_;
+ }
+
+ // Dispatches `segment` to the parser that matches the current `state`.
+ //
+ // The selected parser may advance `state` to the next phase. Returns that
+ // parser's result; on failure the parser appends its own message to `err`.
+ bool HTTPParser::ParseSegment(
+     const std::string_view segment,
+     ParserState& state,
+     std::string& err) {
+   switch (state) {
+     case PARSER_STATE_START_LINE:
+       return ParseStartLine(segment, state, err);
+     case PARSER_STATE_HEADERS:
+       return ParseHeaders(segment, state, err);
+     case PARSER_STATE_PAYLOAD:
+       return ParsePayload(segment, state, err);
+   }
+   // Only reachable if `state` holds a value outside the enumerators handled
+   // above. Fail explicitly rather than flowing off the end of a non-void
+   // function, which is undefined behaviour.
+   err += "Failed to parse HTTP: Unknown parser state!\n";
+   return false;
+ }
+
+ enum StartLineParserState {  // Which request-line token ParseStartLine() expects next.
+ START_LINE_PARSER_STATE_METHOD,  // Next token is the method.
+ START_LINE_PARSER_STATE_PATH,  // Next token is the request path.
+ START_LINE_PARSER_STATE_VERSION,  // Next token is the HTTP version.
+ START_LINE_PARSER_STATE_END,  // All three tokens consumed; any further token is an error.
+ };
+ // Parses the request line. Source: RFC 2616 section 5.1.1:
+ //
+ //   Request-Line = Method SP Request-URI SP HTTP-Version CRLF
+ //
+ // `segment` is the line without its trailing CRLF. It is split on single
+ // spaces, so exactly three chunks are expected: the first is stored as the
+ // method, the second as the path, and the third (the HTTP version) is
+ // accepted without validation.
+ //
+ // On success sets `state` to PARSER_STATE_HEADERS and returns true. If the
+ // line has too few or too many chunks, appends a message to `err` and
+ // returns false.
+ bool HTTPParser::ParseStartLine(
+     const std::string_view segment,
+     ParserState& state,
+     std::string& err) {
+   // Appends `prefix` followed by the offending line to `err`.
+   const auto reject = [&segment, &err](const char* prefix) {
+     std::ostringstream msg;
+     msg << prefix << segment << std::endl;
+     err += msg.str();
+     return false;
+   };
+
+   StartLineParserState expecting = START_LINE_PARSER_STATE_METHOD;
+   for (size_t cursor = 0; cursor < segment.length();) {
+     const size_t space = segment.find(' ', cursor);
+     const size_t stop = (space == std::string_view::npos) ? segment.length() : space;
+     const std::string_view token = segment.substr(cursor, stop - cursor);
+     cursor = stop + 1;  // Step over the space (or past the end).
+
+     if (expecting == START_LINE_PARSER_STATE_METHOD) {
+       method_ = token;
+       expecting = START_LINE_PARSER_STATE_PATH;
+     } else if (expecting == START_LINE_PARSER_STATE_PATH) {
+       path_ = token;
+       expecting = START_LINE_PARSER_STATE_VERSION;
+     } else if (expecting == START_LINE_PARSER_STATE_VERSION) {
+       // TODO(yum) check this
+       expecting = START_LINE_PARSER_STATE_END;
+     } else {
+       return reject("Invalid start line: has too many parts: ");
+     }
+   }
+
+   if (expecting != START_LINE_PARSER_STATE_END) {
+     return reject("Invalid start line: missing parts: ");
+   }
+
+   state = PARSER_STATE_HEADERS;
+   return true;
+ }
+
+ // Parses the header block into `headers_`. Source: RFC 2616 section 4.2.
+ //
+ // From the RFC:
+ //   message-header = field-name ":" [ field-value ]
+ //   field-name     = token
+ //   field-value    = *(field-content | LWS)
+ //   field-content  = <the OCTETs making up the field-value
+ //                    and consisting of either *TEXT or combinations
+ //                    of token, separators, and quoted-string>
+ // Takeaways:
+ //   * field-name is guaranteed to not be preceded by whitespace
+ //   * field-name is guaranteed to be followed by ":"
+ //   * field-value may be empty and may be preceded by LWS
+ //   * multi-line field-values are guaranteed to start with either ' '
+ //     or '\t'
+ //
+ // `segment` holds the CRLF-separated header lines without the terminating
+ // blank line. It may begin with a stray LF left over from the request
+ // line's CRLF; per-line trimming removes it.
+ //
+ // On success sets `state` to PARSER_STATE_PAYLOAD and returns true. On a
+ // malformed line, appends a message to `err` and returns false.
+ bool HTTPParser::ParseHeaders(
+     const std::string_view segment,
+     ParserState& state,
+     std::string& err) {
+   // Strips leading/trailing linear whitespace. Returns an empty view when
+   // `text` is empty or all whitespace. Calling substr() with the npos that
+   // find_first_not_of() yields in that case would throw std::out_of_range.
+   const auto trim = [](std::string_view text) {
+     const size_t first = text.find_first_not_of(kRfcLWS);
+     if (first == std::string_view::npos) {
+       return std::string_view();
+     }
+     const size_t last = text.find_last_not_of(kRfcLWS);
+     return text.substr(first, (last - first) + 1);
+   };
+   // Appends `prefix` followed by the whole header block to `err`.
+   const auto reject = [&segment, &err](const char* prefix) {
+     std::ostringstream msg;
+     msg << prefix << segment << std::endl;
+     err += msg.str();
+     return false;
+   };
+
+   constexpr size_t kLineDelimLen = sizeof(kLineDelim) - 1;
+   size_t pos = 0;
+   while (pos < segment.length()) {
+     // Divide into lines.
+     size_t end = segment.find(kLineDelim, pos);
+     if (end == std::string_view::npos) {
+       end = segment.length();
+     }
+
+     // Lengthen the current line to cover a multi-line header: the line
+     // continues while the first character after the CRLF is SP or HT.
+     while (end + kLineDelimLen < segment.length() &&
+            (segment[end + kLineDelimLen] == ' ' ||
+             segment[end + kLineDelimLen] == '\t')) {
+       const size_t next = segment.find(kLineDelim, end + kLineDelimLen);
+       end = (next == std::string_view::npos) ? segment.length() : next;
+     }
+
+     const std::string_view line = trim(segment.substr(pos, end - pos));
+     pos = end + kLineDelimLen;
+     if (line.empty()) {
+       continue;
+     }
+
+     const size_t sep = line.find(':');
+     if (sep == std::string_view::npos) {
+       return reject("Invalid header: No ':' delimiter: ");
+     }
+
+     const std::string_view key = trim(line.substr(0, sep));
+     if (key.empty()) {
+       return reject("Invalid header: Empty field name: ");
+     }
+     // Value may contain interspersed LWS (linear whitespace).
+     // Could scrub it out, but not necessary for our purposes.
+     // An empty value is legal and is stored as "".
+     const std::string_view value = trim(line.substr(sep + 1));
+
+     headers_[std::string(key)] = std::string(value);
+   }
+
+   state = PARSER_STATE_PAYLOAD;
+   return true;
+ }
+
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wunused-parameter"
+ // Stores the request body in `payload_`.
+ //
+ // Parse() resumes one byte past the start of the CRLF CRLF header
+ // terminator, so a payload segment arrives prefixed with the remaining
+ // "\n\r\n". When that prefix is present, everything after it becomes the
+ // payload; otherwise `payload_` is left as it was. Always returns true;
+ // `state` and `err` are unused.
+ bool HTTPParser::ParsePayload(
+     const std::string_view segment,
+     ParserState& state,
+     std::string& err) {
+   constexpr std::string_view kScuffedHeadersDelim = "\n\r\n";
+
+   const bool has_prefix =
+       segment.substr(0, kScuffedHeadersDelim.size()) == kScuffedHeadersDelim;
+   if (has_prefix) {
+     payload_ = segment.substr(kScuffedHeadersDelim.size());
+   }
+   return true;
+ }
+#pragma clang diagnostic pop
+}