From 353e47011450e04a089121db6ff3b83d50209487 Mon Sep 17 00:00:00 2001
From: yum
Date: Sat, 25 Feb 2023 14:56:23 -0800
Subject: Add HTTP parser

Server needs to parse incoming HTTP.

* Server spawns a thread for each incoming connection
---
 GUI/GUI/GUI/GUI.vcxproj         |   2 +
 GUI/GUI/GUI/GUI.vcxproj.filters |   6 ++
 GUI/GUI/GUI/HTTPParser.cpp      | 217 ++++++++++++++++++++++++++++++++++++++++
 GUI/GUI/GUI/HTTPParser.h        |  51 ++++++++++
 GUI/GUI/GUI/WebServer.cpp       |  74 ++++++++++++--
 GUI/GUI/GUI/WebServer.h         |   5 +
 6 files changed, 349 insertions(+), 6 deletions(-)
 create mode 100644 GUI/GUI/GUI/HTTPParser.cpp
 create mode 100644 GUI/GUI/GUI/HTTPParser.h

(limited to 'GUI')

diff --git a/GUI/GUI/GUI/GUI.vcxproj b/GUI/GUI/GUI/GUI.vcxproj
index 1f14088..f683c4a 100644
--- a/GUI/GUI/GUI/GUI.vcxproj
+++ b/GUI/GUI/GUI/GUI.vcxproj
@@ -155,6 +155,7 @@
+
@@ -168,6 +169,7 @@
+
diff --git a/GUI/GUI/GUI/GUI.vcxproj.filters b/GUI/GUI/GUI/GUI.vcxproj.filters
index 43f6791..49b2a85 100644
--- a/GUI/GUI/GUI/GUI.vcxproj.filters
+++ b/GUI/GUI/GUI/GUI.vcxproj.filters
@@ -51,6 +51,9 @@
 WebServer
+
+ WebServer
+
@@ -98,6 +101,9 @@
 WebServer
+
+ WebServer
+
diff --git a/GUI/GUI/GUI/HTTPParser.cpp b/GUI/GUI/GUI/HTTPParser.cpp
new file mode 100644
index 0000000..3e15afa
--- /dev/null
+++ b/GUI/GUI/GUI/HTTPParser.cpp
@@ -0,0 +1,217 @@
+#include "HTTPParser.h"
+#include "ScopeGuard.h"
+
+#include <sstream>
+
+namespace WebServer {
+    HTTPParser::HTTPParser() {}
+
+    namespace {
+        constexpr const char kLineDelim[] = "\r\n";
+        constexpr const char kHeadersDelim[] = "\r\n\r\n";
+        constexpr const char kRfcSP[] = " ";
+        constexpr const char kRfcHT[] = "\t";
+        constexpr const char kRfcLWS[] = " \t\r\n";
+
+        // Delimiter lengths, excluding the terminating NUL.
+        constexpr size_t kLineDelimLen = sizeof(kLineDelim) - 1;
+        constexpr size_t kHeadersDelimLen = sizeof(kHeadersDelim) - 1;
+    };
+
+    // Parses `raw_http` as a single HTTP request made of a start line, a
+    // header block and an optional payload. Parsed pieces are stored in the
+    // members exposed by the getters below; results of any earlier call are
+    // discarded first.
+    //
+    // Returns true on success. On failure returns false and appends a
+    // description to `err`; this includes input that ends before the blank
+    // line terminating the header block.
+    bool HTTPParser::Parse(const std::string& raw_http, std::string& err) {
+        std::ostringstream err_oss;
+        ScopeGuard err_oss_flush([&]() { err += err_oss.str(); });
+
+        method_.clear();
+        path_.clear();
+        headers_.clear();
+        payload_.clear();
+
+        ParserState state = PARSER_STATE_START_LINE;
+        size_t pos = 0;
+        while (pos < raw_http.length()) {
+            size_t end = std::string::npos;
+            // Number of delimiter bytes to skip once the segment is consumed.
+            // Must be decided here because ParseSegment() advances `state`.
+            size_t delim_len = 0;
+            switch (state) {
+            case PARSER_STATE_START_LINE:
+                end = raw_http.find(kLineDelim, pos);
+                delim_len = kLineDelimLen;
+                break;
+            case PARSER_STATE_HEADERS:
+                if (raw_http.compare(pos, kLineDelimLen, kLineDelim) == 0) {
+                    // The start line is directly followed by the blank
+                    // line: the request carries no header fields.
+                    end = pos;
+                    delim_len = kLineDelimLen;
+                } else {
+                    end = raw_http.find(kHeadersDelim, pos);
+                    delim_len = kHeadersDelimLen;
+                }
+                break;
+            case PARSER_STATE_PAYLOAD:
+                // The payload is everything that remains.
+                end = raw_http.length();
+                break;
+            }
+            if (end == std::string::npos) {
+                err_oss << "Failed to parse HTTP in state " << state << ": No delimiter!" << '\n';
+                return false;
+            }
+            const std::string_view segment(raw_http.data() + pos, end - pos);
+            if (!ParseSegment(segment, state, err)) {
+                return false;
+            }
+            // Skip the whole delimiter so the next segment does not start
+            // with leftover CR/LF bytes.
+            pos = end + delim_len;
+        }
+        if (state != PARSER_STATE_PAYLOAD) {
+            // Input ran out before the header block was terminated.
+            err_oss << "Failed to parse HTTP in state " << state << ": Incomplete message!" << '\n';
+            return false;
+        }
+        return true;
+    }
+
+    // Returns the request method stored by the last start-line parse.
+    const std::string& HTTPParser::GetMethod() const {
+        return method_;
+    }
+
+    // Returns the request path stored by the last start-line parse.
+    const std::string& HTTPParser::GetPath() const {
+        return path_;
+    }
+
+    // Looks up `header` (exact, case-sensitive match) and copies its value
+    // into `value`. Returns false, leaving `value` untouched, if absent.
+    bool HTTPParser::GetHeader(const std::string& header, std::string& value) const {
+        const auto iter = headers_.find(header);
+        if (iter == headers_.end()) {
+            return false;
+        }
+        value = iter->second;
+        return true;
+    }
+
+    // Returns all parsed header fields.
+    const std::map<std::string, std::string>& HTTPParser::GetHeaders() const {
+        return headers_;
+    }
+
+    // Returns the request payload; empty if none was parsed.
+    const std::string& HTTPParser::GetPayload() const {
+        return payload_;
+    }
+
+    // Forwards `segment` to the parser matching `state`. The callee may
+    // advance `state`. Returns false and appends to `err` on failure.
+    bool HTTPParser::ParseSegment(
+        const std::string_view segment,
+        ParserState& state,
+        std::string& err) {
+        switch (state) {
+        case PARSER_STATE_START_LINE:
+            return ParseStartLine(segment, state, err);
+        case PARSER_STATE_HEADERS:
+            return ParseHeaders(segment, state, err);
+        case PARSER_STATE_PAYLOAD:
+            return ParsePayload(segment, state, err);
+        }
+        // Only reachable if `state` holds a value outside the enumerators;
+        // fail explicitly instead of falling off the end of the function.
+        err += "Failed to parse HTTP: Unknown parser state!\n";
+        return false;
+    }
+
+    enum StartLineParserState {
+        START_LINE_PARSER_STATE_METHOD,
+        START_LINE_PARSER_STATE_PATH,
+        START_LINE_PARSER_STATE_VERSION,
+        START_LINE_PARSER_STATE_END,
+    };
+    // Source: RFC 2616 section 5.1.1.
+    // Parses the request line held in `segment` (without its trailing CRLF),
+    // storing the method and path. On success advances `state` to
+    // PARSER_STATE_HEADERS and returns true; otherwise appends a description
+    // to `err` and returns false.
+    bool HTTPParser::ParseStartLine(
+        const std::string_view segment,
+        ParserState& state,
+        std::string& err) {
+        std::ostringstream err_oss;
+        ScopeGuard err_oss_flush([&]() { err += err_oss.str(); });
+
+        // Request-Line = Method SP Request-URI SP HTTP-Version CRLF
+        // SP == space.
+        // Thus we expect to see exactly three non-empty space-delimited
+        // chunks.
+        StartLineParserState cur_state = START_LINE_PARSER_STATE_METHOD;
+        size_t pos = 0;
+        // `<=` so that a trailing space yields a final empty chunk, which is
+        // then rejected instead of being silently ignored.
+        while (pos <= segment.length()) {
+            size_t end = segment.find(' ', pos);
+            if (end == std::string_view::npos) {
+                end = segment.length();
+            }
+            const std::string_view part = segment.substr(pos, end - pos);
+            pos = end + 1;
+
+            if (cur_state == START_LINE_PARSER_STATE_END) {
+                err_oss << "Invalid start line: has too many parts: " << segment << '\n';
+                return false;
+            }
+            if (part.empty()) {
+                // Leading, trailing or doubled spaces.
+                err_oss << "Invalid start line: has an empty part: " << segment << '\n';
+                return false;
+            }
+
+            switch (cur_state) {
+            case START_LINE_PARSER_STATE_METHOD:
+                method_ = std::string(part);
+                cur_state = START_LINE_PARSER_STATE_PATH;
+                break;
+            case START_LINE_PARSER_STATE_PATH:
+                path_ = std::string(part);
+                cur_state = START_LINE_PARSER_STATE_VERSION;
+                break;
+            case START_LINE_PARSER_STATE_VERSION:
+                // HTTP-Version = "HTTP" "/" 1*DIGIT "." 1*DIGIT
+                // Only the prefix is checked.
+                // TODO(yum) check the version digits
+                if (part.substr(0, 5) != "HTTP/") {
+                    err_oss << "Invalid start line: bad HTTP version: " << segment << '\n';
+                    return false;
+                }
+                cur_state = START_LINE_PARSER_STATE_END;
+                break;
+            case START_LINE_PARSER_STATE_END:
+                // Handled before the switch.
+                break;
+            }
+        }
+        if (cur_state != START_LINE_PARSER_STATE_END) {
+            err_oss << "Invalid start line: missing parts: " << segment << '\n';
+            return false;
+        }
+
+        state = PARSER_STATE_HEADERS;
+        return true;
+    }
+
+    // Source: RFC 2616 section 4.2.
+    // Parses the header block held in `segment` (the text between the start
+    // line and the blank line) into `headers_`. A later field with the same
+    // name overwrites an earlier one. On success advances `state` to
+    // PARSER_STATE_PAYLOAD and returns true; otherwise appends a description
+    // to `err` and returns false.
+    bool HTTPParser::ParseHeaders(
+        const std::string_view segment,
+        ParserState& state,
+        std::string& err) {
+        std::ostringstream err_oss;
+        ScopeGuard err_oss_flush([&]() { err += err_oss.str(); });
+
+        // From the RFC:
+        //   message-header = field-name ":" [ field-value ]
+        //   field-name     = token
+        //   field-value    = *(field-content | LWS)
+        // Takeaways:
+        // * field-name is guaranteed to not be preceded by whitespace
+        // * field-name is guaranteed to be followed by ":"
+        // * field-value may be preceded by LWS, and may be empty
+        // * multi-line field-values are guaranteed to start with either ' '
+        //   or '\t'
+        constexpr size_t kDelimLen = sizeof(kLineDelim) - 1;
+
+        // Strips leading and trailing LWS. Yields an empty view when `text`
+        // is empty or all whitespace, so substr() is never handed npos.
+        const auto trim_lws = [](std::string_view text) {
+            const size_t first = text.find_first_not_of(kRfcLWS);
+            if (first == std::string_view::npos) {
+                return std::string_view();
+            }
+            const size_t last = text.find_last_not_of(kRfcLWS);
+            return text.substr(first, (last - first) + 1);
+        };
+
+        size_t pos = 0;
+        while (pos < segment.length()) {
+            // Find the end of the logical line: the first CRLF that is not
+            // followed by ' ' or '\t' (which would continue the field-value).
+            size_t end = segment.find(kLineDelim, pos);
+            while (end != std::string_view::npos &&
+                end + kDelimLen < segment.length() &&
+                (segment[end + kDelimLen] == ' ' || segment[end + kDelimLen] == '\t')) {
+                end = segment.find(kLineDelim, end + kDelimLen);
+            }
+            if (end == std::string_view::npos) {
+                end = segment.length();
+            }
+
+            const std::string_view line = segment.substr(pos, end - pos);
+            // Skip the full CRLF so the next line starts on its first byte.
+            pos = end + kDelimLen;
+
+            // Ignore blank lines and stray CR/LF bytes.
+            if (line.find_first_not_of(kRfcLWS) == std::string_view::npos) {
+                continue;
+            }
+
+            const size_t sep = line.find(':');
+            if (sep == std::string_view::npos) {
+                err_oss << "Invalid header: No ':' delimiter: " << line << '\n';
+                return false;
+            }
+
+            const std::string_view key = trim_lws(line.substr(0, sep));
+            if (key.empty()) {
+                err_oss << "Invalid header: Empty field name: " << line << '\n';
+                return false;
+            }
+            // Value may contain interspersed LWS (linear whitespace).
+            // Could scrub it out, but not necessary for our purposes.
+            const std::string_view value = trim_lws(line.substr(sep + 1));
+
+            headers_[std::string(key)] = std::string(value);
+        }
+
+        state = PARSER_STATE_PAYLOAD;
+        return true;
+    }
+
+    // Stores `segment` verbatim as the payload. `state` and `err` are not
+    // used; PARSER_STATE_PAYLOAD is the final state. Always returns true.
+    bool HTTPParser::ParsePayload(
+        const std::string_view segment,
+        ParserState& state,
+        std::string& err) {
+        payload_ = segment;
+        return true;
+    }
+}
diff --git a/GUI/GUI/GUI/HTTPParser.h b/GUI/GUI/GUI/HTTPParser.h
new file mode 100644
index 0000000..7fcfe0e
--- /dev/null
+++ b/GUI/GUI/GUI/HTTPParser.h
@@ -0,0 +1,51 @@
+#pragma once
+
+#include <map>
+#include <string>
+
+namespace WebServer {
+
+    // A simple HTTP/1.1 message parser based on RFC 2616.
+    class HTTPParser
+    {
+    public:
+        HTTPParser();
+
+        bool Parse(const std::string& raw_http, std::string& err);
+
+        const std::string& GetMethod() const;
+        const std::string& GetPath() const;
+        bool GetHeader(const std::string& header, std::string& value) const;
+        const std::map<std::string, std::string>& GetHeaders() const;
+        const std::string& GetPayload() const;
+
+    private:
+        enum ParserState {
+            PARSER_STATE_START_LINE,
+            PARSER_STATE_HEADERS,
+            PARSER_STATE_PAYLOAD,
+        };
+
+        bool ParseSegment(
+            const std::string_view segment,
+            ParserState& state,
+            std::string& err);
+        bool ParseStartLine(
+            const std::string_view segment,
+            ParserState& state,
+            std::string& err);
+        bool ParseHeaders(
+            const std::string_view segment,
+            ParserState& state,
+            std::string& err);
+        bool ParsePayload(
+            const std::string_view segment,
+            ParserState& state,
+            std::string& err);
+
+        std::string method_;
+        std::string path_;
+        std::map<std::string, std::string> headers_;
+        std::string payload_;
+    };
+}
diff --git a/GUI/GUI/GUI/WebServer.cpp b/GUI/GUI/GUI/WebServer.cpp
index 6cce5d6..0704950 100644
--- a/GUI/GUI/GUI/WebServer.cpp
+++ b/GUI/GUI/GUI/WebServer.cpp
@@ -4,6 +4,7 @@
 #include
 #endif
+#include "HTTPParser.h"
 #include "ScopeGuard.h"
 #include "WebServer.h"
@@ -82,13
+83,74 @@ namespace WebServer { Log(out_, "Accept failed: {}\n", WSAGetLastError()); return false; } - ScopeGuard csock_cleanup([csock]() { closesocket(csock); }); - char peer_ip_str[INET_ADDRSTRLEN]{}; - inet_ntop(AF_INET, &peer_addr.sin_addr, peer_ip_str, sizeof(peer_ip_str)); - Log(out_, "Connection get: peer: {}:{}\n", peer_ip_str, ntohs(peer_addr.sin_port)); - // TODO(yum) parse and send a response - } + // TODO(yum) periodically cull connections_. + wxTextCtrl* out = out_; + const auto& dispatch_map = dispatch_map_; + connections_.push_back(std::async(std::launch::async, [csock, peer_addr, out, run, dispatch_map]() -> void { + ScopeGuard csock_cleanup([csock]() { closesocket(csock); }); + char peer_ip_str[INET_ADDRSTRLEN]{}; + inet_ntop(AF_INET, &peer_addr.sin_addr, peer_ip_str, sizeof(peer_ip_str)); + Log(out, "Connection get: peer: {}:{}\n", peer_ip_str, ntohs(peer_addr.sin_port)); + + std::string buf(4096 * 16, 0); + int cur_bytes_read = 0; + int sum_bytes_read = 0; + + bool abort_client = false; + while (*run) { + cur_bytes_read = recv(csock, buf.data() + sum_bytes_read, + buf.size() - (1 + sum_bytes_read), /*flags=*/0); + if (cur_bytes_read == SOCKET_ERROR) { + if (WSAGetLastError() == WSAEWOULDBLOCK) { + // Client may try to keep the connection open, + // so see if there's a complete request in the + // buffer. If so, terminate the recv loop. + HTTPParser p; + std::string err; + if (p.Parse(buf, err)) { + // In general we should verify that we got a + // full message, but since we only need to + // support GET, this is unnecessary. 
+ cur_bytes_read = 0; + break; + } + continue; + } + break; + } + sum_bytes_read += cur_bytes_read; + if (cur_bytes_read == 0) { + break; + } + } + if (abort_client) { + return; + } + if (cur_bytes_read == SOCKET_ERROR) { + Log(out, "Failed to read client socket: {}\n", WSAGetLastError()); + return; + } + buf.resize(sum_bytes_read); + HTTPParser p; + std::string err; + if (!p.Parse(buf, err)) { + Log(out, "Failed to parse client request: {}\n", err); + Log(out, "Offending request:\n{}\n", buf); + return; + } + + dispatch_key_t dispatch_key = GetDispatchKey(p.GetMethod(), p.GetPath()); + auto iter = dispatch_map.find(dispatch_key); + if (iter == dispatch_map.end()) { + Log(out, "No route defined for client request: {} {}\n", + p.GetMethod(), p.GetPath()); + return; + } + + // TODO(yum) send a response + })); + } return true; } } diff --git a/GUI/GUI/GUI/WebServer.h b/GUI/GUI/GUI/WebServer.h index f66e0f7..96c4eb3 100644 --- a/GUI/GUI/GUI/WebServer.h +++ b/GUI/GUI/GUI/WebServer.h @@ -9,8 +9,11 @@ #include #include +#include #include +#include #include +#include #include "Logging.h" #include "WebCommon.h" @@ -45,6 +48,8 @@ namespace WebServer { wxTextCtrl* const out_; const uint16_t port_; + + std::vector> connections_; }; } -- cgit v1.2.3