summaryrefslogtreecommitdiffstats
path: root/BrowserSource/Proxy/HTTPParser.cpp
blob: 4f6c850e59cd41a8d32db64c3c195e2845fd68ff (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
#include "HTTPParser.h"
#include "Logging.h"
#include "ScopeGuard.h"

#include <sstream>
#include <string.h>
#include <string_view>

using ::Logging::Log;

namespace WebServer {
	// Default construction performs no work of its own.
	HTTPParser::HTTPParser() = default;

	namespace {
		// CRLF; ends the request line and separates individual header lines.
		constexpr char kLineDelim[] = "\r\n";
		// CRLF CRLF (a blank line); ends the header block.
		constexpr char kHeadersDelim[] = "\r\n\r\n";
		// Characters stripped from both ends of header names and values.
		constexpr char kRfcLWS[] = " \t\r\n";
	}  // namespace

	// Parses a complete raw HTTP request and stores its pieces on this object.
	//
	// The message is consumed in three stages: the request line (up to the
	// first CRLF), the header block (up to the first blank line) and the
	// payload (everything that remains). Each stage is handed to
	// ParseSegment(), which also moves `state` on to the next stage.
	//
	// Segment layout: after a stage the cursor moves only one byte past the
	// start of the delimiter that ended it. The header segment therefore
	// begins with the '\n' left over from the request line's CRLF, and the
	// payload segment begins with "\n\r\n". The stage parsers rely on this
	// layout, so it is deliberately kept as-is.
	//
	// raw_http: the full request text.
	// err: human-readable diagnostics are appended on failure.
	// Returns true if every stage parsed. Returns false if a delimiter is
	// missing, a stage parser fails, or the input ends before the header
	// block was reached (for example an empty string).
	bool HTTPParser::Parse(const std::string& raw_http, std::string& err) {
		std::ostringstream err_oss;
		ScopeGuard err_oss_flush([&]() { err += err_oss.str(); });

		ParserState state = PARSER_STATE_START_LINE;
		size_t pos = 0;
		while (pos < raw_http.length()) {
			// Starts as npos so that a `state` matching none of the cases
			// takes the "no delimiter" failure path instead of reading an
			// uninitialized value.
			size_t end = std::string::npos;
			switch (state) {
			case PARSER_STATE_START_LINE:
				end = raw_http.find(kLineDelim, pos);
				break;
			case PARSER_STATE_HEADERS:
				// `pos` sits on the '\n' of the request line's CRLF. Searching
				// from the '\r' just before it lets a request without any
				// header lines (CRLF directly followed by the blank line)
				// find its terminator.
				end = raw_http.find(kHeadersDelim, pos > 0 ? pos - 1 : 0);
				break;
			case PARSER_STATE_PAYLOAD:
				end = raw_http.length();
				break;
			}
			if (end == std::string::npos) {
				err_oss << "Failed to parse HTTP in state " << state << ": No delimiter!" << std::endl;
				return false;
			}
			// `end` precedes `pos` only in the header-less case above; the
			// header segment is then empty.
			const size_t segment_length = end > pos ? end - pos : 0;
			const std::string_view segment(raw_http.data() + pos, segment_length);
			if (!ParseSegment(segment, state, err)) {
				return false;
			}
			pos = end + 1;
		}
		if (state != PARSER_STATE_PAYLOAD) {
			// Nothing was parsed at all; do not report success for it.
			err_oss << "Failed to parse HTTP in state " << state << ": Incomplete request!" << std::endl;
			return false;
		}
		return true;
	}

	// Returns the request method: the first space-delimited piece of the
	// request line, as stored by ParseStartLine().
	const std::string& HTTPParser::GetMethod() const {
		return method_;
	}

	// Returns the request path: the second space-delimited piece of the
	// request line, as stored by ParseStartLine().
	const std::string& HTTPParser::GetPath() const {
		return path_;
	}

	// Looks up a single parsed header by name.
	//
	// header: the key to search for in headers_.
	// value: receives a copy of the stored value when the header exists;
	//   left untouched otherwise.
	// Returns true if the header was found, false if not.
	bool HTTPParser::GetHeader(const std::string& header, std::string& value) const {
		if (const auto match = headers_.find(header); match != headers_.end()) {
			value = match->second;
			return true;
		}
		return false;
	}

	// Returns every header stored by ParseHeaders(), keyed by field name.
	const std::map<std::string, std::string>& HTTPParser::GetHeaders() const {
		return headers_;
	}

	// Returns the request body as stored by ParsePayload().
	const std::string& HTTPParser::GetPayload() const {
		return payload_;
	}

	// Routes one segment of the request to the parser for the current stage.
	//
	// segment: the slice of the request belonging to the current stage.
	// state: selects the stage parser; that parser may advance it.
	// err: receives diagnostics appended by the stage parser, or by this
	//   function when `state` holds no known stage.
	// Returns the stage parser's result, or false for an unknown stage.
	bool HTTPParser::ParseSegment(
		const std::string_view segment,
		ParserState& state,
		std::string& err) {
		switch (state) {
		case PARSER_STATE_START_LINE:
			return ParseStartLine(segment, state, err);
		case PARSER_STATE_HEADERS:
			return ParseHeaders(segment, state, err);
		case PARSER_STATE_PAYLOAD:
			return ParsePayload(segment, state, err);
		}
		// Only reachable if `state` holds a value outside the handled cases.
		// Fail explicitly rather than fall off the end of a non-void function,
		// which is undefined behavior. There is deliberately no `default:`
		// label, so compilers can still warn about unhandled enumerators.
		std::ostringstream err_oss;
		err_oss << "Failed to parse HTTP: Unknown parser state " << static_cast<int>(state) << std::endl;
		err += err_oss.str();
		return false;
	}

	// Tracks which piece of the request line ParseStartLine() expects next.
	enum StartLineParserState {
		// Expecting the method.
		START_LINE_PARSER_STATE_METHOD = 0,
		// Expecting the path.
		START_LINE_PARSER_STATE_PATH = 1,
		// Expecting the HTTP version.
		START_LINE_PARSER_STATE_VERSION = 2,
		// All three pieces seen; any further piece is an error.
		START_LINE_PARSER_STATE_END = 3,
	};
	// Source: RFC 2616 section 5.1.1.
	bool HTTPParser::ParseStartLine(
		const std::string_view segment,
		ParserState& state,
		std::string& err) {
		std::ostringstream err_oss;
		ScopeGuard err_oss_flush([&]() { err += err_oss.str(); });

		// Request-Line = Method SP Request-URI SP HTTP-Version CRLF
		// SP == space.
		// Thus we expect to see exactly three space-delimited chunks.
		StartLineParserState cur_state = START_LINE_PARSER_STATE_METHOD;
		size_t pos = 0;
		while (pos < segment.length()) {
			size_t end = segment.find(' ', pos);
			if (end == std::string::npos) {
				end = segment.length();
			}
			ScopeGuard advance_pos([&]() { pos = end + 1; });

			std::string_view cur_segment(segment.data() + pos, end - pos);
			switch (cur_state) {
			case START_LINE_PARSER_STATE_METHOD:
				method_ = cur_segment;
				cur_state = START_LINE_PARSER_STATE_PATH;
				continue;
			case START_LINE_PARSER_STATE_PATH:
				path_ = cur_segment;
				cur_state = START_LINE_PARSER_STATE_VERSION;
				continue;
			case START_LINE_PARSER_STATE_VERSION:
				// TODO(yum) check this
				cur_state = START_LINE_PARSER_STATE_END;
				continue;
			case START_LINE_PARSER_STATE_END:
				err_oss << "Invalid start line: has too many parts: " << segment << std::endl;
				return false;
			}
		}
		if (cur_state != START_LINE_PARSER_STATE_END) {
			err_oss << "Invalid start line: missing parts: " << segment << std::endl;
			return false;
		}

		state = PARSER_STATE_HEADERS;
		return true;
	}

	// Source: RFC 2616 section 4.2.
	//
	// Parses the header block of a request into headers_.
	//
	// segment: the header lines, separated by CRLF. Stray line-break
	//   characters around a line (such as a leading '\n') are tolerated,
	//   because lines, names and values are all trimmed of LWS.
	// state: set to PARSER_STATE_PAYLOAD on success.
	// err: a diagnostic is appended when a line is malformed.
	// Returns true on success; false if a line has no ':' or an empty
	// field name.
	bool HTTPParser::ParseHeaders(
		const std::string_view segment,
		ParserState& state,
		std::string& err) {
		std::ostringstream err_oss;
		ScopeGuard err_oss_flush([&]() { err += err_oss.str(); });

		// From the RFC:
		//	message-header = field-name ":" [ field-value ]
		//	field-name = token
		//	field-value = *(field-content | LWS)
		//	field-content = <the OCTETs making up the field - value
		//		and consisting of either * TEXT or combinations
		//		of token, separators, and quoted-string>
		// Takeaways:
		//  * field-name is guaranteed to not be preceded by whitespace
		//  * field-name is guaranteed to be followed by ":"
		//  * field-value is optional and may be preceded by LWS
		//  * multi-line field-values are guaranteed to start with either ' '
		//    or '\t'

		// Strips LWS from both ends. Text that is empty or entirely LWS
		// yields an empty view; calling substr() with the npos returned by
		// find_first_not_of() would throw std::out_of_range instead.
		const auto trim_lws = [](std::string_view text) -> std::string_view {
			const size_t first = text.find_first_not_of(kRfcLWS);
			if (first == std::string_view::npos) {
				return std::string_view();
			}
			const size_t last = text.find_last_not_of(kRfcLWS);
			return text.substr(first, last - first + 1);
		};
		const size_t delim_length = std::string_view(kLineDelim).length();

		size_t pos = 0;
		while (pos < segment.length()) {
			// Divide into lines.
			size_t end = segment.find(kLineDelim, pos);
			// Lengthen the current line to cover a multi-line header: while
			// the character right after the CRLF is a space or tab, the next
			// physical line continues this header.
			while (end != std::string_view::npos &&
				end + delim_length < segment.length() &&
				(segment[end + delim_length] == ' ' ||
					segment[end + delim_length] == '\t')) {
				end = segment.find(kLineDelim, end + delim_length);
			}
			if (end == std::string_view::npos) {
				end = segment.length();
			}

			const std::string_view line = trim_lws(segment.substr(pos, end - pos));
			pos = end + delim_length;
			if (line.empty()) {
				continue;
			}

			const size_t sep = line.find(':');
			if (sep == std::string_view::npos) {
				err_oss << "Invalid header: No ':' delimiter: " << segment << std::endl;
				return false;
			}

			const std::string_view key = trim_lws(line.substr(0, sep));
			if (key.empty()) {
				err_oss << "Invalid header: Empty field name: " << segment << std::endl;
				return false;
			}
			// Value may contain interspersed LWS (linear whitespace).
			// Could scrub it out, but not necessary for our purposes.
			// An empty value is valid per the grammar above.
			const std::string_view value = trim_lws(line.substr(sep + 1));

			headers_[std::string(key)] = std::string(value);
		}

		state = PARSER_STATE_PAYLOAD;
		return true;
	}

#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wunused-parameter"
	// Stores the request body in payload_.
	//
	// Parse() advances its cursor only one byte into the CRLF CRLF that ends
	// the headers, so the payload segment arrives with the rest of that
	// delimiter ("\n\r\n") still attached. That prefix is stripped and the
	// remainder saved. A segment without the prefix is ignored and payload_
	// is left unchanged.
	//
	// The state and error parameters are unused. Always returns true.
	bool HTTPParser::ParsePayload(
		const std::string_view segment,
		ParserState& /*state*/,
		std::string& /*err*/) {
		constexpr std::string_view kScuffedHeadersDelim = "\n\r\n";
		const size_t prefix_length = kScuffedHeadersDelim.length();

		if (segment.substr(0, prefix_length) == kScuffedHeadersDelim) {
			payload_ = segment.substr(prefix_length);
		}
		return true;
	}
#pragma clang diagnostic pop
}