Skip to content

Commit

Permalink
Make //net/server request parsing less inefficient
Browse files Browse the repository at this point in the history
Reduce the number of copies performed on each byte of the input in
net::HttpServer::ParseHeaders by one by tracking the start position of
each token. Also eliminates some unnecessary bounds checking in
std::string::push_back and some allocations for longer tokens.

Disallow null bytes in headers as code may not handle them
correctly.

Also fix some minor style guide violations.

Doesn't attempt to modernize the interface as that will be done by the
spanification effort soon.

Also doesn't attempt to fix the design flaw of reparsing the headers
every time a packet is received.

Change-Id: I4e470398723e10ce663bc85038db146c2baf1469
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/5831550
Reviewed-by: mmenke <[email protected]>
Commit-Queue: Adam Rice <[email protected]>
Cr-Commit-Position: refs/heads/main@{#1357503}
  • Loading branch information
ricea authored and Chromium LUCI CQ committed Sep 19, 2024
1 parent 8fd5000 commit dae5a0e
Show file tree
Hide file tree
Showing 2 changed files with 102 additions and 49 deletions.
116 changes: 67 additions & 49 deletions net/server/http_server.cc
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
#include "net/socket/server_socket.h"
#include "net/socket/stream_socket.h"
#include "net/socket/tcp_server_socket.h"
#include "third_party/abseil-cpp/absl/cleanup/cleanup.h"

namespace net {

Expand Down Expand Up @@ -272,6 +273,9 @@ int HttpServer::HandleReadResult(HttpConnection* connection, int rv) {
continue;
}

// The headers are reparsed from the beginning every time a packet is
// received. This only really matters if something tries to upload a large
// request body.
HttpServerRequestInfo request;
size_t pos = 0;
if (!ParseHeaders(read_buf->StartOfBuffer(), read_buf->GetSize(),
Expand Down Expand Up @@ -381,7 +385,7 @@ namespace {
// a single space between the method/url and url/protocol.

// Input character types.
enum header_parse_inputs {
enum HeaderParseInputs {
INPUT_LWS,
INPUT_CR,
INPUT_LF,
Expand All @@ -391,7 +395,7 @@ enum header_parse_inputs {
};

// Parser states.
enum header_parse_states {
enum HeaderParseStates {
ST_METHOD, // Receiving the method
ST_URL, // Receiving the URL
ST_PROTO, // Receiving the protocol
Expand All @@ -404,20 +408,28 @@ enum header_parse_states {
MAX_STATES
};

// This state machine has a number of bugs, for example it considers
// "HTTP/1.1 200 OK\r\n"
// "Foo\r\n"
// to be a correctly terminated set of request headers. It also accepts "\n"
// between header lines but requires "\r\n" at the end of the headers.
// TODO(crbug): Consider using a different request parser. Maybe balsa headers
// from QUICHE, if it doesn't increase the binary size too much.

// State transition table
const int parser_state[MAX_STATES][MAX_INPUTS] = {
constexpr int kParserState[MAX_STATES][MAX_INPUTS] = {
/* METHOD */ {ST_URL, ST_ERR, ST_ERR, ST_ERR, ST_METHOD},
/* URL */ {ST_PROTO, ST_ERR, ST_ERR, ST_URL, ST_URL},
/* PROTOCOL */ {ST_ERR, ST_HEADER, ST_NAME, ST_ERR, ST_PROTO},
/* HEADER */ {ST_ERR, ST_ERR, ST_NAME, ST_ERR, ST_ERR},
/* NAME */ {ST_SEPARATOR, ST_DONE, ST_ERR, ST_VALUE, ST_NAME},
/* SEPARATOR */ {ST_SEPARATOR, ST_ERR, ST_ERR, ST_VALUE, ST_ERR},
/* VALUE */ {ST_VALUE, ST_HEADER, ST_NAME, ST_VALUE, ST_VALUE},
/* DONE */ {ST_DONE, ST_DONE, ST_DONE, ST_DONE, ST_DONE},
/* DONE */ {ST_ERR, ST_ERR, ST_DONE, ST_ERR, ST_ERR},
/* ERR */ {ST_ERR, ST_ERR, ST_ERR, ST_ERR, ST_ERR}};

// Convert an input character to the parser's input token.
int charToInput(char ch) {
int CharToInputType(char ch) {
switch (ch) {
case ' ':
case '\t':
Expand All @@ -438,72 +450,78 @@ bool HttpServer::ParseHeaders(const char* data,
size_t data_len,
HttpServerRequestInfo* info,
size_t* ppos) {
size_t& pos = *ppos;
// Copy *ppos to avoid the compiler having to think about pointer aliasing.
size_t pos = *ppos;
// Make sure `pos` is always written back to `ppos` even if an extra return is
// added to the function.
absl::Cleanup set_ppos = [&pos, ppos]() { *ppos = pos; };
int state = ST_METHOD;
std::string buffer;
// Technically a base::span<const uint8_t> would be more correct, but using a
// std::string_view makes integration with the rest of the code easier.
const std::string_view data_view(data, data_len);
size_t token_start = pos;
std::string header_name;
std::string header_value;
while (pos < data_len) {
char ch = data[pos++];
int input = charToInput(ch);
int next_state = parser_state[state][input];

bool transition = (next_state != state);
HttpServerRequestInfo::HeadersMap::iterator it;
for (; pos < data_len; ++pos) {
const char ch = data[pos];
if (ch == '\0') {
// Lots of code assumes strings don't contain null characters, so disallow
// them to be on the safe side.
return false;
}
const int input = CharToInputType(ch);
const int next_state = kParserState[state][input];
if (next_state == ST_ERR) {
// No point in continuing.
return false;
}

const bool transition = (next_state != state);
if (transition) {
const std::string_view token =
data_view.substr(token_start, pos - token_start);
token_start = pos + 1; // Skip the whitespace or separator.
// Do any actions based on state transitions.
switch (state) {
case ST_METHOD:
info->method = buffer;
buffer.clear();
info->method = std::string(token);
break;
case ST_URL:
info->path = buffer;
buffer.clear();
info->path = std::string(token);
break;
case ST_PROTO:
if (buffer != "HTTP/1.1") {
LOG(ERROR) << "Cannot handle request with protocol: " << buffer;
next_state = ST_ERR;
if (token != "HTTP/1.1") {
LOG(ERROR) << "Cannot handle request with protocol: " << token;
return false;
}
buffer.clear();
break;
case ST_NAME:
header_name = base::ToLowerASCII(buffer);
buffer.clear();
header_name = base::ToLowerASCII(token);
break;
case ST_VALUE:
base::TrimWhitespaceASCII(buffer, base::TRIM_LEADING, &header_value);
it = info->headers.find(header_name);
case ST_VALUE: {
std::string_view header_value =
base::TrimWhitespaceASCII(token, base::TRIM_LEADING);
// See the second paragraph ("A sender MUST NOT generate multiple
// header fields...") of tools.ietf.org/html/rfc7230#section-3.2.2.
if (it == info->headers.end()) {
info->headers[header_name] = header_value;
} else {
it->second.append(",");
it->second.append(header_value);
auto [it, inserted] = info->headers.try_emplace(
std::move(header_name), std::move(header_value));
header_name.clear(); // Avoid use-after-move lint error.
if (!inserted) {
// Since the insertion did not happen, try_emplace() did not move
// the contents of `header_value` and we can still use it.
std::string& value = it->second;
value.reserve(value.size() + 1 + header_value.size());
value.push_back(',');
value.append(header_value);
}
buffer.clear();
break;
case ST_SEPARATOR:
break;
}
}
state = next_state;
} else {
// Do any actions based on current state
switch (state) {
case ST_METHOD:
case ST_URL:
case ST_PROTO:
case ST_VALUE:
case ST_NAME:
buffer.append(&ch, 1);
break;
case ST_DONE:
// We got CR to get this far, also need the LF
return (input == INPUT_LF);
case ST_ERR:
return false;
if (state == ST_DONE) {
++pos; // Point to the first byte of the body.
return true;
}
}
}
Expand Down
35 changes: 35 additions & 0 deletions net/server/http_server_unittest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#include "base/auto_reset.h"
#include "base/check_op.h"
#include "base/compiler_specific.h"
#include "base/containers/span.h"
#include "base/format_macros.h"
#include "base/functional/bind.h"
#include "base/functional/callback_helpers.h"
Expand Down Expand Up @@ -953,6 +954,40 @@ TEST_F(HttpServerTest, WrongProtocolRequest) {
}
}

// A null byte in the headers should cause the request to be rejected.
TEST_F(HttpServerTest, NullByteInHeaders) {
constexpr char kNullByteInHeader[] =
"GET / HTTP/1.1\r\n"
"User-Agent: Mozilla\0/\r\n"
"\r\n";
TestHttpClient client;
CreateConnection(&client);

client.Send(std::string(kNullByteInHeader, std::size(kNullByteInHeader) - 1));
client.ExpectUsedThenDisconnectedWithNoData();

ASSERT_EQ(1u, connection_map().size());
ASSERT_FALSE(connection_map().begin()->second);
EXPECT_FALSE(HasRequest());
}

// A null byte in the body should be accepted.
TEST_F(HttpServerTest, NullByteInBody) {
// We use the trailing null byte added by the compiler as the "body" of the
// request.
constexpr char kNullByteInBody[] =
"POST /body HTTP/1.1\r\n"
"User-Agent: Mozilla\r\n"
"Content-Length: 1\r\n"
"\r\n";
TestHttpClient client;
CreateConnection(&client);

client.Send(std::string(kNullByteInBody, std::size(kNullByteInBody)));
auto request = WaitForRequest();
EXPECT_EQ(request.info.data, std::string_view("\0", 1));
}

class MockStreamSocket : public StreamSocket {
public:
MockStreamSocket() = default;
Expand Down

0 comments on commit dae5a0e

Please sign in to comment.