From 6c15549e77b15b2958afd5c56c964e1efc9dab18 Mon Sep 17 00:00:00 2001 From: Will Date: Sat, 31 Jan 2015 11:55:31 +0900 Subject: [PATCH] update http parser to 2.4.2 --- src/http_parser.c | 707 +++++++++++++++++++++++++++++++--------------- src/http_parser.h | 64 +++-- 2 files changed, 517 insertions(+), 254 deletions(-) diff --git a/src/http_parser.c b/src/http_parser.c index ed3a9232..c0e7ca15 100644 --- a/src/http_parser.c +++ b/src/http_parser.c @@ -56,19 +56,41 @@ do { \ parser->http_errno = (e); \ } while(0) +#define CURRENT_STATE() p_state +#define UPDATE_STATE(V) p_state = (enum state) (V); +#define RETURN(V) \ +do { \ + parser->state = CURRENT_STATE(); \ + return (V); \ +} while (0); +#define REEXECUTE() \ + --p; \ + break; + + +#ifdef __GNUC__ +# define LIKELY(X) __builtin_expect(!!(X), 1) +# define UNLIKELY(X) __builtin_expect(!!(X), 0) +#else +# define LIKELY(X) (X) +# define UNLIKELY(X) (X) +#endif + /* Run the notify callback FOR, returning ER if it fails */ #define CALLBACK_NOTIFY_(FOR, ER) \ do { \ assert(HTTP_PARSER_ERRNO(parser) == HPE_OK); \ \ - if (settings->on_##FOR) { \ - if (0 != settings->on_##FOR(parser)) { \ + if (LIKELY(settings->on_##FOR)) { \ + parser->state = CURRENT_STATE(); \ + if (UNLIKELY(0 != settings->on_##FOR(parser))) { \ SET_ERRNO(HPE_CB_##FOR); \ } \ + UPDATE_STATE(parser->state); \ \ /* We either errored above or got paused; get out */ \ - if (HTTP_PARSER_ERRNO(parser) != HPE_OK) { \ + if (UNLIKELY(HTTP_PARSER_ERRNO(parser) != HPE_OK)) { \ return (ER); \ } \ } \ @@ -86,13 +108,16 @@ do { \ assert(HTTP_PARSER_ERRNO(parser) == HPE_OK); \ \ if (FOR##_mark) { \ - if (settings->on_##FOR) { \ - if (0 != settings->on_##FOR(parser, FOR##_mark, (LEN))) { \ + if (LIKELY(settings->on_##FOR)) { \ + parser->state = CURRENT_STATE(); \ + if (UNLIKELY(0 != \ + settings->on_##FOR(parser, FOR##_mark, (LEN)))) { \ SET_ERRNO(HPE_CB_##FOR); \ } \ + UPDATE_STATE(parser->state); \ \ /* We either errored above or got paused; get out */ \ - if (HTTP_PARSER_ERRNO(parser) != HPE_OK) { \ + if (UNLIKELY(HTTP_PARSER_ERRNO(parser) != HPE_OK)) { \ return (ER); \ } \ } \ @@ -116,6 +141,26 @@ do { \ } \ } while (0) +/* Don't allow the total size of the HTTP headers (including the status + * line) to exceed HTTP_MAX_HEADER_SIZE. This check is here to protect + * embedders against denial-of-service attacks where the attacker feeds + * us a never-ending header that the embedder keeps buffering. + * + * This check is arguably the responsibility of embedders but we're doing + * it on the embedder's behalf because most won't bother and this way we + * make the web a little safer. HTTP_MAX_HEADER_SIZE is still far bigger + * than any reasonable request or response so this should never affect + * day-to-day operation. + */ +#define COUNT_HEADER_SIZE(V) \ +do { \ + parser->nread += (V); \ + if (UNLIKELY(parser->nread > (HTTP_MAX_HEADER_SIZE))) { \ + SET_ERRNO(HPE_HEADER_OVERFLOW); \ + goto error; \ + } \ +} while (0) + #define PROXY_CONNECTION "proxy-connection" #define CONNECTION "connection" @@ -248,6 +293,7 @@ enum state , s_res_http_minor , s_res_first_status_code , s_res_status_code + , s_res_status_start , s_res_status , s_res_line_almost_done @@ -279,6 +325,9 @@ enum state , s_header_field_start , s_header_field + , s_header_value_discard_ws + , s_header_value_discard_ws_almost_done + , s_header_value_discard_lws , s_header_value_start , s_header_value , s_header_value_lws @@ -330,12 +379,16 @@ enum header_states , h_upgrade , h_matching_transfer_encoding_chunked + , h_matching_connection_token_start , h_matching_connection_keep_alive , h_matching_connection_close + , h_matching_connection_upgrade + , h_matching_connection_token , h_transfer_encoding_chunked , h_connection_keep_alive , h_connection_close + , h_connection_upgrade }; enum http_host_state @@ -367,6 +420,8 @@ enum http_host_state (c) == ';' || (c) == ':' || (c) == '&' || (c) == '=' || (c) == '+' || \ (c) == '$' || (c) == ',') +#define STRICT_TOKEN(c) (tokens[(unsigned char)c]) + #if HTTP_PARSER_STRICT #define TOKEN(c) (tokens[(unsigned char)c]) #define IS_URL_CHAR(c) (BIT_AT(normal_url_char, (unsigned char)c)) @@ -581,6 +636,8 @@ size_t http_parser_execute (http_parser *parser, const char *header_value_mark = 0; const char *url_mark = 0; const char *body_mark = 0; + const char *status_mark = 0; + enum state p_state = (enum state) parser->state; /* We're in an error state. Don't bother doing anything. */ if (HTTP_PARSER_ERRNO(parser) != HPE_OK) { @@ -588,7 +645,7 @@ size_t http_parser_execute (http_parser *parser, } if (len == 0) { - switch (parser->state) { + switch (CURRENT_STATE()) { case s_body_identity_eof: /* Use of CALLBACK_NOTIFY() here would erroneously return 1 byte read if * we got paused. @@ -609,11 +666,11 @@ size_t http_parser_execute (http_parser *parser, } - if (parser->state == s_header_field) + if (CURRENT_STATE() == s_header_field) header_field_mark = data; - if (parser->state == s_header_value) + if (CURRENT_STATE() == s_header_value) header_value_mark = data; - switch (parser->state) { + switch (CURRENT_STATE()) { case s_req_path: case s_req_schema: case s_req_schema_slash: @@ -627,28 +684,26 @@ size_t http_parser_execute (http_parser *parser, case s_req_fragment: url_mark = data; break; + case s_res_status: + status_mark = data; + break; + default: + break; } for (p=data; p != data + len; p++) { ch = *p; - if (PARSING_HEADER(parser->state)) { - ++parser->nread; - /* Buffer overflow attack */ - if (parser->nread > HTTP_MAX_HEADER_SIZE) { - SET_ERRNO(HPE_HEADER_OVERFLOW); - goto error; - } - } + if (PARSING_HEADER(CURRENT_STATE())) + COUNT_HEADER_SIZE(1); - reexecute_byte: - switch (parser->state) { + switch (CURRENT_STATE()) { case s_dead: /* this state is used after a 'Connection: close' message * the parser will error out if it reads another message */ - if (ch == CR || ch == LF) + if (LIKELY(ch == CR || ch == LF)) break; SET_ERRNO(HPE_CLOSED_CONNECTION); @@ -662,13 +717,13 @@ size_t http_parser_execute (http_parser *parser, parser->content_length = ULLONG_MAX; if (ch == 'H') { - parser->state = s_res_or_resp_H; + UPDATE_STATE(s_res_or_resp_H); CALLBACK_NOTIFY(message_begin); } else { parser->type = HTTP_REQUEST; - parser->state = s_start_req; - goto reexecute_byte; + UPDATE_STATE(s_start_req); + REEXECUTE(); } break; @@ -677,9 +732,9 @@ size_t http_parser_execute (http_parser *parser, case s_res_or_resp_H: if (ch == 'T') { parser->type = HTTP_RESPONSE; - parser->state = s_res_HT; + UPDATE_STATE(s_res_HT); } else { - if (ch != 'E') { + if (UNLIKELY(ch != 'E')) { SET_ERRNO(HPE_INVALID_CONSTANT); goto error; } @@ -687,7 +742,7 @@ size_t http_parser_execute (http_parser *parser, parser->type = HTTP_REQUEST; parser->method = HTTP_HEAD; parser->index = 2; - parser->state = s_req_method; + UPDATE_STATE(s_req_method); } break; @@ -698,7 +753,7 @@ size_t http_parser_execute (http_parser *parser, switch (ch) { case 'H': - parser->state = s_res_H; + UPDATE_STATE(s_res_H); break; case CR: @@ -716,39 +771,39 @@ size_t http_parser_execute (http_parser *parser, case s_res_H: STRICT_CHECK(ch != 'T'); - parser->state = s_res_HT; + UPDATE_STATE(s_res_HT); break; case s_res_HT: STRICT_CHECK(ch != 'T'); - parser->state = s_res_HTT; + UPDATE_STATE(s_res_HTT); break; case s_res_HTT: STRICT_CHECK(ch != 'P'); - parser->state = s_res_HTTP; + UPDATE_STATE(s_res_HTTP); break; case s_res_HTTP: STRICT_CHECK(ch != '/'); - parser->state = s_res_first_http_major; + UPDATE_STATE(s_res_first_http_major); break; case s_res_first_http_major: - if (ch < '0' || ch > '9') { + if (UNLIKELY(ch < '0' || ch > '9')) { SET_ERRNO(HPE_INVALID_VERSION); goto error; } parser->http_major = ch - '0'; - parser->state = s_res_http_major; + UPDATE_STATE(s_res_http_major); break; /* major HTTP version or dot */ case s_res_http_major: { if (ch == '.') { - parser->state = s_res_first_http_minor; + UPDATE_STATE(s_res_first_http_minor); break; } @@ -760,7 +815,7 @@ size_t http_parser_execute (http_parser *parser, parser->http_major *= 10; parser->http_major += ch - '0'; - if (parser->http_major > 999) { + if (UNLIKELY(parser->http_major > 999)) { SET_ERRNO(HPE_INVALID_VERSION); goto error; } @@ -770,24 +825,24 @@ size_t http_parser_execute (http_parser *parser, /* first digit of minor HTTP version */ case s_res_first_http_minor: - if (!IS_NUM(ch)) { + if (UNLIKELY(!IS_NUM(ch))) { SET_ERRNO(HPE_INVALID_VERSION); goto error; } parser->http_minor = ch - '0'; - parser->state = s_res_http_minor; + UPDATE_STATE(s_res_http_minor); break; /* minor HTTP version or end of request line */ case s_res_http_minor: { if (ch == ' ') { - parser->state = s_res_first_status_code; + UPDATE_STATE(s_res_first_status_code); break; } - if (!IS_NUM(ch)) { + if (UNLIKELY(!IS_NUM(ch))) { SET_ERRNO(HPE_INVALID_VERSION); goto error; } @@ -795,7 +850,7 @@ size_t http_parser_execute (http_parser *parser, parser->http_minor *= 10; parser->http_minor += ch - '0'; - if (parser->http_minor > 999) { + if (UNLIKELY(parser->http_minor > 999)) { SET_ERRNO(HPE_INVALID_VERSION); goto error; } @@ -814,7 +869,7 @@ size_t http_parser_execute (http_parser *parser, goto error; } parser->status_code = ch - '0'; - parser->state = s_res_status_code; + UPDATE_STATE(s_res_status_code); break; } @@ -823,13 +878,13 @@ size_t http_parser_execute (http_parser *parser, if (!IS_NUM(ch)) { switch (ch) { case ' ': - parser->state = s_res_status; + UPDATE_STATE(s_res_status_start); break; case CR: - parser->state = s_res_line_almost_done; + UPDATE_STATE(s_res_line_almost_done); break; case LF: - parser->state = s_header_field_start; + UPDATE_STATE(s_header_field_start); break; default: SET_ERRNO(HPE_INVALID_STATUS); @@ -841,7 +896,7 @@ size_t http_parser_execute (http_parser *parser, parser->status_code *= 10; parser->status_code += ch - '0'; - if (parser->status_code > 999) { + if (UNLIKELY(parser->status_code > 999)) { SET_ERRNO(HPE_INVALID_STATUS); goto error; } @@ -849,24 +904,42 @@ size_t http_parser_execute (http_parser *parser, break; } + case s_res_status_start: + { + if (ch == CR) { + UPDATE_STATE(s_res_line_almost_done); + break; + } + + if (ch == LF) { + UPDATE_STATE(s_header_field_start); + break; + } + + MARK(status); + UPDATE_STATE(s_res_status); + parser->index = 0; + break; + } + case s_res_status: - /* the human readable status. e.g. "NOT FOUND" - * we are not humans so just ignore this */ if (ch == CR) { - parser->state = s_res_line_almost_done; + UPDATE_STATE(s_res_line_almost_done); + CALLBACK_DATA(status); break; } if (ch == LF) { - parser->state = s_header_field_start; + UPDATE_STATE(s_header_field_start); + CALLBACK_DATA(status); break; } + break; case s_res_line_almost_done: STRICT_CHECK(ch != LF); - parser->state = s_header_field_start; - CALLBACK_NOTIFY(status_complete); + UPDATE_STATE(s_header_field_start); break; case s_start_req: @@ -876,7 +949,7 @@ size_t http_parser_execute (http_parser *parser, parser->flags = 0; parser->content_length = ULLONG_MAX; - if (!IS_ALPHA(ch)) { + if (UNLIKELY(!IS_ALPHA(ch))) { SET_ERRNO(HPE_INVALID_METHOD); goto error; } @@ -889,7 +962,7 @@ size_t http_parser_execute (http_parser *parser, case 'G': parser->method = HTTP_GET; break; case 'H': parser->method = HTTP_HEAD; break; case 'L': parser->method = HTTP_LOCK; break; - case 'M': parser->method = HTTP_MKCOL; /* or MOVE, MKACTIVITY, MERGE, M-SEARCH */ break; + case 'M': parser->method = HTTP_MKCOL; /* or MOVE, MKACTIVITY, MERGE, M-SEARCH, MKCALENDAR */ break; case 'N': parser->method = HTTP_NOTIFY; break; case 'O': parser->method = HTTP_OPTIONS; break; case 'P': parser->method = HTTP_POST; @@ -903,7 +976,7 @@ size_t http_parser_execute (http_parser *parser, SET_ERRNO(HPE_INVALID_METHOD); goto error; } - parser->state = s_req_method; + UPDATE_STATE(s_req_method); CALLBACK_NOTIFY(message_begin); @@ -913,14 +986,14 @@ size_t http_parser_execute (http_parser *parser, case s_req_method: { const char *matcher; - if (ch == '\0') { + if (UNLIKELY(ch == '\0')) { SET_ERRNO(HPE_INVALID_METHOD); goto error; } matcher = method_strings[parser->method]; if (ch == ' ' && matcher[parser->index] == '\0') { - parser->state = s_req_spaces_before_url; + UPDATE_STATE(s_req_spaces_before_url); } else if (ch == matcher[parser->index]) { ; /* nada */ } else if (parser->method == HTTP_CONNECT) { @@ -929,6 +1002,7 @@ size_t http_parser_execute (http_parser *parser, } else if (parser->index == 2 && ch == 'P') { parser->method = HTTP_COPY; } else { + SET_ERRNO(HPE_INVALID_METHOD); goto error; } } else if (parser->method == HTTP_MKCOL) { @@ -940,13 +1014,17 @@ size_t http_parser_execute (http_parser *parser, parser->method = HTTP_MSEARCH; } else if (parser->index == 2 && ch == 'A') { parser->method = HTTP_MKACTIVITY; + } else if (parser->index == 3 && ch == 'A') { + parser->method = HTTP_MKCALENDAR; } else { + SET_ERRNO(HPE_INVALID_METHOD); goto error; } } else if (parser->method == HTTP_SUBSCRIBE) { if (parser->index == 1 && ch == 'E') { parser->method = HTTP_SEARCH; } else { + SET_ERRNO(HPE_INVALID_METHOD); goto error; } } else if (parser->index == 1 && parser->method == HTTP_POST) { @@ -957,13 +1035,27 @@ size_t http_parser_execute (http_parser *parser, } else if (ch == 'A') { parser->method = HTTP_PATCH; } else { + SET_ERRNO(HPE_INVALID_METHOD); goto error; } } else if (parser->index == 2) { if (parser->method == HTTP_PUT) { - if (ch == 'R') parser->method = HTTP_PURGE; + if (ch == 'R') { + parser->method = HTTP_PURGE; + } else { + SET_ERRNO(HPE_INVALID_METHOD); + goto error; + } } else if (parser->method == HTTP_UNLOCK) { - if (ch == 'S') parser->method = HTTP_UNSUBSCRIBE; + if (ch == 'S') { + parser->method = HTTP_UNSUBSCRIBE; + } else { + SET_ERRNO(HPE_INVALID_METHOD); + goto error; + } + } else { + SET_ERRNO(HPE_INVALID_METHOD); + goto error; } } else if (parser->index == 4 && parser->method == HTTP_PROPFIND && ch == 'P') { parser->method = HTTP_PROPPATCH; @@ -982,11 +1074,11 @@ size_t http_parser_execute (http_parser *parser, MARK(url); if (parser->method == HTTP_CONNECT) { - parser->state = s_req_server_start; + UPDATE_STATE(s_req_server_start); } - parser->state = parse_url_char((enum state)parser->state, ch); - if (parser->state == s_dead) { + UPDATE_STATE(parse_url_char(CURRENT_STATE(), ch)); + if (UNLIKELY(CURRENT_STATE() == s_dead)) { SET_ERRNO(HPE_INVALID_URL); goto error; } @@ -1007,8 +1099,8 @@ size_t http_parser_execute (http_parser *parser, SET_ERRNO(HPE_INVALID_URL); goto error; default: - parser->state = parse_url_char((enum state)parser->state, ch); - if (parser->state == s_dead) { + UPDATE_STATE(parse_url_char(CURRENT_STATE(), ch)); + if (UNLIKELY(CURRENT_STATE() == s_dead)) { SET_ERRNO(HPE_INVALID_URL); goto error; } @@ -1027,21 +1119,21 @@ size_t http_parser_execute (http_parser *parser, { switch (ch) { case ' ': - parser->state = s_req_http_start; + UPDATE_STATE(s_req_http_start); CALLBACK_DATA(url); break; case CR: case LF: parser->http_major = 0; parser->http_minor = 9; - parser->state = (ch == CR) ? + UPDATE_STATE((ch == CR) ? s_req_line_almost_done : - s_header_field_start; + s_header_field_start); CALLBACK_DATA(url); break; default: - parser->state = parse_url_char((enum state)parser->state, ch); - if (parser->state == s_dead) { + UPDATE_STATE(parse_url_char(CURRENT_STATE(), ch)); + if (UNLIKELY(CURRENT_STATE() == s_dead)) { SET_ERRNO(HPE_INVALID_URL); goto error; } @@ -1052,7 +1144,7 @@ size_t http_parser_execute (http_parser *parser, case s_req_http_start: switch (ch) { case 'H': - parser->state = s_req_http_H; + UPDATE_STATE(s_req_http_H); break; case ' ': break; @@ -1064,44 +1156,44 @@ size_t http_parser_execute (http_parser *parser, case s_req_http_H: STRICT_CHECK(ch != 'T'); - parser->state = s_req_http_HT; + UPDATE_STATE(s_req_http_HT); break; case s_req_http_HT: STRICT_CHECK(ch != 'T'); - parser->state = s_req_http_HTT; + UPDATE_STATE(s_req_http_HTT); break; case s_req_http_HTT: STRICT_CHECK(ch != 'P'); - parser->state = s_req_http_HTTP; + UPDATE_STATE(s_req_http_HTTP); break; case s_req_http_HTTP: STRICT_CHECK(ch != '/'); - parser->state = s_req_first_http_major; + UPDATE_STATE(s_req_first_http_major); break; /* first digit of major HTTP version */ case s_req_first_http_major: - if (ch < '1' || ch > '9') { + if (UNLIKELY(ch < '1' || ch > '9')) { SET_ERRNO(HPE_INVALID_VERSION); goto error; } parser->http_major = ch - '0'; - parser->state = s_req_http_major; + UPDATE_STATE(s_req_http_major); break; /* major HTTP version or dot */ case s_req_http_major: { if (ch == '.') { - parser->state = s_req_first_http_minor; + UPDATE_STATE(s_req_first_http_minor); break; } - if (!IS_NUM(ch)) { + if (UNLIKELY(!IS_NUM(ch))) { SET_ERRNO(HPE_INVALID_VERSION); goto error; } @@ -1109,7 +1201,7 @@ size_t http_parser_execute (http_parser *parser, parser->http_major *= 10; parser->http_major += ch - '0'; - if (parser->http_major > 999) { + if (UNLIKELY(parser->http_major > 999)) { SET_ERRNO(HPE_INVALID_VERSION); goto error; } @@ -1119,31 +1211,31 @@ size_t http_parser_execute (http_parser *parser, /* first digit of minor HTTP version */ case s_req_first_http_minor: - if (!IS_NUM(ch)) { + if (UNLIKELY(!IS_NUM(ch))) { SET_ERRNO(HPE_INVALID_VERSION); goto error; } parser->http_minor = ch - '0'; - parser->state = s_req_http_minor; + UPDATE_STATE(s_req_http_minor); break; /* minor HTTP version or end of request line */ case s_req_http_minor: { if (ch == CR) { - parser->state = s_req_line_almost_done; + UPDATE_STATE(s_req_line_almost_done); break; } if (ch == LF) { - parser->state = s_header_field_start; + UPDATE_STATE(s_header_field_start); break; } /* XXX allow spaces after digit? */ - if (!IS_NUM(ch)) { + if (UNLIKELY(!IS_NUM(ch))) { SET_ERRNO(HPE_INVALID_VERSION); goto error; } @@ -1151,7 +1243,7 @@ size_t http_parser_execute (http_parser *parser, parser->http_minor *= 10; parser->http_minor += ch - '0'; - if (parser->http_minor > 999) { + if (UNLIKELY(parser->http_minor > 999)) { SET_ERRNO(HPE_INVALID_VERSION); goto error; } @@ -1162,32 +1254,32 @@ size_t http_parser_execute (http_parser *parser, /* end of request line */ case s_req_line_almost_done: { - if (ch != LF) { + if (UNLIKELY(ch != LF)) { SET_ERRNO(HPE_LF_EXPECTED); goto error; } - parser->state = s_header_field_start; + UPDATE_STATE(s_header_field_start); break; } case s_header_field_start: { if (ch == CR) { - parser->state = s_headers_almost_done; + UPDATE_STATE(s_headers_almost_done); break; } if (ch == LF) { /* they might be just sending \n instead of \r\n so this would be * the second \n to denote the end of headers*/ - parser->state = s_headers_almost_done; - goto reexecute_byte; + UPDATE_STATE(s_headers_almost_done); + REEXECUTE(); } c = TOKEN(ch); - if (!c) { + if (UNLIKELY(!c)) { SET_ERRNO(HPE_INVALID_HEADER_TOKEN); goto error; } @@ -1195,7 +1287,7 @@ size_t http_parser_execute (http_parser *parser, MARK(header_field); parser->index = 0; - parser->state = s_header_field; + UPDATE_STATE(s_header_field); switch (c) { case 'c': @@ -1223,9 +1315,14 @@ size_t http_parser_execute (http_parser *parser, case s_header_field: { - c = TOKEN(ch); + const char* start = p; + for (; p != data + len; p++) { + ch = *p; + c = TOKEN(ch); + + if (!c) + break; - if (c) { switch (parser->header_state) { case h_general: break; @@ -1326,23 +1423,17 @@ size_t http_parser_execute (http_parser *parser, assert(0 && "Unknown header_state"); break; } - break; } - if (ch == ':') { - parser->state = s_header_value_start; - CALLBACK_DATA(header_field); - break; - } + COUNT_HEADER_SIZE(p - start); - if (ch == CR) { - parser->state = s_header_almost_done; - CALLBACK_DATA(header_field); + if (p == data + len) { + --p; break; } - if (ch == LF) { - parser->state = s_header_field_start; + if (ch == ':') { + UPDATE_STATE(s_header_value_discard_ws); CALLBACK_DATA(header_field); break; } @@ -1351,28 +1442,28 @@ size_t http_parser_execute (http_parser *parser, goto error; } - case s_header_value_start: - { + case s_header_value_discard_ws: if (ch == ' ' || ch == '\t') break; - MARK(header_value); - - parser->state = s_header_value; - parser->index = 0; - if (ch == CR) { - parser->header_state = h_general; - parser->state = s_header_almost_done; - CALLBACK_DATA(header_value); + UPDATE_STATE(s_header_value_discard_ws_almost_done); break; } if (ch == LF) { - parser->state = s_header_field_start; - CALLBACK_DATA(header_value); + UPDATE_STATE(s_header_value_discard_lws); break; } + /* FALLTHROUGH */ + + case s_header_value_start: + { + MARK(header_value); + + UPDATE_STATE(s_header_value); + parser->index = 0; + c = LOWER(ch); switch (parser->header_state) { @@ -1391,7 +1482,7 @@ size_t http_parser_execute (http_parser *parser, break; case h_content_length: - if (!IS_NUM(ch)) { + if (UNLIKELY(!IS_NUM(ch))) { SET_ERRNO(HPE_INVALID_CONTENT_LENGTH); goto error; } @@ -1406,11 +1497,17 @@ size_t http_parser_execute (http_parser *parser, /* looking for 'Connection: close' */ } else if (c == 'c') { parser->header_state = h_matching_connection_close; + } else if (c == 'u') { + parser->header_state = h_matching_connection_upgrade; } else { - parser->header_state = h_general; + parser->header_state = h_matching_connection_token; } break; + /* Multi-value `Connection` header */ + case h_matching_connection_token_start: + break; + default: parser->header_state = h_general; break; @@ -1420,98 +1517,187 @@ size_t http_parser_execute (http_parser *parser, case s_header_value: { + const char* start = p; + enum header_states h_state = (enum header_states) parser->header_state; + for (; p != data + len; p++) { + ch = *p; + if (ch == CR) { + UPDATE_STATE(s_header_almost_done); + parser->header_state = h_state; + CALLBACK_DATA(header_value); + break; + } - if (ch == CR) { - parser->state = s_header_almost_done; - CALLBACK_DATA(header_value); - break; - } + if (ch == LF) { + UPDATE_STATE(s_header_almost_done); + COUNT_HEADER_SIZE(p - start); + parser->header_state = h_state; + CALLBACK_DATA_NOADVANCE(header_value); + REEXECUTE(); + } - if (ch == LF) { - parser->state = s_header_almost_done; - CALLBACK_DATA_NOADVANCE(header_value); - goto reexecute_byte; - } + c = LOWER(ch); - c = LOWER(ch); + switch (h_state) { + case h_general: + { + const char* p_cr; + const char* p_lf; + size_t limit = data + len - p; + + limit = MIN(limit, HTTP_MAX_HEADER_SIZE); + + p_cr = (const char*) memchr(p, CR, limit); + p_lf = (const char*) memchr(p, LF, limit); + if (p_cr != NULL) { + if (p_lf != NULL && p_cr >= p_lf) + p = p_lf; + else + p = p_cr; + } else if (UNLIKELY(p_lf != NULL)) { + p = p_lf; + } else { + p = data + len; + } + --p; - switch (parser->header_state) { - case h_general: - break; + break; + } - case h_connection: - case h_transfer_encoding: - assert(0 && "Shouldn't get here."); - break; + case h_connection: + case h_transfer_encoding: + assert(0 && "Shouldn't get here."); + break; - case h_content_length: - { - uint64_t t; + case h_content_length: + { + uint64_t t; - if (ch == ' ') break; + if (ch == ' ') break; - if (!IS_NUM(ch)) { - SET_ERRNO(HPE_INVALID_CONTENT_LENGTH); - goto error; - } + if (UNLIKELY(!IS_NUM(ch))) { + SET_ERRNO(HPE_INVALID_CONTENT_LENGTH); + parser->header_state = h_state; + goto error; + } - t = parser->content_length; - t *= 10; - t += ch - '0'; + t = parser->content_length; + t *= 10; + t += ch - '0'; - /* Overflow? */ - if (t < parser->content_length || t == ULLONG_MAX) { - SET_ERRNO(HPE_INVALID_CONTENT_LENGTH); - goto error; + /* Overflow? Test against a conservative limit for simplicity. */ + if (UNLIKELY((ULLONG_MAX - 10) / 10 < parser->content_length)) { + SET_ERRNO(HPE_INVALID_CONTENT_LENGTH); + parser->header_state = h_state; + goto error; + } + + parser->content_length = t; + break; } - parser->content_length = t; - break; - } + /* Transfer-Encoding: chunked */ + case h_matching_transfer_encoding_chunked: + parser->index++; + if (parser->index > sizeof(CHUNKED)-1 + || c != CHUNKED[parser->index]) { + h_state = h_general; + } else if (parser->index == sizeof(CHUNKED)-2) { + h_state = h_transfer_encoding_chunked; + } + break; - /* Transfer-Encoding: chunked */ - case h_matching_transfer_encoding_chunked: - parser->index++; - if (parser->index > sizeof(CHUNKED)-1 - || c != CHUNKED[parser->index]) { - parser->header_state = h_general; - } else if (parser->index == sizeof(CHUNKED)-2) { - parser->header_state = h_transfer_encoding_chunked; - } - break; + case h_matching_connection_token_start: + /* looking for 'Connection: keep-alive' */ + if (c == 'k') { + h_state = h_matching_connection_keep_alive; + /* looking for 'Connection: close' */ + } else if (c == 'c') { + h_state = h_matching_connection_close; + } else if (c == 'u') { + h_state = h_matching_connection_upgrade; + } else if (STRICT_TOKEN(c)) { + h_state = h_matching_connection_token; + } else if (c == ' ' || c == '\t') { + /* Skip lws */ + } else { + h_state = h_general; + } + break; - /* looking for 'Connection: keep-alive' */ - case h_matching_connection_keep_alive: - parser->index++; - if (parser->index > sizeof(KEEP_ALIVE)-1 - || c != KEEP_ALIVE[parser->index]) { - parser->header_state = h_general; - } else if (parser->index == sizeof(KEEP_ALIVE)-2) { - parser->header_state = h_connection_keep_alive; - } - break; + /* looking for 'Connection: keep-alive' */ + case h_matching_connection_keep_alive: + parser->index++; + if (parser->index > sizeof(KEEP_ALIVE)-1 + || c != KEEP_ALIVE[parser->index]) { + h_state = h_matching_connection_token; + } else if (parser->index == sizeof(KEEP_ALIVE)-2) { + h_state = h_connection_keep_alive; + } + break; - /* looking for 'Connection: close' */ - case h_matching_connection_close: - parser->index++; - if (parser->index > sizeof(CLOSE)-1 || c != CLOSE[parser->index]) { - parser->header_state = h_general; - } else if (parser->index == sizeof(CLOSE)-2) { - parser->header_state = h_connection_close; - } - break; + /* looking for 'Connection: close' */ + case h_matching_connection_close: + parser->index++; + if (parser->index > sizeof(CLOSE)-1 || c != CLOSE[parser->index]) { + h_state = h_matching_connection_token; + } else if (parser->index == sizeof(CLOSE)-2) { + h_state = h_connection_close; + } + break; - case h_transfer_encoding_chunked: - case h_connection_keep_alive: - case h_connection_close: - if (ch != ' ') parser->header_state = h_general; - break; + /* looking for 'Connection: upgrade' */ + case h_matching_connection_upgrade: + parser->index++; + if (parser->index > sizeof(UPGRADE) - 1 || + c != UPGRADE[parser->index]) { + h_state = h_matching_connection_token; + } else if (parser->index == sizeof(UPGRADE)-2) { + h_state = h_connection_upgrade; + } + break; - default: - parser->state = s_header_value; - parser->header_state = h_general; - break; + case h_matching_connection_token: + if (ch == ',') { + h_state = h_matching_connection_token_start; + parser->index = 0; + } + break; + + case h_transfer_encoding_chunked: + if (ch != ' ') h_state = h_general; + break; + + case h_connection_keep_alive: + case h_connection_close: + case h_connection_upgrade: + if (ch == ',') { + if (h_state == h_connection_keep_alive) { + parser->flags |= F_CONNECTION_KEEP_ALIVE; + } else if (h_state == h_connection_close) { + parser->flags |= F_CONNECTION_CLOSE; + } else if (h_state == h_connection_upgrade) { + parser->flags |= F_CONNECTION_UPGRADE; + } + h_state = h_matching_connection_token_start; + parser->index = 0; + } else if (ch != ' ') { + h_state = h_matching_connection_token; + } + break; + + default: + UPDATE_STATE(s_header_value); + h_state = h_general; + break; + } } + parser->header_state = h_state; + + COUNT_HEADER_SIZE(p - start); + + if (p == data + len) + --p; break; } @@ -1519,8 +1705,18 @@ size_t http_parser_execute (http_parser *parser, { STRICT_CHECK(ch != LF); - parser->state = s_header_value_lws; + UPDATE_STATE(s_header_value_lws); + break; + } + case s_header_value_lws: + { + if (ch == ' ' || ch == '\t') { + UPDATE_STATE(s_header_value_start); + REEXECUTE(); + } + + /* finished the header */ switch (parser->header_state) { case h_connection_keep_alive: parser->flags |= F_CONNECTION_KEEP_ALIVE; @@ -1531,23 +1727,53 @@ size_t http_parser_execute (http_parser *parser, case h_transfer_encoding_chunked: parser->flags |= F_CHUNKED; break; + case h_connection_upgrade: + parser->flags |= F_CONNECTION_UPGRADE; + break; default: break; } + UPDATE_STATE(s_header_field_start); + REEXECUTE(); + } + + case s_header_value_discard_ws_almost_done: + { + STRICT_CHECK(ch != LF); + UPDATE_STATE(s_header_value_discard_lws); break; } - case s_header_value_lws: + case s_header_value_discard_lws: { - if (ch == ' ' || ch == '\t') - parser->state = s_header_value_start; - else - { - parser->state = s_header_field_start; - goto reexecute_byte; + if (ch == ' ' || ch == '\t') { + UPDATE_STATE(s_header_value_discard_ws); + break; + } else { + switch (parser->header_state) { + case h_connection_keep_alive: + parser->flags |= F_CONNECTION_KEEP_ALIVE; + break; + case h_connection_close: + parser->flags |= F_CONNECTION_CLOSE; + break; + case h_connection_upgrade: + parser->flags |= F_CONNECTION_UPGRADE; + break; + case h_transfer_encoding_chunked: + parser->flags |= F_CHUNKED; + break; + default: + break; + } + + /* header value was empty */ + MARK(header_value); + UPDATE_STATE(s_header_field_start); + CALLBACK_DATA_NOADVANCE(header_value); + REEXECUTE(); } - break; } case s_headers_almost_done: @@ -1556,16 +1782,18 @@ size_t http_parser_execute (http_parser *parser, if (parser->flags & F_TRAILING) { /* End of a chunked request */ - parser->state = NEW_MESSAGE(); + UPDATE_STATE(NEW_MESSAGE()); CALLBACK_NOTIFY(message_complete); break; } - parser->state = s_headers_done; + UPDATE_STATE(s_headers_done); /* Set this here so that on_headers_complete() callbacks can see it */ parser->upgrade = - (parser->flags & F_UPGRADE || parser->method == HTTP_CONNECT); + ((parser->flags & (F_UPGRADE | F_CONNECTION_UPGRADE)) == + (F_UPGRADE | F_CONNECTION_UPGRADE) || + parser->method == HTTP_CONNECT); /* Here we call the headers_complete callback. This is somewhat * different than other callbacks because if the user returns 1, we @@ -1587,15 +1815,15 @@ size_t http_parser_execute (http_parser *parser, default: SET_ERRNO(HPE_CB_headers_complete); - return p - data; /* Error */ + RETURN(p - data); /* Error */ } } if (HTTP_PARSER_ERRNO(parser) != HPE_OK) { - return p - data; + RETURN(p - data); } - goto reexecute_byte; + REEXECUTE(); } case s_headers_done: @@ -1606,34 +1834,34 @@ size_t http_parser_execute (http_parser *parser, /* Exit, the rest of the connect is in a different protocol. */ if (parser->upgrade) { - parser->state = NEW_MESSAGE(); + UPDATE_STATE(NEW_MESSAGE()); CALLBACK_NOTIFY(message_complete); - return (p - data) + 1; + RETURN((p - data) + 1); } if (parser->flags & F_SKIPBODY) { - parser->state = NEW_MESSAGE(); + UPDATE_STATE(NEW_MESSAGE()); CALLBACK_NOTIFY(message_complete); } else if (parser->flags & F_CHUNKED) { /* chunked encoding - ignore Content-Length header */ - parser->state = s_chunk_size_start; + UPDATE_STATE(s_chunk_size_start); } else { if (parser->content_length == 0) { /* Content-Length header given but zero: Content-Length: 0\r\n */ - parser->state = NEW_MESSAGE(); + UPDATE_STATE(NEW_MESSAGE()); CALLBACK_NOTIFY(message_complete); } else if (parser->content_length != ULLONG_MAX) { /* Content-Length header given and non-zero */ - parser->state = s_body_identity; + UPDATE_STATE(s_body_identity); } else { if (parser->type == HTTP_REQUEST || !http_message_needs_eof(parser)) { /* Assume content-length 0 - read the next */ - parser->state = NEW_MESSAGE(); + UPDATE_STATE(NEW_MESSAGE()); CALLBACK_NOTIFY(message_complete); } else { /* Read body until EOF */ - parser->state = s_body_identity_eof; + UPDATE_STATE(s_body_identity_eof); } } } @@ -1659,7 +1887,7 @@ size_t http_parser_execute (http_parser *parser, p += to_read - 1; if (parser->content_length == 0) { - parser->state = s_message_done; + UPDATE_STATE(s_message_done); /* Mimic CALLBACK_DATA_NOADVANCE() but with one extra byte. * @@ -1671,7 +1899,7 @@ size_t http_parser_execute (http_parser *parser, * important for applications, but let's keep it for now. */ CALLBACK_DATA_(body, p - body_mark + 1, p - data); - goto reexecute_byte; + REEXECUTE(); } break; @@ -1685,7 +1913,7 @@ size_t http_parser_execute (http_parser *parser, break; case s_message_done: - parser->state = NEW_MESSAGE(); + UPDATE_STATE(NEW_MESSAGE()); CALLBACK_NOTIFY(message_complete); break; @@ -1695,13 +1923,13 @@ size_t http_parser_execute (http_parser *parser, assert(parser->flags & F_CHUNKED); unhex_val = unhex[(unsigned char)ch]; - if (unhex_val == -1) { + if (UNLIKELY(unhex_val == -1)) { SET_ERRNO(HPE_INVALID_CHUNK_SIZE); goto error; } parser->content_length = unhex_val; - parser->state = s_chunk_size; + UPDATE_STATE(s_chunk_size); break; } @@ -1712,7 +1940,7 @@ size_t http_parser_execute (http_parser *parser, assert(parser->flags & F_CHUNKED); if (ch == CR) { - parser->state = s_chunk_size_almost_done; + UPDATE_STATE(s_chunk_size_almost_done); break; } @@ -1720,7 +1948,7 @@ size_t http_parser_execute (http_parser *parser, if (unhex_val == -1) { if (ch == ';' || ch == ' ') { - parser->state = s_chunk_parameters; + UPDATE_STATE(s_chunk_parameters); break; } @@ -1732,8 +1960,8 @@ size_t http_parser_execute (http_parser *parser, t *= 16; t += unhex_val; - /* Overflow? */ - if (t < parser->content_length || t == ULLONG_MAX) { + /* Overflow? Test against a conservative limit for simplicity. */ + if (UNLIKELY((ULLONG_MAX - 16) / 16 < parser->content_length)) { SET_ERRNO(HPE_INVALID_CONTENT_LENGTH); goto error; } @@ -1747,7 +1975,7 @@ size_t http_parser_execute (http_parser *parser, assert(parser->flags & F_CHUNKED); /* just ignore this shit. TODO check for overflow */ if (ch == CR) { - parser->state = s_chunk_size_almost_done; + UPDATE_STATE(s_chunk_size_almost_done); break; } break; @@ -1762,9 +1990,9 @@ size_t http_parser_execute (http_parser *parser, if (parser->content_length == 0) { parser->flags |= F_TRAILING; - parser->state = s_header_field_start; + UPDATE_STATE(s_header_field_start); } else { - parser->state = s_chunk_data; + UPDATE_STATE(s_chunk_data); } break; } @@ -1786,7 +2014,7 @@ size_t http_parser_execute (http_parser *parser, p += to_read - 1; if (parser->content_length == 0) { - parser->state = s_chunk_data_almost_done; + UPDATE_STATE(s_chunk_data_almost_done); } break; @@ -1796,7 +2024,7 @@ size_t http_parser_execute (http_parser *parser, assert(parser->flags & F_CHUNKED); assert(parser->content_length == 0); STRICT_CHECK(ch != CR); - parser->state = s_chunk_data_done; + UPDATE_STATE(s_chunk_data_done); CALLBACK_DATA(body); break; @@ -1804,7 +2032,7 @@ size_t http_parser_execute (http_parser *parser, assert(parser->flags & F_CHUNKED); STRICT_CHECK(ch != LF); parser->nread = 0; - parser->state = s_chunk_size_start; + UPDATE_STATE(s_chunk_size_start); break; default: @@ -1827,21 +2055,23 @@ size_t http_parser_execute (http_parser *parser, assert(((header_field_mark ? 1 : 0) + (header_value_mark ? 1 : 0) + (url_mark ? 1 : 0) + - (body_mark ? 1 : 0)) <= 1); + (body_mark ? 1 : 0) + + (status_mark ? 1 : 0)) <= 1); CALLBACK_DATA_NOADVANCE(header_field); CALLBACK_DATA_NOADVANCE(header_value); CALLBACK_DATA_NOADVANCE(url); CALLBACK_DATA_NOADVANCE(body); + CALLBACK_DATA_NOADVANCE(status); - return len; + RETURN(len); error: if (HTTP_PARSER_ERRNO(parser) == HPE_OK) { SET_ERRNO(HPE_UNKNOWN); } - return (p - data); + RETURN(p - data); } @@ -2067,7 +2297,7 @@ http_parser_parse_url(const char *buf, size_t buflen, int is_connect, u->port = u->field_set = 0; s = is_connect ? s_req_server_start : s_req_spaces_before_url; - uf = old_uf = UF_MAX; + old_uf = UF_MAX; for (p = buf; p < buf + buflen; p++) { s = parse_url_char(s, *p); @@ -2173,3 +2403,10 @@ int http_body_is_final(const struct http_parser *parser) { return parser->state == s_message_done; } + +unsigned long +http_parser_version(void) { + return HTTP_PARSER_VERSION_MAJOR * 0x10000 | + HTTP_PARSER_VERSION_MINOR * 0x00100 | + HTTP_PARSER_VERSION_PATCH * 0x00001; +} diff --git a/src/http_parser.h b/src/http_parser.h index f7030c4f..cb592952 100644 --- a/src/http_parser.h +++ b/src/http_parser.h @@ -24,8 +24,10 @@ extern "C" { #endif +/* Also update SONAME in the Makefile whenever you change these. */ #define HTTP_PARSER_VERSION_MAJOR 2 -#define HTTP_PARSER_VERSION_MINOR 1 +#define HTTP_PARSER_VERSION_MINOR 4 +#define HTTP_PARSER_VERSION_PATCH 2 #include #if defined(_WIN32) && !defined(__MINGW32__) && (!defined(_MSC_VER) || _MSC_VER<1600) @@ -50,9 +52,16 @@ typedef unsigned __int64 uint64_t; # define HTTP_PARSER_STRICT 1 #endif -/* Maximium header size allowed */ -#define HTTP_MAX_HEADER_SIZE (80*1024) - +/* Maximium header size allowed. If the macro is not defined + * before including this header then the default is used. To + * change the maximum header size, define the macro in the build + * environment (e.g. -DHTTP_MAX_HEADER_SIZE=). To remove + * the effective limit on the size of the header, define the macro + * to a very large number (e.g. -DHTTP_MAX_HEADER_SIZE=0x7fffffff) + */ +#ifndef HTTP_MAX_HEADER_SIZE +# define HTTP_MAX_HEADER_SIZE (80*1024) +#endif typedef struct http_parser http_parser; typedef struct http_parser_settings http_parser_settings; @@ -67,7 +76,7 @@ typedef struct http_parser_settings http_parser_settings; * HEAD request which may contain 'Content-Length' or 'Transfer-Encoding: * chunked' headers that indicate the presence of a body. * - * http_data_cb does not return data chunks. It will be call arbitrarally + * http_data_cb does not return data chunks. It will be called arbitrarily * many times for each string. E.G. you might get 10 callbacks for "on_url" * each providing just a few characters more data. */ @@ -108,6 +117,8 @@ typedef int (*http_cb) (http_parser*); /* RFC-5789 */ \ XX(24, PATCH, PATCH) \ XX(25, PURGE, PURGE) \ + /* CalDAV */ \ + XX(26, MKCALENDAR, MKCALENDAR) \ enum http_method { @@ -125,9 +136,10 @@ enum flags { F_CHUNKED = 1 << 0 , F_CONNECTION_KEEP_ALIVE = 1 << 1 , F_CONNECTION_CLOSE = 1 << 2 - , F_TRAILING = 1 << 3 - , F_UPGRADE = 1 << 4 - , F_SKIPBODY = 1 << 5 + , F_CONNECTION_UPGRADE = 1 << 3 + , F_TRAILING = 1 << 4 + , F_UPGRADE = 1 << 5 + , F_SKIPBODY = 1 << 6 }; @@ -141,13 +153,13 @@ enum flags \ /* Callback-related errors */ \ XX(CB_message_begin, "the on_message_begin callback failed") \ - XX(CB_status_complete, "the on_status_complete callback failed") \ XX(CB_url, "the on_url callback failed") \ XX(CB_header_field, "the on_header_field callback failed") \ XX(CB_header_value, "the on_header_value callback failed") \ XX(CB_headers_complete, "the on_headers_complete callback failed") \ XX(CB_body, "the on_body callback failed") \ XX(CB_message_complete, "the on_message_complete callback failed") \ + XX(CB_status, "the on_status callback failed") \ \ /* Parsing-related errors */ \ XX(INVALID_EOF_STATE, "stream ended at an unexpected time") \ @@ -191,11 +203,11 @@ enum http_errno { struct http_parser { /** PRIVATE **/ - unsigned char type : 2; /* enum http_parser_type */ - unsigned char flags : 6; /* F_* values from 'flags' enum; semi-public */ - unsigned char state; /* enum state from http_parser.c */ - unsigned char header_state; /* enum header_state from http_parser.c */ - unsigned char index; /* index into current matcher */ + unsigned int type : 2; /* enum http_parser_type */ + unsigned int flags : 6; /* F_* values from 'flags' enum; semi-public */ + unsigned int state : 8; /* enum state from http_parser.c */ + unsigned int header_state : 8; /* enum header_state from http_parser.c */ + unsigned int index : 8; /* index into current matcher */ uint32_t nread; /* # bytes read in various scenarios */ uint64_t content_length; /* # bytes in body (0 if no Content-Length header) */ @@ -203,16 +215,16 @@ struct http_parser { /** READ-ONLY **/ unsigned short http_major; unsigned short http_minor; - unsigned short status_code; /* responses only */ - unsigned char method; /* requests only */ - unsigned char http_errno : 7; + unsigned int status_code : 16; /* responses only */ + unsigned int method : 8; /* requests only */ + unsigned int http_errno : 7; /* 1 = Upgrade header was present and the parser has exited because of that. * 0 = No upgrade header present. * Should be checked when http_parser_execute() returns in addition to * error checking. */ - unsigned char upgrade : 1; + unsigned int upgrade : 1; /** PUBLIC **/ void *data; /* A pointer to get hook to the "connection" or "socket" object */ @@ -222,7 +234,7 @@ struct http_parser { struct http_parser_settings { http_cb on_message_begin; http_data_cb on_url; - http_cb on_status_complete; + http_data_cb on_status; http_data_cb on_header_field; http_data_cb on_header_value; http_cb on_headers_complete; @@ -261,9 +273,23 @@ struct http_parser_url { }; +/* Returns the library version. Bits 16-23 contain the major version number, + * bits 8-15 the minor version number and bits 0-7 the patch level. + * Usage example: + * + * unsigned long version = http_parser_version(); + * unsigned major = (version >> 16) & 255; + * unsigned minor = (version >> 8) & 255; + * unsigned patch = version & 255; + * printf("http_parser v%u.%u.%u\n", major, minor, patch); + */ +unsigned long http_parser_version(void); + void http_parser_init(http_parser *parser, enum http_parser_type type); +/* Executes the parser. Returns number of parsed bytes. Sets + * `parser->http_errno` on error. */ size_t http_parser_execute(http_parser *parser, const http_parser_settings *settings, const char *data,