123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366 |
- Subject: Support multi-coding Transfer-Encoding
- Origin: v2.9.2-2-g7d5c99d <https://github.com/nodejs/http-parser/commit/7d5c99d09f6743b055d53fc3f642746d9801479b>
- Upstream-Author: Fedor Indutny <fedor@indutny.com>
- Date: Mon Feb 3 21:10:35 2020 +0100
- `Transfer-Encoding` header might have multiple codings in it. Even
- though llhttp cares only about `chunked`, it must check that `chunked`
- is the last coding (if present).
- ABNF from RFC 7230:
- ```
- Transfer-Encoding = *( "," OWS ) transfer-coding *( OWS "," [ OWS
- transfer-coding ] )
- transfer-coding = "chunked" / "compress" / "deflate" / "gzip" /
- transfer-extension
- transfer-extension = token *( OWS ";" OWS transfer-parameter )
- transfer-parameter = token BWS "=" BWS ( token / quoted-string )
- ```
- However, if `chunked` is not last - llhttp must assume that the encoding
- and size of the body is unknown (according to 3.3.3 of RFC 7230) and
- read the response until EOF. For request - the error must be raised for
- an unknown `Transfer-Encoding`.
- Furthermore, 3.3.3 of RFC 7230 explicitly states that presence of both
- `Transfer-Encoding` and `Content-Length` indicates the smuggling attack
- and "ought to be handled as an error".
- For the lenient mode:
- * Unknown `Transfer-Encoding` in requests is not an error and request
- body is simply read until EOF (end of connection)
- * Only `Transfer-Encoding: chunked` together with `Content-Length` would
- result an error (just like before the patch)
- PR-URL: https://github.com/nodejs-private/http-parser-private/pull/4
- Reviewed-By: Matteo Collina <matteo.collina@gmail.com>
- Reviewed-By: Sam Roberts <vieuxtech@gmail.com>
- Reviewed-By: James M Snell <jasnell@gmail.com>
- --- a/http_parser.c
- +++ b/http_parser.c
- @@ -375,7 +375,10 @@
- , h_transfer_encoding
- , h_upgrade
-
- + , h_matching_transfer_encoding_token_start
- , h_matching_transfer_encoding_chunked
- + , h_matching_transfer_encoding_token
- +
- , h_matching_connection_token_start
- , h_matching_connection_keep_alive
- , h_matching_connection_close
- @@ -1311,6 +1314,7 @@
- parser->header_state = h_general;
- } else if (parser->index == sizeof(TRANSFER_ENCODING)-2) {
- parser->header_state = h_transfer_encoding;
- + parser->flags |= F_TRANSFER_ENCODING;
- }
- break;
-
- @@ -1391,10 +1395,14 @@
- if ('c' == c) {
- parser->header_state = h_matching_transfer_encoding_chunked;
- } else {
- - parser->header_state = h_general;
- + parser->header_state = h_matching_transfer_encoding_token;
- }
- break;
-
- + /* Multi-value `Transfer-Encoding` header */
- + case h_matching_transfer_encoding_token_start:
- + break;
- +
- case h_content_length:
- if (UNLIKELY(!IS_NUM(ch))) {
- SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
- @@ -1537,16 +1545,41 @@
- goto error;
-
- /* Transfer-Encoding: chunked */
- + case h_matching_transfer_encoding_token_start:
- + /* looking for 'Transfer-Encoding: chunked' */
- + if ('c' == c) {
- + h_state = h_matching_transfer_encoding_chunked;
- + } else if (STRICT_TOKEN(c)) {
- + /* TODO(indutny): similar code below does this, but why?
- + * At the very least it seems to be inconsistent given that
- + * h_matching_transfer_encoding_token does not check for
- + * `STRICT_TOKEN`
- + */
- + h_state = h_matching_transfer_encoding_token;
- + } else if (c == ' ' || c == '\t') {
- + /* Skip lws */
- + } else {
- + h_state = h_general;
- + }
- + break;
- +
- case h_matching_transfer_encoding_chunked:
- parser->index++;
- if (parser->index > sizeof(CHUNKED)-1
- || c != CHUNKED[parser->index]) {
- - h_state = h_general;
- + h_state = h_matching_transfer_encoding_token;
- } else if (parser->index == sizeof(CHUNKED)-2) {
- h_state = h_transfer_encoding_chunked;
- }
- break;
-
- + case h_matching_transfer_encoding_token:
- + if (ch == ',') {
- + h_state = h_matching_transfer_encoding_token_start;
- + parser->index = 0;
- + }
- + break;
- +
- case h_matching_connection_token_start:
- /* looking for 'Connection: keep-alive' */
- if (c == 'k') {
- @@ -1605,7 +1638,7 @@
- break;
-
- case h_transfer_encoding_chunked:
- - if (ch != ' ') h_state = h_general;
- + if (ch != ' ') h_state = h_matching_transfer_encoding_token;
- break;
-
- case h_connection_keep_alive:
- @@ -1730,12 +1763,17 @@
- REEXECUTE();
- }
-
- - /* Cannot use chunked encoding and a content-length header together
- - per the HTTP specification. */
- - if ((parser->flags & F_CHUNKED) &&
- + /* Cannot us transfer-encoding and a content-length header together
- + per the HTTP specification. (RFC 7230 Section 3.3.3) */
- + if ((parser->flags & F_TRANSFER_ENCODING) &&
- (parser->flags & F_CONTENTLENGTH)) {
- - SET_ERRNO(HPE_UNEXPECTED_CONTENT_LENGTH);
- - goto error;
- + /* Allow it for lenient parsing as long as `Transfer-Encoding` is
- + * not `chunked`
- + */
- + if (!lenient || (parser->flags & F_CHUNKED)) {
- + SET_ERRNO(HPE_UNEXPECTED_CONTENT_LENGTH);
- + goto error;
- + }
- }
-
- UPDATE_STATE(s_headers_done);
- @@ -1809,8 +1847,31 @@
- UPDATE_STATE(NEW_MESSAGE());
- CALLBACK_NOTIFY(message_complete);
- } else if (parser->flags & F_CHUNKED) {
- - /* chunked encoding - ignore Content-Length header */
- + /* chunked encoding - ignore Content-Length header,
- + * prepare for a chunk */
- UPDATE_STATE(s_chunk_size_start);
- + } else if (parser->flags & F_TRANSFER_ENCODING) {
- + if (parser->type == HTTP_REQUEST && !lenient) {
- + /* RFC 7230 3.3.3 */
- +
- + /* If a Transfer-Encoding header field
- + * is present in a request and the chunked transfer coding is not
- + * the final encoding, the message body length cannot be determined
- + * reliably; the server MUST respond with the 400 (Bad Request)
- + * status code and then close the connection.
- + */
- + SET_ERRNO(HPE_INVALID_TRANSFER_ENCODING);
- + RETURN(p - data); /* Error */
- + } else {
- + /* RFC 7230 3.3.3 */
- +
- + /* If a Transfer-Encoding header field is present in a response and
- + * the chunked transfer coding is not the final encoding, the
- + * message body length is determined by reading the connection until
- + * it is closed by the server.
- + */
- + UPDATE_STATE(s_body_identity_eof);
- + }
- } else {
- if (parser->content_length == 0) {
- /* Content-Length header given but zero: Content-Length: 0\r\n */
- @@ -2062,6 +2123,12 @@
- return 0;
- }
-
- + /* RFC 7230 3.3.3, see `s_headers_almost_done` */
- + if ((parser->flags & F_TRANSFER_ENCODING) &&
- + (parser->flags & F_CHUNKED) == 0) {
- + return 1;
- + }
- +
- if ((parser->flags & F_CHUNKED) || parser->content_length != ULLONG_MAX) {
- return 0;
- }
- --- a/http_parser.h
- +++ b/http_parser.h
- @@ -225,6 +225,7 @@
- , F_UPGRADE = 1 << 5
- , F_SKIPBODY = 1 << 6
- , F_CONTENTLENGTH = 1 << 7
- + , F_TRANSFER_ENCODING = 1 << 8
- };
-
-
- @@ -271,6 +272,8 @@
- "unexpected content-length header") \
- XX(INVALID_CHUNK_SIZE, \
- "invalid character in chunk size header") \
- + XX(INVALID_TRANSFER_ENCODING, \
- + "request has invalid transfer-encoding") \
- XX(INVALID_CONSTANT, "invalid constant string") \
- XX(INVALID_INTERNAL_STATE, "encountered unexpected internal state")\
- XX(STRICT, "strict mode assertion failed") \
- @@ -293,11 +296,11 @@
- struct http_parser {
- /** PRIVATE **/
- unsigned int type : 2; /* enum http_parser_type */
- - unsigned int flags : 8; /* F_* values from 'flags' enum; semi-public */
- unsigned int state : 7; /* enum state from http_parser.c */
- unsigned int header_state : 7; /* enum header_state from http_parser.c */
- unsigned int index : 7; /* index into current matcher */
- unsigned int lenient_http_headers : 1;
- + unsigned int flags : 16; /* F_* values from 'flags' enum; semi-public */
-
- uint32_t nread; /* # bytes read in various scenarios */
- uint64_t content_length; /* # bytes in body (0 if no Content-Length header) */
- --- a/test.c
- +++ b/test.c
- @@ -262,7 +262,6 @@
- ,.type= HTTP_REQUEST
- ,.raw= "POST /post_identity_body_world?q=search#hey HTTP/1.1\r\n"
- "Accept: */*\r\n"
- - "Transfer-Encoding: identity\r\n"
- "Content-Length: 5\r\n"
- "\r\n"
- "World"
- @@ -275,10 +274,9 @@
- ,.fragment= "hey"
- ,.request_path= "/post_identity_body_world"
- ,.request_url= "/post_identity_body_world?q=search#hey"
- - ,.num_headers= 3
- + ,.num_headers= 2
- ,.headers=
- { { "Accept", "*/*" }
- - , { "Transfer-Encoding", "identity" }
- , { "Content-Length", "5" }
- }
- ,.body= "World"
- @@ -1173,6 +1171,61 @@
- ,.headers= { { "Host", "example.com" } }
- ,.body= ""
- }
- +
- +#define POST_MULTI_TE_LAST_CHUNKED 43
- +, {.name= "post - multi coding transfer-encoding chunked body"
- + ,.type= HTTP_REQUEST
- + ,.raw= "POST / HTTP/1.1\r\n"
- + "Transfer-Encoding: deflate, chunked\r\n"
- + "\r\n"
- + "1e\r\nall your base are belong to us\r\n"
- + "0\r\n"
- + "\r\n"
- + ,.should_keep_alive= TRUE
- + ,.message_complete_on_eof= FALSE
- + ,.http_major= 1
- + ,.http_minor= 1
- + ,.method= HTTP_POST
- + ,.query_string= ""
- + ,.fragment= ""
- + ,.request_path= "/"
- + ,.request_url= "/"
- + ,.num_headers= 1
- + ,.headers=
- + { { "Transfer-Encoding" , "deflate, chunked" }
- + }
- + ,.body= "all your base are belong to us"
- + ,.num_chunks_complete= 2
- + ,.chunk_lengths= { 0x1e }
- + }
- +
- +#define POST_MULTI_LINE_TE_LAST_CHUNKED 43
- +, {.name= "post - multi coding transfer-encoding chunked body"
- + ,.type= HTTP_REQUEST
- + ,.raw= "POST / HTTP/1.1\r\n"
- + "Transfer-Encoding: deflate,\r\n"
- + " chunked\r\n"
- + "\r\n"
- + "1e\r\nall your base are belong to us\r\n"
- + "0\r\n"
- + "\r\n"
- + ,.should_keep_alive= TRUE
- + ,.message_complete_on_eof= FALSE
- + ,.http_major= 1
- + ,.http_minor= 1
- + ,.method= HTTP_POST
- + ,.query_string= ""
- + ,.fragment= ""
- + ,.request_path= "/"
- + ,.request_url= "/"
- + ,.num_headers= 1
- + ,.headers=
- + { { "Transfer-Encoding" , "deflate, chunked" }
- + }
- + ,.body= "all your base are belong to us"
- + ,.num_chunks_complete= 2
- + ,.chunk_lengths= { 0x1e }
- + }
- };
-
- /* * R E S P O N S E S * */
- @@ -1950,6 +2003,28 @@
- ,.num_chunks_complete= 3
- ,.chunk_lengths= { 2, 2 }
- }
- +#define HTTP_200_MULTI_TE_NOT_LAST_CHUNKED 28
- +, {.name= "HTTP 200 response with `chunked` being *not last* Transfer-Encoding"
- + ,.type= HTTP_RESPONSE
- + ,.raw= "HTTP/1.1 200 OK\r\n"
- + "Transfer-Encoding: chunked, identity\r\n"
- + "\r\n"
- + "2\r\n"
- + "OK\r\n"
- + "0\r\n"
- + "\r\n"
- + ,.should_keep_alive= FALSE
- + ,.message_complete_on_eof= TRUE
- + ,.http_major= 1
- + ,.http_minor= 1
- + ,.status_code= 200
- + ,.response_status= "OK"
- + ,.num_headers= 1
- + ,.headers= { { "Transfer-Encoding", "chunked, identity" }
- + }
- + ,.body= "2\r\nOK\r\n0\r\n\r\n"
- + ,.num_chunks_complete= 0
- + }
- };
-
- /* strnlen() is a POSIX.2008 addition. Can't rely on it being available so
- @@ -3627,7 +3702,7 @@
- parsed = http_parser_execute(&parser, &settings_null, buf, strlen(buf));
- assert(parsed == strlen(buf));
-
- - buf = "Transfer-Encoding: chunked\r\nContent-Length: 1\r\n\r\n";
- + buf = "Transfer-Encoding: anything\r\nContent-Length: 1\r\n\r\n";
- size_t buflen = strlen(buf);
-
- parsed = http_parser_execute(&parser, &settings_null, buf, buflen);
- @@ -4270,6 +4345,12 @@
- "fooba",
- HPE_OK);
-
- + // Unknown Transfer-Encoding in request
- + test_simple("GET / HTTP/1.1\r\n"
- + "Transfer-Encoding: unknown\r\n"
- + "\r\n",
- + HPE_INVALID_TRANSFER_ENCODING);
- +
- static const char *all_methods[] = {
- "DELETE",
- "GET",
|