1580760635.v2.9.2-2-g7d5c99d.support-multi-coding-transfer-encoding.patch 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366
  1. Subject: Support multi-coding Transfer-Encoding
  2. Origin: v2.9.2-2-g7d5c99d <https://github.com/nodejs/http-parser/commit/7d5c99d09f6743b055d53fc3f642746d9801479b>
  3. Upstream-Author: Fedor Indutny <fedor@indutny.com>
  4. Date: Mon Feb 3 21:10:35 2020 +0100
  5. `Transfer-Encoding` header might have multiple codings in it. Even
  6. though llhttp cares only about `chunked`, it must check that `chunked`
  7. is the last coding (if present).
  8. ABNF from RFC 7230:
  9. ```
  10. Transfer-Encoding = *( "," OWS ) transfer-coding *( OWS "," [ OWS
  11. transfer-coding ] )
  12. transfer-coding = "chunked" / "compress" / "deflate" / "gzip" /
  13. transfer-extension
  14. transfer-extension = token *( OWS ";" OWS transfer-parameter )
  15. transfer-parameter = token BWS "=" BWS ( token / quoted-string )
  16. ```
  17. However, if `chunked` is not last - llhttp must assume that the encoding
  18. and size of the body is unknown (according to 3.3.3 of RFC 7230) and
  19. read the response until EOF. For request - the error must be raised for
  20. an unknown `Transfer-Encoding`.
  21. Furthermore, 3.3.3 of RFC 7230 explicitly states that presence of both
  22. `Transfer-Encoding` and `Content-Length` indicates the smuggling attack
  23. and "ought to be handled as an error".
  24. For the lenient mode:
  25. * Unknown `Transfer-Encoding` in requests is not an error and request
  26. body is simply read until EOF (end of connection)
  27. * Only `Transfer-Encoding: chunked` together with `Content-Length` would
  28. result an error (just like before the patch)
  29. PR-URL: https://github.com/nodejs-private/http-parser-private/pull/4
  30. Reviewed-By: Matteo Collina <matteo.collina@gmail.com>
  31. Reviewed-By: Sam Roberts <vieuxtech@gmail.com>
  32. Reviewed-By: James M Snell <jasnell@gmail.com>
  33. --- a/http_parser.c
  34. +++ b/http_parser.c
  35. @@ -375,7 +375,10 @@
  36. , h_transfer_encoding
  37. , h_upgrade
  38. + , h_matching_transfer_encoding_token_start
  39. , h_matching_transfer_encoding_chunked
  40. + , h_matching_transfer_encoding_token
  41. +
  42. , h_matching_connection_token_start
  43. , h_matching_connection_keep_alive
  44. , h_matching_connection_close
  45. @@ -1311,6 +1314,7 @@
  46. parser->header_state = h_general;
  47. } else if (parser->index == sizeof(TRANSFER_ENCODING)-2) {
  48. parser->header_state = h_transfer_encoding;
  49. + parser->flags |= F_TRANSFER_ENCODING;
  50. }
  51. break;
  52. @@ -1391,10 +1395,14 @@
  53. if ('c' == c) {
  54. parser->header_state = h_matching_transfer_encoding_chunked;
  55. } else {
  56. - parser->header_state = h_general;
  57. + parser->header_state = h_matching_transfer_encoding_token;
  58. }
  59. break;
  60. + /* Multi-value `Transfer-Encoding` header */
  61. + case h_matching_transfer_encoding_token_start:
  62. + break;
  63. +
  64. case h_content_length:
  65. if (UNLIKELY(!IS_NUM(ch))) {
  66. SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
  67. @@ -1537,16 +1545,41 @@
  68. goto error;
  69. /* Transfer-Encoding: chunked */
  70. + case h_matching_transfer_encoding_token_start:
  71. + /* looking for 'Transfer-Encoding: chunked' */
  72. + if ('c' == c) {
  73. + h_state = h_matching_transfer_encoding_chunked;
  74. + } else if (STRICT_TOKEN(c)) {
  75. + /* TODO(indutny): similar code below does this, but why?
  76. + * At the very least it seems to be inconsistent given that
  77. + * h_matching_transfer_encoding_token does not check for
  78. + * `STRICT_TOKEN`
  79. + */
  80. + h_state = h_matching_transfer_encoding_token;
  81. + } else if (c == ' ' || c == '\t') {
  82. + /* Skip lws */
  83. + } else {
  84. + h_state = h_general;
  85. + }
  86. + break;
  87. +
  88. case h_matching_transfer_encoding_chunked:
  89. parser->index++;
  90. if (parser->index > sizeof(CHUNKED)-1
  91. || c != CHUNKED[parser->index]) {
  92. - h_state = h_general;
  93. + h_state = h_matching_transfer_encoding_token;
  94. } else if (parser->index == sizeof(CHUNKED)-2) {
  95. h_state = h_transfer_encoding_chunked;
  96. }
  97. break;
  98. + case h_matching_transfer_encoding_token:
  99. + if (ch == ',') {
  100. + h_state = h_matching_transfer_encoding_token_start;
  101. + parser->index = 0;
  102. + }
  103. + break;
  104. +
  105. case h_matching_connection_token_start:
  106. /* looking for 'Connection: keep-alive' */
  107. if (c == 'k') {
  108. @@ -1605,7 +1638,7 @@
  109. break;
  110. case h_transfer_encoding_chunked:
  111. - if (ch != ' ') h_state = h_general;
  112. + if (ch != ' ') h_state = h_matching_transfer_encoding_token;
  113. break;
  114. case h_connection_keep_alive:
  115. @@ -1730,12 +1763,17 @@
  116. REEXECUTE();
  117. }
  118. - /* Cannot use chunked encoding and a content-length header together
  119. - per the HTTP specification. */
  120. - if ((parser->flags & F_CHUNKED) &&
  121. + /* Cannot us transfer-encoding and a content-length header together
  122. + per the HTTP specification. (RFC 7230 Section 3.3.3) */
  123. + if ((parser->flags & F_TRANSFER_ENCODING) &&
  124. (parser->flags & F_CONTENTLENGTH)) {
  125. - SET_ERRNO(HPE_UNEXPECTED_CONTENT_LENGTH);
  126. - goto error;
  127. + /* Allow it for lenient parsing as long as `Transfer-Encoding` is
  128. + * not `chunked`
  129. + */
  130. + if (!lenient || (parser->flags & F_CHUNKED)) {
  131. + SET_ERRNO(HPE_UNEXPECTED_CONTENT_LENGTH);
  132. + goto error;
  133. + }
  134. }
  135. UPDATE_STATE(s_headers_done);
  136. @@ -1809,8 +1847,31 @@
  137. UPDATE_STATE(NEW_MESSAGE());
  138. CALLBACK_NOTIFY(message_complete);
  139. } else if (parser->flags & F_CHUNKED) {
  140. - /* chunked encoding - ignore Content-Length header */
  141. + /* chunked encoding - ignore Content-Length header,
  142. + * prepare for a chunk */
  143. UPDATE_STATE(s_chunk_size_start);
  144. + } else if (parser->flags & F_TRANSFER_ENCODING) {
  145. + if (parser->type == HTTP_REQUEST && !lenient) {
  146. + /* RFC 7230 3.3.3 */
  147. +
  148. + /* If a Transfer-Encoding header field
  149. + * is present in a request and the chunked transfer coding is not
  150. + * the final encoding, the message body length cannot be determined
  151. + * reliably; the server MUST respond with the 400 (Bad Request)
  152. + * status code and then close the connection.
  153. + */
  154. + SET_ERRNO(HPE_INVALID_TRANSFER_ENCODING);
  155. + RETURN(p - data); /* Error */
  156. + } else {
  157. + /* RFC 7230 3.3.3 */
  158. +
  159. + /* If a Transfer-Encoding header field is present in a response and
  160. + * the chunked transfer coding is not the final encoding, the
  161. + * message body length is determined by reading the connection until
  162. + * it is closed by the server.
  163. + */
  164. + UPDATE_STATE(s_body_identity_eof);
  165. + }
  166. } else {
  167. if (parser->content_length == 0) {
  168. /* Content-Length header given but zero: Content-Length: 0\r\n */
  169. @@ -2062,6 +2123,12 @@
  170. return 0;
  171. }
  172. + /* RFC 7230 3.3.3, see `s_headers_almost_done` */
  173. + if ((parser->flags & F_TRANSFER_ENCODING) &&
  174. + (parser->flags & F_CHUNKED) == 0) {
  175. + return 1;
  176. + }
  177. +
  178. if ((parser->flags & F_CHUNKED) || parser->content_length != ULLONG_MAX) {
  179. return 0;
  180. }
  181. --- a/http_parser.h
  182. +++ b/http_parser.h
  183. @@ -225,6 +225,7 @@
  184. , F_UPGRADE = 1 << 5
  185. , F_SKIPBODY = 1 << 6
  186. , F_CONTENTLENGTH = 1 << 7
  187. + , F_TRANSFER_ENCODING = 1 << 8
  188. };
  189. @@ -271,6 +272,8 @@
  190. "unexpected content-length header") \
  191. XX(INVALID_CHUNK_SIZE, \
  192. "invalid character in chunk size header") \
  193. + XX(INVALID_TRANSFER_ENCODING, \
  194. + "request has invalid transfer-encoding") \
  195. XX(INVALID_CONSTANT, "invalid constant string") \
  196. XX(INVALID_INTERNAL_STATE, "encountered unexpected internal state")\
  197. XX(STRICT, "strict mode assertion failed") \
  198. @@ -293,11 +296,11 @@
  199. struct http_parser {
  200. /** PRIVATE **/
  201. unsigned int type : 2; /* enum http_parser_type */
  202. - unsigned int flags : 8; /* F_* values from 'flags' enum; semi-public */
  203. unsigned int state : 7; /* enum state from http_parser.c */
  204. unsigned int header_state : 7; /* enum header_state from http_parser.c */
  205. unsigned int index : 7; /* index into current matcher */
  206. unsigned int lenient_http_headers : 1;
  207. + unsigned int flags : 16; /* F_* values from 'flags' enum; semi-public */
  208. uint32_t nread; /* # bytes read in various scenarios */
  209. uint64_t content_length; /* # bytes in body (0 if no Content-Length header) */
  210. --- a/test.c
  211. +++ b/test.c
  212. @@ -262,7 +262,6 @@
  213. ,.type= HTTP_REQUEST
  214. ,.raw= "POST /post_identity_body_world?q=search#hey HTTP/1.1\r\n"
  215. "Accept: */*\r\n"
  216. - "Transfer-Encoding: identity\r\n"
  217. "Content-Length: 5\r\n"
  218. "\r\n"
  219. "World"
  220. @@ -275,10 +274,9 @@
  221. ,.fragment= "hey"
  222. ,.request_path= "/post_identity_body_world"
  223. ,.request_url= "/post_identity_body_world?q=search#hey"
  224. - ,.num_headers= 3
  225. + ,.num_headers= 2
  226. ,.headers=
  227. { { "Accept", "*/*" }
  228. - , { "Transfer-Encoding", "identity" }
  229. , { "Content-Length", "5" }
  230. }
  231. ,.body= "World"
  232. @@ -1173,6 +1171,61 @@
  233. ,.headers= { { "Host", "example.com" } }
  234. ,.body= ""
  235. }
  236. +
  237. +#define POST_MULTI_TE_LAST_CHUNKED 43
  238. +, {.name= "post - multi coding transfer-encoding chunked body"
  239. + ,.type= HTTP_REQUEST
  240. + ,.raw= "POST / HTTP/1.1\r\n"
  241. + "Transfer-Encoding: deflate, chunked\r\n"
  242. + "\r\n"
  243. + "1e\r\nall your base are belong to us\r\n"
  244. + "0\r\n"
  245. + "\r\n"
  246. + ,.should_keep_alive= TRUE
  247. + ,.message_complete_on_eof= FALSE
  248. + ,.http_major= 1
  249. + ,.http_minor= 1
  250. + ,.method= HTTP_POST
  251. + ,.query_string= ""
  252. + ,.fragment= ""
  253. + ,.request_path= "/"
  254. + ,.request_url= "/"
  255. + ,.num_headers= 1
  256. + ,.headers=
  257. + { { "Transfer-Encoding" , "deflate, chunked" }
  258. + }
  259. + ,.body= "all your base are belong to us"
  260. + ,.num_chunks_complete= 2
  261. + ,.chunk_lengths= { 0x1e }
  262. + }
  263. +
  264. +#define POST_MULTI_LINE_TE_LAST_CHUNKED 43
  265. +, {.name= "post - multi coding transfer-encoding chunked body"
  266. + ,.type= HTTP_REQUEST
  267. + ,.raw= "POST / HTTP/1.1\r\n"
  268. + "Transfer-Encoding: deflate,\r\n"
  269. + " chunked\r\n"
  270. + "\r\n"
  271. + "1e\r\nall your base are belong to us\r\n"
  272. + "0\r\n"
  273. + "\r\n"
  274. + ,.should_keep_alive= TRUE
  275. + ,.message_complete_on_eof= FALSE
  276. + ,.http_major= 1
  277. + ,.http_minor= 1
  278. + ,.method= HTTP_POST
  279. + ,.query_string= ""
  280. + ,.fragment= ""
  281. + ,.request_path= "/"
  282. + ,.request_url= "/"
  283. + ,.num_headers= 1
  284. + ,.headers=
  285. + { { "Transfer-Encoding" , "deflate, chunked" }
  286. + }
  287. + ,.body= "all your base are belong to us"
  288. + ,.num_chunks_complete= 2
  289. + ,.chunk_lengths= { 0x1e }
  290. + }
  291. };
  292. /* * R E S P O N S E S * */
  293. @@ -1950,6 +2003,28 @@
  294. ,.num_chunks_complete= 3
  295. ,.chunk_lengths= { 2, 2 }
  296. }
  297. +#define HTTP_200_MULTI_TE_NOT_LAST_CHUNKED 28
  298. +, {.name= "HTTP 200 response with `chunked` being *not last* Transfer-Encoding"
  299. + ,.type= HTTP_RESPONSE
  300. + ,.raw= "HTTP/1.1 200 OK\r\n"
  301. + "Transfer-Encoding: chunked, identity\r\n"
  302. + "\r\n"
  303. + "2\r\n"
  304. + "OK\r\n"
  305. + "0\r\n"
  306. + "\r\n"
  307. + ,.should_keep_alive= FALSE
  308. + ,.message_complete_on_eof= TRUE
  309. + ,.http_major= 1
  310. + ,.http_minor= 1
  311. + ,.status_code= 200
  312. + ,.response_status= "OK"
  313. + ,.num_headers= 1
  314. + ,.headers= { { "Transfer-Encoding", "chunked, identity" }
  315. + }
  316. + ,.body= "2\r\nOK\r\n0\r\n\r\n"
  317. + ,.num_chunks_complete= 0
  318. + }
  319. };
  320. /* strnlen() is a POSIX.2008 addition. Can't rely on it being available so
  321. @@ -3627,7 +3702,7 @@
  322. parsed = http_parser_execute(&parser, &settings_null, buf, strlen(buf));
  323. assert(parsed == strlen(buf));
  324. - buf = "Transfer-Encoding: chunked\r\nContent-Length: 1\r\n\r\n";
  325. + buf = "Transfer-Encoding: anything\r\nContent-Length: 1\r\n\r\n";
  326. size_t buflen = strlen(buf);
  327. parsed = http_parser_execute(&parser, &settings_null, buf, buflen);
  328. @@ -4270,6 +4345,12 @@
  329. "fooba",
  330. HPE_OK);
  331. + // Unknown Transfer-Encoding in request
  332. + test_simple("GET / HTTP/1.1\r\n"
  333. + "Transfer-Encoding: unknown\r\n"
  334. + "\r\n",
  335. + HPE_INVALID_TRANSFER_ENCODING);
  336. +
  337. static const char *all_methods[] = {
  338. "DELETE",
  339. "GET",