http_parser.c 46 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644
  1. /* Copyright Joyent, Inc. and other Node contributors. All rights reserved.
  2. *
  3. * Permission is hereby granted, free of charge, to any person obtaining a copy
  4. * of this software and associated documentation files (the "Software"), to
  5. * deal in the Software without restriction, including without limitation the
  6. * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
  7. * sell copies of the Software, and to permit persons to whom the Software is
  8. * furnished to do so, subject to the following conditions:
  9. *
  10. * The above copyright notice and this permission notice shall be included in
  11. * all copies or substantial portions of the Software.
  12. *
  13. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  14. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  15. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  16. * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  17. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  18. * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  19. * IN THE SOFTWARE.
  20. */
  21. #include <http_parser.h>
  22. #include <assert.h>
  23. #include <stddef.h>
  24. #ifndef MIN
  25. # define MIN(a,b) ((a) < (b) ? (a) : (b))
  26. #endif
  27. #define CALLBACK2(FOR) \
  28. do { \
  29. if (settings->on_##FOR) { \
  30. if (0 != settings->on_##FOR(parser)) return (p - data); \
  31. } \
  32. } while (0)
  33. #define MARK(FOR) \
  34. do { \
  35. FOR##_mark = p; \
  36. } while (0)
  37. #define CALLBACK_NOCLEAR(FOR) \
  38. do { \
  39. if (FOR##_mark) { \
  40. if (settings->on_##FOR) { \
  41. if (0 != settings->on_##FOR(parser, \
  42. FOR##_mark, \
  43. p - FOR##_mark)) \
  44. { \
  45. return (p - data); \
  46. } \
  47. } \
  48. } \
  49. } while (0)
  50. #define CALLBACK(FOR) \
  51. do { \
  52. CALLBACK_NOCLEAR(FOR); \
  53. FOR##_mark = NULL; \
  54. } while (0)
  55. #define PROXY_CONNECTION "proxy-connection"
  56. #define CONNECTION "connection"
  57. #define CONTENT_LENGTH "content-length"
  58. #define TRANSFER_ENCODING "transfer-encoding"
  59. #define UPGRADE "upgrade"
  60. #define CHUNKED "chunked"
  61. #define KEEP_ALIVE "keep-alive"
  62. #define CLOSE "close"
  63. static const char *method_strings[] =
  64. { "DELETE"
  65. , "GET"
  66. , "HEAD"
  67. , "POST"
  68. , "PUT"
  69. , "CONNECT"
  70. , "OPTIONS"
  71. , "TRACE"
  72. , "COPY"
  73. , "LOCK"
  74. , "MKCOL"
  75. , "MOVE"
  76. , "PROPFIND"
  77. , "PROPPATCH"
  78. , "UNLOCK"
  79. , "REPORT"
  80. , "MKACTIVITY"
  81. , "CHECKOUT"
  82. , "MERGE"
  83. , "M-SEARCH"
  84. , "NOTIFY"
  85. , "SUBSCRIBE"
  86. , "UNSUBSCRIBE"
  87. };
  88. /* Tokens as defined by rfc 2616. Also lowercases them.
  89. * token = 1*<any CHAR except CTLs or separators>
  90. * separators = "(" | ")" | "<" | ">" | "@"
  91. * | "," | ";" | ":" | "\" | <">
  92. * | "/" | "[" | "]" | "?" | "="
  93. * | "{" | "}" | SP | HT
  94. */
  95. static const char tokens[256] = {
  96. /* 0 nul 1 soh 2 stx 3 etx 4 eot 5 enq 6 ack 7 bel */
  97. 0, 0, 0, 0, 0, 0, 0, 0,
  98. /* 8 bs 9 ht 10 nl 11 vt 12 np 13 cr 14 so 15 si */
  99. 0, 0, 0, 0, 0, 0, 0, 0,
  100. /* 16 dle 17 dc1 18 dc2 19 dc3 20 dc4 21 nak 22 syn 23 etb */
  101. 0, 0, 0, 0, 0, 0, 0, 0,
  102. /* 24 can 25 em 26 sub 27 esc 28 fs 29 gs 30 rs 31 us */
  103. 0, 0, 0, 0, 0, 0, 0, 0,
  104. /* 32 sp 33 ! 34 " 35 # 36 $ 37 % 38 & 39 ' */
  105. ' ', '!', '"', '#', '$', '%', '&', '\'',
  106. /* 40 ( 41 ) 42 * 43 + 44 , 45 - 46 . 47 / */
  107. 0, 0, '*', '+', 0, '-', '.', '/',
  108. /* 48 0 49 1 50 2 51 3 52 4 53 5 54 6 55 7 */
  109. '0', '1', '2', '3', '4', '5', '6', '7',
  110. /* 56 8 57 9 58 : 59 ; 60 < 61 = 62 > 63 ? */
  111. '8', '9', 0, 0, 0, 0, 0, 0,
  112. /* 64 @ 65 A 66 B 67 C 68 D 69 E 70 F 71 G */
  113. 0, 'a', 'b', 'c', 'd', 'e', 'f', 'g',
  114. /* 72 H 73 I 74 J 75 K 76 L 77 M 78 N 79 O */
  115. 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
  116. /* 80 P 81 Q 82 R 83 S 84 T 85 U 86 V 87 W */
  117. 'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
  118. /* 88 X 89 Y 90 Z 91 [ 92 \ 93 ] 94 ^ 95 _ */
  119. 'x', 'y', 'z', 0, 0, 0, '^', '_',
  120. /* 96 ` 97 a 98 b 99 c 100 d 101 e 102 f 103 g */
  121. '`', 'a', 'b', 'c', 'd', 'e', 'f', 'g',
  122. /* 104 h 105 i 106 j 107 k 108 l 109 m 110 n 111 o */
  123. 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
  124. /* 112 p 113 q 114 r 115 s 116 t 117 u 118 v 119 w */
  125. 'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
  126. /* 120 x 121 y 122 z 123 { 124 | 125 } 126 ~ 127 del */
  127. 'x', 'y', 'z', 0, '|', '}', '~', 0 };
  128. static const int8_t unhex[256] =
  129. {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  130. ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  131. ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  132. , 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1
  133. ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1
  134. ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  135. ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1
  136. ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  137. };
  138. static const uint8_t normal_url_char[256] = {
  139. /* 0 nul 1 soh 2 stx 3 etx 4 eot 5 enq 6 ack 7 bel */
  140. 0, 0, 0, 0, 0, 0, 0, 0,
  141. /* 8 bs 9 ht 10 nl 11 vt 12 np 13 cr 14 so 15 si */
  142. 0, 0, 0, 0, 0, 0, 0, 0,
  143. /* 16 dle 17 dc1 18 dc2 19 dc3 20 dc4 21 nak 22 syn 23 etb */
  144. 0, 0, 0, 0, 0, 0, 0, 0,
  145. /* 24 can 25 em 26 sub 27 esc 28 fs 29 gs 30 rs 31 us */
  146. 0, 0, 0, 0, 0, 0, 0, 0,
  147. /* 32 sp 33 ! 34 " 35 # 36 $ 37 % 38 & 39 ' */
  148. 0, 1, 1, 0, 1, 1, 1, 1,
  149. /* 40 ( 41 ) 42 * 43 + 44 , 45 - 46 . 47 / */
  150. 1, 1, 1, 1, 1, 1, 1, 1,
  151. /* 48 0 49 1 50 2 51 3 52 4 53 5 54 6 55 7 */
  152. 1, 1, 1, 1, 1, 1, 1, 1,
  153. /* 56 8 57 9 58 : 59 ; 60 < 61 = 62 > 63 ? */
  154. 1, 1, 1, 1, 1, 1, 1, 0,
  155. /* 64 @ 65 A 66 B 67 C 68 D 69 E 70 F 71 G */
  156. 1, 1, 1, 1, 1, 1, 1, 1,
  157. /* 72 H 73 I 74 J 75 K 76 L 77 M 78 N 79 O */
  158. 1, 1, 1, 1, 1, 1, 1, 1,
  159. /* 80 P 81 Q 82 R 83 S 84 T 85 U 86 V 87 W */
  160. 1, 1, 1, 1, 1, 1, 1, 1,
  161. /* 88 X 89 Y 90 Z 91 [ 92 \ 93 ] 94 ^ 95 _ */
  162. 1, 1, 1, 1, 1, 1, 1, 1,
  163. /* 96 ` 97 a 98 b 99 c 100 d 101 e 102 f 103 g */
  164. 1, 1, 1, 1, 1, 1, 1, 1,
  165. /* 104 h 105 i 106 j 107 k 108 l 109 m 110 n 111 o */
  166. 1, 1, 1, 1, 1, 1, 1, 1,
  167. /* 112 p 113 q 114 r 115 s 116 t 117 u 118 v 119 w */
  168. 1, 1, 1, 1, 1, 1, 1, 1,
  169. /* 120 x 121 y 122 z 123 { 124 | 125 } 126 ~ 127 del */
  170. 1, 1, 1, 1, 1, 1, 1, 0,
  171. /* Remainder of non-ASCII range are accepted as-is to support implicitly UTF-8
  172. encoded paths. This is out of spec, but clients generate this and most other
  173. HTTP servers support it. We should, too. */
  174. 1, 1, 1, 1, 1, 1, 1, 1,
  175. 1, 1, 1, 1, 1, 1, 1, 1,
  176. 1, 1, 1, 1, 1, 1, 1, 1,
  177. 1, 1, 1, 1, 1, 1, 1, 1,
  178. 1, 1, 1, 1, 1, 1, 1, 1,
  179. 1, 1, 1, 1, 1, 1, 1, 1,
  180. 1, 1, 1, 1, 1, 1, 1, 1,
  181. 1, 1, 1, 1, 1, 1, 1, 1,
  182. 1, 1, 1, 1, 1, 1, 1, 1,
  183. 1, 1, 1, 1, 1, 1, 1, 1,
  184. 1, 1, 1, 1, 1, 1, 1, 1,
  185. 1, 1, 1, 1, 1, 1, 1, 1,
  186. 1, 1, 1, 1, 1, 1, 1, 1,
  187. 1, 1, 1, 1, 1, 1, 1, 1,
  188. 1, 1, 1, 1, 1, 1, 1, 1,
  189. 1, 1, 1, 1, 1, 1, 1, 1 };
  190. enum state
  191. { s_dead = 1 /* important that this is > 0 */
  192. , s_start_req_or_res
  193. , s_res_or_resp_H
  194. , s_start_res
  195. , s_res_H
  196. , s_res_HT
  197. , s_res_HTT
  198. , s_res_HTTP
  199. , s_res_first_http_major
  200. , s_res_http_major
  201. , s_res_first_http_minor
  202. , s_res_http_minor
  203. , s_res_first_status_code
  204. , s_res_status_code
  205. , s_res_status
  206. , s_res_line_almost_done
  207. , s_start_req
  208. , s_req_method
  209. , s_req_spaces_before_url
  210. , s_req_schema
  211. , s_req_schema_slash
  212. , s_req_schema_slash_slash
  213. , s_req_host
  214. , s_req_port
  215. , s_req_path
  216. , s_req_query_string_start
  217. , s_req_query_string
  218. , s_req_fragment_start
  219. , s_req_fragment
  220. , s_req_http_start
  221. , s_req_http_H
  222. , s_req_http_HT
  223. , s_req_http_HTT
  224. , s_req_http_HTTP
  225. , s_req_first_http_major
  226. , s_req_http_major
  227. , s_req_first_http_minor
  228. , s_req_http_minor
  229. , s_req_line_almost_done
  230. , s_header_field_start
  231. , s_header_field
  232. , s_header_value_start
  233. , s_header_value
  234. , s_header_almost_done
  235. , s_chunk_size_start
  236. , s_chunk_size
  237. , s_chunk_parameters
  238. , s_chunk_size_almost_done
  239. , s_headers_almost_done
  240. /* Important: 's_headers_almost_done' must be the last 'header' state. All
  241. * states beyond this must be 'body' states. It is used for overflow
  242. * checking. See the PARSING_HEADER() macro.
  243. */
  244. , s_chunk_data
  245. , s_chunk_data_almost_done
  246. , s_chunk_data_done
  247. , s_body_identity
  248. , s_body_identity_eof
  249. };
  250. #define PARSING_HEADER(state) (state <= s_headers_almost_done)
  251. enum header_states
  252. { h_general = 0
  253. , h_C
  254. , h_CO
  255. , h_CON
  256. , h_matching_connection
  257. , h_matching_proxy_connection
  258. , h_matching_content_length
  259. , h_matching_transfer_encoding
  260. , h_matching_upgrade
  261. , h_connection
  262. , h_content_length
  263. , h_transfer_encoding
  264. , h_upgrade
  265. , h_matching_transfer_encoding_chunked
  266. , h_matching_connection_keep_alive
  267. , h_matching_connection_close
  268. , h_transfer_encoding_chunked
  269. , h_connection_keep_alive
  270. , h_connection_close
  271. };
  272. enum flags
  273. { F_CHUNKED = 1 << 0
  274. , F_CONNECTION_KEEP_ALIVE = 1 << 1
  275. , F_CONNECTION_CLOSE = 1 << 2
  276. , F_TRAILING = 1 << 3
  277. , F_UPGRADE = 1 << 4
  278. , F_SKIPBODY = 1 << 5
  279. };
  280. #define CR '\r'
  281. #define LF '\n'
  282. #define LOWER(c) (unsigned char)(c | 0x20)
  283. #define TOKEN(c) tokens[(unsigned char)c]
  284. #define start_state (parser->type == HTTP_REQUEST ? s_start_req : s_start_res)
  285. #if HTTP_PARSER_STRICT
  286. # define STRICT_CHECK(cond) if (cond) goto error
  287. # define NEW_MESSAGE() (http_should_keep_alive(parser) ? start_state : s_dead)
  288. #else
  289. # define STRICT_CHECK(cond)
  290. # define NEW_MESSAGE() start_state
  291. #endif
  292. size_t http_parser_execute (http_parser *parser,
  293. const http_parser_settings *settings,
  294. const char *data,
  295. size_t len)
  296. {
  297. char c, ch;
  298. const char *p = data, *pe;
  299. int64_t to_read;
  300. enum state state = (enum state) parser->state;
  301. enum header_states header_state = (enum header_states) parser->header_state;
  302. uint64_t index = parser->index;
  303. uint64_t nread = parser->nread;
  304. if (len == 0) {
  305. switch (state) {
  306. case s_body_identity_eof:
  307. CALLBACK2(message_complete);
  308. return 0;
  309. case s_dead:
  310. case s_start_req_or_res:
  311. case s_start_res:
  312. case s_start_req:
  313. return 0;
  314. default:
  315. return 1; // error
  316. }
  317. }
  318. /* technically we could combine all of these (except for url_mark) into one
  319. variable, saving stack space, but it seems more clear to have them
  320. separated. */
  321. const char *header_field_mark = 0;
  322. const char *header_value_mark = 0;
  323. const char *fragment_mark = 0;
  324. const char *query_string_mark = 0;
  325. const char *path_mark = 0;
  326. const char *url_mark = 0;
  327. if (state == s_header_field)
  328. header_field_mark = data;
  329. if (state == s_header_value)
  330. header_value_mark = data;
  331. if (state == s_req_fragment)
  332. fragment_mark = data;
  333. if (state == s_req_query_string)
  334. query_string_mark = data;
  335. if (state == s_req_path)
  336. path_mark = data;
  337. if (state == s_req_path || state == s_req_schema || state == s_req_schema_slash
  338. || state == s_req_schema_slash_slash || state == s_req_port
  339. || state == s_req_query_string_start || state == s_req_query_string
  340. || state == s_req_host
  341. || state == s_req_fragment_start || state == s_req_fragment)
  342. url_mark = data;
  343. for (p=data, pe=data+len; p != pe; p++) {
  344. ch = *p;
  345. if (PARSING_HEADER(state)) {
  346. ++nread;
  347. /* Buffer overflow attack */
  348. if (nread > HTTP_MAX_HEADER_SIZE) goto error;
  349. }
  350. switch (state) {
  351. case s_dead:
  352. /* this state is used after a 'Connection: close' message
  353. * the parser will error out if it reads another message
  354. */
  355. goto error;
  356. case s_start_req_or_res:
  357. {
  358. if (ch == CR || ch == LF)
  359. break;
  360. parser->flags = 0;
  361. parser->content_length = -1;
  362. CALLBACK2(message_begin);
  363. if (ch == 'H')
  364. state = s_res_or_resp_H;
  365. else {
  366. parser->type = HTTP_REQUEST;
  367. goto start_req_method_assign;
  368. }
  369. break;
  370. }
  371. case s_res_or_resp_H:
  372. if (ch == 'T') {
  373. parser->type = HTTP_RESPONSE;
  374. state = s_res_HT;
  375. } else {
  376. if (ch != 'E') goto error;
  377. parser->type = HTTP_REQUEST;
  378. parser->method = HTTP_HEAD;
  379. index = 2;
  380. state = s_req_method;
  381. }
  382. break;
  383. case s_start_res:
  384. {
  385. parser->flags = 0;
  386. parser->content_length = -1;
  387. CALLBACK2(message_begin);
  388. switch (ch) {
  389. case 'H':
  390. state = s_res_H;
  391. break;
  392. case CR:
  393. case LF:
  394. break;
  395. default:
  396. goto error;
  397. }
  398. break;
  399. }
  400. case s_res_H:
  401. STRICT_CHECK(ch != 'T');
  402. state = s_res_HT;
  403. break;
  404. case s_res_HT:
  405. STRICT_CHECK(ch != 'T');
  406. state = s_res_HTT;
  407. break;
  408. case s_res_HTT:
  409. STRICT_CHECK(ch != 'P');
  410. state = s_res_HTTP;
  411. break;
  412. case s_res_HTTP:
  413. STRICT_CHECK(ch != '/');
  414. state = s_res_first_http_major;
  415. break;
  416. case s_res_first_http_major:
  417. if (ch < '1' || ch > '9') goto error;
  418. parser->http_major = ch - '0';
  419. state = s_res_http_major;
  420. break;
  421. /* major HTTP version or dot */
  422. case s_res_http_major:
  423. {
  424. if (ch == '.') {
  425. state = s_res_first_http_minor;
  426. break;
  427. }
  428. if (ch < '0' || ch > '9') goto error;
  429. parser->http_major *= 10;
  430. parser->http_major += ch - '0';
  431. if (parser->http_major > 999) goto error;
  432. break;
  433. }
  434. /* first digit of minor HTTP version */
  435. case s_res_first_http_minor:
  436. if (ch < '0' || ch > '9') goto error;
  437. parser->http_minor = ch - '0';
  438. state = s_res_http_minor;
  439. break;
  440. /* minor HTTP version or end of request line */
  441. case s_res_http_minor:
  442. {
  443. if (ch == ' ') {
  444. state = s_res_first_status_code;
  445. break;
  446. }
  447. if (ch < '0' || ch > '9') goto error;
  448. parser->http_minor *= 10;
  449. parser->http_minor += ch - '0';
  450. if (parser->http_minor > 999) goto error;
  451. break;
  452. }
  453. case s_res_first_status_code:
  454. {
  455. if (ch < '0' || ch > '9') {
  456. if (ch == ' ') {
  457. break;
  458. }
  459. goto error;
  460. }
  461. parser->status_code = ch - '0';
  462. state = s_res_status_code;
  463. break;
  464. }
  465. case s_res_status_code:
  466. {
  467. if (ch < '0' || ch > '9') {
  468. switch (ch) {
  469. case ' ':
  470. state = s_res_status;
  471. break;
  472. case CR:
  473. state = s_res_line_almost_done;
  474. break;
  475. case LF:
  476. state = s_header_field_start;
  477. break;
  478. default:
  479. goto error;
  480. }
  481. break;
  482. }
  483. parser->status_code *= 10;
  484. parser->status_code += ch - '0';
  485. if (parser->status_code > 999) goto error;
  486. break;
  487. }
  488. case s_res_status:
  489. /* the human readable status. e.g. "NOT FOUND"
  490. * we are not humans so just ignore this */
  491. if (ch == CR) {
  492. state = s_res_line_almost_done;
  493. break;
  494. }
  495. if (ch == LF) {
  496. state = s_header_field_start;
  497. break;
  498. }
  499. break;
  500. case s_res_line_almost_done:
  501. STRICT_CHECK(ch != LF);
  502. state = s_header_field_start;
  503. break;
  504. case s_start_req:
  505. {
  506. if (ch == CR || ch == LF)
  507. break;
  508. parser->flags = 0;
  509. parser->content_length = -1;
  510. CALLBACK2(message_begin);
  511. if (ch < 'A' || 'Z' < ch) goto error;
  512. start_req_method_assign:
  513. parser->method = (enum http_method) 0;
  514. index = 1;
  515. switch (ch) {
  516. case 'C': parser->method = HTTP_CONNECT; /* or COPY, CHECKOUT */ break;
  517. case 'D': parser->method = HTTP_DELETE; break;
  518. case 'G': parser->method = HTTP_GET; break;
  519. case 'H': parser->method = HTTP_HEAD; break;
  520. case 'L': parser->method = HTTP_LOCK; break;
  521. case 'M': parser->method = HTTP_MKCOL; /* or MOVE, MKACTIVITY, MERGE, M-SEARCH */ break;
  522. case 'N': parser->method = HTTP_NOTIFY; break;
  523. case 'O': parser->method = HTTP_OPTIONS; break;
  524. case 'P': parser->method = HTTP_POST; /* or PROPFIND or PROPPATCH or PUT */ break;
  525. case 'R': parser->method = HTTP_REPORT; break;
  526. case 'S': parser->method = HTTP_SUBSCRIBE; break;
  527. case 'T': parser->method = HTTP_TRACE; break;
  528. case 'U': parser->method = HTTP_UNLOCK; /* or UNSUBSCRIBE */ break;
  529. default: goto error;
  530. }
  531. state = s_req_method;
  532. break;
  533. }
  534. case s_req_method:
  535. {
  536. if (ch == '\0')
  537. goto error;
  538. const char *matcher = method_strings[parser->method];
  539. if (ch == ' ' && matcher[index] == '\0') {
  540. state = s_req_spaces_before_url;
  541. } else if (ch == matcher[index]) {
  542. ; /* nada */
  543. } else if (parser->method == HTTP_CONNECT) {
  544. if (index == 1 && ch == 'H') {
  545. parser->method = HTTP_CHECKOUT;
  546. } else if (index == 2 && ch == 'P') {
  547. parser->method = HTTP_COPY;
  548. }
  549. } else if (parser->method == HTTP_MKCOL) {
  550. if (index == 1 && ch == 'O') {
  551. parser->method = HTTP_MOVE;
  552. } else if (index == 1 && ch == 'E') {
  553. parser->method = HTTP_MERGE;
  554. } else if (index == 1 && ch == '-') {
  555. parser->method = HTTP_MSEARCH;
  556. } else if (index == 2 && ch == 'A') {
  557. parser->method = HTTP_MKACTIVITY;
  558. }
  559. } else if (index == 1 && parser->method == HTTP_POST && ch == 'R') {
  560. parser->method = HTTP_PROPFIND; /* or HTTP_PROPPATCH */
  561. } else if (index == 1 && parser->method == HTTP_POST && ch == 'U') {
  562. parser->method = HTTP_PUT;
  563. } else if (index == 2 && parser->method == HTTP_UNLOCK && ch == 'S') {
  564. parser->method = HTTP_UNSUBSCRIBE;
  565. } else if (index == 4 && parser->method == HTTP_PROPFIND && ch == 'P') {
  566. parser->method = HTTP_PROPPATCH;
  567. } else {
  568. goto error;
  569. }
  570. ++index;
  571. break;
  572. }
  573. case s_req_spaces_before_url:
  574. {
  575. if (ch == ' ') break;
  576. if (ch == '/' || ch == '*') {
  577. MARK(url);
  578. MARK(path);
  579. state = s_req_path;
  580. break;
  581. }
  582. c = LOWER(ch);
  583. if (c >= 'a' && c <= 'z') {
  584. MARK(url);
  585. state = s_req_schema;
  586. break;
  587. }
  588. goto error;
  589. }
  590. case s_req_schema:
  591. {
  592. c = LOWER(ch);
  593. if (c >= 'a' && c <= 'z') break;
  594. if (ch == ':') {
  595. state = s_req_schema_slash;
  596. break;
  597. } else if (ch == '.') {
  598. state = s_req_host;
  599. break;
  600. } else if ('0' <= ch && ch <= '9') {
  601. state = s_req_host;
  602. break;
  603. }
  604. goto error;
  605. }
  606. case s_req_schema_slash:
  607. STRICT_CHECK(ch != '/');
  608. state = s_req_schema_slash_slash;
  609. break;
  610. case s_req_schema_slash_slash:
  611. STRICT_CHECK(ch != '/');
  612. state = s_req_host;
  613. break;
  614. case s_req_host:
  615. {
  616. c = LOWER(ch);
  617. if (c >= 'a' && c <= 'z') break;
  618. if ((ch >= '0' && ch <= '9') || ch == '.' || ch == '-') break;
  619. switch (ch) {
  620. case ':':
  621. state = s_req_port;
  622. break;
  623. case '/':
  624. MARK(path);
  625. state = s_req_path;
  626. break;
  627. case ' ':
  628. /* The request line looks like:
  629. * "GET http://foo.bar.com HTTP/1.1"
  630. * That is, there is no path.
  631. */
  632. CALLBACK(url);
  633. state = s_req_http_start;
  634. break;
  635. case '?':
  636. state = s_req_query_string_start;
  637. break;
  638. default:
  639. goto error;
  640. }
  641. break;
  642. }
  643. case s_req_port:
  644. {
  645. if (ch >= '0' && ch <= '9') break;
  646. switch (ch) {
  647. case '/':
  648. MARK(path);
  649. state = s_req_path;
  650. break;
  651. case ' ':
  652. /* The request line looks like:
  653. * "GET http://foo.bar.com:1234 HTTP/1.1"
  654. * That is, there is no path.
  655. */
  656. CALLBACK(url);
  657. state = s_req_http_start;
  658. break;
  659. case '?':
  660. state = s_req_query_string_start;
  661. break;
  662. default:
  663. goto error;
  664. }
  665. break;
  666. }
  667. case s_req_path:
  668. {
  669. if (normal_url_char[(unsigned char)ch]) break;
  670. switch (ch) {
  671. case ' ':
  672. CALLBACK(url);
  673. CALLBACK(path);
  674. state = s_req_http_start;
  675. break;
  676. case CR:
  677. CALLBACK(url);
  678. CALLBACK(path);
  679. parser->http_major = 0;
  680. parser->http_minor = 9;
  681. state = s_req_line_almost_done;
  682. break;
  683. case LF:
  684. CALLBACK(url);
  685. CALLBACK(path);
  686. parser->http_major = 0;
  687. parser->http_minor = 9;
  688. state = s_header_field_start;
  689. break;
  690. case '?':
  691. CALLBACK(path);
  692. state = s_req_query_string_start;
  693. break;
  694. case '#':
  695. CALLBACK(path);
  696. state = s_req_fragment_start;
  697. break;
  698. default:
  699. goto error;
  700. }
  701. break;
  702. }
  703. case s_req_query_string_start:
  704. {
  705. if (normal_url_char[(unsigned char)ch]) {
  706. MARK(query_string);
  707. state = s_req_query_string;
  708. break;
  709. }
  710. switch (ch) {
  711. case '?':
  712. break; /* XXX ignore extra '?' ... is this right? */
  713. case ' ':
  714. CALLBACK(url);
  715. state = s_req_http_start;
  716. break;
  717. case CR:
  718. CALLBACK(url);
  719. parser->http_major = 0;
  720. parser->http_minor = 9;
  721. state = s_req_line_almost_done;
  722. break;
  723. case LF:
  724. CALLBACK(url);
  725. parser->http_major = 0;
  726. parser->http_minor = 9;
  727. state = s_header_field_start;
  728. break;
  729. case '#':
  730. state = s_req_fragment_start;
  731. break;
  732. default:
  733. goto error;
  734. }
  735. break;
  736. }
  737. case s_req_query_string:
  738. {
  739. if (normal_url_char[(unsigned char)ch]) break;
  740. switch (ch) {
  741. case '?':
  742. /* allow extra '?' in query string */
  743. break;
  744. case ' ':
  745. CALLBACK(url);
  746. CALLBACK(query_string);
  747. state = s_req_http_start;
  748. break;
  749. case CR:
  750. CALLBACK(url);
  751. CALLBACK(query_string);
  752. parser->http_major = 0;
  753. parser->http_minor = 9;
  754. state = s_req_line_almost_done;
  755. break;
  756. case LF:
  757. CALLBACK(url);
  758. CALLBACK(query_string);
  759. parser->http_major = 0;
  760. parser->http_minor = 9;
  761. state = s_header_field_start;
  762. break;
  763. case '#':
  764. CALLBACK(query_string);
  765. state = s_req_fragment_start;
  766. break;
  767. default:
  768. goto error;
  769. }
  770. break;
  771. }
  772. case s_req_fragment_start:
  773. {
  774. if (normal_url_char[(unsigned char)ch]) {
  775. MARK(fragment);
  776. state = s_req_fragment;
  777. break;
  778. }
  779. switch (ch) {
  780. case ' ':
  781. CALLBACK(url);
  782. state = s_req_http_start;
  783. break;
  784. case CR:
  785. CALLBACK(url);
  786. parser->http_major = 0;
  787. parser->http_minor = 9;
  788. state = s_req_line_almost_done;
  789. break;
  790. case LF:
  791. CALLBACK(url);
  792. parser->http_major = 0;
  793. parser->http_minor = 9;
  794. state = s_header_field_start;
  795. break;
  796. case '?':
  797. MARK(fragment);
  798. state = s_req_fragment;
  799. break;
  800. case '#':
  801. break;
  802. default:
  803. goto error;
  804. }
  805. break;
  806. }
  807. case s_req_fragment:
  808. {
  809. if (normal_url_char[(unsigned char)ch]) break;
  810. switch (ch) {
  811. case ' ':
  812. CALLBACK(url);
  813. CALLBACK(fragment);
  814. state = s_req_http_start;
  815. break;
  816. case CR:
  817. CALLBACK(url);
  818. CALLBACK(fragment);
  819. parser->http_major = 0;
  820. parser->http_minor = 9;
  821. state = s_req_line_almost_done;
  822. break;
  823. case LF:
  824. CALLBACK(url);
  825. CALLBACK(fragment);
  826. parser->http_major = 0;
  827. parser->http_minor = 9;
  828. state = s_header_field_start;
  829. break;
  830. case '?':
  831. case '#':
  832. break;
  833. default:
  834. goto error;
  835. }
  836. break;
  837. }
  838. case s_req_http_start:
  839. switch (ch) {
  840. case 'H':
  841. state = s_req_http_H;
  842. break;
  843. case ' ':
  844. break;
  845. default:
  846. goto error;
  847. }
  848. break;
  849. case s_req_http_H:
  850. STRICT_CHECK(ch != 'T');
  851. state = s_req_http_HT;
  852. break;
  853. case s_req_http_HT:
  854. STRICT_CHECK(ch != 'T');
  855. state = s_req_http_HTT;
  856. break;
  857. case s_req_http_HTT:
  858. STRICT_CHECK(ch != 'P');
  859. state = s_req_http_HTTP;
  860. break;
  861. case s_req_http_HTTP:
  862. STRICT_CHECK(ch != '/');
  863. state = s_req_first_http_major;
  864. break;
  865. /* first digit of major HTTP version */
  866. case s_req_first_http_major:
  867. if (ch < '1' || ch > '9') goto error;
  868. parser->http_major = ch - '0';
  869. state = s_req_http_major;
  870. break;
  871. /* major HTTP version or dot */
  872. case s_req_http_major:
  873. {
  874. if (ch == '.') {
  875. state = s_req_first_http_minor;
  876. break;
  877. }
  878. if (ch < '0' || ch > '9') goto error;
  879. parser->http_major *= 10;
  880. parser->http_major += ch - '0';
  881. if (parser->http_major > 999) goto error;
  882. break;
  883. }
  884. /* first digit of minor HTTP version */
  885. case s_req_first_http_minor:
  886. if (ch < '0' || ch > '9') goto error;
  887. parser->http_minor = ch - '0';
  888. state = s_req_http_minor;
  889. break;
  890. /* minor HTTP version or end of request line */
  891. case s_req_http_minor:
  892. {
  893. if (ch == CR) {
  894. state = s_req_line_almost_done;
  895. break;
  896. }
  897. if (ch == LF) {
  898. state = s_header_field_start;
  899. break;
  900. }
  901. /* XXX allow spaces after digit? */
  902. if (ch < '0' || ch > '9') goto error;
  903. parser->http_minor *= 10;
  904. parser->http_minor += ch - '0';
  905. if (parser->http_minor > 999) goto error;
  906. break;
  907. }
  908. /* end of request line */
  909. case s_req_line_almost_done:
  910. {
  911. if (ch != LF) goto error;
  912. state = s_header_field_start;
  913. break;
  914. }
  915. case s_header_field_start:
  916. {
  917. if (ch == CR) {
  918. state = s_headers_almost_done;
  919. break;
  920. }
  921. if (ch == LF) {
  922. /* they might be just sending \n instead of \r\n so this would be
  923. * the second \n to denote the end of headers*/
  924. state = s_headers_almost_done;
  925. goto headers_almost_done;
  926. }
  927. c = TOKEN(ch);
  928. if (!c) goto error;
  929. MARK(header_field);
  930. index = 0;
  931. state = s_header_field;
  932. switch (c) {
  933. case 'c':
  934. header_state = h_C;
  935. break;
  936. case 'p':
  937. header_state = h_matching_proxy_connection;
  938. break;
  939. case 't':
  940. header_state = h_matching_transfer_encoding;
  941. break;
  942. case 'u':
  943. header_state = h_matching_upgrade;
  944. break;
  945. default:
  946. header_state = h_general;
  947. break;
  948. }
  949. break;
  950. }
  951. case s_header_field:
  952. {
  953. c = TOKEN(ch);
  954. if (c) {
  955. switch (header_state) {
  956. case h_general:
  957. break;
  958. case h_C:
  959. index++;
  960. header_state = (c == 'o' ? h_CO : h_general);
  961. break;
  962. case h_CO:
  963. index++;
  964. header_state = (c == 'n' ? h_CON : h_general);
  965. break;
  966. case h_CON:
  967. index++;
  968. switch (c) {
  969. case 'n':
  970. header_state = h_matching_connection;
  971. break;
  972. case 't':
  973. header_state = h_matching_content_length;
  974. break;
  975. default:
  976. header_state = h_general;
  977. break;
  978. }
  979. break;
  980. /* connection */
  981. case h_matching_connection:
  982. index++;
  983. if (index > sizeof(CONNECTION)-1
  984. || c != CONNECTION[index]) {
  985. header_state = h_general;
  986. } else if (index == sizeof(CONNECTION)-2) {
  987. header_state = h_connection;
  988. }
  989. break;
  990. /* proxy-connection */
  991. case h_matching_proxy_connection:
  992. index++;
  993. if (index > sizeof(PROXY_CONNECTION)-1
  994. || c != PROXY_CONNECTION[index]) {
  995. header_state = h_general;
  996. } else if (index == sizeof(PROXY_CONNECTION)-2) {
  997. header_state = h_connection;
  998. }
  999. break;
  1000. /* content-length */
  1001. case h_matching_content_length:
  1002. index++;
  1003. if (index > sizeof(CONTENT_LENGTH)-1
  1004. || c != CONTENT_LENGTH[index]) {
  1005. header_state = h_general;
  1006. } else if (index == sizeof(CONTENT_LENGTH)-2) {
  1007. header_state = h_content_length;
  1008. }
  1009. break;
  1010. /* transfer-encoding */
  1011. case h_matching_transfer_encoding:
  1012. index++;
  1013. if (index > sizeof(TRANSFER_ENCODING)-1
  1014. || c != TRANSFER_ENCODING[index]) {
  1015. header_state = h_general;
  1016. } else if (index == sizeof(TRANSFER_ENCODING)-2) {
  1017. header_state = h_transfer_encoding;
  1018. }
  1019. break;
  1020. /* upgrade */
  1021. case h_matching_upgrade:
  1022. index++;
  1023. if (index > sizeof(UPGRADE)-1
  1024. || c != UPGRADE[index]) {
  1025. header_state = h_general;
  1026. } else if (index == sizeof(UPGRADE)-2) {
  1027. header_state = h_upgrade;
  1028. }
  1029. break;
  1030. case h_connection:
  1031. case h_content_length:
  1032. case h_transfer_encoding:
  1033. case h_upgrade:
  1034. if (ch != ' ') header_state = h_general;
  1035. break;
  1036. default:
  1037. assert(0 && "Unknown header_state");
  1038. break;
  1039. }
  1040. break;
  1041. }
  1042. if (ch == ':') {
  1043. CALLBACK(header_field);
  1044. state = s_header_value_start;
  1045. break;
  1046. }
  1047. if (ch == CR) {
  1048. state = s_header_almost_done;
  1049. CALLBACK(header_field);
  1050. break;
  1051. }
  1052. if (ch == LF) {
  1053. CALLBACK(header_field);
  1054. state = s_header_field_start;
  1055. break;
  1056. }
  1057. goto error;
  1058. }
  1059. case s_header_value_start:
  1060. {
  1061. if (ch == ' ') break;
  1062. MARK(header_value);
  1063. state = s_header_value;
  1064. index = 0;
  1065. c = LOWER(ch);
  1066. if (ch == CR) {
  1067. CALLBACK(header_value);
  1068. header_state = h_general;
  1069. state = s_header_almost_done;
  1070. break;
  1071. }
  1072. if (ch == LF) {
  1073. CALLBACK(header_value);
  1074. state = s_header_field_start;
  1075. break;
  1076. }
  1077. switch (header_state) {
  1078. case h_upgrade:
  1079. parser->flags |= F_UPGRADE;
  1080. header_state = h_general;
  1081. break;
  1082. case h_transfer_encoding:
  1083. /* looking for 'Transfer-Encoding: chunked' */
  1084. if ('c' == c) {
  1085. header_state = h_matching_transfer_encoding_chunked;
  1086. } else {
  1087. header_state = h_general;
  1088. }
  1089. break;
  1090. case h_content_length:
  1091. if (ch < '0' || ch > '9') goto error;
  1092. parser->content_length = ch - '0';
  1093. break;
  1094. case h_connection:
  1095. /* looking for 'Connection: keep-alive' */
  1096. if (c == 'k') {
  1097. header_state = h_matching_connection_keep_alive;
  1098. /* looking for 'Connection: close' */
  1099. } else if (c == 'c') {
  1100. header_state = h_matching_connection_close;
  1101. } else {
  1102. header_state = h_general;
  1103. }
  1104. break;
  1105. default:
  1106. header_state = h_general;
  1107. break;
  1108. }
  1109. break;
  1110. }
  1111. case s_header_value:
  1112. {
  1113. c = LOWER(ch);
  1114. if (ch == CR) {
  1115. CALLBACK(header_value);
  1116. state = s_header_almost_done;
  1117. break;
  1118. }
  1119. if (ch == LF) {
  1120. CALLBACK(header_value);
  1121. goto header_almost_done;
  1122. }
  1123. switch (header_state) {
  1124. case h_general:
  1125. break;
  1126. case h_connection:
  1127. case h_transfer_encoding:
  1128. assert(0 && "Shouldn't get here.");
  1129. break;
  1130. case h_content_length:
  1131. if (ch == ' ') break;
  1132. if (ch < '0' || ch > '9') goto error;
  1133. parser->content_length *= 10;
  1134. parser->content_length += ch - '0';
  1135. break;
  1136. /* Transfer-Encoding: chunked */
  1137. case h_matching_transfer_encoding_chunked:
  1138. index++;
  1139. if (index > sizeof(CHUNKED)-1
  1140. || c != CHUNKED[index]) {
  1141. header_state = h_general;
  1142. } else if (index == sizeof(CHUNKED)-2) {
  1143. header_state = h_transfer_encoding_chunked;
  1144. }
  1145. break;
  1146. /* looking for 'Connection: keep-alive' */
  1147. case h_matching_connection_keep_alive:
  1148. index++;
  1149. if (index > sizeof(KEEP_ALIVE)-1
  1150. || c != KEEP_ALIVE[index]) {
  1151. header_state = h_general;
  1152. } else if (index == sizeof(KEEP_ALIVE)-2) {
  1153. header_state = h_connection_keep_alive;
  1154. }
  1155. break;
  1156. /* looking for 'Connection: close' */
  1157. case h_matching_connection_close:
  1158. index++;
  1159. if (index > sizeof(CLOSE)-1 || c != CLOSE[index]) {
  1160. header_state = h_general;
  1161. } else if (index == sizeof(CLOSE)-2) {
  1162. header_state = h_connection_close;
  1163. }
  1164. break;
  1165. case h_transfer_encoding_chunked:
  1166. case h_connection_keep_alive:
  1167. case h_connection_close:
  1168. if (ch != ' ') header_state = h_general;
  1169. break;
  1170. default:
  1171. state = s_header_value;
  1172. header_state = h_general;
  1173. break;
  1174. }
  1175. break;
  1176. }
  1177. case s_header_almost_done:
  1178. header_almost_done:
  1179. {
  1180. STRICT_CHECK(ch != LF);
  1181. state = s_header_field_start;
  1182. switch (header_state) {
  1183. case h_connection_keep_alive:
  1184. parser->flags |= F_CONNECTION_KEEP_ALIVE;
  1185. break;
  1186. case h_connection_close:
  1187. parser->flags |= F_CONNECTION_CLOSE;
  1188. break;
  1189. case h_transfer_encoding_chunked:
  1190. parser->flags |= F_CHUNKED;
  1191. break;
  1192. default:
  1193. break;
  1194. }
  1195. break;
  1196. }
  1197. case s_headers_almost_done:
  1198. headers_almost_done:
  1199. {
  1200. STRICT_CHECK(ch != LF);
  1201. if (parser->flags & F_TRAILING) {
  1202. /* End of a chunked request */
  1203. CALLBACK2(message_complete);
  1204. state = NEW_MESSAGE();
  1205. break;
  1206. }
  1207. nread = 0;
  1208. if (parser->flags & F_UPGRADE || parser->method == HTTP_CONNECT) {
  1209. parser->upgrade = 1;
  1210. }
  1211. /* Here we call the headers_complete callback. This is somewhat
  1212. * different than other callbacks because if the user returns 1, we
  1213. * will interpret that as saying that this message has no body. This
  1214. * is needed for the annoying case of recieving a response to a HEAD
  1215. * request.
  1216. */
  1217. if (settings->on_headers_complete) {
  1218. switch (settings->on_headers_complete(parser)) {
  1219. case 0:
  1220. break;
  1221. case 1:
  1222. parser->flags |= F_SKIPBODY;
  1223. break;
  1224. default:
  1225. parser->state = state;
  1226. return p - data; /* Error */
  1227. }
  1228. }
  1229. /* Exit, the rest of the connect is in a different protocol. */
  1230. if (parser->upgrade) {
  1231. CALLBACK2(message_complete);
  1232. return (p - data);
  1233. }
  1234. if (parser->flags & F_SKIPBODY) {
  1235. CALLBACK2(message_complete);
  1236. state = NEW_MESSAGE();
  1237. } else if (parser->flags & F_CHUNKED) {
  1238. /* chunked encoding - ignore Content-Length header */
  1239. state = s_chunk_size_start;
  1240. } else {
  1241. if (parser->content_length == 0) {
  1242. /* Content-Length header given but zero: Content-Length: 0\r\n */
  1243. CALLBACK2(message_complete);
  1244. state = NEW_MESSAGE();
  1245. } else if (parser->content_length > 0) {
  1246. /* Content-Length header given and non-zero */
  1247. state = s_body_identity;
  1248. } else {
  1249. if (parser->type == HTTP_REQUEST || http_should_keep_alive(parser)) {
  1250. /* Assume content-length 0 - read the next */
  1251. CALLBACK2(message_complete);
  1252. state = NEW_MESSAGE();
  1253. } else {
  1254. /* Read body until EOF */
  1255. state = s_body_identity_eof;
  1256. }
  1257. }
  1258. }
  1259. break;
  1260. }
  1261. case s_body_identity:
  1262. to_read = MIN(pe - p, (int64_t)parser->content_length);
  1263. if (to_read > 0) {
  1264. if (settings->on_body) settings->on_body(parser, p, to_read);
  1265. p += to_read - 1;
  1266. parser->content_length -= to_read;
  1267. if (parser->content_length == 0) {
  1268. CALLBACK2(message_complete);
  1269. state = NEW_MESSAGE();
  1270. }
  1271. }
  1272. break;
  1273. /* read until EOF */
  1274. case s_body_identity_eof:
  1275. to_read = pe - p;
  1276. if (to_read > 0) {
  1277. if (settings->on_body) settings->on_body(parser, p, to_read);
  1278. p += to_read - 1;
  1279. }
  1280. break;
  1281. case s_chunk_size_start:
  1282. {
  1283. assert(nread == 1);
  1284. assert(parser->flags & F_CHUNKED);
  1285. c = unhex[(unsigned char)ch];
  1286. if (c == -1) goto error;
  1287. parser->content_length = c;
  1288. state = s_chunk_size;
  1289. break;
  1290. }
  1291. case s_chunk_size:
  1292. {
  1293. assert(parser->flags & F_CHUNKED);
  1294. if (ch == CR) {
  1295. state = s_chunk_size_almost_done;
  1296. break;
  1297. }
  1298. c = unhex[(unsigned char)ch];
  1299. if (c == -1) {
  1300. if (ch == ';' || ch == ' ') {
  1301. state = s_chunk_parameters;
  1302. break;
  1303. }
  1304. goto error;
  1305. }
  1306. parser->content_length *= 16;
  1307. parser->content_length += c;
  1308. break;
  1309. }
  1310. case s_chunk_parameters:
  1311. {
  1312. assert(parser->flags & F_CHUNKED);
  1313. /* just ignore this shit. TODO check for overflow */
  1314. if (ch == CR) {
  1315. state = s_chunk_size_almost_done;
  1316. break;
  1317. }
  1318. break;
  1319. }
  1320. case s_chunk_size_almost_done:
  1321. {
  1322. assert(parser->flags & F_CHUNKED);
  1323. STRICT_CHECK(ch != LF);
  1324. nread = 0;
  1325. if (parser->content_length == 0) {
  1326. parser->flags |= F_TRAILING;
  1327. state = s_header_field_start;
  1328. } else {
  1329. state = s_chunk_data;
  1330. }
  1331. break;
  1332. }
  1333. case s_chunk_data:
  1334. {
  1335. assert(parser->flags & F_CHUNKED);
  1336. to_read = MIN(pe - p, (int64_t)(parser->content_length));
  1337. if (to_read > 0) {
  1338. if (settings->on_body) settings->on_body(parser, p, to_read);
  1339. p += to_read - 1;
  1340. }
  1341. if (to_read == parser->content_length) {
  1342. state = s_chunk_data_almost_done;
  1343. }
  1344. parser->content_length -= to_read;
  1345. break;
  1346. }
  1347. case s_chunk_data_almost_done:
  1348. assert(parser->flags & F_CHUNKED);
  1349. STRICT_CHECK(ch != CR);
  1350. state = s_chunk_data_done;
  1351. break;
  1352. case s_chunk_data_done:
  1353. assert(parser->flags & F_CHUNKED);
  1354. STRICT_CHECK(ch != LF);
  1355. state = s_chunk_size_start;
  1356. break;
  1357. default:
  1358. assert(0 && "unhandled state");
  1359. goto error;
  1360. }
  1361. }
  1362. CALLBACK_NOCLEAR(header_field);
  1363. CALLBACK_NOCLEAR(header_value);
  1364. CALLBACK_NOCLEAR(fragment);
  1365. CALLBACK_NOCLEAR(query_string);
  1366. CALLBACK_NOCLEAR(path);
  1367. CALLBACK_NOCLEAR(url);
  1368. parser->state = state;
  1369. parser->header_state = header_state;
  1370. parser->index = index;
  1371. parser->nread = nread;
  1372. return len;
  1373. error:
  1374. parser->state = s_dead;
  1375. return (p - data);
  1376. }
  1377. int
  1378. http_should_keep_alive (http_parser *parser)
  1379. {
  1380. if (parser->http_major > 0 && parser->http_minor > 0) {
  1381. /* HTTP/1.1 */
  1382. if (parser->flags & F_CONNECTION_CLOSE) {
  1383. return 0;
  1384. } else {
  1385. return 1;
  1386. }
  1387. } else {
  1388. /* HTTP/1.0 or earlier */
  1389. if (parser->flags & F_CONNECTION_KEEP_ALIVE) {
  1390. return 1;
  1391. } else {
  1392. return 0;
  1393. }
  1394. }
  1395. }
  1396. const char * http_method_str (enum http_method m)
  1397. {
  1398. return method_strings[m];
  1399. }
  1400. void
  1401. http_parser_init (http_parser *parser, enum http_parser_type t)
  1402. {
  1403. parser->type = t;
  1404. parser->state = (t == HTTP_REQUEST ? s_start_req : (t == HTTP_RESPONSE ? s_start_res : s_start_req_or_res));
  1405. parser->nread = 0;
  1406. parser->upgrade = 0;
  1407. parser->flags = 0;
  1408. parser->method = 0;
  1409. }