Browse Source

Merge upstream version 2.7.1

Christoph Biedl 6 years ago
parent
commit
ac9d9f915a
13 changed files with 1888 additions and 436 deletions
  1. 17 1
      .gitignore
  2. 1 0
      .mailmap
  3. 1 1
      .travis.yml
  4. 27 0
      AUTHORS
  5. 0 4
      CONTRIBUTIONS
  6. 83 9
      Makefile
  7. 98 32
      README.md
  8. 111 0
      bench.c
  9. 8 4
      contrib/parsertrace.c
  10. 9 6
      contrib/url_parser.c
  11. 585 290
      http_parser.c
  12. 90 32
      http_parser.h
  13. 858 57
      test.c

+ 17 - 1
.gitignore

@@ -5,10 +5,26 @@ tags
 test
 test_g
 test_fast
+bench
 url_parser
 parsertrace
 parsertrace_g
 *.mk
 *.Makefile
-*.so
+*.so.*
+*.exe.*
+*.exe
 *.a
+
+
+# Visual Studio uglies
+*.suo
+*.sln
+*.vcxproj
+*.vcxproj.filters
+*.vcxproj.user
+*.opensdf
+*.ncrunchsolution*
+*.sdf
+*.vsp
+*.psess

+ 1 - 0
.mailmap

@@ -5,3 +5,4 @@ Salman Haq <salman.haq@asti-usa.com>
 Simon Zimmermann <simonz05@gmail.com>
 Thomas LE ROUX <thomas@november-eleven.fr> LE ROUX Thomas <thomas@procheo.fr>
 Thomas LE ROUX <thomas@november-eleven.fr> Thomas LE ROUX <thomas@procheo.fr>
+Fedor Indutny <fedor@indutny.com>

+ 1 - 1
.travis.yml

@@ -10,4 +10,4 @@ script:
 notifications:
   email: false
   irc:
-    - "irc.freenode.net#libuv"
+    - "irc.freenode.net#node-ci"

+ 27 - 0
AUTHORS

@@ -39,3 +39,30 @@ BogDan Vatra <bogdan@kde.org>
 Peter Faiman <peter@thepicard.org>
 Corey Richardson <corey@octayn.net>
 Tóth Tamás <tomika_nospam@freemail.hu>
+Cam Swords <cam.swords@gmail.com>
+Chris Dickinson <christopher.s.dickinson@gmail.com>
+Uli Köhler <ukoehler@btronik.de>
+Charlie Somerville <charlie@charliesomerville.com>
+Patrik Stutz <patrik.stutz@gmail.com>
+Fedor Indutny <fedor.indutny@gmail.com>
+runner <runner.mei@gmail.com>
+Alexis Campailla <alexis@janeasystems.com>
+David Wragg <david@wragg.org>
+Vinnie Falco <vinnie.falco@gmail.com>
+Alex Butum <alexbutum@linux.com>
+Rex Feng <rexfeng@gmail.com>
+Alex Kocharin <alex@kocharin.ru>
+Mark Koopman <markmontymark@yahoo.com>
+Helge Heß <me@helgehess.eu>
+Alexis La Goutte <alexis.lagoutte@gmail.com>
+George Miroshnykov <george.miroshnykov@gmail.com>
+Maciej Małecki <me@mmalecki.com>
+Marc O'Morain <github.com@marcomorain.com>
+Jeff Pinner <jpinner@twitter.com>
+Timothy J Fontaine <tjfontaine@gmail.com>
+Akagi201 <akagi201@gmail.com>
+Romain Giraud <giraud.romain@gmail.com>
+Jay Satiro <raysatiro@yahoo.com>
+Arne Steen <Arne.Steen@gmx.de>
+Kjell Schubert <kjell.schubert@gmail.com>
+Olivier Mengué <dolmen@cpan.org>

+ 0 - 4
CONTRIBUTIONS

@@ -1,4 +0,0 @@
-Contributors must agree to the Contributor License Agreement before patches
-can be accepted.
-
-http://spreadsheets2.google.com/viewform?hl=en&formkey=dDJXOGUwbzlYaWM4cHN1MERwQS1CSnc6MQ

+ 83 - 9
Makefile

@@ -1,20 +1,72 @@
+# Copyright Joyent, Inc. and other Node contributors. All rights reserved.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to
+# deal in the Software without restriction, including without limitation the
+# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+# sell copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+# IN THE SOFTWARE.
+
+PLATFORM ?= $(shell sh -c 'uname -s | tr "[A-Z]" "[a-z]"')
+HELPER ?=
+BINEXT ?=
+ifeq (darwin,$(PLATFORM))
+SONAME ?= libhttp_parser.2.7.1.dylib
+SOEXT ?= dylib
+else ifeq (wine,$(PLATFORM))
+CC = winegcc
+BINEXT = .exe.so
+HELPER = wine
+else
+SONAME ?= libhttp_parser.so.2.7.1
+SOEXT ?= so
+endif
+
 CC?=gcc
 AR?=ar
 
+CPPFLAGS ?=
+LDFLAGS ?=
+
 CPPFLAGS += -I.
 CPPFLAGS_DEBUG = $(CPPFLAGS) -DHTTP_PARSER_STRICT=1
 CPPFLAGS_DEBUG += $(CPPFLAGS_DEBUG_EXTRA)
 CPPFLAGS_FAST = $(CPPFLAGS) -DHTTP_PARSER_STRICT=0
 CPPFLAGS_FAST += $(CPPFLAGS_FAST_EXTRA)
+CPPFLAGS_BENCH = $(CPPFLAGS_FAST)
 
 CFLAGS += -Wall -Wextra -Werror
 CFLAGS_DEBUG = $(CFLAGS) -O0 -g $(CFLAGS_DEBUG_EXTRA)
 CFLAGS_FAST = $(CFLAGS) -O3 $(CFLAGS_FAST_EXTRA)
+CFLAGS_BENCH = $(CFLAGS_FAST) -Wno-unused-parameter
 CFLAGS_LIB = $(CFLAGS_FAST) -fPIC
 
+LDFLAGS_LIB = $(LDFLAGS) -shared
+
+INSTALL ?= install
+PREFIX ?= $(DESTDIR)/usr/local
+LIBDIR = $(PREFIX)/lib
+INCLUDEDIR = $(PREFIX)/include
+
+ifneq (darwin,$(PLATFORM))
+# TODO(bnoordhuis) The native SunOS linker expects -h rather than -soname...
+LDFLAGS_LIB += -Wl,-soname=$(SONAME)
+endif
+
 test: test_g test_fast
-	./test_g
-	./test_fast
+	$(HELPER) ./test_g$(BINEXT)
+	$(HELPER) ./test_fast$(BINEXT)
 
 test_g: http_parser_g.o test_g.o
 	$(CC) $(CFLAGS_DEBUG) $(LDFLAGS) http_parser_g.o test_g.o -o $@
@@ -31,11 +83,17 @@ test_fast: http_parser.o test.o http_parser.h
 test.o: test.c http_parser.h Makefile
 	$(CC) $(CPPFLAGS_FAST) $(CFLAGS_FAST) -c test.c -o $@
 
+bench: http_parser.o bench.o
+	$(CC) $(CFLAGS_BENCH) $(LDFLAGS) http_parser.o bench.o -o $@
+
+bench.o: bench.c http_parser.h Makefile
+	$(CC) $(CPPFLAGS_BENCH) $(CFLAGS_BENCH) -c bench.c -o $@
+
 http_parser.o: http_parser.c http_parser.h Makefile
 	$(CC) $(CPPFLAGS_FAST) $(CFLAGS_FAST) -c http_parser.c
 
 test-run-timed: test_fast
-	while(true) do time ./test_fast > /dev/null; done
+	while(true) do time $(HELPER) ./test_fast$(BINEXT) > /dev/null; done
 
 test-valgrind: test_g
 	valgrind ./test_g
@@ -44,7 +102,7 @@ libhttp_parser.o: http_parser.c http_parser.h Makefile
 	$(CC) $(CPPFLAGS_FAST) $(CFLAGS_LIB) -c http_parser.c -o libhttp_parser.o
 
 library: libhttp_parser.o
-	$(CC) -shared -o libhttp_parser.so libhttp_parser.o
+	$(CC) $(LDFLAGS_LIB) -o $(SONAME) $<
 
 package: http_parser.o
 	$(AR) rcs libhttp_parser.a http_parser.o
@@ -56,20 +114,36 @@ url_parser_g: http_parser_g.o contrib/url_parser.c
 	$(CC) $(CPPFLAGS_DEBUG) $(CFLAGS_DEBUG) $^ -o $@
 
 parsertrace: http_parser.o contrib/parsertrace.c
-	$(CC) $(CPPFLAGS_FAST) $(CFLAGS_FAST) $^ -o parsertrace
+	$(CC) $(CPPFLAGS_FAST) $(CFLAGS_FAST) $^ -o parsertrace$(BINEXT)
 
 parsertrace_g: http_parser_g.o contrib/parsertrace.c
-	$(CC) $(CPPFLAGS_DEBUG) $(CFLAGS_DEBUG) $^ -o parsertrace_g
+	$(CC) $(CPPFLAGS_DEBUG) $(CFLAGS_DEBUG) $^ -o parsertrace_g$(BINEXT)
 
 tags: http_parser.c http_parser.h test.c
 	ctags $^
 
+# Install the public header and the versioned shared library, then create the
+# unversioned development symlink beside it. The link target is the bare
+# library name: PREFIX already contains DESTDIR, so an absolute target would
+# point into the staging directory. -f lets a re-install replace an old link.
+install: library
+	$(INSTALL) -D  http_parser.h $(INCLUDEDIR)/http_parser.h
+	$(INSTALL) -D $(SONAME) $(LIBDIR)/$(SONAME)
+	ln -sf $(SONAME) $(LIBDIR)/libhttp_parser.$(SOEXT)
+
+# Same as a regular install, but the shared library is stripped (-s) while it
+# is copied. The symlink target is the bare library name so it stays valid
+# when DESTDIR is a staging directory; -f replaces a link left by an earlier
+# install.
+install-strip: library
+	$(INSTALL) -D  http_parser.h $(INCLUDEDIR)/http_parser.h
+	$(INSTALL) -D -s $(SONAME) $(LIBDIR)/$(SONAME)
+	ln -sf $(SONAME) $(LIBDIR)/libhttp_parser.$(SOEXT)
+
+# Remove the installed header, the versioned library and the unversioned
+# symlink. The symlink is named with $(SOEXT) (dylib on darwin, so otherwise),
+# not a hard-coded .so, so it matches the name the install rules create.
+uninstall:
+	rm $(INCLUDEDIR)/http_parser.h
+	rm $(LIBDIR)/$(SONAME)
+	rm $(LIBDIR)/libhttp_parser.$(SOEXT)
+
 clean:
+# Also removes the bench binary and the darwin libhttp_parser.<version>.dylib,
+# both of which are produced by rules in this Makefile.
 	rm -f *.o *.a tags test test_fast test_g \
-		http_parser.tar libhttp_parser.so \
-		url_parser url_parser_g parsertrace parsertrace_g
+		http_parser.tar libhttp_parser.so.* libhttp_parser.*.dylib \
+		url_parser url_parser_g parsertrace parsertrace_g \
+		bench *.exe *.exe.so
 
 contrib/url_parser.c:	http_parser.h
 contrib/parsertrace.c:	http_parser.h
 
-.PHONY: clean package test-run test-run-timed test-valgrind
+.PHONY: clean package test-run test-run-timed test-valgrind install install-strip uninstall

+ 98 - 32
README.md

@@ -1,7 +1,7 @@
 HTTP Parser
 ===========
 
-[![Build Status](https://travis-ci.org/joyent/http-parser.png?branch=master)](https://travis-ci.org/joyent/http-parser)
+[![Build Status](https://api.travis-ci.org/nodejs/http-parser.svg?branch=master)](https://travis-ci.org/nodejs/http-parser)
 
 This is a parser for HTTP messages written in C. It parses both requests and
 responses. The parser is designed to be used in performance HTTP
@@ -36,43 +36,46 @@ Usage
 One `http_parser` object is used per TCP connection. Initialize the struct
 using `http_parser_init()` and set the callbacks. That might look something
 like this for a request parser:
+```c
+http_parser_settings settings;
+settings.on_url = my_url_callback;
+settings.on_header_field = my_header_field_callback;
+/* ... */
 
-    http_parser_settings settings;
-    settings.on_url = my_url_callback;
-    settings.on_header_field = my_header_field_callback;
-    /* ... */
-
-    http_parser *parser = malloc(sizeof(http_parser));
-    http_parser_init(parser, HTTP_REQUEST);
-    parser->data = my_socket;
+http_parser *parser = malloc(sizeof(http_parser));
+http_parser_init(parser, HTTP_REQUEST);
+parser->data = my_socket;
+```
 
 When data is received on the socket execute the parser and check for errors.
 
-    size_t len = 80*1024, nparsed;
-    char buf[len];
-    ssize_t recved;
+```c
+size_t len = 80*1024, nparsed;
+char buf[len];
+ssize_t recved;
 
-    recved = recv(fd, buf, len, 0);
+recved = recv(fd, buf, len, 0);
 
-    if (recved < 0) {
-      /* Handle error. */
-    }
+if (recved < 0) {
+  /* Handle error. */
+}
 
-    /* Start up / continue the parser.
-     * Note we pass recved==0 to signal that EOF has been recieved.
-     */
-    nparsed = http_parser_execute(parser, &settings, buf, recved);
+/* Start up / continue the parser.
+ * Note we pass recved==0 to signal that EOF has been received.
+ */
+nparsed = http_parser_execute(parser, &settings, buf, recved);
 
-    if (parser->upgrade) {
-      /* handle new protocol */
-    } else if (nparsed != recved) {
-      /* Handle error. Usually just close the connection. */
-    }
+if (parser->upgrade) {
+  /* handle new protocol */
+} else if (nparsed != recved) {
+  /* Handle error. Usually just close the connection. */
+}
+```
 
 HTTP needs to know where the end of the stream is. For example, sometimes
 servers send responses without Content-Length and expect the client to
 consume input (for the body) until EOF. To tell http_parser about EOF, give
-`0` as the forth parameter to `http_parser_execute()`. Callbacks and errors
+`0` as the fourth parameter to `http_parser_execute()`. Callbacks and errors
 can still be encountered during an EOF, so one must still be prepared
 to receive them.
 
@@ -91,7 +94,7 @@ The Special Problem of Upgrade
 ------------------------------
 
 HTTP supports upgrading the connection to a different protocol. An
-increasingly common example of this is the Web Socket protocol which sends
+increasingly common example of this is the WebSocket protocol which sends
 a request like
 
         GET /demo HTTP/1.1
@@ -103,11 +106,11 @@ a request like
 
 followed by non-HTTP data.
 
-(See http://tools.ietf.org/html/draft-hixie-thewebsocketprotocol-75 for more
-information the Web Socket protocol.)
+(See [RFC6455](https://tools.ietf.org/html/rfc6455) for more information on the
+WebSocket protocol.)
 
 To support this, the parser will treat this as a normal HTTP message without a
-body. Issuing both on_headers_complete and on_message_complete callbacks. However
+body, issuing both on_headers_complete and on_message_complete callbacks. However
 http_parser_execute() will stop parsing at the end of the headers and return.
 
 The user is expected to check if `parser->upgrade` has been set to 1 after
@@ -128,12 +131,75 @@ There are two types of callbacks:
 * notification `typedef int (*http_cb) (http_parser*);`
     Callbacks: on_message_begin, on_headers_complete, on_message_complete.
 * data `typedef int (*http_data_cb) (http_parser*, const char *at, size_t length);`
-    Callbacks: (requests only) on_uri,
+    Callbacks: (requests only) on_url,
                (common) on_header_field, on_header_value, on_body;
 
 Callbacks must return 0 on success. Returning a non-zero value indicates
 error to the parser, making it exit immediately.
 
+For cases where it is necessary to pass local information to/from a callback,
+the `http_parser` object's `data` field can be used.
+An example of such a case is when using threads to handle a socket connection,
+parse a request, and then give a response over that socket. By instantiation
+of a thread-local struct containing relevant data (e.g. accepted socket,
+allocated memory for callbacks to write into, etc), a parser's callbacks are
+able to communicate data between the scope of the thread and the scope of the
+callback in a threadsafe manner. This allows http-parser to be used in
+multi-threaded contexts.
+
+Example:
+```c
+ typedef struct {
+  socket_t sock;
+  void* buffer;
+  int buf_len;
+ } custom_data_t;
+
+
+int my_url_callback(http_parser* parser, const char *at, size_t length) {
+  /* access to thread local custom_data_t struct.
+  Use this access to save parsed data for later use into thread local
+  buffer, or communicate over socket
+  */
+  parser->data;
+  ...
+  return 0;
+}
+
+...
+
+void http_parser_thread(socket_t sock) {
+ int nparsed = 0;
+ /* allocate memory for user data */
+ custom_data_t *my_data = malloc(sizeof(custom_data_t));
+
+ /* some information for use by callbacks.
+ * achieves thread -> callback information flow */
+ my_data->sock = sock;
+
+ /* instantiate a thread-local parser */
+ http_parser *parser = malloc(sizeof(http_parser));
+ http_parser_init(parser, HTTP_REQUEST); /* initialise parser */
+ /* this custom data reference is accessible through the reference to the
+ parser supplied to callback functions */
+ parser->data = my_data;
+
+ http_parser_settings settings; /* set up callbacks */
+ settings.on_url = my_url_callback;
+
+ /* execute parser */
+ nparsed = http_parser_execute(parser, &settings, buf, recved);
+
+ ...
+ /* parsed information copied from callback.
+ can now perform action on data copied into thread-local memory from callbacks.
+ achieves callback -> thread information flow */
+ my_data->buffer;
+ ...
+}
+
+```
+
 In case you parse HTTP message in chunks (i.e. `read()` request line
 from socket, parse, read half headers, parse, etc) your data callbacks
 may be called more than once. Http-parser guarantees that data pointer is only
@@ -142,7 +208,7 @@ buffer to avoid copying memory around if this fits your application.
 
 Reading headers may be a tricky task if you read/parse headers partially.
 Basically, you need to remember whether last header callback was field or value
-and apply following logic:
+and apply the following logic:
 
     (on_header_field and on_header_value shortened to on_h_*)
      ------------------------ ------------ --------------------------------------------

+ 111 - 0
bench.c

@@ -0,0 +1,111 @@
+/* Copyright Fedor Indutny. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+#include "http_parser.h"
+#include <assert.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/time.h>
+
+static const char data[] =
+    "POST /joyent/http-parser HTTP/1.1\r\n"
+    "Host: github.com\r\n"
+    "DNT: 1\r\n"
+    "Accept-Encoding: gzip, deflate, sdch\r\n"
+    "Accept-Language: ru-RU,ru;q=0.8,en-US;q=0.6,en;q=0.4\r\n"
+    "User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) "
+        "AppleWebKit/537.36 (KHTML, like Gecko) "
+        "Chrome/39.0.2171.65 Safari/537.36\r\n"
+    "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,"
+        "image/webp,*/*;q=0.8\r\n"
+    "Referer: https://github.com/joyent/http-parser\r\n"
+    "Connection: keep-alive\r\n"
+    "Transfer-Encoding: chunked\r\n"
+    "Cache-Control: max-age=0\r\n\r\nb\r\nhello world\r\n0\r\n\r\n";
+static const size_t data_len = sizeof(data) - 1;
+
+static int on_info(http_parser* p) {
+  return 0;
+}
+
+
+static int on_data(http_parser* p, const char *at, size_t length) {
+  return 0;
+}
+
+static http_parser_settings settings = {
+  .on_message_begin = on_info,
+  .on_headers_complete = on_info,
+  .on_message_complete = on_info,
+  .on_header_field = on_data,
+  .on_header_value = on_data,
+  .on_url = on_data,
+  .on_status = on_data,
+  .on_body = on_data
+};
+
+int bench(int iter_count, int silent) {
+  struct http_parser parser;
+  int i;
+  int err;
+  struct timeval start;
+  struct timeval end;
+  float rps;
+
+  if (!silent) {
+    err = gettimeofday(&start, NULL);
+    assert(err == 0);
+  }
+
+  for (i = 0; i < iter_count; i++) {
+    size_t parsed;
+    http_parser_init(&parser, HTTP_REQUEST);
+
+    parsed = http_parser_execute(&parser, &settings, data, data_len);
+    assert(parsed == data_len);
+  }
+
+  if (!silent) {
+    err = gettimeofday(&end, NULL);
+    assert(err == 0);
+
+    fprintf(stdout, "Benchmark result:\n");
+
+    rps = (float) (end.tv_sec - start.tv_sec) +
+          (end.tv_usec - start.tv_usec) * 1e-6f;
+    fprintf(stdout, "Took %f seconds to run\n", rps);
+
+    rps = (float) iter_count / rps;
+    fprintf(stdout, "%f req/sec\n", rps);
+    fflush(stdout);
+  }
+
+  return 0;
+}
+
+int main(int argc, char** argv) {
+  if (argc == 2 && strcmp(argv[1], "infinite") == 0) {
+    for (;;)
+      bench(5000000, 1);
+    return 0;
+  } else {
+    return bench(5000000, 0);
+  }
+}

+ 8 - 4
contrib/parsertrace.c

@@ -111,14 +111,14 @@ int main(int argc, char* argv[]) {
   FILE* file = fopen(filename, "r");
   if (file == NULL) {
     perror("fopen");
-    return EXIT_FAILURE;
+    goto fail;
   }
 
   fseek(file, 0, SEEK_END);
   long file_length = ftell(file);
   if (file_length == -1) {
     perror("ftell");
-    return EXIT_FAILURE;
+    goto fail;
   }
   fseek(file, 0, SEEK_SET);
 
@@ -126,7 +126,7 @@ int main(int argc, char* argv[]) {
   if (fread(data, 1, file_length, file) != (size_t)file_length) {
     fprintf(stderr, "couldn't read entire file\n");
     free(data);
-    return EXIT_FAILURE;
+    goto fail;
   }
 
   http_parser_settings settings;
@@ -149,8 +149,12 @@ int main(int argc, char* argv[]) {
             "Error: %s (%s)\n",
             http_errno_description(HTTP_PARSER_ERRNO(&parser)),
             http_errno_name(HTTP_PARSER_ERRNO(&parser)));
-    return EXIT_FAILURE;
+    goto fail;
   }
 
   return EXIT_SUCCESS;
+
+fail:
+  fclose(file);
+  return EXIT_FAILURE;
 }

+ 9 - 6
contrib/url_parser.c

@@ -14,7 +14,7 @@ dump_url (const char *url, const struct http_parser_url *u)
       continue;
     }
 
-    printf("\tfield_data[%u]: off: %u len: %u part: \"%.*s\n",
+    printf("\tfield_data[%u]: off: %u, len: %u, part: %.*s\n",
            i,
            u->field_data[i].off,
            u->field_data[i].len,
@@ -24,16 +24,19 @@ dump_url (const char *url, const struct http_parser_url *u)
 }
 
 int main(int argc, char ** argv) {
+  struct http_parser_url u;
+  int len, connect, result;
+
   if (argc != 3) {
     printf("Syntax : %s connect|get url\n", argv[0]);
     return 1;
   }
-  struct http_parser_url u;
-  int len = strlen(argv[2]);
-  int connect = strcmp("connect", argv[1]) == 0 ? 1 : 0;
+  len = strlen(argv[2]);
+  connect = strcmp("connect", argv[1]) == 0 ? 1 : 0;
   printf("Parsing %s, connect %d\n", argv[2], connect);
 
-  int result = http_parser_parse_url(argv[2], len, connect, &u);
+  http_parser_url_init(&u);
+  result = http_parser_parse_url(argv[2], len, connect, &u);
   if (result != 0) {
     printf("Parse error : %d\n", result);
     return result;
@@ -41,4 +44,4 @@ int main(int argc, char ** argv) {
   printf("Parse ok, result : \n");
   dump_url(argv[2], &u);
   return 0;
-}
+}

File diff suppressed because it is too large
+ 585 - 290
http_parser.c


+ 90 - 32
http_parser.h

@@ -24,11 +24,14 @@
 extern "C" {
 #endif
 
+/* Also update SONAME in the Makefile whenever you change these. */
 #define HTTP_PARSER_VERSION_MAJOR 2
-#define HTTP_PARSER_VERSION_MINOR 1
+#define HTTP_PARSER_VERSION_MINOR 7
+#define HTTP_PARSER_VERSION_PATCH 1
 
 #include <sys/types.h>
-#if defined(_WIN32) && !defined(__MINGW32__) && (!defined(_MSC_VER) || _MSC_VER<1600)
+#if defined(_WIN32) && !defined(__MINGW32__) && \
+  (!defined(_MSC_VER) || _MSC_VER<1600) && !defined(__WINE__)
 #include <BaseTsd.h>
 #include <stddef.h>
 typedef __int8 int8_t;
@@ -50,9 +53,16 @@ typedef unsigned __int64 uint64_t;
 # define HTTP_PARSER_STRICT 1
 #endif
 
-/* Maximium header size allowed */
-#define HTTP_MAX_HEADER_SIZE (80*1024)
-
+/* Maximum header size allowed. If the macro is not defined
+ * before including this header then the default is used. To
+ * change the maximum header size, define the macro in the build
+ * environment (e.g. -DHTTP_MAX_HEADER_SIZE=<value>). To remove
+ * the effective limit on the size of the header, define the macro
+ * to a very large number (e.g. -DHTTP_MAX_HEADER_SIZE=0x7fffffff)
+ */
+#ifndef HTTP_MAX_HEADER_SIZE
+# define HTTP_MAX_HEADER_SIZE (80*1024)
+#endif
 
 typedef struct http_parser http_parser;
 typedef struct http_parser_settings http_parser_settings;
@@ -67,7 +77,12 @@ typedef struct http_parser_settings http_parser_settings;
  * HEAD request which may contain 'Content-Length' or 'Transfer-Encoding:
  * chunked' headers that indicate the presence of a body.
  *
- * http_data_cb does not return data chunks. It will be call arbitrarally
+ * Returning `2` from on_headers_complete will tell the parser that it should
+ * expect neither a body nor any further responses on this connection. This is
+ * useful for handling responses to a CONNECT request which may not contain
+ * `Upgrade` or `Connection: upgrade` headers.
+ *
+ * http_data_cb does not return data chunks. It will be called arbitrarily
  * many times for each string. E.G. you might get 10 callbacks for "on_url"
  * each providing just a few characters more data.
  */
@@ -86,7 +101,7 @@ typedef int (*http_cb) (http_parser*);
   XX(5,  CONNECT,     CONNECT)      \
   XX(6,  OPTIONS,     OPTIONS)      \
   XX(7,  TRACE,       TRACE)        \
-  /* webdav */                      \
+  /* WebDAV */                      \
   XX(8,  COPY,        COPY)         \
   XX(9,  LOCK,        LOCK)         \
   XX(10, MKCOL,       MKCOL)        \
@@ -95,19 +110,28 @@ typedef int (*http_cb) (http_parser*);
   XX(13, PROPPATCH,   PROPPATCH)    \
   XX(14, SEARCH,      SEARCH)       \
   XX(15, UNLOCK,      UNLOCK)       \
+  XX(16, BIND,        BIND)         \
+  XX(17, REBIND,      REBIND)       \
+  XX(18, UNBIND,      UNBIND)       \
+  XX(19, ACL,         ACL)          \
   /* subversion */                  \
-  XX(16, REPORT,      REPORT)       \
-  XX(17, MKACTIVITY,  MKACTIVITY)   \
-  XX(18, CHECKOUT,    CHECKOUT)     \
-  XX(19, MERGE,       MERGE)        \
+  XX(20, REPORT,      REPORT)       \
+  XX(21, MKACTIVITY,  MKACTIVITY)   \
+  XX(22, CHECKOUT,    CHECKOUT)     \
+  XX(23, MERGE,       MERGE)        \
   /* upnp */                        \
-  XX(20, MSEARCH,     M-SEARCH)     \
-  XX(21, NOTIFY,      NOTIFY)       \
-  XX(22, SUBSCRIBE,   SUBSCRIBE)    \
-  XX(23, UNSUBSCRIBE, UNSUBSCRIBE)  \
+  XX(24, MSEARCH,     M-SEARCH)     \
+  XX(25, NOTIFY,      NOTIFY)       \
+  XX(26, SUBSCRIBE,   SUBSCRIBE)    \
+  XX(27, UNSUBSCRIBE, UNSUBSCRIBE)  \
   /* RFC-5789 */                    \
-  XX(24, PATCH,       PATCH)        \
-  XX(25, PURGE,       PURGE)        \
+  XX(28, PATCH,       PATCH)        \
+  XX(29, PURGE,       PURGE)        \
+  /* CalDAV */                      \
+  XX(30, MKCALENDAR,  MKCALENDAR)   \
+  /* RFC-2068, section 19.6.1.2 */  \
+  XX(31, LINK,        LINK)         \
+  XX(32, UNLINK,      UNLINK)       \
 
 enum http_method
   {
@@ -125,14 +149,16 @@ enum flags
   { F_CHUNKED               = 1 << 0
   , F_CONNECTION_KEEP_ALIVE = 1 << 1
   , F_CONNECTION_CLOSE      = 1 << 2
-  , F_TRAILING              = 1 << 3
-  , F_UPGRADE               = 1 << 4
-  , F_SKIPBODY              = 1 << 5
+  , F_CONNECTION_UPGRADE    = 1 << 3
+  , F_TRAILING              = 1 << 4
+  , F_UPGRADE               = 1 << 5
+  , F_SKIPBODY              = 1 << 6
+  , F_CONTENTLENGTH         = 1 << 7
   };
 
 
 /* Map for errno-related constants
- * 
+ *
  * The provided argument should be a macro that takes 2 arguments.
  */
 #define HTTP_ERRNO_MAP(XX)                                           \
@@ -141,13 +167,15 @@ enum flags
                                                                      \
   /* Callback-related errors */                                      \
   XX(CB_message_begin, "the on_message_begin callback failed")       \
-  XX(CB_status_complete, "the on_status_complete callback failed")   \
   XX(CB_url, "the on_url callback failed")                           \
   XX(CB_header_field, "the on_header_field callback failed")         \
   XX(CB_header_value, "the on_header_value callback failed")         \
   XX(CB_headers_complete, "the on_headers_complete callback failed") \
   XX(CB_body, "the on_body callback failed")                         \
   XX(CB_message_complete, "the on_message_complete callback failed") \
+  XX(CB_status, "the on_status callback failed")                     \
+  XX(CB_chunk_header, "the on_chunk_header callback failed")         \
+  XX(CB_chunk_complete, "the on_chunk_complete callback failed")     \
                                                                      \
   /* Parsing-related errors */                                       \
   XX(INVALID_EOF_STATE, "stream ended at an unexpected time")        \
@@ -168,6 +196,8 @@ enum flags
   XX(INVALID_HEADER_TOKEN, "invalid character in header")            \
   XX(INVALID_CONTENT_LENGTH,                                         \
      "invalid character in content-length header")                   \
+  XX(UNEXPECTED_CONTENT_LENGTH,                                      \
+     "unexpected content-length header")                             \
   XX(INVALID_CHUNK_SIZE,                                             \
      "invalid character in chunk size header")                       \
   XX(INVALID_CONSTANT, "invalid constant string")                    \
@@ -191,11 +221,12 @@ enum http_errno {
 
 struct http_parser {
   /** PRIVATE **/
-  unsigned char type : 2;     /* enum http_parser_type */
-  unsigned char flags : 6;    /* F_* values from 'flags' enum; semi-public */
-  unsigned char state;        /* enum state from http_parser.c */
-  unsigned char header_state; /* enum header_state from http_parser.c */
-  unsigned char index;        /* index into current matcher */
+  unsigned int type : 2;         /* enum http_parser_type */
+  unsigned int flags : 8;        /* F_* values from 'flags' enum; semi-public */
+  unsigned int state : 7;        /* enum state from http_parser.c */
+  unsigned int header_state : 7; /* enum header_state from http_parser.c */
+  unsigned int index : 7;        /* index into current matcher */
+  unsigned int lenient_http_headers : 1;
 
   uint32_t nread;          /* # bytes read in various scenarios */
   uint64_t content_length; /* # bytes in body (0 if no Content-Length header) */
@@ -203,16 +234,16 @@ struct http_parser {
   /** READ-ONLY **/
   unsigned short http_major;
   unsigned short http_minor;
-  unsigned short status_code; /* responses only */
-  unsigned char method;       /* requests only */
-  unsigned char http_errno : 7;
+  unsigned int status_code : 16; /* responses only */
+  unsigned int method : 8;       /* requests only */
+  unsigned int http_errno : 7;
 
   /* 1 = Upgrade header was present and the parser has exited because of that.
    * 0 = No upgrade header present.
    * Should be checked when http_parser_execute() returns in addition to
    * error checking.
    */
-  unsigned char upgrade : 1;
+  unsigned int upgrade : 1;
 
   /** PUBLIC **/
   void *data; /* A pointer to get hook to the "connection" or "socket" object */
@@ -222,12 +253,17 @@ struct http_parser {
 struct http_parser_settings {
   http_cb      on_message_begin;
   http_data_cb on_url;
-  http_cb      on_status_complete;
+  http_data_cb on_status;
   http_data_cb on_header_field;
   http_data_cb on_header_value;
   http_cb      on_headers_complete;
   http_data_cb on_body;
   http_cb      on_message_complete;
+  /* When on_chunk_header is called, the current chunk length is stored
+   * in parser->content_length.
+   */
+  http_cb      on_chunk_header;
+  http_cb      on_chunk_complete;
 };
 
 
@@ -261,9 +297,28 @@ struct http_parser_url {
 };
 
 
+/* Returns the library version. Bits 16-23 contain the major version number,
+ * bits 8-15 the minor version number and bits 0-7 the patch level.
+ * Usage example:
+ *
+ *   unsigned long version = http_parser_version();
+ *   unsigned major = (version >> 16) & 255;
+ *   unsigned minor = (version >> 8) & 255;
+ *   unsigned patch = version & 255;
+ *   printf("http_parser v%u.%u.%u\n", major, minor, patch);
+ */
+unsigned long http_parser_version(void);
+
 void http_parser_init(http_parser *parser, enum http_parser_type type);
 
 
+/* Initialize http_parser_settings members to 0
+ */
+void http_parser_settings_init(http_parser_settings *settings);
+
+
+/* Executes the parser. Returns number of parsed bytes. Sets
+ * `parser->http_errno` on error. */
 size_t http_parser_execute(http_parser *parser,
                            const http_parser_settings *settings,
                            const char *data,
@@ -287,6 +342,9 @@ const char *http_errno_name(enum http_errno err);
 /* Return a string description of the given error */
 const char *http_errno_description(enum http_errno err);
 
+/* Initialize all http_parser_url members to 0 */
+void http_parser_url_init(struct http_parser_url *u);
+
 /* Parse a URL; return nonzero on failure */
 int http_parser_parse_url(const char *buf, size_t buflen,
                           int is_connect,

File diff suppressed because it is too large
+ 858 - 57
test.c