Browse Source

Imported Upstream version 1.0

Mike Gabriel 11 years ago
commit
da566840ee
7 changed files with 4014 additions and 0 deletions
  1. 4 0
      CONTRIBUTIONS
  2. 19 0
      LICENSE-MIT
  3. 41 0
      Makefile
  4. 171 0
      README.md
  5. 1644 0
      http_parser.c
  6. 183 0
      http_parser.h
  7. 1952 0
      test.c

+ 4 - 0
CONTRIBUTIONS

@@ -0,0 +1,4 @@
+Contributors must agree to the Contributor License Agreement before patches
+can be accepted.
+
+http://spreadsheets2.google.com/viewform?hl=en&formkey=dDJXOGUwbzlYaWM4cHN1MERwQS1CSnc6MQ

+ 19 - 0
LICENSE-MIT

@@ -0,0 +1,19 @@
+Copyright Joyent, Inc. and other Node contributors. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to
+deal in the Software without restriction, including without limitation the
+rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+sell copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+IN THE SOFTWARE. 

+ 41 - 0
Makefile

@@ -0,0 +1,41 @@
+# Debug flags: unoptimised, with debug info, all warnings fatal.
+OPT_DEBUG=-O0 -g -Wall -Wextra -Werror -I.
+# Fast flags: optimised, with HTTP_PARSER_STRICT disabled (fewer checks,
+# see http_parser.h).
+OPT_FAST=-O3 -DHTTP_PARSER_STRICT=0 -I.
+
+CC?=gcc
+
+
+# Default target: build the debug test binary and run it.
+test: test_g
+	./test_g
+
+test_g: http_parser_g.o test_g.o
+	$(CC) $(OPT_DEBUG) http_parser_g.o test_g.o -o $@
+
+test_g.o: test.c http_parser.h Makefile
+	$(CC) $(OPT_DEBUG) -c test.c -o $@
+
+test.o: test.c http_parser.h Makefile
+	$(CC) $(OPT_FAST) -c test.c -o $@
+
+http_parser_g.o: http_parser.c http_parser.h Makefile
+	$(CC) $(OPT_DEBUG) -c http_parser.c -o $@
+
+test-valgrind: test_g
+	valgrind ./test_g
+
+http_parser.o: http_parser.c http_parser.h Makefile
+	$(CC) $(OPT_FAST) -c http_parser.c
+
+test_fast: http_parser.o test.c http_parser.h
+	$(CC) $(OPT_FAST) http_parser.o test.c -o $@
+
+# Runs the optimised test binary in an endless timed loop; interrupt to stop.
+test-run-timed: test_fast
+	while(true) do time ./test_fast > /dev/null; done
+
+
+tags: http_parser.c http_parser.h test.c
+	ctags $^
+
+clean:
+	rm -f *.o test test_fast test_g http_parser.tar tags
+
+# `test` only runs a command and never creates a file named "test", so it has
+# to be phony: a stray file of that name (which `clean` removes) would
+# otherwise make `make test` report "up to date" without running anything.
+.PHONY: clean test test-run-timed test-valgrind

+ 171 - 0
README.md

@@ -0,0 +1,171 @@
+HTTP Parser
+===========
+
+This is a parser for HTTP messages written in C. It parses both requests and
+responses. The parser is designed to be used in high-performance HTTP
+applications. It makes no syscalls or allocations, it does not buffer data,
+and it can be interrupted at any time. Depending on your architecture, it
+only requires about 40 bytes of data per message stream (in a web server
+that is per connection).
+
+Features:
+
+  * No dependencies
+  * Handles persistent streams (keep-alive).
+  * Decodes chunked encoding.
+  * Upgrade support
+  * Defends against buffer overflow attacks.
+
+The parser extracts the following information from HTTP messages:
+
+  * Header fields and values
+  * Content-Length
+  * Request method
+  * Response status code
+  * Transfer-Encoding
+  * HTTP version
+  * Request path, query string, fragment
+  * Message body
+
+
+Usage
+-----
+
+One `http_parser` object is used per TCP connection. Initialize the struct
+using `http_parser_init()` and set the callbacks. That might look something
+like this for a request parser:
+
+    http_parser_settings settings;
+    settings.on_path = my_path_callback;
+    settings.on_header_field = my_header_field_callback;
+    /* ... */
+
+    http_parser *parser = malloc(sizeof(http_parser));
+    http_parser_init(parser, HTTP_REQUEST);
+    parser->data = my_socket;
+
+When data is received on the socket execute the parser and check for errors.
+
+    size_t len = 80*1024, nparsed;
+    char buf[len];
+    ssize_t recved;
+
+    recved = recv(fd, buf, len, 0);
+
+    if (recved < 0) {
+      /* Handle error. */
+    }
+
+    /* Start up / continue the parser.
+     * Note we pass recved==0 to signal that EOF has been received.
+     */
+    nparsed = http_parser_execute(parser, &settings, buf, recved);
+
+    if (parser->upgrade) {
+      /* handle new protocol */
+    } else if (nparsed != recved) {
+      /* Handle error. Usually just close the connection. */
+    }
+
+HTTP needs to know where the end of the stream is. For example, sometimes
+servers send responses without Content-Length and expect the client to
+consume input (for the body) until EOF. To tell http_parser about EOF, give
+`0` as the fourth parameter to `http_parser_execute()`. Callbacks and errors
+can still be encountered during an EOF, so one must still be prepared
+to receive them.
+
+Scalar valued message information such as `status_code`, `method`, and the
+HTTP version are stored in the parser structure. This data is only
+temporarily stored in `http_parser` and gets reset on each new message. If
+this information is needed later, copy it out of the structure during the
+`headers_complete` callback.
+
+The parser decodes the transfer-encoding for both requests and responses
+transparently. That is, a chunked encoding is decoded before being sent to
+the on_body callback.
+
+
+The Special Problem of Upgrade
+------------------------------
+
+HTTP supports upgrading the connection to a different protocol. An
+increasingly common example of this is the Web Socket protocol which sends
+a request like
+
+        GET /demo HTTP/1.1
+        Upgrade: WebSocket
+        Connection: Upgrade
+        Host: example.com
+        Origin: http://example.com
+        WebSocket-Protocol: sample
+
+followed by non-HTTP data.
+
+(See http://tools.ietf.org/html/draft-hixie-thewebsocketprotocol-75 for more
+information on the Web Socket protocol.)
+
+To support this, the parser will treat this as a normal HTTP message without a
+body, issuing both the on_headers_complete and on_message_complete callbacks.
+However, http_parser_execute() will stop parsing at the end of the headers and
+return.
+
+The user is expected to check if `parser->upgrade` has been set to 1 after
+`http_parser_execute()` returns. Non-HTTP data begins in the supplied buffer
+at the offset given by the return value of `http_parser_execute()`.
+
+
+Callbacks
+---------
+
+During the `http_parser_execute()` call, the callbacks set in
+`http_parser_settings` will be executed. The parser maintains state and
+never looks behind, so buffering the data is not necessary. If you need to
+save certain data for later usage, you can do that from the callbacks.
+
+There are two types of callbacks:
+
+* notification `typedef int (*http_cb) (http_parser*);`
+    Callbacks: on_message_begin, on_headers_complete, on_message_complete.
+* data `typedef int (*http_data_cb) (http_parser*, const char *at, size_t length);`
+    Callbacks: (requests only) on_path, on_query_string, on_url, on_fragment,
+               (common) on_header_field, on_header_value, on_body;
+
+Callbacks must return 0 on success. Returning a non-zero value indicates
+error to the parser, making it exit immediately.
+
+If you parse an HTTP message in chunks (i.e. `read()` the request line
+from the socket, parse, read half of the headers, parse, etc.), your data
+callbacks may be called more than once. Http-parser only guarantees that the
+data pointer is valid for the lifetime of the callback. You can also `read()`
+into a heap-allocated buffer to avoid copying memory around if this fits your
+application.
+
+Reading headers may be a tricky task if you read/parse headers partially.
+Basically, you need to remember whether the last header callback was a field
+or a value and apply the following logic:
+
+    (on_header_field and on_header_value shortened to on_h_*)
+     ------------------------ ------------ --------------------------------------------
+    | State (prev. callback) | Callback   | Description/action                         |
+     ------------------------ ------------ --------------------------------------------
+    | nothing (first call)   | on_h_field | Allocate new buffer and copy callback data |
+    |                        |            | into it                                    |
+     ------------------------ ------------ --------------------------------------------
+    | value                  | on_h_field | New header started.                        |
+    |                        |            | Copy current name,value buffers to headers |
+    |                        |            | list and allocate new buffer for new name  |
+     ------------------------ ------------ --------------------------------------------
+    | field                  | on_h_field | Previous name continues. Reallocate name   |
+    |                        |            | buffer and append callback data to it      |
+     ------------------------ ------------ --------------------------------------------
+    | field                  | on_h_value | Value for current header started. Allocate |
+    |                        |            | new buffer and copy callback data to it    |
+     ------------------------ ------------ --------------------------------------------
+    | value                  | on_h_value | Value continues. Reallocate value buffer   |
+    |                        |            | and append callback data to it             |
+     ------------------------ ------------ --------------------------------------------
+
+
+See examples of reading in headers:
+
+* [partial example](http://gist.github.com/155877) in C
+* [from http-parser tests](http://github.com/ry/http-parser/blob/37a0ff8928fb0d83cec0d0d8909c5a4abcd221af/test.c#L403) in C
+* [from Node library](http://github.com/ry/node/blob/842eaf446d2fdcb33b296c67c911c32a0dabc747/src/http.js#L284) in Javascript

File diff suppressed because it is too large
+ 1644 - 0
http_parser.c


+ 183 - 0
http_parser.h

@@ -0,0 +1,183 @@
+/* Copyright Joyent, Inc. and other Node contributors. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+#ifndef http_parser_h
+#define http_parser_h
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define HTTP_PARSER_VERSION_MAJOR 1
+#define HTTP_PARSER_VERSION_MINOR 0
+
+#include <sys/types.h>
+#if defined(_WIN32) && !defined(__MINGW32__) /* presumably MSVC without <stdint.h> -- TODO confirm */
+typedef __int8 int8_t;
+typedef unsigned __int8 uint8_t;
+typedef __int16 int16_t;
+typedef unsigned __int16 uint16_t;
+typedef __int32 int32_t;
+typedef unsigned __int32 uint32_t;
+typedef __int64 int64_t;
+typedef unsigned __int64 uint64_t;
+
+typedef unsigned int size_t; /* NOTE(review): 32 bits wide; 64-bit Windows needs a 64-bit size_t -- confirm Win64 is not a target */
+typedef int ssize_t; /* NOTE(review): same 32-bit assumption as size_t above */
+#else
+#include <stdint.h>
+#endif
+
+/* Compile with -DHTTP_PARSER_STRICT=0 to make fewer checks, but run
+ * faster. Strict mode (1) is the default when the macro is not defined;
+ * a value supplied on the command line is left untouched, so both
+ * -DHTTP_PARSER_STRICT=0 and -DHTTP_PARSER_STRICT=1 mean what they say.
+ */
+#ifndef HTTP_PARSER_STRICT
+# define HTTP_PARSER_STRICT 1
+#endif
+
+
+/* Maximum header size allowed */
+#define HTTP_MAX_HEADER_SIZE (80*1024)
+
+
+typedef struct http_parser http_parser;
+typedef struct http_parser_settings http_parser_settings;
+
+
+/* Callbacks should return non-zero to indicate an error. The parser will
+ * then halt execution.
+ *
+ * The one exception is on_headers_complete. In a HTTP_RESPONSE parser
+ * returning '1' from on_headers_complete will tell the parser that it
+ * should not expect a body. This is used when receiving a response to a
+ * HEAD request which may contain 'Content-Length' or 'Transfer-Encoding:
+ * chunked' headers that indicate the presence of a body.
+ *
+ * http_data_cb does not deliver a string in one piece. It may be called
+ * arbitrarily many times for each string, e.g. you might get 10 callbacks
+ * for "on_path", each providing just a few more characters of data.
+ * `at` points to the fragment and `length` is its size; per the README the
+ * pointer is only guaranteed to be valid for the lifetime of the callback.
+ */
+typedef int (*http_data_cb) (http_parser*, const char *at, size_t length);
+typedef int (*http_cb) (http_parser*);
+
+
+/* Request Methods.
+ *
+ * Enumerators are numbered consecutively starting at HTTP_DELETE = 0.
+ * http_method_str() returns the string form of a value.
+ * NOTE(review): the order presumably has to stay in sync with a string
+ * table in http_parser.c (not visible here) -- confirm before inserting
+ * or reordering entries.
+ */
+enum http_method
+  { HTTP_DELETE    = 0
+  , HTTP_GET
+  , HTTP_HEAD
+  , HTTP_POST
+  , HTTP_PUT
+  /* pathological */
+  , HTTP_CONNECT
+  , HTTP_OPTIONS
+  , HTTP_TRACE
+  /* webdav */
+  , HTTP_COPY
+  , HTTP_LOCK
+  , HTTP_MKCOL
+  , HTTP_MOVE
+  , HTTP_PROPFIND
+  , HTTP_PROPPATCH
+  , HTTP_UNLOCK
+  /* subversion */
+  , HTTP_REPORT
+  , HTTP_MKACTIVITY
+  , HTTP_CHECKOUT
+  , HTTP_MERGE
+  /* upnp */
+  , HTTP_MSEARCH
+  , HTTP_NOTIFY
+  , HTTP_SUBSCRIBE
+  , HTTP_UNSUBSCRIBE
+  };
+
+
+enum http_parser_type { HTTP_REQUEST, HTTP_RESPONSE, HTTP_BOTH }; /* Kind of
+ * message a parser handles; passed to http_parser_init().
+ * NOTE(review): HTTP_BOTH presumably lets the parser detect the message
+ * kind itself -- confirm against http_parser.c.
+ */
+
+
+struct http_parser { /* Parser state; the README uses one object per TCP connection. */
+  /** PRIVATE **/
+  unsigned char type : 2; /* presumably an enum http_parser_type value -- TODO confirm */
+  unsigned char flags : 6; /* flag bits; their meaning is not visible in this header */
+  unsigned char state;
+  unsigned char header_state;
+  unsigned char index;
+
+  uint32_t nread; /* presumably bytes read so far (cf. HTTP_MAX_HEADER_SIZE) -- TODO confirm */
+  int64_t content_length; /* presumably the message's Content-Length -- TODO confirm */
+
+  /** READ-ONLY: reset on each new message; copy values out during the
+   *  headers_complete callback if they are needed later (see README). **/
+  unsigned short http_major;
+  unsigned short http_minor;
+  unsigned short status_code; /* responses only */
+  unsigned char method;    /* requests only */
+
+  /* 1 = Upgrade header was present and the parser has exited because of that.
+   * 0 = No upgrade header present.
+   * Should be checked when http_parser_execute() returns in addition to
+   * error checking.
+   */
+  char upgrade;
+
+  /** PUBLIC **/
+  void *data; /* User pointer, e.g. to the "connection" or "socket" object */
+};
+
+
+struct http_parser_settings { /* Callbacks run during http_parser_execute(). */
+  http_cb      on_message_begin;
+  http_data_cb on_path;             /* requests only, per the README */
+  http_data_cb on_query_string;     /* requests only, per the README */
+  http_data_cb on_url;
+  http_data_cb on_fragment;         /* requests only, per the README */
+  http_data_cb on_header_field;
+  http_data_cb on_header_value;
+  http_cb      on_headers_complete; /* returning 1 in an HTTP_RESPONSE parser means "expect no body" */
+  http_data_cb on_body;             /* body data, with chunked encoding already decoded (README) */
+  http_cb      on_message_complete;
+};
+
+
+void http_parser_init(http_parser *parser, enum http_parser_type type); /*
+ * Initializes `parser` for messages of the given `type`. The README calls
+ * this once on a freshly allocated parser, before setting parser->data and
+ * before the first http_parser_execute().
+ */
+
+
+size_t http_parser_execute(http_parser *parser,
+                           const http_parser_settings *settings,
+                           const char *data,
+                           size_t len); /*
+ * Feeds `len` bytes starting at `data` to the parser, running the callbacks
+ * in `settings`. Returns the number of bytes parsed. Per the README:
+ *   - pass len == 0 to signal EOF;
+ *   - if parser->upgrade is set afterwards, parsing stopped at the end of
+ *     the headers and non-HTTP data starts at the returned offset;
+ *   - otherwise a return value different from `len` indicates an error.
+ */
+
+
+/* If http_should_keep_alive() in the on_headers_complete or
+ * on_message_complete callback returns 0 (false), then this should be
+ * the last message on the connection.
+ * If you are the server, respond with the "Connection: close" header.
+ * If you are the client, close the connection.
+ * NOTE(review): this text previously read "returns true", which contradicts
+ * the function's name; confirm the polarity against http_parser.c.
+ */
+int http_should_keep_alive(http_parser *parser);
+
+/* Returns a string version of the given HTTP method (an enum http_method
+ * value). NOTE(review): behaviour for out-of-range values is not visible in
+ * this header -- confirm against http_parser.c.
+ */
+const char *http_method_str(enum http_method);
+
+#ifdef __cplusplus
+}
+#endif
+#endif

File diff suppressed because it is too large
+ 1952 - 0
test.c