1 files changed, 173 insertions, 0 deletions
diff --git a/http_parser.c b/http_parser.c
new file mode 100644
index 0000000..05cd094
--- /dev/null
+++ b/http_parser.c
@@ -0,0 +1,173 @@
+/*          __                               _
+*    ____  / /_  ____ ___  __  ___________ _(_)
+*   / __ \/ __ \/ __ `__ \/ / / / ___/ __ `/ /
+*  / /_/ / / / / / / / / / /_/ / /__/ /_/ / /
+*  \____/_/ /_/_/ /_/ /_/\__, /\___/\__, /_/
+*                       /____/     /____/
+*
+* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+*
+* Copyright (c) 2021, Max Christian Pohle <max@coderonline.de>
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions
+* are met:
+*
+* 1. Redistributions of source code must retain the above copyright
+*    notice, this list of conditions and the following disclaimer.
+*
+* 2. Redistributions in binary form must reproduce the above copyright
+*    notice, this list of conditions and the following disclaimer in the
+*    documentation and/or other materials provided with the distribution.
+*
+* {{{ DISCLAIMER
+* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+* POSSIBILITY OF SUCH DAMAGE.
+* }}}
+*/
+#include "main.h"
+/**
+ * Parse one raw HTTP request in place and answer it.
+ *
+ * The buffer is tokenised destructively: the spaces of the request line, the
+ * ':' behind every header name and the first byte of every line break are
+ * overwritten with '\0', and the members of a local Http_Header are pointed
+ * into the buffer (nothing is copied). When the first header block announces
+ * a multipart boundary, the content of every part is handed to next_part().
+ * send_answer() is called exactly once at the end, with whatever could be
+ * parsed up to that point.
+ *
+ * @param new_socket     handed through to send_answer()
+ * @param request        writable request buffer; it has to be NUL-terminated,
+ *                       because it is scanned with strpbrk() and strspn()
+ * @param request_length bounds the memchr() scan through part contents
+ *                       (which may contain '\0' bytes)
+ */
+void parse_http(size_t new_socket, char * request, size_t request_length) {
+        static const char boundary_key[] = "boundary=";
+        const size_t boundary_key_length = sizeof(boundary_key) - 1; // without the terminating NUL
+        char * start = request;       // begin of the token that is currently scanned
+        char * end = NULL;            // separator found behind start; NULL asks the loop to stop
+        const char * search = "\r\n"; // separators to look for next, depends on the parser state
+        Http_Header http_header = {0};
+        char * name = NULL;           // name of the header seen last
+        while(NULL != start && NULL != (end = strpbrk(start, search))) {
+                size_t matchlen = strspn(end, search); // bytes to skip behind end at the bottom of the loop
+                switch(end[0]) {
+                        case ':':
+                                // {{{ remember header names and search for the value
+                                end[0] = '\0';
+                                end++;         // jump over the colon
+                                name = start;  // the newline case needs to know which header a value belongs to
+                                if (0 == strcasecmp("Content-Type", start)) {
+                                        search = "\r\n;"; // (more unlikely) also search for a semicolon in Content-Type: [...]; boundary=[...]
+                                } else {
+                                        search = "\r\n";  // (likely) search for some kind of newline
+                                }
+                                // skip the optional blanks in front of the value; unlike a fixed skip of
+                                // one byte, strspn() cannot step over the terminating NUL
+                                matchlen = strspn(end, " \t");
+                                break; // }}}
+                        case ';':
+                                // {{{ find the form-data boundary in the main header
+                                start += strspn(start, "; "); // remove spaces and semicolons (also stops at '\0')
+                                {
+                                        // the parameter to inspect is the one BEHIND this semicolon
+                                        char * parameter = end + strspn(end, "; ");
+                                        if(NULL == http_header.boundary && 0 == strncasecmp(parameter, boundary_key, boundary_key_length))
+                                        {
+                                                http_header.boundary = parameter + boundary_key_length;
+                                                // delimiters carry additional leading dashes, so compare without them
+                                                http_header.boundary += strspn(http_header.boundary, "-");
+                                                DEBUG("> Boundary found, now looking where it ends...\n");
+                                                search = "\r\n"; // the line break will terminate the boundary string
+                                                continue;        // keep start, the whole value belongs to Content-Type
+                                        }
+                                } // }}}
+                                break;
+                        case '\r': // fallthrough
+                        case '\n':
+                                // {{{ newlines are special: sometimes content parts follow and sometimes headers
+                                end[0] = '\0'; // terminates the request line respectively the header value
+                                search = ":";  // we will continue to search for headers
+                                if(NULL == name) {
+                                        if(NULL == http_header.method) {
+                                                DEBUG("[%zu]> HTTP REQUEST LINE :: %s \n", matchlen, start);
+                                                // split METHOD URL VERSION at the spaces; a separate cursor is used so
+                                                // that start keeps pointing to the method when the search is exhausted
+                                                char * cursor = start;
+                                                while(NULL != (cursor = memchr(cursor, ' ', (size_t) (end - cursor)))) {
+                                                        cursor[0] = '\0'; // terminates the method first and the url afterwards
+                                                        if(NULL == http_header.url)
+                                                                http_header.url = ++cursor;
+                                                }
+                                                http_header.method = start;
+                                                http_header.newline_length = matchlen; // length of one single line break
+                                        } else {
+                                                DEBUG("[...]\n"); // if we want to intentionally skip something, we land here by setting name = NULL
+                                                break;
+                                        }
+                                } else { // we know that name is not NULL and can work with it
+                                        if (0 == strcasecmp("Content-Disposition", name))
+                                        { http_header.content_disposition = start; }
+                                } // }}}
+                                DEBUG("\033[32m[%zu]> '%20s' = '%s'\033[0m\n", matchlen, name ? name : "(null)", start);
+                                // {{{ check if a http header ended (e.g. two newlines)
+                                if(matchlen > http_header.newline_length) {
+                                        DEBUG("> END HEADERS, because there were %zu newlines; boundary='%s'[%zu]\n", (size_t) (matchlen / http_header.newline_length), http_header.boundary ? http_header.boundary : "(null)", (size_t) http_header.boundary_size);
+                                        end += matchlen; // end now points to the first byte behind the blank line
+                                        if(NULL == http_header.boundary) {
+                                                // no multipart boundary was announced: there is no delimiter we
+                                                // could search the body for, so the parser is done
+                                                end = NULL;
+                                                break;
+                                        }
+                                        // if it was the first header, we calculate the boundary size and expect more headers to come after a boundary
+                                        if(http_header.boundary_size == 0) {
+                                                DEBUG("================================================================================\n");
+                                                http_header.boundary_size = strlen(http_header.boundary);
+                                                // skip the first boundary, but only if it really is there; a blind
+                                                // skip would run behind the buffer when the body is missing
+                                                start = end;
+                                                start += strspn(start, "-");
+                                                if(0 == http_header.boundary_size || 0 != strncmp(start, http_header.boundary, http_header.boundary_size)) {
+                                                        end = NULL;
+                                                        break;
+                                                }
+                                                start += http_header.boundary_size;
+                                                size_t newline = strspn(start, "\r\n");
+                                                start += (newline < (size_t) http_header.newline_length) ? newline : (size_t) http_header.newline_length;
+                                                continue;
+                                        } else {
+                                                char * content_start = end;
+                                                while(1)
+                                                {
+                                                        // computed without unsigned wrap-around, so that memchr() can
+                                                        // never be asked to scan behind the request
+                                                        size_t consumed = (size_t) (end - request);
+                                                        size_t size_remaining = (consumed + 1 >= request_length) ? 0 : request_length - consumed - 1;
+                                                        DEBUG("%zu remaining.\n", size_remaining);
+                                                        if(0 == size_remaining) {
+                                                                DEBUG("> not even the boundary would fit in that what is left.\n");
+                                                                end = NULL; // nothing left to parse
+                                                                break;
+                                                        }
+                                                        // memchr() instead of strchr(): the content may contain '\0'
+                                                        if(NULL == (end = memchr((void*) end, '-', size_remaining))) {
+                                                                DEBUG("no further '-' found\n");
+                                                                break;
+                                                        }
+                                                        // the line break in front of the delimiter is not part of the content
+                                                        char * content_end = end - http_header.newline_length;
+                                                        if(content_end < content_start)
+                                                                content_end = content_start; // delimiter follows the headers directly: empty content
+                                                        end += strspn(end, "-");
+                                                        if(0 == strncmp(end, http_header.boundary, http_header.boundary_size)) {
+                                                                size_t file_size = (size_t) (content_end - content_start);
+                                                                DEBUG("> Content ends here, size of the last file is %zu\n", file_size);
+                                                                content_end[0] = '\0'; // terminate exactly behind the last content byte
+                                                                next_part(&http_header, content_start, file_size);
+                                                                end += http_header.boundary_size;
+                                                                matchlen = strspn(end, "\r\n"); // continue behind the delimiter line
+                                                                DEBUG("> end is at %p, matchlen is %zu\n", (void *) end, matchlen);
+                                                                search = ":";
+                                                                break;
+                                                        } else {
+                                                                end = end + 1; // just some dashes inside the content, keep searching
+                                                        }
+                                                }
+                                        }
+                                        break;
+                                } // }}} if condition after a header
+                                break;
+                        default: // unreachable: strpbrk() only returns characters out of search
+                                break;
+                } // switch
+                if(NULL == end)
+                        break; // a branch above found nothing more to parse
+                start = end + matchlen;
+        }
+        DEBUG("> sending answer...\n");
+        send_answer(&http_header, new_socket);
+        DEBUG("> answer sent.\n");
+}
+// modeline for vim: shiftwidth=2 tabstop=2 number foldmethod=marker

diff --git a/http_parser.c b/http_parser.c new file mode 100644 index 0000000..05cd094 --- /dev/null +++ b/http_parser.c
@@ -0,0 +1,173 @@
	1	/* __ _
	2	* ____ / /_ ____ ___ __ ___________ _(_)
	3	* / __ \/ __ \/ __ `__ \/ / / / ___/ __ `/ /
	4	* / /_/ / / / / / / / / / /_/ / /__/ /_/ / /
	5	* \____/_/ /_/_/ /_/ /_/\__, /\___/\__, /_/
	6	* /____/ /____/
	7	*
	8	* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
	9	*
	10	* Copyright (c) 2021, Max Christian Pohle <max@coderonline.de>
	11	*
	12	* Redistribution and use in source and binary forms, with or without
	13	* modification, are permitted provided that the following conditions
	14	* are met:
	15	*
	16	* 1. Redistributions of source code must retain the above copyright
	17	* notice, this list of conditions and the following disclaimer.
	18	*
	19	* 2. Redistributions in binary form must reproduce the above copyright
	20	* notice, this list of conditions and the following disclaimer in the
	21	* documentation and/or other materials provided with the distribution.
	22	*
	23	* {{{ DISCLAIMER
	24	* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
	25	* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
	26	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
	27	* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
	28	* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
	29	* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
	30	* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
	31	* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
	32	* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
	33	* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
	34	* POSSIBILITY OF SUCH DAMAGE.
	35	* }}}
	36	*/
	37
	38	#include "main.h"
	39
	40	void parse_http(size_t new_socket, char * request, size_t request_length) { // parses the request buffer in place (separators are overwritten with NUL bytes), hands part contents to next_part() and finally calls send_answer()
	41	char * start = request; // begin of the token that is currently scanned
	42	char * end = NULL; // separator found by strpbrk() behind start
	43	char * search = "\r\n"; // set of separator characters to look for next; changes with the parser state
	44	// all pointers stored in http_header point into the request buffer, nothing is copied
	45	Http_Header http_header = {0};
	46	// name of the header seen last; no code below resets it to NULL once a header was found
	47	char * name = NULL;
	48	while(NULL != (end = strpbrk(start, search))) { // TODO: try harder to break things (are SEGFAULTs possible?) -- NOTE(review): strpbrk() needs a NUL-terminated buffer, request_length is not consulted here
	49
	50	size_t matchlen = strspn(end, search); // number of consecutive separator characters at end; skipped at the bottom of the loop
	51	switch(end[0]) {
	52	case ':':
	53	end[0] = '\0'; // {{{ remember header 'names' and search for the value
	54	end++; // jump over the colon
	55	// NOTE(review): for a single colon the loop bottom (end + matchlen) skips one further byte, presumably the blank behind the colon; that byte is not checked
	56	name = start; // remember, where name starts, will be important in the newline case
	57
	58	if (0 == strcasecmp("Content-Type", start)) {
	59	search = "\r\n;"; // (more unlikely) also search for a semicolon in Content-Type: [...]; boundary=[...]
	60	} else {
	61	search = "\r\n"; // (likely) search for some kind of newline
	62	} // }}}
	63	break;
	64	case ';':
	65	// {{{ find the form-data boundary in the main header
	66	start += strspn(start, "; "); // remove spaces and semicolons (boundary check implicit; also stops at '\0')
	67	// NOTE(review): the comparison below is an ordering test (greater than zero), not a prefix test, and start is the text in front of the semicolon; looks unintended, confirm
	68	const char s_multipart_form_data[] = "boundary=";
	69	if(NULL == http_header.boundary && 0 < strcasecmp(start, s_multipart_form_data))
	70	{
	71	http_header.boundary = end + sizeof(s_multipart_form_data) + 1; // sizeof counts the terminating NUL, so this is end + 11: the semicolon, one further byte and the nine characters of the key; assumes exactly that layout
	72	http_header.boundary += strspn(http_header.boundary, "-"); // leading dashes are not stored as part of the boundary
	73	DEBUG("> Boundary found, now looking where it ends...\n");
	74	search = "\r\n";
	75	continue; // skips the loop bottom, so start is kept and the next search runs to the end of the line
	76	} /// }}}
	77	break;
	78	case '\r': // fallthrough
	79	case '\n':
	80	// {{{ newlines are special: sometimes content parts follow and sometimes headers, guess what...
	81	end[0] = '\0'; // terminates the token in front of the line break
	82	search = ":"; // we will continue to search for headers
	83	if(NULL == name) {
	84	if(NULL == http_header.method) {
	85	DEBUG("[%ld]> HTTP REQUEST LINE :: %s \n", matchlen, start);
	86	end[0] = '\0';
	87	// NOTE(review): the loop below only ends when memchr() returns NULL, so start is NULL afterwards
	88	while(NULL != (start = memchr(start, ' ', end - start))) {
	89	if(NULL == http_header.url)
	90	http_header.url = ++start; // first space: the url begins behind it (the space itself is left in place)
	91	else
	92	start[0] = '\0'; // every further space is overwritten, which terminates the url
	93	}
	94	http_header.method = start; // NOTE(review): stores NULL, see the note above the loop
	95	http_header.newline_length = matchlen; // length of the separator run behind the request line, used below as the length of one line break
	96	} else {
	97	DEBUG("[...]\n"); // if we want to intentionally skip something, we land here by setting name = NULL
	98	break;
	99	}
	100	} else { // we know that name is not NULL and can work with it
	101	if (0 == strcasecmp("Content-Disposition", name))
	102	{ http_header.content_disposition = start; }
	103	} // }}}
	104	DEBUG("\033[32m[%ld]> '% 20s' = '%s'\033[0m\n", matchlen, name, start); // NOTE(review): after the request line branch name and start are both NULL here
	105	// {{{ check if a http header ended (e.g. two newlines)
	106	if(matchlen > http_header.newline_length) { // more separator characters than one line break
	107	DEBUG("> END HEADERS, because there were %d newlines; boundary='%s'[%ld]\n", matchlen / http_header.newline_length, http_header.boundary, http_header.boundary_size);
	108	end += matchlen; // end now points behind the whole separator run
	109
	110	// if it was the first header, we calculate the boundary size and expect more headers to come after a boundary
	111	if(http_header.boundary && http_header.boundary_size == 0) {
	112	DEBUG("================================================================================\n");
	113	http_header.boundary_size = strlen(http_header.boundary);
	114	// skip the first header and boundary...
	115	start = end;
	116	start += strspn(start, "-");
	117	start += http_header.boundary_size; // NOTE(review): skipped without comparing the bytes against the boundary
	118	start += http_header.newline_length;
	119	continue; // start was set by hand, so the loop bottom is skipped
	120	} else {
	121	char * content_start = end;
	122	while(1)
	123	{
	124	size_t size_remaining = (size_t) request_length - (end - request) - 1; // NOTE(review): assigned to an unsigned type, so a negative result wraps around
	125	DEBUG("%ld remaining.\n", size_remaining);
	126
	127	if(size_remaining <= 0) { // for a size_t this is only true for exactly 0
	128	DEBUG("> not even the boundary would fit in that what is left.\n");
	129	break; // NOTE(review): end stays non-NULL here, so the outer loop continues at end + matchlen
	130	}
	131
	132	if(NULL == (end = memchr((void*) end, '-', size_remaining))) { // length-bounded search for the next dash
	133	DEBUG("no further '-' found\n");
	134	break; // end is NULL now, which ends the outer loop at its bottom
	135	}
	136
	137	char * content_end = end - http_header.newline_length; // one line break in front of the dashes is not counted as content
	138
	139	end += strspn(end, "-");
	140	if(0 == strncmp(end, http_header.boundary, http_header.boundary_size)) {
	141	size_t file_size = content_end - content_start;
	142	DEBUG("> Content ends here, size of the last file is %ld\n", file_size);
	143
	144	content_start[file_size + 1] = '\0'; // NOTE(review): this is one byte behind content_end; index file_size would terminate the content exactly, confirm
	145	next_part(&http_header, content_start, file_size);
	146
	147	end += http_header.boundary_size;
	148	matchlen = strspn(end, "\r\n"); // replaces matchlen, so the loop bottom continues behind the line break after the boundary
	149	DEBUG("> end is at %p, matchlen is %ld\n", end, matchlen);
	150
	151	search = ":";
	152	break;
	153	} else {
	154	end = end + 1; // no boundary at this position: continue the search one byte further
	155	}
	156	}
	157	}
	158	break;
	159	} // }}} if condition after a header
	160	} // switch
	161
	162	if(NULL == end)
	163	break;
	164	else
	165	start = end + matchlen; // continue behind the separator run
	166	}
	167
	168	DEBUG("> sending answer...\n");
	169	send_answer(&http_header, new_socket); // reached on every path, there is no early return above
	170	DEBUG("> answer sent.\n");
	171	}
	172
	173	// modeline for vim: shiftwidth=2 tabstop=2 number foldmethod=marker