From db864e290ba1ec4acd74371b150e7770585ef284 Mon Sep 17 00:00:00 2001
From: Max Christian Pohle
Date: Wed, 24 Nov 2021 01:22:54 +0100
Subject: Made the implementation more generic again

---
 Makefile      |   2 +-
 http_parser.c | 173 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 main.c        | 154 ++-------------------------------------------------
 main.h        |  19 +++++++
 4 files changed, 197 insertions(+), 151 deletions(-)
 create mode 100644 http_parser.c

diff --git a/Makefile b/Makefile
index 9a41fc9..2458ed9 100644
--- a/Makefile
+++ b/Makefile
@@ -9,7 +9,7 @@ LIBS := -lcups
 CFLAGS += -L/usr/local/lib
 CFLAGS += -I/usr/local/include
 
-FILES := cgi.c
+FILES := cgi.c http_parser.c
 
 test: main
 	./main
diff --git a/http_parser.c b/http_parser.c
new file mode 100644
index 0000000..05cd094
--- /dev/null
+++ b/http_parser.c
@@ -0,0 +1,173 @@
+/*          __                               _
+*    ____  / /_  ____ ___  __  ___________ _(_)
+*   / __ \/ __ \/ __ `__ \/ / / / ___/ __ `/ /
+*  / /_/ / / / / / / / / / /_/ / /__/ /_/ / /
+*  \____/_/ /_/_/ /_/ /_/\__, /\___/\__, /_/
+*                       /____/     /____/
+*
+* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+*
+* Copyright (c) 2021, Max Christian Pohle <max@coderonline.de>
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions
+* are met:
+*
+* 1. Redistributions of source code must retain the above copyright
+*    notice, this list of conditions and the following disclaimer.
+*
+* 2. Redistributions in binary form must reproduce the above copyright
+*    notice, this list of conditions and the following disclaimer in the
+*    documentation and/or other materials provided with the distribution.
+*
+* {{{ DISCLAIMER
+* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+* POSSIBILITY OF SUCH DAMAGE.
+* }}}
+*/
+
+#include "main.h"
+
+void parse_http(size_t new_socket, char * request, size_t request_length) { // parses the HTTP request in 'request' in place (separators get overwritten with '\0'), passes each body part that ends at a boundary to next_part() and finally calls send_answer() with new_socket
+	char * start = request; // beginning of the token that is currently examined
+	char * end = NULL; // position of the separator most recently found by strpbrk()
+	char * search = "\r\n"; // set of separator characters to look for next; switched as parsing progresses
+
+	Http_Header http_header = {0}; // zero-initialised, filled in while parsing
+
+	char * name = NULL; // start of the most recent header name (stays NULL until the first ':' case ran)
+	while(NULL != (end = strpbrk(start, search))) { // TODO: try harder to break things (are SEGFAULTs possible?) -- NOTE(review): strpbrk()/strspn() rely on 'request' being NUL-terminated, confirm that the caller guarantees it
+
+		size_t matchlen = strspn(end, search); // number of consecutive separator characters starting at 'end'
+		switch(end[0]) {
+			case ':':
+				end[0] = '\0'; // {{{ remember header 'names' and search for the value
+				end++;         // jump over the colon
+
+				name = start;  // remember, where name starts, will be important in the newline case
+
+				if (0 == strcasecmp("Content-Type", start)) {
+					search = "\r\n;"; // (more unlikely) also search for a semicolon in Content-Type: [...]; boundary=[...]
+				} else {
+					search = "\r\n";  // (likely) search for some kind of newline
+				} // }}}
+				break;
+			case ';':
+				// {{{ find the form-data boundary in the main header
+				start += strspn(start, "; "); // remove spaces and semicolons (boundary check implicit; also stops at '\0')
+
+				const char s_multipart_form_data[] = "boundary=";
+				if(NULL == http_header.boundary && 0 < strcasecmp(start, s_multipart_form_data)) // NOTE(review): a result > 0 only says that 'start' sorts after "boundary=", not that it contains it -- confirm intent
+				{
+					http_header.boundary = end + sizeof(s_multipart_form_data) + 1; // sizeof counts the terminating NUL (10), so this points 11 bytes behind the ';' at 'end' -- presumably just past "; boundary=" with exactly one space, TODO confirm
+					http_header.boundary += strspn(http_header.boundary, "-"); // leading dashes are not stored as part of the boundary
+					DEBUG("> Boundary found, now looking where it ends...\n");
+					search = "\r\n";
+					continue; // skips the 'start' update at the end of the loop, so the next search begins at the same token again
+				} /// }}}
+				break;
+			case '\r': // fallthrough
+			case '\n':
+				// {{{ newlines are special: sometimes content parts follow and sometimes headers, guess what...
+				end[0] = '\0';
+				search = ":"; // we will continue to search for headers
+				if(NULL == name) {
+					if(NULL == http_header.method) {
+						DEBUG("[%ld]> HTTP REQUEST LINE :: %s \n", matchlen, start);
+						end[0] = '\0';
+
+						while(NULL != (start = memchr(start, ' ', end - start))) { // visit every space of the request line
+							if(NULL == http_header.url)
+								http_header.url = ++start; // first space: the URL begins right behind it
+							else
+								start[0] = '\0'; // later spaces are overwritten with a terminator
+						}
+						http_header.method = start; // NOTE(review): the loop above only exits once 'start' is NULL, so NULL is stored here -- confirm intent
+						http_header.newline_length = matchlen; // length of the separator run after the request line, used below as the length of one newline
+					} else {
+						DEBUG("[...]\n"); // if we want to intentionally skip something, we land here by setting name = NULL;
+						break;
+					}
+				} else { // we know that name is not NULL and can work with it
+					if (0 == strcasecmp("Content-Disposition", name))
+					{ http_header.content_disposition = start; }
+				} // }}}
+				DEBUG("\033[32m[%ld]> '% 20s' = '%s'\033[0m\n", matchlen, name, start);
+				// {{{ check if a http header ended (e.g. two newlines)
+				if(matchlen > http_header.newline_length) {
+					DEBUG("> END HEADERS, because there were %d newlines; boundary='%s'[%ld]\n", matchlen / http_header.newline_length, http_header.boundary, http_header.boundary_size); // NOTE(review): "%d" is paired with a size_t expression here
+					end += matchlen; // 'end' now points behind the whole newline run
+
+					// if it was the first header, we calculate the boundary size and expect more headers to come after a boundary
+					if(http_header.boundary && http_header.boundary_size == 0) {
+						DEBUG("================================================================================\n");
+						http_header.boundary_size = strlen(http_header.boundary);
+						// skip the first header and boundary...
+						start = end;
+						start += strspn(start, "-");
+						start += http_header.boundary_size;
+						start += http_header.newline_length;
+						continue;
+					} else {
+						char * content_start = end; // first byte of the body part that follows the headers
+						while(1)
+						{
+							size_t size_remaining = (size_t)  request_length - (end - request) - 1; // NOTE(review): unsigned, so the '<= 0' test below is only true for exactly 0
+							DEBUG("%ld remaining.\n", size_remaining);
+
+							if(size_remaining <= 0) {
+								DEBUG("> not even the boundary would fit in that what is left.\n");
+								break;
+							}
+
+							if(NULL == (end = memchr((void*) end, '-', size_remaining))) { // boundary candidate: next '-' within the remaining bytes
+								DEBUG("no further '-' found\n");
+								break;
+							}
+
+							char * content_end = end - http_header.newline_length; // the newline in front of the dashes is not counted as content
+
+							end += strspn(end, "-"); // skip the dashes in front of the boundary text
+							if(0 == strncmp(end, http_header.boundary, http_header.boundary_size)) {
+								size_t file_size = content_end - content_start;
+								DEBUG("> Content ends here, size of the last file is %ld\n", file_size);
+
+								content_start[file_size + 1] = '\0'; // NOTE(review): the terminator lands at index file_size + 1, one byte behind content_end -- confirm
+								next_part(&http_header, content_start, file_size);
+
+								end += http_header.boundary_size;
+								matchlen = strspn(end, "\r\n"); // consumed by the 'start' update at the end of the outer loop
+								DEBUG("> end is at %p, matchlen is %ld\n", end, matchlen);
+
+								search = ":";
+								break;
+							} else {
+								end = end + 1; // no boundary here, keep scanning behind the dashes
+							}
+						}
+					}
+					break;
+				} // }}} if condition after a header
+		} // switch
+
+		if(NULL == end) // the inner memchr() search may have left 'end' NULL
+			break;
+		else
+			start = end + matchlen; // continue behind the separator run
+	}
+
+	DEBUG("> sending answer...\n");
+	send_answer(&http_header, new_socket);
+	DEBUG("> answer sent.\n");
+}
+
+// modeline for vim: shiftwidth=2 tabstop=2 number foldmethod=marker
diff --git a/main.c b/main.c
index c1a7c30..8eb6f40 100644
--- a/main.c
+++ b/main.c
@@ -36,23 +36,6 @@
 */
 
 #include "main.h"
-// {{{ MACROS
-#define EWOULDBLOCK_DELAY     100
-#define READ_BUFFER_LENGTH    9000 // jumboframe?
-#define POST_DATA_MAX_LENGTH  18000
-#define DEBUG_SLEEP_TIME      50000
-
-#ifndef DEBUG
-#define DEBUG(X, ...) // (X, ...)
-#else
-#include <stdarg.h>
-static inline int verbose(const char * format, ...) {
-	va_list va; va_start(va, format); usleep(DEBUG_SLEEP_TIME); return vprintf(format, va);
-}
-#undef DEBUG
-#define DEBUG verbose
-#endif
-// }}}
 
 static int read_everything(FILE * f_r, FILE * output) {
 	const int read_buffer_length = READ_BUFFER_LENGTH;
@@ -76,7 +59,7 @@ static int read_everything(FILE * f_r, FILE * output) {
 	return EXIT_SUCCESS;
 }
 
-static void * next_customer(size_t new_socket) {
+static void * answer_request(size_t new_socket) {
 	FILE * f_r = fdopen((size_t) new_socket, "r");
 
 	char * output_buffer = NULL;
@@ -86,137 +69,8 @@ static void * next_customer(size_t new_socket) {
 	read_everything(f_r, output);  // TODO: catch return value and error handling
 	shutdown(new_socket, SHUT_RD); // shutdown the reading half of the connection
 
-	char * start = output_buffer;
-	char * end = NULL;
-	char * search = "\r\n";
-
-	Http_Header http_header = {0};
-
-	char * name = NULL;
-	while(NULL != (end = strpbrk(start, search))) { // TODO: try harder to break things (are SEGFAULTs possible?)
-
-		size_t matchlen = strspn(end, search);
-		switch(end[0]) {
-			case ':':
-				end[0] = '\0'; // {{{ remember header 'names' and search for the value
-				end++;         // jump over the colon
-
-				name = start;  // remember, where name starts, will be important in the newline case
-
-				if (0 == strcasecmp("Content-Type", start)) {
-					search = "\r\n;"; // (more unlikely) also search for a semicolon in Content-Type: [...]; boundary=[...]
-				} else {
-					search = "\r\n";  // (likely) search for some kind of newline
-				} // }}}
-				break;
-			case ';':
-				// {{{ find the form-data boundary in the main header
-				start += strspn(start, "; "); // remove spaces and semicolons (boundary check implicit; also stops at '\0')
-
-				const char s_multipart_form_data[] = "boundary=";
-				if(NULL == http_header.boundary && 0 < strcasecmp(start, s_multipart_form_data))
-				{
-					http_header.boundary = end + sizeof(s_multipart_form_data) + 1;
-					http_header.boundary += strspn(http_header.boundary, "-");
-					DEBUG("> Boundary found, now looking where it ends...\n");
-					search = "\r\n";
-					continue;
-				} /// }}}
-				break;
-			case '\r': // fallthrough
-			case '\n':
-				// {{{ newlines are special: sometimes content parts follow and sometimes headers, guess what...
-				end[0] = '\0';
-				search = ":"; // we will continue to search for headers
-				if(NULL == name) {
-					if(NULL == http_header.method) {
-						DEBUG("[%ld]> HTTP REQUEST LINE :: %s \n", matchlen, start);
-						end[0] = '\0';
-
-						while(NULL != (start = memchr(start, ' ', end - start))) {
-							if(NULL == http_header.url)
-								http_header.url = ++start;
-							else
-								start[0] = '\0';
-						}
-						http_header.method = start;
-						http_header.newline_length = matchlen;
-					} else {
-						DEBUG("[...]\n"); // if we want to intentially skip something, we land here by setting name = NUL;
-						break;
-					}
-				} else { // we know that name is not NULL and can work with it
-					if (0 == strcasecmp("Content-Disposition", name))
-					{ http_header.content_disposition = start; }
-				} // }}}
-				DEBUG("\033[32m[%ld]> '% 20s' = '%s'\033[0m\n", matchlen, name, start);
-				// {{{ check if a http header ended (e.g. two newlines)
-				if(matchlen > http_header.newline_length) {
-					DEBUG("> END HEADERS, because there were %d newlines; boundary='%s'[%ld]\n", matchlen / http_header.newline_length, http_header.boundary, http_header.boundary_size);
-					end += matchlen;
-
-					// if it was the first header, we calculate the boundary size and expect more headers to come after a boundary
-					if(http_header.boundary && http_header.boundary_size == 0) {
-						DEBUG("================================================================================\n");
-						http_header.boundary_size = strlen(http_header.boundary);
-						// skip the first header and boundary...
-						start = end;
-						start += strspn(start, "-");
-						start += http_header.boundary_size;
-						start += http_header.newline_length;
-						continue;
-					} else {
-						char * content_start = end;
-						while(1)
-						{
-							size_t size_remaining = (size_t)  output_buffer_length - (end - output_buffer) - 1;
-							DEBUG("%ld remaining.\n", size_remaining);
-
-							if(size_remaining <= 0) {
-								DEBUG("> not even the boundary would fit in that what is left.\n");
-								break;
-							}
-
-							if(NULL == (end = memchr((void*) end, '-', size_remaining))) {
-								DEBUG("no further '-' found\n");
-								break;
-							}
-
-							char * content_end = end - http_header.newline_length;
-
-							end += strspn(end, "-");
-							if(0 == strncmp(end, http_header.boundary, http_header.boundary_size)) {
-								size_t file_size = content_end - content_start;
-								DEBUG("> Content ends here, size of the last file is %ld\n", file_size);
-
-								content_start[file_size + 1] = '\0';
-								next_part(&http_header, content_start, file_size);
-
-								end += http_header.boundary_size;
-								matchlen = strspn(end, "\r\n");
-								DEBUG("> end is at %p, matchlen is %ld\n", end, matchlen);
-
-
-								search = ":";
-								break;
-							} else {
-								end = end + 1;
-							}
-						}
-					}
-					break;
-				} // }}} if condition after a header
-		} // switch
-
-		if(NULL == end)
-			break;
-		else
-			start = end + matchlen;
-	}
-
-	DEBUG("> sending answer...\n");
-	send_answer(&http_header, new_socket);
-	DEBUG("> answer sent.\n");
+	// TODO: make parsing function abstract (e.g. a parse(...) function pointer resolved via dlsym)
+	parse_http(new_socket, output_buffer, output_buffer_length);
 
 	fclose(f_r);
 
@@ -242,7 +96,7 @@ static int serve(int server_fd)
 			fcntl(new_socket, F_GETFL) | O_NONBLOCK
 		);
 
-		next_customer(new_socket);
+		answer_request(new_socket);
 
 #ifdef VALGRIND
 		break; // only run once, so that valgrind can test allocations&frees
diff --git a/main.h b/main.h
index 02a9000..6588a40 100644
--- a/main.h
+++ b/main.h
@@ -51,6 +51,24 @@
 // #include <pthread.h>      // maybe later
 // }}}
 
+// {{{ MACROS
+#define EWOULDBLOCK_DELAY     100
+#define READ_BUFFER_LENGTH    9000 // jumboframe?
+#define POST_DATA_MAX_LENGTH  18000
+#define DEBUG_SLEEP_TIME      50000 // handed to usleep() before every debug message, i.e. microseconds
+
+#ifndef DEBUG
+#define DEBUG(X, ...) // (X, ...) -- expands to nothing unless DEBUG is defined at build time
+#else
+#include <stdarg.h>
+static inline int verbose(const char * format, ...) { // printf-style debug output: sleeps DEBUG_SLEEP_TIME, prints via vprintf() and returns vprintf()'s result
+	va_list va; va_start(va, format); usleep(DEBUG_SLEEP_TIME); int printed = vprintf(format, va); va_end(va); return printed; // every va_start() needs a matching va_end() before the function returns
+}
+#undef DEBUG
+#define DEBUG verbose
+#endif
+// }}}
+
 typedef struct {
 	int    newline_length; // lenght of one newline in bytes (\n has 1, CR/LF has 2)
 	char * method;         // GET/POST or something like that
@@ -63,5 +81,6 @@ typedef struct {
 
 void next_part(Http_Header * http_header, const char * content, size_t content_size);
 void send_answer(Http_Header * http_header, int fd_socket);
+void parse_http(size_t new_socket, char * request, size_t request_length);
 
 // modeline for vim: shiftwidth=2 tabstop=2 number foldmethod=marker
-- 
cgit v1.2.3