Refactoring, stable&functional intermediate state

author: Max Christian Pohle 2021-11-27 13:47:24 +0100
committer: Max Christian Pohle 2021-11-27 13:47:24 +0100
commit: 9cf4f824ed6f214f0041bd855f69e75e8fba4bcf (patch)
tree: f52d5b70059b076910baf2eda041cac47569c0a0 /http_parser.c
parent: 87451879bf098521db618052d6068ace1dd62492 (diff)
download: ohmycgi-9cf4f824ed6f214f0041bd855f69e75e8fba4bcf.tar.bz2
ohmycgi-9cf4f824ed6f214f0041bd855f69e75e8fba4bcf.zip
1 files changed, 56 insertions, 24 deletions
diff --git a/http_parser.c b/http_parser.c
index 05cd094..30e74c8 100644
--- a/http_parser.c
+++ b/http_parser.c
@@ -37,7 +37,47 @@
 #include "main.h"
+static inline char * handle_colon(char ** start, char ** end, char ** search) {
+        // HTTP headers and content are separated by an empty line (which is two
+        // newlines), but before that there are pairs of names and values, separated
+        // by a colon, e.g. Content-Type: text/html and this function is called, when
+        // *end reaches a colon.
+        *end[0] = '\0'; // remember header 'names' and search for the value
+        (*end)++;       // jump over the colon
+        if (0 == strcasecmp("Content-Type", *start)) {
+                *search = "\r\n;"; // (more unlikely) also search for a semicolon in Content-Type: [...]; boundary=[...]
+        } else {
+                *search = "\r\n";  // (likely) search for some kind of newline
+        }
+        return *start;  // remember, where name starts, will be important in the newline case
+}
+static inline void handle_semicolon(char ** start, char ** end, char **  search) {
+        // find the form-data boundary in the main header
+        *start += strspn(*start, "; "); // remove spaces and semicolons (boundary check implicit; also stops at '\0')
+        const char s_multipart_form_data[] = "boundary=";
+        if(0 < strcasecmp(*start, s_multipart_form_data))
+        {
+                *end = *end + sizeof(s_multipart_form_data) + 1;
+                *end += strspn(*end, "-");
+                DEBUG("> Boundary found, now looking where it ends...\n");
+        }
+        *search = "\r\n"; // do not search further semicolons
+}
 void parse_http(size_t new_socket, char * request, size_t request_length) {
+        // this http parser modifies the input buffer and replaces single characters
+        // with \0-chars to terminate them, but it does not copy strings, because
+        // that would require tedious length checks and would also allow the user
+        // to submit forms with unreasonable long strings in strange places or to
+        // make it shorter: it would make boundary checks necessary for every single
+        // heaer name, value, sub-value and content. This implementation is more
+        // generic and only requires content length bounary checks.
        char * start = request;
        char * end = NULL;
        char * search = "\r\n";
@@ -50,30 +90,12 @@ void parse_http(size_t new_socket, char * request, size_t request_length) {
                size_t matchlen = strspn(end, search);
                switch(end[0]) {
                        case ':':
-                                end[0] = '\0'; // {{{ remember header 'names' and search for the value
+                                handle_colon(&start, &end, &search);
-                                end++;         // jump over the colon
+                                name = start;
+                        break;
-                                name = start;  // remember, where name starts, will be important in the newline case
-                                if (0 == strcasecmp("Content-Type", start)) {
-                                        search = "\r\n;"; // (more unlikely) also search for a semicolon in Content-Type: [...]; boundary=[...]
-                                } else {
-                                        search = "\r\n";  // (likely) search for some kind of newline
-                                } // }}}
-                                break;
                        case ';':
-                                // {{{ find the form-data boundary in the main header
+                                handle_semicolon(&start, &end, &search);
-                                start += strspn(start, "; "); // remove spaces and semicolons (boundary check implicit; also stops at '\0')
+                                http_header.boundary = end;
-                                const char s_multipart_form_data[] = "boundary=";
-                                if(NULL == http_header.boundary && 0 < strcasecmp(start, s_multipart_form_data))
-                                {
-                                        http_header.boundary = end + sizeof(s_multipart_form_data) + 1;
-                                        http_header.boundary += strspn(http_header.boundary, "-");
-                                        DEBUG("> Boundary found, now looking where it ends...\n");
-                                        search = "\r\n";
-                                        continue;
-                                } /// }}}
                                break;
                        case '\r': // fallthrough
                        case '\n':
@@ -165,8 +187,18 @@ void parse_http(size_t new_socket, char * request, size_t request_length) {
                        start = end + matchlen;
        }
+        // failed to find URL? Use a default to avoid NULL pointer exceptions later
+        if(!http_header.url || http_header.url[0] == '\0' || http_header.url[1] == '\0') {
+                http_header.url = "/index.html";
+                DEBUG("Warning: Request had no URL and is probably invalid: %d", http_header.url[0]);
+        }
        DEBUG("> sending answer...\n");
-        send_answer(&http_header, new_socket);
+        FILE * f = fdopen((size_t) new_socket, "w");
+        send_answer(&http_header, f);
+        fflush(f);
+        fclose(f);
        DEBUG("> answer sent.\n");
 }
author	Max Christian Pohle	2021-11-27 13:47:24 +0100
committer	Max Christian Pohle	2021-11-27 13:47:24 +0100
commit	9cf4f824ed6f214f0041bd855f69e75e8fba4bcf (patch)
tree	f52d5b70059b076910baf2eda041cac47569c0a0 /http_parser.c
parent	87451879bf098521db618052d6068ace1dd62492 (diff)
download	ohmycgi-9cf4f824ed6f214f0041bd855f69e75e8fba4bcf.tar.bz2 ohmycgi-9cf4f824ed6f214f0041bd855f69e75e8fba4bcf.zip

diff --git a/http_parser.c b/http_parser.c index 05cd094..30e74c8 100644 --- a/http_parser.c +++ b/http_parser.c
@@ -37,7 +37,47 @@
37		37
38	#include "main.h"	38	#include "main.h"
39		39
		40	static inline char * handle_colon(char ** start, char ** end, char ** search) {
		41	// HTTP headers and content are separated by an empty line (which is two
		42	// newlines), but before that there are pairs of names and values, separated
		43	// by a colon, e.g. Content-Type: text/html and this function is called, when
		44	// *end reaches a colon.
		45
		46	*end[0] = '\0'; // remember header 'names' and search for the value
		47	(*end)++; // jump over the colon
		48
		49	if (0 == strcasecmp("Content-Type", *start)) {
		50	*search = "\r\n;"; // (more unlikely) also search for a semicolon in Content-Type: [...]; boundary=[...]
		51	} else {
		52	*search = "\r\n"; // (likely) search for some kind of newline
		53	}
		54	return *start; // remember, where name starts, will be important in the newline case
		55	}
		56
		57	static inline void handle_semicolon(char ** start, char ** end, char ** search) {
		58	// find the form-data boundary in the main header
		59	*start += strspn(*start, "; "); // remove spaces and semicolons (boundary check implicit; also stops at '\0')
		60
		61	const char s_multipart_form_data[] = "boundary=";
		62	if(0 < strcasecmp(*start, s_multipart_form_data))
		63	{
		64	*end = *end + sizeof(s_multipart_form_data) + 1;
		65	*end += strspn(*end, "-");
		66	DEBUG("> Boundary found, now looking where it ends...\n");
		67	}
		68
		69	*search = "\r\n"; // do not search further semicolons
		70	}
		71
40	void parse_http(size_t new_socket, char * request, size_t request_length) {	72	void parse_http(size_t new_socket, char * request, size_t request_length) {
		73	// this http parser modifies the input buffer and replaces single characters
		74	// with \0-chars to terminate them, but it does not copy strings, because
		75	// that would require tedious length checks and would also allow the user
		76	// to submit forms with unreasonable long strings in strange places or to
		77	// make it shorter: it would make boundary checks necessary for every single
		78	// heaer name, value, sub-value and content. This implementation is more
		79	// generic and only requires content length bounary checks.
		80
41	char * start = request;	81	char * start = request;
42	char * end = NULL;	82	char * end = NULL;
43	char * search = "\r\n";	83	char * search = "\r\n";
@@ -50,30 +90,12 @@ void parse_http(size_t new_socket, char * request, size_t request_length) {
50	size_t matchlen = strspn(end, search);	90	size_t matchlen = strspn(end, search);
51	switch(end[0]) {	91	switch(end[0]) {
52	case ':':	92	case ':':
53	end[0] = '\0'; // {{{ remember header 'names' and search for the value	93	handle_colon(&start, &end, &search);
54	end++; // jump over the colon	94	name = start;
55		95	break;
56	name = start; // remember, where name starts, will be important in the newline case
57
58	if (0 == strcasecmp("Content-Type", start)) {
59	search = "\r\n;"; // (more unlikely) also search for a semicolon in Content-Type: [...]; boundary=[...]
60	} else {
61	search = "\r\n"; // (likely) search for some kind of newline
62	} // }}}
63	break;
64	case ';':	96	case ';':
65	// {{{ find the form-data boundary in the main header	97	handle_semicolon(&start, &end, &search);
66	start += strspn(start, "; "); // remove spaces and semicolons (boundary check implicit; also stops at '\0')	98	http_header.boundary = end;
67
68	const char s_multipart_form_data[] = "boundary=";
69	if(NULL == http_header.boundary && 0 < strcasecmp(start, s_multipart_form_data))
70	{
71	http_header.boundary = end + sizeof(s_multipart_form_data) + 1;
72	http_header.boundary += strspn(http_header.boundary, "-");
73	DEBUG("> Boundary found, now looking where it ends...\n");
74	search = "\r\n";
75	continue;
76	} /// }}}
77	break;	99	break;
78	case '\r': // fallthrough	100	case '\r': // fallthrough
79	case '\n':	101	case '\n':
@@ -165,8 +187,18 @@ void parse_http(size_t new_socket, char * request, size_t request_length) {
165	start = end + matchlen;	187	start = end + matchlen;
166	}	188	}
167		189
		190	// failed to find URL? Use a default to avoid NULL pointer exceptions later
		191	if(!http_header.url || http_header.url[0] == '\0' || http_header.url[1] == '\0') {
		192	http_header.url = "/index.html";
		193	DEBUG("Warning: Request had no URL and is probably invalid: %d", http_header.url[0]);
		194	}
		195
168	DEBUG("> sending answer...\n");	196	DEBUG("> sending answer...\n");
169	send_answer(&http_header, new_socket);	197	FILE * f = fdopen((size_t) new_socket, "w");
		198	send_answer(&http_header, f);
		199
		200	fflush(f);
		201	fclose(f);
170	DEBUG("> answer sent.\n");	202	DEBUG("> answer sent.\n");
171	}	203	}
172		204