From db864e290ba1ec4acd74371b150e7770585ef284 Mon Sep 17 00:00:00 2001
From: Max Christian Pohle
Date: Wed, 24 Nov 2021 01:22:54 +0100
Subject: Made the implementation more generic again

---
 Makefile      |   2 +-
 http_parser.c | 195 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 main.c        | 154 ++------------------------------------------------
 main.h        |  19 +++++++
 4 files changed, 219 insertions(+), 151 deletions(-)
 create mode 100644 http_parser.c

diff --git a/Makefile b/Makefile
index 9a41fc9..2458ed9 100644
--- a/Makefile
+++ b/Makefile
@@ -9,7 +9,7 @@ LIBS := -lcups
 CFLAGS += -L/usr/local/lib
 CFLAGS += -I/usr/local/include
 
-FILES := cgi.c
+FILES := cgi.c http_parser.c
 
 test: main
 	./main
diff --git a/http_parser.c b/http_parser.c
new file mode 100644
index 0000000..05cd094
--- /dev/null
+++ b/http_parser.c
@@ -0,0 +1,195 @@
+/* __ _
+* ____ / /_ ____ ___ __ ___________ _(_)
+* / __ \/ __ \/ __ `__ \/ / / / ___/ __ `/ /
+* / /_/ / / / / / / / / /_/ / /__/ /_/ / /
+* \____/_/ /_/_/ /_/\__, /\___/\__, /_/
+* /____/ /____/
+*
+* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+*
+* Copyright (c) 2021, Max Christian Pohle
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions
+* are met:
+*
+* 1. Redistributions of source code must retain the above copyright
+* notice, this list of conditions and the following disclaimer.
+*
+* 2. Redistributions in binary form must reproduce the above copyright
+* notice, this list of conditions and the following disclaimer in the
+* documentation and/or other materials provided with the distribution.
+*
+* {{{ DISCLAIMER
+* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+* POSSIBILITY OF SUCH DAMAGE.
+* }}}
+*/
+
+#include "main.h"
+
+/**
+ * Parse one HTTP request in place and answer it.
+ *
+ * The buffer gets modified: delimiters behind header names, values, request
+ * line tokens and multipart contents are overwritten with '\0'. Each multipart
+ * content that is found is handed to next_part(); send_answer() is called once
+ * after the parsing loop has finished.
+ *
+ * @param new_socket     passed on to send_answer()
+ * @param request        writable and NUL-terminated (it is scanned with strpbrk)
+ * @param request_length number of bytes in request; limits the search for boundaries
+ */
+void parse_http(size_t new_socket, char * request, size_t request_length) {
+  char * start = request;
+  char * end = NULL;
+  char * search = "\r\n";
+  const char * request_end = request + request_length; // one past the last byte of the request
+
+  Http_Header http_header = {0};
+
+  char * name = NULL;
+  while(NULL != (end = strpbrk(start, search))) { // TODO: try harder to break things (are SEGFAULTs possible?)
+
+    size_t matchlen = strspn(end, search);
+    switch(end[0]) {
+      case ':':
+        end[0] = '\0'; // {{{ remember header 'names' and search for the value
+        end++; // jump over the colon
+
+        name = start; // remember, where name starts, will be important in the newline case
+
+        if (0 == strcasecmp("Content-Type", start)) {
+          search = "\r\n;"; // (more unlikely) also search for a semicolon in Content-Type: [...]; boundary=[...]
+        } else {
+          search = "\r\n"; // (likely) search for some kind of newline
+        } // }}}
+        break;
+      case ';': {
+        // {{{ find the form-data boundary in the main header
+        const char s_boundary[] = "boundary=";
+        // the parameter starts behind the semicolon, not at 'start' (which is still the media type)
+        char * parameter = end + strspn(end, "; ");
+
+        if(NULL == http_header.boundary && 0 == strncasecmp(parameter, s_boundary, sizeof(s_boundary) - 1))
+        {
+          http_header.boundary = parameter + sizeof(s_boundary) - 1;
+          http_header.boundary += strspn(http_header.boundary, "-");
+          DEBUG("> Boundary found, now looking where it ends...\n");
+          search = "\r\n";
+          continue;
+        } /// }}}
+        break;
+      }
+      case '\r': // fallthrough
+      case '\n':
+        // {{{ newlines are special: sometimes content parts follow and sometimes headers, guess what...
+        end[0] = '\0';
+        search = ":"; // we will continue to search for headers
+        if(NULL == name) {
+          if(NULL == http_header.method) {
+            DEBUG("[%zu]> HTTP REQUEST LINE :: %s \n", matchlen, start);
+
+            // split "METHOD URL VERSION" in place without losing 'start'
+            char * space = memchr(start, ' ', end - start);
+            if(NULL != space) {
+              space[0] = '\0'; // terminates the method
+              http_header.url = space + 1;
+              if(NULL != (space = memchr(http_header.url, ' ', end - http_header.url)))
+                space[0] = '\0'; // terminates the url
+            }
+            http_header.method = start;
+            http_header.newline_length = matchlen;
+          } else {
+            DEBUG("[...]\n"); // if we want to intentionally skip something, we land here by setting name = NULL;
+            break;
+          }
+        } else { // we know that name is not NULL and can work with it
+          if (0 == strcasecmp("Content-Disposition", name))
+          { http_header.content_disposition = start; }
+        } // }}}
+        DEBUG("\033[32m[%zu]> '%20s' = '%s'\033[0m\n", matchlen, NULL != name ? name : "(null)", start);
+        // {{{ check if a http header ended (e.g. two newlines)
+        if(matchlen > (size_t) http_header.newline_length) {
+          DEBUG("> END HEADERS, because there were %zu newlines; boundary='%s'[%zu]\n", matchlen / http_header.newline_length, NULL != http_header.boundary ? http_header.boundary : "(null)", (size_t) http_header.boundary_size);
+          end += matchlen;
+
+          // if it was the first header, we calculate the boundary size and expect more headers to come after a boundary
+          if(http_header.boundary && http_header.boundary_size == 0) {
+            DEBUG("================================================================================\n");
+            http_header.boundary_size = strlen(http_header.boundary);
+            // skip the first header and boundary...
+            start = end;
+            start += strspn(start, "-");
+            start += http_header.boundary_size;
+            start += http_header.newline_length;
+            continue;
+          } else if(NULL == http_header.boundary) {
+            // no boundary was announced, so there are no parts to split; stop parsing
+            end = NULL;
+          } else {
+            char * content_start = end;
+            while(1)
+            {
+              // compare pointers: an unsigned remainder would wrap around instead of becoming <= 0
+              if(end + 1 >= request_end) {
+                DEBUG("> not even the boundary would fit in that what is left.\n");
+                end = NULL; // nothing left to parse (matchlen would be stale)
+                break;
+              }
+              size_t size_remaining = (size_t) (request_end - end) - 1;
+              DEBUG("%zu remaining.\n", size_remaining);
+
+              if(NULL == (end = memchr((void*) end, '-', size_remaining))) {
+                DEBUG("no further '-' found\n");
+                break;
+              }
+
+              char * content_end = end - http_header.newline_length;
+              if(content_end < content_start)
+                content_end = content_start; // boundary right at the start: empty content
+
+              end += strspn(end, "-");
+              if(0 == strncmp(end, http_header.boundary, http_header.boundary_size)) {
+                size_t file_size = content_end - content_start;
+                DEBUG("> Content ends here, size of the last file is %zu\n", file_size);
+
+                content_start[file_size] = '\0'; // terminate right behind the content
+                next_part(&http_header, content_start, file_size);
+
+                end += http_header.boundary_size;
+                matchlen = strspn(end, "\r\n");
+                DEBUG("> end is at %p, matchlen is %zu\n", (void*) end, matchlen);
+
+                search = ":";
+                break;
+              } else {
+                end = end + 1;
+              }
+            }
+          }
+          break;
+        } // }}} if condition after a header
+    } // switch
+
+    if(NULL == end)
+      break;
+    else
+      start = end + matchlen;
+  }
+
+  DEBUG("> sending answer...\n");
+  send_answer(&http_header, new_socket);
+  DEBUG("> answer sent.\n");
+}
+
+// modeline for 
vim: shiftwidth=2 tabstop=2 number foldmethod=marker diff --git a/main.c b/main.c index c1a7c30..8eb6f40 100644 --- a/main.c +++ b/main.c @@ -36,23 +36,6 @@ */ #include "main.h" -// {{{ MACROS -#define EWOULDBLOCK_DELAY 100 -#define READ_BUFFER_LENGTH 9000 // jumboframe? -#define POST_DATA_MAX_LENGTH 18000 -#define DEBUG_SLEEP_TIME 50000 - -#ifndef DEBUG -#define DEBUG(X, ...) // (X, ...) -#else -#include -static inline int verbose(const char * format, ...) { - va_list va; va_start(va, format); usleep(DEBUG_SLEEP_TIME); return vprintf(format, va); -} -#undef DEBUG -#define DEBUG verbose -#endif -// }}} static int read_everything(FILE * f_r, FILE * output) { const int read_buffer_length = READ_BUFFER_LENGTH; @@ -76,7 +59,7 @@ static int read_everything(FILE * f_r, FILE * output) { return EXIT_SUCCESS; } -static void * next_customer(size_t new_socket) { +static void * answer_request(size_t new_socket) { FILE * f_r = fdopen((size_t) new_socket, "r"); char * output_buffer = NULL; @@ -86,137 +69,8 @@ static void * next_customer(size_t new_socket) { read_everything(f_r, output); // TODO: catch return value and error handling shutdown(new_socket, SHUT_RD); // shutdown the reading half of the connection - char * start = output_buffer; - char * end = NULL; - char * search = "\r\n"; - - Http_Header http_header = {0}; - - char * name = NULL; - while(NULL != (end = strpbrk(start, search))) { // TODO: try harder to break things (are SEGFAULTs possible?) - - size_t matchlen = strspn(end, search); - switch(end[0]) { - case ':': - end[0] = '\0'; // {{{ remember header 'names' and search for the value - end++; // jump over the colon - - name = start; // remember, where name starts, will be important in the newline case - - if (0 == strcasecmp("Content-Type", start)) { - search = "\r\n;"; // (more unlikely) also search for a semicolon in Content-Type: [...]; boundary=[...] 
- } else { - search = "\r\n"; // (likely) search for some kind of newline - } // }}} - break; - case ';': - // {{{ find the form-data boundary in the main header - start += strspn(start, "; "); // remove spaces and semicolons (boundary check implicit; also stops at '\0') - - const char s_multipart_form_data[] = "boundary="; - if(NULL == http_header.boundary && 0 < strcasecmp(start, s_multipart_form_data)) - { - http_header.boundary = end + sizeof(s_multipart_form_data) + 1; - http_header.boundary += strspn(http_header.boundary, "-"); - DEBUG("> Boundary found, now looking where it ends...\n"); - search = "\r\n"; - continue; - } /// }}} - break; - case '\r': // fallthrough - case '\n': - // {{{ newlines are special: sometimes content parts follow and sometimes headers, guess what... - end[0] = '\0'; - search = ":"; // we will continue to search for headers - if(NULL == name) { - if(NULL == http_header.method) { - DEBUG("[%ld]> HTTP REQUEST LINE :: %s \n", matchlen, start); - end[0] = '\0'; - - while(NULL != (start = memchr(start, ' ', end - start))) { - if(NULL == http_header.url) - http_header.url = ++start; - else - start[0] = '\0'; - } - http_header.method = start; - http_header.newline_length = matchlen; - } else { - DEBUG("[...]\n"); // if we want to intentially skip something, we land here by setting name = NUL; - break; - } - } else { // we know that name is not NULL and can work with it - if (0 == strcasecmp("Content-Disposition", name)) - { http_header.content_disposition = start; } - } // }}} - DEBUG("\033[32m[%ld]> '% 20s' = '%s'\033[0m\n", matchlen, name, start); - // {{{ check if a http header ended (e.g. 
two newlines) - if(matchlen > http_header.newline_length) { - DEBUG("> END HEADERS, because there were %d newlines; boundary='%s'[%ld]\n", matchlen / http_header.newline_length, http_header.boundary, http_header.boundary_size); - end += matchlen; - - // if it was the first header, we calculate the boundary size and expect more headers to come after a boundary - if(http_header.boundary && http_header.boundary_size == 0) { - DEBUG("================================================================================\n"); - http_header.boundary_size = strlen(http_header.boundary); - // skip the first header and boundary... - start = end; - start += strspn(start, "-"); - start += http_header.boundary_size; - start += http_header.newline_length; - continue; - } else { - char * content_start = end; - while(1) - { - size_t size_remaining = (size_t) output_buffer_length - (end - output_buffer) - 1; - DEBUG("%ld remaining.\n", size_remaining); - - if(size_remaining <= 0) { - DEBUG("> not even the boundary would fit in that what is left.\n"); - break; - } - - if(NULL == (end = memchr((void*) end, '-', size_remaining))) { - DEBUG("no further '-' found\n"); - break; - } - - char * content_end = end - http_header.newline_length; - - end += strspn(end, "-"); - if(0 == strncmp(end, http_header.boundary, http_header.boundary_size)) { - size_t file_size = content_end - content_start; - DEBUG("> Content ends here, size of the last file is %ld\n", file_size); - - content_start[file_size + 1] = '\0'; - next_part(&http_header, content_start, file_size); - - end += http_header.boundary_size; - matchlen = strspn(end, "\r\n"); - DEBUG("> end is at %p, matchlen is %ld\n", end, matchlen); - - - search = ":"; - break; - } else { - end = end + 1; - } - } - } - break; - } // }}} if condition after a header - } // switch - - if(NULL == end) - break; - else - start = end + matchlen; - } - - DEBUG("> sending answer...\n"); - send_answer(&http_header, new_socket); - DEBUG("> answer sent.\n"); + // 
TODO: make parsing function abstract (e.g. parse(...) function pointer with dlsym)
+  parse_http(new_socket, output_buffer, output_buffer_length);
 
   fclose(f_r);
 
@@ -242,7 +96,7 @@ static int serve(int server_fd)
       fcntl(new_socket, F_GETFL) | O_NONBLOCK
     );
 
-    next_customer(new_socket);
+    answer_request(new_socket);
 
 #ifdef VALGRIND
     break; // only run once, so that valgrind can test allocations&frees
diff --git a/main.h b/main.h
index 02a9000..6588a40 100644
--- a/main.h
+++ b/main.h
@@ -51,6 +51,24 @@
 // #include // maybe later
 // }}}
 
+// {{{ MACROS
+#define EWOULDBLOCK_DELAY 100
+#define READ_BUFFER_LENGTH 9000 // jumboframe?
+#define POST_DATA_MAX_LENGTH 18000
+#define DEBUG_SLEEP_TIME 50000
+
+#ifndef DEBUG
+#define DEBUG(X, ...) // (X, ...)
+#else
+#include <stdarg.h>
+static inline int verbose(const char * format, ...) { // printf-like output that sleeps DEBUG_SLEEP_TIME (usleep) first; returns what vprintf returns
+  va_list va; va_start(va, format); usleep(DEBUG_SLEEP_TIME); int printed = vprintf(format, va); va_end(va); return printed; // every va_start needs its va_end
+}
+#undef DEBUG
+#define DEBUG verbose
+#endif
+// }}}
+
 typedef struct {
   int newline_length; // lenght of one newline in bytes (\n has 1, CR/LF has 2)
   char * method; // GET/POST or something like that
@@ -63,5 +81,6 @@ typedef struct {
 
 void next_part(Http_Header * http_header, const char * content, size_t content_size);
 void send_answer(Http_Header * http_header, int fd_socket);
+void parse_http(size_t new_socket, char * request, size_t request_length);
 
 // modeline for vim: shiftwidth=2 tabstop=2 number foldmethod=marker
-- 
cgit v1.2.3