diff options
Diffstat (limited to 'http_parser.c')
| -rw-r--r-- | http_parser.c | 173 |
1 files changed, 173 insertions, 0 deletions
diff --git a/http_parser.c b/http_parser.c new file mode 100644 index 0000000..05cd094 --- /dev/null +++ b/http_parser.c | |||
| @@ -0,0 +1,173 @@ | |||
| 1 | /* __ _ | ||
| 2 | * ____ / /_ ____ ___ __ ___________ _(_) | ||
| 3 | * / __ \/ __ \/ __ `__ \/ / / / ___/ __ `/ / | ||
| 4 | * / /_/ / / / / / / / / / /_/ / /__/ /_/ / / | ||
| 5 | * \____/_/ /_/_/ /_/ /_/\__, /\___/\__, /_/ | ||
| 6 | * /____/ /____/ | ||
| 7 | * | ||
| 8 | * SPDX-License-Identifier: BSD-2-Clause-FreeBSD | ||
| 9 | * | ||
| 10 | * Copyright (c) 2021, Max Christian Pohle <max@coderonline.de> | ||
| 11 | * | ||
| 12 | * Redistribution and use in source and binary forms, with or without | ||
| 13 | * modification, are permitted provided that the following conditions | ||
| 14 | * are met: | ||
| 15 | * | ||
| 16 | * 1. Redistributions of source code must retain the above copyright | ||
| 17 | * notice, this list of conditions and the following disclaimer. | ||
| 18 | * | ||
| 19 | * 2. Redistributions in binary form must reproduce the above copyright | ||
| 20 | * notice, this list of conditions and the following disclaimer in the | ||
| 21 | * documentation and/or other materials provided with the distribution. | ||
| 22 | * | ||
| 23 | * {{{ DISCLAIMER | ||
| 24 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | ||
| 25 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | ||
| 26 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | ||
| 27 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE | ||
| 28 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | ||
| 29 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | ||
| 30 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | ||
| 31 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | ||
| 32 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | ||
| 33 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | ||
| 34 | * POSSIBILITY OF SUCH DAMAGE. | ||
| 35 | * }}} | ||
| 36 | */ | ||
| 37 | |||
| 38 | #include "main.h" | ||
| 39 | |||
| 40 | void parse_http(size_t new_socket, char * request, size_t request_length) { | ||
| 41 | char * start = request; | ||
| 42 | char * end = NULL; | ||
| 43 | char * search = "\r\n"; | ||
| 44 | |||
| 45 | Http_Header http_header = {0}; | ||
| 46 | |||
| 47 | char * name = NULL; | ||
| 48 | while(NULL != (end = strpbrk(start, search))) { // TODO: try harder to break things (are SEGFAULTs possible?) | ||
| 49 | |||
| 50 | size_t matchlen = strspn(end, search); | ||
| 51 | switch(end[0]) { | ||
| 52 | case ':': | ||
| 53 | end[0] = '\0'; // {{{ remember header 'names' and search for the value | ||
| 54 | end++; // jump over the colon | ||
| 55 | |||
| 56 | name = start; // remember, where name starts, will be important in the newline case | ||
| 57 | |||
| 58 | if (0 == strcasecmp("Content-Type", start)) { | ||
| 59 | search = "\r\n;"; // (more unlikely) also search for a semicolon in Content-Type: [...]; boundary=[...] | ||
| 60 | } else { | ||
| 61 | search = "\r\n"; // (likely) search for some kind of newline | ||
| 62 | } // }}} | ||
| 63 | break; | ||
| 64 | case ';': | ||
| 65 | // {{{ find the form-data boundary in the main header | ||
| 66 | start += strspn(start, "; "); // remove spaces and semicolons (boundary check implicit; also stops at '\0') | ||
| 67 | |||
| 68 | const char s_multipart_form_data[] = "boundary="; | ||
| 69 | if(NULL == http_header.boundary && 0 < strcasecmp(start, s_multipart_form_data)) | ||
| 70 | { | ||
| 71 | http_header.boundary = end + sizeof(s_multipart_form_data) + 1; | ||
| 72 | http_header.boundary += strspn(http_header.boundary, "-"); | ||
| 73 | DEBUG("> Boundary found, now looking where it ends...\n"); | ||
| 74 | search = "\r\n"; | ||
| 75 | continue; | ||
| 76 | } /// }}} | ||
| 77 | break; | ||
| 78 | case '\r': // fallthrough | ||
| 79 | case '\n': | ||
| 80 | // {{{ newlines are special: sometimes content parts follow and sometimes headers, guess what... | ||
| 81 | end[0] = '\0'; | ||
| 82 | search = ":"; // we will continue to search for headers | ||
| 83 | if(NULL == name) { | ||
| 84 | if(NULL == http_header.method) { | ||
| 85 | DEBUG("[%ld]> HTTP REQUEST LINE :: %s \n", matchlen, start); | ||
| 86 | end[0] = '\0'; | ||
| 87 | |||
| 88 | while(NULL != (start = memchr(start, ' ', end - start))) { | ||
| 89 | if(NULL == http_header.url) | ||
| 90 | http_header.url = ++start; | ||
| 91 | else | ||
| 92 | start[0] = '\0'; | ||
| 93 | } | ||
| 94 | http_header.method = start; | ||
| 95 | http_header.newline_length = matchlen; | ||
| 96 | } else { | ||
| 97 | DEBUG("[...]\n"); // if we want to intentially skip something, we land here by setting name = NUL; | ||
| 98 | break; | ||
| 99 | } | ||
| 100 | } else { // we know that name is not NULL and can work with it | ||
| 101 | if (0 == strcasecmp("Content-Disposition", name)) | ||
| 102 | { http_header.content_disposition = start; } | ||
| 103 | } // }}} | ||
| 104 | DEBUG("\033[32m[%ld]> '% 20s' = '%s'\033[0m\n", matchlen, name, start); | ||
| 105 | // {{{ check if a http header ended (e.g. two newlines) | ||
| 106 | if(matchlen > http_header.newline_length) { | ||
| 107 | DEBUG("> END HEADERS, because there were %d newlines; boundary='%s'[%ld]\n", matchlen / http_header.newline_length, http_header.boundary, http_header.boundary_size); | ||
| 108 | end += matchlen; | ||
| 109 | |||
| 110 | // if it was the first header, we calculate the boundary size and expect more headers to come after a boundary | ||
| 111 | if(http_header.boundary && http_header.boundary_size == 0) { | ||
| 112 | DEBUG("================================================================================\n"); | ||
| 113 | http_header.boundary_size = strlen(http_header.boundary); | ||
| 114 | // skip the first header and boundary... | ||
| 115 | start = end; | ||
| 116 | start += strspn(start, "-"); | ||
| 117 | start += http_header.boundary_size; | ||
| 118 | start += http_header.newline_length; | ||
| 119 | continue; | ||
| 120 | } else { | ||
| 121 | char * content_start = end; | ||
| 122 | while(1) | ||
| 123 | { | ||
| 124 | size_t size_remaining = (size_t) request_length - (end - request) - 1; | ||
| 125 | DEBUG("%ld remaining.\n", size_remaining); | ||
| 126 | |||
| 127 | if(size_remaining <= 0) { | ||
| 128 | DEBUG("> not even the boundary would fit in that what is left.\n"); | ||
| 129 | break; | ||
| 130 | } | ||
| 131 | |||
| 132 | if(NULL == (end = memchr((void*) end, '-', size_remaining))) { | ||
| 133 | DEBUG("no further '-' found\n"); | ||
| 134 | break; | ||
| 135 | } | ||
| 136 | |||
| 137 | char * content_end = end - http_header.newline_length; | ||
| 138 | |||
| 139 | end += strspn(end, "-"); | ||
| 140 | if(0 == strncmp(end, http_header.boundary, http_header.boundary_size)) { | ||
| 141 | size_t file_size = content_end - content_start; | ||
| 142 | DEBUG("> Content ends here, size of the last file is %ld\n", file_size); | ||
| 143 | |||
| 144 | content_start[file_size + 1] = '\0'; | ||
| 145 | next_part(&http_header, content_start, file_size); | ||
| 146 | |||
| 147 | end += http_header.boundary_size; | ||
| 148 | matchlen = strspn(end, "\r\n"); | ||
| 149 | DEBUG("> end is at %p, matchlen is %ld\n", end, matchlen); | ||
| 150 | |||
| 151 | search = ":"; | ||
| 152 | break; | ||
| 153 | } else { | ||
| 154 | end = end + 1; | ||
| 155 | } | ||
| 156 | } | ||
| 157 | } | ||
| 158 | break; | ||
| 159 | } // }}} if condition after a header | ||
| 160 | } // switch | ||
| 161 | |||
| 162 | if(NULL == end) | ||
| 163 | break; | ||
| 164 | else | ||
| 165 | start = end + matchlen; | ||
| 166 | } | ||
| 167 | |||
| 168 | DEBUG("> sending answer...\n"); | ||
| 169 | send_answer(&http_header, new_socket); | ||
| 170 | DEBUG("> answer sent.\n"); | ||
| 171 | } | ||
| 172 | |||
| 173 | // modeline for vim: shiftwidth=2 tabstop=2 number foldmethod=marker | ||
