diff options
Diffstat (limited to 'http_parser.c')
-rw-r--r-- | http_parser.c | 173 |
1 files changed, 173 insertions, 0 deletions
diff --git a/http_parser.c b/http_parser.c new file mode 100644 index 0000000..05cd094 --- /dev/null +++ b/http_parser.c | |||
@@ -0,0 +1,173 @@ | |||
1 | /* __ _ | ||
2 | * ____ / /_ ____ ___ __ ___________ _(_) | ||
3 | * / __ \/ __ \/ __ `__ \/ / / / ___/ __ `/ / | ||
4 | * / /_/ / / / / / / / / / /_/ / /__/ /_/ / / | ||
5 | * \____/_/ /_/_/ /_/ /_/\__, /\___/\__, /_/ | ||
6 | * /____/ /____/ | ||
7 | * | ||
8 | * SPDX-License-Identifier: BSD-2-Clause-FreeBSD | ||
9 | * | ||
10 | * Copyright (c) 2021, Max Christian Pohle <max@coderonline.de> | ||
11 | * | ||
12 | * Redistribution and use in source and binary forms, with or without | ||
13 | * modification, are permitted provided that the following conditions | ||
14 | * are met: | ||
15 | * | ||
16 | * 1. Redistributions of source code must retain the above copyright | ||
17 | * notice, this list of conditions and the following disclaimer. | ||
18 | * | ||
19 | * 2. Redistributions in binary form must reproduce the above copyright | ||
20 | * notice, this list of conditions and the following disclaimer in the | ||
21 | * documentation and/or other materials provided with the distribution. | ||
22 | * | ||
23 | * {{{ DISCLAIMER | ||
24 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | ||
25 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | ||
26 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | ||
27 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE | ||
28 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | ||
29 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | ||
30 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | ||
31 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | ||
32 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | ||
33 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | ||
34 | * POSSIBILITY OF SUCH DAMAGE. | ||
35 | * }}} | ||
36 | */ | ||
37 | |||
38 | #include "main.h" | ||
39 | |||
40 | void parse_http(size_t new_socket, char * request, size_t request_length) { | ||
41 | char * start = request; | ||
42 | char * end = NULL; | ||
43 | char * search = "\r\n"; | ||
44 | |||
45 | Http_Header http_header = {0}; | ||
46 | |||
47 | char * name = NULL; | ||
48 | while(NULL != (end = strpbrk(start, search))) { // TODO: try harder to break things (are SEGFAULTs possible?) | ||
49 | |||
50 | size_t matchlen = strspn(end, search); | ||
51 | switch(end[0]) { | ||
52 | case ':': | ||
53 | end[0] = '\0'; // {{{ remember header 'names' and search for the value | ||
54 | end++; // jump over the colon | ||
55 | |||
56 | name = start; // remember, where name starts, will be important in the newline case | ||
57 | |||
58 | if (0 == strcasecmp("Content-Type", start)) { | ||
59 | search = "\r\n;"; // (more unlikely) also search for a semicolon in Content-Type: [...]; boundary=[...] | ||
60 | } else { | ||
61 | search = "\r\n"; // (likely) search for some kind of newline | ||
62 | } // }}} | ||
63 | break; | ||
64 | case ';': | ||
65 | // {{{ find the form-data boundary in the main header | ||
66 | start += strspn(start, "; "); // remove spaces and semicolons (boundary check implicit; also stops at '\0') | ||
67 | |||
68 | const char s_multipart_form_data[] = "boundary="; | ||
69 | if(NULL == http_header.boundary && 0 < strcasecmp(start, s_multipart_form_data)) | ||
70 | { | ||
71 | http_header.boundary = end + sizeof(s_multipart_form_data) + 1; | ||
72 | http_header.boundary += strspn(http_header.boundary, "-"); | ||
73 | DEBUG("> Boundary found, now looking where it ends...\n"); | ||
74 | search = "\r\n"; | ||
75 | continue; | ||
76 | } /// }}} | ||
77 | break; | ||
78 | case '\r': // fallthrough | ||
79 | case '\n': | ||
80 | // {{{ newlines are special: sometimes content parts follow and sometimes headers, guess what... | ||
81 | end[0] = '\0'; | ||
82 | search = ":"; // we will continue to search for headers | ||
83 | if(NULL == name) { | ||
84 | if(NULL == http_header.method) { | ||
85 | DEBUG("[%ld]> HTTP REQUEST LINE :: %s \n", matchlen, start); | ||
86 | end[0] = '\0'; | ||
87 | |||
88 | while(NULL != (start = memchr(start, ' ', end - start))) { | ||
89 | if(NULL == http_header.url) | ||
90 | http_header.url = ++start; | ||
91 | else | ||
92 | start[0] = '\0'; | ||
93 | } | ||
94 | http_header.method = start; | ||
95 | http_header.newline_length = matchlen; | ||
96 | } else { | ||
97 | DEBUG("[...]\n"); // if we want to intentially skip something, we land here by setting name = NUL; | ||
98 | break; | ||
99 | } | ||
100 | } else { // we know that name is not NULL and can work with it | ||
101 | if (0 == strcasecmp("Content-Disposition", name)) | ||
102 | { http_header.content_disposition = start; } | ||
103 | } // }}} | ||
104 | DEBUG("\033[32m[%ld]> '% 20s' = '%s'\033[0m\n", matchlen, name, start); | ||
105 | // {{{ check if a http header ended (e.g. two newlines) | ||
106 | if(matchlen > http_header.newline_length) { | ||
107 | DEBUG("> END HEADERS, because there were %d newlines; boundary='%s'[%ld]\n", matchlen / http_header.newline_length, http_header.boundary, http_header.boundary_size); | ||
108 | end += matchlen; | ||
109 | |||
110 | // if it was the first header, we calculate the boundary size and expect more headers to come after a boundary | ||
111 | if(http_header.boundary && http_header.boundary_size == 0) { | ||
112 | DEBUG("================================================================================\n"); | ||
113 | http_header.boundary_size = strlen(http_header.boundary); | ||
114 | // skip the first header and boundary... | ||
115 | start = end; | ||
116 | start += strspn(start, "-"); | ||
117 | start += http_header.boundary_size; | ||
118 | start += http_header.newline_length; | ||
119 | continue; | ||
120 | } else { | ||
121 | char * content_start = end; | ||
122 | while(1) | ||
123 | { | ||
124 | size_t size_remaining = (size_t) request_length - (end - request) - 1; | ||
125 | DEBUG("%ld remaining.\n", size_remaining); | ||
126 | |||
127 | if(size_remaining <= 0) { | ||
128 | DEBUG("> not even the boundary would fit in that what is left.\n"); | ||
129 | break; | ||
130 | } | ||
131 | |||
132 | if(NULL == (end = memchr((void*) end, '-', size_remaining))) { | ||
133 | DEBUG("no further '-' found\n"); | ||
134 | break; | ||
135 | } | ||
136 | |||
137 | char * content_end = end - http_header.newline_length; | ||
138 | |||
139 | end += strspn(end, "-"); | ||
140 | if(0 == strncmp(end, http_header.boundary, http_header.boundary_size)) { | ||
141 | size_t file_size = content_end - content_start; | ||
142 | DEBUG("> Content ends here, size of the last file is %ld\n", file_size); | ||
143 | |||
144 | content_start[file_size + 1] = '\0'; | ||
145 | next_part(&http_header, content_start, file_size); | ||
146 | |||
147 | end += http_header.boundary_size; | ||
148 | matchlen = strspn(end, "\r\n"); | ||
149 | DEBUG("> end is at %p, matchlen is %ld\n", end, matchlen); | ||
150 | |||
151 | search = ":"; | ||
152 | break; | ||
153 | } else { | ||
154 | end = end + 1; | ||
155 | } | ||
156 | } | ||
157 | } | ||
158 | break; | ||
159 | } // }}} if condition after a header | ||
160 | } // switch | ||
161 | |||
162 | if(NULL == end) | ||
163 | break; | ||
164 | else | ||
165 | start = end + matchlen; | ||
166 | } | ||
167 | |||
168 | DEBUG("> sending answer...\n"); | ||
169 | send_answer(&http_header, new_socket); | ||
170 | DEBUG("> answer sent.\n"); | ||
171 | } | ||
172 | |||
173 | // modeline for vim: shiftwidth=2 tabstop=2 number foldmethod=marker | ||