Commit | Line | Data |
---|---|---|
460b9539 | 1 | /* |
2 | * This file is part of DisOrder | |
39d4aa6b | 3 | * Copyright (C) 2005, 2007 Richard Kettlewell |
460b9539 | 4 | * |
5 | * This program is free software; you can redistribute it and/or modify | |
6 | * it under the terms of the GNU General Public License as published by | |
7 | * the Free Software Foundation; either version 2 of the License, or | |
8 | * (at your option) any later version. | |
9 | * | |
10 | * This program is distributed in the hope that it will be useful, but | |
11 | * WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
13 | * General Public License for more details. | |
14 | * | |
15 | * You should have received a copy of the GNU General Public License | |
16 | * along with this program; if not, write to the Free Software | |
17 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 | |
18 | * USA | |
19 | */ | |
39d4aa6b RK |
20 | /** @file lib/mime.c |
21 | * @brief Support for MIME and allied protocols | |
22 | */ | |
460b9539 | 23 | |
24 | #include <config.h> | |
25 | #include "types.h" | |
26 | ||
27 | #include <string.h> | |
28 | #include <ctype.h> | |
29 | ||
30 | #include "mem.h" | |
31 | #include "mime.h" | |
32 | #include "vector.h" | |
33 | #include "hex.h" | |
39d4aa6b | 34 | #include "log.h" |
460b9539 | 35 | |
/** @brief Test for a whitespace character
 * @param c Character to test
 * @return 1 if @p c is space, tab, carriage return or line feed, else 0
 */
static int whitespace(int c) {
  return c == ' ' || c == '\t' || c == '\r' || c == '\n';
}
48 | ||
/** @brief Test for an RFC2045 tspecial character
 * @param c Character to test
 * @return 1 if @p c is one of <tt>()<>@,;:\\"/[]?=</tt>, else 0
 */
static int tspecial(int c) {
  static const char specials[] = "()<>@,;:\\\"/[]?=";

  /* Every tspecial is 7-bit ASCII.  Rejecting everything else up front also
   * stops strchr() from matching the string terminator when c is 0 and from
   * aliasing out-of-range values onto a table entry. */
  if(c <= 0 || c > 0x7F)
    return 0;
  return strchr(specials, c) != NULL;
}
72 | ||
39d4aa6b RK |
/** @brief Test for an RFC2616 separator character
 * @param c Character to test
 * @return 1 if @p c is one of <tt>()<>@,;:\\"/[]?={}</tt>, space or tab,
 * else 0
 */
static int http_separator(int c) {
  static const char separators[] = "()<>@,;:\\\"/[]?={} \t";

  /* Every separator is 7-bit ASCII.  Rejecting everything else up front also
   * stops strchr() from matching the string terminator when c is 0 and from
   * aliasing out-of-range values onto a table entry. */
  if(c <= 0 || c > 0x7F)
    return 0;
  return strchr(separators, c) != NULL;
}
100 | ||
/** @brief Test for a CRLF pair
 * @param ptr Pointer into a 0-terminated string
 * @return 1 if the two characters at @p ptr are CR then LF, else 0
 *
 * The second character is only inspected when the first is CR, so this is
 * safe to call when @p ptr points at the terminator.
 */
static int iscrlf(const char *ptr) {
  if(ptr[0] != '\r')
    return 0;
  return ptr[1] == '\n';
}
105 | ||
/** @brief Skip whitespace and, optionally, comments
 * @param s Pointer to start of input
 * @param rfc822_comments If true, skip RFC822 nested comments
 * @return Pointer to the first character that was not skipped, or NULL if a
 * comment is not terminated
 *
 * Whitespace means space, tab, CR and LF.  When @p rfc822_comments is 0 a
 * '(' simply ends the scan, so NULL is never returned in that mode.
 */
static const char *skipwhite(const char *s, int rfc822_comments) {
  int ch, nesting;

  for(;;) {
    ch = *s;
    if(ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n') {
      ++s;
      continue;
    }
    /* Anything other than the start of a comment we are allowed to skip
     * ends the scan */
    if(ch != '(' || !rfc822_comments)
      return s;
    /* Consume one comment, tracking how deeply it nests */
    ++s;
    nesting = 1;
    while(*s && nesting) {
      ch = *s++;
      if(ch == '(')
        ++nesting;
      else if(ch == ')')
        --nesting;
      else if(ch == '\\') {
        /* Backslash quotes the next character; it must exist */
        if(!*s)
          return 0;
        ++s;
      }
    }
    /* Ran out of input inside the comment */
    if(nesting)
      return 0;
  }
}
143 | ||
39d4aa6b RK |
/** @brief Test for a word character
 * @param c Character to test
 * @param special tspecial() (MIME/RFC2045) or http_separator() (HTTP/RFC2616)
 * @return 1 if @p c is a word character, else 0
 *
 * A word character lies strictly between space and DEL and is not rejected
 * by @p special.  @p special is only consulted for characters in that range.
 */
static int iswordchar(int c, int (*special)(int)) {
  if(c <= ' ')
    return 0;
  if(c > '~')
    return 0;
  return !special(c);
}
152 | ||
153 | /** @brief Parse an RFC1521/RFC2616 word | |
154 | * @param s Pointer to start of word | |
155 | * @param valuep Where to store value | |
156 | * @param special tspecial() (MIME/RFC2405) or http_separator() (HTTP/RFC2616) | |
157 | * @return Pointer just after end of word or NULL if there's no word | |
158 | * | |
159 | * A word is a token or a quoted-string. | |
160 | */ | |
161 | static const char *parseword(const char *s, char **valuep, | |
162 | int (*special)(int)) { | |
163 | struct dynstr value[1]; | |
460b9539 | 164 | int c; |
165 | ||
39d4aa6b RK |
166 | dynstr_init(value); |
167 | if(*s == '"') { | |
168 | ++s; | |
169 | while((c = *s++) != '"') { | |
170 | switch(c) { | |
171 | case '\\': | |
172 | if(!(c = *s++)) return 0; | |
173 | default: | |
174 | dynstr_append(value, c); | |
175 | break; | |
176 | } | |
460b9539 | 177 | } |
39d4aa6b RK |
178 | if(!c) return 0; |
179 | } else { | |
180 | if(!iswordchar((unsigned char)*s, special)) | |
181 | return NULL; | |
182 | dynstr_init(value); | |
183 | while(iswordchar((unsigned char)*s, special)) | |
184 | dynstr_append(value, *s++); | |
460b9539 | 185 | } |
39d4aa6b RK |
186 | dynstr_terminate(value); |
187 | *valuep = value->vec; | |
460b9539 | 188 | return s; |
189 | } | |
190 | ||
39d4aa6b RK |
/** @brief Parse an RFC1521/RFC2616 token
 * @param s Pointer to start of token
 * @param valuep Where to store value
 * @param special tspecial() (MIME/RFC2045) or http_separator() (HTTP/RFC2616)
 * @return Pointer just after end of token or NULL if there's no token
 *
 * Identical to parseword() except that a quoted-string is refused.
 */
static const char *parsetoken(const char *s, char **valuep,
                              int (*special)(int)) {
  /* Anything that opens with a quote is a quoted-string, not a token */
  return *s == '"' ? NULL : parseword(s, valuep, special);
}
202 | ||
203 | /** @brief Parse a MIME content-type field | |
204 | * @param s Start of field | |
205 | * @param typep Where to store type | |
206 | * @param parameternamep Where to store parameter name | |
207 | * @param parameternvaluep Wher to store parameter value | |
208 | * @return 0 on success, non-0 on error | |
209 | */ | |
460b9539 | 210 | int mime_content_type(const char *s, |
211 | char **typep, | |
212 | char **parameternamep, | |
213 | char **parametervaluep) { | |
39d4aa6b | 214 | struct dynstr type, parametername; |
460b9539 | 215 | |
216 | dynstr_init(&type); | |
39d4aa6b | 217 | if(!(s = skipwhite(s, 1))) return -1; |
460b9539 | 218 | if(!*s) return -1; |
219 | while(*s && !tspecial(*s) && !whitespace(*s)) | |
220 | dynstr_append(&type, tolower((unsigned char)*s++)); | |
39d4aa6b | 221 | if(!(s = skipwhite(s, 1))) return -1; |
460b9539 | 222 | if(*s++ != '/') return -1; |
223 | dynstr_append(&type, '/'); | |
39d4aa6b | 224 | if(!(s = skipwhite(s, 1))) return -1; |
460b9539 | 225 | while(*s && !tspecial(*s) && !whitespace(*s)) |
226 | dynstr_append(&type, tolower((unsigned char)*s++)); | |
39d4aa6b | 227 | if(!(s = skipwhite(s, 1))) return -1; |
460b9539 | 228 | |
229 | if(*s == ';') { | |
230 | dynstr_init(¶metername); | |
231 | ++s; | |
39d4aa6b | 232 | if(!(s = skipwhite(s, 1))) return -1; |
460b9539 | 233 | if(!*s) return -1; |
234 | while(*s && !tspecial(*s) && !whitespace(*s)) | |
235 | dynstr_append(¶metername, tolower((unsigned char)*s++)); | |
39d4aa6b | 236 | if(!(s = skipwhite(s, 1))) return -1; |
460b9539 | 237 | if(*s++ != '=') return -1; |
39d4aa6b RK |
238 | if(!(s = skipwhite(s, 1))) return -1; |
239 | if(!(s = parseword(s, parametervaluep, tspecial))) return -1; | |
240 | if(!(s = skipwhite(s, 1))) return -1; | |
460b9539 | 241 | dynstr_terminate(¶metername); |
242 | *parameternamep = parametername.vec; | |
243 | } else | |
244 | *parametervaluep = *parameternamep = 0; | |
245 | dynstr_terminate(&type); | |
246 | *typep = type.vec; | |
247 | return 0; | |
248 | } | |
249 | ||
39d4aa6b RK |
250 | /** @brief Parse a MIME message |
251 | * @param s Start of message | |
252 | * @param callback Called for each header field | |
253 | * @param u Passed to callback | |
254 | * @return Pointer to decoded body (might be in original string) | |
255 | */ | |
460b9539 | 256 | const char *mime_parse(const char *s, |
257 | int (*callback)(const char *name, const char *value, | |
258 | void *u), | |
259 | void *u) { | |
260 | struct dynstr name, value; | |
261 | char *cte = 0, *p; | |
262 | ||
263 | while(*s && !iscrlf(s)) { | |
264 | dynstr_init(&name); | |
265 | dynstr_init(&value); | |
266 | while(*s && !tspecial(*s) && !whitespace(*s)) | |
267 | dynstr_append(&name, tolower((unsigned char)*s++)); | |
39d4aa6b | 268 | if(!(s = skipwhite(s, 1))) return 0; |
460b9539 | 269 | if(*s != ':') return 0; |
270 | ++s; | |
271 | while(*s && !(*s == '\n' && !(s[1] == ' ' || s[1] == '\t'))) | |
272 | dynstr_append(&value, *s++); | |
273 | if(*s) ++s; | |
274 | dynstr_terminate(&name); | |
275 | dynstr_terminate(&value); | |
276 | if(!strcmp(name.vec, "content-transfer-encoding")) { | |
277 | cte = xstrdup(value.vec); | |
278 | for(p = cte; *p; p++) | |
279 | *p = tolower((unsigned char)*p); | |
280 | } | |
281 | if(callback(name.vec, value.vec, u)) return 0; | |
282 | } | |
283 | if(*s) s += 2; | |
284 | if(cte) { | |
285 | if(!strcmp(cte, "base64")) return mime_base64(s); | |
286 | if(!strcmp(cte, "quoted-printable")) return mime_qp(s); | |
287 | } | |
288 | return s; | |
289 | } | |
290 | ||
/** @brief Test for a multipart boundary line
 * @param ptr Pointer to candidate line
 * @param boundary Boundary string
 * @param bl Length of @p boundary
 * @return 1 if @p ptr starts with a boundary line, else 0
 *
 * Matches <tt>--boundary</tt> followed either directly by CRLF or by
 * <tt>--</tt> and then CRLF.
 */
static int isboundary(const char *ptr, const char *boundary, size_t bl) {
  const char *tail;

  if(ptr[0] != '-' || ptr[1] != '-')
    return 0;
  if(strncmp(ptr + 2, boundary, bl))
    return 0;
  /* What follows the boundary string decides the outcome */
  tail = ptr + bl + 2;
  if(tail[0] == '\r' && tail[1] == '\n')
    return 1;
  return (tail[0] == '-'
          && tail[1] == '-'
          && tail[2] == '\r'
          && tail[3] == '\n');
}
300 | ||
/** @brief Test for the final multipart boundary line
 * @param ptr Pointer to candidate line
 * @param boundary Boundary string
 * @param bl Length of @p boundary
 * @return 1 if @p ptr starts with <tt>--boundary--</tt> and CRLF, else 0
 */
static int isfinal(const char *ptr, const char *boundary, size_t bl) {
  const char *tail;

  if(ptr[0] != '-' || ptr[1] != '-')
    return 0;
  if(strncmp(ptr + 2, boundary, bl))
    return 0;
  /* Only the "--" CRLF suffix marks the last boundary */
  tail = ptr + bl + 2;
  return (tail[0] == '-'
          && tail[1] == '-'
          && tail[2] == '\r'
          && tail[3] == '\n');
}
309 | ||
39d4aa6b RK |
/** @brief Parse a multipart MIME body
 * @param s Start of message
 * @param callback Callback for each part
 * @param boundary Boundary string
 * @param u Passed to callback
 * @return 0 on success, non-0 on error
 *
 * @p s must start with a boundary line.  Each part, without the CRLF that
 * precedes the next boundary, is passed to @p callback as the result of
 * xstrndup().  A non-0 return from @p callback stops parsing and is returned
 * to the caller.  -1 is returned if @p s does not start with a boundary or
 * if the input ends before the next boundary is found.
 */
int mime_multipart(const char *s,
                   int (*callback)(const char *s, void *u),
                   const char *boundary,
                   void *u) {
  const size_t bl = strlen(boundary);
  const char *part, *eol;
  int rc;

  if(!isboundary(s, boundary, bl))
    return -1;
  for(;;) {
    /* s always points at a boundary line here */
    if(isfinal(s, boundary, bl))
      return 0;
    /* The part starts on the line after the boundary */
    s = strstr(s, "\r\n") + 2;
    part = s;
    /* Advance a line at a time until the next boundary */
    while(!isboundary(s, boundary, bl)) {
      eol = strstr(s, "\r\n");
      if(!eol)
        return -1;
      s = eol + 2;
    }
    /* An empty part has no CRLF of its own to discard */
    rc = callback(xstrndup(part,
                           s == part ? 0 : s - part - 2),
                  u);
    if(rc)
      return rc;
  }
}
340 | ||
39d4aa6b RK |
341 | /** @brief Parse an RFC2388-style content-disposition field |
342 | * @param s Start of field | |
343 | * @param typep Where to store type | |
344 | * @param parameternamep Where to store parameter name | |
345 | * @param parameternvaluep Wher to store parameter value | |
346 | * @return 0 on success, non-0 on error | |
347 | */ | |
460b9539 | 348 | int mime_rfc2388_content_disposition(const char *s, |
349 | char **dispositionp, | |
350 | char **parameternamep, | |
351 | char **parametervaluep) { | |
39d4aa6b | 352 | struct dynstr disposition, parametername; |
460b9539 | 353 | |
354 | dynstr_init(&disposition); | |
39d4aa6b | 355 | if(!(s = skipwhite(s, 1))) return -1; |
460b9539 | 356 | if(!*s) return -1; |
357 | while(*s && !tspecial(*s) && !whitespace(*s)) | |
358 | dynstr_append(&disposition, tolower((unsigned char)*s++)); | |
39d4aa6b | 359 | if(!(s = skipwhite(s, 1))) return -1; |
460b9539 | 360 | |
361 | if(*s == ';') { | |
362 | dynstr_init(¶metername); | |
363 | ++s; | |
39d4aa6b | 364 | if(!(s = skipwhite(s, 1))) return -1; |
460b9539 | 365 | if(!*s) return -1; |
366 | while(*s && !tspecial(*s) && !whitespace(*s)) | |
367 | dynstr_append(¶metername, tolower((unsigned char)*s++)); | |
39d4aa6b | 368 | if(!(s = skipwhite(s, 1))) return -1; |
460b9539 | 369 | if(*s++ != '=') return -1; |
39d4aa6b RK |
370 | if(!(s = skipwhite(s, 1))) return -1; |
371 | if(!(s = parseword(s, parametervaluep, tspecial))) return -1; | |
372 | if(!(s = skipwhite(s, 1))) return -1; | |
460b9539 | 373 | dynstr_terminate(¶metername); |
374 | *parameternamep = parametername.vec; | |
375 | } else | |
376 | *parametervaluep = *parameternamep = 0; | |
377 | dynstr_terminate(&disposition); | |
378 | *dispositionp = disposition.vec; | |
379 | return 0; | |
380 | } | |
381 | ||
39d4aa6b RK |
382 | /** @brief Convert MIME quoted-printable |
383 | * @param s Quoted-printable data | |
384 | * @return Decoded data | |
385 | */ | |
460b9539 | 386 | char *mime_qp(const char *s) { |
387 | struct dynstr d; | |
388 | int c, a, b; | |
389 | const char *t; | |
390 | ||
391 | dynstr_init(&d); | |
392 | while((c = *s++)) { | |
393 | switch(c) { | |
394 | case '=': | |
395 | if((a = unhexdigitq(s[0])) != -1 | |
396 | && (b = unhexdigitq(s[1])) != -1) { | |
397 | dynstr_append(&d, a * 16 + b); | |
398 | s += 2; | |
399 | } else { | |
400 | t = s; | |
401 | while(*t == ' ' || *t == '\t') ++t; | |
402 | if(iscrlf(t)) { | |
403 | /* soft line break */ | |
404 | s = t + 2; | |
405 | } else | |
406 | return 0; | |
407 | } | |
408 | break; | |
409 | case ' ': | |
410 | case '\t': | |
411 | t = s; | |
412 | while(*t == ' ' || *t == '\t') ++t; | |
413 | if(iscrlf(t)) | |
414 | /* trailing space is always eliminated */ | |
415 | s = t; | |
416 | else | |
417 | dynstr_append(&d, c); | |
418 | break; | |
419 | default: | |
420 | dynstr_append(&d, c); | |
421 | break; | |
422 | } | |
423 | } | |
424 | dynstr_terminate(&d); | |
425 | return d.vec; | |
426 | } | |
427 | ||
39d4aa6b RK |
428 | /** @brief Convert MIME base64 |
429 | * @param s base64 data | |
430 | * @return Decoded data | |
431 | */ | |
460b9539 | 432 | char *mime_base64(const char *s) { |
433 | struct dynstr d; | |
434 | const char *t; | |
435 | int b[4], n, c; | |
436 | static const char table[] = | |
437 | "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; | |
438 | ||
439 | dynstr_init(&d); | |
440 | n = 0; | |
441 | while((c = (unsigned char)*s++)) { | |
442 | if((t = strchr(table, c))) { | |
443 | b[n++] = t - table; | |
444 | if(n == 4) { | |
445 | dynstr_append(&d, (b[0] << 2) + (b[1] >> 4)); | |
446 | dynstr_append(&d, (b[1] << 4) + (b[2] >> 2)); | |
447 | dynstr_append(&d, (b[2] << 6) + b[3]); | |
448 | n = 0; | |
449 | } | |
450 | } else if(c == '=') { | |
451 | if(n >= 2) { | |
452 | dynstr_append(&d, (b[0] << 2) + (b[1] >> 4)); | |
453 | if(n == 3) | |
454 | dynstr_append(&d, (b[1] << 4) + (b[2] >> 2)); | |
455 | } | |
456 | break; | |
457 | } | |
458 | } | |
459 | dynstr_terminate(&d); | |
460 | return d.vec; | |
461 | } | |
462 | ||
39d4aa6b RK |
463 | /** @brief Parse a RFC2109 Cookie: header |
464 | * @param s Header field value | |
465 | * @param cd Where to store result | |
466 | * @return 0 on success, non-0 on error | |
467 | */ | |
468 | int parse_cookie(const char *s, | |
469 | struct cookiedata *cd) { | |
470 | char *n = 0, *v = 0; | |
471 | ||
472 | memset(cd, 0, sizeof *cd); | |
473 | s = skipwhite(s, 0); | |
474 | while(*s) { | |
475 | /* Skip separators */ | |
476 | if(*s == ';' || *s == ',') { | |
477 | ++s; | |
478 | s = skipwhite(s, 0); | |
479 | continue; | |
480 | } | |
481 | if(!(s = parsetoken(s, &n, http_separator))) return -1; | |
482 | s = skipwhite(s, 0); | |
483 | if(*s++ != '=') return -1; | |
484 | s = skipwhite(s, 0); | |
485 | if(!(s = parseword(s, &v, http_separator))) return -1; | |
486 | if(n[0] == '$') { | |
487 | /* Some bit of meta-information */ | |
488 | if(!strcmp(n, "$Version")) | |
489 | cd->version = v; | |
490 | else if(!strcmp(n, "$Path")) { | |
491 | if(cd->ncookies > 0 && cd->cookies[cd->ncookies-1].path == 0) | |
492 | cd->cookies[cd->ncookies-1].path = v; | |
493 | else { | |
494 | error(0, "redundant $Path in Cookie: header"); | |
495 | return -1; | |
496 | } | |
497 | } else if(!strcmp(n, "$Domain")) { | |
498 | if(cd->ncookies > 0 && cd->cookies[cd->ncookies-1].domain == 0) | |
499 | cd->cookies[cd->ncookies-1].domain = v; | |
500 | else { | |
501 | error(0, "redundant $Domain in Cookie: header"); | |
502 | return -1; | |
503 | } | |
504 | } | |
505 | } else { | |
506 | /* It's a new cookie */ | |
507 | cd->cookies = xrealloc(cd->cookies, | |
508 | (cd->ncookies + 1) * sizeof (struct cookie)); | |
509 | cd->cookies[cd->ncookies].name = n; | |
510 | cd->cookies[cd->ncookies].value = v; | |
511 | cd->cookies[cd->ncookies].path = 0; | |
512 | cd->cookies[cd->ncookies].domain = 0; | |
513 | ++cd->ncookies; | |
514 | } | |
515 | s = skipwhite(s, 0); | |
516 | if(*s && (*s != ',' && *s != ';')) { | |
517 | error(0, "missing separator in Cookie: header"); | |
518 | return -1; | |
519 | } | |
520 | } | |
521 | return 0; | |
522 | } | |
523 | ||
524 | /** @brief Find a named cookie | |
525 | * @param cd Parse cookie data | |
526 | * @param name Name of cookie | |
527 | * @return Cookie structure or NULL if not found | |
528 | */ | |
529 | const struct cookie *find_cookie(const struct cookiedata *cd, | |
530 | const char *name) { | |
531 | int n; | |
532 | ||
533 | for(n = 0; n < cd->ncookies; ++n) | |
534 | if(!strcmp(cd->cookies[n].name, name)) | |
535 | return &cd->cookies[n]; | |
536 | return 0; | |
537 | } | |
538 | ||
460b9539 | 539 | /* |
540 | Local Variables: | |
541 | c-basic-offset:2 | |
542 | comment-column:40 | |
543 | fill-column:79 | |
544 | End: | |
545 | */ |