| 1 | /* |
| 2 | * This file is part of DisOrder |
| 3 | * Copyright (C) 2005, 2007 Richard Kettlewell |
| 4 | * |
| 5 | * This program is free software; you can redistribute it and/or modify |
| 6 | * it under the terms of the GNU General Public License as published by |
| 7 | * the Free Software Foundation; either version 2 of the License, or |
| 8 | * (at your option) any later version. |
| 9 | * |
| 10 | * This program is distributed in the hope that it will be useful, but |
| 11 | * WITHOUT ANY WARRANTY; without even the implied warranty of |
| 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 13 | * General Public License for more details. |
| 14 | * |
| 15 | * You should have received a copy of the GNU General Public License |
| 16 | * along with this program; if not, write to the Free Software |
| 17 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 |
| 18 | * USA |
| 19 | */ |
| 20 | /** @file lib/mime.c |
| 21 | * @brief Support for MIME and allied protocols |
| 22 | */ |
| 23 | |
| 24 | #include <config.h> |
| 25 | #include "types.h" |
| 26 | |
| 27 | #include <string.h> |
| 28 | #include <ctype.h> |
| 29 | |
| 30 | #include "mem.h" |
| 31 | #include "mime.h" |
| 32 | #include "vector.h" |
| 33 | #include "hex.h" |
| 34 | #include "log.h" |
| 35 | |
/** @brief Match whitespace characters
 * @param c Character to test
 * @return 1 if @p c is a space, tab, carriage return or line feed, else 0
 */
static int whitespace(int c) {
  return c == ' ' || c == '\t' || c == '\r' || c == '\n';
}
| 48 | |
/** @brief Match RFC2045 tspecial characters
 * @param c Character to test
 * @return 1 if @p c is one of ( ) < > @ , ; : \ " / [ ] ? = and 0 otherwise
 */
static int tspecial(int c) {
  static const char specials[] = "()<>@,;:\\\"/[]?=";

  /* Guard the lookup: strchr() would match the terminating NUL and would
   * narrow out-of-range values to char. */
  if(c <= 0 || c > 127)
    return 0;
  return strchr(specials, c) != NULL;
}
| 72 | |
/** @brief Match RFC2616 separator characters
 * @param c Character to test
 * @return 1 if @p c is one of ( ) < > @ , ; : \ " / [ ] ? = { } space or
 * tab, and 0 otherwise
 */
static int http_separator(int c) {
  static const char separators[] = "()<>@,;:\\\"/[]?={} \t";

  /* Guard the lookup: strchr() would match the terminating NUL and would
   * narrow out-of-range values to char. */
  if(c <= 0 || c > 127)
    return 0;
  return strchr(separators, c) != NULL;
}
| 100 | |
/** @brief Match CRLF
 * @param ptr Pointer into a 0-terminated string
 * @return 1 if the two characters at @p ptr are CR then LF, else 0
 *
 * The second character is only inspected when the first is a CR.
 */
static int iscrlf(const char *ptr) {
  if(ptr[0] != '\r')
    return 0;
  return ptr[1] == '\n';
}
| 105 | |
/** @brief Skip whitespace
 * @param s Pointer into a 0-terminated string
 * @param rfc822_comments If true, skip RFC822 nested comments
 * @return Pointer to the first character that is neither whitespace nor
 * (when enabled) part of a comment, or NULL if a comment is unterminated
 *
 * Whitespace is space, tab, CR and LF.  Inside a comment a backslash quotes
 * the following character; a backslash at the very end of the string is an
 * error.  When @p rfc822_comments is false a '(' simply ends the scan.
 */
static const char *skipwhite(const char *s, int rfc822_comments) {
  int ch, nesting;

  for(;;) {
    ch = *s;
    if(ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n') {
      ++s;
      continue;
    }
    if(ch != '(' || !rfc822_comments)
      return s;
    /* Consume one (possibly nested) comment */
    ++s;
    nesting = 1;
    while(nesting && *s) {
      ch = *s++;
      if(ch == '(')
        ++nesting;
      else if(ch == ')')
        --nesting;
      else if(ch == '\\') {
        if(!*s)
          return 0;
        ++s;
      }
    }
    if(nesting)
      return 0;
  }
}
| 143 | |
/** @brief Test for a word character
 * @param c Character to test
 * @param special tspecial() (MIME/RFC2045) or http_separator() (HTTP/RFC2616)
 * @return 1 if @p c is a word character, else 0
 *
 * A word character lies strictly between space and DEL and is not rejected
 * by @p special.  @p special is only consulted for characters in that range.
 */
static int iswordchar(int c, int (*special)(int)) {
  if(c <= ' ')
    return 0;
  if(c > '~')
    return 0;
  return !special(c);
}
| 152 | |
| 153 | /** @brief Parse an RFC1521/RFC2616 word |
| 154 | * @param s Pointer to start of word |
| 155 | * @param valuep Where to store value |
| 156 | * @param special tspecial() (MIME/RFC2405) or http_separator() (HTTP/RFC2616) |
| 157 | * @return Pointer just after end of word or NULL if there's no word |
| 158 | * |
| 159 | * A word is a token or a quoted-string. |
| 160 | */ |
| 161 | static const char *parseword(const char *s, char **valuep, |
| 162 | int (*special)(int)) { |
| 163 | struct dynstr value[1]; |
| 164 | int c; |
| 165 | |
| 166 | dynstr_init(value); |
| 167 | if(*s == '"') { |
| 168 | ++s; |
| 169 | while((c = *s++) != '"') { |
| 170 | switch(c) { |
| 171 | case '\\': |
| 172 | if(!(c = *s++)) return 0; |
| 173 | default: |
| 174 | dynstr_append(value, c); |
| 175 | break; |
| 176 | } |
| 177 | } |
| 178 | if(!c) return 0; |
| 179 | } else { |
| 180 | if(!iswordchar((unsigned char)*s, special)) |
| 181 | return NULL; |
| 182 | dynstr_init(value); |
| 183 | while(iswordchar((unsigned char)*s, special)) |
| 184 | dynstr_append(value, *s++); |
| 185 | } |
| 186 | dynstr_terminate(value); |
| 187 | *valuep = value->vec; |
| 188 | return s; |
| 189 | } |
| 190 | |
/** @brief Parse an RFC1521/RFC2616 token
 * @param s Pointer to start of token
 * @param valuep Where to store value
 * @param special tspecial() (MIME/RFC2045) or http_separator() (HTTP/RFC2616)
 * @return Pointer just after end of token or NULL if there's no token
 *
 * Identical to parseword() except that a quoted-string is refused.
 */
static const char *parsetoken(const char *s, char **valuep,
                              int (*special)(int)) {
  return *s == '"' ? 0 : parseword(s, valuep, special);
}
| 202 | |
| 203 | /** @brief Parse a MIME content-type field |
| 204 | * @param s Start of field |
| 205 | * @param typep Where to store type |
| 206 | * @param parameternamep Where to store parameter name |
| 207 | * @param parameternvaluep Wher to store parameter value |
| 208 | * @return 0 on success, non-0 on error |
| 209 | */ |
| 210 | int mime_content_type(const char *s, |
| 211 | char **typep, |
| 212 | char **parameternamep, |
| 213 | char **parametervaluep) { |
| 214 | struct dynstr type, parametername; |
| 215 | |
| 216 | dynstr_init(&type); |
| 217 | if(!(s = skipwhite(s, 1))) return -1; |
| 218 | if(!*s) return -1; |
| 219 | while(*s && !tspecial(*s) && !whitespace(*s)) |
| 220 | dynstr_append(&type, tolower((unsigned char)*s++)); |
| 221 | if(!(s = skipwhite(s, 1))) return -1; |
| 222 | if(*s++ != '/') return -1; |
| 223 | dynstr_append(&type, '/'); |
| 224 | if(!(s = skipwhite(s, 1))) return -1; |
| 225 | while(*s && !tspecial(*s) && !whitespace(*s)) |
| 226 | dynstr_append(&type, tolower((unsigned char)*s++)); |
| 227 | if(!(s = skipwhite(s, 1))) return -1; |
| 228 | |
| 229 | if(*s == ';') { |
| 230 | dynstr_init(¶metername); |
| 231 | ++s; |
| 232 | if(!(s = skipwhite(s, 1))) return -1; |
| 233 | if(!*s) return -1; |
| 234 | while(*s && !tspecial(*s) && !whitespace(*s)) |
| 235 | dynstr_append(¶metername, tolower((unsigned char)*s++)); |
| 236 | if(!(s = skipwhite(s, 1))) return -1; |
| 237 | if(*s++ != '=') return -1; |
| 238 | if(!(s = skipwhite(s, 1))) return -1; |
| 239 | if(!(s = parseword(s, parametervaluep, tspecial))) return -1; |
| 240 | if(!(s = skipwhite(s, 1))) return -1; |
| 241 | dynstr_terminate(¶metername); |
| 242 | *parameternamep = parametername.vec; |
| 243 | } else |
| 244 | *parametervaluep = *parameternamep = 0; |
| 245 | dynstr_terminate(&type); |
| 246 | *typep = type.vec; |
| 247 | return 0; |
| 248 | } |
| 249 | |
| 250 | /** @brief Parse a MIME message |
| 251 | * @param s Start of message |
| 252 | * @param callback Called for each header field |
| 253 | * @param u Passed to callback |
| 254 | * @return Pointer to decoded body (might be in original string) |
| 255 | */ |
| 256 | const char *mime_parse(const char *s, |
| 257 | int (*callback)(const char *name, const char *value, |
| 258 | void *u), |
| 259 | void *u) { |
| 260 | struct dynstr name, value; |
| 261 | char *cte = 0, *p; |
| 262 | |
| 263 | while(*s && !iscrlf(s)) { |
| 264 | dynstr_init(&name); |
| 265 | dynstr_init(&value); |
| 266 | while(*s && !tspecial(*s) && !whitespace(*s)) |
| 267 | dynstr_append(&name, tolower((unsigned char)*s++)); |
| 268 | if(!(s = skipwhite(s, 1))) return 0; |
| 269 | if(*s != ':') return 0; |
| 270 | ++s; |
| 271 | while(*s && !(*s == '\n' && !(s[1] == ' ' || s[1] == '\t'))) |
| 272 | dynstr_append(&value, *s++); |
| 273 | if(*s) ++s; |
| 274 | dynstr_terminate(&name); |
| 275 | dynstr_terminate(&value); |
| 276 | if(!strcmp(name.vec, "content-transfer-encoding")) { |
| 277 | cte = xstrdup(value.vec); |
| 278 | for(p = cte; *p; p++) |
| 279 | *p = tolower((unsigned char)*p); |
| 280 | } |
| 281 | if(callback(name.vec, value.vec, u)) return 0; |
| 282 | } |
| 283 | if(*s) s += 2; |
| 284 | if(cte) { |
| 285 | if(!strcmp(cte, "base64")) return mime_base64(s); |
| 286 | if(!strcmp(cte, "quoted-printable")) return mime_qp(s); |
| 287 | } |
| 288 | return s; |
| 289 | } |
| 290 | |
/** @brief Test for a multipart boundary line
 * @param ptr Pointer to start of line
 * @param boundary Boundary string
 * @param bl Length of @p boundary
 * @return 1 if @p ptr starts with "--", then @p boundary, then either CRLF or
 * "--" CRLF; else 0
 */
static int isboundary(const char *ptr, const char *boundary, size_t bl) {
  const char *after;

  if(ptr[0] != '-' || ptr[1] != '-')
    return 0;
  if(strncmp(ptr + 2, boundary, bl) != 0)
    return 0;
  after = ptr + bl + 2;
  if(iscrlf(after))
    return 1;                           /* ordinary separator */
  return after[0] == '-' && after[1] == '-' && iscrlf(after + 2);
}
| 300 | |
/** @brief Test for the final multipart boundary line
 * @param ptr Pointer to start of line
 * @param boundary Boundary string
 * @param bl Length of @p boundary
 * @return 1 if @p ptr starts with "--", then @p boundary, then "--" CRLF;
 * else 0
 */
static int isfinal(const char *ptr, const char *boundary, size_t bl) {
  const char *tail;

  if(ptr[0] != '-' || ptr[1] != '-')
    return 0;
  if(strncmp(ptr + 2, boundary, bl) != 0)
    return 0;
  tail = ptr + bl + 2;
  if(tail[0] != '-' || tail[1] != '-')
    return 0;
  return iscrlf(tail + 2);
}
| 309 | |
/** @brief Parse a multipart MIME body
 * @param s Start of message
 * @param callback Callback for each part
 * @param boundary Boundary string
 * @param u Passed to callback
 * @return 0 on success, non-0 on error
 *
 * @p s must begin with a boundary line.  Each part is the text between one
 * boundary line and the next, without the CRLF that precedes the closing
 * boundary; it is handed to @p callback as a fresh copy.  Scanning stops at
 * the final boundary.  A non-zero return from @p callback is returned
 * immediately; -1 is returned if the body does not start with a boundary or
 * the input ends before the next boundary is found.
 */
int mime_multipart(const char *s,
                   int (*callback)(const char *s, void *u),
                   const char *boundary,
                   void *u) {
  const size_t blen = strlen(boundary);
  const char *part, *eol;
  int rc;

  if(!isboundary(s, boundary, blen))
    return -1;
  for(;;) {
    if(isfinal(s, boundary, blen))
      return 0;
    /* s is on a boundary line, which is known to end in CRLF; step past it */
    s = strstr(s, "\r\n") + 2;
    part = s;
    /* Advance a line at a time until the next boundary */
    for(; !isboundary(s, boundary, blen); s = eol + 2) {
      eol = strstr(s, "\r\n");
      if(!eol)
        return -1;
    }
    /* Drop the CRLF before the boundary, unless the part is empty */
    rc = callback(xstrndup(part, s == part ? 0 : s - part - 2), u);
    if(rc)
      return rc;
  }
}
| 340 | |
| 341 | /** @brief Parse an RFC2388-style content-disposition field |
| 342 | * @param s Start of field |
| 343 | * @param typep Where to store type |
| 344 | * @param parameternamep Where to store parameter name |
| 345 | * @param parameternvaluep Wher to store parameter value |
| 346 | * @return 0 on success, non-0 on error |
| 347 | */ |
| 348 | int mime_rfc2388_content_disposition(const char *s, |
| 349 | char **dispositionp, |
| 350 | char **parameternamep, |
| 351 | char **parametervaluep) { |
| 352 | struct dynstr disposition, parametername; |
| 353 | |
| 354 | dynstr_init(&disposition); |
| 355 | if(!(s = skipwhite(s, 1))) return -1; |
| 356 | if(!*s) return -1; |
| 357 | while(*s && !tspecial(*s) && !whitespace(*s)) |
| 358 | dynstr_append(&disposition, tolower((unsigned char)*s++)); |
| 359 | if(!(s = skipwhite(s, 1))) return -1; |
| 360 | |
| 361 | if(*s == ';') { |
| 362 | dynstr_init(¶metername); |
| 363 | ++s; |
| 364 | if(!(s = skipwhite(s, 1))) return -1; |
| 365 | if(!*s) return -1; |
| 366 | while(*s && !tspecial(*s) && !whitespace(*s)) |
| 367 | dynstr_append(¶metername, tolower((unsigned char)*s++)); |
| 368 | if(!(s = skipwhite(s, 1))) return -1; |
| 369 | if(*s++ != '=') return -1; |
| 370 | if(!(s = skipwhite(s, 1))) return -1; |
| 371 | if(!(s = parseword(s, parametervaluep, tspecial))) return -1; |
| 372 | if(!(s = skipwhite(s, 1))) return -1; |
| 373 | dynstr_terminate(¶metername); |
| 374 | *parameternamep = parametername.vec; |
| 375 | } else |
| 376 | *parametervaluep = *parameternamep = 0; |
| 377 | dynstr_terminate(&disposition); |
| 378 | *dispositionp = disposition.vec; |
| 379 | return 0; |
| 380 | } |
| 381 | |
| 382 | /** @brief Convert MIME quoted-printable |
| 383 | * @param s Quoted-printable data |
| 384 | * @return Decoded data |
| 385 | */ |
| 386 | char *mime_qp(const char *s) { |
| 387 | struct dynstr d; |
| 388 | int c, a, b; |
| 389 | const char *t; |
| 390 | |
| 391 | dynstr_init(&d); |
| 392 | while((c = *s++)) { |
| 393 | switch(c) { |
| 394 | case '=': |
| 395 | if((a = unhexdigitq(s[0])) != -1 |
| 396 | && (b = unhexdigitq(s[1])) != -1) { |
| 397 | dynstr_append(&d, a * 16 + b); |
| 398 | s += 2; |
| 399 | } else { |
| 400 | t = s; |
| 401 | while(*t == ' ' || *t == '\t') ++t; |
| 402 | if(iscrlf(t)) { |
| 403 | /* soft line break */ |
| 404 | s = t + 2; |
| 405 | } else |
| 406 | return 0; |
| 407 | } |
| 408 | break; |
| 409 | case ' ': |
| 410 | case '\t': |
| 411 | t = s; |
| 412 | while(*t == ' ' || *t == '\t') ++t; |
| 413 | if(iscrlf(t)) |
| 414 | /* trailing space is always eliminated */ |
| 415 | s = t; |
| 416 | else |
| 417 | dynstr_append(&d, c); |
| 418 | break; |
| 419 | default: |
| 420 | dynstr_append(&d, c); |
| 421 | break; |
| 422 | } |
| 423 | } |
| 424 | dynstr_terminate(&d); |
| 425 | return d.vec; |
| 426 | } |
| 427 | |
| 428 | /** @brief Convert MIME base64 |
| 429 | * @param s base64 data |
| 430 | * @return Decoded data |
| 431 | */ |
| 432 | char *mime_base64(const char *s) { |
| 433 | struct dynstr d; |
| 434 | const char *t; |
| 435 | int b[4], n, c; |
| 436 | static const char table[] = |
| 437 | "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; |
| 438 | |
| 439 | dynstr_init(&d); |
| 440 | n = 0; |
| 441 | while((c = (unsigned char)*s++)) { |
| 442 | if((t = strchr(table, c))) { |
| 443 | b[n++] = t - table; |
| 444 | if(n == 4) { |
| 445 | dynstr_append(&d, (b[0] << 2) + (b[1] >> 4)); |
| 446 | dynstr_append(&d, (b[1] << 4) + (b[2] >> 2)); |
| 447 | dynstr_append(&d, (b[2] << 6) + b[3]); |
| 448 | n = 0; |
| 449 | } |
| 450 | } else if(c == '=') { |
| 451 | if(n >= 2) { |
| 452 | dynstr_append(&d, (b[0] << 2) + (b[1] >> 4)); |
| 453 | if(n == 3) |
| 454 | dynstr_append(&d, (b[1] << 4) + (b[2] >> 2)); |
| 455 | } |
| 456 | break; |
| 457 | } |
| 458 | } |
| 459 | dynstr_terminate(&d); |
| 460 | return d.vec; |
| 461 | } |
| 462 | |
| 463 | /** @brief Parse a RFC2109 Cookie: header |
| 464 | * @param s Header field value |
| 465 | * @param cd Where to store result |
| 466 | * @return 0 on success, non-0 on error |
| 467 | */ |
| 468 | int parse_cookie(const char *s, |
| 469 | struct cookiedata *cd) { |
| 470 | char *n = 0, *v = 0; |
| 471 | |
| 472 | memset(cd, 0, sizeof *cd); |
| 473 | s = skipwhite(s, 0); |
| 474 | while(*s) { |
| 475 | /* Skip separators */ |
| 476 | if(*s == ';' || *s == ',') { |
| 477 | ++s; |
| 478 | s = skipwhite(s, 0); |
| 479 | continue; |
| 480 | } |
| 481 | if(!(s = parsetoken(s, &n, http_separator))) return -1; |
| 482 | s = skipwhite(s, 0); |
| 483 | if(*s++ != '=') return -1; |
| 484 | s = skipwhite(s, 0); |
| 485 | if(!(s = parseword(s, &v, http_separator))) return -1; |
| 486 | if(n[0] == '$') { |
| 487 | /* Some bit of meta-information */ |
| 488 | if(!strcmp(n, "$Version")) |
| 489 | cd->version = v; |
| 490 | else if(!strcmp(n, "$Path")) { |
| 491 | if(cd->ncookies > 0 && cd->cookies[cd->ncookies-1].path == 0) |
| 492 | cd->cookies[cd->ncookies-1].path = v; |
| 493 | else { |
| 494 | error(0, "redundant $Path in Cookie: header"); |
| 495 | return -1; |
| 496 | } |
| 497 | } else if(!strcmp(n, "$Domain")) { |
| 498 | if(cd->ncookies > 0 && cd->cookies[cd->ncookies-1].domain == 0) |
| 499 | cd->cookies[cd->ncookies-1].domain = v; |
| 500 | else { |
| 501 | error(0, "redundant $Domain in Cookie: header"); |
| 502 | return -1; |
| 503 | } |
| 504 | } |
| 505 | } else { |
| 506 | /* It's a new cookie */ |
| 507 | cd->cookies = xrealloc(cd->cookies, |
| 508 | (cd->ncookies + 1) * sizeof (struct cookie)); |
| 509 | cd->cookies[cd->ncookies].name = n; |
| 510 | cd->cookies[cd->ncookies].value = v; |
| 511 | cd->cookies[cd->ncookies].path = 0; |
| 512 | cd->cookies[cd->ncookies].domain = 0; |
| 513 | ++cd->ncookies; |
| 514 | } |
| 515 | s = skipwhite(s, 0); |
| 516 | if(*s && (*s != ',' && *s != ';')) { |
| 517 | error(0, "missing separator in Cookie: header"); |
| 518 | return -1; |
| 519 | } |
| 520 | } |
| 521 | return 0; |
| 522 | } |
| 523 | |
| 524 | /** @brief Find a named cookie |
| 525 | * @param cd Parse cookie data |
| 526 | * @param name Name of cookie |
| 527 | * @return Cookie structure or NULL if not found |
| 528 | */ |
| 529 | const struct cookie *find_cookie(const struct cookiedata *cd, |
| 530 | const char *name) { |
| 531 | int n; |
| 532 | |
| 533 | for(n = 0; n < cd->ncookies; ++n) |
| 534 | if(!strcmp(cd->cookies[n].name, name)) |
| 535 | return &cd->cookies[n]; |
| 536 | return 0; |
| 537 | } |
| 538 | |
| 539 | /* |
| 540 | Local Variables: |
| 541 | c-basic-offset:2 |
| 542 | comment-column:40 |
| 543 | fill-column:79 |
| 544 | End: |
| 545 | */ |