X-Git-Url: https://git.distorted.org.uk/~mdw/disorder/blobdiff_plain/5818980adea84ce4cd92a1b4549e9806c9d56c58..82c01b317cd1892f4376c68be88a74f971493428:/lib/mime.c diff --git a/lib/mime.c b/lib/mime.c index 87fc43a..8776647 100644 --- a/lib/mime.c +++ b/lib/mime.c @@ -34,6 +34,7 @@ #include "vector.h" #include "hex.h" #include "log.h" +#include "base64.h" /** @brief Match whitespace characters */ static int whitespace(int c) { @@ -72,7 +73,7 @@ static int tspecial(int c) { } } -/** @brief Match RFC2616 seprator characters */ +/** @brief Match RFC2616 separator characters */ static int http_separator(int c) { switch(c) { case '(': @@ -106,7 +107,9 @@ static int iscrlf(const char *ptr) { } /** @brief Skip whitespace + * @param s Pointer into string * @param rfc822_comments If true, skip RFC822 nested comments + * @return Pointer into string after whitespace */ static const char *skipwhite(const char *s, int rfc822_comments) { int c, depth; @@ -206,8 +209,10 @@ static const char *parsetoken(const char *s, char **valuep, * @param s Start of field * @param typep Where to store type * @param parameternamep Where to store parameter name - * @param parameternvaluep Wher to store parameter value + * @param parametervaluep Wher to store parameter value * @return 0 on success, non-0 on error + * + * See RFC 2045 s5. */ int mime_content_type(const char *s, char **typep, @@ -253,7 +258,12 @@ int mime_content_type(const char *s, * @param s Start of message * @param callback Called for each header field * @param u Passed to callback - * @return Pointer to decoded body (might be in original string) + * @return Pointer to decoded body (might be in original string), or NULL on error + * + * This does an RFC 822-style parse and honors Content-Transfer-Encoding as + * described in RFC 2045 + * s6. @p callback is called for each header field encountered, in order, + * with ASCII characters in the header name forced to lower case. */ const char *mime_parse(const char *s, int (*callback)(const char *name, const char *value, @@ -290,6 +300,7 @@ const char *mime_parse(const char *s, return s; } +/** @brief Match the boundary string */ static int isboundary(const char *ptr, const char *boundary, size_t bl) { return (ptr[0] == '-' && ptr[1] == '-' @@ -300,6 +311,7 @@ static int isboundary(const char *ptr, const char *boundary, size_t bl) { && (iscrlf(ptr + bl + 4) || *(ptr + bl + 4) == 0)))); } +/** @brief Match the final boundary string */ static int isfinal(const char *ptr, const char *boundary, size_t bl) { return (ptr[0] == '-' && ptr[1] == '-' @@ -311,10 +323,14 @@ static int isfinal(const char *ptr, const char *boundary, size_t bl) { /** @brief Parse a multipart MIME body * @param s Start of message - * @param callback CAllback for each part + * @param callback Callback for each part * @param boundary Boundary string * @param u Passed to callback * @return 0 on success, non-0 on error + * + * See RFC 2046 + * s5.1. @p callback is called for each part (not yet decoded in any way) + * in succession; you should probably call mime_parse() for each part. */ int mime_multipart(const char *s, int (*callback)(const char *s, void *u), @@ -346,10 +362,13 @@ int mime_multipart(const char *s, /** @brief Parse an RFC2388-style content-disposition field * @param s Start of field - * @param typep Where to store type + * @param dispositionp Where to store disposition * @param parameternamep Where to store parameter name - * @param parameternvaluep Wher to store parameter value + * @param parametervaluep Wher to store parameter value * @return 0 on success, non-0 on error + * + * See RFC 2388 s3 + * and RFC 2183. */ int mime_rfc2388_content_disposition(const char *s, char **dispositionp, @@ -388,6 +407,9 @@ int mime_rfc2388_content_disposition(const char *s, /** @brief Convert MIME quoted-printable * @param s Quoted-printable data * @return Decoded data + * + * See RFC 2045 + * s6.7. */ char *mime_qp(const char *s) { struct dynstr d; @@ -431,91 +453,12 @@ char *mime_qp(const char *s) { return d.vec; } -static const char mime_base64_table[] = - "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; - -/** @brief Convert MIME base64 - * @param s base64 data - * @return Decoded data - */ -char *mime_base64(const char *s, size_t *nsp) { - struct dynstr d; - const char *t; - int b[4], n, c; - - dynstr_init(&d); - n = 0; - while((c = (unsigned char)*s++)) { - if((t = strchr(mime_base64_table, c))) { - b[n++] = t - mime_base64_table; - if(n == 4) { - dynstr_append(&d, (b[0] << 2) + (b[1] >> 4)); - dynstr_append(&d, (b[1] << 4) + (b[2] >> 2)); - dynstr_append(&d, (b[2] << 6) + b[3]); - n = 0; - } - } else if(c == '=') { - if(n >= 2) { - dynstr_append(&d, (b[0] << 2) + (b[1] >> 4)); - if(n == 3) - dynstr_append(&d, (b[1] << 4) + (b[2] >> 2)); - } - break; - } - } - if(nsp) - *nsp = d.nvec; - dynstr_terminate(&d); - return d.vec; -} - -/** @brief Convert a binary string to base64 - * @param s Bytes to convert - * @param ns Number of bytes to convert - * @return Encoded data - * - * This function does not attempt to split up lines. - */ -char *mime_to_base64(const uint8_t *s, size_t ns) { - struct dynstr d[1]; - - dynstr_init(d); - while(ns >= 3) { - /* Input bytes with output bits: AAAAAABB BBBBCCCC CCDDDDDD */ - /* Output bytes with input bits: 000000 001111 111122 222222 */ - dynstr_append(d, mime_base64_table[s[0] >> 2]); - dynstr_append(d, mime_base64_table[((s[0] & 3) << 4) - + (s[1] >> 4)]); - dynstr_append(d, mime_base64_table[((s[1] & 15) << 2) - + (s[2] >> 6)]); - dynstr_append(d, mime_base64_table[s[2] & 63]); - ns -= 3; - s += 3; - } - if(ns > 0) { - dynstr_append(d, mime_base64_table[s[0] >> 2]); - switch(ns) { - case 1: - dynstr_append(d, mime_base64_table[(s[0] & 3) << 4]); - dynstr_append(d, '='); - dynstr_append(d, '='); - break; - case 2: - dynstr_append(d, mime_base64_table[((s[0] & 3) << 4) - + (s[1] >> 4)]); - dynstr_append(d, mime_base64_table[(s[1] & 15) << 2]); - dynstr_append(d, '='); - break; - } - } - dynstr_terminate(d); - return d->vec; -} - /** @brief Parse a RFC2109 Cookie: header * @param s Header field value * @param cd Where to store result * @return 0 on success, non-0 on error + * + * See RFC 2109. */ int parse_cookie(const char *s, struct cookiedata *cd) { @@ -588,6 +531,132 @@ const struct cookie *find_cookie(const struct cookiedata *cd, return 0; } +/** @brief RFC822 quoting + * @param s String to quote + * @param force If non-0, always quote + * @return Possibly quoted string + */ +char *quote822(const char *s, int force) { + const char *t; + struct dynstr d[1]; + int c; + + if(!force) { + /* See if we need to quote */ + for(t = s; (c = (unsigned char)*t); ++t) { + if(tspecial(c) || http_separator(c) || whitespace(c)) + break; + } + if(*t) + force = 1; + } + + if(!force) + return xstrdup(s); + + dynstr_init(d); + dynstr_append(d, '"'); + for(t = s; (c = (unsigned char)*t); ++t) { + if(c == '"' || c == '\\') + dynstr_append(d, '\\'); + dynstr_append(d, c); + } + dynstr_append(d, '"'); + dynstr_terminate(d); + return d->vec; +} + +/** @brief Return true if @p ptr points at trailing space */ +static int is_trailing_space(const char *ptr) { + if(*ptr == ' ' || *ptr == '\t') { + while(*ptr == ' ' || *ptr == '\t') + ++ptr; + return *ptr == '\n' || *ptr == 0; + } else + return 0; +} + +/** @brief Encoding text as quoted-printable + * @param text String to encode + * @return Encoded string + * + * See RFC2045 + * s6.7. + */ +char *mime_to_qp(const char *text) { + struct dynstr d[1]; + int linelength = 0; /* length of current line */ + char buffer[10]; + + dynstr_init(d); + /* The rules are: + * 1. Anything except newline can be replaced with =%02X + * 2. Newline, 33-60 and 62-126 stand for themselves (i.e. not '=') + * 3. Non-trailing space/tab stand for themselves. + * 4. Output lines are limited to 76 chars, with = being used + * as a soft line break + * 5. Newlines aren't counted towards the 76 char limit. + */ + while(*text) { + const int c = (unsigned char)*text; + if(c == '\n') { + /* Newline stands as itself */ + dynstr_append(d, '\n'); + linelength = 0; + } else if((c >= 33 && c <= 126 && c != '=') + || ((c == ' ' || c == '\t') + && !is_trailing_space(text))) { + /* Things that can stand for themselves */ + dynstr_append(d, c); + ++linelength; + } else { + /* Anything else that needs encoding */ + snprintf(buffer, sizeof buffer, "=%02X", c); + dynstr_append_string(d, buffer); + linelength += 3; + } + ++text; + if(linelength > 73 && *text && *text != '\n') { + /* Next character might overflow 76 character limit if encoded, so we + * insert a soft break */ + dynstr_append_string(d, "=\n"); + linelength = 0; + } + } + /* Ensure there is a final newline */ + if(linelength) + dynstr_append(d, '\n'); + /* That's all */ + dynstr_terminate(d); + return d->vec; +} + +/** @brief Encode text + * @param text Underlying UTF-8 text + * @param charsetp Where to store charset string + * @param encodingp Where to store encoding string + * @return Encoded text (might be @ref text) + */ +const char *mime_encode_text(const char *text, + const char **charsetp, + const char **encodingp) { + const char *ptr; + + /* See if there are in fact any non-ASCII characters */ + for(ptr = text; *ptr; ++ptr) + if((unsigned char)*ptr >= 128) + break; + if(!*ptr) { + /* Plain old ASCII, no encoding required */ + *charsetp = "us-ascii"; + *encodingp = "7bit"; + return text; + } + *charsetp = "utf-8"; + *encodingp = "quoted-printable"; + return mime_to_qp(text); +} + /* Local Variables: c-basic-offset:2