X-Git-Url: https://git.distorted.org.uk/~mdw/mLib/blobdiff_plain/173ff44a439dc7cf51a3f7a433c4d0d444b59293..236f657b6dab66f31f4902cecfc03b4673f5bb98:/codec/baseconv.c diff --git a/codec/baseconv.c b/codec/baseconv.c new file mode 100644 index 0000000..1d50735 --- /dev/null +++ b/codec/baseconv.c @@ -0,0 +1,491 @@ +/* -*-c-*- + * + * Binary base-conversion encoding and decoding (base64, base32, etc.) + * + * (c) 1997 Straylight/Edgeware + */ + +/*----- Licensing notice --------------------------------------------------* + * + * This file is part of the mLib utilities library. + * + * mLib is free software; you can redistribute it and/or modify + * it under the terms of the GNU Library General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * mLib is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public + * License along with mLib; if not, write to the Free + * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, + * MA 02111-1307, USA. + */ + +/*----- Header files ------------------------------------------------------*/ + +#include +#include +#include +#include + +#include "alloc.h" +#include "codec.h" +#include "dstr.h" +#include "sub.h" + +#include "base64.h" +#include "base32.h" +#include "hex.h" + +/*----- Important tables --------------------------------------------------*/ + +/* --- Magic constants --- */ + +#define NV -1 /* Not valid */ +#define PC -2 /* Padding character */ +#define NL -3 /* Newline character */ + +/* --- Base64 --- */ + +static const char + encodemap_base64[] = { "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "abcdefghijklmnopqrstuvwxyz" + "0123456789+/" }, + encodemap_file64[] = { "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "abcdefghijklmnopqrstuvwxyz" + "0123456789+%" }, + encodemap_base64url[] = { "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "abcdefghijklmnopqrstuvwxyz" + "0123456789-_" }; + +static const signed char decodemap_base64[] = { + NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NL, NV, NV, NL, NV, NV, /* 0x */ + NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, /* 1x */ + NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, 62, NV, NV, NV, 63, /* 2x */ + 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, NV, NV, NV, PC, NV, NV, /* 3x */ + NV, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, /* 4x */ + 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, NV, NV, NV, NV, NV, /* 5x */ + NV, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36 ,37, 38, 39, 40, /* 6x */ + 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, NV, NV, NV, NV, NV /* 7x */ +}, decodemap_file64[] = { + NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NL, NV, NV, NL, NV, NV, /* 0x */ + NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, /* 1x */ + NV, NV, NV, NV, NV, 63, NV, NV, NV, NV, NV, 62, NV, NV, NV, NV, /* 2x */ + 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, NV, NV, NV, PC, NV, NV, /* 3x */ + NV, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, /* 4x */ + 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, NV, NV, NV, NV, NV, /* 5x */ + NV, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36 ,37, 38, 39, 40, /* 6x */ + 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, NV, NV, NV, NV, NV /* 7x */ +}, decodemap_base64url[] = { + NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NL, NV, NV, NL, NV, NV, /* 0x */ + NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, /* 1x */ + NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, 62, NV, NV, /* 2x */ + 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, NV, NV, NV, PC, NV, NV, /* 3x */ + NV, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, /* 4x */ + 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, NV, NV, NV, NV, 63, /* 5x */ + NV, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36 ,37, 38, 39, 40, /* 6x */ + 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, NV, NV, NV, NV, NV /* 7x */ +}; + +/* --- Base32 --- */ + +static const char + encodemap_base32[] = { "ABCDEFGHIJKLMNOPQRSTUVWXYZ234567" }, + encodemap_base32hex[] = { "0123456789ABCDEFGHIJKLMNOPQRSTUV" }; + +static const signed char decodemap_base32[] = { + NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NL, NV, NV, NL, NV, NV, /* 0x */ + NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, /* 1x */ + NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, /* 2x */ + NV, NV, 26, 27, 28, 29, 30, 31, NV, NV, NV, NV, NV, PC, NV, NV, /* 3x */ + NV, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, /* 4x */ + 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, NV, NV, NV, NV, NV, /* 5x */ + NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, /* 6x */ + NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, /* 7x */ +}, decodemap_base32hex[] = { + NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NL, NV, NV, NL, NV, NV, /* 0x */ + NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, /* 1x */ + NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, /* 2x */ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, NV, NV, NV, PC, NV, NV, /* 3x */ + NV, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, /* 4x */ + 25, 26, 27, 28, 29, 30, 31, NV, NV, NV, NV, NV, NV, NV, NV, NV, /* 5x */ + NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, /* 6x */ + NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, /* 7x */ +}; + +/* --- Hex --- */ + +static const char + encodemap_hex[] = { "0123456789ABCDEF" }; + +static const signed char decodemap_hex[] = { + NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NL, NV, NV, NL, NV, NV, /* 0x */ + NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, /* 1x */ + NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, /* 2x */ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, NV, NV, NV, NV, NV, NV, /* 3x */ + NV, 10, 11, 12, 13, 14, 15, NV, NV, NV, NV, NV, NV, NV, NV, NV, /* 4x */ + NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, /* 5x */ + NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, /* 6x */ + NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, /* 7x */ +}; + +/*----- Base conversion macros --------------------------------------------*/ + +/* --- @BASECONV@ --- * + * + * Arguments: @x@ = an input digit of width @IWD@ bits + * @iwd@ = input digit width in bits + * @owd@ = output digit width in bits + * @put@ = function or macro to output a digit + * + * Use: Inserts the bits of @x@ into an accumulator. As digits @y@ + * of with @owd@ become ready, @put(y)@ is invoked to emit them. + */ + +#define BASECONV(x, iwd, owd, put) do { \ + a = (a << iwd) | x; \ + nb += iwd; \ + while (nb >= owd) { \ + nb -= owd; \ + put((a >> nb) & ((1 << owd) - 1)); \ + } \ +} while (0) + +/* --- @BASECONV_FLUSH@ --- * + * + * Arguments: @iwd@ = input digit width in bits + * @owd@ = output digit width in bits + * @put@ = function or macro to output a digit + * + * Use: Flushes remaining digits from the base-conversion shift + * register. The bits in the shift register are padded on the + * right with zeros. Digits of width @owd@ are emitted by + * invoking @put@. + */ + +#define BASECONV_FLUSH(iwd, owd, put) do { \ + if (nb) { \ + while (nb < owd) { a <<= iwd; nb += iwd; } \ + nb -= owd; \ + put((a >> nb) & ((1 << owd) - 1)); \ + } \ +} while (0) + +/* --- @BASECONV_PAD@ --- * + * + * Arguments: @iwd@ = input digit width in bits + * @owd@ = output digit width in bits + * @pad@ = function or macro to output padding + * + * Use: Invokes @pad@ sufficiently often to realign the shift + * register. + */ + +#define BASECONV_PAD(iwd, owd, pad) do { \ + for (;;) { \ + while (nb >= owd) { pad; nb -= owd; } \ + if (!nb) break; \ + nb += iwd; \ + } \ +} while (0) + +#define NULL_PAD(iwd, owd, pad) do ; while (0) + +/*----- Lists of things to make -------------------------------------------*/ + +#define CODECS(_) \ + /* NAME, CTXN, ACC */ \ + _(base64, base64, acc) \ + _(file64, base64, acc) \ + _(base64url, base64, acc) \ + _(base32, base32, accl) \ + _(base32hex, base32, accl) \ + _(hex, hex, acc) + +#define CTXS(_) \ + /* CTXN, WD, ACC */ \ + _(base64, 6, acc) \ + _(base32, 5, accl) \ + _(hex, 4, acc) + +#define base64_PADDING BASECONV_PAD +#define base64_FLAGMASK ~(CDCF_LOWERC | CDCF_IGNCASE) +#define base64_FLAGXOR 0 +#define base64_OLDFLAGS CDCF_IGNJUNK + +#define base32_PADDING BASECONV_PAD +#define base32_FLAGMASK ~0 +#define base32_FLAGXOR 0 +#define base32_OLDFLAGS CDCF_IGNJUNK + +#define hex_PADDING NULL_PAD +#define hex_FLAGMASK ~0 +#define hex_FLAGXOR 0 +#define hex_OLDFLAGS (CDCF_IGNJUNK | CDCF_LOWERC) + +/*----- Data structures ---------------------------------------------------*/ + +#define OBJ(ctxn, wd, acc) \ + \ +typedef struct ctxn##_codec { \ + codec c; \ + ctxn##_ctx ctx; \ + const char *encodemap; \ + const signed char *decodemap; \ +} ctxn##_codec; + +CTXS(OBJ) + +/*----- State packing -----------------------------------------------------* + * + * These macros convert between the state required by the new encoding and + * decoding core and the old externally-visible context structures. It's + * unpleasant, I know; maybe we can drop the old interface later. + */ + +enum { + ST_MAIN, /* Main decoding state */ + ST_PAD, /* Decoding trailing padding */ + ST_END /* Finished decoding */ +}; + +#define STATE_UNPACK(acc) \ + unsigned long a = (ctx->acc >> 0) & 0xffff; \ + unsigned nb = (ctx->acc >> 16) & 0xff; \ + unsigned st = (ctx->acc >> 24) & 0xff; \ + unsigned f = ctx->qsz; + +#define STATE_PACK(acc) do { \ + ctx->acc = (((a & 0xffff) << 0) | \ + (((unsigned long)nb & 0xff) << 16) | \ + (((unsigned long)st & 0xff) << 24)); \ +} while (0) + +/*----- Main encoder and decoder ------------------------------------------*/ + +#define WRAP(stuff) do { \ + if (maxln && lnlen >= maxln) { \ + dstr_puts(d, ctx->indent); \ + lnlen = 0; \ + } \ + stuff \ + lnlen++; \ +} while (0) + +#define PUTWRAP(x) WRAP({ \ + char ch = encodemap[x]; \ + if (f & CDCF_LOWERC) ch = tolower((unsigned char)ch); \ + DPUTC(d, ch); \ +}) + +#define PADWRAP WRAP({ DPUTC(d, '='); }) + +#define PUTRAW(x) DPUTC(d, x) + +#define ENCODER(ctxn, wd, acc) \ + \ +/* --- @CTXN_doencode@ --- * \ + * \ + * Arguments: @CTXN_ctx *ctx@ = pointer to a context block \ + * @const char *encodemap@ = pointer to encoding map \ + * @const unsigned char *p@ = pointer to a source buffer \ + * @size_t sz@ = size of the source buffer \ + * @dstr *d@ = pointer to destination string \ + * \ + * Returns: Zero on success, or @CDCERR_@ error code. \ + * \ + * Use: Main encoder function. \ + */ \ + \ +static int ctxn##_doencode(ctxn##_ctx *ctx, const char *encodemap, \ + const unsigned char *p, size_t sz, dstr *d) \ +{ \ + STATE_UNPACK(acc); \ + const unsigned char *l = p + sz; \ + unsigned lnlen = ctx->lnlen, maxln = ctx->maxline; \ + \ + if (p) { \ + while (p < l) BASECONV(*p++, 8, wd, PUTWRAP); \ + } else { \ + BASECONV_FLUSH(8, wd, PUTWRAP); \ + if (!(f & CDCF_NOEQPAD)) ctxn##_PADDING(8, wd, PADWRAP); \ + } \ + \ + STATE_PACK(acc); \ + ctx->lnlen = lnlen; \ + return (0); \ +} \ + \ +/* --- @CTXN_dodecode@ --- * \ + * \ + * Arguments: @CTXN_ctx *ctx@ = pointer to a context block \ + * @const signed char *decodemap@ = pointer to decode map \ + * @const char *p@ = pointer to a source buffer \ + * @size_t sz@ = size of the source buffer \ + * @dstr *d@ = pointer to destination string \ + * \ + * Returns: Zero on success, or @CDCERR_@ error code. \ + * \ + * Use: Main decoder function. \ + */ \ + \ +static int ctxn##_dodecode(ctxn##_ctx *ctx, \ + const signed char *decodemap, \ + const unsigned char *p, size_t sz, dstr *d) \ +{ \ + STATE_UNPACK(acc); \ + const unsigned char *l = p + sz; \ + int ch; \ + int x; \ + \ + if (p) { \ + while (p < l) { \ + ch = *p++; \ + switch (f & (CDCF_LOWERC | CDCF_IGNCASE)) { \ + case 0: \ + break; \ + case CDCF_LOWERC: \ + if (isupper(ch)) goto badch; \ + default: \ + ch = toupper(ch); \ + } \ + x = decodemap[ch]; \ + switch (x) { \ + case NV: \ + badch: \ + if (!(f & CDCF_IGNINVCH)) return (CDCERR_INVCH); \ + break; \ + case PC: \ + if (f & CDCF_IGNEQMID) break; \ + if (f & CDCF_NOEQPAD) goto badch; \ + if (st == ST_MAIN && \ + !(f & CDCF_IGNZPAD) && (a & ((1 << nb) - 1))) \ + return (CDCERR_INVZPAD); \ + st = ST_PAD; \ + if (!(f & CDCF_IGNEQPAD)) { \ + if (!nb) return (CDCERR_INVEQPAD); \ + nb = (nb + wd)%8; \ + st = ST_PAD; \ + } \ + break; \ + case NL: \ + if (f & CDCF_IGNNEWL) break; \ + return (CDCERR_INVCH); \ + default: \ + if (st != ST_MAIN) \ + return (CDCERR_INVEQPAD); \ + BASECONV(x, wd, 8, PUTRAW); \ + break; \ + } \ + } \ + } else { \ + if (st == ST_MAIN && \ + !(f & CDCF_IGNZPAD) && (a & ((1 << nb) - 1))) \ + return (CDCERR_INVZPAD); \ + if (!(f & (CDCF_IGNEQPAD | CDCF_IGNEQMID | CDCF_NOEQPAD)) && nb) \ + return (CDCERR_INVEQPAD); \ + } \ + \ + STATE_PACK(acc); \ + return (0); \ +} + +CTXS(ENCODER) + +/*----- Codec implementation ----------------------------------------------*/ + +#define OPS(ctxn, wd, acc) \ + \ +static int ctxn##_enc(codec *c, const void *p, size_t sz, dstr *d) \ +{ \ + ctxn##_codec *bc = (ctxn##_codec *)c; \ + return (ctxn##_doencode(&bc->ctx, bc->encodemap, p, sz, d)); \ +} \ + \ +static int ctxn##_dec(codec *c, const void *p, size_t sz, dstr *d) \ +{ \ + ctxn##_codec *bc = (ctxn##_codec *)c; \ + return (ctxn##_dodecode(&bc->ctx, bc->decodemap, p, sz, d)); \ +} \ + \ +static void ctxn##_destroy(codec *c) \ +{ \ + ctxn##_codec *bc = (ctxn##_codec *)c; \ + if (bc->ctx.indent) xfree((/*unconst*/ char *)bc->ctx.indent); \ + DESTROY(bc); \ +} \ + \ +static codec *ctxn##_docreate(unsigned flags, \ + const char *indent, unsigned maxline, \ + const codec_ops *ops, \ + const char *encodemap, \ + const signed char *decodemap) \ +{ \ + ctxn##_codec *bc = CREATE(ctxn##_codec); \ + bc->c.ops = ops; \ + bc->ctx.acc = 0; \ + bc->ctx.qsz = (flags & ctxn##_FLAGMASK) ^ ctxn##_FLAGXOR; \ + bc->ctx.lnlen = 0; \ + bc->ctx.indent = indent ? xstrdup(indent) : 0; \ + bc->ctx.maxline = maxline; \ + bc->encodemap = encodemap; \ + bc->decodemap = decodemap; \ + return (&bc->c); \ +} + +CTXS(OPS) + +#define CLASS(name, ctxn, acc) \ + \ +static const codec_ops \ + name##_encode_ops = { &name##_class, ctxn##_enc, ctxn##_destroy }, \ + name##_decode_ops = { &name##_class, ctxn##_dec, ctxn##_destroy }; \ + \ +static codec *name##_encoder(unsigned flags, \ + const char *indent, unsigned maxline) \ +{ \ + return ctxn##_docreate(flags, indent, maxline, \ + &name##_encode_ops, \ + encodemap_##name, \ + decodemap_##name); \ +} \ + \ +static codec *name##_decoder(unsigned flags) \ +{ \ + return ctxn##_docreate(flags, 0, 0, \ + &name##_decode_ops, \ + encodemap_##name, \ + decodemap_##name); \ +} \ + \ +const codec_class name##_class = \ + { #name, name##_encoder, name##_decoder }; + +CODECS(CLASS) + +/*----- Compatibility veneers ---------------------------------------------*/ + +#define COMPAT(ctxn, wd, acc) \ + \ +void ctxn##_encode(ctxn##_ctx *ctx, const void *p, size_t sz, dstr *d) \ + { ctxn##_doencode(ctx, encodemap_##ctxn, p, sz, d); } \ + \ +void ctxn##_decode(ctxn##_ctx *ctx, const void *p, size_t sz, dstr *d) \ + { ctxn##_dodecode(ctx, decodemap_##ctxn, p, sz, d); } \ + \ +void ctxn##_init(ctxn##_ctx *ctx) \ +{ \ + ctx->acc = 0; \ + ctx->qsz = (ctxn##_OLDFLAGS & ctxn##_FLAGMASK) ^ ctxn##_FLAGXOR; \ + ctx->lnlen = 0; \ + ctx->indent = "\n"; \ + ctx->maxline = 72; \ +} + +CTXS(COMPAT) + +/*----- That's all, folks -------------------------------------------------*/