utils/macros.h: Add <ctype.h> and `foocmp' helper macros.
[mLib] / codec / baseconv.c
CommitLineData
236f657b
MW
1/* -*-c-*-
2 *
3 * Binary base-conversion encoding and decoding (base64, base32, etc.)
4 *
5 * (c) 1997 Straylight/Edgeware
6 */
7
8/*----- Licensing notice --------------------------------------------------*
9 *
10 * This file is part of the mLib utilities library.
11 *
12 * mLib is free software; you can redistribute it and/or modify
13 * it under the terms of the GNU Library General Public License as
14 * published by the Free Software Foundation; either version 2 of the
15 * License, or (at your option) any later version.
16 *
17 * mLib is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 * GNU Library General Public License for more details.
21 *
22 * You should have received a copy of the GNU Library General Public
23 * License along with mLib; if not, write to the Free
24 * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
25 * MA 02111-1307, USA.
26 */
27
28/*----- Header files ------------------------------------------------------*/
29
30#include <ctype.h>
31#include <stdio.h>
32#include <stdlib.h>
33#include <string.h>
34
35#include "alloc.h"
36#include "codec.h"
37#include "dstr.h"
36188114 38#include "macros.h"
236f657b
MW
39#include "sub.h"
40
41#include "base64.h"
42#include "base32.h"
43#include "hex.h"
44
45/*----- Important tables --------------------------------------------------*/
46
47/* --- Magic constants --- */
48
/* Special (negative) entries in the decoding tables; genuine digit values
 * are nonnegative.
 */
#define NV (-1)                         /* Not a valid character */
#define PC (-2)                         /* Padding character (`=') */
#define NL (-3)                         /* Newline character (LF or CR) */
#define SP (-4)                         /* Space character (space, TAB, FF) */
236f657b
MW
53
54/* --- Base64 --- */
55
/* Digit alphabets for the three 6-bit codecs: the character at index @i@
 * encodes the digit value @i@.  The variants differ only in the characters
 * chosen for the values 62 and 63.
 */

static const char encodemap_base64[] =
  "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
  "abcdefghijklmnopqrstuvwxyz"
  "0123456789+/";

static const char encodemap_file64[] =
  "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
  "abcdefghijklmnopqrstuvwxyz"
  "0123456789+%";

static const char encodemap_base64url[] =
  "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
  "abcdefghijklmnopqrstuvwxyz"
  "0123456789-_";
66
67static const signed char decodemap_base64[] = {
09fbf4d0 68 NV, NV, NV, NV, NV, NV, NV, NV, NV, SP, NL, NV, SP, NL, NV, NV, /* 0x */
236f657b 69 NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, /* 1x */
09fbf4d0 70 SP, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, 62, NV, NV, NV, 63, /* 2x */
236f657b
MW
71 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, NV, NV, NV, PC, NV, NV, /* 3x */
72 NV, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, /* 4x */
73 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, NV, NV, NV, NV, NV, /* 5x */
74 NV, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36 ,37, 38, 39, 40, /* 6x */
75 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, NV, NV, NV, NV, NV /* 7x */
76}, decodemap_file64[] = {
09fbf4d0 77 NV, NV, NV, NV, NV, NV, NV, NV, NV, SP, NL, NV, SP, NL, NV, NV, /* 0x */
236f657b 78 NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, /* 1x */
09fbf4d0 79 SP, NV, NV, NV, NV, 63, NV, NV, NV, NV, NV, 62, NV, NV, NV, NV, /* 2x */
236f657b
MW
80 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, NV, NV, NV, PC, NV, NV, /* 3x */
81 NV, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, /* 4x */
82 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, NV, NV, NV, NV, NV, /* 5x */
83 NV, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36 ,37, 38, 39, 40, /* 6x */
84 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, NV, NV, NV, NV, NV /* 7x */
85}, decodemap_base64url[] = {
09fbf4d0 86 NV, NV, NV, NV, NV, NV, NV, NV, NV, SP, NL, NV, SP, NL, NV, NV, /* 0x */
236f657b 87 NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, /* 1x */
09fbf4d0 88 SP, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, 62, NV, NV, /* 2x */
236f657b
MW
89 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, NV, NV, NV, PC, NV, NV, /* 3x */
90 NV, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, /* 4x */
91 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, NV, NV, NV, NV, 63, /* 5x */
92 NV, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36 ,37, 38, 39, 40, /* 6x */
93 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, NV, NV, NV, NV, NV /* 7x */
94};
95
96/* --- Base32 --- */
97
/* Digit alphabets for the two 5-bit codecs: the character at index @i@
 * encodes the digit value @i@.
 */

static const char encodemap_base32[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ234567";

static const char encodemap_base32hex[] = "0123456789ABCDEFGHIJKLMNOPQRSTUV";
101
102static const signed char decodemap_base32[] = {
09fbf4d0 103 NV, NV, NV, NV, NV, NV, NV, NV, NV, SP, NL, NV, SP, NL, NV, NV, /* 0x */
236f657b 104 NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, /* 1x */
09fbf4d0 105 SP, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, /* 2x */
236f657b
MW
106 NV, NV, 26, 27, 28, 29, 30, 31, NV, NV, NV, NV, NV, PC, NV, NV, /* 3x */
107 NV, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, /* 4x */
108 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, NV, NV, NV, NV, NV, /* 5x */
109 NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, /* 6x */
110 NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, /* 7x */
111}, decodemap_base32hex[] = {
09fbf4d0 112 NV, NV, NV, NV, NV, NV, NV, NV, NV, SP, NL, NV, SP, NL, NV, NV, /* 0x */
236f657b 113 NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, /* 1x */
09fbf4d0 114 SP, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, /* 2x */
236f657b
MW
115 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, NV, NV, NV, PC, NV, NV, /* 3x */
116 NV, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, /* 4x */
117 25, 26, 27, 28, 29, 30, 31, NV, NV, NV, NV, NV, NV, NV, NV, NV, /* 5x */
118 NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, /* 6x */
119 NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, /* 7x */
120};
121
122/* --- Hex --- */
123
/* Digit alphabet for the 4-bit codec: the character at index @i@ encodes
 * the digit value @i@.
 */
static const char encodemap_hex[] = "0123456789ABCDEF";
126
127static const signed char decodemap_hex[] = {
09fbf4d0 128 NV, NV, NV, NV, NV, NV, NV, NV, NV, SP, NL, NV, SP, NL, NV, NV, /* 0x */
236f657b 129 NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, /* 1x */
09fbf4d0 130 SP, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, /* 2x */
236f657b
MW
131 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, NV, NV, NV, NV, NV, NV, /* 3x */
132 NV, 10, 11, 12, 13, 14, 15, NV, NV, NV, NV, NV, NV, NV, NV, NV, /* 4x */
133 NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, /* 5x */
134 NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, /* 6x */
135 NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, /* 7x */
136};
137
138/*----- Base conversion macros --------------------------------------------*/
139
/* --- @BASECONV@ --- *
 *
 * Arguments:   @x@ = an input digit of width @iwd@ bits
 *              @iwd@ = input digit width in bits
 *              @owd@ = output digit width in bits
 *              @put@ = function or macro to output a digit
 *
 * Use:         Inserts the bits of @x@ into an accumulator.  As digits @y@
 *              of width @owd@ become ready, @put(y)@ is invoked to emit
 *              them.
 *
 *              The macro works on two variables which must be in scope
 *              where it is used: the accumulator @a@, and the count @nb@
 *              of bits in @a@ which have not been emitted yet.  The
 *              arguments are parenthesized in the expansion, so @x@ may be
 *              any expression (e.g., a conditional); it is evaluated
 *              exactly once.
 */

#define BASECONV(x, iwd, owd, put) do {                                 \
  a = (a << (iwd)) | (x);                                               \
  nb += (iwd);                                                          \
  while (nb >= (owd)) {                                                 \
    nb -= (owd);                                                        \
    put((a >> nb) & ((1u << (owd)) - 1));                               \
  }                                                                     \
} while (0)
159
/* --- @BASECONV_FLUSH@ --- *
 *
 * Arguments:   @iwd@ = input digit width in bits
 *              @owd@ = output digit width in bits
 *              @put@ = function or macro to output a digit
 *
 * Use:         Emits the final, partial digit left in the base-conversion
 *              shift register, if there is one.  Zero bits are shifted in
 *              on the right, @iwd@ at a time, until at least @owd@ bits
 *              are available; then one digit of width @owd@ is passed to
 *              @put@.  Does nothing if the register is empty.
 *
 *              Works on the variables @a@ (accumulator) and @nb@ (number
 *              of pending bits), which must be in scope.
 */

#define BASECONV_FLUSH(iwd, owd, put) do {                              \
  if (nb) {                                                             \
    for (; nb < (owd); nb += (iwd)) a <<= (iwd);                        \
    nb -= (owd);                                                        \
    put((a >> nb) & ((1 << (owd)) - 1));                                \
  }                                                                     \
} while (0)
179
/* --- @BASECONV_PAD@ --- *
 *
 * Arguments:   @iwd@ = input digit width in bits
 *              @owd@ = output digit width in bits
 *              @pad@ = statement (without semicolon) to output padding
 *
 * Use:         Invokes @pad@ sufficiently often to realign the shift
 *              register: each invocation accounts for @owd@ pending bits,
 *              and whenever fewer than @owd@ (but more than zero) remain,
 *              a further @iwd@ bits are assumed.  Stops once the pending
 *              bit count @nb@, which must be in scope, reaches zero.
 */

#define BASECONV_PAD(iwd, owd, pad) do {                                \
  while (nb) {                                                          \
    if (nb < (owd))                                                     \
      nb += (iwd);                                                      \
    else {                                                              \
      pad;                                                              \
      nb -= (owd);                                                      \
    }                                                                   \
  }                                                                     \
} while (0)
197
/* Drop-in replacement for @BASECONV_PAD@ which emits nothing: none of its
 * arguments is evaluated.
 */
#define NULL_PAD(iwd, owd, pad) do { } while (0)
199
200/*----- Lists of things to make -------------------------------------------*/
201
/* The codec classes defined by this file.  @CODECS(_)@ invokes
 * @_(NAME, CTXN, ACC)@ once per class, where @NAME@ is the class name (and
 * the suffix of its @encodemap_@/@decodemap_@ tables), @CTXN@ names the
 * context family whose functions implement it, and @ACC@ is the accumulator
 * member name used for that family.
 */
#define CODECS(_)                                                       \
  _(base64,     base64, acc)                                            \
  _(file64,     base64, acc)                                            \
  _(base64url,  base64, acc)                                            \
  _(base32,     base32, accl)                                           \
  _(base32hex,  base32, accl)                                           \
  _(hex,        hex,    acc)
210
/* The context families.  @CTXS(_)@ invokes @_(CTXN, WD, ACC)@ once per
 * family, where @CTXN@ is the family name, @WD@ is the width in bits of one
 * encoded digit, and @ACC@ is the name of the context member which holds
 * the packed conversion state.
 */
#define CTXS(_)                                                         \
  _(base64,     6,      acc)                                            \
  _(base32,     5,      accl)                                           \
  _(hex,        4,      acc)
216
/* Per-family parameters, selected by token-pasting the family name:
 *
 *   * @..._PADDING@ is the macro the encoder uses to emit trailing padding;
 *   * @..._FLAGMASK@ is ANDed with the caller's flags when a codec object
 *     is created, and @..._FLAGXOR@ is then XORed in;
 *   * @..._OLDFLAGS@ are the flags used by the old-style @..._init@
 *     interface.
 */

/* base64: padded; the case-handling flags are always cleared. */
#define base64_PADDING BASECONV_PAD
#define base64_FLAGMASK (~(CDCF_LOWERC | CDCF_IGNCASE))
#define base64_FLAGXOR 0
#define base64_OLDFLAGS CDCF_IGNJUNK

/* base32: padded; all flags are passed through unchanged. */
#define base32_PADDING BASECONV_PAD
#define base32_FLAGMASK (~0)
#define base32_FLAGXOR 0
#define base32_OLDFLAGS CDCF_IGNJUNK

/* hex: never padded; the old interface works in lower case. */
#define hex_PADDING NULL_PAD
#define hex_FLAGMASK (~0)
#define hex_FLAGXOR 0
#define hex_OLDFLAGS (CDCF_IGNJUNK | CDCF_LOWERC)
231
232/*----- Data structures ---------------------------------------------------*/
233
234#define OBJ(ctxn, wd, acc) \
235 \
236typedef struct ctxn##_codec { \
237 codec c; \
238 ctxn##_ctx ctx; \
239 const char *encodemap; \
240 const signed char *decodemap; \
241} ctxn##_codec;
242
243CTXS(OBJ)
244
245/*----- State packing -----------------------------------------------------*
246 *
247 * These macros convert between the state required by the new encoding and
248 * decoding core and the old externally-visible context structures. It's
249 * unpleasant, I know; maybe we can drop the old interface later.
250 */
251
/* Decoder states, as stored in the packed state word. */
enum {
  ST_MAIN = 0,                          /* Decoding ordinary digits */
  ST_PAD = 1,                           /* Seen padding; no more digits */
  ST_END = 2                            /* Finished decoding */
};
257
/* --- @STATE_UNPACK@ --- *
 *
 * Arguments:   @acc@ = name of the context member holding the packed state
 *
 * Use:         Declares and initializes the working variables of the
 *              conversion core from the context @ctx@, which must be in
 *              scope:
 *
 *                * @a@, the accumulator, from bits 0--15 of @ctx->acc@;
 *                * @nb@, the pending bit count, from bits 16--23;
 *                * @st@, the decoder state, from bits 24--31; and
 *                * @f@, the flags word, from @ctx->qsz@.
 *
 *              The expansion is a sequence of declarations with no final
 *              semicolon: the semicolon written after the invocation
 *              finishes the last one.  This means no empty statement
 *              appears ahead of the caller's own declarations.
 */

#define STATE_UNPACK(acc)                                               \
  unsigned long a = (ctx->acc >> 0) & 0xffff;                           \
  unsigned nb = (ctx->acc >> 16) & 0xff;                                \
  unsigned st = (ctx->acc >> 24) & 0xff;                                \
  unsigned f = ctx->qsz
263
/* --- @STATE_PACK@ --- *
 *
 * Arguments:   @acc@ = name of the context member holding the packed state
 *
 * Use:         Stores the working variables back in the context @ctx@: the
 *              low 16 bits of @a@ go in bits 0--15 of @ctx->acc@, the low
 *              8 bits of @nb@ in bits 16--23, and the low 8 bits of @st@
 *              in bits 24--31.  The flags are not written back.
 */

#define STATE_PACK(acc) do {                                            \
  ctx->acc = (a & 0xffff) |                                             \
             ((nb & 0xfful) << 16) |                                    \
             ((st & 0xfful) << 24);                                     \
} while (0)
269
270/*----- Main encoder and decoder ------------------------------------------*/
271
/* Output helpers for the conversion core.  They rely on the variables of
 * the enclosing function: the output string @d@, the context @ctx@, the
 * flags @f@, the current line length @lnlen@ and the limit @maxln@.
 */

/* Run @stuff@ (a braced block which writes one character), first inserting
 * the context's line separator if the current line is full.  A limit of
 * zero disables line breaking.
 */
#define WRAP(stuff) do {                                                \
  if (maxln != 0 && maxln <= lnlen) {                                   \
    lnlen = 0;                                                          \
    dstr_puts(d, ctx->indent);                                          \
  }                                                                     \
  stuff                                                                 \
  lnlen += 1;                                                           \
} while (0)

/* Write the character for digit value @x@, in lower case if the
 * @CDCF_LOWERC@ flag is set, with line wrapping.
 */
#define PUTWRAP(x) WRAP({                                               \
  char out_ch = encodemap[x];                                           \
  if ((f & CDCF_LOWERC) != 0) out_ch = TOLOWER(out_ch);                 \
  DPUTC(d, out_ch);                                                     \
})

/* Write one padding character, with line wrapping. */
#define PADWRAP WRAP({ DPUTC(d, '='); })

/* Write the decoded byte @x@ as it stands; no line wrapping. */
#define PUTRAW(x) DPUTC(d, x)
290
291#define ENCODER(ctxn, wd, acc) \
292 \
293/* --- @CTXN_doencode@ --- * \
294 * \
295 * Arguments: @CTXN_ctx *ctx@ = pointer to a context block \
296 * @const char *encodemap@ = pointer to encoding map \
297 * @const unsigned char *p@ = pointer to a source buffer \
298 * @size_t sz@ = size of the source buffer \
299 * @dstr *d@ = pointer to destination string \
300 * \
301 * Returns: Zero on success, or @CDCERR_@ error code. \
302 * \
303 * Use: Main encoder function. \
304 */ \
305 \
306static int ctxn##_doencode(ctxn##_ctx *ctx, const char *encodemap, \
307 const unsigned char *p, size_t sz, dstr *d) \
308{ \
309 STATE_UNPACK(acc); \
310 const unsigned char *l = p + sz; \
311 unsigned lnlen = ctx->lnlen, maxln = ctx->maxline; \
312 \
313 if (p) { \
314 while (p < l) BASECONV(*p++, 8, wd, PUTWRAP); \
315 } else { \
316 BASECONV_FLUSH(8, wd, PUTWRAP); \
317 if (!(f & CDCF_NOEQPAD)) ctxn##_PADDING(8, wd, PADWRAP); \
318 } \
319 \
320 STATE_PACK(acc); \
321 ctx->lnlen = lnlen; \
322 return (0); \
323} \
324 \
325/* --- @CTXN_dodecode@ --- * \
326 * \
327 * Arguments: @CTXN_ctx *ctx@ = pointer to a context block \
328 * @const signed char *decodemap@ = pointer to decode map \
329 * @const char *p@ = pointer to a source buffer \
330 * @size_t sz@ = size of the source buffer \
331 * @dstr *d@ = pointer to destination string \
332 * \
333 * Returns: Zero on success, or @CDCERR_@ error code. \
334 * \
335 * Use: Main decoder function. \
336 */ \
337 \
338static int ctxn##_dodecode(ctxn##_ctx *ctx, \
339 const signed char *decodemap, \
340 const unsigned char *p, size_t sz, dstr *d) \
341{ \
342 STATE_UNPACK(acc); \
343 const unsigned char *l = p + sz; \
344 int ch; \
345 int x; \
346 \
347 if (p) { \
348 while (p < l) { \
349 ch = *p++; \
350 switch (f & (CDCF_LOWERC | CDCF_IGNCASE)) { \
351 case 0: \
352 break; \
353 case CDCF_LOWERC: \
36188114 354 if (ISUPPER(ch)) goto badch; \
236f657b 355 default: \
36188114 356 ch = TOUPPER(ch); \
236f657b
MW
357 } \
358 x = decodemap[ch]; \
359 switch (x) { \
360 case NV: \
361 badch: \
362 if (!(f & CDCF_IGNINVCH)) return (CDCERR_INVCH); \
363 break; \
364 case PC: \
365 if (f & CDCF_IGNEQMID) break; \
366 if (f & CDCF_NOEQPAD) goto badch; \
367 if (st == ST_MAIN && \
368 !(f & CDCF_IGNZPAD) && (a & ((1 << nb) - 1))) \
369 return (CDCERR_INVZPAD); \
370 st = ST_PAD; \
371 if (!(f & CDCF_IGNEQPAD)) { \
372 if (!nb) return (CDCERR_INVEQPAD); \
373 nb = (nb + wd)%8; \
374 st = ST_PAD; \
375 } \
376 break; \
377 case NL: \
378 if (f & CDCF_IGNNEWL) break; \
379 return (CDCERR_INVCH); \
09fbf4d0
MW
380 case SP: \
381 if (f & CDCF_IGNSPC) break; \
382 return (CDCERR_INVCH); \
236f657b 383 default: \
09fbf4d0 384 if (st != ST_MAIN) return (CDCERR_INVEQPAD); \
236f657b
MW
385 BASECONV(x, wd, 8, PUTRAW); \
386 break; \
387 } \
388 } \
389 } else { \
390 if (st == ST_MAIN && \
391 !(f & CDCF_IGNZPAD) && (a & ((1 << nb) - 1))) \
392 return (CDCERR_INVZPAD); \
393 if (!(f & (CDCF_IGNEQPAD | CDCF_IGNEQMID | CDCF_NOEQPAD)) && nb) \
394 return (CDCERR_INVEQPAD); \
395 } \
396 \
397 STATE_PACK(acc); \
398 return (0); \
399}
400
401CTXS(ENCODER)
402
403/*----- Codec implementation ----------------------------------------------*/
404
405#define OPS(ctxn, wd, acc) \
406 \
407static int ctxn##_enc(codec *c, const void *p, size_t sz, dstr *d) \
408{ \
409 ctxn##_codec *bc = (ctxn##_codec *)c; \
410 return (ctxn##_doencode(&bc->ctx, bc->encodemap, p, sz, d)); \
411} \
412 \
413static int ctxn##_dec(codec *c, const void *p, size_t sz, dstr *d) \
414{ \
415 ctxn##_codec *bc = (ctxn##_codec *)c; \
416 return (ctxn##_dodecode(&bc->ctx, bc->decodemap, p, sz, d)); \
417} \
418 \
419static void ctxn##_destroy(codec *c) \
420{ \
421 ctxn##_codec *bc = (ctxn##_codec *)c; \
422 if (bc->ctx.indent) xfree((/*unconst*/ char *)bc->ctx.indent); \
423 DESTROY(bc); \
424} \
425 \
426static codec *ctxn##_docreate(unsigned flags, \
427 const char *indent, unsigned maxline, \
428 const codec_ops *ops, \
429 const char *encodemap, \
430 const signed char *decodemap) \
431{ \
432 ctxn##_codec *bc = CREATE(ctxn##_codec); \
433 bc->c.ops = ops; \
434 bc->ctx.acc = 0; \
435 bc->ctx.qsz = (flags & ctxn##_FLAGMASK) ^ ctxn##_FLAGXOR; \
436 bc->ctx.lnlen = 0; \
437 bc->ctx.indent = indent ? xstrdup(indent) : 0; \
438 bc->ctx.maxline = maxline; \
439 bc->encodemap = encodemap; \
440 bc->decodemap = decodemap; \
441 return (&bc->c); \
442}
443
444CTXS(OPS)
445
446#define CLASS(name, ctxn, acc) \
447 \
448static const codec_ops \
449 name##_encode_ops = { &name##_class, ctxn##_enc, ctxn##_destroy }, \
450 name##_decode_ops = { &name##_class, ctxn##_dec, ctxn##_destroy }; \
451 \
452static codec *name##_encoder(unsigned flags, \
453 const char *indent, unsigned maxline) \
454{ \
455 return ctxn##_docreate(flags, indent, maxline, \
456 &name##_encode_ops, \
457 encodemap_##name, \
458 decodemap_##name); \
459} \
460 \
461static codec *name##_decoder(unsigned flags) \
462{ \
463 return ctxn##_docreate(flags, 0, 0, \
464 &name##_decode_ops, \
465 encodemap_##name, \
466 decodemap_##name); \
467} \
468 \
469const codec_class name##_class = \
470 { #name, name##_encoder, name##_decoder };
471
472CODECS(CLASS)
473
474/*----- Compatibility veneers ---------------------------------------------*/
475
476#define COMPAT(ctxn, wd, acc) \
477 \
478void ctxn##_encode(ctxn##_ctx *ctx, const void *p, size_t sz, dstr *d) \
479 { ctxn##_doencode(ctx, encodemap_##ctxn, p, sz, d); } \
480 \
481void ctxn##_decode(ctxn##_ctx *ctx, const void *p, size_t sz, dstr *d) \
482 { ctxn##_dodecode(ctx, decodemap_##ctxn, p, sz, d); } \
483 \
484void ctxn##_init(ctxn##_ctx *ctx) \
485{ \
486 ctx->acc = 0; \
487 ctx->qsz = (ctxn##_OLDFLAGS & ctxn##_FLAGMASK) ^ ctxn##_FLAGXOR; \
488 ctx->lnlen = 0; \
489 ctx->indent = "\n"; \
490 ctx->maxline = 72; \
491}
492
493CTXS(COMPAT)
494
495/*----- That's all, folks -------------------------------------------------*/