tiger-mktab.c: Don't have printf swallow a kludge64 whole.
[u/mdw/catacomb] / hashsum.c
1 /* -*-c-*-
2 *
3 * $Id$
4 *
5 * Hash files using some secure hash function
6 *
7 * (c) 2000 Straylight/Edgeware
8 */
9
10 /*----- Licensing notice --------------------------------------------------*
11 *
12 * This file is part of Catacomb.
13 *
14 * Catacomb is free software; you can redistribute it and/or modify
15 * it under the terms of the GNU Library General Public License as
16 * published by the Free Software Foundation; either version 2 of the
17 * License, or (at your option) any later version.
18 *
19 * Catacomb is distributed in the hope that it will be useful,
20 * but WITHOUT ANY WARRANTY; without even the implied warranty of
21 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 * GNU Library General Public License for more details.
23 *
24 * You should have received a copy of the GNU Library General Public
25 * License along with Catacomb; if not, write to the Free
26 * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
27 * MA 02111-1307, USA.
28 */
29
30 /*----- Header files ------------------------------------------------------*/
31
32 #include "config.h"
33
34 #include <assert.h>
35 #include <ctype.h>
36 #include <errno.h>
37 #include <stdio.h>
38 #include <stdlib.h>
39 #include <string.h>
40
41 #include <mLib/alloc.h>
42 #include <mLib/dstr.h>
43 #include <mLib/mdwopt.h>
44 #include <mLib/quis.h>
45 #include <mLib/report.h>
46 #include <mLib/sub.h>
47 #include <mLib/str.h>
48
49 #include <mLib/hex.h>
50 #include <mLib/base32.h>
51 #include <mLib/base64.h>
52
53 #include "ghash.h"
54 #include "cc.h"
55
56 /*----- Static variables --------------------------------------------------*/
57
/* Option flags, combined into a single @unsigned@ word and handed down
 * through the hashing functions.
 */

#define f_binary  (1u << 0)     /* Open files to be hashed in binary mode */
#define f_bogus   (1u << 1)     /* Bad command line: show usage and fail */
#define f_verbose (1u << 2)     /* Report each file while checking */
#define f_check   (1u << 3)     /* Check digests rather than emit them */
#define f_files   (1u << 4)     /* Arguments name lists of files to hash */
#define f_raw     (1u << 5)     /* File names in lists are null-terminated */
#define f_oddhash (1u << 6)     /* Hash was chosen explicitly with `-a' */
#define f_escape  (1u << 7)     /* Escape funny characters in file names */
#define f_oddenc  (1u << 8)     /* Encoding was chosen explicitly with `-E' */
67
68 /*----- Encoding and decoding ---------------------------------------------*/
69
70 /* --- Hex encoding --- */
71
72 static void puthex(const octet *buf, size_t sz, FILE *fp)
73 {
74 while (sz) {
75 fprintf(fp, "%02x", *buf++);
76 sz--;
77 }
78 }
79
80 static size_t gethex(const char *p, octet *q, size_t sz, char **pp)
81 {
82 size_t i = 0;
83 while (sz > 0 &&
84 isxdigit((unsigned char)p[0]) &&
85 isxdigit((unsigned char)p[1])) {
86 char buf[3];
87 buf[0] = p[0];
88 buf[1] = p[1];
89 buf[2] = 0;
90 *q++ = strtoul(buf, 0, 16);
91 sz--;
92 p += 2;
93 i++;
94 }
95 if (pp)
96 *pp = (char *)p;
97 return (i);
98 }
99
100 /* --- Base64 encoding --- */
101
102 static void putb64(const octet *buf, size_t sz, FILE *fp)
103 {
104 base64_ctx b;
105 dstr d = DSTR_INIT;
106
107 base64_init(&b);
108 b.indent = "";
109 b.maxline = 0;
110 base64_encode(&b, buf, sz, &d);
111 base64_encode(&b, 0, 0, &d);
112 dstr_write(&d, fp);
113 dstr_destroy(&d);
114 }
115
116 static size_t getb64(const char *p, octet *q, size_t sz, char **pp)
117 {
118 base64_ctx b;
119 dstr d = DSTR_INIT;
120 size_t n = strlen(p);
121
122 base64_init(&b);
123 base64_decode(&b, p, n, &d);
124 if (pp) *pp = (/*unconst*/ char *)p + n;
125 base64_decode(&b, 0, 0, &d);
126 assert(d.len <= sz);
127 memcpy(q, d.buf, sz);
128 n = d.len;
129 dstr_destroy(&d);
130 return (n);
131 }
132
133 /* --- Base32 encoding --- */
134
135 static void putb32(const octet *buf, size_t sz, FILE *fp)
136 {
137 base32_ctx b;
138 dstr d = DSTR_INIT;
139
140 base32_init(&b);
141 b.indent = "";
142 b.maxline = 0;
143 base32_encode(&b, buf, sz, &d);
144 base32_encode(&b, 0, 0, &d);
145 dstr_write(&d, fp);
146 dstr_destroy(&d);
147 }
148
149 static size_t getb32(const char *p, octet *q, size_t sz, char **pp)
150 {
151 base32_ctx b;
152 dstr d = DSTR_INIT;
153 size_t n = strlen(p);
154
155 base32_init(&b);
156 base32_decode(&b, p, n, &d);
157 if (pp) *pp = (/*unconst*/ char *)p + n;
158 base32_decode(&b, 0, 0, &d);
159 assert(d.len <= sz);
160 memcpy(q, d.buf, sz);
161 n = d.len;
162 dstr_destroy(&d);
163 return (n);
164 }
165
166 /* --- Table --- */
167
168 typedef struct encodeops {
169 const char *name;
170 void (*put)(const octet *, size_t, FILE *);
171 size_t (*get)(const char *, octet *, size_t, char **);
172 } encodeops;
173
174 static const encodeops encodingtab[] = {
175 { "hex", puthex, gethex },
176 { "base64", putb64, getb64 },
177 { "base32", putb32, getb32 },
178 { 0, 0, 0 }
179 };
180
181 static const encodeops *getencoding(const char *ename)
182 {
183 const encodeops *e;
184
185 for (e = encodingtab; e->name; e++) {
186 if (strcmp(ename, e->name) == 0)
187 return (e);
188 }
189 return (0);
190 }
191
192 /*----- Support functions -------------------------------------------------*/
193
194 /* --- @fhash@ --- *
195 *
196 * Arguments: @const char *file@ = file name to be hashed (null for stdin)
197 * @unsigned f@ = flags to set
198 * @const gchash *gch@ = pointer to hash function to use
199 * @void *buf@ = pointer to hash output buffer
200 *
201 * Returns: Zero if it worked, nonzero on error.
202 *
203 * Use: Hashes a file.
204 */
205
206 static int fhash(const char *file, unsigned f, const gchash *gch, void *buf)
207 {
208 FILE *fp;
209 char fbuf[BUFSIZ];
210 size_t sz;
211 ghash *h;
212 int e;
213
214 if (!file || strcmp(file, "-") == 0)
215 fp = stdin;
216 else if ((fp = fopen(file, f & f_binary ? "rb" : "r")) == 0)
217 return (-1);
218
219 h = GH_INIT(gch);
220 while ((sz = fread(fbuf, 1, sizeof(fbuf), fp)) > 0)
221 GH_HASH(h, fbuf, sz);
222 GH_DONE(h, buf);
223 GH_DESTROY(h);
224 e = ferror(fp);
225 if (file)
226 fclose(fp);
227 return (e ? -1 : 0);
228 }
229
230 /* --- @gethash@ --- *
231 *
232 * Arguments: @const char *name@ = pointer to name string
233 *
234 * Returns: Pointer to appropriate hash class.
235 *
236 * Use: Chooses a hash function by name.
237 */
238
239 static const gchash *gethash(const char *name)
240 {
241 const gchash *const *g, *gg = 0;
242 size_t sz = strlen(name);
243 for (g = ghashtab; *g; g++) {
244 if (strncmp(name, (*g)->name, sz) == 0) {
245 if ((*g)->name[sz] == 0) {
246 gg = *g;
247 break;
248 } else if (gg)
249 return (0);
250 else
251 gg = *g;
252 }
253 }
254 return (gg);
255 }
256
257 /* --- @getstring@ --- *
258 *
259 * Arguments: @FILE *fp@ = stream from which to read
260 * @const char *p@ = string to read from instead
261 * @dstr *d@ = destination string
262 * @unsigned raw@ = raw or cooked read
263 *
264 * Returns: Zero if OK, nonzero on end-of-file.
265 *
266 * Use: Reads a filename (or something similar) from a stream.
267 */
268
269 static int getstring(FILE *fp, const char *p, dstr *d, unsigned raw)
270 {
271 int ch;
272 int q = 0;
273
274 /* --- Raw: just read exactly what's written up to a null byte --- */
275
276 #define NEXTCH (fp ? getc(fp) : (unsigned char)*p++)
277 #define EOFCH (fp ? EOF : 0)
278
279 if (raw) {
280 if ((ch = NEXTCH) == EOFCH)
281 return (EOF);
282 for (;;) {
283 if (!ch)
284 break;
285 DPUTC(d, ch);
286 if ((ch = NEXTCH) == EOFCH)
287 break;
288 }
289 DPUTZ(d);
290 return (0);
291 }
292
293 /* --- Skip as far as whitespace --- *
294 *
295 * Also skip past comments.
296 */
297
298 again:
299 ch = NEXTCH;
300 while (isspace(ch))
301 ch = NEXTCH;
302 if (ch == '#') {
303 do ch = NEXTCH; while (ch != '\n' && ch != EOFCH);
304 goto again;
305 }
306 if (ch == EOFCH)
307 return (EOF);
308
309 /* --- If the character is a quote then read a quoted string --- */
310
311 switch (ch) {
312 case '`':
313 ch = '\'';
314 case '\'':
315 case '\"':
316 q = ch;
317 ch = NEXTCH;
318 break;
319 }
320
321 /* --- Now read all sorts of interesting things --- */
322
323 for (;;) {
324
325 /* --- Handle an escaped thing --- */
326
327 if (ch == '\\') {
328 ch = NEXTCH;
329 if (ch == EOFCH)
330 break;
331 switch (ch) {
332 case 'a': ch = '\a'; break;
333 case 'b': ch = '\b'; break;
334 case 'f': ch = '\f'; break;
335 case 'n': ch = '\n'; break;
336 case 'r': ch = '\r'; break;
337 case 't': ch = '\t'; break;
338 case 'v': ch = '\v'; break;
339 }
340 DPUTC(d, ch);
341 ch = NEXTCH;
342 continue;
343 }
344
345 /* --- If it's a quote or some other end marker then stop --- */
346
347 if (ch == q)
348 break;
349 if (!q && isspace(ch))
350 break;
351
352 /* --- Otherwise contribute and continue --- */
353
354 DPUTC(d, ch);
355 if ((ch = NEXTCH) == EOFCH)
356 break;
357 }
358
359 /* --- Done --- */
360
361 DPUTZ(d);
362 return (0);
363
364 #undef NEXTCH
365 #undef EOFCH
366 }
367
/* --- @putstring@ --- *
 *
 * Arguments:   @FILE *fp@ = stream to write on
 *              @const char *p@ = pointer to text
 *              @unsigned raw@ = whether the string is to be written raw
 *
 * Returns:     ---
 *
 * Use:         Emits a string to a stream.  A raw string is written as
 *              it stands, followed by a null byte.  Otherwise the string
 *              is wrapped in double quotes if it contains whitespace, and
 *              control characters, quotes, `#' and backslashes are
 *              written as backslash escapes.
 */

static void putstring(FILE *fp, const char *p, unsigned raw)
{
  size_t sz = strlen(p);
  unsigned qq;
  const char *q;

  /* --- Just write the string null terminated if raw --- */

  if (raw) {
    fwrite(p, 1, sz + 1, fp);
    return;
  }

  /* --- Check for any dodgy characters --- */

  qq = 0;
  for (q = p; *q; q++) {
    if (isspace((unsigned char)*q)) {
      qq = '\"';
      break;
    }
  }

  if (qq)
    putc(qq, fp);

  /* --- Emit the string --- *
   *
   * A backslash has to be escaped too: a bare one would be taken as the
   * start of an escape sequence when the string is read back.
   */

  for (q = p; *q; q++) {
    switch (*q) {
      case '\a': fputc('\\', fp); fputc('a', fp); break;
      case '\b': fputc('\\', fp); fputc('b', fp); break;
      case '\f': fputc('\\', fp); fputc('f', fp); break;
      case '\n': fputc('\\', fp); fputc('n', fp); break;
      case '\r': fputc('\\', fp); fputc('r', fp); break;
      case '\t': fputc('\\', fp); fputc('t', fp); break;
      case '\v': fputc('\\', fp); fputc('v', fp); break;
      case '`': fputc('\\', fp); fputc('`', fp); break;
      case '\'': fputc('\\', fp); fputc('\'', fp); break;
      case '\"': fputc('\\', fp); fputc('\"', fp); break;
      case '#': fputc('\\', fp); fputc('#', fp); break;
      case '\\': fputc('\\', fp); fputc('\\', fp); break;
      default:
        putc(*q, fp);
        break;
    }
  }

  /* --- Done --- */

  if (qq)
    putc(qq, fp);
}
431
432 /*----- Guts --------------------------------------------------------------*/
433
434 static int checkhash(const char *file, unsigned f,
435 const gchash *gch, const encodeops *e)
436 {
437 int rc;
438 FILE *fp;
439 dstr d = DSTR_INIT;
440 dstr dd = DSTR_INIT;
441 unsigned long n = 0, nfail = 0;
442 octet *buf = xmalloc(2 * gch->hashsz);
443
444 if (!file || strcmp(file, "-") == 0)
445 fp = stdin;
446 else if ((fp = fopen(file, f & f_raw ? "r" : "rb")) == 0) {
447 moan("couldn't open `%s': %s", file, strerror(errno));
448 return (EXIT_FAILURE);
449 }
450
451 while (DRESET(&d), dstr_putline(&d, fp) != EOF) {
452 char *p = d.buf;
453 char *q;
454 unsigned ff = f;
455
456 /* --- Handle a directive --- */
457
458 if (*p == '#') {
459 p++;
460 if ((q = str_getword(&p)) == 0)
461 continue;
462 if (strcmp(q, "hash") == 0) {
463 const gchash *g;
464 if ((q = str_getword(&p)) == 0)
465 continue;
466 if ((g = gethash(q)) == 0)
467 continue;
468 gch = g;
469 xfree(buf);
470 buf = xmalloc(2 * gch->hashsz);
471 } else if (strcmp(q, "encoding") == 0) {
472 const encodeops *ee;
473 if ((q = str_getword(&p)) == 0)
474 continue;
475 if ((ee = getencoding(q)) == 0)
476 continue;
477 e = ee;
478 } else if (strcmp(q, "escape") == 0)
479 f |= f_escape;
480 continue;
481 }
482
483 /* --- Otherwise it's a hex thing --- */
484
485 q = p;
486 while (*p && *p != ' ')
487 p++;
488 if (!*p)
489 continue;
490 *p++ = 0;
491 if (e->get(q, buf, gch->hashsz, 0) < gch->hashsz)
492 continue;
493 if (*p == '*')
494 ff |= f_binary;
495 else if (*p != ' ')
496 continue;
497 p++;
498
499 if (f & f_escape) {
500 DRESET(&dd);
501 getstring(0, p, &dd, 0);
502 p = dd.buf;
503 }
504
505 if (fhash(p, ff, gch, buf + gch->hashsz)) {
506 moan("couldn't read `%s': %s", p, strerror(errno));
507 rc = EXIT_FAILURE;
508 continue;
509 }
510 if (memcmp(buf, buf + gch->hashsz, gch->hashsz) != 0) {
511 if (ff & f_verbose)
512 fprintf(stderr, "FAIL %s\n", p);
513 else
514 moan("%s check failed for `%s'", gch->name, p);
515 nfail++;
516 rc = EXIT_FAILURE;
517 } else {
518 if (ff & f_verbose)
519 fprintf(stderr, "OK %s\n", p);
520 }
521 n++;
522 }
523
524 dstr_destroy(&d);
525 dstr_destroy(&dd);
526 xfree(buf);
527 if ((f & f_verbose) && nfail)
528 moan("%lu of %lu file(s) failed %s check", nfail, n, gch->name);
529 else if (!n)
530 moan("no files checked");
531 return (0);
532 }
533
534 static int dohash(const char *file, unsigned f,
535 const gchash *gch, const encodeops *e)
536 {
537 int rc = 0;
538 octet *p = xmalloc(gch->hashsz);
539
540 if (fhash(file, f, gch, p)) {
541 moan("couldn't read `%s': %s", file ? file : "<stdin>", strerror(errno));
542 rc = EXIT_FAILURE;
543 } else {
544 e->put(p, gch->hashsz, stdout);
545 if (file) {
546 fputc(' ', stdout);
547 fputc(f & f_binary ? '*' : ' ', stdout);
548 if (f & f_escape)
549 putstring(stdout, file, 0);
550 else
551 fputs(file, stdout);
552 }
553 fputc('\n', stdout);
554 }
555
556 xfree(p);
557 return (rc);
558 }
559
560 static int dofile(const char *file, unsigned f,
561 const gchash *gch, const encodeops *e)
562 {
563 return (f & f_check ? checkhash : dohash)(file, f, gch, e);
564 }
565
566 static int hashfiles(const char *file, unsigned f,
567 const gchash *gch, const encodeops *e)
568 {
569 FILE *fp;
570 dstr d = DSTR_INIT;
571 int rc = 0;
572 int rrc;
573
574 if (!file || strcmp(file, "-") == 0)
575 fp = stdin;
576 else if ((fp = fopen(file, f & f_raw ? "r" : "rb")) == 0) {
577 moan("couldn't open `%s': %s", file, strerror(errno));
578 return (EXIT_FAILURE);
579 }
580
581 for (;;) {
582 DRESET(&d);
583 if (getstring(fp, 0, &d, f & f_raw))
584 break;
585 if ((rrc = dofile(d.buf, f, gch, e)) != 0)
586 rc = rrc;
587 }
588
589 return (rc);
590 }
591
592 static int hashsum(const char *file, unsigned f,
593 const gchash *gch, const encodeops *e)
594 {
595 return (f & f_files ? hashfiles : dofile)(file, f, gch, e);
596 }
597
598 /*----- Main driver -------------------------------------------------------*/
599
600 void version(FILE *fp)
601 {
602 pquis(fp, "$, Catacomb version " VERSION "\n");
603 }
604
/* --- @usage@ --- *
 *
 * Arguments:   @FILE *fp@ = stream to write on
 *
 * Returns:     ---
 *
 * Use:         Writes a terse usage message.
 */

static void usage(FILE *fp)
{
  static const char synopsis[] =
    "Usage: $ [-f0ebcv] [-a ALGORITHM] [-E ENC] [FILES...]\n";

  pquis(fp, synopsis);
}
609
610 static void help(FILE *fp, const gchash *gch)
611 {
612 version(fp);
613 fputc('\n', fp);
614 usage(fp);
615 pquis(fp, "\n\
616 Generates or checks message digests on files. Options available:\n\
617 \n\
618 -h, --help Display this help message.\n\
619 -V, --version Display program's version number.\n\
620 -u, --usage Display a terse usage message.\n\
621 -l, --list [ITEM...] Show known hash functions and/or encodings.\n\
622 \n\
623 -a, --algorithm=ALG Use the message digest algorithm ALG.\n\
624 -E, --encoding=ENC Represent hashes using encoding ENC.\n\
625 \n\
626 -f, --files Read a list of file names from standard input.\n\
627 -0, --null File names are null terminated, not plain text.\n\
628 \n\
629 -e, --escape Escape funny characters in filenames.\n\
630 -c, --check Check message digests rather than emitting them.\n\
631 -b, --binary When reading files, treat them as binary.\n\
632 -v, --verbose Be verbose when checking digests.\n\
633 \n\
634 For a list of hashing algorithms and encodings, type `$ --list'.\n\
635 ");
636 if (gch)
637 fprintf(fp, "The default message digest algorithm is %s.\n", gch->name);
638 }
639
640 #define LISTS(LI) \
641 LI("Lists", list, listtab[i].name, listtab[i].name) \
642 LI("Hash functions", hash, ghashtab[i], ghashtab[i]->name) \
643 LI("Encodings", enc, encodingtab[i].name, encodingtab[i].name)
644
645 MAKELISTTAB(listtab, LISTS)
646
647 int main(int argc, char *argv[])
648 {
649 unsigned f = 0;
650 const gchash *gch = 0;
651 const encodeops *e = &encodingtab[0];
652 int rc;
653
654 /* --- Initialization --- */
655
656 ego(argv[0]);
657 sub_init();
658
659 /* --- Choose a hash function from the name --- */
660
661 {
662 char *q = xstrdup(QUIS);
663 size_t len = strlen(q);
664 if (len > 3 && strcmp(q + len - 3, "sum") == 0) {
665 q[len - 3] = 0;
666 gch = gethash(q);
667 }
668 if (!gch)
669 gch = gethash("md5");
670 xfree(q);
671 }
672
673 /* --- Read options --- */
674
675 for (;;) {
676 static struct option opts[] = {
677 { "help", 0, 0, 'h' },
678 { "verbose", 0, 0, 'V' },
679 { "usage", 0, 0, 'u' },
680
681 { "algorithm", OPTF_ARGREQ, 0, 'a' },
682 { "hash", OPTF_ARGREQ, 0, 'a' },
683 { "encoding", OPTF_ARGREQ, 0, 'E' },
684 { "list", 0, 0, 'l' },
685
686 { "files", 0, 0, 'f' },
687 { "find", 0, 0, 'f' },
688 { "null", 0, 0, '0' },
689
690 { "escape", 0, 0, 'e' },
691 { "check", 0, 0, 'c' },
692 { "binary", 0, 0, 'b' },
693 { "verbose", 0, 0, 'v' },
694
695 { 0, 0, 0, 0 }
696 };
697 int i = mdwopt(argc, argv, "hVu a:E:l f0 ecbv", opts, 0, 0, 0);
698 if (i < 0)
699 break;
700
701 switch (i) {
702 case 'h':
703 help(stdout, gch);
704 exit(0);
705 case 'V':
706 version(stdout);
707 exit(0);
708 case 'u':
709 usage(stdout);
710 exit(0);
711 case 'l':
712 exit(displaylists(listtab, argv + optind));
713 case 'a':
714 if ((gch = gethash(optarg)) == 0)
715 die(EXIT_FAILURE, "unknown hash algorithm `%s'", optarg);
716 f |= f_oddhash;
717 break;
718 case 'E':
719 if ((e = getencoding(optarg)) == 0)
720 die(EXIT_FAILURE, "unknown encoding `%s'", optarg);
721 f |= f_oddenc;
722 break;
723 case 'f':
724 f |= f_files;
725 break;
726 case '0':
727 f |= f_raw;
728 break;
729 case 'e':
730 f |= f_escape;
731 break;
732 case 'c':
733 f |= f_check;
734 break;
735 case 'b':
736 f |= f_binary;
737 break;
738 case 'v':
739 f |= f_verbose;
740 break;
741 default:
742 f |= f_bogus;
743 break;
744 }
745 }
746
747 if (f & f_bogus) {
748 usage(stderr);
749 exit(EXIT_FAILURE);
750 }
751 argv += optind;
752 argc -= optind;
753
754 /* --- Generate output --- */
755
756 if (!(f & f_check) && (argc || (f & f_files))) {
757 if (f & f_oddhash) printf("#hash %s\n", gch->name);
758 if (f & f_oddenc) printf("#encoding %s\n", e->name);
759 if (f & f_escape) fputs("#escape\n", stdout);
760 }
761 if (!argc)
762 rc = hashsum(0, f, gch, e);
763 else {
764 int i;
765 int rrc;
766
767 rc = 0;
768 for (i = 0; i < argc; i++) {
769 if ((rrc = hashsum(argv[i], f, gch, e)) != 0)
770 rc = rrc;
771 }
772 }
773
774 return (rc);
775 }
776
777 /*----- That's all, folks -------------------------------------------------*/