492eb9b3fbce82f0e281ae12fb00b3bae0a5e7f5
[u/mdw/catacomb] / hashsum.c
1 /* -*-c-*-
2 *
3 * $Id$
4 *
5 * Hash files using some secure hash function
6 *
7 * (c) 2000 Straylight/Edgeware
8 */
9
10 /*----- Licensing notice --------------------------------------------------*
11 *
12 * This file is part of Catacomb.
13 *
14 * Catacomb is free software; you can redistribute it and/or modify
15 * it under the terms of the GNU Library General Public License as
16 * published by the Free Software Foundation; either version 2 of the
17 * License, or (at your option) any later version.
18 *
19 * Catacomb is distributed in the hope that it will be useful,
20 * but WITHOUT ANY WARRANTY; without even the implied warranty of
21 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 * GNU Library General Public License for more details.
23 *
24 * You should have received a copy of the GNU Library General Public
25 * License along with Catacomb; if not, write to the Free
26 * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
27 * MA 02111-1307, USA.
28 */
29
30 /*----- Header files ------------------------------------------------------*/
31
32 #include "config.h"
33
34 #include <assert.h>
35 #include <ctype.h>
36 #include <errno.h>
37 #include <stdio.h>
38 #include <stdlib.h>
39 #include <string.h>
40
41 #include <mLib/alloc.h>
42 #include <mLib/dstr.h>
43 #include <mLib/mdwopt.h>
44 #include <mLib/quis.h>
45 #include <mLib/report.h>
46 #include <mLib/sub.h>
47 #include <mLib/str.h>
48
49 #include <mLib/hex.h>
50 #include <mLib/base32.h>
51 #include <mLib/base64.h>
52
53 #include "ghash.h"
54
55 /*----- Static variables --------------------------------------------------*/
56
/* Bits of the flags word `f' passed around between the functions below. */
#define f_binary   (1u << 0)    /* Open hashed files in binary mode */
#define f_bogus    (1u << 1)    /* An unrecognized option was seen */
#define f_verbose  (1u << 2)    /* Report each file while checking */
#define f_check    (1u << 3)    /* Check digests rather than emit them */
#define f_files    (1u << 4)    /* Inputs are lists of file names */
#define f_raw      (1u << 5)    /* File names are null-terminated */
#define f_oddhash  (1u << 6)    /* Hash chosen with `-a'; emit `#hash' */
#define f_escape   (1u << 7)    /* Escape file names; emit `#escape' */
#define f_oddenc   (1u << 8)    /* Encoding chosen with `-E'; emit `#encoding' */
66
67 /*----- Encoding and decoding ---------------------------------------------*/
68
69 /* --- Hex encoding --- */
70
71 static void puthex(const octet *buf, size_t sz, FILE *fp)
72 {
73 while (sz) {
74 fprintf(fp, "%02x", *buf++);
75 sz--;
76 }
77 }
78
79 static size_t gethex(const char *p, octet *q, size_t sz, char **pp)
80 {
81 size_t i = 0;
82 while (sz > 0 &&
83 isxdigit((unsigned char)p[0]) &&
84 isxdigit((unsigned char)p[1])) {
85 char buf[3];
86 buf[0] = p[0];
87 buf[1] = p[1];
88 buf[2] = 0;
89 *q++ = strtoul(buf, 0, 16);
90 sz--;
91 p += 2;
92 i++;
93 }
94 if (pp)
95 *pp = (char *)p;
96 return (i);
97 }
98
99 /* --- Base64 encoding --- */
100
101 static void putb64(const octet *buf, size_t sz, FILE *fp)
102 {
103 base64_ctx b;
104 dstr d = DSTR_INIT;
105
106 base64_init(&b);
107 b.indent = "";
108 b.maxline = 0;
109 base64_encode(&b, buf, sz, &d);
110 base64_encode(&b, 0, 0, &d);
111 dstr_write(&d, fp);
112 dstr_destroy(&d);
113 }
114
115 static size_t getb64(const char *p, octet *q, size_t sz, char **pp)
116 {
117 base64_ctx b;
118 dstr d = DSTR_INIT;
119 size_t n = strlen(p);
120
121 base64_init(&b);
122 base64_decode(&b, p, n, &d);
123 if (pp) *pp = (/*unconst*/ char *)p + n;
124 base64_decode(&b, 0, 0, &d);
125 assert(d.len <= sz);
126 memcpy(q, d.buf, sz);
127 n = d.len;
128 dstr_destroy(&d);
129 return (n);
130 }
131
132 /* --- Base32 encoding --- */
133
134 static void putb32(const octet *buf, size_t sz, FILE *fp)
135 {
136 base32_ctx b;
137 dstr d = DSTR_INIT;
138
139 base32_init(&b);
140 b.indent = "";
141 b.maxline = 0;
142 base32_encode(&b, buf, sz, &d);
143 base32_encode(&b, 0, 0, &d);
144 dstr_write(&d, fp);
145 dstr_destroy(&d);
146 }
147
148 static size_t getb32(const char *p, octet *q, size_t sz, char **pp)
149 {
150 base32_ctx b;
151 dstr d = DSTR_INIT;
152 size_t n = strlen(p);
153
154 base32_init(&b);
155 base32_decode(&b, p, n, &d);
156 if (pp) *pp = (/*unconst*/ char *)p + n;
157 base32_decode(&b, 0, 0, &d);
158 assert(d.len <= sz);
159 memcpy(q, d.buf, sz);
160 n = d.len;
161 dstr_destroy(&d);
162 return (n);
163 }
164
165 /* --- Table --- */
166
167 typedef struct encops {
168 const char *name;
169 void (*put)(const octet *, size_t, FILE *);
170 size_t (*get)(const char *, octet *, size_t, char **);
171 } encops;
172
173 static const encops enctab[] = {
174 { "hex", puthex, gethex },
175 { "base64", putb64, getb64 },
176 { "base32", putb32, getb32 },
177 { 0, 0, 0 }
178 };
179
180 static const encops *getenc(const char *ename)
181 {
182 const encops *e;
183
184 for (e = enctab; e->name; e++) {
185 if (strcmp(ename, e->name) == 0)
186 return (e);
187 }
188 return (0);
189 }
190
191 /*----- Support functions -------------------------------------------------*/
192
193 /* --- @fhash@ --- *
194 *
195 * Arguments: @const char *file@ = file name to be hashed (null for stdin)
196 * @unsigned f@ = flags to set
197 * @const gchash *gch@ = pointer to hash function to use
198 * @void *buf@ = pointer to hash output buffer
199 *
200 * Returns: Zero if it worked, nonzero on error.
201 *
202 * Use: Hashes a file.
203 */
204
205 static int fhash(const char *file, unsigned f, const gchash *gch, void *buf)
206 {
207 FILE *fp;
208 char fbuf[BUFSIZ];
209 size_t sz;
210 ghash *h;
211 int e;
212
213 if (!file)
214 fp = stdin;
215 else if ((fp = fopen(file, f & f_binary ? "rb" : "r")) == 0)
216 return (-1);
217
218 h = GH_INIT(gch);
219 while ((sz = fread(fbuf, 1, sizeof(fbuf), fp)) > 0)
220 GH_HASH(h, fbuf, sz);
221 GH_DONE(h, buf);
222 GH_DESTROY(h);
223 e = ferror(fp);
224 if (file)
225 fclose(fp);
226 return (e ? -1 : 0);
227 }
228
229 /* --- @gethash@ --- *
230 *
231 * Arguments: @const char *name@ = pointer to name string
232 *
233 * Returns: Pointer to appropriate hash class.
234 *
235 * Use: Chooses a hash function by name.
236 */
237
238 static const gchash *gethash(const char *name)
239 {
240 const gchash *const *g, *gg = 0;
241 size_t sz = strlen(name);
242 for (g = ghashtab; *g; g++) {
243 if (strncmp(name, (*g)->name, sz) == 0) {
244 if ((*g)->name[sz] == 0) {
245 gg = *g;
246 break;
247 } else if (gg)
248 return (0);
249 else
250 gg = *g;
251 }
252 }
253 return (gg);
254 }
255
256 /* --- @getstring@ --- *
257 *
258 * Arguments: @FILE *fp@ = stream from which to read
259 * @const char *p@ = string to read from instead
260 * @dstr *d@ = destination string
261 * @unsigned raw@ = raw or cooked read
262 *
263 * Returns: Zero if OK, nonzero on end-of-file.
264 *
265 * Use: Reads a filename (or something similar) from a stream.
266 */
267
268 static int getstring(FILE *fp, const char *p, dstr *d, unsigned raw)
269 {
270 int ch;
271 int q = 0;
272
273 /* --- Raw: just read exactly what's written up to a null byte --- */
274
275 #define NEXTCH (fp ? getc(fp) : (unsigned char)*p++)
276 #define EOFCH (fp ? EOF : 0)
277
278 if (raw) {
279 if ((ch = NEXTCH) == EOFCH)
280 return (EOF);
281 for (;;) {
282 if (!ch)
283 break;
284 DPUTC(d, ch);
285 if ((ch = NEXTCH) == EOFCH)
286 break;
287 }
288 DPUTZ(d);
289 return (0);
290 }
291
292 /* --- Skip as far as whitespace --- *
293 *
294 * Also skip past comments.
295 */
296
297 again:
298 ch = NEXTCH;
299 while (isspace(ch))
300 ch = NEXTCH;
301 if (ch == '#') {
302 do ch = NEXTCH; while (ch != '\n' && ch != EOFCH);
303 goto again;
304 }
305 if (ch == EOFCH)
306 return (EOF);
307
308 /* --- If the character is a quote then read a quoted string --- */
309
310 switch (ch) {
311 case '`':
312 ch = '\'';
313 case '\'':
314 case '\"':
315 q = ch;
316 ch = NEXTCH;
317 break;
318 }
319
320 /* --- Now read all sorts of interesting things --- */
321
322 for (;;) {
323
324 /* --- Handle an escaped thing --- */
325
326 if (ch == '\\') {
327 ch = NEXTCH;
328 if (ch == EOFCH)
329 break;
330 switch (ch) {
331 case 'a': ch = '\a'; break;
332 case 'b': ch = '\b'; break;
333 case 'f': ch = '\f'; break;
334 case 'n': ch = '\n'; break;
335 case 'r': ch = '\r'; break;
336 case 't': ch = '\t'; break;
337 case 'v': ch = '\v'; break;
338 }
339 DPUTC(d, ch);
340 ch = NEXTCH;
341 continue;
342 }
343
344 /* --- If it's a quote or some other end marker then stop --- */
345
346 if (ch == q)
347 break;
348 if (!q && isspace(ch))
349 break;
350
351 /* --- Otherwise contribute and continue --- */
352
353 DPUTC(d, ch);
354 if ((ch = NEXTCH) == EOFCH)
355 break;
356 }
357
358 /* --- Done --- */
359
360 DPUTZ(d);
361 return (0);
362
363 #undef NEXTCH
364 #undef EOFCH
365 }
366
/* --- @putstring@ --- *
 *
 * Arguments:   @FILE *fp@ = stream to write on
 *              @const char *p@ = pointer to text
 *              @unsigned raw@ = whether the string is to be written raw
 *
 * Returns:     ---
 *
 * Use:         Emits a string to a stream.  A raw string is written
 *              verbatim, followed by its null terminator.  Otherwise the
 *              string is wrapped in double quotes if it contains any
 *              whitespace, and control characters, quotes, `#' and
 *              backslashes are written as backslash escapes.  Backslash
 *              must be escaped too, since a reader which understands the
 *              other escapes would otherwise swallow it.
 */

static void putstring(FILE *fp, const char *p, unsigned raw)
{
  size_t sz = strlen(p);
  unsigned qq;
  const char *q;

  /* --- Just write the string null terminated if raw --- */

  if (raw) {
    fwrite(p, 1, sz + 1, fp);
    return;
  }

  /* --- Check for any dodgy characters --- */

  qq = 0;
  for (q = p; *q; q++) {
    if (isspace((unsigned char)*q)) {
      qq = '\"';
      break;
    }
  }

  if (qq)
    putc(qq, fp);

  /* --- Emit the string --- */

  for (q = p; *q; q++) {
    int esc = 0;

    switch (*q) {
      case '\a': esc = 'a'; break;
      case '\b': esc = 'b'; break;
      case '\f': esc = 'f'; break;
      case '\n': esc = 'n'; break;
      case '\r': esc = 'r'; break;
      case '\t': esc = 't'; break;
      case '\v': esc = 'v'; break;
      case '`': esc = '`'; break;
      case '\'': esc = '\''; break;
      case '\"': esc = '\"'; break;
      case '#': esc = '#'; break;
      case '\\': esc = '\\'; break;
    }
    if (esc) {
      fputc('\\', fp);
      fputc(esc, fp);
    } else
      putc(*q, fp);
  }

  /* --- Done --- */

  if (qq)
    putc(qq, fp);
}
430
431 /*----- Guts --------------------------------------------------------------*/
432
433 static int checkhash(const char *file, unsigned f,
434 const gchash *gch, const encops *e)
435 {
436 int rc;
437 FILE *fp;
438 dstr d = DSTR_INIT;
439 dstr dd = DSTR_INIT;
440 unsigned long n = 0, nfail = 0;
441 octet *buf = xmalloc(2 * gch->hashsz);
442
443 if (!file)
444 fp = stdin;
445 else if ((fp = fopen(file, f & f_raw ? "r" : "rb")) == 0) {
446 moan("couldn't open `%s': %s", file, strerror(errno));
447 return (EXIT_FAILURE);
448 }
449
450 while (DRESET(&d), dstr_putline(&d, fp) != EOF) {
451 char *p = d.buf;
452 char *q;
453 unsigned ff = f;
454
455 /* --- Handle a directive --- */
456
457 if (*p == '#') {
458 p++;
459 if ((q = str_getword(&p)) == 0)
460 continue;
461 if (strcmp(q, "hash") == 0) {
462 const gchash *g;
463 if ((q = str_getword(&p)) == 0)
464 continue;
465 if ((g = gethash(q)) == 0)
466 continue;
467 gch = g;
468 xfree(buf);
469 buf = xmalloc(2 * gch->hashsz);
470 } else if (strcmp(q, "encoding") == 0) {
471 const encops *ee;
472 if ((q = str_getword(&p)) == 0)
473 continue;
474 if ((ee = getenc(q)) == 0)
475 continue;
476 e = ee;
477 } else if (strcmp(q, "escape") == 0)
478 f |= f_escape;
479 continue;
480 }
481
482 /* --- Otherwise it's a hex thing --- */
483
484 q = p;
485 while (*p && *p != ' ')
486 p++;
487 if (!*p)
488 continue;
489 *p++ = 0;
490 if (e->get(q, buf, gch->hashsz, 0) < gch->hashsz)
491 continue;
492 if (*p == '*')
493 ff |= f_binary;
494 else if (*p != ' ')
495 continue;
496 p++;
497
498 if (f & f_escape) {
499 DRESET(&dd);
500 getstring(0, p, &dd, 0);
501 p = dd.buf;
502 }
503
504 if (fhash(p, ff, gch, buf + gch->hashsz)) {
505 moan("couldn't read `%s': %s", p, strerror(errno));
506 rc = EXIT_FAILURE;
507 continue;
508 }
509 if (memcmp(buf, buf + gch->hashsz, gch->hashsz) != 0) {
510 if (ff & f_verbose)
511 fprintf(stderr, "FAIL %s\n", p);
512 else
513 moan("%s check failed for `%s'", gch->name, p);
514 nfail++;
515 rc = EXIT_FAILURE;
516 } else {
517 if (ff & f_verbose)
518 fprintf(stderr, "OK %s\n", p);
519 }
520 n++;
521 }
522
523 dstr_destroy(&d);
524 dstr_destroy(&dd);
525 xfree(buf);
526 if ((f & f_verbose) && nfail)
527 moan("%lu of %lu file(s) failed %s check", nfail, n, gch->name);
528 else if (!n)
529 moan("no files checked");
530 return (0);
531 }
532
533 static int dohash(const char *file, unsigned f,
534 const gchash *gch, const encops *e)
535 {
536 int rc = 0;
537 octet *p = xmalloc(gch->hashsz);
538
539 if (fhash(file, f, gch, p)) {
540 moan("couldn't read `%s': %s", file ? file : "<stdin>", strerror(errno));
541 rc = EXIT_FAILURE;
542 } else {
543 e->put(p, gch->hashsz, stdout);
544 if (file) {
545 fputc(' ', stdout);
546 fputc(f & f_binary ? '*' : ' ', stdout);
547 if (f & f_escape)
548 putstring(stdout, file, 0);
549 else
550 fputs(file, stdout);
551 }
552 fputc('\n', stdout);
553 }
554
555 xfree(p);
556 return (rc);
557 }
558
559 static int dofile(const char *file, unsigned f,
560 const gchash *gch, const encops *e)
561 {
562 return (f & f_check ? checkhash : dohash)(file, f, gch, e);
563 }
564
565 static int hashfiles(const char *file, unsigned f,
566 const gchash *gch, const encops *e)
567 {
568 FILE *fp;
569 dstr d = DSTR_INIT;
570 int rc = 0;
571 int rrc;
572
573 if (!file)
574 fp = stdin;
575 else if ((fp = fopen(file, f & f_raw ? "r" : "rb")) == 0) {
576 moan("couldn't open `%s': %s", file, strerror(errno));
577 return (EXIT_FAILURE);
578 }
579
580 for (;;) {
581 DRESET(&d);
582 if (getstring(fp, 0, &d, f & f_raw))
583 break;
584 if ((rrc = dofile(d.buf, f, gch, e)) != 0)
585 rc = rrc;
586 }
587
588 return (rc);
589 }
590
591 static int hashsum(const char *file, unsigned f,
592 const gchash *gch, const encops *e)
593 {
594 return (f & f_files ? hashfiles : dofile)(file, f, gch, e);
595 }
596
597 /*----- Main driver -------------------------------------------------------*/
598
599 static void version(FILE *fp)
600 {
601 pquis(fp, "$, Catacomb version " VERSION "\n");
602 }
603
/* Write a one-line usage synopsis to @fp@.  The synopsis includes `-E',
 * which takes an argument just as `-a' does.
 */
static void usage(FILE *fp)
{
  pquis(fp, "Usage: $ [-f0ebcv] [-a algorithm] [-E encoding] [files...]\n");
}
608
609 static void help(FILE *fp, const gchash *gch)
610 {
611 version(fp);
612 fputc('\n', fp);
613 usage(fp);
614 pquis(fp, "\n\
615 Generates or checks message digests on files. Options available:\n\
616 \n\
617 -h, --help Display this help message.\n\
618 -V, --version Display program's version number.\n\
619 -u, --usage Display a terse usage message.\n\
620 \n\
621 -a, --algorithm=ALG Use the message digest algorithm ALG.\n\
622 -E, --encoding=ENC Represent hashes using encoding ENC.\n\
623 \n\
624 -f, --files Read a list of file names from standard input.\n\
625 -0, --null File names are null terminated, not plain text.\n\
626 \n\
627 -e, --escape Escape funny characters in filenames.\n\
628 -c, --check Check message digests rather than emitting them.\n\
629 -b, --binary When reading files, treat them as binary.\n\
630 -v, --verbose Be verbose when checking digests.\n\
631 \n\
632 For a list of hashing algorithms and encodings, type `$ --list'.\n\
633 ");
634 if (gch)
635 fprintf(fp, "The default message digest algorithm is %s.\n", gch->name);
636 }
637
638 int main(int argc, char *argv[])
639 {
640 unsigned f = 0;
641 const gchash *gch = 0;
642 const encops *e = &enctab[0];
643 int rc;
644
645 /* --- Initialization --- */
646
647 ego(argv[0]);
648 sub_init();
649
650 /* --- Choose a hash function from the name --- */
651
652 {
653 char *q = xstrdup(QUIS);
654 size_t len = strlen(q);
655 if (len > 3 && strcmp(q + len - 3, "sum") == 0) {
656 q[len - 3] = 0;
657 gch = gethash(q);
658 }
659 if (!gch)
660 gch = gethash("md5");
661 xfree(q);
662 }
663
664 /* --- Read options --- */
665
666 for (;;) {
667 static struct option opts[] = {
668 { "help", 0, 0, 'h' },
669 { "verbose", 0, 0, 'V' },
670 { "usage", 0, 0, 'u' },
671
672 { "algorithm", OPTF_ARGREQ, 0, 'a' },
673 { "hash", OPTF_ARGREQ, 0, 'a' },
674 { "encoding", OPTF_ARGREQ, 0, 'E' },
675 { "list", 0, 0, 'l' },
676
677 { "files", 0, 0, 'f' },
678 { "find", 0, 0, 'f' },
679 { "null", 0, 0, '0' },
680
681 { "escape", 0, 0, 'e' },
682 { "check", 0, 0, 'c' },
683 { "binary", 0, 0, 'b' },
684 { "verbose", 0, 0, 'v' },
685
686 { 0, 0, 0, 0 }
687 };
688 int i = mdwopt(argc, argv, "hVu a:E:l f0 ecbv", opts, 0, 0, 0);
689 if (i < 0)
690 break;
691
692 switch (i) {
693 case 'h':
694 help(stdout, gch);
695 exit(0);
696 case 'V':
697 version(stdout);
698 exit(0);
699 case 'u':
700 usage(stdout);
701 exit(0);
702 case 'a':
703 if ((gch = gethash(optarg)) == 0)
704 die(EXIT_FAILURE, "unknown hash algorithm `%s'", optarg);
705 f |= f_oddhash;
706 break;
707 case 'l': {
708 unsigned j;
709 printf("Algorithms: ");
710 for (j = 0; ghashtab[j]; j++) {
711 if (j) fputc(' ', stdout);
712 printf("%s", ghashtab[j]->name);
713 }
714 fputc('\n', stdout);
715 printf("Encodings: ");
716 for (j = 0; enctab[j].name; j++) {
717 if (j) fputc(' ', stdout);
718 printf("%s", enctab[j].name);
719 }
720 fputc('\n', stdout);
721 exit(0);
722 } break;
723 case 'E':
724 if ((e = getenc(optarg)) == 0)
725 die(EXIT_FAILURE, "unknown encoding `%s'", optarg);
726 f |= f_oddenc;
727 break;
728 case 'f':
729 f |= f_files;
730 break;
731 case '0':
732 f |= f_raw;
733 break;
734 case 'e':
735 f |= f_escape;
736 break;
737 case 'c':
738 f |= f_check;
739 break;
740 case 'b':
741 f |= f_binary;
742 break;
743 case 'v':
744 f |= f_verbose;
745 break;
746 default:
747 f |= f_bogus;
748 break;
749 }
750 }
751
752 if (f & f_bogus) {
753 usage(stderr);
754 exit(EXIT_FAILURE);
755 }
756 argv += optind;
757 argc -= optind;
758
759 /* --- Generate output --- */
760
761 if (!argc)
762 rc = hashsum(0, f, gch, e);
763 else {
764 int i;
765 int rrc;
766
767 rc = 0;
768 if (!(f & f_check)) {
769 if (f & f_oddhash) printf("#hash %s\n", gch->name);
770 if (f & f_oddenc) printf("#encoding %s\n", e->name);
771 if (f & f_escape) fputs("#escape\n", stdout);
772 }
773 for (i = 0; i < argc; i++) {
774 if ((rrc = hashsum(argv[i], f, gch, e)) != 0)
775 rc = rrc;
776 }
777 }
778
779 return (rc);
780 }
781
782 /*----- That's all, folks -------------------------------------------------*/