Added support for MD2 hash function.
[u/mdw/catacomb] / hashsum.c
1 /* -*-c-*-
2 *
3 * $Id: hashsum.c,v 1.7 2001/02/21 20:03:22 mdw Exp $
4 *
5 * Hash files using some secure hash function
6 *
7 * (c) 2000 Straylight/Edgeware
8 */
9
10 /*----- Licensing notice --------------------------------------------------*
11 *
12 * This file is part of Catacomb.
13 *
14 * Catacomb is free software; you can redistribute it and/or modify
15 * it under the terms of the GNU Library General Public License as
16 * published by the Free Software Foundation; either version 2 of the
17 * License, or (at your option) any later version.
18 *
19 * Catacomb is distributed in the hope that it will be useful,
20 * but WITHOUT ANY WARRANTY; without even the implied warranty of
21 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 * GNU Library General Public License for more details.
23 *
24 * You should have received a copy of the GNU Library General Public
25 * License along with Catacomb; if not, write to the Free
26 * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
27 * MA 02111-1307, USA.
28 */
29
30 /*----- Revision history --------------------------------------------------*
31 *
32 * $Log: hashsum.c,v $
33 * Revision 1.7 2001/02/21 20:03:22 mdw
34 * Added support for MD2 hash function.
35 *
36 * Revision 1.6 2001/01/25 21:40:14 mdw
37 * Support for new SHA variants added.
38 *
39 * Revision 1.5 2000/12/06 20:33:27 mdw
40 * Make flags be macros rather than enumerations, to ensure that they're
41 * unsigned.
42 *
43 * Revision 1.4 2000/08/04 23:23:44 mdw
44 * Various <ctype.h> fixes.
45 *
46 * Revision 1.3 2000/07/29 17:02:43 mdw
47 * (checkhash): Be pettier about spaces between the hash and filename, for
48 * compatibility with `md5sum'.
49 *
50 * Revision 1.2 2000/07/15 21:14:05 mdw
51 * Missed `-e' out of the usage string.
52 *
53 * Revision 1.1 2000/07/15 20:52:34 mdw
54 * Useful replacement for `md5sum' with support for many different hash
55 * functions and for reading filename lists from `find'.
56 *
57 */
58
59 /*----- Header files ------------------------------------------------------*/
60
61 #include "config.h"
62
63 #include <ctype.h>
64 #include <errno.h>
65 #include <stdio.h>
66 #include <stdlib.h>
67 #include <string.h>
68
69 #include <mLib/alloc.h>
70 #include <mLib/dstr.h>
71 #include <mLib/mdwopt.h>
72 #include <mLib/quis.h>
73 #include <mLib/report.h>
74 #include <mLib/sub.h>
75 #include <mLib/str.h>
76
77 #include "ghash.h"
78
79 #include "md2.h"
80 #include "md4.h"
81 #include "md5.h"
82 #include "rmd128.h"
83 #include "rmd160.h"
84 #include "rmd256.h"
85 #include "rmd320.h"
86 #include "sha.h"
87 #include "sha256.h"
88 #include "sha384.h"
89 #include "sha512.h"
90 #include "tiger.h"
91
92 /*----- Static variables --------------------------------------------------*/
93
94 static const gchash *hashtab[] = {
95 &md5, &md4, &md2,
96 &sha, &sha256, &sha384, &sha512,
97 &rmd128, &rmd160, &rmd256, &rmd320,
98 &tiger,
99 0
100 };
101
/* Option flags, kept together in a single @unsigned@ word. */

#define f_binary  (1u << 0)  /* Open the files being hashed in binary mode */
#define f_bogus   (1u << 1)  /* An unrecognised option was seen */
#define f_verbose (1u << 2)  /* Report every file when checking */
#define f_check   (1u << 3)  /* Check digests rather than emit them */
#define f_files   (1u << 4)  /* Arguments are lists of file names */
#define f_raw     (1u << 5)  /* File name lists are null-terminated */
#define f_oddhash (1u << 6)  /* Hash was chosen explicitly with `-a' */
#define f_escape  (1u << 7)  /* Escape funny characters in file names */
110
111 /*----- Support functions -------------------------------------------------*/
112
113 /* --- @fhash@ --- *
114 *
115 * Arguments: @const char *file@ = file name to be hashed (null for stdin)
116 * @unsigned f@ = flags to set
117 * @const gchash *gch@ = pointer to hash function to use
118 * @void *buf@ = pointer to hash output buffer
119 *
120 * Returns: Zero if it worked, nonzero on error.
121 *
122 * Use: Hashes a file.
123 */
124
125 static int fhash(const char *file, unsigned f, const gchash *gch, void *buf)
126 {
127 FILE *fp;
128 char fbuf[BUFSIZ];
129 size_t sz;
130 ghash *h;
131 int e;
132
133 if (!file)
134 fp = stdin;
135 else if ((fp = fopen(file, f & f_binary ? "rb" : "r")) == 0)
136 return (-1);
137
138 h = gch->init();
139 while ((sz = fread(fbuf, 1, sizeof(fbuf), fp)) > 0)
140 h->ops->hash(h, fbuf, sz);
141 h->ops->done(h, buf);
142 h->ops->destroy(h);
143 e = ferror(fp);
144 if (file)
145 fclose(fp);
146 return (e ? -1 : 0);
147 }
148
149 /* --- @puthex@ --- *
150 *
151 * Arguments: @const octet *buf@ = pointer to a binary buffer
152 * @size_t sz@ = size of the buffer
153 * @FILE *fp@ = pointer to output file handle
154 *
155 * Returns: ---
156 *
157 * Use: Writes a hex dump of a block of memory.
158 */
159
160 static void puthex(const octet *buf, size_t sz, FILE *fp)
161 {
162 while (sz) {
163 fprintf(fp, "%02x", *buf++);
164 sz--;
165 }
166 }
167
168 /* --- @gethex@ --- *
169 *
170 * Arguments: @const char *p@ = pointer to input string
171 * @octet *q@ = pointer to output buffer
172 * @size_t sz@ = size of the output buffer
173 * @char **pp@ = where to put the end pointer
174 *
175 * Returns: The number of bytes written to the buffer.
176 *
177 * Use: Reads hex dumps from the input string.
178 */
179
180 static size_t gethex(const char *p, octet *q, size_t sz, char **pp)
181 {
182 size_t i = 0;
183 while (sz > 0 &&
184 isxdigit((unsigned char)p[0]) &&
185 isxdigit((unsigned char)p[1])) {
186 char buf[3];
187 buf[0] = p[0];
188 buf[1] = p[1];
189 buf[2] = 0;
190 *q++ = strtoul(buf, 0, 16);
191 sz--;
192 p += 2;
193 i++;
194 }
195 if (pp)
196 *pp = (char *)p;
197 return (i);
198 }
199
200 /* --- @gethash@ --- *
201 *
202 * Arguments: @const char *name@ = pointer to name string
203 *
204 * Returns: Pointer to appropriate hash class.
205 *
206 * Use: Chooses a hash function by name.
207 */
208
209 static const gchash *gethash(const char *name)
210 {
211 const gchash **g, *gg = 0;
212 size_t sz = strlen(name);
213 for (g = hashtab; *g; g++) {
214 if (strncmp(name, (*g)->name, sz) == 0) {
215 if ((*g)->name[sz] == 0) {
216 gg = *g;
217 break;
218 } else if (gg)
219 return (0);
220 else
221 gg = *g;
222 }
223 }
224 return (gg);
225 }
226
227 /* --- @getstring@ --- *
228 *
229 * Arguments: @FILE *fp@ = stream from which to read
230 * @const char *p@ = string to read from instead
231 * @dstr *d@ = destination string
232 * @unsigned raw@ = raw or cooked read
233 *
234 * Returns: Zero if OK, nonzero on end-of-file.
235 *
236 * Use: Reads a filename (or something similar) from a stream.
237 */
238
239 static int getstring(FILE *fp, const char *p, dstr *d, unsigned raw)
240 {
241 int ch;
242 int q = 0;
243
244 /* --- Raw: just read exactly what's written up to a null byte --- */
245
246 #define NEXTCH (fp ? getc(fp) : (unsigned char)*p++)
247 #define EOFCH (fp ? EOF : 0)
248
249 if (raw) {
250 if ((ch = NEXTCH) == EOFCH)
251 return (EOF);
252 for (;;) {
253 if (!ch)
254 break;
255 DPUTC(d, ch);
256 if ((ch = NEXTCH) == EOFCH)
257 break;
258 }
259 DPUTZ(d);
260 return (0);
261 }
262
263 /* --- Skip as far as whitespace --- *
264 *
265 * Also skip past comments.
266 */
267
268 again:
269 ch = NEXTCH;
270 while (isspace(ch))
271 ch = NEXTCH;
272 if (ch == '#') {
273 do ch = NEXTCH; while (ch != '\n' && ch != EOFCH);
274 goto again;
275 }
276 if (ch == EOFCH)
277 return (EOF);
278
279 /* --- If the character is a quote then read a quoted string --- */
280
281 switch (ch) {
282 case '`':
283 ch = '\'';
284 case '\'':
285 case '\"':
286 q = ch;
287 ch = NEXTCH;
288 break;
289 }
290
291 /* --- Now read all sorts of interesting things --- */
292
293 for (;;) {
294
295 /* --- Handle an escaped thing --- */
296
297 if (ch == '\\') {
298 ch = NEXTCH;
299 if (ch == EOFCH)
300 break;
301 switch (ch) {
302 case 'a': ch = '\a'; break;
303 case 'b': ch = '\b'; break;
304 case 'f': ch = '\f'; break;
305 case 'n': ch = '\n'; break;
306 case 'r': ch = '\r'; break;
307 case 't': ch = '\t'; break;
308 case 'v': ch = '\v'; break;
309 }
310 DPUTC(d, ch);
311 ch = NEXTCH;
312 continue;
313 }
314
315 /* --- If it's a quote or some other end marker then stop --- */
316
317 if (ch == q)
318 break;
319 if (!q && isspace(ch))
320 break;
321
322 /* --- Otherwise contribute and continue --- */
323
324 DPUTC(d, ch);
325 if ((ch = NEXTCH) == EOFCH)
326 break;
327 }
328
329 /* --- Done --- */
330
331 DPUTZ(d);
332 return (0);
333
334 #undef NEXTCH
335 #undef EOFCH
336 }
337
/* --- @putstring@ --- *
 *
 * Arguments:   @FILE *fp@ = stream to write on
 *              @const char *p@ = pointer to text
 *              @unsigned raw@ = whether the string is to be written raw
 *
 * Returns:     ---
 *
 * Use:         Emits a string to a stream.  A raw string is written
 *              verbatim, followed by its terminating null byte.  Otherwise
 *              the string is wrapped in double quotes if it contains any
 *              whitespace, and control characters, quotes, `#' and
 *              backslash are written as backslash escapes.
 */

static void putstring(FILE *fp, const char *p, unsigned raw)
{
  unsigned qq = 0;
  const char *q;

  /* --- Just write the string null terminated if raw --- */

  if (raw) {
    fwrite(p, 1, strlen(p) + 1, fp);
    return;
  }

  /* --- Check for any dodgy characters --- */

  for (q = p; *q; q++) {
    if (isspace((unsigned char)*q)) {
      qq = '\"';
      break;
    }
  }

  if (qq)
    putc(qq, fp);

  /* --- Emit the string --- *
   *
   * Backslash has to be escaped too: it introduces an escape sequence when
   * the string is read back, so a bare one would swallow or transform the
   * character after it.
   */

  for (q = p; *q; q++) {
    switch (*q) {
      case '\a': fputc('\\', fp); fputc('a', fp); break;
      case '\b': fputc('\\', fp); fputc('b', fp); break;
      case '\f': fputc('\\', fp); fputc('f', fp); break;
      case '\n': fputc('\\', fp); fputc('n', fp); break;
      case '\r': fputc('\\', fp); fputc('r', fp); break;
      case '\t': fputc('\\', fp); fputc('t', fp); break;
      case '\v': fputc('\\', fp); fputc('v', fp); break;
      case '\\': fputc('\\', fp); fputc('\\', fp); break;
      case '`': fputc('\\', fp); fputc('`', fp); break;
      case '\'': fputc('\\', fp); fputc('\'', fp); break;
      case '\"': fputc('\\', fp); fputc('\"', fp); break;
      case '#': fputc('\\', fp); fputc('#', fp); break;
      default:
        putc(*q, fp);
        break;
    }
  }

  /* --- Done --- */

  if (qq)
    putc(qq, fp);
}
401
402 /*----- Guts --------------------------------------------------------------*/
403
404 static int checkhash(const char *file, unsigned f, const gchash *gch)
405 {
406 int rc;
407 FILE *fp;
408 dstr d = DSTR_INIT;
409 dstr dd = DSTR_INIT;
410 unsigned long n = 0, nfail = 0;
411 octet *buf = xmalloc(2 * gch->hashsz);
412
413 if (!file)
414 fp = stdin;
415 else if ((fp = fopen(file, f & f_raw ? "r" : "rb")) == 0) {
416 moan("couldn't open `%s': %s", file, strerror(errno));
417 return (EXIT_FAILURE);
418 }
419
420 while (DRESET(&d), dstr_putline(&d, fp) != EOF) {
421 char *p = d.buf;
422 char *q;
423 unsigned ff = f;
424
425 /* --- Handle a directive --- */
426
427 if (*p == '#') {
428 p++;
429 if ((q = str_getword(&p)) == 0)
430 continue;
431 if (strcmp(q, "hash") == 0) {
432 const gchash *g;
433 if ((q = str_getword(&p)) == 0)
434 continue;
435 if ((g = gethash(q)) == 0)
436 continue;
437 gch = g;
438 xfree(buf);
439 buf = xmalloc(2 * gch->hashsz);
440 } else if (strcmp(q, "escape") == 0)
441 f |= f_escape;
442 continue;
443 }
444
445 /* --- Otherwise it's a hex thing --- */
446
447 q = p;
448 while (*p && *p != ' ')
449 p++;
450 if (!*p)
451 continue;
452 *p++ = 0;
453 if (gethex(q, buf, gch->hashsz, 0) < gch->hashsz)
454 continue;
455 if (*p == '*')
456 ff |= f_binary;
457 else if (*p != ' ')
458 continue;
459 p++;
460
461 if (f & f_escape) {
462 DRESET(&dd);
463 getstring(0, p, &dd, 0);
464 p = dd.buf;
465 }
466
467 if (fhash(p, ff, gch, buf + gch->hashsz)) {
468 moan("couldn't read `%s': %s", p, strerror(errno));
469 rc = EXIT_FAILURE;
470 continue;
471 }
472 if (memcmp(buf, buf + gch->hashsz, gch->hashsz) != 0) {
473 if (ff & f_verbose)
474 fprintf(stderr, "FAIL %s\n", p);
475 else
476 moan("%s check failed for `%s'", gch->name, p);
477 nfail++;
478 rc = EXIT_FAILURE;
479 } else {
480 if (ff & f_verbose)
481 fprintf(stderr, "OK %s\n", p);
482 }
483 n++;
484 }
485
486 dstr_destroy(&d);
487 dstr_destroy(&dd);
488 xfree(buf);
489 if ((f & f_verbose) && nfail)
490 moan("%lu of %lu file(s) failed %s check", nfail, n, gch->name);
491 else if (!n)
492 moan("no files checked");
493 return (0);
494 }
495
496 static int dohash(const char *file, unsigned f, const gchash *gch)
497 {
498 int rc = 0;
499 octet *p = xmalloc(gch->hashsz);
500
501 if (fhash(file, f, gch, p)) {
502 moan("couldn't read `%s': %s", file ? file : "<stdin>", strerror(errno));
503 rc = EXIT_FAILURE;
504 } else {
505 puthex(p, gch->hashsz, stdout);
506 if (file) {
507 fputc(' ', stdout);
508 fputc(f & f_binary ? '*' : ' ', stdout);
509 if (f & f_escape)
510 putstring(stdout, file, 0);
511 else
512 fputs(file, stdout);
513 }
514 fputc('\n', stdout);
515 }
516
517 xfree(p);
518 return (rc);
519 }
520
521 static int dofile(const char *file, unsigned f, const gchash *gch)
522 {
523 return (f & f_check ? checkhash : dohash)(file, f, gch);
524 }
525
526 static int hashfiles(const char *file, unsigned f, const gchash *gch)
527 {
528 FILE *fp;
529 dstr d = DSTR_INIT;
530 int rc = 0;
531 int rrc;
532
533 if (!file)
534 fp = stdin;
535 else if ((fp = fopen(file, f & f_raw ? "r" : "rb")) == 0) {
536 moan("couldn't open `%s': %s", file, strerror(errno));
537 return (EXIT_FAILURE);
538 }
539
540 for (;;) {
541 DRESET(&d);
542 if (getstring(fp, 0, &d, f & f_raw))
543 break;
544 if ((rrc = dofile(d.buf, f, gch)) != 0)
545 rc = rrc;
546 }
547
548 return (rc);
549 }
550
551 static int hashsum(const char *file, unsigned f, const gchash *gch)
552 {
553 return (f & f_files ? hashfiles : dofile)(file, f, gch);
554 }
555
556 /*----- Main driver -------------------------------------------------------*/
557
558 static void version(FILE *fp)
559 {
560 pquis(fp, "$, Catacomb version " VERSION "\n");
561 }
562
/* --- @usage@ --- *
 *
 * Arguments:   @FILE *fp@ = stream to write on
 *
 * Returns:     ---
 *
 * Use:         Writes the one-line usage summary to the stream, by way of
 *              @pquis@.
 */

static void usage(FILE *fp)
{
  static const char synopsis[] =
    "Usage: $ [-f0ebcv] [-a algorithm] [files...]\n";

  pquis(fp, synopsis);
}
567
568 static void help(FILE *fp, const gchash *gch)
569 {
570 version(fp);
571 fputc('\n', fp);
572 usage(fp);
573 pquis(fp, "\n\
574 Generates or checks message digests on files. Options available:\n\
575 \n\
576 -h, --help Display this help message.\n\
577 -V, --version Display program's version number.\n\
578 -u, --usage Display a terse usage message.\n\
579 \n\
580 -a, --algorithm=ALG Use the message digest algorithm ALG.\n\
581 \n\
582 -f, --files Read a list of file names from standard input.\n\
583 -0, --null File names are null terminated, not plain text.\n\
584 \n\
585 -e, --escape Escape funny characters in filenames.\n\
586 -c, --check Check message digests rather than emitting them.\n\
587 -b, --binary When reading files, treat them as binary.\n\
588 -v, --verbose Be verbose when checking digests.\n\
589 \n\
590 For a list of supported message digest algorithms, type `$ --list'.\n\
591 ");
592 if (gch)
593 fprintf(fp, "The default message digest algorithm is %s.\n", gch->name);
594 }
595
596 int main(int argc, char *argv[])
597 {
598 unsigned f = 0;
599 const gchash *gch = 0;
600 int rc;
601
602 /* --- Initialization --- */
603
604 ego(argv[0]);
605 sub_init();
606
607 /* --- Choose a hash function from the name --- */
608
609 {
610 char *q = xstrdup(QUIS);
611 size_t len = strlen(q);
612 if (len > 3 && strcmp(q + len - 3, "sum") == 0) {
613 q[len - 3] = 0;
614 gch = gethash(q);
615 }
616 if (!gch)
617 gch = hashtab[0];
618 xfree(q);
619 }
620
621 /* --- Read options --- */
622
623 for (;;) {
624 static struct option opts[] = {
625 { "help", 0, 0, 'h' },
626 { "verbose", 0, 0, 'V' },
627 { "usage", 0, 0, 'u' },
628
629 { "algorithm", OPTF_ARGREQ, 0, 'a' },
630 { "hash", OPTF_ARGREQ, 0, 'a' },
631 { "list", 0, 0, 'l' },
632
633 { "files", 0, 0, 'f' },
634 { "find", 0, 0, 'f' },
635 { "null", 0, 0, '0' },
636
637 { "escape", 0, 0, 'e' },
638 { "check", 0, 0, 'c' },
639 { "binary", 0, 0, 'b' },
640 { "verbose", 0, 0, 'v' },
641
642 { 0, 0, 0, 0 }
643 };
644 int i = mdwopt(argc, argv, "hVu a:l f0 ecbv", opts, 0, 0, 0);
645 if (i < 0)
646 break;
647
648 switch (i) {
649 case 'h':
650 help(stdout, gch);
651 exit(0);
652 case 'V':
653 version(stdout);
654 exit(0);
655 case 'u':
656 usage(stdout);
657 exit(0);
658 case 'a':
659 if ((gch = gethash(optarg)) == 0)
660 die(EXIT_FAILURE, "unknown hash algorithm `%s'", optarg);
661 f |= f_oddhash;
662 break;
663 case 'l': {
664 unsigned j;
665 for (j = 0; hashtab[j]; j++) {
666 if (j)
667 fputc(' ', stdout);
668 printf("%s", hashtab[j]->name);
669 }
670 fputc('\n', stdout);
671 exit(0);
672 } break;
673 case 'f':
674 f |= f_files;
675 break;
676 case '0':
677 f |= f_raw;
678 break;
679 case 'e':
680 f |= f_escape;
681 break;
682 case 'c':
683 f |= f_check;
684 break;
685 case 'b':
686 f |= f_binary;
687 break;
688 case 'v':
689 f |= f_verbose;
690 break;
691 default:
692 f |= f_bogus;
693 break;
694 }
695 }
696
697 if (f & f_bogus) {
698 usage(stderr);
699 exit(EXIT_FAILURE);
700 }
701 argv += optind;
702 argc -= optind;
703
704 /* --- Generate output --- */
705
706 if (!(f & f_check)) {
707 if (f & f_oddhash)
708 printf("#hash %s\n", gch->name);
709 if (f & f_escape)
710 fputs("#escape\n", stdout);
711 }
712
713 if (argc) {
714 int i;
715 int rrc;
716 rc = 0;
717 for (i = 0; i < argc; i++) {
718 if ((rrc = hashsum(argv[i], f, gch)) != 0)
719 rc = rrc;
720 }
721 } else
722 rc = hashsum(0, f, gch);
723
724 return (rc);
725 }
726
727 /*----- That's all, folks -------------------------------------------------*/