(checkhash): Be pettier about spaces between the hash and filename, for
[u/mdw/catacomb] / hashsum.c
1 /* -*-c-*-
2 *
3 * $Id: hashsum.c,v 1.3 2000/07/29 17:02:43 mdw Exp $
4 *
5 * Hash files using some secure hash function
6 *
7 * (c) 2000 Straylight/Edgeware
8 */
9
10 /*----- Licensing notice --------------------------------------------------*
11 *
12 * This file is part of Catacomb.
13 *
14 * Catacomb is free software; you can redistribute it and/or modify
15 * it under the terms of the GNU Library General Public License as
16 * published by the Free Software Foundation; either version 2 of the
17 * License, or (at your option) any later version.
18 *
19 * Catacomb is distributed in the hope that it will be useful,
20 * but WITHOUT ANY WARRANTY; without even the implied warranty of
21 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 * GNU Library General Public License for more details.
23 *
24 * You should have received a copy of the GNU Library General Public
25 * License along with Catacomb; if not, write to the Free
26 * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
27 * MA 02111-1307, USA.
28 */
29
30 /*----- Revision history --------------------------------------------------*
31 *
32 * $Log: hashsum.c,v $
33 * Revision 1.3 2000/07/29 17:02:43 mdw
34 * (checkhash): Be pettier about spaces between the hash and filename, for
35 * compatibility with `md5sum'.
36 *
37 * Revision 1.2 2000/07/15 21:14:05 mdw
38 * Missed `-e' out of the usage string.
39 *
40 * Revision 1.1 2000/07/15 20:52:34 mdw
41 * Useful replacement for `md5sum' with support for many different hash
42 * functions and for reading filename lists from `find'.
43 *
44 */
45
46 /*----- Header files ------------------------------------------------------*/
47
48 #include "config.h"
49
50 #include <ctype.h>
51 #include <errno.h>
52 #include <stdio.h>
53 #include <stdlib.h>
54 #include <string.h>
55
56 #include <mLib/alloc.h>
57 #include <mLib/dstr.h>
58 #include <mLib/mdwopt.h>
59 #include <mLib/quis.h>
60 #include <mLib/report.h>
61 #include <mLib/sub.h>
62 #include <mLib/str.h>
63
64 #include "ghash.h"
65
66 #include "md4.h"
67 #include "md5.h"
68 #include "rmd128.h"
69 #include "rmd160.h"
70 #include "rmd256.h"
71 #include "rmd320.h"
72 #include "sha.h"
73 #include "tiger.h"
74
75 /*----- Static variables --------------------------------------------------*/
76
77 static const gchash *hashtab[] = {
78 &md5, &md4, &sha, &rmd128, &rmd160, &rmd256, &rmd320, &tiger,
79 0
80 };
81
/* Option flags, combined into the @unsigned f@ argument passed around. */
enum {
  f_binary  = 1 << 0,           /* Open hashed files in binary mode */
  f_bogus   = 1 << 1,           /* Bad command line option seen */
  f_verbose = 1 << 2,           /* Report each file when checking */
  f_check   = 1 << 3,           /* Check digests rather than emit them */
  f_files   = 1 << 4,           /* Arguments are lists of file names */
  f_raw     = 1 << 5,           /* File name lists are null-terminated */
  f_oddhash = 1 << 6,           /* Algorithm chosen with `-a' */
  f_escape  = 1 << 7            /* Escape funny characters in names */
};
92
93 /*----- Support functions -------------------------------------------------*/
94
95 /* --- @fhash@ --- *
96 *
97 * Arguments: @const char *file@ = file name to be hashed (null for stdin)
98 * @unsigned f@ = flags to set
99 * @const gchash *gch@ = pointer to hash function to use
100 * @void *buf@ = pointer to hash output buffer
101 *
102 * Returns: Zero if it worked, nonzero on error.
103 *
104 * Use: Hashes a file.
105 */
106
107 static int fhash(const char *file, unsigned f, const gchash *gch, void *buf)
108 {
109 FILE *fp;
110 char fbuf[BUFSIZ];
111 size_t sz;
112 ghash *h;
113 int e;
114
115 if (!file)
116 fp = stdin;
117 else if ((fp = fopen(file, f & f_binary ? "rb" : "r")) == 0)
118 return (-1);
119
120 h = gch->init();
121 while ((sz = fread(fbuf, 1, sizeof(fbuf), fp)) > 0)
122 h->ops->hash(h, fbuf, sz);
123 h->ops->done(h, buf);
124 h->ops->destroy(h);
125 e = ferror(fp);
126 if (file)
127 fclose(fp);
128 return (e ? -1 : 0);
129 }
130
131 /* --- @puthex@ --- *
132 *
133 * Arguments: @const octet *buf@ = pointer to a binary buffer
134 * @size_t sz@ = size of the buffer
135 * @FILE *fp@ = pointer to output file handle
136 *
137 * Returns: ---
138 *
139 * Use: Writes a hex dump of a block of memory.
140 */
141
142 static void puthex(const octet *buf, size_t sz, FILE *fp)
143 {
144 while (sz) {
145 fprintf(fp, "%02x", *buf++);
146 sz--;
147 }
148 }
149
150 /* --- @gethex@ --- *
151 *
152 * Arguments: @const char *p@ = pointer to input string
153 * @octet *q@ = pointer to output buffer
154 * @size_t sz@ = size of the output buffer
155 * @char **pp@ = where to put the end pointer
156 *
157 * Returns: The number of bytes written to the buffer.
158 *
159 * Use: Reads hex dumps from the input string.
160 */
161
162 static size_t gethex(const char *p, octet *q, size_t sz, char **pp)
163 {
164 size_t i = 0;
165 while (sz > 0 &&
166 isxdigit((unsigned char)p[0]) &&
167 isxdigit((unsigned char)p[1])) {
168 char buf[3];
169 buf[0] = p[0];
170 buf[1] = p[1];
171 buf[2] = 0;
172 *q++ = strtoul(buf, 0, 16);
173 sz--;
174 p += 2;
175 i++;
176 }
177 if (pp)
178 *pp = (char *)p;
179 return (i);
180 }
181
182 /* --- @gethash@ --- *
183 *
184 * Arguments: @const char *name@ = pointer to name string
185 *
186 * Returns: Pointer to appropriate hash class.
187 *
188 * Use: Chooses a hash function by name.
189 */
190
191 static const gchash *gethash(const char *name)
192 {
193 const gchash **g, *gg = 0;
194 size_t sz = strlen(name);
195 for (g = hashtab; *g; g++) {
196 if (strncmp(name, (*g)->name, sz) == 0) {
197 if ((*g)->name[sz] == 0) {
198 gg = *g;
199 break;
200 } else if (gg)
201 return (0);
202 else
203 gg = *g;
204 }
205 }
206 return (gg);
207 }
208
209 /* --- @getstring@ --- *
210 *
211 * Arguments: @FILE *fp@ = stream from which to read
212 * @const char *p@ = string to read from instead
213 * @dstr *d@ = destination string
214 * @unsigned raw@ = raw or cooked read
215 *
216 * Returns: Zero if OK, nonzero on end-of-file.
217 *
218 * Use: Reads a filename (or something similar) from a stream.
219 */
220
221 static int getstring(FILE *fp, const char *p, dstr *d, unsigned raw)
222 {
223 int ch;
224 int q = 0;
225
226 /* --- Raw: just read exactly what's written up to a null byte --- */
227
228 #define NEXTCH (fp ? getc(fp) : *p++)
229 #define EOFCH (fp ? EOF : 0)
230
231 if (raw) {
232 if ((ch = NEXTCH) == EOFCH)
233 return (EOF);
234 for (;;) {
235 if (!ch)
236 break;
237 DPUTC(d, ch);
238 if ((ch = NEXTCH) == EOFCH)
239 break;
240 }
241 DPUTZ(d);
242 return (0);
243 }
244
245 /* --- Skip as far as whitespace --- *
246 *
247 * Also skip past comments.
248 */
249
250 again:
251 ch = NEXTCH;
252 while (isspace((unsigned char)ch))
253 ch = NEXTCH;
254 if (ch == '#') {
255 do ch = NEXTCH; while (ch != '\n' && ch != EOFCH);
256 goto again;
257 }
258 if (ch == EOFCH)
259 return (EOF);
260
261 /* --- If the character is a quote then read a quoted string --- */
262
263 switch (ch) {
264 case '`':
265 ch = '\'';
266 case '\'':
267 case '\"':
268 q = ch;
269 ch = NEXTCH;
270 break;
271 }
272
273 /* --- Now read all sorts of interesting things --- */
274
275 for (;;) {
276
277 /* --- Handle an escaped thing --- */
278
279 if (ch == '\\') {
280 ch = NEXTCH;
281 if (ch == EOFCH)
282 break;
283 switch (ch) {
284 case 'a': ch = '\a'; break;
285 case 'b': ch = '\b'; break;
286 case 'f': ch = '\f'; break;
287 case 'n': ch = '\n'; break;
288 case 'r': ch = '\r'; break;
289 case 't': ch = '\t'; break;
290 case 'v': ch = '\v'; break;
291 }
292 DPUTC(d, ch);
293 ch = NEXTCH;
294 continue;
295 }
296
297 /* --- If it's a quote or some other end marker then stop --- */
298
299 if (ch == q)
300 break;
301 if (!q && isspace((unsigned char)ch))
302 break;
303
304 /* --- Otherwise contribute and continue --- */
305
306 DPUTC(d, ch);
307 if ((ch = NEXTCH) == EOFCH)
308 break;
309 }
310
311 /* --- Done --- */
312
313 DPUTZ(d);
314 return (0);
315
316 #undef NEXTCH
317 #undef EOFCH
318 }
319
/* --- @putstring@ --- *
 *
 * Arguments:   @FILE *fp@ = stream to write on
 *              @const char *p@ = pointer to text
 *              @unsigned raw@ = whether the string is to be written raw
 *
 * Returns:     ---
 *
 * Use:         Emits a string to a stream.  A raw string is written
 *              verbatim, followed by its terminating null byte.  Otherwise
 *              the string is wrapped in double quotes if it contains any
 *              whitespace, and control characters, quotes, `#' and
 *              backslash are written as backslash escapes so that
 *              @getstring@ reads back the original text.
 */

static void putstring(FILE *fp, const char *p, unsigned raw)
{
  size_t sz = strlen(p);
  unsigned qq;
  const char *q;

  /* --- Just write the string null terminated if raw --- */

  if (raw) {
    fwrite(p, 1, sz + 1, fp);
    return;
  }

  /* --- Check for any dodgy characters --- */

  qq = 0;
  for (q = p; *q; q++) {
    if (isspace((unsigned char)*q)) {
      qq = '\"';
      break;
    }
  }

  if (qq)
    putc(qq, fp);

  /* --- Emit the string --- *
   *
   * Backslash must be escaped too: the reader treats it as the escape
   * character, so a bare one would swallow whatever follows it.
   */

  for (q = p; *q; q++) {
    switch (*q) {
      case '\a': fputc('\\', fp); fputc('a', fp); break;
      case '\b': fputc('\\', fp); fputc('b', fp); break;
      case '\f': fputc('\\', fp); fputc('f', fp); break;
      case '\n': fputc('\\', fp); fputc('n', fp); break;
      case '\r': fputc('\\', fp); fputc('r', fp); break;
      case '\t': fputc('\\', fp); fputc('t', fp); break;
      case '\v': fputc('\\', fp); fputc('v', fp); break;
      case '\\': fputc('\\', fp); fputc('\\', fp); break;
      case '`': fputc('\\', fp); fputc('`', fp); break;
      case '\'': fputc('\\', fp); fputc('\'', fp); break;
      case '\"': fputc('\\', fp); fputc('\"', fp); break;
      case '#': fputc('\\', fp); fputc('#', fp); break;
      default:
        putc(*q, fp);
        break;
    }
  }

  /* --- Done --- */

  if (qq)
    putc(qq, fp);
}
383
384 /*----- Guts --------------------------------------------------------------*/
385
386 static int checkhash(const char *file, unsigned f, const gchash *gch)
387 {
388 int rc;
389 FILE *fp;
390 dstr d = DSTR_INIT;
391 dstr dd = DSTR_INIT;
392 unsigned long n = 0, nfail = 0;
393 octet *buf = xmalloc(2 * gch->hashsz);
394
395 if (!file)
396 fp = stdin;
397 else if ((fp = fopen(file, f & f_raw ? "r" : "rb")) == 0) {
398 moan("couldn't open `%s': %s", file, strerror(errno));
399 return (EXIT_FAILURE);
400 }
401
402 while (DRESET(&d), dstr_putline(&d, fp) != EOF) {
403 char *p = d.buf;
404 char *q;
405 unsigned ff = f;
406
407 /* --- Handle a directive --- */
408
409 if (*p == '#') {
410 p++;
411 if ((q = str_getword(&p)) == 0)
412 continue;
413 if (strcmp(q, "hash") == 0) {
414 const gchash *g;
415 if ((q = str_getword(&p)) == 0)
416 continue;
417 if ((g = gethash(q)) == 0)
418 continue;
419 gch = g;
420 xfree(buf);
421 buf = xmalloc(2 * gch->hashsz);
422 } else if (strcmp(q, "escape") == 0)
423 f |= f_escape;
424 continue;
425 }
426
427 /* --- Otherwise it's a hex thing --- */
428
429 q = p;
430 while (*p && *p != ' ')
431 p++;
432 if (!*p)
433 continue;
434 *p++ = 0;
435 if (gethex(q, buf, gch->hashsz, 0) < gch->hashsz)
436 continue;
437 if (*p == '*')
438 ff |= f_binary;
439 else if (*p != ' ')
440 continue;
441 p++;
442
443 if (f & f_escape) {
444 DRESET(&dd);
445 getstring(0, p, &dd, 0);
446 p = dd.buf;
447 }
448
449 if (fhash(p, ff, gch, buf + gch->hashsz)) {
450 moan("couldn't read `%s': %s", p, strerror(errno));
451 rc = EXIT_FAILURE;
452 continue;
453 }
454 if (memcmp(buf, buf + gch->hashsz, gch->hashsz) != 0) {
455 if (ff & f_verbose)
456 fprintf(stderr, "FAIL %s\n", p);
457 else
458 moan("%s check failed for `%s'", gch->name, p);
459 nfail++;
460 rc = EXIT_FAILURE;
461 } else {
462 if (ff & f_verbose)
463 fprintf(stderr, "OK %s\n", p);
464 }
465 n++;
466 }
467
468 dstr_destroy(&d);
469 dstr_destroy(&dd);
470 xfree(buf);
471 if ((f & f_verbose) && nfail)
472 moan("%lu of %lu file(s) failed %s check", nfail, n, gch->name);
473 else if (!n)
474 moan("no files checked");
475 return (0);
476 }
477
478 static int dohash(const char *file, unsigned f, const gchash *gch)
479 {
480 int rc = 0;
481 octet *p = xmalloc(gch->hashsz);
482
483 if (fhash(file, f, gch, p)) {
484 moan("couldn't read `%s': %s", file ? file : "<stdin>", strerror(errno));
485 rc = EXIT_FAILURE;
486 } else {
487 puthex(p, gch->hashsz, stdout);
488 if (file) {
489 fputc(' ', stdout);
490 fputc(f & f_binary ? '*' : ' ', stdout);
491 if (f & f_escape)
492 putstring(stdout, file, 0);
493 else
494 fputs(file, stdout);
495 }
496 fputc('\n', stdout);
497 }
498
499 xfree(p);
500 return (rc);
501 }
502
503 static int dofile(const char *file, unsigned f, const gchash *gch)
504 {
505 return (f & f_check ? checkhash : dohash)(file, f, gch);
506 }
507
508 static int hashfiles(const char *file, unsigned f, const gchash *gch)
509 {
510 FILE *fp;
511 dstr d = DSTR_INIT;
512 int rc = 0;
513 int rrc;
514
515 if (!file)
516 fp = stdin;
517 else if ((fp = fopen(file, f & f_raw ? "r" : "rb")) == 0) {
518 moan("couldn't open `%s': %s", file, strerror(errno));
519 return (EXIT_FAILURE);
520 }
521
522 for (;;) {
523 DRESET(&d);
524 if (getstring(fp, 0, &d, f & f_raw))
525 break;
526 if ((rrc = dofile(d.buf, f, gch)) != 0)
527 rc = rrc;
528 }
529
530 return (rc);
531 }
532
533 static int hashsum(const char *file, unsigned f, const gchash *gch)
534 {
535 return (f & f_files ? hashfiles : dofile)(file, f, gch);
536 }
537
538 /*----- Main driver -------------------------------------------------------*/
539
540 static void version(FILE *fp)
541 {
542 pquis(fp, "$, Catacomb version " VERSION "\n");
543 }
544
/* --- @usage@ --- *
 *
 * Arguments:   @FILE *fp@ = stream to write on
 *
 * Returns:     ---
 *
 * Use:         Writes the one-line usage summary via @pquis@.
 */

static void usage(FILE *fp)
{
  static const char synopsis[] =
    "Usage: $ [-f0ebcv] [-a algorithm] [files...]\n";

  pquis(fp, synopsis);
}
549
550 static void help(FILE *fp, const gchash *gch)
551 {
552 version(fp);
553 fputc('\n', fp);
554 usage(fp);
555 pquis(fp, "\n\
556 Generates or checks message digests on files. Options available:\n\
557 \n\
558 -h, --help Display this help message.\n\
559 -V, --version Display program's version number.\n\
560 -u, --usage Display a terse usage message.\n\
561 \n\
562 -a, --algorithm=ALG Use the message digest algorithm ALG.\n\
563 \n\
564 -f, --files Read a list of file names from standard input.\n\
565 -0, --null File names are null terminated, not plain text.\n\
566 \n\
567 -e, --escape Escape funny characters in filenames.\n\
568 -c, --check Check message digests rather than emitting them.\n\
569 -b, --binary When reading files, treat them as binary.\n\
570 -v, --verbose Be verbose when checking digests.\n\
571 \n\
572 For a list of supported message digest algorithms, type `$ --list'.\n\
573 ");
574 if (gch)
575 fprintf(fp, "The default message digest algorithm is %s.\n", gch->name);
576 }
577
578 int main(int argc, char *argv[])
579 {
580 unsigned f = 0;
581 const gchash *gch = 0;
582 int rc;
583
584 /* --- Initialization --- */
585
586 ego(argv[0]);
587 sub_init();
588
589 /* --- Choose a hash function from the name --- */
590
591 {
592 char *q = xstrdup(QUIS);
593 size_t len = strlen(q);
594 if (len > 3 && strcmp(q + len - 3, "sum") == 0) {
595 q[len - 3] = 0;
596 gch = gethash(q);
597 }
598 if (!gch)
599 gch = hashtab[0];
600 xfree(q);
601 }
602
603 /* --- Read options --- */
604
605 for (;;) {
606 static struct option opts[] = {
607 { "help", 0, 0, 'h' },
608 { "verbose", 0, 0, 'V' },
609 { "usage", 0, 0, 'u' },
610
611 { "algorithm", OPTF_ARGREQ, 0, 'a' },
612 { "hash", OPTF_ARGREQ, 0, 'a' },
613 { "list", 0, 0, 'l' },
614
615 { "files", 0, 0, 'f' },
616 { "find", 0, 0, 'f' },
617 { "null", 0, 0, '0' },
618
619 { "escape", 0, 0, 'e' },
620 { "check", 0, 0, 'c' },
621 { "binary", 0, 0, 'b' },
622 { "verbose", 0, 0, 'v' },
623
624 { 0, 0, 0, 0 }
625 };
626 int i = mdwopt(argc, argv, "hVu a:l f0 ecbv", opts, 0, 0, 0);
627 if (i < 0)
628 break;
629
630 switch (i) {
631 case 'h':
632 help(stdout, gch);
633 exit(0);
634 case 'V':
635 version(stdout);
636 exit(0);
637 case 'u':
638 usage(stdout);
639 exit(0);
640 case 'a':
641 if ((gch = gethash(optarg)) == 0)
642 die(EXIT_FAILURE, "unknown hash algorithm `%s'", optarg);
643 f |= f_oddhash;
644 break;
645 case 'l': {
646 unsigned j;
647 for (j = 0; hashtab[j]; j++) {
648 if (j)
649 fputc(' ', stdout);
650 printf("%s", hashtab[j]->name);
651 }
652 fputc('\n', stdout);
653 exit(0);
654 } break;
655 case 'f':
656 f |= f_files;
657 break;
658 case '0':
659 f |= f_raw;
660 break;
661 case 'e':
662 f |= f_escape;
663 break;
664 case 'c':
665 f |= f_check;
666 break;
667 case 'b':
668 f |= f_binary;
669 break;
670 case 'v':
671 f |= f_verbose;
672 break;
673 default:
674 f |= f_bogus;
675 break;
676 }
677 }
678
679 if (f & f_bogus) {
680 usage(stderr);
681 exit(EXIT_FAILURE);
682 }
683 argv += optind;
684 argc -= optind;
685
686 /* --- Generate output --- */
687
688 if (!(f & f_check)) {
689 if (f & f_oddhash)
690 printf("#hash %s\n", gch->name);
691 if (f & f_escape)
692 fputs("#escape\n", stdout);
693 }
694
695 if (argc) {
696 int i;
697 int rrc;
698 rc = 0;
699 for (i = 0; i < argc; i++) {
700 if ((rrc = hashsum(argv[i], f, gch)) != 0)
701 rc = rrc;
702 }
703 } else
704 rc = hashsum(0, f, gch);
705
706 return (rc);
707 }
708
709 /*----- That's all, folks -------------------------------------------------*/