Missed `-e' out of the usage string.
[u/mdw/catacomb] / hashsum.c
1 /* -*-c-*-
2 *
3 * $Id: hashsum.c,v 1.2 2000/07/15 21:14:05 mdw Exp $
4 *
5 * Hash files using some secure hash function
6 *
7 * (c) 2000 Straylight/Edgeware
8 */
9
10 /*----- Licensing notice --------------------------------------------------*
11 *
12 * This file is part of Catacomb.
13 *
14 * Catacomb is free software; you can redistribute it and/or modify
15 * it under the terms of the GNU Library General Public License as
16 * published by the Free Software Foundation; either version 2 of the
17 * License, or (at your option) any later version.
18 *
19 * Catacomb is distributed in the hope that it will be useful,
20 * but WITHOUT ANY WARRANTY; without even the implied warranty of
21 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 * GNU Library General Public License for more details.
23 *
24 * You should have received a copy of the GNU Library General Public
25 * License along with Catacomb; if not, write to the Free
26 * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
27 * MA 02111-1307, USA.
28 */
29
30 /*----- Revision history --------------------------------------------------*
31 *
32 * $Log: hashsum.c,v $
33 * Revision 1.2 2000/07/15 21:14:05 mdw
34 * Missed `-e' out of the usage string.
35 *
36 * Revision 1.1 2000/07/15 20:52:34 mdw
37 * Useful replacement for `md5sum' with support for many different hash
38 * functions and for reading filename lists from `find'.
39 *
40 */
41
42 /*----- Header files ------------------------------------------------------*/
43
44 #include "config.h"
45
46 #include <ctype.h>
47 #include <errno.h>
48 #include <stdio.h>
49 #include <stdlib.h>
50 #include <string.h>
51
52 #include <mLib/alloc.h>
53 #include <mLib/dstr.h>
54 #include <mLib/mdwopt.h>
55 #include <mLib/quis.h>
56 #include <mLib/report.h>
57 #include <mLib/sub.h>
58 #include <mLib/str.h>
59
60 #include "ghash.h"
61
62 #include "md4.h"
63 #include "md5.h"
64 #include "rmd128.h"
65 #include "rmd160.h"
66 #include "rmd256.h"
67 #include "rmd320.h"
68 #include "sha.h"
69 #include "tiger.h"
70
71 /*----- Static variables --------------------------------------------------*/
72
73 static const gchash *hashtab[] = {
74 &md5, &md4, &sha, &rmd128, &rmd160, &rmd256, &rmd320, &tiger,
75 0
76 };
77
/* Option and state flags; they are OR-ed together into one bitmask. */
enum {
  f_binary  = 1 << 0,           /* Open hashed files in binary mode */
  f_bogus   = 1 << 1,           /* A bad command-line option was seen */
  f_verbose = 1 << 2,           /* Report each file when checking */
  f_check   = 1 << 3,           /* Check digests rather than emit them */
  f_files   = 1 << 4,           /* Inputs are lists of file names */
  f_raw     = 1 << 5,           /* File names are null-terminated */
  f_oddhash = 1 << 6,           /* Algorithm was chosen explicitly */
  f_escape  = 1 << 7            /* Escape odd characters in file names */
};
88
89 /*----- Support functions -------------------------------------------------*/
90
91 /* --- @fhash@ --- *
92 *
93 * Arguments: @const char *file@ = file name to be hashed (null for stdin)
94 * @unsigned f@ = flags to set
95 * @const gchash *gch@ = pointer to hash function to use
96 * @void *buf@ = pointer to hash output buffer
97 *
98 * Returns: Zero if it worked, nonzero on error.
99 *
100 * Use: Hashes a file.
101 */
102
103 static int fhash(const char *file, unsigned f, const gchash *gch, void *buf)
104 {
105 FILE *fp;
106 char fbuf[BUFSIZ];
107 size_t sz;
108 ghash *h;
109 int e;
110
111 if (!file)
112 fp = stdin;
113 else if ((fp = fopen(file, f & f_binary ? "rb" : "r")) == 0)
114 return (-1);
115
116 h = gch->init();
117 while ((sz = fread(fbuf, 1, sizeof(fbuf), fp)) > 0)
118 h->ops->hash(h, fbuf, sz);
119 h->ops->done(h, buf);
120 h->ops->destroy(h);
121 e = ferror(fp);
122 if (file)
123 fclose(fp);
124 return (e ? -1 : 0);
125 }
126
127 /* --- @puthex@ --- *
128 *
129 * Arguments: @const octet *buf@ = pointer to a binary buffer
130 * @size_t sz@ = size of the buffer
131 * @FILE *fp@ = pointer to output file handle
132 *
133 * Returns: ---
134 *
135 * Use: Writes a hex dump of a block of memory.
136 */
137
138 static void puthex(const octet *buf, size_t sz, FILE *fp)
139 {
140 while (sz) {
141 fprintf(fp, "%02x", *buf++);
142 sz--;
143 }
144 }
145
146 /* --- @gethex@ --- *
147 *
148 * Arguments: @const char *p@ = pointer to input string
149 * @octet *q@ = pointer to output buffer
150 * @size_t sz@ = size of the output buffer
151 * @char **pp@ = where to put the end pointer
152 *
153 * Returns: The number of bytes written to the buffer.
154 *
155 * Use: Reads hex dumps from the input string.
156 */
157
158 static size_t gethex(const char *p, octet *q, size_t sz, char **pp)
159 {
160 size_t i = 0;
161 while (sz > 0 &&
162 isxdigit((unsigned char)p[0]) &&
163 isxdigit((unsigned char)p[1])) {
164 char buf[3];
165 buf[0] = p[0];
166 buf[1] = p[1];
167 buf[2] = 0;
168 *q++ = strtoul(buf, 0, 16);
169 sz--;
170 p += 2;
171 i++;
172 }
173 if (pp)
174 *pp = (char *)p;
175 return (i);
176 }
177
178 /* --- @gethash@ --- *
179 *
180 * Arguments: @const char *name@ = pointer to name string
181 *
182 * Returns: Pointer to appropriate hash class.
183 *
184 * Use: Chooses a hash function by name.
185 */
186
187 static const gchash *gethash(const char *name)
188 {
189 const gchash **g, *gg = 0;
190 size_t sz = strlen(name);
191 for (g = hashtab; *g; g++) {
192 if (strncmp(name, (*g)->name, sz) == 0) {
193 if ((*g)->name[sz] == 0) {
194 gg = *g;
195 break;
196 } else if (gg)
197 return (0);
198 else
199 gg = *g;
200 }
201 }
202 return (gg);
203 }
204
205 /* --- @getstring@ --- *
206 *
207 * Arguments: @FILE *fp@ = stream from which to read
208 * @const char *p@ = string to read from instead
209 * @dstr *d@ = destination string
210 * @unsigned raw@ = raw or cooked read
211 *
212 * Returns: Zero if OK, nonzero on end-of-file.
213 *
214 * Use: Reads a filename (or something similar) from a stream.
215 */
216
217 static int getstring(FILE *fp, const char *p, dstr *d, unsigned raw)
218 {
219 int ch;
220 int q = 0;
221
222 /* --- Raw: just read exactly what's written up to a null byte --- */
223
224 #define NEXTCH (fp ? getc(fp) : *p++)
225 #define EOFCH (fp ? EOF : 0)
226
227 if (raw) {
228 if ((ch = NEXTCH) == EOFCH)
229 return (EOF);
230 for (;;) {
231 if (!ch)
232 break;
233 DPUTC(d, ch);
234 if ((ch = NEXTCH) == EOFCH)
235 break;
236 }
237 DPUTZ(d);
238 return (0);
239 }
240
241 /* --- Skip as far as whitespace --- *
242 *
243 * Also skip past comments.
244 */
245
246 again:
247 ch = NEXTCH;
248 while (isspace((unsigned char)ch))
249 ch = NEXTCH;
250 if (ch == '#') {
251 do ch = NEXTCH; while (ch != '\n' && ch != EOFCH);
252 goto again;
253 }
254 if (ch == EOFCH)
255 return (EOF);
256
257 /* --- If the character is a quote then read a quoted string --- */
258
259 switch (ch) {
260 case '`':
261 ch = '\'';
262 case '\'':
263 case '\"':
264 q = ch;
265 ch = NEXTCH;
266 break;
267 }
268
269 /* --- Now read all sorts of interesting things --- */
270
271 for (;;) {
272
273 /* --- Handle an escaped thing --- */
274
275 if (ch == '\\') {
276 ch = NEXTCH;
277 if (ch == EOFCH)
278 break;
279 switch (ch) {
280 case 'a': ch = '\a'; break;
281 case 'b': ch = '\b'; break;
282 case 'f': ch = '\f'; break;
283 case 'n': ch = '\n'; break;
284 case 'r': ch = '\r'; break;
285 case 't': ch = '\t'; break;
286 case 'v': ch = '\v'; break;
287 }
288 DPUTC(d, ch);
289 ch = NEXTCH;
290 continue;
291 }
292
293 /* --- If it's a quote or some other end marker then stop --- */
294
295 if (ch == q)
296 break;
297 if (!q && isspace((unsigned char)ch))
298 break;
299
300 /* --- Otherwise contribute and continue --- */
301
302 DPUTC(d, ch);
303 if ((ch = NEXTCH) == EOFCH)
304 break;
305 }
306
307 /* --- Done --- */
308
309 DPUTZ(d);
310 return (0);
311
312 #undef NEXTCH
313 #undef EOFCH
314 }
315
/* --- @putstring@ --- *
 *
 * Arguments:   @FILE *fp@ = stream to write on
 *              @const char *p@ = pointer to text
 *              @unsigned raw@ = whether the string is to be written raw
 *
 * Returns:     ---
 *
 * Use:         Emits a string to a stream.  A raw string is written
 *              verbatim, followed by its terminating null byte.  A cooked
 *              string is wrapped in double quotes if it contains any
 *              whitespace, and control characters, quotes, `#' and
 *              backslashes are written as backslash escapes.
 */

static void putstring(FILE *fp, const char *p, unsigned raw)
{
  unsigned qq;
  const char *q;

  /* --- Just write the string null terminated if raw --- */

  if (raw) {
    fwrite(p, 1, strlen(p) + 1, fp);
    return;
  }

  /* --- Check for any dodgy characters --- */

  qq = 0;
  for (q = p; *q; q++) {
    if (isspace((unsigned char)*q)) {
      qq = '\"';
      break;
    }
  }

  if (qq)
    putc(qq, fp);

  /* --- Emit the string --- *
   *
   * @esc@ is the character to write after a backslash, or zero if the
   * character can be written as it stands.  The backslash itself must be
   * escaped, or it would be taken as an escape introducer when the
   * string is read back.
   */

  for (q = p; *q; q++) {
    int esc = 0;
    switch (*q) {
      case '\a': esc = 'a'; break;
      case '\b': esc = 'b'; break;
      case '\f': esc = 'f'; break;
      case '\n': esc = 'n'; break;
      case '\r': esc = 'r'; break;
      case '\t': esc = 't'; break;
      case '\v': esc = 'v'; break;
      case '`':
      case '\'':
      case '\"':
      case '#':
      case '\\':
        esc = *q;
        break;
      default:
        break;
    }
    if (esc) {
      fputc('\\', fp);
      fputc(esc, fp);
    } else
      putc(*q, fp);
  }

  /* --- Done --- */

  if (qq)
    putc(qq, fp);
}
379
380 /*----- Guts --------------------------------------------------------------*/
381
382 static int checkhash(const char *file, unsigned f, const gchash *gch)
383 {
384 int rc;
385 FILE *fp;
386 dstr d = DSTR_INIT;
387 dstr dd = DSTR_INIT;
388 unsigned long n = 0, nfail = 0;
389 octet *buf = xmalloc(2 * gch->hashsz);
390
391 if (!file)
392 fp = stdin;
393 else if ((fp = fopen(file, f & f_raw ? "r" : "rb")) == 0) {
394 moan("couldn't open `%s': %s", file, strerror(errno));
395 return (EXIT_FAILURE);
396 }
397
398 while (DRESET(&d), dstr_putline(&d, fp) != EOF) {
399 char *p = d.buf;
400 char *q;
401 unsigned ff = f;
402
403 /* --- Handle a directive --- */
404
405 if (*p == '#') {
406 p++;
407 if ((q = str_getword(&p)) == 0)
408 continue;
409 if (strcmp(q, "hash") == 0) {
410 const gchash *g;
411 if ((q = str_getword(&p)) == 0)
412 continue;
413 if ((g = gethash(q)) == 0)
414 continue;
415 gch = g;
416 xfree(buf);
417 buf = xmalloc(2 * gch->hashsz);
418 } else if (strcmp(q, "escape") == 0)
419 f |= f_escape;
420 continue;
421 }
422
423 /* --- Otherwise it's a hex thing --- */
424
425 if ((q = str_getword(&p)) == 0)
426 continue;
427 if (gethex(q, buf, gch->hashsz, 0) < gch->hashsz)
428 continue;
429 while (isspace((unsigned char)*p))
430 p++;
431 if (*p == '*') {
432 p++;
433 ff |= f_binary;
434 }
435 if (!*p)
436 continue;
437
438 if (f & f_escape) {
439 DRESET(&dd);
440 getstring(0, p, &dd, 0);
441 p = dd.buf;
442 }
443
444 if (fhash(p, ff, gch, buf + gch->hashsz)) {
445 moan("couldn't read `%s': %s", p, strerror(errno));
446 rc = EXIT_FAILURE;
447 continue;
448 }
449 if (memcmp(buf, buf + gch->hashsz, gch->hashsz) != 0) {
450 if (ff & f_verbose)
451 fprintf(stderr, "FAIL %s\n", p);
452 else
453 moan("%s check failed for `%s'", gch->name, p);
454 nfail++;
455 rc = EXIT_FAILURE;
456 } else {
457 if (ff & f_verbose)
458 fprintf(stderr, "OK %s\n", p);
459 }
460 n++;
461 }
462
463 dstr_destroy(&d);
464 dstr_destroy(&dd);
465 xfree(buf);
466 if ((f & f_verbose) && nfail)
467 moan("%lu of %lu file(s) failed %s check", nfail, n, gch->name);
468 else if (!n)
469 moan("no files checked");
470 return (0);
471 }
472
473 static int dohash(const char *file, unsigned f, const gchash *gch)
474 {
475 int rc = 0;
476 octet *p = xmalloc(gch->hashsz);
477
478 if (fhash(file, f, gch, p)) {
479 moan("couldn't read `%s': %s", file ? file : "<stdin>", strerror(errno));
480 rc = EXIT_FAILURE;
481 } else {
482 puthex(p, gch->hashsz, stdout);
483 if (file) {
484 fputc(' ', stdout);
485 fputc(f & f_binary ? '*' : ' ', stdout);
486 if (f & f_escape)
487 putstring(stdout, file, 0);
488 else
489 fputs(file, stdout);
490 }
491 fputc('\n', stdout);
492 }
493
494 xfree(p);
495 return (rc);
496 }
497
498 static int hashfiles(const char *file, unsigned f, const gchash *gch)
499 {
500 FILE *fp;
501 dstr d = DSTR_INIT;
502 int rc = 0;
503 int rrc;
504
505 if (!file)
506 fp = stdin;
507 else if ((fp = fopen(file, f & f_raw ? "r" : "rb")) == 0) {
508 moan("couldn't open `%s': %s", file, strerror(errno));
509 return (EXIT_FAILURE);
510 }
511
512 for (;;) {
513 DRESET(&d);
514 if (getstring(fp, 0, &d, f & f_raw))
515 break;
516 if ((rrc = dohash(d.buf, f, gch)) != 0)
517 rc = rrc;
518 }
519
520 return (rc);
521 }
522
523 static int hashsum(const char *file, unsigned f, const gchash *gch)
524 {
525 if (f & f_check)
526 return (checkhash(file, f, gch));
527 if (f & f_files)
528 return (hashfiles(file, f, gch));
529 return (dohash(file, f, gch));
530 }
531
532 /*----- Main driver -------------------------------------------------------*/
533
534 static void version(FILE *fp)
535 {
536 pquis(fp, "$, Catacomb version " VERSION "\n");
537 }
538
/* --- @usage@ --- *
 *
 * Arguments:   @FILE *fp@ = stream to write on
 *
 * Returns:     ---
 *
 * Use:         Writes the one-line usage synopsis through @pquis@.
 */

static void usage(FILE *fp)
{
  static const char synopsis[] =
    "Usage: $ [-f0ebcv] [-a algorithm] [files...]\n";

  pquis(fp, synopsis);
}
543
544 static void help(FILE *fp, const gchash *gch)
545 {
546 version(fp);
547 fputc('\n', fp);
548 usage(fp);
549 pquis(fp, "\n\
550 Generates or checks message digests on files. Options available:\n\
551 \n\
552 -h, --help Display this help message.\n\
553 -V, --version Display program's version number.\n\
554 -u, --usage Display a terse usage message.\n\
555 \n\
556 -a, --algorithm=ALG Use the message digest algorithm ALG.\n\
557 \n\
558 -f, --files Read a list of file names from standard input.\n\
559 -0, --null File names are null terminated, not plain text.\n\
560 \n\
561 -e, --escape Escape funny characters in filenames.\n\
562 -c, --check Check message digests rather than emitting them.\n\
563 -b, --binary When reading files, treat them as binary.\n\
564 -v, --verbose Be verbose when checking digests.\n\
565 \n\
566 For a list of supported message digest algorithms, type `$ --list'.\n\
567 ");
568 if (gch)
569 fprintf(fp, "The default message digest algorithm is %s.\n", gch->name);
570 }
571
572 int main(int argc, char *argv[])
573 {
574 unsigned f = 0;
575 const gchash *gch = 0;
576 int rc;
577
578 /* --- Initialization --- */
579
580 ego(argv[0]);
581 sub_init();
582
583 /* --- Choose a hash function from the name --- */
584
585 {
586 char *q = xstrdup(QUIS);
587 size_t len = strlen(q);
588 if (len > 3 && strcmp(q + len - 3, "sum") == 0) {
589 q[len - 3] = 0;
590 gch = gethash(q);
591 }
592 if (!gch)
593 gch = hashtab[0];
594 xfree(q);
595 }
596
597 /* --- Read options --- */
598
599 for (;;) {
600 static struct option opts[] = {
601 { "help", 0, 0, 'h' },
602 { "verbose", 0, 0, 'V' },
603 { "usage", 0, 0, 'u' },
604
605 { "algorithm", OPTF_ARGREQ, 0, 'a' },
606 { "hash", OPTF_ARGREQ, 0, 'a' },
607 { "list", 0, 0, 'l' },
608
609 { "files", 0, 0, 'f' },
610 { "find", 0, 0, 'f' },
611 { "null", 0, 0, '0' },
612
613 { "escape", 0, 0, 'e' },
614 { "check", 0, 0, 'c' },
615 { "binary", 0, 0, 'b' },
616 { "verbose", 0, 0, 'v' },
617
618 { 0, 0, 0, 0 }
619 };
620 int i = mdwopt(argc, argv, "hVu a:l f0 ecbv", opts, 0, 0, 0);
621 if (i < 0)
622 break;
623
624 switch (i) {
625 case 'h':
626 help(stdout, gch);
627 exit(0);
628 case 'V':
629 version(stdout);
630 exit(0);
631 case 'u':
632 usage(stdout);
633 exit(0);
634 case 'a':
635 if ((gch = gethash(optarg)) == 0)
636 die(EXIT_FAILURE, "unknown hash algorithm `%s'", optarg);
637 f |= f_oddhash;
638 break;
639 case 'l': {
640 unsigned j;
641 for (j = 0; hashtab[j]; j++) {
642 if (j)
643 fputc(' ', stdout);
644 printf("%s", hashtab[j]->name);
645 }
646 fputc('\n', stdout);
647 exit(0);
648 } break;
649 case 'f':
650 f |= f_files;
651 break;
652 case '0':
653 f |= f_raw;
654 break;
655 case 'e':
656 f |= f_escape;
657 break;
658 case 'c':
659 f |= f_check;
660 break;
661 case 'b':
662 f |= f_binary;
663 break;
664 case 'v':
665 f |= f_verbose;
666 break;
667 default:
668 f |= f_bogus;
669 break;
670 }
671 }
672
673 if (f & f_bogus) {
674 usage(stderr);
675 exit(EXIT_FAILURE);
676 }
677 argv += optind;
678 argc -= optind;
679
680 /* --- Generate output --- */
681
682 if (!(f & f_check)) {
683 if (f & f_oddhash)
684 printf("#hash %s\n", gch->name);
685 if (f & f_escape)
686 fputs("#escape\n", stdout);
687 }
688
689 if (argc) {
690 int i;
691 int rrc;
692 rc = 0;
693 for (i = 0; i < argc; i++) {
694 if ((rrc = hashsum(argv[i], f, gch)) != 0)
695 rc = rrc;
696 }
697 } else
698 rc = hashsum(0, f, gch);
699
700 return (rc);
701 }
702
703 /*----- That's all, folks -------------------------------------------------*/