11a5d378e060239ab9b155aaea29bd8c7a0789ce
[u/mdw/catacomb] / hashsum.c
1 /* -*-c-*-
2 *
3 * $Id: hashsum.c,v 1.9 2004/04/04 19:42:59 mdw Exp $
4 *
5 * Hash files using some secure hash function
6 *
7 * (c) 2000 Straylight/Edgeware
8 */
9
10 /*----- Licensing notice --------------------------------------------------*
11 *
12 * This file is part of Catacomb.
13 *
14 * Catacomb is free software; you can redistribute it and/or modify
15 * it under the terms of the GNU Library General Public License as
16 * published by the Free Software Foundation; either version 2 of the
17 * License, or (at your option) any later version.
18 *
19 * Catacomb is distributed in the hope that it will be useful,
20 * but WITHOUT ANY WARRANTY; without even the implied warranty of
21 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 * GNU Library General Public License for more details.
23 *
24 * You should have received a copy of the GNU Library General Public
25 * License along with Catacomb; if not, write to the Free
26 * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
27 * MA 02111-1307, USA.
28 */
29
30 /*----- Revision history --------------------------------------------------*
31 *
32 * $Log: hashsum.c,v $
33 * Revision 1.9 2004/04/04 19:42:59 mdw
34 * Add set -e.
35 *
36 * Revision 1.8 2001/04/19 18:26:33 mdw
37 * Add CRC as another hash function.
38 *
39 * Revision 1.7 2001/02/21 20:03:22 mdw
40 * Added support for MD2 hash function.
41 *
42 * Revision 1.6 2001/01/25 21:40:14 mdw
43 * Support for new SHA variants added.
44 *
45 * Revision 1.5 2000/12/06 20:33:27 mdw
46 * Make flags be macros rather than enumerations, to ensure that they're
47 * unsigned.
48 *
49 * Revision 1.4 2000/08/04 23:23:44 mdw
50 * Various <ctype.h> fixes.
51 *
52 * Revision 1.3 2000/07/29 17:02:43 mdw
53 * (checkhash): Be pettier about spaces between the hash and filename, for
54 * compatibility with `md5sum'.
55 *
56 * Revision 1.2 2000/07/15 21:14:05 mdw
57 * Missed `-e' out of the usage string.
58 *
59 * Revision 1.1 2000/07/15 20:52:34 mdw
60 * Useful replacement for `md5sum' with support for many different hash
61 * functions and for reading filename lists from `find'.
62 *
63 */
64
65 /*----- Header files ------------------------------------------------------*/
66
67 #include "config.h"
68
69 #include <ctype.h>
70 #include <errno.h>
71 #include <stdio.h>
72 #include <stdlib.h>
73 #include <string.h>
74
75 #include <mLib/alloc.h>
76 #include <mLib/dstr.h>
77 #include <mLib/mdwopt.h>
78 #include <mLib/quis.h>
79 #include <mLib/report.h>
80 #include <mLib/sub.h>
81 #include <mLib/str.h>
82
83 #include "ghash.h"
84
85 /*----- Static variables --------------------------------------------------*/
86
/* Option and state flags, combined bitwise in a single @unsigned@ word.
 * They are macros with a `u' suffix so that every value is unsigned.
 */

#define f_binary  (1u << 0)     /* `-b': open hashed files in binary mode */
#define f_bogus   (1u << 1)     /* An unrecognized option was seen */
#define f_verbose (1u << 2)     /* `-v': report each file when checking */
#define f_check   (1u << 3)     /* `-c': check digests, don't emit them */
#define f_files   (1u << 4)     /* `-f': inputs are lists of file names */
#define f_raw     (1u << 5)     /* `-0': names are null-terminated */
#define f_oddhash (1u << 6)     /* `-a': algorithm chosen explicitly */
#define f_escape  (1u << 7)     /* `-e': escape funny characters in names */
95
96 /*----- Support functions -------------------------------------------------*/
97
98 /* --- @fhash@ --- *
99 *
100 * Arguments: @const char *file@ = file name to be hashed (null for stdin)
101 * @unsigned f@ = flags to set
102 * @const gchash *gch@ = pointer to hash function to use
103 * @void *buf@ = pointer to hash output buffer
104 *
105 * Returns: Zero if it worked, nonzero on error.
106 *
107 * Use: Hashes a file.
108 */
109
110 static int fhash(const char *file, unsigned f, const gchash *gch, void *buf)
111 {
112 FILE *fp;
113 char fbuf[BUFSIZ];
114 size_t sz;
115 ghash *h;
116 int e;
117
118 if (!file)
119 fp = stdin;
120 else if ((fp = fopen(file, f & f_binary ? "rb" : "r")) == 0)
121 return (-1);
122
123 h = gch->init();
124 while ((sz = fread(fbuf, 1, sizeof(fbuf), fp)) > 0)
125 h->ops->hash(h, fbuf, sz);
126 h->ops->done(h, buf);
127 h->ops->destroy(h);
128 e = ferror(fp);
129 if (file)
130 fclose(fp);
131 return (e ? -1 : 0);
132 }
133
134 /* --- @puthex@ --- *
135 *
136 * Arguments: @const octet *buf@ = pointer to a binary buffer
137 * @size_t sz@ = size of the buffer
138 * @FILE *fp@ = pointer to output file handle
139 *
140 * Returns: ---
141 *
142 * Use: Writes a hex dump of a block of memory.
143 */
144
145 static void puthex(const octet *buf, size_t sz, FILE *fp)
146 {
147 while (sz) {
148 fprintf(fp, "%02x", *buf++);
149 sz--;
150 }
151 }
152
153 /* --- @gethex@ --- *
154 *
155 * Arguments: @const char *p@ = pointer to input string
156 * @octet *q@ = pointer to output buffer
157 * @size_t sz@ = size of the output buffer
158 * @char **pp@ = where to put the end pointer
159 *
160 * Returns: The number of bytes written to the buffer.
161 *
162 * Use: Reads hex dumps from the input string.
163 */
164
165 static size_t gethex(const char *p, octet *q, size_t sz, char **pp)
166 {
167 size_t i = 0;
168 while (sz > 0 &&
169 isxdigit((unsigned char)p[0]) &&
170 isxdigit((unsigned char)p[1])) {
171 char buf[3];
172 buf[0] = p[0];
173 buf[1] = p[1];
174 buf[2] = 0;
175 *q++ = strtoul(buf, 0, 16);
176 sz--;
177 p += 2;
178 i++;
179 }
180 if (pp)
181 *pp = (char *)p;
182 return (i);
183 }
184
185 /* --- @gethash@ --- *
186 *
187 * Arguments: @const char *name@ = pointer to name string
188 *
189 * Returns: Pointer to appropriate hash class.
190 *
191 * Use: Chooses a hash function by name.
192 */
193
194 static const gchash *gethash(const char *name)
195 {
196 const gchash *const *g, *gg = 0;
197 size_t sz = strlen(name);
198 for (g = ghashtab; *g; g++) {
199 if (strncmp(name, (*g)->name, sz) == 0) {
200 if ((*g)->name[sz] == 0) {
201 gg = *g;
202 break;
203 } else if (gg)
204 return (0);
205 else
206 gg = *g;
207 }
208 }
209 return (gg);
210 }
211
212 /* --- @getstring@ --- *
213 *
214 * Arguments: @FILE *fp@ = stream from which to read
215 * @const char *p@ = string to read from instead
216 * @dstr *d@ = destination string
217 * @unsigned raw@ = raw or cooked read
218 *
219 * Returns: Zero if OK, nonzero on end-of-file.
220 *
221 * Use: Reads a filename (or something similar) from a stream.
222 */
223
224 static int getstring(FILE *fp, const char *p, dstr *d, unsigned raw)
225 {
226 int ch;
227 int q = 0;
228
229 /* --- Raw: just read exactly what's written up to a null byte --- */
230
231 #define NEXTCH (fp ? getc(fp) : (unsigned char)*p++)
232 #define EOFCH (fp ? EOF : 0)
233
234 if (raw) {
235 if ((ch = NEXTCH) == EOFCH)
236 return (EOF);
237 for (;;) {
238 if (!ch)
239 break;
240 DPUTC(d, ch);
241 if ((ch = NEXTCH) == EOFCH)
242 break;
243 }
244 DPUTZ(d);
245 return (0);
246 }
247
248 /* --- Skip as far as whitespace --- *
249 *
250 * Also skip past comments.
251 */
252
253 again:
254 ch = NEXTCH;
255 while (isspace(ch))
256 ch = NEXTCH;
257 if (ch == '#') {
258 do ch = NEXTCH; while (ch != '\n' && ch != EOFCH);
259 goto again;
260 }
261 if (ch == EOFCH)
262 return (EOF);
263
264 /* --- If the character is a quote then read a quoted string --- */
265
266 switch (ch) {
267 case '`':
268 ch = '\'';
269 case '\'':
270 case '\"':
271 q = ch;
272 ch = NEXTCH;
273 break;
274 }
275
276 /* --- Now read all sorts of interesting things --- */
277
278 for (;;) {
279
280 /* --- Handle an escaped thing --- */
281
282 if (ch == '\\') {
283 ch = NEXTCH;
284 if (ch == EOFCH)
285 break;
286 switch (ch) {
287 case 'a': ch = '\a'; break;
288 case 'b': ch = '\b'; break;
289 case 'f': ch = '\f'; break;
290 case 'n': ch = '\n'; break;
291 case 'r': ch = '\r'; break;
292 case 't': ch = '\t'; break;
293 case 'v': ch = '\v'; break;
294 }
295 DPUTC(d, ch);
296 ch = NEXTCH;
297 continue;
298 }
299
300 /* --- If it's a quote or some other end marker then stop --- */
301
302 if (ch == q)
303 break;
304 if (!q && isspace(ch))
305 break;
306
307 /* --- Otherwise contribute and continue --- */
308
309 DPUTC(d, ch);
310 if ((ch = NEXTCH) == EOFCH)
311 break;
312 }
313
314 /* --- Done --- */
315
316 DPUTZ(d);
317 return (0);
318
319 #undef NEXTCH
320 #undef EOFCH
321 }
322
/* --- @putstring@ --- *
 *
 * Arguments:   @FILE *fp@ = stream to write on
 *              @const char *p@ = pointer to text
 *              @unsigned raw@ = whether the string is to be written raw
 *
 * Returns:     ---
 *
 * Use:         Emits a string to a stream.  A raw string is written
 *              verbatim, followed by its null terminator.  Otherwise the
 *              string is wrapped in double quotes if it contains any
 *              whitespace, and control characters, quotes, `#' and
 *              backslash are written as backslash escapes.  Backslash
 *              must be escaped because a reader which honours these
 *              escapes would otherwise swallow it.
 */

static void putstring(FILE *fp, const char *p, unsigned raw)
{
  unsigned qq = 0;
  const char *q;

  /* --- Just write the string null terminated if raw --- */

  if (raw) {
    fwrite(p, 1, strlen(p) + 1, fp);
    return;
  }

  /* --- Check for any dodgy characters --- */

  for (q = p; *q; q++) {
    if (isspace((unsigned char)*q)) {
      qq = '\"';
      break;
    }
  }

  if (qq)
    putc(qq, fp);

  /* --- Emit the string --- *
   *
   * @esc@ is the character to write after a backslash, or zero if the
   * current character can be written as it stands.
   */

  for (q = p; *q; q++) {
    int esc = 0;

    switch (*q) {
      case '\a': esc = 'a'; break;
      case '\b': esc = 'b'; break;
      case '\f': esc = 'f'; break;
      case '\n': esc = 'n'; break;
      case '\r': esc = 'r'; break;
      case '\t': esc = 't'; break;
      case '\v': esc = 'v'; break;
      case '`':
      case '\'':
      case '\"':
      case '#':
      case '\\':
        esc = *q;
        break;
      default:
        break;
    }

    if (esc) {
      fputc('\\', fp);
      fputc(esc, fp);
    } else
      putc(*q, fp);
  }

  /* --- Done --- */

  if (qq)
    putc(qq, fp);
}
386
387 /*----- Guts --------------------------------------------------------------*/
388
389 static int checkhash(const char *file, unsigned f, const gchash *gch)
390 {
391 int rc;
392 FILE *fp;
393 dstr d = DSTR_INIT;
394 dstr dd = DSTR_INIT;
395 unsigned long n = 0, nfail = 0;
396 octet *buf = xmalloc(2 * gch->hashsz);
397
398 if (!file)
399 fp = stdin;
400 else if ((fp = fopen(file, f & f_raw ? "r" : "rb")) == 0) {
401 moan("couldn't open `%s': %s", file, strerror(errno));
402 return (EXIT_FAILURE);
403 }
404
405 while (DRESET(&d), dstr_putline(&d, fp) != EOF) {
406 char *p = d.buf;
407 char *q;
408 unsigned ff = f;
409
410 /* --- Handle a directive --- */
411
412 if (*p == '#') {
413 p++;
414 if ((q = str_getword(&p)) == 0)
415 continue;
416 if (strcmp(q, "hash") == 0) {
417 const gchash *g;
418 if ((q = str_getword(&p)) == 0)
419 continue;
420 if ((g = gethash(q)) == 0)
421 continue;
422 gch = g;
423 xfree(buf);
424 buf = xmalloc(2 * gch->hashsz);
425 } else if (strcmp(q, "escape") == 0)
426 f |= f_escape;
427 continue;
428 }
429
430 /* --- Otherwise it's a hex thing --- */
431
432 q = p;
433 while (*p && *p != ' ')
434 p++;
435 if (!*p)
436 continue;
437 *p++ = 0;
438 if (gethex(q, buf, gch->hashsz, 0) < gch->hashsz)
439 continue;
440 if (*p == '*')
441 ff |= f_binary;
442 else if (*p != ' ')
443 continue;
444 p++;
445
446 if (f & f_escape) {
447 DRESET(&dd);
448 getstring(0, p, &dd, 0);
449 p = dd.buf;
450 }
451
452 if (fhash(p, ff, gch, buf + gch->hashsz)) {
453 moan("couldn't read `%s': %s", p, strerror(errno));
454 rc = EXIT_FAILURE;
455 continue;
456 }
457 if (memcmp(buf, buf + gch->hashsz, gch->hashsz) != 0) {
458 if (ff & f_verbose)
459 fprintf(stderr, "FAIL %s\n", p);
460 else
461 moan("%s check failed for `%s'", gch->name, p);
462 nfail++;
463 rc = EXIT_FAILURE;
464 } else {
465 if (ff & f_verbose)
466 fprintf(stderr, "OK %s\n", p);
467 }
468 n++;
469 }
470
471 dstr_destroy(&d);
472 dstr_destroy(&dd);
473 xfree(buf);
474 if ((f & f_verbose) && nfail)
475 moan("%lu of %lu file(s) failed %s check", nfail, n, gch->name);
476 else if (!n)
477 moan("no files checked");
478 return (0);
479 }
480
481 static int dohash(const char *file, unsigned f, const gchash *gch)
482 {
483 int rc = 0;
484 octet *p = xmalloc(gch->hashsz);
485
486 if (fhash(file, f, gch, p)) {
487 moan("couldn't read `%s': %s", file ? file : "<stdin>", strerror(errno));
488 rc = EXIT_FAILURE;
489 } else {
490 puthex(p, gch->hashsz, stdout);
491 if (file) {
492 fputc(' ', stdout);
493 fputc(f & f_binary ? '*' : ' ', stdout);
494 if (f & f_escape)
495 putstring(stdout, file, 0);
496 else
497 fputs(file, stdout);
498 }
499 fputc('\n', stdout);
500 }
501
502 xfree(p);
503 return (rc);
504 }
505
506 static int dofile(const char *file, unsigned f, const gchash *gch)
507 {
508 return (f & f_check ? checkhash : dohash)(file, f, gch);
509 }
510
511 static int hashfiles(const char *file, unsigned f, const gchash *gch)
512 {
513 FILE *fp;
514 dstr d = DSTR_INIT;
515 int rc = 0;
516 int rrc;
517
518 if (!file)
519 fp = stdin;
520 else if ((fp = fopen(file, f & f_raw ? "r" : "rb")) == 0) {
521 moan("couldn't open `%s': %s", file, strerror(errno));
522 return (EXIT_FAILURE);
523 }
524
525 for (;;) {
526 DRESET(&d);
527 if (getstring(fp, 0, &d, f & f_raw))
528 break;
529 if ((rrc = dofile(d.buf, f, gch)) != 0)
530 rc = rrc;
531 }
532
533 return (rc);
534 }
535
536 static int hashsum(const char *file, unsigned f, const gchash *gch)
537 {
538 return (f & f_files ? hashfiles : dofile)(file, f, gch);
539 }
540
541 /*----- Main driver -------------------------------------------------------*/
542
543 static void version(FILE *fp)
544 {
545 pquis(fp, "$, Catacomb version " VERSION "\n");
546 }
547
/* --- @usage@ --- *
 *
 * Arguments:   @FILE *fp@ = stream to write on
 *
 * Returns:     ---
 *
 * Use:         Writes the one-line usage summary via @pquis@.
 */

static void usage(FILE *fp)
{
  static const char synopsis[] =
    "Usage: $ [-f0ebcv] [-a algorithm] [files...]\n";

  pquis(fp, synopsis);
}
552
553 static void help(FILE *fp, const gchash *gch)
554 {
555 version(fp);
556 fputc('\n', fp);
557 usage(fp);
558 pquis(fp, "\n\
559 Generates or checks message digests on files. Options available:\n\
560 \n\
561 -h, --help Display this help message.\n\
562 -V, --version Display program's version number.\n\
563 -u, --usage Display a terse usage message.\n\
564 \n\
565 -a, --algorithm=ALG Use the message digest algorithm ALG.\n\
566 \n\
567 -f, --files Read a list of file names from standard input.\n\
568 -0, --null File names are null terminated, not plain text.\n\
569 \n\
570 -e, --escape Escape funny characters in filenames.\n\
571 -c, --check Check message digests rather than emitting them.\n\
572 -b, --binary When reading files, treat them as binary.\n\
573 -v, --verbose Be verbose when checking digests.\n\
574 \n\
575 For a list of supported message digest algorithms, type `$ --list'.\n\
576 ");
577 if (gch)
578 fprintf(fp, "The default message digest algorithm is %s.\n", gch->name);
579 }
580
581 int main(int argc, char *argv[])
582 {
583 unsigned f = 0;
584 const gchash *gch = 0;
585 int rc;
586
587 /* --- Initialization --- */
588
589 ego(argv[0]);
590 sub_init();
591
592 /* --- Choose a hash function from the name --- */
593
594 {
595 char *q = xstrdup(QUIS);
596 size_t len = strlen(q);
597 if (len > 3 && strcmp(q + len - 3, "sum") == 0) {
598 q[len - 3] = 0;
599 gch = gethash(q);
600 }
601 if (!gch)
602 gch = gethash("md5");
603 xfree(q);
604 }
605
606 /* --- Read options --- */
607
608 for (;;) {
609 static struct option opts[] = {
610 { "help", 0, 0, 'h' },
611 { "verbose", 0, 0, 'V' },
612 { "usage", 0, 0, 'u' },
613
614 { "algorithm", OPTF_ARGREQ, 0, 'a' },
615 { "hash", OPTF_ARGREQ, 0, 'a' },
616 { "list", 0, 0, 'l' },
617
618 { "files", 0, 0, 'f' },
619 { "find", 0, 0, 'f' },
620 { "null", 0, 0, '0' },
621
622 { "escape", 0, 0, 'e' },
623 { "check", 0, 0, 'c' },
624 { "binary", 0, 0, 'b' },
625 { "verbose", 0, 0, 'v' },
626
627 { 0, 0, 0, 0 }
628 };
629 int i = mdwopt(argc, argv, "hVu a:l f0 ecbv", opts, 0, 0, 0);
630 if (i < 0)
631 break;
632
633 switch (i) {
634 case 'h':
635 help(stdout, gch);
636 exit(0);
637 case 'V':
638 version(stdout);
639 exit(0);
640 case 'u':
641 usage(stdout);
642 exit(0);
643 case 'a':
644 if ((gch = gethash(optarg)) == 0)
645 die(EXIT_FAILURE, "unknown hash algorithm `%s'", optarg);
646 f |= f_oddhash;
647 break;
648 case 'l': {
649 unsigned j;
650 for (j = 0; ghashtab[j]; j++) {
651 if (j)
652 fputc(' ', stdout);
653 printf("%s", ghashtab[j]->name);
654 }
655 fputc('\n', stdout);
656 exit(0);
657 } break;
658 case 'f':
659 f |= f_files;
660 break;
661 case '0':
662 f |= f_raw;
663 break;
664 case 'e':
665 f |= f_escape;
666 break;
667 case 'c':
668 f |= f_check;
669 break;
670 case 'b':
671 f |= f_binary;
672 break;
673 case 'v':
674 f |= f_verbose;
675 break;
676 default:
677 f |= f_bogus;
678 break;
679 }
680 }
681
682 if (f & f_bogus) {
683 usage(stderr);
684 exit(EXIT_FAILURE);
685 }
686 argv += optind;
687 argc -= optind;
688
689 /* --- Generate output --- */
690
691 if (!(f & f_check)) {
692 if (f & f_oddhash)
693 printf("#hash %s\n", gch->name);
694 if (f & f_escape)
695 fputs("#escape\n", stdout);
696 }
697
698 if (argc) {
699 int i;
700 int rrc;
701 rc = 0;
702 for (i = 0; i < argc; i++) {
703 if ((rrc = hashsum(argv[i], f, gch)) != 0)
704 rc = rrc;
705 }
706 } else
707 rc = hashsum(0, f, gch);
708
709 return (rc);
710 }
711
712 /*----- That's all, folks -------------------------------------------------*/