Useful replacement for `md5sum' with support for many different hash functions and for reading filename lists from `find'.
[u/mdw/catacomb] / hashsum.c
CommitLineData
e375fe33 1/* -*-c-*-
2 *
3 * $Id: hashsum.c,v 1.1 2000/07/15 20:52:34 mdw Exp $
4 *
5 * Hash files using some secure hash function
6 *
7 * (c) 2000 Straylight/Edgeware
8 */
9
10/*----- Licensing notice --------------------------------------------------*
11 *
12 * This file is part of Catacomb.
13 *
14 * Catacomb is free software; you can redistribute it and/or modify
15 * it under the terms of the GNU Library General Public License as
16 * published by the Free Software Foundation; either version 2 of the
17 * License, or (at your option) any later version.
18 *
19 * Catacomb is distributed in the hope that it will be useful,
20 * but WITHOUT ANY WARRANTY; without even the implied warranty of
21 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 * GNU Library General Public License for more details.
23 *
24 * You should have received a copy of the GNU Library General Public
25 * License along with Catacomb; if not, write to the Free
26 * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
27 * MA 02111-1307, USA.
28 */
29
30/*----- Revision history --------------------------------------------------*
31 *
32 * $Log: hashsum.c,v $
33 * Revision 1.1 2000/07/15 20:52:34 mdw
34 * Useful replacement for `md5sum' with support for many different hash
35 * functions and for reading filename lists from `find'.
36 *
37 */
38
39/*----- Header files ------------------------------------------------------*/
40
41#include "config.h"
42
43#include <ctype.h>
44#include <errno.h>
45#include <stdio.h>
46#include <stdlib.h>
47#include <string.h>
48
49#include <mLib/alloc.h>
50#include <mLib/dstr.h>
51#include <mLib/mdwopt.h>
52#include <mLib/quis.h>
53#include <mLib/report.h>
54#include <mLib/sub.h>
55#include <mLib/str.h>
56
57#include "ghash.h"
58
59#include "md4.h"
60#include "md5.h"
61#include "rmd128.h"
62#include "rmd160.h"
63#include "rmd256.h"
64#include "rmd320.h"
65#include "sha.h"
66#include "tiger.h"
67
68/*----- Static variables --------------------------------------------------*/
69
70static const gchash *hashtab[] = {
71 &md5, &md4, &sha, &rmd128, &rmd160, &rmd256, &rmd320, &tiger,
72 0
73};
74
/* Option flags, combined in a single @unsigned@ bitmask. */
enum {
  f_binary  = 1 << 0,           /* Open the files being hashed in binary mode */
  f_bogus   = 1 << 1,           /* An unrecognised option was seen */
  f_verbose = 1 << 2,           /* Report each result when checking */
  f_check   = 1 << 3,           /* Check digests rather than emit them */
  f_files   = 1 << 4,           /* Inputs are lists of file names */
  f_raw     = 1 << 5,           /* File names in lists are null terminated */
  f_oddhash = 1 << 6,           /* Algorithm was chosen with `-a' */
  f_escape  = 1 << 7            /* Escape funny characters in file names */
};
85
86/*----- Support functions -------------------------------------------------*/
87
88/* --- @fhash@ --- *
89 *
90 * Arguments: @const char *file@ = file name to be hashed (null for stdin)
91 * @unsigned f@ = flags to set
92 * @const gchash *gch@ = pointer to hash function to use
93 * @void *buf@ = pointer to hash output buffer
94 *
95 * Returns: Zero if it worked, nonzero on error.
96 *
97 * Use: Hashes a file.
98 */
99
100static int fhash(const char *file, unsigned f, const gchash *gch, void *buf)
101{
102 FILE *fp;
103 char fbuf[BUFSIZ];
104 size_t sz;
105 ghash *h;
106 int e;
107
108 if (!file)
109 fp = stdin;
110 else if ((fp = fopen(file, f & f_binary ? "rb" : "r")) == 0)
111 return (-1);
112
113 h = gch->init();
114 while ((sz = fread(fbuf, 1, sizeof(fbuf), fp)) > 0)
115 h->ops->hash(h, fbuf, sz);
116 h->ops->done(h, buf);
117 h->ops->destroy(h);
118 e = ferror(fp);
119 if (file)
120 fclose(fp);
121 return (e ? -1 : 0);
122}
123
124/* --- @puthex@ --- *
125 *
126 * Arguments: @const octet *buf@ = pointer to a binary buffer
127 * @size_t sz@ = size of the buffer
128 * @FILE *fp@ = pointer to output file handle
129 *
130 * Returns: ---
131 *
132 * Use: Writes a hex dump of a block of memory.
133 */
134
135static void puthex(const octet *buf, size_t sz, FILE *fp)
136{
137 while (sz) {
138 fprintf(fp, "%02x", *buf++);
139 sz--;
140 }
141}
142
143/* --- @gethex@ --- *
144 *
145 * Arguments: @const char *p@ = pointer to input string
146 * @octet *q@ = pointer to output buffer
147 * @size_t sz@ = size of the output buffer
148 * @char **pp@ = where to put the end pointer
149 *
150 * Returns: The number of bytes written to the buffer.
151 *
152 * Use: Reads hex dumps from the input string.
153 */
154
155static size_t gethex(const char *p, octet *q, size_t sz, char **pp)
156{
157 size_t i = 0;
158 while (sz > 0 &&
159 isxdigit((unsigned char)p[0]) &&
160 isxdigit((unsigned char)p[1])) {
161 char buf[3];
162 buf[0] = p[0];
163 buf[1] = p[1];
164 buf[2] = 0;
165 *q++ = strtoul(buf, 0, 16);
166 sz--;
167 p += 2;
168 i++;
169 }
170 if (pp)
171 *pp = (char *)p;
172 return (i);
173}
174
175/* --- @gethash@ --- *
176 *
177 * Arguments: @const char *name@ = pointer to name string
178 *
179 * Returns: Pointer to appropriate hash class.
180 *
181 * Use: Chooses a hash function by name.
182 */
183
184static const gchash *gethash(const char *name)
185{
186 const gchash **g, *gg = 0;
187 size_t sz = strlen(name);
188 for (g = hashtab; *g; g++) {
189 if (strncmp(name, (*g)->name, sz) == 0) {
190 if ((*g)->name[sz] == 0) {
191 gg = *g;
192 break;
193 } else if (gg)
194 return (0);
195 else
196 gg = *g;
197 }
198 }
199 return (gg);
200}
201
202/* --- @getstring@ --- *
203 *
204 * Arguments: @FILE *fp@ = stream from which to read
205 * @const char *p@ = string to read from instead
206 * @dstr *d@ = destination string
207 * @unsigned raw@ = raw or cooked read
208 *
209 * Returns: Zero if OK, nonzero on end-of-file.
210 *
211 * Use: Reads a filename (or something similar) from a stream.
212 */
213
214static int getstring(FILE *fp, const char *p, dstr *d, unsigned raw)
215{
216 int ch;
217 int q = 0;
218
219 /* --- Raw: just read exactly what's written up to a null byte --- */
220
221#define NEXTCH (fp ? getc(fp) : *p++)
222#define EOFCH (fp ? EOF : 0)
223
224 if (raw) {
225 if ((ch = NEXTCH) == EOFCH)
226 return (EOF);
227 for (;;) {
228 if (!ch)
229 break;
230 DPUTC(d, ch);
231 if ((ch = NEXTCH) == EOFCH)
232 break;
233 }
234 DPUTZ(d);
235 return (0);
236 }
237
238 /* --- Skip as far as whitespace --- *
239 *
240 * Also skip past comments.
241 */
242
243again:
244 ch = NEXTCH;
245 while (isspace((unsigned char)ch))
246 ch = NEXTCH;
247 if (ch == '#') {
248 do ch = NEXTCH; while (ch != '\n' && ch != EOFCH);
249 goto again;
250 }
251 if (ch == EOFCH)
252 return (EOF);
253
254 /* --- If the character is a quote then read a quoted string --- */
255
256 switch (ch) {
257 case '`':
258 ch = '\'';
259 case '\'':
260 case '\"':
261 q = ch;
262 ch = NEXTCH;
263 break;
264 }
265
266 /* --- Now read all sorts of interesting things --- */
267
268 for (;;) {
269
270 /* --- Handle an escaped thing --- */
271
272 if (ch == '\\') {
273 ch = NEXTCH;
274 if (ch == EOFCH)
275 break;
276 switch (ch) {
277 case 'a': ch = '\a'; break;
278 case 'b': ch = '\b'; break;
279 case 'f': ch = '\f'; break;
280 case 'n': ch = '\n'; break;
281 case 'r': ch = '\r'; break;
282 case 't': ch = '\t'; break;
283 case 'v': ch = '\v'; break;
284 }
285 DPUTC(d, ch);
286 ch = NEXTCH;
287 continue;
288 }
289
290 /* --- If it's a quote or some other end marker then stop --- */
291
292 if (ch == q)
293 break;
294 if (!q && isspace((unsigned char)ch))
295 break;
296
297 /* --- Otherwise contribute and continue --- */
298
299 DPUTC(d, ch);
300 if ((ch = NEXTCH) == EOFCH)
301 break;
302 }
303
304 /* --- Done --- */
305
306 DPUTZ(d);
307 return (0);
308
309#undef NEXTCH
310#undef EOFCH
311}
312
/* --- @putstring@ --- *
 *
 * Arguments:   @FILE *fp@ = stream to write on
 *              @const char *p@ = pointer to text
 *              @unsigned raw@ = whether the string is to be written raw
 *
 * Returns:     ---
 *
 * Use:         Emits a string to a stream.  A raw string is written
 *              verbatim, followed by its terminating null byte.  Otherwise
 *              the string is wrapped in double quotes if it contains any
 *              whitespace, and control characters, quotes, `#' and
 *              backslash are written as backslash escapes so that
 *              @getstring@ can read the text back unchanged.
 */

static void putstring(FILE *fp, const char *p, unsigned raw)
{
  unsigned qq = 0;
  const char *q;

  /* --- Just write the string null terminated if raw --- */

  if (raw) {
    fwrite(p, 1, strlen(p) + 1, fp);
    return;
  }

  /* --- Check for any dodgy characters --- */

  for (q = p; *q; q++) {
    if (isspace((unsigned char)*q)) {
      qq = '\"';
      break;
    }
  }

  if (qq)
    putc(qq, fp);

  /* --- Emit the string --- *
   *
   * Backslash must itself be escaped: the reader treats it as the escape
   * introducer, so a bare one would swallow the character following it.
   */

  for (q = p; *q; q++) {
    int esc = 0;

    switch (*q) {
      case '\a': esc = 'a'; break;
      case '\b': esc = 'b'; break;
      case '\f': esc = 'f'; break;
      case '\n': esc = 'n'; break;
      case '\r': esc = 'r'; break;
      case '\t': esc = 't'; break;
      case '\v': esc = 'v'; break;
      case '\\':
      case '`':
      case '\'':
      case '\"':
      case '#':
        esc = *q;
        break;
      default:
        break;
    }
    if (esc) {
      fputc('\\', fp);
      fputc(esc, fp);
    } else
      putc(*q, fp);
  }

  /* --- Done --- */

  if (qq)
    putc(qq, fp);
}
376
377/*----- Guts --------------------------------------------------------------*/
378
379static int checkhash(const char *file, unsigned f, const gchash *gch)
380{
381 int rc;
382 FILE *fp;
383 dstr d = DSTR_INIT;
384 dstr dd = DSTR_INIT;
385 unsigned long n = 0, nfail = 0;
386 octet *buf = xmalloc(2 * gch->hashsz);
387
388 if (!file)
389 fp = stdin;
390 else if ((fp = fopen(file, f & f_raw ? "r" : "rb")) == 0) {
391 moan("couldn't open `%s': %s", file, strerror(errno));
392 return (EXIT_FAILURE);
393 }
394
395 while (DRESET(&d), dstr_putline(&d, fp) != EOF) {
396 char *p = d.buf;
397 char *q;
398 unsigned ff = f;
399
400 /* --- Handle a directive --- */
401
402 if (*p == '#') {
403 p++;
404 if ((q = str_getword(&p)) == 0)
405 continue;
406 if (strcmp(q, "hash") == 0) {
407 const gchash *g;
408 if ((q = str_getword(&p)) == 0)
409 continue;
410 if ((g = gethash(q)) == 0)
411 continue;
412 gch = g;
413 xfree(buf);
414 buf = xmalloc(2 * gch->hashsz);
415 } else if (strcmp(q, "escape") == 0)
416 f |= f_escape;
417 continue;
418 }
419
420 /* --- Otherwise it's a hex thing --- */
421
422 if ((q = str_getword(&p)) == 0)
423 continue;
424 if (gethex(q, buf, gch->hashsz, 0) < gch->hashsz)
425 continue;
426 while (isspace((unsigned char)*p))
427 p++;
428 if (*p == '*') {
429 p++;
430 ff |= f_binary;
431 }
432 if (!*p)
433 continue;
434
435 if (f & f_escape) {
436 DRESET(&dd);
437 getstring(0, p, &dd, 0);
438 p = dd.buf;
439 }
440
441 if (fhash(p, ff, gch, buf + gch->hashsz)) {
442 moan("couldn't read `%s': %s", p, strerror(errno));
443 rc = EXIT_FAILURE;
444 continue;
445 }
446 if (memcmp(buf, buf + gch->hashsz, gch->hashsz) != 0) {
447 if (ff & f_verbose)
448 fprintf(stderr, "FAIL %s\n", p);
449 else
450 moan("%s check failed for `%s'", gch->name, p);
451 nfail++;
452 rc = EXIT_FAILURE;
453 } else {
454 if (ff & f_verbose)
455 fprintf(stderr, "OK %s\n", p);
456 }
457 n++;
458 }
459
460 dstr_destroy(&d);
461 dstr_destroy(&dd);
462 xfree(buf);
463 if ((f & f_verbose) && nfail)
464 moan("%lu of %lu file(s) failed %s check", nfail, n, gch->name);
465 else if (!n)
466 moan("no files checked");
467 return (0);
468}
469
470static int dohash(const char *file, unsigned f, const gchash *gch)
471{
472 int rc = 0;
473 octet *p = xmalloc(gch->hashsz);
474
475 if (fhash(file, f, gch, p)) {
476 moan("couldn't read `%s': %s", file ? file : "<stdin>", strerror(errno));
477 rc = EXIT_FAILURE;
478 } else {
479 puthex(p, gch->hashsz, stdout);
480 if (file) {
481 fputc(' ', stdout);
482 fputc(f & f_binary ? '*' : ' ', stdout);
483 if (f & f_escape)
484 putstring(stdout, file, 0);
485 else
486 fputs(file, stdout);
487 }
488 fputc('\n', stdout);
489 }
490
491 xfree(p);
492 return (rc);
493}
494
495static int hashfiles(const char *file, unsigned f, const gchash *gch)
496{
497 FILE *fp;
498 dstr d = DSTR_INIT;
499 int rc = 0;
500 int rrc;
501
502 if (!file)
503 fp = stdin;
504 else if ((fp = fopen(file, f & f_raw ? "r" : "rb")) == 0) {
505 moan("couldn't open `%s': %s", file, strerror(errno));
506 return (EXIT_FAILURE);
507 }
508
509 for (;;) {
510 DRESET(&d);
511 if (getstring(fp, 0, &d, f & f_raw))
512 break;
513 if ((rrc = dohash(d.buf, f, gch)) != 0)
514 rc = rrc;
515 }
516
517 return (rc);
518}
519
520static int hashsum(const char *file, unsigned f, const gchash *gch)
521{
522 if (f & f_check)
523 return (checkhash(file, f, gch));
524 if (f & f_files)
525 return (hashfiles(file, f, gch));
526 return (dohash(file, f, gch));
527}
528
529/*----- Main driver -------------------------------------------------------*/
530
531static void version(FILE *fp)
532{
533 pquis(fp, "$, Catacomb version " VERSION "\n");
534}
535
/* Writes a one-line usage synopsis to @fp@.  The flag list includes `e',
 * which the option parser in @main@ accepts and the help text documents.
 */
static void usage(FILE *fp)
{
  pquis(fp, "Usage: $ [-f0ebcv] [-a algorithm] [files...]\n");
}
540
541static void help(FILE *fp, const gchash *gch)
542{
543 version(fp);
544 fputc('\n', fp);
545 usage(fp);
546 pquis(fp, "\n\
547Generates or checks message digests on files. Options available:\n\
548\n\
549-h, --help Display this help message.\n\
550-V, --version Display program's version number.\n\
551-u, --usage Display a terse usage message.\n\
552\n\
553-a, --algorithm=ALG Use the message digest algorithm ALG.\n\
554\n\
555-f, --files Read a list of file names from standard input.\n\
556-0, --null File names are null terminated, not plain text.\n\
557\n\
558-e, --escape Escape funny characters in filenames.\n\
559-c, --check Check message digests rather than emitting them.\n\
560-b, --binary When reading files, treat them as binary.\n\
561-v, --verbose Be verbose when checking digests.\n\
562\n\
563For a list of supported message digest algorithms, type `$ --list'.\n\
564");
565 if (gch)
566 fprintf(fp, "The default message digest algorithm is %s.\n", gch->name);
567}
568
569int main(int argc, char *argv[])
570{
571 unsigned f = 0;
572 const gchash *gch = 0;
573 int rc;
574
575 /* --- Initialization --- */
576
577 ego(argv[0]);
578 sub_init();
579
580 /* --- Choose a hash function from the name --- */
581
582 {
583 char *q = xstrdup(QUIS);
584 size_t len = strlen(q);
585 if (len > 3 && strcmp(q + len - 3, "sum") == 0) {
586 q[len - 3] = 0;
587 gch = gethash(q);
588 }
589 if (!gch)
590 gch = hashtab[0];
591 xfree(q);
592 }
593
594 /* --- Read options --- */
595
596 for (;;) {
597 static struct option opts[] = {
598 { "help", 0, 0, 'h' },
599 { "verbose", 0, 0, 'V' },
600 { "usage", 0, 0, 'u' },
601
602 { "algorithm", OPTF_ARGREQ, 0, 'a' },
603 { "hash", OPTF_ARGREQ, 0, 'a' },
604 { "list", 0, 0, 'l' },
605
606 { "files", 0, 0, 'f' },
607 { "find", 0, 0, 'f' },
608 { "null", 0, 0, '0' },
609
610 { "escape", 0, 0, 'e' },
611 { "check", 0, 0, 'c' },
612 { "binary", 0, 0, 'b' },
613 { "verbose", 0, 0, 'v' },
614
615 { 0, 0, 0, 0 }
616 };
617 int i = mdwopt(argc, argv, "hVu a:l f0 ecbv", opts, 0, 0, 0);
618 if (i < 0)
619 break;
620
621 switch (i) {
622 case 'h':
623 help(stdout, gch);
624 exit(0);
625 case 'V':
626 version(stdout);
627 exit(0);
628 case 'u':
629 usage(stdout);
630 exit(0);
631 case 'a':
632 if ((gch = gethash(optarg)) == 0)
633 die(EXIT_FAILURE, "unknown hash algorithm `%s'", optarg);
634 f |= f_oddhash;
635 break;
636 case 'l': {
637 unsigned j;
638 for (j = 0; hashtab[j]; j++) {
639 if (j)
640 fputc(' ', stdout);
641 printf("%s", hashtab[j]->name);
642 }
643 fputc('\n', stdout);
644 exit(0);
645 } break;
646 case 'f':
647 f |= f_files;
648 break;
649 case '0':
650 f |= f_raw;
651 break;
652 case 'e':
653 f |= f_escape;
654 break;
655 case 'c':
656 f |= f_check;
657 break;
658 case 'b':
659 f |= f_binary;
660 break;
661 case 'v':
662 f |= f_verbose;
663 break;
664 default:
665 f |= f_bogus;
666 break;
667 }
668 }
669
670 if (f & f_bogus) {
671 usage(stderr);
672 exit(EXIT_FAILURE);
673 }
674 argv += optind;
675 argc -= optind;
676
677 /* --- Generate output --- */
678
679 if (!(f & f_check)) {
680 if (f & f_oddhash)
681 printf("#hash %s\n", gch->name);
682 if (f & f_escape)
683 fputs("#escape\n", stdout);
684 }
685
686 if (argc) {
687 int i;
688 int rrc;
689 rc = 0;
690 for (i = 0; i < argc; i++) {
691 if ((rrc = hashsum(argv[i], f, gch)) != 0)
692 rc = rrc;
693 }
694 } else
695 rc = hashsum(0, f, gch);
696
697 return (rc);
698}
699
700/*----- That's all, folks -------------------------------------------------*/