Expunge revision histories in files.
[u/mdw/catacomb] / hashsum.c
CommitLineData
e375fe33 1/* -*-c-*-
2 *
b817bfc6 3 * $Id: hashsum.c,v 1.10 2004/04/08 01:36:15 mdw Exp $
e375fe33 4 *
5 * Hash files using some secure hash function
6 *
7 * (c) 2000 Straylight/Edgeware
8 */
9
10/*----- Licensing notice --------------------------------------------------*
11 *
12 * This file is part of Catacomb.
13 *
14 * Catacomb is free software; you can redistribute it and/or modify
15 * it under the terms of the GNU Library General Public License as
16 * published by the Free Software Foundation; either version 2 of the
17 * License, or (at your option) any later version.
18 *
19 * Catacomb is distributed in the hope that it will be useful,
20 * but WITHOUT ANY WARRANTY; without even the implied warranty of
21 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 * GNU Library General Public License for more details.
23 *
24 * You should have received a copy of the GNU Library General Public
25 * License along with Catacomb; if not, write to the Free
26 * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
27 * MA 02111-1307, USA.
28 */
29
e375fe33 30/*----- Header files ------------------------------------------------------*/
31
32#include "config.h"
33
34#include <ctype.h>
35#include <errno.h>
36#include <stdio.h>
37#include <stdlib.h>
38#include <string.h>
39
40#include <mLib/alloc.h>
41#include <mLib/dstr.h>
42#include <mLib/mdwopt.h>
43#include <mLib/quis.h>
44#include <mLib/report.h>
45#include <mLib/sub.h>
46#include <mLib/str.h>
47
48#include "ghash.h"
49
e375fe33 50/*----- Static variables --------------------------------------------------*/
51
/* --- Flag bits, ORed together into the @unsigned f@ words below --- */

#define f_binary (1u << 0)		/* Open hashed files with mode "rb" */
#define f_bogus (1u << 1)		/* Bad command-line option was seen */
#define f_verbose (1u << 2)		/* Report each result when checking */
#define f_check (1u << 3)		/* Check digests rather than emit */
#define f_files (1u << 4)		/* Inputs are lists of file names */
#define f_raw (1u << 5)			/* Names are null-terminated */
#define f_oddhash (1u << 6)		/* Algorithm was chosen with `-a' */
#define f_escape (1u << 7)		/* Escape odd characters in names */
e375fe33 60
61/*----- Support functions -------------------------------------------------*/
62
63/* --- @fhash@ --- *
64 *
65 * Arguments: @const char *file@ = file name to be hashed (null for stdin)
66 * @unsigned f@ = flags to set
67 * @const gchash *gch@ = pointer to hash function to use
68 * @void *buf@ = pointer to hash output buffer
69 *
70 * Returns: Zero if it worked, nonzero on error.
71 *
72 * Use: Hashes a file.
73 */
74
75static int fhash(const char *file, unsigned f, const gchash *gch, void *buf)
76{
77 FILE *fp;
78 char fbuf[BUFSIZ];
79 size_t sz;
80 ghash *h;
81 int e;
82
83 if (!file)
84 fp = stdin;
85 else if ((fp = fopen(file, f & f_binary ? "rb" : "r")) == 0)
86 return (-1);
87
b817bfc6 88 h = GH_INIT(gch);
e375fe33 89 while ((sz = fread(fbuf, 1, sizeof(fbuf), fp)) > 0)
b817bfc6 90 GH_HASH(h, fbuf, sz);
91 GH_DONE(h, buf);
92 GH_DESTROY(h);
e375fe33 93 e = ferror(fp);
94 if (file)
95 fclose(fp);
96 return (e ? -1 : 0);
97}
98
99/* --- @puthex@ --- *
100 *
101 * Arguments: @const octet *buf@ = pointer to a binary buffer
102 * @size_t sz@ = size of the buffer
103 * @FILE *fp@ = pointer to output file handle
104 *
105 * Returns: ---
106 *
107 * Use: Writes a hex dump of a block of memory.
108 */
109
110static void puthex(const octet *buf, size_t sz, FILE *fp)
111{
112 while (sz) {
113 fprintf(fp, "%02x", *buf++);
114 sz--;
115 }
116}
117
118/* --- @gethex@ --- *
119 *
120 * Arguments: @const char *p@ = pointer to input string
121 * @octet *q@ = pointer to output buffer
122 * @size_t sz@ = size of the output buffer
123 * @char **pp@ = where to put the end pointer
124 *
125 * Returns: The number of bytes written to the buffer.
126 *
127 * Use: Reads hex dumps from the input string.
128 */
129
130static size_t gethex(const char *p, octet *q, size_t sz, char **pp)
131{
132 size_t i = 0;
133 while (sz > 0 &&
134 isxdigit((unsigned char)p[0]) &&
135 isxdigit((unsigned char)p[1])) {
136 char buf[3];
137 buf[0] = p[0];
138 buf[1] = p[1];
139 buf[2] = 0;
140 *q++ = strtoul(buf, 0, 16);
141 sz--;
142 p += 2;
143 i++;
144 }
145 if (pp)
146 *pp = (char *)p;
147 return (i);
148}
149
150/* --- @gethash@ --- *
151 *
152 * Arguments: @const char *name@ = pointer to name string
153 *
154 * Returns: Pointer to appropriate hash class.
155 *
156 * Use: Chooses a hash function by name.
157 */
158
159static const gchash *gethash(const char *name)
160{
e9026a0a 161 const gchash *const *g, *gg = 0;
e375fe33 162 size_t sz = strlen(name);
e9026a0a 163 for (g = ghashtab; *g; g++) {
e375fe33 164 if (strncmp(name, (*g)->name, sz) == 0) {
165 if ((*g)->name[sz] == 0) {
166 gg = *g;
167 break;
168 } else if (gg)
169 return (0);
170 else
171 gg = *g;
172 }
173 }
174 return (gg);
175}
176
177/* --- @getstring@ --- *
178 *
179 * Arguments: @FILE *fp@ = stream from which to read
180 * @const char *p@ = string to read from instead
181 * @dstr *d@ = destination string
182 * @unsigned raw@ = raw or cooked read
183 *
184 * Returns: Zero if OK, nonzero on end-of-file.
185 *
186 * Use: Reads a filename (or something similar) from a stream.
187 */
188
189static int getstring(FILE *fp, const char *p, dstr *d, unsigned raw)
190{
191 int ch;
192 int q = 0;
193
194 /* --- Raw: just read exactly what's written up to a null byte --- */
195
d470270a 196#define NEXTCH (fp ? getc(fp) : (unsigned char)*p++)
e375fe33 197#define EOFCH (fp ? EOF : 0)
198
199 if (raw) {
200 if ((ch = NEXTCH) == EOFCH)
201 return (EOF);
202 for (;;) {
203 if (!ch)
204 break;
205 DPUTC(d, ch);
206 if ((ch = NEXTCH) == EOFCH)
207 break;
208 }
209 DPUTZ(d);
210 return (0);
211 }
212
213 /* --- Skip as far as whitespace --- *
214 *
215 * Also skip past comments.
216 */
217
218again:
219 ch = NEXTCH;
d470270a 220 while (isspace(ch))
e375fe33 221 ch = NEXTCH;
222 if (ch == '#') {
223 do ch = NEXTCH; while (ch != '\n' && ch != EOFCH);
224 goto again;
225 }
226 if (ch == EOFCH)
227 return (EOF);
228
229 /* --- If the character is a quote then read a quoted string --- */
230
231 switch (ch) {
232 case '`':
233 ch = '\'';
234 case '\'':
235 case '\"':
236 q = ch;
237 ch = NEXTCH;
238 break;
239 }
240
241 /* --- Now read all sorts of interesting things --- */
242
243 for (;;) {
244
245 /* --- Handle an escaped thing --- */
246
247 if (ch == '\\') {
248 ch = NEXTCH;
249 if (ch == EOFCH)
250 break;
251 switch (ch) {
252 case 'a': ch = '\a'; break;
253 case 'b': ch = '\b'; break;
254 case 'f': ch = '\f'; break;
255 case 'n': ch = '\n'; break;
256 case 'r': ch = '\r'; break;
257 case 't': ch = '\t'; break;
258 case 'v': ch = '\v'; break;
259 }
260 DPUTC(d, ch);
261 ch = NEXTCH;
262 continue;
263 }
264
265 /* --- If it's a quote or some other end marker then stop --- */
266
267 if (ch == q)
268 break;
d470270a 269 if (!q && isspace(ch))
e375fe33 270 break;
271
272 /* --- Otherwise contribute and continue --- */
273
274 DPUTC(d, ch);
275 if ((ch = NEXTCH) == EOFCH)
276 break;
277 }
278
279 /* --- Done --- */
280
281 DPUTZ(d);
282 return (0);
283
284#undef NEXTCH
285#undef EOFCH
286}
287
/* --- @putstring@ --- *
 *
 * Arguments:	@FILE *fp@ = stream to write on
 *		@const char *p@ = pointer to text
 *		@unsigned raw@ = whether the string is to be written raw
 *
 * Returns:	---
 *
 * Use:		Emits a string to a stream.  A raw string is written
 *		verbatim, followed by its terminating null byte.  Otherwise
 *		the string is wrapped in double quotes if it contains any
 *		whitespace, and control characters, quotes, `#' and
 *		backslash are written as backslash escapes.
 */

static void putstring(FILE *fp, const char *p, unsigned raw)
{
  size_t sz = strlen(p);
  unsigned qq;
  const char *q;

  /* --- Just write the string null terminated if raw --- */

  if (raw) {
    fwrite(p, 1, sz + 1, fp);
    return;
  }

  /* --- Check for any dodgy characters --- */

  qq = 0;
  for (q = p; *q; q++) {
    if (isspace((unsigned char)*q)) {
      qq = '\"';
      break;
    }
  }

  if (qq)
    putc(qq, fp);

  /* --- Emit the string --- *
   *
   * Backslash must be escaped too: it introduces an escape sequence when
   * the string is read back, so a bare one would swallow or alter the
   * character which follows it.
   */

  for (q = p; *q; q++) {
    switch (*q) {
      case '\a': fputs("\\a", fp); break;
      case '\b': fputs("\\b", fp); break;
      case '\f': fputs("\\f", fp); break;
      case '\n': fputs("\\n", fp); break;
      case '\r': fputs("\\r", fp); break;
      case '\t': fputs("\\t", fp); break;
      case '\v': fputs("\\v", fp); break;
      case '`': fputs("\\`", fp); break;
      case '\'': fputs("\\'", fp); break;
      case '\"': fputs("\\\"", fp); break;
      case '#': fputs("\\#", fp); break;
      case '\\': fputs("\\\\", fp); break;
      default:
	putc(*q, fp);
	break;
    }
  }

  /* --- Done --- */

  if (qq)
    putc(qq, fp);
}
351
352/*----- Guts --------------------------------------------------------------*/
353
354static int checkhash(const char *file, unsigned f, const gchash *gch)
355{
356 int rc;
357 FILE *fp;
358 dstr d = DSTR_INIT;
359 dstr dd = DSTR_INIT;
360 unsigned long n = 0, nfail = 0;
361 octet *buf = xmalloc(2 * gch->hashsz);
362
363 if (!file)
364 fp = stdin;
365 else if ((fp = fopen(file, f & f_raw ? "r" : "rb")) == 0) {
366 moan("couldn't open `%s': %s", file, strerror(errno));
367 return (EXIT_FAILURE);
368 }
369
370 while (DRESET(&d), dstr_putline(&d, fp) != EOF) {
371 char *p = d.buf;
372 char *q;
373 unsigned ff = f;
374
375 /* --- Handle a directive --- */
376
377 if (*p == '#') {
378 p++;
379 if ((q = str_getword(&p)) == 0)
380 continue;
381 if (strcmp(q, "hash") == 0) {
382 const gchash *g;
383 if ((q = str_getword(&p)) == 0)
384 continue;
385 if ((g = gethash(q)) == 0)
386 continue;
387 gch = g;
388 xfree(buf);
389 buf = xmalloc(2 * gch->hashsz);
390 } else if (strcmp(q, "escape") == 0)
391 f |= f_escape;
392 continue;
393 }
394
395 /* --- Otherwise it's a hex thing --- */
396
12902a5c 397 q = p;
398 while (*p && *p != ' ')
399 p++;
400 if (!*p)
e375fe33 401 continue;
12902a5c 402 *p++ = 0;
e375fe33 403 if (gethex(q, buf, gch->hashsz, 0) < gch->hashsz)
404 continue;
12902a5c 405 if (*p == '*')
e375fe33 406 ff |= f_binary;
12902a5c 407 else if (*p != ' ')
e375fe33 408 continue;
12902a5c 409 p++;
e375fe33 410
411 if (f & f_escape) {
412 DRESET(&dd);
413 getstring(0, p, &dd, 0);
414 p = dd.buf;
415 }
416
417 if (fhash(p, ff, gch, buf + gch->hashsz)) {
418 moan("couldn't read `%s': %s", p, strerror(errno));
419 rc = EXIT_FAILURE;
420 continue;
421 }
422 if (memcmp(buf, buf + gch->hashsz, gch->hashsz) != 0) {
423 if (ff & f_verbose)
424 fprintf(stderr, "FAIL %s\n", p);
425 else
426 moan("%s check failed for `%s'", gch->name, p);
427 nfail++;
428 rc = EXIT_FAILURE;
429 } else {
430 if (ff & f_verbose)
431 fprintf(stderr, "OK %s\n", p);
432 }
433 n++;
434 }
435
436 dstr_destroy(&d);
437 dstr_destroy(&dd);
438 xfree(buf);
439 if ((f & f_verbose) && nfail)
440 moan("%lu of %lu file(s) failed %s check", nfail, n, gch->name);
441 else if (!n)
442 moan("no files checked");
443 return (0);
444}
445
446static int dohash(const char *file, unsigned f, const gchash *gch)
447{
448 int rc = 0;
449 octet *p = xmalloc(gch->hashsz);
450
451 if (fhash(file, f, gch, p)) {
452 moan("couldn't read `%s': %s", file ? file : "<stdin>", strerror(errno));
453 rc = EXIT_FAILURE;
454 } else {
455 puthex(p, gch->hashsz, stdout);
456 if (file) {
457 fputc(' ', stdout);
458 fputc(f & f_binary ? '*' : ' ', stdout);
459 if (f & f_escape)
460 putstring(stdout, file, 0);
461 else
462 fputs(file, stdout);
463 }
464 fputc('\n', stdout);
465 }
466
467 xfree(p);
468 return (rc);
469}
470
12902a5c 471static int dofile(const char *file, unsigned f, const gchash *gch)
472{
473 return (f & f_check ? checkhash : dohash)(file, f, gch);
474}
475
e375fe33 476static int hashfiles(const char *file, unsigned f, const gchash *gch)
477{
478 FILE *fp;
479 dstr d = DSTR_INIT;
480 int rc = 0;
481 int rrc;
482
483 if (!file)
484 fp = stdin;
485 else if ((fp = fopen(file, f & f_raw ? "r" : "rb")) == 0) {
486 moan("couldn't open `%s': %s", file, strerror(errno));
487 return (EXIT_FAILURE);
488 }
489
490 for (;;) {
491 DRESET(&d);
492 if (getstring(fp, 0, &d, f & f_raw))
493 break;
12902a5c 494 if ((rrc = dofile(d.buf, f, gch)) != 0)
e375fe33 495 rc = rrc;
496 }
497
498 return (rc);
499}
500
501static int hashsum(const char *file, unsigned f, const gchash *gch)
502{
12902a5c 503 return (f & f_files ? hashfiles : dofile)(file, f, gch);
e375fe33 504}
505
506/*----- Main driver -------------------------------------------------------*/
507
508static void version(FILE *fp)
509{
510 pquis(fp, "$, Catacomb version " VERSION "\n");
511}
512
/* --- @usage@ --- *
 *
 * Arguments:	@FILE *fp@ = stream to write on
 *
 * Returns:	---
 *
 * Use:		Writes the terse usage line.
 */

static void usage(FILE *fp)
{
  static const char synopsis[] =
    "Usage: $ [-f0ebcv] [-a algorithm] [files...]\n";

  pquis(fp, synopsis);
}
517
518static void help(FILE *fp, const gchash *gch)
519{
520 version(fp);
521 fputc('\n', fp);
522 usage(fp);
523 pquis(fp, "\n\
524Generates or checks message digests on files. Options available:\n\
525\n\
526-h, --help Display this help message.\n\
527-V, --version Display program's version number.\n\
528-u, --usage Display a terse usage message.\n\
529\n\
530-a, --algorithm=ALG Use the message digest algorithm ALG.\n\
531\n\
532-f, --files Read a list of file names from standard input.\n\
533-0, --null File names are null terminated, not plain text.\n\
534\n\
535-e, --escape Escape funny characters in filenames.\n\
536-c, --check Check message digests rather than emitting them.\n\
537-b, --binary When reading files, treat them as binary.\n\
538-v, --verbose Be verbose when checking digests.\n\
539\n\
540For a list of supported message digest algorithms, type `$ --list'.\n\
541");
542 if (gch)
543 fprintf(fp, "The default message digest algorithm is %s.\n", gch->name);
544}
545
546int main(int argc, char *argv[])
547{
548 unsigned f = 0;
549 const gchash *gch = 0;
550 int rc;
551
552 /* --- Initialization --- */
553
554 ego(argv[0]);
555 sub_init();
556
557 /* --- Choose a hash function from the name --- */
558
559 {
560 char *q = xstrdup(QUIS);
561 size_t len = strlen(q);
562 if (len > 3 && strcmp(q + len - 3, "sum") == 0) {
563 q[len - 3] = 0;
564 gch = gethash(q);
565 }
566 if (!gch)
e9026a0a 567 gch = gethash("md5");
e375fe33 568 xfree(q);
569 }
570
571 /* --- Read options --- */
572
573 for (;;) {
574 static struct option opts[] = {
575 { "help", 0, 0, 'h' },
576 { "verbose", 0, 0, 'V' },
577 { "usage", 0, 0, 'u' },
578
579 { "algorithm", OPTF_ARGREQ, 0, 'a' },
580 { "hash", OPTF_ARGREQ, 0, 'a' },
581 { "list", 0, 0, 'l' },
582
583 { "files", 0, 0, 'f' },
584 { "find", 0, 0, 'f' },
585 { "null", 0, 0, '0' },
586
587 { "escape", 0, 0, 'e' },
588 { "check", 0, 0, 'c' },
589 { "binary", 0, 0, 'b' },
590 { "verbose", 0, 0, 'v' },
591
592 { 0, 0, 0, 0 }
593 };
594 int i = mdwopt(argc, argv, "hVu a:l f0 ecbv", opts, 0, 0, 0);
595 if (i < 0)
596 break;
597
598 switch (i) {
599 case 'h':
600 help(stdout, gch);
601 exit(0);
602 case 'V':
603 version(stdout);
604 exit(0);
605 case 'u':
606 usage(stdout);
607 exit(0);
608 case 'a':
609 if ((gch = gethash(optarg)) == 0)
610 die(EXIT_FAILURE, "unknown hash algorithm `%s'", optarg);
611 f |= f_oddhash;
612 break;
613 case 'l': {
614 unsigned j;
e9026a0a 615 for (j = 0; ghashtab[j]; j++) {
e375fe33 616 if (j)
617 fputc(' ', stdout);
e9026a0a 618 printf("%s", ghashtab[j]->name);
e375fe33 619 }
620 fputc('\n', stdout);
621 exit(0);
622 } break;
623 case 'f':
624 f |= f_files;
625 break;
626 case '0':
627 f |= f_raw;
628 break;
629 case 'e':
630 f |= f_escape;
631 break;
632 case 'c':
633 f |= f_check;
634 break;
635 case 'b':
636 f |= f_binary;
637 break;
638 case 'v':
639 f |= f_verbose;
640 break;
641 default:
642 f |= f_bogus;
643 break;
644 }
645 }
646
647 if (f & f_bogus) {
648 usage(stderr);
649 exit(EXIT_FAILURE);
650 }
651 argv += optind;
652 argc -= optind;
653
654 /* --- Generate output --- */
655
656 if (!(f & f_check)) {
657 if (f & f_oddhash)
658 printf("#hash %s\n", gch->name);
659 if (f & f_escape)
660 fputs("#escape\n", stdout);
661 }
662
663 if (argc) {
664 int i;
665 int rrc;
666 rc = 0;
667 for (i = 0; i < argc; i++) {
668 if ((rrc = hashsum(argv[i], f, gch)) != 0)
669 rc = rrc;
670 }
671 } else
672 rc = hashsum(0, f, gch);
673
674 return (rc);
675}
676
677/*----- That's all, folks -------------------------------------------------*/