Merge branch 'master' of git+ssh://metalzone.distorted.org.uk/~mdw/public-git/catacomb/
[u/mdw/catacomb] / hashsum.c
CommitLineData
e375fe33 1/* -*-c-*-
2 *
5685a696 3 * $Id$
e375fe33 4 *
5 * Hash files using some secure hash function
6 *
7 * (c) 2000 Straylight/Edgeware
8 */
9
10/*----- Licensing notice --------------------------------------------------*
11 *
12 * This file is part of Catacomb.
13 *
14 * Catacomb is free software; you can redistribute it and/or modify
15 * it under the terms of the GNU Library General Public License as
16 * published by the Free Software Foundation; either version 2 of the
17 * License, or (at your option) any later version.
18 *
19 * Catacomb is distributed in the hope that it will be useful,
20 * but WITHOUT ANY WARRANTY; without even the implied warranty of
21 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 * GNU Library General Public License for more details.
23 *
24 * You should have received a copy of the GNU Library General Public
25 * License along with Catacomb; if not, write to the Free
26 * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
27 * MA 02111-1307, USA.
28 */
29
e375fe33 30/*----- Header files ------------------------------------------------------*/
31
32#include "config.h"
33
5685a696 34#include <assert.h>
e375fe33 35#include <ctype.h>
36#include <errno.h>
37#include <stdio.h>
38#include <stdlib.h>
39#include <string.h>
40
41#include <mLib/alloc.h>
42#include <mLib/dstr.h>
43#include <mLib/mdwopt.h>
44#include <mLib/quis.h>
45#include <mLib/report.h>
46#include <mLib/sub.h>
47#include <mLib/str.h>
48
5685a696 49#include <mLib/hex.h>
50#include <mLib/base32.h>
51#include <mLib/base64.h>
52
e375fe33 53#include "ghash.h"
c65df279 54#include "cc.h"
e375fe33 55
e375fe33 56/*----- Static variables --------------------------------------------------*/
57
/* Bits of the flag word `f' that is threaded through the program.  Each
 * flag occupies a single bit so they can be combined with `|'.
 */
#define f_binary  (1u << 0)		/* Read files in binary mode (-b) */
#define f_bogus   (1u << 1)		/* Bad command-line option seen */
#define f_verbose (1u << 2)		/* Report each file when checking (-v) */
#define f_check   (1u << 3)		/* Check hashes, don't emit them (-c) */
#define f_files   (1u << 4)		/* Arguments name lists of files (-f) */
#define f_raw     (1u << 5)		/* Names are null-terminated (-0) */
#define f_oddhash (1u << 6)		/* Hash chosen explicitly with -a */
#define f_escape  (1u << 7)		/* Escape funny filename chars (-e) */
#define f_oddenc  (1u << 8)		/* Encoding chosen explicitly with -E */
67
68/*----- Encoding and decoding ---------------------------------------------*/
69
70/* --- Hex encoding --- */
71
72static void puthex(const octet *buf, size_t sz, FILE *fp)
73{
74 while (sz) {
75 fprintf(fp, "%02x", *buf++);
76 sz--;
77 }
78}
79
80static size_t gethex(const char *p, octet *q, size_t sz, char **pp)
81{
82 size_t i = 0;
83 while (sz > 0 &&
84 isxdigit((unsigned char)p[0]) &&
85 isxdigit((unsigned char)p[1])) {
86 char buf[3];
87 buf[0] = p[0];
88 buf[1] = p[1];
89 buf[2] = 0;
90 *q++ = strtoul(buf, 0, 16);
91 sz--;
92 p += 2;
93 i++;
94 }
95 if (pp)
96 *pp = (char *)p;
97 return (i);
98}
99
100/* --- Base64 encoding --- */
101
102static void putb64(const octet *buf, size_t sz, FILE *fp)
103{
104 base64_ctx b;
105 dstr d = DSTR_INIT;
106
107 base64_init(&b);
108 b.indent = "";
109 b.maxline = 0;
110 base64_encode(&b, buf, sz, &d);
111 base64_encode(&b, 0, 0, &d);
112 dstr_write(&d, fp);
113 dstr_destroy(&d);
114}
115
116static size_t getb64(const char *p, octet *q, size_t sz, char **pp)
117{
118 base64_ctx b;
119 dstr d = DSTR_INIT;
120 size_t n = strlen(p);
121
122 base64_init(&b);
123 base64_decode(&b, p, n, &d);
124 if (pp) *pp = (/*unconst*/ char *)p + n;
125 base64_decode(&b, 0, 0, &d);
126 assert(d.len <= sz);
127 memcpy(q, d.buf, sz);
128 n = d.len;
129 dstr_destroy(&d);
130 return (n);
131}
132
133/* --- Base32 encoding --- */
134
135static void putb32(const octet *buf, size_t sz, FILE *fp)
136{
137 base32_ctx b;
138 dstr d = DSTR_INIT;
139
140 base32_init(&b);
141 b.indent = "";
142 b.maxline = 0;
143 base32_encode(&b, buf, sz, &d);
144 base32_encode(&b, 0, 0, &d);
145 dstr_write(&d, fp);
146 dstr_destroy(&d);
147}
148
149static size_t getb32(const char *p, octet *q, size_t sz, char **pp)
150{
151 base32_ctx b;
152 dstr d = DSTR_INIT;
153 size_t n = strlen(p);
154
155 base32_init(&b);
156 base32_decode(&b, p, n, &d);
157 if (pp) *pp = (/*unconst*/ char *)p + n;
158 base32_decode(&b, 0, 0, &d);
159 assert(d.len <= sz);
160 memcpy(q, d.buf, sz);
161 n = d.len;
162 dstr_destroy(&d);
163 return (n);
164}
165
166/* --- Table --- */
167
c65df279 168typedef struct encodeops {
5685a696 169 const char *name;
170 void (*put)(const octet *, size_t, FILE *);
171 size_t (*get)(const char *, octet *, size_t, char **);
c65df279 172} encodeops;
5685a696 173
c65df279 174static const encodeops encodingtab[] = {
5685a696 175 { "hex", puthex, gethex },
176 { "base64", putb64, getb64 },
177 { "base32", putb32, getb32 },
178 { 0, 0, 0 }
179};
180
c65df279 181static const encodeops *getencoding(const char *ename)
5685a696 182{
c65df279 183 const encodeops *e;
5685a696 184
c65df279 185 for (e = encodingtab; e->name; e++) {
5685a696 186 if (strcmp(ename, e->name) == 0)
187 return (e);
188 }
189 return (0);
190}
e375fe33 191
192/*----- Support functions -------------------------------------------------*/
193
194/* --- @fhash@ --- *
195 *
196 * Arguments: @const char *file@ = file name to be hashed (null for stdin)
197 * @unsigned f@ = flags to set
198 * @const gchash *gch@ = pointer to hash function to use
199 * @void *buf@ = pointer to hash output buffer
200 *
201 * Returns: Zero if it worked, nonzero on error.
202 *
203 * Use: Hashes a file.
204 */
205
206static int fhash(const char *file, unsigned f, const gchash *gch, void *buf)
207{
208 FILE *fp;
209 char fbuf[BUFSIZ];
210 size_t sz;
211 ghash *h;
212 int e;
213
d7e6bc66 214 if (!file || strcmp(file, "-") == 0)
e375fe33 215 fp = stdin;
216 else if ((fp = fopen(file, f & f_binary ? "rb" : "r")) == 0)
217 return (-1);
218
b817bfc6 219 h = GH_INIT(gch);
e375fe33 220 while ((sz = fread(fbuf, 1, sizeof(fbuf), fp)) > 0)
b817bfc6 221 GH_HASH(h, fbuf, sz);
222 GH_DONE(h, buf);
223 GH_DESTROY(h);
e375fe33 224 e = ferror(fp);
225 if (file)
226 fclose(fp);
227 return (e ? -1 : 0);
228}
229
e375fe33 230/* --- @gethash@ --- *
231 *
232 * Arguments: @const char *name@ = pointer to name string
233 *
234 * Returns: Pointer to appropriate hash class.
235 *
236 * Use: Chooses a hash function by name.
237 */
238
239static const gchash *gethash(const char *name)
240{
e9026a0a 241 const gchash *const *g, *gg = 0;
e375fe33 242 size_t sz = strlen(name);
e9026a0a 243 for (g = ghashtab; *g; g++) {
e375fe33 244 if (strncmp(name, (*g)->name, sz) == 0) {
245 if ((*g)->name[sz] == 0) {
246 gg = *g;
247 break;
248 } else if (gg)
249 return (0);
250 else
251 gg = *g;
252 }
253 }
254 return (gg);
255}
256
257/* --- @getstring@ --- *
258 *
259 * Arguments: @FILE *fp@ = stream from which to read
260 * @const char *p@ = string to read from instead
261 * @dstr *d@ = destination string
262 * @unsigned raw@ = raw or cooked read
263 *
264 * Returns: Zero if OK, nonzero on end-of-file.
265 *
266 * Use: Reads a filename (or something similar) from a stream.
267 */
268
269static int getstring(FILE *fp, const char *p, dstr *d, unsigned raw)
270{
271 int ch;
272 int q = 0;
273
274 /* --- Raw: just read exactly what's written up to a null byte --- */
275
d470270a 276#define NEXTCH (fp ? getc(fp) : (unsigned char)*p++)
e375fe33 277#define EOFCH (fp ? EOF : 0)
278
279 if (raw) {
280 if ((ch = NEXTCH) == EOFCH)
281 return (EOF);
282 for (;;) {
283 if (!ch)
284 break;
285 DPUTC(d, ch);
286 if ((ch = NEXTCH) == EOFCH)
287 break;
288 }
289 DPUTZ(d);
290 return (0);
291 }
292
293 /* --- Skip as far as whitespace --- *
294 *
295 * Also skip past comments.
296 */
297
298again:
299 ch = NEXTCH;
d470270a 300 while (isspace(ch))
e375fe33 301 ch = NEXTCH;
302 if (ch == '#') {
303 do ch = NEXTCH; while (ch != '\n' && ch != EOFCH);
304 goto again;
305 }
306 if (ch == EOFCH)
307 return (EOF);
308
309 /* --- If the character is a quote then read a quoted string --- */
310
311 switch (ch) {
312 case '`':
313 ch = '\'';
314 case '\'':
315 case '\"':
316 q = ch;
317 ch = NEXTCH;
318 break;
319 }
320
321 /* --- Now read all sorts of interesting things --- */
322
323 for (;;) {
324
325 /* --- Handle an escaped thing --- */
326
327 if (ch == '\\') {
328 ch = NEXTCH;
329 if (ch == EOFCH)
330 break;
331 switch (ch) {
332 case 'a': ch = '\a'; break;
333 case 'b': ch = '\b'; break;
334 case 'f': ch = '\f'; break;
335 case 'n': ch = '\n'; break;
336 case 'r': ch = '\r'; break;
337 case 't': ch = '\t'; break;
338 case 'v': ch = '\v'; break;
339 }
340 DPUTC(d, ch);
341 ch = NEXTCH;
342 continue;
343 }
344
345 /* --- If it's a quote or some other end marker then stop --- */
346
347 if (ch == q)
348 break;
d470270a 349 if (!q && isspace(ch))
e375fe33 350 break;
351
352 /* --- Otherwise contribute and continue --- */
353
354 DPUTC(d, ch);
355 if ((ch = NEXTCH) == EOFCH)
356 break;
357 }
358
359 /* --- Done --- */
360
361 DPUTZ(d);
362 return (0);
363
364#undef NEXTCH
365#undef EOFCH
366}
367
/* --- @putstring@ --- *
 *
 * Arguments:	@FILE *fp@ = stream to write on
 *		@const char *p@ = pointer to text
 *		@unsigned raw@ = whether the string is to be written raw
 *
 * Returns:	---
 *
 * Use:		Emits a string to a stream.  A raw string is written as it
 *		stands, followed by a null byte.  Otherwise, the string is
 *		wrapped in double quotes if it contains whitespace, and
 *		control characters, quotes, `#' and backslash are written
 *		as backslash escapes, so that a cooked @getstring@ reads
 *		back the original text.
 */

static void putstring(FILE *fp, const char *p, unsigned raw)
{
  size_t sz = strlen(p);
  unsigned qq;
  const char *q;

  /* --- Just write the string null terminated if raw --- */

  if (raw) {
    fwrite(p, 1, sz + 1, fp);
    return;
  }

  /* --- Check for any dodgy characters --- */

  qq = 0;
  for (q = p; *q; q++) {
    if (isspace((unsigned char)*q)) {
      qq = '\"';
      break;
    }
  }

  if (qq)
    putc(qq, fp);

  /* --- Emit the string --- *
   *
   * Work out the character to follow the backslash, if one is wanted.
   * The backslash itself must be escaped: it's the reader's escape
   * character, so a bare one would swallow whatever follows it.
   */

  for (q = p; *q; q++) {
    int esc = 0;

    switch (*q) {
      case '\a': esc = 'a'; break;
      case '\b': esc = 'b'; break;
      case '\f': esc = 'f'; break;
      case '\n': esc = 'n'; break;
      case '\r': esc = 'r'; break;
      case '\t': esc = 't'; break;
      case '\v': esc = 'v'; break;
      case '`':
      case '\'':
      case '\"':
      case '#':
      case '\\':
	esc = *q;
	break;
      default:
	break;
    }
    if (esc) {
      fputc('\\', fp);
      fputc(esc, fp);
    } else
      putc(*q, fp);
  }

  /* --- Done --- */

  if (qq)
    putc(qq, fp);
}
431
432/*----- Guts --------------------------------------------------------------*/
433
5685a696 434static int checkhash(const char *file, unsigned f,
c65df279 435 const gchash *gch, const encodeops *e)
e375fe33 436{
437 int rc;
438 FILE *fp;
439 dstr d = DSTR_INIT;
440 dstr dd = DSTR_INIT;
441 unsigned long n = 0, nfail = 0;
442 octet *buf = xmalloc(2 * gch->hashsz);
443
d7e6bc66 444 if (!file || strcmp(file, "-") == 0)
e375fe33 445 fp = stdin;
446 else if ((fp = fopen(file, f & f_raw ? "r" : "rb")) == 0) {
447 moan("couldn't open `%s': %s", file, strerror(errno));
448 return (EXIT_FAILURE);
449 }
450
451 while (DRESET(&d), dstr_putline(&d, fp) != EOF) {
452 char *p = d.buf;
453 char *q;
454 unsigned ff = f;
455
456 /* --- Handle a directive --- */
457
458 if (*p == '#') {
459 p++;
460 if ((q = str_getword(&p)) == 0)
461 continue;
462 if (strcmp(q, "hash") == 0) {
463 const gchash *g;
464 if ((q = str_getword(&p)) == 0)
465 continue;
466 if ((g = gethash(q)) == 0)
467 continue;
468 gch = g;
469 xfree(buf);
470 buf = xmalloc(2 * gch->hashsz);
5685a696 471 } else if (strcmp(q, "encoding") == 0) {
c65df279 472 const encodeops *ee;
5685a696 473 if ((q = str_getword(&p)) == 0)
474 continue;
c65df279 475 if ((ee = getencoding(q)) == 0)
5685a696 476 continue;
477 e = ee;
e375fe33 478 } else if (strcmp(q, "escape") == 0)
479 f |= f_escape;
480 continue;
481 }
482
483 /* --- Otherwise it's a hex thing --- */
484
12902a5c 485 q = p;
486 while (*p && *p != ' ')
487 p++;
488 if (!*p)
e375fe33 489 continue;
12902a5c 490 *p++ = 0;
5685a696 491 if (e->get(q, buf, gch->hashsz, 0) < gch->hashsz)
e375fe33 492 continue;
12902a5c 493 if (*p == '*')
e375fe33 494 ff |= f_binary;
12902a5c 495 else if (*p != ' ')
e375fe33 496 continue;
12902a5c 497 p++;
e375fe33 498
499 if (f & f_escape) {
500 DRESET(&dd);
501 getstring(0, p, &dd, 0);
502 p = dd.buf;
503 }
504
505 if (fhash(p, ff, gch, buf + gch->hashsz)) {
506 moan("couldn't read `%s': %s", p, strerror(errno));
507 rc = EXIT_FAILURE;
508 continue;
509 }
510 if (memcmp(buf, buf + gch->hashsz, gch->hashsz) != 0) {
511 if (ff & f_verbose)
512 fprintf(stderr, "FAIL %s\n", p);
513 else
514 moan("%s check failed for `%s'", gch->name, p);
515 nfail++;
516 rc = EXIT_FAILURE;
517 } else {
518 if (ff & f_verbose)
519 fprintf(stderr, "OK %s\n", p);
520 }
521 n++;
522 }
523
524 dstr_destroy(&d);
525 dstr_destroy(&dd);
526 xfree(buf);
527 if ((f & f_verbose) && nfail)
528 moan("%lu of %lu file(s) failed %s check", nfail, n, gch->name);
529 else if (!n)
530 moan("no files checked");
531 return (0);
532}
533
5685a696 534static int dohash(const char *file, unsigned f,
c65df279 535 const gchash *gch, const encodeops *e)
e375fe33 536{
537 int rc = 0;
538 octet *p = xmalloc(gch->hashsz);
539
540 if (fhash(file, f, gch, p)) {
541 moan("couldn't read `%s': %s", file ? file : "<stdin>", strerror(errno));
542 rc = EXIT_FAILURE;
543 } else {
5685a696 544 e->put(p, gch->hashsz, stdout);
e375fe33 545 if (file) {
546 fputc(' ', stdout);
547 fputc(f & f_binary ? '*' : ' ', stdout);
548 if (f & f_escape)
549 putstring(stdout, file, 0);
550 else
551 fputs(file, stdout);
552 }
553 fputc('\n', stdout);
554 }
555
556 xfree(p);
557 return (rc);
558}
559
5685a696 560static int dofile(const char *file, unsigned f,
c65df279 561 const gchash *gch, const encodeops *e)
12902a5c 562{
5685a696 563 return (f & f_check ? checkhash : dohash)(file, f, gch, e);
12902a5c 564}
565
5685a696 566static int hashfiles(const char *file, unsigned f,
c65df279 567 const gchash *gch, const encodeops *e)
e375fe33 568{
569 FILE *fp;
570 dstr d = DSTR_INIT;
571 int rc = 0;
572 int rrc;
573
d7e6bc66 574 if (!file || strcmp(file, "-") == 0)
e375fe33 575 fp = stdin;
576 else if ((fp = fopen(file, f & f_raw ? "r" : "rb")) == 0) {
577 moan("couldn't open `%s': %s", file, strerror(errno));
578 return (EXIT_FAILURE);
579 }
580
581 for (;;) {
582 DRESET(&d);
583 if (getstring(fp, 0, &d, f & f_raw))
584 break;
5685a696 585 if ((rrc = dofile(d.buf, f, gch, e)) != 0)
e375fe33 586 rc = rrc;
587 }
588
589 return (rc);
590}
591
5685a696 592static int hashsum(const char *file, unsigned f,
c65df279 593 const gchash *gch, const encodeops *e)
e375fe33 594{
5685a696 595 return (f & f_files ? hashfiles : dofile)(file, f, gch, e);
e375fe33 596}
597
598/*----- Main driver -------------------------------------------------------*/
599
c65df279 600void version(FILE *fp)
e375fe33 601{
602 pquis(fp, "$, Catacomb version " VERSION "\n");
603}
604
/* Write a one-line usage summary to @fp@. */
static void usage(FILE *fp)
{
  static const char synopsis[] =
    "Usage: $ [-f0ebcv] [-a ALGORITHM] [-E ENC] [FILES...]\n";

  pquis(fp, synopsis);
}
609
610static void help(FILE *fp, const gchash *gch)
611{
612 version(fp);
613 fputc('\n', fp);
614 usage(fp);
615 pquis(fp, "\n\
616Generates or checks message digests on files. Options available:\n\
617\n\
618-h, --help Display this help message.\n\
619-V, --version Display program's version number.\n\
620-u, --usage Display a terse usage message.\n\
c65df279 621-l, --list [ITEM...] Show known hash functions and/or encodings.\n\
e375fe33 622\n\
623-a, --algorithm=ALG Use the message digest algorithm ALG.\n\
92c494ce 624-E, --encoding=ENC Represent hashes using encoding ENC.\n\
e375fe33 625\n\
626-f, --files Read a list of file names from standard input.\n\
627-0, --null File names are null terminated, not plain text.\n\
628\n\
629-e, --escape Escape funny characters in filenames.\n\
630-c, --check Check message digests rather than emitting them.\n\
631-b, --binary When reading files, treat them as binary.\n\
632-v, --verbose Be verbose when checking digests.\n\
633\n\
92c494ce 634For a list of hashing algorithms and encodings, type `$ --list'.\n\
e375fe33 635");
636 if (gch)
637 fprintf(fp, "The default message digest algorithm is %s.\n", gch->name);
638}
639
/* The lists which the `--list' option can show.  Each @LI@ entry gives a
 * title, a tag, an expression which tests for the end of the list at index
 * @i@, and an expression for the name of item @i@.
 *
 * NOTE(review): the meaning of the fields is inferred from their contents;
 * @MAKELISTTAB@ isn't defined in this file (presumably it comes from
 * `cc.h') -- confirm against its definition.
 */
#define LISTS(LI)							\
  LI("Lists", list, listtab[i].name, listtab[i].name)			\
  LI("Hash functions", hash, ghashtab[i], ghashtab[i]->name)		\
  LI("Encodings", enc, encodingtab[i].name, encodingtab[i].name)

MAKELISTTAB(listtab, LISTS)
646
e375fe33 647int main(int argc, char *argv[])
648{
649 unsigned f = 0;
650 const gchash *gch = 0;
c65df279 651 const encodeops *e = &encodingtab[0];
e375fe33 652 int rc;
653
654 /* --- Initialization --- */
655
656 ego(argv[0]);
657 sub_init();
658
659 /* --- Choose a hash function from the name --- */
660
661 {
662 char *q = xstrdup(QUIS);
663 size_t len = strlen(q);
664 if (len > 3 && strcmp(q + len - 3, "sum") == 0) {
665 q[len - 3] = 0;
666 gch = gethash(q);
667 }
668 if (!gch)
e9026a0a 669 gch = gethash("md5");
e375fe33 670 xfree(q);
671 }
672
673 /* --- Read options --- */
674
675 for (;;) {
676 static struct option opts[] = {
677 { "help", 0, 0, 'h' },
678 { "verbose", 0, 0, 'V' },
679 { "usage", 0, 0, 'u' },
680
681 { "algorithm", OPTF_ARGREQ, 0, 'a' },
682 { "hash", OPTF_ARGREQ, 0, 'a' },
5685a696 683 { "encoding", OPTF_ARGREQ, 0, 'E' },
e375fe33 684 { "list", 0, 0, 'l' },
685
686 { "files", 0, 0, 'f' },
687 { "find", 0, 0, 'f' },
688 { "null", 0, 0, '0' },
689
690 { "escape", 0, 0, 'e' },
691 { "check", 0, 0, 'c' },
692 { "binary", 0, 0, 'b' },
693 { "verbose", 0, 0, 'v' },
694
695 { 0, 0, 0, 0 }
696 };
5685a696 697 int i = mdwopt(argc, argv, "hVu a:E:l f0 ecbv", opts, 0, 0, 0);
e375fe33 698 if (i < 0)
699 break;
700
701 switch (i) {
702 case 'h':
703 help(stdout, gch);
704 exit(0);
705 case 'V':
706 version(stdout);
707 exit(0);
708 case 'u':
709 usage(stdout);
710 exit(0);
c65df279 711 case 'l':
712 exit(displaylists(listtab, argv + optind));
e375fe33 713 case 'a':
714 if ((gch = gethash(optarg)) == 0)
715 die(EXIT_FAILURE, "unknown hash algorithm `%s'", optarg);
716 f |= f_oddhash;
717 break;
5685a696 718 case 'E':
c65df279 719 if ((e = getencoding(optarg)) == 0)
5685a696 720 die(EXIT_FAILURE, "unknown encoding `%s'", optarg);
721 f |= f_oddenc;
722 break;
e375fe33 723 case 'f':
724 f |= f_files;
725 break;
726 case '0':
727 f |= f_raw;
728 break;
729 case 'e':
730 f |= f_escape;
731 break;
732 case 'c':
733 f |= f_check;
734 break;
735 case 'b':
736 f |= f_binary;
737 break;
738 case 'v':
739 f |= f_verbose;
740 break;
741 default:
742 f |= f_bogus;
743 break;
744 }
745 }
746
747 if (f & f_bogus) {
748 usage(stderr);
749 exit(EXIT_FAILURE);
750 }
751 argv += optind;
752 argc -= optind;
753
754 /* --- Generate output --- */
755
92c494ce 756 if (!argc)
757 rc = hashsum(0, f, gch, e);
758 else {
e375fe33 759 int i;
760 int rrc;
92c494ce 761
e375fe33 762 rc = 0;
92c494ce 763 if (!(f & f_check)) {
764 if (f & f_oddhash) printf("#hash %s\n", gch->name);
765 if (f & f_oddenc) printf("#encoding %s\n", e->name);
766 if (f & f_escape) fputs("#escape\n", stdout);
767 }
e375fe33 768 for (i = 0; i < argc; i++) {
5685a696 769 if ((rrc = hashsum(argv[i], f, gch, e)) != 0)
e375fe33 770 rc = rrc;
771 }
92c494ce 772 }
e375fe33 773
774 return (rc);
775}
776
777/*----- That's all, folks -------------------------------------------------*/