5 * Hash files using some secure hash function
7 * (c) 2000 Straylight/Edgeware
10 /*----- Licensing notice --------------------------------------------------*
12 * This file is part of Catacomb.
14 * Catacomb is free software; you can redistribute it and/or modify
15 * it under the terms of the GNU Library General Public License as
16 * published by the Free Software Foundation; either version 2 of the
17 * License, or (at your option) any later version.
19 * Catacomb is distributed in the hope that it will be useful,
20 * but WITHOUT ANY WARRANTY; without even the implied warranty of
21 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 * GNU Library General Public License for more details.
24 * You should have received a copy of the GNU Library General Public
25 * License along with Catacomb; if not, write to the Free
26 * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
30 /*----- Header files ------------------------------------------------------*/
32 #define _FILE_OFFSET_BITS 64
43 #include <mLib/alloc.h>
44 #include <mLib/dstr.h>
45 #include <mLib/mdwopt.h>
46 #include <mLib/quis.h>
47 #include <mLib/report.h>
52 #include <mLib/base32.h>
53 #include <mLib/base64.h>
59 # if defined(__riscos)
61 # elif defined(__unix) || defined(unix)
68 /*----- Static variables --------------------------------------------------*/
79 #define f_progress 512u
81 /*----- Encoding and decoding ---------------------------------------------*/
83 /* --- Hex encoding --- */
85 static void puthex(const octet
*buf
, size_t sz
, FILE *fp
)
88 fprintf(fp
, "%02x", *buf
++);
93 static size_t gethex(const char *p
, octet
*q
, size_t sz
, char **pp
)
97 isxdigit((unsigned char)p
[0]) &&
98 isxdigit((unsigned char)p
[1])) {
103 *q
++ = strtoul(buf
, 0, 16);
113 /* --- Base64 encoding --- */
115 static void putb64(const octet
*buf
, size_t sz
, FILE *fp
)
123 base64_encode(&b
, buf
, sz
, &d
);
124 base64_encode(&b
, 0, 0, &d
);
129 static size_t getb64(const char *p
, octet
*q
, size_t sz
, char **pp
)
133 size_t n
= strlen(p
);
136 base64_decode(&b
, p
, n
, &d
);
137 if (pp
) *pp
= (/*unconst*/ char *)p
+ n
;
138 base64_decode(&b
, 0, 0, &d
);
140 memcpy(q
, d
.buf
, sz
);
146 /* --- Base32 encoding --- */
148 static void putb32(const octet
*buf
, size_t sz
, FILE *fp
)
156 base32_encode(&b
, buf
, sz
, &d
);
157 base32_encode(&b
, 0, 0, &d
);
162 static size_t getb32(const char *p
, octet
*q
, size_t sz
, char **pp
)
166 size_t n
= strlen(p
);
169 base32_decode(&b
, p
, n
, &d
);
170 if (pp
) *pp
= (/*unconst*/ char *)p
+ n
;
171 base32_decode(&b
, 0, 0, &d
);
173 memcpy(q
, d
.buf
, sz
);
181 typedef struct encodeops
{
183 void (*put
)(const octet
*, size_t, FILE *);
184 size_t (*get
)(const char *, octet
*, size_t, char **);
187 static const encodeops encodingtab
[] = {
188 { "hex", puthex
, gethex
},
189 { "base64", putb64
, getb64
},
190 { "base32", putb32
, getb32
},
194 static const encodeops
*getencoding(const char *ename
)
198 for (e
= encodingtab
; e
->name
; e
++) {
199 if (strcmp(ename
, e
->name
) == 0)
205 /*----- Support functions -------------------------------------------------*/
209 * Arguments: @const char *file@ = file name to be hashed (null for stdin)
210 * @unsigned f@ = flags to set
211 * @const gchash *gch@ = pointer to hash function to use
212 * @void *buf@ = pointer to hash output buffer
214 * Returns: Zero if it worked, nonzero on error.
216 * Use: Hashes a file.
/* --- @prhuman_time@ --- *
 *
 * Arguments:	@FILE *fp@ = stream to write on
 *		@unsigned long n@ = a duration, in seconds
 *
 * Returns:	---
 *
 * Use:		Writes the duration in a compact seven-column form using
 *		the two most significant applicable units, e.g., `  2h00m',
 *		or just seconds if it's less than a minute.
 */

static void prhuman_time(FILE *fp, unsigned long n)
{
  static const struct timeunit {
    const char *name;			/* Suffix written after the value */
    unsigned long m;			/* How many make one of the next */
  } utime[] = {
    { "s", 60 }, { "m", 60 }, { "h", 24 }, { "d", 0 }
  };

  const struct timeunit *u = utime;
  unsigned long m;

  /* --- Find the smaller of the two units to show --- *
   *
   * We stop one short of the final table entry, so that @u[0].m@ is
   * always nonzero and @u[1]@ always exists.
   */

  while (u[1].m && n > u[0].m*u[1].m) { n /= u[0].m; u++; }

  /* --- Split into a count of the larger unit and a remainder --- *
   *
   * Both the quotient and the remainder are taken with respect to the
   * smaller unit's multiplier.  (Dividing by the larger unit's multiplier
   * gives the wrong answer for hours, and divides by zero for days.)
   */

  m = n/u[0].m; n %= u[0].m;

  /* --- Both forms are seven columns wide --- */

  if (m) fprintf(fp, "%3lu%s%02lu%s", m, u[1].name, n, u[0].name);
  else fprintf(fp, "    %2lu%s", n, u[0].name);
}
/* --- @prhuman_data@ --- *
 *
 * Arguments:	@FILE *fp@ = stream to write on
 *		@off_t n@ = a quantity of data, in bytes
 *
 * Returns:	---
 *
 * Use:		Writes the quantity scaled by powers of 1024, to one decimal
 *		place, followed by a one-character unit suffix (a space for
 *		plain bytes).
 */

static void prhuman_data(FILE *fp, off_t n)
{
  static const struct dataunit {
    const char *name;			/* Suffix written after the value */
    int m;				/* How many make one of the next */
  } udata[] = {
    { " ", 1024 }, { "k", 1024 }, { "M", 1024 }, { "G", 1024 },
    { "T", 1024 }, { "P", 1024 }, { "E", 1024 }, { "Z", 1024 },
    { "Y",    0 }
  };

  double x = n;
  const struct dataunit *u = udata;

  /* --- Scale down until it fits, or we run out of units --- */

  while (u->m && x >= u->m) { x /= u->m; u++; }
  fprintf(fp, "%6.1f%s", x, u->name);
}
254 static int fhash(const char *file
, unsigned f
, const gchash
*gch
, void *buf
)
257 char fbuf
[1024 * 128];
264 static char baton
[] = "-\\|/";
266 time_t now
, last
, start
;
269 if (!file
|| strcmp(file
, "-") == 0)
271 else if ((fp
= fopen(file
, f
& f_binary ?
"rb" : "r")) == 0)
274 if (f
& f_progress
) {
275 if ((fo
= ftello(fp
)) >= 0 &&
276 fseeko(fp
, 0, SEEK_END
) >= 0 &&
278 fseeko(fp
, fo
, SEEK_SET
) < 0))
280 if (fo
!= -1 && fsz
!= -1) fsz
-= fo
;
285 else if ((p
= strchr(file
+ sz
- 20, PATHSEP
)) != 0) {
286 dstr_puts(&d
, "..."); dstr_puts(&d
, p
);
288 p
= strrchr(file
, PATHSEP
);
289 if (!p
) dstr_putf(&d
, "%.20s...", file
);
290 else dstr_putf(&d
, "...%.17s...", p
);
292 start
= last
= time(0);
296 while ((sz
= fread(fbuf
, 1, sizeof(fbuf
), fp
)) > 0) {
297 GH_HASH(h
, fbuf
, sz
);
298 if (f
& f_progress
) {
301 if (difftime(now
, last
) < 1) continue;
303 fprintf(stderr
, "\r%-24s", d
.buf
);
304 fprintf(stderr
, "%c ", *bp
++); if (!*bp
) bp
= baton
;
305 prhuman_data(stderr
, fo
);
308 prhuman_data(stderr
, fsz
);
310 pc
= (fo
*16 + fsz
/2)/fsz
;
311 for (i
= 0; i
< pc
; i
++) fputc('.', stderr
);
312 for (; i
< 16; i
++) fputc(' ', stderr
);
313 fprintf(stderr
, "] %3d%%", (int)((fo
*100 + 50)/fsz
));
314 fprintf(stderr
, " ETA ");
315 prhuman_time(stderr
, difftime(now
, start
)*(fsz
- fo
)/fo
);
321 if (f
& f_progress
) fprintf(stderr
, "\r%78s\r", "");
328 /* --- @gethash@ --- *
330 * Arguments: @const char *name@ = pointer to name string
332 * Returns: Pointer to appropriate hash class.
334 * Use: Chooses a hash function by name.
337 static const gchash
*gethash(const char *name
)
339 const gchash
*const *g
, *gg
= 0;
340 size_t sz
= strlen(name
);
341 for (g
= ghashtab
; *g
; g
++) {
342 if (strncmp(name
, (*g
)->name
, sz
) == 0) {
343 if ((*g
)->name
[sz
] == 0) {
355 /* --- @getstring@ --- *
357 * Arguments: @FILE *fp@ = stream from which to read
358 * @const char *p@ = string to read from instead
359 * @dstr *d@ = destination string
360 * @unsigned raw@ = raw or cooked read
362 * Returns: Zero if OK, nonzero on end-of-file.
364 * Use: Reads a filename (or something similar) from a stream.
367 static int getstring(FILE *fp
, const char *p
, dstr
*d
, unsigned raw
)
372 /* --- Raw: just read exactly what's written up to a null byte --- */
374 #define NEXTCH (fp ? getc(fp) : (unsigned char)*p++)
375 #define EOFCH (fp ? EOF : 0)
378 if ((ch
= NEXTCH
) == EOFCH
)
384 if ((ch
= NEXTCH
) == EOFCH
)
391 /* --- Skip as far as whitespace --- *
393 * Also skip past comments.
401 do ch
= NEXTCH
; while (ch
!= '\n' && ch
!= EOFCH
);
407 /* --- If the character is a quote then read a quoted string --- */
419 /* --- Now read all sorts of interesting things --- */
423 /* --- Handle an escaped thing --- */
430 case 'a': ch
= '\a'; break;
431 case 'b': ch
= '\b'; break;
432 case 'f': ch
= '\f'; break;
433 case 'n': ch
= '\n'; break;
434 case 'r': ch
= '\r'; break;
435 case 't': ch
= '\t'; break;
436 case 'v': ch
= '\v'; break;
443 /* --- If it's a quote or some other end marker then stop --- */
447 if (!q
&& isspace(ch
))
450 /* --- Otherwise contribute and continue --- */
453 if ((ch
= NEXTCH
) == EOFCH
)
466 /* --- @putstring@ --- *
468 * Arguments: @FILE *fp@ = stream to write on
469 * @const char *p@ = pointer to text
470 * @unsigned raw@ = whether the string is to be written raw
474 * Use: Emits a string to a stream.
477 static void putstring(FILE *fp
, const char *p
, unsigned raw
)
479 size_t sz
= strlen(p
);
483 /* --- Just write the string null terminated if raw --- */
486 fwrite(p
, 1, sz
+ 1, fp
);
490 /* --- Check for any dodgy characters --- */
493 for (q
= p
; *q
; q
++) {
494 if (isspace((unsigned char)*q
)) {
503 /* --- Emit the string --- */
505 for (q
= p
; *q
; q
++) {
507 case '\a': fputc('\\', fp
); fputc('a', fp
); break;
508 case '\b': fputc('\\', fp
); fputc('b', fp
); break;
509 case '\f': fputc('\\', fp
); fputc('f', fp
); break;
510 case '\n': fputc('\\', fp
); fputc('n', fp
); break;
511 case '\r': fputc('\\', fp
); fputc('r', fp
); break;
512 case '\t': fputc('\\', fp
); fputc('t', fp
); break;
513 case '\v': fputc('\\', fp
); fputc('v', fp
); break;
514 case '`': fputc('\\', fp
); fputc('`', fp
); break;
515 case '\'': fputc('\\', fp
); fputc('\'', fp
); break;
516 case '\"': fputc('\\', fp
); fputc('\"', fp
); break;
517 case '#': fputc('\\', fp
); fputc('#', fp
); break;
530 /*----- Guts --------------------------------------------------------------*/
532 static int checkhash(const char *file
, unsigned f
,
533 const gchash
*gch
, const encodeops
*e
)
539 unsigned long n
= 0, nfail
= 0;
540 octet
*buf
= xmalloc(2 * gch
->hashsz
);
542 if (!file
|| strcmp(file
, "-") == 0)
544 else if ((fp
= fopen(file
, f
& f_raw ?
"r" : "rb")) == 0) {
545 moan("couldn't open `%s': %s", file
, strerror(errno
));
546 return (EXIT_FAILURE
);
549 while (DRESET(&d
), dstr_putline(&d
, fp
) != EOF
) {
554 /* --- Handle a directive --- */
558 if ((q
= str_getword(&p
)) == 0)
560 if (strcmp(q
, "hash") == 0) {
562 if ((q
= str_getword(&p
)) == 0)
564 if ((g
= gethash(q
)) == 0)
568 buf
= xmalloc(2 * gch
->hashsz
);
569 } else if (strcmp(q
, "encoding") == 0) {
571 if ((q
= str_getword(&p
)) == 0)
573 if ((ee
= getencoding(q
)) == 0)
576 } else if (strcmp(q
, "escape") == 0)
581 /* --- Otherwise it's a hex thing --- */
584 while (*p
&& *p
!= ' ')
589 if (e
->get(q
, buf
, gch
->hashsz
, 0) < gch
->hashsz
)
599 getstring(0, p
, &dd
, 0);
603 if (fhash(p
, ff
, gch
, buf
+ gch
->hashsz
)) {
604 moan("couldn't read `%s': %s", p
, strerror(errno
));
608 if (memcmp(buf
, buf
+ gch
->hashsz
, gch
->hashsz
) != 0) {
610 fprintf(stderr
, "FAIL %s\n", p
);
612 moan("%s check failed for `%s'", gch
->name
, p
);
617 fprintf(stderr
, "OK %s\n", p
);
625 if ((f
& f_verbose
) && nfail
)
626 moan("%lu of %lu file(s) failed %s check", nfail
, n
, gch
->name
);
628 moan("no files checked");
632 static int dohash(const char *file
, unsigned f
,
633 const gchash
*gch
, const encodeops
*e
)
636 octet
*p
= xmalloc(gch
->hashsz
);
638 if (fhash(file
, f
, gch
, p
)) {
639 moan("couldn't read `%s': %s", file ? file
: "<stdin>", strerror(errno
));
642 e
->put(p
, gch
->hashsz
, stdout
);
645 fputc(f
& f_binary ?
'*' : ' ', stdout
);
647 putstring(stdout
, file
, 0);
658 static int dofile(const char *file
, unsigned f
,
659 const gchash
*gch
, const encodeops
*e
)
661 return (f
& f_check ? checkhash
: dohash
)(file
, f
, gch
, e
);
664 static int hashfiles(const char *file
, unsigned f
,
665 const gchash
*gch
, const encodeops
*e
)
672 if (!file
|| strcmp(file
, "-") == 0)
674 else if ((fp
= fopen(file
, f
& f_raw ?
"r" : "rb")) == 0) {
675 moan("couldn't open `%s': %s", file
, strerror(errno
));
676 return (EXIT_FAILURE
);
681 if (getstring(fp
, 0, &d
, f
& f_raw
))
683 if ((rrc
= dofile(d
.buf
, f
, gch
, e
)) != 0)
690 static int hashsum(const char *file
, unsigned f
,
691 const gchash
*gch
, const encodeops
*e
)
693 return (f
& f_files ? hashfiles
: dofile
)(file
, f
, gch
, e
);
696 /*----- Main driver -------------------------------------------------------*/
698 void version(FILE *fp
)
700 pquis(fp
, "$, Catacomb version " VERSION
"\n");
/* --- @usage@ --- *
 *
 * Arguments:	@FILE *fp@ = stream to write on
 *
 * Returns:	---
 *
 * Use:		Writes a terse one-line usage summary.  The flag cluster
 *		includes `p', since the option parser accepts it.
 */

static void usage(FILE *fp)
{
  pquis(fp, "Usage: $ [-f0ebcvp] [-a ALGORITHM] [-E ENC] [FILES...]\n");
}
708 static void help(FILE *fp
, const gchash
*gch
)
714 Generates or checks message digests on files. Options available:\n\
716 -h, --help Display this help message.\n\
717 -V, --version Display program's version number.\n\
718 -u, --usage Display a terse usage message.\n\
719 -l, --list [ITEM...] Show known hash functions and/or encodings.\n\
721 -a, --algorithm=ALG Use the message digest algorithm ALG.\n\
722 -E, --encoding=ENC Represent hashes using encoding ENC.\n\
724 -f, --files Read a list of file names from standard input.\n\
725 -0, --null File names are null terminated, not plain text.\n\
727 -e, --escape Escape funny characters in filenames.\n\
728 -c, --check Check message digests rather than emitting them.\n\
729 -b, --binary When reading files, treat them as binary.\n\
730 -v, --verbose Be verbose when checking digests.\n\
732 For a list of hashing algorithms and encodings, type `$ --list'.\n\
735 fprintf(fp
, "The default message digest algorithm is %s.\n", gch
->name
);
739 LI("Lists", list, listtab[i].name, listtab[i].name) \
740 LI("Hash functions", hash, ghashtab[i], ghashtab[i]->name) \
741 LI("Encodings", enc, encodingtab[i].name, encodingtab[i].name)
743 MAKELISTTAB(listtab
, LISTS
)
745 int main(int argc
, char *argv
[])
748 const gchash
*gch
= 0;
749 const encodeops
*e
= &encodingtab
[0];
752 /* --- Initialization --- */
757 /* --- Choose a hash function from the name --- */
760 char *q
= xstrdup(QUIS
);
761 size_t len
= strlen(q
);
762 if (len
> 3 && strcmp(q
+ len
- 3, "sum") == 0) {
767 gch
= gethash("md5");
771 /* --- Read options --- */
774 static struct option opts
[] = {
775 { "help", 0, 0, 'h' },
776 { "verbose", 0, 0, 'V' },
777 { "usage", 0, 0, 'u' },
779 { "algorithm", OPTF_ARGREQ
, 0, 'a' },
780 { "hash", OPTF_ARGREQ
, 0, 'a' },
781 { "encoding", OPTF_ARGREQ
, 0, 'E' },
782 { "list", 0, 0, 'l' },
784 { "files", 0, 0, 'f' },
785 { "find", 0, 0, 'f' },
786 { "null", 0, 0, '0' },
788 { "escape", 0, 0, 'e' },
789 { "check", 0, 0, 'c' },
790 { "binary", 0, 0, 'b' },
791 { "verbose", 0, 0, 'v' },
792 { "progress", 0, 0, 'p' },
796 int i
= mdwopt(argc
, argv
, "hVu a:E:l f0 ecbvp", opts
, 0, 0, 0);
811 exit(displaylists(listtab
, argv
+ optind
));
813 if ((gch
= gethash(optarg
)) == 0)
814 die(EXIT_FAILURE
, "unknown hash algorithm `%s'", optarg
);
818 if ((e
= getencoding(optarg
)) == 0)
819 die(EXIT_FAILURE
, "unknown encoding `%s'", optarg
);
856 /* --- Generate output --- */
858 if (!(f
& f_check
) && (argc
|| (f
& f_files
))) {
859 if (f
& f_oddhash
) printf("#hash %s\n", gch
->name
);
860 if (f
& f_oddenc
) printf("#encoding %s\n", e
->name
);
861 if (f
& f_escape
) fputs("#escape\n", stdout
);
864 rc
= hashsum(0, f
, gch
, e
);
870 for (i
= 0; i
< argc
; i
++) {
871 if ((rrc
= hashsum(argv
[i
], f
, gch
, e
)) != 0)
879 /*----- That's all, folks -------------------------------------------------*/