hashsum.c: Document `--progress' in the `--help' display.
[u/mdw/catacomb] / cc-hash.c
CommitLineData
18b3351a
MW
1/* -*-c-*-
2 *
3 * Common functions for hashing utilities
4 *
5 * (c) 2011 Straylight/Edgeware
6 */
7
8/*----- Licensing notice --------------------------------------------------*
9 *
10 * This file is part of Catacomb.
11 *
12 * Catacomb is free software; you can redistribute it and/or modify
13 * it under the terms of the GNU Library General Public License as
14 * published by the Free Software Foundation; either version 2 of the
15 * License, or (at your option) any later version.
16 *
17 * Catacomb is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 * GNU Library General Public License for more details.
21 *
22 * You should have received a copy of the GNU Library General Public
23 * License along with Catacomb; if not, write to the Free
24 * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
25 * MA 02111-1307, USA.
26 */
27
28/*----- Header files ------------------------------------------------------*/
29
30#define _FILE_OFFSET_BITS 64
31
32#include "config.h"
33
34#include <ctype.h>
f5e91c02 35#include <errno.h>
18b3351a
MW
36#include <stdio.h>
37#include <stdlib.h>
38#include <string.h>
39
f5e91c02
MW
40#include <sys/types.h>
41#include <dirent.h>
42
43#include <mLib/alloc.h>
18b3351a 44#include <mLib/dstr.h>
f5e91c02 45#include <mLib/report.h>
f377eee1 46#include <mLib/str.h>
18b3351a
MW
47
48#include <mLib/hex.h>
49#include <mLib/base32.h>
50#include <mLib/base64.h>
51
52#include "ghash.h"
53#include "cc.h"
54
55/*----- Encoding and decoding ---------------------------------------------*/
56
57/* --- Hex encoding --- */
58
59static void puthex(const octet *buf, size_t sz, FILE *fp)
60{
61 while (sz) {
62 fprintf(fp, "%02x", *buf++);
63 sz--;
64 }
65}
66
67static size_t gethex(const char *p, octet *q, size_t sz, char **pp)
68{
69 size_t i = 0;
70 while (sz > 0 &&
71 isxdigit((unsigned char)p[0]) &&
72 isxdigit((unsigned char)p[1])) {
73 char buf[3];
74 buf[0] = p[0];
75 buf[1] = p[1];
76 buf[2] = 0;
77 *q++ = strtoul(buf, 0, 16);
78 sz--;
79 p += 2;
80 i++;
81 }
82 if (pp)
83 *pp = (char *)p;
84 return (i);
85}
86
87/* --- Base64 encoding --- */
88
89static void putbase64(const octet *buf, size_t sz, FILE *fp)
90{
91 base64_ctx b;
92 dstr d = DSTR_INIT;
93
94 base64_init(&b);
95 b.indent = "";
96 b.maxline = 0;
97 base64_encode(&b, buf, sz, &d);
98 base64_encode(&b, 0, 0, &d);
99 dstr_write(&d, fp);
100 dstr_destroy(&d);
101}
102
103static size_t getbase64(const char *p, octet *q, size_t sz, char **pp)
104{
105 base64_ctx b;
106 dstr d = DSTR_INIT;
107 size_t n = strlen(p);
108
109 base64_init(&b);
110 base64_decode(&b, p, n, &d);
111 if (pp) *pp = (/*unconst*/ char *)p + n;
112 base64_decode(&b, 0, 0, &d);
113 assert(d.len <= sz);
114 memcpy(q, d.buf, sz);
115 n = d.len;
116 dstr_destroy(&d);
117 return (n);
118}
119
120/* --- Base32 encoding --- */
121
122static void putbase32(const octet *buf, size_t sz, FILE *fp)
123{
124 base32_ctx b;
125 dstr d = DSTR_INIT;
126
127 base32_init(&b);
128 b.indent = "";
129 b.maxline = 0;
130 base32_encode(&b, buf, sz, &d);
131 base32_encode(&b, 0, 0, &d);
132 dstr_write(&d, fp);
133 dstr_destroy(&d);
134}
135
136static size_t getbase32(const char *p, octet *q, size_t sz, char **pp)
137{
138 base32_ctx b;
139 dstr d = DSTR_INIT;
140 size_t n = strlen(p);
141
142 base32_init(&b);
143 base32_decode(&b, p, n, &d);
144 if (pp) *pp = (/*unconst*/ char *)p + n;
145 base32_decode(&b, 0, 0, &d);
146 assert(d.len <= sz);
147 memcpy(q, d.buf, sz);
148 n = d.len;
149 dstr_destroy(&d);
150 return (n);
151}
152
153/* --- Table --- */
154
155const encodeops encodingtab[] = {
156#define TAB(tag, name) { #name, put##name, get##name },
157 ENCODINGS(TAB)
158#undef TAB
159 { 0, 0, 0 }
160};
161
162const encodeops *getencoding(const char *ename)
163{
164 const encodeops *e;
165
166 for (e = encodingtab; e->name; e++) {
167 if (strcmp(ename, e->name) == 0)
168 return (e);
169 }
170 return (0);
171}
172
173/*----- File hashing ------------------------------------------------------*/
174
f5e91c02
MW
/* Kinds of entry in the tree of names seen by @fhash@. */
enum {
  FHETY_DIR = 0,			/* Component followed by a `/' */
  FHETY_FILE = 1			/* Final component of a name */
};
179
/* A node in the tree of path components recorded by @fhash@.  Nodes are
 * allocated with extra space so that @name@ can hold the whole component.
 */
typedef struct fhent {
  struct fhent *next;			/* Next sibling in this directory */
  unsigned ty;				/* Entry type (@FHETY_...@) */
  struct fhent *sub;			/* First child, for directories */
  char name[1];				/* Component name (over-allocated) */
} fhdir;
186
f377eee1
MW
187/* --- @gethash@ --- *
188 *
189 * Arguments: @const char *name@ = pointer to name string
190 *
191 * Returns: Pointer to appropriate hash class.
192 *
193 * Use: Chooses a hash function by name.
194 */
195
196const gchash *gethash(const char *name)
197{
198 const gchash *const *g, *gg = 0;
199 size_t sz = strlen(name);
200 for (g = ghashtab; *g; g++) {
201 if (strncmp(name, (*g)->name, sz) == 0) {
202 if ((*g)->name[sz] == 0) {
203 gg = *g;
204 break;
205 } else if (gg)
206 return (0);
207 else
208 gg = *g;
209 }
210 }
211 return (gg);
212}
213
f5e91c02
MW
214/* --- @describefile@ --- *
215 *
216 * Arguments: @const struct stat *st@ = pointer to file state
217 *
218 * Returns: A snappy one-word description of the file.
219 */
220
221const char *describefile(const struct stat *st)
222{
223 switch (st->st_mode & S_IFMT) {
224 case S_IFBLK: return ("block-special");
225 case S_IFCHR: return ("char-special");
226 case S_IFIFO: return ("fifo");
227 case S_IFREG: return ("file");
228 case S_IFLNK: return ("symlink");
229 case S_IFDIR: return ("directory");
230 case S_IFSOCK: return ("socket");
231 default: return ("unknown");
232 }
233}
234
07290a45
MW
235/* --- @fhash_init@ ---*
236 *
237 * Arguments: @fhashstate *fh@ = pointer to fhash state to initialize
238 * @const gchash *gch@ = hash class to set
239 * @unsigned f@ initial flags to set
240 *
241 * Returns: ---
242 *
243 * Use: Initializes an @fhashstate@ structure.
244 */
245
246void fhash_init(fhashstate *fh, const gchash *gch, unsigned f)
f5e91c02 247 { fh->f = f; fh->gch = gch; fh->ents = 0; }
07290a45
MW
248
249/* --- @fhash_free@ --- *
250 *
251 * Arguments: @fhashstate *fh@ = pointer to fhash state to free
252 *
253 * Returns: ---
254 *
255 * Use: Frees an fhash state.
256 */
257
f5e91c02
MW
258static void freefhents(struct fhent *fhe)
259{
260 struct fhent *ffhe;
261
262 for (; fhe; fhe = ffhe) {
263 ffhe = fhe->next;
264 freefhents(fhe->sub);
265 xfree(fhe);
266 }
267}
268
269void fhash_free(fhashstate *fh)
270 { freefhents(fh->ents); }
07290a45 271
18b3351a
MW
272/* --- @fhash@ --- *
273 *
07290a45 274 * Arguments: @fhashstate *fh@ = pointer to fhash state
18b3351a
MW
275 * @const char *file@ = file name to be hashed (null for stdin)
276 * @void *buf@ = pointer to hash output buffer
277 *
278 * Returns: Zero if it worked, nonzero on error.
279 *
280 * Use: Hashes a file.
281 */
282
07290a45 283int fhash(fhashstate *fh, const char *file, void *buf)
18b3351a
MW
284{
285 FILE *fp;
286 char fbuf[1024 * 128];
287 size_t sz;
288 ghash *h;
289 int rc = 0;
f5e91c02
MW
290 struct fhent *fhe, **ffhe;
291 const char *p, *q;
292 size_t n;
18b3351a
MW
293 fprogress ff;
294
295 if (!file || strcmp(file, "-") == 0)
296 fp = stdin;
07290a45 297 else if ((fp = fopen(file, fh->f & FHF_BINARY ? "rb" : "r")) == 0)
18b3351a
MW
298 return (-1);
299
07290a45 300 if (fh->f & FHF_PROGRESS) {
95ccefe3 301 if (fprogress_init(&ff, file ? file : "<stdin>", fp)) return (-1);
18b3351a
MW
302 }
303
f5e91c02
MW
304 if (fh->f & FHF_JUNK) {
305 p = file;
306 if (strncmp(p, "./", 2) == 0) p += 2;
307 q = p;
308 ffhe = &fh->ents;
309 for (;;) {
310 if (*q == '/' || *q == 0) {
311 n = q - p;
312 for (; *ffhe; ffhe = &(*ffhe)->next) {
313 fhe = *ffhe;
314 if (strncmp(p, fhe->name, n) == 0 && fhe->name[n] == 0)
315 goto found;
316 }
317 fhe = xmalloc(offsetof(struct fhent, name) + n + 1);
318 fhe->next = 0;
319 fhe->ty = *q == '/' ? FHETY_DIR : FHETY_FILE;
320 fhe->sub = 0;
321 *ffhe = fhe;
322 memcpy(fhe->name, p, n); fhe->name[n] = 0;
323 found:
324 if (!*q) break;
325 while (*++q == '/');
326 p = q;
327 ffhe = &fhe->sub;
328 } else
329 q++;
330 }
331 }
332
07290a45 333 h = GH_INIT(fh->gch);
18b3351a
MW
334 while ((sz = fread(fbuf, 1, sizeof(fbuf), fp)) > 0) {
335 GH_HASH(h, fbuf, sz);
07290a45 336 if (fh->f & FHF_PROGRESS) fprogress_update(&ff, sz);
18b3351a
MW
337 }
338 if (ferror(fp)) rc = -1;
339 if (fp != stdin) fclose(fp);
07290a45 340 if (fh->f & FHF_PROGRESS) fprogress_done(&ff);
18b3351a
MW
341 GH_DONE(h, buf);
342 GH_DESTROY(h);
343 return (rc);
344}
345
f5e91c02
MW
346/* --- @fhash_junk@ --- *
347 *
348 * Arguments: @fhashstate *fh@ = pointer to fhash state
349 * @int (*func)(const char *, const struct stat *, void *)@
350 * @void *p@ = pointer to pass to function
351 *
352 * Returns: Positive if any junk was found, negative on error, zero if
353 * everything was fine.
354 *
355 * Use: Reports junk files in any directories covered by the hash
356 * state.
357 */
358
359struct fhjunk {
360 int (*func)(const char *, const struct stat *, void *);
361 void *p;
362 dstr *d;
363};
364
365static int fhjunk(struct fhjunk *fhj, struct fhent *ents)
366{
367 DIR *dp;
368 int rc = 0, rrc;
369 struct stat st;
370 struct dirent *d;
371 const char *dname;
372 size_t n = fhj->d->len;
373 struct fhent *fhe;
374
375 dname = n ? fhj->d->buf : ".";
376 if ((dp = opendir(dname)) == 0) {
377 moan("failed to open directory `%s': %s", dname, strerror(errno));
378 rc = -1;
379 goto subs;
380 }
381 if (n) {
382 dstr_putc(fhj->d, '/');
383 n++;
384 }
385 while (errno = 0, (d = readdir(dp)) != 0) {
386 if (strcmp(d->d_name, ".") == 0 || strcmp(d->d_name, "..") == 0)
387 continue;
388 for (fhe = ents; fhe; fhe = fhe->next) {
389 if (strcmp(d->d_name, fhe->name) == 0) goto found;
390 }
391 fhj->d->len = n;
392 dstr_puts(fhj->d, d->d_name);
393 if (!lstat(fhj->d->buf, &st)) {
394 if (!rc) rc = 1;
395 rrc = fhj->func(fhj->d->buf, &st, fhj->p);
396 } else {
397 rc = -1;
398 rrc = fhj->func(fhj->d->buf, 0, fhj->p);
399 }
400 if (rrc < 0) rc = -1;
401 found:;
402 }
403 closedir(dp);
404 if (errno) {
405 moan("failed to read directory `%s': %s", dname, strerror(errno));
406 rc = -1;
407 }
408
409subs:
410 for (fhe = ents; fhe; fhe = fhe->next) {
411 if (fhe->ty == FHETY_DIR) {
412 fhj->d->len = n;
413 dstr_puts(fhj->d, fhe->name);
414 rrc = fhjunk(fhj, fhe->sub);
415 if (rrc < 0) rc = -1;
416 else if (!rc) rc = rrc;
417 }
418 }
419
420 return (rc);
421}
422
423int fhash_junk(fhashstate *fh,
424 int (*func)(const char *, const struct stat *, void *),
425 void *p)
426{
427 dstr d = DSTR_INIT;
428 struct fhjunk fhj;
429 int rc;
430
431 fhj.func = func;
432 fhj.p = p;
433 fhj.d = &d;
434 rc = fhjunk(&fhj, fh->ents);
435 dstr_destroy(&d);
436 return (rc);
437}
438
f377eee1
MW
439/* --- @hfparse@ --- *
440 *
441 * Arguments: @hfpctx *hfp@ = pointer to the context structure
442 *
443 * Returns: A code indicating what happened.
444 *
445 * Use: Parses a line from the input file.
446 */
447
448int hfparse(hfpctx *hfp)
449{
450 char *p, *q;
451 const gchash *gch;
452 const encodeops *ee;
453 dstr *d = hfp->dline;
454 size_t hsz;
455
456 /* --- Fetch the input line and get ready to parse --- */
457
458 DRESET(d);
459 if (dstr_putline(d, hfp->fp) == EOF) return (HF_EOF);
460 p = d->buf;
461
462 /* --- Parse magic comments --- */
463
464 if (*p == '#') {
465 p++;
466 if ((q = str_getword(&p)) == 0) return (HF_BAD);
467 if (strcmp(q, "hash") == 0) {
468 if ((q = str_getword(&p)) == 0) return (HF_BAD);
469 if ((gch = gethash(q)) == 0) return (HF_BAD);
470 hfp->gch = gch;
471 return (HF_HASH);
472 } else if (strcmp(q, "encoding") == 0) {
473 if ((q = str_getword(&p)) == 0) return (HF_BAD);
474 if ((ee = getencoding(q)) == 0) return (HF_BAD);
475 hfp->ee = ee;
476 return (HF_ENC);
477 } else if (strcmp(q, "escape") == 0) {
478 hfp->f |= HFF_ESCAPE;
479 return (HF_ESC);
480 }
481 return (HF_BAD);
482 }
483
484 /* --- Otherwise it's a file line --- */
485
486 q = p;
487 while (*p && *p != ' ') p++;
488 if (!*p) return (HF_BAD);
489 *p++ = 0;
490 hsz = hfp->gch->hashsz;
491 if (hfp->ee->get(q, hfp->hbuf, hsz, 0) < hsz) return (HF_BAD);
492 switch (*p) {
493 case '*': hfp->f |= FHF_BINARY; break;
494 case ' ': hfp->f &= ~FHF_BINARY; break;
495 default: return (HF_BAD);
496 }
497 p++;
498
499 DRESET(hfp->dfile);
500 if (hfp->f & HFF_ESCAPE)
501 getstring(&p, hfp->dfile, GSF_STRING);
502 else {
503 dstr_putm(hfp->dfile, p, d->len - (p - d->buf));
504 dstr_putz(hfp->dfile);
505 }
506
507 return (HF_FILE);
508}
509
18b3351a
MW
510/*----- String I/O --------------------------------------------------------*/
511
512/* --- @getstring@ --- *
513 *
514 * Arguments: @void *in@ = input source
515 * @dstr *d@ = destination string
516 * @unsigned f@ = input flags
517 *
518 * Returns: Zero if OK, nonzero on end-of-file.
519 *
520 * Use: Reads a filename (or something similar) from a stream.
521 */
522
/* Character source for @getstring@: @in@ is a @FILE *@; return its next
 * character as @getc@ does.
 */
static int nextch_file(void *in)
{
  FILE *fp = in;
  int ch = getc(fp);

  return (ch);
}
525
/* Character source for @getstring@: @in@ is the address of a pointer into a
 * null-terminated string.  Return the character the pointer addresses and
 * step the pointer on.
 *
 * The pointer is never moved past the terminating null, so calling again
 * after the end has been reached keeps returning zero rather than reading
 * beyond the string.
 */
static int nextch_string(void *in)
{
  const unsigned char **p = in;

  if (!**p) return (0);
  return (*(*p)++);
}
528
529int getstring(void *in, dstr *d, unsigned f)
530{
531 int ch;
532 int eofch = (f & GSF_STRING) ? 0 : EOF;
533 int (*nextch)(void *) = (f & GSF_STRING) ? nextch_string : nextch_file;
534 int q = 0;
535
536 /* --- Raw: just read exactly what's written up to a null byte --- */
537
538 if (f & GSF_RAW) {
539 if ((ch = nextch(in)) == eofch)
540 return (EOF);
541 for (;;) {
542 if (!ch)
543 break;
544 DPUTC(d, ch);
545 if ((ch = nextch(in)) == eofch)
546 break;
547 }
548 DPUTZ(d);
549 return (0);
550 }
551
552 /* --- Skip as far as whitespace --- *
553 *
554 * Also skip past comments.
555 */
556
557again:
558 ch = nextch(in);
559 while (isspace(ch))
560 ch = nextch(in);
561 if (ch == '#') {
562 do ch = nextch(in); while (ch != '\n' && ch != eofch);
563 goto again;
564 }
565 if (ch == eofch)
566 return (EOF);
567
568 /* --- If the character is a quote then read a quoted string --- */
569
570 switch (ch) {
571 case '`':
572 ch = '\'';
573 case '\'':
574 case '\"':
575 q = ch;
576 ch = nextch(in);
577 break;
578 }
579
580 /* --- Now read all sorts of interesting things --- */
581
582 for (;;) {
583
584 /* --- Handle an escaped thing --- */
585
586 if (ch == '\\') {
587 ch = nextch(in);
588 if (ch == eofch)
589 break;
590 switch (ch) {
591 case 'a': ch = '\a'; break;
592 case 'b': ch = '\b'; break;
593 case 'f': ch = '\f'; break;
594 case 'n': ch = '\n'; break;
595 case 'r': ch = '\r'; break;
596 case 't': ch = '\t'; break;
597 case 'v': ch = '\v'; break;
598 }
599 DPUTC(d, ch);
600 ch = nextch(in);
601 continue;
602 }
603
604 /* --- If it's a quote or some other end marker then stop --- */
605
606 if (ch == q)
607 break;
608 if (!q && isspace(ch))
609 break;
610
611 /* --- Otherwise contribute and continue --- */
612
613 DPUTC(d, ch);
614 if ((ch = nextch(in)) == eofch)
615 break;
616 }
617
618 /* --- Done --- */
619
620 DPUTZ(d);
621 return (0);
622}
623
624/* --- @putstring@ --- *
625 *
626 * Arguments: @FILE *fp@ = stream to write on
627 * @const char *p@ = pointer to text
628 * @unsigned f@ = output flags
629 *
630 * Returns: ---
631 *
632 * Use: Emits a string to a stream.
633 */
634
635void putstring(FILE *fp, const char *p, unsigned f)
636{
637 size_t sz = strlen(p);
638 unsigned qq;
639 const char *q;
640
641 /* --- Just write the string null terminated if raw --- */
642
643 if (f & GSF_RAW) {
644 fwrite(p, 1, sz + 1, fp);
645 return;
646 }
647
648 /* --- Check for any dodgy characters --- */
649
650 qq = 0;
651 for (q = p; *q; q++) {
652 if (isspace((unsigned char)*q)) {
653 qq = '\"';
654 break;
655 }
656 }
657
658 if (qq)
659 putc(qq, fp);
660
661 /* --- Emit the string --- */
662
663 for (q = p; *q; q++) {
664 switch (*q) {
665 case '\a': fputc('\\', fp); fputc('a', fp); break;
666 case '\b': fputc('\\', fp); fputc('b', fp); break;
667 case '\f': fputc('\\', fp); fputc('f', fp); break;
668 case '\n': fputc('\\', fp); fputc('n', fp); break;
669 case '\r': fputc('\\', fp); fputc('r', fp); break;
670 case '\t': fputc('\\', fp); fputc('t', fp); break;
671 case '\v': fputc('\\', fp); fputc('v', fp); break;
672 case '`': fputc('\\', fp); fputc('`', fp); break;
673 case '\'': fputc('\\', fp); fputc('\'', fp); break;
674 case '\"': fputc('\\', fp); fputc('\"', fp); break;
675 default:
676 putc(*q, fp);
677 break;
678 }
679 }
680
681 /* --- Done --- */
682
683 if (qq)
684 putc(qq, fp);
685}
686
687/*----- That's all, folks -------------------------------------------------*/