X-Git-Url: https://git.distorted.org.uk/u/mdw/catacomb/blobdiff_plain/ba6e6b64033b1f9de49feccb5c9cd438354481f7..0f00dc4c8eb47e67bc0f148c2dd109f73a451e0a:/progs/cc-hash.c diff --git a/progs/cc-hash.c b/progs/cc-hash.c new file mode 100644 index 0000000..64fea0e --- /dev/null +++ b/progs/cc-hash.c @@ -0,0 +1,687 @@ +/* -*-c-*- + * + * Common functions for hashing utilities + * + * (c) 2011 Straylight/Edgeware + */ + +/*----- Licensing notice --------------------------------------------------* + * + * This file is part of Catacomb. + * + * Catacomb is free software; you can redistribute it and/or modify + * it under the terms of the GNU Library General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * Catacomb is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public + * License along with Catacomb; if not, write to the Free + * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, + * MA 02111-1307, USA. + */ + +/*----- Header files ------------------------------------------------------*/ + +#define _FILE_OFFSET_BITS 64 + +#include "config.h" + +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include + +#include +#include +#include + +#include "ghash.h" +#include "cc.h" + +/*----- Encoding and decoding ---------------------------------------------*/ + +/* --- Hex encoding --- */ + +static void puthex(const octet *buf, size_t sz, FILE *fp) +{ + while (sz) { + fprintf(fp, "%02x", *buf++); + sz--; + } +} + +static size_t gethex(const char *p, octet *q, size_t sz, char **pp) +{ + size_t i = 0; + while (sz > 0 && + isxdigit((unsigned char)p[0]) && + isxdigit((unsigned char)p[1])) { + char buf[3]; + buf[0] = p[0]; + buf[1] = p[1]; + buf[2] = 0; + *q++ = strtoul(buf, 0, 16); + sz--; + p += 2; + i++; + } + if (pp) + *pp = (char *)p; + return (i); +} + +/* --- Base64 encoding --- */ + +static void putbase64(const octet *buf, size_t sz, FILE *fp) +{ + base64_ctx b; + dstr d = DSTR_INIT; + + base64_init(&b); + b.indent = ""; + b.maxline = 0; + base64_encode(&b, buf, sz, &d); + base64_encode(&b, 0, 0, &d); + dstr_write(&d, fp); + dstr_destroy(&d); +} + +static size_t getbase64(const char *p, octet *q, size_t sz, char **pp) +{ + base64_ctx b; + dstr d = DSTR_INIT; + size_t n = strlen(p); + + base64_init(&b); + base64_decode(&b, p, n, &d); + if (pp) *pp = (/*unconst*/ char *)p + n; + base64_decode(&b, 0, 0, &d); + assert(d.len <= sz); + memcpy(q, d.buf, sz); + n = d.len; + dstr_destroy(&d); + return (n); +} + +/* --- Base32 encoding --- */ + +static void putbase32(const octet *buf, size_t sz, FILE *fp) +{ + base32_ctx b; + dstr d = DSTR_INIT; + + base32_init(&b); + b.indent = ""; + b.maxline = 0; + base32_encode(&b, buf, sz, &d); + base32_encode(&b, 0, 0, &d); + dstr_write(&d, fp); + dstr_destroy(&d); +} + +static size_t getbase32(const char *p, octet *q, size_t sz, char **pp) +{ + base32_ctx b; + dstr d = DSTR_INIT; + size_t n = strlen(p); + + base32_init(&b); + base32_decode(&b, p, n, &d); + if (pp) *pp = (/*unconst*/ char *)p + n; + base32_decode(&b, 0, 0, &d); + assert(d.len <= sz); + memcpy(q, d.buf, sz); + n = d.len; + dstr_destroy(&d); + return (n); +} + +/* --- Table --- */ + +const encodeops encodingtab[] = { +#define TAB(tag, name) { #name, put##name, get##name }, + ENCODINGS(TAB) +#undef TAB + { 0, 0, 0 } +}; + +const encodeops *getencoding(const char *ename) +{ + const encodeops *e; + + for (e = encodingtab; e->name; e++) { + if (strcmp(ename, e->name) == 0) + return (e); + } + return (0); +} + +/*----- File hashing ------------------------------------------------------*/ + +enum { + FHETY_DIR, + FHETY_FILE +}; + +typedef struct fhent { + struct fhent *next; + unsigned ty; + struct fhent *sub; + char name[1]; +} fhdir; + +/* --- @gethash@ --- * + * + * Arguments: @const char *name@ = pointer to name string + * + * Returns: Pointer to appropriate hash class. + * + * Use: Chooses a hash function by name. + */ + +const gchash *gethash(const char *name) +{ + const gchash *const *g, *gg = 0; + size_t sz = strlen(name); + for (g = ghashtab; *g; g++) { + if (strncmp(name, (*g)->name, sz) == 0) { + if ((*g)->name[sz] == 0) { + gg = *g; + break; + } else if (gg) + return (0); + else + gg = *g; + } + } + return (gg); +} + +/* --- @describefile@ --- * + * + * Arguments: @const struct stat *st@ = pointer to file state + * + * Returns: A snappy one-word description of the file. + */ + +const char *describefile(const struct stat *st) +{ + switch (st->st_mode & S_IFMT) { + case S_IFBLK: return ("block-special"); + case S_IFCHR: return ("char-special"); + case S_IFIFO: return ("fifo"); + case S_IFREG: return ("file"); + case S_IFLNK: return ("symlink"); + case S_IFDIR: return ("directory"); + case S_IFSOCK: return ("socket"); + default: return ("unknown"); + } +} + +/* --- @fhash_init@ ---* + * + * Arguments: @fhashstate *fh@ = pointer to fhash state to initialize + * @const gchash *gch@ = hash class to set + * @unsigned f@ initial flags to set + * + * Returns: --- + * + * Use: Initializes an @fhashstate@ structure. + */ + +void fhash_init(fhashstate *fh, const gchash *gch, unsigned f) + { fh->f = f; fh->gch = gch; fh->ents = 0; } + +/* --- @fhash_free@ --- * + * + * Arguments: @fhashstate *fh@ = pointer to fhash state to free + * + * Returns: --- + * + * Use: Frees an fhash state. + */ + +static void freefhents(struct fhent *fhe) +{ + struct fhent *ffhe; + + for (; fhe; fhe = ffhe) { + ffhe = fhe->next; + freefhents(fhe->sub); + xfree(fhe); + } +} + +void fhash_free(fhashstate *fh) + { freefhents(fh->ents); } + +/* --- @fhash@ --- * + * + * Arguments: @fhashstate *fh@ = pointer to fhash state + * @const char *file@ = file name to be hashed (null for stdin) + * @void *buf@ = pointer to hash output buffer + * + * Returns: Zero if it worked, nonzero on error. + * + * Use: Hashes a file. + */ + +int fhash(fhashstate *fh, const char *file, void *buf) +{ + FILE *fp; + char fbuf[1024 * 128]; + size_t sz; + ghash *h; + int rc = 0; + struct fhent *fhe, **ffhe; + const char *p, *q; + size_t n; + fprogress ff; + + if (!file || strcmp(file, "-") == 0) + fp = stdin; + else if ((fp = fopen(file, fh->f & FHF_BINARY ? "rb" : "r")) == 0) + return (-1); + + if (fh->f & FHF_PROGRESS) { + if (fprogress_init(&ff, file ? file : "", fp)) return (-1); + } + + if (fh->f & FHF_JUNK) { + p = file; + if (strncmp(p, "./", 2) == 0) p += 2; + q = p; + ffhe = &fh->ents; + for (;;) { + if (*q == '/' || *q == 0) { + n = q - p; + for (; *ffhe; ffhe = &(*ffhe)->next) { + fhe = *ffhe; + if (strncmp(p, fhe->name, n) == 0 && fhe->name[n] == 0) + goto found; + } + fhe = xmalloc(offsetof(struct fhent, name) + n + 1); + fhe->next = 0; + fhe->ty = *q == '/' ? FHETY_DIR : FHETY_FILE; + fhe->sub = 0; + *ffhe = fhe; + memcpy(fhe->name, p, n); fhe->name[n] = 0; + found: + if (!*q) break; + while (*++q == '/'); + p = q; + ffhe = &fhe->sub; + } else + q++; + } + } + + h = GH_INIT(fh->gch); + while ((sz = fread(fbuf, 1, sizeof(fbuf), fp)) > 0) { + GH_HASH(h, fbuf, sz); + if (fh->f & FHF_PROGRESS) fprogress_update(&ff, sz); + } + if (ferror(fp)) rc = -1; + if (fp != stdin) fclose(fp); + if (fh->f & FHF_PROGRESS) fprogress_done(&ff); + GH_DONE(h, buf); + GH_DESTROY(h); + return (rc); +} + +/* --- @fhash_junk@ --- * + * + * Arguments: @fhashstate *fh@ = pointer to fhash state + * @void (*func)(const char *, const struct stat *, void *)@ + * @void *p@ = pointer to pass to function + * + * Returns: Positive if any junk was found, negative on error, zero if + * everything was fine. + * + * Use: Reports junk files in any directories covered by the hash + * state. + */ + +struct fhjunk { + int (*func)(const char *, const struct stat *, void *); + void *p; + dstr *d; +}; + +static int fhjunk(struct fhjunk *fhj, struct fhent *ents) +{ + DIR *dp; + int rc = 0, rrc; + struct stat st; + struct dirent *d; + const char *dname; + size_t n = fhj->d->len; + struct fhent *fhe; + + dname = n ? fhj->d->buf : "."; + if ((dp = opendir(dname)) == 0) { + moan("failed to open directory `%s': %s", dname, strerror(errno)); + rc = -1; + goto subs; + } + if (n) { + dstr_putc(fhj->d, '/'); + n++; + } + while (errno = 0, (d = readdir(dp)) != 0) { + if (strcmp(d->d_name, ".") == 0 || strcmp(d->d_name, "..") == 0) + continue; + for (fhe = ents; fhe; fhe = fhe->next) { + if (strcmp(d->d_name, fhe->name) == 0) goto found; + } + fhj->d->len = n; + dstr_puts(fhj->d, d->d_name); + if (!lstat(fhj->d->buf, &st)) { + if (!rc) rc = 1; + rrc = fhj->func(fhj->d->buf, &st, fhj->p); + } else { + rc = -1; + rrc = fhj->func(fhj->d->buf, 0, fhj->p); + } + if (rrc < 0) rc = -1; + found:; + } + closedir(dp); + if (errno) { + moan("failed to read directory `%s': %s", dname, strerror(errno)); + rc = -1; + } + +subs: + for (fhe = ents; fhe; fhe = fhe->next) { + if (fhe->ty == FHETY_DIR) { + fhj->d->len = n; + dstr_puts(fhj->d, fhe->name); + rrc = fhjunk(fhj, fhe->sub); + if (rrc < 0) rc = -1; + else if (!rc) rc = rrc; + } + } + + return (rc); +} + +int fhash_junk(fhashstate *fh, + int (*func)(const char *, const struct stat *, void *), + void *p) +{ + dstr d = DSTR_INIT; + struct fhjunk fhj; + int rc; + + fhj.func = func; + fhj.p = p; + fhj.d = &d; + rc = fhjunk(&fhj, fh->ents); + dstr_destroy(&d); + return (rc); +} + +/* --- @hfparse@ --- * + * + * Arguments: @hfpctx *hfp@ = pointer to the context structure + * + * Returns: A code indicating what happened. + * + * Use: Parses a line from the input file. + */ + +int hfparse(hfpctx *hfp) +{ + char *p, *q; + const gchash *gch; + const encodeops *ee; + dstr *d = hfp->dline; + size_t hsz; + + /* --- Fetch the input line and get ready to parse --- */ + + DRESET(d); + if (dstr_putline(d, hfp->fp) == EOF) return (HF_EOF); + p = d->buf; + + /* --- Parse magic comments --- */ + + if (*p == '#') { + p++; + if ((q = str_getword(&p)) == 0) return (HF_BAD); + if (strcmp(q, "hash") == 0) { + if ((q = str_getword(&p)) == 0) return (HF_BAD); + if ((gch = gethash(q)) == 0) return (HF_BAD); + hfp->gch = gch; + return (HF_HASH); + } else if (strcmp(q, "encoding") == 0) { + if ((q = str_getword(&p)) == 0) return (HF_BAD); + if ((ee = getencoding(q)) == 0) return (HF_BAD); + hfp->ee = ee; + return (HF_ENC); + } else if (strcmp(q, "escape") == 0) { + hfp->f |= HFF_ESCAPE; + return (HF_ESC); + } + return (HF_BAD); + } + + /* --- Otherwise it's a file line --- */ + + q = p; + while (*p && *p != ' ') p++; + if (!*p) return (HF_BAD); + *p++ = 0; + hsz = hfp->gch->hashsz; + if (hfp->ee->get(q, hfp->hbuf, hsz, 0) < hsz) return (HF_BAD); + switch (*p) { + case '*': hfp->f |= FHF_BINARY; break; + case ' ': hfp->f &= ~FHF_BINARY; break; + default: return (HF_BAD); + } + p++; + + DRESET(hfp->dfile); + if (hfp->f & HFF_ESCAPE) + getstring(&p, hfp->dfile, GSF_STRING); + else { + dstr_putm(hfp->dfile, p, d->len - (p - d->buf)); + dstr_putz(hfp->dfile); + } + + return (HF_FILE); +} + +/*----- String I/O --------------------------------------------------------*/ + +/* --- @getstring@ --- * + * + * Arguments: @void *in@ = input source + * @dstr *d@ = destination string + * @unsigned f@ = input flags + * + * Returns: Zero if OK, nonzero on end-of-file. + * + * Use: Reads a filename (or something similar) from a stream. + */ + +static int nextch_file(void *in) + { FILE *fp = in; return (getc(fp)); } + +static int nextch_string(void *in) + { const unsigned char **p = in; return (*(*p)++); } + +int getstring(void *in, dstr *d, unsigned f) +{ + int ch; + int eofch = (f & GSF_STRING) ? 0 : EOF; + int (*nextch)(void *) = (f & GSF_STRING) ? nextch_string : nextch_file; + int q = 0; + + /* --- Raw: just read exactly what's written up to a null byte --- */ + + if (f & GSF_RAW) { + if ((ch = nextch(in)) == eofch) + return (EOF); + for (;;) { + if (!ch) + break; + DPUTC(d, ch); + if ((ch = nextch(in)) == eofch) + break; + } + DPUTZ(d); + return (0); + } + + /* --- Skip as far as whitespace --- * + * + * Also skip past comments. + */ + +again: + ch = nextch(in); + while (isspace(ch)) + ch = nextch(in); + if (ch == '#') { + do ch = nextch(in); while (ch != '\n' && ch != eofch); + goto again; + } + if (ch == eofch) + return (EOF); + + /* --- If the character is a quote then read a quoted string --- */ + + switch (ch) { + case '`': + ch = '\''; + case '\'': + case '\"': + q = ch; + ch = nextch(in); + break; + } + + /* --- Now read all sorts of interesting things --- */ + + for (;;) { + + /* --- Handle an escaped thing --- */ + + if (ch == '\\') { + ch = nextch(in); + if (ch == eofch) + break; + switch (ch) { + case 'a': ch = '\a'; break; + case 'b': ch = '\b'; break; + case 'f': ch = '\f'; break; + case 'n': ch = '\n'; break; + case 'r': ch = '\r'; break; + case 't': ch = '\t'; break; + case 'v': ch = '\v'; break; + } + DPUTC(d, ch); + ch = nextch(in); + continue; + } + + /* --- If it's a quote or some other end marker then stop --- */ + + if (ch == q) + break; + if (!q && isspace(ch)) + break; + + /* --- Otherwise contribute and continue --- */ + + DPUTC(d, ch); + if ((ch = nextch(in)) == eofch) + break; + } + + /* --- Done --- */ + + DPUTZ(d); + return (0); +} + +/* --- @putstring@ --- * + * + * Arguments: @FILE *fp@ = stream to write on + * @const char *p@ = pointer to text + * @unsigned f@ = output flags + * + * Returns: --- + * + * Use: Emits a string to a stream. + */ + +void putstring(FILE *fp, const char *p, unsigned f) +{ + size_t sz = strlen(p); + unsigned qq; + const char *q; + + /* --- Just write the string null terminated if raw --- */ + + if (f & GSF_RAW) { + fwrite(p, 1, sz + 1, fp); + return; + } + + /* --- Check for any dodgy characters --- */ + + qq = 0; + for (q = p; *q; q++) { + if (isspace((unsigned char)*q)) { + qq = '\"'; + break; + } + } + + if (qq) + putc(qq, fp); + + /* --- Emit the string --- */ + + for (q = p; *q; q++) { + switch (*q) { + case '\a': fputc('\\', fp); fputc('a', fp); break; + case '\b': fputc('\\', fp); fputc('b', fp); break; + case '\f': fputc('\\', fp); fputc('f', fp); break; + case '\n': fputc('\\', fp); fputc('n', fp); break; + case '\r': fputc('\\', fp); fputc('r', fp); break; + case '\t': fputc('\\', fp); fputc('t', fp); break; + case '\v': fputc('\\', fp); fputc('v', fp); break; + case '`': fputc('\\', fp); fputc('`', fp); break; + case '\'': fputc('\\', fp); fputc('\'', fp); break; + case '\"': fputc('\\', fp); fputc('\"', fp); break; + default: + putc(*q, fp); + break; + } + } + + /* --- Done --- */ + + if (qq) + putc(qq, fp); +} + +/*----- That's all, folks -------------------------------------------------*/