From: Mark Wooding Date: Fri, 9 Aug 2019 11:03:14 +0000 (+0100) Subject: *.c: General spring-clean of the coding style. X-Git-Url: https://git.distorted.org.uk/~mdw/anag/commitdiff_plain/904ac13a4c7d86c18d031ad3f113659440ab86b8 *.c: General spring-clean of the coding style. Also, add some documentation and tweak some of the algorithms. --- diff --git a/anag.c b/anag.c index 80da3c3..315930d 100644 --- a/anag.c +++ b/anag.c @@ -35,14 +35,10 @@ static const char *file = DICTIONARY; /*----- Help text functions -----------------------------------------------*/ static void usage(FILE *fp) -{ - pquis(fp, "Usage: $ [-f file] expression\n"); -} + { pquis(fp, "Usage: $ [-f file] expression\n"); } static void version(FILE *fp) -{ - pquis(fp, "$, version " VERSION "\n"); -} + { pquis(fp, "$, version " VERSION "\n"); } static void help(FILE *fp) { @@ -175,15 +171,12 @@ static unsigned nextopt(const char *const **arg) size_t sz; const char *p; - /* --- Pick the next option off the front --- */ - + /* Pick the next option off the front. */ *arg = av + ai; - if (ai >= ac) - return (O_EOF); + if (ai >= ac) return (O_EOF); p = av[ai++]; - /* --- Cope with various forms of magic --- */ - + /* Cope with various forms of magic. */ if (p[0] != '-') { if (!p[1]) switch (*p) { case '&': return (O_AND); @@ -195,65 +188,44 @@ static unsigned nextopt(const char *const **arg) goto bad; } - /* --- Now cope with other sorts of weirdies --- * - * - * By the end of this, a leading `-' or `--' will have been stripped. + /* Now cope with other sorts of weirdies. By the end of this, a leading + * `-' or `--' will have been stripped. */ - p++; - if (!*p) - goto bad; - if (*p == '-') - p++; + if (!*p) goto bad; + if (*p == '-') p++; if (!*p) { - if (ai < ac) - die("syntax error near `--': rubbish at end of line"); + if (ai < ac) die("syntax error near `--': rubbish at end of line"); return (O_EOF); } - /* --- Now look the word up in my table --- */ - + /*Now look the word up in my table. */ sz = strlen(p); oo = 0; for (o = opttab; o->name; o++) { if (strncmp(p, o->name, sz) == 0) { - if (strlen(o->name) == sz || ((o->f & OF_SHORT) && sz == 1)) { - oo = o; - break; - } - if (oo) { + if (strlen(o->name) == sz || ((o->f & OF_SHORT) && sz == 1)) + { oo = o; break; } + if (oo) die("ambiguous option name `-%s' (could match `-%s' or `-%s')", p, oo->name, o->name); - } oo = o; } } - if (!oo) - die("unrecognized option name `-%s'", p); - - /* --- Sort out the arguments --- */ + if (!oo) die("unrecognized option name `-%s'", p); + /* Sort out the arguments. */ if (ai + oo->nargs > ac) die("too few arguments for `-%s' (need %u)", oo->name, oo->nargs); ai += oo->nargs; - /* --- Now process the option --- */ - + /* Now process the option. */ switch (oo->tag) { - case O_HELP: - help(stdout); - exit(0); - case O_VERSION: - version(stdout); - exit(0); - case O_USAGE: - usage(stdout); - exit(0); - case O_FILE: - file = (*arg)[1]; - break; - default: - return (oo->tag); + case O_HELP: help(stdout); exit(0); + case O_VERSION: version(stdout); exit(0); + case O_USAGE: usage(stdout); exit(0); + case O_FILE: file = (*arg)[1]; break; + default: return (oo->tag); } continue; bad: @@ -372,8 +344,7 @@ static void p_next(p_ctx *p) { static const char *const eof[] = { "", 0 }; p->t = nextopt(&p->a); - if (p->t == O_EOF) - p->a = eof; + if (p->t == O_EOF) p->a = eof; } static void p_factor(p_ctx *p, node **nn) @@ -382,8 +353,7 @@ static void p_factor(p_ctx *p, node **nn) if (p->t == O_LPAREN) { p_next(p); p_expr(p, nn); - if (p->t != O_RPAREN) - die("syntax error near `%s': missing `)'", *p->a); + if (p->t != O_RPAREN) die("syntax error near `%s': missing `)'", *p->a); p_next(p); } else if (p->t == O_NOT) { n = xmalloc(sizeof(node_un)); @@ -419,14 +389,9 @@ static void p_term(p_ctx *p, node **nn) for (;;) { p_factor(p, nn); switch (p->t) { - case O_AND: - p_next(p); - default: - break; - case O_RPAREN: - case O_OR: - case O_EOF: - return; + case O_AND: p_next(p); break; + default: break; + case O_RPAREN: case O_OR: case O_EOF: return; } n = xmalloc(sizeof(node_bin)); n->left = *nn; @@ -441,8 +406,7 @@ static void p_expr(p_ctx *p, node **nn) node_bin *n; for (;;) { p_term(p, nn); - if (p->t != O_OR) - break; + if (p->t != O_OR) break; p_next(p); n = xmalloc(sizeof(node_bin)); n->left = *nn; @@ -477,10 +441,9 @@ static node *p_argv(int argc, const char *const argv[]) exit(EXIT_FAILURE); } p_expr(&p, &n); - if (p.t != O_EOF) { + if (p.t != O_EOF) die("syntax error near `%s': rubbish at end of line (too many `)'s?)", *p.a); - } return (n); } @@ -545,12 +508,10 @@ int main(int argc, char *argv[]) die("error opening `%s': %s", file, strerror(errno)); for (;;) { dstr_reset(&d); - if (dstr_putline(&d, fp) < 0) - break; + if (dstr_putline(&d, fp) < 0) break; l = d.buf + d.len; for (p = q = d.buf; p < l; p++) { - if (!isalnum((unsigned char)*p)) - continue; + if (!isalnum((unsigned char)*p)) continue; *q++ = tolower((unsigned char)*p); } *q = 0; @@ -563,10 +524,8 @@ int main(int argc, char *argv[]) } if (ferror(fp) || fclose(fp)) die("error reading `%s': %s", file, strerror(errno)); - for (a = aa_head; a; a = a->next) { - if (a->func(a->p)) - ok = 1; - } + for (a = aa_head; a; a = a->next) + if (a->func(a->p)) ok = 1; if (fflush(stdout) || ferror(stdout) || fclose(stdout)) die("error writing output: %s", strerror(errno)); if (!ok) pquis(stderr, "$: no matches found\n"); diff --git a/anagram.c b/anagram.c index d4eb5e7..a1ed21f 100644 --- a/anagram.c +++ b/anagram.c @@ -30,10 +30,12 @@ /*----- Data structures ---------------------------------------------------*/ +typedef unsigned countmap[UCHAR_MAX + 1]; + typedef struct node_gram { node n; size_t sz; - unsigned f[UCHAR_MAX + 1]; + countmap count; } node_gram; /*----- Main code ---------------------------------------------------------*/ @@ -43,45 +45,40 @@ typedef struct node_gram { * Arguments: @node_gram *n@ = pointer to node to fill in * @const char *p@ = pointer to string to compile from * - * Returns: The length of the string. + * Returns: --- * * Use: Fills in an anagram node with letter frequencies. */ -static size_t compile(node_gram *n, const char *p) +static void compile(const char *p, countmap count) { - size_t len = 0; - unsigned i; - memset(n->f, 0, sizeof(n->f)); - while (*p) { - i = (unsigned char)*p++; - n->f[i]++; - len++; - } - return (len); + memset(count, 0, sizeof(countmap)); + while (*p) count[(unsigned char)*p++]++; } /* --- Node matcher --- */ static int n_gram(node *nn, const char *p, size_t sz) { + /* A string X is a subgram of Y unless, for some character CH, X has more + * characters of value CH than Y. + * + * Special feature: count `.' in the pattern as meaning `don't care'. + */ + node_gram *n = (node_gram *)nn; - unsigned f[UCHAR_MAX]; + countmap count; const char *q; unsigned i; - if (n->sz && sz != n->sz) - return (0); - memcpy(f, n->f, sizeof(f)); + if (n->sz && sz != n->sz) return (0); + memcpy(count, n->count, sizeof(count)); q = p + sz; while (p < q) { i = (unsigned char)*p++; - if (f[i]) - f[i]--; - else if (f['.']) - f['.']--; - else - return (0); + if (count[i]) count[i]--; + else if (count['.']) count['.']--; + else return (0); } return (1); } @@ -92,7 +89,8 @@ node *anagram(const char *const *av) { node_gram *n = xmalloc(sizeof(*n)); n->n.func = n_gram; - n->sz = compile(n, av[0]); + compile(av[0], n->count); + n->sz = strlen(av[0]); return (&n->n); } @@ -100,8 +98,8 @@ node *subgram(const char *const *av) { node_gram *n = xmalloc(sizeof(*n)); n->n.func = n_gram; + compile(av[0], n->count); n->sz = 0; - compile(n, av[0]); return (&n->n); } diff --git a/longest.c b/longest.c index 1879e48..c9053bb 100644 --- a/longest.c +++ b/longest.c @@ -75,18 +75,15 @@ static int aa_output(void *p) link *l; if (!n->l) return (0); - for (l = n->l; l; l = l->next) - puts(l->p); + for (l = n->l; l; l = l->next) puts(l->p); return (1); } static int n_longest(node *nn, const char *p, size_t sz) { node_longest *n = (node_longest *)nn; - if (sz < n->len) - return (0); - if (sz > n->len) - empty(n); + if (sz < n->len) return (0); + if (sz > n->len) empty(n); insert(n, p, sz); return (0); } @@ -94,10 +91,8 @@ static int n_longest(node *nn, const char *p, size_t sz) static int n_shortest(node *nn, const char *p, size_t sz) { node_longest *n = (node_longest *)nn; - if (n->len && sz > n->len) - return (0); - if (!n->len || sz < n->len) - empty(n); + if (n->len && sz > n->len) return (0); + if (!n->len || sz < n->len) empty(n); insert(n, p, sz); return (0); } diff --git a/mono.c b/mono.c index 2e5d8ea..2b6ca4a 100644 --- a/mono.c +++ b/mono.c @@ -43,24 +43,23 @@ typedef struct node_mono { static int n_mono(node *nn, const char *p, size_t sz) { node_mono *n = (node_mono *)nn; - unsigned map[UCHAR_MAX], imap[UCHAR_MAX]; + unsigned map[UCHAR_MAX]; const unsigned char *q = n->p; - int ch, i; + unsigned max = 0; + int ch; + + /* Monoalphabetic substitution preserves length. */ + if (sz != n->len) return (0); - if (sz != n->len) - return (0); - memset(map, 0, sizeof(map)); - memset(imap, 0, sizeof(imap)); + /* Canonicalize this string as we did the pattern. Fail if we find a + * mismatch. + */ + memset(map, UCHAR_MAX, sizeof(map)); while (*p) { ch = *p++; - i = *q++; - if (!map[i]) { - if (imap[ch]) - return (0); - map[i] = ch; - imap[ch] = 1; - } else if (map[i] != ch) - return (0); + if (map[ch] >= max) map[ch] = max++; + if (max >= UCHAR_MAX) return (0); + if (*q++ != map[ch]) return (0); } return (1); } @@ -70,25 +69,29 @@ static int n_mono(node *nn, const char *p, size_t sz) node *mono(const char *const *av) { unsigned char map[UCHAR_MAX]; - unsigned max; - int ch; - const char *p; + const char *p = av[0]; unsigned char *q; + unsigned max = 0; + int ch; node_mono *n = xmalloc(sizeof(*n)); n->n.func = n_mono; - memset(map, UCHAR_MAX, sizeof(map)); - max = 0; - p = av[0]; + n->p = q = xmalloc(n->len); n->len = strlen(p); - q = xmalloc(n->len); - n->p = q; + + /* Convert the string into the canonical representative of its equivalence + * class, specifically the lexically first string which is equivalent under + * monoalphabetic substitution. That is: replace the first distinct + * character with zero, the second with one, and so on. + */ + memset(map, UCHAR_MAX, sizeof(map)); while (*p) { ch = *p++; - if (map[ch] >= max) - map[ch] = max++; + if (map[ch] >= max) map[ch] = max++; + assert(max < UCHAR_MAX); *q++ = map[ch]; } + return (&n->n); } diff --git a/trackword.c b/trackword.c index b2e3074..ce51e2e 100644 --- a/trackword.c +++ b/trackword.c @@ -63,22 +63,16 @@ long isqrt(long a) { long i, q; - /* --- Initial guess is half the input length --- */ - - for (i = q = a; q; q >>= 2) - i >>= 1; - - /* --- Do the main series iteration --- */ + /* Initial guess is half the input length. */ + for (i = q = a; q; q >>= 2) i >>= 1; + /* Do the main series iteration. */ for (;;) { - q = i * i - a; - if (!q || (q < 0 && -q <= 2 * i)) - return (i); - q /= 2 * i; - if (!q) - i--; - else - i -= q; + q = i*i - a; + if (q > -2*i && q <= 0) return (i); + q /= 2*i; + if (!q) i--; + else i -= q; } } @@ -98,21 +92,16 @@ static int track(tcell *c, const char *p) { tcell **cc; - if (*p++ != c->ch) - return (0); - if (!*p) - return (1); - c->f = 1; + if (*p++ != c->ch) return (0); + if (!*p) return (1); + c->f = 1; for (cc = c->hop; *cc; cc++) { - if ((*cc)->f) - continue; - if (track(*cc, p)) { - c->f = 0; - return (1); - } + if ((*cc)->f) continue; + if (track(*cc, p)) { c->f = 0; return (1); } } c->f = 0; + return (0); } @@ -123,12 +112,9 @@ static int n_track(node *nn, const char *p, size_t sz) node_track *n = (node_track *)nn; tcell *c; - if (!*p) - return (1); - for (c = n->c; c->ch; c++) { - if (track(c, p)) - return (1); - } + if (!*p) return (1); + for (c = n->c; c->ch; c++) + if (track(c, p)) return (1); return (0); } @@ -143,48 +129,41 @@ node *trackword(const char *const *av) size_t sz = strlen(p); tcell *c, **cc, **ccc; - /* --- Work out the dimensions --- */ - + /* Work out the dimensions. */ x = strcspn(p, "/"); if (!p[x]) { x = isqrt(sz); - if (x * x != sz) + if (x*x != sz) die("bad trackword `%s': not square, and no line markers", p); y = x; } - if (!x) - die("bad trackword `%s': no columns", p); + if (!x) die("bad trackword `%s': no columns", p); y = 0; l = p + sz; q = p; for (;;) { - if (*q == '/') - q++; - if (!*q) - break; + if (*q == '/') q++; + if (!*q) break; if (l - q < x) die("bad trackword `%s': inconsistent line lengths", p); q += x; y++; } - if (!y) - die("bad trackword `%s': no rows", p); - - /* --- Build the match node --- */ + if (!y) die("bad trackword `%s': no rows", p); + /* Build the match node. */ n = xmalloc(sizeof(*n)); n->n.func = n_track; n->x = x; n->y = y; - n->c = xmalloc((x * y + 1) * sizeof(tcell)); + n->c = xmalloc((x*y + 1)*sizeof(tcell)); q = p; c = n->c; for (j = 0; j < y; j++) { - if (*q == '/') - q++; + if (*q == '/') q++; for (i = 0; i < x; i++) { c->ch = *q++; c->f = 0; @@ -216,20 +195,15 @@ node *trackword(const char *const *av) } c->ch = 0; - /* --- Now prune out bogus links --- */ - + /* Now prune out bogus links. */ for (c = n->c; c->ch; c++) { ccc = c->hop; - for (cc = c->hop; *cc; cc++) { - if (isalpha((unsigned char)(*cc)->ch)) { - *ccc++ = *cc; - } - } + for (cc = c->hop; *cc; cc++) + if (isalpha((unsigned char)(*cc)->ch)) *ccc++ = *cc; *ccc++ = 0; } - /* --- Done --- */ - + /* Done. */ return (&n->n); } diff --git a/util.c b/util.c index 97f498e..223fcf4 100644 --- a/util.c +++ b/util.c @@ -56,12 +56,9 @@ static const char *quis = ""; void ego(const char *p) { const char *q = p; - while (*q) { - if (*q++ == PATHSEP) - p = q; - } - if (*p == '-') - p++; + while (*q) + if (*q++ == PATHSEP) p = q; + if (*p == '-') p++; quis = p; } @@ -87,20 +84,16 @@ int pquis(FILE *fp, const char *p) while (*p) { sz = strcspn(p, "$"); if (sz) { - if (fwrite(p, 1, sz, fp) < sz) - return (EOF); + if (fwrite(p, 1, sz, fp) < sz) return (EOF); p += sz; } if (*p == '$') { p++; if (*p == '$') { - if (fputc('$', fp) == EOF) - return (EOF); + if (fputc('$', fp) == EOF) return (EOF); p++; - } else { - if (fputs(quis, fp) == EOF) - return (EOF); - } + } else if (fputs(quis, fp) == EOF) + return (EOF); } } return (0); @@ -142,8 +135,7 @@ void die(const char *f, ...) void *xmalloc(size_t sz) { void *p = malloc(sz); - if (!p) - die("not enough memory"); + if (!p) die("not enough memory"); return (p); } @@ -161,8 +153,7 @@ void *xmalloc(size_t sz) void *xrealloc(void *p, size_t sz) { p = realloc(p, sz); - if (!p) - die("not enough memory"); + if (!p) die("not enough memory"); return (p); } @@ -208,23 +199,15 @@ void dstr_ensure(dstr *d, size_t sz) size_t rq = d->len + sz; size_t nsz; - /* --- If we have enough space, just leave it --- */ - - if (rq <= d->sz) - return; - - /* --- Grow the buffer --- */ + /* If we have enough space, just leave it. */ + if (rq <= d->sz) return; + /* Grow the buffer. */ nsz = d->sz; - - if (nsz == 0) - nsz = (DSTR_INITSZ >> 1); + if (nsz == 0) nsz = (DSTR_INITSZ >> 1); do nsz <<= 1; while (nsz < rq); - - if (d->buf) - d->buf = xrealloc(d->buf, nsz); - else - d->buf = xmalloc(nsz); + if (d->buf) d->buf = xrealloc(d->buf, nsz); + else d->buf = xmalloc(nsz); d->sz = nsz; } @@ -250,33 +233,27 @@ int dstr_putline(dstr *d, FILE *fp) for (;;) { - /* --- Read the next byte --- */ - + /* Read the next byte. */ ch = getc(fp); - /* --- End-of-file when no characters read is special --- */ - - if (ch == EOF && !rd) - return (EOF); - - /* --- Make sure there's some buffer space --- */ + /* End-of-file when no characters read is special. */ + if (ch == EOF && !rd) return (EOF); + /* Make sure there's some buffer space. */ if (!left) { d->len = off; dstr_ensure(d, 1); left = d->sz - off; } - /* --- End-of-file or newline ends the loop --- */ - + /* End-of-file or newline ends the loop. */ if (ch == EOF || ch == '\n') { d->buf[off] = 0; d->len = off; return rd; } - /* --- Append the character and continue --- */ - + /* Append the character and continue. */ d->buf[off++] = ch; left--; rd++; } diff --git a/wildcard.c b/wildcard.c index 78b3168..82f33f0 100644 --- a/wildcard.c +++ b/wildcard.c @@ -51,81 +51,96 @@ typedef struct node_wild { static int match(const char *p, const char *s) { - for (;;) { - char pch = *p++, pche, sch; - int sense; + char pch, pche, sch; + int sense; + for (;;) { + pch = *p++; switch (pch) { + case '?': - if (!*s) - return (0); - s++; + /* If there's no character left, then we fail; otherwise consume + * anything and move on. + */ + + if (!*s++) return (0); break; + case '*': - if (!*p) - return (1); - while (*s) { - if (match(p, s)) - return (1); + /* If there's no more pattern then we win. */ + if (!*p) return (1); + + /* Try skipping any number of characters from the pattern looking for + * a match. + */ + do { + if (match(p, s)) return (1); s++; - } + } while (*s); return (0); + case '[': - if (!*s) - return (0); + /* Character sets. This is the hard part. */ + + /* If there is no character left, then we fail. */ + if (!*s) return (0); + + /* Fetch the string character, and start munching through the + * pattern. + */ sch = *s++; - pch = *p++; - sense = 1; - if (pch == '^' || pch == '!') { - sense = !sense; - pch = *p++; - } + pch = *p++; sense = 1; + + /* Maybe we need to negate. */ + if (pch == '^' || pch == '!') { sense = !sense; pch = *p++; } + + /* A close bracket here is literal. Watch for ranges. */ if (pch == ']') { if (*p == '-' && p[1] && p[1] != ']') { - pche = p[1]; - p += 2; - if (pch <= sch && sch <= pche) - goto class_match; - } else if (pch == sch) - goto class_match; + pche = p[1]; p += 2; + if (pch <= sch && sch <= pche) goto class_match; + } else if (pch == sch) goto class_match; pch = *p++; } + + /* Work through the other characters and ranges in the set. */ for (;; pch = *p++) { - if (!pch || pch == ']') - goto class_nomatch; + if (!pch || pch == ']') goto class_nomatch; if (*p == '-' && p[1] && p[1] != ']') { - pche = p[1]; - p += 2; - if (pch <= sch && sch <= pche) - goto class_match; - } else if (pch == sch) - goto class_match; + pche = p[1]; p += 2; + if (pch <= sch && sch <= pche) goto class_match; + } else if (pch == sch) goto class_match; } + class_match: - if (!sense) - return (0); + /* Found a match. Chew through the rest of the pattern. */ + if (!sense) return (0); for (;;) { - pch = *p++; - if (!pch) - return (0); - if (pch == ']') - break; - if (*p == '-' && p[1] && p[1] != ']') - p += 2; + pch = *p++; if (!pch) return (0); + if (pch == ']') break; + if (*p == '-' && p[1] && p[1] != ']') p += 2; } break; + class_nomatch: - if (sense) - return (0); + /* Found the end of the set, so it's a mismatch. */ + if (sense) return (0); break; + case '\\': + /* Treat the next thing literally. */ pch = *p++; + /* fall through... */ + default: - if (pch != *s) - return (0); - if (!pch) - return (1); - s++; + /* A plain character match. + * + * Trick: If this is the end of the pattern, we expect the end of + * the string. + */ + + if (pch != *s++) return (0); + if (!pch) return (1); break; } }