Add an internal-representation no-op function.
[u/mdw/catacomb] / mkphrase.c
CommitLineData
b55540f6 1/* -*-c-*-
2 *
16efd15b 3 * $Id: mkphrase.c,v 1.2 2000/12/06 20:33:27 mdw Exp $
b55540f6 4 *
5 * Generate passphrases from word lists
6 *
7 * (c) 2000 Straylight/Edgeware
8 */
9
10/*----- Licensing notice --------------------------------------------------*
11 *
12 * This file is part of Catacomb.
13 *
14 * Catacomb is free software; you can redistribute it and/or modify
15 * it under the terms of the GNU Library General Public License as
16 * published by the Free Software Foundation; either version 2 of the
17 * License, or (at your option) any later version.
18 *
19 * Catacomb is distributed in the hope that it will be useful,
20 * but WITHOUT ANY WARRANTY; without even the implied warranty of
21 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 * GNU Library General Public License for more details.
23 *
24 * You should have received a copy of the GNU Library General Public
25 * License along with Catacomb; if not, write to the Free
26 * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
27 * MA 02111-1307, USA.
28 */
29
30/*----- Revision history --------------------------------------------------*
31 *
32 * $Log: mkphrase.c,v $
16efd15b 33 * Revision 1.2 2000/12/06 20:33:27 mdw
34 * Make flags be macros rather than enumerations, to ensure that they're
35 * unsigned.
36 *
b55540f6 37 * Revision 1.1 2000/08/06 10:50:55 mdw
38 * (mkphrase): New program for generating random passphrases with measured
39 * strength.
40 *
41 */
42
43/*----- Header files ------------------------------------------------------*/
44
45#include "config.h"
46
47#include <ctype.h>
48#include <errno.h>
49#include <math.h>
50#include <stdio.h>
51#include <stdlib.h>
52#include <string.h>
53
54#include <mLib/alloc.h>
55#include <mLib/bits.h>
56#include <mLib/darray.h>
57#include <mLib/dstr.h>
58#include <mLib/mdwopt.h>
59#include <mLib/quis.h>
60#include <mLib/report.h>
61#include <mLib/sym.h>
62
63#include "grand.h"
64#include "noise.h"
65#include "rand.h"
66
67/*----- Global state ------------------------------------------------------*/
68
/* Word length bounds: generated words shorter than `min' or longer than
 * `max' are rejected.
 */
static unsigned min = 0;
static unsigned max = 256;

/* Minimum acceptable entropy, in bits, for a finished passphrase. */
static unsigned bits = 128;

/* How many passphrases to make; main() treats zero as `no limit'. */
static unsigned count = 1;

/* Characters which may appear in a word.  A character's index in this
 * string doubles as its symbol number in the Markov model.
 */
static const char wchars[] = "abcdefghijklmnopqrstuvwxyz'";
74
75typedef struct ppgen_ops {
76 const char *name; /* Name of the generator */
77 void *(*init)(void); /* Initialize generator */
78 void (*scan)(FILE */*fp*/, void */*p*/); /* Scan an input word list */
79 void (*endscan)(void */*p*/); /* Scanning phase completed */
80 double (*gen)(dstr */*d*/, grand */*r*/, void */*p*/);
81 /* Emit word and return entropy */
82 void (*done)(void */*p*/); /* Close down generator */
83} ppgen_ops;
84
85/*----- Word list ---------------------------------------------------------*/
86
87#ifndef STRING_V
88# define STRING_V
89 DA_DECL(string_v, char *);
90#endif
91
92typedef struct wlist {
93 string_v sv;
94 sym_table tab;
95 char *buf;
96 double logp;
97} wlist;
98
99static void *wordlist_init(void)
100{
101 wlist *w = xmalloc(sizeof(wlist));
102 sym_create(&w->tab);
103 w->logp = 0;
104 return (w);
105}
106
107static void wordlist_scan(FILE *fp, void *p)
108{
109 wlist *w = p;
110 dstr d = DSTR_INIT;
111 unsigned f = 0;
112
113 for (;;) {
114 int ch = getc(fp);
115 if (ch == EOF || isspace(ch)) {
116 DPUTZ(&d);
117 if (f && d.len >= min && d.len <= max)
118 sym_find(&w->tab, d.buf, d.len + 1, sizeof(sym_base), 0);
119 f = 0;
120 DRESET(&d);
121 if (ch == EOF)
122 break;
123 continue;
124 }
125 ch = tolower(ch);
126 if (strchr(wchars, ch)) {
127 DPUTC(&d, ch);
128 f = 1;
129 }
130 }
131
132 dstr_destroy(&d);
133}
134
135static void wordlist_endscan(void *p)
136{
137 wlist *w = p;
138 size_t buflen = 0;
139 sym_iter i;
140 sym_base *b;
141 char *q;
142
143 for (sym_mkiter(&i, &w->tab); (b = sym_next(&i)) != 0; )
144 buflen += b->len;
145 w->buf = xmalloc(buflen);
146 q = w->buf;
147 DA_CREATE(&w->sv);
148 for (sym_mkiter(&i, &w->tab); (b = sym_next(&i)) != 0; ) {
149 memcpy(q, SYM_NAME(b), b->len);
150 DA_PUSH(&w->sv, q);
151 q += b->len;
152 }
153 sym_destroy(&w->tab);
154 w->logp = log(DA_LEN(&w->sv))/log(2);
155}
156
157static double wordlist_gen(dstr *d, grand *r, void *p)
158{
159 wlist *w = p;
160 uint32 i = r->ops->range(r, DA_LEN(&w->sv));
161 DPUTS(d, DA(&w->sv)[i]);
162 return (w->logp);
163}
164
165static void wordlist_done(void *p)
166{
167 wlist *w = p;
168 xfree(w->buf);
169 DA_DESTROY(&w->sv);
170 xfree(w);
171}
172
173static ppgen_ops wordlist_ops = {
174 "wordlist",
175 wordlist_init, wordlist_scan, wordlist_endscan, wordlist_gen, wordlist_done
176};
177
178/*----- Markov word model -------------------------------------------------*/
179
180enum {
181 C_START = 27,
182 C_END,
183 VECSZ
184};
185
186typedef struct node {
187 uint32 count;
188 uint32 p[VECSZ];
189} node;
190
191static void *markov_init(void)
192{
193 node (*model)[VECSZ][VECSZ][VECSZ] = xmalloc(sizeof(*model));
194 unsigned i, j, k, l;
195
196 for (i = 0; i < VECSZ; i++) {
197 for (j = 0; j < VECSZ; j++) {
198 for (k = 0; k < VECSZ; k++) {
199 node *n = &(*model)[i][j][k];
200 n->count = 0;
201 for (l = 0; l < VECSZ; l++)
202 n->p[l] = 0;
203 }
204 }
205 }
206
207 return (model);
208}
209
210static void markov_scan(FILE *fp, void *p)
211{
212 node (*model)[VECSZ][VECSZ][VECSZ] = p;
213 unsigned i = C_START, j = C_START, k = C_START, l = C_END;
214
215 for (;;) {
216 int ch = getc(fp);
217 const char *q;
218 node *n = &(*model)[i][j][k];
219
220 if (ch == EOF || isspace(ch)) {
221 if (l != C_END) {
222 l = C_END;
223 n->count++;
224 n->p[l]++;
225 i = j = k = C_START;
226 }
227 if (ch == EOF)
228 break;
229 continue;
230 }
231
232 if ((q = strchr(wchars, tolower(ch))) == 0)
233 continue;
234 l = q - wchars;
235 n->count++;
236 n->p[l]++;
237 i = j; j = k; k = l;
238 }
239}
240
241static double markov_gen(dstr *d, grand *r, void *p)
242{
243 node (*model)[VECSZ][VECSZ][VECSZ] = p;
244 unsigned i = C_START, j = C_START, k = C_START, l;
245 double logp = 0;
246 double log2 = log(2);
247
248 for (;;) {
249 node *n = &(*model)[i][j][k];
250 uint32 z = r->ops->range(r, n->count);
251 for (l = 0; z >= n->p[l]; z -= n->p[l++])
252 ;
253 logp -= log((double)n->p[l]/(double)n->count)/log2;
254 if (l == C_END)
255 break;
256 DPUTC(d, wchars[l]);
257 i = j; j = k; k = l;
258 }
259
260 return (logp);
261}
262
/* --- @markov_done@ --- *
 *
 * Arguments:   @void *p@ = Markov model from @markov_init@
 *
 * Returns:     ---
 *
 * Use:         Frees the model, which is a single allocation.
 */

static void markov_done(void *p)
{
  xfree(p);
}
268
269static ppgen_ops markov_ops = {
270 "markov",
271 markov_init, markov_scan, 0, markov_gen, markov_done
272};
273
274/*----- Main code ---------------------------------------------------------*/
275
276static ppgen_ops *ppgentab[] = {
277 &markov_ops,
278 &wordlist_ops,
279 0
280};
281
282static void version(FILE *fp)
283{
284 pquis(fp, "$, Catacomb version " VERSION "\n");
285}
286
/* Write a one-line usage summary to @fp@. */
static void usage(FILE *fp)
{
  pquis(fp,
        "Usage: $ [-p] [-b bits] [-g gen] [-n count] [-r [min-]max] "
        "wordlist...\n");
}
293
294static void help(FILE *fp)
295{
296 ppgen_ops **ops;
297 version(fp);
298 fputc('\n', fp);
299 usage(fp);
300 pquis(fp, "\n\
301Generates random passphrases with the requested level of entropy. Options\n\
302supported are:\n\
303\n\
304-h, --help Show this help text.\n\
305-v, --version Show the program's version number.\n\
306-u, --usage Show a terse usage summary.\n\
307-b, --bits=BITS Produce at least BITS bits of entropy.\n\
308-g, --generator=GEN Use passphrase generator GEN.\n\
309-n, --count=COUNT Generate COUNT passphrases.\n\
310-p, --probability Show -log_2 of probability for each phrase.\n\
311-r, --range=[MIN-]MAX Supply minimum and maximum word lengths.\n\
312\n\
313Generators currently available:");
314 for (ops = ppgentab; *ops; ops++)
315 fprintf(fp, " %s", (*ops)->name);
316 fputc('\n', fp);
317}
318
319int main(int argc, char *argv[])
320{
321 ppgen_ops *ops = ppgentab[0];
322 unsigned f = 0;
323 void *ctx;
324 dstr d = DSTR_INIT;
325 dstr dd = DSTR_INIT;
326 unsigned i;
327
16efd15b 328#define f_bogus 1u
329#define f_showp 2u
b55540f6 330
331 ego(argv[0]);
332 for (;;) {
333 static struct option opts[] = {
334 { "help", 0, 0, 'h' },
335 { "version", 0, 0, 'v' },
336 { "usage", 0, 0, 'u' },
337 { "bits", OPTF_ARGREQ, 0, 'b' },
338 { "generator", OPTF_ARGREQ, 0, 'g' },
339 { "count", OPTF_ARGREQ, 0, 'n' },
340 { "probability", 0, 0, 'p' },
341 { "range", OPTF_ARGREQ, 0, 'r' },
342 { 0, 0, 0, 0 }
343 };
344 int i = mdwopt(argc, argv, "hvu b:g:n:pr:", opts, 0, 0, 0);
345
346 if (i < 0)
347 break;
348 switch (i) {
349 case 'h':
350 help(stdout);
351 exit(0);
352 case 'v':
353 version(stdout);
354 exit(0);
355 case 'u':
356 usage(stdout);
357 exit(0);
358 case 'b': {
359 char *p;
360 unsigned long n = strtoul(optarg, &p, 0);
361 if (*p)
362 die(EXIT_FAILURE, "bad integer `%s'", optarg);
363 bits = n;
364 } break;
365 case 'g': {
366 ppgen_ops **p;
367 size_t n = strlen(optarg);
368 ops = 0;
369 for (p = ppgentab; *p; p++) {
370 if (strncmp(optarg, (*p)->name, n) == 0) {
371 if (!(*p)->name[n]) {
372 ops = *p;
373 break;
374 } else if (ops)
375 die(EXIT_FAILURE, "ambiguous generator name `%s'", optarg);
376 ops = *p;
377 }
378 }
379 if (!ops)
380 die(EXIT_FAILURE, "unknown generator name `%s'", optarg);
381 } break;
382 case 'n': {
383 char *p;
384 unsigned long n = strtoul(optarg, &p, 0);
385 if (*p)
386 die(EXIT_FAILURE, "bad integer `%s'", optarg);
387 count = n;
388 } break;
389 case 'p':
390 f |= f_showp;
391 break;
392 case 'r': {
393 char *p;
394 unsigned long n = min, nn = max;
395 nn = strtoul(optarg, &p, 0);
396 if (*p == '-') {
397 n = nn;
398 nn = strtoul(p + 1, &p, 0);
399 }
400 if (*p)
401 die(EXIT_FAILURE, "bad range string `%s'", optarg);
402 min = n; max = nn;
403 } break;
404 default:
405 f |= f_bogus;
406 break;
407 }
408 }
409
410 argc -= optind;
411 argv += optind;
412 if ((f & f_bogus) || !argc) {
413 usage(stderr);
414 exit(EXIT_FAILURE);
415 }
416
417 rand_noisesrc(RAND_GLOBAL, &noise_source);
418 rand_seed(RAND_GLOBAL, 160);
419
420 ctx = ops->init();
421 while (*argv) {
422 if (strcmp(*argv, "-") == 0)
423 ops->scan(ctx, stdin);
424 else {
425 FILE *fp = fopen(*argv, "r");
426 if (!fp) {
427 die(EXIT_FAILURE, "error opening file `%s': %s",
428 *argv, strerror(errno));
429 }
430 ops->scan(fp, ctx);
431 fclose(fp);
432 }
433 argv++;
434 }
435 if (ops->endscan)
436 ops->endscan(ctx);
437
438 for (i = 0; !count || i < count; i++) {
439 double logp = 0;
440 DRESET(&d);
441 while (logp < bits) {
442 double pp;
443 DRESET(&dd);
444 pp = ops->gen(&dd, &rand_global, ctx);
445 if (!pp || dd.len < min || dd.len > max)
446 continue;
447 if (logp)
448 DPUTC(&d, ' ');
449 DPUTD(&d, &dd);
450 logp += pp;
451 }
452 dstr_write(&d, stdout);
453 if (f & f_showp)
454 printf(" [%g]", logp);
455 fputc('\n', stdout);
456 }
457
458 ops->done(ctx);
459 dstr_destroy(&d);
460 dstr_destroy(&dd);
461 return (0);
462}
463
464/*----- That's all, folks -------------------------------------------------*/