(mkphrase): New program for generating random passphrases with measured
[u/mdw/catacomb] / mkphrase.c
CommitLineData
b55540f6 1/* -*-c-*-
2 *
3 * $Id: mkphrase.c,v 1.1 2000/08/06 10:50:55 mdw Exp $
4 *
5 * Generate passphrases from word lists
6 *
7 * (c) 2000 Straylight/Edgeware
8 */
9
10/*----- Licensing notice --------------------------------------------------*
11 *
12 * This file is part of Catacomb.
13 *
14 * Catacomb is free software; you can redistribute it and/or modify
15 * it under the terms of the GNU Library General Public License as
16 * published by the Free Software Foundation; either version 2 of the
17 * License, or (at your option) any later version.
18 *
19 * Catacomb is distributed in the hope that it will be useful,
20 * but WITHOUT ANY WARRANTY; without even the implied warranty of
21 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 * GNU Library General Public License for more details.
23 *
24 * You should have received a copy of the GNU Library General Public
25 * License along with Catacomb; if not, write to the Free
26 * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
27 * MA 02111-1307, USA.
28 */
29
30/*----- Revision history --------------------------------------------------*
31 *
32 * $Log: mkphrase.c,v $
33 * Revision 1.1 2000/08/06 10:50:55 mdw
34 * (mkphrase): New program for generating random passphrases with measured
35 * strength.
36 *
37 */
38
39/*----- Header files ------------------------------------------------------*/
40
41#include "config.h"
42
43#include <ctype.h>
44#include <errno.h>
45#include <math.h>
46#include <stdio.h>
47#include <stdlib.h>
48#include <string.h>
49
50#include <mLib/alloc.h>
51#include <mLib/bits.h>
52#include <mLib/darray.h>
53#include <mLib/dstr.h>
54#include <mLib/mdwopt.h>
55#include <mLib/quis.h>
56#include <mLib/report.h>
57#include <mLib/sym.h>
58
59#include "grand.h"
60#include "noise.h"
61#include "rand.h"
62
63/*----- Global state ------------------------------------------------------*/
64
65static unsigned min = 0, max = 256; /* Word length bounds */
66static unsigned bits = 128; /* Minimum acceptable entropy */
67static unsigned count = 1; /* How many passphrases to make */
68
69static const char wchars[] = "abcdefghijklmnopqrstuvwxyz'";
70
71typedef struct ppgen_ops {
72 const char *name; /* Name of the generator */
73 void *(*init)(void); /* Initialize generator */
74 void (*scan)(FILE */*fp*/, void */*p*/); /* Scan an input word list */
75 void (*endscan)(void */*p*/); /* Scanning phase completed */
76 double (*gen)(dstr */*d*/, grand */*r*/, void */*p*/);
77 /* Emit word and return entropy */
78 void (*done)(void */*p*/); /* Close down generator */
79} ppgen_ops;
80
81/*----- Word list ---------------------------------------------------------*/
82
83#ifndef STRING_V
84# define STRING_V
85 DA_DECL(string_v, char *);
86#endif
87
88typedef struct wlist {
89 string_v sv;
90 sym_table tab;
91 char *buf;
92 double logp;
93} wlist;
94
95static void *wordlist_init(void)
96{
97 wlist *w = xmalloc(sizeof(wlist));
98 sym_create(&w->tab);
99 w->logp = 0;
100 return (w);
101}
102
103static void wordlist_scan(FILE *fp, void *p)
104{
105 wlist *w = p;
106 dstr d = DSTR_INIT;
107 unsigned f = 0;
108
109 for (;;) {
110 int ch = getc(fp);
111 if (ch == EOF || isspace(ch)) {
112 DPUTZ(&d);
113 if (f && d.len >= min && d.len <= max)
114 sym_find(&w->tab, d.buf, d.len + 1, sizeof(sym_base), 0);
115 f = 0;
116 DRESET(&d);
117 if (ch == EOF)
118 break;
119 continue;
120 }
121 ch = tolower(ch);
122 if (strchr(wchars, ch)) {
123 DPUTC(&d, ch);
124 f = 1;
125 }
126 }
127
128 dstr_destroy(&d);
129}
130
131static void wordlist_endscan(void *p)
132{
133 wlist *w = p;
134 size_t buflen = 0;
135 sym_iter i;
136 sym_base *b;
137 char *q;
138
139 for (sym_mkiter(&i, &w->tab); (b = sym_next(&i)) != 0; )
140 buflen += b->len;
141 w->buf = xmalloc(buflen);
142 q = w->buf;
143 DA_CREATE(&w->sv);
144 for (sym_mkiter(&i, &w->tab); (b = sym_next(&i)) != 0; ) {
145 memcpy(q, SYM_NAME(b), b->len);
146 DA_PUSH(&w->sv, q);
147 q += b->len;
148 }
149 sym_destroy(&w->tab);
150 w->logp = log(DA_LEN(&w->sv))/log(2);
151}
152
153static double wordlist_gen(dstr *d, grand *r, void *p)
154{
155 wlist *w = p;
156 uint32 i = r->ops->range(r, DA_LEN(&w->sv));
157 DPUTS(d, DA(&w->sv)[i]);
158 return (w->logp);
159}
160
161static void wordlist_done(void *p)
162{
163 wlist *w = p;
164 xfree(w->buf);
165 DA_DESTROY(&w->sv);
166 xfree(w);
167}
168
169static ppgen_ops wordlist_ops = {
170 "wordlist",
171 wordlist_init, wordlist_scan, wordlist_endscan, wordlist_gen, wordlist_done
172};
173
174/*----- Markov word model -------------------------------------------------*/
175
176enum {
177 C_START = 27,
178 C_END,
179 VECSZ
180};
181
182typedef struct node {
183 uint32 count;
184 uint32 p[VECSZ];
185} node;
186
187static void *markov_init(void)
188{
189 node (*model)[VECSZ][VECSZ][VECSZ] = xmalloc(sizeof(*model));
190 unsigned i, j, k, l;
191
192 for (i = 0; i < VECSZ; i++) {
193 for (j = 0; j < VECSZ; j++) {
194 for (k = 0; k < VECSZ; k++) {
195 node *n = &(*model)[i][j][k];
196 n->count = 0;
197 for (l = 0; l < VECSZ; l++)
198 n->p[l] = 0;
199 }
200 }
201 }
202
203 return (model);
204}
205
206static void markov_scan(FILE *fp, void *p)
207{
208 node (*model)[VECSZ][VECSZ][VECSZ] = p;
209 unsigned i = C_START, j = C_START, k = C_START, l = C_END;
210
211 for (;;) {
212 int ch = getc(fp);
213 const char *q;
214 node *n = &(*model)[i][j][k];
215
216 if (ch == EOF || isspace(ch)) {
217 if (l != C_END) {
218 l = C_END;
219 n->count++;
220 n->p[l]++;
221 i = j = k = C_START;
222 }
223 if (ch == EOF)
224 break;
225 continue;
226 }
227
228 if ((q = strchr(wchars, tolower(ch))) == 0)
229 continue;
230 l = q - wchars;
231 n->count++;
232 n->p[l]++;
233 i = j; j = k; k = l;
234 }
235}
236
237static double markov_gen(dstr *d, grand *r, void *p)
238{
239 node (*model)[VECSZ][VECSZ][VECSZ] = p;
240 unsigned i = C_START, j = C_START, k = C_START, l;
241 double logp = 0;
242 double log2 = log(2);
243
244 for (;;) {
245 node *n = &(*model)[i][j][k];
246 uint32 z = r->ops->range(r, n->count);
247 for (l = 0; z >= n->p[l]; z -= n->p[l++])
248 ;
249 logp -= log((double)n->p[l]/(double)n->count)/log2;
250 if (l == C_END)
251 break;
252 DPUTC(d, wchars[l]);
253 i = j; j = k; k = l;
254 }
255
256 return (logp);
257}
258
259static void markov_done(void *p)
260{
261 node (*model)[VECSZ][VECSZ][VECSZ] = p;
262 xfree(model);
263}
264
265static ppgen_ops markov_ops = {
266 "markov",
267 markov_init, markov_scan, 0, markov_gen, markov_done
268};
269
270/*----- Main code ---------------------------------------------------------*/
271
272static ppgen_ops *ppgentab[] = {
273 &markov_ops,
274 &wordlist_ops,
275 0
276};
277
278static void version(FILE *fp)
279{
280 pquis(fp, "$, Catacomb version " VERSION "\n");
281}
282
283static void usage(FILE *fp)
284{
285 pquis(fp, "\
286Usage: $ [-p] [-b bits] [-g gen] [-n count] [-r [min-]max] wordlist...\n\
287");
288}
289
290static void help(FILE *fp)
291{
292 ppgen_ops **ops;
293 version(fp);
294 fputc('\n', fp);
295 usage(fp);
296 pquis(fp, "\n\
297Generates random passphrases with the requested level of entropy. Options\n\
298supported are:\n\
299\n\
300-h, --help Show this help text.\n\
301-v, --version Show the program's version number.\n\
302-u, --usage Show a terse usage summary.\n\
303-b, --bits=BITS Produce at least BITS bits of entropy.\n\
304-g, --generator=GEN Use passphrase generator GEN.\n\
305-n, --count=COUNT Generate COUNT passphrases.\n\
306-p, --probability Show -log_2 of probability for each phrase.\n\
307-r, --range=[MIN-]MAX Supply minimum and maximum word lengths.\n\
308\n\
309Generators currently available:");
310 for (ops = ppgentab; *ops; ops++)
311 fprintf(fp, " %s", (*ops)->name);
312 fputc('\n', fp);
313}
314
315int main(int argc, char *argv[])
316{
317 ppgen_ops *ops = ppgentab[0];
318 unsigned f = 0;
319 void *ctx;
320 dstr d = DSTR_INIT;
321 dstr dd = DSTR_INIT;
322 unsigned i;
323
324 enum {
325 f_bogus = 1,
326 f_showp = 2
327 };
328
329 ego(argv[0]);
330 for (;;) {
331 static struct option opts[] = {
332 { "help", 0, 0, 'h' },
333 { "version", 0, 0, 'v' },
334 { "usage", 0, 0, 'u' },
335 { "bits", OPTF_ARGREQ, 0, 'b' },
336 { "generator", OPTF_ARGREQ, 0, 'g' },
337 { "count", OPTF_ARGREQ, 0, 'n' },
338 { "probability", 0, 0, 'p' },
339 { "range", OPTF_ARGREQ, 0, 'r' },
340 { 0, 0, 0, 0 }
341 };
342 int i = mdwopt(argc, argv, "hvu b:g:n:pr:", opts, 0, 0, 0);
343
344 if (i < 0)
345 break;
346 switch (i) {
347 case 'h':
348 help(stdout);
349 exit(0);
350 case 'v':
351 version(stdout);
352 exit(0);
353 case 'u':
354 usage(stdout);
355 exit(0);
356 case 'b': {
357 char *p;
358 unsigned long n = strtoul(optarg, &p, 0);
359 if (*p)
360 die(EXIT_FAILURE, "bad integer `%s'", optarg);
361 bits = n;
362 } break;
363 case 'g': {
364 ppgen_ops **p;
365 size_t n = strlen(optarg);
366 ops = 0;
367 for (p = ppgentab; *p; p++) {
368 if (strncmp(optarg, (*p)->name, n) == 0) {
369 if (!(*p)->name[n]) {
370 ops = *p;
371 break;
372 } else if (ops)
373 die(EXIT_FAILURE, "ambiguous generator name `%s'", optarg);
374 ops = *p;
375 }
376 }
377 if (!ops)
378 die(EXIT_FAILURE, "unknown generator name `%s'", optarg);
379 } break;
380 case 'n': {
381 char *p;
382 unsigned long n = strtoul(optarg, &p, 0);
383 if (*p)
384 die(EXIT_FAILURE, "bad integer `%s'", optarg);
385 count = n;
386 } break;
387 case 'p':
388 f |= f_showp;
389 break;
390 case 'r': {
391 char *p;
392 unsigned long n = min, nn = max;
393 nn = strtoul(optarg, &p, 0);
394 if (*p == '-') {
395 n = nn;
396 nn = strtoul(p + 1, &p, 0);
397 }
398 if (*p)
399 die(EXIT_FAILURE, "bad range string `%s'", optarg);
400 min = n; max = nn;
401 } break;
402 default:
403 f |= f_bogus;
404 break;
405 }
406 }
407
408 argc -= optind;
409 argv += optind;
410 if ((f & f_bogus) || !argc) {
411 usage(stderr);
412 exit(EXIT_FAILURE);
413 }
414
415 rand_noisesrc(RAND_GLOBAL, &noise_source);
416 rand_seed(RAND_GLOBAL, 160);
417
418 ctx = ops->init();
419 while (*argv) {
420 if (strcmp(*argv, "-") == 0)
421 ops->scan(ctx, stdin);
422 else {
423 FILE *fp = fopen(*argv, "r");
424 if (!fp) {
425 die(EXIT_FAILURE, "error opening file `%s': %s",
426 *argv, strerror(errno));
427 }
428 ops->scan(fp, ctx);
429 fclose(fp);
430 }
431 argv++;
432 }
433 if (ops->endscan)
434 ops->endscan(ctx);
435
436 for (i = 0; !count || i < count; i++) {
437 double logp = 0;
438 DRESET(&d);
439 while (logp < bits) {
440 double pp;
441 DRESET(&dd);
442 pp = ops->gen(&dd, &rand_global, ctx);
443 if (!pp || dd.len < min || dd.len > max)
444 continue;
445 if (logp)
446 DPUTC(&d, ' ');
447 DPUTD(&d, &dd);
448 logp += pp;
449 }
450 dstr_write(&d, stdout);
451 if (f & f_showp)
452 printf(" [%g]", logp);
453 fputc('\n', stdout);
454 }
455
456 ops->done(ctx);
457 dstr_destroy(&d);
458 dstr_destroy(&dd);
459 return (0);
460}
461
462/*----- That's all, folks -------------------------------------------------*/