Make tables of standard encryption schemes etc.
[u/mdw/catacomb] / mkphrase.c
1 /* -*-c-*-
2 *
3 * $Id: mkphrase.c,v 1.3 2003/01/24 20:16:04 mdw Exp $
4 *
5 * Generate passphrases from word lists
6 *
7 * (c) 2000 Straylight/Edgeware
8 */
9
10 /*----- Licensing notice --------------------------------------------------*
11 *
12 * This file is part of Catacomb.
13 *
14 * Catacomb is free software; you can redistribute it and/or modify
15 * it under the terms of the GNU Library General Public License as
16 * published by the Free Software Foundation; either version 2 of the
17 * License, or (at your option) any later version.
18 *
19 * Catacomb is distributed in the hope that it will be useful,
20 * but WITHOUT ANY WARRANTY; without even the implied warranty of
21 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 * GNU Library General Public License for more details.
23 *
24 * You should have received a copy of the GNU Library General Public
25 * License along with Catacomb; if not, write to the Free
26 * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
27 * MA 02111-1307, USA.
28 */
29
30 /*----- Revision history --------------------------------------------------*
31 *
32 * $Log: mkphrase.c,v $
33 * Revision 1.3 2003/01/24 20:16:04 mdw
34 * Fix stupidity in reading wordlists from stdin. (Thanks to James
35 * Harvey.)
36 *
37 * Revision 1.2 2000/12/06 20:33:27 mdw
38 * Make flags be macros rather than enumerations, to ensure that they're
39 * unsigned.
40 *
41 * Revision 1.1 2000/08/06 10:50:55 mdw
42 * (mkphrase): New program for generating random passphrases with measured
43 * strength.
44 *
45 */
46
47 /*----- Header files ------------------------------------------------------*/
48
49 #include "config.h"
50
51 #include <ctype.h>
52 #include <errno.h>
53 #include <math.h>
54 #include <stdio.h>
55 #include <stdlib.h>
56 #include <string.h>
57
58 #include <mLib/alloc.h>
59 #include <mLib/bits.h>
60 #include <mLib/darray.h>
61 #include <mLib/dstr.h>
62 #include <mLib/mdwopt.h>
63 #include <mLib/quis.h>
64 #include <mLib/report.h>
65 #include <mLib/sym.h>
66
67 #include "grand.h"
68 #include "noise.h"
69 #include "rand.h"
70
71 /*----- Global state ------------------------------------------------------*/
72
/* Inclusive bounds on the length of an acceptable word. */
static unsigned min = 0;
static unsigned max = 256;

/* Entropy, in bits, which a passphrase must reach before it is emitted. */
static unsigned bits = 128;

/* Number of passphrases to produce; main() treats zero as `no limit'. */
static unsigned count = 1;

/* Characters which may form part of a word.  The Markov model uses a
 * character's offset within this string as its symbol number.
 */
static const char wchars[] = "abcdefghijklmnopqrstuvwxyz'";
78
79 typedef struct ppgen_ops {
80 const char *name; /* Name of the generator */
81 void *(*init)(void); /* Initialize generator */
82 void (*scan)(FILE */*fp*/, void */*p*/); /* Scan an input word list */
83 void (*endscan)(void */*p*/); /* Scanning phase completed */
84 double (*gen)(dstr */*d*/, grand */*r*/, void */*p*/);
85 /* Emit word and return entropy */
86 void (*done)(void */*p*/); /* Close down generator */
87 } ppgen_ops;
88
89 /*----- Word list ---------------------------------------------------------*/
90
91 #ifndef STRING_V
92 # define STRING_V
93 DA_DECL(string_v, char *);
94 #endif
95
96 typedef struct wlist {
97 string_v sv;
98 sym_table tab;
99 char *buf;
100 double logp;
101 } wlist;
102
103 static void *wordlist_init(void)
104 {
105 wlist *w = xmalloc(sizeof(wlist));
106 sym_create(&w->tab);
107 w->logp = 0;
108 return (w);
109 }
110
111 static void wordlist_scan(FILE *fp, void *p)
112 {
113 wlist *w = p;
114 dstr d = DSTR_INIT;
115 unsigned f = 0;
116
117 for (;;) {
118 int ch = getc(fp);
119 if (ch == EOF || isspace(ch)) {
120 DPUTZ(&d);
121 if (f && d.len >= min && d.len <= max)
122 sym_find(&w->tab, d.buf, d.len + 1, sizeof(sym_base), 0);
123 f = 0;
124 DRESET(&d);
125 if (ch == EOF)
126 break;
127 continue;
128 }
129 ch = tolower(ch);
130 if (strchr(wchars, ch)) {
131 DPUTC(&d, ch);
132 f = 1;
133 }
134 }
135
136 dstr_destroy(&d);
137 }
138
139 static void wordlist_endscan(void *p)
140 {
141 wlist *w = p;
142 size_t buflen = 0;
143 sym_iter i;
144 sym_base *b;
145 char *q;
146
147 for (sym_mkiter(&i, &w->tab); (b = sym_next(&i)) != 0; )
148 buflen += b->len;
149 w->buf = xmalloc(buflen);
150 q = w->buf;
151 DA_CREATE(&w->sv);
152 for (sym_mkiter(&i, &w->tab); (b = sym_next(&i)) != 0; ) {
153 memcpy(q, SYM_NAME(b), b->len);
154 DA_PUSH(&w->sv, q);
155 q += b->len;
156 }
157 sym_destroy(&w->tab);
158 w->logp = log(DA_LEN(&w->sv))/log(2);
159 }
160
161 static double wordlist_gen(dstr *d, grand *r, void *p)
162 {
163 wlist *w = p;
164 uint32 i = r->ops->range(r, DA_LEN(&w->sv));
165 DPUTS(d, DA(&w->sv)[i]);
166 return (w->logp);
167 }
168
169 static void wordlist_done(void *p)
170 {
171 wlist *w = p;
172 xfree(w->buf);
173 DA_DESTROY(&w->sv);
174 xfree(w);
175 }
176
177 static ppgen_ops wordlist_ops = {
178 "wordlist",
179 wordlist_init, wordlist_scan, wordlist_endscan, wordlist_gen, wordlist_done
180 };
181
182 /*----- Markov word model -------------------------------------------------*/
183
/* Symbol numbers used by the Markov model in addition to the offsets of
 * the ordinary word characters.
 */
enum {
  C_START = 27,                         /* Context before a word begins */
  C_END = 28,                           /* Marks the end of a word */
  VECSZ = 29                            /* Total number of symbols */
};
189
190 typedef struct node {
191 uint32 count;
192 uint32 p[VECSZ];
193 } node;
194
195 static void *markov_init(void)
196 {
197 node (*model)[VECSZ][VECSZ][VECSZ] = xmalloc(sizeof(*model));
198 unsigned i, j, k, l;
199
200 for (i = 0; i < VECSZ; i++) {
201 for (j = 0; j < VECSZ; j++) {
202 for (k = 0; k < VECSZ; k++) {
203 node *n = &(*model)[i][j][k];
204 n->count = 0;
205 for (l = 0; l < VECSZ; l++)
206 n->p[l] = 0;
207 }
208 }
209 }
210
211 return (model);
212 }
213
214 static void markov_scan(FILE *fp, void *p)
215 {
216 node (*model)[VECSZ][VECSZ][VECSZ] = p;
217 unsigned i = C_START, j = C_START, k = C_START, l = C_END;
218
219 for (;;) {
220 int ch = getc(fp);
221 const char *q;
222 node *n = &(*model)[i][j][k];
223
224 if (ch == EOF || isspace(ch)) {
225 if (l != C_END) {
226 l = C_END;
227 n->count++;
228 n->p[l]++;
229 i = j = k = C_START;
230 }
231 if (ch == EOF)
232 break;
233 continue;
234 }
235
236 if ((q = strchr(wchars, tolower(ch))) == 0)
237 continue;
238 l = q - wchars;
239 n->count++;
240 n->p[l]++;
241 i = j; j = k; k = l;
242 }
243 }
244
245 static double markov_gen(dstr *d, grand *r, void *p)
246 {
247 node (*model)[VECSZ][VECSZ][VECSZ] = p;
248 unsigned i = C_START, j = C_START, k = C_START, l;
249 double logp = 0;
250 double log2 = log(2);
251
252 for (;;) {
253 node *n = &(*model)[i][j][k];
254 uint32 z = r->ops->range(r, n->count);
255 for (l = 0; z >= n->p[l]; z -= n->p[l++])
256 ;
257 logp -= log((double)n->p[l]/(double)n->count)/log2;
258 if (l == C_END)
259 break;
260 DPUTC(d, wchars[l]);
261 i = j; j = k; k = l;
262 }
263
264 return (logp);
265 }
266
/* Free the Markov model @p@, which is a single allocation. */
static void markov_done(void *p)
{
  xfree(p);
}
272
273 static ppgen_ops markov_ops = {
274 "markov",
275 markov_init, markov_scan, 0, markov_gen, markov_done
276 };
277
278 /*----- Main code ---------------------------------------------------------*/
279
280 static ppgen_ops *ppgentab[] = {
281 &markov_ops,
282 &wordlist_ops,
283 0
284 };
285
286 static void version(FILE *fp)
287 {
288 pquis(fp, "$, Catacomb version " VERSION "\n");
289 }
290
/* Write a one-line usage summary to @fp@. */
static void usage(FILE *fp)
{
  pquis(fp,
        "Usage: $ [-p] [-b bits] [-g gen] [-n count] [-r [min-]max] wordlist...\n");
}
297
298 static void help(FILE *fp)
299 {
300 ppgen_ops **ops;
301 version(fp);
302 fputc('\n', fp);
303 usage(fp);
304 pquis(fp, "\n\
305 Generates random passphrases with the requested level of entropy. Options\n\
306 supported are:\n\
307 \n\
308 -h, --help Show this help text.\n\
309 -v, --version Show the program's version number.\n\
310 -u, --usage Show a terse usage summary.\n\
311 -b, --bits=BITS Produce at least BITS bits of entropy.\n\
312 -g, --generator=GEN Use passphrase generator GEN.\n\
313 -n, --count=COUNT Generate COUNT passphrases.\n\
314 -p, --probability Show -log_2 of probability for each phrase.\n\
315 -r, --range=[MIN-]MAX Supply minimum and maximum word lengths.\n\
316 \n\
317 Generators currently available:");
318 for (ops = ppgentab; *ops; ops++)
319 fprintf(fp, " %s", (*ops)->name);
320 fputc('\n', fp);
321 }
322
323 int main(int argc, char *argv[])
324 {
325 ppgen_ops *ops = ppgentab[0];
326 unsigned f = 0;
327 void *ctx;
328 dstr d = DSTR_INIT;
329 dstr dd = DSTR_INIT;
330 unsigned i;
331
332 #define f_bogus 1u
333 #define f_showp 2u
334
335 ego(argv[0]);
336 for (;;) {
337 static struct option opts[] = {
338 { "help", 0, 0, 'h' },
339 { "version", 0, 0, 'v' },
340 { "usage", 0, 0, 'u' },
341 { "bits", OPTF_ARGREQ, 0, 'b' },
342 { "generator", OPTF_ARGREQ, 0, 'g' },
343 { "count", OPTF_ARGREQ, 0, 'n' },
344 { "probability", 0, 0, 'p' },
345 { "range", OPTF_ARGREQ, 0, 'r' },
346 { 0, 0, 0, 0 }
347 };
348 int i = mdwopt(argc, argv, "hvu b:g:n:pr:", opts, 0, 0, 0);
349
350 if (i < 0)
351 break;
352 switch (i) {
353 case 'h':
354 help(stdout);
355 exit(0);
356 case 'v':
357 version(stdout);
358 exit(0);
359 case 'u':
360 usage(stdout);
361 exit(0);
362 case 'b': {
363 char *p;
364 unsigned long n = strtoul(optarg, &p, 0);
365 if (*p)
366 die(EXIT_FAILURE, "bad integer `%s'", optarg);
367 bits = n;
368 } break;
369 case 'g': {
370 ppgen_ops **p;
371 size_t n = strlen(optarg);
372 ops = 0;
373 for (p = ppgentab; *p; p++) {
374 if (strncmp(optarg, (*p)->name, n) == 0) {
375 if (!(*p)->name[n]) {
376 ops = *p;
377 break;
378 } else if (ops)
379 die(EXIT_FAILURE, "ambiguous generator name `%s'", optarg);
380 ops = *p;
381 }
382 }
383 if (!ops)
384 die(EXIT_FAILURE, "unknown generator name `%s'", optarg);
385 } break;
386 case 'n': {
387 char *p;
388 unsigned long n = strtoul(optarg, &p, 0);
389 if (*p)
390 die(EXIT_FAILURE, "bad integer `%s'", optarg);
391 count = n;
392 } break;
393 case 'p':
394 f |= f_showp;
395 break;
396 case 'r': {
397 char *p;
398 unsigned long n = min, nn = max;
399 nn = strtoul(optarg, &p, 0);
400 if (*p == '-') {
401 n = nn;
402 nn = strtoul(p + 1, &p, 0);
403 }
404 if (*p)
405 die(EXIT_FAILURE, "bad range string `%s'", optarg);
406 min = n; max = nn;
407 } break;
408 default:
409 f |= f_bogus;
410 break;
411 }
412 }
413
414 argc -= optind;
415 argv += optind;
416 if ((f & f_bogus) || !argc) {
417 usage(stderr);
418 exit(EXIT_FAILURE);
419 }
420
421 rand_noisesrc(RAND_GLOBAL, &noise_source);
422 rand_seed(RAND_GLOBAL, 160);
423
424 ctx = ops->init();
425 while (*argv) {
426 if (strcmp(*argv, "-") == 0)
427 ops->scan(stdin, ctx);
428 else {
429 FILE *fp = fopen(*argv, "r");
430 if (!fp) {
431 die(EXIT_FAILURE, "error opening file `%s': %s",
432 *argv, strerror(errno));
433 }
434 ops->scan(fp, ctx);
435 fclose(fp);
436 }
437 argv++;
438 }
439 if (ops->endscan)
440 ops->endscan(ctx);
441
442 for (i = 0; !count || i < count; i++) {
443 double logp = 0;
444 DRESET(&d);
445 while (logp < bits) {
446 double pp;
447 DRESET(&dd);
448 pp = ops->gen(&dd, &rand_global, ctx);
449 if (!pp || dd.len < min || dd.len > max)
450 continue;
451 if (logp)
452 DPUTC(&d, ' ');
453 DPUTD(&d, &dd);
454 logp += pp;
455 }
456 dstr_write(&d, stdout);
457 if (f & f_showp)
458 printf(" [%g]", logp);
459 fputc('\n', stdout);
460 }
461
462 ops->done(ctx);
463 dstr_destroy(&d);
464 dstr_destroy(&dd);
465 return (0);
466 }
467
468 /*----- That's all, folks -------------------------------------------------*/