New restartable interface to Maurer testing.
[u/mdw/catacomb] / maurer.c
1 /* -*-c-*-
2 *
3 * $Id: maurer.c,v 1.2 2000/08/11 21:34:59 mdw Exp $
4 *
5 * Maurer's universal statistical test
6 *
7 * (c) 2000 Straylight/Edgeware
8 */
9
10 /*----- Licensing notice --------------------------------------------------*
11 *
12 * This file is part of Catacomb.
13 *
14 * Catacomb is free software; you can redistribute it and/or modify
15 * it under the terms of the GNU Library General Public License as
16 * published by the Free Software Foundation; either version 2 of the
17 * License, or (at your option) any later version.
18 *
19 * Catacomb is distributed in the hope that it will be useful,
20 * but WITHOUT ANY WARRANTY; without even the implied warranty of
21 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 * GNU Library General Public License for more details.
23 *
24 * You should have received a copy of the GNU Library General Public
25 * License along with Catacomb; if not, write to the Free
26 * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
27 * MA 02111-1307, USA.
28 */
29
30 /*----- Revision history --------------------------------------------------*
31 *
32 * $Log: maurer.c,v $
33 * Revision 1.2 2000/08/11 21:34:59 mdw
34 * New restartable interface to Maurer testing.
35 *
36 * Revision 1.1 2000/06/17 11:29:49 mdw
37 * Maurer's universal statistical test.
38 *
39 */
40
41 /*----- Header files ------------------------------------------------------*/
42
43 #include <assert.h>
44 #include <math.h>
45 #include <stdio.h>
46 #include <stdlib.h>
47
48 #include <mLib/alloc.h>
49 #include <mLib/bits.h>
50
51 #include "maurer.h"
52
53 /*----- Data structures ---------------------------------------------------*/
54
55 typedef struct bitscan {
56 const octet *p;
57 unsigned b;
58 uint32 a;
59 } bitscan;
60
61 /*----- Main code ---------------------------------------------------------*/
62
63 /* --- @more@ --- *
64 *
65 * Arguments: @bitscan *b@ = pointer to a bitscanner structure
66 * @unsigned l@ = number of bits wanted
67 *
68 * Returns: An integer representing the next @l@ bits of the stream.
69 *
70 * Use: Fetches the next @l@ bits of a data stream. The bitscanner
71 * has no way of knowing whether it's run out of data, so make
72 * sure that doesn't happen.
73 */
74
75 static unsigned more(bitscan *b, unsigned l)
76 {
77 unsigned x, m;
78
79 while (b->b < l) {
80 b->a |= U8(*b->p++) << b->b;
81 b->b += 8;
82 }
83 m = (1 << l) - 1;
84 x = b->a & m;
85 b->a >>= l;
86 b->b -= l;
87 return (x);
88 }
89
90 /* --- @maurer_init@ --- *
91 *
92 * Arguments: @maurer_ctx *m@ = pointer to a context to initialize
93 * @unsigned l@ = number of bits to take at a time
94 *
95 * Returns: Minimum recommended amount of data to test.
96 *
97 * Use: Initializes a Maurer testing context. Note that @l@ values
98 * larger than 16 are not supported, and values less than 6 can
99 * give bizarre results.
100 */
101
102 unsigned long maurer_init(maurer_ctx *m, unsigned l)
103 {
104 size_t i;
105
106 assert(((void)"(maurer_init): chunks larger than 16 bits not supported",
107 0 < l && l <= 16));
108
109 m->l = l;
110 m->a = 0;
111 m->b = 0;
112 m->n = 0;
113 m->x = 0;
114
115 m->t = xmalloc(sizeof(unsigned long) << l);
116 for (i = 0; i < (1 << l); i++)
117 m->t[i] = 0;
118 return (((1000ul << l) * l + 7)/8);
119 }
120
121 /* --- @bits@ --- *
122 *
123 * Arguments: @maurer_ctx *m@ = pointer to testing context
124 * @const octet **p@ = address of a buffer pointer
125 * @const octet *q@ = limit of the buffer pointer
126 * @unsigned *x@ = address to store next chunk
127 *
128 * Returns: Nonzero if some more bits arrived.
129 *
130 * Use: Reads some bits from a buffer.
131 */
132
133 static int bits(maurer_ctx *m, const octet **p, const octet *q, unsigned *x)
134 {
135 while (m->b < m->l) {
136 if (*p >= q)
137 return (0);
138 m->a |= U8(*(*p)++) << m->b;
139 m->b += 8;
140 }
141 *x = m->a & ((1 << m->l) - 1);
142 m->a >>= m->l;
143 m->b -= m->l;
144 m->n++;
145 return (1);
146 }
147
148 /* --- @maurer_test@ --- *
149 *
150 * Arguments: @maurer_ctx *m@ = pointer to a Maurer testing context
151 * @const void *buf@ = pointer to a buffer of data
152 * @size_t sz@ = size of the buffer
153 *
154 * Returns: ---
155 *
156 * Use: Scans a buffer of data and updates the testing context.
157 */
158
159 void maurer_test(maurer_ctx *m, const void *buf, size_t sz)
160 {
161 const octet *p = buf, *l = p + sz;
162 unsigned long q = 10 << m->l;
163 unsigned x;
164
165 /* --- Initialize the table --- */
166
167 while (m->n < q) {
168 if (!bits(m, &p, l, &x))
169 return;
170 m->t[x] = m->n;
171 }
172
173 /* --- Update the statistic --- */
174
175 while (bits(m, &p, l, &x)) {
176 m->x += log(m->n - m->t[x]);
177 m->t[x] = m->n;
178 }
179 }
180
181 /* --- @maurer_done@ --- *
182 *
183 * Arguments: @maurer_ctx *m@ = pointer to a Maurer testing context
184 *
185 * Returns: The statistic %$Z_u = (X_u - \mu)/\sigma$%, which should be
186 * normally distributed with a mean of 0 and variance of 1.
187 *
188 * Use: Returns the result of Maurer's universal statistical test.
189 * Also frees the memory allocated during initialization.
190 *
191 * If insufficient data was received, the value @HUGE_VAL@ is
192 * returned.
193 */
194
195 double maurer_done(maurer_ctx *m)
196 {
197 double x, mu, c, sigma;
198 unsigned long q = 10 << m->l, k;
199
200 /* --- Table for means and variances --- */
201
202 struct { double mu, var_1; } vtab[] = {
203 { 0.7326495, 0.690 },
204 { 1.5374383, 1.338 },
205 { 2.4016068, 1.901 },
206 { 3.3112247, 2.358 },
207 { 4.2534266, 2.705 },
208 { 5.2177052, 2.945 },
209 { 6.1962507, 3.125 },
210 { 7.1836656, 3.238 },
211 { 8.1764248, 3.311 },
212 { 9.1723243, 3.356 },
213 { 10.170032 , 3.384 },
214 { 11.168765 , 3.401 },
215 { 12.168070 , 3.410 },
216 { 13.167693 , 3.416 },
217 { 14.167488 , 3.419 },
218 { 15.167379 , 3.421 }
219 };
220
221 /* --- Check for bogus requests --- */
222
223 if (m->n < q) {
224 x = HUGE_VAL;
225 goto done;
226 }
227
228 /* --- Do the maths --- *
229 *
230 * This produces %$X_u$%, which should be normally distributed with a mean
231 * and variance we can compute.
232 */
233
234 k = m->n - q;
235 x = m->x/(k * log(2));
236
237 /* --- Now translate this into %$Z_u$% distributed as %$N(0, 1)$% --- */
238
239 mu = vtab[m->l - 1].mu;
240 c = 0.7 - 0.8/m->l + (1.6 + 12.8/m->l) * pow(k, -4.0/m->l);
241 sigma = sqrt(c * c * vtab[m->l - 1].var_1 / k);
242 x = (x - mu)/sigma;
243
244 /* --- Done (phew!) --- */
245
246 done:
247 xfree(m->t);
248 return (x);
249 }
250
251 /* --- @maurer@ --- *
252 *
253 * Arguments: @const octet *p@ = pointer to a buffer of data
254 * @size_t sz@ = size of the buffer
255 * @unsigned l@ = number of bits to take at a time
256 *
257 * Returns: The statistic %$Z_u$%.
258 *
259 * Use: Simple interface to Maurer's universal statistical test. For
260 * large %$l$% you should use the more complicated restartable
261 * interface.
262 */
263
264 double maurer(const octet *p, size_t sz, unsigned l)
265 {
266 maurer_ctx m;
267
268 maurer_init(&m, l);
269 maurer_test(&m, p, sz);
270 return (maurer_done(&m));
271 }
272
273 /*----- That's all, folks -------------------------------------------------*/