X-Git-Url: https://git.distorted.org.uk/u/mdw/catacomb/blobdiff_plain/39be270832d86dfa24a84cc0e31fa8f3442f4136..78614e02310dbe879d55f0a68e47349db074ff61:/maurer.c diff --git a/maurer.c b/maurer.c index 1828015..a6dd31a 100644 --- a/maurer.c +++ b/maurer.c @@ -1,13 +1,13 @@ /* -*-c-*- * - * $Id: maurer.c,v 1.1 2000/06/17 11:29:49 mdw Exp $ + * $Id: maurer.c,v 1.4 2004/04/08 01:36:15 mdw Exp $ * * Maurer's universal statistical test * * (c) 2000 Straylight/Edgeware */ -/*----- Licensing notice --------------------------------------------------* +/*----- Licensing notice --------------------------------------------------* * * This file is part of Catacomb. * @@ -15,26 +15,18 @@ * it under the terms of the GNU Library General Public License as * published by the Free Software Foundation; either version 2 of the * License, or (at your option) any later version. - * + * * Catacomb is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Library General Public License for more details. - * + * * You should have received a copy of the GNU Library General Public * License along with Catacomb; if not, write to the Free * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, * MA 02111-1307, USA. */ -/*----- Revision history --------------------------------------------------* - * - * $Log: maurer.c,v $ - * Revision 1.1 2000/06/17 11:29:49 mdw - * Maurer's universal statistical test. - * - */ - /*----- Header files ------------------------------------------------------*/ #include @@ -57,59 +49,115 @@ typedef struct bitscan { /*----- Main code ---------------------------------------------------------*/ -/* --- @more@ --- * +/* --- @maurer_init@ --- * * - * Arguments: @bitscan *b@ = pointer to a bitscanner structure - * @unsigned l@ = number of bits wanted + * Arguments: @maurer_ctx *m@ = pointer to a context to initialize + * @unsigned l@ = number of bits to take at a time * - * Returns: An integer representing the next @l@ bits of the stream. + * Returns: Minimum recommended amount of data to test. * - * Use: Fetches the next @l@ bits of a data stream. The bitscanner - * has no way of knowing whether it's run out of data, so make - * sure that doesn't happen. + * Use: Initializes a Maurer testing context. Note that @l@ values + * larger than 16 are not supported, and values less than 6 can + * give bizarre results. */ -static unsigned more(bitscan *b, unsigned l) +unsigned long maurer_init(maurer_ctx *m, unsigned l) { - unsigned x, m; + size_t i; + + assert(((void)"(maurer_init): chunks larger than 16 bits not supported", + 0 < l && l <= 16)); - while (b->b < l) { - b->a |= U8(*b->p++) << b->b; - b->b += 8; + m->l = l; + m->a = 0; + m->b = 0; + m->n = 0; + m->x = 0; + + m->t = xmalloc(sizeof(unsigned long) << l); + for (i = 0; i < (1 << l); i++) + m->t[i] = 0; + return (((1000ul << l) * l + 7)/8); +} + +/* --- @bits@ --- * + * + * Arguments: @maurer_ctx *m@ = pointer to testing context + * @const octet **p@ = address of a buffer pointer + * @const octet *q@ = limit of the buffer pointer + * @unsigned *x@ = address to store next chunk + * + * Returns: Nonzero if some more bits arrived. + * + * Use: Reads some bits from a buffer. + */ + +static int bits(maurer_ctx *m, const octet **p, const octet *q, unsigned *x) +{ + while (m->b < m->l) { + if (*p >= q) + return (0); + m->a |= U8(*(*p)++) << m->b; + m->b += 8; } - m = (1 << l) - 1; - x = b->a & m; - b->a >>= l; - b->b -= l; - return (x); + *x = m->a & ((1 << m->l) - 1); + m->a >>= m->l; + m->b -= m->l; + m->n++; + return (1); } -/* --- @maurer@ --- * +/* --- @maurer_test@ --- * * - * Arguments: @const octet *p@ = pointer to a buffer of data + * Arguments: @maurer_ctx *m@ = pointer to a Maurer testing context + * @const void *buf@ = pointer to a buffer of data * @size_t sz@ = size of the buffer - * @unsigned l@ = number of bits to take at a time + * + * Returns: --- + * + * Use: Scans a buffer of data and updates the testing context. + */ + +void maurer_test(maurer_ctx *m, const void *buf, size_t sz) +{ + const octet *p = buf, *l = p + sz; + unsigned long q = 10 << m->l; + unsigned x; + + /* --- Initialize the table --- */ + + while (m->n < q) { + if (!bits(m, &p, l, &x)) + return; + m->t[x] = m->n; + } + + /* --- Update the statistic --- */ + + while (bits(m, &p, l, &x)) { + m->x += log(m->n - m->t[x]); + m->t[x] = m->n; + } +} + +/* --- @maurer_done@ --- * + * + * Arguments: @maurer_ctx *m@ = pointer to a Maurer testing context * * Returns: The statistic %$Z_u = (X_u - \mu)/\sigma$%, which should be - * normally distributed with a mean of zero and standard - * deviation of 1. + * normally distributed with a mean of 0 and variance of 1. * - * Use: Performs Maurer's universal statistical test on a buffer of - * data. You must ensure that the buffer size @sz@ is larger - * than %$11 l \cdot 2^l$% bits long, and ideally much larger. + * Use: Returns the result of Maurer's universal statistical test. + * Also frees the memory allocated during initialization. * - * Note that, under Unix systems, this function requires the - * maths library. + * If insufficient data was received, the value @HUGE_VAL@ is + * returned. */ -double maurer(const octet *p, size_t sz, unsigned l) +double maurer_done(maurer_ctx *m) { - size_t *t; /* Table of occurrences */ - size_t q, k; /* Block counts */ - size_t i; /* Loop counter */ - bitscan b; /* Bitscanning context */ - double x; /* Statistic accumulator */ - double mu, c, sigma; /* Other useful things */ + double x, mu, c, sigma; + unsigned long q = 10 << m->l, k; /* --- Table for means and variances --- */ @@ -132,59 +180,56 @@ double maurer(const octet *p, size_t sz, unsigned l) { 15.167379 , 3.421 } }; - assert(((void)"bit chunk size out of range", 0 < l && l <= 16)); - - /* --- Initialize everything --- */ - - t = xmalloc(sizeof(size_t) << l); - for (i = 0; i < (1 << l); i++) - t[i] = 0; - - b.p = p; - b.a = 0; - b.b = 0; - - k = (sz * 8)/l; - q = 10 << l; - assert(((void)"buffer too small for test", k > q && k - q > (1 << l))); - - /* --- Set up the table --- */ - - for (i = 0; i < q; i++) { - unsigned a = more(&b, l); - t[a] = i; - } - - /* --- Now compute the statistic --- */ + /* --- Check for bogus requests --- */ - x = 0; - for (; i < k; i++) { - unsigned a = more(&b, l); - x += log(i - t[a]); - t[a] = i; + if (m->n < q) { + x = HUGE_VAL; + goto done; } - /* --- Twiddle at the end --- * + /* --- Do the maths --- * * * This produces %$X_u$%, which should be normally distributed with a mean * and variance we can compute. */ - k -= q; - x /= k * log(2); + k = m->n - q; + x = m->x/(k * log(2)); /* --- Now translate this into %$Z_u$% distributed as %$N(0, 1)$% --- */ - mu = vtab[l - 1].mu; - c = 0.7 - 0.8/l + (1.6 + 12.8/l) * pow(k, -4.0/l); - sigma = sqrt(c * c * vtab[l - 1].var_1 / k); - + mu = vtab[m->l - 1].mu; + c = 0.7 - 0.8/m->l + (1.6 + 12.8/m->l) * pow(k, -4.0/m->l); + sigma = sqrt(c * c * vtab[m->l - 1].var_1 / k); x = (x - mu)/sigma; /* --- Done (phew!) --- */ - xfree(t); +done: + xfree(m->t); return (x); } +/* --- @maurer@ --- * + * + * Arguments: @const octet *p@ = pointer to a buffer of data + * @size_t sz@ = size of the buffer + * @unsigned l@ = number of bits to take at a time + * + * Returns: The statistic %$Z_u$%. + * + * Use: Simple interface to Maurer's universal statistical test. For + * large %$l$% you should use the more complicated restartable + * interface. + */ + +double maurer(const octet *p, size_t sz, unsigned l) +{ + maurer_ctx m; + + maurer_init(&m, l); + maurer_test(&m, p, sz); + return (maurer_done(&m)); +} + /*----- That's all, folks -------------------------------------------------*/