From 39be270832d86dfa24a84cc0e31fa8f3442f4136 Mon Sep 17 00:00:00 2001 From: mdw Date: Sat, 17 Jun 2000 11:29:49 +0000 Subject: [PATCH] Maurer's universal statistical test. --- maurer.c | 190 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ maurer.h | 77 ++++++++++++++++++++++++++ 2 files changed, 267 insertions(+) create mode 100644 maurer.c create mode 100644 maurer.h diff --git a/maurer.c b/maurer.c new file mode 100644 index 0000000..1828015 --- /dev/null +++ b/maurer.c @@ -0,0 +1,190 @@ +/* -*-c-*- + * + * $Id: maurer.c,v 1.1 2000/06/17 11:29:49 mdw Exp $ + * + * Maurer's universal statistical test + * + * (c) 2000 Straylight/Edgeware + */ + +/*----- Licensing notice --------------------------------------------------* + * + * This file is part of Catacomb. + * + * Catacomb is free software; you can redistribute it and/or modify + * it under the terms of the GNU Library General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * Catacomb is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public + * License along with Catacomb; if not, write to the Free + * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, + * MA 02111-1307, USA. + */ + +/*----- Revision history --------------------------------------------------* + * + * $Log: maurer.c,v $ + * Revision 1.1 2000/06/17 11:29:49 mdw + * Maurer's universal statistical test. + * + */ + +/*----- Header files ------------------------------------------------------*/ + +#include +#include +#include +#include + +#include +#include + +#include "maurer.h" + +/*----- Data structures ---------------------------------------------------*/ + +typedef struct bitscan { + const octet *p; + unsigned b; + uint32 a; +} bitscan; + +/*----- Main code ---------------------------------------------------------*/ + +/* --- @more@ --- * + * + * Arguments: @bitscan *b@ = pointer to a bitscanner structure + * @unsigned l@ = number of bits wanted + * + * Returns: An integer representing the next @l@ bits of the stream. + * + * Use: Fetches the next @l@ bits of a data stream. The bitscanner + * has no way of knowing whether it's run out of data, so make + * sure that doesn't happen. + */ + +static unsigned more(bitscan *b, unsigned l) +{ + unsigned x, m; + + while (b->b < l) { + b->a |= U8(*b->p++) << b->b; + b->b += 8; + } + m = (1 << l) - 1; + x = b->a & m; + b->a >>= l; + b->b -= l; + return (x); +} + +/* --- @maurer@ --- * + * + * Arguments: @const octet *p@ = pointer to a buffer of data + * @size_t sz@ = size of the buffer + * @unsigned l@ = number of bits to take at a time + * + * Returns: The statistic %$Z_u = (X_u - \mu)/\sigma$%, which should be + * normally distributed with a mean of zero and standard + * deviation of 1. + * + * Use: Performs Maurer's universal statistical test on a buffer of + * data. You must ensure that the buffer size @sz@ is larger + * than %$11 l \cdot 2^l$% bits long, and ideally much larger. + * + * Note that, under Unix systems, this function requires the + * maths library. + */ + +double maurer(const octet *p, size_t sz, unsigned l) +{ + size_t *t; /* Table of occurrences */ + size_t q, k; /* Block counts */ + size_t i; /* Loop counter */ + bitscan b; /* Bitscanning context */ + double x; /* Statistic accumulator */ + double mu, c, sigma; /* Other useful things */ + + /* --- Table for means and variances --- */ + + struct { double mu, var_1; } vtab[] = { + { 0.7326495, 0.690 }, + { 1.5374383, 1.338 }, + { 2.4016068, 1.901 }, + { 3.3112247, 2.358 }, + { 4.2534266, 2.705 }, + { 5.2177052, 2.945 }, + { 6.1962507, 3.125 }, + { 7.1836656, 3.238 }, + { 8.1764248, 3.311 }, + { 9.1723243, 3.356 }, + { 10.170032 , 3.384 }, + { 11.168765 , 3.401 }, + { 12.168070 , 3.410 }, + { 13.167693 , 3.416 }, + { 14.167488 , 3.419 }, + { 15.167379 , 3.421 } + }; + + assert(((void)"bit chunk size out of range", 0 < l && l <= 16)); + + /* --- Initialize everything --- */ + + t = xmalloc(sizeof(size_t) << l); + for (i = 0; i < (1 << l); i++) + t[i] = 0; + + b.p = p; + b.a = 0; + b.b = 0; + + k = (sz * 8)/l; + q = 10 << l; + assert(((void)"buffer too small for test", k > q && k - q > (1 << l))); + + /* --- Set up the table --- */ + + for (i = 0; i < q; i++) { + unsigned a = more(&b, l); + t[a] = i; + } + + /* --- Now compute the statistic --- */ + + x = 0; + for (; i < k; i++) { + unsigned a = more(&b, l); + x += log(i - t[a]); + t[a] = i; + } + + /* --- Twiddle at the end --- * + * + * This produces %$X_u$%, which should be normally distributed with a mean + * and variance we can compute. + */ + + k -= q; + x /= k * log(2); + + /* --- Now translate this into %$Z_u$% distributed as %$N(0, 1)$% --- */ + + mu = vtab[l - 1].mu; + c = 0.7 - 0.8/l + (1.6 + 12.8/l) * pow(k, -4.0/l); + sigma = sqrt(c * c * vtab[l - 1].var_1 / k); + + x = (x - mu)/sigma; + + /* --- Done (phew!) --- */ + + xfree(t); + return (x); +} + +/*----- That's all, folks -------------------------------------------------*/ diff --git a/maurer.h b/maurer.h new file mode 100644 index 0000000..cd74e8c --- /dev/null +++ b/maurer.h @@ -0,0 +1,77 @@ +/* -*-c-*- + * + * $Id: maurer.h,v 1.1 2000/06/17 11:29:49 mdw Exp $ + * + * Maurer's universal statistical test + * + * (c) 2000 Straylight/Edgeware + */ + +/*----- Licensing notice --------------------------------------------------* + * + * This file is part of Catacomb. + * + * Catacomb is free software; you can redistribute it and/or modify + * it under the terms of the GNU Library General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * Catacomb is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public + * License along with Catacomb; if not, write to the Free + * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, + * MA 02111-1307, USA. + */ + +/*----- Revision history --------------------------------------------------* + * + * $Log: maurer.h,v $ + * Revision 1.1 2000/06/17 11:29:49 mdw + * Maurer's universal statistical test. + * + */ + +#ifndef CATACOMB_MAURER_H +#define CATACOMB_MAURER_H + +#ifdef __cplusplus + extern "C" { +#endif + +/*----- Header files ------------------------------------------------------*/ + +#include + +/*----- Functions provided ------------------------------------------------*/ + +/* --- @maurer@ --- * + * + * Arguments: @const octet *p@ = pointer to a buffer of data + * @size_t sz@ = size of the buffer + * @unsigned l@ = number of bits to take at a time + * + * Returns: The statistic %$Z_u = (X_u - \mu)/\sigma$%, which should be + * normally distributed with a mean of zero and standard + * deviation of 1. + * + * Use: Performs Maurer's universal statistical test on a buffer of + * data. You must ensure that the buffer size @sz@ is larger + * than %$11 l \cdot 2^l$% bits long, and ideally much larger. + * + * Note that, under Unix systems, this function requires the + * maths library. + */ + +extern double maurer(const octet */*p*/, size_t /*sz*/, unsigned /*l*/); + +/*----- That's all, folks -------------------------------------------------*/ + +#ifdef __cplusplus + } +#endif + +#endif -- 2.11.0