8fe3c82b |
1 | /* -*-c-*- |
2 | * |
8fe3c82b |
3 | * Simple and efficient universal hashing for hashtables |
4 | * |
5 | * (c) 2003 Straylight/Edgeware |
6 | */ |
7 | |
d4efbcd9 |
8 | /*----- Licensing notice --------------------------------------------------* |
8fe3c82b |
9 | * |
10 | * This file is part of the mLib utilities library. |
11 | * |
12 | * mLib is free software; you can redistribute it and/or modify |
13 | * it under the terms of the GNU Library General Public License as |
14 | * published by the Free Software Foundation; either version 2 of the |
15 | * License, or (at your option) any later version. |
d4efbcd9 |
16 | * |
8fe3c82b |
17 | * mLib is distributed in the hope that it will be useful, |
18 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
19 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
20 | * GNU Library General Public License for more details. |
d4efbcd9 |
21 | * |
8fe3c82b |
22 | * You should have received a copy of the GNU Library General Public |
23 | * License along with mLib; if not, write to the Free |
24 | * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, |
25 | * MA 02111-1307, USA. |
26 | */ |
27 | |
8fe3c82b |
28 | #ifndef MLIB_UNIHASH_H |
29 | #define MLIB_UNIHASH_H |
30 | |
31 | #ifdef __cplusplus |
32 | extern "C" { |
33 | #endif |
34 | |
35 | |
36 | /*----- Concept -----------------------------------------------------------* |
37 | * |
38 | * Let %$\gf{q}$% be a finite field. Choose an arbitrary %$k \inr \gf{q}$%. |
39 | * Let %$M$% be a message. Injectively pad %$M$% and split it into blocks |
40 | * $m_{n-1}, m_{n-2}, \ldots, m_2, m_1, m_0$% in %$\gf{q}%. |
41 | * Then we compute |
42 | * |
573eadb5 |
43 | * %$H_k(M) = k^{n+1} + \sum_{0\le i<n} m_i k^{i+1}.$% |
8fe3c82b |
44 | * |
45 | * Note that %$H_0(M) = 0$% for all messages %$M$%. |
46 | * |
47 | * If we deal with messages at most %$\ell$% blocks long then %$H_k(\cdot)$% |
48 | * is %$(\ell + 1)/q$%-almost universal. Moreover, if %$q = 2^f$% then |
49 | * %$H_k(\cdot)$% is %$(\ell + 1)/q$%-almost XOR-universal. |
50 | * |
51 | * Proof. Let %$A$% and %$B$% be two messages, represented by |
52 | * %$a_{n-1}, \ldots, a_0$% and %$b_{m-1}, \ldots, b_0$% respectively; and |
53 | * choose any %$\delta \in \gf{q}$%. We must bound the probability that |
54 | * |
55 | * %$k^{n+1} + a_{n-1} k^{n} + \cdots + a_1 k^2 + a_0 k - {}$% |
56 | * %$k^{m+1} - b_{m-1} k^{m} - \cdots - b_1 k^2 - b_0 k = \delta$%. |
57 | * |
58 | * Firstly, we claim that if %$A$% and %$B$% are distinct, there is some |
59 | * nonzero coefficient of %$k$%. For if %$n \ne m$% then, without loss of |
60 | * generality, let %$n > m$%, and hence the coefficient of %$k_n$% is |
61 | * nonzero. Alternatively, if %$n = m$% then there must be some |
62 | * %$i \in \{ 0, \ldots, n - 1 \}$% with %$a_i \ne b_i$%, for otherwise the |
63 | * messages would be identical; but then the coefficient of %$k^{i+1}$% is |
64 | * %$a_i - b_i \ne 0$%. |
65 | * |
66 | * Hence we have a polynomial equation with degree at most %$\ell + 1$%; |
67 | * there must be at most %$\ell + 1$% solutions for %$k$%; but we choose |
68 | * %$k$% at random from a set of %$q$%; so the equation is true with |
69 | * probability at most %$(\ell + 1)/q$%. |
70 | * |
71 | * This function can be used as a simple MAC with provable security against |
72 | * computationally unbounded adversaries. Simply XOR the hash with a random |
73 | * string indexed from a large random pad by some nonce sent with the |
74 | * message. The probability of a forgery attempt being successful is then |
573eadb5 |
75 | * %$(\ell + 1)/2^t$%, where %$t$% is the tag length and %$\ell$% is the |
76 | * longest message permitted. |
8fe3c82b |
77 | */ |
78 | |
79 | /*----- Practicalities ----------------------------------------------------* |
80 | * |
81 | * We work in %$\gf{2^32}$%, represented as a field of polynomials modulo |
573eadb5 |
82 | * %$\texttt{104c11db7}_x$% (this is the standard CRC-32 polynomial). Our |
83 | * blocks are bytes. |
8fe3c82b |
84 | * |
85 | * The choice of a 32-bit hash is made for pragmatic reasons: we're never |
86 | * likely to actually want all 32 bits for a real hashtable anyway. The |
87 | * truncation result is needed to keep us afloat with smaller tables. |
88 | * |
89 | * We compute hashes using a slightly unrolled version of Horner's rule, |
90 | * using the recurrence: |
91 | * |
92 | * %$a_{i+b} = (a_i + m_i) k^b + m_{i+1} k^{b-1} + \cdots + m_{i+b-1} k$% |
93 | * |
94 | * which involves one full-width multiply and %$b - 1$% one-byte multiplies; |
95 | * the latter may be efficiently computed using a table lookup. Start with |
96 | * %$a_0 = k$%. |
97 | * |
98 | * We precompute tables %$S[\cdot][\cdot][\cdot]$%, where |
99 | * |
100 | * %$S[u][v][w] = k^{u+1} x^{8v} w$% |
101 | * for %$0 \le u < b$%, %$0 \le v < 4$%, %$0 \le w < 256)$%. |
102 | * |
103 | * A one-byte multiply is one lookup; a full-width multiply is four lookups |
104 | * and three XORs. The processing required is then %$b + 3$% lookups and |
105 | * %$b + 3$% XORs per batch, or %$(b + 3)/b$% lookups and XORs per byte, at |
106 | * the expense of %$4 b$% kilobytes of tables. This compares relatively |
107 | * favorably with CRC32. Indeed, in tests, this implementation with $b = 4$% |
108 | * is faster than a 32-bit CRC. |
109 | */ |
110 | |
111 | /*----- Header files ------------------------------------------------------*/ |
112 | |
113 | #include <stddef.h> |
114 | |
115 | #ifndef MLIB_BITS_H |
116 | # include "bits.h" |
117 | #endif |
118 | |
119 | /*----- Data structures ---------------------------------------------------*/ |
120 | |
121 | #define UNIHASH_NBATCH 4 |
122 | #define UNIHASH_POLY 0x04c11db7 /* From CRC32 */ |
123 | |
124 | typedef struct unihash_info { |
125 | uint32 s[UNIHASH_NBATCH][4][256]; /* S-tables as described */ |
126 | } unihash_info; |
127 | |
6f444bda |
128 | /*----- A global hash-info table ------------------------------------------*/ |
129 | |
130 | extern unihash_info unihash_global; /* Key this if you like */ |
131 | |
8fe3c82b |
132 | /*----- Functions provided ------------------------------------------------*/ |
133 | |
134 | /* --- @unihash_setkey@ --- * |
135 | * |
136 | * Arguments: @unihash_info *i@ = where to store the precomputed tables |
137 | * @uint32 k@ = the key to set, randomly chosen |
138 | * |
139 | * Returns: --- |
140 | * |
141 | * Use: Calculates the tables required for efficient hashing. |
142 | */ |
143 | |
144 | extern void unihash_setkey(unihash_info */*i*/, uint32 /*k*/); |
145 | |
146 | /* --- @unihash_hash@ --- * |
147 | * |
148 | * Arguments: @const unihash_info *i@ = pointer to precomputed table |
149 | * @uint32 a@ = @UNIHASH_INIT(i)@ or value from previous call |
150 | * @const void *p@ = pointer to data to hash |
151 | * @size_t sz@ = size of the data |
152 | * |
573eadb5 |
153 | * Returns: Hash of data so far. |
8fe3c82b |
154 | * |
d4efbcd9 |
155 | * Use: Hashes data. Call this as many times as needed. |
8fe3c82b |
156 | */ |
157 | |
158 | #define UNIHASH_INIT(i) ((i)->s[0][0][1]) /* %$k$% */ |
159 | |
160 | extern uint32 unihash_hash(const unihash_info */*i*/, uint32 /*a*/, |
161 | const void */*p*/, size_t /*sz*/); |
162 | |
163 | /* --- @unihash@ --- * |
164 | * |
165 | * Arguments: @const unihash_info *i@ = precomputed tables |
166 | * @const void *p@ = pointer to data to hash |
167 | * @size_t sz@ = size of the data |
168 | * |
169 | * Returns: The hash value computed. |
170 | * |
171 | * Use: All-in-one hashing function. No faster than using the |
d4efbcd9 |
172 | * separate calls, but more convenient. |
8fe3c82b |
173 | */ |
174 | |
175 | #define UNIHASH(i, p, sz) (unihash_hash((i), UNIHASH_INIT((i)), (p), (sz))) |
176 | |
177 | extern uint32 unihash(const unihash_info */*i*/, |
178 | const void */*p*/, size_t /*sz*/); |
179 | |
180 | /*----- That's all, folks -------------------------------------------------*/ |
181 | |
182 | #ifdef __cplusplus |
183 | } |
184 | #endif |
185 | |
186 | #endif |