When building the static Huffman tables, it's vital to include the
[sgt/halibut] / deflate.c
1 /*
2 * Reimplementation of Deflate (RFC1951) compression. Adapted from
3 * the version in PuTTY, and extended to write dynamic Huffman
4 * trees and choose block boundaries usefully.
5 */
6
7 /*
8 * TODO:
9 *
10 * - Feature: could do with forms of flush other than SYNC_FLUSH.
11 * I'm not sure exactly how those work when you don't know in
12 * advance that your next block will be static (as we did in
13 * PuTTY). And remember the 9-bit limitation of zlib.
14 * + also, zlib has FULL_FLUSH which clears the LZ77 state as
15 * well, for random access.
16 *
17 * - Compression quality: chooseblock() appears to be computing
18 * wildly inaccurate block size estimates. Possible resolutions:
19 * + find and fix some trivial bug I haven't spotted yet
20 * + abandon the entropic approximation and go with trial
21 * Huffman runs
22 *
23 * - Compression quality: see if increasing SYMLIMIT causes
24 * dynamic blocks to start being consistently smaller than it.
25 * + actually we seem to be there already, but check on a
26 * larger corpus.
27 *
28 * - Compression quality: we ought to be able to fall right back
29 * to actual uncompressed blocks if really necessary, though
30 * it's not clear what the criterion for doing so would be.
31 */
32
33 /*
34 * This software is copyright 2000-2006 Simon Tatham.
35 *
36 * Permission is hereby granted, free of charge, to any person
37 * obtaining a copy of this software and associated documentation
38 * files (the "Software"), to deal in the Software without
39 * restriction, including without limitation the rights to use,
40 * copy, modify, merge, publish, distribute, sublicense, and/or
41 * sell copies of the Software, and to permit persons to whom the
42 * Software is furnished to do so, subject to the following
43 * conditions:
44 *
45 * The above copyright notice and this permission notice shall be
46 * included in all copies or substantial portions of the Software.
47 *
48 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
49 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
50 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
51 * NONINFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE
52 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
53 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
54 * IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
55 * THE SOFTWARE.
56 */
57
58 #include <stdio.h>
59 #include <stddef.h>
60 #include <string.h>
61 #include <stdlib.h>
62 #include <assert.h>
63
64 #include "deflate.h"
65
66 #define snew(type) ( (type *) malloc(sizeof(type)) )
67 #define snewn(n, type) ( (type *) malloc((n) * sizeof(type)) )
68 #define sresize(x, n, type) ( (type *) realloc((x), (n) * sizeof(type)) )
69 #define sfree(x) ( free((x)) )
70
71 #define lenof(x) (sizeof((x)) / sizeof(*(x)))
72
73 #ifndef FALSE
74 #define FALSE 0
75 #define TRUE (!FALSE)
76 #endif
77
78 /* ----------------------------------------------------------------------
79 * This file can be compiled in a number of modes.
80 *
81 * With -DSTANDALONE, it builds a self-contained deflate tool which
82 * can compress, decompress, and also analyse a deflated file to
83 * print out the sequence of literals and copy commands it
84 * contains.
85 *
86 * With -DTESTMODE, it builds a test application which is given a
87 * file on standard input, both compresses and decompresses it, and
88 * outputs the re-decompressed result so it can be conveniently
89 * diffed against the original. Define -DTESTDBG as well for lots
90 * of diagnostics.
91 */
92
93 #if defined TESTDBG
94 /* gcc-specific diagnostic macro */
95 #define debug_int(x...) ( fprintf(stderr, x) )
96 #define debug(x) ( debug_int x )
97 #else
98 #define debug(x)
99 #endif
100
101 #ifdef STANDALONE
102 #define ANALYSIS
103 #endif
104
105 #ifdef ANALYSIS
106 int analyse_level = 0;
107 #endif
108
109 /* ----------------------------------------------------------------------
110 * Basic LZ77 code. This bit is designed modularly, so it could be
111 * ripped out and used in a different LZ77 compressor. Go to it,
112 * and good luck :-)
113 */
114
115 struct LZ77InternalContext;
116 struct LZ77Context {
117 struct LZ77InternalContext *ictx;
118 void *userdata;
119 void (*literal) (struct LZ77Context * ctx, unsigned char c);
120 void (*match) (struct LZ77Context * ctx, int distance, int len);
121 };
122
123 /*
124 * Initialise the private fields of an LZ77Context. It's up to the
125 * user to initialise the public fields.
126 */
127 static int lz77_init(struct LZ77Context *ctx);
128
129 /*
130 * Supply data to be compressed. Will update the private fields of
131 * the LZ77Context, and will call literal() and match() to output.
132 * If `compress' is FALSE, it will never emit a match, but will
133 * instead call literal() for everything.
134 */
135 static void lz77_compress(struct LZ77Context *ctx,
136 const unsigned char *data, int len, int compress);
137
138 /*
139 * Modifiable parameters.
140 */
141 #define WINSIZE 32768 /* window size. Must be power of 2! */
142 #define HASHMAX 2039 /* one more than max hash value */
143 #define MAXMATCH 32 /* how many matches we track */
144 #define HASHCHARS 3 /* how many chars make a hash */
145
146 /*
147 * This compressor takes a less slapdash approach than the
148 * gzip/zlib one. Rather than allowing our hash chains to fall into
149 * disuse near the far end, we keep them doubly linked so we can
150 * _find_ the far end, and then every time we add a new byte to the
151 * window (thus rolling round by one and removing the previous
152 * byte), we can carefully remove the hash chain entry.
153 */
154
155 #define INVALID -1 /* invalid hash _and_ invalid offset */
156 struct WindowEntry {
157 short next, prev; /* array indices within the window */
158 short hashval;
159 };
160
161 struct HashEntry {
162 short first; /* window index of first in chain */
163 };
164
165 struct Match {
166 int distance, len;
167 };
168
169 struct LZ77InternalContext {
170 struct WindowEntry win[WINSIZE];
171 unsigned char data[WINSIZE];
172 int winpos;
173 struct HashEntry hashtab[HASHMAX];
174 unsigned char pending[HASHCHARS];
175 int npending;
176 };
177
178 static int lz77_hash(const unsigned char *data)
179 {
180 return (257 * data[0] + 263 * data[1] + 269 * data[2]) % HASHMAX;
181 }
182
183 static int lz77_init(struct LZ77Context *ctx)
184 {
185 struct LZ77InternalContext *st;
186 int i;
187
188 st = snew(struct LZ77InternalContext);
189 if (!st)
190 return 0;
191
192 ctx->ictx = st;
193
194 for (i = 0; i < WINSIZE; i++)
195 st->win[i].next = st->win[i].prev = st->win[i].hashval = INVALID;
196 for (i = 0; i < HASHMAX; i++)
197 st->hashtab[i].first = INVALID;
198 st->winpos = 0;
199
200 st->npending = 0;
201
202 return 1;
203 }
204
205 static void lz77_advance(struct LZ77InternalContext *st,
206 unsigned char c, int hash)
207 {
208 int off;
209
210 /*
211 * Remove the hash entry at winpos from the tail of its chain,
212 * or empty the chain if it's the only thing on the chain.
213 */
214 if (st->win[st->winpos].prev != INVALID) {
215 st->win[st->win[st->winpos].prev].next = INVALID;
216 } else if (st->win[st->winpos].hashval != INVALID) {
217 st->hashtab[st->win[st->winpos].hashval].first = INVALID;
218 }
219
220 /*
221 * Create a new entry at winpos and add it to the head of its
222 * hash chain.
223 */
224 st->win[st->winpos].hashval = hash;
225 st->win[st->winpos].prev = INVALID;
226 off = st->win[st->winpos].next = st->hashtab[hash].first;
227 st->hashtab[hash].first = st->winpos;
228 if (off != INVALID)
229 st->win[off].prev = st->winpos;
230 st->data[st->winpos] = c;
231
232 /*
233 * Advance the window pointer.
234 */
235 st->winpos = (st->winpos + 1) & (WINSIZE - 1);
236 }
237
238 #define CHARAT(k) ( (k)<0 ? st->data[(st->winpos+k)&(WINSIZE-1)] : data[k] )
239
240 static void lz77_compress(struct LZ77Context *ctx,
241 const unsigned char *data, int len, int compress)
242 {
243 struct LZ77InternalContext *st = ctx->ictx;
244 int i, hash, distance, off, nmatch, matchlen, advance;
245 struct Match defermatch, matches[MAXMATCH];
246 int deferchr;
247
248 /*
249 * Add any pending characters from last time to the window. (We
250 * might not be able to.)
251 */
252 for (i = 0; i < st->npending; i++) {
253 unsigned char foo[HASHCHARS];
254 int j;
255 if (len + st->npending - i < HASHCHARS) {
256 /* Update the pending array. */
257 for (j = i; j < st->npending; j++)
258 st->pending[j - i] = st->pending[j];
259 break;
260 }
261 for (j = 0; j < HASHCHARS; j++)
262 foo[j] = (i + j < st->npending ? st->pending[i + j] :
263 data[i + j - st->npending]);
264 lz77_advance(st, foo[0], lz77_hash(foo));
265 }
266 st->npending -= i;
267
268 defermatch.len = 0;
269 deferchr = '\0';
270 while (len > 0) {
271
272 /* Don't even look for a match, if we're not compressing. */
273 if (compress && len >= HASHCHARS) {
274 /*
275 * Hash the next few characters.
276 */
277 hash = lz77_hash(data);
278
279 /*
280 * Look the hash up in the corresponding hash chain and see
281 * what we can find.
282 */
283 nmatch = 0;
284 for (off = st->hashtab[hash].first;
285 off != INVALID; off = st->win[off].next) {
286 /* distance = 1 if off == st->winpos-1 */
287 /* distance = WINSIZE if off == st->winpos */
288 distance =
289 WINSIZE - (off + WINSIZE - st->winpos) % WINSIZE;
290 for (i = 0; i < HASHCHARS; i++)
291 if (CHARAT(i) != CHARAT(i - distance))
292 break;
293 if (i == HASHCHARS) {
294 matches[nmatch].distance = distance;
295 matches[nmatch].len = 3;
296 if (++nmatch >= MAXMATCH)
297 break;
298 }
299 }
300 } else {
301 nmatch = 0;
302 hash = INVALID;
303 }
304
305 if (nmatch > 0) {
306 /*
307 * We've now filled up matches[] with nmatch potential
308 * matches. Follow them down to find the longest. (We
309 * assume here that it's always worth favouring a
310 * longer match over a shorter one.)
311 */
312 matchlen = HASHCHARS;
313 while (matchlen < len) {
314 int j;
315 for (i = j = 0; i < nmatch; i++) {
316 if (CHARAT(matchlen) ==
317 CHARAT(matchlen - matches[i].distance)) {
318 matches[j++] = matches[i];
319 }
320 }
321 if (j == 0)
322 break;
323 matchlen++;
324 nmatch = j;
325 }
326
327 /*
328 * We've now got all the longest matches. We favour the
329 * shorter distances, which means we go with matches[0].
330 * So see if we want to defer it or throw it away.
331 */
332 matches[0].len = matchlen;
333 if (defermatch.len > 0) {
334 if (matches[0].len > defermatch.len + 1) {
335 /* We have a better match. Emit the deferred char,
336 * and defer this match. */
337 ctx->literal(ctx, (unsigned char) deferchr);
338 defermatch = matches[0];
339 deferchr = data[0];
340 advance = 1;
341 } else {
342 /* We don't have a better match. Do the deferred one. */
343 ctx->match(ctx, defermatch.distance, defermatch.len);
344 advance = defermatch.len - 1;
345 defermatch.len = 0;
346 }
347 } else {
348 /* There was no deferred match. Defer this one. */
349 defermatch = matches[0];
350 deferchr = data[0];
351 advance = 1;
352 }
353 } else {
354 /*
355 * We found no matches. Emit the deferred match, if
356 * any; otherwise emit a literal.
357 */
358 if (defermatch.len > 0) {
359 ctx->match(ctx, defermatch.distance, defermatch.len);
360 advance = defermatch.len - 1;
361 defermatch.len = 0;
362 } else {
363 ctx->literal(ctx, data[0]);
364 advance = 1;
365 }
366 }
367
368 /*
369 * Now advance the position by `advance' characters,
370 * keeping the window and hash chains consistent.
371 */
372 while (advance > 0) {
373 if (len >= HASHCHARS) {
374 lz77_advance(st, *data, lz77_hash(data));
375 } else {
376 st->pending[st->npending++] = *data;
377 }
378 data++;
379 len--;
380 advance--;
381 }
382 }
383 }
384
385 /* ----------------------------------------------------------------------
386 * Deflate functionality common to both compression and decompression.
387 */
388
389 static const unsigned char lenlenmap[] = {
390 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15
391 };
392
393 #define MAXCODELEN 16
394
395 /*
396 * Given a sequence of Huffman code lengths, compute the actual
397 * codes, in the final form suitable for feeding to outbits (i.e.
398 * already bit-mirrored).
399 *
400 * Returns the maximum code length found. Can also return -1 to
401 * indicate the table was overcommitted (too many or too short
402 * codes to exactly cover the possible space), or -2 to indicate it
403 * was undercommitted (too few or too long codes).
404 */
405 static int hufcodes(const unsigned char *lengths, int *codes, int nsyms)
406 {
407 int count[MAXCODELEN], startcode[MAXCODELEN];
408 int code, maxlen;
409 int i, j;
410
411 /* Count the codes of each length. */
412 maxlen = 0;
413 for (i = 1; i < MAXCODELEN; i++)
414 count[i] = 0;
415 for (i = 0; i < nsyms; i++) {
416 count[lengths[i]]++;
417 if (maxlen < lengths[i])
418 maxlen = lengths[i];
419 }
420 /* Determine the starting code for each length block. */
421 code = 0;
422 for (i = 1; i < MAXCODELEN; i++) {
423 startcode[i] = code;
424 code += count[i];
425 if (code > (1 << i))
426 maxlen = -1; /* overcommitted */
427 code <<= 1;
428 }
429 if (code < (1 << MAXCODELEN))
430 maxlen = -2; /* undercommitted */
431 /* Determine the code for each symbol. Mirrored, of course. */
432 for (i = 0; i < nsyms; i++) {
433 code = startcode[lengths[i]]++;
434 codes[i] = 0;
435 for (j = 0; j < lengths[i]; j++) {
436 codes[i] = (codes[i] << 1) | (code & 1);
437 code >>= 1;
438 }
439 }
440
441 return maxlen;
442 }
443
444 /*
445 * Adler32 checksum function.
446 */
447 static unsigned long adler32_update(unsigned long s,
448 const unsigned char *data, int len)
449 {
450 unsigned s1 = s & 0xFFFF, s2 = (s >> 16) & 0xFFFF;
451 int i;
452
453 for (i = 0; i < len; i++) {
454 s1 += data[i];
455 s2 += s1;
456 if (!(i & 0xFFF)) {
457 s1 %= 65521;
458 s2 %= 65521;
459 }
460 }
461
462 return ((s2 % 65521) << 16) | (s1 % 65521);
463 }
464
465 /*
466 * CRC32 checksum function.
467 */
468
469 static unsigned long crc32_update(unsigned long crcword,
470 const unsigned char *data, int len)
471 {
472 static const unsigned long crc32_table[256] = {
473 0x00000000L, 0x77073096L, 0xEE0E612CL, 0x990951BAL,
474 0x076DC419L, 0x706AF48FL, 0xE963A535L, 0x9E6495A3L,
475 0x0EDB8832L, 0x79DCB8A4L, 0xE0D5E91EL, 0x97D2D988L,
476 0x09B64C2BL, 0x7EB17CBDL, 0xE7B82D07L, 0x90BF1D91L,
477 0x1DB71064L, 0x6AB020F2L, 0xF3B97148L, 0x84BE41DEL,
478 0x1ADAD47DL, 0x6DDDE4EBL, 0xF4D4B551L, 0x83D385C7L,
479 0x136C9856L, 0x646BA8C0L, 0xFD62F97AL, 0x8A65C9ECL,
480 0x14015C4FL, 0x63066CD9L, 0xFA0F3D63L, 0x8D080DF5L,
481 0x3B6E20C8L, 0x4C69105EL, 0xD56041E4L, 0xA2677172L,
482 0x3C03E4D1L, 0x4B04D447L, 0xD20D85FDL, 0xA50AB56BL,
483 0x35B5A8FAL, 0x42B2986CL, 0xDBBBC9D6L, 0xACBCF940L,
484 0x32D86CE3L, 0x45DF5C75L, 0xDCD60DCFL, 0xABD13D59L,
485 0x26D930ACL, 0x51DE003AL, 0xC8D75180L, 0xBFD06116L,
486 0x21B4F4B5L, 0x56B3C423L, 0xCFBA9599L, 0xB8BDA50FL,
487 0x2802B89EL, 0x5F058808L, 0xC60CD9B2L, 0xB10BE924L,
488 0x2F6F7C87L, 0x58684C11L, 0xC1611DABL, 0xB6662D3DL,
489 0x76DC4190L, 0x01DB7106L, 0x98D220BCL, 0xEFD5102AL,
490 0x71B18589L, 0x06B6B51FL, 0x9FBFE4A5L, 0xE8B8D433L,
491 0x7807C9A2L, 0x0F00F934L, 0x9609A88EL, 0xE10E9818L,
492 0x7F6A0DBBL, 0x086D3D2DL, 0x91646C97L, 0xE6635C01L,
493 0x6B6B51F4L, 0x1C6C6162L, 0x856530D8L, 0xF262004EL,
494 0x6C0695EDL, 0x1B01A57BL, 0x8208F4C1L, 0xF50FC457L,
495 0x65B0D9C6L, 0x12B7E950L, 0x8BBEB8EAL, 0xFCB9887CL,
496 0x62DD1DDFL, 0x15DA2D49L, 0x8CD37CF3L, 0xFBD44C65L,
497 0x4DB26158L, 0x3AB551CEL, 0xA3BC0074L, 0xD4BB30E2L,
498 0x4ADFA541L, 0x3DD895D7L, 0xA4D1C46DL, 0xD3D6F4FBL,
499 0x4369E96AL, 0x346ED9FCL, 0xAD678846L, 0xDA60B8D0L,
500 0x44042D73L, 0x33031DE5L, 0xAA0A4C5FL, 0xDD0D7CC9L,
501 0x5005713CL, 0x270241AAL, 0xBE0B1010L, 0xC90C2086L,
502 0x5768B525L, 0x206F85B3L, 0xB966D409L, 0xCE61E49FL,
503 0x5EDEF90EL, 0x29D9C998L, 0xB0D09822L, 0xC7D7A8B4L,
504 0x59B33D17L, 0x2EB40D81L, 0xB7BD5C3BL, 0xC0BA6CADL,
505 0xEDB88320L, 0x9ABFB3B6L, 0x03B6E20CL, 0x74B1D29AL,
506 0xEAD54739L, 0x9DD277AFL, 0x04DB2615L, 0x73DC1683L,
507 0xE3630B12L, 0x94643B84L, 0x0D6D6A3EL, 0x7A6A5AA8L,
508 0xE40ECF0BL, 0x9309FF9DL, 0x0A00AE27L, 0x7D079EB1L,
509 0xF00F9344L, 0x8708A3D2L, 0x1E01F268L, 0x6906C2FEL,
510 0xF762575DL, 0x806567CBL, 0x196C3671L, 0x6E6B06E7L,
511 0xFED41B76L, 0x89D32BE0L, 0x10DA7A5AL, 0x67DD4ACCL,
512 0xF9B9DF6FL, 0x8EBEEFF9L, 0x17B7BE43L, 0x60B08ED5L,
513 0xD6D6A3E8L, 0xA1D1937EL, 0x38D8C2C4L, 0x4FDFF252L,
514 0xD1BB67F1L, 0xA6BC5767L, 0x3FB506DDL, 0x48B2364BL,
515 0xD80D2BDAL, 0xAF0A1B4CL, 0x36034AF6L, 0x41047A60L,
516 0xDF60EFC3L, 0xA867DF55L, 0x316E8EEFL, 0x4669BE79L,
517 0xCB61B38CL, 0xBC66831AL, 0x256FD2A0L, 0x5268E236L,
518 0xCC0C7795L, 0xBB0B4703L, 0x220216B9L, 0x5505262FL,
519 0xC5BA3BBEL, 0xB2BD0B28L, 0x2BB45A92L, 0x5CB36A04L,
520 0xC2D7FFA7L, 0xB5D0CF31L, 0x2CD99E8BL, 0x5BDEAE1DL,
521 0x9B64C2B0L, 0xEC63F226L, 0x756AA39CL, 0x026D930AL,
522 0x9C0906A9L, 0xEB0E363FL, 0x72076785L, 0x05005713L,
523 0x95BF4A82L, 0xE2B87A14L, 0x7BB12BAEL, 0x0CB61B38L,
524 0x92D28E9BL, 0xE5D5BE0DL, 0x7CDCEFB7L, 0x0BDBDF21L,
525 0x86D3D2D4L, 0xF1D4E242L, 0x68DDB3F8L, 0x1FDA836EL,
526 0x81BE16CDL, 0xF6B9265BL, 0x6FB077E1L, 0x18B74777L,
527 0x88085AE6L, 0xFF0F6A70L, 0x66063BCAL, 0x11010B5CL,
528 0x8F659EFFL, 0xF862AE69L, 0x616BFFD3L, 0x166CCF45L,
529 0xA00AE278L, 0xD70DD2EEL, 0x4E048354L, 0x3903B3C2L,
530 0xA7672661L, 0xD06016F7L, 0x4969474DL, 0x3E6E77DBL,
531 0xAED16A4AL, 0xD9D65ADCL, 0x40DF0B66L, 0x37D83BF0L,
532 0xA9BCAE53L, 0xDEBB9EC5L, 0x47B2CF7FL, 0x30B5FFE9L,
533 0xBDBDF21CL, 0xCABAC28AL, 0x53B39330L, 0x24B4A3A6L,
534 0xBAD03605L, 0xCDD70693L, 0x54DE5729L, 0x23D967BFL,
535 0xB3667A2EL, 0xC4614AB8L, 0x5D681B02L, 0x2A6F2B94L,
536 0xB40BBE37L, 0xC30C8EA1L, 0x5A05DF1BL, 0x2D02EF8DL
537 };
538 crcword ^= 0xFFFFFFFFL;
539 while (len--) {
540 unsigned long newbyte = *data++;
541 newbyte ^= crcword & 0xFFL;
542 crcword = (crcword >> 8) ^ crc32_table[newbyte];
543 }
544 return crcword ^ 0xFFFFFFFFL;
545 }
546
547 typedef struct {
548 short code, extrabits;
549 int min, max;
550 } coderecord;
551
552 static const coderecord lencodes[] = {
553 {257, 0, 3, 3},
554 {258, 0, 4, 4},
555 {259, 0, 5, 5},
556 {260, 0, 6, 6},
557 {261, 0, 7, 7},
558 {262, 0, 8, 8},
559 {263, 0, 9, 9},
560 {264, 0, 10, 10},
561 {265, 1, 11, 12},
562 {266, 1, 13, 14},
563 {267, 1, 15, 16},
564 {268, 1, 17, 18},
565 {269, 2, 19, 22},
566 {270, 2, 23, 26},
567 {271, 2, 27, 30},
568 {272, 2, 31, 34},
569 {273, 3, 35, 42},
570 {274, 3, 43, 50},
571 {275, 3, 51, 58},
572 {276, 3, 59, 66},
573 {277, 4, 67, 82},
574 {278, 4, 83, 98},
575 {279, 4, 99, 114},
576 {280, 4, 115, 130},
577 {281, 5, 131, 162},
578 {282, 5, 163, 194},
579 {283, 5, 195, 226},
580 {284, 5, 227, 257},
581 {285, 0, 258, 258},
582 };
583
584 static const coderecord distcodes[] = {
585 {0, 0, 1, 1},
586 {1, 0, 2, 2},
587 {2, 0, 3, 3},
588 {3, 0, 4, 4},
589 {4, 1, 5, 6},
590 {5, 1, 7, 8},
591 {6, 2, 9, 12},
592 {7, 2, 13, 16},
593 {8, 3, 17, 24},
594 {9, 3, 25, 32},
595 {10, 4, 33, 48},
596 {11, 4, 49, 64},
597 {12, 5, 65, 96},
598 {13, 5, 97, 128},
599 {14, 6, 129, 192},
600 {15, 6, 193, 256},
601 {16, 7, 257, 384},
602 {17, 7, 385, 512},
603 {18, 8, 513, 768},
604 {19, 8, 769, 1024},
605 {20, 9, 1025, 1536},
606 {21, 9, 1537, 2048},
607 {22, 10, 2049, 3072},
608 {23, 10, 3073, 4096},
609 {24, 11, 4097, 6144},
610 {25, 11, 6145, 8192},
611 {26, 12, 8193, 12288},
612 {27, 12, 12289, 16384},
613 {28, 13, 16385, 24576},
614 {29, 13, 24577, 32768},
615 };
616
617 /* ----------------------------------------------------------------------
618 * Deflate compression.
619 */
620
621 #define SYMLIMIT 65536
622 #define SYMPFX_LITLEN 0x00000000U
623 #define SYMPFX_DIST 0x40000000U
624 #define SYMPFX_EXTRABITS 0x80000000U
625 #define SYMPFX_CODELEN 0xC0000000U
626 #define SYMPFX_MASK 0xC0000000U
627
628 #define SYM_EXTRABITS_MASK 0x3C000000U
629 #define SYM_EXTRABITS_SHIFT 26
630
631 struct huftrees {
632 unsigned char *len_litlen;
633 int *code_litlen;
634 unsigned char *len_dist;
635 int *code_dist;
636 unsigned char *len_codelen;
637 int *code_codelen;
638 };
639
640 struct deflate_compress_ctx {
641 struct LZ77Context *lzc;
642 unsigned char *outbuf;
643 int outlen, outsize;
644 unsigned long outbits;
645 int noutbits;
646 int firstblock;
647 unsigned long *syms;
648 int symstart, nsyms;
649 int type;
650 unsigned long checksum;
651 unsigned long datasize;
652 int lastblock;
653 int finished;
654 unsigned char static_len1[288], static_len2[30];
655 int static_code1[288], static_code2[30];
656 struct huftrees sht;
657 #ifdef STATISTICS
658 unsigned long bitcount;
659 #endif
660 };
661
662 static void outbits(deflate_compress_ctx *out,
663 unsigned long bits, int nbits)
664 {
665 assert(out->noutbits + nbits <= 32);
666 out->outbits |= bits << out->noutbits;
667 out->noutbits += nbits;
668 while (out->noutbits >= 8) {
669 if (out->outlen >= out->outsize) {
670 out->outsize = out->outlen + 64;
671 out->outbuf = sresize(out->outbuf, out->outsize, unsigned char);
672 }
673 out->outbuf[out->outlen++] = (unsigned char) (out->outbits & 0xFF);
674 out->outbits >>= 8;
675 out->noutbits -= 8;
676 }
677 #ifdef STATISTICS
678 out->bitcount += nbits;
679 #endif
680 }
681
682 /*
683 * Binary heap functions used by buildhuf(). Each one assumes the
684 * heap to be stored in an array of ints, with two ints per node
685 * (user data and key). They take in the old heap length, and
686 * return the new one.
687 */
688 #define HEAPPARENT(x) (((x)-2)/4*2)
689 #define HEAPLEFT(x) ((x)*2+2)
690 #define HEAPRIGHT(x) ((x)*2+4)
691 static int addheap(int *heap, int len, int userdata, int key)
692 {
693 int me, dad, tmp;
694
695 me = len;
696 heap[len++] = userdata;
697 heap[len++] = key;
698
699 while (me > 0) {
700 dad = HEAPPARENT(me);
701 if (heap[me+1] < heap[dad+1]) {
702 tmp = heap[me]; heap[me] = heap[dad]; heap[dad] = tmp;
703 tmp = heap[me+1]; heap[me+1] = heap[dad+1]; heap[dad+1] = tmp;
704 me = dad;
705 } else
706 break;
707 }
708
709 return len;
710 }
711 static int rmheap(int *heap, int len, int *userdata, int *key)
712 {
713 int me, lc, rc, c, tmp;
714
715 len -= 2;
716 *userdata = heap[0];
717 *key = heap[1];
718 heap[0] = heap[len];
719 heap[1] = heap[len+1];
720
721 me = 0;
722
723 while (1) {
724 lc = HEAPLEFT(me);
725 rc = HEAPRIGHT(me);
726 if (lc >= len)
727 break;
728 else if (rc >= len || heap[lc+1] < heap[rc+1])
729 c = lc;
730 else
731 c = rc;
732 if (heap[me+1] > heap[c+1]) {
733 tmp = heap[me]; heap[me] = heap[c]; heap[c] = tmp;
734 tmp = heap[me+1]; heap[me+1] = heap[c+1]; heap[c+1] = tmp;
735 } else
736 break;
737 me = c;
738 }
739
740 return len;
741 }
742
743 /*
744 * The core of the Huffman algorithm: takes an input array of
745 * symbol frequencies, and produces an output array of code
746 * lengths.
747 *
748 * This is basically a generic Huffman implementation, but it has
749 * one zlib-related quirk which is that it caps the output code
750 * lengths to fit in an unsigned char (which is safe since Deflate
751 * will reject anything longer than 15 anyway). Anyone wanting to
752 * rip it out and use it in another context should find that easy
753 * to remove.
754 */
755 #define HUFMAX 286
756 static void buildhuf(const int *freqs, unsigned char *lengths, int nsyms)
757 {
758 int parent[2*HUFMAX-1];
759 int length[2*HUFMAX-1];
760 int heap[2*HUFMAX];
761 int heapsize;
762 int i, j, n;
763 int si, sj;
764
765 assert(nsyms <= HUFMAX);
766
767 memset(parent, 0, sizeof(parent));
768
769 /*
770 * Begin by building the heap.
771 */
772 heapsize = 0;
773 for (i = 0; i < nsyms; i++)
774 if (freqs[i] > 0) /* leave unused symbols out totally */
775 heapsize = addheap(heap, heapsize, i, freqs[i]);
776
777 /*
778 * Now repeatedly take two elements off the heap and merge
779 * them.
780 */
781 n = HUFMAX;
782 while (heapsize > 2) {
783 heapsize = rmheap(heap, heapsize, &i, &si);
784 heapsize = rmheap(heap, heapsize, &j, &sj);
785 parent[i] = n;
786 parent[j] = n;
787 heapsize = addheap(heap, heapsize, n, si + sj);
788 n++;
789 }
790
791 /*
792 * Now we have our tree, in the form of a link from each node
793 * to the index of its parent. Count back down the tree to
794 * determine the code lengths.
795 */
796 memset(length, 0, sizeof(length));
797 /* The tree root has length 0 after that, which is correct. */
798 for (i = n-1; i-- ;)
799 if (parent[i] > 0)
800 length[i] = 1 + length[parent[i]];
801
802 /*
803 * And that's it. (Simple, wasn't it?) Copy the lengths into
804 * the output array and leave.
805 *
806 * Here we cap lengths to fit in unsigned char.
807 */
808 for (i = 0; i < nsyms; i++)
809 lengths[i] = (length[i] > 255 ? 255 : length[i]);
810 }
811
812 /*
813 * Wrapper around buildhuf() which enforces the Deflate restriction
814 * that no code length may exceed 15 bits, or 7 for the auxiliary
815 * code length alphabet. This function has the same calling
816 * semantics as buildhuf(), except that it might modify the freqs
817 * array.
818 */
819 static void deflate_buildhuf(int *freqs, unsigned char *lengths,
820 int nsyms, int limit)
821 {
822 int smallestfreq, totalfreq, nactivesyms;
823 int num, denom, adjust;
824 int i;
825 int maxprob;
826
827 /*
828 * Nasty special case: if the frequency table has fewer than
829 * two non-zero elements, we must invent some, because we can't
830 * have fewer than one bit encoding a symbol.
831 */
832 assert(nsyms >= 2);
833 {
834 int count = 0;
835 for (i = 0; i < nsyms; i++)
836 if (freqs[i] > 0)
837 count++;
838 if (count < 2) {
839 for (i = 0; i < nsyms && count > 0; i++)
840 if (freqs[i] == 0) {
841 freqs[i] = 1;
842 count--;
843 }
844 }
845 }
846
847 /*
848 * First, try building the Huffman table the normal way. If
849 * this works, it's optimal, so we don't want to mess with it.
850 */
851 buildhuf(freqs, lengths, nsyms);
852
853 for (i = 0; i < nsyms; i++)
854 if (lengths[i] > limit)
855 break;
856
857 if (i == nsyms)
858 return; /* OK */
859
860 /*
861 * The Huffman algorithm can only ever generate a code length
862 * of N bits or more if there is a symbol whose probability is
863 * less than the reciprocal of the (N+2)th Fibonacci number
864 * (counting from F_0=0 and F_1=1), i.e. 1/2584 for N=16, or
865 * 1/55 for N=8. (This is a necessary though not sufficient
866 * condition.)
867 *
868 * Why is this? Well, consider the input symbol with the
869 * smallest probability. Let that probability be x. In order
870 * for this symbol to have a code length of at least 1, the
871 * Huffman algorithm will have to merge it with some other
872 * node; and since x is the smallest probability, the node it
873 * gets merged with must be at least x. Thus, the probability
874 * of the resulting combined node will be at least 2x. Now in
875 * order for our node to reach depth 2, this 2x-node must be
876 * merged again. But what with? We can't assume the node it
877 * merges with is at least 2x, because this one might only be
878 * the _second_ smallest remaining node. But we do know the
879 * node it merges with must be at least x, so our order-2
880 * internal node is at least 3x.
881 *
882 * How small a node can merge with _that_ to get an order-3
883 * internal node? Well, it must be at least 2x, because if it
884 * was smaller than that then it would have been one of the two
885 * smallest nodes in the previous step and been merged at that
886 * point. So at least 3x, plus at least 2x, comes to at least
887 * 5x for an order-3 node.
888 *
889 * And so it goes on: at every stage we must merge our current
890 * node with a node at least as big as the bigger of this one's
891 * two parents, and from this starting point that gives rise to
892 * the Fibonacci sequence. So we find that in order to have a
893 * node n levels deep (i.e. a maximum code length of n), the
894 * overall probability of the root of the entire tree must be
895 * at least F_{n+2} times the probability of the rarest symbol.
896 * In other words, since the overall probability is 1, it is a
897 * necessary condition for a code length of 16 or more that
898 * there must be at least one symbol with probability <=
899 * 1/F_18.
900 *
901 * (To demonstrate that a probability this big really can give
902 * rise to a code length of 16, consider the set of input
903 * frequencies { 1-epsilon, 1, 1, 2, 3, 5, 8, 13, 21, 34, 55,
904 * 89, 144, 233, 377, 610, 987 }, for arbitrarily small
905 * epsilon.)
906 *
907 * So here buildhuf() has returned us an overlong code. So to
908 * ensure it doesn't do it again, we add a constant to all the
909 * (non-zero) symbol frequencies, causing them to become more
910 * balanced and removing the danger. We can then feed the
911 * results back to the standard buildhuf() and be
912 * assert()-level confident that the resulting code lengths
913 * contain nothing outside the permitted range.
914 */
915 assert(limit == 15 || limit == 7);
916 maxprob = (limit == 15 ? 2584 : 55); /* no point in computing full F_n */
917 totalfreq = nactivesyms = 0;
918 smallestfreq = -1;
919 for (i = 0; i < nsyms; i++) {
920 if (freqs[i] == 0)
921 continue;
922 if (smallestfreq < 0 || smallestfreq > freqs[i])
923 smallestfreq = freqs[i];
924 totalfreq += freqs[i];
925 nactivesyms++;
926 }
927 assert(smallestfreq <= totalfreq / maxprob);
928
929 /*
930 * We want to find the smallest integer `adjust' such that
931 * (totalfreq + nactivesyms * adjust) / (smallestfreq +
932 * adjust) is less than maxprob. A bit of algebra tells us
933 * that the threshold value is equal to
934 *
935 * totalfreq - maxprob * smallestfreq
936 * ----------------------------------
937 * maxprob - nactivesyms
938 *
939 * rounded up, of course. And we'll only even be trying
940 * this if
941 */
942 num = totalfreq - smallestfreq * maxprob;
943 denom = maxprob - nactivesyms;
944 adjust = (num + denom - 1) / denom;
945
946 /*
947 * Now add `adjust' to all the input symbol frequencies.
948 */
949 for (i = 0; i < nsyms; i++)
950 if (freqs[i] != 0)
951 freqs[i] += adjust;
952
953 /*
954 * Rebuild the Huffman tree...
955 */
956 buildhuf(freqs, lengths, nsyms);
957
958 /*
959 * ... and this time it ought to be OK.
960 */
961 for (i = 0; i < nsyms; i++)
962 assert(lengths[i] <= limit);
963 }
964
965 /*
966 * Compute the bit length of a symbol, given the three Huffman
967 * trees.
968 */
969 static int symsize(unsigned sym, const struct huftrees *trees)
970 {
971 unsigned basesym = sym &~ SYMPFX_MASK;
972
973 switch (sym & SYMPFX_MASK) {
974 case SYMPFX_LITLEN:
975 return trees->len_litlen[basesym];
976 case SYMPFX_DIST:
977 return trees->len_dist[basesym];
978 case SYMPFX_CODELEN:
979 return trees->len_codelen[basesym];
980 default /*case SYMPFX_EXTRABITS*/:
981 return basesym >> SYM_EXTRABITS_SHIFT;
982 }
983 }
984
985 /*
986 * Write out a single symbol, given the three Huffman trees.
987 */
988 static void writesym(deflate_compress_ctx *out,
989 unsigned sym, const struct huftrees *trees)
990 {
991 unsigned basesym = sym &~ SYMPFX_MASK;
992 int i;
993
994 switch (sym & SYMPFX_MASK) {
995 case SYMPFX_LITLEN:
996 debug(("send: litlen %d\n", basesym));
997 outbits(out, trees->code_litlen[basesym], trees->len_litlen[basesym]);
998 break;
999 case SYMPFX_DIST:
1000 debug(("send: dist %d\n", basesym));
1001 outbits(out, trees->code_dist[basesym], trees->len_dist[basesym]);
1002 break;
1003 case SYMPFX_CODELEN:
1004 debug(("send: codelen %d\n", basesym));
1005 outbits(out, trees->code_codelen[basesym],trees->len_codelen[basesym]);
1006 break;
1007 case SYMPFX_EXTRABITS:
1008 i = basesym >> SYM_EXTRABITS_SHIFT;
1009 basesym &= ~SYM_EXTRABITS_MASK;
1010 debug(("send: extrabits %d/%d\n", basesym, i));
1011 outbits(out, basesym, i);
1012 break;
1013 }
1014 }
1015
1016 /*
1017 * outblock() must output _either_ a dynamic block of length
1018 * `dynamic_len', _or_ a static block of length `static_len', but
1019 * it gets to choose which.
1020 */
1021 static void outblock(deflate_compress_ctx *out,
1022 int dynamic_len, int static_len)
1023 {
1024 int freqs1[286], freqs2[30], freqs3[19];
1025 unsigned char len1[286], len2[30], len3[19];
1026 int code1[286], code2[30], code3[19];
1027 int hlit, hdist, hclen, bfinal, btype;
1028 int treesrc[286 + 30];
1029 int treesyms[286 + 30];
1030 int codelen[19];
1031 int i, ntreesrc, ntreesyms;
1032 int dynamic, blklen;
1033 struct huftrees dht;
1034 const struct huftrees *ht;
1035 #ifdef STATISTICS
1036 unsigned long bitcount_before;
1037 #endif
1038
1039 dht.len_litlen = len1;
1040 dht.len_dist = len2;
1041 dht.len_codelen = len3;
1042 dht.code_litlen = code1;
1043 dht.code_dist = code2;
1044 dht.code_codelen = code3;
1045
1046 /*
1047 * We make our choice of block to output by doing all the
1048 * detailed work to determine the exact length of each possible
1049 * block. Then we choose the one which has fewest output bits
1050 * per symbol.
1051 */
1052
1053 /*
1054 * First build the two main Huffman trees for the dynamic
1055 * block.
1056 */
1057
1058 /*
1059 * Count up the frequency tables.
1060 */
1061 memset(freqs1, 0, sizeof(freqs1));
1062 memset(freqs2, 0, sizeof(freqs2));
1063 freqs1[256] = 1; /* we're bound to need one EOB */
1064 for (i = 0; i < dynamic_len; i++) {
1065 unsigned sym = out->syms[(out->symstart + i) % SYMLIMIT];
1066
1067 /*
1068 * Increment the occurrence counter for this symbol, if
1069 * it's in one of the Huffman alphabets and isn't extra
1070 * bits.
1071 */
1072 if ((sym & SYMPFX_MASK) == SYMPFX_LITLEN) {
1073 sym &= ~SYMPFX_MASK;
1074 assert(sym < lenof(freqs1));
1075 freqs1[sym]++;
1076 } else if ((sym & SYMPFX_MASK) == SYMPFX_DIST) {
1077 sym &= ~SYMPFX_MASK;
1078 assert(sym < lenof(freqs2));
1079 freqs2[sym]++;
1080 }
1081 }
1082 deflate_buildhuf(freqs1, len1, lenof(freqs1), 15);
1083 deflate_buildhuf(freqs2, len2, lenof(freqs2), 15);
1084 hufcodes(len1, code1, lenof(freqs1));
1085 hufcodes(len2, code2, lenof(freqs2));
1086
1087 /*
1088 * Determine HLIT and HDIST.
1089 */
1090 for (hlit = 286; hlit > 257 && len1[hlit-1] == 0; hlit--);
1091 for (hdist = 30; hdist > 1 && len2[hdist-1] == 0; hdist--);
1092
1093 /*
1094 * Write out the list of symbols used to transmit the
1095 * trees.
1096 */
1097 ntreesrc = 0;
1098 for (i = 0; i < hlit; i++)
1099 treesrc[ntreesrc++] = len1[i];
1100 for (i = 0; i < hdist; i++)
1101 treesrc[ntreesrc++] = len2[i];
1102 ntreesyms = 0;
1103 for (i = 0; i < ntreesrc ;) {
1104 int j = 1;
1105 int k;
1106
1107 /* Find length of run of the same length code. */
1108 while (i+j < ntreesrc && treesrc[i+j] == treesrc[i])
1109 j++;
1110
1111 /* Encode that run as economically as we can. */
1112 k = j;
1113 if (treesrc[i] == 0) {
1114 /*
1115 * Zero code length: we can output run codes for
1116 * 3-138 zeroes. So if we have fewer than 3 zeroes,
1117 * we just output literals. Otherwise, we output
1118 * nothing but run codes, and tweak their lengths
1119 * to make sure we aren't left with under 3 at the
1120 * end.
1121 */
1122 if (k < 3) {
1123 while (k--)
1124 treesyms[ntreesyms++] = 0 | SYMPFX_CODELEN;
1125 } else {
1126 while (k > 0) {
1127 int rpt = (k < 138 ? k : 138);
1128 if (rpt > k-3 && rpt < k)
1129 rpt = k-3;
1130 assert(rpt >= 3 && rpt <= 138);
1131 if (rpt < 11) {
1132 treesyms[ntreesyms++] = 17 | SYMPFX_CODELEN;
1133 treesyms[ntreesyms++] =
1134 (SYMPFX_EXTRABITS | (rpt - 3) |
1135 (3 << SYM_EXTRABITS_SHIFT));
1136 } else {
1137 treesyms[ntreesyms++] = 18 | SYMPFX_CODELEN;
1138 treesyms[ntreesyms++] =
1139 (SYMPFX_EXTRABITS | (rpt - 11) |
1140 (7 << SYM_EXTRABITS_SHIFT));
1141 }
1142 k -= rpt;
1143 }
1144 }
1145 } else {
1146 /*
1147 * Non-zero code length: we must output the first
1148 * one explicitly, then we can output a copy code
1149 * for 3-6 repeats. So if we have fewer than 4
1150 * repeats, we _just_ output literals. Otherwise,
1151 * we output one literal plus at least one copy
1152 * code, and tweak the copy codes to make sure we
1153 * aren't left with under 3 at the end.
1154 */
1155 assert(treesrc[i] < 16);
1156 treesyms[ntreesyms++] = treesrc[i] | SYMPFX_CODELEN;
1157 k--;
1158 if (k < 3) {
1159 while (k--)
1160 treesyms[ntreesyms++] = treesrc[i] | SYMPFX_CODELEN;
1161 } else {
1162 while (k > 0) {
1163 int rpt = (k < 6 ? k : 6);
1164 if (rpt > k-3 && rpt < k)
1165 rpt = k-3;
1166 assert(rpt >= 3 && rpt <= 6);
1167 treesyms[ntreesyms++] = 16 | SYMPFX_CODELEN;
1168 treesyms[ntreesyms++] = (SYMPFX_EXTRABITS | (rpt - 3) |
1169 (2 << SYM_EXTRABITS_SHIFT));
1170 k -= rpt;
1171 }
1172 }
1173 }
1174
1175 i += j;
1176 }
1177 assert((unsigned)ntreesyms < lenof(treesyms));
1178
1179 /*
1180 * Count up the frequency table for the tree-transmission
1181 * symbols, and build the auxiliary Huffman tree for that.
1182 */
1183 memset(freqs3, 0, sizeof(freqs3));
1184 for (i = 0; i < ntreesyms; i++) {
1185 unsigned sym = treesyms[i];
1186
1187 /*
1188 * Increment the occurrence counter for this symbol, if
1189 * it's the Huffman alphabet and isn't extra bits.
1190 */
1191 if ((sym & SYMPFX_MASK) == SYMPFX_CODELEN) {
1192 sym &= ~SYMPFX_MASK;
1193 assert(sym < lenof(freqs3));
1194 freqs3[sym]++;
1195 }
1196 }
1197 deflate_buildhuf(freqs3, len3, lenof(freqs3), 7);
1198 hufcodes(len3, code3, lenof(freqs3));
1199
1200 /*
1201 * Reorder the code length codes into transmission order, and
1202 * determine HCLEN.
1203 */
1204 for (i = 0; i < 19; i++)
1205 codelen[i] = len3[lenlenmap[i]];
1206 for (hclen = 19; hclen > 4 && codelen[hclen-1] == 0; hclen--);
1207
1208 /*
1209 * Now work out the exact size of both the dynamic and the
1210 * static block, in bits.
1211 */
1212 {
1213 int ssize, dsize;
1214
1215 /*
1216 * First the dynamic block.
1217 */
1218 dsize = 3 + 5 + 5 + 4; /* 3-bit header, HLIT, HDIST, HCLEN */
1219 dsize += 3 * hclen; /* code-length-alphabet code lengths */
1220 /* Code lengths */
1221 for (i = 0; i < ntreesyms; i++)
1222 dsize += symsize(treesyms[i], &dht);
1223 /* The actual block data */
1224 for (i = 0; i < dynamic_len; i++) {
1225 unsigned sym = out->syms[(out->symstart + i) % SYMLIMIT];
1226 dsize += symsize(sym, &dht);
1227 }
1228 /* And the end-of-data symbol. */
1229 dsize += symsize(SYMPFX_LITLEN | 256, &dht);
1230
1231 /*
1232 * Now the static block.
1233 */
1234 ssize = 3; /* 3-bit block header */
1235 /* The actual block data */
1236 for (i = 0; i < static_len; i++) {
1237 unsigned sym = out->syms[(out->symstart + i) % SYMLIMIT];
1238 ssize += symsize(sym, &out->sht);
1239 }
1240 /* And the end-of-data symbol. */
1241 ssize += symsize(SYMPFX_LITLEN | 256, &out->sht);
1242
1243 /*
1244 * Compare the two and decide which to output. We break
1245 * exact ties in favour of the static block, because of the
1246 * special case in which that block has zero length.
1247 */
1248 dynamic = ((double)ssize * dynamic_len > (double)dsize * static_len);
1249 ht = dynamic ? &dht : &out->sht;
1250 blklen = dynamic ? dynamic_len : static_len;
1251 }
1252
1253 /*
1254 * Actually transmit the block.
1255 */
1256
1257 /* 3-bit block header */
1258 bfinal = (out->lastblock ? 1 : 0);
1259 btype = dynamic ? 2 : 1;
1260 debug(("send: bfinal=%d btype=%d\n", bfinal, btype));
1261 outbits(out, bfinal, 1);
1262 outbits(out, btype, 2);
1263
1264 #ifdef STATISTICS
1265 bitcount_before = out->bitcount;
1266 #endif
1267
1268 if (dynamic) {
1269 /* HLIT, HDIST and HCLEN */
1270 debug(("send: hlit=%d hdist=%d hclen=%d\n", hlit, hdist, hclen));
1271 outbits(out, hlit - 257, 5);
1272 outbits(out, hdist - 1, 5);
1273 outbits(out, hclen - 4, 4);
1274
1275 /* Code lengths for the auxiliary tree */
1276 for (i = 0; i < hclen; i++) {
1277 debug(("send: lenlen %d\n", codelen[i]));
1278 outbits(out, codelen[i], 3);
1279 }
1280
1281 /* Code lengths for the literal/length and distance trees */
1282 for (i = 0; i < ntreesyms; i++)
1283 writesym(out, treesyms[i], ht);
1284 #ifdef STATISTICS
1285 fprintf(stderr, "total tree size %lu bits\n",
1286 out->bitcount - bitcount_before);
1287 #endif
1288 }
1289
1290 /* Output the actual symbols from the buffer */
1291 for (i = 0; i < blklen; i++) {
1292 unsigned sym = out->syms[(out->symstart + i) % SYMLIMIT];
1293 writesym(out, sym, ht);
1294 }
1295
1296 /* Output the end-of-data symbol */
1297 writesym(out, SYMPFX_LITLEN | 256, ht);
1298
1299 /*
1300 * Remove all the just-output symbols from the symbol buffer by
1301 * adjusting symstart and nsyms.
1302 */
1303 out->symstart = (out->symstart + blklen) % SYMLIMIT;
1304 out->nsyms -= blklen;
1305 }
1306
1307 /*
1308 * Give the approximate log-base-2 of an input integer, measured in
1309 * 8ths of a bit. (I.e. this computes an integer approximation to
1310 * 8*logbase2(x).)
1311 */
1312 static int approxlog2(unsigned x)
1313 {
1314 int ret = 31*8;
1315
1316 /*
1317 * Binary-search to get the top bit of x up to bit 31.
1318 */
1319 if (x < 0x00010000U) x <<= 16, ret -= 16*8;
1320 if (x < 0x01000000U) x <<= 8, ret -= 8*8;
1321 if (x < 0x10000000U) x <<= 4, ret -= 4*8;
1322 if (x < 0x40000000U) x <<= 2, ret -= 2*8;
1323 if (x < 0x80000000U) x <<= 1, ret -= 1*8;
1324
1325 /*
1326 * Now we know the logarithm we want is in [ret,ret+1).
1327 * Determine the bottom three bits by checking against
1328 * threshold values.
1329 *
1330 * (Each of these threshold values is 0x80000000 times an odd
1331 * power of 2^(1/16). Therefore, this function rounds to
1332 * nearest.)
1333 */
1334 if (x <= 0xAD583EEAU) {
1335 if (x <= 0x91C3D373U)
1336 ret += (x <= 0x85AAC367U ? 0 : 1);
1337 else
1338 ret += (x <= 0x9EF53260U ? 2 : 3);
1339 } else {
1340 if (x <= 0xCE248C15U)
1341 ret += (x <= 0xBD08A39FU ? 4 : 5);
1342 else
1343 ret += (x <= 0xE0CCDEECU ? 6 : x <= 0xF5257D15L ? 7 : 8);
1344 }
1345
1346 return ret;
1347 }
1348
1349 static void chooseblock(deflate_compress_ctx *out)
1350 {
1351 int freqs1[286], freqs2[30];
1352 int i, len, bestlen, longestlen = 0;
1353 int total1, total2;
1354 int bestvfm;
1355
1356 memset(freqs1, 0, sizeof(freqs1));
1357 memset(freqs2, 0, sizeof(freqs2));
1358 freqs1[256] = 1; /* we're bound to need one EOB */
1359 total1 = 1;
1360 total2 = 0;
1361
1362 /*
1363 * Iterate over all possible block lengths, computing the
1364 * entropic coding approximation to the final length at every
1365 * stage. We divide the result by the number of symbols
1366 * encoded, to determine the `value for money' (overall
1367 * bits-per-symbol count) of a block of that length.
1368 */
1369 bestlen = -1;
1370 bestvfm = 0;
1371
1372 len = 300 * 8; /* very approximate size of the Huffman trees */
1373
1374 for (i = 0; i < out->nsyms; i++) {
1375 unsigned sym = out->syms[(out->symstart + i) % SYMLIMIT];
1376
1377 if (i > 0 && (sym & SYMPFX_MASK) == SYMPFX_LITLEN) {
1378 /*
1379 * This is a viable point at which to end the block.
1380 * Compute the value for money.
1381 */
1382 int vfm = i * 32768 / len; /* symbols encoded per bit */
1383
1384 if (bestlen < 0 || vfm > bestvfm) {
1385 bestlen = i;
1386 bestvfm = vfm;
1387 }
1388
1389 longestlen = i;
1390 }
1391
1392 /*
1393 * Increment the occurrence counter for this symbol, if
1394 * it's in one of the Huffman alphabets and isn't extra
1395 * bits.
1396 */
1397 if ((sym & SYMPFX_MASK) == SYMPFX_LITLEN) {
1398 sym &= ~SYMPFX_MASK;
1399 assert(sym < lenof(freqs1));
1400 len += freqs1[sym] * approxlog2(freqs1[sym]);
1401 len -= total1 * approxlog2(total1);
1402 freqs1[sym]++;
1403 total1++;
1404 len -= freqs1[sym] * approxlog2(freqs1[sym]);
1405 len += total1 * approxlog2(total1);
1406 } else if ((sym & SYMPFX_MASK) == SYMPFX_DIST) {
1407 sym &= ~SYMPFX_MASK;
1408 assert(sym < lenof(freqs2));
1409 len += freqs2[sym] * approxlog2(freqs2[sym]);
1410 len -= total2 * approxlog2(total2);
1411 freqs2[sym]++;
1412 total2++;
1413 len -= freqs2[sym] * approxlog2(freqs2[sym]);
1414 len += total2 * approxlog2(total2);
1415 } else if ((sym & SYMPFX_MASK) == SYMPFX_EXTRABITS) {
1416 len += 8 * ((sym &~ SYMPFX_MASK) >> SYM_EXTRABITS_SHIFT);
1417 }
1418 }
1419
1420 assert(bestlen > 0);
1421
1422 outblock(out, bestlen, longestlen);
1423 }
1424
1425 /*
1426 * Force the current symbol buffer to be flushed out as a single
1427 * block.
1428 */
1429 static void flushblock(deflate_compress_ctx *out)
1430 {
1431 /*
1432 * No need to check that out->nsyms is a valid block length: we
1433 * know it has to be, because flushblock() is called in between
1434 * two matches/literals.
1435 */
1436 outblock(out, out->nsyms, out->nsyms);
1437 assert(out->nsyms == 0);
1438 }
1439
1440 /*
1441 * Place a symbol into the symbols buffer.
1442 */
1443 static void outsym(deflate_compress_ctx *out, unsigned long sym)
1444 {
1445 assert(out->nsyms < SYMLIMIT);
1446 out->syms[(out->symstart + out->nsyms++) % SYMLIMIT] = sym;
1447
1448 if (out->nsyms == SYMLIMIT)
1449 chooseblock(out);
1450 }
1451
1452 static void literal(struct LZ77Context *ectx, unsigned char c)
1453 {
1454 deflate_compress_ctx *out = (deflate_compress_ctx *) ectx->userdata;
1455
1456 outsym(out, SYMPFX_LITLEN | c);
1457 }
1458
1459 static void match(struct LZ77Context *ectx, int distance, int len)
1460 {
1461 const coderecord *d, *l;
1462 int i, j, k;
1463 deflate_compress_ctx *out = (deflate_compress_ctx *) ectx->userdata;
1464
1465 while (len > 0) {
1466 int thislen;
1467
1468 /*
1469 * We can transmit matches of lengths 3 through 258
1470 * inclusive. So if len exceeds 258, we must transmit in
1471 * several steps, with 258 or less in each step.
1472 *
1473 * Specifically: if len >= 261, we can transmit 258 and be
1474 * sure of having at least 3 left for the next step. And if
1475 * len <= 258, we can just transmit len. But if len == 259
1476 * or 260, we must transmit len-3.
1477 */
1478 thislen = (len > 260 ? 258 : len <= 258 ? len : len - 3);
1479 len -= thislen;
1480
1481 /*
1482 * Binary-search to find which length code we're
1483 * transmitting.
1484 */
1485 i = -1;
1486 j = sizeof(lencodes) / sizeof(*lencodes);
1487 while (1) {
1488 assert(j - i >= 2);
1489 k = (j + i) / 2;
1490 if (thislen < lencodes[k].min)
1491 j = k;
1492 else if (thislen > lencodes[k].max)
1493 i = k;
1494 else {
1495 l = &lencodes[k];
1496 break; /* found it! */
1497 }
1498 }
1499
1500 /*
1501 * Transmit the length code.
1502 */
1503 outsym(out, SYMPFX_LITLEN | l->code);
1504
1505 /*
1506 * Transmit the extra bits.
1507 */
1508 if (l->extrabits) {
1509 outsym(out, (SYMPFX_EXTRABITS | (thislen - l->min) |
1510 (l->extrabits << SYM_EXTRABITS_SHIFT)));
1511 }
1512
1513 /*
1514 * Binary-search to find which distance code we're
1515 * transmitting.
1516 */
1517 i = -1;
1518 j = sizeof(distcodes) / sizeof(*distcodes);
1519 while (1) {
1520 assert(j - i >= 2);
1521 k = (j + i) / 2;
1522 if (distance < distcodes[k].min)
1523 j = k;
1524 else if (distance > distcodes[k].max)
1525 i = k;
1526 else {
1527 d = &distcodes[k];
1528 break; /* found it! */
1529 }
1530 }
1531
1532 /*
1533 * Write the distance code.
1534 */
1535 outsym(out, SYMPFX_DIST | d->code);
1536
1537 /*
1538 * Transmit the extra bits.
1539 */
1540 if (d->extrabits) {
1541 outsym(out, (SYMPFX_EXTRABITS | (distance - d->min) |
1542 (d->extrabits << SYM_EXTRABITS_SHIFT)));
1543 }
1544 }
1545 }
1546
1547 deflate_compress_ctx *deflate_compress_new(int type)
1548 {
1549 deflate_compress_ctx *out;
1550 struct LZ77Context *ectx = snew(struct LZ77Context);
1551
1552 lz77_init(ectx);
1553 ectx->literal = literal;
1554 ectx->match = match;
1555
1556 out = snew(deflate_compress_ctx);
1557 out->type = type;
1558 out->outbits = out->noutbits = 0;
1559 out->firstblock = TRUE;
1560 #ifdef STATISTICS
1561 out->bitcount = 0;
1562 #endif
1563
1564 out->syms = snewn(SYMLIMIT, unsigned long);
1565 out->symstart = out->nsyms = 0;
1566
1567 out->checksum = (type == DEFLATE_TYPE_ZLIB ? 1 : 0);
1568 out->datasize = 0;
1569 out->lastblock = FALSE;
1570 out->finished = FALSE;
1571
1572 /*
1573 * Build the static Huffman tables now, so we'll have them
1574 * available every time outblock() is called.
1575 */
1576 {
1577 int i;
1578
1579 for (i = 0; i < lenof(out->static_len1); i++)
1580 out->static_len1[i] = (i < 144 ? 8 :
1581 i < 256 ? 9 :
1582 i < 280 ? 7 : 8);
1583 for (i = 0; i < lenof(out->static_len2); i++)
1584 out->static_len2[i] = 5;
1585 }
1586 hufcodes(out->static_len1, out->static_code1, lenof(out->static_code1));
1587 hufcodes(out->static_len2, out->static_code2, lenof(out->static_code2));
1588 out->sht.len_litlen = out->static_len1;
1589 out->sht.len_dist = out->static_len2;
1590 out->sht.len_codelen = NULL;
1591 out->sht.code_litlen = out->static_code1;
1592 out->sht.code_dist = out->static_code2;
1593 out->sht.code_codelen = NULL;
1594
1595 ectx->userdata = out;
1596 out->lzc = ectx;
1597
1598 return out;
1599 }
1600
1601 void deflate_compress_free(deflate_compress_ctx *out)
1602 {
1603 struct LZ77Context *ectx = out->lzc;
1604
1605 sfree(out->syms);
1606 sfree(ectx->ictx);
1607 sfree(ectx);
1608 sfree(out);
1609 }
1610
1611 void deflate_compress_data(deflate_compress_ctx *out,
1612 const void *vblock, int len, int flushtype,
1613 void **outblock, int *outlen)
1614 {
1615 struct LZ77Context *ectx = out->lzc;
1616 const unsigned char *block = (const unsigned char *)vblock;
1617
1618 assert(!out->finished);
1619
1620 out->outbuf = NULL;
1621 out->outlen = out->outsize = 0;
1622
1623 /*
1624 * If this is the first block, output the header.
1625 */
1626 if (out->firstblock) {
1627 switch (out->type) {
1628 case DEFLATE_TYPE_BARE:
1629 break; /* no header */
1630 case DEFLATE_TYPE_ZLIB:
1631 /*
1632 * zlib (RFC1950) header bytes: 78 9C. (Deflate
1633 * compression, 32K window size, default algorithm.)
1634 */
1635 outbits(out, 0x9C78, 16);
1636 break;
1637 case DEFLATE_TYPE_GZIP:
1638 /*
1639 * Minimal gzip (RFC1952) header:
1640 *
1641 * - basic header of 1F 8B
1642 * - compression method byte (8 = deflate)
1643 * - flags byte (zero: we use no optional features)
1644 * - modification time (zero: no time stamp available)
1645 * - extra flags byte (2: we use maximum compression
1646 * always)
1647 * - operating system byte (255: we do not specify)
1648 */
1649 outbits(out, 0x00088B1F, 32); /* header, CM, flags */
1650 outbits(out, 0, 32); /* mtime */
1651 outbits(out, 0xFF02, 16); /* xflags, OS */
1652 break;
1653 }
1654 out->firstblock = FALSE;
1655 }
1656
1657 /*
1658 * Feed our data to the LZ77 compression phase.
1659 */
1660 lz77_compress(ectx, block, len, TRUE);
1661
1662 /*
1663 * Update checksums and counters.
1664 */
1665 switch (out->type) {
1666 case DEFLATE_TYPE_ZLIB:
1667 out->checksum = adler32_update(out->checksum, block, len);
1668 break;
1669 case DEFLATE_TYPE_GZIP:
1670 out->checksum = crc32_update(out->checksum, block, len);
1671 break;
1672 }
1673 out->datasize += len;
1674
1675 switch (flushtype) {
1676 /*
1677 * FIXME: what other flush types are available and useful?
1678 * In PuTTY, it was clear that we generally wanted to be in
1679 * a static block so it was safe to open one. Here, we
1680 * probably prefer to be _outside_ a block if we can. Think
1681 * about this.
1682 */
1683 case DEFLATE_NO_FLUSH:
1684 break; /* don't flush any data at all (duh) */
1685 case DEFLATE_SYNC_FLUSH:
1686 /*
1687 * Close the current block.
1688 */
1689 flushblock(out);
1690
1691 /*
1692 * Then output an empty _uncompressed_ block: send 000,
1693 * then sync to byte boundary, then send bytes 00 00 FF
1694 * FF.
1695 */
1696 outbits(out, 0, 3);
1697 if (out->noutbits)
1698 outbits(out, 0, 8 - out->noutbits);
1699 outbits(out, 0, 16);
1700 outbits(out, 0xFFFF, 16);
1701 break;
1702 case DEFLATE_END_OF_DATA:
1703 /*
1704 * Output a block with BFINAL set.
1705 */
1706 out->lastblock = TRUE;
1707 flushblock(out);
1708
1709 /*
1710 * Sync to byte boundary, flushing out the final byte.
1711 */
1712 if (out->noutbits)
1713 outbits(out, 0, 8 - out->noutbits);
1714
1715 /*
1716 * Format-specific trailer data.
1717 */
1718 switch (out->type) {
1719 case DEFLATE_TYPE_ZLIB:
1720 /*
1721 * Just write out the Adler32 checksum.
1722 */
1723 outbits(out, (out->checksum >> 24) & 0xFF, 8);
1724 outbits(out, (out->checksum >> 16) & 0xFF, 8);
1725 outbits(out, (out->checksum >> 8) & 0xFF, 8);
1726 outbits(out, (out->checksum >> 0) & 0xFF, 8);
1727 break;
1728 case DEFLATE_TYPE_GZIP:
1729 /*
1730 * Write out the CRC32 checksum and the data length.
1731 */
1732 outbits(out, out->checksum, 32);
1733 outbits(out, out->datasize, 32);
1734 break;
1735 }
1736
1737 out->finished = TRUE;
1738 break;
1739 }
1740
1741 /*
1742 * Return any data that we've generated.
1743 */
1744 *outblock = (void *)out->outbuf;
1745 *outlen = out->outlen;
1746 }
1747
1748 /* ----------------------------------------------------------------------
1749 * Deflate decompression.
1750 */
1751
1752 /*
1753 * The way we work the Huffman decode is to have a table lookup on
1754 * the first N bits of the input stream (in the order they arrive,
1755 * of course, i.e. the first bit of the Huffman code is in bit 0).
1756 * Each table entry lists the number of bits to consume, plus
1757 * either an output code or a pointer to a secondary table.
1758 */
1759 struct table;
1760 struct tableentry;
1761
1762 struct tableentry {
1763 unsigned char nbits;
1764 short code;
1765 struct table *nexttable;
1766 };
1767
1768 struct table {
1769 int mask; /* mask applied to input bit stream */
1770 struct tableentry *table;
1771 };
1772
1773 #define MAXSYMS 288
1774
1775 #define DWINSIZE 32768
1776
1777 /*
1778 * Build a single-level decode table for elements
1779 * [minlength,maxlength) of the provided code/length tables, and
1780 * recurse to build subtables.
1781 */
1782 static struct table *mkonetab(int *codes, unsigned char *lengths, int nsyms,
1783 int pfx, int pfxbits, int bits)
1784 {
1785 struct table *tab = snew(struct table);
1786 int pfxmask = (1 << pfxbits) - 1;
1787 int nbits, i, j, code;
1788
1789 tab->table = snewn(1 << bits, struct tableentry);
1790 tab->mask = (1 << bits) - 1;
1791
1792 for (code = 0; code <= tab->mask; code++) {
1793 tab->table[code].code = -1;
1794 tab->table[code].nbits = 0;
1795 tab->table[code].nexttable = NULL;
1796 }
1797
1798 for (i = 0; i < nsyms; i++) {
1799 if (lengths[i] <= pfxbits || (codes[i] & pfxmask) != pfx)
1800 continue;
1801 code = (codes[i] >> pfxbits) & tab->mask;
1802 for (j = code; j <= tab->mask; j += 1 << (lengths[i] - pfxbits)) {
1803 tab->table[j].code = i;
1804 nbits = lengths[i] - pfxbits;
1805 if (tab->table[j].nbits < nbits)
1806 tab->table[j].nbits = nbits;
1807 }
1808 }
1809 for (code = 0; code <= tab->mask; code++) {
1810 if (tab->table[code].nbits <= bits)
1811 continue;
1812 /* Generate a subtable. */
1813 tab->table[code].code = -1;
1814 nbits = tab->table[code].nbits - bits;
1815 if (nbits > 7)
1816 nbits = 7;
1817 tab->table[code].nbits = bits;
1818 tab->table[code].nexttable = mkonetab(codes, lengths, nsyms,
1819 pfx | (code << pfxbits),
1820 pfxbits + bits, nbits);
1821 }
1822
1823 return tab;
1824 }
1825
1826 /*
1827 * Build a decode table, given a set of Huffman tree lengths.
1828 */
1829 static struct table *mktable(unsigned char *lengths, int nlengths,
1830 #ifdef ANALYSIS
1831 const char *alphabet,
1832 #endif
1833 int *error)
1834 {
1835 int codes[MAXSYMS];
1836 int maxlen;
1837
1838 #ifdef ANALYSIS
1839 if (alphabet && analyse_level > 1) {
1840 int i, col = 0;
1841 printf("code lengths for %s alphabet:\n", alphabet);
1842 for (i = 0; i < nlengths; i++) {
1843 col += printf("%3d", lengths[i]);
1844 if (col > 72) {
1845 putchar('\n');
1846 col = 0;
1847 }
1848 }
1849 if (col > 0)
1850 putchar('\n');
1851 }
1852 #endif
1853
1854 maxlen = hufcodes(lengths, codes, nlengths);
1855
1856 if (maxlen < 0) {
1857 *error = (maxlen == -1 ? DEFLATE_ERR_LARGE_HUFTABLE :
1858 DEFLATE_ERR_SMALL_HUFTABLE);
1859 return NULL;
1860 }
1861
1862 /*
1863 * Now we have the complete list of Huffman codes. Build a
1864 * table.
1865 */
1866 return mkonetab(codes, lengths, nlengths, 0, 0, maxlen < 9 ? maxlen : 9);
1867 }
1868
1869 static int freetable(struct table **ztab)
1870 {
1871 struct table *tab;
1872 int code;
1873
1874 if (ztab == NULL)
1875 return -1;
1876
1877 if (*ztab == NULL)
1878 return 0;
1879
1880 tab = *ztab;
1881
1882 for (code = 0; code <= tab->mask; code++)
1883 if (tab->table[code].nexttable != NULL)
1884 freetable(&tab->table[code].nexttable);
1885
1886 sfree(tab->table);
1887 tab->table = NULL;
1888
1889 sfree(tab);
1890 *ztab = NULL;
1891
1892 return (0);
1893 }
1894
1895 struct deflate_decompress_ctx {
1896 struct table *staticlentable, *staticdisttable;
1897 struct table *currlentable, *currdisttable, *lenlentable;
1898 enum {
1899 ZLIBSTART,
1900 GZIPSTART, GZIPMETHFLAGS, GZIPIGNORE1, GZIPIGNORE2, GZIPIGNORE3,
1901 GZIPEXTRA, GZIPFNAME, GZIPCOMMENT,
1902 OUTSIDEBLK, TREES_HDR, TREES_LENLEN, TREES_LEN, TREES_LENREP,
1903 INBLK, GOTLENSYM, GOTLEN, GOTDISTSYM,
1904 UNCOMP_LEN, UNCOMP_NLEN, UNCOMP_DATA,
1905 END,
1906 ADLER1, ADLER2,
1907 CRC1, CRC2, ILEN1, ILEN2,
1908 FINALSPIN
1909 } state;
1910 int sym, hlit, hdist, hclen, lenptr, lenextrabits, lenaddon, len,
1911 lenrep, lastblock;
1912 int uncomplen;
1913 unsigned char lenlen[19];
1914 unsigned char lengths[286 + 32];
1915 unsigned long bits;
1916 int nbits;
1917 unsigned char window[DWINSIZE];
1918 int winpos;
1919 unsigned char *outblk;
1920 int outlen, outsize;
1921 int type;
1922 unsigned long checksum;
1923 unsigned long bytesout;
1924 int gzflags, gzextralen;
1925 #ifdef ANALYSIS
1926 int bytesread;
1927 int bitcount_before;
1928 #define BITCOUNT(dctx) ( (dctx)->bytesread * 8 - (dctx)->nbits )
1929 #endif
1930 };
1931
1932 deflate_decompress_ctx *deflate_decompress_new(int type)
1933 {
1934 deflate_decompress_ctx *dctx = snew(deflate_decompress_ctx);
1935 unsigned char lengths[288];
1936
1937 memset(lengths, 8, 144);
1938 memset(lengths + 144, 9, 256 - 144);
1939 memset(lengths + 256, 7, 280 - 256);
1940 memset(lengths + 280, 8, 288 - 280);
1941 dctx->staticlentable = mktable(lengths, 288,
1942 #ifdef ANALYSIS
1943 NULL,
1944 #endif
1945 NULL);
1946 assert(dctx->staticlentable);
1947 memset(lengths, 5, 32);
1948 dctx->staticdisttable = mktable(lengths, 32,
1949 #ifdef ANALYSIS
1950 NULL,
1951 #endif
1952 NULL);
1953 assert(dctx->staticdisttable);
1954 dctx->state = (type == DEFLATE_TYPE_ZLIB ? ZLIBSTART :
1955 type == DEFLATE_TYPE_GZIP ? GZIPSTART :
1956 OUTSIDEBLK);
1957 dctx->currlentable = dctx->currdisttable = dctx->lenlentable = NULL;
1958 dctx->bits = 0;
1959 dctx->nbits = 0;
1960 dctx->winpos = 0;
1961 dctx->type = type;
1962 dctx->lastblock = FALSE;
1963 dctx->checksum = (type == DEFLATE_TYPE_ZLIB ? 1 : 0);
1964 dctx->bytesout = 0;
1965 dctx->gzflags = dctx->gzextralen = 0;
1966 #ifdef ANALYSIS
1967 dctx->bytesread = dctx->bitcount_before = 0;
1968 #endif
1969
1970 return dctx;
1971 }
1972
1973 void deflate_decompress_free(deflate_decompress_ctx *dctx)
1974 {
1975 if (dctx->currlentable && dctx->currlentable != dctx->staticlentable)
1976 freetable(&dctx->currlentable);
1977 if (dctx->currdisttable && dctx->currdisttable != dctx->staticdisttable)
1978 freetable(&dctx->currdisttable);
1979 if (dctx->lenlentable)
1980 freetable(&dctx->lenlentable);
1981 freetable(&dctx->staticlentable);
1982 freetable(&dctx->staticdisttable);
1983 sfree(dctx);
1984 }
1985
1986 static int huflookup(unsigned long *bitsp, int *nbitsp, struct table *tab)
1987 {
1988 unsigned long bits = *bitsp;
1989 int nbits = *nbitsp;
1990 while (1) {
1991 struct tableentry *ent;
1992 ent = &tab->table[bits & tab->mask];
1993 if (ent->nbits > nbits)
1994 return -1; /* not enough data */
1995 bits >>= ent->nbits;
1996 nbits -= ent->nbits;
1997 if (ent->code == -1)
1998 tab = ent->nexttable;
1999 else {
2000 *bitsp = bits;
2001 *nbitsp = nbits;
2002 return ent->code;
2003 }
2004
2005 /*
2006 * If we reach here with `tab' null, it can only be because
2007 * there was a missing entry in the Huffman table. This
2008 * should never occur even with invalid input data, because
2009 * we enforce at mktable time that the Huffman codes should
2010 * precisely cover the code space; so we can enforce this
2011 * by assertion.
2012 */
2013 assert(tab);
2014 }
2015 }
2016
2017 static void emit_char(deflate_decompress_ctx *dctx, int c)
2018 {
2019 dctx->window[dctx->winpos] = c;
2020 dctx->winpos = (dctx->winpos + 1) & (DWINSIZE - 1);
2021 if (dctx->outlen >= dctx->outsize) {
2022 dctx->outsize = dctx->outlen * 3 / 2 + 512;
2023 dctx->outblk = sresize(dctx->outblk, dctx->outsize, unsigned char);
2024 }
2025 if (dctx->type == DEFLATE_TYPE_ZLIB) {
2026 unsigned char uc = c;
2027 dctx->checksum = adler32_update(dctx->checksum, &uc, 1);
2028 } else if (dctx->type == DEFLATE_TYPE_GZIP) {
2029 unsigned char uc = c;
2030 dctx->checksum = crc32_update(dctx->checksum, &uc, 1);
2031 }
2032 dctx->outblk[dctx->outlen++] = c;
2033 dctx->bytesout++;
2034 }
2035
2036 #define EATBITS(n) ( dctx->nbits -= (n), dctx->bits >>= (n) )
2037
2038 int deflate_decompress_data(deflate_decompress_ctx *dctx,
2039 const void *vblock, int len,
2040 void **outblock, int *outlen)
2041 {
2042 const coderecord *rec;
2043 const unsigned char *block = (const unsigned char *)vblock;
2044 int code, bfinal, btype, rep, dist, nlen, header, cksum;
2045 int error = 0;
2046
2047 if (len == 0) {
2048 *outblock = NULL;
2049 *outlen = 0;
2050 if (dctx->state != FINALSPIN)
2051 return DEFLATE_ERR_UNEXPECTED_EOF;
2052 else
2053 return 0;
2054 }
2055
2056 dctx->outblk = NULL;
2057 dctx->outsize = 0;
2058 dctx->outlen = 0;
2059
2060 while (len > 0 || dctx->nbits > 0) {
2061 while (dctx->nbits < 24 && len > 0) {
2062 dctx->bits |= (*block++) << dctx->nbits;
2063 dctx->nbits += 8;
2064 len--;
2065 #ifdef ANALYSIS
2066 dctx->bytesread++;
2067 #endif
2068 }
2069 switch (dctx->state) {
2070 case ZLIBSTART:
2071 /* Expect 16-bit zlib header. */
2072 if (dctx->nbits < 16)
2073 goto finished; /* done all we can */
2074
2075 /*
2076 * The header is stored as a big-endian 16-bit integer,
2077 * in contrast to the general little-endian policy in
2078 * the rest of the format :-(
2079 */
2080 header = (((dctx->bits & 0xFF00) >> 8) |
2081 ((dctx->bits & 0x00FF) << 8));
2082 EATBITS(16);
2083
2084 /*
2085 * Check the header:
2086 *
2087 * - bits 8-11 should be 1000 (Deflate/RFC1951)
2088 * - bits 12-15 should be at most 0111 (window size)
2089 * - bit 5 should be zero (no dictionary present)
2090 * - we don't care about bits 6-7 (compression rate)
2091 * - bits 0-4 should be set up to make the whole thing
2092 * a multiple of 31 (checksum).
2093 */
2094 if ((header & 0xF000) > 0x7000 ||
2095 (header & 0x0020) != 0x0000 ||
2096 (header % 31) != 0) {
2097 error = DEFLATE_ERR_ZLIB_HEADER;
2098 goto finished;
2099 }
2100 if ((header & 0x0F00) != 0x0800) {
2101 error = DEFLATE_ERR_ZLIB_WRONGCOMP;
2102 goto finished;
2103 }
2104 dctx->state = OUTSIDEBLK;
2105 break;
2106 case GZIPSTART:
2107 /* Expect 16-bit gzip header. */
2108 if (dctx->nbits < 16)
2109 goto finished;
2110 header = dctx->bits & 0xFFFF;
2111 EATBITS(16);
2112 if (header != 0x8B1F) {
2113 error = DEFLATE_ERR_GZIP_HEADER;
2114 goto finished;
2115 }
2116 dctx->state = GZIPMETHFLAGS;
2117 break;
2118 case GZIPMETHFLAGS:
2119 /* Expect gzip compression method and flags bytes. */
2120 if (dctx->nbits < 16)
2121 goto finished;
2122 header = dctx->bits & 0xFF;
2123 EATBITS(8);
2124 if (header != 8) {
2125 error = DEFLATE_ERR_GZIP_WRONGCOMP;
2126 goto finished;
2127 }
2128 dctx->gzflags = dctx->bits & 0xFF;
2129 if (dctx->gzflags & 2) {
2130 /*
2131 * The FHCRC flag is slightly confusing. RFC1952
2132 * documents it as indicating the presence of a
2133 * two-byte CRC16 of the gzip header, occurring
2134 * just before the beginning of the Deflate stream.
2135 * However, gzip itself (as of 1.3.5) appears to
2136 * believe it indicates that the current gzip
2137 * `member' is not the final one, i.e. that the
2138 * stream is composed of multiple gzip members
2139 * concatenated together, and furthermore gzip will
2140 * refuse to decode any file that has it set.
2141 *
2142 * For this reason, I label it as `disputed' and
2143 * also refuse to decode anything that has it set.
2144 * I don't expect this to be a problem in practice.
2145 */
2146 error = DEFLATE_ERR_GZIP_FHCRC;
2147 goto finished;
2148 }
2149 EATBITS(8);
2150 dctx->state = GZIPIGNORE1;
2151 break;
2152 case GZIPIGNORE1:
2153 case GZIPIGNORE2:
2154 case GZIPIGNORE3:
2155 /* Expect two bytes of gzip timestamp/XFL/OS, which we ignore. */
2156 if (dctx->nbits < 16)
2157 goto finished;
2158 EATBITS(16);
2159 if (dctx->state == GZIPIGNORE3) {
2160 dctx->state = GZIPEXTRA;
2161 } else
2162 dctx->state++; /* maps IGNORE1 -> IGNORE2 -> IGNORE3 */
2163 break;
2164 case GZIPEXTRA:
2165 if (dctx->gzflags & 4) {
2166 /* Expect two bytes of extra-length count, then that many
2167 * extra bytes of header data, all of which we ignore. */
2168 if (!dctx->gzextralen) {
2169 if (dctx->nbits < 16)
2170 goto finished;
2171 dctx->gzextralen = dctx->bits & 0xFFFF;
2172 EATBITS(16);
2173 break;
2174 } else if (dctx->gzextralen > 0) {
2175 if (dctx->nbits < 8)
2176 goto finished;
2177 EATBITS(8);
2178 if (--dctx->gzextralen > 0)
2179 break;
2180 }
2181 }
2182 dctx->state = GZIPFNAME;
2183 break;
2184 case GZIPFNAME:
2185 if (dctx->gzflags & 8) {
2186 /*
2187 * Expect a NUL-terminated filename.
2188 */
2189 if (dctx->nbits < 8)
2190 goto finished;
2191 code = dctx->bits & 0xFF;
2192 EATBITS(8);
2193 } else
2194 code = 0;
2195 if (code == 0)
2196 dctx->state = GZIPCOMMENT;
2197 break;
2198 case GZIPCOMMENT:
2199 if (dctx->gzflags & 16) {
2200 /*
2201 * Expect a NUL-terminated filename.
2202 */
2203 if (dctx->nbits < 8)
2204 goto finished;
2205 code = dctx->bits & 0xFF;
2206 EATBITS(8);
2207 } else
2208 code = 0;
2209 if (code == 0)
2210 dctx->state = OUTSIDEBLK;
2211 break;
2212 case OUTSIDEBLK:
2213 /* Expect 3-bit block header. */
2214 if (dctx->nbits < 3)
2215 goto finished; /* done all we can */
2216 bfinal = dctx->bits & 1;
2217 if (bfinal)
2218 dctx->lastblock = TRUE;
2219 EATBITS(1);
2220 btype = dctx->bits & 3;
2221 EATBITS(2);
2222 if (btype == 0) {
2223 int to_eat = dctx->nbits & 7;
2224 dctx->state = UNCOMP_LEN;
2225 EATBITS(to_eat); /* align to byte boundary */
2226 } else if (btype == 1) {
2227 dctx->currlentable = dctx->staticlentable;
2228 dctx->currdisttable = dctx->staticdisttable;
2229 dctx->state = INBLK;
2230 } else if (btype == 2) {
2231 dctx->state = TREES_HDR;
2232 }
2233 debug(("recv: bfinal=%d btype=%d\n", bfinal, btype));
2234 #ifdef ANALYSIS
2235 if (analyse_level > 1) {
2236 static const char *const btypes[] = {
2237 "uncompressed", "static", "dynamic", "type 3 (unknown)"
2238 };
2239 printf("new block, %sfinal, %s\n",
2240 bfinal ? "" : "not ",
2241 btypes[btype]);
2242 }
2243 #endif
2244 break;
2245 case TREES_HDR:
2246 /*
2247 * Dynamic block header. Five bits of HLIT, five of
2248 * HDIST, four of HCLEN.
2249 */
2250 if (dctx->nbits < 5 + 5 + 4)
2251 goto finished; /* done all we can */
2252 dctx->hlit = 257 + (dctx->bits & 31);
2253 EATBITS(5);
2254 dctx->hdist = 1 + (dctx->bits & 31);
2255 EATBITS(5);
2256 dctx->hclen = 4 + (dctx->bits & 15);
2257 EATBITS(4);
2258 debug(("recv: hlit=%d hdist=%d hclen=%d\n", dctx->hlit,
2259 dctx->hdist, dctx->hclen));
2260 #ifdef ANALYSIS
2261 if (analyse_level > 1)
2262 printf("hlit=%d, hdist=%d, hclen=%d\n",
2263 dctx->hlit, dctx->hdist, dctx->hclen);
2264 #endif
2265 dctx->lenptr = 0;
2266 dctx->state = TREES_LENLEN;
2267 memset(dctx->lenlen, 0, sizeof(dctx->lenlen));
2268 break;
2269 case TREES_LENLEN:
2270 if (dctx->nbits < 3)
2271 goto finished;
2272 while (dctx->lenptr < dctx->hclen && dctx->nbits >= 3) {
2273 dctx->lenlen[lenlenmap[dctx->lenptr++]] =
2274 (unsigned char) (dctx->bits & 7);
2275 debug(("recv: lenlen %d\n", (unsigned char) (dctx->bits & 7)));
2276 EATBITS(3);
2277 }
2278 if (dctx->lenptr == dctx->hclen) {
2279 dctx->lenlentable = mktable(dctx->lenlen, 19,
2280 #ifdef ANALYSIS
2281 "code length",
2282 #endif
2283 &error);
2284 if (!dctx->lenlentable)
2285 goto finished; /* error code set up by mktable */
2286 dctx->state = TREES_LEN;
2287 dctx->lenptr = 0;
2288 }
2289 break;
2290 case TREES_LEN:
2291 if (dctx->lenptr >= dctx->hlit + dctx->hdist) {
2292 dctx->currlentable = mktable(dctx->lengths, dctx->hlit,
2293 #ifdef ANALYSIS
2294 "literal/length",
2295 #endif
2296 &error);
2297 if (!dctx->currlentable)
2298 goto finished; /* error code set up by mktable */
2299 dctx->currdisttable = mktable(dctx->lengths + dctx->hlit,
2300 dctx->hdist,
2301 #ifdef ANALYSIS
2302 "distance",
2303 #endif
2304 &error);
2305 if (!dctx->currdisttable)
2306 goto finished; /* error code set up by mktable */
2307 freetable(&dctx->lenlentable);
2308 dctx->lenlentable = NULL;
2309 dctx->state = INBLK;
2310 break;
2311 }
2312 code = huflookup(&dctx->bits, &dctx->nbits, dctx->lenlentable);
2313 debug(("recv: codelen %d\n", code));
2314 if (code == -1)
2315 goto finished;
2316 if (code < 16) {
2317 #ifdef ANALYSIS
2318 if (analyse_level > 1)
2319 printf("code-length %d\n", code);
2320 #endif
2321 dctx->lengths[dctx->lenptr++] = code;
2322 } else {
2323 dctx->lenextrabits = (code == 16 ? 2 : code == 17 ? 3 : 7);
2324 dctx->lenaddon = (code == 18 ? 11 : 3);
2325 dctx->lenrep = (code == 16 && dctx->lenptr > 0 ?
2326 dctx->lengths[dctx->lenptr - 1] : 0);
2327 dctx->state = TREES_LENREP;
2328 }
2329 break;
2330 case TREES_LENREP:
2331 if (dctx->nbits < dctx->lenextrabits)
2332 goto finished;
2333 rep =
2334 dctx->lenaddon +
2335 (dctx->bits & ((1 << dctx->lenextrabits) - 1));
2336 EATBITS(dctx->lenextrabits);
2337 if (dctx->lenextrabits)
2338 debug(("recv: codelen-extrabits %d/%d\n", rep - dctx->lenaddon,
2339 dctx->lenextrabits));
2340 #ifdef ANALYSIS
2341 if (analyse_level > 1)
2342 printf("code-length-repeat: %d copies of %d\n", rep,
2343 dctx->lenrep);
2344 #endif
2345 while (rep > 0 && dctx->lenptr < dctx->hlit + dctx->hdist) {
2346 dctx->lengths[dctx->lenptr] = dctx->lenrep;
2347 dctx->lenptr++;
2348 rep--;
2349 }
2350 dctx->state = TREES_LEN;
2351 break;
2352 case INBLK:
2353 #ifdef ANALYSIS
2354 dctx->bitcount_before = BITCOUNT(dctx);
2355 #endif
2356 code = huflookup(&dctx->bits, &dctx->nbits, dctx->currlentable);
2357 debug(("recv: litlen %d\n", code));
2358 if (code == -1)
2359 goto finished;
2360 if (code < 256) {
2361 #ifdef ANALYSIS
2362 if (analyse_level > 0)
2363 printf("%lu: literal %d [%d]\n", dctx->bytesout, code,
2364 BITCOUNT(dctx) - dctx->bitcount_before);
2365 #endif
2366 emit_char(dctx, code);
2367 } else if (code == 256) {
2368 if (dctx->lastblock)
2369 dctx->state = END;
2370 else
2371 dctx->state = OUTSIDEBLK;
2372 if (dctx->currlentable != dctx->staticlentable) {
2373 freetable(&dctx->currlentable);
2374 dctx->currlentable = NULL;
2375 }
2376 if (dctx->currdisttable != dctx->staticdisttable) {
2377 freetable(&dctx->currdisttable);
2378 dctx->currdisttable = NULL;
2379 }
2380 } else if (code < 286) { /* static tree can give >285; ignore */
2381 dctx->state = GOTLENSYM;
2382 dctx->sym = code;
2383 }
2384 break;
2385 case GOTLENSYM:
2386 rec = &lencodes[dctx->sym - 257];
2387 if (dctx->nbits < rec->extrabits)
2388 goto finished;
2389 dctx->len =
2390 rec->min + (dctx->bits & ((1 << rec->extrabits) - 1));
2391 if (rec->extrabits)
2392 debug(("recv: litlen-extrabits %d/%d\n",
2393 dctx->len - rec->min, rec->extrabits));
2394 EATBITS(rec->extrabits);
2395 dctx->state = GOTLEN;
2396 break;
2397 case GOTLEN:
2398 code = huflookup(&dctx->bits, &dctx->nbits, dctx->currdisttable);
2399 debug(("recv: dist %d\n", code));
2400 if (code == -1)
2401 goto finished;
2402 dctx->state = GOTDISTSYM;
2403 dctx->sym = code;
2404 break;
2405 case GOTDISTSYM:
2406 rec = &distcodes[dctx->sym];
2407 if (dctx->nbits < rec->extrabits)
2408 goto finished;
2409 dist = rec->min + (dctx->bits & ((1 << rec->extrabits) - 1));
2410 if (rec->extrabits)
2411 debug(("recv: dist-extrabits %d/%d\n",
2412 dist - rec->min, rec->extrabits));
2413 EATBITS(rec->extrabits);
2414 dctx->state = INBLK;
2415 #ifdef ANALYSIS
2416 if (analyse_level > 0)
2417 printf("%lu: copy len=%d dist=%d [%d]\n", dctx->bytesout,
2418 dctx->len, dist,
2419 BITCOUNT(dctx) - dctx->bitcount_before);
2420 #endif
2421 while (dctx->len--)
2422 emit_char(dctx, dctx->window[(dctx->winpos - dist) &
2423 (DWINSIZE - 1)]);
2424 break;
2425 case UNCOMP_LEN:
2426 /*
2427 * Uncompressed block. We expect to see a 16-bit LEN.
2428 */
2429 if (dctx->nbits < 16)
2430 goto finished;
2431 dctx->uncomplen = dctx->bits & 0xFFFF;
2432 EATBITS(16);
2433 dctx->state = UNCOMP_NLEN;
2434 break;
2435 case UNCOMP_NLEN:
2436 /*
2437 * Uncompressed block. We expect to see a 16-bit NLEN,
2438 * which should be the one's complement of the previous
2439 * LEN.
2440 */
2441 if (dctx->nbits < 16)
2442 goto finished;
2443 nlen = dctx->bits & 0xFFFF;
2444 EATBITS(16);
2445 if (dctx->uncomplen == 0)
2446 dctx->state = OUTSIDEBLK; /* block is empty */
2447 else
2448 dctx->state = UNCOMP_DATA;
2449 break;
2450 case UNCOMP_DATA:
2451 if (dctx->nbits < 8)
2452 goto finished;
2453 #ifdef ANALYSIS
2454 if (analyse_level > 0)
2455 printf("%lu: uncompressed %d [8]\n", dctx->bytesout,
2456 (int)(dctx->bits & 0xFF));
2457 #endif
2458 emit_char(dctx, dctx->bits & 0xFF);
2459 EATBITS(8);
2460 if (--dctx->uncomplen == 0)
2461 dctx->state = OUTSIDEBLK; /* end of uncompressed block */
2462 break;
2463 case END:
2464 /*
2465 * End of compressed data. We align to a byte boundary,
2466 * and then look for format-specific trailer data.
2467 */
2468 {
2469 int to_eat = dctx->nbits & 7;
2470 EATBITS(to_eat);
2471 }
2472 if (dctx->type == DEFLATE_TYPE_ZLIB)
2473 dctx->state = ADLER1;
2474 else if (dctx->type == DEFLATE_TYPE_GZIP)
2475 dctx->state = CRC1;
2476 else
2477 dctx->state = FINALSPIN;
2478 break;
2479 case ADLER1:
2480 if (dctx->nbits < 16)
2481 goto finished;
2482 cksum = (dctx->bits & 0xFF) << 8;
2483 EATBITS(8);
2484 cksum |= (dctx->bits & 0xFF);
2485 EATBITS(8);
2486 if (cksum != ((dctx->checksum >> 16) & 0xFFFF)) {
2487 error = DEFLATE_ERR_CHECKSUM;
2488 goto finished;
2489 }
2490 dctx->state = ADLER2;
2491 break;
2492 case ADLER2:
2493 if (dctx->nbits < 16)
2494 goto finished;
2495 cksum = (dctx->bits & 0xFF) << 8;
2496 EATBITS(8);
2497 cksum |= (dctx->bits & 0xFF);
2498 EATBITS(8);
2499 if (cksum != (dctx->checksum & 0xFFFF)) {
2500 error = DEFLATE_ERR_CHECKSUM;
2501 goto finished;
2502 }
2503 dctx->state = FINALSPIN;
2504 break;
2505 case CRC1:
2506 if (dctx->nbits < 16)
2507 goto finished;
2508 cksum = dctx->bits & 0xFFFF;
2509 EATBITS(16);
2510 if (cksum != (dctx->checksum & 0xFFFF)) {
2511 error = DEFLATE_ERR_CHECKSUM;
2512 goto finished;
2513 }
2514 dctx->state = CRC2;
2515 break;
2516 case CRC2:
2517 if (dctx->nbits < 16)
2518 goto finished;
2519 cksum = dctx->bits & 0xFFFF;
2520 EATBITS(16);
2521 if (cksum != ((dctx->checksum >> 16) & 0xFFFF)) {
2522 error = DEFLATE_ERR_CHECKSUM;
2523 goto finished;
2524 }
2525 dctx->state = ILEN1;
2526 break;
2527 case ILEN1:
2528 if (dctx->nbits < 16)
2529 goto finished;
2530 cksum = dctx->bits & 0xFFFF;
2531 EATBITS(16);
2532 if (cksum != (dctx->bytesout & 0xFFFF)) {
2533 error = DEFLATE_ERR_INLEN;
2534 goto finished;
2535 }
2536 dctx->state = ILEN2;
2537 break;
2538 case ILEN2:
2539 if (dctx->nbits < 16)
2540 goto finished;
2541 cksum = dctx->bits & 0xFFFF;
2542 EATBITS(16);
2543 if (cksum != ((dctx->bytesout >> 16) & 0xFFFF)) {
2544 error = DEFLATE_ERR_INLEN;
2545 goto finished;
2546 }
2547 dctx->state = FINALSPIN;
2548 break;
2549 case FINALSPIN:
2550 /* Just ignore any trailing garbage on the data stream. */
2551 /* (We could alternatively throw an error here, if we wanted
2552 * to detect and complain about trailing garbage.) */
2553 EATBITS(dctx->nbits);
2554 break;
2555 }
2556 }
2557
2558 finished:
2559 *outblock = dctx->outblk;
2560 *outlen = dctx->outlen;
2561 return error;
2562 }
2563
2564 #define A(code,str) str
2565 const char *const deflate_error_msg[DEFLATE_NUM_ERRORS] = {
2566 DEFLATE_ERRORLIST(A)
2567 };
2568 #undef A
2569
2570 #define A(code,str) #code
2571 const char *const deflate_error_sym[DEFLATE_NUM_ERRORS] = {
2572 DEFLATE_ERRORLIST(A)
2573 };
2574 #undef A
2575
2576 #ifdef STANDALONE
2577
2578 int main(int argc, char **argv)
2579 {
2580 unsigned char buf[65536];
2581 void *outbuf;
2582 int ret, err, outlen;
2583 deflate_decompress_ctx *dhandle;
2584 deflate_compress_ctx *chandle;
2585 int type = DEFLATE_TYPE_ZLIB, opts = TRUE;
2586 int compress = FALSE, decompress = FALSE;
2587 int got_arg = FALSE;
2588 char *filename = NULL;
2589 FILE *fp;
2590
2591 while (--argc) {
2592 char *p = *++argv;
2593
2594 got_arg = TRUE;
2595
2596 if (p[0] == '-' && opts) {
2597 if (!strcmp(p, "-b"))
2598 type = DEFLATE_TYPE_BARE;
2599 else if (!strcmp(p, "-g"))
2600 type = DEFLATE_TYPE_GZIP;
2601 else if (!strcmp(p, "-c"))
2602 compress = TRUE;
2603 else if (!strcmp(p, "-d"))
2604 decompress = TRUE;
2605 else if (!strcmp(p, "-a"))
2606 analyse_level++, decompress = TRUE;
2607 else if (!strcmp(p, "--"))
2608 opts = FALSE; /* next thing is filename */
2609 else {
2610 fprintf(stderr, "unknown command line option '%s'\n", p);
2611 return 1;
2612 }
2613 } else if (!filename) {
2614 filename = p;
2615 } else {
2616 fprintf(stderr, "can only handle one filename\n");
2617 return 1;
2618 }
2619 }
2620
2621 if (!compress && !decompress) {
2622 fprintf(stderr, "usage: deflate [ -c | -d | -a ] [ -b | -g ]"
2623 " [filename]\n");
2624 return (got_arg ? 1 : 0);
2625 }
2626
2627 if (compress && decompress) {
2628 fprintf(stderr, "please do not specify both compression and"
2629 " decompression\n");
2630 return (got_arg ? 1 : 0);
2631 }
2632
2633 if (compress) {
2634 chandle = deflate_compress_new(type);
2635 dhandle = NULL;
2636 } else {
2637 dhandle = deflate_decompress_new(type);
2638 chandle = NULL;
2639 }
2640
2641 if (filename)
2642 fp = fopen(filename, "rb");
2643 else
2644 fp = stdin;
2645
2646 if (!fp) {
2647 assert(filename);
2648 fprintf(stderr, "unable to open '%s'\n", filename);
2649 return 1;
2650 }
2651
2652 do {
2653 ret = fread(buf, 1, sizeof(buf), fp);
2654 outbuf = NULL;
2655 if (dhandle) {
2656 if (ret > 0)
2657 err = deflate_decompress_data(dhandle, buf, ret,
2658 (void **)&outbuf, &outlen);
2659 else
2660 err = deflate_decompress_data(dhandle, NULL, 0,
2661 (void **)&outbuf, &outlen);
2662 } else {
2663 if (ret > 0)
2664 deflate_compress_data(chandle, buf, ret, DEFLATE_NO_FLUSH,
2665 (void **)&outbuf, &outlen);
2666 else
2667 deflate_compress_data(chandle, buf, ret, DEFLATE_END_OF_DATA,
2668 (void **)&outbuf, &outlen);
2669 err = 0;
2670 }
2671 if (outbuf) {
2672 if (!analyse_level && outlen)
2673 fwrite(outbuf, 1, outlen, stdout);
2674 sfree(outbuf);
2675 }
2676 if (err > 0) {
2677 fprintf(stderr, "decoding error: %s\n", deflate_error_msg[err]);
2678 return 1;
2679 }
2680 } while (ret > 0);
2681
2682 if (dhandle)
2683 deflate_decompress_free(dhandle);
2684 if (chandle)
2685 deflate_compress_free(chandle);
2686
2687 if (filename)
2688 fclose(fp);
2689
2690 return 0;
2691 }
2692
2693 #endif
2694
2695 #ifdef TESTMODE
2696
2697 int main(int argc, char **argv)
2698 {
2699 char *filename = NULL;
2700 FILE *fp;
2701 deflate_compress_ctx *chandle;
2702 deflate_decompress_ctx *dhandle;
2703 unsigned char buf[65536], *outbuf, *outbuf2;
2704 int ret, err, outlen, outlen2;
2705 int dlen = 0, clen = 0;
2706 int opts = TRUE;
2707
2708 while (--argc) {
2709 char *p = *++argv;
2710
2711 if (p[0] == '-' && opts) {
2712 if (!strcmp(p, "--"))
2713 opts = FALSE; /* next thing is filename */
2714 else {
2715 fprintf(stderr, "unknown command line option '%s'\n", p);
2716 return 1;
2717 }
2718 } else if (!filename) {
2719 filename = p;
2720 } else {
2721 fprintf(stderr, "can only handle one filename\n");
2722 return 1;
2723 }
2724 }
2725
2726 if (filename)
2727 fp = fopen(filename, "rb");
2728 else
2729 fp = stdin;
2730
2731 if (!fp) {
2732 assert(filename);
2733 fprintf(stderr, "unable to open '%s'\n", filename);
2734 return 1;
2735 }
2736
2737 chandle = deflate_compress_new(DEFLATE_TYPE_ZLIB);
2738 dhandle = deflate_decompress_new(DEFLATE_TYPE_ZLIB);
2739
2740 do {
2741 ret = fread(buf, 1, sizeof(buf), fp);
2742 if (ret <= 0) {
2743 deflate_compress_data(chandle, NULL, 0, DEFLATE_END_OF_DATA,
2744 (void **)&outbuf, &outlen);
2745 } else {
2746 dlen += ret;
2747 deflate_compress_data(chandle, buf, ret, DEFLATE_NO_FLUSH,
2748 (void **)&outbuf, &outlen);
2749 }
2750 if (outbuf) {
2751 clen += outlen;
2752 err = deflate_decompress_data(dhandle, outbuf, outlen,
2753 (void **)&outbuf2, &outlen2);
2754 sfree(outbuf);
2755 if (outbuf2) {
2756 if (outlen2)
2757 fwrite(outbuf2, 1, outlen2, stdout);
2758 sfree(outbuf2);
2759 }
2760 if (!err && ret <= 0) {
2761 /*
2762 * signal EOF
2763 */
2764 err = deflate_decompress_data(dhandle, NULL, 0,
2765 (void **)&outbuf2, &outlen2);
2766 assert(outbuf2 == NULL);
2767 }
2768 if (err) {
2769 fprintf(stderr, "decoding error: %s\n",
2770 deflate_error_msg[err]);
2771 return 1;
2772 }
2773 }
2774 } while (ret > 0);
2775
2776 fprintf(stderr, "%d plaintext -> %d compressed\n", dlen, clen);
2777
2778 return 0;
2779 }
2780
2781 #endif