7e2417cc |
1 | /* |
2 | * Reimplementation of Deflate (RFC1951) compression. Adapted from |
3 | * the version in PuTTY, and extended to write dynamic Huffman |
4 | * trees and choose block boundaries usefully. |
5 | */ |
6 | |
7 | /* |
8 | * TODO: |
9 | * |
10 | * - Feature: it would probably be useful to add a third format |
11 | * type to read and write actual gzip files. |
12 | * |
13 | * - Feature: the decompress function should return error codes |
14 | * indicating what kind of thing went wrong in a decoding error |
15 | * situation, possibly even including a file pointer. I envisage |
16 | * an enum of error codes in the header file, and one of those |
17 | * nasty preprocessor tricks to permit a user to define a |
18 | * code-to-text mapping array. |
19 | * |
20 | * - Feature: could do with forms of flush other than SYNC_FLUSH. |
21 | * I'm not sure exactly how those work when you don't know in |
22 | * advance that your next block will be static (as we did in |
23 | * PuTTY). And remember the 9-bit limitation of zlib. |
24 | * |
25 | * - Compression quality: introduce the option of choosing a |
26 | * static block instead of a dynamic one, where that's more |
27 | * efficient. |
28 | * |
29 | * - Compression quality: the actual LZ77 engine appears to be |
30 | * unable to track a match going beyond the input data passed to |
31 | * it in a single call. I'd prefer it to be more restartable |
32 | * than that: we ought to be able to pass in our input data in |
33 | * whatever size blocks happen to be convenient and not affect |
34 | * the output at all. |
35 | * |
36 | * - Compression quality: chooseblock() appears to be computing |
37 | * wildly inaccurate block size estimates. Possible resolutions: |
38 | * + find and fix some trivial bug I haven't spotted yet |
39 | * + abandon the entropic approximation and go with trial |
40 | * Huffman runs |
41 | * |
42 | * - Compression quality: see if increasing SYMLIMIT causes |
43 | * dynamic blocks to start being consistently smaller than it. |
44 | * |
45 | * - Compression quality: we ought to be able to fall right back |
46 | * to actual uncompressed blocks if really necessary, though |
47 | * it's not clear what the criterion for doing so would be. |
48 | * |
49 | * - Performance: chooseblock() is currently computing the whole |
50 | * entropic approximation for every possible block size. It |
51 | * ought to be able to update it incrementally as it goes along |
52 | * (assuming of course we don't jack it all in and go for a |
53 | * proper Huffman analysis). |
54 | */ |
55 | |
56 | /* |
57 | * This software is copyright 2000-2006 Simon Tatham. |
58 | * |
59 | * Permission is hereby granted, free of charge, to any person |
60 | * obtaining a copy of this software and associated documentation |
61 | * files (the "Software"), to deal in the Software without |
62 | * restriction, including without limitation the rights to use, |
63 | * copy, modify, merge, publish, distribute, sublicense, and/or |
64 | * sell copies of the Software, and to permit persons to whom the |
65 | * Software is furnished to do so, subject to the following |
66 | * conditions: |
67 | * |
68 | * The above copyright notice and this permission notice shall be |
69 | * included in all copies or substantial portions of the Software. |
70 | * |
71 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
72 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES |
73 | * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND |
74 | * NONINFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE |
75 | * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN |
76 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR |
77 | * IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN |
78 | * THE SOFTWARE. |
79 | */ |
80 | |
81 | #include <stdio.h> |
82 | #include <stddef.h> |
83 | #include <string.h> |
84 | #include <stdlib.h> |
85 | #include <assert.h> |
86 | #include <math.h> |
87 | |
88 | #include "deflate.h" |
89 | |
/*
 * Thin typed wrappers around malloc/realloc/free. None of them
 * checks for allocation failure, so the result may be NULL and
 * callers have to test it themselves.
 */
#define snew(type) ( (type *) malloc(sizeof(type)) )
#define snewn(n, type) ( (type *) malloc((n) * sizeof(type)) )
#define sresize(x, n, type) ( (type *) realloc((x), (n) * sizeof(type)) )
#define sfree(x) ( free((x)) )

/* Element count of a true array (meaningless if x is a pointer). */
#define lenof(x) (sizeof((x)) / sizeof(*(x)))
96 | |
/*
 * debug() takes a doubly parenthesised printf-style argument list,
 * e.g. debug(("x=%d\n", x)). With TESTDBG defined it prints to
 * stderr; otherwise it expands to nothing at all.
 */
#if defined TESTDBG
/* gcc-specific diagnostic macro */
#define debug_int(x...) ( fprintf(stderr, x) )
#define debug(x) ( debug_int x )
#else
#define debug(x)
#endif
104 | |
/* Boolean constants, defined only if nothing has provided FALSE yet. */
#ifndef FALSE
#define FALSE 0
#define TRUE (!FALSE)
#endif
109 | |
110 | /* ---------------------------------------------------------------------- |
111 | * Basic LZ77 code. This bit is designed modularly, so it could be |
112 | * ripped out and used in a different LZ77 compressor. Go to it, |
113 | * and good luck :-) |
114 | */ |
115 | |
struct LZ77InternalContext;
/*
 * Public handle for the LZ77 engine. `ictx' is filled in by
 * lz77_init(); the remaining fields belong to the user.
 */
struct LZ77Context {
    struct LZ77InternalContext *ictx;  /* private state, set by lz77_init() */
    void *userdata;                    /* not touched by the LZ77 code itself */
    /* Called by lz77_compress() for each byte emitted as a literal. */
    void (*literal) (struct LZ77Context * ctx, unsigned char c);
    /* Called by lz77_compress() for each back-reference it emits. */
    void (*match) (struct LZ77Context * ctx, int distance, int len);
};

/*
 * Initialise the private fields of an LZ77Context. It's up to the
 * user to initialise the public fields.
 *
 * Returns 1 on success, or 0 if the private state could not be
 * allocated.
 */
static int lz77_init(struct LZ77Context *ctx);

/*
 * Supply data to be compressed. Will update the private fields of
 * the LZ77Context, and will call literal() and match() to output.
 * If `compress' is FALSE, it will never emit a match, but will
 * instead call literal() for everything.
 */
static void lz77_compress(struct LZ77Context *ctx,
                          const unsigned char *data, int len, int compress);
138 | |
139 | /* |
140 | * Modifiable parameters. |
141 | */ |
/*
 * WINSIZE must be a power of 2 because window positions are wrapped
 * with `& (WINSIZE - 1)'. HASHCHARS is not freely tunable as things
 * stand: lz77_hash() reads exactly three bytes and lz77_compress()
 * records an initial match length of 3.
 */
#define WINSIZE 32768 /* window size. Must be power of 2! */
#define HASHMAX 2039 /* one more than max hash value */
#define MAXMATCH 32 /* how many matches we track */
#define HASHCHARS 3 /* how many chars make a hash */
146 | |
147 | /* |
148 | * This compressor takes a less slapdash approach than the |
149 | * gzip/zlib one. Rather than allowing our hash chains to fall into |
150 | * disuse near the far end, we keep them doubly linked so we can |
151 | * _find_ the far end, and then every time we add a new byte to the |
152 | * window (thus rolling round by one and removing the previous |
153 | * byte), we can carefully remove the hash chain entry. |
154 | */ |
155 | |
#define INVALID -1 /* invalid hash _and_ invalid offset */
/*
 * One slot of the sliding window: its links in the doubly linked
 * hash chain, plus the hash value it was filed under (so that the
 * chain head can be cleared when the slot is recycled).
 */
struct WindowEntry {
    short next, prev; /* array indices within the window */
    short hashval;    /* hash bucket this slot belongs to, or INVALID */
};
161 | |
/* Head of one hash chain; INVALID when the chain is empty. */
struct HashEntry {
    short first; /* window index of first in chain */
};
165 | |
/*
 * A candidate back-reference: `distance' bytes back from the current
 * position, matching for `len' bytes.
 */
struct Match {
    int distance, len;
};
169 | |
/* Private state of the LZ77 engine, allocated by lz77_init(). */
struct LZ77InternalContext {
    struct WindowEntry win[WINSIZE];    /* hash-chain links, one per window slot */
    unsigned char data[WINSIZE];        /* the window contents themselves */
    int winpos;                         /* next slot to be overwritten */
    struct HashEntry hashtab[HASHMAX];  /* chain heads, indexed by hash */
    /*
     * Trailing input bytes that could not be hashed yet because fewer
     * than HASHCHARS bytes were available; they are entered into the
     * window at the start of the next lz77_compress() call.
     */
    unsigned char pending[HASHCHARS];
    int npending;
};
178 | |
179 | static int lz77_hash(const unsigned char *data) |
180 | { |
181 | return (257 * data[0] + 263 * data[1] + 269 * data[2]) % HASHMAX; |
182 | } |
183 | |
184 | static int lz77_init(struct LZ77Context *ctx) |
185 | { |
186 | struct LZ77InternalContext *st; |
187 | int i; |
188 | |
189 | st = snew(struct LZ77InternalContext); |
190 | if (!st) |
191 | return 0; |
192 | |
193 | ctx->ictx = st; |
194 | |
195 | for (i = 0; i < WINSIZE; i++) |
196 | st->win[i].next = st->win[i].prev = st->win[i].hashval = INVALID; |
197 | for (i = 0; i < HASHMAX; i++) |
198 | st->hashtab[i].first = INVALID; |
199 | st->winpos = 0; |
200 | |
201 | st->npending = 0; |
202 | |
203 | return 1; |
204 | } |
205 | |
206 | static void lz77_advance(struct LZ77InternalContext *st, |
207 | unsigned char c, int hash) |
208 | { |
209 | int off; |
210 | |
211 | /* |
212 | * Remove the hash entry at winpos from the tail of its chain, |
213 | * or empty the chain if it's the only thing on the chain. |
214 | */ |
215 | if (st->win[st->winpos].prev != INVALID) { |
216 | st->win[st->win[st->winpos].prev].next = INVALID; |
217 | } else if (st->win[st->winpos].hashval != INVALID) { |
218 | st->hashtab[st->win[st->winpos].hashval].first = INVALID; |
219 | } |
220 | |
221 | /* |
222 | * Create a new entry at winpos and add it to the head of its |
223 | * hash chain. |
224 | */ |
225 | st->win[st->winpos].hashval = hash; |
226 | st->win[st->winpos].prev = INVALID; |
227 | off = st->win[st->winpos].next = st->hashtab[hash].first; |
228 | st->hashtab[hash].first = st->winpos; |
229 | if (off != INVALID) |
230 | st->win[off].prev = st->winpos; |
231 | st->data[st->winpos] = c; |
232 | |
233 | /* |
234 | * Advance the window pointer. |
235 | */ |
236 | st->winpos = (st->winpos + 1) & (WINSIZE - 1); |
237 | } |
238 | |
/*
 * Fetch the byte at offset k relative to the current input position,
 * for use inside lz77_compress() (it relies on the locals `st' and
 * `data'): non-negative offsets read the caller's input buffer,
 * negative ones reach back into the sliding window.
 *
 * Every use of k is parenthesised so that an argument containing a
 * low-precedence operator (e.g. a conditional expression) is still
 * evaluated as a unit.
 */
#define CHARAT(k) ( (k)<0 ? st->data[(st->winpos+(k))&(WINSIZE-1)] : data[(k)] )
240 | |
/*
 * Run `len' bytes of input through the LZ77 matcher, reporting the
 * result through ctx->literal() and ctx->match().
 *
 * Matching is lazy: when a match is found at some position it is not
 * emitted at once but deferred by one byte. If the next position
 * yields a match more than one byte longer, the deferred byte goes
 * out as a literal and the new match is deferred in its place;
 * otherwise the deferred match is emitted.
 *
 * Match lengths are bounded only by the amount of input left in this
 * call, and a match never extends past the end of `data'. The last
 * bytes of a call (fewer than HASHCHARS) cannot be hashed yet, so
 * they are parked in st->pending and entered into the window at the
 * start of the next call.
 *
 * If `compress' is zero no matches are looked for, and every byte is
 * emitted as a literal (the window is still kept up to date).
 */
static void lz77_compress(struct LZ77Context *ctx,
                          const unsigned char *data, int len, int compress)
{
    struct LZ77InternalContext *st = ctx->ictx;
    int i, hash, distance, off, nmatch, matchlen, advance;
    struct Match defermatch, matches[MAXMATCH];
    int deferchr;

    /*
     * Add any pending characters from last time to the window. (We
     * might not be able to.)
     */
    for (i = 0; i < st->npending; i++) {
        unsigned char foo[HASHCHARS];
        int j;
        if (len + st->npending - i < HASHCHARS) {
            /* Still too little data to hash: keep the rest pending. */
            /* Update the pending array. */
            for (j = i; j < st->npending; j++)
                st->pending[j - i] = st->pending[j];
            break;
        }
        /* Assemble HASHCHARS bytes straddling pending[] and data[]. */
        for (j = 0; j < HASHCHARS; j++)
            foo[j] = (i + j < st->npending ? st->pending[i + j] :
                      data[i + j - st->npending]);
        lz77_advance(st, foo[0], lz77_hash(foo));
    }
    /* i is now the number of pending bytes that reached the window. */
    st->npending -= i;

    defermatch.len = 0;
    deferchr = '\0';
    while (len > 0) {

        /* Don't even look for a match, if we're not compressing. */
        if (compress && len >= HASHCHARS) {
            /*
             * Hash the next few characters.
             */
            hash = lz77_hash(data);

            /*
             * Look the hash up in the corresponding hash chain and see
             * what we can find.
             */
            nmatch = 0;
            for (off = st->hashtab[hash].first;
                 off != INVALID; off = st->win[off].next) {
                /* distance = 1 if off == st->winpos-1 */
                /* distance = WINSIZE if off == st->winpos */
                distance =
                    WINSIZE - (off + WINSIZE - st->winpos) % WINSIZE;
                /* Hashes collide, so confirm the bytes really match. */
                for (i = 0; i < HASHCHARS; i++)
                    if (CHARAT(i) != CHARAT(i - distance))
                        break;
                if (i == HASHCHARS) {
                    matches[nmatch].distance = distance;
                    matches[nmatch].len = 3;
                    if (++nmatch >= MAXMATCH)
                        break;
                }
            }
        } else {
            nmatch = 0;
            hash = INVALID;
        }

        if (nmatch > 0) {
            /*
             * We've now filled up matches[] with nmatch potential
             * matches. Follow them down to find the longest. (We
             * assume here that it's always worth favouring a
             * longer match over a shorter one.)
             */
            matchlen = HASHCHARS;
            while (matchlen < len) {
                int j;
                /*
                 * Keep (in their original order) only the candidates
                 * that also match at offset matchlen.
                 */
                for (i = j = 0; i < nmatch; i++) {
                    if (CHARAT(matchlen) ==
                        CHARAT(matchlen - matches[i].distance)) {
                        matches[j++] = matches[i];
                    }
                }
                /* Nobody extends any further: matchlen is final. */
                if (j == 0)
                    break;
                matchlen++;
                nmatch = j;
            }

            /*
             * We've now got all the longest matches. We favour the
             * shorter distances, which means we go with matches[0].
             * So see if we want to defer it or throw it away.
             */
            matches[0].len = matchlen;
            if (defermatch.len > 0) {
                if (matches[0].len > defermatch.len + 1) {
                    /* We have a better match. Emit the deferred char,
                     * and defer this match. */
                    ctx->literal(ctx, (unsigned char) deferchr);
                    defermatch = matches[0];
                    deferchr = data[0];
                    advance = 1;
                } else {
                    /* We don't have a better match. Do the deferred one. */
                    ctx->match(ctx, defermatch.distance, defermatch.len);
                    /* One byte of the deferred match was already consumed. */
                    advance = defermatch.len - 1;
                    defermatch.len = 0;
                }
            } else {
                /* There was no deferred match. Defer this one. */
                defermatch = matches[0];
                deferchr = data[0];
                advance = 1;
            }
        } else {
            /*
             * We found no matches. Emit the deferred match, if
             * any; otherwise emit a literal.
             */
            if (defermatch.len > 0) {
                ctx->match(ctx, defermatch.distance, defermatch.len);
                /* One byte of the deferred match was already consumed. */
                advance = defermatch.len - 1;
                defermatch.len = 0;
            } else {
                ctx->literal(ctx, data[0]);
                advance = 1;
            }
        }

        /*
         * Now advance the position by `advance' characters,
         * keeping the window and hash chains consistent.
         */
        while (advance > 0) {
            if (len >= HASHCHARS) {
                lz77_advance(st, *data, lz77_hash(data));
            } else {
                /* Too close to the end to hash: park it for next time. */
                st->pending[st->npending++] = *data;
            }
            data++;
            len--;
            advance--;
        }
    }
}
385 | |
386 | /* ---------------------------------------------------------------------- |
387 | * Deflate functionality common to both compression and decompression. |
388 | */ |
389 | |
/*
 * Transmission order of the code-length alphabet: entry i names the
 * code-length symbol whose length is sent i-th in a dynamic block
 * header.
 */
static const unsigned char lenlenmap[] = {
    16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15
};
393 | |
#define MAXCODELEN 16

/*
 * Given a sequence of Huffman code lengths, compute the actual
 * codes, in the final form suitable for feeding to outbits (i.e.
 * already bit-mirrored).
 *
 * lengths: nsyms code lengths, each in the range 0 .. MAXCODELEN-1;
 *          0 means the symbol is unused.
 * codes:   receives nsyms codes; unused symbols get code 0.
 *
 * Returns the maximum code length found.
 */
static int hufcodes(const unsigned char *lengths, int *codes, int nsyms)
{
    int count[MAXCODELEN], startcode[MAXCODELEN];
    int code, maxlen;
    int i, j;

    /*
     * Count the codes of each length. Slot 0 is cleared as well:
     * unused symbols are tallied there, so it must not be left
     * indeterminate even though it never influences a code.
     */
    maxlen = 0;
    for (i = 0; i < MAXCODELEN; i++)
        count[i] = 0;
    for (i = 0; i < nsyms; i++) {
        assert(lengths[i] < MAXCODELEN);
        count[lengths[i]]++;
        if (maxlen < lengths[i])
            maxlen = lengths[i];
    }
    /*
     * Determine the starting code for each length block. Length 0
     * gets a defined (and irrelevant) start of 0, because the loop
     * below post-increments it for every unused symbol.
     */
    code = 0;
    startcode[0] = 0;
    for (i = 1; i < MAXCODELEN; i++) {
        startcode[i] = code;
        code += count[i];
        code <<= 1;
    }
    /* Determine the code for each symbol. Mirrored, of course. */
    for (i = 0; i < nsyms; i++) {
        code = startcode[lengths[i]]++;
        codes[i] = 0;
        for (j = 0; j < lengths[i]; j++) {
            codes[i] = (codes[i] << 1) | (code & 1);
            code >>= 1;
        }
    }

    return maxlen;
}
437 | |
438 | /* ---------------------------------------------------------------------- |
439 | * Deflate compression. |
440 | */ |
441 | |
/*
 * Buffered output symbols are single integers. The top two bits
 * (SYMPFX_MASK) say which alphabet the symbol belongs to, or that it
 * is a run of raw extra bits. For extra bits, the bit count sits in
 * the four bits below the prefix (SYM_EXTRABITS_MASK, shifted down
 * by SYM_EXTRABITS_SHIFT) and the value in the remaining low bits.
 *
 * SYMLIMIT is the modulus used when indexing the symbol buffer.
 */
#define SYMLIMIT 65536
#define SYMPFX_LITLEN 0x00000000U
#define SYMPFX_DIST 0x40000000U
#define SYMPFX_EXTRABITS 0x80000000U
#define SYMPFX_CODELEN 0xC0000000U
#define SYMPFX_MASK 0xC0000000U

#define SYM_EXTRABITS_MASK 0x3C000000U
#define SYM_EXTRABITS_SHIFT 26
451 | |
/*
 * State of one compression stream.
 *
 * NOTE(review): firstblock, nsyms, type, adler32 and finished are not
 * used by any code visible in this part of the file; their meaning
 * should be confirmed against the rest of the file.
 */
struct deflate_compress_ctx {
    struct LZ77Context *lzc;
    unsigned char *outbuf;      /* output bytes; grown by outbits() */
    int outlen, outsize;        /* bytes used / allocated in outbuf */
    unsigned long outbits;      /* bits queued by outbits(), lowest bit first */
    int noutbits;               /* number of valid bits in `outbits' */
    int firstblock;
    unsigned long *syms;        /* symbol buffer, indexed modulo SYMLIMIT */
    int symstart, nsyms;        /* symstart: index of the first buffered symbol */
    int type;
    unsigned long adler32;
    int lastblock;              /* nonzero makes outblock() set BFINAL */
    int finished;
#ifdef STATISTICS
    unsigned long bitcount;     /* running total of bits given to outbits() */
#endif
};
469 | |
470 | static void outbits(deflate_compress_ctx *out, |
471 | unsigned long bits, int nbits) |
472 | { |
473 | assert(out->noutbits + nbits <= 32); |
474 | out->outbits |= bits << out->noutbits; |
475 | out->noutbits += nbits; |
476 | while (out->noutbits >= 8) { |
477 | if (out->outlen >= out->outsize) { |
478 | out->outsize = out->outlen + 64; |
479 | out->outbuf = sresize(out->outbuf, out->outsize, unsigned char); |
480 | } |
481 | out->outbuf[out->outlen++] = (unsigned char) (out->outbits & 0xFF); |
482 | out->outbits >>= 8; |
483 | out->noutbits -= 8; |
484 | } |
485 | #ifdef STATISTICS |
486 | out->bitcount += nbits; |
487 | #endif |
488 | } |
489 | |
/*
 * Min-heap helpers for buildhuf(). The heap lives in a flat array of
 * ints, two per node: the user data at an even index and its key at
 * the following odd index. Node positions are therefore always even
 * array indices, and all lengths are counted in ints, not in nodes.
 */
#define HEAPPARENT(x) (((x)-2)/4*2)
#define HEAPLEFT(x) ((x)*2+2)
#define HEAPRIGHT(x) ((x)*2+4)

/* Exchange the (userdata, key) pairs stored at positions a and b. */
static void heap_swap_nodes(int *heap, int a, int b)
{
    int held;

    held = heap[a];
    heap[a] = heap[b];
    heap[b] = held;

    held = heap[a + 1];
    heap[a + 1] = heap[b + 1];
    heap[b + 1] = held;
}

/*
 * Insert (userdata, key) into a heap currently `len' ints long.
 * Returns the new length, len + 2.
 */
static int addheap(int *heap, int len, int userdata, int key)
{
    int pos = len;

    heap[pos] = userdata;
    heap[pos + 1] = key;
    len += 2;

    /* Sift the newcomer up while it is strictly smaller than its parent. */
    while (pos > 0) {
        int up = HEAPPARENT(pos);

        if (heap[pos + 1] >= heap[up + 1])
            break;
        heap_swap_nodes(heap, pos, up);
        pos = up;
    }

    return len;
}

/*
 * Remove the node with the smallest key from a non-empty heap of
 * `len' ints, storing it in *userdata and *key. Returns the new
 * length, len - 2.
 */
static int rmheap(int *heap, int len, int *userdata, int *key)
{
    int pos = 0;

    len -= 2;
    *userdata = heap[0];
    *key = heap[1];

    /* Move the last node to the root, then sift it down. */
    heap[0] = heap[len];
    heap[1] = heap[len + 1];

    for (;;) {
        int left = HEAPLEFT(pos);
        int right = HEAPRIGHT(pos);
        int child;

        if (left >= len)
            break;

        /* Pick the smaller child; on a tie the right one is chosen. */
        if (right < len && heap[right + 1] <= heap[left + 1])
            child = right;
        else
            child = left;

        if (heap[pos + 1] <= heap[child + 1])
            break;
        heap_swap_nodes(heap, pos, child);
        pos = child;
    }

    return len;
}
550 | |
/*
 * Plain Huffman construction: turns nsyms symbol frequencies into
 * nsyms code lengths.
 *
 * Symbols with a frequency of zero or less are left out of the tree
 * and get length 0. If only one symbol is in use, no merge ever
 * happens and that symbol gets length 0 as well.
 *
 * The one format-specific touch is that lengths are clamped to 255
 * so that they fit in an unsigned char; nothing here enforces the
 * much smaller Deflate limits.
 */
#define HUFMAX 286
static void buildhuf(const int *freqs, unsigned char *lengths, int nsyms)
{
    int up[2*HUFMAX-1];        /* parent of each node; 0 means none */
    int depth[2*HUFMAX-1];     /* distance of each node from the root */
    int heap[2*HUFMAX];
    int heaplen = 0;
    int nextnode = HUFMAX;     /* internal nodes are numbered from here */
    int sym, node;

    assert(nsyms <= HUFMAX);

    memset(up, 0, sizeof(up));

    /* Seed the heap with every symbol that actually occurs. */
    for (sym = 0; sym < nsyms; sym++) {
        if (freqs[sym] <= 0)
            continue;
        heaplen = addheap(heap, heaplen, sym, freqs[sym]);
    }

    /*
     * While more than one node remains (the heap holds two ints per
     * node), merge the two lightest under a fresh internal node.
     */
    while (heaplen > 2) {
        int first, second, wfirst, wsecond;

        heaplen = rmheap(heap, heaplen, &first, &wfirst);
        heaplen = rmheap(heap, heaplen, &second, &wsecond);
        up[first] = nextnode;
        up[second] = nextnode;
        heaplen = addheap(heap, heaplen, nextnode, wfirst + wsecond);
        nextnode++;
    }

    /*
     * A parent always has a higher index than its children, so one
     * descending pass, starting just below the root, sees every
     * parent's depth before it is needed. The root keeps depth 0.
     */
    memset(depth, 0, sizeof(depth));
    for (node = nextnode - 2; node >= 0; node--) {
        if (up[node] > 0)
            depth[node] = depth[up[node]] + 1;
    }

    /* Hand back the leaf depths, clamped to fit an unsigned char. */
    for (sym = 0; sym < nsyms; sym++) {
        if (depth[sym] > 255)
            lengths[sym] = 255;
        else
            lengths[sym] = depth[sym];
    }
}
619 | |
/*
 * Wrapper around buildhuf() which enforces the Deflate restriction
 * that no code length may exceed `limit' bits (15 for the main
 * alphabets, 7 for the auxiliary code length alphabet). This
 * function has the same calling semantics as buildhuf(), except
 * that it might modify the freqs array, and that a symbol which is
 * the only one in use is given a one-bit code rather than a
 * zero-bit one.
 */
static void deflate_buildhuf(int *freqs, unsigned char *lengths,
                             int nsyms, int limit)
{
    int smallestfreq, totalfreq, nactivesyms;
    int num, denom, adjust;
    int i, only;
    int maxprob, fibnext;

    /*
     * First, try building the Huffman table the normal way. If
     * this works, it's optimal, so we don't want to mess with it.
     */
    buildhuf(freqs, lengths, nsyms);

    /*
     * Special case: with exactly one symbol in use, buildhuf()
     * never performs a merge and reports a code length of zero for
     * it. Deflate requires a lone code to be sent with one bit, not
     * zero bits, so give it length 1 (the other one-bit code is
     * simply left unused).
     */
    nactivesyms = 0;
    only = -1;
    for (i = 0; i < nsyms; i++) {
        if (freqs[i] > 0) {
            nactivesyms++;
            only = i;
        }
    }
    if (nactivesyms == 1) {
        lengths[only] = 1;
        return;
    }

    for (i = 0; i < nsyms; i++)
        if (lengths[i] > limit)
            break;

    if (i == nsyms)
        return; /* OK */

    /*
     * The Huffman algorithm can only ever generate a code length
     * of N bits or more if there is a symbol whose probability is
     * less than the reciprocal of the (N+2)th Fibonacci number
     * (counting from F_0=0 and F_1=1), i.e. 1/2584 for N=16, or
     * 1/55 for N=8. (This is a necessary though not sufficient
     * condition.)
     *
     * Why is this? Well, consider the input symbol with the
     * smallest probability. Let that probability be x. In order
     * for this symbol to have a code length of at least 1, the
     * Huffman algorithm will have to merge it with some other
     * node; and since x is the smallest probability, the node it
     * gets merged with must be at least x. Thus, the probability
     * of the resulting combined node will be at least 2x. Now in
     * order for our node to reach depth 2, this 2x-node must be
     * merged again. But what with? We can't assume the node it
     * merges with is at least 2x, because this one might only be
     * the _second_ smallest remaining node. But we do know the
     * node it merges with must be at least x, so our order-2
     * internal node is at least 3x.
     *
     * How small a node can merge with _that_ to get an order-3
     * internal node? Well, it must be at least 2x, because if it
     * was smaller than that then it would have been one of the two
     * smallest nodes in the previous step and been merged at that
     * point. So at least 3x, plus at least 2x, comes to at least
     * 5x for an order-3 node.
     *
     * And so it goes on: at every stage we must merge our current
     * node with a node at least as big as the bigger of this one's
     * two parents, and from this starting point that gives rise to
     * the Fibonacci sequence. So we find that in order to have a
     * node n levels deep (i.e. a maximum code length of n), the
     * overall probability of the root of the entire tree must be
     * at least F_{n+2} times the probability of the rarest symbol.
     * In other words, since the overall probability is 1, it is a
     * necessary condition for a code length of 16 or more that
     * there must be at least one symbol with probability <=
     * 1/F_18.
     *
     * (To demonstrate that a probability this big really can give
     * rise to a code length of 16, consider the set of input
     * frequencies { 1-epsilon, 1, 1, 2, 3, 5, 8, 13, 21, 34, 55,
     * 89, 144, 233, 377, 610, 987 }, for arbitrarily small
     * epsilon.)
     *
     * So here buildhuf() has returned us an overlong code. So to
     * ensure it doesn't do it again, we add a constant to all the
     * (non-zero) symbol frequencies, causing them to become more
     * balanced and removing the danger. We can then feed the
     * results back to the standard buildhuf() and be
     * assert()-level confident that the resulting code lengths
     * contain nothing outside the permitted range.
     *
     * The length we must rule out is N = limit+1, so the threshold
     * is F_{limit+3}: 2584 for limit 15 and 55 for limit 7. It is
     * computed rather than looked up so that it always agrees with
     * whatever limit the caller passed.
     */
    assert(limit >= 1 && limit <= 30);  /* keeps F_{limit+4} inside an int */
    maxprob = 0;                        /* F_0 */
    fibnext = 1;                        /* F_1 */
    for (i = 0; i < limit + 3; i++) {
        int sum = maxprob + fibnext;
        maxprob = fibnext;
        fibnext = sum;
    }
    /* maxprob is now F_{limit+3}. */

    totalfreq = nactivesyms = 0;
    smallestfreq = -1;
    for (i = 0; i < nsyms; i++) {
        if (freqs[i] == 0)
            continue;
        if (smallestfreq < 0 || smallestfreq > freqs[i])
            smallestfreq = freqs[i];
        totalfreq += freqs[i];
        nactivesyms++;
    }
    assert(smallestfreq <= totalfreq / maxprob);

    /*
     * We want to find the smallest integer `adjust' such that
     * (totalfreq + nactivesyms * adjust) / (smallestfreq +
     * adjust) is less than maxprob. A bit of algebra tells us
     * that `adjust' must be strictly greater than
     *
     *   totalfreq - maxprob * smallestfreq
     *   ----------------------------------
     *         maxprob - nactivesyms
     *
     * The numerator is non-negative (that is what the assertion
     * above says), and the denominator must be positive for any
     * adjustment to be able to work at all. The smallest integer
     * strictly above the quotient is its floor plus one; merely
     * rounding up would leave the ratio exactly equal to maxprob
     * whenever the division came out even.
     */
    num = totalfreq - smallestfreq * maxprob;
    denom = maxprob - nactivesyms;
    assert(denom > 0);
    adjust = num / denom + 1;

    /*
     * Now add `adjust' to all the input symbol frequencies.
     */
    for (i = 0; i < nsyms; i++)
        if (freqs[i] != 0)
            freqs[i] += adjust;

    /*
     * Rebuild the Huffman tree...
     */
    buildhuf(freqs, lengths, nsyms);

    /*
     * ... and this time it ought to be OK.
     */
    for (i = 0; i < nsyms; i++)
        assert(lengths[i] <= limit);
}
751 | |
/*
 * The three Huffman codes in force while a block is being written,
 * each as a pair of arrays indexed by symbol value: the code length
 * in bits, and the (bit-mirrored) code itself. The arrays are owned
 * by whoever fills this structure in.
 */
struct huftrees {
    unsigned char *len_litlen;    /* literal/length alphabet */
    int *code_litlen;
    unsigned char *len_dist;      /* distance alphabet */
    int *code_dist;
    unsigned char *len_codelen;   /* code-length alphabet */
    int *code_codelen;
};
760 | |
761 | /* |
762 | * Write out a single symbol, given the three Huffman trees. |
763 | */ |
764 | static void writesym(deflate_compress_ctx *out, |
765 | unsigned sym, struct huftrees *trees) |
766 | { |
767 | unsigned basesym = sym &~ SYMPFX_MASK; |
768 | int i; |
769 | |
770 | switch (sym & SYMPFX_MASK) { |
771 | case SYMPFX_LITLEN: |
772 | debug(("send: litlen %d\n", basesym)); |
773 | outbits(out, trees->code_litlen[basesym], trees->len_litlen[basesym]); |
774 | break; |
775 | case SYMPFX_DIST: |
776 | debug(("send: dist %d\n", basesym)); |
777 | outbits(out, trees->code_dist[basesym], trees->len_dist[basesym]); |
778 | break; |
779 | case SYMPFX_CODELEN: |
780 | debug(("send: codelen %d\n", basesym)); |
781 | outbits(out, trees->code_codelen[basesym],trees->len_codelen[basesym]); |
782 | break; |
783 | case SYMPFX_EXTRABITS: |
784 | i = basesym >> SYM_EXTRABITS_SHIFT; |
785 | basesym &= ~SYM_EXTRABITS_MASK; |
786 | debug(("send: extrabits %d/%d\n", basesym, i)); |
787 | outbits(out, basesym, i); |
788 | break; |
789 | } |
790 | } |
791 | |
792 | static void outblock(deflate_compress_ctx *out, |
793 | int blklen, int dynamic) |
794 | { |
795 | int freqs1[286], freqs2[30], freqs3[19]; |
796 | unsigned char len1[286], len2[30], len3[19]; |
797 | int code1[286], code2[30], code3[19]; |
798 | int hlit, hdist, hclen, bfinal, btype; |
799 | int treesrc[286 + 30]; |
800 | int treesyms[286 + 30]; |
801 | int codelen[19]; |
802 | int i, ntreesrc, ntreesyms; |
803 | struct huftrees ht; |
804 | #ifdef STATISTICS |
805 | unsigned long bitcount_before; |
806 | #endif |
807 | |
808 | ht.len_litlen = len1; |
809 | ht.len_dist = len2; |
810 | ht.len_codelen = len3; |
811 | ht.code_litlen = code1; |
812 | ht.code_dist = code2; |
813 | ht.code_codelen = code3; |
814 | |
815 | /* |
816 | * Build the two main Huffman trees. |
817 | */ |
818 | if (dynamic) { |
819 | /* |
820 | * Count up the frequency tables. |
821 | */ |
822 | memset(freqs1, 0, sizeof(freqs1)); |
823 | memset(freqs2, 0, sizeof(freqs2)); |
824 | freqs1[256] = 1; /* we're bound to need one EOB */ |
825 | for (i = 0; i < blklen; i++) { |
826 | unsigned sym = out->syms[(out->symstart + i) % SYMLIMIT]; |
827 | |
828 | /* |
829 | * Increment the occurrence counter for this symbol, if |
830 | * it's in one of the Huffman alphabets and isn't extra |
831 | * bits. |
832 | */ |
833 | if ((sym & SYMPFX_MASK) == SYMPFX_LITLEN) { |
834 | sym &= ~SYMPFX_MASK; |
835 | assert(sym < lenof(freqs1)); |
836 | freqs1[sym]++; |
837 | } else if ((sym & SYMPFX_MASK) == SYMPFX_DIST) { |
838 | sym &= ~SYMPFX_MASK; |
839 | assert(sym < lenof(freqs2)); |
840 | freqs2[sym]++; |
841 | } |
842 | } |
843 | deflate_buildhuf(freqs1, len1, lenof(freqs1), 15); |
844 | deflate_buildhuf(freqs2, len2, lenof(freqs2), 15); |
845 | } else { |
846 | /* |
847 | * Fixed static trees. |
848 | */ |
849 | for (i = 0; i < lenof(len1); i++) |
850 | len1[i] = (i < 144 ? 8 : |
851 | i < 256 ? 9 : |
852 | i < 280 ? 7 : 8); |
853 | for (i = 0; i < lenof(len2); i++) |
854 | len2[i] = 5; |
855 | } |
856 | hufcodes(len1, code1, lenof(freqs1)); |
857 | hufcodes(len2, code2, lenof(freqs2)); |
858 | |
859 | if (dynamic) { |
860 | /* |
861 | * Determine HLIT and HDIST. |
862 | */ |
863 | for (hlit = 286; hlit > 257 && len1[hlit-1] == 0; hlit--); |
864 | for (hdist = 30; hdist > 1 && len2[hdist-1] == 0; hdist--); |
865 | |
866 | /* |
867 | * Write out the list of symbols used to transmit the |
868 | * trees. |
869 | */ |
870 | ntreesrc = 0; |
871 | for (i = 0; i < hlit; i++) |
872 | treesrc[ntreesrc++] = len1[i]; |
873 | for (i = 0; i < hdist; i++) |
874 | treesrc[ntreesrc++] = len2[i]; |
875 | ntreesyms = 0; |
876 | for (i = 0; i < ntreesrc ;) { |
877 | int j = 1; |
878 | int k; |
879 | |
880 | /* Find length of run of the same length code. */ |
881 | while (i+j < ntreesrc && treesrc[i+j] == treesrc[i]) |
882 | j++; |
883 | |
884 | /* Encode that run as economically as we can. */ |
885 | k = j; |
886 | if (treesrc[i] == 0) { |
887 | /* |
888 | * Zero code length: we can output run codes for |
889 | * 3-138 zeroes. So if we have fewer than 3 zeroes, |
890 | * we just output literals. Otherwise, we output |
891 | * nothing but run codes, and tweak their lengths |
892 | * to make sure we aren't left with under 3 at the |
893 | * end. |
894 | */ |
895 | if (k < 3) { |
896 | while (k--) |
897 | treesyms[ntreesyms++] = 0 | SYMPFX_CODELEN; |
898 | } else { |
899 | while (k > 0) { |
900 | int rpt = (k < 138 ? k : 138); |
901 | if (rpt > k-3 && rpt < k) |
902 | rpt = k-3; |
903 | assert(rpt >= 3 && rpt <= 138); |
904 | if (rpt < 11) { |
905 | treesyms[ntreesyms++] = 17 | SYMPFX_CODELEN; |
906 | treesyms[ntreesyms++] = |
907 | (SYMPFX_EXTRABITS | (rpt - 3) | |
908 | (3 << SYM_EXTRABITS_SHIFT)); |
909 | } else { |
910 | treesyms[ntreesyms++] = 18 | SYMPFX_CODELEN; |
911 | treesyms[ntreesyms++] = |
912 | (SYMPFX_EXTRABITS | (rpt - 11) | |
913 | (7 << SYM_EXTRABITS_SHIFT)); |
914 | } |
915 | k -= rpt; |
916 | } |
917 | } |
918 | } else { |
919 | /* |
920 | * Non-zero code length: we must output the first |
921 | * one explicitly, then we can output a copy code |
922 | * for 3-6 repeats. So if we have fewer than 4 |
923 | * repeats, we _just_ output literals. Otherwise, |
924 | * we output one literal plus at least one copy |
925 | * code, and tweak the copy codes to make sure we |
926 | * aren't left with under 3 at the end. |
927 | */ |
928 | assert(treesrc[i] < 16); |
929 | treesyms[ntreesyms++] = treesrc[i] | SYMPFX_CODELEN; |
930 | k--; |
931 | if (k < 3) { |
932 | while (k--) |
933 | treesyms[ntreesyms++] = treesrc[i] | SYMPFX_CODELEN; |
934 | } else { |
935 | while (k > 0) { |
936 | int rpt = (k < 6 ? k : 6); |
937 | if (rpt > k-3 && rpt < k) |
938 | rpt = k-3; |
939 | assert(rpt >= 3 && rpt <= 6); |
940 | treesyms[ntreesyms++] = 16 | SYMPFX_CODELEN; |
941 | treesyms[ntreesyms++] = (SYMPFX_EXTRABITS | (rpt - 3) | |
942 | (2 << SYM_EXTRABITS_SHIFT)); |
943 | k -= rpt; |
944 | } |
945 | } |
946 | } |
947 | |
948 | i += j; |
949 | } |
950 | assert((unsigned)ntreesyms < lenof(treesyms)); |
951 | |
952 | /* |
953 | * Count up the frequency table for the tree-transmission |
954 | * symbols, and build the auxiliary Huffman tree for that. |
955 | */ |
956 | memset(freqs3, 0, sizeof(freqs3)); |
957 | for (i = 0; i < ntreesyms; i++) { |
958 | unsigned sym = treesyms[i]; |
959 | |
960 | /* |
961 | * Increment the occurrence counter for this symbol, if |
962 | * it's the Huffman alphabet and isn't extra bits. |
963 | */ |
964 | if ((sym & SYMPFX_MASK) == SYMPFX_CODELEN) { |
965 | sym &= ~SYMPFX_MASK; |
966 | assert(sym < lenof(freqs3)); |
967 | freqs3[sym]++; |
968 | } |
969 | } |
970 | deflate_buildhuf(freqs3, len3, lenof(freqs3), 7); |
971 | hufcodes(len3, code3, lenof(freqs3)); |
972 | |
973 | /* |
974 | * Reorder the code length codes into transmission order, and |
975 | * determine HCLEN. |
976 | */ |
977 | for (i = 0; i < 19; i++) |
978 | codelen[i] = len3[lenlenmap[i]]; |
979 | for (hclen = 19; hclen > 4 && codelen[hclen-1] == 0; hclen--); |
980 | } |
981 | |
982 | /* |
983 | * Actually transmit the block. |
984 | */ |
985 | |
986 | /* 3-bit block header */ |
987 | bfinal = (out->lastblock ? 1 : 0); |
988 | btype = dynamic ? 2 : 1; |
989 | debug(("send: bfinal=%d btype=%d\n", bfinal, btype)); |
990 | outbits(out, bfinal, 1); |
991 | outbits(out, btype, 2); |
992 | |
993 | #ifdef STATISTICS |
994 | bitcount_before = out->bitcount; |
995 | #endif |
996 | |
997 | if (dynamic) { |
998 | /* HLIT, HDIST and HCLEN */ |
999 | debug(("send: hlit=%d hdist=%d hclen=%d\n", hlit, hdist, hclen)); |
1000 | outbits(out, hlit - 257, 5); |
1001 | outbits(out, hdist - 1, 5); |
1002 | outbits(out, hclen - 4, 4); |
1003 | |
1004 | /* Code lengths for the auxiliary tree */ |
1005 | for (i = 0; i < hclen; i++) { |
1006 | debug(("send: lenlen %d\n", codelen[i])); |
1007 | outbits(out, codelen[i], 3); |
1008 | } |
1009 | |
1010 | /* Code lengths for the literal/length and distance trees */ |
1011 | for (i = 0; i < ntreesyms; i++) |
1012 | writesym(out, treesyms[i], &ht); |
1013 | #ifdef STATISTICS |
1014 | fprintf(stderr, "total tree size %lu bits\n", |
1015 | out->bitcount - bitcount_before); |
1016 | #endif |
1017 | } |
1018 | |
1019 | /* Output the actual symbols from the buffer */ |
1020 | for (i = 0; i < blklen; i++) { |
1021 | unsigned sym = out->syms[(out->symstart + i) % SYMLIMIT]; |
1022 | writesym(out, sym, &ht); |
1023 | } |
1024 | |
1025 | /* Output the end-of-data symbol */ |
1026 | writesym(out, SYMPFX_LITLEN | 256, &ht); |
1027 | |
1028 | /* |
1029 | * Remove all the just-output symbols from the symbol buffer by |
1030 | * adjusting symstart and nsyms. |
1031 | */ |
1032 | out->symstart = (out->symstart + blklen) % SYMLIMIT; |
1033 | out->nsyms -= blklen; |
1034 | } |
1035 | |
1036 | static void outblock_wrapper(deflate_compress_ctx *out, |
1037 | int best_dynamic_len) |
1038 | { |
1039 | /* |
1040 | * Final block choice function: we have the option of either |
1041 | * outputting a dynamic block of length best_dynamic_len, or a |
1042 | * static block of length out->nsyms. Whichever gives us the |
1043 | * best value for money, we do. |
1044 | * |
1045 | * FIXME: currently we always choose dynamic except for empty |
1046 | * blocks. We should make a sensible judgment. |
1047 | */ |
1048 | if (out->nsyms == 0) |
1049 | outblock(out, 0, FALSE); |
1050 | else |
1051 | outblock(out, best_dynamic_len, TRUE); |
1052 | } |
1053 | |
1054 | static void chooseblock(deflate_compress_ctx *out) |
1055 | { |
1056 | int freqs1[286], freqs2[30]; |
1057 | int i, bestlen; |
1058 | double bestvfm; |
1059 | int nextrabits; |
1060 | |
1061 | memset(freqs1, 0, sizeof(freqs1)); |
1062 | memset(freqs2, 0, sizeof(freqs2)); |
1063 | freqs1[256] = 1; /* we're bound to need one EOB */ |
1064 | nextrabits = 0; |
1065 | |
1066 | /* |
1067 | * Iterate over all possible block lengths, computing the |
1068 | * entropic coding approximation to the final length at every |
1069 | * stage. We divide the result by the number of symbols |
1070 | * encoded, to determine the `value for money' (overall |
1071 | * bits-per-symbol count) of a block of that length. |
1072 | */ |
1073 | bestlen = -1; |
1074 | bestvfm = 0.0; |
1075 | for (i = 0; i < out->nsyms; i++) { |
1076 | unsigned sym = out->syms[(out->symstart + i) % SYMLIMIT]; |
1077 | |
1078 | if (i > 0 && (sym & SYMPFX_MASK) == SYMPFX_LITLEN) { |
1079 | /* |
1080 | * This is a viable point at which to end the block. |
1081 | * Compute the length approximation and hence the value |
1082 | * for money. |
1083 | */ |
1084 | double len = 0.0, vfm; |
1085 | int k; |
1086 | int total; |
1087 | |
1088 | /* |
1089 | * FIXME: we should be doing this incrementally, rather |
1090 | * than recomputing the whole thing at every byte |
1091 | * position. Also, can we fiddle the logs somehow to |
1092 | * avoid having to do floating point? |
1093 | */ |
1094 | total = 0; |
1095 | for (k = 0; k < (int)lenof(freqs1); k++) { |
1096 | if (freqs1[k]) |
1097 | len -= freqs1[k] * log(freqs1[k]); |
1098 | total += freqs1[k]; |
1099 | } |
1100 | if (total) |
1101 | len += total * log(total); |
1102 | total = 0; |
1103 | for (k = 0; k < (int)lenof(freqs2); k++) { |
1104 | if (freqs2[k]) |
1105 | len -= freqs2[k] * log(freqs2[k]); |
1106 | total += freqs2[k]; |
1107 | } |
1108 | if (total) |
1109 | len += total * log(total); |
1110 | len /= log(2); |
1111 | len += nextrabits; |
1112 | len += 300; /* very approximate size of the Huffman trees */ |
1113 | |
1114 | vfm = i / len; /* symbols encoded per bit */ |
1115 | /* fprintf(stderr, "chooseblock: i=%d gives len %g, vfm %g\n", i, len, vfm); */ |
1116 | if (bestlen < 0 || vfm > bestvfm) { |
1117 | bestlen = i; |
1118 | bestvfm = vfm; |
1119 | } |
1120 | } |
1121 | |
1122 | /* |
1123 | * Increment the occurrence counter for this symbol, if |
1124 | * it's in one of the Huffman alphabets and isn't extra |
1125 | * bits. |
1126 | */ |
1127 | if ((sym & SYMPFX_MASK) == SYMPFX_LITLEN) { |
1128 | sym &= ~SYMPFX_MASK; |
1129 | assert(sym < lenof(freqs1)); |
1130 | freqs1[sym]++; |
1131 | } else if ((sym & SYMPFX_MASK) == SYMPFX_DIST) { |
1132 | sym &= ~SYMPFX_MASK; |
1133 | assert(sym < lenof(freqs2)); |
1134 | freqs2[sym]++; |
1135 | } else if ((sym & SYMPFX_MASK) == SYMPFX_EXTRABITS) { |
1136 | nextrabits += (sym &~ SYMPFX_MASK) >> SYM_EXTRABITS_SHIFT; |
1137 | } |
1138 | } |
1139 | |
1140 | assert(bestlen > 0); |
1141 | |
1142 | /* fprintf(stderr, "chooseblock: bestlen %d, bestvfm %g\n", bestlen, bestvfm); */ |
1143 | outblock_wrapper(out, bestlen); |
1144 | } |
1145 | |
1146 | /* |
1147 | * Force the current symbol buffer to be flushed out as a single |
1148 | * block. |
1149 | */ |
1150 | static void flushblock(deflate_compress_ctx *out) |
1151 | { |
1152 | /* |
1153 | * Because outblock_wrapper guarantees to output either a |
1154 | * dynamic block of the given length or a static block of |
1155 | * length out->nsyms, we know that passing out->nsyms as the |
1156 | * given length will definitely result in us using up the |
1157 | * entire buffer. |
1158 | */ |
1159 | outblock_wrapper(out, out->nsyms); |
1160 | assert(out->nsyms == 0); |
1161 | } |
1162 | |
1163 | /* |
1164 | * Place a symbol into the symbols buffer. |
1165 | */ |
1166 | static void outsym(deflate_compress_ctx *out, unsigned long sym) |
1167 | { |
1168 | assert(out->nsyms < SYMLIMIT); |
1169 | out->syms[(out->symstart + out->nsyms++) % SYMLIMIT] = sym; |
1170 | |
1171 | if (out->nsyms == SYMLIMIT) |
1172 | chooseblock(out); |
1173 | } |
1174 | |
/*
 * One row of a length or distance code table: symbol `code' stands
 * for every value in the inclusive range [min, max], and is followed
 * by `extrabits' extra bits giving the offset of the value from min.
 */
typedef struct {
    short code;
    short extrabits;
    int min;
    int max;
} coderecord;
1179 | |
1180 | static const coderecord lencodes[] = { |
1181 | {257, 0, 3, 3}, |
1182 | {258, 0, 4, 4}, |
1183 | {259, 0, 5, 5}, |
1184 | {260, 0, 6, 6}, |
1185 | {261, 0, 7, 7}, |
1186 | {262, 0, 8, 8}, |
1187 | {263, 0, 9, 9}, |
1188 | {264, 0, 10, 10}, |
1189 | {265, 1, 11, 12}, |
1190 | {266, 1, 13, 14}, |
1191 | {267, 1, 15, 16}, |
1192 | {268, 1, 17, 18}, |
1193 | {269, 2, 19, 22}, |
1194 | {270, 2, 23, 26}, |
1195 | {271, 2, 27, 30}, |
1196 | {272, 2, 31, 34}, |
1197 | {273, 3, 35, 42}, |
1198 | {274, 3, 43, 50}, |
1199 | {275, 3, 51, 58}, |
1200 | {276, 3, 59, 66}, |
1201 | {277, 4, 67, 82}, |
1202 | {278, 4, 83, 98}, |
1203 | {279, 4, 99, 114}, |
1204 | {280, 4, 115, 130}, |
1205 | {281, 5, 131, 162}, |
1206 | {282, 5, 163, 194}, |
1207 | {283, 5, 195, 226}, |
1208 | {284, 5, 227, 257}, |
1209 | {285, 0, 258, 258}, |
1210 | }; |
1211 | |
1212 | static const coderecord distcodes[] = { |
1213 | {0, 0, 1, 1}, |
1214 | {1, 0, 2, 2}, |
1215 | {2, 0, 3, 3}, |
1216 | {3, 0, 4, 4}, |
1217 | {4, 1, 5, 6}, |
1218 | {5, 1, 7, 8}, |
1219 | {6, 2, 9, 12}, |
1220 | {7, 2, 13, 16}, |
1221 | {8, 3, 17, 24}, |
1222 | {9, 3, 25, 32}, |
1223 | {10, 4, 33, 48}, |
1224 | {11, 4, 49, 64}, |
1225 | {12, 5, 65, 96}, |
1226 | {13, 5, 97, 128}, |
1227 | {14, 6, 129, 192}, |
1228 | {15, 6, 193, 256}, |
1229 | {16, 7, 257, 384}, |
1230 | {17, 7, 385, 512}, |
1231 | {18, 8, 513, 768}, |
1232 | {19, 8, 769, 1024}, |
1233 | {20, 9, 1025, 1536}, |
1234 | {21, 9, 1537, 2048}, |
1235 | {22, 10, 2049, 3072}, |
1236 | {23, 10, 3073, 4096}, |
1237 | {24, 11, 4097, 6144}, |
1238 | {25, 11, 6145, 8192}, |
1239 | {26, 12, 8193, 12288}, |
1240 | {27, 12, 12289, 16384}, |
1241 | {28, 13, 16385, 24576}, |
1242 | {29, 13, 24577, 32768}, |
1243 | }; |
1244 | |
1245 | static void literal(struct LZ77Context *ectx, unsigned char c) |
1246 | { |
1247 | deflate_compress_ctx *out = (deflate_compress_ctx *) ectx->userdata; |
1248 | |
1249 | outsym(out, SYMPFX_LITLEN | c); |
1250 | } |
1251 | |
1252 | static void match(struct LZ77Context *ectx, int distance, int len) |
1253 | { |
1254 | const coderecord *d, *l; |
1255 | int i, j, k; |
1256 | deflate_compress_ctx *out = (deflate_compress_ctx *) ectx->userdata; |
1257 | |
1258 | while (len > 0) { |
1259 | int thislen; |
1260 | |
1261 | /* |
1262 | * We can transmit matches of lengths 3 through 258 |
1263 | * inclusive. So if len exceeds 258, we must transmit in |
1264 | * several steps, with 258 or less in each step. |
1265 | * |
1266 | * Specifically: if len >= 261, we can transmit 258 and be |
1267 | * sure of having at least 3 left for the next step. And if |
1268 | * len <= 258, we can just transmit len. But if len == 259 |
1269 | * or 260, we must transmit len-3. |
1270 | */ |
1271 | thislen = (len > 260 ? 258 : len <= 258 ? len : len - 3); |
1272 | len -= thislen; |
1273 | |
1274 | /* |
1275 | * Binary-search to find which length code we're |
1276 | * transmitting. |
1277 | */ |
1278 | i = -1; |
1279 | j = sizeof(lencodes) / sizeof(*lencodes); |
1280 | while (1) { |
1281 | assert(j - i >= 2); |
1282 | k = (j + i) / 2; |
1283 | if (thislen < lencodes[k].min) |
1284 | j = k; |
1285 | else if (thislen > lencodes[k].max) |
1286 | i = k; |
1287 | else { |
1288 | l = &lencodes[k]; |
1289 | break; /* found it! */ |
1290 | } |
1291 | } |
1292 | |
1293 | /* |
1294 | * Transmit the length code. |
1295 | */ |
1296 | outsym(out, SYMPFX_LITLEN | l->code); |
1297 | |
1298 | /* |
1299 | * Transmit the extra bits. |
1300 | */ |
1301 | if (l->extrabits) { |
1302 | outsym(out, (SYMPFX_EXTRABITS | (thislen - l->min) | |
1303 | (l->extrabits << SYM_EXTRABITS_SHIFT))); |
1304 | } |
1305 | |
1306 | /* |
1307 | * Binary-search to find which distance code we're |
1308 | * transmitting. |
1309 | */ |
1310 | i = -1; |
1311 | j = sizeof(distcodes) / sizeof(*distcodes); |
1312 | while (1) { |
1313 | assert(j - i >= 2); |
1314 | k = (j + i) / 2; |
1315 | if (distance < distcodes[k].min) |
1316 | j = k; |
1317 | else if (distance > distcodes[k].max) |
1318 | i = k; |
1319 | else { |
1320 | d = &distcodes[k]; |
1321 | break; /* found it! */ |
1322 | } |
1323 | } |
1324 | |
1325 | /* |
1326 | * Write the distance code. |
1327 | */ |
1328 | outsym(out, SYMPFX_DIST | d->code); |
1329 | |
1330 | /* |
1331 | * Transmit the extra bits. |
1332 | */ |
1333 | if (d->extrabits) { |
1334 | outsym(out, (SYMPFX_EXTRABITS | (distance - d->min) | |
1335 | (d->extrabits << SYM_EXTRABITS_SHIFT))); |
1336 | } |
1337 | } |
1338 | } |
1339 | |
1340 | deflate_compress_ctx *deflate_compress_new(int type) |
1341 | { |
1342 | deflate_compress_ctx *out; |
1343 | struct LZ77Context *ectx = snew(struct LZ77Context); |
1344 | |
1345 | lz77_init(ectx); |
1346 | ectx->literal = literal; |
1347 | ectx->match = match; |
1348 | |
1349 | out = snew(deflate_compress_ctx); |
1350 | out->type = type; |
1351 | out->outbits = out->noutbits = 0; |
1352 | out->firstblock = TRUE; |
1353 | #ifdef STATISTICS |
1354 | out->bitcount = 0; |
1355 | #endif |
1356 | |
1357 | out->syms = snewn(SYMLIMIT, unsigned long); |
1358 | out->symstart = out->nsyms = 0; |
1359 | |
1360 | out->adler32 = 1; |
1361 | out->lastblock = FALSE; |
1362 | out->finished = FALSE; |
1363 | |
1364 | ectx->userdata = out; |
1365 | out->lzc = ectx; |
1366 | |
1367 | return out; |
1368 | } |
1369 | |
1370 | void deflate_compress_free(deflate_compress_ctx *out) |
1371 | { |
1372 | struct LZ77Context *ectx = out->lzc; |
1373 | |
1374 | sfree(out->syms); |
1375 | sfree(out); |
1376 | sfree(ectx->ictx); |
1377 | sfree(ectx); |
1378 | } |
1379 | |
/*
 * Fold `len' bytes at `data' into a running Adler-32 checksum.
 *
 * `s' is the checksum so far (1 for a fresh stream), with the byte
 * sum in its low 16 bits and the sum-of-sums in the next 16. Returns
 * the updated checksum in the same layout.
 */
static unsigned long adler32_update(unsigned long s,
                                    const unsigned char *data, int len)
{
    enum { ADLER_MOD = 65521 };
    unsigned lo = s & 0xFFFF;
    unsigned hi = (s >> 16) & 0xFFFF;
    int pos = 0;

    while (pos < len) {
        lo += data[pos];
        hi += lo;

        /* Reduce once every 4096 bytes rather than on every byte. */
        if ((pos & 0xFFF) == 0) {
            lo %= ADLER_MOD;
            hi %= ADLER_MOD;
        }
        pos++;
    }

    lo %= ADLER_MOD;
    hi %= ADLER_MOD;
    return (hi << 16) | lo;
}
1397 | |
1398 | int deflate_compress_data(deflate_compress_ctx *out, |
1399 | const void *vblock, int len, int flushtype, |
1400 | void **outblock, int *outlen) |
1401 | { |
1402 | struct LZ77Context *ectx = out->lzc; |
1403 | const unsigned char *block = (const unsigned char *)vblock; |
1404 | |
1405 | assert(!out->finished); |
1406 | |
1407 | out->outbuf = NULL; |
1408 | out->outlen = out->outsize = 0; |
1409 | |
1410 | /* |
1411 | * If this is the first block, output the header. |
1412 | */ |
1413 | if (out->firstblock) { |
1414 | switch (out->type) { |
1415 | case DEFLATE_TYPE_BARE: |
1416 | break; /* no header */ |
1417 | case DEFLATE_TYPE_ZLIB: |
1418 | /* |
1419 | * Zlib (RFC1950) header bytes: 78 9C. (Deflate |
1420 | * compression, 32K window size, default algorithm.) |
1421 | */ |
1422 | outbits(out, 0x9C78, 16); |
1423 | break; |
1424 | } |
1425 | out->firstblock = FALSE; |
1426 | } |
1427 | |
1428 | /* |
1429 | * Feed our data to the LZ77 compression phase. |
1430 | */ |
1431 | lz77_compress(ectx, block, len, TRUE); |
1432 | |
1433 | /* |
1434 | * Update checksums. |
1435 | */ |
1436 | if (out->type == DEFLATE_TYPE_ZLIB) |
1437 | out->adler32 = adler32_update(out->adler32, block, len); |
1438 | |
1439 | switch (flushtype) { |
1440 | /* |
1441 | * FIXME: what other flush types are available and useful? |
1442 | * In PuTTY, it was clear that we generally wanted to be in |
1443 | * a static block so it was safe to open one. Here, we |
1444 | * probably prefer to be _outside_ a block if we can. Think |
1445 | * about this. |
1446 | */ |
1447 | case DEFLATE_NO_FLUSH: |
1448 | break; /* don't flush any data at all (duh) */ |
1449 | case DEFLATE_SYNC_FLUSH: |
1450 | /* |
1451 | * Close the current block. |
1452 | */ |
1453 | flushblock(out); |
1454 | |
1455 | /* |
1456 | * Then output an empty _uncompressed_ block: send 000, |
1457 | * then sync to byte boundary, then send bytes 00 00 FF |
1458 | * FF. |
1459 | */ |
1460 | outbits(out, 0, 3); |
1461 | if (out->noutbits) |
1462 | outbits(out, 0, 8 - out->noutbits); |
1463 | outbits(out, 0, 16); |
1464 | outbits(out, 0xFFFF, 16); |
1465 | break; |
1466 | case DEFLATE_END_OF_DATA: |
1467 | /* |
1468 | * Output a block with BFINAL set. |
1469 | */ |
1470 | out->lastblock = TRUE; |
1471 | flushblock(out); |
1472 | |
1473 | /* |
1474 | * Sync to byte boundary, flushing out the final byte. |
1475 | */ |
1476 | if (out->noutbits) |
1477 | outbits(out, 0, 8 - out->noutbits); |
1478 | |
1479 | /* |
1480 | * Output the adler32 checksum, in zlib mode. |
1481 | */ |
1482 | if (out->type == DEFLATE_TYPE_ZLIB) { |
1483 | outbits(out, (out->adler32 >> 24) & 0xFF, 8); |
1484 | outbits(out, (out->adler32 >> 16) & 0xFF, 8); |
1485 | outbits(out, (out->adler32 >> 8) & 0xFF, 8); |
1486 | outbits(out, (out->adler32 >> 0) & 0xFF, 8); |
1487 | } |
1488 | |
1489 | out->finished = TRUE; |
1490 | break; |
1491 | } |
1492 | |
1493 | /* |
1494 | * Return any data that we've generated. |
1495 | */ |
1496 | *outblock = (void *)out->outbuf; |
1497 | *outlen = out->outlen; |
1498 | |
1499 | return 1; |
1500 | } |
1501 | |
1502 | /* ---------------------------------------------------------------------- |
1503 | * deflate decompression. |
1504 | */ |
1505 | |
/*
 * The way we work the Huffman decode is to have a table lookup on
 * the first N bits of the input stream (in the order they arrive,
 * of course, i.e. the first bit of the Huffman code is in bit 0).
 * Each table entry lists the number of bits to consume, plus
 * either an output code or a pointer to a secondary table.
 *
 * Tables are built by mkonetab(), walked by huflookup() and released
 * by freetable().
 */
struct table;
struct tableentry;

struct tableentry {
    unsigned char nbits;        /* number of input bits this entry consumes */
    short code;                 /* decoded symbol, or -1 if this entry has none */
    struct table *nexttable;    /* secondary table to continue in, or NULL */
};

struct table {
    int mask;                   /* mask applied to input bit stream */
    struct tableentry *table;   /* mask+1 entries, indexed by (bits & mask) */
};
1526 | |
/*
 * Size of the codes[] scratch array in mktable(), and therefore the
 * largest alphabet mktable() can be given. 288 is the size of the
 * static literal/length table built in deflate_decompress_new().
 */
#define MAXSYMS 288
1528 | |
1529 | /* |
1530 | * Build a single-level decode table for elements |
1531 | * [minlength,maxlength) of the provided code/length tables, and |
1532 | * recurse to build subtables. |
1533 | */ |
1534 | static struct table *mkonetab(int *codes, unsigned char *lengths, int nsyms, |
1535 | int pfx, int pfxbits, int bits) |
1536 | { |
1537 | struct table *tab = snew(struct table); |
1538 | int pfxmask = (1 << pfxbits) - 1; |
1539 | int nbits, i, j, code; |
1540 | |
1541 | tab->table = snewn(1 << bits, struct tableentry); |
1542 | tab->mask = (1 << bits) - 1; |
1543 | |
1544 | for (code = 0; code <= tab->mask; code++) { |
1545 | tab->table[code].code = -1; |
1546 | tab->table[code].nbits = 0; |
1547 | tab->table[code].nexttable = NULL; |
1548 | } |
1549 | |
1550 | for (i = 0; i < nsyms; i++) { |
1551 | if (lengths[i] <= pfxbits || (codes[i] & pfxmask) != pfx) |
1552 | continue; |
1553 | code = (codes[i] >> pfxbits) & tab->mask; |
1554 | for (j = code; j <= tab->mask; j += 1 << (lengths[i] - pfxbits)) { |
1555 | tab->table[j].code = i; |
1556 | nbits = lengths[i] - pfxbits; |
1557 | if (tab->table[j].nbits < nbits) |
1558 | tab->table[j].nbits = nbits; |
1559 | } |
1560 | } |
1561 | for (code = 0; code <= tab->mask; code++) { |
1562 | if (tab->table[code].nbits <= bits) |
1563 | continue; |
1564 | /* Generate a subtable. */ |
1565 | tab->table[code].code = -1; |
1566 | nbits = tab->table[code].nbits - bits; |
1567 | if (nbits > 7) |
1568 | nbits = 7; |
1569 | tab->table[code].nbits = bits; |
1570 | tab->table[code].nexttable = mkonetab(codes, lengths, nsyms, |
1571 | pfx | (code << pfxbits), |
1572 | pfxbits + bits, nbits); |
1573 | } |
1574 | |
1575 | return tab; |
1576 | } |
1577 | |
1578 | /* |
1579 | * Build a decode table, given a set of Huffman tree lengths. |
1580 | */ |
1581 | static struct table *mktable(unsigned char *lengths, int nlengths) |
1582 | { |
1583 | int codes[MAXSYMS]; |
1584 | int maxlen; |
1585 | |
1586 | maxlen = hufcodes(lengths, codes, nlengths); |
1587 | |
1588 | /* |
1589 | * Now we have the complete list of Huffman codes. Build a |
1590 | * table. |
1591 | */ |
1592 | return mkonetab(codes, lengths, nlengths, 0, 0, maxlen < 9 ? maxlen : 9); |
1593 | } |
1594 | |
1595 | static int freetable(struct table **ztab) |
1596 | { |
1597 | struct table *tab; |
1598 | int code; |
1599 | |
1600 | if (ztab == NULL) |
1601 | return -1; |
1602 | |
1603 | if (*ztab == NULL) |
1604 | return 0; |
1605 | |
1606 | tab = *ztab; |
1607 | |
1608 | for (code = 0; code <= tab->mask; code++) |
1609 | if (tab->table[code].nexttable != NULL) |
1610 | freetable(&tab->table[code].nexttable); |
1611 | |
1612 | sfree(tab->table); |
1613 | tab->table = NULL; |
1614 | |
1615 | sfree(tab); |
1616 | *ztab = NULL; |
1617 | |
1618 | return (0); |
1619 | } |
1620 | |
1621 | struct deflate_decompress_ctx { |
1622 | struct table *staticlentable, *staticdisttable; |
1623 | struct table *currlentable, *currdisttable, *lenlentable; |
1624 | enum { |
1625 | START, OUTSIDEBLK, |
1626 | TREES_HDR, TREES_LENLEN, TREES_LEN, TREES_LENREP, |
1627 | INBLK, GOTLENSYM, GOTLEN, GOTDISTSYM, |
1628 | UNCOMP_LEN, UNCOMP_NLEN, UNCOMP_DATA, |
1629 | END, ADLER1, ADLER2, FINALSPIN |
1630 | } state; |
1631 | int sym, hlit, hdist, hclen, lenptr, lenextrabits, lenaddon, len, |
1632 | lenrep, lastblock; |
1633 | int uncomplen; |
1634 | unsigned char lenlen[19]; |
1635 | unsigned char lengths[286 + 32]; |
1636 | unsigned long bits; |
1637 | int nbits; |
1638 | unsigned char window[WINSIZE]; |
1639 | int winpos; |
1640 | unsigned char *outblk; |
1641 | int outlen, outsize; |
1642 | int type; |
1643 | unsigned long adler32; |
1644 | }; |
1645 | |
1646 | deflate_decompress_ctx *deflate_decompress_new(int type) |
1647 | { |
1648 | deflate_decompress_ctx *dctx = snew(deflate_decompress_ctx); |
1649 | unsigned char lengths[288]; |
1650 | |
1651 | memset(lengths, 8, 144); |
1652 | memset(lengths + 144, 9, 256 - 144); |
1653 | memset(lengths + 256, 7, 280 - 256); |
1654 | memset(lengths + 280, 8, 288 - 280); |
1655 | dctx->staticlentable = mktable(lengths, 288); |
1656 | memset(lengths, 5, 32); |
1657 | dctx->staticdisttable = mktable(lengths, 32); |
1658 | if (type == DEFLATE_TYPE_BARE) |
1659 | dctx->state = OUTSIDEBLK; |
1660 | else |
1661 | dctx->state = START; |
1662 | dctx->currlentable = dctx->currdisttable = dctx->lenlentable = NULL; |
1663 | dctx->bits = 0; |
1664 | dctx->nbits = 0; |
1665 | dctx->winpos = 0; |
1666 | dctx->type = type; |
1667 | dctx->lastblock = FALSE; |
1668 | dctx->adler32 = 1; |
1669 | |
1670 | return dctx; |
1671 | } |
1672 | |
1673 | void deflate_decompress_free(deflate_decompress_ctx *dctx) |
1674 | { |
1675 | if (dctx->currlentable && dctx->currlentable != dctx->staticlentable) |
1676 | freetable(&dctx->currlentable); |
1677 | if (dctx->currdisttable && dctx->currdisttable != dctx->staticdisttable) |
1678 | freetable(&dctx->currdisttable); |
1679 | if (dctx->lenlentable) |
1680 | freetable(&dctx->lenlentable); |
1681 | freetable(&dctx->staticlentable); |
1682 | freetable(&dctx->staticdisttable); |
1683 | sfree(dctx); |
1684 | } |
1685 | |
1686 | static int huflookup(unsigned long *bitsp, int *nbitsp, struct table *tab) |
1687 | { |
1688 | unsigned long bits = *bitsp; |
1689 | int nbits = *nbitsp; |
1690 | while (1) { |
1691 | struct tableentry *ent; |
1692 | ent = &tab->table[bits & tab->mask]; |
1693 | if (ent->nbits > nbits) |
1694 | return -1; /* not enough data */ |
1695 | bits >>= ent->nbits; |
1696 | nbits -= ent->nbits; |
1697 | if (ent->code == -1) |
1698 | tab = ent->nexttable; |
1699 | else { |
1700 | *bitsp = bits; |
1701 | *nbitsp = nbits; |
1702 | return ent->code; |
1703 | } |
1704 | |
1705 | if (!tab) { |
1706 | /* |
1707 | * There was a missing entry in the table, presumably |
1708 | * due to an invalid Huffman table description, and the |
1709 | * subsequent data has attempted to use the missing |
1710 | * entry. Return a decoding failure. |
1711 | */ |
1712 | return -2; |
1713 | } |
1714 | } |
1715 | } |
1716 | |
1717 | static void emit_char(deflate_decompress_ctx *dctx, int c) |
1718 | { |
1719 | dctx->window[dctx->winpos] = c; |
1720 | dctx->winpos = (dctx->winpos + 1) & (WINSIZE - 1); |
1721 | if (dctx->outlen >= dctx->outsize) { |
1722 | dctx->outsize = dctx->outlen + 512; |
1723 | dctx->outblk = sresize(dctx->outblk, dctx->outsize, unsigned char); |
1724 | } |
1725 | if (dctx->type == DEFLATE_TYPE_ZLIB) { |
1726 | unsigned char uc = c; |
1727 | dctx->adler32 = adler32_update(dctx->adler32, &uc, 1); |
1728 | } |
1729 | dctx->outblk[dctx->outlen++] = c; |
1730 | } |
1731 | |
/*
 * Discard the n lowest bits of the decoder's bit buffer: subtracts n
 * from dctx->nbits and shifts dctx->bits right by n. It relies on a
 * variable called `dctx' being in scope where it is used, performs no
 * check that n bits are actually available, and evaluates n twice, so
 * the argument must have no side effects.
 */
#define EATBITS(n) ( dctx->nbits -= (n), dctx->bits >>= (n) )
1733 | |
1734 | int deflate_decompress_data(deflate_decompress_ctx *dctx, |
1735 | const void *vblock, int len, |
1736 | void **outblock, int *outlen) |
1737 | { |
1738 | const coderecord *rec; |
1739 | const unsigned char *block = (const unsigned char *)vblock; |
1740 | int code, bfinal, btype, rep, dist, nlen, header, adler; |
1741 | |
1742 | dctx->outblk = snewn(256, unsigned char); |
1743 | dctx->outsize = 256; |
1744 | dctx->outlen = 0; |
1745 | |
1746 | while (len > 0 || dctx->nbits > 0) { |
1747 | while (dctx->nbits < 24 && len > 0) { |
1748 | dctx->bits |= (*block++) << dctx->nbits; |
1749 | dctx->nbits += 8; |
1750 | len--; |
1751 | } |
1752 | switch (dctx->state) { |
1753 | case START: |
1754 | /* Expect 16-bit zlib header. */ |
1755 | if (dctx->nbits < 16) |
1756 | goto finished; /* done all we can */ |
1757 | |
1758 | /* |
1759 | * The header is stored as a big-endian 16-bit integer, |
1760 | * in contrast to the general little-endian policy in |
1761 | * the rest of the format :-( |
1762 | */ |
1763 | header = (((dctx->bits & 0xFF00) >> 8) | |
1764 | ((dctx->bits & 0x00FF) << 8)); |
1765 | EATBITS(16); |
1766 | |
1767 | /* |
1768 | * Check the header: |
1769 | * |
1770 | * - bits 8-11 should be 1000 (Deflate/RFC1951) |
1771 | * - bits 12-15 should be at most 0111 (window size) |
1772 | * - bit 5 should be zero (no dictionary present) |
1773 | * - we don't care about bits 6-7 (compression rate) |
1774 | * - bits 0-4 should be set up to make the whole thing |
1775 | * a multiple of 31 (checksum). |
1776 | */ |
1777 | if ((header & 0x0F00) != 0x0800 || |
1778 | (header & 0xF000) > 0x7000 || |
1779 | (header & 0x0020) != 0x0000 || |
1780 | (header % 31) != 0) |
1781 | goto decode_error; |
1782 | |
1783 | dctx->state = OUTSIDEBLK; |
1784 | break; |
1785 | case OUTSIDEBLK: |
1786 | /* Expect 3-bit block header. */ |
1787 | if (dctx->nbits < 3) |
1788 | goto finished; /* done all we can */ |
1789 | bfinal = dctx->bits & 1; |
1790 | if (bfinal) |
1791 | dctx->lastblock = TRUE; |
1792 | EATBITS(1); |
1793 | btype = dctx->bits & 3; |
1794 | EATBITS(2); |
1795 | if (btype == 0) { |
1796 | int to_eat = dctx->nbits & 7; |
1797 | dctx->state = UNCOMP_LEN; |
1798 | EATBITS(to_eat); /* align to byte boundary */ |
1799 | } else if (btype == 1) { |
1800 | dctx->currlentable = dctx->staticlentable; |
1801 | dctx->currdisttable = dctx->staticdisttable; |
1802 | dctx->state = INBLK; |
1803 | } else if (btype == 2) { |
1804 | dctx->state = TREES_HDR; |
1805 | } |
1806 | debug(("recv: bfinal=%d btype=%d\n", bfinal, btype)); |
1807 | break; |
1808 | case TREES_HDR: |
1809 | /* |
1810 | * Dynamic block header. Five bits of HLIT, five of |
1811 | * HDIST, four of HCLEN. |
1812 | */ |
1813 | if (dctx->nbits < 5 + 5 + 4) |
1814 | goto finished; /* done all we can */ |
1815 | dctx->hlit = 257 + (dctx->bits & 31); |
1816 | EATBITS(5); |
1817 | dctx->hdist = 1 + (dctx->bits & 31); |
1818 | EATBITS(5); |
1819 | dctx->hclen = 4 + (dctx->bits & 15); |
1820 | EATBITS(4); |
1821 | debug(("recv: hlit=%d hdist=%d hclen=%d\n", dctx->hlit, |
1822 | dctx->hdist, dctx->hclen)); |
1823 | dctx->lenptr = 0; |
1824 | dctx->state = TREES_LENLEN; |
1825 | memset(dctx->lenlen, 0, sizeof(dctx->lenlen)); |
1826 | break; |
1827 | case TREES_LENLEN: |
1828 | if (dctx->nbits < 3) |
1829 | goto finished; |
1830 | while (dctx->lenptr < dctx->hclen && dctx->nbits >= 3) { |
1831 | dctx->lenlen[lenlenmap[dctx->lenptr++]] = |
1832 | (unsigned char) (dctx->bits & 7); |
1833 | debug(("recv: lenlen %d\n", (unsigned char) (dctx->bits & 7))); |
1834 | EATBITS(3); |
1835 | } |
1836 | if (dctx->lenptr == dctx->hclen) { |
1837 | dctx->lenlentable = mktable(dctx->lenlen, 19); |
1838 | dctx->state = TREES_LEN; |
1839 | dctx->lenptr = 0; |
1840 | } |
1841 | break; |
1842 | case TREES_LEN: |
1843 | if (dctx->lenptr >= dctx->hlit + dctx->hdist) { |
1844 | dctx->currlentable = mktable(dctx->lengths, dctx->hlit); |
1845 | dctx->currdisttable = mktable(dctx->lengths + dctx->hlit, |
1846 | dctx->hdist); |
1847 | freetable(&dctx->lenlentable); |
1848 | dctx->lenlentable = NULL; |
1849 | dctx->state = INBLK; |
1850 | break; |
1851 | } |
1852 | code = huflookup(&dctx->bits, &dctx->nbits, dctx->lenlentable); |
1853 | debug(("recv: codelen %d\n", code)); |
1854 | if (code == -1) |
1855 | goto finished; |
1856 | if (code == -2) |
1857 | goto decode_error; |
1858 | if (code < 16) |
1859 | dctx->lengths[dctx->lenptr++] = code; |
1860 | else { |
1861 | dctx->lenextrabits = (code == 16 ? 2 : code == 17 ? 3 : 7); |
1862 | dctx->lenaddon = (code == 18 ? 11 : 3); |
1863 | dctx->lenrep = (code == 16 && dctx->lenptr > 0 ? |
1864 | dctx->lengths[dctx->lenptr - 1] : 0); |
1865 | dctx->state = TREES_LENREP; |
1866 | } |
1867 | break; |
1868 | case TREES_LENREP: |
1869 | if (dctx->nbits < dctx->lenextrabits) |
1870 | goto finished; |
1871 | rep = |
1872 | dctx->lenaddon + |
1873 | (dctx->bits & ((1 << dctx->lenextrabits) - 1)); |
1874 | EATBITS(dctx->lenextrabits); |
1875 | if (dctx->lenextrabits) |
1876 | debug(("recv: codelen-extrabits %d/%d\n", rep - dctx->lenaddon, |
1877 | dctx->lenextrabits)); |
1878 | while (rep > 0 && dctx->lenptr < dctx->hlit + dctx->hdist) { |
1879 | dctx->lengths[dctx->lenptr] = dctx->lenrep; |
1880 | dctx->lenptr++; |
1881 | rep--; |
1882 | } |
1883 | dctx->state = TREES_LEN; |
1884 | break; |
1885 | case INBLK: |
1886 | code = huflookup(&dctx->bits, &dctx->nbits, dctx->currlentable); |
1887 | debug(("recv: litlen %d\n", code)); |
1888 | if (code == -1) |
1889 | goto finished; |
1890 | if (code == -2) |
1891 | goto decode_error; |
1892 | if (code < 256) |
1893 | emit_char(dctx, code); |
1894 | else if (code == 256) { |
1895 | if (dctx->lastblock) |
1896 | dctx->state = END; |
1897 | else |
1898 | dctx->state = OUTSIDEBLK; |
1899 | if (dctx->currlentable != dctx->staticlentable) { |
1900 | freetable(&dctx->currlentable); |
1901 | dctx->currlentable = NULL; |
1902 | } |
1903 | if (dctx->currdisttable != dctx->staticdisttable) { |
1904 | freetable(&dctx->currdisttable); |
1905 | dctx->currdisttable = NULL; |
1906 | } |
1907 | } else if (code < 286) { /* static tree can give >285; ignore */ |
1908 | dctx->state = GOTLENSYM; |
1909 | dctx->sym = code; |
1910 | } |
1911 | break; |
1912 | case GOTLENSYM: |
1913 | rec = &lencodes[dctx->sym - 257]; |
1914 | if (dctx->nbits < rec->extrabits) |
1915 | goto finished; |
1916 | dctx->len = |
1917 | rec->min + (dctx->bits & ((1 << rec->extrabits) - 1)); |
1918 | if (rec->extrabits) |
1919 | debug(("recv: litlen-extrabits %d/%d\n", |
1920 | dctx->len - rec->min, rec->extrabits)); |
1921 | EATBITS(rec->extrabits); |
1922 | dctx->state = GOTLEN; |
1923 | break; |
1924 | case GOTLEN: |
1925 | code = huflookup(&dctx->bits, &dctx->nbits, dctx->currdisttable); |
1926 | debug(("recv: dist %d\n", code)); |
1927 | if (code == -1) |
1928 | goto finished; |
1929 | if (code == -2) |
1930 | goto decode_error; |
1931 | dctx->state = GOTDISTSYM; |
1932 | dctx->sym = code; |
1933 | break; |
1934 | case GOTDISTSYM: |
1935 | rec = &distcodes[dctx->sym]; |
1936 | if (dctx->nbits < rec->extrabits) |
1937 | goto finished; |
1938 | dist = rec->min + (dctx->bits & ((1 << rec->extrabits) - 1)); |
1939 | if (rec->extrabits) |
1940 | debug(("recv: dist-extrabits %d/%d\n", |
1941 | dist - rec->min, rec->extrabits)); |
1942 | EATBITS(rec->extrabits); |
1943 | dctx->state = INBLK; |
1944 | while (dctx->len--) |
1945 | emit_char(dctx, dctx->window[(dctx->winpos - dist) & |
1946 | (WINSIZE - 1)]); |
1947 | break; |
1948 | case UNCOMP_LEN: |
1949 | /* |
1950 | * Uncompressed block. We expect to see a 16-bit LEN. |
1951 | */ |
1952 | if (dctx->nbits < 16) |
1953 | goto finished; |
1954 | dctx->uncomplen = dctx->bits & 0xFFFF; |
1955 | EATBITS(16); |
1956 | dctx->state = UNCOMP_NLEN; |
1957 | break; |
1958 | case UNCOMP_NLEN: |
1959 | /* |
1960 | * Uncompressed block. We expect to see a 16-bit NLEN, |
1961 | * which should be the one's complement of the previous |
1962 | * LEN. |
1963 | */ |
1964 | if (dctx->nbits < 16) |
1965 | goto finished; |
1966 | nlen = dctx->bits & 0xFFFF; |
1967 | EATBITS(16); |
1968 | if (dctx->uncomplen == 0) |
1969 | dctx->state = OUTSIDEBLK; /* block is empty */ |
1970 | else |
1971 | dctx->state = UNCOMP_DATA; |
1972 | break; |
1973 | case UNCOMP_DATA: |
1974 | if (dctx->nbits < 8) |
1975 | goto finished; |
1976 | emit_char(dctx, dctx->bits & 0xFF); |
1977 | EATBITS(8); |
1978 | if (--dctx->uncomplen == 0) |
1979 | dctx->state = OUTSIDEBLK; /* end of uncompressed block */ |
1980 | break; |
1981 | case END: |
1982 | /* |
1983 | * End of compressed data. We align to a byte boundary, |
1984 | * and then look for format-specific trailer data. |
1985 | */ |
1986 | { |
1987 | int to_eat = dctx->nbits & 7; |
1988 | EATBITS(to_eat); |
1989 | } |
1990 | if (dctx->type == DEFLATE_TYPE_ZLIB) |
1991 | dctx->state = ADLER1; |
1992 | else |
1993 | dctx->state = FINALSPIN; |
1994 | break; |
1995 | case ADLER1: |
1996 | if (dctx->nbits < 16) |
1997 | goto finished; |
1998 | adler = (dctx->bits & 0xFF) << 8; |
1999 | EATBITS(8); |
2000 | adler |= (dctx->bits & 0xFF); |
2001 | EATBITS(8); |
2002 | if (adler != ((dctx->adler32 >> 16) & 0xFFFF)) |
2003 | goto decode_error; |
2004 | dctx->state = ADLER2; |
2005 | break; |
2006 | case ADLER2: |
2007 | if (dctx->nbits < 16) |
2008 | goto finished; |
2009 | adler = (dctx->bits & 0xFF) << 8; |
2010 | EATBITS(8); |
2011 | adler |= (dctx->bits & 0xFF); |
2012 | EATBITS(8); |
2013 | if (adler != (dctx->adler32 & 0xFFFF)) |
2014 | goto decode_error; |
2015 | dctx->state = FINALSPIN; |
2016 | break; |
2017 | case FINALSPIN: |
2018 | /* Just ignore any trailing garbage on the data stream. */ |
2019 | EATBITS(dctx->nbits); |
2020 | break; |
2021 | } |
2022 | } |
2023 | |
2024 | finished: |
2025 | *outblock = dctx->outblk; |
2026 | *outlen = dctx->outlen; |
2027 | return 1; |
2028 | |
2029 | decode_error: |
2030 | sfree(dctx->outblk); |
2031 | *outblock = dctx->outblk = NULL; |
2032 | *outlen = 0; |
2033 | return 0; |
2034 | } |
2035 | |
2036 | #ifdef STANDALONE |
2037 | |
2038 | int main(int argc, char **argv) |
2039 | { |
2040 | unsigned char buf[65536], *outbuf; |
2041 | int ret, outlen; |
2042 | deflate_decompress_ctx *dhandle; |
2043 | deflate_compress_ctx *chandle; |
2044 | int type = DEFLATE_TYPE_ZLIB, opts = TRUE, compress = FALSE; |
2045 | char *filename = NULL; |
2046 | FILE *fp; |
2047 | |
2048 | while (--argc) { |
2049 | char *p = *++argv; |
2050 | |
2051 | if (p[0] == '-' && opts) { |
2052 | if (!strcmp(p, "-d")) |
2053 | type = DEFLATE_TYPE_BARE; |
2054 | if (!strcmp(p, "-c")) |
2055 | compress = TRUE; |
2056 | else if (!strcmp(p, "--")) |
2057 | opts = FALSE; /* next thing is filename */ |
2058 | else { |
2059 | fprintf(stderr, "unknown command line option '%s'\n", p); |
2060 | return 1; |
2061 | } |
2062 | } else if (!filename) { |
2063 | filename = p; |
2064 | } else { |
2065 | fprintf(stderr, "can only handle one filename\n"); |
2066 | return 1; |
2067 | } |
2068 | } |
2069 | |
2070 | if (compress) { |
2071 | chandle = deflate_compress_new(type); |
2072 | dhandle = NULL; |
2073 | } else { |
2074 | dhandle = deflate_decompress_new(type); |
2075 | chandle = NULL; |
2076 | } |
2077 | |
2078 | if (filename) |
2079 | fp = fopen(filename, "rb"); |
2080 | else |
2081 | fp = stdin; |
2082 | |
2083 | if (!fp) { |
2084 | assert(filename); |
2085 | fprintf(stderr, "unable to open '%s'\n", filename); |
2086 | return 1; |
2087 | } |
2088 | |
2089 | do { |
2090 | ret = fread(buf, 1, sizeof(buf), fp); |
2091 | if (dhandle) { |
2092 | if (ret > 0) |
2093 | deflate_decompress_data(dhandle, buf, ret, |
2094 | (void **)&outbuf, &outlen); |
2095 | } else { |
2096 | if (ret > 0) |
2097 | deflate_compress_data(chandle, buf, ret, DEFLATE_NO_FLUSH, |
2098 | (void **)&outbuf, &outlen); |
2099 | else |
2100 | deflate_compress_data(chandle, buf, ret, DEFLATE_END_OF_DATA, |
2101 | (void **)&outbuf, &outlen); |
2102 | } |
2103 | if (outbuf) { |
2104 | if (outlen) |
2105 | fwrite(outbuf, 1, outlen, stdout); |
2106 | sfree(outbuf); |
2107 | } else if (dhandle) { |
2108 | fprintf(stderr, "decoding error\n"); |
2109 | return 1; |
2110 | } |
2111 | } while (ret > 0); |
2112 | |
2113 | if (dhandle) |
2114 | deflate_decompress_free(dhandle); |
2115 | if (chandle) |
2116 | deflate_compress_free(chandle); |
2117 | |
2118 | if (filename) |
2119 | fclose(fp); |
2120 | |
2121 | return 0; |
2122 | } |
2123 | |
2124 | #endif |
2125 | |
2126 | #ifdef TESTMODE |
2127 | |
2128 | int main(int argc, char **argv) |
2129 | { |
2130 | char *filename = NULL; |
2131 | FILE *fp; |
2132 | deflate_compress_ctx *chandle; |
2133 | deflate_decompress_ctx *dhandle; |
2134 | unsigned char buf[65536], *outbuf, *outbuf2; |
2135 | int ret, outlen, outlen2; |
2136 | int dlen = 0, clen = 0; |
2137 | int opts = TRUE; |
2138 | |
2139 | while (--argc) { |
2140 | char *p = *++argv; |
2141 | |
2142 | if (p[0] == '-' && opts) { |
2143 | if (!strcmp(p, "--")) |
2144 | opts = FALSE; /* next thing is filename */ |
2145 | else { |
2146 | fprintf(stderr, "unknown command line option '%s'\n", p); |
2147 | return 1; |
2148 | } |
2149 | } else if (!filename) { |
2150 | filename = p; |
2151 | } else { |
2152 | fprintf(stderr, "can only handle one filename\n"); |
2153 | return 1; |
2154 | } |
2155 | } |
2156 | |
2157 | if (filename) |
2158 | fp = fopen(filename, "rb"); |
2159 | else |
2160 | fp = stdin; |
2161 | |
2162 | if (!fp) { |
2163 | assert(filename); |
2164 | fprintf(stderr, "unable to open '%s'\n", filename); |
2165 | return 1; |
2166 | } |
2167 | |
2168 | chandle = deflate_compress_new(DEFLATE_TYPE_ZLIB); |
2169 | dhandle = deflate_decompress_new(DEFLATE_TYPE_ZLIB); |
2170 | |
2171 | do { |
2172 | ret = fread(buf, 1, sizeof(buf), fp); |
2173 | if (ret <= 0) { |
2174 | deflate_compress_data(chandle, NULL, 0, DEFLATE_END_OF_DATA, |
2175 | (void **)&outbuf, &outlen); |
2176 | } else { |
2177 | dlen += ret; |
2178 | deflate_compress_data(chandle, buf, ret, DEFLATE_NO_FLUSH, |
2179 | (void **)&outbuf, &outlen); |
2180 | } |
2181 | if (outbuf) { |
2182 | clen += outlen; |
2183 | deflate_decompress_data(dhandle, outbuf, outlen, |
2184 | (void **)&outbuf2, &outlen2); |
2185 | sfree(outbuf); |
2186 | if (outbuf2) { |
2187 | if (outlen2) |
2188 | fwrite(outbuf2, 1, outlen2, stdout); |
2189 | sfree(outbuf2); |
2190 | } else { |
2191 | fprintf(stderr, "decoding error\n"); |
2192 | return 1; |
2193 | } |
2194 | } |
2195 | } while (ret > 0); |
2196 | |
2197 | fprintf(stderr, "%d plaintext -> %d compressed\n", dlen, clen); |
2198 | |
2199 | return 0; |
2200 | } |
2201 | |
2202 | #endif |