2 * Reimplementation of Deflate (RFC1951) compression. Adapted from
3 * the version in PuTTY, and extended to write dynamic Huffman
4 * trees and choose block boundaries usefully.
10 * - Feature: could do with forms of flush other than SYNC_FLUSH.
11 * I'm not sure exactly how those work when you don't know in
12 * advance that your next block will be static (as we did in
13 * PuTTY). And remember the 9-bit limitation of zlib.
14 * + also, zlib has FULL_FLUSH which clears the LZ77 state as
15 * well, for random access.
17 * - Compression quality: chooseblock() appears to be computing
18 * wildly inaccurate block size estimates. Possible resolutions:
19 * + find and fix some trivial bug I haven't spotted yet
20 * + abandon the entropic approximation and go with trial
23 * - Compression quality: see if increasing SYMLIMIT causes
24 * dynamic blocks to start being consistently smaller than it.
25 * + actually we seem to be there already, but check on a
28 * - Compression quality: we ought to be able to fall right back
29 * to actual uncompressed blocks if really necessary, though
30 * it's not clear what the criterion for doing so would be.
34 * This software is copyright 2000-2006 Simon Tatham.
36 * Permission is hereby granted, free of charge, to any person
37 * obtaining a copy of this software and associated documentation
38 * files (the "Software"), to deal in the Software without
39 * restriction, including without limitation the rights to use,
40 * copy, modify, merge, publish, distribute, sublicense, and/or
41 * sell copies of the Software, and to permit persons to whom the
42 * Software is furnished to do so, subject to the following
45 * The above copyright notice and this permission notice shall be
46 * included in all copies or substantial portions of the Software.
48 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
49 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
50 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
51 * NONINFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE
52 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
53 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
54 * IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
66 #define snew(type) ( (type *) malloc(sizeof(type)) )
67 #define snewn(n, type) ( (type *) malloc((n) * sizeof(type)) )
68 #define sresize(x, n, type) ( (type *) realloc((x), (n) * sizeof(type)) )
69 #define sfree(x) ( free((x)) )
71 #define lenof(x) (sizeof((x)) / sizeof(*(x)))
78 /* ----------------------------------------------------------------------
79 * This file can be compiled in a number of modes.
81 * With -DSTANDALONE, it builds a self-contained deflate tool which
82 * can compress, decompress, and also analyse a deflated file to
83 * print out the sequence of literals and copy commands it
86 * With -DTESTMODE, it builds a test application which is given a
87 * file on standard input, both compresses and decompresses it, and
88 * outputs the re-decompressed result so it can be conveniently
89 * diffed against the original. Define -DTESTDBG as well for lots
94 /* gcc-specific diagnostic macro */
95 #define debug_int(x...) ( fprintf(stderr, x) )
96 #define debug(x) ( debug_int x )
98 #define debug(x) ((void)0)
106 int analyse_level
= 0;
109 /* ----------------------------------------------------------------------
110 * Basic LZ77 code. This bit is designed modularly, so it could be
111 * ripped out and used in a different LZ77 compressor. Go to it,
115 struct LZ77InternalContext
;
117 struct LZ77InternalContext
*ictx
;
119 void (*literal
) (struct LZ77Context
* ctx
, unsigned char c
);
120 void (*match
) (struct LZ77Context
* ctx
, int distance
, int len
);
124 * Initialise the private fields of an LZ77Context. It's up to the
125 * user to initialise the public fields.
127 static int lz77_init(struct LZ77Context
*ctx
);
130 * Supply data to be compressed. Will update the private fields of
131 * the LZ77Context, and will call literal() and match() to output.
132 * If `compress' is FALSE, it will never emit a match, but will
133 * instead call literal() for everything.
135 static void lz77_compress(struct LZ77Context
*ctx
,
136 const unsigned char *data
, int len
, int compress
);
139 * Modifiable parameters.
141 #define WINSIZE 32768 /* window size. Must be power of 2! */
142 #define HASHMAX 2039 /* one more than max hash value */
143 #define MAXMATCH 32 /* how many matches we track */
144 #define HASHCHARS 3 /* how many chars make a hash */
147 * This compressor takes a less slapdash approach than the
148 * gzip/zlib one. Rather than allowing our hash chains to fall into
149 * disuse near the far end, we keep them doubly linked so we can
150 * _find_ the far end, and then every time we add a new byte to the
151 * window (thus rolling round by one and removing the previous
152 * byte), we can carefully remove the hash chain entry.
155 #define INVALID -1 /* invalid hash _and_ invalid offset */
157 short next
, prev
; /* array indices within the window */
162 short first
; /* window index of first in chain */
169 struct LZ77InternalContext
{
170 struct WindowEntry win
[WINSIZE
];
171 unsigned char data
[WINSIZE
];
173 struct HashEntry hashtab
[HASHMAX
];
174 unsigned char pending
[HASHCHARS
];
178 static int lz77_hash(const unsigned char *data
)
180 return (257 * data
[0] + 263 * data
[1] + 269 * data
[2]) % HASHMAX
;
183 static int lz77_init(struct LZ77Context
*ctx
)
185 struct LZ77InternalContext
*st
;
188 st
= snew(struct LZ77InternalContext
);
194 for (i
= 0; i
< WINSIZE
; i
++)
195 st
->win
[i
].next
= st
->win
[i
].prev
= st
->win
[i
].hashval
= INVALID
;
196 for (i
= 0; i
< HASHMAX
; i
++)
197 st
->hashtab
[i
].first
= INVALID
;
205 static void lz77_advance(struct LZ77InternalContext
*st
,
206 unsigned char c
, int hash
)
211 * Remove the hash entry at winpos from the tail of its chain,
212 * or empty the chain if it's the only thing on the chain.
214 if (st
->win
[st
->winpos
].prev
!= INVALID
) {
215 st
->win
[st
->win
[st
->winpos
].prev
].next
= INVALID
;
216 } else if (st
->win
[st
->winpos
].hashval
!= INVALID
) {
217 st
->hashtab
[st
->win
[st
->winpos
].hashval
].first
= INVALID
;
221 * Create a new entry at winpos and add it to the head of its
224 st
->win
[st
->winpos
].hashval
= hash
;
225 st
->win
[st
->winpos
].prev
= INVALID
;
226 off
= st
->win
[st
->winpos
].next
= st
->hashtab
[hash
].first
;
227 st
->hashtab
[hash
].first
= st
->winpos
;
229 st
->win
[off
].prev
= st
->winpos
;
230 st
->data
[st
->winpos
] = c
;
233 * Advance the window pointer.
235 st
->winpos
= (st
->winpos
+ 1) & (WINSIZE
- 1);
238 #define CHARAT(k) ( (k)<0 ? st->data[(st->winpos+k)&(WINSIZE-1)] : data[k] )
240 static void lz77_compress(struct LZ77Context
*ctx
,
241 const unsigned char *data
, int len
, int compress
)
243 struct LZ77InternalContext
*st
= ctx
->ictx
;
244 int i
, hash
, distance
, off
, nmatch
, matchlen
, advance
;
245 struct Match defermatch
, matches
[MAXMATCH
];
249 * Add any pending characters from last time to the window. (We
250 * might not be able to.)
252 for (i
= 0; i
< st
->npending
; i
++) {
253 unsigned char foo
[HASHCHARS
];
255 if (len
+ st
->npending
- i
< HASHCHARS
) {
256 /* Update the pending array. */
257 for (j
= i
; j
< st
->npending
; j
++)
258 st
->pending
[j
- i
] = st
->pending
[j
];
261 for (j
= 0; j
< HASHCHARS
; j
++)
262 foo
[j
] = (i
+ j
< st
->npending ? st
->pending
[i
+ j
] :
263 data
[i
+ j
- st
->npending
]);
264 lz77_advance(st
, foo
[0], lz77_hash(foo
));
272 /* Don't even look for a match, if we're not compressing. */
273 if (compress
&& len
>= HASHCHARS
) {
275 * Hash the next few characters.
277 hash
= lz77_hash(data
);
280 * Look the hash up in the corresponding hash chain and see
284 for (off
= st
->hashtab
[hash
].first
;
285 off
!= INVALID
; off
= st
->win
[off
].next
) {
286 /* distance = 1 if off == st->winpos-1 */
287 /* distance = WINSIZE if off == st->winpos */
289 WINSIZE
- (off
+ WINSIZE
- st
->winpos
) % WINSIZE
;
290 for (i
= 0; i
< HASHCHARS
; i
++)
291 if (CHARAT(i
) != CHARAT(i
- distance
))
293 if (i
== HASHCHARS
) {
294 matches
[nmatch
].distance
= distance
;
295 matches
[nmatch
].len
= 3;
296 if (++nmatch
>= MAXMATCH
)
307 * We've now filled up matches[] with nmatch potential
308 * matches. Follow them down to find the longest. (We
309 * assume here that it's always worth favouring a
310 * longer match over a shorter one.)
312 matchlen
= HASHCHARS
;
313 while (matchlen
< len
) {
315 for (i
= j
= 0; i
< nmatch
; i
++) {
316 if (CHARAT(matchlen
) ==
317 CHARAT(matchlen
- matches
[i
].distance
)) {
318 matches
[j
++] = matches
[i
];
328 * We've now got all the longest matches. We favour the
329 * shorter distances, which means we go with matches[0].
330 * So see if we want to defer it or throw it away.
332 matches
[0].len
= matchlen
;
333 if (defermatch
.len
> 0) {
334 if (matches
[0].len
> defermatch
.len
+ 1) {
335 /* We have a better match. Emit the deferred char,
336 * and defer this match. */
337 ctx
->literal(ctx
, (unsigned char) deferchr
);
338 defermatch
= matches
[0];
342 /* We don't have a better match. Do the deferred one. */
343 ctx
->match(ctx
, defermatch
.distance
, defermatch
.len
);
344 advance
= defermatch
.len
- 1;
348 /* There was no deferred match. Defer this one. */
349 defermatch
= matches
[0];
355 * We found no matches. Emit the deferred match, if
356 * any; otherwise emit a literal.
358 if (defermatch
.len
> 0) {
359 ctx
->match(ctx
, defermatch
.distance
, defermatch
.len
);
360 advance
= defermatch
.len
- 1;
363 ctx
->literal(ctx
, data
[0]);
369 * Now advance the position by `advance' characters,
370 * keeping the window and hash chains consistent.
372 while (advance
> 0) {
373 if (len
>= HASHCHARS
) {
374 lz77_advance(st
, *data
, lz77_hash(data
));
376 st
->pending
[st
->npending
++] = *data
;
385 /* ----------------------------------------------------------------------
386 * Deflate functionality common to both compression and decompression.
389 static const unsigned char lenlenmap
[] = {
390 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15
393 #define MAXCODELEN 16
396 * Given a sequence of Huffman code lengths, compute the actual
397 * codes, in the final form suitable for feeding to outbits (i.e.
398 * already bit-mirrored).
400 * Returns the maximum code length found. Can also return -1 to
401 * indicate the table was overcommitted (too many or too short
402 * codes to exactly cover the possible space), or -2 to indicate it
403 * was undercommitted (too few or too long codes).
405 static int hufcodes(const unsigned char *lengths
, int *codes
, int nsyms
)
407 int count
[MAXCODELEN
], startcode
[MAXCODELEN
];
411 /* Count the codes of each length. */
413 for (i
= 1; i
< MAXCODELEN
; i
++)
415 for (i
= 0; i
< nsyms
; i
++) {
417 if (maxlen
< lengths
[i
])
420 /* Determine the starting code for each length block. */
422 for (i
= 1; i
< MAXCODELEN
; i
++) {
426 maxlen
= -1; /* overcommitted */
429 if (code
< (1 << MAXCODELEN
))
430 maxlen
= -2; /* undercommitted */
431 /* Determine the code for each symbol. Mirrored, of course. */
432 for (i
= 0; i
< nsyms
; i
++) {
433 code
= startcode
[lengths
[i
]]++;
435 for (j
= 0; j
< lengths
[i
]; j
++) {
436 codes
[i
] = (codes
[i
] << 1) | (code
& 1);
445 * Adler32 checksum function.
447 static unsigned long adler32_update(unsigned long s
,
448 const unsigned char *data
, int len
)
450 unsigned s1
= s
& 0xFFFF, s2
= (s
>> 16) & 0xFFFF;
453 for (i
= 0; i
< len
; i
++) {
462 return ((s2
% 65521) << 16) | (s1
% 65521);
466 * CRC32 checksum function.
469 static unsigned long crc32_update(unsigned long crcword
,
470 const unsigned char *data
, int len
)
472 static const unsigned long crc32_table
[256] = {
473 0x00000000L
, 0x77073096L
, 0xEE0E612CL
, 0x990951BAL
,
474 0x076DC419L
, 0x706AF48FL
, 0xE963A535L
, 0x9E6495A3L
,
475 0x0EDB8832L
, 0x79DCB8A4L
, 0xE0D5E91EL
, 0x97D2D988L
,
476 0x09B64C2BL
, 0x7EB17CBDL
, 0xE7B82D07L
, 0x90BF1D91L
,
477 0x1DB71064L
, 0x6AB020F2L
, 0xF3B97148L
, 0x84BE41DEL
,
478 0x1ADAD47DL
, 0x6DDDE4EBL
, 0xF4D4B551L
, 0x83D385C7L
,
479 0x136C9856L
, 0x646BA8C0L
, 0xFD62F97AL
, 0x8A65C9ECL
,
480 0x14015C4FL
, 0x63066CD9L
, 0xFA0F3D63L
, 0x8D080DF5L
,
481 0x3B6E20C8L
, 0x4C69105EL
, 0xD56041E4L
, 0xA2677172L
,
482 0x3C03E4D1L
, 0x4B04D447L
, 0xD20D85FDL
, 0xA50AB56BL
,
483 0x35B5A8FAL
, 0x42B2986CL
, 0xDBBBC9D6L
, 0xACBCF940L
,
484 0x32D86CE3L
, 0x45DF5C75L
, 0xDCD60DCFL
, 0xABD13D59L
,
485 0x26D930ACL
, 0x51DE003AL
, 0xC8D75180L
, 0xBFD06116L
,
486 0x21B4F4B5L
, 0x56B3C423L
, 0xCFBA9599L
, 0xB8BDA50FL
,
487 0x2802B89EL
, 0x5F058808L
, 0xC60CD9B2L
, 0xB10BE924L
,
488 0x2F6F7C87L
, 0x58684C11L
, 0xC1611DABL
, 0xB6662D3DL
,
489 0x76DC4190L
, 0x01DB7106L
, 0x98D220BCL
, 0xEFD5102AL
,
490 0x71B18589L
, 0x06B6B51FL
, 0x9FBFE4A5L
, 0xE8B8D433L
,
491 0x7807C9A2L
, 0x0F00F934L
, 0x9609A88EL
, 0xE10E9818L
,
492 0x7F6A0DBBL
, 0x086D3D2DL
, 0x91646C97L
, 0xE6635C01L
,
493 0x6B6B51F4L
, 0x1C6C6162L
, 0x856530D8L
, 0xF262004EL
,
494 0x6C0695EDL
, 0x1B01A57BL
, 0x8208F4C1L
, 0xF50FC457L
,
495 0x65B0D9C6L
, 0x12B7E950L
, 0x8BBEB8EAL
, 0xFCB9887CL
,
496 0x62DD1DDFL
, 0x15DA2D49L
, 0x8CD37CF3L
, 0xFBD44C65L
,
497 0x4DB26158L
, 0x3AB551CEL
, 0xA3BC0074L
, 0xD4BB30E2L
,
498 0x4ADFA541L
, 0x3DD895D7L
, 0xA4D1C46DL
, 0xD3D6F4FBL
,
499 0x4369E96AL
, 0x346ED9FCL
, 0xAD678846L
, 0xDA60B8D0L
,
500 0x44042D73L
, 0x33031DE5L
, 0xAA0A4C5FL
, 0xDD0D7CC9L
,
501 0x5005713CL
, 0x270241AAL
, 0xBE0B1010L
, 0xC90C2086L
,
502 0x5768B525L
, 0x206F85B3L
, 0xB966D409L
, 0xCE61E49FL
,
503 0x5EDEF90EL
, 0x29D9C998L
, 0xB0D09822L
, 0xC7D7A8B4L
,
504 0x59B33D17L
, 0x2EB40D81L
, 0xB7BD5C3BL
, 0xC0BA6CADL
,
505 0xEDB88320L
, 0x9ABFB3B6L
, 0x03B6E20CL
, 0x74B1D29AL
,
506 0xEAD54739L
, 0x9DD277AFL
, 0x04DB2615L
, 0x73DC1683L
,
507 0xE3630B12L
, 0x94643B84L
, 0x0D6D6A3EL
, 0x7A6A5AA8L
,
508 0xE40ECF0BL
, 0x9309FF9DL
, 0x0A00AE27L
, 0x7D079EB1L
,
509 0xF00F9344L
, 0x8708A3D2L
, 0x1E01F268L
, 0x6906C2FEL
,
510 0xF762575DL
, 0x806567CBL
, 0x196C3671L
, 0x6E6B06E7L
,
511 0xFED41B76L
, 0x89D32BE0L
, 0x10DA7A5AL
, 0x67DD4ACCL
,
512 0xF9B9DF6FL
, 0x8EBEEFF9L
, 0x17B7BE43L
, 0x60B08ED5L
,
513 0xD6D6A3E8L
, 0xA1D1937EL
, 0x38D8C2C4L
, 0x4FDFF252L
,
514 0xD1BB67F1L
, 0xA6BC5767L
, 0x3FB506DDL
, 0x48B2364BL
,
515 0xD80D2BDAL
, 0xAF0A1B4CL
, 0x36034AF6L
, 0x41047A60L
,
516 0xDF60EFC3L
, 0xA867DF55L
, 0x316E8EEFL
, 0x4669BE79L
,
517 0xCB61B38CL
, 0xBC66831AL
, 0x256FD2A0L
, 0x5268E236L
,
518 0xCC0C7795L
, 0xBB0B4703L
, 0x220216B9L
, 0x5505262FL
,
519 0xC5BA3BBEL
, 0xB2BD0B28L
, 0x2BB45A92L
, 0x5CB36A04L
,
520 0xC2D7FFA7L
, 0xB5D0CF31L
, 0x2CD99E8BL
, 0x5BDEAE1DL
,
521 0x9B64C2B0L
, 0xEC63F226L
, 0x756AA39CL
, 0x026D930AL
,
522 0x9C0906A9L
, 0xEB0E363FL
, 0x72076785L
, 0x05005713L
,
523 0x95BF4A82L
, 0xE2B87A14L
, 0x7BB12BAEL
, 0x0CB61B38L
,
524 0x92D28E9BL
, 0xE5D5BE0DL
, 0x7CDCEFB7L
, 0x0BDBDF21L
,
525 0x86D3D2D4L
, 0xF1D4E242L
, 0x68DDB3F8L
, 0x1FDA836EL
,
526 0x81BE16CDL
, 0xF6B9265BL
, 0x6FB077E1L
, 0x18B74777L
,
527 0x88085AE6L
, 0xFF0F6A70L
, 0x66063BCAL
, 0x11010B5CL
,
528 0x8F659EFFL
, 0xF862AE69L
, 0x616BFFD3L
, 0x166CCF45L
,
529 0xA00AE278L
, 0xD70DD2EEL
, 0x4E048354L
, 0x3903B3C2L
,
530 0xA7672661L
, 0xD06016F7L
, 0x4969474DL
, 0x3E6E77DBL
,
531 0xAED16A4AL
, 0xD9D65ADCL
, 0x40DF0B66L
, 0x37D83BF0L
,
532 0xA9BCAE53L
, 0xDEBB9EC5L
, 0x47B2CF7FL
, 0x30B5FFE9L
,
533 0xBDBDF21CL
, 0xCABAC28AL
, 0x53B39330L
, 0x24B4A3A6L
,
534 0xBAD03605L
, 0xCDD70693L
, 0x54DE5729L
, 0x23D967BFL
,
535 0xB3667A2EL
, 0xC4614AB8L
, 0x5D681B02L
, 0x2A6F2B94L
,
536 0xB40BBE37L
, 0xC30C8EA1L
, 0x5A05DF1BL
, 0x2D02EF8DL
538 crcword
^= 0xFFFFFFFFL
;
540 unsigned long newbyte
= *data
++;
541 newbyte
^= crcword
& 0xFFL
;
542 crcword
= (crcword
>> 8) ^ crc32_table
[newbyte
];
544 return crcword
^ 0xFFFFFFFFL
;
548 short code
, extrabits
;
552 static const coderecord lencodes
[] = {
584 static const coderecord distcodes
[] = {
607 {22, 10, 2049, 3072},
608 {23, 10, 3073, 4096},
609 {24, 11, 4097, 6144},
610 {25, 11, 6145, 8192},
611 {26, 12, 8193, 12288},
612 {27, 12, 12289, 16384},
613 {28, 13, 16385, 24576},
614 {29, 13, 24577, 32768},
617 /* ----------------------------------------------------------------------
618 * Deflate compression.
621 #define SYMLIMIT 65536
622 #define SYMPFX_LITLEN 0x00000000U
623 #define SYMPFX_DIST 0x40000000U
624 #define SYMPFX_EXTRABITS 0x80000000U
625 #define SYMPFX_CODELEN 0xC0000000U
626 #define SYMPFX_MASK 0xC0000000U
628 #define SYM_EXTRABITS_MASK 0x3C000000U
629 #define SYM_EXTRABITS_SHIFT 26
632 unsigned char *len_litlen
;
634 unsigned char *len_dist
;
636 unsigned char *len_codelen
;
640 struct deflate_compress_ctx
{
641 struct LZ77Context
*lzc
;
642 unsigned char *outbuf
;
644 unsigned long outbits
;
650 unsigned long checksum
;
651 unsigned long datasize
;
654 unsigned char static_len1
[288], static_len2
[30];
655 int static_code1
[288], static_code2
[30];
658 unsigned long bitcount
;
662 static void outbits(deflate_compress_ctx
*out
,
663 unsigned long bits
, int nbits
)
665 assert(out
->noutbits
+ nbits
<= 32);
666 out
->outbits
|= bits
<< out
->noutbits
;
667 out
->noutbits
+= nbits
;
668 while (out
->noutbits
>= 8) {
669 if (out
->outlen
>= out
->outsize
) {
670 out
->outsize
= out
->outlen
+ 64;
671 out
->outbuf
= sresize(out
->outbuf
, out
->outsize
, unsigned char);
673 out
->outbuf
[out
->outlen
++] = (unsigned char) (out
->outbits
& 0xFF);
678 out
->bitcount
+= nbits
;
683 * Binary heap functions used by buildhuf(). Each one assumes the
684 * heap to be stored in an array of ints, with two ints per node
685 * (user data and key). They take in the old heap length, and
686 * return the new one.
688 #define HEAPPARENT(x) (((x)-2)/4*2)
689 #define HEAPLEFT(x) ((x)*2+2)
690 #define HEAPRIGHT(x) ((x)*2+4)
691 static int addheap(int *heap
, int len
, int userdata
, int key
)
696 heap
[len
++] = userdata
;
700 dad
= HEAPPARENT(me
);
701 if (heap
[me
+1] < heap
[dad
+1]) {
702 tmp
= heap
[me
]; heap
[me
] = heap
[dad
]; heap
[dad
] = tmp
;
703 tmp
= heap
[me
+1]; heap
[me
+1] = heap
[dad
+1]; heap
[dad
+1] = tmp
;
711 static int rmheap(int *heap
, int len
, int *userdata
, int *key
)
713 int me
, lc
, rc
, c
, tmp
;
719 heap
[1] = heap
[len
+1];
728 else if (rc
>= len
|| heap
[lc
+1] < heap
[rc
+1])
732 if (heap
[me
+1] > heap
[c
+1]) {
733 tmp
= heap
[me
]; heap
[me
] = heap
[c
]; heap
[c
] = tmp
;
734 tmp
= heap
[me
+1]; heap
[me
+1] = heap
[c
+1]; heap
[c
+1] = tmp
;
744 * The core of the Huffman algorithm: takes an input array of
745 * symbol frequencies, and produces an output array of code
748 * This is basically a generic Huffman implementation, but it has
749 * one zlib-related quirk which is that it caps the output code
750 * lengths to fit in an unsigned char (which is safe since Deflate
751 * will reject anything longer than 15 anyway). Anyone wanting to
752 * rip it out and use it in another context should find that easy
756 static void buildhuf(const int *freqs
, unsigned char *lengths
, int nsyms
)
758 int parent
[2*HUFMAX
-1];
759 int length
[2*HUFMAX
-1];
765 assert(nsyms
<= HUFMAX
);
767 memset(parent
, 0, sizeof(parent
));
770 * Begin by building the heap.
773 for (i
= 0; i
< nsyms
; i
++)
774 if (freqs
[i
] > 0) /* leave unused symbols out totally */
775 heapsize
= addheap(heap
, heapsize
, i
, freqs
[i
]);
778 * Now repeatedly take two elements off the heap and merge
782 while (heapsize
> 2) {
783 heapsize
= rmheap(heap
, heapsize
, &i
, &si
);
784 heapsize
= rmheap(heap
, heapsize
, &j
, &sj
);
787 heapsize
= addheap(heap
, heapsize
, n
, si
+ sj
);
792 * Now we have our tree, in the form of a link from each node
793 * to the index of its parent. Count back down the tree to
794 * determine the code lengths.
796 memset(length
, 0, sizeof(length
));
797 /* The tree root has length 0 after that, which is correct. */
800 length
[i
] = 1 + length
[parent
[i
]];
803 * And that's it. (Simple, wasn't it?) Copy the lengths into
804 * the output array and leave.
806 * Here we cap lengths to fit in unsigned char.
808 for (i
= 0; i
< nsyms
; i
++)
809 lengths
[i
] = (length
[i
] > 255 ?
255 : length
[i
]);
813 * Wrapper around buildhuf() which enforces the Deflate restriction
814 * that no code length may exceed 15 bits, or 7 for the auxiliary
815 * code length alphabet. This function has the same calling
816 * semantics as buildhuf(), except that it might modify the freqs
819 static void deflate_buildhuf(int *freqs
, unsigned char *lengths
,
820 int nsyms
, int limit
)
822 int smallestfreq
, totalfreq
, nactivesyms
;
823 int num
, denom
, adjust
;
828 * Nasty special case: if the frequency table has fewer than
829 * two non-zero elements, we must invent some, because we can't
830 * have fewer than one bit encoding a symbol.
835 for (i
= 0; i
< nsyms
; i
++)
839 for (i
= 0; i
< nsyms
&& count
> 0; i
++)
848 * First, try building the Huffman table the normal way. If
849 * this works, it's optimal, so we don't want to mess with it.
851 buildhuf(freqs
, lengths
, nsyms
);
853 for (i
= 0; i
< nsyms
; i
++)
854 if (lengths
[i
] > limit
)
861 * The Huffman algorithm can only ever generate a code length
862 * of N bits or more if there is a symbol whose probability is
863 * less than the reciprocal of the (N+2)th Fibonacci number
864 * (counting from F_0=0 and F_1=1), i.e. 1/2584 for N=16, or
865 * 1/55 for N=8. (This is a necessary though not sufficient
868 * Why is this? Well, consider the input symbol with the
869 * smallest probability. Let that probability be x. In order
870 * for this symbol to have a code length of at least 1, the
871 * Huffman algorithm will have to merge it with some other
872 * node; and since x is the smallest probability, the node it
873 * gets merged with must be at least x. Thus, the probability
874 * of the resulting combined node will be at least 2x. Now in
875 * order for our node to reach depth 2, this 2x-node must be
876 * merged again. But what with? We can't assume the node it
877 * merges with is at least 2x, because this one might only be
878 * the _second_ smallest remaining node. But we do know the
879 * node it merges with must be at least x, so our order-2
880 * internal node is at least 3x.
882 * How small a node can merge with _that_ to get an order-3
883 * internal node? Well, it must be at least 2x, because if it
884 * was smaller than that then it would have been one of the two
885 * smallest nodes in the previous step and been merged at that
886 * point. So at least 3x, plus at least 2x, comes to at least
887 * 5x for an order-3 node.
889 * And so it goes on: at every stage we must merge our current
890 * node with a node at least as big as the bigger of this one's
891 * two parents, and from this starting point that gives rise to
892 * the Fibonacci sequence. So we find that in order to have a
893 * node n levels deep (i.e. a maximum code length of n), the
894 * overall probability of the root of the entire tree must be
895 * at least F_{n+2} times the probability of the rarest symbol.
896 * In other words, since the overall probability is 1, it is a
897 * necessary condition for a code length of 16 or more that
898 * there must be at least one symbol with probability <=
901 * (To demonstrate that a probability this big really can give
902 * rise to a code length of 16, consider the set of input
903 * frequencies { 1-epsilon, 1, 1, 2, 3, 5, 8, 13, 21, 34, 55,
904 * 89, 144, 233, 377, 610, 987 }, for arbitrarily small
907 * So here buildhuf() has returned us an overlong code. So to
908 * ensure it doesn't do it again, we add a constant to all the
909 * (non-zero) symbol frequencies, causing them to become more
910 * balanced and removing the danger. We can then feed the
911 * results back to the standard buildhuf() and be
912 * assert()-level confident that the resulting code lengths
913 * contain nothing outside the permitted range.
915 assert(limit
== 15 || limit
== 7);
916 maxprob
= (limit
== 15 ?
2584 : 55); /* no point in computing full F_n */
917 totalfreq
= nactivesyms
= 0;
919 for (i
= 0; i
< nsyms
; i
++) {
922 if (smallestfreq
< 0 || smallestfreq
> freqs
[i
])
923 smallestfreq
= freqs
[i
];
924 totalfreq
+= freqs
[i
];
927 assert(smallestfreq
<= totalfreq
/ maxprob
);
930 * We want to find the smallest integer `adjust' such that
931 * (totalfreq + nactivesyms * adjust) / (smallestfreq +
932 * adjust) is less than maxprob. A bit of algebra tells us
933 * that the threshold value is equal to
935 * totalfreq - maxprob * smallestfreq
936 * ----------------------------------
937 * maxprob - nactivesyms
939 * rounded up, of course. And we'll only even be trying
942 num
= totalfreq
- smallestfreq
* maxprob
;
943 denom
= maxprob
- nactivesyms
;
944 adjust
= (num
+ denom
- 1) / denom
;
947 * Now add `adjust' to all the input symbol frequencies.
949 for (i
= 0; i
< nsyms
; i
++)
954 * Rebuild the Huffman tree...
956 buildhuf(freqs
, lengths
, nsyms
);
959 * ... and this time it ought to be OK.
961 for (i
= 0; i
< nsyms
; i
++)
962 assert(lengths
[i
] <= limit
);
966 * Compute the bit length of a symbol, given the three Huffman
969 static int symsize(unsigned sym
, const struct huftrees
*trees
)
971 unsigned basesym
= sym
&~ SYMPFX_MASK
;
973 switch (sym
& SYMPFX_MASK
) {
975 return trees
->len_litlen
[basesym
];
977 return trees
->len_dist
[basesym
];
979 return trees
->len_codelen
[basesym
];
980 default /*case SYMPFX_EXTRABITS*/:
981 return basesym
>> SYM_EXTRABITS_SHIFT
;
986 * Write out a single symbol, given the three Huffman trees.
988 static void writesym(deflate_compress_ctx
*out
,
989 unsigned sym
, const struct huftrees
*trees
)
991 unsigned basesym
= sym
&~ SYMPFX_MASK
;
994 switch (sym
& SYMPFX_MASK
) {
996 debug(("send: litlen %d\n", basesym
));
997 outbits(out
, trees
->code_litlen
[basesym
], trees
->len_litlen
[basesym
]);
1000 debug(("send: dist %d\n", basesym
));
1001 outbits(out
, trees
->code_dist
[basesym
], trees
->len_dist
[basesym
]);
1003 case SYMPFX_CODELEN
:
1004 debug(("send: codelen %d\n", basesym
));
1005 outbits(out
, trees
->code_codelen
[basesym
],trees
->len_codelen
[basesym
]);
1007 case SYMPFX_EXTRABITS
:
1008 i
= basesym
>> SYM_EXTRABITS_SHIFT
;
1009 basesym
&= ~SYM_EXTRABITS_MASK
;
1010 debug(("send: extrabits %d/%d\n", basesym
, i
));
1011 outbits(out
, basesym
, i
);
1017 * outblock() must output _either_ a dynamic block of length
1018 * `dynamic_len', _or_ a static block of length `static_len', but
1019 * it gets to choose which.
1021 static void outblock(deflate_compress_ctx
*out
,
1022 int dynamic_len
, int static_len
)
1024 int freqs1
[286], freqs2
[30], freqs3
[19];
1025 unsigned char len1
[286], len2
[30], len3
[19];
1026 int code1
[286], code2
[30], code3
[19];
1027 int hlit
, hdist
, hclen
, bfinal
, btype
;
1028 int treesrc
[286 + 30];
1029 int treesyms
[286 + 30];
1031 int i
, ntreesrc
, ntreesyms
;
1032 int dynamic
, blklen
;
1033 struct huftrees dht
;
1034 const struct huftrees
*ht
;
1036 unsigned long bitcount_before
;
1039 dht
.len_litlen
= len1
;
1040 dht
.len_dist
= len2
;
1041 dht
.len_codelen
= len3
;
1042 dht
.code_litlen
= code1
;
1043 dht
.code_dist
= code2
;
1044 dht
.code_codelen
= code3
;
1047 * We make our choice of block to output by doing all the
1048 * detailed work to determine the exact length of each possible
1049 * block. Then we choose the one which has fewest output bits
1054 * First build the two main Huffman trees for the dynamic
1059 * Count up the frequency tables.
1061 memset(freqs1
, 0, sizeof(freqs1
));
1062 memset(freqs2
, 0, sizeof(freqs2
));
1063 freqs1
[256] = 1; /* we're bound to need one EOB */
1064 for (i
= 0; i
< dynamic_len
; i
++) {
1065 unsigned sym
= out
->syms
[(out
->symstart
+ i
) % SYMLIMIT
];
1068 * Increment the occurrence counter for this symbol, if
1069 * it's in one of the Huffman alphabets and isn't extra
1072 if ((sym
& SYMPFX_MASK
) == SYMPFX_LITLEN
) {
1073 sym
&= ~SYMPFX_MASK
;
1074 assert(sym
< lenof(freqs1
));
1076 } else if ((sym
& SYMPFX_MASK
) == SYMPFX_DIST
) {
1077 sym
&= ~SYMPFX_MASK
;
1078 assert(sym
< lenof(freqs2
));
1082 deflate_buildhuf(freqs1
, len1
, lenof(freqs1
), 15);
1083 deflate_buildhuf(freqs2
, len2
, lenof(freqs2
), 15);
1084 hufcodes(len1
, code1
, lenof(freqs1
));
1085 hufcodes(len2
, code2
, lenof(freqs2
));
1088 * Determine HLIT and HDIST.
1090 for (hlit
= 286; hlit
> 257 && len1
[hlit
-1] == 0; hlit
--);
1091 for (hdist
= 30; hdist
> 1 && len2
[hdist
-1] == 0; hdist
--);
1094 * Write out the list of symbols used to transmit the
1098 for (i
= 0; i
< hlit
; i
++)
1099 treesrc
[ntreesrc
++] = len1
[i
];
1100 for (i
= 0; i
< hdist
; i
++)
1101 treesrc
[ntreesrc
++] = len2
[i
];
1103 for (i
= 0; i
< ntreesrc
;) {
1107 /* Find length of run of the same length code. */
1108 while (i
+j
< ntreesrc
&& treesrc
[i
+j
] == treesrc
[i
])
1111 /* Encode that run as economically as we can. */
1113 if (treesrc
[i
] == 0) {
1115 * Zero code length: we can output run codes for
1116 * 3-138 zeroes. So if we have fewer than 3 zeroes,
1117 * we just output literals. Otherwise, we output
1118 * nothing but run codes, and tweak their lengths
1119 * to make sure we aren't left with under 3 at the
1124 treesyms
[ntreesyms
++] = 0 | SYMPFX_CODELEN
;
1127 int rpt
= (k
< 138 ? k
: 138);
1128 if (rpt
> k
-3 && rpt
< k
)
1130 assert(rpt
>= 3 && rpt
<= 138);
1132 treesyms
[ntreesyms
++] = 17 | SYMPFX_CODELEN
;
1133 treesyms
[ntreesyms
++] =
1134 (SYMPFX_EXTRABITS
| (rpt
- 3) |
1135 (3 << SYM_EXTRABITS_SHIFT
));
1137 treesyms
[ntreesyms
++] = 18 | SYMPFX_CODELEN
;
1138 treesyms
[ntreesyms
++] =
1139 (SYMPFX_EXTRABITS
| (rpt
- 11) |
1140 (7 << SYM_EXTRABITS_SHIFT
));
1147 * Non-zero code length: we must output the first
1148 * one explicitly, then we can output a copy code
1149 * for 3-6 repeats. So if we have fewer than 4
1150 * repeats, we _just_ output literals. Otherwise,
1151 * we output one literal plus at least one copy
1152 * code, and tweak the copy codes to make sure we
1153 * aren't left with under 3 at the end.
1155 assert(treesrc
[i
] < 16);
1156 treesyms
[ntreesyms
++] = treesrc
[i
] | SYMPFX_CODELEN
;
1160 treesyms
[ntreesyms
++] = treesrc
[i
] | SYMPFX_CODELEN
;
1163 int rpt
= (k
< 6 ? k
: 6);
1164 if (rpt
> k
-3 && rpt
< k
)
1166 assert(rpt
>= 3 && rpt
<= 6);
1167 treesyms
[ntreesyms
++] = 16 | SYMPFX_CODELEN
;
1168 treesyms
[ntreesyms
++] = (SYMPFX_EXTRABITS
| (rpt
- 3) |
1169 (2 << SYM_EXTRABITS_SHIFT
));
1177 assert((unsigned)ntreesyms
< lenof(treesyms
));
1180 * Count up the frequency table for the tree-transmission
1181 * symbols, and build the auxiliary Huffman tree for that.
1183 memset(freqs3
, 0, sizeof(freqs3
));
1184 for (i
= 0; i
< ntreesyms
; i
++) {
1185 unsigned sym
= treesyms
[i
];
1188 * Increment the occurrence counter for this symbol, if
1189 * it's the Huffman alphabet and isn't extra bits.
1191 if ((sym
& SYMPFX_MASK
) == SYMPFX_CODELEN
) {
1192 sym
&= ~SYMPFX_MASK
;
1193 assert(sym
< lenof(freqs3
));
1197 deflate_buildhuf(freqs3
, len3
, lenof(freqs3
), 7);
1198 hufcodes(len3
, code3
, lenof(freqs3
));
1201 * Reorder the code length codes into transmission order, and
1204 for (i
= 0; i
< 19; i
++)
1205 codelen
[i
] = len3
[lenlenmap
[i
]];
1206 for (hclen
= 19; hclen
> 4 && codelen
[hclen
-1] == 0; hclen
--);
1209 * Now work out the exact size of both the dynamic and the
1210 * static block, in bits.
1216 * First the dynamic block.
1218 dsize
= 3 + 5 + 5 + 4; /* 3-bit header, HLIT, HDIST, HCLEN */
1219 dsize
+= 3 * hclen
; /* code-length-alphabet code lengths */
1221 for (i
= 0; i
< ntreesyms
; i
++)
1222 dsize
+= symsize(treesyms
[i
], &dht
);
1223 /* The actual block data */
1224 for (i
= 0; i
< dynamic_len
; i
++) {
1225 unsigned sym
= out
->syms
[(out
->symstart
+ i
) % SYMLIMIT
];
1226 dsize
+= symsize(sym
, &dht
);
1228 /* And the end-of-data symbol. */
1229 dsize
+= symsize(SYMPFX_LITLEN
| 256, &dht
);
1232 * Now the static block.
1234 ssize
= 3; /* 3-bit block header */
1235 /* The actual block data */
1236 for (i
= 0; i
< static_len
; i
++) {
1237 unsigned sym
= out
->syms
[(out
->symstart
+ i
) % SYMLIMIT
];
1238 ssize
+= symsize(sym
, &out
->sht
);
1240 /* And the end-of-data symbol. */
1241 ssize
+= symsize(SYMPFX_LITLEN
| 256, &out
->sht
);
1244 * Compare the two and decide which to output. We break
1245 * exact ties in favour of the static block, because of the
1246 * special case in which that block has zero length.
1248 dynamic
= ((double)ssize
* dynamic_len
> (double)dsize
* static_len
);
1249 ht
= dynamic ?
&dht
: &out
->sht
;
1250 blklen
= dynamic ? dynamic_len
: static_len
;
1254 * Actually transmit the block.
1257 /* 3-bit block header */
1258 bfinal
= (out
->lastblock ?
1 : 0);
1259 btype
= dynamic ?
2 : 1;
1260 debug(("send: bfinal=%d btype=%d\n", bfinal
, btype
));
1261 outbits(out
, bfinal
, 1);
1262 outbits(out
, btype
, 2);
1265 bitcount_before
= out
->bitcount
;
1269 /* HLIT, HDIST and HCLEN */
1270 debug(("send: hlit=%d hdist=%d hclen=%d\n", hlit
, hdist
, hclen
));
1271 outbits(out
, hlit
- 257, 5);
1272 outbits(out
, hdist
- 1, 5);
1273 outbits(out
, hclen
- 4, 4);
1275 /* Code lengths for the auxiliary tree */
1276 for (i
= 0; i
< hclen
; i
++) {
1277 debug(("send: lenlen %d\n", codelen
[i
]));
1278 outbits(out
, codelen
[i
], 3);
1281 /* Code lengths for the literal/length and distance trees */
1282 for (i
= 0; i
< ntreesyms
; i
++)
1283 writesym(out
, treesyms
[i
], ht
);
1285 fprintf(stderr
, "total tree size %lu bits\n",
1286 out
->bitcount
- bitcount_before
);
1290 /* Output the actual symbols from the buffer */
1291 for (i
= 0; i
< blklen
; i
++) {
1292 unsigned sym
= out
->syms
[(out
->symstart
+ i
) % SYMLIMIT
];
1293 writesym(out
, sym
, ht
);
1296 /* Output the end-of-data symbol */
1297 writesym(out
, SYMPFX_LITLEN
| 256, ht
);
1300 * Remove all the just-output symbols from the symbol buffer by
1301 * adjusting symstart and nsyms.
1303 out
->symstart
= (out
->symstart
+ blklen
) % SYMLIMIT
;
1304 out
->nsyms
-= blklen
;
1308 * Give the approximate log-base-2 of an input integer, measured in
1309 * 8ths of a bit. (I.e. this computes an integer approximation to
1312 static int approxlog2(unsigned x
)
1317 * Binary-search to get the top bit of x up to bit 31.
1319 if (x
< 0x00010000U
) x
<<= 16, ret
-= 16*8;
1320 if (x
< 0x01000000U
) x
<<= 8, ret
-= 8*8;
1321 if (x
< 0x10000000U
) x
<<= 4, ret
-= 4*8;
1322 if (x
< 0x40000000U
) x
<<= 2, ret
-= 2*8;
1323 if (x
< 0x80000000U
) x
<<= 1, ret
-= 1*8;
1326 * Now we know the logarithm we want is in [ret,ret+1).
1327 * Determine the bottom three bits by checking against
1330 * (Each of these threshold values is 0x80000000 times an odd
1331 * power of 2^(1/16). Therefore, this function rounds to
1334 if (x
<= 0xAD583EEAU
) {
1335 if (x
<= 0x91C3D373U
)
1336 ret
+= (x
<= 0x85AAC367U ?
0 : 1);
1338 ret
+= (x
<= 0x9EF53260U ?
2 : 3);
1340 if (x
<= 0xCE248C15U
)
1341 ret
+= (x
<= 0xBD08A39FU ?
4 : 5);
1343 ret
+= (x
<= 0xE0CCDEECU ?
6 : x
<= 0xF5257D15L ?
7 : 8);
1349 static void chooseblock(deflate_compress_ctx
*out
)
1351 int freqs1
[286], freqs2
[30];
1352 int i
, len
, bestlen
, longestlen
= 0;
1356 memset(freqs1
, 0, sizeof(freqs1
));
1357 memset(freqs2
, 0, sizeof(freqs2
));
1358 freqs1
[256] = 1; /* we're bound to need one EOB */
1363 * Iterate over all possible block lengths, computing the
1364 * entropic coding approximation to the final length at every
1365 * stage. We divide the result by the number of symbols
1366 * encoded, to determine the `value for money' (overall
1367 * bits-per-symbol count) of a block of that length.
1372 len
= 300 * 8; /* very approximate size of the Huffman trees */
1374 for (i
= 0; i
< out
->nsyms
; i
++) {
1375 unsigned sym
= out
->syms
[(out
->symstart
+ i
) % SYMLIMIT
];
1377 if (i
> 0 && (sym
& SYMPFX_MASK
) == SYMPFX_LITLEN
) {
1379 * This is a viable point at which to end the block.
1380 * Compute the value for money.
1382 int vfm
= i
* 32768 / len
; /* symbols encoded per bit */
1384 if (bestlen
< 0 || vfm
> bestvfm
) {
1393 * Increment the occurrence counter for this symbol, if
1394 * it's in one of the Huffman alphabets and isn't extra
1397 if ((sym
& SYMPFX_MASK
) == SYMPFX_LITLEN
) {
1398 sym
&= ~SYMPFX_MASK
;
1399 assert(sym
< lenof(freqs1
));
1400 len
+= freqs1
[sym
] * approxlog2(freqs1
[sym
]);
1401 len
-= total1
* approxlog2(total1
);
1404 len
-= freqs1
[sym
] * approxlog2(freqs1
[sym
]);
1405 len
+= total1
* approxlog2(total1
);
1406 } else if ((sym
& SYMPFX_MASK
) == SYMPFX_DIST
) {
1407 sym
&= ~SYMPFX_MASK
;
1408 assert(sym
< lenof(freqs2
));
1409 len
+= freqs2
[sym
] * approxlog2(freqs2
[sym
]);
1410 len
-= total2
* approxlog2(total2
);
1413 len
-= freqs2
[sym
] * approxlog2(freqs2
[sym
]);
1414 len
+= total2
* approxlog2(total2
);
1415 } else if ((sym
& SYMPFX_MASK
) == SYMPFX_EXTRABITS
) {
1416 len
+= 8 * ((sym
&~ SYMPFX_MASK
) >> SYM_EXTRABITS_SHIFT
);
1420 assert(bestlen
> 0);
1422 outblock(out
, bestlen
, longestlen
);
1426 * Force the current symbol buffer to be flushed out as a single
1429 static void flushblock(deflate_compress_ctx
*out
)
1432 * No need to check that out->nsyms is a valid block length: we
1433 * know it has to be, because flushblock() is called in between
1434 * two matches/literals.
1436 outblock(out
, out
->nsyms
, out
->nsyms
);
1437 assert(out
->nsyms
== 0);
1441 * Place a symbol into the symbols buffer.
1443 static void outsym(deflate_compress_ctx
*out
, unsigned long sym
)
1445 assert(out
->nsyms
< SYMLIMIT
);
1446 out
->syms
[(out
->symstart
+ out
->nsyms
++) % SYMLIMIT
] = sym
;
1448 if (out
->nsyms
== SYMLIMIT
)
1452 static void literal(struct LZ77Context
*ectx
, unsigned char c
)
1454 deflate_compress_ctx
*out
= (deflate_compress_ctx
*) ectx
->userdata
;
1456 outsym(out
, SYMPFX_LITLEN
| c
);
1459 static void match(struct LZ77Context
*ectx
, int distance
, int len
)
1461 const coderecord
*d
, *l
;
1463 deflate_compress_ctx
*out
= (deflate_compress_ctx
*) ectx
->userdata
;
1469 * We can transmit matches of lengths 3 through 258
1470 * inclusive. So if len exceeds 258, we must transmit in
1471 * several steps, with 258 or less in each step.
1473 * Specifically: if len >= 261, we can transmit 258 and be
1474 * sure of having at least 3 left for the next step. And if
1475 * len <= 258, we can just transmit len. But if len == 259
1476 * or 260, we must transmit len-3.
1478 thislen
= (len
> 260 ?
258 : len
<= 258 ? len
: len
- 3);
1482 * Binary-search to find which length code we're
1486 j
= sizeof(lencodes
) / sizeof(*lencodes
);
1490 if (thislen
< lencodes
[k
].min
)
1492 else if (thislen
> lencodes
[k
].max
)
1496 break; /* found it! */
1501 * Transmit the length code.
1503 outsym(out
, SYMPFX_LITLEN
| l
->code
);
1506 * Transmit the extra bits.
1509 outsym(out
, (SYMPFX_EXTRABITS
| (thislen
- l
->min
) |
1510 (l
->extrabits
<< SYM_EXTRABITS_SHIFT
)));
1514 * Binary-search to find which distance code we're
1518 j
= sizeof(distcodes
) / sizeof(*distcodes
);
1522 if (distance
< distcodes
[k
].min
)
1524 else if (distance
> distcodes
[k
].max
)
1528 break; /* found it! */
1533 * Write the distance code.
1535 outsym(out
, SYMPFX_DIST
| d
->code
);
1538 * Transmit the extra bits.
1541 outsym(out
, (SYMPFX_EXTRABITS
| (distance
- d
->min
) |
1542 (d
->extrabits
<< SYM_EXTRABITS_SHIFT
)));
1547 deflate_compress_ctx
*deflate_compress_new(int type
)
1549 deflate_compress_ctx
*out
;
1550 struct LZ77Context
*ectx
= snew(struct LZ77Context
);
1553 ectx
->literal
= literal
;
1554 ectx
->match
= match
;
1556 out
= snew(deflate_compress_ctx
);
1558 out
->outbits
= out
->noutbits
= 0;
1559 out
->firstblock
= TRUE
;
1564 out
->syms
= snewn(SYMLIMIT
, unsigned long);
1565 out
->symstart
= out
->nsyms
= 0;
1567 out
->checksum
= (type
== DEFLATE_TYPE_ZLIB ?
1 : 0);
1569 out
->lastblock
= FALSE
;
1570 out
->finished
= FALSE
;
1573 * Build the static Huffman tables now, so we'll have them
1574 * available every time outblock() is called.
1579 for (i
= 0; i
< (int)lenof(out
->static_len1
); i
++)
1580 out
->static_len1
[i
] = (i
< 144 ?
8 :
1583 for (i
= 0; i
< (int)lenof(out
->static_len2
); i
++)
1584 out
->static_len2
[i
] = 5;
1586 hufcodes(out
->static_len1
, out
->static_code1
, lenof(out
->static_code1
));
1587 hufcodes(out
->static_len2
, out
->static_code2
, lenof(out
->static_code2
));
1588 out
->sht
.len_litlen
= out
->static_len1
;
1589 out
->sht
.len_dist
= out
->static_len2
;
1590 out
->sht
.len_codelen
= NULL
;
1591 out
->sht
.code_litlen
= out
->static_code1
;
1592 out
->sht
.code_dist
= out
->static_code2
;
1593 out
->sht
.code_codelen
= NULL
;
1595 ectx
->userdata
= out
;
1601 void deflate_compress_free(deflate_compress_ctx
*out
)
1603 struct LZ77Context
*ectx
= out
->lzc
;
1611 void deflate_compress_data(deflate_compress_ctx
*out
,
1612 const void *vblock
, int len
, int flushtype
,
1613 void **outblock
, int *outlen
)
1615 struct LZ77Context
*ectx
= out
->lzc
;
1616 const unsigned char *block
= (const unsigned char *)vblock
;
1618 assert(!out
->finished
);
1621 out
->outlen
= out
->outsize
= 0;
1624 * If this is the first block, output the header.
1626 if (out
->firstblock
) {
1627 switch (out
->type
) {
1628 case DEFLATE_TYPE_BARE
:
1629 break; /* no header */
1630 case DEFLATE_TYPE_ZLIB
:
1632 * zlib (RFC1950) header bytes: 78 9C. (Deflate
1633 * compression, 32K window size, default algorithm.)
1635 outbits(out
, 0x9C78, 16);
1637 case DEFLATE_TYPE_GZIP
:
1639 * Minimal gzip (RFC1952) header:
1641 * - basic header of 1F 8B
1642 * - compression method byte (8 = deflate)
1643 * - flags byte (zero: we use no optional features)
1644 * - modification time (zero: no time stamp available)
1645 * - extra flags byte (2: we use maximum compression
1647 * - operating system byte (255: we do not specify)
1649 outbits(out
, 0x00088B1F, 32); /* header, CM, flags */
1650 outbits(out
, 0, 32); /* mtime */
1651 outbits(out
, 0xFF02, 16); /* xflags, OS */
1654 out
->firstblock
= FALSE
;
1658 * Feed our data to the LZ77 compression phase.
1660 lz77_compress(ectx
, block
, len
, TRUE
);
1663 * Update checksums and counters.
1665 switch (out
->type
) {
1666 case DEFLATE_TYPE_ZLIB
:
1667 out
->checksum
= adler32_update(out
->checksum
, block
, len
);
1669 case DEFLATE_TYPE_GZIP
:
1670 out
->checksum
= crc32_update(out
->checksum
, block
, len
);
1673 out
->datasize
+= len
;
1675 switch (flushtype
) {
1677 * FIXME: what other flush types are available and useful?
1678 * In PuTTY, it was clear that we generally wanted to be in
1679 * a static block so it was safe to open one. Here, we
1680 * probably prefer to be _outside_ a block if we can. Think
1683 case DEFLATE_NO_FLUSH
:
1684 break; /* don't flush any data at all (duh) */
1685 case DEFLATE_SYNC_FLUSH
:
1687 * Close the current block.
1692 * Then output an empty _uncompressed_ block: send 000,
1693 * then sync to byte boundary, then send bytes 00 00 FF
1698 outbits(out
, 0, 8 - out
->noutbits
);
1699 outbits(out
, 0, 16);
1700 outbits(out
, 0xFFFF, 16);
1702 case DEFLATE_END_OF_DATA
:
1704 * Output a block with BFINAL set.
1706 out
->lastblock
= TRUE
;
1710 * Sync to byte boundary, flushing out the final byte.
1713 outbits(out
, 0, 8 - out
->noutbits
);
1716 * Format-specific trailer data.
1718 switch (out
->type
) {
1719 case DEFLATE_TYPE_ZLIB
:
1721 * Just write out the Adler32 checksum.
1723 outbits(out
, (out
->checksum
>> 24) & 0xFF, 8);
1724 outbits(out
, (out
->checksum
>> 16) & 0xFF, 8);
1725 outbits(out
, (out
->checksum
>> 8) & 0xFF, 8);
1726 outbits(out
, (out
->checksum
>> 0) & 0xFF, 8);
1728 case DEFLATE_TYPE_GZIP
:
1730 * Write out the CRC32 checksum and the data length.
1732 outbits(out
, out
->checksum
, 32);
1733 outbits(out
, out
->datasize
, 32);
1737 out
->finished
= TRUE
;
1742 * Return any data that we've generated.
1744 *outblock
= (void *)out
->outbuf
;
1745 *outlen
= out
->outlen
;
1748 /* ----------------------------------------------------------------------
1749 * Deflate decompression.
1753 * The way we work the Huffman decode is to have a table lookup on
1754 * the first N bits of the input stream (in the order they arrive,
1755 * of course, i.e. the first bit of the Huffman code is in bit 0).
1756 * Each table entry lists the number of bits to consume, plus
1757 * either an output code or a pointer to a secondary table.
1763 unsigned char nbits
;
1765 struct table
*nexttable
;
1769 int mask
; /* mask applied to input bit stream */
1770 struct tableentry
*table
;
1775 #define DWINSIZE 32768
1778 * Build a single-level decode table for elements
1779 * [minlength,maxlength) of the provided code/length tables, and
1780 * recurse to build subtables.
1782 static struct table
*mkonetab(int *codes
, unsigned char *lengths
, int nsyms
,
1783 int pfx
, int pfxbits
, int bits
)
1785 struct table
*tab
= snew(struct table
);
1786 int pfxmask
= (1 << pfxbits
) - 1;
1787 int nbits
, i
, j
, code
;
1789 tab
->table
= snewn(1 << bits
, struct tableentry
);
1790 tab
->mask
= (1 << bits
) - 1;
1792 for (code
= 0; code
<= tab
->mask
; code
++) {
1793 tab
->table
[code
].code
= -1;
1794 tab
->table
[code
].nbits
= 0;
1795 tab
->table
[code
].nexttable
= NULL
;
1798 for (i
= 0; i
< nsyms
; i
++) {
1799 if (lengths
[i
] <= pfxbits
|| (codes
[i
] & pfxmask
) != pfx
)
1801 code
= (codes
[i
] >> pfxbits
) & tab
->mask
;
1802 for (j
= code
; j
<= tab
->mask
; j
+= 1 << (lengths
[i
] - pfxbits
)) {
1803 tab
->table
[j
].code
= i
;
1804 nbits
= lengths
[i
] - pfxbits
;
1805 if (tab
->table
[j
].nbits
< nbits
)
1806 tab
->table
[j
].nbits
= nbits
;
1809 for (code
= 0; code
<= tab
->mask
; code
++) {
1810 if (tab
->table
[code
].nbits
<= bits
)
1812 /* Generate a subtable. */
1813 tab
->table
[code
].code
= -1;
1814 nbits
= tab
->table
[code
].nbits
- bits
;
1817 tab
->table
[code
].nbits
= bits
;
1818 tab
->table
[code
].nexttable
= mkonetab(codes
, lengths
, nsyms
,
1819 pfx
| (code
<< pfxbits
),
1820 pfxbits
+ bits
, nbits
);
1827 * Build a decode table, given a set of Huffman tree lengths.
1829 static struct table
*mktable(unsigned char *lengths
, int nlengths
,
1831 const char *alphabet
,
1839 if (alphabet
&& analyse_level
> 1) {
1841 printf("code lengths for %s alphabet:\n", alphabet
);
1842 for (i
= 0; i
< nlengths
; i
++) {
1843 col
+= printf("%3d", lengths
[i
]);
1854 maxlen
= hufcodes(lengths
, codes
, nlengths
);
1857 *error
= (maxlen
== -1 ? DEFLATE_ERR_LARGE_HUFTABLE
:
1858 DEFLATE_ERR_SMALL_HUFTABLE
);
1863 * Now we have the complete list of Huffman codes. Build a
1866 return mkonetab(codes
, lengths
, nlengths
, 0, 0, maxlen
< 9 ? maxlen
: 9);
1869 static int freetable(struct table
**ztab
)
1882 for (code
= 0; code
<= tab
->mask
; code
++)
1883 if (tab
->table
[code
].nexttable
!= NULL
)
1884 freetable(&tab
->table
[code
].nexttable
);
1895 struct deflate_decompress_ctx
{
1896 struct table
*staticlentable
, *staticdisttable
;
1897 struct table
*currlentable
, *currdisttable
, *lenlentable
;
1900 GZIPSTART
, GZIPMETHFLAGS
, GZIPIGNORE1
, GZIPIGNORE2
, GZIPIGNORE3
,
1901 GZIPEXTRA
, GZIPFNAME
, GZIPCOMMENT
,
1902 OUTSIDEBLK
, TREES_HDR
, TREES_LENLEN
, TREES_LEN
, TREES_LENREP
,
1903 INBLK
, GOTLENSYM
, GOTLEN
, GOTDISTSYM
,
1904 UNCOMP_LEN
, UNCOMP_NLEN
, UNCOMP_DATA
,
1907 CRC1
, CRC2
, ILEN1
, ILEN2
,
1910 int sym
, hlit
, hdist
, hclen
, lenptr
, lenextrabits
, lenaddon
, len
,
1913 unsigned char lenlen
[19];
1914 unsigned char lengths
[286 + 32];
1917 unsigned char window
[DWINSIZE
];
1919 unsigned char *outblk
;
1920 int outlen
, outsize
;
1922 unsigned long checksum
;
1923 unsigned long bytesout
;
1924 int gzflags
, gzextralen
;
1927 int bitcount_before
;
1928 #define BITCOUNT(dctx) ( (dctx)->bytesread * 8 - (dctx)->nbits )
1932 deflate_decompress_ctx
*deflate_decompress_new(int type
)
1934 deflate_decompress_ctx
*dctx
= snew(deflate_decompress_ctx
);
1935 unsigned char lengths
[288];
1937 memset(lengths
, 8, 144);
1938 memset(lengths
+ 144, 9, 256 - 144);
1939 memset(lengths
+ 256, 7, 280 - 256);
1940 memset(lengths
+ 280, 8, 288 - 280);
1941 dctx
->staticlentable
= mktable(lengths
, 288,
1946 assert(dctx
->staticlentable
);
1947 memset(lengths
, 5, 32);
1948 dctx
->staticdisttable
= mktable(lengths
, 32,
1953 assert(dctx
->staticdisttable
);
1954 dctx
->state
= (type
== DEFLATE_TYPE_ZLIB ? ZLIBSTART
:
1955 type
== DEFLATE_TYPE_GZIP ? GZIPSTART
:
1957 dctx
->currlentable
= dctx
->currdisttable
= dctx
->lenlentable
= NULL
;
1962 dctx
->lastblock
= FALSE
;
1963 dctx
->checksum
= (type
== DEFLATE_TYPE_ZLIB ?
1 : 0);
1965 dctx
->gzflags
= dctx
->gzextralen
= 0;
1967 dctx
->bytesread
= dctx
->bitcount_before
= 0;
1973 void deflate_decompress_free(deflate_decompress_ctx
*dctx
)
1975 if (dctx
->currlentable
&& dctx
->currlentable
!= dctx
->staticlentable
)
1976 freetable(&dctx
->currlentable
);
1977 if (dctx
->currdisttable
&& dctx
->currdisttable
!= dctx
->staticdisttable
)
1978 freetable(&dctx
->currdisttable
);
1979 if (dctx
->lenlentable
)
1980 freetable(&dctx
->lenlentable
);
1981 freetable(&dctx
->staticlentable
);
1982 freetable(&dctx
->staticdisttable
);
1986 static int huflookup(unsigned long *bitsp
, int *nbitsp
, struct table
*tab
)
1988 unsigned long bits
= *bitsp
;
1989 int nbits
= *nbitsp
;
1991 struct tableentry
*ent
;
1992 ent
= &tab
->table
[bits
& tab
->mask
];
1993 if (ent
->nbits
> nbits
)
1994 return -1; /* not enough data */
1995 bits
>>= ent
->nbits
;
1996 nbits
-= ent
->nbits
;
1997 if (ent
->code
== -1)
1998 tab
= ent
->nexttable
;
2006 * If we reach here with `tab' null, it can only be because
2007 * there was a missing entry in the Huffman table. This
2008 * should never occur even with invalid input data, because
2009 * we enforce at mktable time that the Huffman codes should
2010 * precisely cover the code space; so we can enforce this
2017 static void emit_char(deflate_decompress_ctx
*dctx
, int c
)
2019 dctx
->window
[dctx
->winpos
] = c
;
2020 dctx
->winpos
= (dctx
->winpos
+ 1) & (DWINSIZE
- 1);
2021 if (dctx
->outlen
>= dctx
->outsize
) {
2022 dctx
->outsize
= dctx
->outlen
* 3 / 2 + 512;
2023 dctx
->outblk
= sresize(dctx
->outblk
, dctx
->outsize
, unsigned char);
2025 if (dctx
->type
== DEFLATE_TYPE_ZLIB
) {
2026 unsigned char uc
= c
;
2027 dctx
->checksum
= adler32_update(dctx
->checksum
, &uc
, 1);
2028 } else if (dctx
->type
== DEFLATE_TYPE_GZIP
) {
2029 unsigned char uc
= c
;
2030 dctx
->checksum
= crc32_update(dctx
->checksum
, &uc
, 1);
2032 dctx
->outblk
[dctx
->outlen
++] = c
;
2036 #define EATBITS(n) ( dctx->nbits -= (n), dctx->bits >>= (n) )
2038 int deflate_decompress_data(deflate_decompress_ctx
*dctx
,
2039 const void *vblock
, int len
,
2040 void **outblock
, int *outlen
)
2042 const coderecord
*rec
;
2043 const unsigned char *block
= (const unsigned char *)vblock
;
2044 int code
, bfinal
, btype
, rep
, dist
, nlen
, header
;
2045 unsigned long cksum
;
2051 if (dctx
->state
!= FINALSPIN
)
2052 return DEFLATE_ERR_UNEXPECTED_EOF
;
2057 dctx
->outblk
= NULL
;
2061 while (len
> 0 || dctx
->nbits
> 0) {
2062 while (dctx
->nbits
< 24 && len
> 0) {
2063 dctx
->bits
|= (*block
++) << dctx
->nbits
;
2070 switch (dctx
->state
) {
2072 /* Expect 16-bit zlib header. */
2073 if (dctx
->nbits
< 16)
2074 goto finished
; /* done all we can */
2077 * The header is stored as a big-endian 16-bit integer,
2078 * in contrast to the general little-endian policy in
2079 * the rest of the format :-(
2081 header
= (((dctx
->bits
& 0xFF00) >> 8) |
2082 ((dctx
->bits
& 0x00FF) << 8));
2088 * - bits 8-11 should be 1000 (Deflate/RFC1951)
2089 * - bits 12-15 should be at most 0111 (window size)
2090 * - bit 5 should be zero (no dictionary present)
2091 * - we don't care about bits 6-7 (compression rate)
2092 * - bits 0-4 should be set up to make the whole thing
2093 * a multiple of 31 (checksum).
2095 if ((header
& 0xF000) > 0x7000 ||
2096 (header
& 0x0020) != 0x0000 ||
2097 (header
% 31) != 0) {
2098 error
= DEFLATE_ERR_ZLIB_HEADER
;
2101 if ((header
& 0x0F00) != 0x0800) {
2102 error
= DEFLATE_ERR_ZLIB_WRONGCOMP
;
2105 dctx
->state
= OUTSIDEBLK
;
2108 /* Expect 16-bit gzip header. */
2109 if (dctx
->nbits
< 16)
2111 header
= dctx
->bits
& 0xFFFF;
2113 if (header
!= 0x8B1F) {
2114 error
= DEFLATE_ERR_GZIP_HEADER
;
2117 dctx
->state
= GZIPMETHFLAGS
;
2120 /* Expect gzip compression method and flags bytes. */
2121 if (dctx
->nbits
< 16)
2123 header
= dctx
->bits
& 0xFF;
2126 error
= DEFLATE_ERR_GZIP_WRONGCOMP
;
2129 dctx
->gzflags
= dctx
->bits
& 0xFF;
2130 if (dctx
->gzflags
& 2) {
2132 * The FHCRC flag is slightly confusing. RFC1952
2133 * documents it as indicating the presence of a
2134 * two-byte CRC16 of the gzip header, occurring
2135 * just before the beginning of the Deflate stream.
2136 * However, gzip itself (as of 1.3.5) appears to
2137 * believe it indicates that the current gzip
2138 * `member' is not the final one, i.e. that the
2139 * stream is composed of multiple gzip members
2140 * concatenated together, and furthermore gzip will
2141 * refuse to decode any file that has it set.
2143 * For this reason, I label it as `disputed' and
2144 * also refuse to decode anything that has it set.
2145 * I don't expect this to be a problem in practice.
2147 error
= DEFLATE_ERR_GZIP_FHCRC
;
2151 dctx
->state
= GZIPIGNORE1
;
2156 /* Expect two bytes of gzip timestamp/XFL/OS, which we ignore. */
2157 if (dctx
->nbits
< 16)
2160 if (dctx
->state
== GZIPIGNORE3
) {
2161 dctx
->state
= GZIPEXTRA
;
2163 dctx
->state
++; /* maps IGNORE1 -> IGNORE2 -> IGNORE3 */
2166 if (dctx
->gzflags
& 4) {
2167 /* Expect two bytes of extra-length count, then that many
2168 * extra bytes of header data, all of which we ignore. */
2169 if (!dctx
->gzextralen
) {
2170 if (dctx
->nbits
< 16)
2172 dctx
->gzextralen
= dctx
->bits
& 0xFFFF;
2175 } else if (dctx
->gzextralen
> 0) {
2176 if (dctx
->nbits
< 8)
2179 if (--dctx
->gzextralen
> 0)
2183 dctx
->state
= GZIPFNAME
;
2186 if (dctx
->gzflags
& 8) {
2188 * Expect a NUL-terminated filename.
2190 if (dctx
->nbits
< 8)
2192 code
= dctx
->bits
& 0xFF;
2197 dctx
->state
= GZIPCOMMENT
;
2200 if (dctx
->gzflags
& 16) {
2202 * Expect a NUL-terminated filename.
2204 if (dctx
->nbits
< 8)
2206 code
= dctx
->bits
& 0xFF;
2211 dctx
->state
= OUTSIDEBLK
;
2214 /* Expect 3-bit block header. */
2215 if (dctx
->nbits
< 3)
2216 goto finished
; /* done all we can */
2217 bfinal
= dctx
->bits
& 1;
2219 dctx
->lastblock
= TRUE
;
2221 btype
= dctx
->bits
& 3;
2224 int to_eat
= dctx
->nbits
& 7;
2225 dctx
->state
= UNCOMP_LEN
;
2226 EATBITS(to_eat
); /* align to byte boundary */
2227 } else if (btype
== 1) {
2228 dctx
->currlentable
= dctx
->staticlentable
;
2229 dctx
->currdisttable
= dctx
->staticdisttable
;
2230 dctx
->state
= INBLK
;
2231 } else if (btype
== 2) {
2232 dctx
->state
= TREES_HDR
;
2234 debug(("recv: bfinal=%d btype=%d\n", bfinal
, btype
));
2236 if (analyse_level
> 1) {
2237 static const char *const btypes
[] = {
2238 "uncompressed", "static", "dynamic", "type 3 (unknown)"
2240 printf("new block, %sfinal, %s\n",
2241 bfinal ?
"" : "not ",
2248 * Dynamic block header. Five bits of HLIT, five of
2249 * HDIST, four of HCLEN.
2251 if (dctx
->nbits
< 5 + 5 + 4)
2252 goto finished
; /* done all we can */
2253 dctx
->hlit
= 257 + (dctx
->bits
& 31);
2255 dctx
->hdist
= 1 + (dctx
->bits
& 31);
2257 dctx
->hclen
= 4 + (dctx
->bits
& 15);
2259 debug(("recv: hlit=%d hdist=%d hclen=%d\n", dctx
->hlit
,
2260 dctx
->hdist
, dctx
->hclen
));
2262 if (analyse_level
> 1)
2263 printf("hlit=%d, hdist=%d, hclen=%d\n",
2264 dctx
->hlit
, dctx
->hdist
, dctx
->hclen
);
2267 dctx
->state
= TREES_LENLEN
;
2268 memset(dctx
->lenlen
, 0, sizeof(dctx
->lenlen
));
2271 if (dctx
->nbits
< 3)
2273 while (dctx
->lenptr
< dctx
->hclen
&& dctx
->nbits
>= 3) {
2274 dctx
->lenlen
[lenlenmap
[dctx
->lenptr
++]] =
2275 (unsigned char) (dctx
->bits
& 7);
2276 debug(("recv: lenlen %d\n", (unsigned char) (dctx
->bits
& 7)));
2279 if (dctx
->lenptr
== dctx
->hclen
) {
2280 dctx
->lenlentable
= mktable(dctx
->lenlen
, 19,
2285 if (!dctx
->lenlentable
)
2286 goto finished
; /* error code set up by mktable */
2287 dctx
->state
= TREES_LEN
;
2292 if (dctx
->lenptr
>= dctx
->hlit
+ dctx
->hdist
) {
2293 dctx
->currlentable
= mktable(dctx
->lengths
, dctx
->hlit
,
2298 if (!dctx
->currlentable
)
2299 goto finished
; /* error code set up by mktable */
2300 if (dctx
->hdist
== 1 && dctx
->lengths
[dctx
->hlit
] == 0) {
2302 * Special case: if the code length list for the
2303 * backward-distance table contains a single zero
2304 * entry, it means this block will never encode a
2305 * backward distance at all (i.e. it's all
2308 dctx
->currdisttable
= NULL
;
2310 dctx
->currdisttable
= mktable(dctx
->lengths
+ dctx
->hlit
,
2316 if (!dctx
->currdisttable
)
2317 goto finished
; /* error code set up by mktable */
2319 freetable(&dctx
->lenlentable
);
2320 dctx
->lenlentable
= NULL
;
2321 dctx
->state
= INBLK
;
2324 code
= huflookup(&dctx
->bits
, &dctx
->nbits
, dctx
->lenlentable
);
2325 debug(("recv: codelen %d\n", code
));
2330 if (analyse_level
> 1)
2331 printf("code-length %d\n", code
);
2333 dctx
->lengths
[dctx
->lenptr
++] = code
;
2335 dctx
->lenextrabits
= (code
== 16 ?
2 : code
== 17 ?
3 : 7);
2336 dctx
->lenaddon
= (code
== 18 ?
11 : 3);
2337 dctx
->lenrep
= (code
== 16 && dctx
->lenptr
> 0 ?
2338 dctx
->lengths
[dctx
->lenptr
- 1] : 0);
2339 dctx
->state
= TREES_LENREP
;
2343 if (dctx
->nbits
< dctx
->lenextrabits
)
2347 (dctx
->bits
& ((1 << dctx
->lenextrabits
) - 1));
2348 EATBITS(dctx
->lenextrabits
);
2349 if (dctx
->lenextrabits
)
2350 debug(("recv: codelen-extrabits %d/%d\n", rep
- dctx
->lenaddon
,
2351 dctx
->lenextrabits
));
2353 if (analyse_level
> 1)
2354 printf("code-length-repeat: %d copies of %d\n", rep
,
2357 while (rep
> 0 && dctx
->lenptr
< dctx
->hlit
+ dctx
->hdist
) {
2358 dctx
->lengths
[dctx
->lenptr
] = dctx
->lenrep
;
2362 dctx
->state
= TREES_LEN
;
2366 dctx
->bitcount_before
= BITCOUNT(dctx
);
2368 code
= huflookup(&dctx
->bits
, &dctx
->nbits
, dctx
->currlentable
);
2369 debug(("recv: litlen %d\n", code
));
2374 if (analyse_level
> 0)
2375 printf("%lu: literal %d [%d]\n", dctx
->bytesout
, code
,
2376 BITCOUNT(dctx
) - dctx
->bitcount_before
);
2378 emit_char(dctx
, code
);
2379 } else if (code
== 256) {
2380 if (dctx
->lastblock
)
2383 dctx
->state
= OUTSIDEBLK
;
2384 if (dctx
->currlentable
!= dctx
->staticlentable
) {
2385 freetable(&dctx
->currlentable
);
2386 dctx
->currlentable
= NULL
;
2388 if (dctx
->currdisttable
&&
2389 dctx
->currdisttable
!= dctx
->staticdisttable
) {
2390 freetable(&dctx
->currdisttable
);
2391 dctx
->currdisttable
= NULL
;
2393 } else if (code
< 286) { /* static tree can give >285; ignore */
2394 dctx
->state
= GOTLENSYM
;
2399 rec
= &lencodes
[dctx
->sym
- 257];
2400 if (dctx
->nbits
< rec
->extrabits
)
2403 rec
->min
+ (dctx
->bits
& ((1 << rec
->extrabits
) - 1));
2405 debug(("recv: litlen-extrabits %d/%d\n",
2406 dctx
->len
- rec
->min
, rec
->extrabits
));
2407 EATBITS(rec
->extrabits
);
2408 dctx
->state
= GOTLEN
;
2411 if (!dctx
->currdisttable
) {
2412 error
= DEFLATE_ERR_NODISTTABLE
;
2415 code
= huflookup(&dctx
->bits
, &dctx
->nbits
, dctx
->currdisttable
);
2416 debug(("recv: dist %d\n", code
));
2419 dctx
->state
= GOTDISTSYM
;
2423 rec
= &distcodes
[dctx
->sym
];
2424 if (dctx
->nbits
< rec
->extrabits
)
2426 dist
= rec
->min
+ (dctx
->bits
& ((1 << rec
->extrabits
) - 1));
2428 debug(("recv: dist-extrabits %d/%d\n",
2429 dist
- rec
->min
, rec
->extrabits
));
2430 EATBITS(rec
->extrabits
);
2431 dctx
->state
= INBLK
;
2433 if (analyse_level
> 0)
2434 printf("%lu: copy len=%d dist=%d [%d]\n", dctx
->bytesout
,
2436 BITCOUNT(dctx
) - dctx
->bitcount_before
);
2439 emit_char(dctx
, dctx
->window
[(dctx
->winpos
- dist
) &
2444 * Uncompressed block. We expect to see a 16-bit LEN.
2446 if (dctx
->nbits
< 16)
2448 dctx
->uncomplen
= dctx
->bits
& 0xFFFF;
2450 dctx
->state
= UNCOMP_NLEN
;
2454 * Uncompressed block. We expect to see a 16-bit NLEN,
2455 * which should be the one's complement of the previous
2458 if (dctx
->nbits
< 16)
2460 nlen
= dctx
->bits
& 0xFFFF;
2462 if (dctx
->uncomplen
!= (nlen
^ 0xFFFF)) {
2463 error
= DEFLATE_ERR_UNCOMP_HDR
;
2466 if (dctx
->uncomplen
== 0) {/* block is empty */
2467 if (dctx
->lastblock
)
2470 dctx
->state
= OUTSIDEBLK
;
2472 dctx
->state
= UNCOMP_DATA
;
2475 if (dctx
->nbits
< 8)
2478 if (analyse_level
> 0)
2479 printf("%lu: uncompressed %d [8]\n", dctx
->bytesout
,
2480 (int)(dctx
->bits
& 0xFF));
2482 emit_char(dctx
, dctx
->bits
& 0xFF);
2484 if (--dctx
->uncomplen
== 0) { /* end of uncompressed block */
2485 if (dctx
->lastblock
)
2488 dctx
->state
= OUTSIDEBLK
;
2493 * End of compressed data. We align to a byte boundary,
2494 * and then look for format-specific trailer data.
2497 int to_eat
= dctx
->nbits
& 7;
2500 if (dctx
->type
== DEFLATE_TYPE_ZLIB
)
2501 dctx
->state
= ADLER1
;
2502 else if (dctx
->type
== DEFLATE_TYPE_GZIP
)
2505 dctx
->state
= FINALSPIN
;
2508 if (dctx
->nbits
< 16)
2510 cksum
= (dctx
->bits
& 0xFF) << 8;
2512 cksum
|= (dctx
->bits
& 0xFF);
2514 if (cksum
!= ((dctx
->checksum
>> 16) & 0xFFFF)) {
2515 error
= DEFLATE_ERR_CHECKSUM
;
2518 dctx
->state
= ADLER2
;
2521 if (dctx
->nbits
< 16)
2523 cksum
= (dctx
->bits
& 0xFF) << 8;
2525 cksum
|= (dctx
->bits
& 0xFF);
2527 if (cksum
!= (dctx
->checksum
& 0xFFFF)) {
2528 error
= DEFLATE_ERR_CHECKSUM
;
2531 dctx
->state
= FINALSPIN
;
2534 if (dctx
->nbits
< 16)
2536 cksum
= dctx
->bits
& 0xFFFF;
2538 if (cksum
!= (dctx
->checksum
& 0xFFFF)) {
2539 error
= DEFLATE_ERR_CHECKSUM
;
2545 if (dctx
->nbits
< 16)
2547 cksum
= dctx
->bits
& 0xFFFF;
2549 if (cksum
!= ((dctx
->checksum
>> 16) & 0xFFFF)) {
2550 error
= DEFLATE_ERR_CHECKSUM
;
2553 dctx
->state
= ILEN1
;
2556 if (dctx
->nbits
< 16)
2558 cksum
= dctx
->bits
& 0xFFFF;
2560 if (cksum
!= (dctx
->bytesout
& 0xFFFF)) {
2561 error
= DEFLATE_ERR_INLEN
;
2564 dctx
->state
= ILEN2
;
2567 if (dctx
->nbits
< 16)
2569 cksum
= dctx
->bits
& 0xFFFF;
2571 if (cksum
!= ((dctx
->bytesout
>> 16) & 0xFFFF)) {
2572 error
= DEFLATE_ERR_INLEN
;
2575 dctx
->state
= FINALSPIN
;
2578 /* Just ignore any trailing garbage on the data stream. */
2579 /* (We could alternatively throw an error here, if we wanted
2580 * to detect and complain about trailing garbage.) */
2581 EATBITS(dctx
->nbits
);
2587 *outblock
= dctx
->outblk
;
2588 *outlen
= dctx
->outlen
;
2592 #define A(code,str) str
2593 const char *const deflate_error_msg
[DEFLATE_NUM_ERRORS
] = {
2594 DEFLATE_ERRORLIST(A
)
2598 #define A(code,str) #code
2599 const char *const deflate_error_sym
[DEFLATE_NUM_ERRORS
] = {
2600 DEFLATE_ERRORLIST(A
)
2604 #if defined(WIN32) || defined(_WIN32) || defined(__WIN32__) || defined(_WIN64)
2608 #if defined(WINDOWS_IO) && (defined(STANDALONE) || defined(TESTMODE))
2615 int main(int argc
, char **argv
)
2617 unsigned char buf
[65536];
2619 int ret
, err
, outlen
;
2620 deflate_decompress_ctx
*dhandle
;
2621 deflate_compress_ctx
*chandle
;
2622 int type
= DEFLATE_TYPE_ZLIB
, opts
= TRUE
;
2623 int compress
= FALSE
, decompress
= FALSE
;
2624 int got_arg
= FALSE
;
2625 char *filename
= NULL
;
2633 if (p
[0] == '-' && opts
) {
2634 if (!strcmp(p
, "-b"))
2635 type
= DEFLATE_TYPE_BARE
;
2636 else if (!strcmp(p
, "-g"))
2637 type
= DEFLATE_TYPE_GZIP
;
2638 else if (!strcmp(p
, "-c"))
2640 else if (!strcmp(p
, "-d"))
2642 else if (!strcmp(p
, "-a"))
2643 analyse_level
++, decompress
= TRUE
;
2644 else if (!strcmp(p
, "--"))
2645 opts
= FALSE
; /* next thing is filename */
2647 fprintf(stderr
, "unknown command line option '%s'\n", p
);
2650 } else if (!filename
) {
2653 fprintf(stderr
, "can only handle one filename\n");
2658 if (!compress
&& !decompress
) {
2659 fprintf(stderr
, "usage: deflate [ -c | -d | -a ] [ -b | -g ]"
2661 return (got_arg ?
1 : 0);
2664 if (compress
&& decompress
) {
2665 fprintf(stderr
, "please do not specify both compression and"
2666 " decompression\n");
2667 return (got_arg ?
1 : 0);
2671 chandle
= deflate_compress_new(type
);
2674 dhandle
= deflate_decompress_new(type
);
2679 fp
= fopen(filename
, "rb");
2685 fprintf(stderr
, "unable to open '%s'\n", filename
);
2690 if(_setmode(_fileno(stdout
), _O_BINARY
) == -1)
2692 fprintf(stderr
, "Can't set stdout to binary mode\n");
2698 ret
= fread(buf
, 1, sizeof(buf
), fp
);
2702 err
= deflate_decompress_data(dhandle
, buf
, ret
,
2703 (void **)&outbuf
, &outlen
);
2705 err
= deflate_decompress_data(dhandle
, NULL
, 0,
2706 (void **)&outbuf
, &outlen
);
2709 deflate_compress_data(chandle
, buf
, ret
, DEFLATE_NO_FLUSH
,
2710 (void **)&outbuf
, &outlen
);
2712 deflate_compress_data(chandle
, buf
, ret
, DEFLATE_END_OF_DATA
,
2713 (void **)&outbuf
, &outlen
);
2717 if (!analyse_level
&& outlen
)
2718 fwrite(outbuf
, 1, outlen
, stdout
);
2722 fprintf(stderr
, "decoding error: %s\n", deflate_error_msg
[err
]);
2728 deflate_decompress_free(dhandle
);
2730 deflate_compress_free(chandle
);
2742 int main(int argc
, char **argv
)
2744 char *filename
= NULL
;
2746 deflate_compress_ctx
*chandle
;
2747 deflate_decompress_ctx
*dhandle
;
2748 unsigned char buf
[65536], *outbuf
, *outbuf2
;
2749 int ret
, err
, outlen
, outlen2
;
2750 int dlen
= 0, clen
= 0;
2756 if (p
[0] == '-' && opts
) {
2757 if (!strcmp(p
, "--"))
2758 opts
= FALSE
; /* next thing is filename */
2760 fprintf(stderr
, "unknown command line option '%s'\n", p
);
2763 } else if (!filename
) {
2766 fprintf(stderr
, "can only handle one filename\n");
2772 fp
= fopen(filename
, "rb");
2778 fprintf(stderr
, "unable to open '%s'\n", filename
);
2782 chandle
= deflate_compress_new(DEFLATE_TYPE_ZLIB
);
2783 dhandle
= deflate_decompress_new(DEFLATE_TYPE_ZLIB
);
2786 if(_setmode(_fileno(stdout
), _O_BINARY
) == -1)
2788 fprintf(stderr
, "Can't set stdout to binary mode\n");
2794 ret
= fread(buf
, 1, sizeof(buf
), fp
);
2796 deflate_compress_data(chandle
, NULL
, 0, DEFLATE_END_OF_DATA
,
2797 (void **)&outbuf
, &outlen
);
2800 deflate_compress_data(chandle
, buf
, ret
, DEFLATE_NO_FLUSH
,
2801 (void **)&outbuf
, &outlen
);
2805 err
= deflate_decompress_data(dhandle
, outbuf
, outlen
,
2806 (void **)&outbuf2
, &outlen2
);
2810 fwrite(outbuf2
, 1, outlen2
, stdout
);
2813 if (!err
&& ret
<= 0) {
2817 err
= deflate_decompress_data(dhandle
, NULL
, 0,
2818 (void **)&outbuf2
, &outlen2
);
2819 assert(outbuf2
== NULL
);
2822 fprintf(stderr
, "decoding error: %s\n",
2823 deflate_error_msg
[err
]);
2829 fprintf(stderr
, "%d plaintext -> %d compressed\n", dlen
, clen
);