| 1 | /* |
| 2 | * Reimplementation of Deflate (RFC1951) compression. Adapted from |
| 3 | * the version in PuTTY, and extended to write dynamic Huffman |
| 4 | * trees and choose block boundaries usefully. |
| 5 | */ |
| 6 | |
| 7 | /* |
| 8 | * TODO: |
| 9 | * |
| 10 | * - Feature: could do with forms of flush other than SYNC_FLUSH. |
| 11 | * I'm not sure exactly how those work when you don't know in |
| 12 | * advance that your next block will be static (as we did in |
| 13 | * PuTTY). And remember the 9-bit limitation of zlib. |
| 14 | * + also, zlib has FULL_FLUSH which clears the LZ77 state as |
| 15 | * well, for random access. |
| 16 | * |
| 17 | * - Compression quality: chooseblock() appears to be computing |
| 18 | * wildly inaccurate block size estimates. Possible resolutions: |
| 19 | * + find and fix some trivial bug I haven't spotted yet |
| 20 | * + abandon the entropic approximation and go with trial |
| 21 | * Huffman runs |
| 22 | * |
| 23 | * - Compression quality: see if increasing SYMLIMIT causes |
| 24 | * dynamic blocks to start being consistently smaller than it. |
| 25 | * + actually we seem to be there already, but check on a |
| 26 | * larger corpus. |
| 27 | * |
| 28 | * - Compression quality: we ought to be able to fall right back |
| 29 | * to actual uncompressed blocks if really necessary, though |
| 30 | * it's not clear what the criterion for doing so would be. |
| 31 | */ |
| 32 | |
| 33 | /* |
| 34 | * This software is copyright 2000-2006 Simon Tatham. |
| 35 | * |
| 36 | * Permission is hereby granted, free of charge, to any person |
| 37 | * obtaining a copy of this software and associated documentation |
| 38 | * files (the "Software"), to deal in the Software without |
| 39 | * restriction, including without limitation the rights to use, |
| 40 | * copy, modify, merge, publish, distribute, sublicense, and/or |
| 41 | * sell copies of the Software, and to permit persons to whom the |
| 42 | * Software is furnished to do so, subject to the following |
| 43 | * conditions: |
| 44 | * |
| 45 | * The above copyright notice and this permission notice shall be |
| 46 | * included in all copies or substantial portions of the Software. |
| 47 | * |
| 48 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
| 49 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES |
| 50 | * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND |
| 51 | * NONINFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE |
| 52 | * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN |
| 53 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR |
| 54 | * IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN |
| 55 | * THE SOFTWARE. |
| 56 | */ |
| 57 | |
| 58 | #include <stdio.h> |
| 59 | #include <stddef.h> |
| 60 | #include <string.h> |
| 61 | #include <stdlib.h> |
| 62 | #include <assert.h> |
| 63 | |
| 64 | #include "deflate.h" |
| 65 | |
| 66 | #define snew(type) ( (type *) malloc(sizeof(type)) ) |
| 67 | #define snewn(n, type) ( (type *) malloc((n) * sizeof(type)) ) |
| 68 | #define sresize(x, n, type) ( (type *) realloc((x), (n) * sizeof(type)) ) |
| 69 | #define sfree(x) ( free((x)) ) |
| 70 | |
| 71 | #define lenof(x) (sizeof((x)) / sizeof(*(x))) |
| 72 | |
| 73 | #ifndef FALSE |
| 74 | #define FALSE 0 |
| 75 | #define TRUE (!FALSE) |
| 76 | #endif |
| 77 | |
| 78 | /* ---------------------------------------------------------------------- |
| 79 | * This file can be compiled in a number of modes. |
| 80 | * |
| 81 | * With -DSTANDALONE, it builds a self-contained deflate tool which |
| 82 | * can compress, decompress, and also analyse a deflated file to |
| 83 | * print out the sequence of literals and copy commands it |
| 84 | * contains. |
| 85 | * |
| 86 | * With -DTESTMODE, it builds a test application which is given a |
| 87 | * file on standard input, both compresses and decompresses it, and |
| 88 | * outputs the re-decompressed result so it can be conveniently |
| 89 | * diffed against the original. Define -DTESTDBG as well for lots |
| 90 | * of diagnostics. |
| 91 | */ |
| 92 | |
| 93 | #if defined TESTDBG |
| 94 | /* gcc-specific diagnostic macro */ |
| 95 | #define debug_int(x...) ( fprintf(stderr, x) ) |
| 96 | #define debug(x) ( debug_int x ) |
| 97 | #else |
| 98 | #define debug(x) ((void)0) |
| 99 | #endif |
| 100 | |
| 101 | #ifdef STANDALONE |
| 102 | #define ANALYSIS |
| 103 | #endif |
| 104 | |
| 105 | #ifdef ANALYSIS |
| 106 | int analyse_level = 0; |
| 107 | #endif |
| 108 | |
| 109 | /* ---------------------------------------------------------------------- |
| 110 | * Basic LZ77 code. This bit is designed modularly, so it could be |
| 111 | * ripped out and used in a different LZ77 compressor. Go to it, |
| 112 | * and good luck :-) |
| 113 | */ |
| 114 | |
| 115 | struct LZ77InternalContext; |
| 116 | struct LZ77Context { |
| 117 | struct LZ77InternalContext *ictx; |
| 118 | void *userdata; |
| 119 | void (*literal) (struct LZ77Context * ctx, unsigned char c); |
| 120 | void (*match) (struct LZ77Context * ctx, int distance, int len); |
| 121 | }; |
| 122 | |
| 123 | /* |
| 124 | * Initialise the private fields of an LZ77Context. It's up to the |
| 125 | * user to initialise the public fields. |
| 126 | */ |
| 127 | static int lz77_init(struct LZ77Context *ctx); |
| 128 | |
| 129 | /* |
| 130 | * Supply data to be compressed. Will update the private fields of |
| 131 | * the LZ77Context, and will call literal() and match() to output. |
| 132 | * If `compress' is FALSE, it will never emit a match, but will |
| 133 | * instead call literal() for everything. |
| 134 | */ |
| 135 | static void lz77_compress(struct LZ77Context *ctx, |
| 136 | const unsigned char *data, int len, int compress); |
| 137 | |
| 138 | /* |
| 139 | * Modifiable parameters. |
| 140 | */ |
| 141 | #define WINSIZE 32768 /* window size. Must be power of 2! */ |
| 142 | #define HASHMAX 2039 /* one more than max hash value */ |
| 143 | #define MAXMATCH 32 /* how many matches we track */ |
| 144 | #define HASHCHARS 3 /* how many chars make a hash */ |
| 145 | |
| 146 | /* |
| 147 | * This compressor takes a less slapdash approach than the |
| 148 | * gzip/zlib one. Rather than allowing our hash chains to fall into |
| 149 | * disuse near the far end, we keep them doubly linked so we can |
| 150 | * _find_ the far end, and then every time we add a new byte to the |
| 151 | * window (thus rolling round by one and removing the previous |
| 152 | * byte), we can carefully remove the hash chain entry. |
| 153 | */ |
| 154 | |
| 155 | #define INVALID -1 /* invalid hash _and_ invalid offset */ |
| 156 | struct WindowEntry { |
| 157 | short next, prev; /* array indices within the window */ |
| 158 | short hashval; |
| 159 | }; |
| 160 | |
| 161 | struct HashEntry { |
| 162 | short first; /* window index of first in chain */ |
| 163 | }; |
| 164 | |
| 165 | struct Match { |
| 166 | int distance, len; |
| 167 | }; |
| 168 | |
| 169 | struct LZ77InternalContext { |
| 170 | struct WindowEntry win[WINSIZE]; |
| 171 | unsigned char data[WINSIZE]; |
| 172 | int winpos; |
| 173 | struct HashEntry hashtab[HASHMAX]; |
| 174 | unsigned char pending[HASHCHARS]; |
| 175 | int npending; |
| 176 | }; |
| 177 | |
| 178 | static int lz77_hash(const unsigned char *data) |
| 179 | { |
| 180 | return (257 * data[0] + 263 * data[1] + 269 * data[2]) % HASHMAX; |
| 181 | } |
| 182 | |
| 183 | static int lz77_init(struct LZ77Context *ctx) |
| 184 | { |
| 185 | struct LZ77InternalContext *st; |
| 186 | int i; |
| 187 | |
| 188 | st = snew(struct LZ77InternalContext); |
| 189 | if (!st) |
| 190 | return 0; |
| 191 | |
| 192 | ctx->ictx = st; |
| 193 | |
| 194 | for (i = 0; i < WINSIZE; i++) |
| 195 | st->win[i].next = st->win[i].prev = st->win[i].hashval = INVALID; |
| 196 | for (i = 0; i < HASHMAX; i++) |
| 197 | st->hashtab[i].first = INVALID; |
| 198 | st->winpos = 0; |
| 199 | |
| 200 | st->npending = 0; |
| 201 | |
| 202 | return 1; |
| 203 | } |
| 204 | |
| 205 | static void lz77_advance(struct LZ77InternalContext *st, |
| 206 | unsigned char c, int hash) |
| 207 | { |
| 208 | int off; |
| 209 | |
| 210 | /* |
| 211 | * Remove the hash entry at winpos from the tail of its chain, |
| 212 | * or empty the chain if it's the only thing on the chain. |
| 213 | */ |
| 214 | if (st->win[st->winpos].prev != INVALID) { |
| 215 | st->win[st->win[st->winpos].prev].next = INVALID; |
| 216 | } else if (st->win[st->winpos].hashval != INVALID) { |
| 217 | st->hashtab[st->win[st->winpos].hashval].first = INVALID; |
| 218 | } |
| 219 | |
| 220 | /* |
| 221 | * Create a new entry at winpos and add it to the head of its |
| 222 | * hash chain. |
| 223 | */ |
| 224 | st->win[st->winpos].hashval = hash; |
| 225 | st->win[st->winpos].prev = INVALID; |
| 226 | off = st->win[st->winpos].next = st->hashtab[hash].first; |
| 227 | st->hashtab[hash].first = st->winpos; |
| 228 | if (off != INVALID) |
| 229 | st->win[off].prev = st->winpos; |
| 230 | st->data[st->winpos] = c; |
| 231 | |
| 232 | /* |
| 233 | * Advance the window pointer. |
| 234 | */ |
| 235 | st->winpos = (st->winpos + 1) & (WINSIZE - 1); |
| 236 | } |
| 237 | |
| 238 | #define CHARAT(k) ( (k)<0 ? st->data[(st->winpos+k)&(WINSIZE-1)] : data[k] ) |
| 239 | |
| 240 | static void lz77_compress(struct LZ77Context *ctx, |
| 241 | const unsigned char *data, int len, int compress) |
| 242 | { |
| 243 | struct LZ77InternalContext *st = ctx->ictx; |
| 244 | int i, hash, distance, off, nmatch, matchlen, advance; |
| 245 | struct Match defermatch, matches[MAXMATCH]; |
| 246 | int deferchr; |
| 247 | |
| 248 | /* |
| 249 | * Add any pending characters from last time to the window. (We |
| 250 | * might not be able to.) |
| 251 | */ |
| 252 | for (i = 0; i < st->npending; i++) { |
| 253 | unsigned char foo[HASHCHARS]; |
| 254 | int j; |
| 255 | if (len + st->npending - i < HASHCHARS) { |
| 256 | /* Update the pending array. */ |
| 257 | for (j = i; j < st->npending; j++) |
| 258 | st->pending[j - i] = st->pending[j]; |
| 259 | break; |
| 260 | } |
| 261 | for (j = 0; j < HASHCHARS; j++) |
| 262 | foo[j] = (i + j < st->npending ? st->pending[i + j] : |
| 263 | data[i + j - st->npending]); |
| 264 | lz77_advance(st, foo[0], lz77_hash(foo)); |
| 265 | } |
| 266 | st->npending -= i; |
| 267 | |
| 268 | defermatch.len = 0; |
| 269 | deferchr = '\0'; |
| 270 | while (len > 0) { |
| 271 | |
| 272 | /* Don't even look for a match, if we're not compressing. */ |
| 273 | if (compress && len >= HASHCHARS) { |
| 274 | /* |
| 275 | * Hash the next few characters. |
| 276 | */ |
| 277 | hash = lz77_hash(data); |
| 278 | |
| 279 | /* |
| 280 | * Look the hash up in the corresponding hash chain and see |
| 281 | * what we can find. |
| 282 | */ |
| 283 | nmatch = 0; |
| 284 | for (off = st->hashtab[hash].first; |
| 285 | off != INVALID; off = st->win[off].next) { |
| 286 | /* distance = 1 if off == st->winpos-1 */ |
| 287 | /* distance = WINSIZE if off == st->winpos */ |
| 288 | distance = |
| 289 | WINSIZE - (off + WINSIZE - st->winpos) % WINSIZE; |
| 290 | for (i = 0; i < HASHCHARS; i++) |
| 291 | if (CHARAT(i) != CHARAT(i - distance)) |
| 292 | break; |
| 293 | if (i == HASHCHARS) { |
| 294 | matches[nmatch].distance = distance; |
| 295 | matches[nmatch].len = 3; |
| 296 | if (++nmatch >= MAXMATCH) |
| 297 | break; |
| 298 | } |
| 299 | } |
| 300 | } else { |
| 301 | nmatch = 0; |
| 302 | hash = INVALID; |
| 303 | } |
| 304 | |
| 305 | if (nmatch > 0) { |
| 306 | /* |
| 307 | * We've now filled up matches[] with nmatch potential |
| 308 | * matches. Follow them down to find the longest. (We |
| 309 | * assume here that it's always worth favouring a |
| 310 | * longer match over a shorter one.) |
| 311 | */ |
| 312 | matchlen = HASHCHARS; |
| 313 | while (matchlen < len) { |
| 314 | int j; |
| 315 | for (i = j = 0; i < nmatch; i++) { |
| 316 | if (CHARAT(matchlen) == |
| 317 | CHARAT(matchlen - matches[i].distance)) { |
| 318 | matches[j++] = matches[i]; |
| 319 | } |
| 320 | } |
| 321 | if (j == 0) |
| 322 | break; |
| 323 | matchlen++; |
| 324 | nmatch = j; |
| 325 | } |
| 326 | |
| 327 | /* |
| 328 | * We've now got all the longest matches. We favour the |
| 329 | * shorter distances, which means we go with matches[0]. |
| 330 | * So see if we want to defer it or throw it away. |
| 331 | */ |
| 332 | matches[0].len = matchlen; |
| 333 | if (defermatch.len > 0) { |
| 334 | if (matches[0].len > defermatch.len + 1) { |
| 335 | /* We have a better match. Emit the deferred char, |
| 336 | * and defer this match. */ |
| 337 | ctx->literal(ctx, (unsigned char) deferchr); |
| 338 | defermatch = matches[0]; |
| 339 | deferchr = data[0]; |
| 340 | advance = 1; |
| 341 | } else { |
| 342 | /* We don't have a better match. Do the deferred one. */ |
| 343 | ctx->match(ctx, defermatch.distance, defermatch.len); |
| 344 | advance = defermatch.len - 1; |
| 345 | defermatch.len = 0; |
| 346 | } |
| 347 | } else { |
| 348 | /* There was no deferred match. Defer this one. */ |
| 349 | defermatch = matches[0]; |
| 350 | deferchr = data[0]; |
| 351 | advance = 1; |
| 352 | } |
| 353 | } else { |
| 354 | /* |
| 355 | * We found no matches. Emit the deferred match, if |
| 356 | * any; otherwise emit a literal. |
| 357 | */ |
| 358 | if (defermatch.len > 0) { |
| 359 | ctx->match(ctx, defermatch.distance, defermatch.len); |
| 360 | advance = defermatch.len - 1; |
| 361 | defermatch.len = 0; |
| 362 | } else { |
| 363 | ctx->literal(ctx, data[0]); |
| 364 | advance = 1; |
| 365 | } |
| 366 | } |
| 367 | |
| 368 | /* |
| 369 | * Now advance the position by `advance' characters, |
| 370 | * keeping the window and hash chains consistent. |
| 371 | */ |
| 372 | while (advance > 0) { |
| 373 | if (len >= HASHCHARS) { |
| 374 | lz77_advance(st, *data, lz77_hash(data)); |
| 375 | } else { |
| 376 | st->pending[st->npending++] = *data; |
| 377 | } |
| 378 | data++; |
| 379 | len--; |
| 380 | advance--; |
| 381 | } |
| 382 | } |
| 383 | } |
| 384 | |
| 385 | /* ---------------------------------------------------------------------- |
| 386 | * Deflate functionality common to both compression and decompression. |
| 387 | */ |
| 388 | |
| 389 | static const unsigned char lenlenmap[] = { |
| 390 | 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 |
| 391 | }; |
| 392 | |
| 393 | #define MAXCODELEN 16 |
| 394 | |
| 395 | /* |
| 396 | * Given a sequence of Huffman code lengths, compute the actual |
| 397 | * codes, in the final form suitable for feeding to outbits (i.e. |
| 398 | * already bit-mirrored). |
| 399 | * |
| 400 | * Returns the maximum code length found. Can also return -1 to |
| 401 | * indicate the table was overcommitted (too many or too short |
| 402 | * codes to exactly cover the possible space), or -2 to indicate it |
| 403 | * was undercommitted (too few or too long codes). |
| 404 | */ |
| 405 | static int hufcodes(const unsigned char *lengths, int *codes, int nsyms) |
| 406 | { |
| 407 | int count[MAXCODELEN], startcode[MAXCODELEN]; |
| 408 | int code, maxlen; |
| 409 | int i, j; |
| 410 | |
| 411 | /* Count the codes of each length. */ |
| 412 | maxlen = 0; |
| 413 | for (i = 1; i < MAXCODELEN; i++) |
| 414 | count[i] = 0; |
| 415 | for (i = 0; i < nsyms; i++) { |
| 416 | count[lengths[i]]++; |
| 417 | if (maxlen < lengths[i]) |
| 418 | maxlen = lengths[i]; |
| 419 | } |
| 420 | /* Determine the starting code for each length block. */ |
| 421 | code = 0; |
| 422 | for (i = 1; i < MAXCODELEN; i++) { |
| 423 | startcode[i] = code; |
| 424 | code += count[i]; |
| 425 | if (code > (1 << i)) |
| 426 | maxlen = -1; /* overcommitted */ |
| 427 | code <<= 1; |
| 428 | } |
| 429 | if (code < (1 << MAXCODELEN)) |
| 430 | maxlen = -2; /* undercommitted */ |
| 431 | /* Determine the code for each symbol. Mirrored, of course. */ |
| 432 | for (i = 0; i < nsyms; i++) { |
| 433 | code = startcode[lengths[i]]++; |
| 434 | codes[i] = 0; |
| 435 | for (j = 0; j < lengths[i]; j++) { |
| 436 | codes[i] = (codes[i] << 1) | (code & 1); |
| 437 | code >>= 1; |
| 438 | } |
| 439 | } |
| 440 | |
| 441 | return maxlen; |
| 442 | } |
| 443 | |
| 444 | /* |
| 445 | * Adler32 checksum function. |
| 446 | */ |
| 447 | static unsigned long adler32_update(unsigned long s, |
| 448 | const unsigned char *data, int len) |
| 449 | { |
| 450 | unsigned s1 = s & 0xFFFF, s2 = (s >> 16) & 0xFFFF; |
| 451 | int i; |
| 452 | |
| 453 | for (i = 0; i < len; i++) { |
| 454 | s1 += data[i]; |
| 455 | s2 += s1; |
| 456 | if (!(i & 0xFFF)) { |
| 457 | s1 %= 65521; |
| 458 | s2 %= 65521; |
| 459 | } |
| 460 | } |
| 461 | |
| 462 | return ((s2 % 65521) << 16) | (s1 % 65521); |
| 463 | } |
| 464 | |
| 465 | /* |
| 466 | * CRC32 checksum function. |
| 467 | */ |
| 468 | |
| 469 | static unsigned long crc32_update(unsigned long crcword, |
| 470 | const unsigned char *data, int len) |
| 471 | { |
| 472 | static const unsigned long crc32_table[256] = { |
| 473 | 0x00000000L, 0x77073096L, 0xEE0E612CL, 0x990951BAL, |
| 474 | 0x076DC419L, 0x706AF48FL, 0xE963A535L, 0x9E6495A3L, |
| 475 | 0x0EDB8832L, 0x79DCB8A4L, 0xE0D5E91EL, 0x97D2D988L, |
| 476 | 0x09B64C2BL, 0x7EB17CBDL, 0xE7B82D07L, 0x90BF1D91L, |
| 477 | 0x1DB71064L, 0x6AB020F2L, 0xF3B97148L, 0x84BE41DEL, |
| 478 | 0x1ADAD47DL, 0x6DDDE4EBL, 0xF4D4B551L, 0x83D385C7L, |
| 479 | 0x136C9856L, 0x646BA8C0L, 0xFD62F97AL, 0x8A65C9ECL, |
| 480 | 0x14015C4FL, 0x63066CD9L, 0xFA0F3D63L, 0x8D080DF5L, |
| 481 | 0x3B6E20C8L, 0x4C69105EL, 0xD56041E4L, 0xA2677172L, |
| 482 | 0x3C03E4D1L, 0x4B04D447L, 0xD20D85FDL, 0xA50AB56BL, |
| 483 | 0x35B5A8FAL, 0x42B2986CL, 0xDBBBC9D6L, 0xACBCF940L, |
| 484 | 0x32D86CE3L, 0x45DF5C75L, 0xDCD60DCFL, 0xABD13D59L, |
| 485 | 0x26D930ACL, 0x51DE003AL, 0xC8D75180L, 0xBFD06116L, |
| 486 | 0x21B4F4B5L, 0x56B3C423L, 0xCFBA9599L, 0xB8BDA50FL, |
| 487 | 0x2802B89EL, 0x5F058808L, 0xC60CD9B2L, 0xB10BE924L, |
| 488 | 0x2F6F7C87L, 0x58684C11L, 0xC1611DABL, 0xB6662D3DL, |
| 489 | 0x76DC4190L, 0x01DB7106L, 0x98D220BCL, 0xEFD5102AL, |
| 490 | 0x71B18589L, 0x06B6B51FL, 0x9FBFE4A5L, 0xE8B8D433L, |
| 491 | 0x7807C9A2L, 0x0F00F934L, 0x9609A88EL, 0xE10E9818L, |
| 492 | 0x7F6A0DBBL, 0x086D3D2DL, 0x91646C97L, 0xE6635C01L, |
| 493 | 0x6B6B51F4L, 0x1C6C6162L, 0x856530D8L, 0xF262004EL, |
| 494 | 0x6C0695EDL, 0x1B01A57BL, 0x8208F4C1L, 0xF50FC457L, |
| 495 | 0x65B0D9C6L, 0x12B7E950L, 0x8BBEB8EAL, 0xFCB9887CL, |
| 496 | 0x62DD1DDFL, 0x15DA2D49L, 0x8CD37CF3L, 0xFBD44C65L, |
| 497 | 0x4DB26158L, 0x3AB551CEL, 0xA3BC0074L, 0xD4BB30E2L, |
| 498 | 0x4ADFA541L, 0x3DD895D7L, 0xA4D1C46DL, 0xD3D6F4FBL, |
| 499 | 0x4369E96AL, 0x346ED9FCL, 0xAD678846L, 0xDA60B8D0L, |
| 500 | 0x44042D73L, 0x33031DE5L, 0xAA0A4C5FL, 0xDD0D7CC9L, |
| 501 | 0x5005713CL, 0x270241AAL, 0xBE0B1010L, 0xC90C2086L, |
| 502 | 0x5768B525L, 0x206F85B3L, 0xB966D409L, 0xCE61E49FL, |
| 503 | 0x5EDEF90EL, 0x29D9C998L, 0xB0D09822L, 0xC7D7A8B4L, |
| 504 | 0x59B33D17L, 0x2EB40D81L, 0xB7BD5C3BL, 0xC0BA6CADL, |
| 505 | 0xEDB88320L, 0x9ABFB3B6L, 0x03B6E20CL, 0x74B1D29AL, |
| 506 | 0xEAD54739L, 0x9DD277AFL, 0x04DB2615L, 0x73DC1683L, |
| 507 | 0xE3630B12L, 0x94643B84L, 0x0D6D6A3EL, 0x7A6A5AA8L, |
| 508 | 0xE40ECF0BL, 0x9309FF9DL, 0x0A00AE27L, 0x7D079EB1L, |
| 509 | 0xF00F9344L, 0x8708A3D2L, 0x1E01F268L, 0x6906C2FEL, |
| 510 | 0xF762575DL, 0x806567CBL, 0x196C3671L, 0x6E6B06E7L, |
| 511 | 0xFED41B76L, 0x89D32BE0L, 0x10DA7A5AL, 0x67DD4ACCL, |
| 512 | 0xF9B9DF6FL, 0x8EBEEFF9L, 0x17B7BE43L, 0x60B08ED5L, |
| 513 | 0xD6D6A3E8L, 0xA1D1937EL, 0x38D8C2C4L, 0x4FDFF252L, |
| 514 | 0xD1BB67F1L, 0xA6BC5767L, 0x3FB506DDL, 0x48B2364BL, |
| 515 | 0xD80D2BDAL, 0xAF0A1B4CL, 0x36034AF6L, 0x41047A60L, |
| 516 | 0xDF60EFC3L, 0xA867DF55L, 0x316E8EEFL, 0x4669BE79L, |
| 517 | 0xCB61B38CL, 0xBC66831AL, 0x256FD2A0L, 0x5268E236L, |
| 518 | 0xCC0C7795L, 0xBB0B4703L, 0x220216B9L, 0x5505262FL, |
| 519 | 0xC5BA3BBEL, 0xB2BD0B28L, 0x2BB45A92L, 0x5CB36A04L, |
| 520 | 0xC2D7FFA7L, 0xB5D0CF31L, 0x2CD99E8BL, 0x5BDEAE1DL, |
| 521 | 0x9B64C2B0L, 0xEC63F226L, 0x756AA39CL, 0x026D930AL, |
| 522 | 0x9C0906A9L, 0xEB0E363FL, 0x72076785L, 0x05005713L, |
| 523 | 0x95BF4A82L, 0xE2B87A14L, 0x7BB12BAEL, 0x0CB61B38L, |
| 524 | 0x92D28E9BL, 0xE5D5BE0DL, 0x7CDCEFB7L, 0x0BDBDF21L, |
| 525 | 0x86D3D2D4L, 0xF1D4E242L, 0x68DDB3F8L, 0x1FDA836EL, |
| 526 | 0x81BE16CDL, 0xF6B9265BL, 0x6FB077E1L, 0x18B74777L, |
| 527 | 0x88085AE6L, 0xFF0F6A70L, 0x66063BCAL, 0x11010B5CL, |
| 528 | 0x8F659EFFL, 0xF862AE69L, 0x616BFFD3L, 0x166CCF45L, |
| 529 | 0xA00AE278L, 0xD70DD2EEL, 0x4E048354L, 0x3903B3C2L, |
| 530 | 0xA7672661L, 0xD06016F7L, 0x4969474DL, 0x3E6E77DBL, |
| 531 | 0xAED16A4AL, 0xD9D65ADCL, 0x40DF0B66L, 0x37D83BF0L, |
| 532 | 0xA9BCAE53L, 0xDEBB9EC5L, 0x47B2CF7FL, 0x30B5FFE9L, |
| 533 | 0xBDBDF21CL, 0xCABAC28AL, 0x53B39330L, 0x24B4A3A6L, |
| 534 | 0xBAD03605L, 0xCDD70693L, 0x54DE5729L, 0x23D967BFL, |
| 535 | 0xB3667A2EL, 0xC4614AB8L, 0x5D681B02L, 0x2A6F2B94L, |
| 536 | 0xB40BBE37L, 0xC30C8EA1L, 0x5A05DF1BL, 0x2D02EF8DL |
| 537 | }; |
| 538 | crcword ^= 0xFFFFFFFFL; |
| 539 | while (len--) { |
| 540 | unsigned long newbyte = *data++; |
| 541 | newbyte ^= crcword & 0xFFL; |
| 542 | crcword = (crcword >> 8) ^ crc32_table[newbyte]; |
| 543 | } |
| 544 | return crcword ^ 0xFFFFFFFFL; |
| 545 | } |
| 546 | |
| 547 | typedef struct { |
| 548 | short code, extrabits; |
| 549 | int min, max; |
| 550 | } coderecord; |
| 551 | |
| 552 | static const coderecord lencodes[] = { |
| 553 | {257, 0, 3, 3}, |
| 554 | {258, 0, 4, 4}, |
| 555 | {259, 0, 5, 5}, |
| 556 | {260, 0, 6, 6}, |
| 557 | {261, 0, 7, 7}, |
| 558 | {262, 0, 8, 8}, |
| 559 | {263, 0, 9, 9}, |
| 560 | {264, 0, 10, 10}, |
| 561 | {265, 1, 11, 12}, |
| 562 | {266, 1, 13, 14}, |
| 563 | {267, 1, 15, 16}, |
| 564 | {268, 1, 17, 18}, |
| 565 | {269, 2, 19, 22}, |
| 566 | {270, 2, 23, 26}, |
| 567 | {271, 2, 27, 30}, |
| 568 | {272, 2, 31, 34}, |
| 569 | {273, 3, 35, 42}, |
| 570 | {274, 3, 43, 50}, |
| 571 | {275, 3, 51, 58}, |
| 572 | {276, 3, 59, 66}, |
| 573 | {277, 4, 67, 82}, |
| 574 | {278, 4, 83, 98}, |
| 575 | {279, 4, 99, 114}, |
| 576 | {280, 4, 115, 130}, |
| 577 | {281, 5, 131, 162}, |
| 578 | {282, 5, 163, 194}, |
| 579 | {283, 5, 195, 226}, |
| 580 | {284, 5, 227, 257}, |
| 581 | {285, 0, 258, 258}, |
| 582 | }; |
| 583 | |
| 584 | static const coderecord distcodes[] = { |
| 585 | {0, 0, 1, 1}, |
| 586 | {1, 0, 2, 2}, |
| 587 | {2, 0, 3, 3}, |
| 588 | {3, 0, 4, 4}, |
| 589 | {4, 1, 5, 6}, |
| 590 | {5, 1, 7, 8}, |
| 591 | {6, 2, 9, 12}, |
| 592 | {7, 2, 13, 16}, |
| 593 | {8, 3, 17, 24}, |
| 594 | {9, 3, 25, 32}, |
| 595 | {10, 4, 33, 48}, |
| 596 | {11, 4, 49, 64}, |
| 597 | {12, 5, 65, 96}, |
| 598 | {13, 5, 97, 128}, |
| 599 | {14, 6, 129, 192}, |
| 600 | {15, 6, 193, 256}, |
| 601 | {16, 7, 257, 384}, |
| 602 | {17, 7, 385, 512}, |
| 603 | {18, 8, 513, 768}, |
| 604 | {19, 8, 769, 1024}, |
| 605 | {20, 9, 1025, 1536}, |
| 606 | {21, 9, 1537, 2048}, |
| 607 | {22, 10, 2049, 3072}, |
| 608 | {23, 10, 3073, 4096}, |
| 609 | {24, 11, 4097, 6144}, |
| 610 | {25, 11, 6145, 8192}, |
| 611 | {26, 12, 8193, 12288}, |
| 612 | {27, 12, 12289, 16384}, |
| 613 | {28, 13, 16385, 24576}, |
| 614 | {29, 13, 24577, 32768}, |
| 615 | }; |
| 616 | |
| 617 | /* ---------------------------------------------------------------------- |
| 618 | * Deflate compression. |
| 619 | */ |
| 620 | |
| 621 | #define SYMLIMIT 65536 |
| 622 | #define SYMPFX_LITLEN 0x00000000U |
| 623 | #define SYMPFX_DIST 0x40000000U |
| 624 | #define SYMPFX_EXTRABITS 0x80000000U |
| 625 | #define SYMPFX_CODELEN 0xC0000000U |
| 626 | #define SYMPFX_MASK 0xC0000000U |
| 627 | |
| 628 | #define SYM_EXTRABITS_MASK 0x3C000000U |
| 629 | #define SYM_EXTRABITS_SHIFT 26 |
| 630 | |
| 631 | struct huftrees { |
| 632 | unsigned char *len_litlen; |
| 633 | int *code_litlen; |
| 634 | unsigned char *len_dist; |
| 635 | int *code_dist; |
| 636 | unsigned char *len_codelen; |
| 637 | int *code_codelen; |
| 638 | }; |
| 639 | |
| 640 | struct deflate_compress_ctx { |
| 641 | struct LZ77Context *lzc; |
| 642 | unsigned char *outbuf; |
| 643 | int outlen, outsize; |
| 644 | unsigned long outbits; |
| 645 | int noutbits; |
| 646 | int firstblock; |
| 647 | unsigned long *syms; |
| 648 | int symstart, nsyms; |
| 649 | int type; |
| 650 | unsigned long checksum; |
| 651 | unsigned long datasize; |
| 652 | int lastblock; |
| 653 | int finished; |
| 654 | unsigned char static_len1[288], static_len2[30]; |
| 655 | int static_code1[288], static_code2[30]; |
| 656 | struct huftrees sht; |
| 657 | #ifdef STATISTICS |
| 658 | unsigned long bitcount; |
| 659 | #endif |
| 660 | }; |
| 661 | |
| 662 | static void outbits(deflate_compress_ctx *out, |
| 663 | unsigned long bits, int nbits) |
| 664 | { |
| 665 | assert(out->noutbits + nbits <= 32); |
| 666 | out->outbits |= bits << out->noutbits; |
| 667 | out->noutbits += nbits; |
| 668 | while (out->noutbits >= 8) { |
| 669 | if (out->outlen >= out->outsize) { |
| 670 | out->outsize = out->outlen + 64; |
| 671 | out->outbuf = sresize(out->outbuf, out->outsize, unsigned char); |
| 672 | } |
| 673 | out->outbuf[out->outlen++] = (unsigned char) (out->outbits & 0xFF); |
| 674 | out->outbits >>= 8; |
| 675 | out->noutbits -= 8; |
| 676 | } |
| 677 | #ifdef STATISTICS |
| 678 | out->bitcount += nbits; |
| 679 | #endif |
| 680 | } |
| 681 | |
| 682 | /* |
| 683 | * Binary heap functions used by buildhuf(). Each one assumes the |
| 684 | * heap to be stored in an array of ints, with two ints per node |
| 685 | * (user data and key). They take in the old heap length, and |
| 686 | * return the new one. |
| 687 | */ |
| 688 | #define HEAPPARENT(x) (((x)-2)/4*2) |
| 689 | #define HEAPLEFT(x) ((x)*2+2) |
| 690 | #define HEAPRIGHT(x) ((x)*2+4) |
| 691 | static int addheap(int *heap, int len, int userdata, int key) |
| 692 | { |
| 693 | int me, dad, tmp; |
| 694 | |
| 695 | me = len; |
| 696 | heap[len++] = userdata; |
| 697 | heap[len++] = key; |
| 698 | |
| 699 | while (me > 0) { |
| 700 | dad = HEAPPARENT(me); |
| 701 | if (heap[me+1] < heap[dad+1]) { |
| 702 | tmp = heap[me]; heap[me] = heap[dad]; heap[dad] = tmp; |
| 703 | tmp = heap[me+1]; heap[me+1] = heap[dad+1]; heap[dad+1] = tmp; |
| 704 | me = dad; |
| 705 | } else |
| 706 | break; |
| 707 | } |
| 708 | |
| 709 | return len; |
| 710 | } |
| 711 | static int rmheap(int *heap, int len, int *userdata, int *key) |
| 712 | { |
| 713 | int me, lc, rc, c, tmp; |
| 714 | |
| 715 | len -= 2; |
| 716 | *userdata = heap[0]; |
| 717 | *key = heap[1]; |
| 718 | heap[0] = heap[len]; |
| 719 | heap[1] = heap[len+1]; |
| 720 | |
| 721 | me = 0; |
| 722 | |
| 723 | while (1) { |
| 724 | lc = HEAPLEFT(me); |
| 725 | rc = HEAPRIGHT(me); |
| 726 | if (lc >= len) |
| 727 | break; |
| 728 | else if (rc >= len || heap[lc+1] < heap[rc+1]) |
| 729 | c = lc; |
| 730 | else |
| 731 | c = rc; |
| 732 | if (heap[me+1] > heap[c+1]) { |
| 733 | tmp = heap[me]; heap[me] = heap[c]; heap[c] = tmp; |
| 734 | tmp = heap[me+1]; heap[me+1] = heap[c+1]; heap[c+1] = tmp; |
| 735 | } else |
| 736 | break; |
| 737 | me = c; |
| 738 | } |
| 739 | |
| 740 | return len; |
| 741 | } |
| 742 | |
| 743 | /* |
| 744 | * The core of the Huffman algorithm: takes an input array of |
| 745 | * symbol frequencies, and produces an output array of code |
| 746 | * lengths. |
| 747 | * |
| 748 | * This is basically a generic Huffman implementation, but it has |
| 749 | * one zlib-related quirk which is that it caps the output code |
| 750 | * lengths to fit in an unsigned char (which is safe since Deflate |
| 751 | * will reject anything longer than 15 anyway). Anyone wanting to |
| 752 | * rip it out and use it in another context should find that easy |
| 753 | * to remove. |
| 754 | */ |
| 755 | #define HUFMAX 286 |
| 756 | static void buildhuf(const int *freqs, unsigned char *lengths, int nsyms) |
| 757 | { |
| 758 | int parent[2*HUFMAX-1]; |
| 759 | int length[2*HUFMAX-1]; |
| 760 | int heap[2*HUFMAX]; |
| 761 | int heapsize; |
| 762 | int i, j, n; |
| 763 | int si, sj; |
| 764 | |
| 765 | assert(nsyms <= HUFMAX); |
| 766 | |
| 767 | memset(parent, 0, sizeof(parent)); |
| 768 | |
| 769 | /* |
| 770 | * Begin by building the heap. |
| 771 | */ |
| 772 | heapsize = 0; |
| 773 | for (i = 0; i < nsyms; i++) |
| 774 | if (freqs[i] > 0) /* leave unused symbols out totally */ |
| 775 | heapsize = addheap(heap, heapsize, i, freqs[i]); |
| 776 | |
| 777 | /* |
| 778 | * Now repeatedly take two elements off the heap and merge |
| 779 | * them. |
| 780 | */ |
| 781 | n = HUFMAX; |
| 782 | while (heapsize > 2) { |
| 783 | heapsize = rmheap(heap, heapsize, &i, &si); |
| 784 | heapsize = rmheap(heap, heapsize, &j, &sj); |
| 785 | parent[i] = n; |
| 786 | parent[j] = n; |
| 787 | heapsize = addheap(heap, heapsize, n, si + sj); |
| 788 | n++; |
| 789 | } |
| 790 | |
| 791 | /* |
| 792 | * Now we have our tree, in the form of a link from each node |
| 793 | * to the index of its parent. Count back down the tree to |
| 794 | * determine the code lengths. |
| 795 | */ |
| 796 | memset(length, 0, sizeof(length)); |
| 797 | /* The tree root has length 0 after that, which is correct. */ |
| 798 | for (i = n-1; i-- ;) |
| 799 | if (parent[i] > 0) |
| 800 | length[i] = 1 + length[parent[i]]; |
| 801 | |
| 802 | /* |
| 803 | * And that's it. (Simple, wasn't it?) Copy the lengths into |
| 804 | * the output array and leave. |
| 805 | * |
| 806 | * Here we cap lengths to fit in unsigned char. |
| 807 | */ |
| 808 | for (i = 0; i < nsyms; i++) |
| 809 | lengths[i] = (length[i] > 255 ? 255 : length[i]); |
| 810 | } |
| 811 | |
| 812 | /* |
| 813 | * Wrapper around buildhuf() which enforces the Deflate restriction |
| 814 | * that no code length may exceed 15 bits, or 7 for the auxiliary |
| 815 | * code length alphabet. This function has the same calling |
| 816 | * semantics as buildhuf(), except that it might modify the freqs |
| 817 | * array. |
| 818 | */ |
| 819 | static void deflate_buildhuf(int *freqs, unsigned char *lengths, |
| 820 | int nsyms, int limit) |
| 821 | { |
| 822 | int smallestfreq, totalfreq, nactivesyms; |
| 823 | int num, denom, adjust; |
| 824 | int i; |
| 825 | int maxprob; |
| 826 | |
| 827 | /* |
| 828 | * Nasty special case: if the frequency table has fewer than |
| 829 | * two non-zero elements, we must invent some, because we can't |
| 830 | * have fewer than one bit encoding a symbol. |
| 831 | */ |
| 832 | assert(nsyms >= 2); |
| 833 | { |
| 834 | int count = 0; |
| 835 | for (i = 0; i < nsyms; i++) |
| 836 | if (freqs[i] > 0) |
| 837 | count++; |
| 838 | if (count < 2) { |
| 839 | for (i = 0; i < nsyms && count > 0; i++) |
| 840 | if (freqs[i] == 0) { |
| 841 | freqs[i] = 1; |
| 842 | count--; |
| 843 | } |
| 844 | } |
| 845 | } |
| 846 | |
| 847 | /* |
| 848 | * First, try building the Huffman table the normal way. If |
| 849 | * this works, it's optimal, so we don't want to mess with it. |
| 850 | */ |
| 851 | buildhuf(freqs, lengths, nsyms); |
| 852 | |
| 853 | for (i = 0; i < nsyms; i++) |
| 854 | if (lengths[i] > limit) |
| 855 | break; |
| 856 | |
| 857 | if (i == nsyms) |
| 858 | return; /* OK */ |
| 859 | |
| 860 | /* |
| 861 | * The Huffman algorithm can only ever generate a code length |
| 862 | * of N bits or more if there is a symbol whose probability is |
| 863 | * less than the reciprocal of the (N+2)th Fibonacci number |
| 864 | * (counting from F_0=0 and F_1=1), i.e. 1/2584 for N=16, or |
| 865 | * 1/55 for N=8. (This is a necessary though not sufficient |
| 866 | * condition.) |
| 867 | * |
| 868 | * Why is this? Well, consider the input symbol with the |
| 869 | * smallest probability. Let that probability be x. In order |
| 870 | * for this symbol to have a code length of at least 1, the |
| 871 | * Huffman algorithm will have to merge it with some other |
| 872 | * node; and since x is the smallest probability, the node it |
| 873 | * gets merged with must be at least x. Thus, the probability |
| 874 | * of the resulting combined node will be at least 2x. Now in |
| 875 | * order for our node to reach depth 2, this 2x-node must be |
| 876 | * merged again. But what with? We can't assume the node it |
| 877 | * merges with is at least 2x, because this one might only be |
| 878 | * the _second_ smallest remaining node. But we do know the |
| 879 | * node it merges with must be at least x, so our order-2 |
| 880 | * internal node is at least 3x. |
| 881 | * |
| 882 | * How small a node can merge with _that_ to get an order-3 |
| 883 | * internal node? Well, it must be at least 2x, because if it |
| 884 | * was smaller than that then it would have been one of the two |
| 885 | * smallest nodes in the previous step and been merged at that |
| 886 | * point. So at least 3x, plus at least 2x, comes to at least |
| 887 | * 5x for an order-3 node. |
| 888 | * |
| 889 | * And so it goes on: at every stage we must merge our current |
| 890 | * node with a node at least as big as the bigger of this one's |
| 891 | * two parents, and from this starting point that gives rise to |
| 892 | * the Fibonacci sequence. So we find that in order to have a |
| 893 | * node n levels deep (i.e. a maximum code length of n), the |
| 894 | * overall probability of the root of the entire tree must be |
| 895 | * at least F_{n+2} times the probability of the rarest symbol. |
| 896 | * In other words, since the overall probability is 1, it is a |
| 897 | * necessary condition for a code length of 16 or more that |
| 898 | * there must be at least one symbol with probability <= |
| 899 | * 1/F_18. |
| 900 | * |
| 901 | * (To demonstrate that a probability this big really can give |
| 902 | * rise to a code length of 16, consider the set of input |
| 903 | * frequencies { 1-epsilon, 1, 1, 2, 3, 5, 8, 13, 21, 34, 55, |
| 904 | * 89, 144, 233, 377, 610, 987 }, for arbitrarily small |
| 905 | * epsilon.) |
| 906 | * |
| 907 | * So here buildhuf() has returned us an overlong code. So to |
| 908 | * ensure it doesn't do it again, we add a constant to all the |
| 909 | * (non-zero) symbol frequencies, causing them to become more |
| 910 | * balanced and removing the danger. We can then feed the |
| 911 | * results back to the standard buildhuf() and be |
| 912 | * assert()-level confident that the resulting code lengths |
| 913 | * contain nothing outside the permitted range. |
| 914 | */ |
| 915 | assert(limit == 15 || limit == 7); |
| 916 | maxprob = (limit == 15 ? 2584 : 55); /* no point in computing full F_n */ |
| 917 | totalfreq = nactivesyms = 0; |
| 918 | smallestfreq = -1; |
| 919 | for (i = 0; i < nsyms; i++) { |
| 920 | if (freqs[i] == 0) |
| 921 | continue; |
| 922 | if (smallestfreq < 0 || smallestfreq > freqs[i]) |
| 923 | smallestfreq = freqs[i]; |
| 924 | totalfreq += freqs[i]; |
| 925 | nactivesyms++; |
| 926 | } |
| 927 | assert(smallestfreq <= totalfreq / maxprob); |
| 928 | |
| 929 | /* |
| 930 | * We want to find the smallest integer `adjust' such that |
| 931 | * (totalfreq + nactivesyms * adjust) / (smallestfreq + |
| 932 | * adjust) is less than maxprob. A bit of algebra tells us |
| 933 | * that the threshold value is equal to |
| 934 | * |
| 935 | * totalfreq - maxprob * smallestfreq |
| 936 | * ---------------------------------- |
| 937 | * maxprob - nactivesyms |
| 938 | * |
| 939 | * rounded up, of course. And we'll only even be trying |
| 940 | * this if |
| 941 | */ |
| 942 | num = totalfreq - smallestfreq * maxprob; |
| 943 | denom = maxprob - nactivesyms; |
| 944 | adjust = (num + denom - 1) / denom; |
| 945 | |
| 946 | /* |
| 947 | * Now add `adjust' to all the input symbol frequencies. |
| 948 | */ |
| 949 | for (i = 0; i < nsyms; i++) |
| 950 | if (freqs[i] != 0) |
| 951 | freqs[i] += adjust; |
| 952 | |
| 953 | /* |
| 954 | * Rebuild the Huffman tree... |
| 955 | */ |
| 956 | buildhuf(freqs, lengths, nsyms); |
| 957 | |
| 958 | /* |
| 959 | * ... and this time it ought to be OK. |
| 960 | */ |
| 961 | for (i = 0; i < nsyms; i++) |
| 962 | assert(lengths[i] <= limit); |
| 963 | } |
| 964 | |
| 965 | /* |
| 966 | * Compute the bit length of a symbol, given the three Huffman |
| 967 | * trees. |
| 968 | */ |
| 969 | static int symsize(unsigned sym, const struct huftrees *trees) |
| 970 | { |
| 971 | unsigned basesym = sym &~ SYMPFX_MASK; |
| 972 | |
| 973 | switch (sym & SYMPFX_MASK) { |
| 974 | case SYMPFX_LITLEN: |
| 975 | return trees->len_litlen[basesym]; |
| 976 | case SYMPFX_DIST: |
| 977 | return trees->len_dist[basesym]; |
| 978 | case SYMPFX_CODELEN: |
| 979 | return trees->len_codelen[basesym]; |
| 980 | default /*case SYMPFX_EXTRABITS*/: |
| 981 | return basesym >> SYM_EXTRABITS_SHIFT; |
| 982 | } |
| 983 | } |
| 984 | |
| 985 | /* |
| 986 | * Write out a single symbol, given the three Huffman trees. |
| 987 | */ |
| 988 | static void writesym(deflate_compress_ctx *out, |
| 989 | unsigned sym, const struct huftrees *trees) |
| 990 | { |
| 991 | unsigned basesym = sym &~ SYMPFX_MASK; |
| 992 | int i; |
| 993 | |
| 994 | switch (sym & SYMPFX_MASK) { |
| 995 | case SYMPFX_LITLEN: |
| 996 | debug(("send: litlen %d\n", basesym)); |
| 997 | outbits(out, trees->code_litlen[basesym], trees->len_litlen[basesym]); |
| 998 | break; |
| 999 | case SYMPFX_DIST: |
| 1000 | debug(("send: dist %d\n", basesym)); |
| 1001 | outbits(out, trees->code_dist[basesym], trees->len_dist[basesym]); |
| 1002 | break; |
| 1003 | case SYMPFX_CODELEN: |
| 1004 | debug(("send: codelen %d\n", basesym)); |
| 1005 | outbits(out, trees->code_codelen[basesym],trees->len_codelen[basesym]); |
| 1006 | break; |
| 1007 | case SYMPFX_EXTRABITS: |
| 1008 | i = basesym >> SYM_EXTRABITS_SHIFT; |
| 1009 | basesym &= ~SYM_EXTRABITS_MASK; |
| 1010 | debug(("send: extrabits %d/%d\n", basesym, i)); |
| 1011 | outbits(out, basesym, i); |
| 1012 | break; |
| 1013 | } |
| 1014 | } |
| 1015 | |
| 1016 | /* |
| 1017 | * outblock() must output _either_ a dynamic block of length |
| 1018 | * `dynamic_len', _or_ a static block of length `static_len', but |
| 1019 | * it gets to choose which. |
| 1020 | */ |
| 1021 | static void outblock(deflate_compress_ctx *out, |
| 1022 | int dynamic_len, int static_len) |
| 1023 | { |
| 1024 | int freqs1[286], freqs2[30], freqs3[19]; |
| 1025 | unsigned char len1[286], len2[30], len3[19]; |
| 1026 | int code1[286], code2[30], code3[19]; |
| 1027 | int hlit, hdist, hclen, bfinal, btype; |
| 1028 | int treesrc[286 + 30]; |
| 1029 | int treesyms[286 + 30]; |
| 1030 | int codelen[19]; |
| 1031 | int i, ntreesrc, ntreesyms; |
| 1032 | int dynamic, blklen; |
| 1033 | struct huftrees dht; |
| 1034 | const struct huftrees *ht; |
| 1035 | #ifdef STATISTICS |
| 1036 | unsigned long bitcount_before; |
| 1037 | #endif |
| 1038 | |
| 1039 | dht.len_litlen = len1; |
| 1040 | dht.len_dist = len2; |
| 1041 | dht.len_codelen = len3; |
| 1042 | dht.code_litlen = code1; |
| 1043 | dht.code_dist = code2; |
| 1044 | dht.code_codelen = code3; |
| 1045 | |
| 1046 | /* |
| 1047 | * We make our choice of block to output by doing all the |
| 1048 | * detailed work to determine the exact length of each possible |
| 1049 | * block. Then we choose the one which has fewest output bits |
| 1050 | * per symbol. |
| 1051 | */ |
| 1052 | |
| 1053 | /* |
| 1054 | * First build the two main Huffman trees for the dynamic |
| 1055 | * block. |
| 1056 | */ |
| 1057 | |
| 1058 | /* |
| 1059 | * Count up the frequency tables. |
| 1060 | */ |
| 1061 | memset(freqs1, 0, sizeof(freqs1)); |
| 1062 | memset(freqs2, 0, sizeof(freqs2)); |
| 1063 | freqs1[256] = 1; /* we're bound to need one EOB */ |
| 1064 | for (i = 0; i < dynamic_len; i++) { |
| 1065 | unsigned sym = out->syms[(out->symstart + i) % SYMLIMIT]; |
| 1066 | |
| 1067 | /* |
| 1068 | * Increment the occurrence counter for this symbol, if |
| 1069 | * it's in one of the Huffman alphabets and isn't extra |
| 1070 | * bits. |
| 1071 | */ |
| 1072 | if ((sym & SYMPFX_MASK) == SYMPFX_LITLEN) { |
| 1073 | sym &= ~SYMPFX_MASK; |
| 1074 | assert(sym < lenof(freqs1)); |
| 1075 | freqs1[sym]++; |
| 1076 | } else if ((sym & SYMPFX_MASK) == SYMPFX_DIST) { |
| 1077 | sym &= ~SYMPFX_MASK; |
| 1078 | assert(sym < lenof(freqs2)); |
| 1079 | freqs2[sym]++; |
| 1080 | } |
| 1081 | } |
| 1082 | deflate_buildhuf(freqs1, len1, lenof(freqs1), 15); |
| 1083 | deflate_buildhuf(freqs2, len2, lenof(freqs2), 15); |
| 1084 | hufcodes(len1, code1, lenof(freqs1)); |
| 1085 | hufcodes(len2, code2, lenof(freqs2)); |
| 1086 | |
| 1087 | /* |
| 1088 | * Determine HLIT and HDIST. |
| 1089 | */ |
| 1090 | for (hlit = 286; hlit > 257 && len1[hlit-1] == 0; hlit--); |
| 1091 | for (hdist = 30; hdist > 1 && len2[hdist-1] == 0; hdist--); |
| 1092 | |
| 1093 | /* |
| 1094 | * Write out the list of symbols used to transmit the |
| 1095 | * trees. |
| 1096 | */ |
| 1097 | ntreesrc = 0; |
| 1098 | for (i = 0; i < hlit; i++) |
| 1099 | treesrc[ntreesrc++] = len1[i]; |
| 1100 | for (i = 0; i < hdist; i++) |
| 1101 | treesrc[ntreesrc++] = len2[i]; |
| 1102 | ntreesyms = 0; |
| 1103 | for (i = 0; i < ntreesrc ;) { |
| 1104 | int j = 1; |
| 1105 | int k; |
| 1106 | |
| 1107 | /* Find length of run of the same length code. */ |
| 1108 | while (i+j < ntreesrc && treesrc[i+j] == treesrc[i]) |
| 1109 | j++; |
| 1110 | |
| 1111 | /* Encode that run as economically as we can. */ |
| 1112 | k = j; |
| 1113 | if (treesrc[i] == 0) { |
| 1114 | /* |
| 1115 | * Zero code length: we can output run codes for |
| 1116 | * 3-138 zeroes. So if we have fewer than 3 zeroes, |
| 1117 | * we just output literals. Otherwise, we output |
| 1118 | * nothing but run codes, and tweak their lengths |
| 1119 | * to make sure we aren't left with under 3 at the |
| 1120 | * end. |
| 1121 | */ |
| 1122 | if (k < 3) { |
| 1123 | while (k--) |
| 1124 | treesyms[ntreesyms++] = 0 | SYMPFX_CODELEN; |
| 1125 | } else { |
| 1126 | while (k > 0) { |
| 1127 | int rpt = (k < 138 ? k : 138); |
| 1128 | if (rpt > k-3 && rpt < k) |
| 1129 | rpt = k-3; |
| 1130 | assert(rpt >= 3 && rpt <= 138); |
| 1131 | if (rpt < 11) { |
| 1132 | treesyms[ntreesyms++] = 17 | SYMPFX_CODELEN; |
| 1133 | treesyms[ntreesyms++] = |
| 1134 | (SYMPFX_EXTRABITS | (rpt - 3) | |
| 1135 | (3 << SYM_EXTRABITS_SHIFT)); |
| 1136 | } else { |
| 1137 | treesyms[ntreesyms++] = 18 | SYMPFX_CODELEN; |
| 1138 | treesyms[ntreesyms++] = |
| 1139 | (SYMPFX_EXTRABITS | (rpt - 11) | |
| 1140 | (7 << SYM_EXTRABITS_SHIFT)); |
| 1141 | } |
| 1142 | k -= rpt; |
| 1143 | } |
| 1144 | } |
| 1145 | } else { |
| 1146 | /* |
| 1147 | * Non-zero code length: we must output the first |
| 1148 | * one explicitly, then we can output a copy code |
| 1149 | * for 3-6 repeats. So if we have fewer than 4 |
| 1150 | * repeats, we _just_ output literals. Otherwise, |
| 1151 | * we output one literal plus at least one copy |
| 1152 | * code, and tweak the copy codes to make sure we |
| 1153 | * aren't left with under 3 at the end. |
| 1154 | */ |
| 1155 | assert(treesrc[i] < 16); |
| 1156 | treesyms[ntreesyms++] = treesrc[i] | SYMPFX_CODELEN; |
| 1157 | k--; |
| 1158 | if (k < 3) { |
| 1159 | while (k--) |
| 1160 | treesyms[ntreesyms++] = treesrc[i] | SYMPFX_CODELEN; |
| 1161 | } else { |
| 1162 | while (k > 0) { |
| 1163 | int rpt = (k < 6 ? k : 6); |
| 1164 | if (rpt > k-3 && rpt < k) |
| 1165 | rpt = k-3; |
| 1166 | assert(rpt >= 3 && rpt <= 6); |
| 1167 | treesyms[ntreesyms++] = 16 | SYMPFX_CODELEN; |
| 1168 | treesyms[ntreesyms++] = (SYMPFX_EXTRABITS | (rpt - 3) | |
| 1169 | (2 << SYM_EXTRABITS_SHIFT)); |
| 1170 | k -= rpt; |
| 1171 | } |
| 1172 | } |
| 1173 | } |
| 1174 | |
| 1175 | i += j; |
| 1176 | } |
| 1177 | assert((unsigned)ntreesyms < lenof(treesyms)); |
| 1178 | |
| 1179 | /* |
| 1180 | * Count up the frequency table for the tree-transmission |
| 1181 | * symbols, and build the auxiliary Huffman tree for that. |
| 1182 | */ |
| 1183 | memset(freqs3, 0, sizeof(freqs3)); |
| 1184 | for (i = 0; i < ntreesyms; i++) { |
| 1185 | unsigned sym = treesyms[i]; |
| 1186 | |
| 1187 | /* |
| 1188 | * Increment the occurrence counter for this symbol, if |
| 1189 | * it's the Huffman alphabet and isn't extra bits. |
| 1190 | */ |
| 1191 | if ((sym & SYMPFX_MASK) == SYMPFX_CODELEN) { |
| 1192 | sym &= ~SYMPFX_MASK; |
| 1193 | assert(sym < lenof(freqs3)); |
| 1194 | freqs3[sym]++; |
| 1195 | } |
| 1196 | } |
| 1197 | deflate_buildhuf(freqs3, len3, lenof(freqs3), 7); |
| 1198 | hufcodes(len3, code3, lenof(freqs3)); |
| 1199 | |
| 1200 | /* |
| 1201 | * Reorder the code length codes into transmission order, and |
| 1202 | * determine HCLEN. |
| 1203 | */ |
| 1204 | for (i = 0; i < 19; i++) |
| 1205 | codelen[i] = len3[lenlenmap[i]]; |
| 1206 | for (hclen = 19; hclen > 4 && codelen[hclen-1] == 0; hclen--); |
| 1207 | |
| 1208 | /* |
| 1209 | * Now work out the exact size of both the dynamic and the |
| 1210 | * static block, in bits. |
| 1211 | */ |
| 1212 | { |
| 1213 | int ssize, dsize; |
| 1214 | |
| 1215 | /* |
| 1216 | * First the dynamic block. |
| 1217 | */ |
| 1218 | dsize = 3 + 5 + 5 + 4; /* 3-bit header, HLIT, HDIST, HCLEN */ |
| 1219 | dsize += 3 * hclen; /* code-length-alphabet code lengths */ |
| 1220 | /* Code lengths */ |
| 1221 | for (i = 0; i < ntreesyms; i++) |
| 1222 | dsize += symsize(treesyms[i], &dht); |
| 1223 | /* The actual block data */ |
| 1224 | for (i = 0; i < dynamic_len; i++) { |
| 1225 | unsigned sym = out->syms[(out->symstart + i) % SYMLIMIT]; |
| 1226 | dsize += symsize(sym, &dht); |
| 1227 | } |
| 1228 | /* And the end-of-data symbol. */ |
| 1229 | dsize += symsize(SYMPFX_LITLEN | 256, &dht); |
| 1230 | |
| 1231 | /* |
| 1232 | * Now the static block. |
| 1233 | */ |
| 1234 | ssize = 3; /* 3-bit block header */ |
| 1235 | /* The actual block data */ |
| 1236 | for (i = 0; i < static_len; i++) { |
| 1237 | unsigned sym = out->syms[(out->symstart + i) % SYMLIMIT]; |
| 1238 | ssize += symsize(sym, &out->sht); |
| 1239 | } |
| 1240 | /* And the end-of-data symbol. */ |
| 1241 | ssize += symsize(SYMPFX_LITLEN | 256, &out->sht); |
| 1242 | |
| 1243 | /* |
| 1244 | * Compare the two and decide which to output. We break |
| 1245 | * exact ties in favour of the static block, because of the |
| 1246 | * special case in which that block has zero length. |
| 1247 | */ |
| 1248 | dynamic = ((double)ssize * dynamic_len > (double)dsize * static_len); |
| 1249 | ht = dynamic ? &dht : &out->sht; |
| 1250 | blklen = dynamic ? dynamic_len : static_len; |
| 1251 | } |
| 1252 | |
| 1253 | /* |
| 1254 | * Actually transmit the block. |
| 1255 | */ |
| 1256 | |
| 1257 | /* 3-bit block header */ |
| 1258 | bfinal = (out->lastblock ? 1 : 0); |
| 1259 | btype = dynamic ? 2 : 1; |
| 1260 | debug(("send: bfinal=%d btype=%d\n", bfinal, btype)); |
| 1261 | outbits(out, bfinal, 1); |
| 1262 | outbits(out, btype, 2); |
| 1263 | |
| 1264 | #ifdef STATISTICS |
| 1265 | bitcount_before = out->bitcount; |
| 1266 | #endif |
| 1267 | |
| 1268 | if (dynamic) { |
| 1269 | /* HLIT, HDIST and HCLEN */ |
| 1270 | debug(("send: hlit=%d hdist=%d hclen=%d\n", hlit, hdist, hclen)); |
| 1271 | outbits(out, hlit - 257, 5); |
| 1272 | outbits(out, hdist - 1, 5); |
| 1273 | outbits(out, hclen - 4, 4); |
| 1274 | |
| 1275 | /* Code lengths for the auxiliary tree */ |
| 1276 | for (i = 0; i < hclen; i++) { |
| 1277 | debug(("send: lenlen %d\n", codelen[i])); |
| 1278 | outbits(out, codelen[i], 3); |
| 1279 | } |
| 1280 | |
| 1281 | /* Code lengths for the literal/length and distance trees */ |
| 1282 | for (i = 0; i < ntreesyms; i++) |
| 1283 | writesym(out, treesyms[i], ht); |
| 1284 | #ifdef STATISTICS |
| 1285 | fprintf(stderr, "total tree size %lu bits\n", |
| 1286 | out->bitcount - bitcount_before); |
| 1287 | #endif |
| 1288 | } |
| 1289 | |
| 1290 | /* Output the actual symbols from the buffer */ |
| 1291 | for (i = 0; i < blklen; i++) { |
| 1292 | unsigned sym = out->syms[(out->symstart + i) % SYMLIMIT]; |
| 1293 | writesym(out, sym, ht); |
| 1294 | } |
| 1295 | |
| 1296 | /* Output the end-of-data symbol */ |
| 1297 | writesym(out, SYMPFX_LITLEN | 256, ht); |
| 1298 | |
| 1299 | /* |
| 1300 | * Remove all the just-output symbols from the symbol buffer by |
| 1301 | * adjusting symstart and nsyms. |
| 1302 | */ |
| 1303 | out->symstart = (out->symstart + blklen) % SYMLIMIT; |
| 1304 | out->nsyms -= blklen; |
| 1305 | } |
| 1306 | |
| 1307 | /* |
| 1308 | * Give the approximate log-base-2 of an input integer, measured in |
| 1309 | * 8ths of a bit. (I.e. this computes an integer approximation to |
| 1310 | * 8*logbase2(x).) |
| 1311 | */ |
| 1312 | static int approxlog2(unsigned x) |
| 1313 | { |
| 1314 | int ret = 31*8; |
| 1315 | |
| 1316 | /* |
| 1317 | * Binary-search to get the top bit of x up to bit 31. |
| 1318 | */ |
| 1319 | if (x < 0x00010000U) x <<= 16, ret -= 16*8; |
| 1320 | if (x < 0x01000000U) x <<= 8, ret -= 8*8; |
| 1321 | if (x < 0x10000000U) x <<= 4, ret -= 4*8; |
| 1322 | if (x < 0x40000000U) x <<= 2, ret -= 2*8; |
| 1323 | if (x < 0x80000000U) x <<= 1, ret -= 1*8; |
| 1324 | |
| 1325 | /* |
| 1326 | * Now we know the logarithm we want is in [ret,ret+1). |
| 1327 | * Determine the bottom three bits by checking against |
| 1328 | * threshold values. |
| 1329 | * |
| 1330 | * (Each of these threshold values is 0x80000000 times an odd |
| 1331 | * power of 2^(1/16). Therefore, this function rounds to |
| 1332 | * nearest.) |
| 1333 | */ |
| 1334 | if (x <= 0xAD583EEAU) { |
| 1335 | if (x <= 0x91C3D373U) |
| 1336 | ret += (x <= 0x85AAC367U ? 0 : 1); |
| 1337 | else |
| 1338 | ret += (x <= 0x9EF53260U ? 2 : 3); |
| 1339 | } else { |
| 1340 | if (x <= 0xCE248C15U) |
| 1341 | ret += (x <= 0xBD08A39FU ? 4 : 5); |
| 1342 | else |
| 1343 | ret += (x <= 0xE0CCDEECU ? 6 : x <= 0xF5257D15L ? 7 : 8); |
| 1344 | } |
| 1345 | |
| 1346 | return ret; |
| 1347 | } |
| 1348 | |
| 1349 | static void chooseblock(deflate_compress_ctx *out) |
| 1350 | { |
| 1351 | int freqs1[286], freqs2[30]; |
| 1352 | int i, len, bestlen, longestlen = 0; |
| 1353 | int total1, total2; |
| 1354 | int bestvfm; |
| 1355 | |
| 1356 | memset(freqs1, 0, sizeof(freqs1)); |
| 1357 | memset(freqs2, 0, sizeof(freqs2)); |
| 1358 | freqs1[256] = 1; /* we're bound to need one EOB */ |
| 1359 | total1 = 1; |
| 1360 | total2 = 0; |
| 1361 | |
| 1362 | /* |
| 1363 | * Iterate over all possible block lengths, computing the |
| 1364 | * entropic coding approximation to the final length at every |
| 1365 | * stage. We divide the result by the number of symbols |
| 1366 | * encoded, to determine the `value for money' (overall |
| 1367 | * bits-per-symbol count) of a block of that length. |
| 1368 | */ |
| 1369 | bestlen = -1; |
| 1370 | bestvfm = 0; |
| 1371 | |
| 1372 | len = 300 * 8; /* very approximate size of the Huffman trees */ |
| 1373 | |
| 1374 | for (i = 0; i < out->nsyms; i++) { |
| 1375 | unsigned sym = out->syms[(out->symstart + i) % SYMLIMIT]; |
| 1376 | |
| 1377 | if (i > 0 && (sym & SYMPFX_MASK) == SYMPFX_LITLEN) { |
| 1378 | /* |
| 1379 | * This is a viable point at which to end the block. |
| 1380 | * Compute the value for money. |
| 1381 | */ |
| 1382 | int vfm = i * 32768 / len; /* symbols encoded per bit */ |
| 1383 | |
| 1384 | if (bestlen < 0 || vfm > bestvfm) { |
| 1385 | bestlen = i; |
| 1386 | bestvfm = vfm; |
| 1387 | } |
| 1388 | |
| 1389 | longestlen = i; |
| 1390 | } |
| 1391 | |
| 1392 | /* |
| 1393 | * Increment the occurrence counter for this symbol, if |
| 1394 | * it's in one of the Huffman alphabets and isn't extra |
| 1395 | * bits. |
| 1396 | */ |
| 1397 | if ((sym & SYMPFX_MASK) == SYMPFX_LITLEN) { |
| 1398 | sym &= ~SYMPFX_MASK; |
| 1399 | assert(sym < lenof(freqs1)); |
| 1400 | len += freqs1[sym] * approxlog2(freqs1[sym]); |
| 1401 | len -= total1 * approxlog2(total1); |
| 1402 | freqs1[sym]++; |
| 1403 | total1++; |
| 1404 | len -= freqs1[sym] * approxlog2(freqs1[sym]); |
| 1405 | len += total1 * approxlog2(total1); |
| 1406 | } else if ((sym & SYMPFX_MASK) == SYMPFX_DIST) { |
| 1407 | sym &= ~SYMPFX_MASK; |
| 1408 | assert(sym < lenof(freqs2)); |
| 1409 | len += freqs2[sym] * approxlog2(freqs2[sym]); |
| 1410 | len -= total2 * approxlog2(total2); |
| 1411 | freqs2[sym]++; |
| 1412 | total2++; |
| 1413 | len -= freqs2[sym] * approxlog2(freqs2[sym]); |
| 1414 | len += total2 * approxlog2(total2); |
| 1415 | } else if ((sym & SYMPFX_MASK) == SYMPFX_EXTRABITS) { |
| 1416 | len += 8 * ((sym &~ SYMPFX_MASK) >> SYM_EXTRABITS_SHIFT); |
| 1417 | } |
| 1418 | } |
| 1419 | |
| 1420 | assert(bestlen > 0); |
| 1421 | |
| 1422 | outblock(out, bestlen, longestlen); |
| 1423 | } |
| 1424 | |
| 1425 | /* |
| 1426 | * Force the current symbol buffer to be flushed out as a single |
| 1427 | * block. |
| 1428 | */ |
| 1429 | static void flushblock(deflate_compress_ctx *out) |
| 1430 | { |
| 1431 | /* |
| 1432 | * No need to check that out->nsyms is a valid block length: we |
| 1433 | * know it has to be, because flushblock() is called in between |
| 1434 | * two matches/literals. |
| 1435 | */ |
| 1436 | outblock(out, out->nsyms, out->nsyms); |
| 1437 | assert(out->nsyms == 0); |
| 1438 | } |
| 1439 | |
| 1440 | /* |
| 1441 | * Place a symbol into the symbols buffer. |
| 1442 | */ |
| 1443 | static void outsym(deflate_compress_ctx *out, unsigned long sym) |
| 1444 | { |
| 1445 | assert(out->nsyms < SYMLIMIT); |
| 1446 | out->syms[(out->symstart + out->nsyms++) % SYMLIMIT] = sym; |
| 1447 | |
| 1448 | if (out->nsyms == SYMLIMIT) |
| 1449 | chooseblock(out); |
| 1450 | } |
| 1451 | |
| 1452 | static void literal(struct LZ77Context *ectx, unsigned char c) |
| 1453 | { |
| 1454 | deflate_compress_ctx *out = (deflate_compress_ctx *) ectx->userdata; |
| 1455 | |
| 1456 | outsym(out, SYMPFX_LITLEN | c); |
| 1457 | } |
| 1458 | |
| 1459 | static void match(struct LZ77Context *ectx, int distance, int len) |
| 1460 | { |
| 1461 | const coderecord *d, *l; |
| 1462 | int i, j, k; |
| 1463 | deflate_compress_ctx *out = (deflate_compress_ctx *) ectx->userdata; |
| 1464 | |
| 1465 | while (len > 0) { |
| 1466 | int thislen; |
| 1467 | |
| 1468 | /* |
| 1469 | * We can transmit matches of lengths 3 through 258 |
| 1470 | * inclusive. So if len exceeds 258, we must transmit in |
| 1471 | * several steps, with 258 or less in each step. |
| 1472 | * |
| 1473 | * Specifically: if len >= 261, we can transmit 258 and be |
| 1474 | * sure of having at least 3 left for the next step. And if |
| 1475 | * len <= 258, we can just transmit len. But if len == 259 |
| 1476 | * or 260, we must transmit len-3. |
| 1477 | */ |
| 1478 | thislen = (len > 260 ? 258 : len <= 258 ? len : len - 3); |
| 1479 | len -= thislen; |
| 1480 | |
| 1481 | /* |
| 1482 | * Binary-search to find which length code we're |
| 1483 | * transmitting. |
| 1484 | */ |
| 1485 | i = -1; |
| 1486 | j = sizeof(lencodes) / sizeof(*lencodes); |
| 1487 | while (1) { |
| 1488 | assert(j - i >= 2); |
| 1489 | k = (j + i) / 2; |
| 1490 | if (thislen < lencodes[k].min) |
| 1491 | j = k; |
| 1492 | else if (thislen > lencodes[k].max) |
| 1493 | i = k; |
| 1494 | else { |
| 1495 | l = &lencodes[k]; |
| 1496 | break; /* found it! */ |
| 1497 | } |
| 1498 | } |
| 1499 | |
| 1500 | /* |
| 1501 | * Transmit the length code. |
| 1502 | */ |
| 1503 | outsym(out, SYMPFX_LITLEN | l->code); |
| 1504 | |
| 1505 | /* |
| 1506 | * Transmit the extra bits. |
| 1507 | */ |
| 1508 | if (l->extrabits) { |
| 1509 | outsym(out, (SYMPFX_EXTRABITS | (thislen - l->min) | |
| 1510 | (l->extrabits << SYM_EXTRABITS_SHIFT))); |
| 1511 | } |
| 1512 | |
| 1513 | /* |
| 1514 | * Binary-search to find which distance code we're |
| 1515 | * transmitting. |
| 1516 | */ |
| 1517 | i = -1; |
| 1518 | j = sizeof(distcodes) / sizeof(*distcodes); |
| 1519 | while (1) { |
| 1520 | assert(j - i >= 2); |
| 1521 | k = (j + i) / 2; |
| 1522 | if (distance < distcodes[k].min) |
| 1523 | j = k; |
| 1524 | else if (distance > distcodes[k].max) |
| 1525 | i = k; |
| 1526 | else { |
| 1527 | d = &distcodes[k]; |
| 1528 | break; /* found it! */ |
| 1529 | } |
| 1530 | } |
| 1531 | |
| 1532 | /* |
| 1533 | * Write the distance code. |
| 1534 | */ |
| 1535 | outsym(out, SYMPFX_DIST | d->code); |
| 1536 | |
| 1537 | /* |
| 1538 | * Transmit the extra bits. |
| 1539 | */ |
| 1540 | if (d->extrabits) { |
| 1541 | outsym(out, (SYMPFX_EXTRABITS | (distance - d->min) | |
| 1542 | (d->extrabits << SYM_EXTRABITS_SHIFT))); |
| 1543 | } |
| 1544 | } |
| 1545 | } |
| 1546 | |
| 1547 | deflate_compress_ctx *deflate_compress_new(int type) |
| 1548 | { |
| 1549 | deflate_compress_ctx *out; |
| 1550 | struct LZ77Context *ectx = snew(struct LZ77Context); |
| 1551 | |
| 1552 | lz77_init(ectx); |
| 1553 | ectx->literal = literal; |
| 1554 | ectx->match = match; |
| 1555 | |
| 1556 | out = snew(deflate_compress_ctx); |
| 1557 | out->type = type; |
| 1558 | out->outbits = out->noutbits = 0; |
| 1559 | out->firstblock = TRUE; |
| 1560 | #ifdef STATISTICS |
| 1561 | out->bitcount = 0; |
| 1562 | #endif |
| 1563 | |
| 1564 | out->syms = snewn(SYMLIMIT, unsigned long); |
| 1565 | out->symstart = out->nsyms = 0; |
| 1566 | |
| 1567 | out->checksum = (type == DEFLATE_TYPE_ZLIB ? 1 : 0); |
| 1568 | out->datasize = 0; |
| 1569 | out->lastblock = FALSE; |
| 1570 | out->finished = FALSE; |
| 1571 | |
| 1572 | /* |
| 1573 | * Build the static Huffman tables now, so we'll have them |
| 1574 | * available every time outblock() is called. |
| 1575 | */ |
| 1576 | { |
| 1577 | int i; |
| 1578 | |
| 1579 | for (i = 0; i < (int)lenof(out->static_len1); i++) |
| 1580 | out->static_len1[i] = (i < 144 ? 8 : |
| 1581 | i < 256 ? 9 : |
| 1582 | i < 280 ? 7 : 8); |
| 1583 | for (i = 0; i < (int)lenof(out->static_len2); i++) |
| 1584 | out->static_len2[i] = 5; |
| 1585 | } |
| 1586 | hufcodes(out->static_len1, out->static_code1, lenof(out->static_code1)); |
| 1587 | hufcodes(out->static_len2, out->static_code2, lenof(out->static_code2)); |
| 1588 | out->sht.len_litlen = out->static_len1; |
| 1589 | out->sht.len_dist = out->static_len2; |
| 1590 | out->sht.len_codelen = NULL; |
| 1591 | out->sht.code_litlen = out->static_code1; |
| 1592 | out->sht.code_dist = out->static_code2; |
| 1593 | out->sht.code_codelen = NULL; |
| 1594 | |
| 1595 | ectx->userdata = out; |
| 1596 | out->lzc = ectx; |
| 1597 | |
| 1598 | return out; |
| 1599 | } |
| 1600 | |
| 1601 | void deflate_compress_free(deflate_compress_ctx *out) |
| 1602 | { |
| 1603 | struct LZ77Context *ectx = out->lzc; |
| 1604 | |
| 1605 | sfree(out->syms); |
| 1606 | sfree(ectx->ictx); |
| 1607 | sfree(ectx); |
| 1608 | sfree(out); |
| 1609 | } |
| 1610 | |
| 1611 | void deflate_compress_data(deflate_compress_ctx *out, |
| 1612 | const void *vblock, int len, int flushtype, |
| 1613 | void **outblock, int *outlen) |
| 1614 | { |
| 1615 | struct LZ77Context *ectx = out->lzc; |
| 1616 | const unsigned char *block = (const unsigned char *)vblock; |
| 1617 | |
| 1618 | assert(!out->finished); |
| 1619 | |
| 1620 | out->outbuf = NULL; |
| 1621 | out->outlen = out->outsize = 0; |
| 1622 | |
| 1623 | /* |
| 1624 | * If this is the first block, output the header. |
| 1625 | */ |
| 1626 | if (out->firstblock) { |
| 1627 | switch (out->type) { |
| 1628 | case DEFLATE_TYPE_BARE: |
| 1629 | break; /* no header */ |
| 1630 | case DEFLATE_TYPE_ZLIB: |
| 1631 | /* |
| 1632 | * zlib (RFC1950) header bytes: 78 9C. (Deflate |
| 1633 | * compression, 32K window size, default algorithm.) |
| 1634 | */ |
| 1635 | outbits(out, 0x9C78, 16); |
| 1636 | break; |
| 1637 | case DEFLATE_TYPE_GZIP: |
| 1638 | /* |
| 1639 | * Minimal gzip (RFC1952) header: |
| 1640 | * |
| 1641 | * - basic header of 1F 8B |
| 1642 | * - compression method byte (8 = deflate) |
| 1643 | * - flags byte (zero: we use no optional features) |
| 1644 | * - modification time (zero: no time stamp available) |
| 1645 | * - extra flags byte (2: we use maximum compression |
| 1646 | * always) |
| 1647 | * - operating system byte (255: we do not specify) |
| 1648 | */ |
| 1649 | outbits(out, 0x00088B1F, 32); /* header, CM, flags */ |
| 1650 | outbits(out, 0, 32); /* mtime */ |
| 1651 | outbits(out, 0xFF02, 16); /* xflags, OS */ |
| 1652 | break; |
| 1653 | } |
| 1654 | out->firstblock = FALSE; |
| 1655 | } |
| 1656 | |
| 1657 | /* |
| 1658 | * Feed our data to the LZ77 compression phase. |
| 1659 | */ |
| 1660 | lz77_compress(ectx, block, len, TRUE); |
| 1661 | |
| 1662 | /* |
| 1663 | * Update checksums and counters. |
| 1664 | */ |
| 1665 | switch (out->type) { |
| 1666 | case DEFLATE_TYPE_ZLIB: |
| 1667 | out->checksum = adler32_update(out->checksum, block, len); |
| 1668 | break; |
| 1669 | case DEFLATE_TYPE_GZIP: |
| 1670 | out->checksum = crc32_update(out->checksum, block, len); |
| 1671 | break; |
| 1672 | } |
| 1673 | out->datasize += len; |
| 1674 | |
| 1675 | switch (flushtype) { |
| 1676 | /* |
| 1677 | * FIXME: what other flush types are available and useful? |
| 1678 | * In PuTTY, it was clear that we generally wanted to be in |
| 1679 | * a static block so it was safe to open one. Here, we |
| 1680 | * probably prefer to be _outside_ a block if we can. Think |
| 1681 | * about this. |
| 1682 | */ |
| 1683 | case DEFLATE_NO_FLUSH: |
| 1684 | break; /* don't flush any data at all (duh) */ |
| 1685 | case DEFLATE_SYNC_FLUSH: |
| 1686 | /* |
| 1687 | * Close the current block. |
| 1688 | */ |
| 1689 | flushblock(out); |
| 1690 | |
| 1691 | /* |
| 1692 | * Then output an empty _uncompressed_ block: send 000, |
| 1693 | * then sync to byte boundary, then send bytes 00 00 FF |
| 1694 | * FF. |
| 1695 | */ |
| 1696 | outbits(out, 0, 3); |
| 1697 | if (out->noutbits) |
| 1698 | outbits(out, 0, 8 - out->noutbits); |
| 1699 | outbits(out, 0, 16); |
| 1700 | outbits(out, 0xFFFF, 16); |
| 1701 | break; |
| 1702 | case DEFLATE_END_OF_DATA: |
| 1703 | /* |
| 1704 | * Output a block with BFINAL set. |
| 1705 | */ |
| 1706 | out->lastblock = TRUE; |
| 1707 | flushblock(out); |
| 1708 | |
| 1709 | /* |
| 1710 | * Sync to byte boundary, flushing out the final byte. |
| 1711 | */ |
| 1712 | if (out->noutbits) |
| 1713 | outbits(out, 0, 8 - out->noutbits); |
| 1714 | |
| 1715 | /* |
| 1716 | * Format-specific trailer data. |
| 1717 | */ |
| 1718 | switch (out->type) { |
| 1719 | case DEFLATE_TYPE_ZLIB: |
| 1720 | /* |
| 1721 | * Just write out the Adler32 checksum. |
| 1722 | */ |
| 1723 | outbits(out, (out->checksum >> 24) & 0xFF, 8); |
| 1724 | outbits(out, (out->checksum >> 16) & 0xFF, 8); |
| 1725 | outbits(out, (out->checksum >> 8) & 0xFF, 8); |
| 1726 | outbits(out, (out->checksum >> 0) & 0xFF, 8); |
| 1727 | break; |
| 1728 | case DEFLATE_TYPE_GZIP: |
| 1729 | /* |
| 1730 | * Write out the CRC32 checksum and the data length. |
| 1731 | */ |
| 1732 | outbits(out, out->checksum, 32); |
| 1733 | outbits(out, out->datasize, 32); |
| 1734 | break; |
| 1735 | } |
| 1736 | |
| 1737 | out->finished = TRUE; |
| 1738 | break; |
| 1739 | } |
| 1740 | |
| 1741 | /* |
| 1742 | * Return any data that we've generated. |
| 1743 | */ |
| 1744 | *outblock = (void *)out->outbuf; |
| 1745 | *outlen = out->outlen; |
| 1746 | } |
| 1747 | |
| 1748 | /* ---------------------------------------------------------------------- |
| 1749 | * Deflate decompression. |
| 1750 | */ |
| 1751 | |
| 1752 | /* |
| 1753 | * The way we work the Huffman decode is to have a table lookup on |
| 1754 | * the first N bits of the input stream (in the order they arrive, |
| 1755 | * of course, i.e. the first bit of the Huffman code is in bit 0). |
| 1756 | * Each table entry lists the number of bits to consume, plus |
| 1757 | * either an output code or a pointer to a secondary table. |
| 1758 | */ |
| 1759 | struct table; |
| 1760 | struct tableentry; |
| 1761 | |
| 1762 | struct tableentry { |
| 1763 | unsigned char nbits; |
| 1764 | short code; |
| 1765 | struct table *nexttable; |
| 1766 | }; |
| 1767 | |
| 1768 | struct table { |
| 1769 | int mask; /* mask applied to input bit stream */ |
| 1770 | struct tableentry *table; |
| 1771 | }; |
| 1772 | |
| 1773 | #define MAXSYMS 288 |
| 1774 | |
| 1775 | #define DWINSIZE 32768 |
| 1776 | |
| 1777 | /* |
| 1778 | * Build a single-level decode table for elements |
| 1779 | * [minlength,maxlength) of the provided code/length tables, and |
| 1780 | * recurse to build subtables. |
| 1781 | */ |
| 1782 | static struct table *mkonetab(int *codes, unsigned char *lengths, int nsyms, |
| 1783 | int pfx, int pfxbits, int bits) |
| 1784 | { |
| 1785 | struct table *tab = snew(struct table); |
| 1786 | int pfxmask = (1 << pfxbits) - 1; |
| 1787 | int nbits, i, j, code; |
| 1788 | |
| 1789 | tab->table = snewn(1 << bits, struct tableentry); |
| 1790 | tab->mask = (1 << bits) - 1; |
| 1791 | |
| 1792 | for (code = 0; code <= tab->mask; code++) { |
| 1793 | tab->table[code].code = -1; |
| 1794 | tab->table[code].nbits = 0; |
| 1795 | tab->table[code].nexttable = NULL; |
| 1796 | } |
| 1797 | |
| 1798 | for (i = 0; i < nsyms; i++) { |
| 1799 | if (lengths[i] <= pfxbits || (codes[i] & pfxmask) != pfx) |
| 1800 | continue; |
| 1801 | code = (codes[i] >> pfxbits) & tab->mask; |
| 1802 | for (j = code; j <= tab->mask; j += 1 << (lengths[i] - pfxbits)) { |
| 1803 | tab->table[j].code = i; |
| 1804 | nbits = lengths[i] - pfxbits; |
| 1805 | if (tab->table[j].nbits < nbits) |
| 1806 | tab->table[j].nbits = nbits; |
| 1807 | } |
| 1808 | } |
| 1809 | for (code = 0; code <= tab->mask; code++) { |
| 1810 | if (tab->table[code].nbits <= bits) |
| 1811 | continue; |
| 1812 | /* Generate a subtable. */ |
| 1813 | tab->table[code].code = -1; |
| 1814 | nbits = tab->table[code].nbits - bits; |
| 1815 | if (nbits > 7) |
| 1816 | nbits = 7; |
| 1817 | tab->table[code].nbits = bits; |
| 1818 | tab->table[code].nexttable = mkonetab(codes, lengths, nsyms, |
| 1819 | pfx | (code << pfxbits), |
| 1820 | pfxbits + bits, nbits); |
| 1821 | } |
| 1822 | |
| 1823 | return tab; |
| 1824 | } |
| 1825 | |
| 1826 | /* |
| 1827 | * Build a decode table, given a set of Huffman tree lengths. |
| 1828 | */ |
| 1829 | static struct table *mktable(unsigned char *lengths, int nlengths, |
| 1830 | #ifdef ANALYSIS |
| 1831 | const char *alphabet, |
| 1832 | #endif |
| 1833 | int *error) |
| 1834 | { |
| 1835 | int codes[MAXSYMS]; |
| 1836 | int maxlen; |
| 1837 | |
| 1838 | #ifdef ANALYSIS |
| 1839 | if (alphabet && analyse_level > 1) { |
| 1840 | int i, col = 0; |
| 1841 | printf("code lengths for %s alphabet:\n", alphabet); |
| 1842 | for (i = 0; i < nlengths; i++) { |
| 1843 | col += printf("%3d", lengths[i]); |
| 1844 | if (col > 72) { |
| 1845 | putchar('\n'); |
| 1846 | col = 0; |
| 1847 | } |
| 1848 | } |
| 1849 | if (col > 0) |
| 1850 | putchar('\n'); |
| 1851 | } |
| 1852 | #endif |
| 1853 | |
| 1854 | maxlen = hufcodes(lengths, codes, nlengths); |
| 1855 | |
| 1856 | if (maxlen < 0) { |
| 1857 | *error = (maxlen == -1 ? DEFLATE_ERR_LARGE_HUFTABLE : |
| 1858 | DEFLATE_ERR_SMALL_HUFTABLE); |
| 1859 | return NULL; |
| 1860 | } |
| 1861 | |
| 1862 | /* |
| 1863 | * Now we have the complete list of Huffman codes. Build a |
| 1864 | * table. |
| 1865 | */ |
| 1866 | return mkonetab(codes, lengths, nlengths, 0, 0, maxlen < 9 ? maxlen : 9); |
| 1867 | } |
| 1868 | |
| 1869 | static int freetable(struct table **ztab) |
| 1870 | { |
| 1871 | struct table *tab; |
| 1872 | int code; |
| 1873 | |
| 1874 | if (ztab == NULL) |
| 1875 | return -1; |
| 1876 | |
| 1877 | if (*ztab == NULL) |
| 1878 | return 0; |
| 1879 | |
| 1880 | tab = *ztab; |
| 1881 | |
| 1882 | for (code = 0; code <= tab->mask; code++) |
| 1883 | if (tab->table[code].nexttable != NULL) |
| 1884 | freetable(&tab->table[code].nexttable); |
| 1885 | |
| 1886 | sfree(tab->table); |
| 1887 | tab->table = NULL; |
| 1888 | |
| 1889 | sfree(tab); |
| 1890 | *ztab = NULL; |
| 1891 | |
| 1892 | return (0); |
| 1893 | } |
| 1894 | |
| 1895 | struct deflate_decompress_ctx { |
| 1896 | struct table *staticlentable, *staticdisttable; |
| 1897 | struct table *currlentable, *currdisttable, *lenlentable; |
| 1898 | enum { |
| 1899 | ZLIBSTART, |
| 1900 | GZIPSTART, GZIPMETHFLAGS, GZIPIGNORE1, GZIPIGNORE2, GZIPIGNORE3, |
| 1901 | GZIPEXTRA, GZIPFNAME, GZIPCOMMENT, |
| 1902 | OUTSIDEBLK, TREES_HDR, TREES_LENLEN, TREES_LEN, TREES_LENREP, |
| 1903 | INBLK, GOTLENSYM, GOTLEN, GOTDISTSYM, |
| 1904 | UNCOMP_LEN, UNCOMP_NLEN, UNCOMP_DATA, |
| 1905 | END, |
| 1906 | ADLER1, ADLER2, |
| 1907 | CRC1, CRC2, ILEN1, ILEN2, |
| 1908 | FINALSPIN |
| 1909 | } state; |
| 1910 | int sym, hlit, hdist, hclen, lenptr, lenextrabits, lenaddon, len, |
| 1911 | lenrep, lastblock; |
| 1912 | int uncomplen; |
| 1913 | unsigned char lenlen[19]; |
| 1914 | unsigned char lengths[286 + 32]; |
| 1915 | unsigned long bits; |
| 1916 | int nbits; |
| 1917 | unsigned char window[DWINSIZE]; |
| 1918 | int winpos; |
| 1919 | unsigned char *outblk; |
| 1920 | int outlen, outsize; |
| 1921 | int type; |
| 1922 | unsigned long checksum; |
| 1923 | unsigned long bytesout; |
| 1924 | int gzflags, gzextralen; |
| 1925 | #ifdef ANALYSIS |
| 1926 | int bytesread; |
| 1927 | int bitcount_before; |
| 1928 | #define BITCOUNT(dctx) ( (dctx)->bytesread * 8 - (dctx)->nbits ) |
| 1929 | #endif |
| 1930 | }; |
| 1931 | |
| 1932 | deflate_decompress_ctx *deflate_decompress_new(int type) |
| 1933 | { |
| 1934 | deflate_decompress_ctx *dctx = snew(deflate_decompress_ctx); |
| 1935 | unsigned char lengths[288]; |
| 1936 | |
| 1937 | memset(lengths, 8, 144); |
| 1938 | memset(lengths + 144, 9, 256 - 144); |
| 1939 | memset(lengths + 256, 7, 280 - 256); |
| 1940 | memset(lengths + 280, 8, 288 - 280); |
| 1941 | dctx->staticlentable = mktable(lengths, 288, |
| 1942 | #ifdef ANALYSIS |
| 1943 | NULL, |
| 1944 | #endif |
| 1945 | NULL); |
| 1946 | assert(dctx->staticlentable); |
| 1947 | memset(lengths, 5, 32); |
| 1948 | dctx->staticdisttable = mktable(lengths, 32, |
| 1949 | #ifdef ANALYSIS |
| 1950 | NULL, |
| 1951 | #endif |
| 1952 | NULL); |
| 1953 | assert(dctx->staticdisttable); |
| 1954 | dctx->state = (type == DEFLATE_TYPE_ZLIB ? ZLIBSTART : |
| 1955 | type == DEFLATE_TYPE_GZIP ? GZIPSTART : |
| 1956 | OUTSIDEBLK); |
| 1957 | dctx->currlentable = dctx->currdisttable = dctx->lenlentable = NULL; |
| 1958 | dctx->bits = 0; |
| 1959 | dctx->nbits = 0; |
| 1960 | dctx->winpos = 0; |
| 1961 | dctx->type = type; |
| 1962 | dctx->lastblock = FALSE; |
| 1963 | dctx->checksum = (type == DEFLATE_TYPE_ZLIB ? 1 : 0); |
| 1964 | dctx->bytesout = 0; |
| 1965 | dctx->gzflags = dctx->gzextralen = 0; |
| 1966 | #ifdef ANALYSIS |
| 1967 | dctx->bytesread = dctx->bitcount_before = 0; |
| 1968 | #endif |
| 1969 | |
| 1970 | return dctx; |
| 1971 | } |
| 1972 | |
| 1973 | void deflate_decompress_free(deflate_decompress_ctx *dctx) |
| 1974 | { |
| 1975 | if (dctx->currlentable && dctx->currlentable != dctx->staticlentable) |
| 1976 | freetable(&dctx->currlentable); |
| 1977 | if (dctx->currdisttable && dctx->currdisttable != dctx->staticdisttable) |
| 1978 | freetable(&dctx->currdisttable); |
| 1979 | if (dctx->lenlentable) |
| 1980 | freetable(&dctx->lenlentable); |
| 1981 | freetable(&dctx->staticlentable); |
| 1982 | freetable(&dctx->staticdisttable); |
| 1983 | sfree(dctx); |
| 1984 | } |
| 1985 | |
| 1986 | static int huflookup(unsigned long *bitsp, int *nbitsp, struct table *tab) |
| 1987 | { |
| 1988 | unsigned long bits = *bitsp; |
| 1989 | int nbits = *nbitsp; |
| 1990 | while (1) { |
| 1991 | struct tableentry *ent; |
| 1992 | ent = &tab->table[bits & tab->mask]; |
| 1993 | if (ent->nbits > nbits) |
| 1994 | return -1; /* not enough data */ |
| 1995 | bits >>= ent->nbits; |
| 1996 | nbits -= ent->nbits; |
| 1997 | if (ent->code == -1) |
| 1998 | tab = ent->nexttable; |
| 1999 | else { |
| 2000 | *bitsp = bits; |
| 2001 | *nbitsp = nbits; |
| 2002 | return ent->code; |
| 2003 | } |
| 2004 | |
| 2005 | /* |
| 2006 | * If we reach here with `tab' null, it can only be because |
| 2007 | * there was a missing entry in the Huffman table. This |
| 2008 | * should never occur even with invalid input data, because |
| 2009 | * we enforce at mktable time that the Huffman codes should |
| 2010 | * precisely cover the code space; so we can enforce this |
| 2011 | * by assertion. |
| 2012 | */ |
| 2013 | assert(tab); |
| 2014 | } |
| 2015 | } |
| 2016 | |
| 2017 | static void emit_char(deflate_decompress_ctx *dctx, int c) |
| 2018 | { |
| 2019 | dctx->window[dctx->winpos] = c; |
| 2020 | dctx->winpos = (dctx->winpos + 1) & (DWINSIZE - 1); |
| 2021 | if (dctx->outlen >= dctx->outsize) { |
| 2022 | dctx->outsize = dctx->outlen * 3 / 2 + 512; |
| 2023 | dctx->outblk = sresize(dctx->outblk, dctx->outsize, unsigned char); |
| 2024 | } |
| 2025 | if (dctx->type == DEFLATE_TYPE_ZLIB) { |
| 2026 | unsigned char uc = c; |
| 2027 | dctx->checksum = adler32_update(dctx->checksum, &uc, 1); |
| 2028 | } else if (dctx->type == DEFLATE_TYPE_GZIP) { |
| 2029 | unsigned char uc = c; |
| 2030 | dctx->checksum = crc32_update(dctx->checksum, &uc, 1); |
| 2031 | } |
| 2032 | dctx->outblk[dctx->outlen++] = c; |
| 2033 | dctx->bytesout++; |
| 2034 | } |
| 2035 | |
| 2036 | #define EATBITS(n) ( dctx->nbits -= (n), dctx->bits >>= (n) ) |
| 2037 | |
| 2038 | int deflate_decompress_data(deflate_decompress_ctx *dctx, |
| 2039 | const void *vblock, int len, |
| 2040 | void **outblock, int *outlen) |
| 2041 | { |
| 2042 | const coderecord *rec; |
| 2043 | const unsigned char *block = (const unsigned char *)vblock; |
| 2044 | int code, bfinal, btype, rep, dist, nlen, header; |
| 2045 | unsigned long cksum; |
| 2046 | int error = 0; |
| 2047 | |
| 2048 | if (len == 0) { |
| 2049 | *outblock = NULL; |
| 2050 | *outlen = 0; |
| 2051 | if (dctx->state != FINALSPIN) |
| 2052 | return DEFLATE_ERR_UNEXPECTED_EOF; |
| 2053 | else |
| 2054 | return 0; |
| 2055 | } |
| 2056 | |
| 2057 | dctx->outblk = NULL; |
| 2058 | dctx->outsize = 0; |
| 2059 | dctx->outlen = 0; |
| 2060 | |
| 2061 | while (len > 0 || dctx->nbits > 0) { |
| 2062 | while (dctx->nbits < 24 && len > 0) { |
| 2063 | dctx->bits |= (*block++) << dctx->nbits; |
| 2064 | dctx->nbits += 8; |
| 2065 | len--; |
| 2066 | #ifdef ANALYSIS |
| 2067 | dctx->bytesread++; |
| 2068 | #endif |
| 2069 | } |
| 2070 | switch (dctx->state) { |
| 2071 | case ZLIBSTART: |
| 2072 | /* Expect 16-bit zlib header. */ |
| 2073 | if (dctx->nbits < 16) |
| 2074 | goto finished; /* done all we can */ |
| 2075 | |
| 2076 | /* |
| 2077 | * The header is stored as a big-endian 16-bit integer, |
| 2078 | * in contrast to the general little-endian policy in |
| 2079 | * the rest of the format :-( |
| 2080 | */ |
| 2081 | header = (((dctx->bits & 0xFF00) >> 8) | |
| 2082 | ((dctx->bits & 0x00FF) << 8)); |
| 2083 | EATBITS(16); |
| 2084 | |
| 2085 | /* |
| 2086 | * Check the header: |
| 2087 | * |
| 2088 | * - bits 8-11 should be 1000 (Deflate/RFC1951) |
| 2089 | * - bits 12-15 should be at most 0111 (window size) |
| 2090 | * - bit 5 should be zero (no dictionary present) |
| 2091 | * - we don't care about bits 6-7 (compression rate) |
| 2092 | * - bits 0-4 should be set up to make the whole thing |
| 2093 | * a multiple of 31 (checksum). |
| 2094 | */ |
| 2095 | if ((header & 0xF000) > 0x7000 || |
| 2096 | (header & 0x0020) != 0x0000 || |
| 2097 | (header % 31) != 0) { |
| 2098 | error = DEFLATE_ERR_ZLIB_HEADER; |
| 2099 | goto finished; |
| 2100 | } |
| 2101 | if ((header & 0x0F00) != 0x0800) { |
| 2102 | error = DEFLATE_ERR_ZLIB_WRONGCOMP; |
| 2103 | goto finished; |
| 2104 | } |
| 2105 | dctx->state = OUTSIDEBLK; |
| 2106 | break; |
| 2107 | case GZIPSTART: |
| 2108 | /* Expect 16-bit gzip header. */ |
| 2109 | if (dctx->nbits < 16) |
| 2110 | goto finished; |
| 2111 | header = dctx->bits & 0xFFFF; |
| 2112 | EATBITS(16); |
| 2113 | if (header != 0x8B1F) { |
| 2114 | error = DEFLATE_ERR_GZIP_HEADER; |
| 2115 | goto finished; |
| 2116 | } |
| 2117 | dctx->state = GZIPMETHFLAGS; |
| 2118 | break; |
| 2119 | case GZIPMETHFLAGS: |
| 2120 | /* Expect gzip compression method and flags bytes. */ |
| 2121 | if (dctx->nbits < 16) |
| 2122 | goto finished; |
| 2123 | header = dctx->bits & 0xFF; |
| 2124 | EATBITS(8); |
| 2125 | if (header != 8) { |
| 2126 | error = DEFLATE_ERR_GZIP_WRONGCOMP; |
| 2127 | goto finished; |
| 2128 | } |
| 2129 | dctx->gzflags = dctx->bits & 0xFF; |
| 2130 | if (dctx->gzflags & 2) { |
| 2131 | /* |
| 2132 | * The FHCRC flag is slightly confusing. RFC1952 |
| 2133 | * documents it as indicating the presence of a |
| 2134 | * two-byte CRC16 of the gzip header, occurring |
| 2135 | * just before the beginning of the Deflate stream. |
| 2136 | * However, gzip itself (as of 1.3.5) appears to |
| 2137 | * believe it indicates that the current gzip |
| 2138 | * `member' is not the final one, i.e. that the |
| 2139 | * stream is composed of multiple gzip members |
| 2140 | * concatenated together, and furthermore gzip will |
| 2141 | * refuse to decode any file that has it set. |
| 2142 | * |
| 2143 | * For this reason, I label it as `disputed' and |
| 2144 | * also refuse to decode anything that has it set. |
| 2145 | * I don't expect this to be a problem in practice. |
| 2146 | */ |
| 2147 | error = DEFLATE_ERR_GZIP_FHCRC; |
| 2148 | goto finished; |
| 2149 | } |
| 2150 | EATBITS(8); |
| 2151 | dctx->state = GZIPIGNORE1; |
| 2152 | break; |
| 2153 | case GZIPIGNORE1: |
| 2154 | case GZIPIGNORE2: |
| 2155 | case GZIPIGNORE3: |
| 2156 | /* Expect two bytes of gzip timestamp/XFL/OS, which we ignore. */ |
| 2157 | if (dctx->nbits < 16) |
| 2158 | goto finished; |
| 2159 | EATBITS(16); |
| 2160 | if (dctx->state == GZIPIGNORE3) { |
| 2161 | dctx->state = GZIPEXTRA; |
| 2162 | } else |
| 2163 | dctx->state++; /* maps IGNORE1 -> IGNORE2 -> IGNORE3 */ |
| 2164 | break; |
| 2165 | case GZIPEXTRA: |
| 2166 | if (dctx->gzflags & 4) { |
| 2167 | /* Expect two bytes of extra-length count, then that many |
| 2168 | * extra bytes of header data, all of which we ignore. */ |
| 2169 | if (!dctx->gzextralen) { |
| 2170 | if (dctx->nbits < 16) |
| 2171 | goto finished; |
| 2172 | dctx->gzextralen = dctx->bits & 0xFFFF; |
| 2173 | EATBITS(16); |
| 2174 | break; |
| 2175 | } else if (dctx->gzextralen > 0) { |
| 2176 | if (dctx->nbits < 8) |
| 2177 | goto finished; |
| 2178 | EATBITS(8); |
| 2179 | if (--dctx->gzextralen > 0) |
| 2180 | break; |
| 2181 | } |
| 2182 | } |
| 2183 | dctx->state = GZIPFNAME; |
| 2184 | break; |
| 2185 | case GZIPFNAME: |
| 2186 | if (dctx->gzflags & 8) { |
| 2187 | /* |
| 2188 | * Expect a NUL-terminated filename. |
| 2189 | */ |
| 2190 | if (dctx->nbits < 8) |
| 2191 | goto finished; |
| 2192 | code = dctx->bits & 0xFF; |
| 2193 | EATBITS(8); |
| 2194 | } else |
| 2195 | code = 0; |
| 2196 | if (code == 0) |
| 2197 | dctx->state = GZIPCOMMENT; |
| 2198 | break; |
| 2199 | case GZIPCOMMENT: |
| 2200 | if (dctx->gzflags & 16) { |
| 2201 | /* |
| 2202 | * Expect a NUL-terminated filename. |
| 2203 | */ |
| 2204 | if (dctx->nbits < 8) |
| 2205 | goto finished; |
| 2206 | code = dctx->bits & 0xFF; |
| 2207 | EATBITS(8); |
| 2208 | } else |
| 2209 | code = 0; |
| 2210 | if (code == 0) |
| 2211 | dctx->state = OUTSIDEBLK; |
| 2212 | break; |
| 2213 | case OUTSIDEBLK: |
| 2214 | /* Expect 3-bit block header. */ |
| 2215 | if (dctx->nbits < 3) |
| 2216 | goto finished; /* done all we can */ |
| 2217 | bfinal = dctx->bits & 1; |
| 2218 | if (bfinal) |
| 2219 | dctx->lastblock = TRUE; |
| 2220 | EATBITS(1); |
| 2221 | btype = dctx->bits & 3; |
| 2222 | EATBITS(2); |
| 2223 | if (btype == 0) { |
| 2224 | int to_eat = dctx->nbits & 7; |
| 2225 | dctx->state = UNCOMP_LEN; |
| 2226 | EATBITS(to_eat); /* align to byte boundary */ |
| 2227 | } else if (btype == 1) { |
| 2228 | dctx->currlentable = dctx->staticlentable; |
| 2229 | dctx->currdisttable = dctx->staticdisttable; |
| 2230 | dctx->state = INBLK; |
| 2231 | } else if (btype == 2) { |
| 2232 | dctx->state = TREES_HDR; |
| 2233 | } |
| 2234 | debug(("recv: bfinal=%d btype=%d\n", bfinal, btype)); |
| 2235 | #ifdef ANALYSIS |
| 2236 | if (analyse_level > 1) { |
| 2237 | static const char *const btypes[] = { |
| 2238 | "uncompressed", "static", "dynamic", "type 3 (unknown)" |
| 2239 | }; |
| 2240 | printf("new block, %sfinal, %s\n", |
| 2241 | bfinal ? "" : "not ", |
| 2242 | btypes[btype]); |
| 2243 | } |
| 2244 | #endif |
| 2245 | break; |
| 2246 | case TREES_HDR: |
| 2247 | /* |
| 2248 | * Dynamic block header. Five bits of HLIT, five of |
| 2249 | * HDIST, four of HCLEN. |
| 2250 | */ |
| 2251 | if (dctx->nbits < 5 + 5 + 4) |
| 2252 | goto finished; /* done all we can */ |
| 2253 | dctx->hlit = 257 + (dctx->bits & 31); |
| 2254 | EATBITS(5); |
| 2255 | dctx->hdist = 1 + (dctx->bits & 31); |
| 2256 | EATBITS(5); |
| 2257 | dctx->hclen = 4 + (dctx->bits & 15); |
| 2258 | EATBITS(4); |
| 2259 | debug(("recv: hlit=%d hdist=%d hclen=%d\n", dctx->hlit, |
| 2260 | dctx->hdist, dctx->hclen)); |
| 2261 | #ifdef ANALYSIS |
| 2262 | if (analyse_level > 1) |
| 2263 | printf("hlit=%d, hdist=%d, hclen=%d\n", |
| 2264 | dctx->hlit, dctx->hdist, dctx->hclen); |
| 2265 | #endif |
| 2266 | dctx->lenptr = 0; |
| 2267 | dctx->state = TREES_LENLEN; |
| 2268 | memset(dctx->lenlen, 0, sizeof(dctx->lenlen)); |
| 2269 | break; |
| 2270 | case TREES_LENLEN: |
| 2271 | if (dctx->nbits < 3) |
| 2272 | goto finished; |
| 2273 | while (dctx->lenptr < dctx->hclen && dctx->nbits >= 3) { |
| 2274 | dctx->lenlen[lenlenmap[dctx->lenptr++]] = |
| 2275 | (unsigned char) (dctx->bits & 7); |
| 2276 | debug(("recv: lenlen %d\n", (unsigned char) (dctx->bits & 7))); |
| 2277 | EATBITS(3); |
| 2278 | } |
| 2279 | if (dctx->lenptr == dctx->hclen) { |
| 2280 | dctx->lenlentable = mktable(dctx->lenlen, 19, |
| 2281 | #ifdef ANALYSIS |
| 2282 | "code length", |
| 2283 | #endif |
| 2284 | &error); |
| 2285 | if (!dctx->lenlentable) |
| 2286 | goto finished; /* error code set up by mktable */ |
| 2287 | dctx->state = TREES_LEN; |
| 2288 | dctx->lenptr = 0; |
| 2289 | } |
| 2290 | break; |
| 2291 | case TREES_LEN: |
| 2292 | if (dctx->lenptr >= dctx->hlit + dctx->hdist) { |
| 2293 | dctx->currlentable = mktable(dctx->lengths, dctx->hlit, |
| 2294 | #ifdef ANALYSIS |
| 2295 | "literal/length", |
| 2296 | #endif |
| 2297 | &error); |
| 2298 | if (!dctx->currlentable) |
| 2299 | goto finished; /* error code set up by mktable */ |
| 2300 | if (dctx->hdist == 1 && dctx->lengths[dctx->hlit] == 0) { |
| 2301 | /* |
| 2302 | * Special case: if the code length list for the |
| 2303 | * backward-distance table contains a single zero |
| 2304 | * entry, it means this block will never encode a |
| 2305 | * backward distance at all (i.e. it's all |
| 2306 | * literals). |
| 2307 | */ |
| 2308 | dctx->currdisttable = NULL; |
| 2309 | } else { |
| 2310 | dctx->currdisttable = mktable(dctx->lengths + dctx->hlit, |
| 2311 | dctx->hdist, |
| 2312 | #ifdef ANALYSIS |
| 2313 | "distance", |
| 2314 | #endif |
| 2315 | &error); |
| 2316 | if (!dctx->currdisttable) |
| 2317 | goto finished; /* error code set up by mktable */ |
| 2318 | } |
| 2319 | freetable(&dctx->lenlentable); |
| 2320 | dctx->lenlentable = NULL; |
| 2321 | dctx->state = INBLK; |
| 2322 | break; |
| 2323 | } |
| 2324 | code = huflookup(&dctx->bits, &dctx->nbits, dctx->lenlentable); |
| 2325 | debug(("recv: codelen %d\n", code)); |
| 2326 | if (code == -1) |
| 2327 | goto finished; |
| 2328 | if (code < 16) { |
| 2329 | #ifdef ANALYSIS |
| 2330 | if (analyse_level > 1) |
| 2331 | printf("code-length %d\n", code); |
| 2332 | #endif |
| 2333 | dctx->lengths[dctx->lenptr++] = code; |
| 2334 | } else { |
| 2335 | dctx->lenextrabits = (code == 16 ? 2 : code == 17 ? 3 : 7); |
| 2336 | dctx->lenaddon = (code == 18 ? 11 : 3); |
| 2337 | dctx->lenrep = (code == 16 && dctx->lenptr > 0 ? |
| 2338 | dctx->lengths[dctx->lenptr - 1] : 0); |
| 2339 | dctx->state = TREES_LENREP; |
| 2340 | } |
| 2341 | break; |
| 2342 | case TREES_LENREP: |
| 2343 | if (dctx->nbits < dctx->lenextrabits) |
| 2344 | goto finished; |
| 2345 | rep = |
| 2346 | dctx->lenaddon + |
| 2347 | (dctx->bits & ((1 << dctx->lenextrabits) - 1)); |
| 2348 | EATBITS(dctx->lenextrabits); |
| 2349 | if (dctx->lenextrabits) |
| 2350 | debug(("recv: codelen-extrabits %d/%d\n", rep - dctx->lenaddon, |
| 2351 | dctx->lenextrabits)); |
| 2352 | #ifdef ANALYSIS |
| 2353 | if (analyse_level > 1) |
| 2354 | printf("code-length-repeat: %d copies of %d\n", rep, |
| 2355 | dctx->lenrep); |
| 2356 | #endif |
| 2357 | while (rep > 0 && dctx->lenptr < dctx->hlit + dctx->hdist) { |
| 2358 | dctx->lengths[dctx->lenptr] = dctx->lenrep; |
| 2359 | dctx->lenptr++; |
| 2360 | rep--; |
| 2361 | } |
| 2362 | dctx->state = TREES_LEN; |
| 2363 | break; |
| 2364 | case INBLK: |
| 2365 | #ifdef ANALYSIS |
| 2366 | dctx->bitcount_before = BITCOUNT(dctx); |
| 2367 | #endif |
| 2368 | code = huflookup(&dctx->bits, &dctx->nbits, dctx->currlentable); |
| 2369 | debug(("recv: litlen %d\n", code)); |
| 2370 | if (code == -1) |
| 2371 | goto finished; |
| 2372 | if (code < 256) { |
| 2373 | #ifdef ANALYSIS |
| 2374 | if (analyse_level > 0) |
| 2375 | printf("%lu: literal %d [%d]\n", dctx->bytesout, code, |
| 2376 | BITCOUNT(dctx) - dctx->bitcount_before); |
| 2377 | #endif |
| 2378 | emit_char(dctx, code); |
| 2379 | } else if (code == 256) { |
| 2380 | if (dctx->lastblock) |
| 2381 | dctx->state = END; |
| 2382 | else |
| 2383 | dctx->state = OUTSIDEBLK; |
| 2384 | if (dctx->currlentable != dctx->staticlentable) { |
| 2385 | freetable(&dctx->currlentable); |
| 2386 | dctx->currlentable = NULL; |
| 2387 | } |
| 2388 | if (dctx->currdisttable && |
| 2389 | dctx->currdisttable != dctx->staticdisttable) { |
| 2390 | freetable(&dctx->currdisttable); |
| 2391 | dctx->currdisttable = NULL; |
| 2392 | } |
| 2393 | } else if (code < 286) { /* static tree can give >285; ignore */ |
| 2394 | dctx->state = GOTLENSYM; |
| 2395 | dctx->sym = code; |
| 2396 | } |
| 2397 | break; |
| 2398 | case GOTLENSYM: |
| 2399 | rec = &lencodes[dctx->sym - 257]; |
| 2400 | if (dctx->nbits < rec->extrabits) |
| 2401 | goto finished; |
| 2402 | dctx->len = |
| 2403 | rec->min + (dctx->bits & ((1 << rec->extrabits) - 1)); |
| 2404 | if (rec->extrabits) |
| 2405 | debug(("recv: litlen-extrabits %d/%d\n", |
| 2406 | dctx->len - rec->min, rec->extrabits)); |
| 2407 | EATBITS(rec->extrabits); |
| 2408 | dctx->state = GOTLEN; |
| 2409 | break; |
| 2410 | case GOTLEN: |
| 2411 | if (!dctx->currdisttable) { |
| 2412 | error = DEFLATE_ERR_NODISTTABLE; |
| 2413 | goto finished; |
| 2414 | } |
| 2415 | code = huflookup(&dctx->bits, &dctx->nbits, dctx->currdisttable); |
| 2416 | debug(("recv: dist %d\n", code)); |
| 2417 | if (code == -1) |
| 2418 | goto finished; |
| 2419 | dctx->state = GOTDISTSYM; |
| 2420 | dctx->sym = code; |
| 2421 | break; |
| 2422 | case GOTDISTSYM: |
| 2423 | rec = &distcodes[dctx->sym]; |
| 2424 | if (dctx->nbits < rec->extrabits) |
| 2425 | goto finished; |
| 2426 | dist = rec->min + (dctx->bits & ((1 << rec->extrabits) - 1)); |
| 2427 | if (rec->extrabits) |
| 2428 | debug(("recv: dist-extrabits %d/%d\n", |
| 2429 | dist - rec->min, rec->extrabits)); |
| 2430 | EATBITS(rec->extrabits); |
| 2431 | dctx->state = INBLK; |
| 2432 | #ifdef ANALYSIS |
| 2433 | if (analyse_level > 0) |
| 2434 | printf("%lu: copy len=%d dist=%d [%d]\n", dctx->bytesout, |
| 2435 | dctx->len, dist, |
| 2436 | BITCOUNT(dctx) - dctx->bitcount_before); |
| 2437 | #endif |
| 2438 | while (dctx->len--) |
| 2439 | emit_char(dctx, dctx->window[(dctx->winpos - dist) & |
| 2440 | (DWINSIZE - 1)]); |
| 2441 | break; |
| 2442 | case UNCOMP_LEN: |
| 2443 | /* |
| 2444 | * Uncompressed block. We expect to see a 16-bit LEN. |
| 2445 | */ |
| 2446 | if (dctx->nbits < 16) |
| 2447 | goto finished; |
| 2448 | dctx->uncomplen = dctx->bits & 0xFFFF; |
| 2449 | EATBITS(16); |
| 2450 | dctx->state = UNCOMP_NLEN; |
| 2451 | break; |
| 2452 | case UNCOMP_NLEN: |
| 2453 | /* |
| 2454 | * Uncompressed block. We expect to see a 16-bit NLEN, |
| 2455 | * which should be the one's complement of the previous |
| 2456 | * LEN. |
| 2457 | */ |
| 2458 | if (dctx->nbits < 16) |
| 2459 | goto finished; |
| 2460 | nlen = dctx->bits & 0xFFFF; |
| 2461 | EATBITS(16); |
| 2462 | if (dctx->uncomplen != (nlen ^ 0xFFFF)) { |
| 2463 | error = DEFLATE_ERR_UNCOMP_HDR; |
| 2464 | goto finished; |
| 2465 | } |
| 2466 | if (dctx->uncomplen == 0) {/* block is empty */ |
| 2467 | if (dctx->lastblock) |
| 2468 | dctx->state = END; |
| 2469 | else |
| 2470 | dctx->state = OUTSIDEBLK; |
| 2471 | } else |
| 2472 | dctx->state = UNCOMP_DATA; |
| 2473 | break; |
| 2474 | case UNCOMP_DATA: |
| 2475 | if (dctx->nbits < 8) |
| 2476 | goto finished; |
| 2477 | #ifdef ANALYSIS |
| 2478 | if (analyse_level > 0) |
| 2479 | printf("%lu: uncompressed %d [8]\n", dctx->bytesout, |
| 2480 | (int)(dctx->bits & 0xFF)); |
| 2481 | #endif |
| 2482 | emit_char(dctx, dctx->bits & 0xFF); |
| 2483 | EATBITS(8); |
| 2484 | if (--dctx->uncomplen == 0) { /* end of uncompressed block */ |
| 2485 | if (dctx->lastblock) |
| 2486 | dctx->state = END; |
| 2487 | else |
| 2488 | dctx->state = OUTSIDEBLK; |
| 2489 | } |
| 2490 | break; |
| 2491 | case END: |
| 2492 | /* |
| 2493 | * End of compressed data. We align to a byte boundary, |
| 2494 | * and then look for format-specific trailer data. |
| 2495 | */ |
| 2496 | { |
| 2497 | int to_eat = dctx->nbits & 7; |
| 2498 | EATBITS(to_eat); |
| 2499 | } |
| 2500 | if (dctx->type == DEFLATE_TYPE_ZLIB) |
| 2501 | dctx->state = ADLER1; |
| 2502 | else if (dctx->type == DEFLATE_TYPE_GZIP) |
| 2503 | dctx->state = CRC1; |
| 2504 | else |
| 2505 | dctx->state = FINALSPIN; |
| 2506 | break; |
| 2507 | case ADLER1: |
| 2508 | if (dctx->nbits < 16) |
| 2509 | goto finished; |
| 2510 | cksum = (dctx->bits & 0xFF) << 8; |
| 2511 | EATBITS(8); |
| 2512 | cksum |= (dctx->bits & 0xFF); |
| 2513 | EATBITS(8); |
| 2514 | if (cksum != ((dctx->checksum >> 16) & 0xFFFF)) { |
| 2515 | error = DEFLATE_ERR_CHECKSUM; |
| 2516 | goto finished; |
| 2517 | } |
| 2518 | dctx->state = ADLER2; |
| 2519 | break; |
| 2520 | case ADLER2: |
| 2521 | if (dctx->nbits < 16) |
| 2522 | goto finished; |
| 2523 | cksum = (dctx->bits & 0xFF) << 8; |
| 2524 | EATBITS(8); |
| 2525 | cksum |= (dctx->bits & 0xFF); |
| 2526 | EATBITS(8); |
| 2527 | if (cksum != (dctx->checksum & 0xFFFF)) { |
| 2528 | error = DEFLATE_ERR_CHECKSUM; |
| 2529 | goto finished; |
| 2530 | } |
| 2531 | dctx->state = FINALSPIN; |
| 2532 | break; |
| 2533 | case CRC1: |
| 2534 | if (dctx->nbits < 16) |
| 2535 | goto finished; |
| 2536 | cksum = dctx->bits & 0xFFFF; |
| 2537 | EATBITS(16); |
| 2538 | if (cksum != (dctx->checksum & 0xFFFF)) { |
| 2539 | error = DEFLATE_ERR_CHECKSUM; |
| 2540 | goto finished; |
| 2541 | } |
| 2542 | dctx->state = CRC2; |
| 2543 | break; |
| 2544 | case CRC2: |
| 2545 | if (dctx->nbits < 16) |
| 2546 | goto finished; |
| 2547 | cksum = dctx->bits & 0xFFFF; |
| 2548 | EATBITS(16); |
| 2549 | if (cksum != ((dctx->checksum >> 16) & 0xFFFF)) { |
| 2550 | error = DEFLATE_ERR_CHECKSUM; |
| 2551 | goto finished; |
| 2552 | } |
| 2553 | dctx->state = ILEN1; |
| 2554 | break; |
| 2555 | case ILEN1: |
| 2556 | if (dctx->nbits < 16) |
| 2557 | goto finished; |
| 2558 | cksum = dctx->bits & 0xFFFF; |
| 2559 | EATBITS(16); |
| 2560 | if (cksum != (dctx->bytesout & 0xFFFF)) { |
| 2561 | error = DEFLATE_ERR_INLEN; |
| 2562 | goto finished; |
| 2563 | } |
| 2564 | dctx->state = ILEN2; |
| 2565 | break; |
| 2566 | case ILEN2: |
| 2567 | if (dctx->nbits < 16) |
| 2568 | goto finished; |
| 2569 | cksum = dctx->bits & 0xFFFF; |
| 2570 | EATBITS(16); |
| 2571 | if (cksum != ((dctx->bytesout >> 16) & 0xFFFF)) { |
| 2572 | error = DEFLATE_ERR_INLEN; |
| 2573 | goto finished; |
| 2574 | } |
| 2575 | dctx->state = FINALSPIN; |
| 2576 | break; |
| 2577 | case FINALSPIN: |
| 2578 | /* Just ignore any trailing garbage on the data stream. */ |
| 2579 | /* (We could alternatively throw an error here, if we wanted |
| 2580 | * to detect and complain about trailing garbage.) */ |
| 2581 | EATBITS(dctx->nbits); |
| 2582 | break; |
| 2583 | } |
| 2584 | } |
| 2585 | |
| 2586 | finished: |
| 2587 | *outblock = dctx->outblk; |
| 2588 | *outlen = dctx->outlen; |
| 2589 | return error; |
| 2590 | } |
| 2591 | |
| 2592 | #define A(code,str) str |
| 2593 | const char *const deflate_error_msg[DEFLATE_NUM_ERRORS] = { |
| 2594 | DEFLATE_ERRORLIST(A) |
| 2595 | }; |
| 2596 | #undef A |
| 2597 | |
| 2598 | #define A(code,str) #code |
| 2599 | const char *const deflate_error_sym[DEFLATE_NUM_ERRORS] = { |
| 2600 | DEFLATE_ERRORLIST(A) |
| 2601 | }; |
| 2602 | #undef A |
| 2603 | |
| 2604 | #if defined(WIN32) || defined(_WIN32) || defined(__WIN32__) || defined(_WIN64) |
| 2605 | #define WINDOWS_IO |
| 2606 | #endif |
| 2607 | |
| 2608 | #if defined(WINDOWS_IO) && (defined(STANDALONE) || defined(TESTMODE)) |
| 2609 | #include <fcntl.h> |
| 2610 | #include <io.h> |
| 2611 | #endif |
| 2612 | |
| 2613 | #ifdef STANDALONE |
| 2614 | |
| 2615 | int main(int argc, char **argv) |
| 2616 | { |
| 2617 | unsigned char buf[65536]; |
| 2618 | void *outbuf; |
| 2619 | int ret, err, outlen; |
| 2620 | deflate_decompress_ctx *dhandle; |
| 2621 | deflate_compress_ctx *chandle; |
| 2622 | int type = DEFLATE_TYPE_ZLIB, opts = TRUE; |
| 2623 | int compress = FALSE, decompress = FALSE; |
| 2624 | int got_arg = FALSE; |
| 2625 | char *filename = NULL; |
| 2626 | FILE *fp; |
| 2627 | |
| 2628 | while (--argc) { |
| 2629 | char *p = *++argv; |
| 2630 | |
| 2631 | got_arg = TRUE; |
| 2632 | |
| 2633 | if (p[0] == '-' && opts) { |
| 2634 | if (!strcmp(p, "-b")) |
| 2635 | type = DEFLATE_TYPE_BARE; |
| 2636 | else if (!strcmp(p, "-g")) |
| 2637 | type = DEFLATE_TYPE_GZIP; |
| 2638 | else if (!strcmp(p, "-c")) |
| 2639 | compress = TRUE; |
| 2640 | else if (!strcmp(p, "-d")) |
| 2641 | decompress = TRUE; |
| 2642 | else if (!strcmp(p, "-a")) |
| 2643 | analyse_level++, decompress = TRUE; |
| 2644 | else if (!strcmp(p, "--")) |
| 2645 | opts = FALSE; /* next thing is filename */ |
| 2646 | else { |
| 2647 | fprintf(stderr, "unknown command line option '%s'\n", p); |
| 2648 | return 1; |
| 2649 | } |
| 2650 | } else if (!filename) { |
| 2651 | filename = p; |
| 2652 | } else { |
| 2653 | fprintf(stderr, "can only handle one filename\n"); |
| 2654 | return 1; |
| 2655 | } |
| 2656 | } |
| 2657 | |
| 2658 | if (!compress && !decompress) { |
| 2659 | fprintf(stderr, "usage: deflate [ -c | -d | -a ] [ -b | -g ]" |
| 2660 | " [filename]\n"); |
| 2661 | return (got_arg ? 1 : 0); |
| 2662 | } |
| 2663 | |
| 2664 | if (compress && decompress) { |
| 2665 | fprintf(stderr, "please do not specify both compression and" |
| 2666 | " decompression\n"); |
| 2667 | return (got_arg ? 1 : 0); |
| 2668 | } |
| 2669 | |
| 2670 | if (compress) { |
| 2671 | chandle = deflate_compress_new(type); |
| 2672 | dhandle = NULL; |
| 2673 | } else { |
| 2674 | dhandle = deflate_decompress_new(type); |
| 2675 | chandle = NULL; |
| 2676 | } |
| 2677 | |
| 2678 | if (filename) |
| 2679 | fp = fopen(filename, "rb"); |
| 2680 | else |
| 2681 | fp = stdin; |
| 2682 | |
| 2683 | if (!fp) { |
| 2684 | assert(filename); |
| 2685 | fprintf(stderr, "unable to open '%s'\n", filename); |
| 2686 | return 1; |
| 2687 | } |
| 2688 | |
| 2689 | #ifdef WINDOWS_IO |
| 2690 | if(_setmode(_fileno(stdout), _O_BINARY ) == -1) |
| 2691 | { |
| 2692 | fprintf(stderr, "Can't set stdout to binary mode\n"); |
| 2693 | return 1; |
| 2694 | } |
| 2695 | #endif |
| 2696 | |
| 2697 | do { |
| 2698 | ret = fread(buf, 1, sizeof(buf), fp); |
| 2699 | outbuf = NULL; |
| 2700 | if (dhandle) { |
| 2701 | if (ret > 0) |
| 2702 | err = deflate_decompress_data(dhandle, buf, ret, |
| 2703 | (void **)&outbuf, &outlen); |
| 2704 | else |
| 2705 | err = deflate_decompress_data(dhandle, NULL, 0, |
| 2706 | (void **)&outbuf, &outlen); |
| 2707 | } else { |
| 2708 | if (ret > 0) |
| 2709 | deflate_compress_data(chandle, buf, ret, DEFLATE_NO_FLUSH, |
| 2710 | (void **)&outbuf, &outlen); |
| 2711 | else |
| 2712 | deflate_compress_data(chandle, buf, ret, DEFLATE_END_OF_DATA, |
| 2713 | (void **)&outbuf, &outlen); |
| 2714 | err = 0; |
| 2715 | } |
| 2716 | if (outbuf) { |
| 2717 | if (!analyse_level && outlen) |
| 2718 | fwrite(outbuf, 1, outlen, stdout); |
| 2719 | sfree(outbuf); |
| 2720 | } |
| 2721 | if (err > 0) { |
| 2722 | fprintf(stderr, "decoding error: %s\n", deflate_error_msg[err]); |
| 2723 | return 1; |
| 2724 | } |
| 2725 | } while (ret > 0); |
| 2726 | |
| 2727 | if (dhandle) |
| 2728 | deflate_decompress_free(dhandle); |
| 2729 | if (chandle) |
| 2730 | deflate_compress_free(chandle); |
| 2731 | |
| 2732 | if (filename) |
| 2733 | fclose(fp); |
| 2734 | |
| 2735 | return 0; |
| 2736 | } |
| 2737 | |
| 2738 | #endif |
| 2739 | |
| 2740 | #ifdef TESTMODE |
| 2741 | |
| 2742 | int main(int argc, char **argv) |
| 2743 | { |
| 2744 | char *filename = NULL; |
| 2745 | FILE *fp; |
| 2746 | deflate_compress_ctx *chandle; |
| 2747 | deflate_decompress_ctx *dhandle; |
| 2748 | unsigned char buf[65536], *outbuf, *outbuf2; |
| 2749 | int ret, err, outlen, outlen2; |
| 2750 | int dlen = 0, clen = 0; |
| 2751 | int opts = TRUE; |
| 2752 | |
| 2753 | while (--argc) { |
| 2754 | char *p = *++argv; |
| 2755 | |
| 2756 | if (p[0] == '-' && opts) { |
| 2757 | if (!strcmp(p, "--")) |
| 2758 | opts = FALSE; /* next thing is filename */ |
| 2759 | else { |
| 2760 | fprintf(stderr, "unknown command line option '%s'\n", p); |
| 2761 | return 1; |
| 2762 | } |
| 2763 | } else if (!filename) { |
| 2764 | filename = p; |
| 2765 | } else { |
| 2766 | fprintf(stderr, "can only handle one filename\n"); |
| 2767 | return 1; |
| 2768 | } |
| 2769 | } |
| 2770 | |
| 2771 | if (filename) |
| 2772 | fp = fopen(filename, "rb"); |
| 2773 | else |
| 2774 | fp = stdin; |
| 2775 | |
| 2776 | if (!fp) { |
| 2777 | assert(filename); |
| 2778 | fprintf(stderr, "unable to open '%s'\n", filename); |
| 2779 | return 1; |
| 2780 | } |
| 2781 | |
| 2782 | chandle = deflate_compress_new(DEFLATE_TYPE_ZLIB); |
| 2783 | dhandle = deflate_decompress_new(DEFLATE_TYPE_ZLIB); |
| 2784 | |
| 2785 | #ifdef WINDOWS_IO |
| 2786 | if(_setmode(_fileno(stdout), _O_BINARY ) == -1) |
| 2787 | { |
| 2788 | fprintf(stderr, "Can't set stdout to binary mode\n"); |
| 2789 | return 1; |
| 2790 | } |
| 2791 | #endif |
| 2792 | |
| 2793 | do { |
| 2794 | ret = fread(buf, 1, sizeof(buf), fp); |
| 2795 | if (ret <= 0) { |
| 2796 | deflate_compress_data(chandle, NULL, 0, DEFLATE_END_OF_DATA, |
| 2797 | (void **)&outbuf, &outlen); |
| 2798 | } else { |
| 2799 | dlen += ret; |
| 2800 | deflate_compress_data(chandle, buf, ret, DEFLATE_NO_FLUSH, |
| 2801 | (void **)&outbuf, &outlen); |
| 2802 | } |
| 2803 | if (outbuf) { |
| 2804 | clen += outlen; |
| 2805 | err = deflate_decompress_data(dhandle, outbuf, outlen, |
| 2806 | (void **)&outbuf2, &outlen2); |
| 2807 | sfree(outbuf); |
| 2808 | if (outbuf2) { |
| 2809 | if (outlen2) |
| 2810 | fwrite(outbuf2, 1, outlen2, stdout); |
| 2811 | sfree(outbuf2); |
| 2812 | } |
| 2813 | if (!err && ret <= 0) { |
| 2814 | /* |
| 2815 | * signal EOF |
| 2816 | */ |
| 2817 | err = deflate_decompress_data(dhandle, NULL, 0, |
| 2818 | (void **)&outbuf2, &outlen2); |
| 2819 | assert(outbuf2 == NULL); |
| 2820 | } |
| 2821 | if (err) { |
| 2822 | fprintf(stderr, "decoding error: %s\n", |
| 2823 | deflate_error_msg[err]); |
| 2824 | return 1; |
| 2825 | } |
| 2826 | } |
| 2827 | } while (ret > 0); |
| 2828 | |
| 2829 | fprintf(stderr, "%d plaintext -> %d compressed\n", dlen, clen); |
| 2830 | |
| 2831 | return 0; |
| 2832 | } |
| 2833 | |
| 2834 | #endif |