mdw@git.distorted.org.uk Git - sgt/halibut/blob - deflate.c

   1 /*
   2  * Reimplementation of Deflate (RFC1951) compression. Adapted from
   3  * the version in PuTTY, and extended to write dynamic Huffman
   4  * trees and choose block boundaries usefully.
   5  */
   6
   7 /*
   8  * TODO:
   9  *
  10  *  - Feature: it would probably be useful to add a third format
  11  *    type to read and write actual gzip files.
  12  *
  13  *  - Feature: the decompress function should return error codes
  14  *    indicating what kind of thing went wrong in a decoding error
  15  *    situation, possibly even including a file pointer. I envisage
  16  *    an enum of error codes in the header file, and one of those
  17  *    nasty preprocessor tricks to permit a user to define a
  18  *    code-to-text mapping array.
  19  *
  20  *  - Feature: could do with forms of flush other than SYNC_FLUSH.
  21  *    I'm not sure exactly how those work when you don't know in
  22  *    advance that your next block will be static (as we did in
  23  *    PuTTY). And remember the 9-bit limitation of zlib.
  24  *
  25  *  - Compression quality: introduce the option of choosing a
  26  *    static block instead of a dynamic one, where that's more
  27  *    efficient.
  28  *
  29  *  - Compression quality: the actual LZ77 engine appears to be
  30  *    unable to track a match going beyond the input data passed to
  31  *    it in a single call. I'd prefer it to be more restartable
  32  *    than that: we ought to be able to pass in our input data in
  33  *    whatever size blocks happen to be convenient and not affect
  34  *    the output at all.
  35  *
  36  *  - Compression quality: chooseblock() appears to be computing
  37  *    wildly inaccurate block size estimates. Possible resolutions:
  38  *     + find and fix some trivial bug I haven't spotted yet
  39  *     + abandon the entropic approximation and go with trial
  40  *       Huffman runs
  41  *
  42  *  - Compression quality: see if increasing SYMLIMIT causes
  43  *    dynamic blocks to start being consistently smaller than it.
  44  *
  45  *  - Compression quality: we ought to be able to fall right back
  46  *    to actual uncompressed blocks if really necessary, though
  47  *    it's not clear what the criterion for doing so would be.
  48  *
  49  *  - Performance: chooseblock() is currently computing the whole
  50  *    entropic approximation for every possible block size. It
  51  *    ought to be able to update it incrementally as it goes along
  52  *    (assuming of course we don't jack it all in and go for a
  53  *    proper Huffman analysis).
  54  */
  55
  56 /*
  57  * This software is copyright 2000-2006 Simon Tatham.
  58  *
  59  * Permission is hereby granted, free of charge, to any person
  60  * obtaining a copy of this software and associated documentation
  61  * files (the "Software"), to deal in the Software without
  62  * restriction, including without limitation the rights to use,
  63  * copy, modify, merge, publish, distribute, sublicense, and/or
  64  * sell copies of the Software, and to permit persons to whom the
  65  * Software is furnished to do so, subject to the following
  66  * conditions:
  67  *
  68  * The above copyright notice and this permission notice shall be
  69  * included in all copies or substantial portions of the Software.
  70  *
  71  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  72  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
  73  * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  74  * NONINFRINGEMENT.  IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE
  75  * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  76  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
  77  * IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  78  * THE SOFTWARE.
  79  */
  80
  81 #include <stdio.h>
  82 #include <stddef.h>
  83 #include <string.h>
  84 #include <stdlib.h>
  85 #include <assert.h>
  86 #include <math.h>
  87
  88 #include "deflate.h"
  89
/*
 * Thin typed wrappers around the C allocator.
 *
 *  snew(type)           allocate one object of `type'
 *  snewn(n, type)       allocate an array of n objects of `type'
 *  sresize(x, n, type)  realloc x to hold n objects of `type'
 *  sfree(x)             release memory obtained from the above
 *
 * None of these check for allocation failure or for overflow in the
 * size multiplication: they return whatever malloc()/realloc() return,
 * so the caller is responsible for testing the result.
 */
#define snew(type) ( (type *) malloc(sizeof(type)) )
#define snewn(n, type) ( (type *) malloc((n) * sizeof(type)) )
#define sresize(x, n, type) ( (type *) realloc((x), (n) * sizeof(type)) )
#define sfree(x) ( free((x)) )

/* Number of elements in a true array (not valid on a pointer). */
#define lenof(x) (sizeof((x)) / sizeof(*(x)))
  96
/*
 * Diagnostic output. Call as debug(("format", args...)) -- note the
 * doubled parentheses, which let a single macro argument carry a whole
 * printf-style argument list. With TESTDBG defined the text goes to
 * stderr; otherwise the call expands to nothing.
 */
#if defined TESTDBG
/* gcc-specific diagnostic macro */
#define debug_int(x...) ( fprintf(stderr, x) )
#define debug(x) ( debug_int x )
#else
#define debug(x)
#endif
 104
/*
 * Boolean constants, supplied here only if FALSE has not already been
 * defined by something included earlier.
 */
#ifndef FALSE
#define FALSE 0
#define TRUE (!FALSE)
#endif
 109
 110 /* ----------------------------------------------------------------------
 111  * Basic LZ77 code. This bit is designed modularly, so it could be
 112  * ripped out and used in a different LZ77 compressor. Go to it,
 113  * and good luck :-)
 114  */
 115
struct LZ77InternalContext;
/*
 * Public handle for one LZ77 compression stream. The user fills in
 * userdata and the two callbacks; lz77_init() fills in ictx.
 */
struct LZ77Context {
    struct LZ77InternalContext *ictx;  /* private state, set by lz77_init() */
    void *userdata;                    /* not touched by the LZ77 code itself;
                                        * presumably for use by the callbacks */
    /* Called by lz77_compress() for each byte emitted as a literal. */
    void (*literal) (struct LZ77Context * ctx, unsigned char c);
    /* Called by lz77_compress() for each back-reference it emits. */
    void (*match) (struct LZ77Context * ctx, int distance, int len);
};
 123
/*
 * Initialise the private fields of an LZ77Context. It's up to the
 * user to initialise the public fields.
 *
 * Returns 1 on success, or 0 if the internal state could not be
 * allocated (in which case ctx->ictx is left unset).
 */
static int lz77_init(struct LZ77Context *ctx);
 129
/*
 * Supply data to be compressed. Will update the private fields of
 * the LZ77Context, and will call literal() and match() to output.
 * If `compress' is FALSE, it will never emit a match, but will
 * instead call literal() for everything.
 *
 * Every input byte is reported through a callback before the call
 * returns. However, the last HASHCHARS-1 bytes of a call cannot be
 * hashed yet, so they are kept in a small pending buffer and only
 * entered into the search window on a later call.
 */
static void lz77_compress(struct LZ77Context *ctx,
                          const unsigned char *data, int len, int compress);
 138
 139 /*
 140  * Modifiable parameters.
 141  */
/*
 * WINSIZE is used as a bit mask (WINSIZE-1), hence the power-of-2
 * requirement. Window indices are stored in `short' fields of struct
 * WindowEntry and struct HashEntry, so raising WINSIZE would also
 * require widening those fields (assuming a 16-bit short).
 */
#define WINSIZE 32768                  /* window size. Must be power of 2! */
#define HASHMAX 2039                   /* one more than max hash value */
#define MAXMATCH 32                    /* how many matches we track */
#define HASHCHARS 3                    /* how many chars make a hash */
 146
 147 /*
 148  * This compressor takes a less slapdash approach than the
 149  * gzip/zlib one. Rather than allowing our hash chains to fall into
 150  * disuse near the far end, we keep them doubly linked so we can
 151  * _find_ the far end, and then every time we add a new byte to the
 152  * window (thus rolling round by one and removing the previous
 153  * byte), we can carefully remove the hash chain entry.
 154  */
 155
#define INVALID -1                     /* invalid hash _and_ invalid offset */
/*
 * One slot of the sliding window. Slots whose HASHCHARS-byte prefix
 * hashes to the same value are joined in a doubly linked chain; new
 * slots are inserted at the head, so `next' leads towards older
 * entries and `prev' towards newer ones. INVALID ends the chain in
 * either direction.
 */
struct WindowEntry {
    short next, prev;                  /* array indices within the window */
    short hashval;                     /* hash this slot is filed under, or
                                        * INVALID if the slot is unused */
};
 161
/*
 * Head of one hash chain: the most recently added window slot with
 * this hash value, or INVALID if the chain is empty.
 */
struct HashEntry {
    short first;                       /* window index of first in chain */
};
 165
/*
 * A candidate back-reference: `distance' bytes back from the current
 * position, `len' bytes long.
 */
struct Match {
    int distance, len;
};
 169
/*
 * Private state of the LZ77 engine, allocated by lz77_init().
 */
struct LZ77InternalContext {
    struct WindowEntry win[WINSIZE];   /* hash-chain links, one per window slot */
    unsigned char data[WINSIZE];       /* the byte stored in each window slot */
    int winpos;                        /* slot the next byte will be written to */
    struct HashEntry hashtab[HASHMAX]; /* chain heads, indexed by hash value */
    unsigned char pending[HASHCHARS];  /* trailing bytes of earlier input that
                                        * could not be hashed yet */
    int npending;                      /* number of valid bytes in pending[] */
};
 178
 179 static int lz77_hash(const unsigned char *data)
 180 {
 181     return (257 * data[0] + 263 * data[1] + 269 * data[2]) % HASHMAX;
 182 }
 183
 184 static int lz77_init(struct LZ77Context *ctx)
 185 {
 186     struct LZ77InternalContext *st;
 187     int i;
 188
 189     st = snew(struct LZ77InternalContext);
 190     if (!st)
 191         return 0;
 192
 193     ctx->ictx = st;
 194
 195     for (i = 0; i < WINSIZE; i++)
 196         st->win[i].next = st->win[i].prev = st->win[i].hashval = INVALID;
 197     for (i = 0; i < HASHMAX; i++)
 198         st->hashtab[i].first = INVALID;
 199     st->winpos = 0;
 200
 201     st->npending = 0;
 202
 203     return 1;
 204 }
 205
 206 static void lz77_advance(struct LZ77InternalContext *st,
 207                          unsigned char c, int hash)
 208 {
 209     int off;
 210
 211     /*
 212      * Remove the hash entry at winpos from the tail of its chain,
 213      * or empty the chain if it's the only thing on the chain.
 214      */
 215     if (st->win[st->winpos].prev != INVALID) {
 216         st->win[st->win[st->winpos].prev].next = INVALID;
 217     } else if (st->win[st->winpos].hashval != INVALID) {
 218         st->hashtab[st->win[st->winpos].hashval].first = INVALID;
 219     }
 220
 221     /*
 222      * Create a new entry at winpos and add it to the head of its
 223      * hash chain.
 224      */
 225     st->win[st->winpos].hashval = hash;
 226     st->win[st->winpos].prev = INVALID;
 227     off = st->win[st->winpos].next = st->hashtab[hash].first;
 228     st->hashtab[hash].first = st->winpos;
 229     if (off != INVALID)
 230         st->win[off].prev = st->winpos;
 231     st->data[st->winpos] = c;
 232
 233     /*
 234      * Advance the window pointer.
 235      */
 236     st->winpos = (st->winpos + 1) & (WINSIZE - 1);
 237 }
 238
/*
 * Fetch the byte at offset k relative to the current input position:
 * non-negative offsets read from the caller's `data' buffer, negative
 * offsets read back into the sliding window. Relies on local
 * variables `st' and `data' being in scope.
 */
#define CHARAT(k) ( (k)<0 ? st->data[(st->winpos+k)&(WINSIZE-1)] : data[k] )

/*
 * Run the LZ77 matcher over `len' bytes at `data', reporting the
 * result through ctx->literal() and ctx->match().
 *
 * Matching is lazy: when a match is found at some position it is not
 * emitted at once but deferred by one byte. If the next position
 * yields a match more than one byte longer, the deferred byte goes
 * out as a literal and the new match is deferred in its place;
 * otherwise the deferred match is emitted.
 *
 * If `compress' is false, or fewer than HASHCHARS bytes remain, no
 * match search is made at that position.
 */
static void lz77_compress(struct LZ77Context *ctx,
                          const unsigned char *data, int len, int compress)
{
    struct LZ77InternalContext *st = ctx->ictx;
    int i, hash, distance, off, nmatch, matchlen, advance;
    struct Match defermatch, matches[MAXMATCH];
    int deferchr;

    /*
     * Add any pending characters from last time to the window. (We
     * might not be able to.)
     *
     * Pending bytes have already been reported to the callbacks; they
     * are only waiting for enough following bytes to be hashed.
     */
    for (i = 0; i < st->npending; i++) {
        unsigned char foo[HASHCHARS];
        int j;
        if (len + st->npending - i < HASHCHARS) {
            /* Update the pending array. */
            for (j = i; j < st->npending; j++)
                st->pending[j - i] = st->pending[j];
            break;
        }
        /* Assemble HASHCHARS bytes spanning the old and new input. */
        for (j = 0; j < HASHCHARS; j++)
            foo[j] = (i + j < st->npending ? st->pending[i + j] :
                      data[i + j - st->npending]);
        lz77_advance(st, foo[0], lz77_hash(foo));
    }
    /* i is now the number of pending bytes that made it into the window. */
    st->npending -= i;

    defermatch.len = 0;                /* zero length means nothing deferred */
    deferchr = '\0';
    while (len > 0) {

        /* Don't even look for a match, if we're not compressing. */
        if (compress && len >= HASHCHARS) {
            /*
             * Hash the next few characters.
             */
            hash = lz77_hash(data);

            /*
             * Look the hash up in the corresponding hash chain and see
             * what we can find.
             */
            nmatch = 0;
            for (off = st->hashtab[hash].first;
                 off != INVALID; off = st->win[off].next) {
                /* distance = 1       if off == st->winpos-1 */
                /* distance = WINSIZE if off == st->winpos   */
                distance =
                    WINSIZE - (off + WINSIZE - st->winpos) % WINSIZE;
                /* A hash hit is only a candidate: confirm byte by byte. */
                for (i = 0; i < HASHCHARS; i++)
                    if (CHARAT(i) != CHARAT(i - distance))
                        break;
                if (i == HASHCHARS) {
                    matches[nmatch].distance = distance;
                    matches[nmatch].len = 3;
                    if (++nmatch >= MAXMATCH)
                        break;
                }
            }
        } else {
            nmatch = 0;
            hash = INVALID;
        }

        if (nmatch > 0) {
            /*
             * We've now filled up matches[] with nmatch potential
             * matches. Follow them down to find the longest. (We
             * assume here that it's always worth favouring a
             * longer match over a shorter one.)
             *
             * Each pass keeps only the candidates that still agree
             * at offset matchlen, compacting them to the front of
             * matches[] in their original order. Matches never
             * extend past the end of this call's input.
             */
            matchlen = HASHCHARS;
            while (matchlen < len) {
                int j;
                for (i = j = 0; i < nmatch; i++) {
                    if (CHARAT(matchlen) ==
                        CHARAT(matchlen - matches[i].distance)) {
                        matches[j++] = matches[i];
                    }
                }
                if (j == 0)
                    break;
                matchlen++;
                nmatch = j;
            }

            /*
             * We've now got all the longest matches. We favour the
             * shorter distances, which means we go with matches[0].
             * So see if we want to defer it or throw it away.
             */
            matches[0].len = matchlen;
            if (defermatch.len > 0) {
                if (matches[0].len > defermatch.len + 1) {
                    /* We have a better match. Emit the deferred char,
                     * and defer this match. */
                    ctx->literal(ctx, (unsigned char) deferchr);
                    defermatch = matches[0];
                    deferchr = data[0];
                    advance = 1;
                } else {
                    /* We don't have a better match. Do the deferred one. */
                    ctx->match(ctx, defermatch.distance, defermatch.len);
                    /* Its first byte was consumed when it was deferred. */
                    advance = defermatch.len - 1;
                    defermatch.len = 0;
                }
            } else {
                /* There was no deferred match. Defer this one. */
                defermatch = matches[0];
                deferchr = data[0];
                advance = 1;
            }
        } else {
            /*
             * We found no matches. Emit the deferred match, if
             * any; otherwise emit a literal.
             */
            if (defermatch.len > 0) {
                ctx->match(ctx, defermatch.distance, defermatch.len);
                /* Its first byte was consumed when it was deferred. */
                advance = defermatch.len - 1;
                defermatch.len = 0;
            } else {
                ctx->literal(ctx, data[0]);
                advance = 1;
            }
        }

        /*
         * Now advance the position by `advance' characters,
         * keeping the window and hash chains consistent.
         *
         * A byte with fewer than HASHCHARS bytes left from it cannot
         * be hashed yet, so it is parked in pending[] for the next
         * call instead of entering the window now.
         */
        while (advance > 0) {
            if (len >= HASHCHARS) {
                lz77_advance(st, *data, lz77_hash(data));
            } else {
                st->pending[st->npending++] = *data;
            }
            data++;
            len--;
            advance--;
        }
    }
}
 385
 386 /* ----------------------------------------------------------------------
 387  * Deflate functionality common to both compression and decompression.
 388  */
 389
/*
 * The 19 symbols of the code length alphabet, in the permuted order
 * in which RFC 1951 section 3.2.7 stores their code lengths in a
 * dynamic block header.
 */
static const unsigned char lenlenmap[] = {
    16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15
};
 393
 394 #define MAXCODELEN 16
 395
 396 /*
 397  * Given a sequence of Huffman code lengths, compute the actual
 398  * codes, in the final form suitable for feeding to outbits (i.e.
 399  * already bit-mirrored).
 400  *
 401  * Returns the maximum code length found.
 402  */
 403 static int hufcodes(const unsigned char *lengths, int *codes, int nsyms)
 404 {
 405     int count[MAXCODELEN], startcode[MAXCODELEN];
 406     int code, maxlen;
 407     int i, j;
 408
 409     /* Count the codes of each length. */
 410     maxlen = 0;
 411     for (i = 1; i < MAXCODELEN; i++)
 412         count[i] = 0;
 413     for (i = 0; i < nsyms; i++) {
 414         count[lengths[i]]++;
 415         if (maxlen < lengths[i])
 416             maxlen = lengths[i];
 417     }
 418     /* Determine the starting code for each length block. */
 419     code = 0;
 420     for (i = 1; i < MAXCODELEN; i++) {
 421         startcode[i] = code;
 422         code += count[i];
 423         code <<= 1;
 424     }
 425     /* Determine the code for each symbol. Mirrored, of course. */
 426     for (i = 0; i < nsyms; i++) {
 427         code = startcode[lengths[i]]++;
 428         codes[i] = 0;
 429         for (j = 0; j < lengths[i]; j++) {
 430             codes[i] = (codes[i] << 1) | (code & 1);
 431             code >>= 1;
 432         }
 433     }
 434
 435     return maxlen;
 436 }
 437
 438 /* ----------------------------------------------------------------------
 439  * Deflate compression.
 440  */
 441
/*
 * Symbols waiting to be written are held as tagged words in a ring
 * buffer that is indexed modulo SYMLIMIT.
 *
 * The top two bits of each word (SYMPFX_MASK) say what kind of symbol
 * it is: a literal/length code, a distance code, raw extra bits, or a
 * code-length code. The remaining bits hold the symbol value.
 */
#define SYMLIMIT 65536
#define SYMPFX_LITLEN    0x00000000U
#define SYMPFX_DIST      0x40000000U
#define SYMPFX_EXTRABITS 0x80000000U
#define SYMPFX_CODELEN   0xC0000000U
#define SYMPFX_MASK      0xC0000000U

/*
 * For SYMPFX_EXTRABITS words, bits 26-29 give the number of extra
 * bits to write and the bits below that give their value.
 */
#define SYM_EXTRABITS_MASK 0x3C000000U
#define SYM_EXTRABITS_SHIFT 26
 451
/*
 * State of one Deflate compression stream.
 */
struct deflate_compress_ctx {
    struct LZ77Context *lzc;           /* presumably the LZ77 engine feeding
                                        * this stream -- TODO confirm */
    unsigned char *outbuf;             /* output bytes produced so far */
    int outlen, outsize;               /* bytes used / allocated in outbuf */
    unsigned long outbits;             /* bits not yet flushed to outbuf,
                                        * oldest in the low-order position */
    int noutbits;                      /* number of valid bits in outbits */
    int firstblock;                    /* NOTE(review): use not visible in
                                        * this part of the file */
    unsigned long *syms;               /* ring buffer of queued symbols,
                                        * indexed modulo SYMLIMIT */
    int symstart, nsyms;               /* index of the oldest queued symbol;
                                        * nsyms is presumably the number
                                        * queued -- TODO confirm */
    int type;                          /* NOTE(review): presumably the output
                                        * format selector -- confirm */
    unsigned long adler32;             /* NOTE(review): presumably a running
                                        * Adler-32 checksum -- confirm */
    int lastblock;                     /* NOTE(review): use not visible here */
    int finished;                      /* NOTE(review): use not visible here */
#ifdef STATISTICS
    unsigned long bitcount;            /* total bits passed to outbits() */
#endif
};
 469
 470 static void outbits(deflate_compress_ctx *out,
 471                     unsigned long bits, int nbits)
 472 {
 473     assert(out->noutbits + nbits <= 32);
 474     out->outbits |= bits << out->noutbits;
 475     out->noutbits += nbits;
 476     while (out->noutbits >= 8) {
 477         if (out->outlen >= out->outsize) {
 478             out->outsize = out->outlen + 64;
 479             out->outbuf = sresize(out->outbuf, out->outsize, unsigned char);
 480         }
 481         out->outbuf[out->outlen++] = (unsigned char) (out->outbits & 0xFF);
 482         out->outbits >>= 8;
 483         out->noutbits -= 8;
 484     }
 485 #ifdef STATISTICS
 486     out->bitcount += nbits;
 487 #endif
 488 }
 489
 490 /*
 491  * Binary heap functions used by buildhuf(). Each one assumes the
 492  * heap to be stored in an array of ints, with two ints per node
 493  * (user data and key). They take in the old heap length, and
 494  * return the new one.
 495  */
 496 #define HEAPPARENT(x) (((x)-2)/4*2)
 497 #define HEAPLEFT(x) ((x)*2+2)
 498 #define HEAPRIGHT(x) ((x)*2+4)
 499 static int addheap(int *heap, int len, int userdata, int key)
 500 {
 501     int me, dad, tmp;
 502
 503     me = len;
 504     heap[len++] = userdata;
 505     heap[len++] = key;
 506
 507     while (me > 0) {
 508         dad = HEAPPARENT(me);
 509         if (heap[me+1] < heap[dad+1]) {
 510             tmp = heap[me]; heap[me] = heap[dad]; heap[dad] = tmp;
 511             tmp = heap[me+1]; heap[me+1] = heap[dad+1]; heap[dad+1] = tmp;
 512             me = dad;
 513         } else
 514             break;
 515     }
 516
 517     return len;
 518 }
 519 static int rmheap(int *heap, int len, int *userdata, int *key)
 520 {
 521     int me, lc, rc, c, tmp;
 522
 523     len -= 2;
 524     *userdata = heap[0];
 525     *key = heap[1];
 526     heap[0] = heap[len];
 527     heap[1] = heap[len+1];
 528
 529     me = 0;
 530
 531     while (1) {
 532         lc = HEAPLEFT(me);
 533         rc = HEAPRIGHT(me);
 534         if (lc >= len)
 535             break;
 536         else if (rc >= len || heap[lc+1] < heap[rc+1])
 537             c = lc;
 538         else
 539             c = rc;
 540         if (heap[me+1] > heap[c+1]) {
 541             tmp = heap[me]; heap[me] = heap[c]; heap[c] = tmp;
 542             tmp = heap[me+1]; heap[me+1] = heap[c+1]; heap[c+1] = tmp;
 543         } else
 544             break;
 545         me = c;
 546     }
 547
 548     return len;
 549 }
 550
 551 /*
 552  * The core of the Huffman algorithm: takes an input array of
 553  * symbol frequencies, and produces an output array of code
 554  * lengths.
 555  *
 556  * This is basically a generic Huffman implementation, but it has
 557  * one zlib-related quirk which is that it caps the output code
 558  * lengths to fit in an unsigned char (which is safe since Deflate
 559  * will reject anything longer than 15 anyway). Anyone wanting to
 560  * rip it out and use it in another context should find that easy
 561  * to remove.
 562  */
 563 #define HUFMAX 286
 564 static void buildhuf(const int *freqs, unsigned char *lengths, int nsyms)
 565 {
 566     int parent[2*HUFMAX-1];
 567     int length[2*HUFMAX-1];
 568     int heap[2*HUFMAX];
 569     int heapsize;
 570     int i, j, n;
 571     int si, sj;
 572
 573     assert(nsyms <= HUFMAX);
 574
 575     memset(parent, 0, sizeof(parent));
 576
 577     /*
 578      * Begin by building the heap.
 579      */
 580     heapsize = 0;
 581     for (i = 0; i < nsyms; i++)
 582         if (freqs[i] > 0)              /* leave unused symbols out totally */
 583             heapsize = addheap(heap, heapsize, i, freqs[i]);
 584
 585     /*
 586      * Now repeatedly take two elements off the heap and merge
 587      * them.
 588      */
 589     n = HUFMAX;
 590     while (heapsize > 2) {
 591         heapsize = rmheap(heap, heapsize, &i, &si);
 592         heapsize = rmheap(heap, heapsize, &j, &sj);
 593         parent[i] = n;
 594         parent[j] = n;
 595         heapsize = addheap(heap, heapsize, n, si + sj);
 596         n++;
 597     }
 598
 599     /*
 600      * Now we have our tree, in the form of a link from each node
 601      * to the index of its parent. Count back down the tree to
 602      * determine the code lengths.
 603      */
 604     memset(length, 0, sizeof(length));
 605     /* The tree root has length 0 after that, which is correct. */
 606     for (i = n-1; i-- ;)
 607         if (parent[i] > 0)
 608             length[i] = 1 + length[parent[i]];
 609
 610     /*
 611      * And that's it. (Simple, wasn't it?) Copy the lengths into
 612      * the output array and leave.
 613      *
 614      * Here we cap lengths to fit in unsigned char.
 615      */
 616     for (i = 0; i < nsyms; i++)
 617         lengths[i] = (length[i] > 255 ? 255 : length[i]);
 618 }
 619
 620 /*
 621  * Wrapper around buildhuf() which enforces the Deflate restriction
 622  * that no code length may exceed 15 bits, or 7 for the auxiliary
 623  * code length alphabet. This function has the same calling
 624  * semantics as buildhuf(), except that it might modify the freqs
 625  * array.
 626  */
 627 static void deflate_buildhuf(int *freqs, unsigned char *lengths,
 628                              int nsyms, int limit)
 629 {
 630     int smallestfreq, totalfreq, nactivesyms;
 631     int num, denom, adjust;
 632     int i;
 633     int maxprob;
 634
 635     /*
 636      * First, try building the Huffman table the normal way. If
 637      * this works, it's optimal, so we don't want to mess with it.
 638      */
 639     buildhuf(freqs, lengths, nsyms);
 640
 641     for (i = 0; i < nsyms; i++)
 642         if (lengths[i] > limit)
 643             break;
 644
 645     if (i == nsyms)
 646         return;                        /* OK */
 647
 648     /*
 649      * The Huffman algorithm can only ever generate a code length
 650      * of N bits or more if there is a symbol whose probability is
 651      * less than the reciprocal of the (N+2)th Fibonacci number
 652      * (counting from F_0=0 and F_1=1), i.e. 1/2584 for N=16, or
 653      * 1/55 for N=8. (This is a necessary though not sufficient
 654      * condition.)
 655      *
 656      * Why is this? Well, consider the input symbol with the
 657      * smallest probability. Let that probability be x. In order
 658      * for this symbol to have a code length of at least 1, the
 659      * Huffman algorithm will have to merge it with some other
 660      * node; and since x is the smallest probability, the node it
 661      * gets merged with must be at least x. Thus, the probability
 662      * of the resulting combined node will be at least 2x. Now in
 663      * order for our node to reach depth 2, this 2x-node must be
 664      * merged again. But what with? We can't assume the node it
 665      * merges with is at least 2x, because this one might only be
 666      * the _second_ smallest remaining node. But we do know the
 667      * node it merges with must be at least x, so our order-2
 668      * internal node is at least 3x.
 669      *
 670      * How small a node can merge with _that_ to get an order-3
 671      * internal node? Well, it must be at least 2x, because if it
 672      * was smaller than that then it would have been one of the two
 673      * smallest nodes in the previous step and been merged at that
 674      * point. So at least 3x, plus at least 2x, comes to at least
 675      * 5x for an order-3 node.
 676      *
 677      * And so it goes on: at every stage we must merge our current
 678      * node with a node at least as big as the bigger of this one's
 679      * two parents, and from this starting point that gives rise to
 680      * the Fibonacci sequence. So we find that in order to have a
 681      * node n levels deep (i.e. a maximum code length of n), the
 682      * overall probability of the root of the entire tree must be
 683      * at least F_{n+2} times the probability of the rarest symbol.
 684      * In other words, since the overall probability is 1, it is a
 685      * necessary condition for a code length of 16 or more that
 686      * there must be at least one symbol with probability <=
 687      * 1/F_18.
 688      *
 689      * (To demonstrate that a probability this big really can give
 690      * rise to a code length of 16, consider the set of input
 691      * frequencies { 1-epsilon, 1, 1, 2, 3, 5, 8, 13, 21, 34, 55,
 692      * 89, 144, 233, 377, 610, 987 }, for arbitrarily small
 693      * epsilon.)
 694      *
 695      * So here buildhuf() has returned us an overlong code. So to
 696      * ensure it doesn't do it again, we add a constant to all the
 697      * (non-zero) symbol frequencies, causing them to become more
 698      * balanced and removing the danger. We can then feed the
 699      * results back to the standard buildhuf() and be
 700      * assert()-level confident that the resulting code lengths
 701      * contain nothing outside the permitted range.
 702      */
 703     maxprob = (limit == 16 ? 2584 : 55);   /* no point in computing full F_n */
 704     totalfreq = nactivesyms = 0;
 705     smallestfreq = -1;
 706     for (i = 0; i < nsyms; i++) {
 707         if (freqs[i] == 0)
 708             continue;
 709         if (smallestfreq < 0 || smallestfreq > freqs[i])
 710             smallestfreq = freqs[i];
 711         totalfreq += freqs[i];
 712         nactivesyms++;
 713     }
 714     assert(smallestfreq <= totalfreq / maxprob);
 715
 716     /*
 717      * We want to find the smallest integer `adjust' such that
 718      * (totalfreq + nactivesyms * adjust) / (smallestfreq +
 719      * adjust) is less than maxprob. A bit of algebra tells us
 720      * that the threshold value is equal to
 721      *
 722      *   totalfreq - maxprob * smallestfreq
 723      *   ----------------------------------
 724      *          maxprob - nactivesyms
 725      *
 726      * rounded up, of course. And we'll only even be trying
 727      * this if
 728      */
 729     num = totalfreq - smallestfreq * maxprob;
 730     denom = maxprob - nactivesyms;
 731     adjust = (num + denom - 1) / denom;
 732
 733     /*
 734      * Now add `adjust' to all the input symbol frequencies.
 735      */
 736     for (i = 0; i < nsyms; i++)
 737         if (freqs[i] != 0)
 738             freqs[i] += adjust;
 739
 740     /*
 741      * Rebuild the Huffman tree...
 742      */
 743     buildhuf(freqs, lengths, nsyms);
 744
 745     /*
 746      * ... and this time it ought to be OK.
 747      */
 748     for (i = 0; i < nsyms; i++)
 749         assert(lengths[i] <= limit);
 750 }
 751
/*
 * The Huffman tables needed to write out one block: for each of the
 * three alphabets, an array of code lengths and an array of the
 * corresponding codes, both indexed by symbol value. The structure
 * only points at the arrays; it does not own them.
 */
struct huftrees {
    unsigned char *len_litlen;         /* literal/length alphabet */
    int *code_litlen;
    unsigned char *len_dist;           /* distance alphabet */
    int *code_dist;
    unsigned char *len_codelen;        /* code length alphabet */
    int *code_codelen;
};
 760
 761 /*
 762  * Write out a single symbol, given the three Huffman trees.
 763  */
 764 static void writesym(deflate_compress_ctx *out,
 765                      unsigned sym, struct huftrees *trees)
 766 {
 767     unsigned basesym = sym &~ SYMPFX_MASK;
 768     int i;
 769
 770     switch (sym & SYMPFX_MASK) {
 771       case SYMPFX_LITLEN:
 772         debug(("send: litlen %d\n", basesym));
 773         outbits(out, trees->code_litlen[basesym], trees->len_litlen[basesym]);
 774         break;
 775       case SYMPFX_DIST:
 776         debug(("send: dist %d\n", basesym));
 777         outbits(out, trees->code_dist[basesym], trees->len_dist[basesym]);
 778         break;
 779       case SYMPFX_CODELEN:
 780         debug(("send: codelen %d\n", basesym));
 781         outbits(out, trees->code_codelen[basesym],trees->len_codelen[basesym]);
 782         break;
 783       case SYMPFX_EXTRABITS:
 784         i = basesym >> SYM_EXTRABITS_SHIFT;
 785         basesym &= ~SYM_EXTRABITS_MASK;
 786         debug(("send: extrabits %d/%d\n", basesym, i));
 787         outbits(out, basesym, i);
 788         break;
 789     }
 790 }
 791
 792 static void outblock(deflate_compress_ctx *out,
 793                      int blklen, int dynamic)
 794 {
 795     int freqs1[286], freqs2[30], freqs3[19];
 796     unsigned char len1[286], len2[30], len3[19];
 797     int code1[286], code2[30], code3[19];
 798     int hlit, hdist, hclen, bfinal, btype;
 799     int treesrc[286 + 30];
 800     int treesyms[286 + 30];
 801     int codelen[19];
 802     int i, ntreesrc, ntreesyms;
 803     struct huftrees ht;
 804 #ifdef STATISTICS
 805     unsigned long bitcount_before;
 806 #endif
 807
 808     ht.len_litlen = len1;
 809     ht.len_dist = len2;
 810     ht.len_codelen = len3;
 811     ht.code_litlen = code1;
 812     ht.code_dist = code2;
 813     ht.code_codelen = code3;
 814
 815     /*
 816      * Build the two main Huffman trees.
 817      */
 818     if (dynamic) {
 819         /*
 820          * Count up the frequency tables.
 821          */
 822         memset(freqs1, 0, sizeof(freqs1));
 823         memset(freqs2, 0, sizeof(freqs2));
 824         freqs1[256] = 1;               /* we're bound to need one EOB */
 825         for (i = 0; i < blklen; i++) {
 826             unsigned sym = out->syms[(out->symstart + i) % SYMLIMIT];
 827
 828             /*
 829              * Increment the occurrence counter for this symbol, if
 830              * it's in one of the Huffman alphabets and isn't extra
 831              * bits.
 832              */
 833             if ((sym & SYMPFX_MASK) == SYMPFX_LITLEN) {
 834                 sym &= ~SYMPFX_MASK;
 835                 assert(sym < lenof(freqs1));
 836                 freqs1[sym]++;
 837             } else if ((sym & SYMPFX_MASK) == SYMPFX_DIST) {
 838                 sym &= ~SYMPFX_MASK;
 839                 assert(sym < lenof(freqs2));
 840                 freqs2[sym]++;
 841             }
 842         }
 843         deflate_buildhuf(freqs1, len1, lenof(freqs1), 15);
 844         deflate_buildhuf(freqs2, len2, lenof(freqs2), 15);
 845     } else {
 846         /*
 847          * Fixed static trees.
 848          */
 849         for (i = 0; i < lenof(len1); i++)
 850             len1[i] = (i < 144 ? 8 :
 851                        i < 256 ? 9 :
 852                        i < 280 ? 7 : 8);
 853         for (i = 0; i < lenof(len2); i++)
 854             len2[i] = 5;
 855     }
 856     hufcodes(len1, code1, lenof(freqs1));
 857     hufcodes(len2, code2, lenof(freqs2));
 858
 859     if (dynamic) {
 860         /*
 861          * Determine HLIT and HDIST.
 862          */
 863         for (hlit = 286; hlit > 257 && len1[hlit-1] == 0; hlit--);
 864         for (hdist = 30; hdist > 1 && len2[hdist-1] == 0; hdist--);
 865
 866         /*
 867          * Write out the list of symbols used to transmit the
 868          * trees.
 869          */
 870         ntreesrc = 0;
 871         for (i = 0; i < hlit; i++)
 872             treesrc[ntreesrc++] = len1[i];
 873         for (i = 0; i < hdist; i++)
 874             treesrc[ntreesrc++] = len2[i];
 875         ntreesyms = 0;
 876         for (i = 0; i < ntreesrc ;) {
 877             int j = 1;
 878             int k;
 879
 880             /* Find length of run of the same length code. */
 881             while (i+j < ntreesrc && treesrc[i+j] == treesrc[i])
 882                 j++;
 883
 884             /* Encode that run as economically as we can. */
 885             k = j;
 886             if (treesrc[i] == 0) {
 887                 /*
 888                  * Zero code length: we can output run codes for
 889                  * 3-138 zeroes. So if we have fewer than 3 zeroes,
 890                  * we just output literals. Otherwise, we output
 891                  * nothing but run codes, and tweak their lengths
 892                  * to make sure we aren't left with under 3 at the
 893                  * end.
 894                  */
 895                 if (k < 3) {
 896                     while (k--)
 897                         treesyms[ntreesyms++] = 0 | SYMPFX_CODELEN;
 898                 } else {
 899                     while (k > 0) {
 900                         int rpt = (k < 138 ? k : 138);
 901                         if (rpt > k-3 && rpt < k)
 902                             rpt = k-3;
 903                         assert(rpt >= 3 && rpt <= 138);
 904                         if (rpt < 11) {
 905                             treesyms[ntreesyms++] = 17 | SYMPFX_CODELEN;
 906                             treesyms[ntreesyms++] =
 907                                 (SYMPFX_EXTRABITS | (rpt - 3) |
 908                                  (3 << SYM_EXTRABITS_SHIFT));
 909                         } else {
 910                             treesyms[ntreesyms++] = 18 | SYMPFX_CODELEN;
 911                             treesyms[ntreesyms++] =
 912                                 (SYMPFX_EXTRABITS | (rpt - 11) |
 913                                  (7 << SYM_EXTRABITS_SHIFT));
 914                         }
 915                         k -= rpt;
 916                     }
 917                 }
 918             } else {
 919                 /*
 920                  * Non-zero code length: we must output the first
 921                  * one explicitly, then we can output a copy code
 922                  * for 3-6 repeats. So if we have fewer than 4
 923                  * repeats, we _just_ output literals. Otherwise,
 924                  * we output one literal plus at least one copy
 925                  * code, and tweak the copy codes to make sure we
 926                  * aren't left with under 3 at the end.
 927                  */
 928                 assert(treesrc[i] < 16);
 929                 treesyms[ntreesyms++] = treesrc[i] | SYMPFX_CODELEN;
 930                 k--;
 931                 if (k < 3) {
 932                     while (k--)
 933                         treesyms[ntreesyms++] = treesrc[i] | SYMPFX_CODELEN;
 934                 } else {
 935                     while (k > 0) {
 936                         int rpt = (k < 6 ? k : 6);
 937                         if (rpt > k-3 && rpt < k)
 938                             rpt = k-3;
 939                         assert(rpt >= 3 && rpt <= 6);
 940                         treesyms[ntreesyms++] = 16 | SYMPFX_CODELEN;
 941                         treesyms[ntreesyms++] = (SYMPFX_EXTRABITS | (rpt - 3) |
 942                                                  (2 << SYM_EXTRABITS_SHIFT));
 943                         k -= rpt;
 944                     }
 945                 }
 946             }
 947
 948             i += j;
 949         }
 950         assert((unsigned)ntreesyms < lenof(treesyms));
 951
 952         /*
 953          * Count up the frequency table for the tree-transmission
 954          * symbols, and build the auxiliary Huffman tree for that.
 955          */
 956         memset(freqs3, 0, sizeof(freqs3));
 957         for (i = 0; i < ntreesyms; i++) {
 958             unsigned sym = treesyms[i];
 959
 960             /*
 961              * Increment the occurrence counter for this symbol, if
 962              * it's the Huffman alphabet and isn't extra bits.
 963              */
 964             if ((sym & SYMPFX_MASK) == SYMPFX_CODELEN) {
 965                 sym &= ~SYMPFX_MASK;
 966                 assert(sym < lenof(freqs3));
 967                 freqs3[sym]++;
 968             }
 969         }
 970         deflate_buildhuf(freqs3, len3, lenof(freqs3), 7);
 971         hufcodes(len3, code3, lenof(freqs3));
 972
 973         /*
 974          * Reorder the code length codes into transmission order, and
 975          * determine HCLEN.
 976          */
 977         for (i = 0; i < 19; i++)
 978             codelen[i] = len3[lenlenmap[i]];
 979         for (hclen = 19; hclen > 4 && codelen[hclen-1] == 0; hclen--);
 980     }
 981
 982     /*
 983      * Actually transmit the block.
 984      */
 985
 986     /* 3-bit block header */
 987     bfinal = (out->lastblock ? 1 : 0);
 988     btype = dynamic ? 2 : 1;
 989     debug(("send: bfinal=%d btype=%d\n", bfinal, btype));
 990     outbits(out, bfinal, 1);
 991     outbits(out, btype, 2);
 992
 993 #ifdef STATISTICS
 994     bitcount_before = out->bitcount;
 995 #endif
 996
 997     if (dynamic) {
 998         /* HLIT, HDIST and HCLEN */
 999         debug(("send: hlit=%d hdist=%d hclen=%d\n", hlit, hdist, hclen));
1000         outbits(out, hlit - 257, 5);
1001         outbits(out, hdist - 1, 5);
1002         outbits(out, hclen - 4, 4);
1003
1004         /* Code lengths for the auxiliary tree */
1005         for (i = 0; i < hclen; i++) {
1006             debug(("send: lenlen %d\n", codelen[i]));
1007             outbits(out, codelen[i], 3);
1008         }
1009
1010         /* Code lengths for the literal/length and distance trees */
1011         for (i = 0; i < ntreesyms; i++)
1012             writesym(out, treesyms[i], &ht);
1013 #ifdef STATISTICS
1014         fprintf(stderr, "total tree size %lu bits\n",
1015                 out->bitcount - bitcount_before);
1016 #endif
1017     }
1018
1019     /* Output the actual symbols from the buffer */
1020     for (i = 0; i < blklen; i++) {
1021         unsigned sym = out->syms[(out->symstart + i) % SYMLIMIT];
1022         writesym(out, sym, &ht);
1023     }
1024
1025     /* Output the end-of-data symbol */
1026     writesym(out, SYMPFX_LITLEN | 256, &ht);
1027
1028     /*
1029      * Remove all the just-output symbols from the symbol buffer by
1030      * adjusting symstart and nsyms.
1031      */
1032     out->symstart = (out->symstart + blklen) % SYMLIMIT;
1033     out->nsyms -= blklen;
1034 }
1035
1036 static void outblock_wrapper(deflate_compress_ctx *out,
1037                              int best_dynamic_len)
1038 {
1039     /*
1040      * Final block choice function: we have the option of either
1041      * outputting a dynamic block of length best_dynamic_len, or a
1042      * static block of length out->nsyms. Whichever gives us the
1043      * best value for money, we do.
1044      *
1045      * FIXME: currently we always choose dynamic except for empty
1046      * blocks. We should make a sensible judgment.
1047      */
1048     if (out->nsyms == 0)
1049         outblock(out, 0, FALSE);
1050     else
1051         outblock(out, best_dynamic_len, TRUE);
1052 }
1053
/*
 * Pick a length for the next block and emit it.
 *
 * Walks the buffered symbols from the start of the buffer. Every
 * position i > 0 at which a literal/length symbol begins is treated
 * as a possible block end; for each one, an entropy-based estimate of
 * the encoded size of symbols [0,i) is computed, and the position
 * giving the most symbols per estimated bit wins (the earliest such
 * position on a tie, since the comparison is strict). The winner is
 * handed to outblock_wrapper().
 *
 * The assertion at the end means the buffer must contain at least one
 * literal/length symbol at a position other than 0.
 */
static void chooseblock(deflate_compress_ctx *out)
{
    int freqs1[286], freqs2[30];
    int i, bestlen;
    double bestvfm;
    int nextrabits;

    memset(freqs1, 0, sizeof(freqs1));
    memset(freqs2, 0, sizeof(freqs2));
    freqs1[256] = 1;                   /* we're bound to need one EOB */
    nextrabits = 0;

    /*
     * Iterate over all possible block lengths, computing the
     * entropic coding approximation to the final length at every
     * stage. We divide the number of symbols encoded by that
     * estimate, to determine the `value for money' (overall
     * symbols-per-bit count) of a block of that length.
     */
    bestlen = -1;
    bestvfm = 0.0;
    for (i = 0; i < out->nsyms; i++) {
        unsigned sym = out->syms[(out->symstart + i) % SYMLIMIT];

        /*
         * The frequency tables are only updated at the bottom of the
         * loop, so at this point they describe symbols [0,i): exactly
         * the contents of a block ending just before symbol i.
         */
        if (i > 0 && (sym & SYMPFX_MASK) == SYMPFX_LITLEN) {
            /*
             * This is a viable point at which to end the block.
             * Compute the length approximation and hence the value
             * for money.
             */
            double len = 0.0, vfm;
            int k;
            int total;

            /*
             * FIXME: we should be doing this incrementally, rather
             * than recomputing the whole thing at every byte
             * position. Also, can we fiddle the logs somehow to
             * avoid having to do floating point?
             */
            /*
             * For each alphabet, accumulate
             *   total*log(total) - sum(f*log(f))
             * in natural-log units; the division by log(2) below
             * converts the combined figure to bits.
             */
            total = 0;
            for (k = 0; k < (int)lenof(freqs1); k++) {
                if (freqs1[k])
                    len -= freqs1[k] * log(freqs1[k]);
                total += freqs1[k];
            }
            if (total)
                len += total * log(total);
            total = 0;
            for (k = 0; k < (int)lenof(freqs2); k++) {
                if (freqs2[k])
                    len -= freqs2[k] * log(freqs2[k]);
                total += freqs2[k];
            }
            if (total)
                len += total * log(total);
            len /= log(2);
            len += nextrabits;         /* extra bits are sent verbatim */
            len += 300;   /* very approximate size of the Huffman trees */

            vfm = i / len;             /* symbols encoded per bit */
/* fprintf(stderr, "chooseblock: i=%d gives len %g, vfm %g\n", i, len, vfm); */
            if (bestlen < 0 || vfm > bestvfm) {
                bestlen = i;
                bestvfm = vfm;
            }
        }

        /*
         * Increment the occurrence counter for this symbol, if
         * it's in one of the Huffman alphabets and isn't extra
         * bits. Extra-bits symbols instead add their bit count
         * (stored above SYM_EXTRABITS_SHIFT) to the running total.
         */
        if ((sym & SYMPFX_MASK) == SYMPFX_LITLEN) {
            sym &= ~SYMPFX_MASK;
            assert(sym < lenof(freqs1));
            freqs1[sym]++;
        } else if ((sym & SYMPFX_MASK) == SYMPFX_DIST) {
            sym &= ~SYMPFX_MASK;
            assert(sym < lenof(freqs2));
            freqs2[sym]++;
        } else if ((sym & SYMPFX_MASK) == SYMPFX_EXTRABITS) {
            nextrabits += (sym &~ SYMPFX_MASK) >> SYM_EXTRABITS_SHIFT;
        }
    }

    assert(bestlen > 0);

/* fprintf(stderr, "chooseblock: bestlen %d, bestvfm %g\n", bestlen, bestvfm); */
    outblock_wrapper(out, bestlen);
}
1145
1146 /*
1147  * Force the current symbol buffer to be flushed out as a single
1148  * block.
1149  */
1150 static void flushblock(deflate_compress_ctx *out)
1151 {
1152     /*
1153      * Because outblock_wrapper guarantees to output either a
1154      * dynamic block of the given length or a static block of
1155      * length out->nsyms, we know that passing out->nsyms as the
1156      * given length will definitely result in us using up the
1157      * entire buffer.
1158      */
1159     outblock_wrapper(out, out->nsyms);
1160     assert(out->nsyms == 0);
1161 }
1162
1163 /*
1164  * Place a symbol into the symbols buffer.
1165  */
1166 static void outsym(deflate_compress_ctx *out, unsigned long sym)
1167 {
1168     assert(out->nsyms < SYMLIMIT);
1169     out->syms[(out->symstart + out->nsyms++) % SYMLIMIT] = sym;
1170
1171     if (out->nsyms == SYMLIMIT)
1172         chooseblock(out);
1173 }
1174
/*
 * One row of a length or distance code table: a Huffman alphabet
 * symbol together with the range of values it stands for.
 */
typedef struct {
    /*
     * code: the symbol number within its alphabet.
     * extrabits: how many extra bits follow the symbol to select a
     * value within the range (0 if the range is a single value).
     */
    short code, extrabits;
    /* Inclusive range of values covered; the extra bits hold value - min. */
    int min, max;
} coderecord;
1179
/*
 * Match-length codes (symbols 257-285 of the literal/length
 * alphabet), as laid out in RFC 1951 section 3.2.5. Each row is
 * {symbol, extra bits, shortest length, longest length}.
 *
 * Rows are in ascending order of length with no gaps between 3 and
 * 258, which is what allows match() to binary-search the table.
 */
static const coderecord lencodes[] = {
    {257, 0, 3, 3},
    {258, 0, 4, 4},
    {259, 0, 5, 5},
    {260, 0, 6, 6},
    {261, 0, 7, 7},
    {262, 0, 8, 8},
    {263, 0, 9, 9},
    {264, 0, 10, 10},
    {265, 1, 11, 12},
    {266, 1, 13, 14},
    {267, 1, 15, 16},
    {268, 1, 17, 18},
    {269, 2, 19, 22},
    {270, 2, 23, 26},
    {271, 2, 27, 30},
    {272, 2, 31, 34},
    {273, 3, 35, 42},
    {274, 3, 43, 50},
    {275, 3, 51, 58},
    {276, 3, 59, 66},
    {277, 4, 67, 82},
    {278, 4, 83, 98},
    {279, 4, 99, 114},
    {280, 4, 115, 130},
    {281, 5, 131, 162},
    {282, 5, 163, 194},
    {283, 5, 195, 226},
    {284, 5, 227, 257},
    {285, 0, 258, 258},
};
1211
/*
 * Match-distance codes (the 30 symbols of the distance alphabet), as
 * laid out in RFC 1951 section 3.2.5. Each row is
 * {symbol, extra bits, nearest distance, farthest distance}.
 *
 * Rows are in ascending order of distance with no gaps between 1 and
 * 32768, which is what allows match() to binary-search the table.
 */
static const coderecord distcodes[] = {
    {0, 0, 1, 1},
    {1, 0, 2, 2},
    {2, 0, 3, 3},
    {3, 0, 4, 4},
    {4, 1, 5, 6},
    {5, 1, 7, 8},
    {6, 2, 9, 12},
    {7, 2, 13, 16},
    {8, 3, 17, 24},
    {9, 3, 25, 32},
    {10, 4, 33, 48},
    {11, 4, 49, 64},
    {12, 5, 65, 96},
    {13, 5, 97, 128},
    {14, 6, 129, 192},
    {15, 6, 193, 256},
    {16, 7, 257, 384},
    {17, 7, 385, 512},
    {18, 8, 513, 768},
    {19, 8, 769, 1024},
    {20, 9, 1025, 1536},
    {21, 9, 1537, 2048},
    {22, 10, 2049, 3072},
    {23, 10, 3073, 4096},
    {24, 11, 4097, 6144},
    {25, 11, 6145, 8192},
    {26, 12, 8193, 12288},
    {27, 12, 12289, 16384},
    {28, 13, 16385, 24576},
    {29, 13, 24577, 32768},
};
1244
1245 static void literal(struct LZ77Context *ectx, unsigned char c)
1246 {
1247     deflate_compress_ctx *out = (deflate_compress_ctx *) ectx->userdata;
1248
1249     outsym(out, SYMPFX_LITLEN | c);
1250 }
1251
1252 static void match(struct LZ77Context *ectx, int distance, int len)
1253 {
1254     const coderecord *d, *l;
1255     int i, j, k;
1256     deflate_compress_ctx *out = (deflate_compress_ctx *) ectx->userdata;
1257
1258     while (len > 0) {
1259         int thislen;
1260
1261         /*
1262          * We can transmit matches of lengths 3 through 258
1263          * inclusive. So if len exceeds 258, we must transmit in
1264          * several steps, with 258 or less in each step.
1265          *
1266          * Specifically: if len >= 261, we can transmit 258 and be
1267          * sure of having at least 3 left for the next step. And if
1268          * len <= 258, we can just transmit len. But if len == 259
1269          * or 260, we must transmit len-3.
1270          */
1271         thislen = (len > 260 ? 258 : len <= 258 ? len : len - 3);
1272         len -= thislen;
1273
1274         /*
1275          * Binary-search to find which length code we're
1276          * transmitting.
1277          */
1278         i = -1;
1279         j = sizeof(lencodes) / sizeof(*lencodes);
1280         while (1) {
1281             assert(j - i >= 2);
1282             k = (j + i) / 2;
1283             if (thislen < lencodes[k].min)
1284                 j = k;
1285             else if (thislen > lencodes[k].max)
1286                 i = k;
1287             else {
1288                 l = &lencodes[k];
1289                 break;                 /* found it! */
1290             }
1291         }
1292
1293         /*
1294          * Transmit the length code.
1295          */
1296         outsym(out, SYMPFX_LITLEN | l->code);
1297
1298         /*
1299          * Transmit the extra bits.
1300          */
1301         if (l->extrabits) {
1302             outsym(out, (SYMPFX_EXTRABITS | (thislen - l->min) |
1303                          (l->extrabits << SYM_EXTRABITS_SHIFT)));
1304         }
1305
1306         /*
1307          * Binary-search to find which distance code we're
1308          * transmitting.
1309          */
1310         i = -1;
1311         j = sizeof(distcodes) / sizeof(*distcodes);
1312         while (1) {
1313             assert(j - i >= 2);
1314             k = (j + i) / 2;
1315             if (distance < distcodes[k].min)
1316                 j = k;
1317             else if (distance > distcodes[k].max)
1318                 i = k;
1319             else {
1320                 d = &distcodes[k];
1321                 break;                 /* found it! */
1322             }
1323         }
1324
1325         /*
1326          * Write the distance code.
1327          */
1328         outsym(out, SYMPFX_DIST | d->code);
1329
1330         /*
1331          * Transmit the extra bits.
1332          */
1333         if (d->extrabits) {
1334             outsym(out, (SYMPFX_EXTRABITS | (distance - d->min) |
1335                          (d->extrabits << SYM_EXTRABITS_SHIFT)));
1336         }
1337     }
1338 }
1339
1340 deflate_compress_ctx *deflate_compress_new(int type)
1341 {
1342     deflate_compress_ctx *out;
1343     struct LZ77Context *ectx = snew(struct LZ77Context);
1344
1345     lz77_init(ectx);
1346     ectx->literal = literal;
1347     ectx->match = match;
1348
1349     out = snew(deflate_compress_ctx);
1350     out->type = type;
1351     out->outbits = out->noutbits = 0;
1352     out->firstblock = TRUE;
1353 #ifdef STATISTICS
1354     out->bitcount = 0;
1355 #endif
1356
1357     out->syms = snewn(SYMLIMIT, unsigned long);
1358     out->symstart = out->nsyms = 0;
1359
1360     out->adler32 = 1;
1361     out->lastblock = FALSE;
1362     out->finished = FALSE;
1363
1364     ectx->userdata = out;
1365     out->lzc = ectx;
1366
1367     return out;
1368 }
1369
1370 void deflate_compress_free(deflate_compress_ctx *out)
1371 {
1372     struct LZ77Context *ectx = out->lzc;
1373
1374     sfree(out->syms);
1375     sfree(out);
1376     sfree(ectx->ictx);
1377     sfree(ectx);
1378 }
1379
/*
 * Feed `len' bytes from `data' into an Adler-32 checksum.
 *
 * `s' is the checksum so far (1 for a fresh checksum); the return
 * value is the checksum after the new data, with the low sum in bits
 * 0-15 and the high sum in bits 16-31. Both sums are taken modulo
 * 65521.
 */
static unsigned long adler32_update(unsigned long s,
                                    const unsigned char *data, int len)
{
    unsigned lo = s & 0xFFFF;          /* running sum of the bytes */
    unsigned hi = (s >> 16) & 0xFFFF;  /* running sum of the `lo' values */
    int pos = 0;

    while (pos < len) {
        lo += data[pos];
        hi += lo;

        /*
         * Reduce only once every 4096 bytes: cheap, and frequent
         * enough that the sums cannot outgrow 32 bits in between.
         */
        if ((pos & 0xFFF) == 0) {
            lo %= 65521;
            hi %= 65521;
        }
        pos++;
    }

    /* Final reduction, then pack the two halves together. */
    lo %= 65521;
    hi %= 65521;
    return (hi << 16) | lo;
}
1397
1398 int deflate_compress_data(deflate_compress_ctx *out,
1399                           const void *vblock, int len, int flushtype,
1400                           void **outblock, int *outlen)
1401 {
1402     struct LZ77Context *ectx = out->lzc;
1403     const unsigned char *block = (const unsigned char *)vblock;
1404
1405     assert(!out->finished);
1406
1407     out->outbuf = NULL;
1408     out->outlen = out->outsize = 0;
1409
1410     /*
1411      * If this is the first block, output the header.
1412      */
1413     if (out->firstblock) {
1414         switch (out->type) {
1415           case DEFLATE_TYPE_BARE:
1416             break;                     /* no header */
1417           case DEFLATE_TYPE_ZLIB:
1418             /*
1419              * Zlib (RFC1950) header bytes: 78 9C. (Deflate
1420              * compression, 32K window size, default algorithm.)
1421              */
1422             outbits(out, 0x9C78, 16);
1423             break;
1424         }
1425         out->firstblock = FALSE;
1426     }
1427
1428     /*
1429      * Feed our data to the LZ77 compression phase.
1430      */
1431     lz77_compress(ectx, block, len, TRUE);
1432
1433     /*
1434      * Update checksums.
1435      */
1436     if (out->type == DEFLATE_TYPE_ZLIB)
1437         out->adler32 = adler32_update(out->adler32, block, len);
1438
1439     switch (flushtype) {
1440         /*
1441          * FIXME: what other flush types are available and useful?
1442          * In PuTTY, it was clear that we generally wanted to be in
1443          * a static block so it was safe to open one. Here, we
1444          * probably prefer to be _outside_ a block if we can. Think
1445          * about this.
1446          */
1447       case DEFLATE_NO_FLUSH:
1448         break;                         /* don't flush any data at all (duh) */
1449       case DEFLATE_SYNC_FLUSH:
1450         /*
1451          * Close the current block.
1452          */
1453         flushblock(out);
1454
1455         /*
1456          * Then output an empty _uncompressed_ block: send 000,
1457          * then sync to byte boundary, then send bytes 00 00 FF
1458          * FF.
1459          */
1460         outbits(out, 0, 3);
1461         if (out->noutbits)
1462             outbits(out, 0, 8 - out->noutbits);
1463         outbits(out, 0, 16);
1464         outbits(out, 0xFFFF, 16);
1465         break;
1466       case DEFLATE_END_OF_DATA:
1467         /*
1468          * Output a block with BFINAL set.
1469          */
1470         out->lastblock = TRUE;
1471         flushblock(out);
1472
1473         /*
1474          * Sync to byte boundary, flushing out the final byte.
1475          */
1476         if (out->noutbits)
1477             outbits(out, 0, 8 - out->noutbits);
1478
1479         /*
1480          * Output the adler32 checksum, in zlib mode.
1481          */
1482         if (out->type == DEFLATE_TYPE_ZLIB) {
1483             outbits(out, (out->adler32 >> 24) & 0xFF, 8);
1484             outbits(out, (out->adler32 >> 16) & 0xFF, 8);
1485             outbits(out, (out->adler32 >>  8) & 0xFF, 8);
1486             outbits(out, (out->adler32 >>  0) & 0xFF, 8);
1487         }
1488
1489         out->finished = TRUE;
1490         break;
1491     }
1492
1493     /*
1494      * Return any data that we've generated.
1495      */
1496     *outblock = (void *)out->outbuf;
1497     *outlen = out->outlen;
1498
1499     return 1;
1500 }
1501
1502 /* ----------------------------------------------------------------------
1503  * deflate decompression.
1504  */
1505
1506 /*
1507  * The way we work the Huffman decode is to have a table lookup on
1508  * the first N bits of the input stream (in the order they arrive,
1509  * of course, i.e. the first bit of the Huffman code is in bit 0).
1510  * Each table entry lists the number of bits to consume, plus
1511  * either an output code or a pointer to a secondary table.
1512  */
struct table;
struct tableentry;

/*
 * One slot of a Huffman decode table, selected by the next few bits
 * of input.
 */
struct tableentry {
    /* Number of input bits to consume when this slot is hit. */
    unsigned char nbits;
    /*
     * Decoded symbol, or -1 if decoding continues in `nexttable'
     * (or, when `nexttable' is NULL too, if the slot is unused).
     */
    short code;
    /* Secondary table for codes too long for this one, else NULL. */
    struct table *nexttable;
};
1521
/*
 * One level of Huffman decode table: an array of mask+1 slots,
 * indexed by (input bits & mask).
 */
struct table {
    int mask;                          /* mask applied to input bit stream */
    struct tableentry *table;          /* the mask+1 slots themselves */
};
1526
/*
 * Capacity of the scratch array of Huffman codes in mktable(); no
 * alphabet passed to mktable() may have more symbols than this.
 */
#define MAXSYMS 288
1528
1529 /*
1530  * Build a single-level decode table for elements
1531  * [minlength,maxlength) of the provided code/length tables, and
1532  * recurse to build subtables.
1533  */
1534 static struct table *mkonetab(int *codes, unsigned char *lengths, int nsyms,
1535                               int pfx, int pfxbits, int bits)
1536 {
1537     struct table *tab = snew(struct table);
1538     int pfxmask = (1 << pfxbits) - 1;
1539     int nbits, i, j, code;
1540
1541     tab->table = snewn(1 << bits, struct tableentry);
1542     tab->mask = (1 << bits) - 1;
1543
1544     for (code = 0; code <= tab->mask; code++) {
1545         tab->table[code].code = -1;
1546         tab->table[code].nbits = 0;
1547         tab->table[code].nexttable = NULL;
1548     }
1549
1550     for (i = 0; i < nsyms; i++) {
1551         if (lengths[i] <= pfxbits || (codes[i] & pfxmask) != pfx)
1552             continue;
1553         code = (codes[i] >> pfxbits) & tab->mask;
1554         for (j = code; j <= tab->mask; j += 1 << (lengths[i] - pfxbits)) {
1555             tab->table[j].code = i;
1556             nbits = lengths[i] - pfxbits;
1557             if (tab->table[j].nbits < nbits)
1558                 tab->table[j].nbits = nbits;
1559         }
1560     }
1561     for (code = 0; code <= tab->mask; code++) {
1562         if (tab->table[code].nbits <= bits)
1563             continue;
1564         /* Generate a subtable. */
1565         tab->table[code].code = -1;
1566         nbits = tab->table[code].nbits - bits;
1567         if (nbits > 7)
1568             nbits = 7;
1569         tab->table[code].nbits = bits;
1570         tab->table[code].nexttable = mkonetab(codes, lengths, nsyms,
1571                                               pfx | (code << pfxbits),
1572                                               pfxbits + bits, nbits);
1573     }
1574
1575     return tab;
1576 }
1577
1578 /*
1579  * Build a decode table, given a set of Huffman tree lengths.
1580  */
1581 static struct table *mktable(unsigned char *lengths, int nlengths)
1582 {
1583     int codes[MAXSYMS];
1584     int maxlen;
1585
1586     maxlen = hufcodes(lengths, codes, nlengths);
1587
1588     /*
1589      * Now we have the complete list of Huffman codes. Build a
1590      * table.
1591      */
1592     return mkonetab(codes, lengths, nlengths, 0, 0, maxlen < 9 ? maxlen : 9);
1593 }
1594
1595 static int freetable(struct table **ztab)
1596 {
1597     struct table *tab;
1598     int code;
1599
1600     if (ztab == NULL)
1601         return -1;
1602
1603     if (*ztab == NULL)
1604         return 0;
1605
1606     tab = *ztab;
1607
1608     for (code = 0; code <= tab->mask; code++)
1609         if (tab->table[code].nexttable != NULL)
1610             freetable(&tab->table[code].nexttable);
1611
1612     sfree(tab->table);
1613     tab->table = NULL;
1614
1615     sfree(tab);
1616     *ztab = NULL;
1617
1618     return (0);
1619 }
1620
/*
 * State of one decompression stream, carried between calls to
 * deflate_decompress_data().
 */
struct deflate_decompress_ctx {
    /* Decode tables for the fixed Huffman codes, built at creation. */
    struct table *staticlentable, *staticdisttable;
    /*
     * Tables in use for the current block. currlentable and
     * currdisttable may simply point at the static tables above;
     * all three start out NULL.
     */
    struct table *currlentable, *currdisttable, *lenlentable;
    /*
     * Where the decoder has got to. A zlib stream begins in START
     * (header expected); a bare stream begins in OUTSIDEBLK (block
     * header expected).
     */
    enum {
        START, OUTSIDEBLK,
        TREES_HDR, TREES_LENLEN, TREES_LEN, TREES_LENREP,
        INBLK, GOTLENSYM, GOTLEN, GOTDISTSYM,
        UNCOMP_LEN, UNCOMP_NLEN, UNCOMP_DATA,
        END, ADLER1, ADLER2, FINALSPIN
    } state;
    /*
     * Scratch values carried across state transitions. hlit, hdist
     * and hclen are the counts read from a dynamic block header;
     * lastblock is set once a block header with BFINAL has been seen.
     */
    int sym, hlit, hdist, hclen, lenptr, lenextrabits, lenaddon, len,
        lenrep, lastblock;
    int uncomplen;
    /* Code lengths for the code-length alphabet of a dynamic block. */
    unsigned char lenlen[19];
    unsigned char lengths[286 + 32];
    /* Input bit buffer: `nbits' valid bits, oldest in bit 0. */
    unsigned long bits;
    int nbits;
    /* Circular history of recent output; winpos is the next write slot. */
    unsigned char window[WINSIZE];
    int winpos;
    /* Output buffer for the current call: outlen bytes used of outsize. */
    unsigned char *outblk;
    int outlen, outsize;
    /* Stream framing (DEFLATE_TYPE_BARE or DEFLATE_TYPE_ZLIB). */
    int type;
    /* Running Adler-32 of the output; maintained in zlib mode only. */
    unsigned long adler32;
};
1645
1646 deflate_decompress_ctx *deflate_decompress_new(int type)
1647 {
1648     deflate_decompress_ctx *dctx = snew(deflate_decompress_ctx);
1649     unsigned char lengths[288];
1650
1651     memset(lengths, 8, 144);
1652     memset(lengths + 144, 9, 256 - 144);
1653     memset(lengths + 256, 7, 280 - 256);
1654     memset(lengths + 280, 8, 288 - 280);
1655     dctx->staticlentable = mktable(lengths, 288);
1656     memset(lengths, 5, 32);
1657     dctx->staticdisttable = mktable(lengths, 32);
1658     if (type == DEFLATE_TYPE_BARE)
1659         dctx->state = OUTSIDEBLK;
1660     else
1661         dctx->state = START;
1662     dctx->currlentable = dctx->currdisttable = dctx->lenlentable = NULL;
1663     dctx->bits = 0;
1664     dctx->nbits = 0;
1665     dctx->winpos = 0;
1666     dctx->type = type;
1667     dctx->lastblock = FALSE;
1668     dctx->adler32 = 1;
1669
1670     return dctx;
1671 }
1672
1673 void deflate_decompress_free(deflate_decompress_ctx *dctx)
1674 {
1675     if (dctx->currlentable && dctx->currlentable != dctx->staticlentable)
1676         freetable(&dctx->currlentable);
1677     if (dctx->currdisttable && dctx->currdisttable != dctx->staticdisttable)
1678         freetable(&dctx->currdisttable);
1679     if (dctx->lenlentable)
1680         freetable(&dctx->lenlentable);
1681     freetable(&dctx->staticlentable);
1682     freetable(&dctx->staticdisttable);
1683     sfree(dctx);
1684 }
1685
1686 static int huflookup(unsigned long *bitsp, int *nbitsp, struct table *tab)
1687 {
1688     unsigned long bits = *bitsp;
1689     int nbits = *nbitsp;
1690     while (1) {
1691         struct tableentry *ent;
1692         ent = &tab->table[bits & tab->mask];
1693         if (ent->nbits > nbits)
1694             return -1;                 /* not enough data */
1695         bits >>= ent->nbits;
1696         nbits -= ent->nbits;
1697         if (ent->code == -1)
1698             tab = ent->nexttable;
1699         else {
1700             *bitsp = bits;
1701             *nbitsp = nbits;
1702             return ent->code;
1703         }
1704
1705         if (!tab) {
1706             /*
1707              * There was a missing entry in the table, presumably
1708              * due to an invalid Huffman table description, and the
1709              * subsequent data has attempted to use the missing
1710              * entry. Return a decoding failure.
1711              */
1712             return -2;
1713         }
1714     }
1715 }
1716
1717 static void emit_char(deflate_decompress_ctx *dctx, int c)
1718 {
1719     dctx->window[dctx->winpos] = c;
1720     dctx->winpos = (dctx->winpos + 1) & (WINSIZE - 1);
1721     if (dctx->outlen >= dctx->outsize) {
1722         dctx->outsize = dctx->outlen + 512;
1723         dctx->outblk = sresize(dctx->outblk, dctx->outsize, unsigned char);
1724     }
1725     if (dctx->type == DEFLATE_TYPE_ZLIB) {
1726         unsigned char uc = c;
1727         dctx->adler32 = adler32_update(dctx->adler32, &uc, 1);
1728     }
1729     dctx->outblk[dctx->outlen++] = c;
1730 }
1731
/*
 * Discard the n lowest bits of the input bit buffer of the
 * decompression context named `dctx', which must be in scope at the
 * point of use. Note that n is evaluated twice.
 */
#define EATBITS(n) ( dctx->nbits -= (n), dctx->bits >>= (n) )
1733
1734 int deflate_decompress_data(deflate_decompress_ctx *dctx,
1735                             const void *vblock, int len,
1736                             void **outblock, int *outlen)
1737 {
1738     const coderecord *rec;
1739     const unsigned char *block = (const unsigned char *)vblock;
1740     int code, bfinal, btype, rep, dist, nlen, header, adler;
1741
1742     dctx->outblk = snewn(256, unsigned char);
1743     dctx->outsize = 256;
1744     dctx->outlen = 0;
1745
1746     while (len > 0 || dctx->nbits > 0) {
1747         while (dctx->nbits < 24 && len > 0) {
1748             dctx->bits |= (*block++) << dctx->nbits;
1749             dctx->nbits += 8;
1750             len--;
1751         }
1752         switch (dctx->state) {
1753           case START:
1754             /* Expect 16-bit zlib header. */
1755             if (dctx->nbits < 16)
1756                 goto finished;         /* done all we can */
1757
1758             /*
1759              * The header is stored as a big-endian 16-bit integer,
1760              * in contrast to the general little-endian policy in
1761              * the rest of the format :-(
1762              */
1763             header = (((dctx->bits & 0xFF00) >> 8) |
1764                       ((dctx->bits & 0x00FF) << 8));
1765             EATBITS(16);
1766
1767             /*
1768              * Check the header:
1769              *
1770              *  - bits 8-11 should be 1000 (Deflate/RFC1951)
1771              *  - bits 12-15 should be at most 0111 (window size)
1772              *  - bit 5 should be zero (no dictionary present)
1773              *  - we don't care about bits 6-7 (compression rate)
1774              *  - bits 0-4 should be set up to make the whole thing
1775              *    a multiple of 31 (checksum).
1776              */
1777             if ((header & 0x0F00) != 0x0800 ||
1778                 (header & 0xF000) >  0x7000 ||
1779                 (header & 0x0020) != 0x0000 ||
1780                 (header % 31) != 0)
1781                 goto decode_error;
1782
1783             dctx->state = OUTSIDEBLK;
1784             break;
1785           case OUTSIDEBLK:
1786             /* Expect 3-bit block header. */
1787             if (dctx->nbits < 3)
1788                 goto finished;         /* done all we can */
1789             bfinal = dctx->bits & 1;
1790             if (bfinal)
1791                 dctx->lastblock = TRUE;
1792             EATBITS(1);
1793             btype = dctx->bits & 3;
1794             EATBITS(2);
1795             if (btype == 0) {
1796                 int to_eat = dctx->nbits & 7;
1797                 dctx->state = UNCOMP_LEN;
1798                 EATBITS(to_eat);       /* align to byte boundary */
1799             } else if (btype == 1) {
1800                 dctx->currlentable = dctx->staticlentable;
1801                 dctx->currdisttable = dctx->staticdisttable;
1802                 dctx->state = INBLK;
1803             } else if (btype == 2) {
1804                 dctx->state = TREES_HDR;
1805             }
1806             debug(("recv: bfinal=%d btype=%d\n", bfinal, btype));
1807             break;
1808           case TREES_HDR:
1809             /*
1810              * Dynamic block header. Five bits of HLIT, five of
1811              * HDIST, four of HCLEN.
1812              */
1813             if (dctx->nbits < 5 + 5 + 4)
1814                 goto finished;         /* done all we can */
1815             dctx->hlit = 257 + (dctx->bits & 31);
1816             EATBITS(5);
1817             dctx->hdist = 1 + (dctx->bits & 31);
1818             EATBITS(5);
1819             dctx->hclen = 4 + (dctx->bits & 15);
1820             EATBITS(4);
1821             debug(("recv: hlit=%d hdist=%d hclen=%d\n", dctx->hlit,
1822                    dctx->hdist, dctx->hclen));
1823             dctx->lenptr = 0;
1824             dctx->state = TREES_LENLEN;
1825             memset(dctx->lenlen, 0, sizeof(dctx->lenlen));
1826             break;
1827           case TREES_LENLEN:
1828             if (dctx->nbits < 3)
1829                 goto finished;
1830             while (dctx->lenptr < dctx->hclen && dctx->nbits >= 3) {
1831                 dctx->lenlen[lenlenmap[dctx->lenptr++]] =
1832                     (unsigned char) (dctx->bits & 7);
1833                 debug(("recv: lenlen %d\n", (unsigned char) (dctx->bits & 7)));
1834                 EATBITS(3);
1835             }
1836             if (dctx->lenptr == dctx->hclen) {
1837                 dctx->lenlentable = mktable(dctx->lenlen, 19);
1838                 dctx->state = TREES_LEN;
1839                 dctx->lenptr = 0;
1840             }
1841             break;
1842           case TREES_LEN:
1843             if (dctx->lenptr >= dctx->hlit + dctx->hdist) {
1844                 dctx->currlentable = mktable(dctx->lengths, dctx->hlit);
1845                 dctx->currdisttable = mktable(dctx->lengths + dctx->hlit,
1846                                               dctx->hdist);
1847                 freetable(&dctx->lenlentable);
1848                 dctx->lenlentable = NULL;
1849                 dctx->state = INBLK;
1850                 break;
1851             }
1852             code = huflookup(&dctx->bits, &dctx->nbits, dctx->lenlentable);
1853             debug(("recv: codelen %d\n", code));
1854             if (code == -1)
1855                 goto finished;
1856             if (code == -2)
1857                 goto decode_error;
1858             if (code < 16)
1859                 dctx->lengths[dctx->lenptr++] = code;
1860             else {
1861                 dctx->lenextrabits = (code == 16 ? 2 : code == 17 ? 3 : 7);
1862                 dctx->lenaddon = (code == 18 ? 11 : 3);
1863                 dctx->lenrep = (code == 16 && dctx->lenptr > 0 ?
1864                                 dctx->lengths[dctx->lenptr - 1] : 0);
1865                 dctx->state = TREES_LENREP;
1866             }
1867             break;
1868           case TREES_LENREP:
1869             if (dctx->nbits < dctx->lenextrabits)
1870                 goto finished;
1871             rep =
1872                 dctx->lenaddon +
1873                 (dctx->bits & ((1 << dctx->lenextrabits) - 1));
1874             EATBITS(dctx->lenextrabits);
1875             if (dctx->lenextrabits)
1876                 debug(("recv: codelen-extrabits %d/%d\n", rep - dctx->lenaddon,
1877                        dctx->lenextrabits));
1878             while (rep > 0 && dctx->lenptr < dctx->hlit + dctx->hdist) {
1879                 dctx->lengths[dctx->lenptr] = dctx->lenrep;
1880                 dctx->lenptr++;
1881                 rep--;
1882             }
1883             dctx->state = TREES_LEN;
1884             break;
1885           case INBLK:
1886             code = huflookup(&dctx->bits, &dctx->nbits, dctx->currlentable);
1887             debug(("recv: litlen %d\n", code));
1888             if (code == -1)
1889                 goto finished;
1890             if (code == -2)
1891                 goto decode_error;
1892             if (code < 256)
1893                 emit_char(dctx, code);
1894             else if (code == 256) {
1895                 if (dctx->lastblock)
1896                     dctx->state = END;
1897                 else
1898                     dctx->state = OUTSIDEBLK;
1899                 if (dctx->currlentable != dctx->staticlentable) {
1900                     freetable(&dctx->currlentable);
1901                     dctx->currlentable = NULL;
1902                 }
1903                 if (dctx->currdisttable != dctx->staticdisttable) {
1904                     freetable(&dctx->currdisttable);
1905                     dctx->currdisttable = NULL;
1906                 }
1907             } else if (code < 286) {   /* static tree can give >285; ignore */
1908                 dctx->state = GOTLENSYM;
1909                 dctx->sym = code;
1910             }
1911             break;
1912           case GOTLENSYM:
1913             rec = &lencodes[dctx->sym - 257];
1914             if (dctx->nbits < rec->extrabits)
1915                 goto finished;
1916             dctx->len =
1917                 rec->min + (dctx->bits & ((1 << rec->extrabits) - 1));
1918             if (rec->extrabits)
1919                 debug(("recv: litlen-extrabits %d/%d\n",
1920                        dctx->len - rec->min, rec->extrabits));
1921             EATBITS(rec->extrabits);
1922             dctx->state = GOTLEN;
1923             break;
1924           case GOTLEN:
1925             code = huflookup(&dctx->bits, &dctx->nbits, dctx->currdisttable);
1926             debug(("recv: dist %d\n", code));
1927             if (code == -1)
1928                 goto finished;
1929             if (code == -2)
1930                 goto decode_error;
1931             dctx->state = GOTDISTSYM;
1932             dctx->sym = code;
1933             break;
1934           case GOTDISTSYM:
1935             rec = &distcodes[dctx->sym];
1936             if (dctx->nbits < rec->extrabits)
1937                 goto finished;
1938             dist = rec->min + (dctx->bits & ((1 << rec->extrabits) - 1));
1939             if (rec->extrabits)
1940                 debug(("recv: dist-extrabits %d/%d\n",
1941                        dist - rec->min, rec->extrabits));
1942             EATBITS(rec->extrabits);
1943             dctx->state = INBLK;
1944             while (dctx->len--)
1945                 emit_char(dctx, dctx->window[(dctx->winpos - dist) &
1946                                              (WINSIZE - 1)]);
1947             break;
1948           case UNCOMP_LEN:
1949             /*
1950              * Uncompressed block. We expect to see a 16-bit LEN.
1951              */
1952             if (dctx->nbits < 16)
1953                 goto finished;
1954             dctx->uncomplen = dctx->bits & 0xFFFF;
1955             EATBITS(16);
1956             dctx->state = UNCOMP_NLEN;
1957             break;
1958           case UNCOMP_NLEN:
1959             /*
1960              * Uncompressed block. We expect to see a 16-bit NLEN,
1961              * which should be the one's complement of the previous
1962              * LEN.
1963              */
1964             if (dctx->nbits < 16)
1965                 goto finished;
1966             nlen = dctx->bits & 0xFFFF;
1967             EATBITS(16);
1968             if (dctx->uncomplen == 0)
1969                 dctx->state = OUTSIDEBLK;       /* block is empty */
1970             else
1971                 dctx->state = UNCOMP_DATA;
1972             break;
1973           case UNCOMP_DATA:
1974             if (dctx->nbits < 8)
1975                 goto finished;
1976             emit_char(dctx, dctx->bits & 0xFF);
1977             EATBITS(8);
1978             if (--dctx->uncomplen == 0)
1979                 dctx->state = OUTSIDEBLK;       /* end of uncompressed block */
1980             break;
1981           case END:
1982             /*
1983              * End of compressed data. We align to a byte boundary,
1984              * and then look for format-specific trailer data.
1985              */
1986             {
1987                 int to_eat = dctx->nbits & 7;
1988                 EATBITS(to_eat);
1989             }
1990             if (dctx->type == DEFLATE_TYPE_ZLIB)
1991                 dctx->state = ADLER1;
1992             else
1993                 dctx->state = FINALSPIN;
1994             break;
1995           case ADLER1:
1996             if (dctx->nbits < 16)
1997                 goto finished;
1998             adler = (dctx->bits & 0xFF) << 8;
1999             EATBITS(8);
2000             adler |= (dctx->bits & 0xFF);
2001             EATBITS(8);
2002             if (adler != ((dctx->adler32 >> 16) & 0xFFFF))
2003                 goto decode_error;
2004             dctx->state = ADLER2;
2005             break;
2006           case ADLER2:
2007             if (dctx->nbits < 16)
2008                 goto finished;
2009             adler = (dctx->bits & 0xFF) << 8;
2010             EATBITS(8);
2011             adler |= (dctx->bits & 0xFF);
2012             EATBITS(8);
2013             if (adler != (dctx->adler32 & 0xFFFF))
2014                 goto decode_error;
2015             dctx->state = FINALSPIN;
2016             break;
2017           case FINALSPIN:
2018             /* Just ignore any trailing garbage on the data stream. */
2019             EATBITS(dctx->nbits);
2020             break;
2021         }
2022     }
2023
2024     finished:
2025     *outblock = dctx->outblk;
2026     *outlen = dctx->outlen;
2027     return 1;
2028
2029     decode_error:
2030     sfree(dctx->outblk);
2031     *outblock = dctx->outblk = NULL;
2032     *outlen = 0;
2033     return 0;
2034 }
2035
2036 #ifdef STANDALONE
2037
2038 int main(int argc, char **argv)
2039 {
2040     unsigned char buf[65536], *outbuf;
2041     int ret, outlen;
2042     deflate_decompress_ctx *dhandle;
2043     deflate_compress_ctx *chandle;
2044     int type = DEFLATE_TYPE_ZLIB, opts = TRUE, compress = FALSE;
2045     char *filename = NULL;
2046     FILE *fp;
2047
2048     while (--argc) {
2049         char *p = *++argv;
2050
2051         if (p[0] == '-' && opts) {
2052             if (!strcmp(p, "-d"))
2053                 type = DEFLATE_TYPE_BARE;
2054             if (!strcmp(p, "-c"))
2055                 compress = TRUE;
2056             else if (!strcmp(p, "--"))
2057                 opts = FALSE;          /* next thing is filename */
2058             else {
2059                 fprintf(stderr, "unknown command line option '%s'\n", p);
2060                 return 1;
2061             }
2062         } else if (!filename) {
2063             filename = p;
2064         } else {
2065             fprintf(stderr, "can only handle one filename\n");
2066             return 1;
2067         }
2068     }
2069
2070     if (compress) {
2071         chandle = deflate_compress_new(type);
2072         dhandle = NULL;
2073     } else {
2074         dhandle = deflate_decompress_new(type);
2075         chandle = NULL;
2076     }
2077
2078     if (filename)
2079         fp = fopen(filename, "rb");
2080     else
2081         fp = stdin;
2082
2083     if (!fp) {
2084         assert(filename);
2085         fprintf(stderr, "unable to open '%s'\n", filename);
2086         return 1;
2087     }
2088
2089     do {
2090         ret = fread(buf, 1, sizeof(buf), fp);
2091         if (dhandle) {
2092             if (ret > 0)
2093                 deflate_decompress_data(dhandle, buf, ret,
2094                                         (void **)&outbuf, &outlen);
2095         } else {
2096             if (ret > 0)
2097                 deflate_compress_data(chandle, buf, ret, DEFLATE_NO_FLUSH,
2098                                       (void **)&outbuf, &outlen);
2099             else
2100                 deflate_compress_data(chandle, buf, ret, DEFLATE_END_OF_DATA,
2101                                       (void **)&outbuf, &outlen);
2102         }
2103         if (outbuf) {
2104             if (outlen)
2105                 fwrite(outbuf, 1, outlen, stdout);
2106             sfree(outbuf);
2107         } else if (dhandle) {
2108             fprintf(stderr, "decoding error\n");
2109             return 1;
2110         }
2111     } while (ret > 0);
2112
2113     if (dhandle)
2114         deflate_decompress_free(dhandle);
2115     if (chandle)
2116         deflate_compress_free(chandle);
2117
2118     if (filename)
2119         fclose(fp);
2120
2121     return 0;
2122 }
2123
2124 #endif
2125
2126 #ifdef TESTMODE
2127
2128 int main(int argc, char **argv)
2129 {
2130     char *filename = NULL;
2131     FILE *fp;
2132     deflate_compress_ctx *chandle;
2133     deflate_decompress_ctx *dhandle;
2134     unsigned char buf[65536], *outbuf, *outbuf2;
2135     int ret, outlen, outlen2;
2136     int dlen = 0, clen = 0;
2137     int opts = TRUE;
2138
2139     while (--argc) {
2140         char *p = *++argv;
2141
2142         if (p[0] == '-' && opts) {
2143             if (!strcmp(p, "--"))
2144                 opts = FALSE;          /* next thing is filename */
2145             else {
2146                 fprintf(stderr, "unknown command line option '%s'\n", p);
2147                 return 1;
2148             }
2149         } else if (!filename) {
2150             filename = p;
2151         } else {
2152             fprintf(stderr, "can only handle one filename\n");
2153             return 1;
2154         }
2155     }
2156
2157     if (filename)
2158         fp = fopen(filename, "rb");
2159     else
2160         fp = stdin;
2161
2162     if (!fp) {
2163         assert(filename);
2164         fprintf(stderr, "unable to open '%s'\n", filename);
2165         return 1;
2166     }
2167
2168     chandle = deflate_compress_new(DEFLATE_TYPE_ZLIB);
2169     dhandle = deflate_decompress_new(DEFLATE_TYPE_ZLIB);
2170
2171     do {
2172         ret = fread(buf, 1, sizeof(buf), fp);
2173         if (ret <= 0) {
2174             deflate_compress_data(chandle, NULL, 0, DEFLATE_END_OF_DATA,
2175                                   (void **)&outbuf, &outlen);
2176         } else {
2177             dlen += ret;
2178             deflate_compress_data(chandle, buf, ret, DEFLATE_NO_FLUSH,
2179                                   (void **)&outbuf, &outlen);
2180         }
2181         if (outbuf) {
2182             clen += outlen;
2183             deflate_decompress_data(dhandle, outbuf, outlen,
2184                                     (void **)&outbuf2, &outlen2);
2185             sfree(outbuf);
2186             if (outbuf2) {
2187                 if (outlen2)
2188                     fwrite(outbuf2, 1, outlen2, stdout);
2189                 sfree(outbuf2);
2190             } else {
2191                 fprintf(stderr, "decoding error\n");
2192                 return 1;
2193             }
2194         }
2195     } while (ret > 0);
2196
2197     fprintf(stderr, "%d plaintext -> %d compressed\n", dlen, clen);
2198
2199     return 0;
2200 }
2201
2202 #endif