sshzlib.c
1 /*
2 * Zlib (RFC1950 / RFC1951) compression for PuTTY.
3 *
4 * There will no doubt be criticism of my decision to reimplement
5 * Zlib compression from scratch instead of using the existing zlib
6 * code. People will cry `reinventing the wheel'; they'll claim
7 * that the `fundamental basis of OSS' is code reuse; they'll want
8 * to see a really good reason for me having chosen not to use the
9 * existing code.
10 *
11 * Well, here are my reasons. Firstly, I don't want to link the
12 * whole of zlib into the PuTTY binary; PuTTY is justifiably proud
13 * of its small size and I think zlib contains a lot of unnecessary
14 * baggage for the kind of compression that SSH requires.
15 *
16 * Secondly, I also don't like the alternative of using zlib.dll.
17 * Another thing PuTTY is justifiably proud of is its ease of
18 * installation, and the last thing I want to do is to start
19 * mandating DLLs. Not only that, but there are two _kinds_ of
20 * zlib.dll kicking around, one with C calling conventions on the
21 * exported functions and another with WINAPI conventions, and
22 * there would be a significant danger of getting the wrong one.
23 *
24 * Thirdly, there seems to be a difference of opinion on the IETF
25 * secsh mailing list about the correct way to round off a
26 * compressed packet and start the next. In particular, there's
27 * some talk of switching to a mechanism zlib isn't currently
28 * capable of supporting (see below for an explanation). Given that
29 * sort of uncertainty, I thought it might be better to have code
30 * that will support even the zlib-incompatible worst case.
31 *
32 * Fourthly, it's a _second implementation_. Second implementations
33 * are fundamentally a Good Thing in standardisation efforts. The
34 * difference of opinion mentioned above has arisen _precisely_
35 * because there has been only one zlib implementation and
36 * everybody has used it. I don't intend that this should happen
37 * again.
38 */
39
40 #include <stdlib.h>
41 #include <assert.h>
42
43 /* FIXME */
44 #include <windows.h>
45 #include <stdio.h>
46 #include "putty.h"
47
48 #include "ssh.h"
49
50 /* ----------------------------------------------------------------------
51 * Basic LZ77 code. This bit is designed modularly, so it could be
52 * ripped out and used in a different LZ77 compressor. Go to it,
53 * and good luck :-)
54 */
55
56 struct LZ77InternalContext;
57 struct LZ77Context {
58 struct LZ77InternalContext *ictx;
59 void *userdata;
60 void (*literal)(struct LZ77Context *ctx, unsigned char c);
61 void (*match)(struct LZ77Context *ctx, int distance, int len);
62 };
63
64 /*
65 * Initialise the private fields of an LZ77Context. It's up to the
66 * user to initialise the public fields.
67 */
68 static int lz77_init(struct LZ77Context *ctx);
69
70 /*
71 * Supply data to be compressed. Will update the private fields of
72 * the LZ77Context, and will call literal() and match() to output.
73 * If `compress' is FALSE, it will never emit a match, but will
74 * instead call literal() for everything.
75 */
76 static void lz77_compress(struct LZ77Context *ctx,
77 unsigned char *data, int len, int compress);
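/*
 * A minimal sketch of how a consumer of this module might wire it
 * up (illustrative only, not part of the build; the example_*
 * names are invented here):
 */
#if 0
static void example_literal(struct LZ77Context *ctx, unsigned char c) {
    /* emit the single byte c to the output stream */
}
static void example_match(struct LZ77Context *ctx, int distance, int len) {
    /* emit a back-reference reaching `distance' bytes back, `len' long */
}
static void example_compress(unsigned char *data, int len) {
    struct LZ77Context ctx;
    ctx.literal = example_literal;
    ctx.match = example_match;
    ctx.userdata = NULL;               /* whatever state the callbacks need */
    if (lz77_init(&ctx))
        lz77_compress(&ctx, data, len, TRUE);   /* TRUE: look for matches */
}
#endif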
78
79 /*
80 * Modifiable parameters.
81 */
82 #define WINSIZE 32768 /* window size. Must be power of 2! */
83 #define HASHMAX 2039 /* one more than max hash value */
84 #define MAXMATCH 32 /* how many matches we track */
85 #define HASHCHARS 3 /* how many chars make a hash */
86
87 /*
88 * This compressor takes a less slapdash approach than the
89 * gzip/zlib one. Rather than allowing our hash chains to fall into
90 * disuse near the far end, we keep them doubly linked so we can
91 * _find_ the far end, and then every time we add a new byte to the
92 * window (thus rolling round by one and removing the previous
93 * byte), we can carefully remove the hash chain entry.
94 */
95
96 #define INVALID -1 /* invalid hash _and_ invalid offset */
97 struct WindowEntry {
98 short next, prev; /* array indices within the window */
99 short hashval;
100 };
101
102 struct HashEntry {
103 short first; /* window index of first in chain */
104 };
105
106 struct Match {
107 int distance, len;
108 };
109
110 struct LZ77InternalContext {
111 struct WindowEntry win[WINSIZE];
112 unsigned char data[WINSIZE];
113 int winpos;
114 struct HashEntry hashtab[HASHMAX];
115 unsigned char pending[HASHCHARS];
116 int npending;
117 };
118
119 static int lz77_hash(unsigned char *data) {
120 return (257*data[0] + 263*data[1] + 269*data[2]) % HASHMAX;
121 }
122
123 static int lz77_init(struct LZ77Context *ctx) {
124 struct LZ77InternalContext *st;
125 int i;
126
127 st = (struct LZ77InternalContext *)smalloc(sizeof(*st));
128 if (!st)
129 return 0;
130
131 ctx->ictx = st;
132
133 for (i = 0; i < WINSIZE; i++)
134 st->win[i].next = st->win[i].prev = st->win[i].hashval = INVALID;
135 for (i = 0; i < HASHMAX; i++)
136 st->hashtab[i].first = INVALID;
137 st->winpos = 0;
138
139 st->npending = 0;
140
141 return 1;
142 }
143
144 static void lz77_advance(struct LZ77InternalContext *st,
145 unsigned char c, int hash) {
146 int off;
147
148 /*
149 * Remove the hash entry at winpos from the tail of its chain,
150 * or empty the chain if it's the only thing on the chain.
151 */
152 if (st->win[st->winpos].prev != INVALID) {
153 st->win[st->win[st->winpos].prev].next = INVALID;
154 } else if (st->win[st->winpos].hashval != INVALID) {
155 st->hashtab[st->win[st->winpos].hashval].first = INVALID;
156 }
157
158 /*
159 * Create a new entry at winpos and add it to the head of its
160 * hash chain.
161 */
162 st->win[st->winpos].hashval = hash;
163 st->win[st->winpos].prev = INVALID;
164 off = st->win[st->winpos].next = st->hashtab[hash].first;
165 st->hashtab[hash].first = st->winpos;
166 if (off != INVALID)
167 st->win[off].prev = st->winpos;
168 st->data[st->winpos] = c;
169
170 /*
171 * Advance the window pointer.
172 */
173 st->winpos = (st->winpos + 1) & (WINSIZE-1);
174 }
175
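/*
 * CHARAT(k) reads the byte at offset k from the current position:
 * for k < 0 it looks back into the history window already stored
 * in st->data, and for k >= 0 it reads ahead into the incoming
 * `data' buffer, which has not yet been added to the window.
 */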
176 #define CHARAT(k) ( (k)<0 ? st->data[(st->winpos+k)&(WINSIZE-1)] : data[k] )
177
178 static void lz77_compress(struct LZ77Context *ctx,
179 unsigned char *data, int len, int compress) {
180 struct LZ77InternalContext *st = ctx->ictx;
181 int i, hash, distance, off, nmatch, matchlen, advance;
182 struct Match defermatch, matches[MAXMATCH];
183 int deferchr;
184
185 /*
186 * Add any pending characters from last time to the window. (We
187 * might not be able to.)
188 */
189 for (i = 0; i < st->npending; i++) {
190 unsigned char foo[HASHCHARS];
191 int j;
192 if (len + st->npending - i < HASHCHARS) {
193 /* Update the pending array. */
194 for (j = i; j < st->npending; j++)
195 st->pending[j-i] = st->pending[j];
196 break;
197 }
198 for (j = 0; j < HASHCHARS; j++)
199 foo[j] = (i + j < st->npending ? st->pending[i+j] :
200 data[i + j - st->npending]);
201 lz77_advance(st, foo[0], lz77_hash(foo));
202 }
203 st->npending -= i;
204
205 defermatch.len = 0;
206 while (len > 0) {
207
208 /* Don't even look for a match, if we're not compressing. */
209 if (compress && len >= HASHCHARS) {
210 /*
211 * Hash the next few characters.
212 */
213 hash = lz77_hash(data);
214
215 /*
216 * Look the hash up in the corresponding hash chain and see
217 * what we can find.
218 */
219 nmatch = 0;
220 for (off = st->hashtab[hash].first;
221 off != INVALID; off = st->win[off].next) {
222 /* distance = 1 if off == st->winpos-1 */
223 /* distance = WINSIZE if off == st->winpos */
224 distance = WINSIZE - (off + WINSIZE - st->winpos) % WINSIZE;
225 for (i = 0; i < HASHCHARS; i++)
226 if (CHARAT(i) != CHARAT(i-distance))
227 break;
228 if (i == HASHCHARS) {
229 matches[nmatch].distance = distance;
230 matches[nmatch].len = 3;
231 if (++nmatch >= MAXMATCH)
232 break;
233 }
234 }
235 } else {
236 nmatch = 0;
237 hash = INVALID;
238 }
239
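/*
 * What follows is `lazy matching' in the gzip style: instead of
 * committing to a match as soon as we find one, we hold it for one
 * byte (in `defermatch') in case a sufficiently better match
 * begins at the very next position.
 */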
240 if (nmatch > 0) {
241 /*
242 * We've now filled up matches[] with nmatch potential
243 * matches. Follow them down to find the longest. (We
244 * assume here that it's always worth favouring a
245 * longer match over a shorter one.)
246 */
247 matchlen = HASHCHARS;
248 while (matchlen < len) {
249 int j;
250 for (i = j = 0; i < nmatch; i++) {
251 if (CHARAT(matchlen) ==
252 CHARAT(matchlen - matches[i].distance)) {
253 matches[j++] = matches[i];
254 }
255 }
256 if (j == 0)
257 break;
258 matchlen++;
259 nmatch = j;
260 }
261
262 /*
263 * We've now got all the longest matches. We favour the
264 * shorter distances, which means we go with matches[0].
265 * So see if we want to defer it or throw it away.
266 */
267 matches[0].len = matchlen;
268 if (defermatch.len > 0) {
269 if (matches[0].len > defermatch.len + 1) {
270 /* We have a better match. Emit the deferred char,
271 * and defer this match. */
272 ctx->literal(ctx, (unsigned char)deferchr);
273 defermatch = matches[0];
274 deferchr = data[0];
275 advance = 1;
276 } else {
277 /* We don't have a better match. Do the deferred one. */
278 ctx->match(ctx, defermatch.distance, defermatch.len);
279 advance = defermatch.len - 1;
280 defermatch.len = 0;
281 }
282 } else {
283 /* There was no deferred match. Defer this one. */
284 defermatch = matches[0];
285 deferchr = data[0];
286 advance = 1;
287 }
288 } else {
289 /*
290 * We found no matches. Emit the deferred match, if
291 * any; otherwise emit a literal.
292 */
293 if (defermatch.len > 0) {
294 ctx->match(ctx, defermatch.distance, defermatch.len);
295 advance = defermatch.len - 1;
296 defermatch.len = 0;
297 } else {
298 ctx->literal(ctx, data[0]);
299 advance = 1;
300 }
301 }
302
303 /*
304 * Now advance the position by `advance' characters,
305 * keeping the window and hash chains consistent.
306 */
307 while (advance > 0) {
308 if (len >= HASHCHARS) {
309 lz77_advance(st, *data, lz77_hash(data));
310 } else {
311 st->pending[st->npending++] = *data;
312 }
313 data++;
314 len--;
315 advance--;
316 }
317 }
318 }
319
320 /* ----------------------------------------------------------------------
321 * Zlib compression. We always use the static Huffman tree option.
322 * Mostly this is because it's hard to scan a block in advance to
323 * work out better trees; dynamic trees are great when you're
324 * compressing a large file under no significant time constraint,
325 * but when you're compressing little bits in real time, things get
326 * hairier.
327 *
328 * I suppose it's possible that I could compute Huffman trees based
329 * on the frequencies in the _previous_ block, as a sort of
330 * heuristic, but I'm not confident that the gain would balance out
331 * having to transmit the trees.
332 */
333
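/*
 * For reference, the fixed (`static') Huffman code of RFC1951
 * section 3.2.6, which the routines below hard-code:
 *
 *   literals 0-143    8-bit codes starting at 00110000
 *   literals 144-255  9-bit codes starting at 110010000
 *   codes 256-279     7-bit codes starting at 0000000
 *   codes 280-287     8-bit codes starting at 11000000
 *   distance codes    5 bits each (the code number itself)
 */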
334 static struct LZ77Context ectx;
335
336 struct Outbuf {
337 unsigned char *outbuf;
338 int outlen, outsize;
339 unsigned long outbits;
340 int noutbits;
341 int firstblock;
342 int comp_disabled;
343 };
344
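/*
 * Deflate fills up each output byte starting at its least
 * significant bit, so outbits() packs values LSB-first and flushes
 * whole bytes as they are completed. Huffman codes, which the spec
 * writes MSB-first, therefore have to be bit-reversed before being
 * passed in; that is what mirrorbytes[] below is for.
 */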
345 static void outbits(struct Outbuf *out, unsigned long bits, int nbits) {
346 assert(out->noutbits + nbits <= 32);
347 out->outbits |= bits << out->noutbits;
348 out->noutbits += nbits;
349 while (out->noutbits >= 8) {
350 if (out->outlen >= out->outsize) {
351 out->outsize = out->outlen + 64;
352 out->outbuf = srealloc(out->outbuf, out->outsize);
353 }
354 out->outbuf[out->outlen++] = (unsigned char)(out->outbits & 0xFF);
355 out->outbits >>= 8;
356 out->noutbits -= 8;
357 }
358 }
359
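/*
 * mirrorbytes[b] is b with its eight bits reversed. A sketch of
 * the equivalence (illustrative only, not compiled):
 */
#if 0
static unsigned char mirror_byte(unsigned char b) {
    unsigned char r = 0;
    int i;
    for (i = 0; i < 8; i++)
        if (b & (1 << i))
            r |= 0x80 >> i;            /* bit i maps to bit 7-i */
    return r;                          /* == mirrorbytes[b] */
}
#endif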
360 static const unsigned char mirrorbytes[256] = {
361 0x00, 0x80, 0x40, 0xc0, 0x20, 0xa0, 0x60, 0xe0,
362 0x10, 0x90, 0x50, 0xd0, 0x30, 0xb0, 0x70, 0xf0,
363 0x08, 0x88, 0x48, 0xc8, 0x28, 0xa8, 0x68, 0xe8,
364 0x18, 0x98, 0x58, 0xd8, 0x38, 0xb8, 0x78, 0xf8,
365 0x04, 0x84, 0x44, 0xc4, 0x24, 0xa4, 0x64, 0xe4,
366 0x14, 0x94, 0x54, 0xd4, 0x34, 0xb4, 0x74, 0xf4,
367 0x0c, 0x8c, 0x4c, 0xcc, 0x2c, 0xac, 0x6c, 0xec,
368 0x1c, 0x9c, 0x5c, 0xdc, 0x3c, 0xbc, 0x7c, 0xfc,
369 0x02, 0x82, 0x42, 0xc2, 0x22, 0xa2, 0x62, 0xe2,
370 0x12, 0x92, 0x52, 0xd2, 0x32, 0xb2, 0x72, 0xf2,
371 0x0a, 0x8a, 0x4a, 0xca, 0x2a, 0xaa, 0x6a, 0xea,
372 0x1a, 0x9a, 0x5a, 0xda, 0x3a, 0xba, 0x7a, 0xfa,
373 0x06, 0x86, 0x46, 0xc6, 0x26, 0xa6, 0x66, 0xe6,
374 0x16, 0x96, 0x56, 0xd6, 0x36, 0xb6, 0x76, 0xf6,
375 0x0e, 0x8e, 0x4e, 0xce, 0x2e, 0xae, 0x6e, 0xee,
376 0x1e, 0x9e, 0x5e, 0xde, 0x3e, 0xbe, 0x7e, 0xfe,
377 0x01, 0x81, 0x41, 0xc1, 0x21, 0xa1, 0x61, 0xe1,
378 0x11, 0x91, 0x51, 0xd1, 0x31, 0xb1, 0x71, 0xf1,
379 0x09, 0x89, 0x49, 0xc9, 0x29, 0xa9, 0x69, 0xe9,
380 0x19, 0x99, 0x59, 0xd9, 0x39, 0xb9, 0x79, 0xf9,
381 0x05, 0x85, 0x45, 0xc5, 0x25, 0xa5, 0x65, 0xe5,
382 0x15, 0x95, 0x55, 0xd5, 0x35, 0xb5, 0x75, 0xf5,
383 0x0d, 0x8d, 0x4d, 0xcd, 0x2d, 0xad, 0x6d, 0xed,
384 0x1d, 0x9d, 0x5d, 0xdd, 0x3d, 0xbd, 0x7d, 0xfd,
385 0x03, 0x83, 0x43, 0xc3, 0x23, 0xa3, 0x63, 0xe3,
386 0x13, 0x93, 0x53, 0xd3, 0x33, 0xb3, 0x73, 0xf3,
387 0x0b, 0x8b, 0x4b, 0xcb, 0x2b, 0xab, 0x6b, 0xeb,
388 0x1b, 0x9b, 0x5b, 0xdb, 0x3b, 0xbb, 0x7b, 0xfb,
389 0x07, 0x87, 0x47, 0xc7, 0x27, 0xa7, 0x67, 0xe7,
390 0x17, 0x97, 0x57, 0xd7, 0x37, 0xb7, 0x77, 0xf7,
391 0x0f, 0x8f, 0x4f, 0xcf, 0x2f, 0xaf, 0x6f, 0xef,
392 0x1f, 0x9f, 0x5f, 0xdf, 0x3f, 0xbf, 0x7f, 0xff,
393 };
394
395 typedef struct {
396 short code, extrabits;
397 int min, max;
398 } coderecord;
399
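/*
 * The length and distance code tables from RFC1951 section 3.2.5:
 * `code' is the symbol to transmit, `extrabits' is how many raw
 * extra bits follow it, and [min,max] is the range of match
 * lengths or distances that the code covers.
 */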
400 static const coderecord lencodes[] = {
401 {257, 0, 3,3},
402 {258, 0, 4,4},
403 {259, 0, 5,5},
404 {260, 0, 6,6},
405 {261, 0, 7,7},
406 {262, 0, 8,8},
407 {263, 0, 9,9},
408 {264, 0, 10,10},
409 {265, 1, 11,12},
410 {266, 1, 13,14},
411 {267, 1, 15,16},
412 {268, 1, 17,18},
413 {269, 2, 19,22},
414 {270, 2, 23,26},
415 {271, 2, 27,30},
416 {272, 2, 31,34},
417 {273, 3, 35,42},
418 {274, 3, 43,50},
419 {275, 3, 51,58},
420 {276, 3, 59,66},
421 {277, 4, 67,82},
422 {278, 4, 83,98},
423 {279, 4, 99,114},
424 {280, 4, 115,130},
425 {281, 5, 131,162},
426 {282, 5, 163,194},
427 {283, 5, 195,226},
428 {284, 5, 227,257},
429 {285, 0, 258,258},
430 };
431
432 static const coderecord distcodes[] = {
433 {0, 0, 1,1},
434 {1, 0, 2,2},
435 {2, 0, 3,3},
436 {3, 0, 4,4},
437 {4, 1, 5,6},
438 {5, 1, 7,8},
439 {6, 2, 9,12},
440 {7, 2, 13,16},
441 {8, 3, 17,24},
442 {9, 3, 25,32},
443 {10, 4, 33,48},
444 {11, 4, 49,64},
445 {12, 5, 65,96},
446 {13, 5, 97,128},
447 {14, 6, 129,192},
448 {15, 6, 193,256},
449 {16, 7, 257,384},
450 {17, 7, 385,512},
451 {18, 8, 513,768},
452 {19, 8, 769,1024},
453 {20, 9, 1025,1536},
454 {21, 9, 1537,2048},
455 {22, 10, 2049,3072},
456 {23, 10, 3073,4096},
457 {24, 11, 4097,6144},
458 {25, 11, 6145,8192},
459 {26, 12, 8193,12288},
460 {27, 12, 12289,16384},
461 {28, 13, 16385,24576},
462 {29, 13, 24577,32768},
463 };
464
465 static void zlib_literal(struct LZ77Context *ectx, unsigned char c) {
466 struct Outbuf *out = (struct Outbuf *)ectx->userdata;
467
468 if (out->comp_disabled) {
469 /*
470 * We're in an uncompressed block, so just output the byte.
471 */
472 outbits(out, c, 8);
473 return;
474 }
475
476 if (c <= 143) {
477 /* 0 through 143 are 8 bits long starting at 00110000. */
478 outbits(out, mirrorbytes[0x30 + c], 8);
479 } else {
480 /* 144 through 255 are 9 bits long starting at 110010000. */
481 outbits(out, 1 + 2*mirrorbytes[0x90 - 144 + c], 9);
482 }
483 }
484
485 static void zlib_match(struct LZ77Context *ectx, int distance, int len) {
486 const coderecord *d, *l;
487 int i, j, k;
488 struct Outbuf *out = (struct Outbuf *)ectx->userdata;
489
490 assert(!out->comp_disabled);
491
492 while (len > 0) {
493 int thislen;
494
495 /*
496 * We can transmit matches of lengths 3 through 258
497 * inclusive. So if len exceeds 258, we must transmit in
498 * several steps, with 258 or less in each step.
499 *
500 * Specifically: if len >= 261, we can transmit 258 and be
501 * sure of having at least 3 left for the next step. And if
502 * len <= 258, we can just transmit len. But if len == 259
503 * or 260, we must transmit len-3.
504 */
505 thislen = (len > 260 ? 258 : len <= 258 ? len : len-3);
506 len -= thislen;
507
508 /*
509 * Binary-search to find which length code we're
510 * transmitting.
511 */
512 i = -1; j = sizeof(lencodes)/sizeof(*lencodes);
513 while (j - i >= 2) {
514 k = (j+i)/2;
515 if (thislen < lencodes[k].min)
516 j = k;
517 else if (thislen > lencodes[k].max)
518 i = k;
519 else {
520 l = &lencodes[k];
521 break; /* found it! */
522 }
523 }
524
525 /*
526 * Transmit the length code. 256-279 are seven bits
527 * starting at 0000000; 280-287 are eight bits starting at
528 * 11000000.
529 */
530 if (l->code <= 279) {
531 outbits(out, mirrorbytes[(l->code-256)*2], 7);
532 } else {
533 outbits(out, mirrorbytes[0xc0 - 280 + l->code], 8);
534 }
535
536 /*
537 * Transmit the extra bits.
538 */
539 if (l->extrabits)
540 outbits(out, thislen - l->min, l->extrabits);
541
542 /*
543 * Binary-search to find which distance code we're
544 * transmitting.
545 */
546 i = -1; j = sizeof(distcodes)/sizeof(*distcodes);
547 while (j - i >= 2) {
548 k = (j+i)/2;
549 if (distance < distcodes[k].min)
550 j = k;
551 else if (distance > distcodes[k].max)
552 i = k;
553 else {
554 d = &distcodes[k];
555 break; /* found it! */
556 }
557 }
558
559 /*
560 * Transmit the distance code. Five bits starting at 00000.
561 */
562 outbits(out, mirrorbytes[d->code*8], 5);
563
564 /*
565 * Transmit the extra bits.
566 */
567 if (d->extrabits)
568 outbits(out, distance - d->min, d->extrabits);
569 }
570 }
571
572 void zlib_compress_init(void) {
573 struct Outbuf *out;
574
575 lz77_init(&ectx);
576 ectx.literal = zlib_literal;
577 ectx.match = zlib_match;
578
579 out = smalloc(sizeof(struct Outbuf));
580 out->outbits = out->noutbits = 0;
581 out->firstblock = 1;
582 out->comp_disabled = FALSE;
583 ectx.userdata = out;
584
585 logevent("Initialised zlib (RFC1950) compression");
586 }
587
588 /*
589 * Turn off actual LZ77 analysis for one block, to facilitate
590 * construction of a precise-length IGNORE packet. Returns the
591 * length adjustment (which is only valid for packets < 65536
592 * bytes, but that seems reasonable enough).
593 */
594 int zlib_disable_compression(void) {
595 struct Outbuf *out = (struct Outbuf *)ectx.userdata;
596 int n;
597
598 out->comp_disabled = TRUE;
599
600 n = 0;
601 /*
602 * If this is the first block, we will start by outputting two
603 * header bytes, and then three bits to begin an uncompressed
604 * block. This will cost three bytes (because we will start on
605 * a byte boundary, this is certain).
606 */
607 if (out->firstblock) {
608 n = 3;
609 } else {
610 /*
611 * Otherwise, we will output seven bits to close the
612 * previous static block, and _then_ three bits to begin an
613 * uncompressed block, and then flush the current byte.
614 * This may cost two bytes or three, depending on noutbits.
615 */
616 n += (out->noutbits + 10 + 7) / 8;   /* round up to a whole byte */
617 }
618
619 /*
620 * Now we output four bytes for the length / ~length pair in
621 * the uncompressed block.
622 */
623 n += 4;
624
625 return n;
626 }
627
628 int zlib_compress_block(unsigned char *block, int len,
629 unsigned char **outblock, int *outlen) {
630 struct Outbuf *out = (struct Outbuf *)ectx.userdata;
631 int in_block;
632
633 out->outbuf = NULL;
634 out->outlen = out->outsize = 0;
635
636 /*
637 * If this is the first block, output the Zlib (RFC1950) header
638 * bytes 78 9C. (Deflate compression, 32K window size, default
639 * algorithm.)
640 */
641 if (out->firstblock) {
642 outbits(out, 0x9C78, 16);
643 out->firstblock = 0;
644
645 in_block = FALSE;
646 } else in_block = TRUE; /* a static block was left open by the last call */
647
648 if (out->comp_disabled) {
649 if (in_block)
650 outbits(out, 0, 7); /* close static block */
651
652 while (len > 0) {
653 int blen = (len < 65535 ? len : 65535);
654
655 /*
656 * Start a Deflate (RFC1951) uncompressed block. We
657 * transmit a zero bit (BFINAL=0), followed by a zero
658 * bit and a one bit (BTYPE=00). Of course these are in
659 * the wrong order (00 0).
660 */
661 outbits(out, 0, 3);
662
663 /*
664 * Output zero bits to align to a byte boundary.
665 */
666 if (out->noutbits)
667 outbits(out, 0, 8 - out->noutbits);
668
669 /*
670 * Output the block length, and then its one's
671 * complement. They're little-endian, so all we need to
672 * do is pass them straight to outbits() with bit count
673 * 16.
674 */
675 outbits(out, blen, 16);
676 outbits(out, blen ^ 0xFFFF, 16);
677
678 /*
679 * Do the `compression': we need to pass the data to
680 * lz77_compress so that it will be taken into account
681 * for subsequent (distance,length) pairs. But
682 * lz77_compress is passed FALSE, which means it won't
683 * actually find (or even look for) any matches; so
684 * every character will be passed straight to
685 * zlib_literal which will spot out->comp_disabled and
686 * emit in the uncompressed format.
687 */
688 lz77_compress(&ectx, block, blen, FALSE);
689
690 len -= blen;
691 block += blen;
692 }
693 outbits(out, 2, 3); /* open new block */
694 } else {
695 if (!in_block) {
696 /*
697 * Start a Deflate (RFC1951) fixed-trees block. We
698 * transmit a zero bit (BFINAL=0), followed by a zero
699 * bit and a one bit (BTYPE=01). Of course these are in
700 * the wrong order (01 0).
701 */
702 outbits(out, 2, 3);
703 }
704
705 /*
706 * Do the compression.
707 */
708 lz77_compress(&ectx, block, len, TRUE);
709
710 /*
711 * End the block (by transmitting code 256, which is
712 * 0000000 in fixed-tree mode), and transmit some empty
713 * blocks to ensure we have emitted the byte containing the
714 * last piece of genuine data. There are three ways we can
715 * do this:
716 *
717 * - Minimal flush. Output end-of-block and then open a
718 * new static block. This takes 10 bits, which is
719 * guaranteed to flush out the last genuine code in the
720 * closed block; but allegedly zlib can't handle it.
721 *
722 * - Zlib partial flush. Output EOB, open and close an
723 * empty static block, and _then_ open the new block.
724 * This is the best zlib can handle.
725 *
726 * - Zlib sync flush. Output EOB, then an empty
727 * _uncompressed_ block (000, then sync to byte
728 * boundary, then send bytes 00 00 FF FF). Then open the
729 * new block.
730 *
731 * For the moment, we will use Zlib partial flush.
732 */
733 outbits(out, 0, 7); /* close block */
734 outbits(out, 2, 3+7); /* empty static block */
735 outbits(out, 2, 3); /* open new block */
736 }
737
738 out->comp_disabled = FALSE;
739
740 *outblock = out->outbuf;
741 *outlen = out->outlen;
742
743 return 1;
744 }
745
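/*
 * Roughly how the compressor side of this interface is driven (a
 * sketch only, not part of the build; zlib_compress_init() must
 * have been called once beforehand, and the caller is assumed to
 * release the returned buffer with sfree):
 */
#if 0
static void example_send_compressed(unsigned char *payload, int len) {
    unsigned char *out;
    int outlen;
    zlib_compress_block(payload, len, &out, &outlen);
    /* ... wrap out[0..outlen) in an SSH packet and transmit it ... */
    sfree(out);
}
#endif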
746 /* ----------------------------------------------------------------------
747 * Zlib decompression. Of course, even though our compressor always
748 * uses static trees, our _decompressor_ has to be capable of
749 * handling dynamic trees if it sees them.
750 */
751
752 /*
753 * The way we work the Huffman decode is to have a table lookup on
754 * the first N bits of the input stream (in the order they arrive,
755 * of course, i.e. the first bit of the Huffman code is in bit 0).
756 * Each table entry lists the number of bits to consume, plus
757 * either an output code or a pointer to a secondary table.
758 */
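/*
 * Concretely, the primary table is indexed by up to 9 input bits
 * (see zlib_mktable below), and any code longer than that spills
 * into a secondary table covering up to 7 further bits; an entry
 * with code == -1 and a non-NULL nexttable means `consume nbits
 * bits and carry on in the subtable'.
 */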
759 struct zlib_table;
760 struct zlib_tableentry;
761
762 struct zlib_tableentry {
763 unsigned char nbits;
764 short code;
765 struct zlib_table *nexttable;
766 };
767
768 struct zlib_table {
769 int mask; /* mask applied to input bit stream */
770 struct zlib_tableentry *table;
771 };
772
773 #define MAXCODELEN 16
774 #define MAXSYMS 288
775
776 /*
777 * Build a single-level decode table, indexed by the next `bits'
778 * input bits, for all symbols whose codes share the pfxbits-bit
779 * prefix `pfx', recursing to build subtables for longer codes.
780 */
781 static struct zlib_table *zlib_mkonetab(int *codes, unsigned char *lengths,
782 int nsyms,
783 int pfx, int pfxbits, int bits) {
784 struct zlib_table *tab = smalloc(sizeof(struct zlib_table));
785 int pfxmask = (1 << pfxbits) - 1;
786 int nbits, i, j, code;
787
788 tab->table = smalloc((1 << bits) * sizeof(struct zlib_tableentry));
789 tab->mask = (1 << bits) - 1;
790
791 for (code = 0; code <= tab->mask; code++) {
792 tab->table[code].code = -1;
793 tab->table[code].nbits = 0;
794 tab->table[code].nexttable = NULL;
795 }
796
797 for (i = 0; i < nsyms; i++) {
798 if (lengths[i] <= pfxbits || (codes[i] & pfxmask) != pfx)
799 continue;
800 code = (codes[i] >> pfxbits) & tab->mask;
801 for (j = code; j <= tab->mask; j += 1 << (lengths[i]-pfxbits)) {
802 tab->table[j].code = i;
803 nbits = lengths[i] - pfxbits;
804 if (tab->table[j].nbits < nbits)
805 tab->table[j].nbits = nbits;
806 }
807 }
808 for (code = 0; code <= tab->mask; code++) {
809 if (tab->table[code].nbits <= bits)
810 continue;
811 /* Generate a subtable. */
812 tab->table[code].code = -1;
813 nbits = tab->table[code].nbits - bits;
814 if (nbits > 7)
815 nbits = 7;
816 tab->table[code].nbits = bits;
817 tab->table[code].nexttable = zlib_mkonetab(codes, lengths, nsyms,
818 pfx | (code << pfxbits),
819 pfxbits + bits, nbits);
820 }
821
822 return tab;
823 }
824
825 /*
826 * Build a decode table, given a set of Huffman tree lengths.
827 */
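/*
 * Codes are assigned canonically, as in RFC1951 section 3.2.2:
 * shorter codes come before longer ones, and codes of equal length
 * are assigned in symbol order. Each code is then stored
 * bit-reversed, since that is the order its bits appear in the
 * input stream.
 */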
828 static struct zlib_table *zlib_mktable(unsigned char *lengths, int nlengths) {
829 int count[MAXCODELEN], startcode[MAXCODELEN], codes[MAXSYMS];
830 int code, maxlen;
831 int i, j;
832
833 /* Count the codes of each length. */
834 maxlen = 0;
835 for (i = 1; i < MAXCODELEN; i++) count[i] = 0;
836 for (i = 0; i < nlengths; i++) {
837 count[lengths[i]]++;
838 if (maxlen < lengths[i])
839 maxlen = lengths[i];
840 }
841 /* Determine the starting code for each length block. */
842 code = 0;
843 for (i = 1; i < MAXCODELEN; i++) {
844 startcode[i] = code;
845 code += count[i];
846 code <<= 1;
847 }
848 /* Determine the code for each symbol. Mirrored, of course. */
849 for (i = 0; i < nlengths; i++) {
850 code = startcode[lengths[i]]++;
851 codes[i] = 0;
852 for (j = 0; j < lengths[i]; j++) {
853 codes[i] = (codes[i] << 1) | (code & 1);
854 code >>= 1;
855 }
856 }
857
858 /*
859 * Now we have the complete list of Huffman codes. Build a
860 * table.
861 */
862 return zlib_mkonetab(codes, lengths, nlengths, 0, 0,
863 maxlen < 9 ? maxlen : 9);
864 }
865
866 static int zlib_freetable(struct zlib_table ** ztab) {
867 struct zlib_table *tab;
868 int code;
869
870 if (ztab == NULL)
871 return -1;
872
873 if (*ztab == NULL)
874 return 0;
875
876 tab = *ztab;
877
878 for (code = 0; code <= tab->mask; code++)
879 if (tab->table[code].nexttable != NULL)
880 zlib_freetable(&tab->table[code].nexttable);
881
882 sfree(tab->table);
883 tab->table = NULL;
884
885 sfree(tab);
886 *ztab = NULL;
887
888 return(0);
889 }
890
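/*
 * Decompression state. An SSH packet can end part-way through any
 * element of the deflate stream, so everything we might be in the
 * middle of reading lives here: the bit buffer (bits/nbits), the
 * progress of any tree we are rebuilding, and the 32K sliding
 * window used to resolve back-references. Each call to
 * zlib_decompress_block() simply resumes from `state'.
 */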
891 static struct zlib_decompress_ctx {
892 struct zlib_table *staticlentable, *staticdisttable;
893 struct zlib_table *currlentable, *currdisttable, *lenlentable;
894 enum {
895 START, OUTSIDEBLK,
896 TREES_HDR, TREES_LENLEN, TREES_LEN, TREES_LENREP,
897 INBLK, GOTLENSYM, GOTLEN, GOTDISTSYM,
898 UNCOMP_LEN, UNCOMP_NLEN, UNCOMP_DATA
899 } state;
900 int sym, hlit, hdist, hclen, lenptr, lenextrabits, lenaddon, len, lenrep;
901 int uncomplen;
902 unsigned char lenlen[19];
903 unsigned char lengths[286+32];
904 unsigned long bits;
905 int nbits;
906 unsigned char window[WINSIZE];
907 int winpos;
908 unsigned char *outblk;
909 int outlen, outsize;
910 } dctx;
911
912 void zlib_decompress_init(void) {
913 unsigned char lengths[288];
914 memset(lengths, 8, 144);
915 memset(lengths+144, 9, 256-144);
916 memset(lengths+256, 7, 280-256);
917 memset(lengths+280, 8, 288-280);
918 dctx.staticlentable = zlib_mktable(lengths, 288);
919 memset(lengths, 5, 32);
920 dctx.staticdisttable = zlib_mktable(lengths, 32);
921 dctx.state = START; /* even before header */
922 dctx.currlentable = dctx.currdisttable = dctx.lenlentable = NULL;
923 dctx.bits = 0;
924 dctx.nbits = 0;
925 logevent("Initialised zlib (RFC1950) decompression");
926 }
927
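/*
 * Decode one Huffman symbol from the bit buffer using the given
 * table. On success the consumed bits are removed from *bitsp and
 * *nbitsp and the symbol is returned; if not enough bits have been
 * buffered yet, -1 is returned and the buffer is left untouched.
 */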
928 int zlib_huflookup(unsigned long *bitsp, int *nbitsp, struct zlib_table *tab) {
929 unsigned long bits = *bitsp;
930 int nbits = *nbitsp;
931 while (1) {
932 struct zlib_tableentry *ent;
933 ent = &tab->table[bits & tab->mask];
934 if (ent->nbits > nbits)
935 return -1; /* not enough data */
936 bits >>= ent->nbits;
937 nbits -= ent->nbits;
938 if (ent->code == -1)
939 tab = ent->nexttable;
940 else {
941 *bitsp = bits;
942 *nbitsp = nbits;
943 return ent->code;
944 }
945 }
946 }
947
948 static void zlib_emit_char(int c) {
949 dctx.window[dctx.winpos] = c;
950 dctx.winpos = (dctx.winpos + 1) & (WINSIZE-1);
951 if (dctx.outlen >= dctx.outsize) {
952 dctx.outsize = dctx.outlen + 512;
953 dctx.outblk = srealloc(dctx.outblk, dctx.outsize);
954 }
955 dctx.outblk[dctx.outlen++] = c;
956 }
957
958 #define EATBITS(n) ( dctx.nbits -= (n), dctx.bits >>= (n) )
959
960 int zlib_decompress_block(unsigned char *block, int len,
961 unsigned char **outblock, int *outlen) {
962 const coderecord *rec;
963 int code, blktype, rep, dist, nlen;
964 static const unsigned char lenlenmap[] = {
965 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15
966 };
967
968 dctx.outblk = NULL;
969 dctx.outsize = dctx.outlen = 0;
970
971 while (len > 0 || dctx.nbits > 0) {
972 while (dctx.nbits < 24 && len > 0) {
973 dctx.bits |= (*block++) << dctx.nbits;
974 dctx.nbits += 8;
975 len--;
976 }
977 switch (dctx.state) {
978 case START:
979 /* Expect 16-bit zlib header, which we'll dishonourably ignore. */
980 if (dctx.nbits < 16)
981 goto finished; /* done all we can */
982 EATBITS(16);
983 dctx.state = OUTSIDEBLK;
984 break;
985 case OUTSIDEBLK:
986 /* Expect 3-bit block header. */
987 if (dctx.nbits < 3)
988 goto finished; /* done all we can */
989 EATBITS(1);
990 blktype = dctx.bits & 3;
991 EATBITS(2);
992 if (blktype == 0) {
993 int to_eat = dctx.nbits & 7;
994 dctx.state = UNCOMP_LEN;
995 EATBITS(to_eat); /* align to byte boundary */
996 } else if (blktype == 1) {
997 dctx.currlentable = dctx.staticlentable;
998 dctx.currdisttable = dctx.staticdisttable;
999 dctx.state = INBLK;
1000 } else if (blktype == 2) {
1001 dctx.state = TREES_HDR;
1002 }
1003 break;
1004 case TREES_HDR:
1005 /*
1006 * Dynamic block header. Five bits of HLIT, five of
1007 * HDIST, four of HCLEN.
1008 */
1009 if (dctx.nbits < 5+5+4)
1010 goto finished; /* done all we can */
1011 dctx.hlit = 257 + (dctx.bits & 31); EATBITS(5);
1012 dctx.hdist = 1 + (dctx.bits & 31); EATBITS(5);
1013 dctx.hclen = 4 + (dctx.bits & 15); EATBITS(4);
1014 dctx.lenptr = 0;
1015 dctx.state = TREES_LENLEN;
1016 memset(dctx.lenlen, 0, sizeof(dctx.lenlen));
1017 break;
1018 case TREES_LENLEN:
1019 if (dctx.nbits < 3)
1020 goto finished;
1021 while (dctx.lenptr < dctx.hclen && dctx.nbits >= 3) {
1022 dctx.lenlen[lenlenmap[dctx.lenptr++]] =
1023 (unsigned char)(dctx.bits & 7);
1024 EATBITS(3);
1025 }
1026 if (dctx.lenptr == dctx.hclen) {
1027 dctx.lenlentable = zlib_mktable(dctx.lenlen, 19);
1028 dctx.state = TREES_LEN;
1029 dctx.lenptr = 0;
1030 }
1031 break;
1032 case TREES_LEN:
1033 if (dctx.lenptr >= dctx.hlit+dctx.hdist) {
1034 dctx.currlentable = zlib_mktable(dctx.lengths, dctx.hlit);
1035 dctx.currdisttable = zlib_mktable(dctx.lengths + dctx.hlit,
1036 dctx.hdist);
1037 zlib_freetable(&dctx.lenlentable);
1038 dctx.state = INBLK;
1039 break;
1040 }
1041 code = zlib_huflookup(&dctx.bits, &dctx.nbits, dctx.lenlentable);
1042 if (code == -1)
1043 goto finished;
1044 if (code < 16)
1045 dctx.lengths[dctx.lenptr++] = code;
1046 else {
1047 dctx.lenextrabits = (code == 16 ? 2 : code == 17 ? 3 : 7);
1048 dctx.lenaddon = (code == 18 ? 11 : 3);
1049 dctx.lenrep = (code == 16 && dctx.lenptr > 0 ?
1050 dctx.lengths[dctx.lenptr-1] : 0);
1051 dctx.state = TREES_LENREP;
1052 }
1053 break;
1054 case TREES_LENREP:
1055 if (dctx.nbits < dctx.lenextrabits)
1056 goto finished;
1057 rep = dctx.lenaddon + (dctx.bits & ((1<<dctx.lenextrabits)-1));
1058 EATBITS(dctx.lenextrabits);
1059 while (rep > 0 && dctx.lenptr < dctx.hlit+dctx.hdist) {
1060 dctx.lengths[dctx.lenptr] = dctx.lenrep;
1061 dctx.lenptr++;
1062 rep--;
1063 }
1064 dctx.state = TREES_LEN;
1065 break;
1066 case INBLK:
1067 code = zlib_huflookup(&dctx.bits, &dctx.nbits, dctx.currlentable);
1068 if (code == -1)
1069 goto finished;
1070 if (code < 256)
1071 zlib_emit_char(code);
1072 else if (code == 256) {
1073 dctx.state = OUTSIDEBLK;
1074 if (dctx.currlentable != dctx.staticlentable)
1075 zlib_freetable(&dctx.currlentable);
1076 if (dctx.currdisttable != dctx.staticdisttable)
1077 zlib_freetable(&dctx.currdisttable);
1078 } else if (code < 286) { /* static tree can give >285; ignore */
1079 dctx.state = GOTLENSYM;
1080 dctx.sym = code;
1081 }
1082 break;
1083 case GOTLENSYM:
1084 rec = &lencodes[dctx.sym - 257];
1085 if (dctx.nbits < rec->extrabits)
1086 goto finished;
1087 dctx.len = rec->min + (dctx.bits & ((1<<rec->extrabits)-1));
1088 EATBITS(rec->extrabits);
1089 dctx.state = GOTLEN;
1090 break;
1091 case GOTLEN:
1092 code = zlib_huflookup(&dctx.bits, &dctx.nbits, dctx.currdisttable);
1093 if (code == -1)
1094 goto finished;
1095 dctx.state = GOTDISTSYM;
1096 dctx.sym = code;
1097 break;
1098 case GOTDISTSYM:
1099 rec = &distcodes[dctx.sym];
1100 if (dctx.nbits < rec->extrabits)
1101 goto finished;
1102 dist = rec->min + (dctx.bits & ((1<<rec->extrabits)-1));
1103 EATBITS(rec->extrabits);
1104 dctx.state = INBLK;
1105 while (dctx.len--)
1106 zlib_emit_char(dctx.window[(dctx.winpos-dist) & (WINSIZE-1)]);
1107 break;
1108 case UNCOMP_LEN:
1109 /*
1110 * Uncompressed block. We expect to see a 16-bit LEN.
1111 */
1112 if (dctx.nbits < 16)
1113 goto finished;
1114 dctx.uncomplen = dctx.bits & 0xFFFF;
1115 EATBITS(16);
1116 dctx.state = UNCOMP_NLEN;
1117 break;
1118 case UNCOMP_NLEN:
1119 /*
1120 * Uncompressed block. We expect to see a 16-bit NLEN,
1121 * which should be the one's complement of the previous
1122 * LEN. (We are lenient and do not actually verify it.)
1123 */
1124 if (dctx.nbits < 16)
1125 goto finished;
1126 nlen = dctx.bits & 0xFFFF;
1127 EATBITS(16);
1128 dctx.state = (dctx.uncomplen ? UNCOMP_DATA : OUTSIDEBLK); /* LEN==0: empty block */
1129 break;
1130 case UNCOMP_DATA:
1131 if (dctx.nbits < 8)
1132 goto finished;
1133 zlib_emit_char(dctx.bits & 0xFF);
1134 EATBITS(8);
1135 if (--dctx.uncomplen == 0)
1136 dctx.state = OUTSIDEBLK; /* end of uncompressed block */
1137 break;
1138 }
1139 }
1140
1141 finished:
1142 *outblock = dctx.outblk;
1143 *outlen = dctx.outlen;
1144
1145 return 1;
1146 }
1147
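/*
 * The compression vtable exported to the rest of PuTTY (see
 * struct ssh_compress in ssh.h).
 */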
1148 const struct ssh_compress ssh_zlib = {
1149 "zlib",
1150 zlib_compress_init,
1151 zlib_compress_block,
1152 zlib_decompress_init,
1153 zlib_decompress_block,
1154 zlib_disable_compression
1155 };