Fix a few trivial compiler warnings
[u/mdw/putty] / sshzlib.c
1 /*
2 * Zlib (RFC1950 / RFC1951) compression for PuTTY.
3 *
4 * There will no doubt be criticism of my decision to reimplement
5 * Zlib compression from scratch instead of using the existing zlib
6 * code. People will cry `reinventing the wheel'; they'll claim
7 * that the `fundamental basis of OSS' is code reuse; they'll want
8 * to see a really good reason for me having chosen not to use the
9 * existing code.
10 *
11 * Well, here are my reasons. Firstly, I don't want to link the
12 * whole of zlib into the PuTTY binary; PuTTY is justifiably proud
13 * of its small size and I think zlib contains a lot of unnecessary
14 * baggage for the kind of compression that SSH requires.
15 *
16 * Secondly, I also don't like the alternative of using zlib.dll.
17 * Another thing PuTTY is justifiably proud of is its ease of
18 * installation, and the last thing I want to do is to start
19 * mandating DLLs. Not only that, but there are two _kinds_ of
20 * zlib.dll kicking around, one with C calling conventions on the
21 * exported functions and another with WINAPI conventions, and
22 * there would be a significant danger of getting the wrong one.
23 *
24 * Thirdly, there seems to be a difference of opinion on the IETF
25 * secsh mailing list about the correct way to round off a
26 * compressed packet and start the next. In particular, there's
27 * some talk of switching to a mechanism zlib isn't currently
28 * capable of supporting (see below for an explanation). Given that
29 * sort of uncertainty, I thought it might be better to have code
30 * that will support even the zlib-incompatible worst case.
31 *
32 * Fourthly, it's a _second implementation_. Second implementations
33 * are fundamentally a Good Thing in standardisation efforts. The
34 * difference of opinion mentioned above has arisen _precisely_
35 * because there has been only one zlib implementation and
36 * everybody has used it. I don't intend that this should happen
37 * again.
38 */
39
#include <stdlib.h>
#include <string.h>		       /* for memset() */
#include <assert.h>
42
43 /* FIXME */
44 #include <windows.h>
45 #include <stdio.h>
46 #include "putty.h"
47
48 #include "ssh.h"
49
50 /* ----------------------------------------------------------------------
51 * Basic LZ77 code. This bit is designed modularly, so it could be
52 * ripped out and used in a different LZ77 compressor. Go to it,
53 * and good luck :-)
54 */
55
56 struct LZ77InternalContext;
57 struct LZ77Context {
58 struct LZ77InternalContext *ictx;
59 void *userdata;
60 void (*literal)(struct LZ77Context *ctx, unsigned char c);
61 void (*match)(struct LZ77Context *ctx, int distance, int len);
62 };
63
64 /*
65 * Initialise the private fields of an LZ77Context. It's up to the
66 * user to initialise the public fields.
67 */
68 static int lz77_init(struct LZ77Context *ctx);
69
70 /*
71 * Supply data to be compressed. Will update the private fields of
72 * the LZ77Context, and will call literal() and match() to output.
73 * If `compress' is FALSE, it will never emit a match, but will
74 * instead call literal() for everything.
75 */
76 static void lz77_compress(struct LZ77Context *ctx,
77 unsigned char *data, int len, int compress);
78
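/*
 * A minimal usage sketch of this module, under the interface
 * described above (not compiled in; my_literal and my_match are
 * hypothetical callbacks standing in for a real output stage):
 */
#if 0
static void my_literal(struct LZ77Context *ctx, unsigned char c) {
    /* emit the literal byte c to the output stream */
}
static void my_match(struct LZ77Context *ctx, int distance, int len) {
    /* emit a back-reference: copy `len' bytes from `distance' bytes ago */
}
static void lz77_example(unsigned char *data, int datalen) {
    struct LZ77Context ctx;
    ctx.userdata = NULL;
    ctx.literal = my_literal;
    ctx.match = my_match;
    if (lz77_init(&ctx))
        lz77_compress(&ctx, data, datalen, TRUE);
}
#endif
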
79 /*
80 * Modifiable parameters.
81 */
82 #define WINSIZE 32768 /* window size. Must be power of 2! */
83 #define HASHMAX 2039 /* one more than max hash value */
84 #define MAXMATCH 32 /* how many matches we track */
85 #define HASHCHARS 3 /* how many chars make a hash */
86
87 /*
88 * This compressor takes a less slapdash approach than the
89 * gzip/zlib one. Rather than allowing our hash chains to fall into
90 * disuse near the far end, we keep them doubly linked so we can
91 * _find_ the far end, and then every time we add a new byte to the
92 * window (thus rolling round by one and removing the previous
93 * byte), we can carefully remove the hash chain entry.
94 */
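
/*
 * For illustration: after the window has absorbed the bytes
 * `a b c a b c', the chain for the hash of `abc' holds window
 * positions 3 and 0, most recent first. When the window pointer
 * comes round to overwrite position 0, lz77_advance() first
 * unlinks that entry from the tail of its chain.
 */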
95
96 #define INVALID -1 /* invalid hash _and_ invalid offset */
97 struct WindowEntry {
98 int next, prev; /* array indices within the window */
99 int hashval;
100 };
101
102 struct HashEntry {
103 int first; /* window index of first in chain */
104 };
105
106 struct Match {
107 int distance, len;
108 };
109
110 struct LZ77InternalContext {
111 struct WindowEntry win[WINSIZE];
112 unsigned char data[WINSIZE];
113 int winpos;
114 struct HashEntry hashtab[HASHMAX];
115 unsigned char pending[HASHCHARS];
116 int npending;
117 };
118
119 static int lz77_hash(unsigned char *data) {
120 return (257*data[0] + 263*data[1] + 269*data[2]) % HASHMAX;
121 }
122
123 static int lz77_init(struct LZ77Context *ctx) {
124 struct LZ77InternalContext *st;
125 int i;
126
127 st = (struct LZ77InternalContext *)smalloc(sizeof(*st));
128 if (!st)
129 return 0;
130
131 ctx->ictx = st;
132
133 for (i = 0; i < WINSIZE; i++)
134 st->win[i].next = st->win[i].prev = st->win[i].hashval = INVALID;
135 for (i = 0; i < HASHMAX; i++)
136 st->hashtab[i].first = INVALID;
137 st->winpos = 0;
138
139 st->npending = 0;
140
141 return 1;
142 }
143
144 static void lz77_advance(struct LZ77InternalContext *st,
145 unsigned char c, int hash) {
146 int off;
147
148 /*
149 * Remove the hash entry at winpos from the tail of its chain,
150 * or empty the chain if it's the only thing on the chain.
151 */
152 if (st->win[st->winpos].prev != INVALID) {
153 st->win[st->win[st->winpos].prev].next = INVALID;
154 } else if (st->win[st->winpos].hashval != INVALID) {
155 st->hashtab[st->win[st->winpos].hashval].first = INVALID;
156 }
157
158 /*
159 * Create a new entry at winpos and add it to the head of its
160 * hash chain.
161 */
162 st->win[st->winpos].hashval = hash;
163 st->win[st->winpos].prev = INVALID;
164 off = st->win[st->winpos].next = st->hashtab[hash].first;
165 st->hashtab[hash].first = st->winpos;
166 if (off != INVALID)
167 st->win[off].prev = st->winpos;
168 st->data[st->winpos] = c;
169
170 /*
171 * Advance the window pointer.
172 */
173 st->winpos = (st->winpos + 1) & (WINSIZE-1);
174 }
175
176 #define CHARAT(k) ( (k)<0 ? st->data[(st->winpos+k)&(WINSIZE-1)] : data[k] )
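
/*
 * In other words CHARAT(0) is the first byte of the incoming data,
 * and CHARAT(-1) is the byte most recently added to the window, so
 * the match finder below can index straight across the boundary
 * between old window contents and new input.
 */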
177
178 static void lz77_compress(struct LZ77Context *ctx,
179 unsigned char *data, int len, int compress) {
180 struct LZ77InternalContext *st = ctx->ictx;
181 int i, hash, distance, off, nmatch, matchlen, advance;
182 struct Match defermatch, matches[MAXMATCH];
    int deferchr = '\0';	       /* initialised only to placate compiler warnings */
184
185 /*
186 * Add any pending characters from last time to the window. (We
187 * might not be able to.)
188 */
189 for (i = 0; i < st->npending; i++) {
190 unsigned char foo[HASHCHARS];
191 int j;
192 if (len + st->npending - i < HASHCHARS) {
193 /* Update the pending array. */
194 for (j = i; j < st->npending; j++)
195 st->pending[j-i] = st->pending[j];
196 break;
197 }
198 for (j = 0; j < HASHCHARS; j++)
199 foo[j] = (i + j < st->npending ? st->pending[i+j] :
200 data[i + j - st->npending]);
201 lz77_advance(st, foo[0], lz77_hash(foo));
202 }
203 st->npending -= i;
204
205 defermatch.len = 0;
206 while (len > 0) {
207
208 /* Don't even look for a match, if we're not compressing. */
209 if (compress && len >= HASHCHARS) {
210 /*
211 * Hash the next few characters.
212 */
213 hash = lz77_hash(data);
214
215 /*
216 * Look the hash up in the corresponding hash chain and see
217 * what we can find.
218 */
219 nmatch = 0;
220 for (off = st->hashtab[hash].first;
221 off != INVALID; off = st->win[off].next) {
222 /* distance = 1 if off == st->winpos-1 */
223 /* distance = WINSIZE if off == st->winpos */
224 distance = WINSIZE - (off + WINSIZE - st->winpos) % WINSIZE;
225 for (i = 0; i < HASHCHARS; i++)
226 if (CHARAT(i) != CHARAT(i-distance))
227 break;
228 if (i == HASHCHARS) {
229 matches[nmatch].distance = distance;
230 matches[nmatch].len = 3;
231 if (++nmatch >= MAXMATCH)
232 break;
233 }
234 }
235 } else {
236 nmatch = 0;
237 hash = INVALID;
238 }
239
240 if (nmatch > 0) {
241 /*
242 * We've now filled up matches[] with nmatch potential
243 * matches. Follow them down to find the longest. (We
244 * assume here that it's always worth favouring a
245 * longer match over a shorter one.)
246 */
247 matchlen = HASHCHARS;
248 while (matchlen < len) {
249 int j;
250 for (i = j = 0; i < nmatch; i++) {
251 if (CHARAT(matchlen) ==
252 CHARAT(matchlen - matches[i].distance)) {
253 matches[j++] = matches[i];
254 }
255 }
256 if (j == 0)
257 break;
258 matchlen++;
259 nmatch = j;
260 }
261
262 /*
263 * We've now got all the longest matches. We favour the
264 * shorter distances, which means we go with matches[0].
265 * So see if we want to defer it or throw it away.
266 */
267 matches[0].len = matchlen;
268 if (defermatch.len > 0) {
269 if (matches[0].len > defermatch.len + 1) {
270 /* We have a better match. Emit the deferred char,
271 * and defer this match. */
272 ctx->literal(ctx, (unsigned char)deferchr);
273 defermatch = matches[0];
274 deferchr = data[0];
275 advance = 1;
276 } else {
277 /* We don't have a better match. Do the deferred one. */
278 ctx->match(ctx, defermatch.distance, defermatch.len);
279 advance = defermatch.len - 1;
280 defermatch.len = 0;
281 }
282 } else {
283 /* There was no deferred match. Defer this one. */
284 defermatch = matches[0];
285 deferchr = data[0];
286 advance = 1;
287 }
288 } else {
289 /*
290 * We found no matches. Emit the deferred match, if
291 * any; otherwise emit a literal.
292 */
293 if (defermatch.len > 0) {
294 ctx->match(ctx, defermatch.distance, defermatch.len);
295 advance = defermatch.len - 1;
296 defermatch.len = 0;
297 } else {
298 ctx->literal(ctx, data[0]);
299 advance = 1;
300 }
301 }
302
303 /*
304 * Now advance the position by `advance' characters,
305 * keeping the window and hash chains consistent.
306 */
307 while (advance > 0) {
308 if (len >= HASHCHARS) {
309 lz77_advance(st, *data, lz77_hash(data));
310 } else {
311 st->pending[st->npending++] = *data;
312 }
313 data++;
314 len--;
315 advance--;
316 }
317 }
318 }
319
320 /* ----------------------------------------------------------------------
321 * Zlib compression. We always use the static Huffman tree option.
322 * Mostly this is because it's hard to scan a block in advance to
323 * work out better trees; dynamic trees are great when you're
324 * compressing a large file under no significant time constraint,
325 * but when you're compressing little bits in real time, things get
326 * hairier.
327 *
328 * I suppose it's possible that I could compute Huffman trees based
329 * on the frequencies in the _previous_ block, as a sort of
330 * heuristic, but I'm not confident that the gain would balance out
331 * having to transmit the trees.
332 */
333
334 static struct LZ77Context ectx;
335
336 struct Outbuf {
337 unsigned char *outbuf;
338 int outlen, outsize;
339 unsigned long outbits;
340 int noutbits;
341 int firstblock;
342 int comp_disabled;
343 };
344
345 static void outbits(struct Outbuf *out, unsigned long bits, int nbits) {
346 assert(out->noutbits + nbits <= 32);
347 out->outbits |= bits << out->noutbits;
348 out->noutbits += nbits;
349 while (out->noutbits >= 8) {
350 if (out->outlen >= out->outsize) {
351 out->outsize = out->outlen + 64;
352 out->outbuf = srealloc(out->outbuf, out->outsize);
353 }
354 out->outbuf[out->outlen++] = (unsigned char)(out->outbits & 0xFF);
355 out->outbits >>= 8;
356 out->noutbits -= 8;
357 }
358 }
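
/*
 * Note that bits go out least-significant first, as Deflate
 * requires: for example, outbits(out, 2, 3) queues the bit sequence
 * 0, 1, 0, and a byte is flushed to the output buffer as soon as
 * eight bits have accumulated.
 */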
359
360 static const unsigned char mirrorbytes[256] = {
361 0x00, 0x80, 0x40, 0xc0, 0x20, 0xa0, 0x60, 0xe0,
362 0x10, 0x90, 0x50, 0xd0, 0x30, 0xb0, 0x70, 0xf0,
363 0x08, 0x88, 0x48, 0xc8, 0x28, 0xa8, 0x68, 0xe8,
364 0x18, 0x98, 0x58, 0xd8, 0x38, 0xb8, 0x78, 0xf8,
365 0x04, 0x84, 0x44, 0xc4, 0x24, 0xa4, 0x64, 0xe4,
366 0x14, 0x94, 0x54, 0xd4, 0x34, 0xb4, 0x74, 0xf4,
367 0x0c, 0x8c, 0x4c, 0xcc, 0x2c, 0xac, 0x6c, 0xec,
368 0x1c, 0x9c, 0x5c, 0xdc, 0x3c, 0xbc, 0x7c, 0xfc,
369 0x02, 0x82, 0x42, 0xc2, 0x22, 0xa2, 0x62, 0xe2,
370 0x12, 0x92, 0x52, 0xd2, 0x32, 0xb2, 0x72, 0xf2,
371 0x0a, 0x8a, 0x4a, 0xca, 0x2a, 0xaa, 0x6a, 0xea,
372 0x1a, 0x9a, 0x5a, 0xda, 0x3a, 0xba, 0x7a, 0xfa,
373 0x06, 0x86, 0x46, 0xc6, 0x26, 0xa6, 0x66, 0xe6,
374 0x16, 0x96, 0x56, 0xd6, 0x36, 0xb6, 0x76, 0xf6,
375 0x0e, 0x8e, 0x4e, 0xce, 0x2e, 0xae, 0x6e, 0xee,
376 0x1e, 0x9e, 0x5e, 0xde, 0x3e, 0xbe, 0x7e, 0xfe,
377 0x01, 0x81, 0x41, 0xc1, 0x21, 0xa1, 0x61, 0xe1,
378 0x11, 0x91, 0x51, 0xd1, 0x31, 0xb1, 0x71, 0xf1,
379 0x09, 0x89, 0x49, 0xc9, 0x29, 0xa9, 0x69, 0xe9,
380 0x19, 0x99, 0x59, 0xd9, 0x39, 0xb9, 0x79, 0xf9,
381 0x05, 0x85, 0x45, 0xc5, 0x25, 0xa5, 0x65, 0xe5,
382 0x15, 0x95, 0x55, 0xd5, 0x35, 0xb5, 0x75, 0xf5,
383 0x0d, 0x8d, 0x4d, 0xcd, 0x2d, 0xad, 0x6d, 0xed,
384 0x1d, 0x9d, 0x5d, 0xdd, 0x3d, 0xbd, 0x7d, 0xfd,
385 0x03, 0x83, 0x43, 0xc3, 0x23, 0xa3, 0x63, 0xe3,
386 0x13, 0x93, 0x53, 0xd3, 0x33, 0xb3, 0x73, 0xf3,
387 0x0b, 0x8b, 0x4b, 0xcb, 0x2b, 0xab, 0x6b, 0xeb,
388 0x1b, 0x9b, 0x5b, 0xdb, 0x3b, 0xbb, 0x7b, 0xfb,
389 0x07, 0x87, 0x47, 0xc7, 0x27, 0xa7, 0x67, 0xe7,
390 0x17, 0x97, 0x57, 0xd7, 0x37, 0xb7, 0x77, 0xf7,
391 0x0f, 0x8f, 0x4f, 0xcf, 0x2f, 0xaf, 0x6f, 0xef,
392 0x1f, 0x9f, 0x5f, 0xdf, 0x3f, 0xbf, 0x7f, 0xff,
393 };
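
/*
 * mirrorbytes[b] is b with its eight bits reversed; it turns a
 * Huffman code, which RFC1951 defines most-significant-bit first,
 * into the least-significant-first form that outbits() wants. For
 * example, mirrorbytes[0x71] (01110001) is 0x8e (10001110). A table
 * like this could be generated by something along these lines
 * (a sketch, not compiled in):
 */
#if 0
static unsigned char mirror(unsigned char b) {
    unsigned char r = 0;
    int i;
    for (i = 0; i < 8; i++) {
        r = (r << 1) | (b & 1);    /* the first bit taken from b ends up
                                    * as the most significant bit of r */
        b >>= 1;
    }
    return r;
}
#endif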
394
395 typedef struct {
396 int code, extrabits, min, max;
397 } coderecord;
398
399 static const coderecord lencodes[] = {
400 {257, 0, 3,3},
401 {258, 0, 4,4},
402 {259, 0, 5,5},
403 {260, 0, 6,6},
404 {261, 0, 7,7},
405 {262, 0, 8,8},
406 {263, 0, 9,9},
407 {264, 0, 10,10},
408 {265, 1, 11,12},
409 {266, 1, 13,14},
410 {267, 1, 15,16},
411 {268, 1, 17,18},
412 {269, 2, 19,22},
413 {270, 2, 23,26},
414 {271, 2, 27,30},
415 {272, 2, 31,34},
416 {273, 3, 35,42},
417 {274, 3, 43,50},
418 {275, 3, 51,58},
419 {276, 3, 59,66},
420 {277, 4, 67,82},
421 {278, 4, 83,98},
422 {279, 4, 99,114},
423 {280, 4, 115,130},
424 {281, 5, 131,162},
425 {282, 5, 163,194},
426 {283, 5, 195,226},
427 {284, 5, 227,257},
428 {285, 0, 258,258},
429 };
430
431 static const coderecord distcodes[] = {
432 {0, 0, 1,1},
433 {1, 0, 2,2},
434 {2, 0, 3,3},
435 {3, 0, 4,4},
436 {4, 1, 5,6},
437 {5, 1, 7,8},
438 {6, 2, 9,12},
439 {7, 2, 13,16},
440 {8, 3, 17,24},
441 {9, 3, 25,32},
442 {10, 4, 33,48},
443 {11, 4, 49,64},
444 {12, 5, 65,96},
445 {13, 5, 97,128},
446 {14, 6, 129,192},
447 {15, 6, 193,256},
448 {16, 7, 257,384},
449 {17, 7, 385,512},
450 {18, 8, 513,768},
451 {19, 8, 769,1024},
452 {20, 9, 1025,1536},
453 {21, 9, 1537,2048},
454 {22, 10, 2049,3072},
455 {23, 10, 3073,4096},
456 {24, 11, 4097,6144},
457 {25, 11, 6145,8192},
458 {26, 12, 8193,12288},
459 {27, 12, 12289,16384},
460 {28, 13, 16385,24576},
461 {29, 13, 24577,32768},
462 };
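
/*
 * To read these tables: a length or distance in the range [min,max]
 * is sent as `code', followed by `extrabits' further bits giving the
 * offset from `min'. For example, a match of length 12 at distance
 * 1000 is sent as length code 265 plus one extra bit of value 1
 * (12 - 11), then distance code 19 plus eight extra bits of value
 * 231 (1000 - 769).
 */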
463
464 static void zlib_literal(struct LZ77Context *ectx, unsigned char c) {
465 struct Outbuf *out = (struct Outbuf *)ectx->userdata;
466
467 if (out->comp_disabled) {
468 /*
469 * We're in an uncompressed block, so just output the byte.
470 */
471 outbits(out, c, 8);
472 return;
473 }
474
475 if (c <= 143) {
476 /* 0 through 143 are 8 bits long starting at 00110000. */
477 outbits(out, mirrorbytes[0x30 + c], 8);
478 } else {
479 /* 144 through 255 are 9 bits long starting at 110010000. */
480 outbits(out, 1 + 2*mirrorbytes[0x90 - 144 + c], 9);
481 }
482 }
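
/*
 * For example, the literal byte 0x41 gets the 8-bit fixed-tree code
 * 0x30 + 0x41 = 0x71 (01110001); since Huffman codes are sent
 * MSB-first, what we actually hand to outbits() is the mirrored
 * value mirrorbytes[0x71] = 0x8e.
 */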
483
484 static void zlib_match(struct LZ77Context *ectx, int distance, int len) {
    const coderecord *d = NULL, *l = NULL;
486 int i, j, k;
487 struct Outbuf *out = (struct Outbuf *)ectx->userdata;
488
489 assert(!out->comp_disabled);
490
491 while (len > 0) {
492 int thislen;
493
494 /*
495 * We can transmit matches of lengths 3 through 258
496 * inclusive. So if len exceeds 258, we must transmit in
497 * several steps, with 258 or less in each step.
498 *
499 * Specifically: if len >= 261, we can transmit 258 and be
500 * sure of having at least 3 left for the next step. And if
501 * len <= 258, we can just transmit len. But if len == 259
502 * or 260, we must transmit len-3.
503 */
504 thislen = (len > 260 ? 258 : len <= 258 ? len : len-3);
505 len -= thislen;
506
507 /*
508 * Binary-search to find which length code we're
509 * transmitting.
510 */
511 i = -1; j = sizeof(lencodes)/sizeof(*lencodes);
512 while (j - i >= 2) {
513 k = (j+i)/2;
514 if (thislen < lencodes[k].min)
515 j = k;
516 else if (thislen > lencodes[k].max)
517 i = k;
518 else {
519 l = &lencodes[k];
520 break; /* found it! */
521 }
522 }
523
524 /*
525 * Transmit the length code. 256-279 are seven bits
526 * starting at 0000000; 280-287 are eight bits starting at
527 * 11000000.
528 */
529 if (l->code <= 279) {
530 outbits(out, mirrorbytes[(l->code-256)*2], 7);
531 } else {
532 outbits(out, mirrorbytes[0xc0 - 280 + l->code], 8);
533 }
534
535 /*
536 * Transmit the extra bits.
537 */
538 if (l->extrabits)
539 outbits(out, thislen - l->min, l->extrabits);
540
541 /*
542 * Binary-search to find which distance code we're
543 * transmitting.
544 */
545 i = -1; j = sizeof(distcodes)/sizeof(*distcodes);
546 while (j - i >= 2) {
547 k = (j+i)/2;
548 if (distance < distcodes[k].min)
549 j = k;
550 else if (distance > distcodes[k].max)
551 i = k;
552 else {
553 d = &distcodes[k];
554 break; /* found it! */
555 }
556 }
557
558 /*
559 * Transmit the distance code. Five bits starting at 00000.
560 */
561 outbits(out, mirrorbytes[d->code*8], 5);
562
563 /*
564 * Transmit the extra bits.
565 */
566 if (d->extrabits)
567 outbits(out, distance - d->min, d->extrabits);
568 }
569 }
570
571 void zlib_compress_init(void) {
572 struct Outbuf *out;
573
574 lz77_init(&ectx);
575 ectx.literal = zlib_literal;
576 ectx.match = zlib_match;
577
578 out = smalloc(sizeof(struct Outbuf));
579 out->outbits = out->noutbits = 0;
580 out->firstblock = 1;
581 out->comp_disabled = FALSE;
582 ectx.userdata = out;
583
584 logevent("Initialised zlib (RFC1950) compression");
585 }
586
587 /*
588 * Turn off actual LZ77 analysis for one block, to facilitate
589 * construction of a precise-length IGNORE packet. Returns the
590 * length adjustment (which is only valid for packets < 65536
591 * bytes, but that seems reasonable enough).
592 */
593 int zlib_disable_compression(void) {
594 struct Outbuf *out = (struct Outbuf *)ectx.userdata;
595 int n;
596
597 out->comp_disabled = TRUE;
598
599 n = 0;
600 /*
601 * If this is the first block, we will start by outputting two
602 * header bytes, and then three bits to begin an uncompressed
603 * block. This will cost three bytes (because we will start on
604 * a byte boundary, this is certain).
605 */
606 if (out->firstblock) {
607 n = 3;
608 } else {
609 /*
610 * Otherwise, we will output seven bits to close the
611 * previous static block, and _then_ three bits to begin an
612 * uncompressed block, and then flush the current byte.
613 * This may cost two bytes or three, depending on noutbits.
614 */
        n += (out->noutbits + 10 + 7) / 8;   /* round up to whole bytes */
616 }
617
618 /*
619 * Now we output four bytes for the length / ~length pair in
620 * the uncompressed block.
621 */
622 n += 4;
623
624 return n;
625 }
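
/*
 * For example, on a non-first block with three bits already
 * buffered, closing the old static block and aligning for the
 * stored-block header costs two whole bytes, so the adjustment
 * returned would be 2 + 4 = 6 bytes on top of the payload length.
 */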
626
627 int zlib_compress_block(unsigned char *block, int len,
628 unsigned char **outblock, int *outlen) {
629 struct Outbuf *out = (struct Outbuf *)ectx.userdata;
630 int in_block;
631
632 out->outbuf = NULL;
633 out->outlen = out->outsize = 0;
634
635 /*
636 * If this is the first block, output the Zlib (RFC1950) header
637 * bytes 78 9C. (Deflate compression, 32K window size, default
638 * algorithm.)
639 */
    if (out->firstblock) {
        outbits(out, 0x9C78, 16);
        out->firstblock = 0;

        in_block = FALSE;
    } else {
        /* Every previous call leaves a static block open, so we are
         * already inside one. */
        in_block = TRUE;
    }
646
647 if (out->comp_disabled) {
648 if (in_block)
649 outbits(out, 0, 7); /* close static block */
650
651 while (len > 0) {
652 int blen = (len < 65535 ? len : 65535);
653
            /*
             * Start a Deflate (RFC1951) uncompressed block. We
             * transmit a zero bit (BFINAL=0) followed by two more
             * zero bits (BTYPE=00); since all three bits are zero,
             * the bit order doesn't matter here.
             */
660 outbits(out, 0, 3);
661
662 /*
663 * Output zero bits to align to a byte boundary.
664 */
665 if (out->noutbits)
666 outbits(out, 0, 8 - out->noutbits);
667
668 /*
669 * Output the block length, and then its one's
670 * complement. They're little-endian, so all we need to
671 * do is pass them straight to outbits() with bit count
672 * 16.
673 */
674 outbits(out, blen, 16);
675 outbits(out, blen ^ 0xFFFF, 16);
676
677 /*
678 * Do the `compression': we need to pass the data to
679 * lz77_compress so that it will be taken into account
680 * for subsequent (distance,length) pairs. But
681 * lz77_compress is passed FALSE, which means it won't
682 * actually find (or even look for) any matches; so
683 * every character will be passed straight to
684 * zlib_literal which will spot out->comp_disabled and
685 * emit in the uncompressed format.
686 */
687 lz77_compress(&ectx, block, blen, FALSE);
688
689 len -= blen;
690 block += blen;
691 }
692 outbits(out, 2, 3); /* open new block */
693 } else {
694 if (!in_block) {
695 /*
696 * Start a Deflate (RFC1951) fixed-trees block. We
697 * transmit a zero bit (BFINAL=0), followed by a zero
698 * bit and a one bit (BTYPE=01). Of course these are in
699 * the wrong order (01 0).
700 */
701 outbits(out, 2, 3);
702 }
703
704 /*
705 * Do the compression.
706 */
707 lz77_compress(&ectx, block, len, TRUE);
708
709 /*
710 * End the block (by transmitting code 256, which is
711 * 0000000 in fixed-tree mode), and transmit some empty
712 * blocks to ensure we have emitted the byte containing the
713 * last piece of genuine data. There are three ways we can
714 * do this:
715 *
716 * - Minimal flush. Output end-of-block and then open a
717 * new static block. This takes 9 bits, which is
718 * guaranteed to flush out the last genuine code in the
719 * closed block; but allegedly zlib can't handle it.
720 *
721 * - Zlib partial flush. Output EOB, open and close an
722 * empty static block, and _then_ open the new block.
723 * This is the best zlib can handle.
724 *
725 * - Zlib sync flush. Output EOB, then an empty
726 * _uncompressed_ block (000, then sync to byte
727 * boundary, then send bytes 00 00 FF FF). Then open the
728 * new block.
729 *
730 * For the moment, we will use Zlib partial flush.
731 */
732 outbits(out, 0, 7); /* close block */
733 outbits(out, 2, 3+7); /* empty static block */
734 outbits(out, 2, 3); /* open new block */
735 }
736
737 out->comp_disabled = FALSE;
738
739 *outblock = out->outbuf;
740 *outlen = out->outlen;
741
742 return 1;
743 }
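
/*
 * A minimal usage sketch of the compressor side (not compiled in;
 * error handling omitted):
 */
#if 0
static void zlib_example_compress(unsigned char *data, int len) {
    unsigned char *out;
    int outlen;
    zlib_compress_init();
    zlib_compress_block(data, len, &out, &outlen);
    /* `out' now holds `outlen' bytes of zlib stream; free it when done */
    sfree(out);
}
#endif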
744
745 /* ----------------------------------------------------------------------
746 * Zlib decompression. Of course, even though our compressor always
747 * uses static trees, our _decompressor_ has to be capable of
748 * handling dynamic trees if it sees them.
749 */
750
751 /*
752 * The way we work the Huffman decode is to have a table lookup on
753 * the first N bits of the input stream (in the order they arrive,
754 * of course, i.e. the first bit of the Huffman code is in bit 0).
755 * Each table entry lists the number of bits to consume, plus
756 * either an output code or a pointer to a secondary table.
757 */
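
/*
 * For example, with a nine-bit primary table the decoder peeks at
 * the bottom nine bits of its buffer and indexes the table; the
 * entry either says `consume nbits bits and emit this code', or,
 * for codes longer than nine bits, `consume nine bits and carry on
 * in nexttable'.
 */
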
758 struct zlib_table;
759 struct zlib_tableentry;
760
761 struct zlib_tableentry {
762 unsigned char nbits;
763 int code;
764 struct zlib_table *nexttable;
765 };
766
767 struct zlib_table {
768 int mask; /* mask applied to input bit stream */
769 struct zlib_tableentry *table;
770 };
771
772 #define MAXCODELEN 16
773 #define MAXSYMS 288
774
775 /*
776 * Build a single-level decode table for elements
777 * [minlength,maxlength) of the provided code/length tables, and
778 * recurse to build subtables.
779 */
780 static struct zlib_table *zlib_mkonetab(int *codes, unsigned char *lengths,
781 int nsyms,
782 int pfx, int pfxbits, int bits) {
783 struct zlib_table *tab = smalloc(sizeof(struct zlib_table));
784 int pfxmask = (1 << pfxbits) - 1;
785 int nbits, i, j, code;
786
787 tab->table = smalloc((1 << bits) * sizeof(struct zlib_tableentry));
788 tab->mask = (1 << bits) - 1;
789
790 for (code = 0; code <= tab->mask; code++) {
791 tab->table[code].code = -1;
792 tab->table[code].nbits = 0;
793 tab->table[code].nexttable = NULL;
794 }
795
796 for (i = 0; i < nsyms; i++) {
797 if (lengths[i] <= pfxbits || (codes[i] & pfxmask) != pfx)
798 continue;
799 code = (codes[i] >> pfxbits) & tab->mask;
800 for (j = code; j <= tab->mask; j += 1 << (lengths[i]-pfxbits)) {
801 tab->table[j].code = i;
802 nbits = lengths[i] - pfxbits;
803 if (tab->table[j].nbits < nbits)
804 tab->table[j].nbits = nbits;
805 }
806 }
807 for (code = 0; code <= tab->mask; code++) {
808 if (tab->table[code].nbits <= bits)
809 continue;
810 /* Generate a subtable. */
811 tab->table[code].code = -1;
812 nbits = tab->table[code].nbits - bits;
813 if (nbits > 7)
814 nbits = 7;
815 tab->table[code].nbits = bits;
816 tab->table[code].nexttable = zlib_mkonetab(codes, lengths, nsyms,
817 pfx | (code << pfxbits),
818 pfxbits + bits, nbits);
819 }
820
821 return tab;
822 }
823
824 /*
825 * Build a decode table, given a set of Huffman tree lengths.
826 */
827 static struct zlib_table *zlib_mktable(unsigned char *lengths, int nlengths) {
828 int count[MAXCODELEN], startcode[MAXCODELEN], codes[MAXSYMS];
829 int code, maxlen;
830 int i, j;
831
832 /* Count the codes of each length. */
833 maxlen = 0;
834 for (i = 1; i < MAXCODELEN; i++) count[i] = 0;
835 for (i = 0; i < nlengths; i++) {
836 count[lengths[i]]++;
837 if (maxlen < lengths[i])
838 maxlen = lengths[i];
839 }
840 /* Determine the starting code for each length block. */
841 code = 0;
842 for (i = 1; i < MAXCODELEN; i++) {
843 startcode[i] = code;
844 code += count[i];
845 code <<= 1;
846 }
847 /* Determine the code for each symbol. Mirrored, of course. */
848 for (i = 0; i < nlengths; i++) {
849 code = startcode[lengths[i]]++;
850 codes[i] = 0;
851 for (j = 0; j < lengths[i]; j++) {
852 codes[i] = (codes[i] << 1) | (code & 1);
853 code >>= 1;
854 }
855 }
856
857 /*
858 * Now we have the complete list of Huffman codes. Build a
859 * table.
860 */
861 return zlib_mkonetab(codes, lengths, nlengths, 0, 0,
862 maxlen < 9 ? maxlen : 9);
863 }
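
/*
 * A small worked example: given code lengths {2, 1, 3, 3} for four
 * symbols, there are one 1-bit, one 2-bit and two 3-bit codes, so
 * the starting codes come out as 0, 10 and 110; the symbols are
 * then assigned 10, 0, 110 and 111 in order, and finally each code
 * is stored bit-reversed so it can be matched against the LSB-first
 * input stream.
 */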
864
865 static int zlib_freetable(struct zlib_table ** ztab) {
866 struct zlib_table *tab;
867 int code;
868
869 if (ztab == NULL)
870 return -1;
871
872 if (*ztab == NULL)
873 return 0;
874
875 tab = *ztab;
876
877 for (code = 0; code <= tab->mask; code++)
878 if (tab->table[code].nexttable != NULL)
879 zlib_freetable(&tab->table[code].nexttable);
880
881 sfree(tab->table);
882 tab->table = NULL;
883
884 sfree(tab);
885 *ztab = NULL;
886
    return 0;
888 }
889
890 static struct zlib_decompress_ctx {
891 struct zlib_table *staticlentable, *staticdisttable;
892 struct zlib_table *currlentable, *currdisttable, *lenlentable;
893 enum {
894 START, OUTSIDEBLK,
895 TREES_HDR, TREES_LENLEN, TREES_LEN, TREES_LENREP,
896 INBLK, GOTLENSYM, GOTLEN, GOTDISTSYM,
897 UNCOMP_LEN, UNCOMP_NLEN, UNCOMP_DATA
898 } state;
899 int sym, hlit, hdist, hclen, lenptr, lenextrabits, lenaddon, len, lenrep;
900 int uncomplen;
901 unsigned char lenlen[19];
902 unsigned char lengths[286+32];
903 unsigned long bits;
904 int nbits;
905 unsigned char window[WINSIZE];
906 int winpos;
907 unsigned char *outblk;
908 int outlen, outsize;
909 } dctx;
910
911 void zlib_decompress_init(void) {
912 unsigned char lengths[288];
913 memset(lengths, 8, 144);
914 memset(lengths+144, 9, 256-144);
915 memset(lengths+256, 7, 280-256);
916 memset(lengths+280, 8, 288-280);
917 dctx.staticlentable = zlib_mktable(lengths, 288);
918 memset(lengths, 5, 32);
919 dctx.staticdisttable = zlib_mktable(lengths, 32);
920 dctx.state = START; /* even before header */
921 dctx.currlentable = dctx.currdisttable = dctx.lenlentable = NULL;
922 dctx.bits = 0;
923 dctx.nbits = 0;
924 logevent("Initialised zlib (RFC1950) decompression");
925 }
926
927 int zlib_huflookup(unsigned long *bitsp, int *nbitsp, struct zlib_table *tab) {
928 unsigned long bits = *bitsp;
929 int nbits = *nbitsp;
930 while (1) {
931 struct zlib_tableentry *ent;
932 ent = &tab->table[bits & tab->mask];
933 if (ent->nbits > nbits)
934 return -1; /* not enough data */
935 bits >>= ent->nbits;
936 nbits -= ent->nbits;
937 if (ent->code == -1)
938 tab = ent->nexttable;
939 else {
940 *bitsp = bits;
941 *nbitsp = nbits;
942 return ent->code;
943 }
944 }
945 }
946
947 static void zlib_emit_char(int c) {
948 dctx.window[dctx.winpos] = c;
949 dctx.winpos = (dctx.winpos + 1) & (WINSIZE-1);
950 if (dctx.outlen >= dctx.outsize) {
951 dctx.outsize = dctx.outlen + 512;
952 dctx.outblk = srealloc(dctx.outblk, dctx.outsize);
953 }
954 dctx.outblk[dctx.outlen++] = c;
955 }
956
957 #define EATBITS(n) ( dctx.nbits -= (n), dctx.bits >>= (n) )
958
959 int zlib_decompress_block(unsigned char *block, int len,
960 unsigned char **outblock, int *outlen) {
961 const coderecord *rec;
962 int code, blktype, rep, dist, nlen;
963 static const unsigned char lenlenmap[] = {
964 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15
965 };
966
967 dctx.outblk = NULL;
968 dctx.outsize = dctx.outlen = 0;
969
970 while (len > 0 || dctx.nbits > 0) {
971 while (dctx.nbits < 24 && len > 0) {
972 dctx.bits |= (*block++) << dctx.nbits;
973 dctx.nbits += 8;
974 len--;
975 }
976 switch (dctx.state) {
977 case START:
978 /* Expect 16-bit zlib header, which we'll dishonourably ignore. */
979 if (dctx.nbits < 16)
980 goto finished; /* done all we can */
981 EATBITS(16);
982 dctx.state = OUTSIDEBLK;
983 break;
984 case OUTSIDEBLK:
985 /* Expect 3-bit block header. */
986 if (dctx.nbits < 3)
987 goto finished; /* done all we can */
988 EATBITS(1);
989 blktype = dctx.bits & 3;
990 EATBITS(2);
991 if (blktype == 0) {
992 int to_eat = dctx.nbits & 7;
993 dctx.state = UNCOMP_LEN;
994 EATBITS(to_eat); /* align to byte boundary */
995 } else if (blktype == 1) {
996 dctx.currlentable = dctx.staticlentable;
997 dctx.currdisttable = dctx.staticdisttable;
998 dctx.state = INBLK;
999 } else if (blktype == 2) {
1000 dctx.state = TREES_HDR;
1001 }
1002 break;
1003 case TREES_HDR:
1004 /*
1005 * Dynamic block header. Five bits of HLIT, five of
1006 * HDIST, four of HCLEN.
1007 */
1008 if (dctx.nbits < 5+5+4)
1009 goto finished; /* done all we can */
1010 dctx.hlit = 257 + (dctx.bits & 31); EATBITS(5);
1011 dctx.hdist = 1 + (dctx.bits & 31); EATBITS(5);
1012 dctx.hclen = 4 + (dctx.bits & 15); EATBITS(4);
1013 dctx.lenptr = 0;
1014 dctx.state = TREES_LENLEN;
1015 memset(dctx.lenlen, 0, sizeof(dctx.lenlen));
1016 break;
1017 case TREES_LENLEN:
1018 if (dctx.nbits < 3)
1019 goto finished;
1020 while (dctx.lenptr < dctx.hclen && dctx.nbits >= 3) {
1021 dctx.lenlen[lenlenmap[dctx.lenptr++]] =
1022 (unsigned char)(dctx.bits & 7);
1023 EATBITS(3);
1024 }
1025 if (dctx.lenptr == dctx.hclen) {
1026 dctx.lenlentable = zlib_mktable(dctx.lenlen, 19);
1027 dctx.state = TREES_LEN;
1028 dctx.lenptr = 0;
1029 }
1030 break;
1031 case TREES_LEN:
1032 if (dctx.lenptr >= dctx.hlit+dctx.hdist) {
1033 dctx.currlentable = zlib_mktable(dctx.lengths, dctx.hlit);
1034 dctx.currdisttable = zlib_mktable(dctx.lengths + dctx.hlit,
1035 dctx.hdist);
1036 zlib_freetable(&dctx.lenlentable);
1037 dctx.state = INBLK;
1038 break;
1039 }
1040 code = zlib_huflookup(&dctx.bits, &dctx.nbits, dctx.lenlentable);
1041 if (code == -1)
1042 goto finished;
1043 if (code < 16)
1044 dctx.lengths[dctx.lenptr++] = code;
1045 else {
1046 dctx.lenextrabits = (code == 16 ? 2 : code == 17 ? 3 : 7);
1047 dctx.lenaddon = (code == 18 ? 11 : 3);
1048 dctx.lenrep = (code == 16 && dctx.lenptr > 0 ?
1049 dctx.lengths[dctx.lenptr-1] : 0);
1050 dctx.state = TREES_LENREP;
1051 }
1052 break;
1053 case TREES_LENREP:
1054 if (dctx.nbits < dctx.lenextrabits)
1055 goto finished;
1056 rep = dctx.lenaddon + (dctx.bits & ((1<<dctx.lenextrabits)-1));
1057 EATBITS(dctx.lenextrabits);
1058 while (rep > 0 && dctx.lenptr < dctx.hlit+dctx.hdist) {
1059 dctx.lengths[dctx.lenptr] = dctx.lenrep;
1060 dctx.lenptr++;
1061 rep--;
1062 }
1063 dctx.state = TREES_LEN;
1064 break;
1065 case INBLK:
1066 code = zlib_huflookup(&dctx.bits, &dctx.nbits, dctx.currlentable);
1067 if (code == -1)
1068 goto finished;
1069 if (code < 256)
1070 zlib_emit_char(code);
1071 else if (code == 256) {
1072 dctx.state = OUTSIDEBLK;
1073 if (dctx.currlentable != dctx.staticlentable)
1074 zlib_freetable(&dctx.currlentable);
1075 if (dctx.currdisttable != dctx.staticdisttable)
1076 zlib_freetable(&dctx.currdisttable);
1077 } else if (code < 286) { /* static tree can give >285; ignore */
1078 dctx.state = GOTLENSYM;
1079 dctx.sym = code;
1080 }
1081 break;
1082 case GOTLENSYM:
1083 rec = &lencodes[dctx.sym - 257];
1084 if (dctx.nbits < rec->extrabits)
1085 goto finished;
1086 dctx.len = rec->min + (dctx.bits & ((1<<rec->extrabits)-1));
1087 EATBITS(rec->extrabits);
1088 dctx.state = GOTLEN;
1089 break;
1090 case GOTLEN:
1091 code = zlib_huflookup(&dctx.bits, &dctx.nbits, dctx.currdisttable);
1092 if (code == -1)
1093 goto finished;
1094 dctx.state = GOTDISTSYM;
1095 dctx.sym = code;
1096 break;
1097 case GOTDISTSYM:
1098 rec = &distcodes[dctx.sym];
1099 if (dctx.nbits < rec->extrabits)
1100 goto finished;
1101 dist = rec->min + (dctx.bits & ((1<<rec->extrabits)-1));
1102 EATBITS(rec->extrabits);
1103 dctx.state = INBLK;
1104 while (dctx.len--)
1105 zlib_emit_char(dctx.window[(dctx.winpos-dist) & (WINSIZE-1)]);
1106 break;
1107 case UNCOMP_LEN:
1108 /*
1109 * Uncompressed block. We expect to see a 16-bit LEN.
1110 */
1111 if (dctx.nbits < 16)
1112 goto finished;
1113 dctx.uncomplen = dctx.bits & 0xFFFF;
1114 EATBITS(16);
1115 dctx.state = UNCOMP_NLEN;
1116 break;
1117 case UNCOMP_NLEN:
            /*
             * Uncompressed block. We expect to see a 16-bit NLEN,
             * which should be the one's complement of the previous
             * LEN (though we don't currently bother to verify it).
             */
            if (dctx.nbits < 16)
                goto finished;
            nlen = dctx.bits & 0xFFFF;
            EATBITS(16);
            if (dctx.uncomplen == 0)
                dctx.state = OUTSIDEBLK;   /* zero-length block: no data follows */
            else
                dctx.state = UNCOMP_DATA;
            break;
1129 case UNCOMP_DATA:
1130 if (dctx.nbits < 8)
1131 goto finished;
1132 zlib_emit_char(dctx.bits & 0xFF);
1133 EATBITS(8);
1134 if (--dctx.uncomplen == 0)
1135 dctx.state = OUTSIDEBLK; /* end of uncompressed block */
1136 break;
1137 }
1138 }
1139
1140 finished:
1141 *outblock = dctx.outblk;
1142 *outlen = dctx.outlen;
1143
1144 return 1;
1145 }
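
/*
 * And the matching sketch for the decompressor side (not compiled
 * in):
 */
#if 0
static void zlib_example_decompress(unsigned char *data, int len) {
    unsigned char *out;
    int outlen;
    zlib_decompress_init();
    zlib_decompress_block(data, len, &out, &outlen);
    /* `out' now holds `outlen' bytes of decompressed data */
    sfree(out);
}
#endif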
1146
1147 const struct ssh_compress ssh_zlib = {
1148 "zlib",
1149 zlib_compress_init,
1150 zlib_compress_block,
1151 zlib_decompress_init,
1152 zlib_decompress_block,
1153 zlib_disable_compression
1154 };