Make memory management uniform: _everything_ now goes through the
[u/mdw/putty] / sshzlib.c
1 /*
2 * Zlib (RFC1950 / RFC1951) compression for PuTTY.
3 *
4 * There will no doubt be criticism of my decision to reimplement
5 * Zlib compression from scratch instead of using the existing zlib
6 * code. People will cry `reinventing the wheel'; they'll claim
7 * that the `fundamental basis of OSS' is code reuse; they'll want
8 * to see a really good reason for me having chosen not to use the
9 * existing code.
10 *
11 * Well, here are my reasons. Firstly, I don't want to link the
12 * whole of zlib into the PuTTY binary; PuTTY is justifiably proud
13 * of its small size and I think zlib contains a lot of unnecessary
14 * baggage for the kind of compression that SSH requires.
15 *
16 * Secondly, I also don't like the alternative of using zlib.dll.
17 * Another thing PuTTY is justifiably proud of is its ease of
18 * installation, and the last thing I want to do is to start
19 * mandating DLLs. Not only that, but there are two _kinds_ of
20 * zlib.dll kicking around, one with C calling conventions on the
21 * exported functions and another with WINAPI conventions, and
22 * there would be a significant danger of getting the wrong one.
23 *
24 * Thirdly, there seems to be a difference of opinion on the IETF
25 * secsh mailing list about the correct way to round off a
26 * compressed packet and start the next. In particular, there's
27 * some talk of switching to a mechanism zlib isn't currently
28 * capable of supporting (see below for an explanation). Given that
29 * sort of uncertainty, I thought it might be better to have code
30 * that will support even the zlib-incompatible worst case.
31 *
32 * Fourthly, it's a _second implementation_. Second implementations
33 * are fundamentally a Good Thing in standardisation efforts. The
34 * difference of opinion mentioned above has arisen _precisely_
35 * because there has been only one zlib implementation and
36 * everybody has used it. I don't intend that this should happen
37 * again.
38 */
39
40 #include <stdlib.h>
41 #include <assert.h>
42
43 /* FIXME */
44 #include <windows.h>
45 #include <stdio.h>
46 #include "putty.h"
47
48 #include "ssh.h"
49
50 /* ----------------------------------------------------------------------
51 * Basic LZ77 code. This bit is designed modularly, so it could be
52 * ripped out and used in a different LZ77 compressor. Go to it,
53 * and good luck :-)
54 */
55
56 struct LZ77InternalContext;
57 struct LZ77Context {
58 struct LZ77InternalContext *ictx;
59 void *userdata;
60 void (*literal)(struct LZ77Context *ctx, unsigned char c);
61 void (*match)(struct LZ77Context *ctx, int distance, int len);
62 };
63
64 /*
65 * Initialise the private fields of an LZ77Context. It's up to the
66 * user to initialise the public fields.
67 */
68 static int lz77_init(struct LZ77Context *ctx);
69
70 /*
71 * Supply data to be compressed. Will update the private fields of
72 * the LZ77Context, and will call literal() and match() to output.
73 */
74 static void lz77_compress(struct LZ77Context *ctx,
75 unsigned char *data, int len);
76
77 /*
78 * Modifiable parameters.
79 */
80 #define WINSIZE 32768 /* window size. Must be power of 2! */
81 #define HASHMAX 2039 /* one more than max hash value */
82 #define MAXMATCH 32 /* how many matches we track */
83 #define HASHCHARS 3 /* how many chars make a hash */
84
85 /*
86 * This compressor takes a less slapdash approach than the
87 * gzip/zlib one. Rather than allowing our hash chains to fall into
88 * disuse near the far end, we keep them doubly linked so we can
89 * _find_ the far end, and then every time we add a new byte to the
90 * window (thus rolling round by one and removing the previous
91 * byte), we can carefully remove the hash chain entry.
92 */
93
94 #define INVALID -1 /* invalid hash _and_ invalid offset */
95 struct WindowEntry {
96 int next, prev; /* array indices within the window */
97 int hashval;
98 };
99
100 struct HashEntry {
101 int first; /* window index of first in chain */
102 };
103
104 struct Match {
105 int distance, len;
106 };
107
108 struct LZ77InternalContext {
109 struct WindowEntry win[WINSIZE];
110 unsigned char data[WINSIZE];
111 int winpos;
112 struct HashEntry hashtab[HASHMAX];
113 unsigned char pending[HASHCHARS];
114 int npending;
115 };
116
117 static int lz77_hash(unsigned char *data) {
118 return (257*data[0] + 263*data[1] + 269*data[2]) % HASHMAX;
119 }
120
121 static int lz77_init(struct LZ77Context *ctx) {
122 struct LZ77InternalContext *st;
123 int i;
124
125 st = (struct LZ77InternalContext *)smalloc(sizeof(*st));
126 if (!st)
127 return 0;
128
129 ctx->ictx = st;
130
131 for (i = 0; i < WINSIZE; i++)
132 st->win[i].next = st->win[i].prev = st->win[i].hashval = INVALID;
133 for (i = 0; i < HASHMAX; i++)
134 st->hashtab[i].first = INVALID;
135 st->winpos = 0;
136
137 st->npending = 0;
138
139 return 1;
140 }
141
142 static void lz77_advance(struct LZ77InternalContext *st,
143 unsigned char c, int hash) {
144 int off;
145
146 /*
147 * Remove the hash entry at winpos from the tail of its chain,
148 * or empty the chain if it's the only thing on the chain.
149 */
150 if (st->win[st->winpos].prev != INVALID) {
151 st->win[st->win[st->winpos].prev].next = INVALID;
152 } else if (st->win[st->winpos].hashval != INVALID) {
153 st->hashtab[st->win[st->winpos].hashval].first = INVALID;
154 }
155
156 /*
157 * Create a new entry at winpos and add it to the head of its
158 * hash chain.
159 */
160 st->win[st->winpos].hashval = hash;
161 st->win[st->winpos].prev = INVALID;
162 off = st->win[st->winpos].next = st->hashtab[hash].first;
163 st->hashtab[hash].first = st->winpos;
164 if (off != INVALID)
165 st->win[off].prev = st->winpos;
166 st->data[st->winpos] = c;
167
168 /*
169 * Advance the window pointer.
170 */
171 st->winpos = (st->winpos + 1) & (WINSIZE-1);
172 }
173
174 #define CHARAT(k) ( (k)<0 ? st->data[(st->winpos+k)&(WINSIZE-1)] : data[k] )
175
176 static void lz77_compress(struct LZ77Context *ctx,
177 unsigned char *data, int len) {
178 struct LZ77InternalContext *st = ctx->ictx;
179 int i, hash, distance, off, nmatch, matchlen, advance;
180 struct Match defermatch, matches[MAXMATCH];
181 int deferchr;
182
183 /*
184 * Add any pending characters from last time to the window. (We
185 * might not be able to.)
186 */
187 for (i = 0; i < st->npending; i++) {
188 unsigned char foo[HASHCHARS];
189 int j;
190 if (len + st->npending - i < HASHCHARS) {
191 /* Update the pending array. */
192 for (j = i; j < st->npending; j++)
193 st->pending[j-i] = st->pending[j];
194 break;
195 }
196 for (j = 0; j < HASHCHARS; j++)
197 foo[j] = (i + j < st->npending ? st->pending[i+j] :
198 data[i + j - st->npending]);
199 lz77_advance(st, foo[0], lz77_hash(foo));
200 }
201 st->npending -= i;
202
203 defermatch.len = 0;
204 while (len > 0) {
205
206 if (len >= HASHCHARS) {
207 /*
208 * Hash the next few characters.
209 */
210 hash = lz77_hash(data);
211
212 /*
213 * Look the hash up in the corresponding hash chain and see
214 * what we can find.
215 */
216 nmatch = 0;
217 for (off = st->hashtab[hash].first;
218 off != INVALID; off = st->win[off].next) {
219 /* distance = 1 if off == st->winpos-1 */
220 /* distance = WINSIZE if off == st->winpos */
221 distance = WINSIZE - (off + WINSIZE - st->winpos) % WINSIZE;
222 for (i = 0; i < HASHCHARS; i++)
223 if (CHARAT(i) != CHARAT(i-distance))
224 break;
225 if (i == HASHCHARS) {
226 matches[nmatch].distance = distance;
227 matches[nmatch].len = 3;
228 if (++nmatch >= MAXMATCH)
229 break;
230 }
231 }
232 } else {
233 nmatch = 0;
234 hash = INVALID;
235 }
236
237 if (nmatch > 0) {
238 /*
239 * We've now filled up matches[] with nmatch potential
240 * matches. Follow them down to find the longest. (We
241 * assume here that it's always worth favouring a
242 * longer match over a shorter one.)
243 */
244 matchlen = HASHCHARS;
245 while (matchlen < len) {
246 int j;
247 for (i = j = 0; i < nmatch; i++) {
248 if (CHARAT(matchlen) ==
249 CHARAT(matchlen - matches[i].distance)) {
250 matches[j++] = matches[i];
251 }
252 }
253 if (j == 0)
254 break;
255 matchlen++;
256 nmatch = j;
257 }
258
259 /*
260 * We've now got all the longest matches. We favour the
261 * shorter distances, which means we go with matches[0].
262 * So see if we want to defer it or throw it away.
263 */
264 matches[0].len = matchlen;
265 if (defermatch.len > 0) {
266 if (matches[0].len > defermatch.len + 1) {
267 /* We have a better match. Emit the deferred char,
268 * and defer this match. */
269 ctx->literal(ctx, (unsigned char)deferchr);
270 defermatch = matches[0];
271 deferchr = data[0];
272 advance = 1;
273 } else {
274 /* We don't have a better match. Do the deferred one. */
275 ctx->match(ctx, defermatch.distance, defermatch.len);
276 advance = defermatch.len - 1;
277 defermatch.len = 0;
278 }
279 } else {
280 /* There was no deferred match. Defer this one. */
281 defermatch = matches[0];
282 deferchr = data[0];
283 advance = 1;
284 }
285 } else {
286 /*
287 * We found no matches. Emit the deferred match, if
288 * any; otherwise emit a literal.
289 */
290 if (defermatch.len > 0) {
291 ctx->match(ctx, defermatch.distance, defermatch.len);
292 advance = defermatch.len - 1;
293 defermatch.len = 0;
294 } else {
295 ctx->literal(ctx, data[0]);
296 advance = 1;
297 }
298 }
299
300 /*
301 * Now advance the position by `advance' characters,
302 * keeping the window and hash chains consistent.
303 */
304 while (advance > 0) {
305 if (len >= HASHCHARS) {
306 lz77_advance(st, *data, lz77_hash(data));
307 } else {
308 st->pending[st->npending++] = *data;
309 }
310 data++;
311 len--;
312 advance--;
313 }
314 }
315 }
316
317 /* ----------------------------------------------------------------------
318 * Zlib compression. We always use the static Huffman tree option.
319 * Mostly this is because it's hard to scan a block in advance to
320 * work out better trees; dynamic trees are great when you're
321 * compressing a large file under no significant time constraint,
322 * but when you're compressing little bits in real time, things get
323 * hairier.
324 *
325 * I suppose it's possible that I could compute Huffman trees based
326 * on the frequencies in the _previous_ block, as a sort of
327 * heuristic, but I'm not confident that the gain would balance out
328 * having to transmit the trees.
329 */
330
331 static struct LZ77Context ectx;
332
333 struct Outbuf {
334 unsigned char *outbuf;
335 int outlen, outsize;
336 unsigned long outbits;
337 int noutbits;
338 int firstblock;
339 };
340
341 static void outbits(struct Outbuf *out, unsigned long bits, int nbits) {
342 assert(out->noutbits + nbits <= 32);
343 out->outbits |= bits << out->noutbits;
344 out->noutbits += nbits;
345 while (out->noutbits >= 8) {
346 if (out->outlen >= out->outsize) {
347 out->outsize = out->outlen + 64;
348 out->outbuf = srealloc(out->outbuf, out->outsize);
349 }
350 out->outbuf[out->outlen++] = (unsigned char)(out->outbits & 0xFF);
351 out->outbits >>= 8;
352 out->noutbits -= 8;
353 }
354 }
355
356 static const unsigned char mirrorbytes[256] = {
357 0x00, 0x80, 0x40, 0xc0, 0x20, 0xa0, 0x60, 0xe0,
358 0x10, 0x90, 0x50, 0xd0, 0x30, 0xb0, 0x70, 0xf0,
359 0x08, 0x88, 0x48, 0xc8, 0x28, 0xa8, 0x68, 0xe8,
360 0x18, 0x98, 0x58, 0xd8, 0x38, 0xb8, 0x78, 0xf8,
361 0x04, 0x84, 0x44, 0xc4, 0x24, 0xa4, 0x64, 0xe4,
362 0x14, 0x94, 0x54, 0xd4, 0x34, 0xb4, 0x74, 0xf4,
363 0x0c, 0x8c, 0x4c, 0xcc, 0x2c, 0xac, 0x6c, 0xec,
364 0x1c, 0x9c, 0x5c, 0xdc, 0x3c, 0xbc, 0x7c, 0xfc,
365 0x02, 0x82, 0x42, 0xc2, 0x22, 0xa2, 0x62, 0xe2,
366 0x12, 0x92, 0x52, 0xd2, 0x32, 0xb2, 0x72, 0xf2,
367 0x0a, 0x8a, 0x4a, 0xca, 0x2a, 0xaa, 0x6a, 0xea,
368 0x1a, 0x9a, 0x5a, 0xda, 0x3a, 0xba, 0x7a, 0xfa,
369 0x06, 0x86, 0x46, 0xc6, 0x26, 0xa6, 0x66, 0xe6,
370 0x16, 0x96, 0x56, 0xd6, 0x36, 0xb6, 0x76, 0xf6,
371 0x0e, 0x8e, 0x4e, 0xce, 0x2e, 0xae, 0x6e, 0xee,
372 0x1e, 0x9e, 0x5e, 0xde, 0x3e, 0xbe, 0x7e, 0xfe,
373 0x01, 0x81, 0x41, 0xc1, 0x21, 0xa1, 0x61, 0xe1,
374 0x11, 0x91, 0x51, 0xd1, 0x31, 0xb1, 0x71, 0xf1,
375 0x09, 0x89, 0x49, 0xc9, 0x29, 0xa9, 0x69, 0xe9,
376 0x19, 0x99, 0x59, 0xd9, 0x39, 0xb9, 0x79, 0xf9,
377 0x05, 0x85, 0x45, 0xc5, 0x25, 0xa5, 0x65, 0xe5,
378 0x15, 0x95, 0x55, 0xd5, 0x35, 0xb5, 0x75, 0xf5,
379 0x0d, 0x8d, 0x4d, 0xcd, 0x2d, 0xad, 0x6d, 0xed,
380 0x1d, 0x9d, 0x5d, 0xdd, 0x3d, 0xbd, 0x7d, 0xfd,
381 0x03, 0x83, 0x43, 0xc3, 0x23, 0xa3, 0x63, 0xe3,
382 0x13, 0x93, 0x53, 0xd3, 0x33, 0xb3, 0x73, 0xf3,
383 0x0b, 0x8b, 0x4b, 0xcb, 0x2b, 0xab, 0x6b, 0xeb,
384 0x1b, 0x9b, 0x5b, 0xdb, 0x3b, 0xbb, 0x7b, 0xfb,
385 0x07, 0x87, 0x47, 0xc7, 0x27, 0xa7, 0x67, 0xe7,
386 0x17, 0x97, 0x57, 0xd7, 0x37, 0xb7, 0x77, 0xf7,
387 0x0f, 0x8f, 0x4f, 0xcf, 0x2f, 0xaf, 0x6f, 0xef,
388 0x1f, 0x9f, 0x5f, 0xdf, 0x3f, 0xbf, 0x7f, 0xff,
389 };
390
391 typedef struct {
392 int code, extrabits, min, max;
393 } coderecord;
394
395 static const coderecord lencodes[] = {
396 {257, 0, 3,3},
397 {258, 0, 4,4},
398 {259, 0, 5,5},
399 {260, 0, 6,6},
400 {261, 0, 7,7},
401 {262, 0, 8,8},
402 {263, 0, 9,9},
403 {264, 0, 10,10},
404 {265, 1, 11,12},
405 {266, 1, 13,14},
406 {267, 1, 15,16},
407 {268, 1, 17,18},
408 {269, 2, 19,22},
409 {270, 2, 23,26},
410 {271, 2, 27,30},
411 {272, 2, 31,34},
412 {273, 3, 35,42},
413 {274, 3, 43,50},
414 {275, 3, 51,58},
415 {276, 3, 59,66},
416 {277, 4, 67,82},
417 {278, 4, 83,98},
418 {279, 4, 99,114},
419 {280, 4, 115,130},
420 {281, 5, 131,162},
421 {282, 5, 163,194},
422 {283, 5, 195,226},
423 {284, 5, 227,257},
424 {285, 0, 258,258},
425 };
426
427 static const coderecord distcodes[] = {
428 {0, 0, 1,1},
429 {1, 0, 2,2},
430 {2, 0, 3,3},
431 {3, 0, 4,4},
432 {4, 1, 5,6},
433 {5, 1, 7,8},
434 {6, 2, 9,12},
435 {7, 2, 13,16},
436 {8, 3, 17,24},
437 {9, 3, 25,32},
438 {10, 4, 33,48},
439 {11, 4, 49,64},
440 {12, 5, 65,96},
441 {13, 5, 97,128},
442 {14, 6, 129,192},
443 {15, 6, 193,256},
444 {16, 7, 257,384},
445 {17, 7, 385,512},
446 {18, 8, 513,768},
447 {19, 8, 769,1024},
448 {20, 9, 1025,1536},
449 {21, 9, 1537,2048},
450 {22, 10, 2049,3072},
451 {23, 10, 3073,4096},
452 {24, 11, 4097,6144},
453 {25, 11, 6145,8192},
454 {26, 12, 8193,12288},
455 {27, 12, 12289,16384},
456 {28, 13, 16385,24576},
457 {29, 13, 24577,32768},
458 };
459
460 static void zlib_literal(struct LZ77Context *ectx, unsigned char c) {
461 struct Outbuf *out = (struct Outbuf *)ectx->userdata;
462
463 if (c <= 143) {
464 /* 0 through 143 are 8 bits long starting at 00110000. */
465 outbits(out, mirrorbytes[0x30 + c], 8);
466 } else {
467 /* 144 through 255 are 9 bits long starting at 110010000. */
468 outbits(out, 1 + 2*mirrorbytes[0x90 - 144 + c], 9);
469 }
470 }
471
472 static void zlib_match(struct LZ77Context *ectx, int distance, int len) {
473 const coderecord *d, *l;
474 int i, j, k;
475 struct Outbuf *out = (struct Outbuf *)ectx->userdata;
476 while (len > 0) {
477 int thislen;
478
479 /*
480 * We can transmit matches of lengths 3 through 258
481 * inclusive. So if len exceeds 258, we must transmit in
482 * several steps, with 258 or less in each step.
483 *
484 * Specifically: if len >= 261, we can transmit 258 and be
485 * sure of having at least 3 left for the next step. And if
486 * len <= 258, we can just transmit len. But if len == 259
487 * or 260, we must transmit len-3.
488 */
489 thislen = (len > 260 ? 258 : len <= 258 ? len : len-3);
490 len -= thislen;
491
492 /*
493 * Binary-search to find which length code we're
494 * transmitting.
495 */
496 i = -1; j = sizeof(lencodes)/sizeof(*lencodes);
497 while (j - i >= 2) {
498 k = (j+i)/2;
499 if (thislen < lencodes[k].min)
500 j = k;
501 else if (thislen > lencodes[k].max)
502 i = k;
503 else {
504 l = &lencodes[k];
505 break; /* found it! */
506 }
507 }
508
509 /*
510 * Transmit the length code. 256-279 are seven bits
511 * starting at 0000000; 280-287 are eight bits starting at
512 * 11000000.
513 */
514 if (l->code <= 279) {
515 outbits(out, mirrorbytes[(l->code-256)*2], 7);
516 } else {
517 outbits(out, mirrorbytes[0xc0 - 280 + l->code], 8);
518 }
519
520 /*
521 * Transmit the extra bits.
522 */
523 if (l->extrabits)
524 outbits(out, thislen - l->min, l->extrabits);
525
526 /*
527 * Binary-search to find which distance code we're
528 * transmitting.
529 */
530 i = -1; j = sizeof(distcodes)/sizeof(*distcodes);
531 while (j - i >= 2) {
532 k = (j+i)/2;
533 if (distance < distcodes[k].min)
534 j = k;
535 else if (distance > distcodes[k].max)
536 i = k;
537 else {
538 d = &distcodes[k];
539 break; /* found it! */
540 }
541 }
542
543 /*
544 * Transmit the distance code. Five bits starting at 00000.
545 */
546 outbits(out, mirrorbytes[d->code*8], 5);
547
548 /*
549 * Transmit the extra bits.
550 */
551 if (d->extrabits)
552 outbits(out, distance - d->min, d->extrabits);
553 }
554 }
555
556 void zlib_compress_init(void) {
557 struct Outbuf *out;
558
559 lz77_init(&ectx);
560 ectx.literal = zlib_literal;
561 ectx.match = zlib_match;
562
563 out = smalloc(sizeof(struct Outbuf));
564 out->outbits = out->noutbits = 0;
565 out->firstblock = 1;
566 ectx.userdata = out;
567
568 logevent("Initialised zlib (RFC1950) compression");
569 }
570
571 int zlib_compress_block(unsigned char *block, int len,
572 unsigned char **outblock, int *outlen) {
573 struct Outbuf *out = (struct Outbuf *)ectx.userdata;
574
575 out->outbuf = NULL;
576 out->outlen = out->outsize = 0;
577
578 /*
579 * If this is the first block, output the Zlib (RFC1950) header
580 * bytes 78 9C. (Deflate compression, 32K window size, default
581 * algorithm.)
582 */
583 if (out->firstblock) {
584 outbits(out, 0x9C78, 16);
585 out->firstblock = 0;
586 /*
587 * Start a Deflate (RFC1951) fixed-trees block. We transmit
588 * a zero bit (BFINAL=0), followed by a zero bit and a one
589 * bit (BTYPE=01). Of course these are in the wrong order
590 * (01 0).
591 */
592 outbits(out, 2, 3);
593 }
594
595 /*
596 * Do the compression.
597 */
598 lz77_compress(&ectx, block, len);
599 /*
600 * End the block (by transmitting code 256, which is 0000000 in
601 * fixed-tree mode), and transmit some empty blocks to ensure
602 * we have emitted the byte containing the last piece of
603 * genuine data. There are three ways we can do this:
604 *
605 * - Minimal flush. Output end-of-block and then open a new
606 * static block. This takes 9 bits, which is guaranteed to
607 * flush out the last genuine code in the closed block; but
608 * allegedly zlib can't handle it.
609 *
610 * - Zlib partial flush. Output EOB, open and close an empty
611 * static block, and _then_ open the new block. This is the
612 * best zlib can handle.
613 *
614 * - Zlib sync flush. Output EOB, then an empty _uncompressed_
615 * block (000, then sync to byte boundary, then send bytes
616 * 00 00 FF FF). Then open the new block.
617 *
618 * For the moment, we will use Zlib partial flush.
619 */
620 outbits(out, 0, 7); /* close block */
621 outbits(out, 2, 3+7); /* empty static block */
622 outbits(out, 2, 3); /* open new block */
623
624 *outblock = out->outbuf;
625 *outlen = out->outlen;
626
627 return 1;
628 }
629
630 /* ----------------------------------------------------------------------
631 * Zlib decompression. Of course, even though our compressor always
632 * uses static trees, our _decompressor_ has to be capable of
633 * handling dynamic trees if it sees them.
634 */
635
636 /*
637 * The way we work the Huffman decode is to have a table lookup on
638 * the first N bits of the input stream (in the order they arrive,
639 * of course, i.e. the first bit of the Huffman code is in bit 0).
640 * Each table entry lists the number of bits to consume, plus
641 * either an output code or a pointer to a secondary table.
642 */
643 struct zlib_table;
644 struct zlib_tableentry;
645
646 struct zlib_tableentry {
647 unsigned char nbits;
648 int code;
649 struct zlib_table *nexttable;
650 };
651
652 struct zlib_table {
653 int mask; /* mask applied to input bit stream */
654 struct zlib_tableentry *table;
655 };
656
657 #define MAXCODELEN 16
658 #define MAXSYMS 288
659
660 /*
661 * Build a single-level decode table for elements
662 * [minlength,maxlength) of the provided code/length tables, and
663 * recurse to build subtables.
664 */
665 static struct zlib_table *zlib_mkonetab(int *codes, unsigned char *lengths,
666 int nsyms,
667 int pfx, int pfxbits, int bits) {
668 struct zlib_table *tab = smalloc(sizeof(struct zlib_table));
669 int pfxmask = (1 << pfxbits) - 1;
670 int nbits, i, j, code;
671
672 tab->table = smalloc((1 << bits) * sizeof(struct zlib_tableentry));
673 tab->mask = (1 << bits) - 1;
674
675 for (code = 0; code <= tab->mask; code++) {
676 tab->table[code].code = -1;
677 tab->table[code].nbits = 0;
678 tab->table[code].nexttable = NULL;
679 }
680
681 for (i = 0; i < nsyms; i++) {
682 if (lengths[i] <= pfxbits || (codes[i] & pfxmask) != pfx)
683 continue;
684 code = (codes[i] >> pfxbits) & tab->mask;
685 for (j = code; j <= tab->mask; j += 1 << (lengths[i]-pfxbits)) {
686 tab->table[j].code = i;
687 nbits = lengths[i] - pfxbits;
688 if (tab->table[j].nbits < nbits)
689 tab->table[j].nbits = nbits;
690 }
691 }
692 for (code = 0; code <= tab->mask; code++) {
693 if (tab->table[code].nbits <= bits)
694 continue;
695 /* Generate a subtable. */
696 tab->table[code].code = -1;
697 nbits = tab->table[code].nbits - bits;
698 if (nbits > 7)
699 nbits = 7;
700 tab->table[code].nbits = bits;
701 tab->table[code].nexttable = zlib_mkonetab(codes, lengths, nsyms,
702 pfx | (code << pfxbits),
703 pfxbits + bits, nbits);
704 }
705
706 return tab;
707 }
708
709 /*
710 * Build a decode table, given a set of Huffman tree lengths.
711 */
712 static struct zlib_table *zlib_mktable(unsigned char *lengths, int nlengths) {
713 int count[MAXCODELEN], startcode[MAXCODELEN], codes[MAXSYMS];
714 int code, maxlen;
715 int i, j;
716
717 /* Count the codes of each length. */
718 maxlen = 0;
719 for (i = 1; i < MAXCODELEN; i++) count[i] = 0;
720 for (i = 0; i < nlengths; i++) {
721 count[lengths[i]]++;
722 if (maxlen < lengths[i])
723 maxlen = lengths[i];
724 }
725 /* Determine the starting code for each length block. */
726 code = 0;
727 for (i = 1; i < MAXCODELEN; i++) {
728 startcode[i] = code;
729 code += count[i];
730 code <<= 1;
731 }
732 /* Determine the code for each symbol. Mirrored, of course. */
733 for (i = 0; i < nlengths; i++) {
734 code = startcode[lengths[i]]++;
735 codes[i] = 0;
736 for (j = 0; j < lengths[i]; j++) {
737 codes[i] = (codes[i] << 1) | (code & 1);
738 code >>= 1;
739 }
740 }
741
742 /*
743 * Now we have the complete list of Huffman codes. Build a
744 * table.
745 */
746 return zlib_mkonetab(codes, lengths, nlengths, 0, 0,
747 maxlen < 9 ? maxlen : 9);
748 }
749
750 static struct zlib_decompress_ctx {
751 struct zlib_table *staticlentable, *staticdisttable;
752 struct zlib_table *currlentable, *currdisttable, *lenlentable;
753 enum {
754 START, OUTSIDEBLK,
755 TREES_HDR, TREES_LENLEN, TREES_LEN, TREES_LENREP,
756 INBLK, GOTLENSYM, GOTLEN, GOTDISTSYM,
757 UNCOMP_LEN, UNCOMP_NLEN, UNCOMP_DATA
758 } state;
759 int sym, hlit, hdist, hclen, lenptr, lenextrabits, lenaddon, len, lenrep;
760 int uncomplen;
761 unsigned char lenlen[19];
762 unsigned char lengths[286+32];
763 unsigned long bits;
764 int nbits;
765 unsigned char window[WINSIZE];
766 int winpos;
767 unsigned char *outblk;
768 int outlen, outsize;
769 } dctx;
770
771 void zlib_decompress_init(void) {
772 unsigned char lengths[288];
773 memset(lengths, 8, 144);
774 memset(lengths+144, 9, 256-144);
775 memset(lengths+256, 7, 280-256);
776 memset(lengths+280, 8, 288-280);
777 dctx.staticlentable = zlib_mktable(lengths, 288);
778 memset(lengths, 5, 32);
779 dctx.staticdisttable = zlib_mktable(lengths, 32);
780 dctx.state = START; /* even before header */
781 dctx.currlentable = dctx.currdisttable = dctx.lenlentable = NULL;
782 dctx.bits = 0;
783 dctx.nbits = 0;
784 logevent("Initialised zlib (RFC1950) decompression");
785 }
786
787 int zlib_huflookup(unsigned long *bitsp, int *nbitsp, struct zlib_table *tab) {
788 unsigned long bits = *bitsp;
789 int nbits = *nbitsp;
790 while (1) {
791 struct zlib_tableentry *ent;
792 ent = &tab->table[bits & tab->mask];
793 if (ent->nbits > nbits)
794 return -1; /* not enough data */
795 bits >>= ent->nbits;
796 nbits -= ent->nbits;
797 if (ent->code == -1)
798 tab = ent->nexttable;
799 else {
800 *bitsp = bits;
801 *nbitsp = nbits;
802 return ent->code;
803 }
804 }
805 }
806
807 static void zlib_emit_char(int c) {
808 dctx.window[dctx.winpos] = c;
809 dctx.winpos = (dctx.winpos + 1) & (WINSIZE-1);
810 if (dctx.outlen >= dctx.outsize) {
811 dctx.outsize = dctx.outlen + 512;
812 dctx.outblk = srealloc(dctx.outblk, dctx.outsize);
813 }
814 dctx.outblk[dctx.outlen++] = c;
815 }
816
817 #define EATBITS(n) ( dctx.nbits -= (n), dctx.bits >>= (n) )
818
819 int zlib_decompress_block(unsigned char *block, int len,
820 unsigned char **outblock, int *outlen) {
821 const coderecord *rec;
822 int code, blktype, rep, dist, nlen;
823 static const unsigned char lenlenmap[] = {
824 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15
825 };
826
827 dctx.outblk = NULL;
828 dctx.outsize = dctx.outlen = 0;
829
830 while (len > 0 || dctx.nbits > 0) {
831 while (dctx.nbits < 24 && len > 0) {
832 dctx.bits |= (*block++) << dctx.nbits;
833 dctx.nbits += 8;
834 len--;
835 }
836 switch (dctx.state) {
837 case START:
838 /* Expect 16-bit zlib header, which we'll dishonourably ignore. */
839 if (dctx.nbits < 16)
840 goto finished; /* done all we can */
841 EATBITS(16);
842 dctx.state = OUTSIDEBLK;
843 break;
844 case OUTSIDEBLK:
845 /* Expect 3-bit block header. */
846 if (dctx.nbits < 3)
847 goto finished; /* done all we can */
848 EATBITS(1);
849 blktype = dctx.bits & 3;
850 EATBITS(2);
851 if (blktype == 0) {
852 int to_eat = dctx.nbits & 7;
853 dctx.state = UNCOMP_LEN;
854 EATBITS(to_eat); /* align to byte boundary */
855 } else if (blktype == 1) {
856 dctx.currlentable = dctx.staticlentable;
857 dctx.currdisttable = dctx.staticdisttable;
858 dctx.state = INBLK;
859 } else if (blktype == 2) {
860 dctx.state = TREES_HDR;
861 }
862 break;
863 case TREES_HDR:
864 /*
865 * Dynamic block header. Five bits of HLIT, five of
866 * HDIST, four of HCLEN.
867 */
868 if (dctx.nbits < 5+5+4)
869 goto finished; /* done all we can */
870 dctx.hlit = 257 + (dctx.bits & 31); EATBITS(5);
871 dctx.hdist = 1 + (dctx.bits & 31); EATBITS(5);
872 dctx.hclen = 4 + (dctx.bits & 15); EATBITS(4);
873 dctx.lenptr = 0;
874 dctx.state = TREES_LENLEN;
875 memset(dctx.lenlen, 0, sizeof(dctx.lenlen));
876 break;
877 case TREES_LENLEN:
878 if (dctx.nbits < 3)
879 goto finished;
880 while (dctx.lenptr < dctx.hclen && dctx.nbits >= 3) {
881 dctx.lenlen[lenlenmap[dctx.lenptr++]] =
882 (unsigned char)(dctx.bits & 7);
883 EATBITS(3);
884 }
885 if (dctx.lenptr == dctx.hclen) {
886 dctx.lenlentable = zlib_mktable(dctx.lenlen, 19);
887 dctx.state = TREES_LEN;
888 dctx.lenptr = 0;
889 }
890 break;
891 case TREES_LEN:
892 if (dctx.lenptr >= dctx.hlit+dctx.hdist) {
893 dctx.currlentable = zlib_mktable(dctx.lengths, dctx.hlit);
894 dctx.currdisttable = zlib_mktable(dctx.lengths + dctx.hlit,
895 dctx.hdist);
896 /* FIXME: zlib_freetable(dctx.lenlentable); */
897 dctx.state = INBLK;
898 break;
899 }
900 code = zlib_huflookup(&dctx.bits, &dctx.nbits, dctx.lenlentable);
901 if (code == -1)
902 goto finished;
903 if (code < 16)
904 dctx.lengths[dctx.lenptr++] = code;
905 else {
906 dctx.lenextrabits = (code == 16 ? 2 : code == 17 ? 3 : 7);
907 dctx.lenaddon = (code == 18 ? 11 : 3);
908 dctx.lenrep = (code == 16 && dctx.lenptr > 0 ?
909 dctx.lengths[dctx.lenptr-1] : 0);
910 dctx.state = TREES_LENREP;
911 }
912 break;
913 case TREES_LENREP:
914 if (dctx.nbits < dctx.lenextrabits)
915 goto finished;
916 rep = dctx.lenaddon + (dctx.bits & ((1<<dctx.lenextrabits)-1));
917 EATBITS(dctx.lenextrabits);
918 while (rep > 0 && dctx.lenptr < dctx.hlit+dctx.hdist) {
919 dctx.lengths[dctx.lenptr] = dctx.lenrep;
920 dctx.lenptr++;
921 rep--;
922 }
923 dctx.state = TREES_LEN;
924 break;
925 case INBLK:
926 code = zlib_huflookup(&dctx.bits, &dctx.nbits, dctx.currlentable);
927 if (code == -1)
928 goto finished;
929 if (code < 256)
930 zlib_emit_char(code);
931 else if (code == 256) {
932 dctx.state = OUTSIDEBLK;
933 /* FIXME: zlib_freetable(both) if not static */
934 } else if (code < 286) { /* static tree can give >285; ignore */
935 dctx.state = GOTLENSYM;
936 dctx.sym = code;
937 }
938 break;
939 case GOTLENSYM:
940 rec = &lencodes[dctx.sym - 257];
941 if (dctx.nbits < rec->extrabits)
942 goto finished;
943 dctx.len = rec->min + (dctx.bits & ((1<<rec->extrabits)-1));
944 EATBITS(rec->extrabits);
945 dctx.state = GOTLEN;
946 break;
947 case GOTLEN:
948 code = zlib_huflookup(&dctx.bits, &dctx.nbits, dctx.currdisttable);
949 if (code == -1)
950 goto finished;
951 dctx.state = GOTDISTSYM;
952 dctx.sym = code;
953 break;
954 case GOTDISTSYM:
955 rec = &distcodes[dctx.sym];
956 if (dctx.nbits < rec->extrabits)
957 goto finished;
958 dist = rec->min + (dctx.bits & ((1<<rec->extrabits)-1));
959 EATBITS(rec->extrabits);
960 dctx.state = INBLK;
961 while (dctx.len--)
962 zlib_emit_char(dctx.window[(dctx.winpos-dist) & (WINSIZE-1)]);
963 break;
964 case UNCOMP_LEN:
965 /*
966 * Uncompressed block. We expect to see a 16-bit LEN.
967 */
968 if (dctx.nbits < 16)
969 goto finished;
970 dctx.uncomplen = dctx.bits & 0xFFFF;
971 EATBITS(16);
972 dctx.state = UNCOMP_NLEN;
973 break;
974 case UNCOMP_NLEN:
975 /*
976 * Uncompressed block. We expect to see a 16-bit NLEN,
977 * which should be the one's complement of the previous
978 * LEN.
979 */
980 if (dctx.nbits < 16)
981 goto finished;
982 nlen = dctx.bits & 0xFFFF;
983 EATBITS(16);
984 dctx.state = UNCOMP_DATA;
985 break;
986 case UNCOMP_DATA:
987 if (dctx.nbits < 8)
988 goto finished;
989 zlib_emit_char(dctx.bits & 0xFF);
990 EATBITS(8);
991 if (--dctx.uncomplen == 0)
992 dctx.state = OUTSIDEBLK; /* end of uncompressed block */
993 break;
994 }
995 }
996
997 finished:
998 *outblock = dctx.outblk;
999 *outlen = dctx.outlen;
1000
1001 return 1;
1002 }
1003
1004 const struct ssh_compress ssh_zlib = {
1005 "zlib",
1006 zlib_compress_init,
1007 zlib_compress_block,
1008 zlib_decompress_init,
1009 zlib_decompress_block
1010 };