4ba9b64b |
1 | /* |
2 | * Zlib (RFC1950 / RFC1951) compression for PuTTY. |
3 | * |
4 | * There will no doubt be criticism of my decision to reimplement |
5 | * Zlib compression from scratch instead of using the existing zlib |
6 | * code. People will cry `reinventing the wheel'; they'll claim |
7 | * that the `fundamental basis of OSS' is code reuse; they'll want |
8 | * to see a really good reason for me having chosen not to use the |
9 | * existing code. |
10 | * |
11 | * Well, here are my reasons. Firstly, I don't want to link the |
12 | * whole of zlib into the PuTTY binary; PuTTY is justifiably proud |
13 | * of its small size and I think zlib contains a lot of unnecessary |
14 | * baggage for the kind of compression that SSH requires. |
15 | * |
16 | * Secondly, I also don't like the alternative of using zlib.dll. |
17 | * Another thing PuTTY is justifiably proud of is its ease of |
18 | * installation, and the last thing I want to do is to start |
19 | * mandating DLLs. Not only that, but there are two _kinds_ of |
20 | * zlib.dll kicking around, one with C calling conventions on the |
21 | * exported functions and another with WINAPI conventions, and |
22 | * there would be a significant danger of getting the wrong one. |
23 | * |
24 | * Thirdly, there seems to be a difference of opinion on the IETF |
25 | * secsh mailing list about the correct way to round off a |
26 | * compressed packet and start the next. In particular, there's |
27 | * some talk of switching to a mechanism zlib isn't currently |
28 | * capable of supporting (see below for an explanation). Given that |
29 | * sort of uncertainty, I thought it might be better to have code |
30 | * that will support even the zlib-incompatible worst case. |
31 | * |
32 | * Fourthly, it's a _second implementation_. Second implementations |
33 | * are fundamentally a Good Thing in standardisation efforts. The |
34 | * difference of opinion mentioned above has arisen _precisely_ |
35 | * because there has been only one zlib implementation and |
36 | * everybody has used it. I don't intend that this should happen |
37 | * again. |
38 | */ |
39 | |
40 | #include <stdlib.h> |
41 | #include <assert.h> |
42 | |
43 | /* FIXME */ |
44 | #include <windows.h> |
45 | #include <stdio.h> |
46 | #include "putty.h" |
47 | |
48 | #include "ssh.h" |
49 | |
50 | /* ---------------------------------------------------------------------- |
51 | * Basic LZ77 code. This bit is designed modularly, so it could be |
52 | * ripped out and used in a different LZ77 compressor. Go to it, |
53 | * and good luck :-) |
54 | */ |
55 | |
56 | struct LZ77InternalContext; |
57 | struct LZ77Context { |
58 | struct LZ77InternalContext *ictx; |
59 | void *userdata; |
60 | void (*literal)(struct LZ77Context *ctx, unsigned char c); |
61 | void (*match)(struct LZ77Context *ctx, int distance, int len); |
62 | }; |
63 | |
64 | /* |
65 | * Initialise the private fields of an LZ77Context. It's up to the |
66 | * user to initialise the public fields. |
67 | */ |
68 | static int lz77_init(struct LZ77Context *ctx); |
69 | |
70 | /* |
71 | * Supply data to be compressed. Will update the private fields of |
72 | * the LZ77Context, and will call literal() and match() to output. |
73 | */ |
74 | static void lz77_compress(struct LZ77Context *ctx, |
75 | unsigned char *data, int len); |
76 | |
77 | /* |
78 | * Modifiable parameters. |
79 | */ |
80 | #define WINSIZE 32768 /* window size. Must be power of 2! */ |
81 | #define HASHMAX 2039 /* one more than max hash value */ |
82 | #define MAXMATCH 32 /* how many matches we track */ |
83 | #define HASHCHARS 3 /* how many chars make a hash */ |
84 | |
85 | /* |
86 | * This compressor takes a less slapdash approach than the |
87 | * gzip/zlib one. Rather than allowing our hash chains to fall into |
88 | * disuse near the far end, we keep them doubly linked so we can |
89 | * _find_ the far end, and then every time we add a new byte to the |
90 | * window (thus rolling round by one and removing the previous |
91 | * byte), we can carefully remove the hash chain entry. |
92 | */ |
93 | |
94 | #define INVALID -1 /* invalid hash _and_ invalid offset */ |
95 | struct WindowEntry { |
96 | int next, prev; /* array indices within the window */ |
97 | int hashval; |
98 | }; |
99 | |
100 | struct HashEntry { |
101 | int first; /* window index of first in chain */ |
102 | }; |
103 | |
104 | struct Match { |
105 | int distance, len; |
106 | }; |
107 | |
108 | struct LZ77InternalContext { |
109 | struct WindowEntry win[WINSIZE]; |
110 | unsigned char data[WINSIZE]; |
111 | int winpos; |
112 | struct HashEntry hashtab[HASHMAX]; |
113 | unsigned char pending[HASHCHARS]; |
114 | int npending; |
115 | }; |
116 | |
117 | static int lz77_hash(unsigned char *data) { |
118 | return (257*data[0] + 263*data[1] + 269*data[2]) % HASHMAX; |
119 | } |
120 | |
121 | static int lz77_init(struct LZ77Context *ctx) { |
122 | struct LZ77InternalContext *st; |
123 | int i; |
124 | |
dcbde236 |
125 | st = (struct LZ77InternalContext *)smalloc(sizeof(*st)); |
4ba9b64b |
126 | if (!st) |
127 | return 0; |
128 | |
129 | ctx->ictx = st; |
130 | |
131 | for (i = 0; i < WINSIZE; i++) |
132 | st->win[i].next = st->win[i].prev = st->win[i].hashval = INVALID; |
133 | for (i = 0; i < HASHMAX; i++) |
134 | st->hashtab[i].first = INVALID; |
135 | st->winpos = 0; |
136 | |
137 | st->npending = 0; |
138 | |
139 | return 1; |
140 | } |
141 | |
142 | static void lz77_advance(struct LZ77InternalContext *st, |
143 | unsigned char c, int hash) { |
144 | int off; |
145 | |
146 | /* |
147 | * Remove the hash entry at winpos from the tail of its chain, |
148 | * or empty the chain if it's the only thing on the chain. |
149 | */ |
150 | if (st->win[st->winpos].prev != INVALID) { |
151 | st->win[st->win[st->winpos].prev].next = INVALID; |
152 | } else if (st->win[st->winpos].hashval != INVALID) { |
153 | st->hashtab[st->win[st->winpos].hashval].first = INVALID; |
154 | } |
155 | |
156 | /* |
157 | * Create a new entry at winpos and add it to the head of its |
158 | * hash chain. |
159 | */ |
160 | st->win[st->winpos].hashval = hash; |
161 | st->win[st->winpos].prev = INVALID; |
162 | off = st->win[st->winpos].next = st->hashtab[hash].first; |
163 | st->hashtab[hash].first = st->winpos; |
164 | if (off != INVALID) |
165 | st->win[off].prev = st->winpos; |
166 | st->data[st->winpos] = c; |
167 | |
168 | /* |
169 | * Advance the window pointer. |
170 | */ |
171 | st->winpos = (st->winpos + 1) & (WINSIZE-1); |
172 | } |
173 | |
174 | #define CHARAT(k) ( (k)<0 ? st->data[(st->winpos+k)&(WINSIZE-1)] : data[k] ) |
175 | |
176 | static void lz77_compress(struct LZ77Context *ctx, |
177 | unsigned char *data, int len) { |
178 | struct LZ77InternalContext *st = ctx->ictx; |
179 | int i, hash, distance, off, nmatch, matchlen, advance; |
180 | struct Match defermatch, matches[MAXMATCH]; |
181 | int deferchr; |
182 | |
183 | /* |
184 | * Add any pending characters from last time to the window. (We |
185 | * might not be able to.) |
186 | */ |
187 | for (i = 0; i < st->npending; i++) { |
188 | unsigned char foo[HASHCHARS]; |
189 | int j; |
190 | if (len + st->npending - i < HASHCHARS) { |
191 | /* Update the pending array. */ |
192 | for (j = i; j < st->npending; j++) |
193 | st->pending[j-i] = st->pending[j]; |
194 | break; |
195 | } |
196 | for (j = 0; j < HASHCHARS; j++) |
197 | foo[j] = (i + j < st->npending ? st->pending[i+j] : |
198 | data[i + j - st->npending]); |
199 | lz77_advance(st, foo[0], lz77_hash(foo)); |
200 | } |
201 | st->npending -= i; |
202 | |
203 | defermatch.len = 0; |
204 | while (len > 0) { |
205 | |
206 | if (len >= HASHCHARS) { |
207 | /* |
208 | * Hash the next few characters. |
209 | */ |
210 | hash = lz77_hash(data); |
211 | |
212 | /* |
213 | * Look the hash up in the corresponding hash chain and see |
214 | * what we can find. |
215 | */ |
216 | nmatch = 0; |
217 | for (off = st->hashtab[hash].first; |
218 | off != INVALID; off = st->win[off].next) { |
219 | /* distance = 1 if off == st->winpos-1 */ |
220 | /* distance = WINSIZE if off == st->winpos */ |
221 | distance = WINSIZE - (off + WINSIZE - st->winpos) % WINSIZE; |
222 | for (i = 0; i < HASHCHARS; i++) |
223 | if (CHARAT(i) != CHARAT(i-distance)) |
224 | break; |
225 | if (i == HASHCHARS) { |
226 | matches[nmatch].distance = distance; |
227 | matches[nmatch].len = 3; |
228 | if (++nmatch >= MAXMATCH) |
229 | break; |
230 | } |
231 | } |
232 | } else { |
233 | nmatch = 0; |
234 | hash = INVALID; |
235 | } |
236 | |
237 | if (nmatch > 0) { |
238 | /* |
239 | * We've now filled up matches[] with nmatch potential |
240 | * matches. Follow them down to find the longest. (We |
241 | * assume here that it's always worth favouring a |
242 | * longer match over a shorter one.) |
243 | */ |
244 | matchlen = HASHCHARS; |
245 | while (matchlen < len) { |
246 | int j; |
247 | for (i = j = 0; i < nmatch; i++) { |
248 | if (CHARAT(matchlen) == |
249 | CHARAT(matchlen - matches[i].distance)) { |
250 | matches[j++] = matches[i]; |
251 | } |
252 | } |
253 | if (j == 0) |
254 | break; |
255 | matchlen++; |
256 | nmatch = j; |
257 | } |
258 | |
259 | /* |
260 | * We've now got all the longest matches. We favour the |
261 | * shorter distances, which means we go with matches[0]. |
262 | * So see if we want to defer it or throw it away. |
263 | */ |
264 | matches[0].len = matchlen; |
265 | if (defermatch.len > 0) { |
266 | if (matches[0].len > defermatch.len + 1) { |
267 | /* We have a better match. Emit the deferred char, |
268 | * and defer this match. */ |
269 | ctx->literal(ctx, (unsigned char)deferchr); |
270 | defermatch = matches[0]; |
271 | deferchr = data[0]; |
272 | advance = 1; |
273 | } else { |
274 | /* We don't have a better match. Do the deferred one. */ |
275 | ctx->match(ctx, defermatch.distance, defermatch.len); |
276 | advance = defermatch.len - 1; |
277 | defermatch.len = 0; |
278 | } |
279 | } else { |
280 | /* There was no deferred match. Defer this one. */ |
281 | defermatch = matches[0]; |
282 | deferchr = data[0]; |
283 | advance = 1; |
284 | } |
285 | } else { |
286 | /* |
287 | * We found no matches. Emit the deferred match, if |
288 | * any; otherwise emit a literal. |
289 | */ |
290 | if (defermatch.len > 0) { |
291 | ctx->match(ctx, defermatch.distance, defermatch.len); |
292 | advance = defermatch.len - 1; |
293 | defermatch.len = 0; |
294 | } else { |
295 | ctx->literal(ctx, data[0]); |
296 | advance = 1; |
297 | } |
298 | } |
299 | |
300 | /* |
301 | * Now advance the position by `advance' characters, |
302 | * keeping the window and hash chains consistent. |
303 | */ |
304 | while (advance > 0) { |
305 | if (len >= HASHCHARS) { |
306 | lz77_advance(st, *data, lz77_hash(data)); |
307 | } else { |
308 | st->pending[st->npending++] = *data; |
309 | } |
310 | data++; |
311 | len--; |
312 | advance--; |
313 | } |
314 | } |
315 | } |
316 | |
317 | /* ---------------------------------------------------------------------- |
318 | * Zlib compression. We always use the static Huffman tree option. |
319 | * Mostly this is because it's hard to scan a block in advance to |
320 | * work out better trees; dynamic trees are great when you're |
321 | * compressing a large file under no significant time constraint, |
322 | * but when you're compressing little bits in real time, things get |
323 | * hairier. |
324 | * |
325 | * I suppose it's possible that I could compute Huffman trees based |
326 | * on the frequencies in the _previous_ block, as a sort of |
327 | * heuristic, but I'm not confident that the gain would balance out |
328 | * having to transmit the trees. |
329 | */ |
330 | |
331 | static struct LZ77Context ectx; |
332 | |
333 | struct Outbuf { |
334 | unsigned char *outbuf; |
335 | int outlen, outsize; |
336 | unsigned long outbits; |
337 | int noutbits; |
338 | int firstblock; |
339 | }; |
340 | |
341 | static void outbits(struct Outbuf *out, unsigned long bits, int nbits) { |
342 | assert(out->noutbits + nbits <= 32); |
343 | out->outbits |= bits << out->noutbits; |
344 | out->noutbits += nbits; |
345 | while (out->noutbits >= 8) { |
346 | if (out->outlen >= out->outsize) { |
347 | out->outsize = out->outlen + 64; |
dcbde236 |
348 | out->outbuf = srealloc(out->outbuf, out->outsize); |
4ba9b64b |
349 | } |
350 | out->outbuf[out->outlen++] = (unsigned char)(out->outbits & 0xFF); |
351 | out->outbits >>= 8; |
352 | out->noutbits -= 8; |
353 | } |
354 | } |
355 | |
356 | static const unsigned char mirrorbytes[256] = { |
357 | 0x00, 0x80, 0x40, 0xc0, 0x20, 0xa0, 0x60, 0xe0, |
358 | 0x10, 0x90, 0x50, 0xd0, 0x30, 0xb0, 0x70, 0xf0, |
359 | 0x08, 0x88, 0x48, 0xc8, 0x28, 0xa8, 0x68, 0xe8, |
360 | 0x18, 0x98, 0x58, 0xd8, 0x38, 0xb8, 0x78, 0xf8, |
361 | 0x04, 0x84, 0x44, 0xc4, 0x24, 0xa4, 0x64, 0xe4, |
362 | 0x14, 0x94, 0x54, 0xd4, 0x34, 0xb4, 0x74, 0xf4, |
363 | 0x0c, 0x8c, 0x4c, 0xcc, 0x2c, 0xac, 0x6c, 0xec, |
364 | 0x1c, 0x9c, 0x5c, 0xdc, 0x3c, 0xbc, 0x7c, 0xfc, |
365 | 0x02, 0x82, 0x42, 0xc2, 0x22, 0xa2, 0x62, 0xe2, |
366 | 0x12, 0x92, 0x52, 0xd2, 0x32, 0xb2, 0x72, 0xf2, |
367 | 0x0a, 0x8a, 0x4a, 0xca, 0x2a, 0xaa, 0x6a, 0xea, |
368 | 0x1a, 0x9a, 0x5a, 0xda, 0x3a, 0xba, 0x7a, 0xfa, |
369 | 0x06, 0x86, 0x46, 0xc6, 0x26, 0xa6, 0x66, 0xe6, |
370 | 0x16, 0x96, 0x56, 0xd6, 0x36, 0xb6, 0x76, 0xf6, |
371 | 0x0e, 0x8e, 0x4e, 0xce, 0x2e, 0xae, 0x6e, 0xee, |
372 | 0x1e, 0x9e, 0x5e, 0xde, 0x3e, 0xbe, 0x7e, 0xfe, |
373 | 0x01, 0x81, 0x41, 0xc1, 0x21, 0xa1, 0x61, 0xe1, |
374 | 0x11, 0x91, 0x51, 0xd1, 0x31, 0xb1, 0x71, 0xf1, |
375 | 0x09, 0x89, 0x49, 0xc9, 0x29, 0xa9, 0x69, 0xe9, |
376 | 0x19, 0x99, 0x59, 0xd9, 0x39, 0xb9, 0x79, 0xf9, |
377 | 0x05, 0x85, 0x45, 0xc5, 0x25, 0xa5, 0x65, 0xe5, |
378 | 0x15, 0x95, 0x55, 0xd5, 0x35, 0xb5, 0x75, 0xf5, |
379 | 0x0d, 0x8d, 0x4d, 0xcd, 0x2d, 0xad, 0x6d, 0xed, |
380 | 0x1d, 0x9d, 0x5d, 0xdd, 0x3d, 0xbd, 0x7d, 0xfd, |
381 | 0x03, 0x83, 0x43, 0xc3, 0x23, 0xa3, 0x63, 0xe3, |
382 | 0x13, 0x93, 0x53, 0xd3, 0x33, 0xb3, 0x73, 0xf3, |
383 | 0x0b, 0x8b, 0x4b, 0xcb, 0x2b, 0xab, 0x6b, 0xeb, |
384 | 0x1b, 0x9b, 0x5b, 0xdb, 0x3b, 0xbb, 0x7b, 0xfb, |
385 | 0x07, 0x87, 0x47, 0xc7, 0x27, 0xa7, 0x67, 0xe7, |
386 | 0x17, 0x97, 0x57, 0xd7, 0x37, 0xb7, 0x77, 0xf7, |
387 | 0x0f, 0x8f, 0x4f, 0xcf, 0x2f, 0xaf, 0x6f, 0xef, |
388 | 0x1f, 0x9f, 0x5f, 0xdf, 0x3f, 0xbf, 0x7f, 0xff, |
389 | }; |
390 | |
391 | typedef struct { |
392 | int code, extrabits, min, max; |
393 | } coderecord; |
394 | |
395 | static const coderecord lencodes[] = { |
396 | {257, 0, 3,3}, |
397 | {258, 0, 4,4}, |
398 | {259, 0, 5,5}, |
399 | {260, 0, 6,6}, |
400 | {261, 0, 7,7}, |
401 | {262, 0, 8,8}, |
402 | {263, 0, 9,9}, |
403 | {264, 0, 10,10}, |
404 | {265, 1, 11,12}, |
405 | {266, 1, 13,14}, |
406 | {267, 1, 15,16}, |
407 | {268, 1, 17,18}, |
408 | {269, 2, 19,22}, |
409 | {270, 2, 23,26}, |
410 | {271, 2, 27,30}, |
411 | {272, 2, 31,34}, |
412 | {273, 3, 35,42}, |
413 | {274, 3, 43,50}, |
414 | {275, 3, 51,58}, |
415 | {276, 3, 59,66}, |
416 | {277, 4, 67,82}, |
417 | {278, 4, 83,98}, |
418 | {279, 4, 99,114}, |
419 | {280, 4, 115,130}, |
420 | {281, 5, 131,162}, |
421 | {282, 5, 163,194}, |
422 | {283, 5, 195,226}, |
423 | {284, 5, 227,257}, |
424 | {285, 0, 258,258}, |
425 | }; |
426 | |
427 | static const coderecord distcodes[] = { |
428 | {0, 0, 1,1}, |
429 | {1, 0, 2,2}, |
430 | {2, 0, 3,3}, |
431 | {3, 0, 4,4}, |
432 | {4, 1, 5,6}, |
433 | {5, 1, 7,8}, |
434 | {6, 2, 9,12}, |
435 | {7, 2, 13,16}, |
436 | {8, 3, 17,24}, |
437 | {9, 3, 25,32}, |
438 | {10, 4, 33,48}, |
439 | {11, 4, 49,64}, |
440 | {12, 5, 65,96}, |
441 | {13, 5, 97,128}, |
442 | {14, 6, 129,192}, |
443 | {15, 6, 193,256}, |
444 | {16, 7, 257,384}, |
445 | {17, 7, 385,512}, |
446 | {18, 8, 513,768}, |
447 | {19, 8, 769,1024}, |
448 | {20, 9, 1025,1536}, |
449 | {21, 9, 1537,2048}, |
450 | {22, 10, 2049,3072}, |
451 | {23, 10, 3073,4096}, |
452 | {24, 11, 4097,6144}, |
453 | {25, 11, 6145,8192}, |
454 | {26, 12, 8193,12288}, |
455 | {27, 12, 12289,16384}, |
456 | {28, 13, 16385,24576}, |
457 | {29, 13, 24577,32768}, |
458 | }; |
459 | |
460 | static void zlib_literal(struct LZ77Context *ectx, unsigned char c) { |
461 | struct Outbuf *out = (struct Outbuf *)ectx->userdata; |
462 | |
463 | if (c <= 143) { |
464 | /* 0 through 143 are 8 bits long starting at 00110000. */ |
465 | outbits(out, mirrorbytes[0x30 + c], 8); |
466 | } else { |
467 | /* 144 through 255 are 9 bits long starting at 110010000. */ |
468 | outbits(out, 1 + 2*mirrorbytes[0x90 - 144 + c], 9); |
469 | } |
470 | } |
471 | |
472 | static void zlib_match(struct LZ77Context *ectx, int distance, int len) { |
473 | const coderecord *d, *l; |
474 | int i, j, k; |
475 | struct Outbuf *out = (struct Outbuf *)ectx->userdata; |
476 | while (len > 0) { |
477 | int thislen; |
478 | |
479 | /* |
480 | * We can transmit matches of lengths 3 through 258 |
481 | * inclusive. So if len exceeds 258, we must transmit in |
482 | * several steps, with 258 or less in each step. |
483 | * |
484 | * Specifically: if len >= 261, we can transmit 258 and be |
485 | * sure of having at least 3 left for the next step. And if |
486 | * len <= 258, we can just transmit len. But if len == 259 |
487 | * or 260, we must transmit len-3. |
488 | */ |
489 | thislen = (len > 260 ? 258 : len <= 258 ? len : len-3); |
490 | len -= thislen; |
491 | |
492 | /* |
493 | * Binary-search to find which length code we're |
494 | * transmitting. |
495 | */ |
496 | i = -1; j = sizeof(lencodes)/sizeof(*lencodes); |
497 | while (j - i >= 2) { |
498 | k = (j+i)/2; |
499 | if (thislen < lencodes[k].min) |
500 | j = k; |
501 | else if (thislen > lencodes[k].max) |
502 | i = k; |
503 | else { |
504 | l = &lencodes[k]; |
505 | break; /* found it! */ |
506 | } |
507 | } |
508 | |
509 | /* |
510 | * Transmit the length code. 256-279 are seven bits |
511 | * starting at 0000000; 280-287 are eight bits starting at |
512 | * 11000000. |
513 | */ |
514 | if (l->code <= 279) { |
515 | outbits(out, mirrorbytes[(l->code-256)*2], 7); |
516 | } else { |
517 | outbits(out, mirrorbytes[0xc0 - 280 + l->code], 8); |
518 | } |
519 | |
520 | /* |
521 | * Transmit the extra bits. |
522 | */ |
523 | if (l->extrabits) |
524 | outbits(out, thislen - l->min, l->extrabits); |
525 | |
526 | /* |
527 | * Binary-search to find which distance code we're |
528 | * transmitting. |
529 | */ |
530 | i = -1; j = sizeof(distcodes)/sizeof(*distcodes); |
531 | while (j - i >= 2) { |
532 | k = (j+i)/2; |
533 | if (distance < distcodes[k].min) |
534 | j = k; |
535 | else if (distance > distcodes[k].max) |
536 | i = k; |
537 | else { |
538 | d = &distcodes[k]; |
539 | break; /* found it! */ |
540 | } |
541 | } |
542 | |
543 | /* |
544 | * Transmit the distance code. Five bits starting at 00000. |
545 | */ |
546 | outbits(out, mirrorbytes[d->code*8], 5); |
547 | |
548 | /* |
549 | * Transmit the extra bits. |
550 | */ |
551 | if (d->extrabits) |
552 | outbits(out, distance - d->min, d->extrabits); |
553 | } |
554 | } |
555 | |
556 | void zlib_compress_init(void) { |
557 | struct Outbuf *out; |
558 | |
559 | lz77_init(&ectx); |
560 | ectx.literal = zlib_literal; |
561 | ectx.match = zlib_match; |
562 | |
dcbde236 |
563 | out = smalloc(sizeof(struct Outbuf)); |
4ba9b64b |
564 | out->outbits = out->noutbits = 0; |
565 | out->firstblock = 1; |
566 | ectx.userdata = out; |
567 | |
568 | logevent("Initialised zlib (RFC1950) compression"); |
569 | } |
570 | |
571 | int zlib_compress_block(unsigned char *block, int len, |
572 | unsigned char **outblock, int *outlen) { |
573 | struct Outbuf *out = (struct Outbuf *)ectx.userdata; |
574 | |
575 | out->outbuf = NULL; |
576 | out->outlen = out->outsize = 0; |
577 | |
578 | /* |
579 | * If this is the first block, output the Zlib (RFC1950) header |
580 | * bytes 78 9C. (Deflate compression, 32K window size, default |
581 | * algorithm.) |
582 | */ |
583 | if (out->firstblock) { |
584 | outbits(out, 0x9C78, 16); |
585 | out->firstblock = 0; |
586 | /* |
587 | * Start a Deflate (RFC1951) fixed-trees block. We transmit |
588 | * a zero bit (BFINAL=0), followed by a zero bit and a one |
589 | * bit (BTYPE=01). Of course these are in the wrong order |
590 | * (01 0). |
591 | */ |
592 | outbits(out, 2, 3); |
593 | } |
594 | |
595 | /* |
596 | * Do the compression. |
597 | */ |
598 | lz77_compress(&ectx, block, len); |
599 | /* |
600 | * End the block (by transmitting code 256, which is 0000000 in |
601 | * fixed-tree mode), and transmit some empty blocks to ensure |
602 | * we have emitted the byte containing the last piece of |
603 | * genuine data. There are three ways we can do this: |
604 | * |
605 | * - Minimal flush. Output end-of-block and then open a new |
606 | * static block. This takes 9 bits, which is guaranteed to |
607 | * flush out the last genuine code in the closed block; but |
608 | * allegedly zlib can't handle it. |
609 | * |
610 | * - Zlib partial flush. Output EOB, open and close an empty |
611 | * static block, and _then_ open the new block. This is the |
612 | * best zlib can handle. |
613 | * |
614 | * - Zlib sync flush. Output EOB, then an empty _uncompressed_ |
615 | * block (000, then sync to byte boundary, then send bytes |
616 | * 00 00 FF FF). Then open the new block. |
617 | * |
618 | * For the moment, we will use Zlib partial flush. |
619 | */ |
620 | outbits(out, 0, 7); /* close block */ |
621 | outbits(out, 2, 3+7); /* empty static block */ |
622 | outbits(out, 2, 3); /* open new block */ |
623 | |
624 | *outblock = out->outbuf; |
625 | *outlen = out->outlen; |
626 | |
627 | return 1; |
628 | } |
629 | |
630 | /* ---------------------------------------------------------------------- |
631 | * Zlib decompression. Of course, even though our compressor always |
632 | * uses static trees, our _decompressor_ has to be capable of |
633 | * handling dynamic trees if it sees them. |
634 | */ |
635 | |
636 | /* |
637 | * The way we work the Huffman decode is to have a table lookup on |
638 | * the first N bits of the input stream (in the order they arrive, |
639 | * of course, i.e. the first bit of the Huffman code is in bit 0). |
640 | * Each table entry lists the number of bits to consume, plus |
641 | * either an output code or a pointer to a secondary table. |
642 | */ |
643 | struct zlib_table; |
644 | struct zlib_tableentry; |
645 | |
646 | struct zlib_tableentry { |
647 | unsigned char nbits; |
648 | int code; |
649 | struct zlib_table *nexttable; |
650 | }; |
651 | |
652 | struct zlib_table { |
653 | int mask; /* mask applied to input bit stream */ |
654 | struct zlib_tableentry *table; |
655 | }; |
656 | |
657 | #define MAXCODELEN 16 |
658 | #define MAXSYMS 288 |
659 | |
660 | /* |
661 | * Build a single-level decode table for elements |
662 | * [minlength,maxlength) of the provided code/length tables, and |
663 | * recurse to build subtables. |
664 | */ |
665 | static struct zlib_table *zlib_mkonetab(int *codes, unsigned char *lengths, |
666 | int nsyms, |
667 | int pfx, int pfxbits, int bits) { |
dcbde236 |
668 | struct zlib_table *tab = smalloc(sizeof(struct zlib_table)); |
4ba9b64b |
669 | int pfxmask = (1 << pfxbits) - 1; |
670 | int nbits, i, j, code; |
671 | |
dcbde236 |
672 | tab->table = smalloc((1 << bits) * sizeof(struct zlib_tableentry)); |
4ba9b64b |
673 | tab->mask = (1 << bits) - 1; |
674 | |
675 | for (code = 0; code <= tab->mask; code++) { |
676 | tab->table[code].code = -1; |
677 | tab->table[code].nbits = 0; |
678 | tab->table[code].nexttable = NULL; |
679 | } |
680 | |
681 | for (i = 0; i < nsyms; i++) { |
682 | if (lengths[i] <= pfxbits || (codes[i] & pfxmask) != pfx) |
683 | continue; |
684 | code = (codes[i] >> pfxbits) & tab->mask; |
685 | for (j = code; j <= tab->mask; j += 1 << (lengths[i]-pfxbits)) { |
686 | tab->table[j].code = i; |
687 | nbits = lengths[i] - pfxbits; |
688 | if (tab->table[j].nbits < nbits) |
689 | tab->table[j].nbits = nbits; |
690 | } |
691 | } |
692 | for (code = 0; code <= tab->mask; code++) { |
693 | if (tab->table[code].nbits <= bits) |
694 | continue; |
695 | /* Generate a subtable. */ |
696 | tab->table[code].code = -1; |
697 | nbits = tab->table[code].nbits - bits; |
698 | if (nbits > 7) |
699 | nbits = 7; |
700 | tab->table[code].nbits = bits; |
701 | tab->table[code].nexttable = zlib_mkonetab(codes, lengths, nsyms, |
702 | pfx | (code << pfxbits), |
703 | pfxbits + bits, nbits); |
704 | } |
705 | |
706 | return tab; |
707 | } |
708 | |
709 | /* |
710 | * Build a decode table, given a set of Huffman tree lengths. |
711 | */ |
712 | static struct zlib_table *zlib_mktable(unsigned char *lengths, int nlengths) { |
713 | int count[MAXCODELEN], startcode[MAXCODELEN], codes[MAXSYMS]; |
714 | int code, maxlen; |
715 | int i, j; |
716 | |
717 | /* Count the codes of each length. */ |
718 | maxlen = 0; |
719 | for (i = 1; i < MAXCODELEN; i++) count[i] = 0; |
720 | for (i = 0; i < nlengths; i++) { |
721 | count[lengths[i]]++; |
722 | if (maxlen < lengths[i]) |
723 | maxlen = lengths[i]; |
724 | } |
725 | /* Determine the starting code for each length block. */ |
726 | code = 0; |
727 | for (i = 1; i < MAXCODELEN; i++) { |
728 | startcode[i] = code; |
729 | code += count[i]; |
730 | code <<= 1; |
731 | } |
732 | /* Determine the code for each symbol. Mirrored, of course. */ |
733 | for (i = 0; i < nlengths; i++) { |
734 | code = startcode[lengths[i]]++; |
735 | codes[i] = 0; |
736 | for (j = 0; j < lengths[i]; j++) { |
737 | codes[i] = (codes[i] << 1) | (code & 1); |
738 | code >>= 1; |
739 | } |
740 | } |
741 | |
742 | /* |
743 | * Now we have the complete list of Huffman codes. Build a |
744 | * table. |
745 | */ |
746 | return zlib_mkonetab(codes, lengths, nlengths, 0, 0, |
747 | maxlen < 9 ? maxlen : 9); |
748 | } |
749 | |
dda46d54 |
750 | static int zlib_freetable(struct zlib_table ** ztab) { |
751 | struct zlib_table *tab; |
752 | int code; |
753 | |
754 | if (ztab == NULL) |
755 | return -1; |
756 | |
757 | if (*ztab == NULL) |
758 | return 0; |
759 | |
760 | tab = *ztab; |
761 | |
762 | for (code = 0; code <= tab->mask; code++) |
763 | if (tab->table[code].nexttable != NULL) |
764 | zlib_freetable(&tab->table[code].nexttable); |
765 | |
766 | sfree(tab->table); |
767 | tab->table = NULL; |
768 | |
769 | sfree(tab); |
770 | *ztab = NULL; |
771 | |
772 | return(0); |
773 | } |
774 | |
4ba9b64b |
775 | static struct zlib_decompress_ctx { |
776 | struct zlib_table *staticlentable, *staticdisttable; |
777 | struct zlib_table *currlentable, *currdisttable, *lenlentable; |
778 | enum { |
779 | START, OUTSIDEBLK, |
780 | TREES_HDR, TREES_LENLEN, TREES_LEN, TREES_LENREP, |
781 | INBLK, GOTLENSYM, GOTLEN, GOTDISTSYM, |
782 | UNCOMP_LEN, UNCOMP_NLEN, UNCOMP_DATA |
783 | } state; |
784 | int sym, hlit, hdist, hclen, lenptr, lenextrabits, lenaddon, len, lenrep; |
785 | int uncomplen; |
786 | unsigned char lenlen[19]; |
787 | unsigned char lengths[286+32]; |
788 | unsigned long bits; |
789 | int nbits; |
790 | unsigned char window[WINSIZE]; |
791 | int winpos; |
792 | unsigned char *outblk; |
793 | int outlen, outsize; |
794 | } dctx; |
795 | |
796 | void zlib_decompress_init(void) { |
797 | unsigned char lengths[288]; |
798 | memset(lengths, 8, 144); |
799 | memset(lengths+144, 9, 256-144); |
800 | memset(lengths+256, 7, 280-256); |
801 | memset(lengths+280, 8, 288-280); |
802 | dctx.staticlentable = zlib_mktable(lengths, 288); |
803 | memset(lengths, 5, 32); |
804 | dctx.staticdisttable = zlib_mktable(lengths, 32); |
805 | dctx.state = START; /* even before header */ |
dcbde236 |
806 | dctx.currlentable = dctx.currdisttable = dctx.lenlentable = NULL; |
4ba9b64b |
807 | dctx.bits = 0; |
808 | dctx.nbits = 0; |
809 | logevent("Initialised zlib (RFC1950) decompression"); |
810 | } |
811 | |
812 | int zlib_huflookup(unsigned long *bitsp, int *nbitsp, struct zlib_table *tab) { |
813 | unsigned long bits = *bitsp; |
814 | int nbits = *nbitsp; |
815 | while (1) { |
816 | struct zlib_tableentry *ent; |
817 | ent = &tab->table[bits & tab->mask]; |
818 | if (ent->nbits > nbits) |
819 | return -1; /* not enough data */ |
820 | bits >>= ent->nbits; |
821 | nbits -= ent->nbits; |
822 | if (ent->code == -1) |
823 | tab = ent->nexttable; |
824 | else { |
825 | *bitsp = bits; |
826 | *nbitsp = nbits; |
827 | return ent->code; |
828 | } |
829 | } |
830 | } |
831 | |
832 | static void zlib_emit_char(int c) { |
833 | dctx.window[dctx.winpos] = c; |
834 | dctx.winpos = (dctx.winpos + 1) & (WINSIZE-1); |
835 | if (dctx.outlen >= dctx.outsize) { |
836 | dctx.outsize = dctx.outlen + 512; |
dcbde236 |
837 | dctx.outblk = srealloc(dctx.outblk, dctx.outsize); |
4ba9b64b |
838 | } |
839 | dctx.outblk[dctx.outlen++] = c; |
840 | } |
841 | |
842 | #define EATBITS(n) ( dctx.nbits -= (n), dctx.bits >>= (n) ) |
843 | |
844 | int zlib_decompress_block(unsigned char *block, int len, |
845 | unsigned char **outblock, int *outlen) { |
846 | const coderecord *rec; |
847 | int code, blktype, rep, dist, nlen; |
848 | static const unsigned char lenlenmap[] = { |
849 | 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 |
850 | }; |
851 | |
852 | dctx.outblk = NULL; |
853 | dctx.outsize = dctx.outlen = 0; |
854 | |
855 | while (len > 0 || dctx.nbits > 0) { |
856 | while (dctx.nbits < 24 && len > 0) { |
857 | dctx.bits |= (*block++) << dctx.nbits; |
858 | dctx.nbits += 8; |
859 | len--; |
860 | } |
861 | switch (dctx.state) { |
862 | case START: |
863 | /* Expect 16-bit zlib header, which we'll dishonourably ignore. */ |
864 | if (dctx.nbits < 16) |
865 | goto finished; /* done all we can */ |
866 | EATBITS(16); |
867 | dctx.state = OUTSIDEBLK; |
868 | break; |
869 | case OUTSIDEBLK: |
870 | /* Expect 3-bit block header. */ |
871 | if (dctx.nbits < 3) |
872 | goto finished; /* done all we can */ |
873 | EATBITS(1); |
874 | blktype = dctx.bits & 3; |
875 | EATBITS(2); |
876 | if (blktype == 0) { |
877 | int to_eat = dctx.nbits & 7; |
878 | dctx.state = UNCOMP_LEN; |
879 | EATBITS(to_eat); /* align to byte boundary */ |
880 | } else if (blktype == 1) { |
881 | dctx.currlentable = dctx.staticlentable; |
882 | dctx.currdisttable = dctx.staticdisttable; |
883 | dctx.state = INBLK; |
884 | } else if (blktype == 2) { |
885 | dctx.state = TREES_HDR; |
886 | } |
887 | break; |
888 | case TREES_HDR: |
889 | /* |
890 | * Dynamic block header. Five bits of HLIT, five of |
891 | * HDIST, four of HCLEN. |
892 | */ |
893 | if (dctx.nbits < 5+5+4) |
894 | goto finished; /* done all we can */ |
895 | dctx.hlit = 257 + (dctx.bits & 31); EATBITS(5); |
896 | dctx.hdist = 1 + (dctx.bits & 31); EATBITS(5); |
897 | dctx.hclen = 4 + (dctx.bits & 15); EATBITS(4); |
898 | dctx.lenptr = 0; |
899 | dctx.state = TREES_LENLEN; |
900 | memset(dctx.lenlen, 0, sizeof(dctx.lenlen)); |
901 | break; |
902 | case TREES_LENLEN: |
903 | if (dctx.nbits < 3) |
904 | goto finished; |
905 | while (dctx.lenptr < dctx.hclen && dctx.nbits >= 3) { |
906 | dctx.lenlen[lenlenmap[dctx.lenptr++]] = |
907 | (unsigned char)(dctx.bits & 7); |
908 | EATBITS(3); |
909 | } |
910 | if (dctx.lenptr == dctx.hclen) { |
911 | dctx.lenlentable = zlib_mktable(dctx.lenlen, 19); |
912 | dctx.state = TREES_LEN; |
913 | dctx.lenptr = 0; |
914 | } |
915 | break; |
916 | case TREES_LEN: |
917 | if (dctx.lenptr >= dctx.hlit+dctx.hdist) { |
918 | dctx.currlentable = zlib_mktable(dctx.lengths, dctx.hlit); |
919 | dctx.currdisttable = zlib_mktable(dctx.lengths + dctx.hlit, |
920 | dctx.hdist); |
dda46d54 |
921 | zlib_freetable(&dctx.lenlentable); |
922 | dctx.state = INBLK; |
4ba9b64b |
923 | break; |
924 | } |
925 | code = zlib_huflookup(&dctx.bits, &dctx.nbits, dctx.lenlentable); |
926 | if (code == -1) |
927 | goto finished; |
928 | if (code < 16) |
929 | dctx.lengths[dctx.lenptr++] = code; |
930 | else { |
931 | dctx.lenextrabits = (code == 16 ? 2 : code == 17 ? 3 : 7); |
932 | dctx.lenaddon = (code == 18 ? 11 : 3); |
933 | dctx.lenrep = (code == 16 && dctx.lenptr > 0 ? |
934 | dctx.lengths[dctx.lenptr-1] : 0); |
935 | dctx.state = TREES_LENREP; |
936 | } |
937 | break; |
938 | case TREES_LENREP: |
939 | if (dctx.nbits < dctx.lenextrabits) |
940 | goto finished; |
941 | rep = dctx.lenaddon + (dctx.bits & ((1<<dctx.lenextrabits)-1)); |
942 | EATBITS(dctx.lenextrabits); |
943 | while (rep > 0 && dctx.lenptr < dctx.hlit+dctx.hdist) { |
944 | dctx.lengths[dctx.lenptr] = dctx.lenrep; |
945 | dctx.lenptr++; |
946 | rep--; |
947 | } |
948 | dctx.state = TREES_LEN; |
949 | break; |
950 | case INBLK: |
951 | code = zlib_huflookup(&dctx.bits, &dctx.nbits, dctx.currlentable); |
952 | if (code == -1) |
953 | goto finished; |
954 | if (code < 256) |
955 | zlib_emit_char(code); |
956 | else if (code == 256) { |
957 | dctx.state = OUTSIDEBLK; |
dda46d54 |
958 | if (dctx.currlentable != dctx.staticlentable) |
959 | zlib_freetable(&dctx.currlentable); |
960 | if (dctx.currdisttable != dctx.staticdisttable) |
961 | zlib_freetable(&dctx.currdisttable); |
4ba9b64b |
962 | } else if (code < 286) { /* static tree can give >285; ignore */ |
963 | dctx.state = GOTLENSYM; |
964 | dctx.sym = code; |
965 | } |
966 | break; |
967 | case GOTLENSYM: |
968 | rec = &lencodes[dctx.sym - 257]; |
969 | if (dctx.nbits < rec->extrabits) |
970 | goto finished; |
971 | dctx.len = rec->min + (dctx.bits & ((1<<rec->extrabits)-1)); |
972 | EATBITS(rec->extrabits); |
973 | dctx.state = GOTLEN; |
974 | break; |
975 | case GOTLEN: |
976 | code = zlib_huflookup(&dctx.bits, &dctx.nbits, dctx.currdisttable); |
977 | if (code == -1) |
978 | goto finished; |
979 | dctx.state = GOTDISTSYM; |
980 | dctx.sym = code; |
981 | break; |
982 | case GOTDISTSYM: |
983 | rec = &distcodes[dctx.sym]; |
984 | if (dctx.nbits < rec->extrabits) |
985 | goto finished; |
986 | dist = rec->min + (dctx.bits & ((1<<rec->extrabits)-1)); |
987 | EATBITS(rec->extrabits); |
988 | dctx.state = INBLK; |
989 | while (dctx.len--) |
990 | zlib_emit_char(dctx.window[(dctx.winpos-dist) & (WINSIZE-1)]); |
991 | break; |
992 | case UNCOMP_LEN: |
993 | /* |
994 | * Uncompressed block. We expect to see a 16-bit LEN. |
995 | */ |
996 | if (dctx.nbits < 16) |
997 | goto finished; |
998 | dctx.uncomplen = dctx.bits & 0xFFFF; |
999 | EATBITS(16); |
1000 | dctx.state = UNCOMP_NLEN; |
1001 | break; |
1002 | case UNCOMP_NLEN: |
1003 | /* |
1004 | * Uncompressed block. We expect to see a 16-bit NLEN, |
1005 | * which should be the one's complement of the previous |
1006 | * LEN. |
1007 | */ |
1008 | if (dctx.nbits < 16) |
1009 | goto finished; |
1010 | nlen = dctx.bits & 0xFFFF; |
1011 | EATBITS(16); |
1012 | dctx.state = UNCOMP_DATA; |
1013 | break; |
1014 | case UNCOMP_DATA: |
1015 | if (dctx.nbits < 8) |
1016 | goto finished; |
1017 | zlib_emit_char(dctx.bits & 0xFF); |
1018 | EATBITS(8); |
1019 | if (--dctx.uncomplen == 0) |
1020 | dctx.state = OUTSIDEBLK; /* end of uncompressed block */ |
1021 | break; |
1022 | } |
1023 | } |
1024 | |
1025 | finished: |
1026 | *outblock = dctx.outblk; |
1027 | *outlen = dctx.outlen; |
1028 | |
1029 | return 1; |
1030 | } |
1031 | |
1032 | const struct ssh_compress ssh_zlib = { |
1033 | "zlib", |
1034 | zlib_compress_init, |
1035 | zlib_compress_block, |
1036 | zlib_decompress_init, |
1037 | zlib_decompress_block |
1038 | }; |