git.distorted.org.uk Git - u/mdw/putty/blame_incremental

... / ...

Commit	Line	Data
	1	/*
	2	* Zlib (RFC1950 / RFC1951) compression for PuTTY.
	3	*
	4	* There will no doubt be criticism of my decision to reimplement
	5	* Zlib compression from scratch instead of using the existing zlib
	6	* code. People will cry `reinventing the wheel'; they'll claim
	7	* that the `fundamental basis of OSS' is code reuse; they'll want
	8	* to see a really good reason for me having chosen not to use the
	9	* existing code.
	10	*
	11	* Well, here are my reasons. Firstly, I don't want to link the
	12	* whole of zlib into the PuTTY binary; PuTTY is justifiably proud
	13	* of its small size and I think zlib contains a lot of unnecessary
	14	* baggage for the kind of compression that SSH requires.
	15	*
	16	* Secondly, I also don't like the alternative of using zlib.dll.
	17	* Another thing PuTTY is justifiably proud of is its ease of
	18	* installation, and the last thing I want to do is to start
	19	* mandating DLLs. Not only that, but there are two _kinds_ of
	20	* zlib.dll kicking around, one with C calling conventions on the
	21	* exported functions and another with WINAPI conventions, and
	22	* there would be a significant danger of getting the wrong one.
	23	*
	24	* Thirdly, there seems to be a difference of opinion on the IETF
	25	* secsh mailing list about the correct way to round off a
	26	* compressed packet and start the next. In particular, there's
	27	* some talk of switching to a mechanism zlib isn't currently
	28	* capable of supporting (see below for an explanation). Given that
	29	* sort of uncertainty, I thought it might be better to have code
	30	* that will support even the zlib-incompatible worst case.
	31	*
	32	* Fourthly, it's a _second implementation_. Second implementations
	33	* are fundamentally a Good Thing in standardisation efforts. The
	34	* difference of opinion mentioned above has arisen _precisely_
	35	* because there has been only one zlib implementation and
	36	* everybody has used it. I don't intend that this should happen
	37	* again.
	38	*/
	39
	40	#include <stdlib.h>
	41	#include <assert.h>
	42
	43	/* FIXME */
	44	#include <windows.h>
	45	#include <stdio.h>
	46	#include "putty.h"
	47
	48	#include "ssh.h"
	49
	50	/* ----------------------------------------------------------------------
	51	* Basic LZ77 code. This bit is designed modularly, so it could be
	52	* ripped out and used in a different LZ77 compressor. Go to it,
	53	* and good luck :-)
	54	*/
	55
	56	struct LZ77InternalContext;
	57	struct LZ77Context {
	58	struct LZ77InternalContext *ictx;
	59	void *userdata;
	60	void (literal)(struct LZ77Context ctx, unsigned char c);
	61	void (match)(struct LZ77Context ctx, int distance, int len);
	62	};
	63
	64	/*
	65	* Initialise the private fields of an LZ77Context. It's up to the
	66	* user to initialise the public fields.
	67	*/
	68	static int lz77_init(struct LZ77Context *ctx);
	69
	70	/*
	71	* Supply data to be compressed. Will update the private fields of
	72	* the LZ77Context, and will call literal() and match() to output.
	73	*/
	74	static void lz77_compress(struct LZ77Context *ctx,
	75	unsigned char *data, int len);
	76
	77	/*
	78	* Modifiable parameters.
	79	*/
	80	#define WINSIZE 32768 /* window size. Must be power of 2! */
	81	#define HASHMAX 2039 /* one more than max hash value */
	82	#define MAXMATCH 32 /* how many matches we track */
	83	#define HASHCHARS 3 /* how many chars make a hash */
	84
	85	/*
	86	* This compressor takes a less slapdash approach than the
	87	* gzip/zlib one. Rather than allowing our hash chains to fall into
	88	* disuse near the far end, we keep them doubly linked so we can
	89	* _find_ the far end, and then every time we add a new byte to the
	90	* window (thus rolling round by one and removing the previous
	91	* byte), we can carefully remove the hash chain entry.
	92	*/
	93
	94	#define INVALID -1 /* invalid hash _and_ invalid offset */
	95	struct WindowEntry {
	96	int next, prev; /* array indices within the window */
	97	int hashval;
	98	};
	99
	100	struct HashEntry {
	101	int first; /* window index of first in chain */
	102	};
	103
	104	struct Match {
	105	int distance, len;
	106	};
	107
	108	struct LZ77InternalContext {
	109	struct WindowEntry win[WINSIZE];
	110	unsigned char data[WINSIZE];
	111	int winpos;
	112	struct HashEntry hashtab[HASHMAX];
	113	unsigned char pending[HASHCHARS];
	114	int npending;
	115	};
	116
	117	static int lz77_hash(unsigned char *data) {
	118	return (257data[0] + 263data[1] + 269*data[2]) % HASHMAX;
	119	}
	120
	121	static int lz77_init(struct LZ77Context *ctx) {
	122	struct LZ77InternalContext *st;
	123	int i;
	124
	125	st = (struct LZ77InternalContext )smalloc(sizeof(st));
	126	if (!st)
	127	return 0;
	128
	129	ctx->ictx = st;
	130
	131	for (i = 0; i < WINSIZE; i++)
	132	st->win[i].next = st->win[i].prev = st->win[i].hashval = INVALID;
	133	for (i = 0; i < HASHMAX; i++)
	134	st->hashtab[i].first = INVALID;
	135	st->winpos = 0;
	136
	137	st->npending = 0;
	138
	139	return 1;
	140	}
	141
	142	static void lz77_advance(struct LZ77InternalContext *st,
	143	unsigned char c, int hash) {
	144	int off;
	145
	146	/*
	147	* Remove the hash entry at winpos from the tail of its chain,
	148	* or empty the chain if it's the only thing on the chain.
	149	*/
	150	if (st->win[st->winpos].prev != INVALID) {
	151	st->win[st->win[st->winpos].prev].next = INVALID;
	152	} else if (st->win[st->winpos].hashval != INVALID) {
	153	st->hashtab[st->win[st->winpos].hashval].first = INVALID;
	154	}
	155
	156	/*
	157	* Create a new entry at winpos and add it to the head of its
	158	* hash chain.
	159	*/
	160	st->win[st->winpos].hashval = hash;
	161	st->win[st->winpos].prev = INVALID;
	162	off = st->win[st->winpos].next = st->hashtab[hash].first;
	163	st->hashtab[hash].first = st->winpos;
	164	if (off != INVALID)
	165	st->win[off].prev = st->winpos;
	166	st->data[st->winpos] = c;
	167
	168	/*
	169	* Advance the window pointer.
	170	*/
	171	st->winpos = (st->winpos + 1) & (WINSIZE-1);
	172	}
	173
	174	#define CHARAT(k) ( (k)<0 ? st->data[(st->winpos+k)&(WINSIZE-1)] : data[k] )
	175
	176	static void lz77_compress(struct LZ77Context *ctx,
	177	unsigned char *data, int len) {
	178	struct LZ77InternalContext *st = ctx->ictx;
	179	int i, hash, distance, off, nmatch, matchlen, advance;
	180	struct Match defermatch, matches[MAXMATCH];
	181	int deferchr;
	182
	183	/*
	184	* Add any pending characters from last time to the window. (We
	185	* might not be able to.)
	186	*/
	187	for (i = 0; i < st->npending; i++) {
	188	unsigned char foo[HASHCHARS];
	189	int j;
	190	if (len + st->npending - i < HASHCHARS) {
	191	/* Update the pending array. */
	192	for (j = i; j < st->npending; j++)
	193	st->pending[j-i] = st->pending[j];
	194	break;
	195	}
	196	for (j = 0; j < HASHCHARS; j++)
	197	foo[j] = (i + j < st->npending ? st->pending[i+j] :
	198	data[i + j - st->npending]);
	199	lz77_advance(st, foo[0], lz77_hash(foo));
	200	}
	201	st->npending -= i;
	202
	203	defermatch.len = 0;
	204	while (len > 0) {
	205
	206	if (len >= HASHCHARS) {
	207	/*
	208	* Hash the next few characters.
	209	*/
	210	hash = lz77_hash(data);
	211
	212	/*
	213	* Look the hash up in the corresponding hash chain and see
	214	* what we can find.
	215	*/
	216	nmatch = 0;
	217	for (off = st->hashtab[hash].first;
	218	off != INVALID; off = st->win[off].next) {
	219	/* distance = 1 if off == st->winpos-1 */
	220	/* distance = WINSIZE if off == st->winpos */
	221	distance = WINSIZE - (off + WINSIZE - st->winpos) % WINSIZE;
	222	for (i = 0; i < HASHCHARS; i++)
	223	if (CHARAT(i) != CHARAT(i-distance))
	224	break;
	225	if (i == HASHCHARS) {
	226	matches[nmatch].distance = distance;
	227	matches[nmatch].len = 3;
	228	if (++nmatch >= MAXMATCH)
	229	break;
	230	}
	231	}
	232	} else {
	233	nmatch = 0;
	234	hash = INVALID;
	235	}
	236
	237	if (nmatch > 0) {
	238	/*
	239	* We've now filled up matches[] with nmatch potential
	240	* matches. Follow them down to find the longest. (We
	241	* assume here that it's always worth favouring a
	242	* longer match over a shorter one.)
	243	*/
	244	matchlen = HASHCHARS;
	245	while (matchlen < len) {
	246	int j;
	247	for (i = j = 0; i < nmatch; i++) {
	248	if (CHARAT(matchlen) ==
	249	CHARAT(matchlen - matches[i].distance)) {
	250	matches[j++] = matches[i];
	251	}
	252	}
	253	if (j == 0)
	254	break;
	255	matchlen++;
	256	nmatch = j;
	257	}
	258
	259	/*
	260	* We've now got all the longest matches. We favour the
	261	* shorter distances, which means we go with matches[0].
	262	* So see if we want to defer it or throw it away.
	263	*/
	264	matches[0].len = matchlen;
	265	if (defermatch.len > 0) {
	266	if (matches[0].len > defermatch.len + 1) {
	267	/* We have a better match. Emit the deferred char,
	268	* and defer this match. */
	269	ctx->literal(ctx, (unsigned char)deferchr);
	270	defermatch = matches[0];
	271	deferchr = data[0];
	272	advance = 1;
	273	} else {
	274	/* We don't have a better match. Do the deferred one. */
	275	ctx->match(ctx, defermatch.distance, defermatch.len);
	276	advance = defermatch.len - 1;
	277	defermatch.len = 0;
	278	}
	279	} else {
	280	/* There was no deferred match. Defer this one. */
	281	defermatch = matches[0];
	282	deferchr = data[0];
	283	advance = 1;
	284	}
	285	} else {
	286	/*
	287	* We found no matches. Emit the deferred match, if
	288	* any; otherwise emit a literal.
	289	*/
	290	if (defermatch.len > 0) {
	291	ctx->match(ctx, defermatch.distance, defermatch.len);
	292	advance = defermatch.len - 1;
	293	defermatch.len = 0;
	294	} else {
	295	ctx->literal(ctx, data[0]);
	296	advance = 1;
	297	}
	298	}
	299
	300	/*
	301	* Now advance the position by `advance' characters,
	302	* keeping the window and hash chains consistent.
	303	*/
	304	while (advance > 0) {
	305	if (len >= HASHCHARS) {
	306	lz77_advance(st, *data, lz77_hash(data));
	307	} else {
	308	st->pending[st->npending++] = *data;
	309	}
	310	data++;
	311	len--;
	312	advance--;
	313	}
	314	}
	315	}
	316
	317	/* ----------------------------------------------------------------------
	318	* Zlib compression. We always use the static Huffman tree option.
	319	* Mostly this is because it's hard to scan a block in advance to
	320	* work out better trees; dynamic trees are great when you're
	321	* compressing a large file under no significant time constraint,
	322	* but when you're compressing little bits in real time, things get
	323	* hairier.
	324	*
	325	* I suppose it's possible that I could compute Huffman trees based
	326	* on the frequencies in the _previous_ block, as a sort of
	327	* heuristic, but I'm not confident that the gain would balance out
	328	* having to transmit the trees.
	329	*/
	330
	331	static struct LZ77Context ectx;
	332
	333	struct Outbuf {
	334	unsigned char *outbuf;
	335	int outlen, outsize;
	336	unsigned long outbits;
	337	int noutbits;
	338	int firstblock;
	339	};
	340
	341	static void outbits(struct Outbuf *out, unsigned long bits, int nbits) {
	342	assert(out->noutbits + nbits <= 32);
	343	out->outbits \|= bits << out->noutbits;
	344	out->noutbits += nbits;
	345	while (out->noutbits >= 8) {
	346	if (out->outlen >= out->outsize) {
	347	out->outsize = out->outlen + 64;
	348	out->outbuf = srealloc(out->outbuf, out->outsize);
	349	}
	350	out->outbuf[out->outlen++] = (unsigned char)(out->outbits & 0xFF);
	351	out->outbits >>= 8;
	352	out->noutbits -= 8;
	353	}
	354	}
	355
	356	static const unsigned char mirrorbytes[256] = {
	357	0x00, 0x80, 0x40, 0xc0, 0x20, 0xa0, 0x60, 0xe0,
	358	0x10, 0x90, 0x50, 0xd0, 0x30, 0xb0, 0x70, 0xf0,
	359	0x08, 0x88, 0x48, 0xc8, 0x28, 0xa8, 0x68, 0xe8,
	360	0x18, 0x98, 0x58, 0xd8, 0x38, 0xb8, 0x78, 0xf8,
	361	0x04, 0x84, 0x44, 0xc4, 0x24, 0xa4, 0x64, 0xe4,
	362	0x14, 0x94, 0x54, 0xd4, 0x34, 0xb4, 0x74, 0xf4,
	363	0x0c, 0x8c, 0x4c, 0xcc, 0x2c, 0xac, 0x6c, 0xec,
	364	0x1c, 0x9c, 0x5c, 0xdc, 0x3c, 0xbc, 0x7c, 0xfc,
	365	0x02, 0x82, 0x42, 0xc2, 0x22, 0xa2, 0x62, 0xe2,
	366	0x12, 0x92, 0x52, 0xd2, 0x32, 0xb2, 0x72, 0xf2,
	367	0x0a, 0x8a, 0x4a, 0xca, 0x2a, 0xaa, 0x6a, 0xea,
	368	0x1a, 0x9a, 0x5a, 0xda, 0x3a, 0xba, 0x7a, 0xfa,
	369	0x06, 0x86, 0x46, 0xc6, 0x26, 0xa6, 0x66, 0xe6,
	370	0x16, 0x96, 0x56, 0xd6, 0x36, 0xb6, 0x76, 0xf6,
	371	0x0e, 0x8e, 0x4e, 0xce, 0x2e, 0xae, 0x6e, 0xee,
	372	0x1e, 0x9e, 0x5e, 0xde, 0x3e, 0xbe, 0x7e, 0xfe,
	373	0x01, 0x81, 0x41, 0xc1, 0x21, 0xa1, 0x61, 0xe1,
	374	0x11, 0x91, 0x51, 0xd1, 0x31, 0xb1, 0x71, 0xf1,
	375	0x09, 0x89, 0x49, 0xc9, 0x29, 0xa9, 0x69, 0xe9,
	376	0x19, 0x99, 0x59, 0xd9, 0x39, 0xb9, 0x79, 0xf9,
	377	0x05, 0x85, 0x45, 0xc5, 0x25, 0xa5, 0x65, 0xe5,
	378	0x15, 0x95, 0x55, 0xd5, 0x35, 0xb5, 0x75, 0xf5,
	379	0x0d, 0x8d, 0x4d, 0xcd, 0x2d, 0xad, 0x6d, 0xed,
	380	0x1d, 0x9d, 0x5d, 0xdd, 0x3d, 0xbd, 0x7d, 0xfd,
	381	0x03, 0x83, 0x43, 0xc3, 0x23, 0xa3, 0x63, 0xe3,
	382	0x13, 0x93, 0x53, 0xd3, 0x33, 0xb3, 0x73, 0xf3,
	383	0x0b, 0x8b, 0x4b, 0xcb, 0x2b, 0xab, 0x6b, 0xeb,
	384	0x1b, 0x9b, 0x5b, 0xdb, 0x3b, 0xbb, 0x7b, 0xfb,
	385	0x07, 0x87, 0x47, 0xc7, 0x27, 0xa7, 0x67, 0xe7,
	386	0x17, 0x97, 0x57, 0xd7, 0x37, 0xb7, 0x77, 0xf7,
	387	0x0f, 0x8f, 0x4f, 0xcf, 0x2f, 0xaf, 0x6f, 0xef,
	388	0x1f, 0x9f, 0x5f, 0xdf, 0x3f, 0xbf, 0x7f, 0xff,
	389	};
	390
	391	typedef struct {
	392	int code, extrabits, min, max;
	393	} coderecord;
	394
	395	static const coderecord lencodes[] = {
	396	{257, 0, 3,3},
	397	{258, 0, 4,4},
	398	{259, 0, 5,5},
	399	{260, 0, 6,6},
	400	{261, 0, 7,7},
	401	{262, 0, 8,8},
	402	{263, 0, 9,9},
	403	{264, 0, 10,10},
	404	{265, 1, 11,12},
	405	{266, 1, 13,14},
	406	{267, 1, 15,16},
	407	{268, 1, 17,18},
	408	{269, 2, 19,22},
	409	{270, 2, 23,26},
	410	{271, 2, 27,30},
	411	{272, 2, 31,34},
	412	{273, 3, 35,42},
	413	{274, 3, 43,50},
	414	{275, 3, 51,58},
	415	{276, 3, 59,66},
	416	{277, 4, 67,82},
	417	{278, 4, 83,98},
	418	{279, 4, 99,114},
	419	{280, 4, 115,130},
	420	{281, 5, 131,162},
	421	{282, 5, 163,194},
	422	{283, 5, 195,226},
	423	{284, 5, 227,257},
	424	{285, 0, 258,258},
	425	};
	426
	427	static const coderecord distcodes[] = {
	428	{0, 0, 1,1},
	429	{1, 0, 2,2},
	430	{2, 0, 3,3},
	431	{3, 0, 4,4},
	432	{4, 1, 5,6},
	433	{5, 1, 7,8},
	434	{6, 2, 9,12},
	435	{7, 2, 13,16},
	436	{8, 3, 17,24},
	437	{9, 3, 25,32},
	438	{10, 4, 33,48},
	439	{11, 4, 49,64},
	440	{12, 5, 65,96},
	441	{13, 5, 97,128},
	442	{14, 6, 129,192},
	443	{15, 6, 193,256},
	444	{16, 7, 257,384},
	445	{17, 7, 385,512},
	446	{18, 8, 513,768},
	447	{19, 8, 769,1024},
	448	{20, 9, 1025,1536},
	449	{21, 9, 1537,2048},
	450	{22, 10, 2049,3072},
	451	{23, 10, 3073,4096},
	452	{24, 11, 4097,6144},
	453	{25, 11, 6145,8192},
	454	{26, 12, 8193,12288},
	455	{27, 12, 12289,16384},
	456	{28, 13, 16385,24576},
	457	{29, 13, 24577,32768},
	458	};
	459
	460	static void zlib_literal(struct LZ77Context *ectx, unsigned char c) {
	461	struct Outbuf out = (struct Outbuf )ectx->userdata;
	462
	463	if (c <= 143) {
	464	/* 0 through 143 are 8 bits long starting at 00110000. */
	465	outbits(out, mirrorbytes[0x30 + c], 8);
	466	} else {
	467	/* 144 through 255 are 9 bits long starting at 110010000. */
	468	outbits(out, 1 + 2*mirrorbytes[0x90 - 144 + c], 9);
	469	}
	470	}
	471
	472	static void zlib_match(struct LZ77Context *ectx, int distance, int len) {
	473	const coderecord d, l;
	474	int i, j, k;
	475	struct Outbuf out = (struct Outbuf )ectx->userdata;
	476	while (len > 0) {
	477	int thislen;
	478
	479	/*
	480	* We can transmit matches of lengths 3 through 258
	481	* inclusive. So if len exceeds 258, we must transmit in
	482	* several steps, with 258 or less in each step.
	483	*
	484	* Specifically: if len >= 261, we can transmit 258 and be
	485	* sure of having at least 3 left for the next step. And if
	486	* len <= 258, we can just transmit len. But if len == 259
	487	* or 260, we must transmit len-3.
	488	*/
	489	thislen = (len > 260 ? 258 : len <= 258 ? len : len-3);
	490	len -= thislen;
	491
	492	/*
	493	* Binary-search to find which length code we're
	494	* transmitting.
	495	*/
	496	i = -1; j = sizeof(lencodes)/sizeof(*lencodes);
	497	while (j - i >= 2) {
	498	k = (j+i)/2;
	499	if (thislen < lencodes[k].min)
	500	j = k;
	501	else if (thislen > lencodes[k].max)
	502	i = k;
	503	else {
	504	l = &lencodes[k];
	505	break; /* found it! */
	506	}
	507	}
	508
	509	/*
	510	* Transmit the length code. 256-279 are seven bits
	511	* starting at 0000000; 280-287 are eight bits starting at
	512	* 11000000.
	513	*/
	514	if (l->code <= 279) {
	515	outbits(out, mirrorbytes[(l->code-256)*2], 7);
	516	} else {
	517	outbits(out, mirrorbytes[0xc0 - 280 + l->code], 8);
	518	}
	519
	520	/*
	521	* Transmit the extra bits.
	522	*/
	523	if (l->extrabits)
	524	outbits(out, thislen - l->min, l->extrabits);
	525
	526	/*
	527	* Binary-search to find which distance code we're
	528	* transmitting.
	529	*/
	530	i = -1; j = sizeof(distcodes)/sizeof(*distcodes);
	531	while (j - i >= 2) {
	532	k = (j+i)/2;
	533	if (distance < distcodes[k].min)
	534	j = k;
	535	else if (distance > distcodes[k].max)
	536	i = k;
	537	else {
	538	d = &distcodes[k];
	539	break; /* found it! */
	540	}
	541	}
	542
	543	/*
	544	* Transmit the distance code. Five bits starting at 00000.
	545	*/
	546	outbits(out, mirrorbytes[d->code*8], 5);
	547
	548	/*
	549	* Transmit the extra bits.
	550	*/
	551	if (d->extrabits)
	552	outbits(out, distance - d->min, d->extrabits);
	553	}
	554	}
	555
	556	void zlib_compress_init(void) {
	557	struct Outbuf *out;
	558
	559	lz77_init(&ectx);
	560	ectx.literal = zlib_literal;
	561	ectx.match = zlib_match;
	562
	563	out = smalloc(sizeof(struct Outbuf));
	564	out->outbits = out->noutbits = 0;
	565	out->firstblock = 1;
	566	ectx.userdata = out;
	567
	568	logevent("Initialised zlib (RFC1950) compression");
	569	}
	570
	571	int zlib_compress_block(unsigned char *block, int len,
	572	unsigned char *outblock, int outlen) {
	573	struct Outbuf out = (struct Outbuf )ectx.userdata;
	574
	575	out->outbuf = NULL;
	576	out->outlen = out->outsize = 0;
	577
	578	/*
	579	* If this is the first block, output the Zlib (RFC1950) header
	580	* bytes 78 9C. (Deflate compression, 32K window size, default
	581	* algorithm.)
	582	*/
	583	if (out->firstblock) {
	584	outbits(out, 0x9C78, 16);
	585	out->firstblock = 0;
	586	/*
	587	* Start a Deflate (RFC1951) fixed-trees block. We transmit
	588	* a zero bit (BFINAL=0), followed by a zero bit and a one
	589	* bit (BTYPE=01). Of course these are in the wrong order
	590	* (01 0).
	591	*/
	592	outbits(out, 2, 3);
	593	}
	594
	595	/*
	596	* Do the compression.
	597	*/
	598	lz77_compress(&ectx, block, len);
	599	/*
	600	* End the block (by transmitting code 256, which is 0000000 in
	601	* fixed-tree mode), and transmit some empty blocks to ensure
	602	* we have emitted the byte containing the last piece of
	603	* genuine data. There are three ways we can do this:
	604	*
	605	* - Minimal flush. Output end-of-block and then open a new
	606	* static block. This takes 9 bits, which is guaranteed to
	607	* flush out the last genuine code in the closed block; but
	608	* allegedly zlib can't handle it.
	609	*
	610	* - Zlib partial flush. Output EOB, open and close an empty
	611	* static block, and _then_ open the new block. This is the
	612	* best zlib can handle.
	613	*
	614	* - Zlib sync flush. Output EOB, then an empty _uncompressed_
	615	* block (000, then sync to byte boundary, then send bytes
	616	* 00 00 FF FF). Then open the new block.
	617	*
	618	* For the moment, we will use Zlib partial flush.
	619	*/
	620	outbits(out, 0, 7); /* close block */
	621	outbits(out, 2, 3+7); /* empty static block */
	622	outbits(out, 2, 3); /* open new block */
	623
	624	*outblock = out->outbuf;
	625	*outlen = out->outlen;
	626
	627	return 1;
	628	}
	629
	630	/* ----------------------------------------------------------------------
	631	* Zlib decompression. Of course, even though our compressor always
	632	* uses static trees, our _decompressor_ has to be capable of
	633	* handling dynamic trees if it sees them.
	634	*/
	635
	636	/*
	637	* The way we work the Huffman decode is to have a table lookup on
	638	* the first N bits of the input stream (in the order they arrive,
	639	* of course, i.e. the first bit of the Huffman code is in bit 0).
	640	* Each table entry lists the number of bits to consume, plus
	641	* either an output code or a pointer to a secondary table.
	642	*/
	643	struct zlib_table;
	644	struct zlib_tableentry;
	645
	646	struct zlib_tableentry {
	647	unsigned char nbits;
	648	int code;
	649	struct zlib_table *nexttable;
	650	};
	651
	652	struct zlib_table {
	653	int mask; /* mask applied to input bit stream */
	654	struct zlib_tableentry *table;
	655	};
	656
	657	#define MAXCODELEN 16
	658	#define MAXSYMS 288
	659
	660	/*
	661	* Build a single-level decode table for elements
	662	* [minlength,maxlength) of the provided code/length tables, and
	663	* recurse to build subtables.
	664	*/
	665	static struct zlib_table zlib_mkonetab(int codes, unsigned char *lengths,
	666	int nsyms,
	667	int pfx, int pfxbits, int bits) {
	668	struct zlib_table *tab = smalloc(sizeof(struct zlib_table));
	669	int pfxmask = (1 << pfxbits) - 1;
	670	int nbits, i, j, code;
	671
	672	tab->table = smalloc((1 << bits) * sizeof(struct zlib_tableentry));
	673	tab->mask = (1 << bits) - 1;
	674
	675	for (code = 0; code <= tab->mask; code++) {
	676	tab->table[code].code = -1;
	677	tab->table[code].nbits = 0;
	678	tab->table[code].nexttable = NULL;
	679	}
	680
	681	for (i = 0; i < nsyms; i++) {
	682	if (lengths[i] <= pfxbits \|\| (codes[i] & pfxmask) != pfx)
	683	continue;
	684	code = (codes[i] >> pfxbits) & tab->mask;
	685	for (j = code; j <= tab->mask; j += 1 << (lengths[i]-pfxbits)) {
	686	tab->table[j].code = i;
	687	nbits = lengths[i] - pfxbits;
	688	if (tab->table[j].nbits < nbits)
	689	tab->table[j].nbits = nbits;
	690	}
	691	}
	692	for (code = 0; code <= tab->mask; code++) {
	693	if (tab->table[code].nbits <= bits)
	694	continue;
	695	/* Generate a subtable. */
	696	tab->table[code].code = -1;
	697	nbits = tab->table[code].nbits - bits;
	698	if (nbits > 7)
	699	nbits = 7;
	700	tab->table[code].nbits = bits;
	701	tab->table[code].nexttable = zlib_mkonetab(codes, lengths, nsyms,
	702	pfx \| (code << pfxbits),
	703	pfxbits + bits, nbits);
	704	}
	705
	706	return tab;
	707	}
	708
	709	/*
	710	* Build a decode table, given a set of Huffman tree lengths.
	711	*/
	712	static struct zlib_table zlib_mktable(unsigned char lengths, int nlengths) {
	713	int count[MAXCODELEN], startcode[MAXCODELEN], codes[MAXSYMS];
	714	int code, maxlen;
	715	int i, j;
	716
	717	/* Count the codes of each length. */
	718	maxlen = 0;
	719	for (i = 1; i < MAXCODELEN; i++) count[i] = 0;
	720	for (i = 0; i < nlengths; i++) {
	721	count[lengths[i]]++;
	722	if (maxlen < lengths[i])
	723	maxlen = lengths[i];
	724	}
	725	/* Determine the starting code for each length block. */
	726	code = 0;
	727	for (i = 1; i < MAXCODELEN; i++) {
	728	startcode[i] = code;
	729	code += count[i];
	730	code <<= 1;
	731	}
	732	/* Determine the code for each symbol. Mirrored, of course. */
	733	for (i = 0; i < nlengths; i++) {
	734	code = startcode[lengths[i]]++;
	735	codes[i] = 0;
	736	for (j = 0; j < lengths[i]; j++) {
	737	codes[i] = (codes[i] << 1) \| (code & 1);
	738	code >>= 1;
	739	}
	740	}
	741
	742	/*
	743	* Now we have the complete list of Huffman codes. Build a
	744	* table.
	745	*/
	746	return zlib_mkonetab(codes, lengths, nlengths, 0, 0,
	747	maxlen < 9 ? maxlen : 9);
	748	}
	749
	750	static struct zlib_decompress_ctx {
	751	struct zlib_table staticlentable, staticdisttable;
	752	struct zlib_table currlentable, currdisttable, *lenlentable;
	753	enum {
	754	START, OUTSIDEBLK,
	755	TREES_HDR, TREES_LENLEN, TREES_LEN, TREES_LENREP,
	756	INBLK, GOTLENSYM, GOTLEN, GOTDISTSYM,
	757	UNCOMP_LEN, UNCOMP_NLEN, UNCOMP_DATA
	758	} state;
	759	int sym, hlit, hdist, hclen, lenptr, lenextrabits, lenaddon, len, lenrep;
	760	int uncomplen;
	761	unsigned char lenlen[19];
	762	unsigned char lengths[286+32];
	763	unsigned long bits;
	764	int nbits;
	765	unsigned char window[WINSIZE];
	766	int winpos;
	767	unsigned char *outblk;
	768	int outlen, outsize;
	769	} dctx;
	770
	771	void zlib_decompress_init(void) {
	772	unsigned char lengths[288];
	773	memset(lengths, 8, 144);
	774	memset(lengths+144, 9, 256-144);
	775	memset(lengths+256, 7, 280-256);
	776	memset(lengths+280, 8, 288-280);
	777	dctx.staticlentable = zlib_mktable(lengths, 288);
	778	memset(lengths, 5, 32);
	779	dctx.staticdisttable = zlib_mktable(lengths, 32);
	780	dctx.state = START; /* even before header */
	781	dctx.currlentable = dctx.currdisttable = dctx.lenlentable = NULL;
	782	dctx.bits = 0;
	783	dctx.nbits = 0;
	784	logevent("Initialised zlib (RFC1950) decompression");
	785	}
	786
	787	int zlib_huflookup(unsigned long bitsp, int nbitsp, struct zlib_table *tab) {
	788	unsigned long bits = *bitsp;
	789	int nbits = *nbitsp;
	790	while (1) {
	791	struct zlib_tableentry *ent;
	792	ent = &tab->table[bits & tab->mask];
	793	if (ent->nbits > nbits)
	794	return -1; /* not enough data */
	795	bits >>= ent->nbits;
	796	nbits -= ent->nbits;
	797	if (ent->code == -1)
	798	tab = ent->nexttable;
	799	else {
	800	*bitsp = bits;
	801	*nbitsp = nbits;
	802	return ent->code;
	803	}
	804	}
	805	}
	806
	807	static void zlib_emit_char(int c) {
	808	dctx.window[dctx.winpos] = c;
	809	dctx.winpos = (dctx.winpos + 1) & (WINSIZE-1);
	810	if (dctx.outlen >= dctx.outsize) {
	811	dctx.outsize = dctx.outlen + 512;
	812	dctx.outblk = srealloc(dctx.outblk, dctx.outsize);
	813	}
	814	dctx.outblk[dctx.outlen++] = c;
	815	}
	816
	817	#define EATBITS(n) ( dctx.nbits -= (n), dctx.bits >>= (n) )
	818
	819	int zlib_decompress_block(unsigned char *block, int len,
	820	unsigned char *outblock, int outlen) {
	821	const coderecord *rec;
	822	int code, blktype, rep, dist, nlen;
	823	static const unsigned char lenlenmap[] = {
	824	16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15
	825	};
	826
	827	dctx.outblk = NULL;
	828	dctx.outsize = dctx.outlen = 0;
	829
	830	while (len > 0 \|\| dctx.nbits > 0) {
	831	while (dctx.nbits < 24 && len > 0) {
	832	dctx.bits \|= (*block++) << dctx.nbits;
	833	dctx.nbits += 8;
	834	len--;
	835	}
	836	switch (dctx.state) {
	837	case START:
	838	/* Expect 16-bit zlib header, which we'll dishonourably ignore. */
	839	if (dctx.nbits < 16)
	840	goto finished; /* done all we can */
	841	EATBITS(16);
	842	dctx.state = OUTSIDEBLK;
	843	break;
	844	case OUTSIDEBLK:
	845	/* Expect 3-bit block header. */
	846	if (dctx.nbits < 3)
	847	goto finished; /* done all we can */
	848	EATBITS(1);
	849	blktype = dctx.bits & 3;
	850	EATBITS(2);
	851	if (blktype == 0) {
	852	int to_eat = dctx.nbits & 7;
	853	dctx.state = UNCOMP_LEN;
	854	EATBITS(to_eat); /* align to byte boundary */
	855	} else if (blktype == 1) {
	856	dctx.currlentable = dctx.staticlentable;
	857	dctx.currdisttable = dctx.staticdisttable;
	858	dctx.state = INBLK;
	859	} else if (blktype == 2) {
	860	dctx.state = TREES_HDR;
	861	}
	862	break;
	863	case TREES_HDR:
	864	/*
	865	* Dynamic block header. Five bits of HLIT, five of
	866	* HDIST, four of HCLEN.
	867	*/
	868	if (dctx.nbits < 5+5+4)
	869	goto finished; /* done all we can */
	870	dctx.hlit = 257 + (dctx.bits & 31); EATBITS(5);
	871	dctx.hdist = 1 + (dctx.bits & 31); EATBITS(5);
	872	dctx.hclen = 4 + (dctx.bits & 15); EATBITS(4);
	873	dctx.lenptr = 0;
	874	dctx.state = TREES_LENLEN;
	875	memset(dctx.lenlen, 0, sizeof(dctx.lenlen));
	876	break;
	877	case TREES_LENLEN:
	878	if (dctx.nbits < 3)
	879	goto finished;
	880	while (dctx.lenptr < dctx.hclen && dctx.nbits >= 3) {
	881	dctx.lenlen[lenlenmap[dctx.lenptr++]] =
	882	(unsigned char)(dctx.bits & 7);
	883	EATBITS(3);
	884	}
	885	if (dctx.lenptr == dctx.hclen) {
	886	dctx.lenlentable = zlib_mktable(dctx.lenlen, 19);
	887	dctx.state = TREES_LEN;
	888	dctx.lenptr = 0;
	889	}
	890	break;
	891	case TREES_LEN:
	892	if (dctx.lenptr >= dctx.hlit+dctx.hdist) {
	893	dctx.currlentable = zlib_mktable(dctx.lengths, dctx.hlit);
	894	dctx.currdisttable = zlib_mktable(dctx.lengths + dctx.hlit,
	895	dctx.hdist);
	896	/* FIXME: zlib_freetable(dctx.lenlentable); */
	897	dctx.state = INBLK;
	898	break;
	899	}
	900	code = zlib_huflookup(&dctx.bits, &dctx.nbits, dctx.lenlentable);
	901	if (code == -1)
	902	goto finished;
	903	if (code < 16)
	904	dctx.lengths[dctx.lenptr++] = code;
	905	else {
	906	dctx.lenextrabits = (code == 16 ? 2 : code == 17 ? 3 : 7);
	907	dctx.lenaddon = (code == 18 ? 11 : 3);
	908	dctx.lenrep = (code == 16 && dctx.lenptr > 0 ?
	909	dctx.lengths[dctx.lenptr-1] : 0);
	910	dctx.state = TREES_LENREP;
	911	}
	912	break;
	913	case TREES_LENREP:
	914	if (dctx.nbits < dctx.lenextrabits)
	915	goto finished;
	916	rep = dctx.lenaddon + (dctx.bits & ((1<<dctx.lenextrabits)-1));
	917	EATBITS(dctx.lenextrabits);
	918	while (rep > 0 && dctx.lenptr < dctx.hlit+dctx.hdist) {
	919	dctx.lengths[dctx.lenptr] = dctx.lenrep;
	920	dctx.lenptr++;
	921	rep--;
	922	}
	923	dctx.state = TREES_LEN;
	924	break;
	925	case INBLK:
	926	code = zlib_huflookup(&dctx.bits, &dctx.nbits, dctx.currlentable);
	927	if (code == -1)
	928	goto finished;
	929	if (code < 256)
	930	zlib_emit_char(code);
	931	else if (code == 256) {
	932	dctx.state = OUTSIDEBLK;
	933	/* FIXME: zlib_freetable(both) if not static */
	934	} else if (code < 286) { /* static tree can give >285; ignore */
	935	dctx.state = GOTLENSYM;
	936	dctx.sym = code;
	937	}
	938	break;
	939	case GOTLENSYM:
	940	rec = &lencodes[dctx.sym - 257];
	941	if (dctx.nbits < rec->extrabits)
	942	goto finished;
	943	dctx.len = rec->min + (dctx.bits & ((1<<rec->extrabits)-1));
	944	EATBITS(rec->extrabits);
	945	dctx.state = GOTLEN;
	946	break;
	947	case GOTLEN:
	948	code = zlib_huflookup(&dctx.bits, &dctx.nbits, dctx.currdisttable);
	949	if (code == -1)
	950	goto finished;
	951	dctx.state = GOTDISTSYM;
	952	dctx.sym = code;
	953	break;
	954	case GOTDISTSYM:
	955	rec = &distcodes[dctx.sym];
	956	if (dctx.nbits < rec->extrabits)
	957	goto finished;
	958	dist = rec->min + (dctx.bits & ((1<<rec->extrabits)-1));
	959	EATBITS(rec->extrabits);
	960	dctx.state = INBLK;
	961	while (dctx.len--)
	962	zlib_emit_char(dctx.window[(dctx.winpos-dist) & (WINSIZE-1)]);
	963	break;
	964	case UNCOMP_LEN:
	965	/*
	966	* Uncompressed block. We expect to see a 16-bit LEN.
	967	*/
	968	if (dctx.nbits < 16)
	969	goto finished;
	970	dctx.uncomplen = dctx.bits & 0xFFFF;
	971	EATBITS(16);
	972	dctx.state = UNCOMP_NLEN;
	973	break;
	974	case UNCOMP_NLEN:
	975	/*
	976	* Uncompressed block. We expect to see a 16-bit NLEN,
	977	* which should be the one's complement of the previous
	978	* LEN.
	979	*/
	980	if (dctx.nbits < 16)
	981	goto finished;
	982	nlen = dctx.bits & 0xFFFF;
	983	EATBITS(16);
	984	dctx.state = UNCOMP_DATA;
	985	break;
	986	case UNCOMP_DATA:
	987	if (dctx.nbits < 8)
	988	goto finished;
	989	zlib_emit_char(dctx.bits & 0xFF);
	990	EATBITS(8);
	991	if (--dctx.uncomplen == 0)
	992	dctx.state = OUTSIDEBLK; /* end of uncompressed block */
	993	break;
	994	}
	995	}
	996
	997	finished:
	998	*outblock = dctx.outblk;
	999	*outlen = dctx.outlen;
	1000
	1001	return 1;
	1002	}
	1003
	1004	const struct ssh_compress ssh_zlib = {
	1005	"zlib",
	1006	zlib_compress_init,
	1007	zlib_compress_block,
	1008	zlib_decompress_init,
	1009	zlib_decompress_block
	1010	};