git.distorted.org.uk Git - u/mdw/putty/blame_incremental

... / ...

Commit	Line	Data
	1	/*
	2	* Zlib (RFC1950 / RFC1951) compression for PuTTY.
	3	*
	4	* There will no doubt be criticism of my decision to reimplement
	5	* Zlib compression from scratch instead of using the existing zlib
	6	* code. People will cry `reinventing the wheel'; they'll claim
	7	* that the `fundamental basis of OSS' is code reuse; they'll want
	8	* to see a really good reason for me having chosen not to use the
	9	* existing code.
	10	*
	11	* Well, here are my reasons. Firstly, I don't want to link the
	12	* whole of zlib into the PuTTY binary; PuTTY is justifiably proud
	13	* of its small size and I think zlib contains a lot of unnecessary
	14	* baggage for the kind of compression that SSH requires.
	15	*
	16	* Secondly, I also don't like the alternative of using zlib.dll.
	17	* Another thing PuTTY is justifiably proud of is its ease of
	18	* installation, and the last thing I want to do is to start
	19	* mandating DLLs. Not only that, but there are two _kinds_ of
	20	* zlib.dll kicking around, one with C calling conventions on the
	21	* exported functions and another with WINAPI conventions, and
	22	* there would be a significant danger of getting the wrong one.
	23	*
	24	* Thirdly, there seems to be a difference of opinion on the IETF
	25	* secsh mailing list about the correct way to round off a
	26	* compressed packet and start the next. In particular, there's
	27	* some talk of switching to a mechanism zlib isn't currently
	28	* capable of supporting (see below for an explanation). Given that
	29	* sort of uncertainty, I thought it might be better to have code
	30	* that will support even the zlib-incompatible worst case.
	31	*
	32	* Fourthly, it's a _second implementation_. Second implementations
	33	* are fundamentally a Good Thing in standardisation efforts. The
	34	* difference of opinion mentioned above has arisen _precisely_
	35	* because there has been only one zlib implementation and
	36	* everybody has used it. I don't intend that this should happen
	37	* again.
	38	*/
	39
	40	#include <stdlib.h>
	41	#include <assert.h>
	42
	43	#include "ssh.h"
	44
	45	#ifndef FALSE
	46	#define FALSE 0
	47	#define TRUE (!FALSE)
	48	#endif
	49
	50	/* ----------------------------------------------------------------------
	51	* Basic LZ77 code. This bit is designed modularly, so it could be
	52	* ripped out and used in a different LZ77 compressor. Go to it,
	53	* and good luck :-)
	54	*/
	55
	56	struct LZ77InternalContext;
	57	struct LZ77Context {
	58	struct LZ77InternalContext *ictx;
	59	void *userdata;
	60	void (literal) (struct LZ77Context ctx, unsigned char c);
	61	void (match) (struct LZ77Context ctx, int distance, int len);
	62	};
	63
	64	/*
	65	* Initialise the private fields of an LZ77Context. It's up to the
	66	* user to initialise the public fields.
	67	*/
	68	static int lz77_init(struct LZ77Context *ctx);
	69
	70	/*
	71	* Supply data to be compressed. Will update the private fields of
	72	* the LZ77Context, and will call literal() and match() to output.
	73	* If `compress' is FALSE, it will never emit a match, but will
	74	* instead call literal() for everything.
	75	*/
	76	static void lz77_compress(struct LZ77Context *ctx,
	77	unsigned char *data, int len, int compress);
	78
	79	/*
	80	* Modifiable parameters.
	81	*/
	82	#define WINSIZE 32768 /* window size. Must be power of 2! */
	83	#define HASHMAX 2039 /* one more than max hash value */
	84	#define MAXMATCH 32 /* how many matches we track */
	85	#define HASHCHARS 3 /* how many chars make a hash */
	86
	87	/*
	88	* This compressor takes a less slapdash approach than the
	89	* gzip/zlib one. Rather than allowing our hash chains to fall into
	90	* disuse near the far end, we keep them doubly linked so we can
	91	* _find_ the far end, and then every time we add a new byte to the
	92	* window (thus rolling round by one and removing the previous
	93	* byte), we can carefully remove the hash chain entry.
	94	*/
	95
	96	#define INVALID -1 /* invalid hash _and_ invalid offset */
	97	struct WindowEntry {
	98	short next, prev; /* array indices within the window */
	99	short hashval;
	100	};
	101
	102	struct HashEntry {
	103	short first; /* window index of first in chain */
	104	};
	105
	106	struct Match {
	107	int distance, len;
	108	};
	109
	110	struct LZ77InternalContext {
	111	struct WindowEntry win[WINSIZE];
	112	unsigned char data[WINSIZE];
	113	int winpos;
	114	struct HashEntry hashtab[HASHMAX];
	115	unsigned char pending[HASHCHARS];
	116	int npending;
	117	};
	118
	119	static int lz77_hash(unsigned char *data)
	120	{
	121	return (257 * data[0] + 263 * data[1] + 269 * data[2]) % HASHMAX;
	122	}
	123
	124	static int lz77_init(struct LZ77Context *ctx)
	125	{
	126	struct LZ77InternalContext *st;
	127	int i;
	128
	129	st = snew(struct LZ77InternalContext);
	130	if (!st)
	131	return 0;
	132
	133	ctx->ictx = st;
	134
	135	for (i = 0; i < WINSIZE; i++)
	136	st->win[i].next = st->win[i].prev = st->win[i].hashval = INVALID;
	137	for (i = 0; i < HASHMAX; i++)
	138	st->hashtab[i].first = INVALID;
	139	st->winpos = 0;
	140
	141	st->npending = 0;
	142
	143	return 1;
	144	}
	145
	146	static void lz77_advance(struct LZ77InternalContext *st,
	147	unsigned char c, int hash)
	148	{
	149	int off;
	150
	151	/*
	152	* Remove the hash entry at winpos from the tail of its chain,
	153	* or empty the chain if it's the only thing on the chain.
	154	*/
	155	if (st->win[st->winpos].prev != INVALID) {
	156	st->win[st->win[st->winpos].prev].next = INVALID;
	157	} else if (st->win[st->winpos].hashval != INVALID) {
	158	st->hashtab[st->win[st->winpos].hashval].first = INVALID;
	159	}
	160
	161	/*
	162	* Create a new entry at winpos and add it to the head of its
	163	* hash chain.
	164	*/
	165	st->win[st->winpos].hashval = hash;
	166	st->win[st->winpos].prev = INVALID;
	167	off = st->win[st->winpos].next = st->hashtab[hash].first;
	168	st->hashtab[hash].first = st->winpos;
	169	if (off != INVALID)
	170	st->win[off].prev = st->winpos;
	171	st->data[st->winpos] = c;
	172
	173	/*
	174	* Advance the window pointer.
	175	*/
	176	st->winpos = (st->winpos + 1) & (WINSIZE - 1);
	177	}
	178
	179	#define CHARAT(k) ( (k)<0 ? st->data[(st->winpos+k)&(WINSIZE-1)] : data[k] )
	180
	181	static void lz77_compress(struct LZ77Context *ctx,
	182	unsigned char *data, int len, int compress)
	183	{
	184	struct LZ77InternalContext *st = ctx->ictx;
	185	int i, hash, distance, off, nmatch, matchlen, advance;
	186	struct Match defermatch, matches[MAXMATCH];
	187	int deferchr;
	188
	189	/*
	190	* Add any pending characters from last time to the window. (We
	191	* might not be able to.)
	192	*/
	193	for (i = 0; i < st->npending; i++) {
	194	unsigned char foo[HASHCHARS];
	195	int j;
	196	if (len + st->npending - i < HASHCHARS) {
	197	/* Update the pending array. */
	198	for (j = i; j < st->npending; j++)
	199	st->pending[j - i] = st->pending[j];
	200	break;
	201	}
	202	for (j = 0; j < HASHCHARS; j++)
	203	foo[j] = (i + j < st->npending ? st->pending[i + j] :
	204	data[i + j - st->npending]);
	205	lz77_advance(st, foo[0], lz77_hash(foo));
	206	}
	207	st->npending -= i;
	208
	209	defermatch.len = 0;
	210	deferchr = '\0';
	211	while (len > 0) {
	212
	213	/* Don't even look for a match, if we're not compressing. */
	214	if (compress && len >= HASHCHARS) {
	215	/*
	216	* Hash the next few characters.
	217	*/
	218	hash = lz77_hash(data);
	219
	220	/*
	221	* Look the hash up in the corresponding hash chain and see
	222	* what we can find.
	223	*/
	224	nmatch = 0;
	225	for (off = st->hashtab[hash].first;
	226	off != INVALID; off = st->win[off].next) {
	227	/* distance = 1 if off == st->winpos-1 */
	228	/* distance = WINSIZE if off == st->winpos */
	229	distance =
	230	WINSIZE - (off + WINSIZE - st->winpos) % WINSIZE;
	231	for (i = 0; i < HASHCHARS; i++)
	232	if (CHARAT(i) != CHARAT(i - distance))
	233	break;
	234	if (i == HASHCHARS) {
	235	matches[nmatch].distance = distance;
	236	matches[nmatch].len = 3;
	237	if (++nmatch >= MAXMATCH)
	238	break;
	239	}
	240	}
	241	} else {
	242	nmatch = 0;
	243	hash = INVALID;
	244	}
	245
	246	if (nmatch > 0) {
	247	/*
	248	* We've now filled up matches[] with nmatch potential
	249	* matches. Follow them down to find the longest. (We
	250	* assume here that it's always worth favouring a
	251	* longer match over a shorter one.)
	252	*/
	253	matchlen = HASHCHARS;
	254	while (matchlen < len) {
	255	int j;
	256	for (i = j = 0; i < nmatch; i++) {
	257	if (CHARAT(matchlen) ==
	258	CHARAT(matchlen - matches[i].distance)) {
	259	matches[j++] = matches[i];
	260	}
	261	}
	262	if (j == 0)
	263	break;
	264	matchlen++;
	265	nmatch = j;
	266	}
	267
	268	/*
	269	* We've now got all the longest matches. We favour the
	270	* shorter distances, which means we go with matches[0].
	271	* So see if we want to defer it or throw it away.
	272	*/
	273	matches[0].len = matchlen;
	274	if (defermatch.len > 0) {
	275	if (matches[0].len > defermatch.len + 1) {
	276	/* We have a better match. Emit the deferred char,
	277	* and defer this match. */
	278	ctx->literal(ctx, (unsigned char) deferchr);
	279	defermatch = matches[0];
	280	deferchr = data[0];
	281	advance = 1;
	282	} else {
	283	/* We don't have a better match. Do the deferred one. */
	284	ctx->match(ctx, defermatch.distance, defermatch.len);
	285	advance = defermatch.len - 1;
	286	defermatch.len = 0;
	287	}
	288	} else {
	289	/* There was no deferred match. Defer this one. */
	290	defermatch = matches[0];
	291	deferchr = data[0];
	292	advance = 1;
	293	}
	294	} else {
	295	/*
	296	* We found no matches. Emit the deferred match, if
	297	* any; otherwise emit a literal.
	298	*/
	299	if (defermatch.len > 0) {
	300	ctx->match(ctx, defermatch.distance, defermatch.len);
	301	advance = defermatch.len - 1;
	302	defermatch.len = 0;
	303	} else {
	304	ctx->literal(ctx, data[0]);
	305	advance = 1;
	306	}
	307	}
	308
	309	/*
	310	* Now advance the position by `advance' characters,
	311	* keeping the window and hash chains consistent.
	312	*/
	313	while (advance > 0) {
	314	if (len >= HASHCHARS) {
	315	lz77_advance(st, *data, lz77_hash(data));
	316	} else {
	317	st->pending[st->npending++] = *data;
	318	}
	319	data++;
	320	len--;
	321	advance--;
	322	}
	323	}
	324	}
	325
	326	/* ----------------------------------------------------------------------
	327	* Zlib compression. We always use the static Huffman tree option.
	328	* Mostly this is because it's hard to scan a block in advance to
	329	* work out better trees; dynamic trees are great when you're
	330	* compressing a large file under no significant time constraint,
	331	* but when you're compressing little bits in real time, things get
	332	* hairier.
	333	*
	334	* I suppose it's possible that I could compute Huffman trees based
	335	* on the frequencies in the _previous_ block, as a sort of
	336	* heuristic, but I'm not confident that the gain would balance out
	337	* having to transmit the trees.
	338	*/
	339
	340	struct Outbuf {
	341	unsigned char *outbuf;
	342	int outlen, outsize;
	343	unsigned long outbits;
	344	int noutbits;
	345	int firstblock;
	346	int comp_disabled;
	347	};
	348
	349	static void outbits(struct Outbuf *out, unsigned long bits, int nbits)
	350	{
	351	assert(out->noutbits + nbits <= 32);
	352	out->outbits \|= bits << out->noutbits;
	353	out->noutbits += nbits;
	354	while (out->noutbits >= 8) {
	355	if (out->outlen >= out->outsize) {
	356	out->outsize = out->outlen + 64;
	357	out->outbuf = sresize(out->outbuf, out->outsize, unsigned char);
	358	}
	359	out->outbuf[out->outlen++] = (unsigned char) (out->outbits & 0xFF);
	360	out->outbits >>= 8;
	361	out->noutbits -= 8;
	362	}
	363	}
	364
	365	static const unsigned char mirrorbytes[256] = {
	366	0x00, 0x80, 0x40, 0xc0, 0x20, 0xa0, 0x60, 0xe0,
	367	0x10, 0x90, 0x50, 0xd0, 0x30, 0xb0, 0x70, 0xf0,
	368	0x08, 0x88, 0x48, 0xc8, 0x28, 0xa8, 0x68, 0xe8,
	369	0x18, 0x98, 0x58, 0xd8, 0x38, 0xb8, 0x78, 0xf8,
	370	0x04, 0x84, 0x44, 0xc4, 0x24, 0xa4, 0x64, 0xe4,
	371	0x14, 0x94, 0x54, 0xd4, 0x34, 0xb4, 0x74, 0xf4,
	372	0x0c, 0x8c, 0x4c, 0xcc, 0x2c, 0xac, 0x6c, 0xec,
	373	0x1c, 0x9c, 0x5c, 0xdc, 0x3c, 0xbc, 0x7c, 0xfc,
	374	0x02, 0x82, 0x42, 0xc2, 0x22, 0xa2, 0x62, 0xe2,
	375	0x12, 0x92, 0x52, 0xd2, 0x32, 0xb2, 0x72, 0xf2,
	376	0x0a, 0x8a, 0x4a, 0xca, 0x2a, 0xaa, 0x6a, 0xea,
	377	0x1a, 0x9a, 0x5a, 0xda, 0x3a, 0xba, 0x7a, 0xfa,
	378	0x06, 0x86, 0x46, 0xc6, 0x26, 0xa6, 0x66, 0xe6,
	379	0x16, 0x96, 0x56, 0xd6, 0x36, 0xb6, 0x76, 0xf6,
	380	0x0e, 0x8e, 0x4e, 0xce, 0x2e, 0xae, 0x6e, 0xee,
	381	0x1e, 0x9e, 0x5e, 0xde, 0x3e, 0xbe, 0x7e, 0xfe,
	382	0x01, 0x81, 0x41, 0xc1, 0x21, 0xa1, 0x61, 0xe1,
	383	0x11, 0x91, 0x51, 0xd1, 0x31, 0xb1, 0x71, 0xf1,
	384	0x09, 0x89, 0x49, 0xc9, 0x29, 0xa9, 0x69, 0xe9,
	385	0x19, 0x99, 0x59, 0xd9, 0x39, 0xb9, 0x79, 0xf9,
	386	0x05, 0x85, 0x45, 0xc5, 0x25, 0xa5, 0x65, 0xe5,
	387	0x15, 0x95, 0x55, 0xd5, 0x35, 0xb5, 0x75, 0xf5,
	388	0x0d, 0x8d, 0x4d, 0xcd, 0x2d, 0xad, 0x6d, 0xed,
	389	0x1d, 0x9d, 0x5d, 0xdd, 0x3d, 0xbd, 0x7d, 0xfd,
	390	0x03, 0x83, 0x43, 0xc3, 0x23, 0xa3, 0x63, 0xe3,
	391	0x13, 0x93, 0x53, 0xd3, 0x33, 0xb3, 0x73, 0xf3,
	392	0x0b, 0x8b, 0x4b, 0xcb, 0x2b, 0xab, 0x6b, 0xeb,
	393	0x1b, 0x9b, 0x5b, 0xdb, 0x3b, 0xbb, 0x7b, 0xfb,
	394	0x07, 0x87, 0x47, 0xc7, 0x27, 0xa7, 0x67, 0xe7,
	395	0x17, 0x97, 0x57, 0xd7, 0x37, 0xb7, 0x77, 0xf7,
	396	0x0f, 0x8f, 0x4f, 0xcf, 0x2f, 0xaf, 0x6f, 0xef,
	397	0x1f, 0x9f, 0x5f, 0xdf, 0x3f, 0xbf, 0x7f, 0xff,
	398	};
	399
	400	typedef struct {
	401	short code, extrabits;
	402	int min, max;
	403	} coderecord;
	404
	405	static const coderecord lencodes[] = {
	406	{257, 0, 3, 3},
	407	{258, 0, 4, 4},
	408	{259, 0, 5, 5},
	409	{260, 0, 6, 6},
	410	{261, 0, 7, 7},
	411	{262, 0, 8, 8},
	412	{263, 0, 9, 9},
	413	{264, 0, 10, 10},
	414	{265, 1, 11, 12},
	415	{266, 1, 13, 14},
	416	{267, 1, 15, 16},
	417	{268, 1, 17, 18},
	418	{269, 2, 19, 22},
	419	{270, 2, 23, 26},
	420	{271, 2, 27, 30},
	421	{272, 2, 31, 34},
	422	{273, 3, 35, 42},
	423	{274, 3, 43, 50},
	424	{275, 3, 51, 58},
	425	{276, 3, 59, 66},
	426	{277, 4, 67, 82},
	427	{278, 4, 83, 98},
	428	{279, 4, 99, 114},
	429	{280, 4, 115, 130},
	430	{281, 5, 131, 162},
	431	{282, 5, 163, 194},
	432	{283, 5, 195, 226},
	433	{284, 5, 227, 257},
	434	{285, 0, 258, 258},
	435	};
	436
	437	static const coderecord distcodes[] = {
	438	{0, 0, 1, 1},
	439	{1, 0, 2, 2},
	440	{2, 0, 3, 3},
	441	{3, 0, 4, 4},
	442	{4, 1, 5, 6},
	443	{5, 1, 7, 8},
	444	{6, 2, 9, 12},
	445	{7, 2, 13, 16},
	446	{8, 3, 17, 24},
	447	{9, 3, 25, 32},
	448	{10, 4, 33, 48},
	449	{11, 4, 49, 64},
	450	{12, 5, 65, 96},
	451	{13, 5, 97, 128},
	452	{14, 6, 129, 192},
	453	{15, 6, 193, 256},
	454	{16, 7, 257, 384},
	455	{17, 7, 385, 512},
	456	{18, 8, 513, 768},
	457	{19, 8, 769, 1024},
	458	{20, 9, 1025, 1536},
	459	{21, 9, 1537, 2048},
	460	{22, 10, 2049, 3072},
	461	{23, 10, 3073, 4096},
	462	{24, 11, 4097, 6144},
	463	{25, 11, 6145, 8192},
	464	{26, 12, 8193, 12288},
	465	{27, 12, 12289, 16384},
	466	{28, 13, 16385, 24576},
	467	{29, 13, 24577, 32768},
	468	};
	469
	470	static void zlib_literal(struct LZ77Context *ectx, unsigned char c)
	471	{
	472	struct Outbuf out = (struct Outbuf ) ectx->userdata;
	473
	474	if (out->comp_disabled) {
	475	/*
	476	* We're in an uncompressed block, so just output the byte.
	477	*/
	478	outbits(out, c, 8);
	479	return;
	480	}
	481
	482	if (c <= 143) {
	483	/* 0 through 143 are 8 bits long starting at 00110000. */
	484	outbits(out, mirrorbytes[0x30 + c], 8);
	485	} else {
	486	/* 144 through 255 are 9 bits long starting at 110010000. */
	487	outbits(out, 1 + 2 * mirrorbytes[0x90 - 144 + c], 9);
	488	}
	489	}
	490
	491	static void zlib_match(struct LZ77Context *ectx, int distance, int len)
	492	{
	493	const coderecord d, l;
	494	int i, j, k;
	495	struct Outbuf out = (struct Outbuf ) ectx->userdata;
	496
	497	assert(!out->comp_disabled);
	498
	499	while (len > 0) {
	500	int thislen;
	501
	502	/*
	503	* We can transmit matches of lengths 3 through 258
	504	* inclusive. So if len exceeds 258, we must transmit in
	505	* several steps, with 258 or less in each step.
	506	*
	507	* Specifically: if len >= 261, we can transmit 258 and be
	508	* sure of having at least 3 left for the next step. And if
	509	* len <= 258, we can just transmit len. But if len == 259
	510	* or 260, we must transmit len-3.
	511	*/
	512	thislen = (len > 260 ? 258 : len <= 258 ? len : len - 3);
	513	len -= thislen;
	514
	515	/*
	516	* Binary-search to find which length code we're
	517	* transmitting.
	518	*/
	519	i = -1;
	520	j = sizeof(lencodes) / sizeof(*lencodes);
	521	while (1) {
	522	assert(j - i >= 2);
	523	k = (j + i) / 2;
	524	if (thislen < lencodes[k].min)
	525	j = k;
	526	else if (thislen > lencodes[k].max)
	527	i = k;
	528	else {
	529	l = &lencodes[k];
	530	break; /* found it! */
	531	}
	532	}
	533
	534	/*
	535	* Transmit the length code. 256-279 are seven bits
	536	* starting at 0000000; 280-287 are eight bits starting at
	537	* 11000000.
	538	*/
	539	if (l->code <= 279) {
	540	outbits(out, mirrorbytes[(l->code - 256) * 2], 7);
	541	} else {
	542	outbits(out, mirrorbytes[0xc0 - 280 + l->code], 8);
	543	}
	544
	545	/*
	546	* Transmit the extra bits.
	547	*/
	548	if (l->extrabits)
	549	outbits(out, thislen - l->min, l->extrabits);
	550
	551	/*
	552	* Binary-search to find which distance code we're
	553	* transmitting.
	554	*/
	555	i = -1;
	556	j = sizeof(distcodes) / sizeof(*distcodes);
	557	while (1) {
	558	assert(j - i >= 2);
	559	k = (j + i) / 2;
	560	if (distance < distcodes[k].min)
	561	j = k;
	562	else if (distance > distcodes[k].max)
	563	i = k;
	564	else {
	565	d = &distcodes[k];
	566	break; /* found it! */
	567	}
	568	}
	569
	570	/*
	571	* Transmit the distance code. Five bits starting at 00000.
	572	*/
	573	outbits(out, mirrorbytes[d->code * 8], 5);
	574
	575	/*
	576	* Transmit the extra bits.
	577	*/
	578	if (d->extrabits)
	579	outbits(out, distance - d->min, d->extrabits);
	580	}
	581	}
	582
	583	void *zlib_compress_init(void)
	584	{
	585	struct Outbuf *out;
	586	struct LZ77Context *ectx = snew(struct LZ77Context);
	587
	588	lz77_init(ectx);
	589	ectx->literal = zlib_literal;
	590	ectx->match = zlib_match;
	591
	592	out = snew(struct Outbuf);
	593	out->outbits = out->noutbits = 0;
	594	out->firstblock = 1;
	595	out->comp_disabled = FALSE;
	596	ectx->userdata = out;
	597
	598	return ectx;
	599	}
	600
	601	void zlib_compress_cleanup(void *handle)
	602	{
	603	struct LZ77Context ectx = (struct LZ77Context )handle;
	604	sfree(ectx->userdata);
	605	}
	606
	607	/*
	608	* Turn off actual LZ77 analysis for one block, to facilitate
	609	* construction of a precise-length IGNORE packet. Returns the
	610	* length adjustment (which is only valid for packets < 65536
	611	* bytes, but that seems reasonable enough).
	612	*/
	613	static int zlib_disable_compression(void *handle)
	614	{
	615	struct LZ77Context ectx = (struct LZ77Context )handle;
	616	struct Outbuf out = (struct Outbuf ) ectx->userdata;
	617	int n;
	618
	619	out->comp_disabled = TRUE;
	620
	621	n = 0;
	622	/*
	623	* If this is the first block, we will start by outputting two
	624	* header bytes, and then three bits to begin an uncompressed
	625	* block. This will cost three bytes (because we will start on
	626	* a byte boundary, this is certain).
	627	*/
	628	if (out->firstblock) {
	629	n = 3;
	630	} else {
	631	/*
	632	* Otherwise, we will output seven bits to close the
	633	* previous static block, and _then_ three bits to begin an
	634	* uncompressed block, and then flush the current byte.
	635	* This may cost two bytes or three, depending on noutbits.
	636	*/
	637	n += (out->noutbits + 10) / 8;
	638	}
	639
	640	/*
	641	* Now we output four bytes for the length / ~length pair in
	642	* the uncompressed block.
	643	*/
	644	n += 4;
	645
	646	return n;
	647	}
	648
	649	int zlib_compress_block(void handle, unsigned char block, int len,
	650	unsigned char *outblock, int outlen)
	651	{
	652	struct LZ77Context ectx = (struct LZ77Context )handle;
	653	struct Outbuf out = (struct Outbuf ) ectx->userdata;
	654	int in_block;
	655
	656	out->outbuf = NULL;
	657	out->outlen = out->outsize = 0;
	658
	659	/*
	660	* If this is the first block, output the Zlib (RFC1950) header
	661	* bytes 78 9C. (Deflate compression, 32K window size, default
	662	* algorithm.)
	663	*/
	664	if (out->firstblock) {
	665	outbits(out, 0x9C78, 16);
	666	out->firstblock = 0;
	667
	668	in_block = FALSE;
	669	} else
	670	in_block = TRUE;
	671
	672	if (out->comp_disabled) {
	673	if (in_block)
	674	outbits(out, 0, 7); /* close static block */
	675
	676	while (len > 0) {
	677	int blen = (len < 65535 ? len : 65535);
	678
	679	/*
	680	* Start a Deflate (RFC1951) uncompressed block. We
	681	* transmit a zero bit (BFINAL=0), followed by a zero
	682	* bit and a one bit (BTYPE=00). Of course these are in
	683	* the wrong order (00 0).
	684	*/
	685	outbits(out, 0, 3);
	686
	687	/*
	688	* Output zero bits to align to a byte boundary.
	689	*/
	690	if (out->noutbits)
	691	outbits(out, 0, 8 - out->noutbits);
	692
	693	/*
	694	* Output the block length, and then its one's
	695	* complement. They're little-endian, so all we need to
	696	* do is pass them straight to outbits() with bit count
	697	* 16.
	698	*/
	699	outbits(out, blen, 16);
	700	outbits(out, blen ^ 0xFFFF, 16);
	701
	702	/*
	703	* Do the `compression': we need to pass the data to
	704	* lz77_compress so that it will be taken into account
	705	* for subsequent (distance,length) pairs. But
	706	* lz77_compress is passed FALSE, which means it won't
	707	* actually find (or even look for) any matches; so
	708	* every character will be passed straight to
	709	* zlib_literal which will spot out->comp_disabled and
	710	* emit in the uncompressed format.
	711	*/
	712	lz77_compress(ectx, block, blen, FALSE);
	713
	714	len -= blen;
	715	block += blen;
	716	}
	717	outbits(out, 2, 3); /* open new block */
	718	} else {
	719	if (!in_block) {
	720	/*
	721	* Start a Deflate (RFC1951) fixed-trees block. We
	722	* transmit a zero bit (BFINAL=0), followed by a zero
	723	* bit and a one bit (BTYPE=01). Of course these are in
	724	* the wrong order (01 0).
	725	*/
	726	outbits(out, 2, 3);
	727	}
	728
	729	/*
	730	* Do the compression.
	731	*/
	732	lz77_compress(ectx, block, len, TRUE);
	733
	734	/*
	735	* End the block (by transmitting code 256, which is
	736	* 0000000 in fixed-tree mode), and transmit some empty
	737	* blocks to ensure we have emitted the byte containing the
	738	* last piece of genuine data. There are three ways we can
	739	* do this:
	740	*
	741	* - Minimal flush. Output end-of-block and then open a
	742	* new static block. This takes 9 bits, which is
	743	* guaranteed to flush out the last genuine code in the
	744	* closed block; but allegedly zlib can't handle it.
	745	*
	746	* - Zlib partial flush. Output EOB, open and close an
	747	* empty static block, and _then_ open the new block.
	748	* This is the best zlib can handle.
	749	*
	750	* - Zlib sync flush. Output EOB, then an empty
	751	* _uncompressed_ block (000, then sync to byte
	752	* boundary, then send bytes 00 00 FF FF). Then open the
	753	* new block.
	754	*
	755	* For the moment, we will use Zlib partial flush.
	756	*/
	757	outbits(out, 0, 7); /* close block */
	758	outbits(out, 2, 3 + 7); /* empty static block */
	759	outbits(out, 2, 3); /* open new block */
	760	}
	761
	762	out->comp_disabled = FALSE;
	763
	764	*outblock = out->outbuf;
	765	*outlen = out->outlen;
	766
	767	return 1;
	768	}
	769
	770	/* ----------------------------------------------------------------------
	771	* Zlib decompression. Of course, even though our compressor always
	772	* uses static trees, our _decompressor_ has to be capable of
	773	* handling dynamic trees if it sees them.
	774	*/
	775
	776	/*
	777	* The way we work the Huffman decode is to have a table lookup on
	778	* the first N bits of the input stream (in the order they arrive,
	779	* of course, i.e. the first bit of the Huffman code is in bit 0).
	780	* Each table entry lists the number of bits to consume, plus
	781	* either an output code or a pointer to a secondary table.
	782	*/
	783	struct zlib_table;
	784	struct zlib_tableentry;
	785
	786	struct zlib_tableentry {
	787	unsigned char nbits;
	788	short code;
	789	struct zlib_table *nexttable;
	790	};
	791
	792	struct zlib_table {
	793	int mask; /* mask applied to input bit stream */
	794	struct zlib_tableentry *table;
	795	};
	796
	797	#define MAXCODELEN 16
	798	#define MAXSYMS 288
	799
	800	/*
	801	* Build a single-level decode table for elements
	802	* [minlength,maxlength) of the provided code/length tables, and
	803	* recurse to build subtables.
	804	*/
	805	static struct zlib_table zlib_mkonetab(int codes, unsigned char *lengths,
	806	int nsyms,
	807	int pfx, int pfxbits, int bits)
	808	{
	809	struct zlib_table *tab = snew(struct zlib_table);
	810	int pfxmask = (1 << pfxbits) - 1;
	811	int nbits, i, j, code;
	812
	813	tab->table = snewn(1 << bits, struct zlib_tableentry);
	814	tab->mask = (1 << bits) - 1;
	815
	816	for (code = 0; code <= tab->mask; code++) {
	817	tab->table[code].code = -1;
	818	tab->table[code].nbits = 0;
	819	tab->table[code].nexttable = NULL;
	820	}
	821
	822	for (i = 0; i < nsyms; i++) {
	823	if (lengths[i] <= pfxbits \|\| (codes[i] & pfxmask) != pfx)
	824	continue;
	825	code = (codes[i] >> pfxbits) & tab->mask;
	826	for (j = code; j <= tab->mask; j += 1 << (lengths[i] - pfxbits)) {
	827	tab->table[j].code = i;
	828	nbits = lengths[i] - pfxbits;
	829	if (tab->table[j].nbits < nbits)
	830	tab->table[j].nbits = nbits;
	831	}
	832	}
	833	for (code = 0; code <= tab->mask; code++) {
	834	if (tab->table[code].nbits <= bits)
	835	continue;
	836	/* Generate a subtable. */
	837	tab->table[code].code = -1;
	838	nbits = tab->table[code].nbits - bits;
	839	if (nbits > 7)
	840	nbits = 7;
	841	tab->table[code].nbits = bits;
	842	tab->table[code].nexttable = zlib_mkonetab(codes, lengths, nsyms,
	843	pfx \| (code << pfxbits),
	844	pfxbits + bits, nbits);
	845	}
	846
	847	return tab;
	848	}
	849
	850	/*
	851	* Build a decode table, given a set of Huffman tree lengths.
	852	*/
	853	static struct zlib_table zlib_mktable(unsigned char lengths,
	854	int nlengths)
	855	{
	856	int count[MAXCODELEN], startcode[MAXCODELEN], codes[MAXSYMS];
	857	int code, maxlen;
	858	int i, j;
	859
	860	/* Count the codes of each length. */
	861	maxlen = 0;
	862	for (i = 1; i < MAXCODELEN; i++)
	863	count[i] = 0;
	864	for (i = 0; i < nlengths; i++) {
	865	count[lengths[i]]++;
	866	if (maxlen < lengths[i])
	867	maxlen = lengths[i];
	868	}
	869	/* Determine the starting code for each length block. */
	870	code = 0;
	871	for (i = 1; i < MAXCODELEN; i++) {
	872	startcode[i] = code;
	873	code += count[i];
	874	code <<= 1;
	875	}
	876	/* Determine the code for each symbol. Mirrored, of course. */
	877	for (i = 0; i < nlengths; i++) {
	878	code = startcode[lengths[i]]++;
	879	codes[i] = 0;
	880	for (j = 0; j < lengths[i]; j++) {
	881	codes[i] = (codes[i] << 1) \| (code & 1);
	882	code >>= 1;
	883	}
	884	}
	885
	886	/*
	887	* Now we have the complete list of Huffman codes. Build a
	888	* table.
	889	*/
	890	return zlib_mkonetab(codes, lengths, nlengths, 0, 0,
	891	maxlen < 9 ? maxlen : 9);
	892	}
	893
	894	static int zlib_freetable(struct zlib_table **ztab)
	895	{
	896	struct zlib_table *tab;
	897	int code;
	898
	899	if (ztab == NULL)
	900	return -1;
	901
	902	if (*ztab == NULL)
	903	return 0;
	904
	905	tab = *ztab;
	906
	907	for (code = 0; code <= tab->mask; code++)
	908	if (tab->table[code].nexttable != NULL)
	909	zlib_freetable(&tab->table[code].nexttable);
	910
	911	sfree(tab->table);
	912	tab->table = NULL;
	913
	914	sfree(tab);
	915	*ztab = NULL;
	916
	917	return (0);
	918	}
	919
	920	struct zlib_decompress_ctx {
	921	struct zlib_table staticlentable, staticdisttable;
	922	struct zlib_table currlentable, currdisttable, *lenlentable;
	923	enum {
	924	START, OUTSIDEBLK,
	925	TREES_HDR, TREES_LENLEN, TREES_LEN, TREES_LENREP,
	926	INBLK, GOTLENSYM, GOTLEN, GOTDISTSYM,
	927	UNCOMP_LEN, UNCOMP_NLEN, UNCOMP_DATA
	928	} state;
	929	int sym, hlit, hdist, hclen, lenptr, lenextrabits, lenaddon, len,
	930	lenrep;
	931	int uncomplen;
	932	unsigned char lenlen[19];
	933	unsigned char lengths[286 + 32];
	934	unsigned long bits;
	935	int nbits;
	936	unsigned char window[WINSIZE];
	937	int winpos;
	938	unsigned char *outblk;
	939	int outlen, outsize;
	940	};
	941
	942	void *zlib_decompress_init(void)
	943	{
	944	struct zlib_decompress_ctx *dctx = snew(struct zlib_decompress_ctx);
	945	unsigned char lengths[288];
	946
	947	memset(lengths, 8, 144);
	948	memset(lengths + 144, 9, 256 - 144);
	949	memset(lengths + 256, 7, 280 - 256);
	950	memset(lengths + 280, 8, 288 - 280);
	951	dctx->staticlentable = zlib_mktable(lengths, 288);
	952	memset(lengths, 5, 32);
	953	dctx->staticdisttable = zlib_mktable(lengths, 32);
	954	dctx->state = START; /* even before header */
	955	dctx->currlentable = dctx->currdisttable = dctx->lenlentable = NULL;
	956	dctx->bits = 0;
	957	dctx->nbits = 0;
	958	dctx->winpos = 0;
	959
	960	return dctx;
	961	}
	962
	963	void zlib_decompress_cleanup(void *handle)
	964	{
	965	struct zlib_decompress_ctx dctx = (struct zlib_decompress_ctx )handle;
	966
	967	if (dctx->currlentable && dctx->currlentable != dctx->staticlentable)
	968	zlib_freetable(&dctx->currlentable);
	969	if (dctx->currdisttable && dctx->currdisttable != dctx->staticdisttable)
	970	zlib_freetable(&dctx->currdisttable);
	971	if (dctx->lenlentable)
	972	zlib_freetable(&dctx->lenlentable);
	973	sfree(dctx);
	974	}
	975
	976	static int zlib_huflookup(unsigned long bitsp, int nbitsp,
	977	struct zlib_table *tab)
	978	{
	979	unsigned long bits = *bitsp;
	980	int nbits = *nbitsp;
	981	while (1) {
	982	struct zlib_tableentry *ent;
	983	ent = &tab->table[bits & tab->mask];
	984	if (ent->nbits > nbits)
	985	return -1; /* not enough data */
	986	bits >>= ent->nbits;
	987	nbits -= ent->nbits;
	988	if (ent->code == -1)
	989	tab = ent->nexttable;
	990	else {
	991	*bitsp = bits;
	992	*nbitsp = nbits;
	993	return ent->code;
	994	}
	995
	996	if (!tab) {
	997	/*
	998	* There was a missing entry in the table, presumably
	999	* due to an invalid Huffman table description, and the
	1000	* subsequent data has attempted to use the missing
	1001	* entry. Return a decoding failure.
	1002	*/
	1003	return -2;
	1004	}
	1005	}
	1006	}
	1007
	1008	static void zlib_emit_char(struct zlib_decompress_ctx *dctx, int c)
	1009	{
	1010	dctx->window[dctx->winpos] = c;
	1011	dctx->winpos = (dctx->winpos + 1) & (WINSIZE - 1);
	1012	if (dctx->outlen >= dctx->outsize) {
	1013	dctx->outsize = dctx->outlen + 512;
	1014	dctx->outblk = sresize(dctx->outblk, dctx->outsize, unsigned char);
	1015	}
	1016	dctx->outblk[dctx->outlen++] = c;
	1017	}
	1018
	1019	#define EATBITS(n) ( dctx->nbits -= (n), dctx->bits >>= (n) )
	1020
	1021	int zlib_decompress_block(void handle, unsigned char block, int len,
	1022	unsigned char *outblock, int outlen)
	1023	{
	1024	struct zlib_decompress_ctx dctx = (struct zlib_decompress_ctx )handle;
	1025	const coderecord *rec;
	1026	int code, blktype, rep, dist, nlen;
	1027	static const unsigned char lenlenmap[] = {
	1028	16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15
	1029	};
	1030
	1031	dctx->outblk = NULL;
	1032	dctx->outsize = dctx->outlen = 0;
	1033
	1034	while (len > 0 \|\| dctx->nbits > 0) {
	1035	while (dctx->nbits < 24 && len > 0) {
	1036	dctx->bits \|= (*block++) << dctx->nbits;
	1037	dctx->nbits += 8;
	1038	len--;
	1039	}
	1040	switch (dctx->state) {
	1041	case START:
	1042	/* Expect 16-bit zlib header, which we'll dishonourably ignore. */
	1043	if (dctx->nbits < 16)
	1044	goto finished; /* done all we can */
	1045	EATBITS(16);
	1046	dctx->state = OUTSIDEBLK;
	1047	break;
	1048	case OUTSIDEBLK:
	1049	/* Expect 3-bit block header. */
	1050	if (dctx->nbits < 3)
	1051	goto finished; /* done all we can */
	1052	EATBITS(1);
	1053	blktype = dctx->bits & 3;
	1054	EATBITS(2);
	1055	if (blktype == 0) {
	1056	int to_eat = dctx->nbits & 7;
	1057	dctx->state = UNCOMP_LEN;
	1058	EATBITS(to_eat); /* align to byte boundary */
	1059	} else if (blktype == 1) {
	1060	dctx->currlentable = dctx->staticlentable;
	1061	dctx->currdisttable = dctx->staticdisttable;
	1062	dctx->state = INBLK;
	1063	} else if (blktype == 2) {
	1064	dctx->state = TREES_HDR;
	1065	}
	1066	break;
	1067	case TREES_HDR:
	1068	/*
	1069	* Dynamic block header. Five bits of HLIT, five of
	1070	* HDIST, four of HCLEN.
	1071	*/
	1072	if (dctx->nbits < 5 + 5 + 4)
	1073	goto finished; /* done all we can */
	1074	dctx->hlit = 257 + (dctx->bits & 31);
	1075	EATBITS(5);
	1076	dctx->hdist = 1 + (dctx->bits & 31);
	1077	EATBITS(5);
	1078	dctx->hclen = 4 + (dctx->bits & 15);
	1079	EATBITS(4);
	1080	dctx->lenptr = 0;
	1081	dctx->state = TREES_LENLEN;
	1082	memset(dctx->lenlen, 0, sizeof(dctx->lenlen));
	1083	break;
	1084	case TREES_LENLEN:
	1085	if (dctx->nbits < 3)
	1086	goto finished;
	1087	while (dctx->lenptr < dctx->hclen && dctx->nbits >= 3) {
	1088	dctx->lenlen[lenlenmap[dctx->lenptr++]] =
	1089	(unsigned char) (dctx->bits & 7);
	1090	EATBITS(3);
	1091	}
	1092	if (dctx->lenptr == dctx->hclen) {
	1093	dctx->lenlentable = zlib_mktable(dctx->lenlen, 19);
	1094	dctx->state = TREES_LEN;
	1095	dctx->lenptr = 0;
	1096	}
	1097	break;
	1098	case TREES_LEN:
	1099	if (dctx->lenptr >= dctx->hlit + dctx->hdist) {
	1100	dctx->currlentable = zlib_mktable(dctx->lengths, dctx->hlit);
	1101	dctx->currdisttable = zlib_mktable(dctx->lengths + dctx->hlit,
	1102	dctx->hdist);
	1103	zlib_freetable(&dctx->lenlentable);
	1104	dctx->lenlentable = NULL;
	1105	dctx->state = INBLK;
	1106	break;
	1107	}
	1108	code =
	1109	zlib_huflookup(&dctx->bits, &dctx->nbits, dctx->lenlentable);
	1110	if (code == -1)
	1111	goto finished;
	1112	if (code == -2)
	1113	goto decode_error;
	1114	if (code < 16)
	1115	dctx->lengths[dctx->lenptr++] = code;
	1116	else {
	1117	dctx->lenextrabits = (code == 16 ? 2 : code == 17 ? 3 : 7);
	1118	dctx->lenaddon = (code == 18 ? 11 : 3);
	1119	dctx->lenrep = (code == 16 && dctx->lenptr > 0 ?
	1120	dctx->lengths[dctx->lenptr - 1] : 0);
	1121	dctx->state = TREES_LENREP;
	1122	}
	1123	break;
	1124	case TREES_LENREP:
	1125	if (dctx->nbits < dctx->lenextrabits)
	1126	goto finished;
	1127	rep =
	1128	dctx->lenaddon +
	1129	(dctx->bits & ((1 << dctx->lenextrabits) - 1));
	1130	EATBITS(dctx->lenextrabits);
	1131	while (rep > 0 && dctx->lenptr < dctx->hlit + dctx->hdist) {
	1132	dctx->lengths[dctx->lenptr] = dctx->lenrep;
	1133	dctx->lenptr++;
	1134	rep--;
	1135	}
	1136	dctx->state = TREES_LEN;
	1137	break;
	1138	case INBLK:
	1139	code =
	1140	zlib_huflookup(&dctx->bits, &dctx->nbits, dctx->currlentable);
	1141	if (code == -1)
	1142	goto finished;
	1143	if (code == -2)
	1144	goto decode_error;
	1145	if (code < 256)
	1146	zlib_emit_char(dctx, code);
	1147	else if (code == 256) {
	1148	dctx->state = OUTSIDEBLK;
	1149	if (dctx->currlentable != dctx->staticlentable) {
	1150	zlib_freetable(&dctx->currlentable);
	1151	dctx->currlentable = NULL;
	1152	}
	1153	if (dctx->currdisttable != dctx->staticdisttable) {
	1154	zlib_freetable(&dctx->currdisttable);
	1155	dctx->currdisttable = NULL;
	1156	}
	1157	} else if (code < 286) { /* static tree can give >285; ignore */
	1158	dctx->state = GOTLENSYM;
	1159	dctx->sym = code;
	1160	}
	1161	break;
	1162	case GOTLENSYM:
	1163	rec = &lencodes[dctx->sym - 257];
	1164	if (dctx->nbits < rec->extrabits)
	1165	goto finished;
	1166	dctx->len =
	1167	rec->min + (dctx->bits & ((1 << rec->extrabits) - 1));
	1168	EATBITS(rec->extrabits);
	1169	dctx->state = GOTLEN;
	1170	break;
	1171	case GOTLEN:
	1172	code =
	1173	zlib_huflookup(&dctx->bits, &dctx->nbits,
	1174	dctx->currdisttable);
	1175	if (code == -1)
	1176	goto finished;
	1177	if (code == -2)
	1178	goto decode_error;
	1179	dctx->state = GOTDISTSYM;
	1180	dctx->sym = code;
	1181	break;
	1182	case GOTDISTSYM:
	1183	rec = &distcodes[dctx->sym];
	1184	if (dctx->nbits < rec->extrabits)
	1185	goto finished;
	1186	dist = rec->min + (dctx->bits & ((1 << rec->extrabits) - 1));
	1187	EATBITS(rec->extrabits);
	1188	dctx->state = INBLK;
	1189	while (dctx->len--)
	1190	zlib_emit_char(dctx, dctx->window[(dctx->winpos - dist) &
	1191	(WINSIZE - 1)]);
	1192	break;
	1193	case UNCOMP_LEN:
	1194	/*
	1195	* Uncompressed block. We expect to see a 16-bit LEN.
	1196	*/
	1197	if (dctx->nbits < 16)
	1198	goto finished;
	1199	dctx->uncomplen = dctx->bits & 0xFFFF;
	1200	EATBITS(16);
	1201	dctx->state = UNCOMP_NLEN;
	1202	break;
	1203	case UNCOMP_NLEN:
	1204	/*
	1205	* Uncompressed block. We expect to see a 16-bit NLEN,
	1206	* which should be the one's complement of the previous
	1207	* LEN.
	1208	*/
	1209	if (dctx->nbits < 16)
	1210	goto finished;
	1211	nlen = dctx->bits & 0xFFFF;
	1212	EATBITS(16);
	1213	if (dctx->uncomplen == 0)
	1214	dctx->state = OUTSIDEBLK; /* block is empty */
	1215	else
	1216	dctx->state = UNCOMP_DATA;
	1217	break;
	1218	case UNCOMP_DATA:
	1219	if (dctx->nbits < 8)
	1220	goto finished;
	1221	zlib_emit_char(dctx, dctx->bits & 0xFF);
	1222	EATBITS(8);
	1223	if (--dctx->uncomplen == 0)
	1224	dctx->state = OUTSIDEBLK; /* end of uncompressed block */
	1225	break;
	1226	}
	1227	}
	1228
	1229	finished:
	1230	*outblock = dctx->outblk;
	1231	*outlen = dctx->outlen;
	1232	return 1;
	1233
	1234	decode_error:
	1235	sfree(dctx->outblk);
	1236	*outblock = dctx->outblk = NULL;
	1237	*outlen = 0;
	1238	return 0;
	1239	}
	1240
	1241	const struct ssh_compress ssh_zlib = {
	1242	"zlib",
	1243	zlib_compress_init,
	1244	zlib_compress_cleanup,
	1245	zlib_compress_block,
	1246	zlib_decompress_init,
	1247	zlib_decompress_cleanup,
	1248	zlib_decompress_block,
	1249	zlib_disable_compression,
	1250	"zlib (RFC1950)"
	1251	};