From 7e2417cca16da830a1d0898380b256a77ec04de7 Mon Sep 17 00:00:00 2001 From: ben Date: Thu, 30 Nov 2006 23:19:00 +0000 Subject: [PATCH] Add support for compressed PDF streams, using Simon's new deflate library. git-svn-id: svn://svn.tartarus.org/sgt/halibut@6931 cda61777-01e9-0310-a592-d414129be87e --- Makefile | 3 +- bk_pdf.c | 24 +- deflate.c | 2202 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ deflate.h | 108 +++ halibut.h | 1 + misc.c | 7 +- 6 files changed, 2333 insertions(+), 12 deletions(-) create mode 100644 deflate.c create mode 100644 deflate.h diff --git a/Makefile b/Makefile index c38defc..a193620 100644 --- a/Makefile +++ b/Makefile @@ -72,6 +72,7 @@ else # The `real' makefile part. CFLAGS += -Wall -W -ansi -pedantic +LIBS += -lm ifdef TEST CFLAGS += -DLOGALLOC @@ -108,7 +109,7 @@ include $(LIBCHARSET_SRCDIR)Makefile MODULES := main malloc ustring error help licence version misc tree234 MODULES += input in_afm in_pf keywords contents index biblio MODULES += bk_text bk_html bk_whlp bk_man bk_info bk_paper bk_ps bk_pdf -MODULES += winhelp psdata wcwidth +MODULES += winhelp deflate psdata wcwidth OBJECTS := $(addsuffix .o,$(MODULES)) $(LIBCHARSET_OBJS) DEPS := $(addsuffix .d,$(MODULES)) diff --git a/bk_pdf.c b/bk_pdf.c index 5db36bf..71307d2 100644 --- a/bk_pdf.c +++ b/bk_pdf.c @@ -5,6 +5,7 @@ #include #include "halibut.h" #include "paper.h" +#include "deflate.h" #define TREE_BRANCH 2 /* max branching factor in page tree */ @@ -472,12 +473,19 @@ void pdf_backend(paragraph *sourceform, keywordlist *keywords, for (o = olist.head; o; o = o->next) { rdstringc rs = {0, 0, NULL}; char text[80]; + deflate_compress_ctx *zcontext; + void *zbuf; + int zlen; sprintf(text, "%d 0 obj\n", o->number); rdaddsc(&rs, text); if (!o->main.text && o->stream.text) { - sprintf(text, "<<\n/Length %d\n>>\n", o->stream.pos); + zcontext = deflate_compress_new(DEFLATE_TYPE_ZLIB); + deflate_compress_data(zcontext, o->stream.text, o->stream.pos, + DEFLATE_END_OF_DATA, &zbuf, &zlen); + deflate_compress_free(zcontext); + sprintf(text, "<<\n/Filter/FlateDecode\n/Length %d\n>>\n", zlen); rdaddsc(&o->main, text); } @@ -489,15 +497,11 @@ void pdf_backend(paragraph *sourceform, keywordlist *keywords, rdaddc(&rs, '\n'); if (o->stream.text) { - /* - * FIXME: If we ever start compressing stream data then - * it will have zero bytes in it, so we'll have to be - * more careful than this. - */ rdaddsc(&rs, "stream\n"); - rdaddsc(&rs, o->stream.text); + rdaddsn(&rs, zbuf, zlen); rdaddsc(&rs, "\nendstream\n"); sfree(o->stream.text); + sfree(zbuf); } rdaddsc(&rs, "endobj\n"); @@ -518,9 +522,11 @@ void pdf_backend(paragraph *sourceform, keywordlist *keywords, /* * Header. I'm going to put the version IDs in the header as - * well, simply in PDF comments. + * well, simply in PDF comments. The PDF Reference also suggests + * that binary PDF files contain four top-bit-set characters in + * the second line. */ - fileoff = fprintf(fp, "%%PDF-1.3\n"); + fileoff = fprintf(fp, "%%PDF-1.3\n%% L\xc3\xba\xc3\xb0""a\n"); for (p = sourceform; p; p = p->next) if (p->type == para_VersionID) fileoff += pdf_versionid(fp, p->words); diff --git a/deflate.c b/deflate.c new file mode 100644 index 0000000..b59df00 --- /dev/null +++ b/deflate.c @@ -0,0 +1,2202 @@ +/* + * Reimplementation of Deflate (RFC1951) compression. Adapted from + * the version in PuTTY, and extended to write dynamic Huffman + * trees and choose block boundaries usefully. + */ + +/* + * TODO: + * + * - Feature: it would probably be useful to add a third format + * type to read and write actual gzip files. + * + * - Feature: the decompress function should return error codes + * indicating what kind of thing went wrong in a decoding error + * situation, possibly even including a file pointer. I envisage + * an enum of error codes in the header file, and one of those + * nasty preprocessor tricks to permit a user to define a + * code-to-text mapping array. + * + * - Feature: could do with forms of flush other than SYNC_FLUSH. + * I'm not sure exactly how those work when you don't know in + * advance that your next block will be static (as we did in + * PuTTY). And remember the 9-bit limitation of zlib. + * + * - Compression quality: introduce the option of choosing a + * static block instead of a dynamic one, where that's more + * efficient. + * + * - Compression quality: the actual LZ77 engine appears to be + * unable to track a match going beyond the input data passed to + * it in a single call. I'd prefer it to be more restartable + * than that: we ought to be able to pass in our input data in + * whatever size blocks happen to be convenient and not affect + * the output at all. + * + * - Compression quality: chooseblock() appears to be computing + * wildly inaccurate block size estimates. Possible resolutions: + * + find and fix some trivial bug I haven't spotted yet + * + abandon the entropic approximation and go with trial + * Huffman runs + * + * - Compression quality: see if increasing SYMLIMIT causes + * dynamic blocks to start being consistently smaller than it. + * + * - Compression quality: we ought to be able to fall right back + * to actual uncompressed blocks if really necessary, though + * it's not clear what the criterion for doing so would be. + * + * - Performance: chooseblock() is currently computing the whole + * entropic approximation for every possible block size. It + * ought to be able to update it incrementally as it goes along + * (assuming of course we don't jack it all in and go for a + * proper Huffman analysis). + */ + +/* + * This software is copyright 2000-2006 Simon Tatham. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR + * IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include + +#include "deflate.h" + +#define snew(type) ( (type *) malloc(sizeof(type)) ) +#define snewn(n, type) ( (type *) malloc((n) * sizeof(type)) ) +#define sresize(x, n, type) ( (type *) realloc((x), (n) * sizeof(type)) ) +#define sfree(x) ( free((x)) ) + +#define lenof(x) (sizeof((x)) / sizeof(*(x))) + +#if defined TESTDBG +/* gcc-specific diagnostic macro */ +#define debug_int(x...) ( fprintf(stderr, x) ) +#define debug(x) ( debug_int x ) +#else +#define debug(x) +#endif + +#ifndef FALSE +#define FALSE 0 +#define TRUE (!FALSE) +#endif + +/* ---------------------------------------------------------------------- + * Basic LZ77 code. This bit is designed modularly, so it could be + * ripped out and used in a different LZ77 compressor. Go to it, + * and good luck :-) + */ + +struct LZ77InternalContext; +struct LZ77Context { + struct LZ77InternalContext *ictx; + void *userdata; + void (*literal) (struct LZ77Context * ctx, unsigned char c); + void (*match) (struct LZ77Context * ctx, int distance, int len); +}; + +/* + * Initialise the private fields of an LZ77Context. It's up to the + * user to initialise the public fields. + */ +static int lz77_init(struct LZ77Context *ctx); + +/* + * Supply data to be compressed. Will update the private fields of + * the LZ77Context, and will call literal() and match() to output. + * If `compress' is FALSE, it will never emit a match, but will + * instead call literal() for everything. + */ +static void lz77_compress(struct LZ77Context *ctx, + const unsigned char *data, int len, int compress); + +/* + * Modifiable parameters. + */ +#define WINSIZE 32768 /* window size. Must be power of 2! */ +#define HASHMAX 2039 /* one more than max hash value */ +#define MAXMATCH 32 /* how many matches we track */ +#define HASHCHARS 3 /* how many chars make a hash */ + +/* + * This compressor takes a less slapdash approach than the + * gzip/zlib one. Rather than allowing our hash chains to fall into + * disuse near the far end, we keep them doubly linked so we can + * _find_ the far end, and then every time we add a new byte to the + * window (thus rolling round by one and removing the previous + * byte), we can carefully remove the hash chain entry. + */ + +#define INVALID -1 /* invalid hash _and_ invalid offset */ +struct WindowEntry { + short next, prev; /* array indices within the window */ + short hashval; +}; + +struct HashEntry { + short first; /* window index of first in chain */ +}; + +struct Match { + int distance, len; +}; + +struct LZ77InternalContext { + struct WindowEntry win[WINSIZE]; + unsigned char data[WINSIZE]; + int winpos; + struct HashEntry hashtab[HASHMAX]; + unsigned char pending[HASHCHARS]; + int npending; +}; + +static int lz77_hash(const unsigned char *data) +{ + return (257 * data[0] + 263 * data[1] + 269 * data[2]) % HASHMAX; +} + +static int lz77_init(struct LZ77Context *ctx) +{ + struct LZ77InternalContext *st; + int i; + + st = snew(struct LZ77InternalContext); + if (!st) + return 0; + + ctx->ictx = st; + + for (i = 0; i < WINSIZE; i++) + st->win[i].next = st->win[i].prev = st->win[i].hashval = INVALID; + for (i = 0; i < HASHMAX; i++) + st->hashtab[i].first = INVALID; + st->winpos = 0; + + st->npending = 0; + + return 1; +} + +static void lz77_advance(struct LZ77InternalContext *st, + unsigned char c, int hash) +{ + int off; + + /* + * Remove the hash entry at winpos from the tail of its chain, + * or empty the chain if it's the only thing on the chain. + */ + if (st->win[st->winpos].prev != INVALID) { + st->win[st->win[st->winpos].prev].next = INVALID; + } else if (st->win[st->winpos].hashval != INVALID) { + st->hashtab[st->win[st->winpos].hashval].first = INVALID; + } + + /* + * Create a new entry at winpos and add it to the head of its + * hash chain. + */ + st->win[st->winpos].hashval = hash; + st->win[st->winpos].prev = INVALID; + off = st->win[st->winpos].next = st->hashtab[hash].first; + st->hashtab[hash].first = st->winpos; + if (off != INVALID) + st->win[off].prev = st->winpos; + st->data[st->winpos] = c; + + /* + * Advance the window pointer. + */ + st->winpos = (st->winpos + 1) & (WINSIZE - 1); +} + +#define CHARAT(k) ( (k)<0 ? st->data[(st->winpos+k)&(WINSIZE-1)] : data[k] ) + +static void lz77_compress(struct LZ77Context *ctx, + const unsigned char *data, int len, int compress) +{ + struct LZ77InternalContext *st = ctx->ictx; + int i, hash, distance, off, nmatch, matchlen, advance; + struct Match defermatch, matches[MAXMATCH]; + int deferchr; + + /* + * Add any pending characters from last time to the window. (We + * might not be able to.) + */ + for (i = 0; i < st->npending; i++) { + unsigned char foo[HASHCHARS]; + int j; + if (len + st->npending - i < HASHCHARS) { + /* Update the pending array. */ + for (j = i; j < st->npending; j++) + st->pending[j - i] = st->pending[j]; + break; + } + for (j = 0; j < HASHCHARS; j++) + foo[j] = (i + j < st->npending ? st->pending[i + j] : + data[i + j - st->npending]); + lz77_advance(st, foo[0], lz77_hash(foo)); + } + st->npending -= i; + + defermatch.len = 0; + deferchr = '\0'; + while (len > 0) { + + /* Don't even look for a match, if we're not compressing. */ + if (compress && len >= HASHCHARS) { + /* + * Hash the next few characters. + */ + hash = lz77_hash(data); + + /* + * Look the hash up in the corresponding hash chain and see + * what we can find. + */ + nmatch = 0; + for (off = st->hashtab[hash].first; + off != INVALID; off = st->win[off].next) { + /* distance = 1 if off == st->winpos-1 */ + /* distance = WINSIZE if off == st->winpos */ + distance = + WINSIZE - (off + WINSIZE - st->winpos) % WINSIZE; + for (i = 0; i < HASHCHARS; i++) + if (CHARAT(i) != CHARAT(i - distance)) + break; + if (i == HASHCHARS) { + matches[nmatch].distance = distance; + matches[nmatch].len = 3; + if (++nmatch >= MAXMATCH) + break; + } + } + } else { + nmatch = 0; + hash = INVALID; + } + + if (nmatch > 0) { + /* + * We've now filled up matches[] with nmatch potential + * matches. Follow them down to find the longest. (We + * assume here that it's always worth favouring a + * longer match over a shorter one.) + */ + matchlen = HASHCHARS; + while (matchlen < len) { + int j; + for (i = j = 0; i < nmatch; i++) { + if (CHARAT(matchlen) == + CHARAT(matchlen - matches[i].distance)) { + matches[j++] = matches[i]; + } + } + if (j == 0) + break; + matchlen++; + nmatch = j; + } + + /* + * We've now got all the longest matches. We favour the + * shorter distances, which means we go with matches[0]. + * So see if we want to defer it or throw it away. + */ + matches[0].len = matchlen; + if (defermatch.len > 0) { + if (matches[0].len > defermatch.len + 1) { + /* We have a better match. Emit the deferred char, + * and defer this match. */ + ctx->literal(ctx, (unsigned char) deferchr); + defermatch = matches[0]; + deferchr = data[0]; + advance = 1; + } else { + /* We don't have a better match. Do the deferred one. */ + ctx->match(ctx, defermatch.distance, defermatch.len); + advance = defermatch.len - 1; + defermatch.len = 0; + } + } else { + /* There was no deferred match. Defer this one. */ + defermatch = matches[0]; + deferchr = data[0]; + advance = 1; + } + } else { + /* + * We found no matches. Emit the deferred match, if + * any; otherwise emit a literal. + */ + if (defermatch.len > 0) { + ctx->match(ctx, defermatch.distance, defermatch.len); + advance = defermatch.len - 1; + defermatch.len = 0; + } else { + ctx->literal(ctx, data[0]); + advance = 1; + } + } + + /* + * Now advance the position by `advance' characters, + * keeping the window and hash chains consistent. + */ + while (advance > 0) { + if (len >= HASHCHARS) { + lz77_advance(st, *data, lz77_hash(data)); + } else { + st->pending[st->npending++] = *data; + } + data++; + len--; + advance--; + } + } +} + +/* ---------------------------------------------------------------------- + * Deflate functionality common to both compression and decompression. + */ + +static const unsigned char lenlenmap[] = { + 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 +}; + +#define MAXCODELEN 16 + +/* + * Given a sequence of Huffman code lengths, compute the actual + * codes, in the final form suitable for feeding to outbits (i.e. + * already bit-mirrored). + * + * Returns the maximum code length found. + */ +static int hufcodes(const unsigned char *lengths, int *codes, int nsyms) +{ + int count[MAXCODELEN], startcode[MAXCODELEN]; + int code, maxlen; + int i, j; + + /* Count the codes of each length. */ + maxlen = 0; + for (i = 1; i < MAXCODELEN; i++) + count[i] = 0; + for (i = 0; i < nsyms; i++) { + count[lengths[i]]++; + if (maxlen < lengths[i]) + maxlen = lengths[i]; + } + /* Determine the starting code for each length block. */ + code = 0; + for (i = 1; i < MAXCODELEN; i++) { + startcode[i] = code; + code += count[i]; + code <<= 1; + } + /* Determine the code for each symbol. Mirrored, of course. */ + for (i = 0; i < nsyms; i++) { + code = startcode[lengths[i]]++; + codes[i] = 0; + for (j = 0; j < lengths[i]; j++) { + codes[i] = (codes[i] << 1) | (code & 1); + code >>= 1; + } + } + + return maxlen; +} + +/* ---------------------------------------------------------------------- + * Deflate compression. + */ + +#define SYMLIMIT 65536 +#define SYMPFX_LITLEN 0x00000000U +#define SYMPFX_DIST 0x40000000U +#define SYMPFX_EXTRABITS 0x80000000U +#define SYMPFX_CODELEN 0xC0000000U +#define SYMPFX_MASK 0xC0000000U + +#define SYM_EXTRABITS_MASK 0x3C000000U +#define SYM_EXTRABITS_SHIFT 26 + +struct deflate_compress_ctx { + struct LZ77Context *lzc; + unsigned char *outbuf; + int outlen, outsize; + unsigned long outbits; + int noutbits; + int firstblock; + unsigned long *syms; + int symstart, nsyms; + int type; + unsigned long adler32; + int lastblock; + int finished; +#ifdef STATISTICS + unsigned long bitcount; +#endif +}; + +static void outbits(deflate_compress_ctx *out, + unsigned long bits, int nbits) +{ + assert(out->noutbits + nbits <= 32); + out->outbits |= bits << out->noutbits; + out->noutbits += nbits; + while (out->noutbits >= 8) { + if (out->outlen >= out->outsize) { + out->outsize = out->outlen + 64; + out->outbuf = sresize(out->outbuf, out->outsize, unsigned char); + } + out->outbuf[out->outlen++] = (unsigned char) (out->outbits & 0xFF); + out->outbits >>= 8; + out->noutbits -= 8; + } +#ifdef STATISTICS + out->bitcount += nbits; +#endif +} + +/* + * Binary heap functions used by buildhuf(). Each one assumes the + * heap to be stored in an array of ints, with two ints per node + * (user data and key). They take in the old heap length, and + * return the new one. + */ +#define HEAPPARENT(x) (((x)-2)/4*2) +#define HEAPLEFT(x) ((x)*2+2) +#define HEAPRIGHT(x) ((x)*2+4) +static int addheap(int *heap, int len, int userdata, int key) +{ + int me, dad, tmp; + + me = len; + heap[len++] = userdata; + heap[len++] = key; + + while (me > 0) { + dad = HEAPPARENT(me); + if (heap[me+1] < heap[dad+1]) { + tmp = heap[me]; heap[me] = heap[dad]; heap[dad] = tmp; + tmp = heap[me+1]; heap[me+1] = heap[dad+1]; heap[dad+1] = tmp; + me = dad; + } else + break; + } + + return len; +} +static int rmheap(int *heap, int len, int *userdata, int *key) +{ + int me, lc, rc, c, tmp; + + len -= 2; + *userdata = heap[0]; + *key = heap[1]; + heap[0] = heap[len]; + heap[1] = heap[len+1]; + + me = 0; + + while (1) { + lc = HEAPLEFT(me); + rc = HEAPRIGHT(me); + if (lc >= len) + break; + else if (rc >= len || heap[lc+1] < heap[rc+1]) + c = lc; + else + c = rc; + if (heap[me+1] > heap[c+1]) { + tmp = heap[me]; heap[me] = heap[c]; heap[c] = tmp; + tmp = heap[me+1]; heap[me+1] = heap[c+1]; heap[c+1] = tmp; + } else + break; + me = c; + } + + return len; +} + +/* + * The core of the Huffman algorithm: takes an input array of + * symbol frequencies, and produces an output array of code + * lengths. + * + * This is basically a generic Huffman implementation, but it has + * one zlib-related quirk which is that it caps the output code + * lengths to fit in an unsigned char (which is safe since Deflate + * will reject anything longer than 15 anyway). Anyone wanting to + * rip it out and use it in another context should find that easy + * to remove. + */ +#define HUFMAX 286 +static void buildhuf(const int *freqs, unsigned char *lengths, int nsyms) +{ + int parent[2*HUFMAX-1]; + int length[2*HUFMAX-1]; + int heap[2*HUFMAX]; + int heapsize; + int i, j, n; + int si, sj; + + assert(nsyms <= HUFMAX); + + memset(parent, 0, sizeof(parent)); + + /* + * Begin by building the heap. + */ + heapsize = 0; + for (i = 0; i < nsyms; i++) + if (freqs[i] > 0) /* leave unused symbols out totally */ + heapsize = addheap(heap, heapsize, i, freqs[i]); + + /* + * Now repeatedly take two elements off the heap and merge + * them. + */ + n = HUFMAX; + while (heapsize > 2) { + heapsize = rmheap(heap, heapsize, &i, &si); + heapsize = rmheap(heap, heapsize, &j, &sj); + parent[i] = n; + parent[j] = n; + heapsize = addheap(heap, heapsize, n, si + sj); + n++; + } + + /* + * Now we have our tree, in the form of a link from each node + * to the index of its parent. Count back down the tree to + * determine the code lengths. + */ + memset(length, 0, sizeof(length)); + /* The tree root has length 0 after that, which is correct. */ + for (i = n-1; i-- ;) + if (parent[i] > 0) + length[i] = 1 + length[parent[i]]; + + /* + * And that's it. (Simple, wasn't it?) Copy the lengths into + * the output array and leave. + * + * Here we cap lengths to fit in unsigned char. + */ + for (i = 0; i < nsyms; i++) + lengths[i] = (length[i] > 255 ? 255 : length[i]); +} + +/* + * Wrapper around buildhuf() which enforces the Deflate restriction + * that no code length may exceed 15 bits, or 7 for the auxiliary + * code length alphabet. This function has the same calling + * semantics as buildhuf(), except that it might modify the freqs + * array. + */ +static void deflate_buildhuf(int *freqs, unsigned char *lengths, + int nsyms, int limit) +{ + int smallestfreq, totalfreq, nactivesyms; + int num, denom, adjust; + int i; + int maxprob; + + /* + * First, try building the Huffman table the normal way. If + * this works, it's optimal, so we don't want to mess with it. + */ + buildhuf(freqs, lengths, nsyms); + + for (i = 0; i < nsyms; i++) + if (lengths[i] > limit) + break; + + if (i == nsyms) + return; /* OK */ + + /* + * The Huffman algorithm can only ever generate a code length + * of N bits or more if there is a symbol whose probability is + * less than the reciprocal of the (N+2)th Fibonacci number + * (counting from F_0=0 and F_1=1), i.e. 1/2584 for N=16, or + * 1/55 for N=8. (This is a necessary though not sufficient + * condition.) + * + * Why is this? Well, consider the input symbol with the + * smallest probability. Let that probability be x. In order + * for this symbol to have a code length of at least 1, the + * Huffman algorithm will have to merge it with some other + * node; and since x is the smallest probability, the node it + * gets merged with must be at least x. Thus, the probability + * of the resulting combined node will be at least 2x. Now in + * order for our node to reach depth 2, this 2x-node must be + * merged again. But what with? We can't assume the node it + * merges with is at least 2x, because this one might only be + * the _second_ smallest remaining node. But we do know the + * node it merges with must be at least x, so our order-2 + * internal node is at least 3x. + * + * How small a node can merge with _that_ to get an order-3 + * internal node? Well, it must be at least 2x, because if it + * was smaller than that then it would have been one of the two + * smallest nodes in the previous step and been merged at that + * point. So at least 3x, plus at least 2x, comes to at least + * 5x for an order-3 node. + * + * And so it goes on: at every stage we must merge our current + * node with a node at least as big as the bigger of this one's + * two parents, and from this starting point that gives rise to + * the Fibonacci sequence. So we find that in order to have a + * node n levels deep (i.e. a maximum code length of n), the + * overall probability of the root of the entire tree must be + * at least F_{n+2} times the probability of the rarest symbol. + * In other words, since the overall probability is 1, it is a + * necessary condition for a code length of 16 or more that + * there must be at least one symbol with probability <= + * 1/F_18. + * + * (To demonstrate that a probability this big really can give + * rise to a code length of 16, consider the set of input + * frequencies { 1-epsilon, 1, 1, 2, 3, 5, 8, 13, 21, 34, 55, + * 89, 144, 233, 377, 610, 987 }, for arbitrarily small + * epsilon.) + * + * So here buildhuf() has returned us an overlong code. So to + * ensure it doesn't do it again, we add a constant to all the + * (non-zero) symbol frequencies, causing them to become more + * balanced and removing the danger. We can then feed the + * results back to the standard buildhuf() and be + * assert()-level confident that the resulting code lengths + * contain nothing outside the permitted range. + */ + maxprob = (limit == 16 ? 2584 : 55); /* no point in computing full F_n */ + totalfreq = nactivesyms = 0; + smallestfreq = -1; + for (i = 0; i < nsyms; i++) { + if (freqs[i] == 0) + continue; + if (smallestfreq < 0 || smallestfreq > freqs[i]) + smallestfreq = freqs[i]; + totalfreq += freqs[i]; + nactivesyms++; + } + assert(smallestfreq <= totalfreq / maxprob); + + /* + * We want to find the smallest integer `adjust' such that + * (totalfreq + nactivesyms * adjust) / (smallestfreq + + * adjust) is less than maxprob. A bit of algebra tells us + * that the threshold value is equal to + * + * totalfreq - maxprob * smallestfreq + * ---------------------------------- + * maxprob - nactivesyms + * + * rounded up, of course. And we'll only even be trying + * this if + */ + num = totalfreq - smallestfreq * maxprob; + denom = maxprob - nactivesyms; + adjust = (num + denom - 1) / denom; + + /* + * Now add `adjust' to all the input symbol frequencies. + */ + for (i = 0; i < nsyms; i++) + if (freqs[i] != 0) + freqs[i] += adjust; + + /* + * Rebuild the Huffman tree... + */ + buildhuf(freqs, lengths, nsyms); + + /* + * ... and this time it ought to be OK. + */ + for (i = 0; i < nsyms; i++) + assert(lengths[i] <= limit); +} + +struct huftrees { + unsigned char *len_litlen; + int *code_litlen; + unsigned char *len_dist; + int *code_dist; + unsigned char *len_codelen; + int *code_codelen; +}; + +/* + * Write out a single symbol, given the three Huffman trees. + */ +static void writesym(deflate_compress_ctx *out, + unsigned sym, struct huftrees *trees) +{ + unsigned basesym = sym &~ SYMPFX_MASK; + int i; + + switch (sym & SYMPFX_MASK) { + case SYMPFX_LITLEN: + debug(("send: litlen %d\n", basesym)); + outbits(out, trees->code_litlen[basesym], trees->len_litlen[basesym]); + break; + case SYMPFX_DIST: + debug(("send: dist %d\n", basesym)); + outbits(out, trees->code_dist[basesym], trees->len_dist[basesym]); + break; + case SYMPFX_CODELEN: + debug(("send: codelen %d\n", basesym)); + outbits(out, trees->code_codelen[basesym],trees->len_codelen[basesym]); + break; + case SYMPFX_EXTRABITS: + i = basesym >> SYM_EXTRABITS_SHIFT; + basesym &= ~SYM_EXTRABITS_MASK; + debug(("send: extrabits %d/%d\n", basesym, i)); + outbits(out, basesym, i); + break; + } +} + +static void outblock(deflate_compress_ctx *out, + int blklen, int dynamic) +{ + int freqs1[286], freqs2[30], freqs3[19]; + unsigned char len1[286], len2[30], len3[19]; + int code1[286], code2[30], code3[19]; + int hlit, hdist, hclen, bfinal, btype; + int treesrc[286 + 30]; + int treesyms[286 + 30]; + int codelen[19]; + int i, ntreesrc, ntreesyms; + struct huftrees ht; +#ifdef STATISTICS + unsigned long bitcount_before; +#endif + + ht.len_litlen = len1; + ht.len_dist = len2; + ht.len_codelen = len3; + ht.code_litlen = code1; + ht.code_dist = code2; + ht.code_codelen = code3; + + /* + * Build the two main Huffman trees. + */ + if (dynamic) { + /* + * Count up the frequency tables. + */ + memset(freqs1, 0, sizeof(freqs1)); + memset(freqs2, 0, sizeof(freqs2)); + freqs1[256] = 1; /* we're bound to need one EOB */ + for (i = 0; i < blklen; i++) { + unsigned sym = out->syms[(out->symstart + i) % SYMLIMIT]; + + /* + * Increment the occurrence counter for this symbol, if + * it's in one of the Huffman alphabets and isn't extra + * bits. + */ + if ((sym & SYMPFX_MASK) == SYMPFX_LITLEN) { + sym &= ~SYMPFX_MASK; + assert(sym < lenof(freqs1)); + freqs1[sym]++; + } else if ((sym & SYMPFX_MASK) == SYMPFX_DIST) { + sym &= ~SYMPFX_MASK; + assert(sym < lenof(freqs2)); + freqs2[sym]++; + } + } + deflate_buildhuf(freqs1, len1, lenof(freqs1), 15); + deflate_buildhuf(freqs2, len2, lenof(freqs2), 15); + } else { + /* + * Fixed static trees. + */ + for (i = 0; i < lenof(len1); i++) + len1[i] = (i < 144 ? 8 : + i < 256 ? 9 : + i < 280 ? 7 : 8); + for (i = 0; i < lenof(len2); i++) + len2[i] = 5; + } + hufcodes(len1, code1, lenof(freqs1)); + hufcodes(len2, code2, lenof(freqs2)); + + if (dynamic) { + /* + * Determine HLIT and HDIST. + */ + for (hlit = 286; hlit > 257 && len1[hlit-1] == 0; hlit--); + for (hdist = 30; hdist > 1 && len2[hdist-1] == 0; hdist--); + + /* + * Write out the list of symbols used to transmit the + * trees. + */ + ntreesrc = 0; + for (i = 0; i < hlit; i++) + treesrc[ntreesrc++] = len1[i]; + for (i = 0; i < hdist; i++) + treesrc[ntreesrc++] = len2[i]; + ntreesyms = 0; + for (i = 0; i < ntreesrc ;) { + int j = 1; + int k; + + /* Find length of run of the same length code. */ + while (i+j < ntreesrc && treesrc[i+j] == treesrc[i]) + j++; + + /* Encode that run as economically as we can. */ + k = j; + if (treesrc[i] == 0) { + /* + * Zero code length: we can output run codes for + * 3-138 zeroes. So if we have fewer than 3 zeroes, + * we just output literals. Otherwise, we output + * nothing but run codes, and tweak their lengths + * to make sure we aren't left with under 3 at the + * end. + */ + if (k < 3) { + while (k--) + treesyms[ntreesyms++] = 0 | SYMPFX_CODELEN; + } else { + while (k > 0) { + int rpt = (k < 138 ? k : 138); + if (rpt > k-3 && rpt < k) + rpt = k-3; + assert(rpt >= 3 && rpt <= 138); + if (rpt < 11) { + treesyms[ntreesyms++] = 17 | SYMPFX_CODELEN; + treesyms[ntreesyms++] = + (SYMPFX_EXTRABITS | (rpt - 3) | + (3 << SYM_EXTRABITS_SHIFT)); + } else { + treesyms[ntreesyms++] = 18 | SYMPFX_CODELEN; + treesyms[ntreesyms++] = + (SYMPFX_EXTRABITS | (rpt - 11) | + (7 << SYM_EXTRABITS_SHIFT)); + } + k -= rpt; + } + } + } else { + /* + * Non-zero code length: we must output the first + * one explicitly, then we can output a copy code + * for 3-6 repeats. So if we have fewer than 4 + * repeats, we _just_ output literals. Otherwise, + * we output one literal plus at least one copy + * code, and tweak the copy codes to make sure we + * aren't left with under 3 at the end. + */ + assert(treesrc[i] < 16); + treesyms[ntreesyms++] = treesrc[i] | SYMPFX_CODELEN; + k--; + if (k < 3) { + while (k--) + treesyms[ntreesyms++] = treesrc[i] | SYMPFX_CODELEN; + } else { + while (k > 0) { + int rpt = (k < 6 ? k : 6); + if (rpt > k-3 && rpt < k) + rpt = k-3; + assert(rpt >= 3 && rpt <= 6); + treesyms[ntreesyms++] = 16 | SYMPFX_CODELEN; + treesyms[ntreesyms++] = (SYMPFX_EXTRABITS | (rpt - 3) | + (2 << SYM_EXTRABITS_SHIFT)); + k -= rpt; + } + } + } + + i += j; + } + assert((unsigned)ntreesyms < lenof(treesyms)); + + /* + * Count up the frequency table for the tree-transmission + * symbols, and build the auxiliary Huffman tree for that. + */ + memset(freqs3, 0, sizeof(freqs3)); + for (i = 0; i < ntreesyms; i++) { + unsigned sym = treesyms[i]; + + /* + * Increment the occurrence counter for this symbol, if + * it's the Huffman alphabet and isn't extra bits. + */ + if ((sym & SYMPFX_MASK) == SYMPFX_CODELEN) { + sym &= ~SYMPFX_MASK; + assert(sym < lenof(freqs3)); + freqs3[sym]++; + } + } + deflate_buildhuf(freqs3, len3, lenof(freqs3), 7); + hufcodes(len3, code3, lenof(freqs3)); + + /* + * Reorder the code length codes into transmission order, and + * determine HCLEN. + */ + for (i = 0; i < 19; i++) + codelen[i] = len3[lenlenmap[i]]; + for (hclen = 19; hclen > 4 && codelen[hclen-1] == 0; hclen--); + } + + /* + * Actually transmit the block. + */ + + /* 3-bit block header */ + bfinal = (out->lastblock ? 1 : 0); + btype = dynamic ? 2 : 1; + debug(("send: bfinal=%d btype=%d\n", bfinal, btype)); + outbits(out, bfinal, 1); + outbits(out, btype, 2); + +#ifdef STATISTICS + bitcount_before = out->bitcount; +#endif + + if (dynamic) { + /* HLIT, HDIST and HCLEN */ + debug(("send: hlit=%d hdist=%d hclen=%d\n", hlit, hdist, hclen)); + outbits(out, hlit - 257, 5); + outbits(out, hdist - 1, 5); + outbits(out, hclen - 4, 4); + + /* Code lengths for the auxiliary tree */ + for (i = 0; i < hclen; i++) { + debug(("send: lenlen %d\n", codelen[i])); + outbits(out, codelen[i], 3); + } + + /* Code lengths for the literal/length and distance trees */ + for (i = 0; i < ntreesyms; i++) + writesym(out, treesyms[i], &ht); +#ifdef STATISTICS + fprintf(stderr, "total tree size %lu bits\n", + out->bitcount - bitcount_before); +#endif + } + + /* Output the actual symbols from the buffer */ + for (i = 0; i < blklen; i++) { + unsigned sym = out->syms[(out->symstart + i) % SYMLIMIT]; + writesym(out, sym, &ht); + } + + /* Output the end-of-data symbol */ + writesym(out, SYMPFX_LITLEN | 256, &ht); + + /* + * Remove all the just-output symbols from the symbol buffer by + * adjusting symstart and nsyms. + */ + out->symstart = (out->symstart + blklen) % SYMLIMIT; + out->nsyms -= blklen; +} + +static void outblock_wrapper(deflate_compress_ctx *out, + int best_dynamic_len) +{ + /* + * Final block choice function: we have the option of either + * outputting a dynamic block of length best_dynamic_len, or a + * static block of length out->nsyms. Whichever gives us the + * best value for money, we do. + * + * FIXME: currently we always choose dynamic except for empty + * blocks. We should make a sensible judgment. + */ + if (out->nsyms == 0) + outblock(out, 0, FALSE); + else + outblock(out, best_dynamic_len, TRUE); +} + +static void chooseblock(deflate_compress_ctx *out) +{ + int freqs1[286], freqs2[30]; + int i, bestlen; + double bestvfm; + int nextrabits; + + memset(freqs1, 0, sizeof(freqs1)); + memset(freqs2, 0, sizeof(freqs2)); + freqs1[256] = 1; /* we're bound to need one EOB */ + nextrabits = 0; + + /* + * Iterate over all possible block lengths, computing the + * entropic coding approximation to the final length at every + * stage. We divide the result by the number of symbols + * encoded, to determine the `value for money' (overall + * bits-per-symbol count) of a block of that length. + */ + bestlen = -1; + bestvfm = 0.0; + for (i = 0; i < out->nsyms; i++) { + unsigned sym = out->syms[(out->symstart + i) % SYMLIMIT]; + + if (i > 0 && (sym & SYMPFX_MASK) == SYMPFX_LITLEN) { + /* + * This is a viable point at which to end the block. + * Compute the length approximation and hence the value + * for money. + */ + double len = 0.0, vfm; + int k; + int total; + + /* + * FIXME: we should be doing this incrementally, rather + * than recomputing the whole thing at every byte + * position. Also, can we fiddle the logs somehow to + * avoid having to do floating point? + */ + total = 0; + for (k = 0; k < (int)lenof(freqs1); k++) { + if (freqs1[k]) + len -= freqs1[k] * log(freqs1[k]); + total += freqs1[k]; + } + if (total) + len += total * log(total); + total = 0; + for (k = 0; k < (int)lenof(freqs2); k++) { + if (freqs2[k]) + len -= freqs2[k] * log(freqs2[k]); + total += freqs2[k]; + } + if (total) + len += total * log(total); + len /= log(2); + len += nextrabits; + len += 300; /* very approximate size of the Huffman trees */ + + vfm = i / len; /* symbols encoded per bit */ +/* fprintf(stderr, "chooseblock: i=%d gives len %g, vfm %g\n", i, len, vfm); */ + if (bestlen < 0 || vfm > bestvfm) { + bestlen = i; + bestvfm = vfm; + } + } + + /* + * Increment the occurrence counter for this symbol, if + * it's in one of the Huffman alphabets and isn't extra + * bits. + */ + if ((sym & SYMPFX_MASK) == SYMPFX_LITLEN) { + sym &= ~SYMPFX_MASK; + assert(sym < lenof(freqs1)); + freqs1[sym]++; + } else if ((sym & SYMPFX_MASK) == SYMPFX_DIST) { + sym &= ~SYMPFX_MASK; + assert(sym < lenof(freqs2)); + freqs2[sym]++; + } else if ((sym & SYMPFX_MASK) == SYMPFX_EXTRABITS) { + nextrabits += (sym &~ SYMPFX_MASK) >> SYM_EXTRABITS_SHIFT; + } + } + + assert(bestlen > 0); + +/* fprintf(stderr, "chooseblock: bestlen %d, bestvfm %g\n", bestlen, bestvfm); */ + outblock_wrapper(out, bestlen); +} + +/* + * Force the current symbol buffer to be flushed out as a single + * block. + */ +static void flushblock(deflate_compress_ctx *out) +{ + /* + * Because outblock_wrapper guarantees to output either a + * dynamic block of the given length or a static block of + * length out->nsyms, we know that passing out->nsyms as the + * given length will definitely result in us using up the + * entire buffer. + */ + outblock_wrapper(out, out->nsyms); + assert(out->nsyms == 0); +} + +/* + * Place a symbol into the symbols buffer. + */ +static void outsym(deflate_compress_ctx *out, unsigned long sym) +{ + assert(out->nsyms < SYMLIMIT); + out->syms[(out->symstart + out->nsyms++) % SYMLIMIT] = sym; + + if (out->nsyms == SYMLIMIT) + chooseblock(out); +} + +typedef struct { + short code, extrabits; + int min, max; +} coderecord; + +static const coderecord lencodes[] = { + {257, 0, 3, 3}, + {258, 0, 4, 4}, + {259, 0, 5, 5}, + {260, 0, 6, 6}, + {261, 0, 7, 7}, + {262, 0, 8, 8}, + {263, 0, 9, 9}, + {264, 0, 10, 10}, + {265, 1, 11, 12}, + {266, 1, 13, 14}, + {267, 1, 15, 16}, + {268, 1, 17, 18}, + {269, 2, 19, 22}, + {270, 2, 23, 26}, + {271, 2, 27, 30}, + {272, 2, 31, 34}, + {273, 3, 35, 42}, + {274, 3, 43, 50}, + {275, 3, 51, 58}, + {276, 3, 59, 66}, + {277, 4, 67, 82}, + {278, 4, 83, 98}, + {279, 4, 99, 114}, + {280, 4, 115, 130}, + {281, 5, 131, 162}, + {282, 5, 163, 194}, + {283, 5, 195, 226}, + {284, 5, 227, 257}, + {285, 0, 258, 258}, +}; + +static const coderecord distcodes[] = { + {0, 0, 1, 1}, + {1, 0, 2, 2}, + {2, 0, 3, 3}, + {3, 0, 4, 4}, + {4, 1, 5, 6}, + {5, 1, 7, 8}, + {6, 2, 9, 12}, + {7, 2, 13, 16}, + {8, 3, 17, 24}, + {9, 3, 25, 32}, + {10, 4, 33, 48}, + {11, 4, 49, 64}, + {12, 5, 65, 96}, + {13, 5, 97, 128}, + {14, 6, 129, 192}, + {15, 6, 193, 256}, + {16, 7, 257, 384}, + {17, 7, 385, 512}, + {18, 8, 513, 768}, + {19, 8, 769, 1024}, + {20, 9, 1025, 1536}, + {21, 9, 1537, 2048}, + {22, 10, 2049, 3072}, + {23, 10, 3073, 4096}, + {24, 11, 4097, 6144}, + {25, 11, 6145, 8192}, + {26, 12, 8193, 12288}, + {27, 12, 12289, 16384}, + {28, 13, 16385, 24576}, + {29, 13, 24577, 32768}, +}; + +static void literal(struct LZ77Context *ectx, unsigned char c) +{ + deflate_compress_ctx *out = (deflate_compress_ctx *) ectx->userdata; + + outsym(out, SYMPFX_LITLEN | c); +} + +static void match(struct LZ77Context *ectx, int distance, int len) +{ + const coderecord *d, *l; + int i, j, k; + deflate_compress_ctx *out = (deflate_compress_ctx *) ectx->userdata; + + while (len > 0) { + int thislen; + + /* + * We can transmit matches of lengths 3 through 258 + * inclusive. So if len exceeds 258, we must transmit in + * several steps, with 258 or less in each step. + * + * Specifically: if len >= 261, we can transmit 258 and be + * sure of having at least 3 left for the next step. And if + * len <= 258, we can just transmit len. But if len == 259 + * or 260, we must transmit len-3. + */ + thislen = (len > 260 ? 258 : len <= 258 ? len : len - 3); + len -= thislen; + + /* + * Binary-search to find which length code we're + * transmitting. + */ + i = -1; + j = sizeof(lencodes) / sizeof(*lencodes); + while (1) { + assert(j - i >= 2); + k = (j + i) / 2; + if (thislen < lencodes[k].min) + j = k; + else if (thislen > lencodes[k].max) + i = k; + else { + l = &lencodes[k]; + break; /* found it! */ + } + } + + /* + * Transmit the length code. + */ + outsym(out, SYMPFX_LITLEN | l->code); + + /* + * Transmit the extra bits. + */ + if (l->extrabits) { + outsym(out, (SYMPFX_EXTRABITS | (thislen - l->min) | + (l->extrabits << SYM_EXTRABITS_SHIFT))); + } + + /* + * Binary-search to find which distance code we're + * transmitting. + */ + i = -1; + j = sizeof(distcodes) / sizeof(*distcodes); + while (1) { + assert(j - i >= 2); + k = (j + i) / 2; + if (distance < distcodes[k].min) + j = k; + else if (distance > distcodes[k].max) + i = k; + else { + d = &distcodes[k]; + break; /* found it! */ + } + } + + /* + * Write the distance code. + */ + outsym(out, SYMPFX_DIST | d->code); + + /* + * Transmit the extra bits. + */ + if (d->extrabits) { + outsym(out, (SYMPFX_EXTRABITS | (distance - d->min) | + (d->extrabits << SYM_EXTRABITS_SHIFT))); + } + } +} + +deflate_compress_ctx *deflate_compress_new(int type) +{ + deflate_compress_ctx *out; + struct LZ77Context *ectx = snew(struct LZ77Context); + + lz77_init(ectx); + ectx->literal = literal; + ectx->match = match; + + out = snew(deflate_compress_ctx); + out->type = type; + out->outbits = out->noutbits = 0; + out->firstblock = TRUE; +#ifdef STATISTICS + out->bitcount = 0; +#endif + + out->syms = snewn(SYMLIMIT, unsigned long); + out->symstart = out->nsyms = 0; + + out->adler32 = 1; + out->lastblock = FALSE; + out->finished = FALSE; + + ectx->userdata = out; + out->lzc = ectx; + + return out; +} + +void deflate_compress_free(deflate_compress_ctx *out) +{ + struct LZ77Context *ectx = out->lzc; + + sfree(out->syms); + sfree(out); + sfree(ectx->ictx); + sfree(ectx); +} + +static unsigned long adler32_update(unsigned long s, + const unsigned char *data, int len) +{ + unsigned s1 = s & 0xFFFF, s2 = (s >> 16) & 0xFFFF; + int i; + + for (i = 0; i < len; i++) { + s1 += data[i]; + s2 += s1; + if (!(i & 0xFFF)) { + s1 %= 65521; + s2 %= 65521; + } + } + + return ((s2 % 65521) << 16) | (s1 % 65521); +} + +int deflate_compress_data(deflate_compress_ctx *out, + const void *vblock, int len, int flushtype, + void **outblock, int *outlen) +{ + struct LZ77Context *ectx = out->lzc; + const unsigned char *block = (const unsigned char *)vblock; + + assert(!out->finished); + + out->outbuf = NULL; + out->outlen = out->outsize = 0; + + /* + * If this is the first block, output the header. + */ + if (out->firstblock) { + switch (out->type) { + case DEFLATE_TYPE_BARE: + break; /* no header */ + case DEFLATE_TYPE_ZLIB: + /* + * Zlib (RFC1950) header bytes: 78 9C. (Deflate + * compression, 32K window size, default algorithm.) + */ + outbits(out, 0x9C78, 16); + break; + } + out->firstblock = FALSE; + } + + /* + * Feed our data to the LZ77 compression phase. + */ + lz77_compress(ectx, block, len, TRUE); + + /* + * Update checksums. + */ + if (out->type == DEFLATE_TYPE_ZLIB) + out->adler32 = adler32_update(out->adler32, block, len); + + switch (flushtype) { + /* + * FIXME: what other flush types are available and useful? + * In PuTTY, it was clear that we generally wanted to be in + * a static block so it was safe to open one. Here, we + * probably prefer to be _outside_ a block if we can. Think + * about this. + */ + case DEFLATE_NO_FLUSH: + break; /* don't flush any data at all (duh) */ + case DEFLATE_SYNC_FLUSH: + /* + * Close the current block. + */ + flushblock(out); + + /* + * Then output an empty _uncompressed_ block: send 000, + * then sync to byte boundary, then send bytes 00 00 FF + * FF. + */ + outbits(out, 0, 3); + if (out->noutbits) + outbits(out, 0, 8 - out->noutbits); + outbits(out, 0, 16); + outbits(out, 0xFFFF, 16); + break; + case DEFLATE_END_OF_DATA: + /* + * Output a block with BFINAL set. + */ + out->lastblock = TRUE; + flushblock(out); + + /* + * Sync to byte boundary, flushing out the final byte. + */ + if (out->noutbits) + outbits(out, 0, 8 - out->noutbits); + + /* + * Output the adler32 checksum, in zlib mode. + */ + if (out->type == DEFLATE_TYPE_ZLIB) { + outbits(out, (out->adler32 >> 24) & 0xFF, 8); + outbits(out, (out->adler32 >> 16) & 0xFF, 8); + outbits(out, (out->adler32 >> 8) & 0xFF, 8); + outbits(out, (out->adler32 >> 0) & 0xFF, 8); + } + + out->finished = TRUE; + break; + } + + /* + * Return any data that we've generated. + */ + *outblock = (void *)out->outbuf; + *outlen = out->outlen; + + return 1; +} + +/* ---------------------------------------------------------------------- + * deflate decompression. + */ + +/* + * The way we work the Huffman decode is to have a table lookup on + * the first N bits of the input stream (in the order they arrive, + * of course, i.e. the first bit of the Huffman code is in bit 0). + * Each table entry lists the number of bits to consume, plus + * either an output code or a pointer to a secondary table. + */ +struct table; +struct tableentry; + +struct tableentry { + unsigned char nbits; + short code; + struct table *nexttable; +}; + +struct table { + int mask; /* mask applied to input bit stream */ + struct tableentry *table; +}; + +#define MAXSYMS 288 + +/* + * Build a single-level decode table for elements + * [minlength,maxlength) of the provided code/length tables, and + * recurse to build subtables. + */ +static struct table *mkonetab(int *codes, unsigned char *lengths, int nsyms, + int pfx, int pfxbits, int bits) +{ + struct table *tab = snew(struct table); + int pfxmask = (1 << pfxbits) - 1; + int nbits, i, j, code; + + tab->table = snewn(1 << bits, struct tableentry); + tab->mask = (1 << bits) - 1; + + for (code = 0; code <= tab->mask; code++) { + tab->table[code].code = -1; + tab->table[code].nbits = 0; + tab->table[code].nexttable = NULL; + } + + for (i = 0; i < nsyms; i++) { + if (lengths[i] <= pfxbits || (codes[i] & pfxmask) != pfx) + continue; + code = (codes[i] >> pfxbits) & tab->mask; + for (j = code; j <= tab->mask; j += 1 << (lengths[i] - pfxbits)) { + tab->table[j].code = i; + nbits = lengths[i] - pfxbits; + if (tab->table[j].nbits < nbits) + tab->table[j].nbits = nbits; + } + } + for (code = 0; code <= tab->mask; code++) { + if (tab->table[code].nbits <= bits) + continue; + /* Generate a subtable. */ + tab->table[code].code = -1; + nbits = tab->table[code].nbits - bits; + if (nbits > 7) + nbits = 7; + tab->table[code].nbits = bits; + tab->table[code].nexttable = mkonetab(codes, lengths, nsyms, + pfx | (code << pfxbits), + pfxbits + bits, nbits); + } + + return tab; +} + +/* + * Build a decode table, given a set of Huffman tree lengths. + */ +static struct table *mktable(unsigned char *lengths, int nlengths) +{ + int codes[MAXSYMS]; + int maxlen; + + maxlen = hufcodes(lengths, codes, nlengths); + + /* + * Now we have the complete list of Huffman codes. Build a + * table. + */ + return mkonetab(codes, lengths, nlengths, 0, 0, maxlen < 9 ? maxlen : 9); +} + +static int freetable(struct table **ztab) +{ + struct table *tab; + int code; + + if (ztab == NULL) + return -1; + + if (*ztab == NULL) + return 0; + + tab = *ztab; + + for (code = 0; code <= tab->mask; code++) + if (tab->table[code].nexttable != NULL) + freetable(&tab->table[code].nexttable); + + sfree(tab->table); + tab->table = NULL; + + sfree(tab); + *ztab = NULL; + + return (0); +} + +struct deflate_decompress_ctx { + struct table *staticlentable, *staticdisttable; + struct table *currlentable, *currdisttable, *lenlentable; + enum { + START, OUTSIDEBLK, + TREES_HDR, TREES_LENLEN, TREES_LEN, TREES_LENREP, + INBLK, GOTLENSYM, GOTLEN, GOTDISTSYM, + UNCOMP_LEN, UNCOMP_NLEN, UNCOMP_DATA, + END, ADLER1, ADLER2, FINALSPIN + } state; + int sym, hlit, hdist, hclen, lenptr, lenextrabits, lenaddon, len, + lenrep, lastblock; + int uncomplen; + unsigned char lenlen[19]; + unsigned char lengths[286 + 32]; + unsigned long bits; + int nbits; + unsigned char window[WINSIZE]; + int winpos; + unsigned char *outblk; + int outlen, outsize; + int type; + unsigned long adler32; +}; + +deflate_decompress_ctx *deflate_decompress_new(int type) +{ + deflate_decompress_ctx *dctx = snew(deflate_decompress_ctx); + unsigned char lengths[288]; + + memset(lengths, 8, 144); + memset(lengths + 144, 9, 256 - 144); + memset(lengths + 256, 7, 280 - 256); + memset(lengths + 280, 8, 288 - 280); + dctx->staticlentable = mktable(lengths, 288); + memset(lengths, 5, 32); + dctx->staticdisttable = mktable(lengths, 32); + if (type == DEFLATE_TYPE_BARE) + dctx->state = OUTSIDEBLK; + else + dctx->state = START; + dctx->currlentable = dctx->currdisttable = dctx->lenlentable = NULL; + dctx->bits = 0; + dctx->nbits = 0; + dctx->winpos = 0; + dctx->type = type; + dctx->lastblock = FALSE; + dctx->adler32 = 1; + + return dctx; +} + +void deflate_decompress_free(deflate_decompress_ctx *dctx) +{ + if (dctx->currlentable && dctx->currlentable != dctx->staticlentable) + freetable(&dctx->currlentable); + if (dctx->currdisttable && dctx->currdisttable != dctx->staticdisttable) + freetable(&dctx->currdisttable); + if (dctx->lenlentable) + freetable(&dctx->lenlentable); + freetable(&dctx->staticlentable); + freetable(&dctx->staticdisttable); + sfree(dctx); +} + +static int huflookup(unsigned long *bitsp, int *nbitsp, struct table *tab) +{ + unsigned long bits = *bitsp; + int nbits = *nbitsp; + while (1) { + struct tableentry *ent; + ent = &tab->table[bits & tab->mask]; + if (ent->nbits > nbits) + return -1; /* not enough data */ + bits >>= ent->nbits; + nbits -= ent->nbits; + if (ent->code == -1) + tab = ent->nexttable; + else { + *bitsp = bits; + *nbitsp = nbits; + return ent->code; + } + + if (!tab) { + /* + * There was a missing entry in the table, presumably + * due to an invalid Huffman table description, and the + * subsequent data has attempted to use the missing + * entry. Return a decoding failure. + */ + return -2; + } + } +} + +static void emit_char(deflate_decompress_ctx *dctx, int c) +{ + dctx->window[dctx->winpos] = c; + dctx->winpos = (dctx->winpos + 1) & (WINSIZE - 1); + if (dctx->outlen >= dctx->outsize) { + dctx->outsize = dctx->outlen + 512; + dctx->outblk = sresize(dctx->outblk, dctx->outsize, unsigned char); + } + if (dctx->type == DEFLATE_TYPE_ZLIB) { + unsigned char uc = c; + dctx->adler32 = adler32_update(dctx->adler32, &uc, 1); + } + dctx->outblk[dctx->outlen++] = c; +} + +#define EATBITS(n) ( dctx->nbits -= (n), dctx->bits >>= (n) ) + +int deflate_decompress_data(deflate_decompress_ctx *dctx, + const void *vblock, int len, + void **outblock, int *outlen) +{ + const coderecord *rec; + const unsigned char *block = (const unsigned char *)vblock; + int code, bfinal, btype, rep, dist, nlen, header, adler; + + dctx->outblk = snewn(256, unsigned char); + dctx->outsize = 256; + dctx->outlen = 0; + + while (len > 0 || dctx->nbits > 0) { + while (dctx->nbits < 24 && len > 0) { + dctx->bits |= (*block++) << dctx->nbits; + dctx->nbits += 8; + len--; + } + switch (dctx->state) { + case START: + /* Expect 16-bit zlib header. */ + if (dctx->nbits < 16) + goto finished; /* done all we can */ + + /* + * The header is stored as a big-endian 16-bit integer, + * in contrast to the general little-endian policy in + * the rest of the format :-( + */ + header = (((dctx->bits & 0xFF00) >> 8) | + ((dctx->bits & 0x00FF) << 8)); + EATBITS(16); + + /* + * Check the header: + * + * - bits 8-11 should be 1000 (Deflate/RFC1951) + * - bits 12-15 should be at most 0111 (window size) + * - bit 5 should be zero (no dictionary present) + * - we don't care about bits 6-7 (compression rate) + * - bits 0-4 should be set up to make the whole thing + * a multiple of 31 (checksum). + */ + if ((header & 0x0F00) != 0x0800 || + (header & 0xF000) > 0x7000 || + (header & 0x0020) != 0x0000 || + (header % 31) != 0) + goto decode_error; + + dctx->state = OUTSIDEBLK; + break; + case OUTSIDEBLK: + /* Expect 3-bit block header. */ + if (dctx->nbits < 3) + goto finished; /* done all we can */ + bfinal = dctx->bits & 1; + if (bfinal) + dctx->lastblock = TRUE; + EATBITS(1); + btype = dctx->bits & 3; + EATBITS(2); + if (btype == 0) { + int to_eat = dctx->nbits & 7; + dctx->state = UNCOMP_LEN; + EATBITS(to_eat); /* align to byte boundary */ + } else if (btype == 1) { + dctx->currlentable = dctx->staticlentable; + dctx->currdisttable = dctx->staticdisttable; + dctx->state = INBLK; + } else if (btype == 2) { + dctx->state = TREES_HDR; + } + debug(("recv: bfinal=%d btype=%d\n", bfinal, btype)); + break; + case TREES_HDR: + /* + * Dynamic block header. Five bits of HLIT, five of + * HDIST, four of HCLEN. + */ + if (dctx->nbits < 5 + 5 + 4) + goto finished; /* done all we can */ + dctx->hlit = 257 + (dctx->bits & 31); + EATBITS(5); + dctx->hdist = 1 + (dctx->bits & 31); + EATBITS(5); + dctx->hclen = 4 + (dctx->bits & 15); + EATBITS(4); + debug(("recv: hlit=%d hdist=%d hclen=%d\n", dctx->hlit, + dctx->hdist, dctx->hclen)); + dctx->lenptr = 0; + dctx->state = TREES_LENLEN; + memset(dctx->lenlen, 0, sizeof(dctx->lenlen)); + break; + case TREES_LENLEN: + if (dctx->nbits < 3) + goto finished; + while (dctx->lenptr < dctx->hclen && dctx->nbits >= 3) { + dctx->lenlen[lenlenmap[dctx->lenptr++]] = + (unsigned char) (dctx->bits & 7); + debug(("recv: lenlen %d\n", (unsigned char) (dctx->bits & 7))); + EATBITS(3); + } + if (dctx->lenptr == dctx->hclen) { + dctx->lenlentable = mktable(dctx->lenlen, 19); + dctx->state = TREES_LEN; + dctx->lenptr = 0; + } + break; + case TREES_LEN: + if (dctx->lenptr >= dctx->hlit + dctx->hdist) { + dctx->currlentable = mktable(dctx->lengths, dctx->hlit); + dctx->currdisttable = mktable(dctx->lengths + dctx->hlit, + dctx->hdist); + freetable(&dctx->lenlentable); + dctx->lenlentable = NULL; + dctx->state = INBLK; + break; + } + code = huflookup(&dctx->bits, &dctx->nbits, dctx->lenlentable); + debug(("recv: codelen %d\n", code)); + if (code == -1) + goto finished; + if (code == -2) + goto decode_error; + if (code < 16) + dctx->lengths[dctx->lenptr++] = code; + else { + dctx->lenextrabits = (code == 16 ? 2 : code == 17 ? 3 : 7); + dctx->lenaddon = (code == 18 ? 11 : 3); + dctx->lenrep = (code == 16 && dctx->lenptr > 0 ? + dctx->lengths[dctx->lenptr - 1] : 0); + dctx->state = TREES_LENREP; + } + break; + case TREES_LENREP: + if (dctx->nbits < dctx->lenextrabits) + goto finished; + rep = + dctx->lenaddon + + (dctx->bits & ((1 << dctx->lenextrabits) - 1)); + EATBITS(dctx->lenextrabits); + if (dctx->lenextrabits) + debug(("recv: codelen-extrabits %d/%d\n", rep - dctx->lenaddon, + dctx->lenextrabits)); + while (rep > 0 && dctx->lenptr < dctx->hlit + dctx->hdist) { + dctx->lengths[dctx->lenptr] = dctx->lenrep; + dctx->lenptr++; + rep--; + } + dctx->state = TREES_LEN; + break; + case INBLK: + code = huflookup(&dctx->bits, &dctx->nbits, dctx->currlentable); + debug(("recv: litlen %d\n", code)); + if (code == -1) + goto finished; + if (code == -2) + goto decode_error; + if (code < 256) + emit_char(dctx, code); + else if (code == 256) { + if (dctx->lastblock) + dctx->state = END; + else + dctx->state = OUTSIDEBLK; + if (dctx->currlentable != dctx->staticlentable) { + freetable(&dctx->currlentable); + dctx->currlentable = NULL; + } + if (dctx->currdisttable != dctx->staticdisttable) { + freetable(&dctx->currdisttable); + dctx->currdisttable = NULL; + } + } else if (code < 286) { /* static tree can give >285; ignore */ + dctx->state = GOTLENSYM; + dctx->sym = code; + } + break; + case GOTLENSYM: + rec = &lencodes[dctx->sym - 257]; + if (dctx->nbits < rec->extrabits) + goto finished; + dctx->len = + rec->min + (dctx->bits & ((1 << rec->extrabits) - 1)); + if (rec->extrabits) + debug(("recv: litlen-extrabits %d/%d\n", + dctx->len - rec->min, rec->extrabits)); + EATBITS(rec->extrabits); + dctx->state = GOTLEN; + break; + case GOTLEN: + code = huflookup(&dctx->bits, &dctx->nbits, dctx->currdisttable); + debug(("recv: dist %d\n", code)); + if (code == -1) + goto finished; + if (code == -2) + goto decode_error; + dctx->state = GOTDISTSYM; + dctx->sym = code; + break; + case GOTDISTSYM: + rec = &distcodes[dctx->sym]; + if (dctx->nbits < rec->extrabits) + goto finished; + dist = rec->min + (dctx->bits & ((1 << rec->extrabits) - 1)); + if (rec->extrabits) + debug(("recv: dist-extrabits %d/%d\n", + dist - rec->min, rec->extrabits)); + EATBITS(rec->extrabits); + dctx->state = INBLK; + while (dctx->len--) + emit_char(dctx, dctx->window[(dctx->winpos - dist) & + (WINSIZE - 1)]); + break; + case UNCOMP_LEN: + /* + * Uncompressed block. We expect to see a 16-bit LEN. + */ + if (dctx->nbits < 16) + goto finished; + dctx->uncomplen = dctx->bits & 0xFFFF; + EATBITS(16); + dctx->state = UNCOMP_NLEN; + break; + case UNCOMP_NLEN: + /* + * Uncompressed block. We expect to see a 16-bit NLEN, + * which should be the one's complement of the previous + * LEN. + */ + if (dctx->nbits < 16) + goto finished; + nlen = dctx->bits & 0xFFFF; + EATBITS(16); + if (dctx->uncomplen == 0) + dctx->state = OUTSIDEBLK; /* block is empty */ + else + dctx->state = UNCOMP_DATA; + break; + case UNCOMP_DATA: + if (dctx->nbits < 8) + goto finished; + emit_char(dctx, dctx->bits & 0xFF); + EATBITS(8); + if (--dctx->uncomplen == 0) + dctx->state = OUTSIDEBLK; /* end of uncompressed block */ + break; + case END: + /* + * End of compressed data. We align to a byte boundary, + * and then look for format-specific trailer data. + */ + { + int to_eat = dctx->nbits & 7; + EATBITS(to_eat); + } + if (dctx->type == DEFLATE_TYPE_ZLIB) + dctx->state = ADLER1; + else + dctx->state = FINALSPIN; + break; + case ADLER1: + if (dctx->nbits < 16) + goto finished; + adler = (dctx->bits & 0xFF) << 8; + EATBITS(8); + adler |= (dctx->bits & 0xFF); + EATBITS(8); + if (adler != ((dctx->adler32 >> 16) & 0xFFFF)) + goto decode_error; + dctx->state = ADLER2; + break; + case ADLER2: + if (dctx->nbits < 16) + goto finished; + adler = (dctx->bits & 0xFF) << 8; + EATBITS(8); + adler |= (dctx->bits & 0xFF); + EATBITS(8); + if (adler != (dctx->adler32 & 0xFFFF)) + goto decode_error; + dctx->state = FINALSPIN; + break; + case FINALSPIN: + /* Just ignore any trailing garbage on the data stream. */ + EATBITS(dctx->nbits); + break; + } + } + + finished: + *outblock = dctx->outblk; + *outlen = dctx->outlen; + return 1; + + decode_error: + sfree(dctx->outblk); + *outblock = dctx->outblk = NULL; + *outlen = 0; + return 0; +} + +#ifdef STANDALONE + +int main(int argc, char **argv) +{ + unsigned char buf[65536], *outbuf; + int ret, outlen; + deflate_decompress_ctx *dhandle; + deflate_compress_ctx *chandle; + int type = DEFLATE_TYPE_ZLIB, opts = TRUE, compress = FALSE; + char *filename = NULL; + FILE *fp; + + while (--argc) { + char *p = *++argv; + + if (p[0] == '-' && opts) { + if (!strcmp(p, "-d")) + type = DEFLATE_TYPE_BARE; + if (!strcmp(p, "-c")) + compress = TRUE; + else if (!strcmp(p, "--")) + opts = FALSE; /* next thing is filename */ + else { + fprintf(stderr, "unknown command line option '%s'\n", p); + return 1; + } + } else if (!filename) { + filename = p; + } else { + fprintf(stderr, "can only handle one filename\n"); + return 1; + } + } + + if (compress) { + chandle = deflate_compress_new(type); + dhandle = NULL; + } else { + dhandle = deflate_decompress_new(type); + chandle = NULL; + } + + if (filename) + fp = fopen(filename, "rb"); + else + fp = stdin; + + if (!fp) { + assert(filename); + fprintf(stderr, "unable to open '%s'\n", filename); + return 1; + } + + do { + ret = fread(buf, 1, sizeof(buf), fp); + if (dhandle) { + if (ret > 0) + deflate_decompress_data(dhandle, buf, ret, + (void **)&outbuf, &outlen); + } else { + if (ret > 0) + deflate_compress_data(chandle, buf, ret, DEFLATE_NO_FLUSH, + (void **)&outbuf, &outlen); + else + deflate_compress_data(chandle, buf, ret, DEFLATE_END_OF_DATA, + (void **)&outbuf, &outlen); + } + if (outbuf) { + if (outlen) + fwrite(outbuf, 1, outlen, stdout); + sfree(outbuf); + } else if (dhandle) { + fprintf(stderr, "decoding error\n"); + return 1; + } + } while (ret > 0); + + if (dhandle) + deflate_decompress_free(dhandle); + if (chandle) + deflate_compress_free(chandle); + + if (filename) + fclose(fp); + + return 0; +} + +#endif + +#ifdef TESTMODE + +int main(int argc, char **argv) +{ + char *filename = NULL; + FILE *fp; + deflate_compress_ctx *chandle; + deflate_decompress_ctx *dhandle; + unsigned char buf[65536], *outbuf, *outbuf2; + int ret, outlen, outlen2; + int dlen = 0, clen = 0; + int opts = TRUE; + + while (--argc) { + char *p = *++argv; + + if (p[0] == '-' && opts) { + if (!strcmp(p, "--")) + opts = FALSE; /* next thing is filename */ + else { + fprintf(stderr, "unknown command line option '%s'\n", p); + return 1; + } + } else if (!filename) { + filename = p; + } else { + fprintf(stderr, "can only handle one filename\n"); + return 1; + } + } + + if (filename) + fp = fopen(filename, "rb"); + else + fp = stdin; + + if (!fp) { + assert(filename); + fprintf(stderr, "unable to open '%s'\n", filename); + return 1; + } + + chandle = deflate_compress_new(DEFLATE_TYPE_ZLIB); + dhandle = deflate_decompress_new(DEFLATE_TYPE_ZLIB); + + do { + ret = fread(buf, 1, sizeof(buf), fp); + if (ret <= 0) { + deflate_compress_data(chandle, NULL, 0, DEFLATE_END_OF_DATA, + (void **)&outbuf, &outlen); + } else { + dlen += ret; + deflate_compress_data(chandle, buf, ret, DEFLATE_NO_FLUSH, + (void **)&outbuf, &outlen); + } + if (outbuf) { + clen += outlen; + deflate_decompress_data(dhandle, outbuf, outlen, + (void **)&outbuf2, &outlen2); + sfree(outbuf); + if (outbuf2) { + if (outlen2) + fwrite(outbuf2, 1, outlen2, stdout); + sfree(outbuf2); + } else { + fprintf(stderr, "decoding error\n"); + return 1; + } + } + } while (ret > 0); + + fprintf(stderr, "%d plaintext -> %d compressed\n", dlen, clen); + + return 0; +} + +#endif diff --git a/deflate.h b/deflate.h new file mode 100644 index 0000000..fac63e2 --- /dev/null +++ b/deflate.h @@ -0,0 +1,108 @@ +/* + * Header file for my independent implementation of Deflate + * (RFC1951) compression. + */ + +#ifndef DEFLATE_DEFLATE_H +#define DEFLATE_DEFLATE_H + +enum { + DEFLATE_TYPE_BARE, + DEFLATE_TYPE_ZLIB +}; + +/* ---------------------------------------------------------------------- + * Compression functions. Create a compression context with + * deflate_compress_new(); feed it data with repeated calls to + * deflate_compress_data(); destroy it with + * deflate_compress_free(). + */ + +typedef struct deflate_compress_ctx deflate_compress_ctx; + +/* + * Create a new compression context. `type' indicates whether it's + * bare Deflate (as used in, say, zip files) or Zlib (as used in, + * say, PDF). + */ +deflate_compress_ctx *deflate_compress_new(int type); + +/* + * Free a compression context previously allocated by + * deflate_compress_new(). + */ +void deflate_compress_free(deflate_compress_ctx *ctx); + +/* + * Give the compression context some data to compress. The input + * data is passed in `inblock', and has length `inlen'. This + * function may or may not produce some output data; if so, it is + * written to a dynamically allocated chunk of memory, a pointer to + * that memory is stored in `outblock', and the length of output + * data is stored in `outlen'. It is common for no data to be + * output, if the input data has merely been stored in internal + * buffers. + * + * `flushtype' indicates whether you want to force buffered data to + * be output. It can be one of the following values: + * + * - DEFLATE_NO_FLUSH: nothing is output if the compressor would + * rather not. Use this when the best compression is desired + * (i.e. most of the time). + * + * - DEFLATE_SYNC_FLUSH: all the buffered data is output, but the + * compressed data stream remains open and ready to continue + * compressing data. Use this in interactive protocols when a + * single compressed data stream is split across several network + * packets. + * + * - DEFLATE_END_OF_DATA: all the buffered data is output and the + * compressed data stream is cleaned up. Any checksums required + * at the end of the stream are also output. + */ +int deflate_compress_data(deflate_compress_ctx *ctx, + const void *inblock, int inlen, int flushtype, + void **outblock, int *outlen); + +enum { + DEFLATE_NO_FLUSH, + DEFLATE_SYNC_FLUSH, + DEFLATE_END_OF_DATA +}; + +/* ---------------------------------------------------------------------- + * Decompression functions. Create a decompression context with + * deflate_decompress_new(); feed it data with repeated calls to + * deflate_decompress_data(); destroy it with + * deflate_decompress_free(). + */ + +typedef struct deflate_decompress_ctx deflate_decompress_ctx; + +/* + * Create a new decompression context. `type' means the same as it + * does in deflate_compress_new(). + */ +deflate_decompress_ctx *deflate_decompress_new(int type); + +/* + * Free a decompression context previously allocated by + * deflate_decompress_new(). + */ +void deflate_decompress_free(deflate_decompress_ctx *ctx); + +/* + * Give the decompression context some data to decompress. The + * input data is passed in `inblock', and has length `inlen'. This + * function may or may not produce some output data; if so, it is + * written to a dynamically allocated chunk of memory, a pointer to + * that memory is stored in `outblock', and the length of output + * data is stored in `outlen'. + * + * FIXME: error reporting? + */ +int deflate_decompress_data(deflate_decompress_ctx *ctx, + const void *inblock, int inlen, + void **outblock, int *outlen); + +#endif /* DEFLATE_DEFLATE_H */ diff --git a/halibut.h b/halibut.h index d109ade..5898a8e 100644 --- a/halibut.h +++ b/halibut.h @@ -361,6 +361,7 @@ void rdadds(rdstring *rs, wchar_t const *p); wchar_t *rdtrim(rdstring *rs); void rdaddc(rdstringc *rs, char c); void rdaddsc(rdstringc *rs, char const *p); +void rdaddsn(rdstringc *rc, char const *p, int len); char *rdtrimc(rdstringc *rs); int compare_wordlists(word *a, word *b); diff --git a/misc.c b/misc.c index a20c5b4..1d407de 100644 --- a/misc.c +++ b/misc.c @@ -90,13 +90,16 @@ void rdaddc(rdstringc *rs, char c) { rs->text[rs->pos] = 0; } void rdaddsc(rdstringc *rs, char const *p) { - int len = strlen(p); + rdaddsn(rs, p, strlen(p)); +} +void rdaddsn(rdstringc *rs, char const *p, int len) { if (rs->pos >= rs->size - len) { rs->size = rs->pos + len + 128; rs->text = sresize(rs->text, rs->size, char); } - strcpy(rs->text + rs->pos, p); + memcpy(rs->text + rs->pos, p, len); rs->pos += len; + rs->text[rs->pos] = 0; } char *rdtrimc(rdstringc *rs) { rs->text = sresize(rs->text, rs->pos + 1, char); -- 2.11.0