+static void add(struct buf *b, unsigned char c)
+{
+ if (b->len >= b->size) {
+ b->size = (b->len * 3 / 2) + 512;
+ b->data = sresize(b->data, b->size, unsigned char);
+ }
+ b->data[b->len++] = c;
+}
+static int get(struct buf *b)
+{
+ return b->data[b->len++];
+}
+static void makerle(struct buf *b, termline *ldata,
+ void (*makeliteral)(struct buf *b, termchar *c,
+ unsigned long *state))
+{
+ int hdrpos, hdrsize, n, prevlen, prevpos, thislen, thispos, prev2;
+ termchar *c = ldata->chars;
+ unsigned long state = 0, oldstate;
+
+ n = ldata->cols;
+
+ hdrpos = b->len;
+ hdrsize = 0;
+ add(b, 0);
+ prevlen = prevpos = 0;
+ prev2 = FALSE;
+
+ while (n-- > 0) {
+ thispos = b->len;
+ makeliteral(b, c++, &state);
+ thislen = b->len - thispos;
+ if (thislen == prevlen &&
+ !memcmp(b->data + prevpos, b->data + thispos, thislen)) {
+ /*
+ * This literal precisely matches the previous one.
+ * Turn it into a run if it's worthwhile.
+ *
+ * With one-byte literals, it costs us two bytes to
+ * encode a run, plus another byte to write the header
+ * to resume normal output; so a three-element run is
+ * neutral, and anything beyond that is unconditionally
+ * worthwhile. With two-byte literals or more, even a
+ * 2-run is a win.
+ */
+ if (thislen > 1 || prev2) {
+ int runpos, runlen;
+
+ /*
+ * It's worth encoding a run. Start at prevpos,
+ * unless hdrsize==0 in which case we can back up
+ * another one and start by overwriting hdrpos.
+ */
+
+ hdrsize--; /* remove the literal at prevpos */
+ if (prev2) {
+ assert(hdrsize > 0);
+ hdrsize--;
+ prevpos -= prevlen;/* and possibly another one */
+ }
+
+ if (hdrsize == 0) {
+ assert(prevpos == hdrpos + 1);
+ runpos = hdrpos;
+ b->len = prevpos+prevlen;
+ } else {
+ memmove(b->data + prevpos+1, b->data + prevpos, prevlen);
+ runpos = prevpos;
+ b->len = prevpos+prevlen+1;
+ /*
+ * Terminate the previous run of ordinary
+ * literals.
+ */
+ assert(hdrsize >= 1 && hdrsize <= 128);
+ b->data[hdrpos] = hdrsize - 1;
+ }
+
+ runlen = prev2 ? 3 : 2;
+
+ while (n > 0 && runlen < 129) {
+ int tmppos, tmplen;
+ tmppos = b->len;
+ oldstate = state;
+ makeliteral(b, c, &state);
+ tmplen = b->len - tmppos;
+ b->len = tmppos;
+ if (tmplen != thislen ||
+ memcmp(b->data + runpos+1, b->data + tmppos, tmplen)) {
+ state = oldstate;
+ break; /* run over */
+ }
+ n--, c++, runlen++;
+ }
+
+ assert(runlen >= 2 && runlen <= 129);
+ b->data[runpos] = runlen + 0x80 - 2;
+
+ hdrpos = b->len;
+ hdrsize = 0;
+ add(b, 0);
+ /* And ensure this run doesn't interfere with the next. */
+ prevlen = prevpos = 0;
+ prev2 = FALSE;
+
+ continue;
+ } else {
+ /*
+ * Just flag that the previous two literals were
+ * identical, in case we find a third identical one
+ * we want to turn into a run.
+ */
+ prev2 = TRUE;
+ prevlen = thislen;
+ prevpos = thispos;
+ }
+ } else {
+ prev2 = FALSE;
+ prevlen = thislen;
+ prevpos = thispos;
+ }
+
+ /*
+ * This character isn't (yet) part of a run. Add it to
+ * hdrsize.
+ */
+ hdrsize++;
+ if (hdrsize == 128) {
+ b->data[hdrpos] = hdrsize - 1;
+ hdrpos = b->len;
+ hdrsize = 0;
+ add(b, 0);
+ prevlen = prevpos = 0;
+ prev2 = FALSE;
+ }
+ }
+
+ /*
+ * Clean up.
+ */
+ if (hdrsize > 0) {
+ assert(hdrsize <= 128);
+ b->data[hdrpos] = hdrsize - 1;
+ } else {
+ b->len = hdrpos;
+ }
+}
+static void makeliteral_chr(struct buf *b, termchar *c, unsigned long *state)
+{
+ /*
+ * My encoding for characters is UTF-8-like, in that it stores
+ * 7-bit ASCII in one byte and uses high-bit-set bytes as
+ * introducers to indicate a longer sequence. However, it's
+ * unlike UTF-8 in that it doesn't need to be able to
+ * resynchronise, and therefore I don't want to waste two bits
+ * per byte on having recognisable continuation characters.
+ * Also I don't want to rule out the possibility that I may one
+ * day use values 0x80000000-0xFFFFFFFF for interesting
+ * purposes, so unlike UTF-8 I need a full 32-bit range.
+ * Accordingly, here is my encoding:
+ *
+ * 00000000-0000007F: 0xxxxxxx (but see below)
+ * 00000080-00003FFF: 10xxxxxx xxxxxxxx
+ * 00004000-001FFFFF: 110xxxxx xxxxxxxx xxxxxxxx
+ * 00200000-0FFFFFFF: 1110xxxx xxxxxxxx xxxxxxxx xxxxxxxx
+ * 10000000-FFFFFFFF: 11110ZZZ xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx
+ *
+ * (`Z' is like `x' but is always going to be zero since the
+ * values I'm encoding don't go above 2^32. In principle the
+ * five-byte form of the encoding could extend to 2^35, and
+ * there could be six-, seven-, eight- and nine-byte forms as
+ * well to allow up to 64-bit values to be encoded. But that's
+ * completely unnecessary for these purposes!)
+ *
+ * The encoding as written above would be very simple, except
+ * that 7-bit ASCII can occur in several different ways in the
+ * terminal data; sometimes it crops up in the D800 page
+ * (CSET_ASCII) but at other times it's in the 0000 page (real
+ * Unicode). Therefore, this encoding is actually _stateful_:
+ * the one-byte encoding of 00-7F actually indicates `reuse the
+ * upper three bytes of the last character', and to encode an
+ * absolute value of 00-7F you need to use the two-byte form
+ * instead.
+ */
+ if ((c->chr & ~0x7F) == *state) {
+ add(b, (unsigned char)(c->chr & 0x7F));
+ } else if (c->chr < 0x4000) {
+ add(b, (unsigned char)(((c->chr >> 8) & 0x3F) | 0x80));
+ add(b, (unsigned char)(c->chr & 0xFF));
+ } else if (c->chr < 0x200000) {
+ add(b, (unsigned char)(((c->chr >> 16) & 0x1F) | 0xC0));
+ add(b, (unsigned char)((c->chr >> 8) & 0xFF));
+ add(b, (unsigned char)(c->chr & 0xFF));
+ } else if (c->chr < 0x10000000) {
+ add(b, (unsigned char)(((c->chr >> 24) & 0x0F) | 0xE0));
+ add(b, (unsigned char)((c->chr >> 16) & 0xFF));
+ add(b, (unsigned char)((c->chr >> 8) & 0xFF));
+ add(b, (unsigned char)(c->chr & 0xFF));
+ } else {
+ add(b, 0xF0);
+ add(b, (unsigned char)((c->chr >> 24) & 0xFF));
+ add(b, (unsigned char)((c->chr >> 16) & 0xFF));
+ add(b, (unsigned char)((c->chr >> 8) & 0xFF));
+ add(b, (unsigned char)(c->chr & 0xFF));
+ }
+ *state = c->chr & ~0xFF;
+}
+static void makeliteral_attr(struct buf *b, termchar *c, unsigned long *state)
+{
+ /*
+ * My encoding for attributes is 16-bit-granular and assumes
+ * that the top bit of the word is never required. I either
+ * store a two-byte value with the top bit clear (indicating
+ * just that value), or a four-byte value with the top bit set
+ * (indicating the same value with its top bit clear).
+ *
+ * However, first I permute the bits of the attribute value, so
+ * that the eight bits of colour (four in each of fg and bg)
+ * which are never non-zero unless xterm 256-colour mode is in
+ * use are placed higher up the word than everything else. This
+ * ensures that attribute values remain 16-bit _unless_ the
+ * user uses extended colour.
+ */
+ unsigned attr, colourbits;
+
+ attr = c->attr;
+
+ assert(ATTR_BGSHIFT > ATTR_FGSHIFT);
+
+ colourbits = (attr >> (ATTR_BGSHIFT + 4)) & 0xF;
+ colourbits <<= 4;
+ colourbits |= (attr >> (ATTR_FGSHIFT + 4)) & 0xF;
+
+ attr = (((attr >> (ATTR_BGSHIFT + 8)) << (ATTR_BGSHIFT + 4)) |
+ (attr & ((1 << (ATTR_BGSHIFT + 4))-1)));
+ attr = (((attr >> (ATTR_FGSHIFT + 8)) << (ATTR_FGSHIFT + 4)) |
+ (attr & ((1 << (ATTR_FGSHIFT + 4))-1)));
+
+ attr |= (colourbits << (32-9));
+
+ if (attr < 0x8000) {
+ add(b, (unsigned char)((attr >> 8) & 0xFF));
+ add(b, (unsigned char)(attr & 0xFF));
+ } else {
+ add(b, (unsigned char)(((attr >> 24) & 0x7F) | 0x80));
+ add(b, (unsigned char)((attr >> 16) & 0xFF));
+ add(b, (unsigned char)((attr >> 8) & 0xFF));
+ add(b, (unsigned char)(attr & 0xFF));
+ }
+}
+static void makeliteral_cc(struct buf *b, termchar *c, unsigned long *state)
+{
+ /*
+ * For combining characters, I just encode a bunch of ordinary
+ * chars using makeliteral_chr, and terminate with a \0
+ * character (which I know won't come up as a combining char
+ * itself).
+ *
+ * I don't use the stateful encoding in makeliteral_chr.
+ */
+ unsigned long zstate;
+ termchar z;
+
+ while (c->cc_next) {
+ c += c->cc_next;
+
+ assert(c->chr != 0);
+
+ zstate = 0;
+ makeliteral_chr(b, c, &zstate);
+ }
+
+ z.chr = 0;
+ zstate = 0;
+ makeliteral_chr(b, &z, &zstate);
+}
+
+static termline *decompressline(unsigned char *data, int *bytes_used);
+
+static unsigned char *compressline(termline *ldata)
+{
+ struct buf buffer = { NULL, 0, 0 }, *b = &buffer;
+
+ /*
+ * First, store the column count, 7 bits at a time, least
+ * significant `digit' first, with the high bit set on all but
+ * the last.
+ */
+ {
+ int n = ldata->cols;
+ while (n >= 128) {
+ add(b, (unsigned char)((n & 0x7F) | 0x80));
+ n >>= 7;
+ }
+ add(b, (unsigned char)(n));
+ }
+
+ /*
+ * Next store the lattrs; same principle.
+ */
+ {
+ int n = ldata->lattr;
+ while (n >= 128) {
+ add(b, (unsigned char)((n & 0x7F) | 0x80));
+ n >>= 7;
+ }
+ add(b, (unsigned char)(n));
+ }
+
+ /*
+ * Now we store a sequence of separate run-length encoded
+ * fragments, each containing exactly as many symbols as there
+ * are columns in the ldata.
+ *
+ * All of these have a common basic format:
+ *
+ * - a byte 00-7F indicates that X+1 literals follow it
+ * - a byte 80-FF indicates that a single literal follows it
+ * and expects to be repeated (X-0x80)+2 times.
+ *
+ * The format of the `literals' varies between the fragments.
+ */
+ makerle(b, ldata, makeliteral_chr);
+ makerle(b, ldata, makeliteral_attr);
+ makerle(b, ldata, makeliteral_cc);