+ runlen = prev2 ? 3 : 2;
+
+ while (n > 0 && runlen < 129) {
+ int tmppos, tmplen;
+ tmppos = b->len;
+ oldstate = state;
+ makeliteral(b, c, &state);
+ tmplen = b->len - tmppos;
+ b->len = tmppos;
+ if (tmplen != thislen ||
+ memcmp(b->data + runpos+1, b->data + tmppos, tmplen)) {
+ state = oldstate;
+ break; /* run over */
+ }
+ n--, c++, runlen++;
+ }
+
+ assert(runlen >= 2 && runlen <= 129);
+ b->data[runpos] = runlen + 0x80 - 2;
+
+ hdrpos = b->len;
+ hdrsize = 0;
+ add(b, 0);
+ /* And ensure this run doesn't interfere with the next. */
+ prevlen = prevpos = 0;
+ prev2 = FALSE;
+
+ continue;
+ } else {
+ /*
+ * Just flag that the previous two literals were
+ * identical, in case we find a third identical one
+ * we want to turn into a run.
+ */
+ prev2 = TRUE;
+ prevlen = thislen;
+ prevpos = thispos;
+ }
+ } else {
+ prev2 = FALSE;
+ prevlen = thislen;
+ prevpos = thispos;
+ }
+
+ /*
+ * This character isn't (yet) part of a run. Add it to
+ * hdrsize.
+ */
+ hdrsize++;
+ if (hdrsize == 128) {
+ b->data[hdrpos] = hdrsize - 1;
+ hdrpos = b->len;
+ hdrsize = 0;
+ add(b, 0);
+ prevlen = prevpos = 0;
+ prev2 = FALSE;
+ }
+ }
+
+ /*
+ * Clean up.
+ */
+ if (hdrsize > 0) {
+ assert(hdrsize <= 128);
+ b->data[hdrpos] = hdrsize - 1;
+ } else {
+ b->len = hdrpos;
+ }
+}
+/*
+ * Append the encoded form of c->chr to the buffer b.
+ *
+ * The format resembles UTF-8 in that values up to 7F fit in a single
+ * byte and a byte with its high bit set introduces a longer
+ * sequence. Unlike UTF-8 it never needs to resynchronise, so no bits
+ * are spent on marking continuation bytes, and it covers the whole
+ * 32-bit range so that 0x80000000-0xFFFFFFFF stay available:
+ *
+ *   00000000-0000007F: 0xxxxxxx (stateful; see below)
+ *   00000080-00003FFF: 10xxxxxx xxxxxxxx
+ *   00004000-001FFFFF: 110xxxxx xxxxxxxx xxxxxxxx
+ *   00200000-0FFFFFFF: 1110xxxx xxxxxxxx xxxxxxxx xxxxxxxx
+ *   10000000-FFFFFFFF: 11110ZZZ xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx
+ *
+ * (The `Z' bits are always written as zero: the lead byte of the
+ * five-byte form is exactly 0xF0.)
+ *
+ * Seven-bit ASCII turns up in more than one place in terminal data
+ * (for instance in the D800 page as CSET_ASCII as well as in the
+ * 0000 page as real Unicode), so the one-byte form is _stateful_: it
+ * means `same upper three bytes as the previous character'. It is
+ * emitted whenever every bit of c->chr above the low seven matches
+ * *state; otherwise one of the absolute multi-byte forms is used,
+ * even for a value in the range 00-7F.
+ *
+ * On return, *state holds c->chr with its low eight bits cleared.
+ */
+static void makeliteral_chr(struct buf *b, termchar *c, unsigned long *state)
+{
+    if ((c->chr & ~0x7F) == *state) {
+        /* Relative form: only the low seven bits are stored. */
+        add(b, (unsigned char)(c->chr & 0x7F));
+    } else {
+        unsigned char lead;	       /* introducer byte */
+        int tail;		       /* whole bytes following it */
+
+        if (c->chr < 0x4000) {
+            lead = (unsigned char)(((c->chr >> 8) & 0x3F) | 0x80);
+            tail = 1;
+        } else if (c->chr < 0x200000) {
+            lead = (unsigned char)(((c->chr >> 16) & 0x1F) | 0xC0);
+            tail = 2;
+        } else if (c->chr < 0x10000000) {
+            lead = (unsigned char)(((c->chr >> 24) & 0x0F) | 0xE0);
+            tail = 3;
+        } else {
+            lead = 0xF0;
+            tail = 4;
+        }
+
+        add(b, lead);
+
+        /* Remaining bytes go out most significant first. */
+        while (tail-- > 0) {
+            add(b, (unsigned char)((c->chr >> (8 * tail)) & 0xFF));
+        }
+    }
+
+    *state = c->chr & ~0xFF;
+}
+/*
+ * Append the encoded form of c->attr to the buffer b.
+ *
+ * Attributes are written in 16-bit units. A value below 0x8000 is
+ * stored as two bytes, most significant first. Anything larger is
+ * stored as four bytes with the top bit of the first byte forced
+ * on; the top bit of the 32-bit word itself is therefore assumed
+ * never to be needed and is not stored.
+ *
+ * Before encoding, the bits are permuted: the upper four bits of
+ * each of the foreground and background colour fields (eight bits
+ * in all, which are only non-zero when xterm 256-colour mode is in
+ * use) are lifted out, the gaps they leave are closed up, and the
+ * eight bits are reinserted at bit position 32-9. The point is to
+ * keep attribute values within the two-byte form unless extended
+ * colour is actually in use.
+ *
+ * The state parameter is neither read nor written here.
+ */
+static void makeliteral_attr(struct buf *b, termchar *c, unsigned long *state)
+{
+    const int fg_hi = ATTR_FGSHIFT + 4;    /* start of upper fg nibble */
+    const int bg_hi = ATTR_BGSHIFT + 4;    /* start of upper bg nibble */
+    unsigned word = c->attr;
+    unsigned extcolour;
+
+    assert(ATTR_BGSHIFT > ATTR_FGSHIFT);
+
+    /* Gather the two upper nibbles: background above foreground. */
+    extcolour = ((word >> bg_hi) & 0xF) << 4;
+    extcolour |= (word >> fg_hi) & 0xF;
+
+    /*
+     * Squeeze each upper nibble out of the word. The background
+     * field is handled first; since it sits above the foreground
+     * field, doing so leaves the foreground bits where they were.
+     */
+    word = ((word >> (bg_hi + 4)) << bg_hi) | (word & ((1 << bg_hi) - 1));
+    word = ((word >> (fg_hi + 4)) << fg_hi) | (word & ((1 << fg_hi) - 1));
+
+    word |= extcolour << (32-9);
+
+    if (word >= 0x8000) {
+        /* Long form: two extra leading bytes, flagged by the top bit. */
+        add(b, (unsigned char)(((word >> 24) & 0x7F) | 0x80));
+        add(b, (unsigned char)((word >> 16) & 0xFF));
+    }
+    /* The low sixteen bits are written identically in both forms. */
+    add(b, (unsigned char)((word >> 8) & 0xFF));
+    add(b, (unsigned char)(word & 0xFF));
+}
+/*
+ * Append the combining-character chain hanging off c to the buffer b.
+ *
+ * The chain is followed through the cc_next offsets; every element
+ * is written with makeliteral_chr, and a character value of zero is
+ * written afterwards as a terminator (a combining character is
+ * asserted never to be zero itself).
+ *
+ * makeliteral_chr's stateful short form is deliberately not carried
+ * across calls: a scratch state is reset to zero before each one,
+ * and the caller's state parameter is left untouched.
+ */
+static void makeliteral_cc(struct buf *b, termchar *c, unsigned long *state)
+{
+    termchar *cur = c;
+    termchar terminator;
+    unsigned long scratch;
+
+    while (cur->cc_next) {
+        /* cc_next is a relative offset to the next chain element. */
+        cur += cur->cc_next;
+
+        assert(cur->chr != 0);
+
+        scratch = 0;
+        makeliteral_chr(b, cur, &scratch);
+    }
+
+    /* Only the chr field of the terminator is ever looked at. */
+    terminator.chr = 0;
+    scratch = 0;
+    makeliteral_chr(b, &terminator, &scratch);
+}
+
+static termline *decompressline(unsigned char *data, int *bytes_used);
+
+/*
+ * Build the compressed representation of the terminal line ldata
+ * and return it as a byte array trimmed (via sresize) to the exact
+ * number of bytes produced.
+ *
+ * Layout of the output:
+ *
+ *  - the column count, then the line attributes, each written seven
+ *    bits at a time starting with the least significant group, with
+ *    the high bit set on every byte except the last;
+ *
+ *  - three run-length encoded fragments produced by makerle, one
+ *    each for the characters, the attributes and the combining
+ *    characters. Each fragment holds exactly one symbol per column.
+ *    Within a fragment, a header byte X in the range 00-7F means
+ *    that X+1 literals follow, and a header byte X in the range
+ *    80-FF means that one literal follows which is to be repeated
+ *    (X-0x80)+2 times. What a `literal' looks like depends on the
+ *    fragment.
+ */
+static unsigned char *compressline(termline *ldata)
+{
+    struct buf buffer = { NULL, 0, 0 }, *b = &buffer;
+    int v;
+
+    /* Column count. */
+    for (v = ldata->cols; v >= 128; v >>= 7) {
+        add(b, (unsigned char)((v & 0x7F) | 0x80));
+    }
+    add(b, (unsigned char)(v));
+
+    /* Line attributes, in the same variable-length form. */
+    for (v = ldata->lattr; v >= 128; v >>= 7) {
+        add(b, (unsigned char)((v & 0x7F) | 0x80));
+    }
+    add(b, (unsigned char)(v));
+
+    /* The three RLE fragments. */
+    makerle(b, ldata, makeliteral_chr);
+    makerle(b, ldata, makeliteral_attr);
+    makerle(b, ldata, makeliteral_cc);
+
+    /*
+     * Optional self-check: decompress what was just written and
+     * make sure it matches the input. Too slow to leave enabled in
+     * production builds.
+     */
+#ifdef TERM_CC_DIAGS
+#ifndef CHECK_SB_COMPRESSION
+    {
+        termline *check;
+        int consumed;
+        int i;
+
+#ifdef DIAGNOSTIC_SB_COMPRESSION
+        for (i = 0; i < b->len; i++) {
+            printf(" %02x ", b->data[i]);
+        }
+        printf("\n");
+#endif
+
+        check = decompressline(b->data, &consumed);
+        assert(b->len == consumed);
+        assert(ldata->cols == check->cols);
+        assert(ldata->lattr == check->lattr);
+        for (i = 0; i < ldata->cols; i++) {
+            assert(termchars_equal(&ldata->chars[i], &check->chars[i]));
+        }
+
+#ifdef DIAGNOSTIC_SB_COMPRESSION
+        printf("%d cols (%d bytes) -> %d bytes (factor of %g)\n",
+               ldata->cols, 4 * ldata->cols, consumed,
+               (double)consumed / (4 * ldata->cols));
+#endif
+
+        freeline(check);
+    }
+#endif
+#endif /* TERM_CC_DIAGS */
+
+    /* Hand back the data with no slack left in the allocation. */
+    return sresize(b->data, b->len, unsigned char);
+}
+
+/*
+ * Decode one run-length encoded fragment from b into ldata, filling
+ * in one symbol per column by way of the supplied readliteral
+ * callback.
+ *
+ * Each header byte is either 80-FF, announcing a single literal
+ * that stands for (header - 0x80) + 2 consecutive columns, or
+ * 00-7F, announcing header + 1 separately stored literals. The
+ * decoder state handed to readliteral starts at zero and is carried
+ * across the whole fragment.
+ *
+ * b->len serves as the read position. Decoding is asserted to end
+ * exactly on the final column.
+ */
+static void readrle(struct buf *b, termline *ldata,
+                    void (*readliteral)(struct buf *b, termchar *c,
+                                        termline *ldata, unsigned long *state))
+{
+    unsigned long state = 0;
+    int col = 0;
+
+    while (col < ldata->cols) {
+        int hdr = get(b);
+        int is_run = (hdr >= 0x80);
+        int remaining = is_run ? hdr + 2 - 0x80 : hdr + 1;
+        int literal_pos = b->len;      /* where the literal data begins */
+
+        while (remaining--) {
+            assert(col < ldata->cols);
+            if (is_run) {
+                /*
+                 * A run stores its literal once only, so rewind and
+                 * decode the same bytes again for every column.
+                 */
+                b->len = literal_pos;
+            }
+            readliteral(b, ldata->chars + col, ldata, &state);
+            col++;
+        }
+    }
+
+    assert(col == ldata->cols);
+}
+/*
+ * Decode one character literal from b into c->chr; the inverse of
+ * makeliteral_chr.
+ *
+ *   00000000-0000007F: 0xxxxxxx
+ *   00000080-00003FFF: 10xxxxxx xxxxxxxx
+ *   00004000-001FFFFF: 110xxxxx xxxxxxxx xxxxxxxx
+ *   00200000-0FFFFFFF: 1110xxxx xxxxxxxx xxxxxxxx xxxxxxxx
+ *   10000000-FFFFFFFF: 11110ZZZ xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx
+ *
+ * The one-byte form is relative: its seven bits are combined with
+ * *state, which holds the previous character with its low eight
+ * bits cleared. All other forms are absolute. On return *state has
+ * been updated from the character just decoded.
+ *
+ * The ldata parameter is not used here.
+ *
+ * Every byte fetched is widened to unsigned long before being
+ * shifted. Shifting the plain int value left by 24 would overflow
+ * into the sign bit for bytes of 0x80 and above, which is undefined
+ * behaviour and, where unsigned long is wider than int, in practice
+ * sign-extends into the upper half of c->chr and *state.
+ */
+static void readliteral_chr(struct buf *b, termchar *c, termline *ldata,
+                            unsigned long *state)
+{
+    int byte;
+
+    byte = get(b);
+    if (byte < 0x80) {
+        c->chr = byte | *state;
+    } else if (byte < 0xC0) {
+        c->chr = (unsigned long)(byte &~ 0xC0) << 8;
+        c->chr |= (unsigned long)get(b);
+    } else if (byte < 0xE0) {
+        c->chr = (unsigned long)(byte &~ 0xE0) << 16;
+        c->chr |= (unsigned long)get(b) << 8;
+        c->chr |= (unsigned long)get(b);
+    } else if (byte < 0xF0) {
+        c->chr = (unsigned long)(byte &~ 0xF0) << 24;
+        c->chr |= (unsigned long)get(b) << 16;
+        c->chr |= (unsigned long)get(b) << 8;
+        c->chr |= (unsigned long)get(b);
+    } else {
+        /* The encoder only ever emits 0xF0 as a five-byte lead. */
+        assert(byte == 0xF0);
+        c->chr = (unsigned long)get(b) << 24;
+        c->chr |= (unsigned long)get(b) << 16;
+        c->chr |= (unsigned long)get(b) << 8;
+        c->chr |= (unsigned long)get(b);
+    }
+    *state = c->chr & ~0xFF;
+}
+/*
+ * Decode one attribute literal from b into c->attr; the inverse of
+ * makeliteral_attr.
+ *
+ * Two bytes are read, most significant first. If the top bit of
+ * that 16-bit value is set, it is cleared and two more bytes are
+ * read to make up a 32-bit word. The bit permutation applied by the
+ * encoder is then undone: the eight extended-colour bits are taken
+ * from bit position 32-9, a four-bit gap is reopened above the low
+ * nibble of each of the foreground and background colour fields,
+ * and the extended bits are put back into those gaps.
+ *
+ * Neither ldata nor state is used here.
+ */
+static void readliteral_attr(struct buf *b, termchar *c, termline *ldata,
+                             unsigned long *state)
+{
+    const int fg_hi = ATTR_FGSHIFT + 4;    /* start of upper fg nibble */
+    const int bg_hi = ATTR_BGSHIFT + 4;    /* start of upper bg nibble */
+    unsigned word, extcolour, result;
+
+    word = get(b) << 8;
+    word |= get(b);
+
+    if (word >= 0x8000) {
+        /* Long form: drop the marker bit and fetch the low half. */
+        word = (word & ~0x8000) << 16;
+        word |= get(b) << 8;
+        word |= get(b);
+    }
+
+    extcolour = (word >> (32-9)) & 0xFF;
+    result = word & ((1<<(32-9))-1);
+
+    /*
+     * Reopen the gaps, foreground first: the foreground field is
+     * the lower of the two, so widening it moves the background
+     * field up to the position the second step expects.
+     */
+    result = ((result >> fg_hi) << (fg_hi + 4)) |
+             (result & ((1 << fg_hi) - 1));
+    result = ((result >> bg_hi) << (bg_hi + 4)) |
+             (result & ((1 << bg_hi) - 1));
+
+    result |= (extcolour >> 4) << bg_hi;
+    result |= (extcolour & 0xF) << fg_hi;
+
+    c->attr = result;
+}
+/*
+ * Decode one combining-character list from b and attach it to the
+ * cell c of ldata; the inverse of makeliteral_cc.
+ *
+ * Characters are read with readliteral_chr, using a scratch state
+ * that is reset to zero before every call, until a zero character
+ * marks the end of the list. Each non-zero character is passed to
+ * add_cc together with the cell's column index.
+ *
+ * The column index is worked out from c before the loop, and c is
+ * not touched again once add_cc has been called.
+ * NOTE(review): presumably because add_cc may reallocate
+ * ldata->chars -- confirm against add_cc.
+ *
+ * The state parameter is not used here.
+ */
+static void readliteral_cc(struct buf *b, termchar *c, termline *ldata,
+                           unsigned long *state)
+{
+    int col = c - ldata->chars;
+    unsigned long scratch;
+    termchar item;
+
+    /* Start with an empty chain on this cell. */
+    c->cc_next = 0;
+
+    for (;;) {
+        scratch = 0;
+        readliteral_chr(b, &item, ldata, &scratch);
+        if (item.chr == 0) {
+            break;		       /* terminator reached */
+        }
+        add_cc(ldata, col, item.chr);
+    }
+}
+
+static termline *decompressline(unsigned char *data, int *bytes_used)
+{
+ int ncols, byte, shift;
+ struct buf buffer, *b = &buffer;
+ termline *ldata;
+
+ b->data = data;
+ b->len = 0;
+
+ /*
+ * First read in the column count.
+ */
+ ncols = shift = 0;
+ do {
+ byte = get(b);
+ ncols |= (byte & 0x7F) << shift;
+ shift += 7;
+ } while (byte & 0x80);
+
+ /*
+ * Now create the output termline.
+ */
+ ldata = snew(termline);
+ ldata->chars = snewn(ncols, termchar);
+ ldata->cols = ldata->size = ncols;
+ ldata->temporary = TRUE;
+ ldata->cc_free = 0;