From 9acadc2b1377453e1c10614920bd390c52227e8a Mon Sep 17 00:00:00 2001
From: simon <simon@cda61777-01e9-0310-a592-d414129be87e>
Date: Sat, 20 Nov 2004 08:44:10 +0000
Subject: [PATCH] Move some of my more useful utilities out from my all-purpose
 `local' and `misc' directories into a `utils' area, where they might end up
 releasable.

git-svn-id: svn://svn.tartarus.org/sgt/utils@4837 cda61777-01e9-0310-a592-d414129be87e
---
 base64/Makefile       |  10 ++
 base64/base64.but     |  72 ++++++++
 base64/base64.c       | 263 +++++++++++++++++++++++++++++
 cvt-utf8/Makefile     |   7 +
 cvt-utf8/cvt-utf8     | 460 ++++++++++++++++++++++++++++++++++++++++++++++++++
 cvt-utf8/cvt-utf8.but | 133 +++++++++++++++
 xcopy/Makefile        |  12 ++
 xcopy/xcopy.but       | 104 ++++++++++++
 xcopy/xcopy.c         | 359 +++++++++++++++++++++++++++++++++++++++
 9 files changed, 1420 insertions(+)
 create mode 100644 base64/Makefile
 create mode 100644 base64/base64.but
 create mode 100644 base64/base64.c
 create mode 100644 cvt-utf8/Makefile
 create mode 100755 cvt-utf8/cvt-utf8
 create mode 100644 cvt-utf8/cvt-utf8.but
 create mode 100644 xcopy/Makefile
 create mode 100644 xcopy/xcopy.but
 create mode 100644 xcopy/xcopy.c

diff --git a/base64/Makefile b/base64/Makefile
new file mode 100644
index 0000000..44ec017
--- /dev/null
+++ b/base64/Makefile
@@ -0,0 +1,10 @@
+all: base64.1 base64
+.PHONY: all clean
+base64: base64.c
+	$(CC) $(CFLAGS) -o $@ $<
+# Man page generation: requires the `halibut' tool on PATH.
+%.1: %.but
+	halibut --man=$@ $<
+# Remove every generated man page and the compiled binary.
+clean:
+	rm -f *.1 base64
diff --git a/base64/base64.but b/base64/base64.but
new file mode 100644
index 0000000..0634d05
--- /dev/null
+++ b/base64/base64.but
@@ -0,0 +1,72 @@
+\cfg{man-identity}{base64}{1}{2004-08-02}{Simon Tatham}{Simon Tatham}
+\cfg{man-mindepth}{1}
+
+\C{base64-manpage} Man page for \cw{base64}
+
+\H{base64-manpage-name} NAME
+
+\cw{base64} - stand-alone encoder and decoder for base64
+
+\H{base64-manpage-synopsis} SYNOPSIS
+
+\c base64 [ -d ] [ filename ]
+\e bbbbbb   bb     iiiiiiii
+\c base64 -e [ -cwidth ] [ filename ]
+\e bbbbbb bb   bbiiiii     iiiiiiii
+
+\H{base64-manpage-description} DESCRIPTION
+
+\cw{base64} is a command-line utility for encoding and decoding the
+\q{base64} encoding.
+
+This encoding, defined in RFC 2045, is primarily used to encode
+binary attachments in MIME e-mail, but is widely used in many other
+applications as well. For example, the \q{Content-MD5} mail header
+contains a small piece of base64; SSH private keys are generally
+stored as base64-encoded blobs; and so on.
+
+Other utilities, such as \cw{munpack}, exist which will take an
+entire MIME-encoded message, identify the base64-encoded subparts,
+and decode them. However, these utilities will not help you if you
+need to inspect a Content-MD5 header or an SSH private key.
+
+\cw{base64} is a very simple stand-alone encoder and decoder for the
+base64 format \e{alone}. It does not try to understand MIME headers
+or anything other than raw data.
+
+\H{base64-manpage-options} OPTIONS
+
+By default (if neither \cw{-d} nor \cw{-e} is supplied), \cw{base64}
+operates in decode mode.
+
+\dt \cw{-d}
+
+\dd Places \cw{base64} into decode mode. In this mode, it will read
+from standard input or the supplied file name, ignore all characters
+that are not part of the base64 alphabet, decode the ones that are,
+and output the decoded data on standard output.
+
+\dt \cw{-e}
+
+\dd Places \cw{base64} into encode mode. In this mode, it will read
+binary data from standard input or the supplied file name, encode it
+as base64, and output the encoded data on standard output.
+
+\dt \cw{-c}\e{width}
+
+\dd If \cw{base64} is operating in encode mode, this controls the
+number of base64 characters output per line of the encoded file.
+Normally base64-reading applications do not care about this, so the
+default of 64 characters per line is perfectly adequate.
+
+\lcont{
+
+The special value 0 will prevent \cw{base64} from ever writing a
+line break in the middle of the data at all.
+
+The base64 encoding converts between a group of three plaintext
+bytes and a group of four encoded bytes. \cw{base64} does not
+support breaking an encoded group across a line. Therefore, the
+\e{width} parameter passed to \cw{-c} must be a multiple of 4.
+
+}
diff --git a/base64/base64.c b/base64/base64.c
new file mode 100644
index 0000000..178c51b
--- /dev/null
+++ b/base64/base64.c
@@ -0,0 +1,265 @@
+#include <ctype.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#define isbase64(c) (    ((c) >= 'A' && (c) <= 'Z') || \
+                         ((c) >= 'a' && (c) <= 'z') || \
+                         ((c) >= '0' && (c) <= '9') || \
+                         (c) == '+' || (c) == '/' || (c) == '=' \
+                         )
+
+/*
+ * Decode one 4-character base64 group from `atom' into `out'.
+ * Returns the number of bytes written (1, 2 or 3), or 0 if the group
+ * contains a non-base64 character or has '=' padding in an illegal
+ * position. Nothing is written to `out' when 0 is returned.
+ */
+int base64_decode_atom(char *atom, unsigned char *out) {
+    int sextet[4];
+    int pos, nbytes;
+    unsigned bits;
+
+    for (pos = 0; pos < 4; pos++) {
+        char ch = atom[pos];
+
+        if (ch >= 'A' && ch <= 'Z') {
+            sextet[pos] = ch - 'A';
+        } else if (ch >= 'a' && ch <= 'z') {
+            sextet[pos] = 26 + (ch - 'a');
+        } else if (ch >= '0' && ch <= '9') {
+            sextet[pos] = 52 + (ch - '0');
+        } else if (ch == '+') {
+            sextet[pos] = 62;
+        } else if (ch == '/') {
+            sextet[pos] = 63;
+        } else if (ch == '=') {
+            sextet[pos] = -1;          /* padding marker */
+        } else {
+            return 0;                  /* not a base64 character */
+        }
+    }
+
+    /* Padding may only occupy the last one or two positions. */
+    if (sextet[0] < 0 || sextet[1] < 0 || (sextet[2] < 0 && sextet[3] >= 0))
+        return 0;
+
+    nbytes = (sextet[3] >= 0) ? 3 : (sextet[2] >= 0) ? 2 : 1;
+
+    /* Masking turns a -1 padding marker into low bits that are never emitted. */
+    bits = ((sextet[0] << 18) | (sextet[1] << 12) |
+            ((sextet[2] & 0x3F) << 6) | (sextet[3] & 0x3F));
+
+    out[0] = (bits >> 16) & 0xFF;
+    if (nbytes >= 2)
+        out[1] = (bits >> 8) & 0xFF;
+    if (nbytes == 3)
+        out[2] = bits & 0xFF;
+    return nbytes;
+}
+
+/*
+ * Encode up to three bytes from `data' as one four-character base64
+ * group in `out'. n is the number of valid input bytes; data[0] is always
+ * read, data[1] only if n > 1 and data[2] only if n > 2. Missing bytes
+ * are represented by '=' padding. `out' is not NUL-terminated.
+ */
+void base64_encode_atom(unsigned char *data, int n, char *out) {
+    static const char alphabet[] =
+        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
+    unsigned acc;
+
+    acc = data[0] << 16;
+    if (n > 1)
+        acc |= data[1] << 8;
+    if (n > 2)
+        acc |= data[2];
+
+    out[0] = alphabet[(acc >> 18) & 0x3F];
+    out[1] = alphabet[(acc >> 12) & 0x3F];
+    out[2] = (n > 1) ? alphabet[(acc >> 6) & 0x3F] : '=';
+    out[3] = (n > 2) ? alphabet[acc & 0x3F] : '=';
+}
+
+const char usagemsg[] =              /* help text written verbatim by usage() */
+    "usage: base64 [-d] [filename]        decode from a file or from stdin\n"
+    "   or: base64 -e [-cNNN] [filename]  encode from a file or from stdin\n"
+    " also: base64 --version              report version number\n"
+    "  and: base64 --help                 display this help text\n"
+    "where: -d     decode mode (default)\n"
+    "       -e     encode mode\n"
+    "       -cNNN  set number of chars per line for encoded output\n"
+    ;
+/* Write the help text to standard output. */
+void usage(void) {
+    fputs(usagemsg, stdout);
+}
+
+/* Print the revision from the expanded SVN keyword, or "unknown version". */
+void version(void) {
+#define SVN_REV "$Revision$"
+    char rev[sizeof(SVN_REV)];
+    char *p, *q;
+    strcpy(rev, SVN_REV);
+    for (p = rev; *p && *p != ':'; p++);
+    if (*p) {
+        p++;
+        /* cast: <ctype.h> functions are undefined for negative char values */
+        while (*p && isspace((unsigned char)*p)) p++;
+        for (q = p; *q && *q != '$'; q++);
+        if (*q) *q = '\0';
+        printf("base64 revision %s\n", p);
+    } else {
+        printf("base64: unknown version\n");
+    }
+}
+
+/* Parse the command line, then base64-encode or -decode the named file (or
+ * stdin) to stdout. Reported errors exit with status 1; --help and
+ * --version exit with status 0. Unrecognised option letters are ignored. */
+int main(int ac, char **av) {
+    int encoding = 0;
+    int cpl = 64;
+    FILE *fp;
+    char *fname = NULL;
+    char *eptr;
+
+    while (--ac) {
+        char *v, *p = *++av;
+        if (*p == '-') {
+            while (*p) {
+                char c = *++p;
+                switch (c) {
+                  case '-':
+                    /* p is at the second '-'; the long name starts after it */
+                    if (!strcmp(p + 1, "version")) {
+                        version();
+                        exit(0);
+                    }
+                    if (!strcmp(p + 1, "help")) {
+                        usage();
+                        exit(0);
+                    }
+                    break;
+                  case 'v':
+                  case 'V':
+                    version();
+                    exit(0);
+                    break;
+                  case 'h':
+                  case 'H':
+                    usage();
+                    exit(0);
+                    break;
+                  case 'd':
+                    encoding = 0;
+                    break;
+                  case 'e':
+                    encoding = 1;
+                    break;
+                  case 'c':
+                    /* Options requiring values. */
+                    v = p+1;
+                    if (!*v && ac > 1) {
+                        --ac;
+                        v = *++av;
+                    }
+                    if (!*v) {
+                        fprintf(stderr, "base64: option '-%c' expects"
+                                " an argument\n", c);
+                        exit(1);
+                    }
+                    switch (c) {
+                      case 'c':
+                        cpl = strtol(v, &eptr, 10);
+                        if (eptr && *eptr) {
+                            fprintf(stderr, "base64: option -c expects"
+                                    " a numeric argument\n");
+                            exit(1);
+                        }
+                        if (cpl % 4) {
+                            fprintf(stderr, "base64: chars per line should be"
+                                    " divisible by 4\n");
+                            exit(1);
+                        }
+                        break;
+                    }
+                    p = "";            /* the value consumed the rest of the word */
+                    break;
+                }
+            }
+        } else {
+            if (!fname)
+                fname = p;
+            else {
+                fprintf(stderr, "base64: expected only one filename\n");
+                exit(1);               /* a usage error must not report success */
+            }
+        }
+    }
+
+    if (fname) {
+        fp = fopen(fname, encoding ? "rb" : "r");
+        if (!fp) {
+            fprintf(stderr, "base64: unable to open '%s': %s\n", fname,
+                    strerror(errno));
+            exit(1);
+        }
+    } else
+        fp = stdin;
+
+    if (encoding) {
+        unsigned char in[3];
+        char out[4];
+        int column;
+        int n;
+
+        column = 0;
+        while (1) {
+            if (cpl && column >= cpl) {    /* cpl == 0 disables line breaks */
+                putchar('\n');
+                column = 0;
+            }
+            n = fread(in, 1, 3, fp);
+            if (n == 0) break;
+            base64_encode_atom(in, n, out);
+            fwrite(out, 1, 4, stdout);
+            column += 4;
+        }
+
+        putchar('\n');
+    } else {
+        char in[4];
+        unsigned char out[3];
+        int c, i, n, eof;
+
+        eof = 0;
+        do {
+            for (i = 0; i < 4; i++) {
+                do {                       /* skip anything outside the alphabet */
+                    c = fgetc(fp);
+                } while (c != EOF && !isbase64(c));
+                if (c == EOF) {
+                    eof = 1;
+                    break;
+                }
+                in[i] = c;
+            }
+            if (i > 0) {
+                if (i < 4) {
+                    fprintf(stderr, "base64: warning: number of base64"
+                            " characters was not a multiple of 4\n");
+                    while (i < 4) in[i++] = '=';
+                }
+                n = base64_decode_atom(in, out);
+                fwrite(out, 1, n, stdout);
+            }
+        } while (!eof);
+    }
+
+    if (fname)
+        fclose(fp);
+
+    return 0;
+}
diff --git a/cvt-utf8/Makefile b/cvt-utf8/Makefile
new file mode 100644
index 0000000..e766e09
--- /dev/null
+++ b/cvt-utf8/Makefile
@@ -0,0 +1,7 @@
+all: cvt-utf8.1
+.PHONY: all clean
+%.1: %.but
+	halibut --man=$@ $<
+# Remove every generated man page.
+clean:
+	rm -f *.1
diff --git a/cvt-utf8/cvt-utf8 b/cvt-utf8/cvt-utf8
new file mode 100755
index 0000000..06a17fd
--- /dev/null
+++ b/cvt-utf8/cvt-utf8
@@ -0,0 +1,460 @@
+#!/usr/bin/env python 
+
+import sys
+import string
+import os
+import anydbm
+import zlib
+
+class zip_untangler:
+    def __init__(self, file, datasofar):
+        self.file = file
+        assert len(datasofar) < 30
+        self.header = datasofar
+        self.data = ""
+        self.dataleft = None
+        self.decompress = zlib.decompressobj()
+        # Zlib header bytes, expected by decompress obj but not
+        # present in zip file
+        self.decompress.decompress("\x78\x9c")
+
+    def readline(self):
+        if self.dataleft == None:
+            while len(self.header) < 30:
+                s = self.file.read(30 - len(self.header))
+                assert s != ""
+                self.header = self.header + s
+            # Name length and extra length.
+            namelen = 256 * ord(self.header[27]) + ord(self.header[26])
+            extralen = 256 * ord(self.header[29]) + ord(self.header[28])
+            while len(self.header) < 30 + namelen + extralen:
+                s = self.file.read(30 + namelen + extralen - len(self.header))
+                assert s != ""
+                self.header = self.header + s
+            self.dataleft = \
+            256 * (256 * (256 * ord(self.header[21]) + ord(self.header[20])) \
+            + ord(self.header[19])) + ord(self.header[18])
+        k = string.find(self.data, "\n")
+        while k < 0:
+            rlen = self.dataleft
+            if rlen > 4096: rlen = 4096
+            if rlen == 0: break
+            d = self.file.read(rlen)
+            if d == "": break
+            self.dataleft = self.dataleft - rlen
+            self.data = self.data + self.decompress.decompress(d)
+            k = string.find(self.data, "\n")
+        if k < 0:
+            ret = self.data
+            self.data = ""
+            return ret
+        else:
+            ret = self.data[:k+1]
+            self.data = self.data[k+1:]
+            return ret
+
+def hexstr(x):
+    s = hex(x)
+    if s[-1:] == "L" or s[-1:] == "l":
+	s = s[:-1]
+    if s[:2] == "0x" or s[:2] == "0X":
+	s = s[2:]
+    return s
+
+def charname(x):
+    if db:
+	key = hexstr(x)
+	while len(key) < 4: key = "0" + key
+	key = string.upper(key)
+	if han_translations:
+	    try:
+		value = handb[key]
+		return "<han> " + value
+	    except KeyError:
+		pass
+	try:
+	    value = db[key]
+	    return string.split(value, ";")[1]
+	except KeyError:
+	    return "<no name available>"
+    else:
+	return ""
+
+def output(char, bytes, errors):
+    if output_analysis:
+	if char == -1:
+	    s = "           "
+	else:
+	    s = "U-%08X " % char
+	for i in bytes:
+	    s = s + " %02X" % i
+	for i in range(6-len(bytes)):
+	    s = s + "   "
+
+	if char == -1:
+	    name = ""
+	else:
+	    name = charname(char)
+	if name != "":
+	    s = s + " " + name
+	s = s + errors
+	print s
+    else:
+	if char == -1 or errors != "":
+	    # problem chars become U+FFFD REPLACEMENT CHARACTER
+	    sys.stdout.write("\xEF\xBF\xBD")
+	else:
+	    for i in bytes:
+		sys.stdout.write(chr(i))
+
+def process_ucs(x, bytes=[], errors=""):
+    if x < 0x80:
+	utf8 = [x]
+	realbytes = 1
+    else:
+	if x < 0x800:
+	    tmp = (0xC0, 1)
+	elif x < 0x10000:
+	    tmp = (0xE0, 2)
+	elif x < 0x200000:
+	    tmp = (0xF0, 3)
+	elif x < 0x4000000:
+	    tmp = (0xF8, 4)
+	else:
+	    assert x < 0x80000000L
+	    tmp = (0xFC, 5)
+	realbytes = tmp[1] + 1
+	utf8 = [tmp[0] + (x >> (6*tmp[1]))]
+	for i in range(tmp[1]-1, -1, -1):
+	    utf8.append(0x80 + (0x3F & (x >> (i*6))))
+
+    if bytes != [] and len(bytes) > realbytes:
+	errors = errors + " (overlong form of"
+	for i in utf8:
+	    errors = errors + " %02X" % i
+	errors = errors + ")"
+	utf8 = bytes
+    if x >= 0xD800 and x <= 0xDFFF:
+	errors = errors + " (surrogate)"
+    if x >= 0xFFFE and x <= 0xFFFF:
+	errors = errors + " (invalid char)"
+
+    output(x, utf8, errors)
+
+def process_utf8(next):
+    c = next()
+    while c != None:
+	char = [c]
+	i = c
+	if i < 0x80:
+	    process_ucs(i) # single-byte char
+	    c = next()
+	elif i == 0xfe or i == 0xff:
+	    output(-1, char, " (invalid UTF-8 byte)")
+	    c = next()
+	elif i >= 0x80 and i <= 0xbf:
+	    output(-1, char, " (unexpected continuation byte)")
+	    c = next()
+	else:
+	    if i >= 0xC0 and i <= 0xDF:
+		acc = i &~ 0xC0
+		cbytes = 1
+	    elif i >= 0xE0 and i <= 0xEF:
+		acc = i &~ 0xE0
+		cbytes = 2
+	    elif i >= 0xF0 and i <= 0xF7:
+		acc = i &~ 0xF0
+		cbytes = 3
+	    elif i >= 0xF8 and i <= 0xFB:
+		acc = i &~ 0xF8
+		cbytes = 4
+	    elif i >= 0xFC and i <= 0xFD:
+		acc = i &~ 0xFC
+		cbytes = 5
+	    gotone = 0
+	    while cbytes > 0:
+		c = next()
+		if c == None or c < 0x80 or c > 0xBF:
+		    gotone = 1
+		    break
+		char.append(c)
+		acc = (acc << 6) + (c & 0x3F)
+		cbytes = cbytes - 1
+	    if not gotone:
+		c = next()
+	    if cbytes > 0:
+		output(-1, char, " (incomplete sequence)")
+	    else:
+		process_ucs(acc, char)
+
+def do(args):
+    # Class to turn a list into a callable object that returns one
+    # element at a time.
+    class liststepper:
+	def __init__(self, list):
+	    self.list = list
+	    self.index = 0
+	def __call__(self):
+	    if self.index >= len(self.list):
+		return None
+	    ret = self.list[self.index]
+	    self.index = self.index + 1
+	    return ret
+
+    list = []
+    for arg in args:
+	if string.upper(arg[0]) == "U":
+	    if len(list) > 0:
+		process_utf8(liststepper(list))
+		list = []
+	    assert arg[1] == "+" or arg[1] == "-"
+	    process_ucs(string.atoi(arg[2:], 16))
+	else:
+	    list.append(string.atoi(arg, 16))
+
+    if len(list) > 0:
+	process_utf8(liststepper(list))
+
+args = sys.argv[1:]
+output_analysis = 1
+han_translations = 0
+
+if args == [] or args == ["--help"] or args == ["--help-admin"]:
+    print "Usage: cvt-utf8 [flags] <hex UTF-8 bytes and/or U+codepoints>"
+    print "  e.g. cvt-utf8 e2 82 ac"
+    print "    or cvt-utf8 U+20ac"
+    print "    or cvt-utf8 U-10ffff"
+    print ""
+    print "Flags: -o or --output        just output well-formed UTF-8 instead of"
+    print "                             an analysis of the input data"
+    print "       -h or --han           also give Han definitions from unihan db"
+    print ""
+    print "Also:  cvt-utf8 --test       run Markus Kuhn's decoder stress tests" #'
+    print "       cvt-utf8 --input (or -i)"
+    print "                             read, analyse and decode UTF-8 from stdin"
+    if args == ["--help-admin"]:
+        print "       cvt-utf8 --help       display user help text"
+        print "       cvt-utf8 --help-admin display admin help text (this one)"
+        print "       cvt-utf8 --build <infile> <outfile>"
+        print "                             convert UnicodeData.txt to unicode db"
+        print "       cvt-utf8 --build-unihan <infile> <outfile>"
+        print "                             convert Unihan.txt to unihan db"
+        print "       cvt-utf8 --fetch-build <outfile>"
+        print "                             "+\
+        "build unicode db by download from unicode.org"
+        print "       cvt-utf8 --fetch-build-unihan <outfile>"
+        print "                             "+\
+        "build Unihan db by download from unicode.org"
+    else:
+        print "       cvt-utf8 --help       display this help text"
+        print "       cvt-utf8 --help-admin display admin help text"
+    sys.exit(0)
+
+if args[0] == "-o" or args[0] == "--output":
+    output_analysis = 0
+    args = args[1:]
+
+if args[0] == "-h" or args[0] == "--han":
+    han_translations = 1
+    args = args[1:]
+
+if args[0] == "--build" or args[0] == "--fetch-build":
+    if args[0] == "--build":
+	if len(args) != 3:
+	    print "cvt-utf8: --build expects two filename arguments"
+	    sys.exit(1)
+	infile = open(args[1], "r")
+	outfile = args[2]
+    else:
+	if len(args) != 2:
+	    print "cvt-utf8: --fetch-build expects one filename argument"
+	    sys.exit(1)
+	import urllib
+	infile = urllib.urlopen("http://www.unicode.org/Public/UNIDATA/UnicodeData.txt")
+	outfile = args[1]
+    # Now build the database.
+    if outfile[-3:] == ".db":
+	print "cvt-utf8: warning: you should not append .db to db name"
+
+    db = anydbm.open(outfile, "n")
+    while 1:
+	s = infile.readline()
+	if s == "": break
+	ss = string.split(s, ";")[0]
+	db[ss] = s
+    db.close()
+    sys.exit(0)
+
+if args[0] == "--build-unihan" or args[0] == "--fetch-build-unihan":
+    if args[0] == "--build-unihan":
+        if len(args) != 3:
+            print "cvt-utf8: --build expects two filename arguments"
+            sys.exit(1)
+        infile = open(args[1], "r")
+        s = infile.read(1)
+        # Unihan.txt starts with a hash. If this file starts with a
+        # P, we assume it's a zip file ("PK").
+        if s == "P":
+            infile = zip_untangler(infile, s)
+            s = ""
+        outfile = args[2]
+    else:
+	if len(args) != 2:
+	    print "cvt-utf8: --fetch-build-unihan expects one filename argument"
+	    sys.exit(1)
+	import urllib
+	infile = urllib.urlopen("ftp://ftp.unicode.org/Public/UNIDATA/Unihan.zip")
+        # We know this one is zipped.
+        infile = zip_untangler(infile, "")
+	outfile = args[1]
+        s = ""
+    # Now build the database.
+    if outfile[-3:] == ".db":
+	print "cvt-utf8: warning: you should not append .db to db name"
+
+    db = anydbm.open(outfile, "n")
+    while 1:
+	s = s + infile.readline()
+	if s == "": break
+	while s[-1:] == "\r" or s[-1:] == "\n":
+	    s = s[:-1]
+	sa = string.split(s, "\t")
+	if len(sa) == 3 and sa[1] == "kDefinition" and sa[0][:2] == "U+":
+	    db[sa[0][2:]] = sa[2]
+        s = ""
+    db.close()
+    sys.exit(0)
+
+locations = []
+locations.append("/usr/share/unicode/unicode")
+locations.append("/usr/lib/unicode/unicode")
+locations.append("/usr/local/share/unicode/unicode")
+locations.append("/usr/local/lib/unicode/unicode")
+locations.append(os.environ["HOME"] + "/share/unicode/unicode")
+locations.append(os.environ["HOME"] + "/lib/unicode/unicode")
+
+for loc in locations:
+    try:
+	db = anydbm.open(loc, "r")
+    except IOError:
+	db = None
+    except anydbm.error:
+	db = None
+    if db != None:
+	break
+if han_translations:
+    i = string.rfind(loc, "/")
+    assert i >= 0
+    hanloc = loc[:i+1] + "unihan"
+    handb = anydbm.open(hanloc, "r")
+    # this has been explicitly required, so we don't squelch exceptions
+
+if args[0] == "--test":
+    do(["CE","BA","E1","BD","B9","CF","83","CE","BC","CE","B5"])
+    do(["00"])
+    do(["C2","80"])
+    do(["E0","A0","80"])
+    do(["F0","90","80","80"])
+    do(["F8","88","80","80","80"])
+    do(["FC","84","80","80","80","80"])
+    do(["7F"])
+    do(["DF","BF"])
+    do(["EF","BF","BF"])
+    do(["F7","BF","BF","BF"])
+    do(["FB","BF","BF","BF","BF"])
+    do(["FD","BF","BF","BF","BF","BF"])
+    do(["ED","9F","BF"])
+    do(["EE","80","80"])
+    do(["EF","BF","BD"])
+    do(["F4","8F","BF","BF"])
+    do(["F4","90","80","80"])
+    do(["80"])
+    do(["BF"])
+    do(["80","BF"])
+    do(["80","BF","80"])
+    do(["80","BF","80","BF"])
+    do(["80","BF","80","BF","80"])
+    do(["80","BF","80","BF","80","BF"])
+    do(["80","BF","80","BF","80","BF","80"])
+    do(["80","81","82","83","84","85","86","87",
+    "88","89","8A","8B","8C","8D","8E","8F",
+    "90","91","92","93","94","95","96","97",
+    "98","99","9A","9B","9C","9D","9E","9F",
+    "A0","A1","A2","A3","A4","A5","A6","A7",
+    "A8","A9","AA","AB","AC","AD","AE","AF",
+    "B0","B1","B2","B3","B4","B5","B6","B7",
+    "B8","B9","BA","BB","BC","BD","BE","BF"])
+    do(["C0","20","C1","20","C2","20","C3","20",
+    "C4","20","C5","20","C6","20","C7","20",
+    "C8","20","C9","20","CA","20","CB","20",
+    "CC","20","CD","20","CE","20","CF","20",
+    "D0","20","D1","20","D2","20","D3","20",
+    "D4","20","D5","20","D6","20","D7","20",
+    "D8","20","D9","20","DA","20","DB","20",
+    "DC","20","DD","20","DE","20","DF","20"])
+    do(["E0","20","E1","20","E2","20","E3","20",
+    "E4","20","E5","20","E6","20","E7","20",
+    "E8","20","E9","20","EA","20","EB","20",
+    "EC","20","ED","20","EE","20","EF","20"])
+    do(["F0","20","F1","20","F2","20","F3","20",
+    "F4","20","F5","20","F6","20","F7","20"])
+    do(["F8","20","F9","20","FA","20","FB","20"])
+    do(["FC","20","FD","20"])
+    do(["C0"])
+    do(["E0","80"])
+    do(["F0","80","80"])
+    do(["F8","80","80","80"])
+    do(["FC","80","80","80","80"])
+    do(["DF"])
+    do(["EF","BF"])
+    do(["F7","BF","BF"])
+    do(["FB","BF","BF","BF"])
+    do(["FD","BF","BF","BF","BF"])
+    do(["C0","E0","80","F0","80","80","F8","80",
+    "80","80","FC","80","80","80","80",
+    "DF","EF","BF","F7","BF","BF","FB",
+    "BF","BF","BF","FD","BF","BF","BF","BF"])
+    do(["FE"])
+    do(["FF"])
+    do(["FE","FE","FF","FF"])
+    do(["C0","AF"])
+    do(["E0","80","AF"])
+    do(["F0","80","80","AF"])
+    do(["F8","80","80","80","AF"])
+    do(["FC","80","80","80","80","AF"])
+    do(["C1","BF"])
+    do(["E0","9F","BF"])
+    do(["F0","8F","BF","BF"])
+    do(["F8","87","BF","BF","BF"])
+    do(["FC","83","BF","BF","BF","BF"])
+    do(["C0","80"])
+    do(["E0","80","80"])
+    do(["F0","80","80","80"])
+    do(["F8","80","80","80","80"])
+    do(["FC","80","80","80","80","80"])
+    do(["ED","A0","80"])
+    do(["ED","AD","BF"])
+    do(["ED","AE","80"])
+    do(["ED","AF","BF"])
+    do(["ED","B0","80"])
+    do(["ED","BE","80"])
+    do(["ED","BF","BF"])
+    do(["ED","A0","80","ED","B0","80"])
+    do(["ED","A0","80","ED","BF","BF"])
+    do(["ED","AD","BF","ED","B0","80"])
+    do(["ED","AD","BF","ED","BF","BF"])
+    do(["ED","AE","80","ED","B0","80"])
+    do(["ED","AE","80","ED","BF","BF"])
+    do(["ED","AF","BF","ED","B0","80"])
+    do(["ED","AF","BF","ED","BF","8F"])
+    do(["EF","BF","BE"])
+    do(["EF","BF","BF"])
+elif args[0] == "--input" or args[0] == "-i":
+    def getchar():
+	s = sys.stdin.read(1)
+	if s == "":
+	    return None
+	return ord(s) & 0xFF   # ensure it isn't negative
+    process_utf8(getchar)
+else:
+    do(args)
diff --git a/cvt-utf8/cvt-utf8.but b/cvt-utf8/cvt-utf8.but
new file mode 100644
index 0000000..427c097
--- /dev/null
+++ b/cvt-utf8/cvt-utf8.but
@@ -0,0 +1,133 @@
+\cfg{man-identity}{cvt-utf8}{1}{2004-03-24}{Simon Tatham}{Simon Tatham}
+\cfg{man-mindepth}{1}
+
+\C{cvt-utf8-manpage} Man page for \cw{cvt-utf8}
+
+\H{cvt-utf8-manpage-name} NAME
+
+\cw{cvt-utf8} - convert between UTF-8 and Unicode, and analyse Unicode
+
+\H{cvt-utf8-manpage-synopsis} SYNOPSIS
+
+\c cvt-utf8 [flags] [hex UTF-8 bytes and/or U+codepoints]
+\e bbbbbbbb  iiiii   iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii
+
+\H{cvt-utf8-manpage-description} DESCRIPTION
+
+\cw{cvt-utf8} is a tool for manipulating and analysing UTF-8 and
+Unicode data. Its functions include:
+
+\b Given a sequence of Unicode code points, convert them to the
+corresponding sequence of bytes in the UTF-8 encoding.
+
+\b Given a sequence of UTF-8 bytes, convert them back into Unicode
+code points.
+
+\b Given any combination of the above inputs, look up each Unicode
+code point in the Unicode character database and identify it.
+
+\b Look up Unified Han characters in the \q{Unihan} database and
+provide their translation text.
+
+By default, \cw{cvt-utf8} expects to receive hex numbers (either
+UTF-8 bytes or Unicode code points) on the command line, and it will
+print out a verbose analysis of the input data. If you need it to
+read UTF-8 from standard input or to write pure UTF-8 to standard
+output, you can do so using command-line options.
+
+\H{cvt-utf8-manpage-options} OPTIONS
+
+\dt \cw{-i}
+
+\dd Read UTF-8 data from standard input and analyse that, instead of
+expecting hex numbers on the command line.
+
+\dt \cw{-o}
+
+\dd Write well-formed UTF-8 to standard output, instead of writing a
+long analysis of the input data.
+
+\dt \cw{-h}
+
+\dd Look up each code point in the Unihan database as well as the
+main Unicode character database.
+
+\H{cvt-utf8-manpage-examples} EXAMPLES
+
+In \cw{cvt-utf8}'s native mode, it simply analyses input Unicode or
+UTF-8 data. For example, you can give a list of Unicode code
+points...
+
+\c $ cvt-utf8 U+20ac U+31 U+30
+\e   bbbbbbbbbbbbbbbbbbbbbbbbb
+\c U-000020AC  E2 82 AC          EURO SIGN
+\c U-00000031  31                DIGIT ONE
+\c U-00000030  30                DIGIT ZERO
+
+... and \cw{cvt-utf8} gives you the UTF-8 encodings plus the
+character definitions.
+
+Alternatively, you can supply a list of UTF-8 bytes...
+
+\c $ cvt-utf8 D0 A0 D1 83 D1 81 D1 81 D0 BA D0 B8 D0 B9
+\e   bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb
+\c U-00000420  D0 A0             CYRILLIC CAPITAL LETTER ER
+\c U-00000443  D1 83             CYRILLIC SMALL LETTER U
+\c U-00000441  D1 81             CYRILLIC SMALL LETTER ES
+\c U-00000441  D1 81             CYRILLIC SMALL LETTER ES
+\c U-0000043A  D0 BA             CYRILLIC SMALL LETTER KA
+\c U-00000438  D0 B8             CYRILLIC SMALL LETTER I
+\c U-00000439  D0 B9             CYRILLIC SMALL LETTER SHORT I
+
+... and you get back the same output format, including the Unicode
+code points.
+
+If you supply malformed data, \cw{cvt-utf8} will break it down for
+you and identify the malformed pieces and any correctly formed
+characters:
+
+\c $ cvt-utf8 A9 FE 45 C2 80 90 0A
+\e   bbbbbbbbbbbbbbbbbbbbbbbbbbbbb
+\c             A9                (unexpected continuation byte)
+\c             FE                (invalid UTF-8 byte)
+\c U-00000045  45                LATIN CAPITAL LETTER E
+\c U-00000080  C2 80             <control>
+\c             90                (unexpected continuation byte)
+\c U-0000000A  0A                <control>
+
+If you need the UTF-8 encoding of a particular character, you can
+use the \cw{-o} option to cause the UTF-8 to be written to standard
+output:
+
+\c $ cvt-utf8 -o U+20AC >> my-utf8-file.txt
+\e   bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb
+
+If you have UTF-8 data in a file or output from another program, you
+can use the \cw{-i} option to have \cw{cvt-utf8} analyse it. This
+works particularly well if you also have my \cw{xcopy} program,
+which can be told to extract UTF-8 data from the X selection and
+write it to its standard output. With these two programs working
+together, if you ever have trouble identifying some text in a
+UTF-8-supporting web browser such as Mozilla, you can simply select
+the text in question, switch to a terminal window, and type
+
+\c $ xcopy -u -r | cvt-utf8 -i
+\e   bbbbbbbbbbbbbbbbbbbbbbbbb
+
+If the text is in Chinese, you can get at least a general idea of
+its meaning by using the \cw{-h} option to print the meaning of each
+ideograph from the Unihan database. For example, if you pass in the
+Chinese text meaning \q{Traditional Chinese}:
+
+\c $ cvt-utf8 -h U+7E41 U+9AD4 U+4E2D U+6587
+\e   bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb
+\c U-00007E41  E7 B9 81          <han> complicated, complex, difficult
+\c U-00009AD4  E9 AB 94          <han> body; group, class, body, unit
+\c U-00004E2D  E4 B8 AD          <han> central; center, middle; in the
+\c                               midst of; hit (target); attain
+\c U-00006587  E6 96 87          <han> literature, culture, writing
+
+\H{cvt-utf8-manpage-bugs} BUGS
+
+Command-line option processing is very basic. In particular, \cw{-h}
+must come before \cw{-i} or it will not be recognised.
diff --git a/xcopy/Makefile b/xcopy/Makefile
new file mode 100644
index 0000000..6f193bc
--- /dev/null
+++ b/xcopy/Makefile
@@ -0,0 +1,12 @@
+X11LIB=-L/usr/X11R6/lib -lX11
+# X11LIB holds the linker flags for Xlib; override it if X lives elsewhere.
+all: xcopy.1 xcopy
+.PHONY: all clean
+xcopy: xcopy.c
+	$(CC) $(CFLAGS) -o $@ $< $(X11LIB)
+# Man page generation: requires the `halibut' tool on PATH.
+%.1: %.but
+	halibut --man=$@ $<
+# Remove every generated man page and the compiled binary.
+clean:
+	rm -f *.1 xcopy
diff --git a/xcopy/xcopy.but b/xcopy/xcopy.but
new file mode 100644
index 0000000..ba5bcac
--- /dev/null
+++ b/xcopy/xcopy.but
@@ -0,0 +1,104 @@
+\cfg{man-identity}{xcopy}{1}{2004-08-02}{Simon Tatham}{Simon Tatham}
+\cfg{man-mindepth}{1}
+
+\C{xcopy-manpage} Man page for \cw{xcopy}
+
+\H{xcopy-manpage-name} NAME
+
+\cw{xcopy} - read and write text to/from an X selection from the
+command line
+
+\H{xcopy-manpage-synopsis} SYNOPSIS
+
+\c xcopy [ -r ] [ -u | -c ] [ -C ]
+\e bbbbb   bb     bb   bb     bb
+
+\H{xcopy-manpage-description} DESCRIPTION
+
+\cw{xcopy} is a command-line utility for manipulating the primary X selection.
+
+It has two basic modes. In read mode (\cw{xcopy -r}), it connects to
+your X server, retrieves the contents of the selection as plain
+text, and writes it on standard output. You would then typically
+redirect its output into a file, or pipe it into some other program.
+
+In write mode (just \cw{xcopy}, if \cw{-r} is not specified), it
+will read data from standard input, then connect to your X server
+and place that data in the selection as plain text. So you can pipe
+data into \cw{xcopy}, move to another application, and press Paste.
+
+The X selection model requires the selection-owning client to remain
+connected to the server and hand out its data on request. Therefore,
+\cw{xcopy} in write mode forks off a background process which does
+this. The background process terminates once it is no longer the
+selection owner (i.e. as soon as you select data in another
+application), or if your X session finishes. Normally you can ignore
+its presence, although it might become important to be aware of it
+if (for example) the \cw{xcopy} background process were to be the
+last X client still connected through an SSH tunnel.
+
+\cw{xcopy} currently only handles text data. However, it is capable
+of handling it in the form of plain text, UTF-8, or compound
+(multiple-character-set) text. Use the \cw{-u}, \cw{-c} and \cw{-C}
+options to control this aspect of its behaviour.
+
+\H{xcopy-manpage-options} OPTIONS
+
+By default (if \cw{-r} is not supplied), \cw{xcopy} operates in
+write mode.
+
+\dt \cw{-r}
+
+\dd Places \cw{xcopy} into read mode.
+
+By default (if neither \cw{-c} nor \cw{-u} is supplied), \cw{xcopy}
+reads and writes the selection using the type \cw{STRING}, which
+means that the input or output data is expected to be encoded in
+ISO-8859-1.
+
+\dt \cw{-u}
+
+\dd In read mode, causes \cw{xcopy} to request the selection using
+the type \cw{UTF8_STRING}, which typically means that the returned
+data will be encoded as UTF-8. In write mode, causes \cw{xcopy} to
+\e{give out} the selection as type \cw{UTF8_STRING}, meaning that
+the data piped in to it is expected to be encoded as UTF-8.
+
+\dt \cw{-c}
+
+\dd Similar to \cw{-u}, but uses the type \cw{COMPOUND_TEXT} rather
+than \cw{UTF8_STRING}. \cw{COMPOUND_TEXT} is a complex
+multi-character-set encoding similar to ISO 2022, and is unlikely to
+be a very useful form in which to pass data to or from non-X
+programs. However, it might occasionally be useful to retrieve a
+compound text selection using \cw{xcopy -r -c}, and later on return
+it to the X selection using \cw{xcopy -c} so it can be pasted back
+into a different application.
+
+In write mode, if \cw{xcopy} is operating in \cw{STRING} mode and a
+pasting application requests the selection as \cw{COMPOUND_TEXT},
+\cw{xcopy} will convert the data automatically. This is normally
+what you want.
+
+\dt \cw{-C}
+
+\dd Suppresses conversion to compound text in write mode. This is
+occasionally useful if you are pasting control characters, since the
+compound text specification forbids any control characters and the
+Xlib conversion functions honour this. If you are (for example)
+trying to paste a sequence of editor commands into a terminal
+window, you might well want to paste a string full of escape
+sequences and control characters, in which case you may need to use
+\cw{-C} to disable conversion to compound text.
+
+\H{xcopy-manpage-bugs} BUGS
+
+Occasionally \cw{xcopy -r} completely fails to notice selection data
+owned by another process. I have not yet reproduced this reliably;
+if anyone can, some work with \cw{xmon}(1) would be much
+appreciated...
+
+Automatic conversion between compound text and UTF-8 is not
+currently supported. There are Xlib functions to do it, although
+they don't appear to work very well (missing out many characters
+which they could have converted).
diff --git a/xcopy/xcopy.c b/xcopy/xcopy.c
new file mode 100644
index 0000000..c936cd7
--- /dev/null
+++ b/xcopy/xcopy.c
@@ -0,0 +1,359 @@
+/*
+ * xcopy: quickly pipe text data into, or out of, the primary X
+ * selection
+ */
+
+#include <malloc.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdarg.h>
+#include <math.h>
+#include <errno.h>
+#include <assert.h>
+#include <unistd.h>		       /* fork, close, chdir, pid_t */
+#include <X11/X.h>
+#include <X11/Xlib.h>
+#include <X11/Xutil.h>
+#include <X11/Xatom.h>
+
+int init_X(void);		       /* nonzero: caller must then run_X() */
+void run_X(void);		       /* selection event loop */
+void done_X(void);		       /* destroy window, close display */
+void full_redraw(void);		       /* NOTE(review): no definition in this file */
+void do_paste(Window window, Atom property, int Delete);
+
+char *pname;			       /* program name (argv[0]) for messages */
+
+void error (char *fmt, ...);	       /* printf-style; exits with status 1 */
+
+/* set from command-line parameters */
+char *display = NULL;		       /* -display/-disp; given to XOpenDisplay */
+enum { STRING, CTEXT, UTF8 } mode = STRING;   /* -c => CTEXT, -u => UTF8 */
+
+/* selection data */
+char *seltext;			       /* write mode: all of stdin, NUL-terminated */
+int sellen, selsize;		       /* bytes of data / bytes allocated */
+#define SELDELTA 16384		       /* buffer growth step; do_paste chunk length */
+
+/* functional parameters */
+int reading;                           /* read instead of writing? (set by -r) */
+int convert_to_ctext = True;	       /* Xmb convert to compound text? (-C: no) */
+
+/*
+ * Read mode (-r): print the selection on stdout. Write mode: read
+ * stdin to EOF, claim the selection and serve it from a forked child.
+ * Returns 0; every failure exits with status 1 via error().
+ */
+int main(int ac, char **av) {
+    int eventloop;
+
+    pname = *av;
+
+    /* parse the command line arguments */
+    while (--ac) {
+        char *p = *++av;
+
+        if (!strcmp(p, "-display") || !strcmp(p, "-disp")) {
+            if (!av[1])
+                error ("option `%s' expects a parameter", p);
+            display = *++av, --ac;
+        } else if (!strcmp(p, "-r")) {
+            reading = True;
+        } else if (!strcmp(p, "-u")) {
+            mode = UTF8;
+        } else if (!strcmp(p, "-c")) {
+            mode = CTEXT;
+        } else if (!strcmp(p, "-C")) {
+            convert_to_ctext = False;
+        } else if (*p=='-') {
+            error ("unrecognised option `%s'", p);
+        } else {
+            error ("no parameters required");
+        }
+    }
+
+    if (!reading) {
+        /* Slurp stdin; one byte always stays free for the final NUL. */
+        selsize = SELDELTA;
+        sellen = 0;
+        seltext = malloc(selsize);
+        if (!seltext)
+            error ("out of memory");
+        for (;;) {
+            size_t n = fread(seltext+sellen, 1, selsize-sellen-1, stdin);
+            if (n == 0)
+                break;                 /* EOF or read error */
+            sellen += n;
+            if (sellen == selsize-1) {
+                seltext = realloc(seltext, selsize += SELDELTA);
+                if (!seltext)
+                    error ("out of memory");
+            }
+        }
+        if (ferror(stdin))
+            error ("error reading standard input: %s", strerror(errno));
+        seltext[sellen] = '\0';
+    }
+
+    eventloop = init_X();
+    if (!reading) {
+        /*
+         * If we are writing the selection, we must go into the
+         * background now.
+         */
+        pid_t pid = fork();
+        if (pid < 0) {
+            error("unable to fork: %s", strerror(errno));
+        } else if (pid > 0) {
+            /* we are the parent; just exit */
+            return 0;
+        }
+        /* we are the child: let go of the caller's stdio and directory */
+        close(0);
+        close(1);
+        close(2);
+        chdir("/");
+    }
+    if (eventloop)
+        run_X();
+    done_X();
+    return 0;
+}
+
+/* handle errors */
+
+void error (char *fmt, ...) {
+    va_list ap;
+    char errbuf[200];
+
+    done_X();
+    va_start (ap, fmt);
+    vsprintf (errbuf, fmt, ap);
+    va_end (ap);
+    fprintf (stderr, "%s: %s\n", pname, errbuf);
+    exit (1);
+}
+
+/* begin the X interface */
+
+char *lcasename = "xcopy";	       /* res_name given to XSetClassHint */
+char *ucasename = "XCopy";	       /* res_class given to XSetClassHint */
+
+Display *disp = NULL;		       /* X connection; NULL until init_X */
+Window ourwin = None;		       /* our window; None until init_X */
+Atom compound_text_atom, targets_atom; /* COMPOUND_TEXT (write mode), TARGETS */
+int screen, wwidth, wheight;	       /* wwidth/wheight: unused in this file */
+
+Atom strtype = XA_STRING;	       /* type we transfer; -u/-c change it */
+
+/*
+ * Connects to the X server, interns the atoms we use, creates our
+ * window (never mapped) and starts the selection transfer. Sets the
+ * globals disp, screen, ourwin, strtype and targets_atom (plus
+ * compound_text_atom in write mode).
+ *
+ * Read mode: asks for PRIMARY to be converted to `strtype'; if nobody
+ * owns PRIMARY, prints CUT_BUFFER0 immediately instead.
+ * Write mode: claims PRIMARY and, for plain STRING data only, also
+ * stores the text in the cut buffers.
+ *
+ * Returns TRUE if we need to enter an event loop, FALSE otherwise.
+ * Failures are reported through error(), which does not return.
+ */
+int init_X(void) {
+    static const Atom cutbufs[8] = {
+        XA_CUT_BUFFER0, XA_CUT_BUFFER1, XA_CUT_BUFFER2, XA_CUT_BUFFER3,
+        XA_CUT_BUFFER4, XA_CUT_BUFFER5, XA_CUT_BUFFER6, XA_CUT_BUFFER7
+    };
+    Window root;
+    XClassHint class_hints;
+    Atom sel_property;
+    int i;
+
+    /* open the X display */
+    disp = XOpenDisplay (display);
+    if (!disp)
+        error ("unable to open display");
+
+    if (mode == UTF8) {
+        strtype = XInternAtom(disp, "UTF8_STRING", False);
+        if (!strtype)
+            error ("unable to get UTF8_STRING property");
+    } else if (mode == CTEXT) {
+        strtype = XInternAtom(disp, "COMPOUND_TEXT", False);
+        if (!strtype)
+            error ("unable to get COMPOUND_TEXT property");
+    }
+    targets_atom = XInternAtom(disp, "TARGETS", False);
+    if (!targets_atom)
+        error ("unable to get TARGETS property");
+
+    /* get the screen and root-window */
+    screen = DefaultScreen (disp);
+    root = RootWindow (disp, screen);
+
+    /* actually create the window; its geometry doesn't really matter */
+    ourwin = XCreateSimpleWindow (disp, root, 0, 0, 10, 10, 0,
+                                  BlackPixel(disp, screen),
+                                  WhitePixel(disp, screen));
+
+    /* resource class name */
+    class_hints.res_name = lcasename;
+    class_hints.res_class = ucasename;
+    XSetClassHint (disp, ourwin, &class_hints);
+
+    /* do selection fiddling */
+    if (reading) {
+        /*
+         * We are reading the selection, so we must ask its owner to
+         * convert it; the answer arrives as a SelectionNotify event,
+         * which run_X() waits for.
+         */
+        if (XGetSelectionOwner(disp, XA_PRIMARY) == None) {
+            /* No primary selection, so use the cut buffer. */
+            do_paste(DefaultRootWindow(disp), XA_CUT_BUFFER0, False);
+            return False;
+        }
+        /* "VT_SELECTION" names the property on our window in which
+         * the owner is asked to deposit the converted data. */
+        sel_property = XInternAtom(disp, "VT_SELECTION", False);
+        XConvertSelection(disp, XA_PRIMARY, strtype,
+                          sel_property, ourwin, CurrentTime);
+        return True;
+    }
+
+    /*
+     * We are writing to the selection, so we establish
+     * ourselves as selection owner. Also place the data in
+     * CUT_BUFFER0, if it isn't of an exotic type (cut buffers
+     * can only take ordinary string data, it turns out).
+     */
+    XSetSelectionOwner (disp, XA_PRIMARY, ourwin, CurrentTime);
+    if (XGetSelectionOwner (disp, XA_PRIMARY) != ourwin)
+        error ("unable to obtain primary X selection");
+    compound_text_atom = XInternAtom(disp, "COMPOUND_TEXT", False);
+    if (strtype == XA_STRING) {
+        /*
+         * ICCCM-required cut buffer initialisation: appending zero
+         * bytes creates any buffer that does not exist yet, and
+         * XRotateBuffers fails with BadMatch unless all eight do.
+         */
+        for (i = 0; i < 8; i++)
+            XChangeProperty(disp, root, cutbufs[i], XA_STRING, 8,
+                            PropModeAppend, (unsigned char *)"", 0);
+        /*
+         * Rotate the cut buffers and add our text in CUT_BUFFER0.
+         */
+        XRotateBuffers(disp, 1);
+        XStoreBytes(disp, seltext, sellen);
+    }
+    return True;
+}
+
+/*
+ * Event loop. Read mode: print the reply to our XConvertSelection and
+ * return. Write mode: serve SelectionRequests until SelectionClear.
+ */
+void run_X(void) {
+    XEvent ev, e2;
+    XSelectionRequestEvent *rq = &ev.xselectionrequest;
+
+    while (1) {
+        XNextEvent (disp, &ev);
+        if (reading) {
+            if (ev.type != SelectionNotify)
+                continue;
+            if (ev.xselection.property != None)
+                do_paste(ev.xselection.requestor,
+                         ev.xselection.property, True);
+            return;
+        }
+        if (ev.type == SelectionClear)
+            return;                    /* another app took the selection */
+        if (ev.type != SelectionRequest)
+            continue;
+
+        e2.xselection.type = SelectionNotify;
+        e2.xselection.requestor = rq->requestor;
+        e2.xselection.selection = rq->selection;
+        e2.xselection.target = rq->target;
+        e2.xselection.time = rq->time;
+        /* Refuse (property None) unless a branch below succeeds. */
+        e2.xselection.property = None;
+        if (rq->target == strtype) {
+            XChangeProperty (disp, rq->requestor, rq->property, strtype, 8,
+                    PropModeReplace, (unsigned char *)seltext, sellen);
+            e2.xselection.property = rq->property;
+        } else if (rq->target == compound_text_atom && convert_to_ctext) {
+            XTextProperty tp;
+            /* Negative means failure; tp is then not filled in. */
+            if (XmbTextListToTextProperty (disp, &seltext, 1,
+                                           XCompoundTextStyle, &tp) >= 0) {
+                XChangeProperty (disp, rq->requestor, rq->property,
+                                 rq->target, tp.format, PropModeReplace,
+                                 tp.value, tp.nitems);
+                XFree (tp.value);      /* Xlib allocated it for us */
+                e2.xselection.property = rq->property;
+            }
+        } else if (rq->target == targets_atom) {
+            Atom targets[2];
+            int len = 0;
+            targets[len++] = strtype;
+            if (strtype != compound_text_atom && convert_to_ctext)
+                targets[len++] = compound_text_atom;
+            /* ICCCM: the TARGETS reply is a list of type ATOM. */
+            XChangeProperty (disp, rq->requestor, rq->property, XA_ATOM, 32,
+                    PropModeReplace, (unsigned char *)targets, len);
+            e2.xselection.property = rq->property;
+        }
+        XSendEvent (disp, rq->requestor, False, 0, &e2);
+    }
+}
+
+/* Destroy our window and close the display. Safe before init_X has
+ * created either, which is how error() can call it at any point. */
+void done_X(void) {
+    if (ourwin != None)
+        XDestroyWindow (disp, ourwin);
+    if (disp)
+        XCloseDisplay (disp);
+}
+
+/*
+ * Copy the contents of `property' on `window' to stdout, fetching it
+ * in chunks of up to SELDELTA 32-bit units. Nothing is printed unless
+ * the property's type is `strtype'. If Delete is true, the server
+ * removes the property once its final chunk has been fetched.
+ */
+void do_paste(Window window, Atom property, int Delete) {
+    Atom actual_type;
+    int actual_format;
+    unsigned long nitems, bytes_after, nread = 0;
+    unsigned char *data;
+
+    do {
+        data = NULL;
+        if (XGetWindowProperty(disp, window, property, nread / 4, SELDELTA,
+                               Delete, AnyPropertyType, &actual_type,
+                               &actual_format, &nitems, &bytes_after,
+                               &data) != Success)
+            break;
+
+        if (actual_type == strtype && nitems > 0) {
+            assert(actual_format == 8);
+            fwrite(data, 1, nitems, stdout);
+            nread += nitems;
+        } else {
+            bytes_after = 0;           /* wrong type or empty: give up */
+        }
+        if (data)
+            XFree(data);
+        /*
+         * Offsets are in 4-byte units, so only the last chunk may have
+         * an odd size. Stop on bytes_after == 0 instead of asking again:
+         * with Delete false, asking again would re-read the tail bytes.
+         */
+        assert(bytes_after == 0 || (nread & 3) == 0);
+    } while (bytes_after > 0);
+}
-- 
2.11.0