Initial import of unaltered files.

author Mark Wooding <mdw@distorted.org.uk>

Sat, 13 Feb 2016 18:39:07 +0000 (18:39 +0000)

committer Mark Wooding <mdw@distorted.org.uk>

Sat, 13 Feb 2016 18:39:07 +0000 (18:39 +0000)
author Mark Wooding <mdw@distorted.org.uk>
Sat, 13 Feb 2016 18:39:07 +0000 (18:39 +0000)
committer Mark Wooding <mdw@distorted.org.uk>
Sat, 13 Feb 2016 18:39:07 +0000 (18:39 +0000)
diff --git a/coverart/chkimgsz b/coverart/chkimgsz

new file mode 100755 (executable)

index 0000000..60b4460
--- /dev/null
+++ b/coverart/chkimgsz
@@ -0,0 +1,11 @@
+#! /usr/bin/python
+
+from PIL import Image as I
+from sys import argv, stdin
+
+# Usage: chkimgsz MINX MINY
+# Both arguments are parsed as integers.
+minx, miny = map(int, argv[1:])
+
+# Each line of stdin names an image file.  Print the name and dimensions of
+# every image whose first size component is below MINX or whose second is
+# below MINY.
+for line in stdin:
+  # Strip the trailing newline, if there is one.
+  if line and line[-1] == '\n': line = line[:-1]
+  x, y = I.open(line).size
+  if x < minx or y < miny: print '%s (%dx%d)' % (line, x, y)
diff --git a/coverart/coverart b/coverart/coverart

new file mode 100755 (executable)

index 0000000..789b06d
--- /dev/null
+++ b/coverart/coverart
@@ -0,0 +1,483 @@
+#! /usr/bin/python
+# -*- coding: utf-8 -*-
+
+import sys as SYS
+import os as OS
+from cStringIO import StringIO
+
+import gobject as G
+import gtk as GTK
+GDK = GTK.gdk
+import cairo as XR
+
+import urllib as U
+import urllib2 as U2
+import json as JS
+
+THUMBSZ = 96
+
+class ImageCache (object):
+
+  THRESH = 128*1024*1024
+
+  def __init__(me):
+    me._total = 0
+    me._first = me._last = None
+
+  def add(me, img):
+    me._total += img.size
+    while me._first and me._total > me.THRESH:
+      me._first.evict()
+    img._prev = me._last
+    img._next = None
+    if me._last:
+      me._last._next = img
+    else:
+      me._first = img
+    me._last = img
+
+  def rm(me, img):
+    if img._prev:
+      img._prev._next = img._next
+    else:
+      me._first = img._next
+    if img._next:
+      img._next._prev = img._prev
+    else:
+      img._last = img._prev
+    me._total -= img.size
+
+CACHE = ImageCache()
+
+class CacheableImage (object):
+
+  def __init__(me):
+    me._pixbuf = None
+    me._prev = me._next = None
+    me._thumb = None
+
+  @property
+  def pixbuf(me):
+    if not me._pixbuf:
+      me._pixbuf = me._acquire()
+      me.size = me._pixbuf.get_pixels_array().nbytes
+      CACHE.add(me)
+    return me._pixbuf
+
+  def evict(me):
+    me._pixbuf = None
+    CACHE.rm(me)
+
+  def flush(me):
+    me.evict()
+    me._thumb = None
+
+  @property
+  def thumbnail(me):
+    if not me._thumb:
+      me._thumb = Thumbnail(me)
+    return me._thumb
+
+class Thumbnail (object):
+  """
+  A copy of an image which is at most THUMBSZ pixels along its longer side.
+
+  The result is left in the `pixbuf' attribute.
+  """
+
+  def __init__(me, img):
+    pix = img.pixbuf
+    wd, ht = pix.get_width(), pix.get_height()
+    m = max(wd, ht)
+    if m <= THUMBSZ:
+      # Small enough already: share the original pixbuf.
+      me.pixbuf = pix
+    else:
+      # Scale the longer side to THUMBSZ; `+ m//2' rounds to nearest.
+      twd, tht = [(x*THUMBSZ + m//2)//m for x in [wd, ht]]
+      me.pixbuf = pix.scale_simple(twd, tht, GDK.INTERP_HYPER)
+
+class NullImage (CacheableImage):
+
+  MAP = {}
+
+  def __init__(me, size, text):
+    CacheableImage.__init__(me)
+    me._size = size
+    me._text = text
+
+  @staticmethod
+  def get(cls, size):
+    try:
+      return cls.MAP[size]
+    except KeyError:
+      img = cls.MAP[size] = cls(size)
+      return img
+
+  def _acquire(me):
+
+    surf = XR.ImageSurface(XR.FORMAT_ARGB32, me._size, me._size)
+    xr = XR.Context(surf)
+
+    xr.set_source_rgb(0.3, 0.3, 0.3)
+    xr.paint()
+
+    xr.move_to(me._size/2.0, me._size/2.0)
+    xr.select_font_face('sans-serif',
+                        XR.FONT_SLANT_NORMAL, XR.FONT_WEIGHT_BOLD)
+    xb, yb, wd, ht, xa, ya = xr.text_extents(me._text)
+    m = max(wd, ht)
+    z = me._size/float(m) * 2.0/3.0
+    xr.scale(z, z)
+
+    xr.set_source_rgb(0.8, 0.8, 0.8)
+    xr.move_to(3.0*m/4.0 - wd/2.0 - xb, 3.0*m/4.0 - ht/2.0 - yb)
+    xr.show_text(me._text)
+
+    surf.flush()
+    pix = GDK.pixbuf_new_from_data(surf.get_data(),
+                                   GDK.COLORSPACE_RGB, True, 8,
+                                   me._size, me._size, surf.get_stride())
+    return pix
+
+class FileImage (CacheableImage):
+  """An image which is loaded on demand from the file named FILE."""
+
+  def __init__(me, file):
+    CacheableImage.__init__(me)
+    me._file = file
+
+  def _acquire(me):
+    # Called by `CacheableImage.pixbuf' when the pixels are needed.
+    return GDK.pixbuf_new_from_file(me._file)
+
+def fetch_url(url):
+  out = StringIO()
+  with U.urlopen(url) as u:
+    while True:
+      stuff = u.read(16384)
+      if not stuff:
+        break
+      out.write(stuff)
+  return out.getvalue()
+
+def fix_background(w):
+  """
+  Give the widget W a black background with white text.
+
+  Works on a copy of W's current style.  BLACK and WHITE are module globals
+  looked up at call time.
+  """
+  style = w.get_style().copy()
+  style.base[GTK.STATE_NORMAL] = BLACK
+  style.bg[GTK.STATE_NORMAL] = BLACK
+  style.text[GTK.STATE_NORMAL] = WHITE
+  w.set_style(style)
+
+class BaseCoverViewer (object):
+  """
+  Common machinery for showing a collection of covers in an icon view.
+
+  The widget to embed is `scr', a scrolled window containing the icon view
+  `iv'.  Items passed to `add' must have `img' and `text' attributes.
+  This class calls `me.activate(ITEM)' and `me.select(ITEM)' but doesn't
+  define them: subclasses must.
+  """
+
+  def __init__(me):
+    me.scr = GTK.ScrolledWindow()
+    me.scr.set_policy(GTK.POLICY_AUTOMATIC, GTK.POLICY_AUTOMATIC)
+    me.iv = GTK.IconView()
+    me.iv.connect('item-activated',
+                  lambda iv, p: me.activate(me._frompath(p)))
+    me.iv.connect('selection-changed', me._select)
+    # Model columns: 0 is the thumbnail, 1 is the caption (see `reset').
+    me.iv.set_pixbuf_column(0)
+    me.iv.set_text_column(1)
+    me.iv.set_orientation(GTK.ORIENTATION_VERTICAL)
+    me.iv.set_item_width(THUMBSZ + 32)
+    fix_background(me.iv)
+    me.scr.add(me.iv)
+    me.reset()
+
+  def reset(me):
+    """Discard all of the items by installing a fresh, empty model."""
+    # Columns: thumbnail pixbuf, caption text, and the item object itself.
+    me.list = GTK.ListStore(GDK.Pixbuf, G.TYPE_STRING, G.TYPE_PYOBJECT)
+    me.iv.set_model(me.list)
+    me.iv.unselect_all()
+
+  def add(me, item):
+    """Append ITEM to the view, recording its row iterator as `item.it'."""
+    item.it = me.list.append([item.img.thumbnail.pixbuf,
+                              item.text,
+                              item])
+
+  def _frompath(me, path):
+    """Return the item object stored in the row at PATH."""
+    return me.list[path][2]
+
+  def _select(me, iv):
+    """
+    Handle a selection change: report the selected item through `select',
+    or None unless exactly one item is selected.
+    """
+    sel = me.iv.get_selected_items()
+    if len(sel) != 1:
+      me.select(None)
+    else:
+      me.select(me._frompath(sel[0]))
+
+class SearchCover (object):
+  """
+  An entry in the search viewer for an image IMG which is already to hand.
+
+  The caption is the image's dimensions followed by a `*'.  Working out the
+  dimensions reads `img.pixbuf', so the image is loaded here.
+  """
+  def __init__(me, img):
+    me.img = img
+    pix = img.pixbuf
+    me.text = '%d×%d*' % (pix.get_width(), pix.get_height())
+
+class SearchViewer (BaseCoverViewer):
+  """
+  The list of candidate covers shown in a chooser window.
+
+  Activation and selection events are passed on to CHOOSER's `activated'
+  and `selected' methods.
+  """
+
+  def __init__(me, chooser):
+    BaseCoverViewer.__init__(me)
+    me._chooser = chooser
+
+  def switch(me, current):
+    """
+    Clear the list; if CURRENT (an image) is given, add it and select it.
+    """
+    me.reset()
+    if current:
+      cov = SearchCover(current)
+      me.add(cov)
+      me.iv.select_path(me.list.get_path(cov.it))
+
+  def activate(me, cov):
+    me._chooser.activated(cov)
+
+  def select(me, cov):
+    me._chooser.selected(cov)
+
+class RemoteImage (CacheableImage):
+
+  ERRIMG = NullImage(256, '!')
+
+  def __init__(me, url, ref = None):
+    CacheableImage.__init__(me)
+    me._url = url
+    me._ref = ref
+    me._data = None
+
+  def _fetch(me):
+    if me._data:
+      return
+    d = StringIO()
+    rq = U2.Request(me._url)
+    if me._ref:
+      rq.add_header('Referer', me._ref)
+    rs = U2.urlopen(rq)
+    while True:
+      stuff = rs.read(16384)
+      if not stuff:
+        break
+      d.write(stuff)
+    me._data = d.getvalue()
+    ld = GDK.PixbufLoader()
+    try:
+      o = 0
+      n = len(me._data)
+      while True:
+        if o >= n:
+          raise ValueError, 'not going to work'
+        l = min(n, o + 16384)
+        ld.write(me._data[o:l])
+        o = l
+        f = ld.get_format()
+        if f:
+          break
+      me._format = f
+      if 'image/gif' in f['mime_types']:
+        raise ValueError, 'boycotting GIF image'
+    finally:
+      try:
+        ld.close()
+      except G.GError:
+        pass
+
+  def _acquire(me):
+    try:
+      me._fetch()
+      ld = GDK.PixbufLoader()
+      try:
+        ld.write(me._data)
+      finally:
+        ld.close()
+      return ld.get_pixbuf()
+    except Exception, e:
+      print e
+      return me.ERRIMG.pixbuf
+
+  @property
+  def ext(me):
+    exts = me._format['extensions']
+    for i in ['jpg']:
+      if i in exts:
+        return i
+    return exts[0]
+
+class SearchImage (RemoteImage):
+  """
+  A remote image which has a separate, ready-made thumbnail at TBURL.
+  """
+
+  def __init__(me, url, ref, tburl):
+    RemoteImage.__init__(me, url, ref)
+    me._tburl = tburl
+
+  @property
+  def thumbnail(me):
+    # Build the thumbnail from the image at TBURL rather than from the full
+    # image, so the full image needn't be downloaded just to list it.  The
+    # thumbnail is fetched without a `Referer' header.
+    if not me._thumb:
+      me._thumb = Thumbnail(RemoteImage(me._tburl))
+    return me._thumb
+
+class SearchResult (SearchCover):
+  """
+  An entry in the search viewer made from one search result R.
+
+  R is a mapping; the keys `width', `height', `unescapedUrl',
+  `originalContextUrl' and `tbUrl' are read from it.
+  """
+
+  def __init__(me, r):
+    # `SearchCover.__init__' isn't called: it would load the image to find
+    # its size, whereas here the caption uses the size the result reports.
+    w = int(r['width'])
+    h = int(r['height'])
+    url = r['unescapedUrl']
+    ref = r['originalContextUrl']
+    tburl = r['tbUrl']
+    me.img = SearchImage(url, ref, tburl)
+    me.text = '%d×%d' % (w, h)
+
+class SearchFail (Exception):
+  """Raised by `CoverChooser.search' when a search can't be completed."""
+  pass
+
+class CoverChooser (object):
+  """
+  A window for finding a replacement cover image.
+
+  It has a query box and search button along the top, a list of candidate
+  images on the left, and a large view of the selected candidate on the
+  right.  The window is populated and shown by `update'.
+  """
+
+  # The URL-quoted query string is appended to this.
+  SEARCHURL = \
+    'http://ajax.googleapis.com/ajax/services/search/images?v=1.0&rsz=8&q='
+
+  def __init__(me):
+    me.win = GTK.Window()
+    box = GTK.VBox()
+
+    # Top row: query entry and search button.
+    top = GTK.HBox()
+    me.query = GTK.Entry()
+    top.pack_start(me.query, True, True, 2)
+    srch = GTK.Button('_Search')
+    srch.set_flags(GTK.CAN_DEFAULT)
+    srch.connect('clicked', me.search)
+    top.pack_start(srch, False, False, 2)
+    box.pack_start(top, False, False, 2)
+
+    # Left pane: the candidates.  Right pane: the selected image.
+    me.sv = SearchViewer(me)
+    panes = GTK.HPaned()
+    panes.pack1(me.sv.scr, False, True)
+    scr = GTK.ScrolledWindow()
+    scr.set_policy(GTK.POLICY_AUTOMATIC, GTK.POLICY_AUTOMATIC)
+    me.img = GTK.Image()
+    evb = GTK.EventBox()
+    evb.add(me.img)
+    fix_background(evb)
+    scr.add_with_viewport(evb)
+    panes.pack2(scr, True, True)
+    panes.set_position(THUMBSZ + 64)
+    box.pack_start(panes, True, True, 0)
+    me.win.add(box)
+    me.win.connect('destroy', me.destroyed)
+    me.win.set_default_size(800, 550)
+    srch.grab_default()
+
+  def update(me, view, which, dir, current):
+    """
+    Point the chooser at a different cover and show the window.
+
+    VIEW is the viewer to notify (through its `replace' method) when a
+    candidate is activated; WHICH is the cover being replaced, and is
+    handed back to VIEW; DIR is the directory name from which the initial
+    query is made; CURRENT is the existing image, or None.
+    """
+    me.view = view
+    me.dir = dir
+    me.which = which
+    me.current = current
+    me.img.clear()
+    me.sv.switch(current)
+    me.query.set_text(me.makequery(dir))
+    me.win.show_all()
+
+  def search(me, w):
+    """
+    Run the query in the entry box and list the results.
+
+    This is the search button's `clicked' handler.  Failures are reported
+    by printing the message.
+    """
+    q = me.query.get_text()
+    try:
+      try:
+        rq = U2.Request(me.SEARCHURL + U.quote_plus(q),
+                        None,
+                        { 'Referer':
+                          'http://www.distorted.org.uk/~mdw/coverart' })
+        rs = U2.urlopen(rq)
+      except U2.URLError, e:
+        raise SearchFail(e.reason)
+      result = JS.load(rs)
+      if result['responseStatus'] != 200:
+        raise SearchFail('%s (status = %d)' %
+                         (result['responseDetails'],
+                          result['responseStatus']))
+      d = result['responseData']
+      # Start again with just the current image, then add the results.
+      me.sv.switch(me.current)
+      for r in d['results']:
+        try:
+          me.sv.add(SearchResult(r))
+        except (U2.URLError, U2.HTTPError):
+          pass
+    except SearchFail, e:
+      print e.args[0]
+
+  def makequery(me, path):
+    """
+    Return an initial query for PATH: its last two components, each in
+    double quotes, separated by a space.
+    """
+    bits = path.split(OS.path.sep)
+    return ' '.join(['"%s"' % p for p in bits[-2:]])
+
+  def selected(me, cov):
+    """Show COV's image in the large view, or clear it if COV is None."""
+    if cov:
+      me.img.set_from_pixbuf(cov.img.pixbuf)
+    else:
+      me.img.clear()
+
+  def activated(me, cov):
+    """Ask the viewer to replace the cover with COV's image."""
+    # NOTE(review): the entry for the current cover is also a `SearchCover',
+    # but its image is a `FileImage', which has neither the `ext' nor the
+    # `_data' that `MainViewer.replace' reads, so activating it looks like
+    # it will fail there -- confirm whether this should test for
+    # `SearchResult' instead.
+    if isinstance(cov, SearchCover):
+      me.view.replace(me.which, cov.img)
+
+  def destroyed(me, w):
+    """Forget the global chooser when its window is destroyed."""
+    global CHOOSER
+    CHOOSER = None
+
+CHOOSER = None
+
+class ViewCover (object):
+  """
+  An entry in the main viewer: one directory and its cover image, if any.
+
+  DIR is used as the caption; PATH is the directory's full path; LEAF is
+  the cover file's name within PATH, or None if there isn't one.  `img' is
+  the image to display (NULLIMG if there's no cover); `covimg' is the
+  actual cover image or None.
+  """
+
+  NULLIMG = NullImage(THUMBSZ, '?')
+
+  def __init__(me, dir, path, leaf):
+    me.text = dir
+    me.path = path
+    me.leaf = leaf
+    if me.leaf:
+      me.img = me.covimg = FileImage(OS.path.join(me.path, me.leaf))
+    else:
+      me.img = me.NULLIMG
+      me.covimg = None
+
+class MainViewer (BaseCoverViewer):
+  """
+  The main window's viewer: one entry for each leaf directory under ROOT.
+  """
+
+  # NOTE(review): not referenced anywhere in the code visible here.
+  ITERATTR = 'vit'
+
+  def __init__(me, root):
+    BaseCoverViewer.__init__(me)
+    me.root = root
+    me.walk('')
+
+  def walk(me, dir):
+    """
+    Scan DIR (relative to the root) recursively, in sorted order.
+
+    An entry is added for each directory which has no subdirectories.  Its
+    cover is the file named `cover.jpg', `cover.png' or `cover.gif'; if
+    there are several, the last in sorted order is used.
+    """
+    leafp = True
+    b = OS.path.join(me.root, dir)
+    imgfile = None
+    for l in sorted(OS.listdir(b)):
+      if OS.path.isdir(OS.path.join(b, l)):
+        leafp = False
+        me.walk(OS.path.join(dir, l))
+      else:
+        base, ext = OS.path.splitext(l)
+        if base == 'cover' and ext in ['.jpg', '.png', '.gif']:
+          imgfile = l
+    if leafp:
+      me.add(ViewCover(dir, OS.path.join(me.root, dir), imgfile))
+
+  def select(me, cov):
+    # Selecting an entry in the main view does nothing.
+    pass
+
+  def activate(me, cov):
+    """Open the chooser on COV, creating the chooser if necessary."""
+    global CHOOSER
+    if not CHOOSER:
+      CHOOSER = CoverChooser()
+    CHOOSER.update(me, cov, cov.text, cov.covimg)
+
+  def replace(me, cov, img):
+    """
+    Install IMG as the cover for COV's directory.
+
+    IMG must provide `ext' and `_data'.  The data is written to a `.new'
+    file which is then renamed into place as `cover.EXT'; an old cover
+    with a different name is deleted.  The list row is then updated and
+    the chooser pointed at the new cover.
+    """
+    leaf = 'cover.%s' % img.ext
+    out = OS.path.join(cov.path, leaf)
+    new = out + '.new'
+    with open(new, 'wb') as f:
+      f.write(img._data)
+    OS.rename(new, out)
+    if cov.leaf not in [None, leaf]:
+      OS.unlink(OS.path.join(cov.path, cov.leaf))
+    ncov = ViewCover(cov.text, cov.path, leaf)
+    ncov.it = cov.it
+    me.list[ncov.it] = [ncov.img.thumbnail.pixbuf, ncov.text, ncov]
+    me.activate(ncov)
+
+# The root of the directory tree to scan is the first command-line argument.
+ROOT = SYS.argv[1]
+
+LOOP = G.MainLoop()
+
+# Colours used by `fix_background'.  These must be set before any viewer is
+# constructed.
+BLACK = GDK.Color(0, 0, 0)
+WHITE = GDK.Color(65535, 65535, 65535)
+
+# Build the main window; closing it ends the main loop.
+WIN = GTK.Window()
+VIEW = MainViewer(ROOT)
+WIN.add(VIEW.scr)
+WIN.set_default_size(814, 660)
+WIN.set_title('coverart')
+WIN.connect('destroy', lambda _: LOOP.quit())
+WIN.show_all()
+
+LOOP.run()
diff --git a/flaccrip/cat-prefix b/flaccrip/cat-prefix

new file mode 100755 (executable)

index 0000000..c3850cc
--- /dev/null
+++ b/flaccrip/cat-prefix
@@ -0,0 +1,78 @@
+#! /usr/bin/tcc -run
+/* -*-c-*- */
+
+/* Some of the scripts in the `flaccrip' suite want to chop streams of PCM
+ * data about.  Once upon a time, they used dd(1) for this, but dd does
+ * entirely the wrong thing on short reads, and short reads happen quite
+ * regularly on pipes.
+ *
+ * The requirements on this program are that it copy exactly the first N
+ * bytes from stdin to stdout, without reading anything else from stdin or
+ * writing anything else to stdout.  (That's why it doesn't use stdio to do
+ * the copying: stdio buffering will read too much from stdin, which will
+ * cause stream corruption later.)
+ *
+ * As a special bonus, it's quite paranoid about error checking, and does way
+ * more work on dealing with short reads and writes than is necessary for
+ * working on pipes.  It will fail miserably if either stdin or stdout is
+ * non-blocking.
+ */
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <sys/types.h>
+#include <unistd.h>
+
+/* Copy exactly LENGTH bytes (the sole argument) from stdin to stdout.
+ *
+ * The data is moved a bufferful at a time using read(2) and write(2)
+ * directly, retrying short transfers and EINTR.  Exits with status 1, after
+ * printing a message to stderr, on a usage error, bad length, I/O error, or
+ * premature end-of-file.
+ */
+int main(int argc, char *argv[])
+{
+  unsigned long n;
+  size_t nn, nleft;
+  ssize_t sz;
+  char buf[40960], *p, *end;
+
+  if (argc != 2) {
+    fprintf(stderr, "Usage: %s LENGTH\n", argv[0]);
+    exit(1);
+  }
+
+  /* Reject empty, non-numeric and out-of-range lengths rather than quietly
+   * copying some unintended amount.
+   */
+  errno = 0;
+  n = strtoul(argv[1], &end, 0);
+  if (errno || end == argv[1] || *end) {
+    fprintf(stderr, "%s: bad length `%s'\n", argv[0], argv[1]);
+    exit(1);
+  }
+
+  while (n) {
+
+    /* Fill the buffer, or as much of it as we still need. */
+    nn = nleft = n > sizeof(buf) ? sizeof(buf) : n;
+    p = buf;
+    while (nleft) {
+      sz = read(STDIN_FILENO, p, nleft);
+      if (sz < 0) {
+        if (errno == EINTR)
+          continue;
+        fprintf(stderr, "%s: read error: %s\n", argv[0], strerror(errno));
+        exit(1);
+      } else if (!sz) {
+        fprintf(stderr, "%s: unexpected eof\n", argv[0]);
+        exit(1);
+      }
+      nleft -= sz; p += sz;
+    }
+
+    /* Write out everything we just read. */
+    nleft = nn;
+    p = buf;
+    while (nleft) {
+      sz = write(STDOUT_FILENO, p, nleft);
+      if (sz < 0) {
+        if (errno == EINTR)
+          continue;
+        fprintf(stderr, "%s: write error: %s\n", argv[0], strerror(errno));
+        exit(1);
+      } else if (!sz) {
+        fprintf(stderr, "%s: empty write\n", argv[0]);
+        exit(1);
+      }
+      nleft -= sz; p += sz;
+    }
+
+    n -= nn;
+  }
+  return (0);
+}
diff --git a/flaccrip/flaccrip-arfetch b/flaccrip/flaccrip-arfetch

new file mode 100755 (executable)

index 0000000..ddb2c12
--- /dev/null
+++ b/flaccrip/flaccrip-arfetch
@@ -0,0 +1,13 @@
+#! /bin/bash
+
+set -e
+: ${JBDIR=/mnt/jb}
+: ${ACCURATERIP_SERVER=http://www.accuraterip.com/accuraterip}
+
+## With no argument, work out the AccurateRip id for the current directory.
+case $# in
+  0) set -- $($JBDIR/bin/flaccrip-discid -a .) ;;
+  1) ;;
+  *) echo >&2 "Usage: $0 ACCURATERIP-ID"; exit 1 ;;
+esac
+
+## Build the path on the server: the 10th, 11th and 12th characters of the
+## id, in reverse order, become three directory levels, followed by
+## `dBAR-' and the whole id.
+id=$(echo "$1" | sed 's:...-.....\(.\)\(.\)\(.\).*:\3/\2/\1/dBAR-&:')
+
+## Fetch the record and turn it into text.
+curl -fsS $ACCURATERIP_SERVER/$id.bin | $JBDIR/bin/flaccrip-decode
diff --git a/flaccrip/flaccrip-check b/flaccrip/flaccrip-check

new file mode 100755 (executable)

index 0000000..bbc093a
--- /dev/null
+++ b/flaccrip/flaccrip-check
@@ -0,0 +1,121 @@
+#! /bin/bash
+
+set -e
+: ${JBDIR=/mnt/jb}
+
+force=nil
+while getopts fo:i: opt; do
+  case "$opt" in
+    o) offset=$OPTARG ;;
+    f) force=t ;;
+    i) id=$OPTARG ;;
+    *) exit 1 ;;
+  esac
+done
+shift $((OPTIND - 1))
+
+dir=$1; shift; cd "$dir"
+## If no tracks were named on the command line, check every file in the
+## directory whose name looks like `NN-*.flac' (or with `.' or ` ' after
+## the number).  Use `set --': with an empty list, a bare `set' would dump
+## the shell's variables on stdout rather than clear the arguments.
+case $# in
+  0)
+    set -- $(ls | sed '
+       /^\([0-9][0-9]\)[-.\ ].*\.flac$/!d
+       s//\1/
+    ')
+    ;;
+esac
+
+## Without any tracks there's nothing to check; and the final tally would
+## otherwise find that all zero of them matched and announce a good rip.
+case $# in
+  0) echo >&2 "$0: no tracks found in $dir"; exit 1 ;;
+esac
+
+case "${offset+t}" in
+  t) ;;
+  *) if [ -r .offset ]; then read offset <.offset; else offset=0; fi ;;
+esac
+
+: ${id=$($JBDIR/bin/flaccrip-discid -a .)}
+ntr=$(echo "$id" | sed 's:^0*\([1-9][0-9]*\)-.*$:\1:')
+ar=$($JBDIR/bin/flaccrip-arfetch $id)
+npress=0
+while read type rest; do
+  case "$type" in
+    H) npress=$((npress + 1)) ;;
+    *) ;;
+  esac
+done <<EOF
+$ar
+EOF
+case $npress,$force in
+  0,nil)
+    echo >&2 "$0: no AccurateRip record found"
+    exit 1
+    ;;
+  *)
+    echo "Found $npress pressings"
+    ;;
+esac
+
+cks="" ntrack=$# ngood=0
+for t in "$@"; do
+  while :; do case "$t" in 0*) t=${t#0} ;; *) break ;; esac; done
+  l=$((t - 1)) h=$((t + 1))
+  tt=$(printf %02d "$t")
+  ll=$(printf %02d "$l")
+  hh=$(printf %02d "$h")
+  flags=""
+  unset before after
+
+  if ((t == 1)); then
+    flags="${flags+$flags }-f"
+  elif [ -f "$ll"[-.\ ]*.flac ]; then
+    before=$(echo "$ll"[-.\ ]*.flac)
+  elif ((offset < 0)); then
+    echo >&2 "$0: warning: -ve offset, but track $l missing; using silence"
+  fi
+
+  if ((t == ntr)); then
+    flags="${flags+$flags }-l"
+  elif [ -f "$hh"[-.\ ]*.flac ]; then
+    after=$(echo "$hh"[-.\ ]*.flac)
+  elif ((offset > 0)); then
+    echo >&2 "$0: warning: +ve offset, but track $h missing; using silence"
+  fi
+
+  ck=$($JBDIR/bin/flaccrip-offset -o"$offset" \
+    $flags ${before+-b "$before"} ${after+-a "$after"} "$tt"[-.\ ]*.flac |
+    $JBDIR/bin/flaccrip-compute)
+
+  press=0 found=nil
+  while read arty art arconf arck; do
+    case "$arty" in
+      H)
+       press=$((press + 1))
+       ;;
+      T)
+       case $art,$arck in
+         $t,$ck)
+           found=t
+           printf "Track %2d (%s): match pressing %d; confidence %d\n" \
+             $t $ck $press $arconf
+           ngood=$((ngood + 1))
+           break
+           ;;
+       esac
+       ;;
+    esac
+  done <<EOF
+$ar
+EOF
+  case $found in
+    nil)
+      printf "Track %2d (%s): NO MATCH\n" $t $ck
+      ;;
+  esac
+done
+
+if ((ngood == ntrack)); then
+  echo "All tracks match: GOOD RIP"
+  exit 0
+elif ((ngood == 0)); then
+  echo "No matches: new pressing, incorrect offset or wrong discid?"
+  exit 2
+else
+  echo "Matched $ngood/$ntrack: time to re-rip :-("
+  exit 3
+fi
diff --git a/flaccrip/flaccrip-compute b/flaccrip/flaccrip-compute

new file mode 100755 (executable)

index 0000000..ad41912
--- /dev/null
+++ b/flaccrip/flaccrip-compute
@@ -0,0 +1,42 @@
+#! /usr/bin/tcc -run
+/* -*-c-*- */
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <getopt.h>
+
+/* Read little-endian 32-bit words from stdin and print a checksum of them.
+ *
+ * Each word is multiplied by a counter which is incremented before each
+ * word; the checksum is the sum of these products, and is printed as eight
+ * hex digits.  Trailing bytes which don't make up a whole word are ignored.
+ *
+ * Options:
+ *   -b N      start the counter at N, rather than zero
+ *   -r        also print the plain sum of the words, in decimal
+ */
+int main(int argc, char *argv[])
+{
+  unsigned long ck = 0, t = 0;
+  unsigned long ns = 0;
+  unsigned long x;
+  unsigned f = 0;
+#define F_ROLLING 1u
+
+  for (;;) {
+    int opt = getopt(argc, argv, "b:r");
+    if (opt < 0) break;
+    switch (opt) {
+      case 'b': ns = strtoul(optarg, 0, 0); break;
+      case 'r': f |= F_ROLLING; break;
+      default: exit(1);
+    }
+  }
+  for (;;) {
+    unsigned char b[4];
+    if (!fread(b, 4, 1, stdin)) break;
+    /* Widen each byte before shifting: a bare `b[3] << 24' is done in
+     * (signed) `int', and overflows when the top bit is set.
+     */
+    x = ((unsigned long)b[0] <<  0) | ((unsigned long)b[1] <<  8) |
+        ((unsigned long)b[2] << 16) | ((unsigned long)b[3] << 24);
+    ck += x*++ns; t += x;
+  }
+  if (ferror(stdin)) {
+    fprintf(stderr, "%s: read error: %s\n", argv[0], strerror(errno));
+    exit(1);
+  }
+  /* Only the bottom 32 bits of the sums are wanted. */
+  printf("%08lx", ck & 0xffffffff);
+  if (f & F_ROLLING) printf(" %lu", t & 0xffffffff);
+  putchar('\n');
+  return (0);
+}
diff --git a/flaccrip/flaccrip-decode b/flaccrip/flaccrip-decode

new file mode 100755 (executable)

index 0000000..bf29f34
--- /dev/null
+++ b/flaccrip/flaccrip-decode
@@ -0,0 +1,16 @@
+#! /usr/bin/python
+
+from struct import unpack
+from sys import stdin
+
+# Decode a binary record stream on stdin into text lines on stdout.
+#
+# The input is a sequence of blocks.  Each block has a 13-byte header (a
+# one-byte track count and three little-endian 32-bit words), followed by
+# a 9-byte entry per track (a one-byte confidence and two little-endian
+# 32-bit words).  A header is printed as an `H' line with the block's index
+# and its fields; a track entry as a `T' line with the track number
+# (counting from one), the confidence, and the first word in hex.  The
+# second word of a track entry is ignored.
+p = 0
+while True:
+  h = stdin.read(13)
+  if h == '':
+    break
+  ntr, da, db, cid = unpack('<B3I', h)
+  print 'H %d %d %08x %08x %08x' % (p, ntr, da, db, cid)
+  p += 1
+  for i in xrange(ntr):
+    conf, ck, hunoz = unpack('<B2I', stdin.read(9))
+    print 'T %d %d %08x' % (i + 1, conf, ck)
diff --git a/flaccrip/flaccrip-discid b/flaccrip/flaccrip-discid

new file mode 100755 (executable)

index 0000000..c29f032
--- /dev/null
+++ b/flaccrip/flaccrip-discid
@@ -0,0 +1,177 @@
+#! /bin/bash
+
+set -e
+: ${JBDIR=/mnt/jb}
+
+###--------------------------------------------------------------------------
+### CD identification algorithms.
+###
+### 1. CDDB
+###
+### CCLLLLNN [NTRACK TRACK-START... LENGTH]
+###
+### CC is a checksum of the track start times; LLLL is the offset of the
+### leadout track, in seconds (rounded down), and NN is the total number of
+### tracks.  All of these are in hexadecimal, and include the 150-frame (2
+### second) pre-gap.  Since a CD can have at most 99 tracks, and can contain
+### no more than 90 minutes of audio (!), the other two items fit without
+### needing reduction.
+###
+### The checksum is the sum of the decimal digits of the track start times,
+### in seconds, reduced modulo 255.
+###
+### NTRACK is the number of tracks; LENGTH is the offset of the leadout in
+### seconds.  These are the same as in the checksum, so repeating them is
+### pointless, but it's done anyway.  The TRACK-STARTs are the track start
+### offsets, in frames.
+###
+###
+### 2. AccurateRip
+###
+### DA1-DA2-CDDBID
+###
+### CDDBID is the CDDB id as described above.  DA1 is simply the sum of the
+### track starts, including the lead-out track; DA2 is the sum of the
+### products TRACKNO * OFFSET for the audio tracks only, but including the
+### final lead-out -- so a data track makes the last audio track look very
+### long.  Another wrinkle: the OFFSET for the first track is forced to 1 if
+### it's zero (to avoid the entry being lost, I presume, though I'm not sure
+### why this is ever so useful).
+###
+###
+### 3. MusicBrainz
+###
+### The MusicBrainz identification is a base64-encoded SHA-1 hash of the
+### table of contents.  The base64 encoding uses `.', `_' and `-' in place of
+### `+', `/' and `=', because the standard characters /all/ have special
+### meanings in URL query strings.  (Duh.  And I'm not quite sure why we
+### still need the trailing marker.)
+###
+### The message to be hashed is FIRST LAST LENGTH TRACK-START..., where FIRST
+### and LAST are the first and last track numbers, LENGTH is the offset of
+### the lead-out, in frames, and the TRACK-STARTs are the start offsets of
+### the tracks, in order, also in frames.  The track numbers are two
+### uppercase hex digits; the frame offsets are eight.  All of these are
+### simply concatenated together.
+###
+### MusicBrainz only concerns itself with the audio tracks.  If there's a
+### data track, then we ignore it, and the lead-out is considered to be 11400
+### frames before the data track.
+
+###--------------------------------------------------------------------------
+### Command line.
+
+format=cddb
+while getopts "acCm" opt; do
+  case "$opt" in
+    a) format=accuraterip ;;
+    c) format=cddb ;;
+    C) format=cddb-tracks ;;
+    m) format=musicbrainz ;;
+    *) exit 1 ;;
+  esac
+done
+shift $((OPTIND - 1))
+
+case $# in
+  0)
+    ;;
+  1)
+    if [ -r "$1/.discid" ]; then
+      exec <"$1/.discid"
+    else
+      exec < <($JBDIR/bin/flaccrip-toc "$1")
+    fi
+    ;;
+  *)
+    echo >&2 "Usage: $0 [-acCm] [DIRECTORY]"
+    exit 1
+    ;;
+esac
+
+###--------------------------------------------------------------------------
+### Main work.
+
+## Initial setup.
+cddbck=0
+cddbtracks=""
+nt=0 nat=0
+da=0 db=0
+mbtracks=""
+
+## Wander through the table of contents picking up unconsidered trifles.
+while read type offset; do
+
+  ## Bump the track numbers here.  Most things want 1-based numbering, so
+  ## this is right.  Don't bump for the end marker.  Those who care
+  ## (AccurateRip) will sort it out for themselves.
+  case "$type" in
+    T) nt=$((nt + 1)) nat=$((nat + 1));;
+    D) nt=$((nt + 1)) ;;
+  esac
+
+  ## Update the CDDB state.  This is common to several formats.
+  case "$type" in
+    [TD])
+      o=$((offset + 150))
+      s=$((o/75))
+      cddbtracks="${cddbtracks:+$cddbtracks }$o"
+      while :; do
+       case "$s" in
+         ?*) cddbck=$((cddbck + ${s:0:1})); s=${s#?} ;;
+         *) break ;;
+       esac
+      done
+      ;;
+    E)
+      final=$offset
+      ;;
+  esac
+
+  ## Update other bits of information.
+  case "$type" in
+    T)
+      da=$((da + offset))
+      db=$((db + nat*(offset > 0 ? offset : 1)))
+      mbtracks="$mbtracks$(printf "%08X" $((offset + 150)))"
+      ;;
+    D)
+      mbfinal=$((offset - 11250))
+      ;;
+    E)
+      da=$((da + offset))
+      db=$((db + (nat + 1)*(offset > 0 ? offset : 1)))
+      case "${mbfinal+t}" in
+       t) ;;
+       *) mbfinal=$((offset + 150)) ;;
+      esac
+      ;;
+  esac
+done
+
+## Sort out the CDDB id.
+cddbid=$(printf "%02x%04x%02x" $((cddbck%255)) $((final/75)) $nt)
+
+###--------------------------------------------------------------------------
+### Produce the answer.
+
+case "$format" in
+  cddb)
+    echo "$cddbid"
+    ;;
+  cddb-tracks)
+    echo "$cddbid $nt $cddbtracks $((final/75 + 2))"
+    ;;
+  accuraterip)
+    printf "%03d-%08x-%08x-%s\n" $nat $da $db $cddbid
+    ;;
+  musicbrainz)
+    mb=$(printf "%02X%02X%08X%s" 1 $nat $mbfinal $mbtracks)
+    for ((i = nat; i < 99; i++)); do
+      mb="${mb}00000000"
+    done
+    printf "%s" $mb |
+      openssl dgst -sha1 -binary |
+      openssl base64 | tr '+/=' '._-'
+    ;;
+esac
diff --git a/flaccrip/flaccrip-guessoffset b/flaccrip/flaccrip-guessoffset

new file mode 100755 (executable)

index 0000000..bca4cda
--- /dev/null
+++ b/flaccrip/flaccrip-guessoffset
@@ -0,0 +1,112 @@
+#! /bin/bash
+
+set -e
+: ${JBDIR=/mnt/jb}
+
+while getopts i: opt; do
+  case "$opt" in
+    i) id=$OPTARG ;;
+    *) exit 1 ;;
+  esac
+done
+shift $((OPTIND - 1))
+
+dir=$1; shift; cd "$dir"
+
+## If no tracks were named on the command line, use every file in the
+## directory whose name looks like `NN-*.flac' (or with `.' or ` ' after
+## the number).  Use `set --': with an empty list, a bare `set' would dump
+## the shell's variables on stdout rather than clear the arguments.
+case $# in
+  0)
+    set -- $(ls | sed '
+       /^\([0-9][0-9]\)[-. ].*\.flac$/!d
+       s//\1/
+    ')
+    ;;
+esac
+
+: ${id=$($JBDIR/bin/flaccrip-discid -a .)}
+ntr=$(echo "$id" | sed 's:^0*\([1-9][0-9]*\)-.*$:\1:')
+ar=$($JBDIR/bin/flaccrip-arfetch $id)
+npress=0
+while read type rest; do
+  case "$type" in
+    H) npress=$((npress + 1)) ;;
+    *) ;;
+  esac
+done <<EOF
+$ar
+EOF
+case $npress in
+  0)
+    echo >&2 "$0: no AccurateRip record found: this isn't going to work"
+    exit 1
+    ;;
+  *)
+    echo "Found $npress pressings"
+    ;;
+esac
+
+for t in "$@"; do
+  while :; do case "$t" in 0*) t=${t#0} ;; *) break ;; esac; done
+  l=$((t - 1)) h=$((t + 1))
+  tt=$(printf %02d "$t")
+  ll=$(printf %02d "$l")
+  hh=$(printf %02d "$h")
+  flags=""
+  unset before after
+
+  if ((t == 1)); then
+    flags="${flags+$flags }-f"
+  elif [ -f "$ll"[-.\ ]*.flac ]; then
+    before=$(echo "$ll"[-.\ ]*.flac)
+  else
+    echo >&2 "$0: warning: track $l missing; using silence"
+  fi
+
+  if ((t == ntr)); then
+    flags="${flags+$flags }-l"
+  elif [ -f "$hh"[-.\ ]*.flac ]; then
+    after=$(echo "$hh"[-.\ ]*.flac)
+  else
+    echo >&2 "$0: warning: track $h missing; using silence"
+  fi
+
+  echo "T $t"
+
+  cks=""
+  while read arty art arconf arck; do
+    case "$arty,$art" in
+      T,$t)
+      cks="${cks:+$cks }$arck"
+      echo "C $arck $arconf"
+      ;;
+    esac
+  done <<EOF
+$ar
+EOF
+  $JBDIR/bin/flaccrip-trackoffsets \
+    $flags ${before+-b "$before"} ${after+-a "$after"} "$tt"[-.\ ]*.flac $cks
+
+done | awk '
+BEGIN {
+
+}
+
+$1 == "T" {
+       delete CK;
+}
+
+$1 == "C" {
+       CK[$2] = $3;
+}
+
+$1 == "M" {
+       m = $2; ck = $3;
+       c = CK[ck];
+       MM[m]++;
+       if (!(m in MC) || MC[m] > c) MC[m] = c;
+}
+
+END {
+       for (m in MM)
+         print m " " MM[m] " " MC[m];
+}' |
+sort -rn -k2 -k3
diff --git a/flaccrip/flaccrip-offset b/flaccrip/flaccrip-offset

new file mode 100755 (executable)

index 0000000..2fb7e53
--- /dev/null
+++ b/flaccrip/flaccrip-offset
@@ -0,0 +1,75 @@
+#! /bin/bash
+
+set -e
+: ${JBDIR=/mnt/jb}
+
+zero_start=0 trim_end=0 offset=0
+while getopts flF:L:o:b:a: opt; do
+  case "$opt" in
+    f) zero_start=$((588 * 5 - 1)) ;;
+    l) trim_end=$((588 * 5)) ;;
+    F) zero_start=$OPTARG ;;
+    L) trim_end=$OPTARG ;;
+    o) offset=$OPTARG ;;
+    b) before=$OPTARG ;;
+    a) after=$OPTARG ;;
+    *) exit 1 ;;
+  esac
+done
+
+shift $((OPTIND - 1))
+case $# in
+  1) ;;
+  *) echo >&2 "missing file argument" ;;
+esac
+file=$1
+
+len=$(metaflac --show-total-samples "$file")
+
+if ((offset < 0)); then
+  start=0
+  stop=$((stop + offset))
+elif ((offset >= 0)); then
+  start=$offset
+  stop=$len
+fi
+
+{
+  if ((offset < 0)); then
+    if [ "${before+set}" ]; then
+      n=$(metaflac --show-total-samples "$before")
+      flac -d -s -o- --force-raw-format --sign=signed --endian=little \
+       --skip=$((n + offset)) --until=$n \
+       "$before"
+    else
+      $JBDIR/bin/cat-prefix $((4 * -offset)) </dev/zero
+    fi
+  fi
+
+  flac -d -s -o- --force-raw-format --sign=signed --endian=little \
+    --skip=$start --until=$stop \
+    "$file"
+
+  if ((offset > 0)); then
+    if [ "${after+set}" ]; then
+      flac -d -s -o- --force-raw-format --sign=signed --endian=little \
+       --skip=0 --until=$offset \
+       "$after"
+    else
+      $JBDIR/bin/cat-prefix $((4 * offset)) </dev/zero
+    fi
+  fi
+} | {
+  if ((zero_start > 0)); then
+    $JBDIR/bin/cat-prefix $((4 * zero_start)) </dev/zero
+    $JBDIR/bin/cat-prefix $((4 * zero_start)) >/dev/null
+    len=$((len - zero_start))
+  fi
+  if ((trim_end > 0)); then
+    $JBDIR/bin/cat-prefix $((4 * (len - trim_end)))
+    $JBDIR/bin/cat-prefix $((4 * trim_end)) </dev/zero
+    cat >/dev/null
+  else
+    cat
+  fi
+}
diff --git a/flaccrip/flaccrip-slide b/flaccrip/flaccrip-slide

new file mode 100755 (executable)

index 0000000..c5e8323
--- /dev/null
+++ b/flaccrip/flaccrip-slide
@@ -0,0 +1,134 @@
+#! /usr/bin/tcc -run
+/* -*-c-*- */
+
+/* A simple program to compute AccurateRip checksums for a sliding window
+ * over a stream.  The algorithm is based on an observation by Jon Lund
+ * Steffensen (http://jonls.dk/2009/10/calculating-accuraterip-checksums/).
+ *
+ * The basic checksum is c = SUM_i (i + 1) S_i, where 0 <= i < n ranges over
+ * the sample numbers, and S_i is the data for the sample point, expressed as
+ * a single element of Z/2^{32}Z (a cheesy notational device which avoids me
+ * having to write `(mod 2^{32})' everywhere).
+ *
+ * Steffensen's observation is this: if T_i = S_{i+1} for 0 <= i < n - 1 then
+ * we can compute the checksum c' over the T_i given only a small quantity of
+ * data.  Indeed,
+ *
+ *   c' - c = SUM_{0<=i<n} (i + 1) T_i - SUM_{0<=i<n} (i + 1) S_i
+ *         = SUM_{0<=i<n-1} (i + 1) S_{i+1} + n T_{n-1} -
+ *             SUM_{0<i<n} (i + 1) S_i - S_0
+ *         = SUM_{0<i<n} i S_i + n T_{n-1} -
+ *             SUM_{0<i<n} (i + 1) S_i - S_0
+ *         = n T_{n-1} - SUM_{0<=i<n} S_i
+ *
+ * The term SUM_{0<=i<n} S_i can be computed while we're summing up the
+ * initial window.  Obviously, maintaining it is trivial.
+ *
+ * The final track is dealt with specially by clobbering the last five frames
+ * with silence.  Since silent samples contribute nothing to the checksum, we
+ * can instead consider the final track to be truncated at this point.
+ *
+ * Life gets a little bit more complicated when we deal with the special
+ * rules for the first track: rather than simply starting (almost) five
+ * frames in, we silence the initial segment.  So when we advance our window,
+ * we must take off m S_0 rather than simply S_0.
+ */
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <getopt.h>
+
+/* Slide a checksum window along a stream and report matching offsets.
+ *
+ * Usage: flaccrip-slide [-d] [-i I0] NSAMPLES CHECKSUM SUM
+ *                       PREFIX SUFFIX TARGET ...
+ *
+ * CHECKSUM (hex) and SUM are the checksum and sample total for the initial
+ * window of NSAMPLES samples.  PREFIX and SUFFIX are files of 4-byte
+ * little-endian samples: each step reads one sample from each, treating
+ * the PREFIX sample as the one leaving the window and the SUFFIX sample as
+ * the one entering it.  Whenever the (32-bit) checksum equals one of the
+ * TARGET values (hex), the line `OFFSET CHECKSUM' is written to stdout.
+ * The loop ends when either sample file is exhausted.
+ */
+int main(int argc, char *argv[])
+{
+  /* i0 must default to zero: it scales the departing sample, and is only
+   * nonzero when the caller asks for first-track treatment with `-i'. */
+  unsigned long ns, ck, tot, i0 = 0, o = 0, x, y;
+  FILE *fp, *fs;
+  unsigned long *tv;
+  int i;
+  const char *quis = argv[0];
+  unsigned char b[4];
+  unsigned f = 0;
+#define F_DEBUG 1u
+
+  for (;;) {
+    int opt = getopt(argc, argv, "di:");
+    if (opt == -1) break;
+    switch (opt) {
+      case 'd': f |= F_DEBUG; break;
+      case 'i': i0 = strtoul(optarg, 0, 0); break;
+      default: exit(1);
+    }
+  }
+  argv += optind; argc -= optind;
+
+  if (argc < 6) {
+    fprintf(stderr, "Usage: %s [-d] [-i I0] NSAMPLES CHECKSUM SUM "
+           "PREFIX SUFFIX TARGET ...\n", quis);
+    exit(1);
+  }
+  ns = strtoul(argv[0], 0, 0);
+  ck = strtoul(argv[1], 0, 16);
+  tot = strtoul(argv[2], 0, 0);
+  if ((fp = fopen(argv[3], "rb")) == 0) {
+    fprintf(stderr, "%s: open %s: %s\n", quis, argv[3], strerror(errno));
+    exit(1);
+  }
+  if ((fs = fopen(argv[4], "rb")) == 0) {
+    fprintf(stderr, "%s: open %s: %s\n", quis, argv[4], strerror(errno));
+    exit(1);
+  }
+  argv += 5; argc -= 5;
+
+  /* The remaining arguments are the target checksums. */
+  if ((tv = malloc(argc * sizeof(*tv))) == 0) {
+    fprintf(stderr, "%s: malloc: %s\n", quis, strerror(errno));
+    exit(1);
+  }
+  for (i = 0; i < argc; i++)
+    tv[i] = strtoul(argv[i], 0, 16);
+
+  for (;;) {
+
+    /* `unsigned long' may be wider than 32 bits, so reduce explicitly. */
+    ck &= 0xffffffff;
+
+    if (f & F_DEBUG) {
+      fprintf(stderr, "%s: DEBUG: offset = %lu, ck = %08lx, tot = %lu\n",
+             quis, o, ck, tot);
+    }
+
+    for (i = 0; i < argc; i++) {
+      if (ck == tv[i]) {
+       printf("%lu %08lx\n", o, ck);
+       break;
+      }
+    }
+
+    /* Fetch the sample leaving the window.  Widen each byte before
+     * shifting: shifting a promoted `int' by 24 can set the sign bit. */
+    if (!fread(b, 4, 1, fp)) {
+      if (ferror(fp)) {
+       fprintf(stderr, "%s: read prefix: %s\n", quis, strerror(errno));
+       exit(1);
+      }
+      break;
+    }
+    x = ((unsigned long)b[0] <<  0) | ((unsigned long)b[1] <<  8) |
+       ((unsigned long)b[2] << 16) | ((unsigned long)b[3] << 24);
+
+    /* Fetch the sample entering the window. */
+    if (!fread(b, 4, 1, fs)) {
+      if (ferror(fs)) {
+       fprintf(stderr, "%s: read suffix: %s\n", quis, strerror(errno));
+       exit(1);
+      }
+      break;
+    }
+    y = ((unsigned long)b[0] <<  0) | ((unsigned long)b[1] <<  8) |
+       ((unsigned long)b[2] << 16) | ((unsigned long)b[3] << 24);
+
+    if (f & F_DEBUG)
+      fprintf(stderr, "%s: DEBUG: prefix = %08lx, suffix = %08lx\n",
+             quis, x, y);
+
+    /* Advance by one sample: c' = c + n T_{n-1} - SUM S_i - i0 S_0. */
+    ck += ns*y - tot - i0*x;
+    tot += y - x;
+    o++;
+  }
+
+  return (0);
+}
diff --git a/flaccrip/flaccrip-toc b/flaccrip/flaccrip-toc

new file mode 100755 (executable)

index 0000000..46e1790
--- /dev/null
+++ b/flaccrip/flaccrip-toc
@@ -0,0 +1,84 @@
+#! /bin/bash
+
+set -e
+
+###--------------------------------------------------------------------------
+### Table of contents representation.
+###
+### We need a simple table of contents representation.  A toc file consists
+### of a number of records, one per line; each record is a number of
+### blank-delimited fields.  Fields don't contain whitespace, so no quoting
+### is needed.  The first field of each record is a type which explains how
+### to decode the rest.
+###
+### `T' START  Audio track starts at START frames.
+###
+### `D' START  Data track, starts at START frames.
+###
+### `E' END    Leadout at END frames.
+
+###--------------------------------------------------------------------------
+### Command line options.
+
+hidden=0
+data=none
+while getopts "d:h:" opt; do
+  case "$opt" in
+    h) hidden=$OPTARG ;;
+    d) data=$OPTARG ;;
+    *) exit 1 ;;
+  esac
+done
+shift $((OPTIND - 1))
+
+case "$#" in
+  1) ;;
+  *) echo >&2 "Usage: $0 [-d DATA] [-h HIDDEN] DIR|CD-DEVICE"; exit 1 ;;
+esac
+source=$1
+
+###--------------------------------------------------------------------------
+### Work out what to do.
+
+if [ -d "$source" ]; then
+
+  ## Intuit what's going on from a directory full of FLAC files.
+  cd "$source"
+  for i in [0-9][0-9][-.\ ]*.flac; do
+    metaflac --show-total-samples "$i"
+  done | {
+    tn=1 tot=$hidden
+    while read samples; do
+      frames=$((samples/588))
+      echo "T $tot"
+      tot=$((tot + frames))
+    done
+    case "$data" in
+      none)
+       ;;
+      *)
+       tot=$((tot + 11400))
+       echo "D $tot"
+       tot=$((tot + data))
+       ;;
+    esac
+    echo "E $tot"
+  }
+
+elif [ -b "$source" ]; then
+
+  ## Read a table of contents from a CD.
+  wodim dev="$source" -toc | sed '
+       /^track:/ !d
+       s/^track:\(.*\) lba: *\([0-9][0-9]*\) (.*mode: *\([-0-9][0-9]*\) *$/\1 \3 \2/
+  ' | while read track mode offset; do
+    case "$track,$mode" in
+      lout,-1) echo "E $offset" ;;
+      *,0 | *,-1) echo "T $offset" ;;
+      *,1 | *,2) echo "D $offset" ;;
+    esac
+  done
+
+else
+  echo >&2 "$0: don't know how to read a toc from $source"
+fi
diff --git a/flaccrip/flaccrip-trackoffsets b/flaccrip/flaccrip-trackoffsets

new file mode 100755 (executable)

index 0000000..0520eb4
--- /dev/null
+++ b/flaccrip/flaccrip-trackoffsets
@@ -0,0 +1,98 @@
+#! /bin/bash
+###
+### Search for sample offsets at which FILE's checksum matches a TARGET.
+###
+### Usage: flaccrip-trackoffsets [-fld] [-F ZERO] [-L TRIM] [-s SPAN]
+###                              [-b BEFORE] [-a AFTER] FILE TARGET...
+###
+### The initial checksum is computed by `flaccrip-compute' for a window
+### starting SPAN samples before the start of FILE; `flaccrip-slide' then
+### advances the window one sample at a time.  Each match is printed as
+### `M OFFSET CHECKSUM', where OFFSET is relative to the start of FILE.
+###
+### Byte counts are 4 * samples throughout, i.e., 16-bit stereo is assumed.
+### NOTE(review): `cat-prefix N' is presumably a helper which copies the
+### first N bytes of its stdin to stdout -- confirm against its source.
+
+set -e
+: ${JBDIR=/mnt/jb}
+
+# NOTE(review): the option string accepts `-i ARG', but there is no `i)'
+# case below, so `-i' falls into `*) exit 1'; `i0' is also never read
+# (`flaccrip-slide' is given `-i "$zero_start"' instead).  Confirm intent.
+i0=0 zero_start=0 trim_end=0 span=$((588*5 - 1)) debug=""
+while getopts flF:L:i:b:a:s:d opt; do
+  case "$opt" in
+    f) zero_start=$((588*5 - 1)) ;;
+    l) trim_end=$((588*5)) ;;
+    F) zero_start=$OPTARG ;;
+    L) trim_end=$OPTARG ;;
+    b) before=$OPTARG ;;
+    a) after=$OPTARG ;;
+    s) span=$OPTARG ;;
+    d) debug=-d ;;
+    *) exit 1 ;;
+  esac
+done
+shift $((OPTIND - 1))
+
+# We need the file and at least one target checksum.
+case $# in
+  0 | 1)
+    echo >&2 "usage: $0 [-OPTIONS] FILE TARGET..."
+    exit 1
+    ;;
+esac
+file=$1; shift
+
+ns=$(metaflac --show-total-samples "$file")
+
+# Compute the starting state for the earliest window position: the last SPAN
+# samples of BEFORE (or silence), followed by FILE up to sample
+# NS - SPAN - TRIM_END.  `flaccrip-compute -r' prints two fields, which are
+# handed to `flaccrip-slide' below as its CHECKSUM and SUM arguments.
+read ck tot < <(
+  {
+    if [ "${before+set}" ]; then
+      n=$(metaflac --show-total-samples "$before")
+      flac -d -s -o- --force-raw-format --sign=signed --endian=little \
+       --skip=$((n - span)) \
+       "$before"
+    else
+      $JBDIR/bin/cat-prefix $((4 * span)) </dev/zero
+    fi
+
+    flac -d -s -o- --force-raw-format --sign=signed --endian=little \
+      --until=$((ns - span - trim_end)) \
+      "$file"
+  } | {
+    # Replace the first ZERO_START samples with silence: emit zeros, then
+    # discard the same amount of real input.
+    if ((zero_start > 0)); then
+      $JBDIR/bin/cat-prefix $((4 * zero_start)) </dev/zero
+      $JBDIR/bin/cat-prefix $((4 * zero_start)) >/dev/null
+    fi
+    cat
+  } | $JBDIR/bin/flaccrip-compute -r
+)
+
+# Slide the window.  `flaccrip-slide' counts offsets from zero at the
+# earliest window position, so subtract SPAN to make them relative to the
+# start of FILE.  Its two file arguments are process substitutions: first
+# the PREFIX stream (samples leaving the window), then the SUFFIX stream
+# (samples entering it).
+while read off ck; do
+  echo M $((off - span)) "$ck"
+done < <(
+  $JBDIR/bin/flaccrip-slide \
+    -i "$zero_start" $debug \
+    $((ns - trim_end)) $ck $tot \
+    <(
+      # PREFIX: if SPAN exceeds ZERO_START, begin with the last
+      # SPAN - ZERO_START samples of BEFORE (or silence); then the start of
+      # FILE, up to sample SPAN + ZERO_START.
+      if ((span > zero_start)); then
+       tail=$((span - zero_start))
+       if [ "${before+set}" ]; then
+         n=$(metaflac --show-total-samples "$before")
+         flac -d -s -o- --force-raw-format --sign=signed --endian=little \
+           --skip=$((n - tail)) \
+           "$before"
+       else
+         $JBDIR/bin/cat-prefix $((4 * tail)) </dev/zero
+       fi
+      fi
+      flac -d -s -o- --force-raw-format --sign=signed --endian=little \
+       --until=$((span + zero_start)) \
+       "$file"
+    ) \
+    <(
+      # SUFFIX: FILE from sample NS - SPAN - TRIM_END onwards.  If SPAN
+      # exceeds TRIM_END we run off the end of FILE, and continue with the
+      # first SPAN - TRIM_END samples of AFTER (or silence); otherwise we
+      # stop within FILE, at sample NS + SPAN - TRIM_END.
+      if ((span > trim_end)); then
+       flac -d -s -o- --force-raw-format --sign=signed --endian=little \
+         --skip=$((ns - span - trim_end)) \
+         "$file"
+       head=$((span - trim_end))
+       if [ "${after+set}" ]; then
+         flac -d -s -o- --force-raw-format --sign=signed --endian=little \
+           --until=$head \
+           "$after"
+       else
+         $JBDIR/bin/cat-prefix $((4 * head)) </dev/zero
+       fi
+      else
+       flac -d -s -o- --force-raw-format --sign=signed --endian=little \
+         --skip=$((ns - span - trim_end)) \
+         --until=$((ns + span - trim_end)) \
+         "$file"
+      fi
+    ) \
+    "$@"
+)
diff --git a/flaccrip/offset-album b/flaccrip/offset-album

new file mode 100755 (executable)

index 0000000..06ae35c
--- /dev/null
+++ b/flaccrip/offset-album
@@ -0,0 +1,64 @@
+#! /bin/bash
+###
+### Re-encode a whole album with its audio shifted by OFFSET samples.
+###
+### Usage: offset-album OFFSET SOURCEDIR TARGETDIR
+###
+### All of the FLAC files in SOURCEDIR are decoded into one continuous
+### stream, which is shifted by OFFSET samples (padding with silence) and
+### cut back into tracks of the original lengths; these are encoded into
+### TARGETDIR with the original tags.  Other files are copied across.
+###
+### Byte counts are 4 * samples throughout, and the encoder is told to
+### expect 16-bit stereo at 44100 Hz.
+### NOTE(review): `cat-prefix N' is presumably a helper which copies the
+### first N bytes of its stdin to stdout -- confirm against its source.
+
+set -e
+: ${JBDIR=/mnt/jb}
+
+case $# in
+  3) ;;
+  *) echo >&2 "Usage: $0 OFFSET SOURCEDIR TARGETDIR"; exit 1 ;;
+esac
+
+# TARGETDIR must not already exist: `mkdir' fails if it does, and `set -e'
+# stops us before the cleanup trap is installed.  After that, any early
+# exit removes the partially written target directory.
+offset=$1 sourcedir=$2 targetdir=$3
+mkdir "$targetdir"
+tmp="$targetdir"/_tmp; mkdir "$tmp"
+trap 'rm -rf "$targetdir"' EXIT INT TERM
+
+# Save each track's tags, and collect the track lengths (in samples) as a
+# space-separated list in glob order.
+lens=""
+for i in "$sourcedir"/*.flac; do
+  base=${i##*/}
+  metaflac --export-tags-to="$tmp/${base%.flac}.tags" "$i"
+  len=$(metaflac --show-total-samples "$i")
+  lens="${lens:+$lens }$len"
+done
+
+{
+  # Producer: the album as one PCM stream, with silence padded on at the
+  # front for a negative offset or at the back for a positive one.
+  if ((offset < 0)); then
+    $JBDIR/bin/cat-prefix $((4 * -offset)) </dev/zero
+  fi
+  for i in "$sourcedir"/*.flac; do
+    flac -d -s -o- --force-raw-format --sign=signed --endian=little "$i"
+  done
+  if ((offset > 0)); then
+    $JBDIR/bin/cat-prefix $((4 * offset)) </dev/zero
+  fi
+} | {
+  # Consumer: for a positive offset, throw away the first OFFSET samples.
+  if ((offset > 0)); then
+    $JBDIR/bin/cat-prefix $((4 * offset)) >/dev/null
+  fi
+
+  # Cut the stream into tracks using the saved lengths; the positional
+  # parameters are consumed in step with the same glob as above.
+  set -- $lens
+  for i in "$sourcedir"/*.flac; do
+    base=${i##*/}
+    len=$1; shift
+    $JBDIR/bin/cat-prefix $((4 * len)) >"$tmp"/"${base%.flac}.pcm"
+    flac --force-raw-format --sign=signed --endian=little --channels=2 \
+      --bps=16 --sample-rate=44100 --best -o "$targetdir"/"$base" \
+      "$tmp"/"${base%.flac}.pcm"
+    metaflac --remove-all-tags \
+      --import-tags-from="$tmp"/"${base%.flac}.tags" \
+      "$targetdir"/"$base"
+  done
+
+  # For a negative offset, the last -OFFSET samples are left over: drain
+  # them.
+  if ((offset < 0)); then
+    $JBDIR/bin/cat-prefix $((4 * -offset)) >/dev/null
+  fi
+}
+
+# Copy everything that isn't a FLAC file, including dotfiles.  The
+# existence check skips a glob pattern which matched nothing.
+for i in "$sourcedir"/* "$sourcedir"/.[!.]*; do
+  if [ ! -e "$i" ]; then continue; fi
+  case "$i" in
+    *.flac) ;;
+    *) cp -r "$i" "$targetdir"/ ;;
+  esac
+done
+
+# Success: remove the scratch directory and disarm the cleanup trap so that
+# the finished target directory survives our exit.
+rm -rf "$tmp"
+trap '' EXIT INT TERM
diff --git a/gremlin/gremlin b/gremlin/gremlin

new file mode 100755 (executable)

index 0000000..c9734d5
--- /dev/null
+++ b/gremlin/gremlin
@@ -0,0 +1,1660 @@
+#! /usr/bin/python
+###
+### Convert a directory tree of audio files
+###
+### (c) 2010 Mark Wooding
+###
+
+###----- Licensing notice ---------------------------------------------------
+###
+### This program is free software; you can redistribute it and/or modify
+### it under the terms of the GNU General Public License as published by
+### the Free Software Foundation; either version 2 of the License, or
+### (at your option) any later version.
+###
+### This program is distributed in the hope that it will be useful,
+### but WITHOUT ANY WARRANTY; without even the implied warranty of
+### MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+### GNU General Public License for more details.
+###
+### You should have received a copy of the GNU General Public License
+### along with this program; if not, write to the Free Software Foundation,
+### Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+###--------------------------------------------------------------------------
+### External dependencies.
+
+## Language features.
+from __future__ import with_statement
+
+## Standard Python libraries.
+import sys as SYS
+import os as OS
+import errno as E
+import time as T
+import unicodedata as UD
+import fnmatch as FN
+import re as RX
+import shutil as SH
+import optparse as OP
+import threading as TH
+import shlex as L
+from math import sqrt
+from contextlib import contextmanager
+
+## eyeD3 tag fettling.
+import eyeD3 as E3
+
+## Gstreamer.  It picks up command-line arguments -- most notably `--help' --
+## and processes them itself.  Of course, its help is completely wrong.  This
+## kludge is due to Jonas Wagner.
+_argv, SYS.argv = SYS.argv, []
+import gobject as G
+import gio as GIO
+import gst as GS
+SYS.argv = _argv
+
+## Python Imaging.
+from PIL import Image as I
+
+## Python parsing.
+import pyparsing as P
+
+###--------------------------------------------------------------------------
+### Special initialization.
+
+VERSION = '1.0.0~pre'
+
+## GLib.
+G.threads_init()
+
+###--------------------------------------------------------------------------
+### Eyecandy progress reports.
+
+def charwidth(s):
+  """
+  Return the width of S, in characters.
+
+  Specifically, this is the number of backspace characters required to
+  overprint the string S.  If the current encoding for `stdout' appears to be
+  Unicode then do a complicated Unicode thing; otherwise assume that
+  characters take up one cell each.
+
+  None of this handles tab characters in any kind of useful way.  Sorry.
+  """
+
+  ## If there's no encoding for stdout then we're doing something stupid.
+  if SYS.stdout.encoding is None: return len(s)
+
+  ## Turn the string into Unicode so we can hack on it properly.  Maybe that
+  ## won't work out, in which case fall back to being stupid.
+  try: u = s.decode(SYS.stdout.encoding)
+  except UnicodeError: return len(s)
+
+  ## Our main problem is combining characters, but we should also try to
+  ## handle wide (mostly Asian) characters, and zero-width ones.  This hack
+  ## is taken mostly from http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c
+  w = 0
+  for ch in u:
+    cd = ord(ch)
+    if UD.category(ch) in ['Cf', 'Me', 'Mn'] or \
+          0x1160 <= cd <= 0x11ff: pass
+    elif UD.east_asian_width(ch) in ['F', 'W']: w += 2
+    else: w += 1
+
+  ## Done.
+  #print ';; %r -> %d' % (s, w)
+  return w
+
+class StatusLine (object):
+  """
+  Maintains a status line containing ephemeral progress information.
+
+  The status line isn't especially important, but it keeps interactive users
+  amused.
+
+  There should be only one status line object in your program; otherwise
+  they'll interfere with each other and get confused.
+
+  The update algorithm (in `set') is fairly careful to do the right thing
+  with long status `lines', and to work properly in an Emacs `shell' buffer.
+  """
+
+  def __init__(me):
+    "Initialize the status line."
+    me._last = ''
+    me._lastlen = 0
+    me.eyecandyp = OS.isatty(SYS.stdout.fileno())
+
+  def set(me, line):
+    """
+    Set the status line contents to LINE, replacing what was there before.
+
+    This only produces actual output if stdout is interactive.
+    """
+    n = len(line)
+
+    ## Eyecandy update.
+    if me.eyecandyp:
+      #print
+      #print ';; new status %r' % line
+
+      ## If the old line was longer, we need to clobber its tail, so work out
+      ## what that involves.
+      if n < me._lastlen:
+        b = charwidth(me._last[n:])
+        pre = '\b'*b + ' '*b
+      else:
+        pre = ''
+
+      ## Now figure out the length of the common prefix between what we had
+      ## before and what we have now.  This reduces the amount of I/O done,
+      ## which keeps network traffic down on SSH links, and keeps down the
+      ## amount of work slow terminal emulators like Emacs have to do.
+      i = 0
+      m = min(n, me._lastlen)
+      while i < m and line[i] == me._last[i]:
+        i += 1
+
+      ## Actually do the output, all in one syscall.
+      b = charwidth(me._last[i:])
+      SYS.stdout.write(pre + '\b'*b + line[i:])
+      #print ';; => %r' % (pre + '\b'*b + line[i:])
+      SYS.stdout.flush()
+
+    ## Update our idea of what's gone on.
+    me._lastlen = n
+    me._last = line
+
+  def clear(me):
+    "Clear the status line.  Just like set('')."
+    me.set('')
+
+  def commit(me, line = None):
+    """
+    Commit the current status line, and maybe the string LINE.
+
+    If the current status line is nonempty, then commit it to the transcript.
+    If LINE is not None, then commit that to the transcript too.
+
+    After all of this, we clear the status line to get back to a clean state.
+    """
+    if me._last:
+      if me.eyecandyp:
+        SYS.stdout.write('\n')
+      else:
+        SYS.stdout.write(me._last + '\n')
+    if line is not None:
+      SYS.stdout.write(line + '\n')
+    me._lastlen = 0
+    me._last = ''
+
+STATUS = StatusLine()
+
+def filestatus(file, status):
+  return '%s%s: %s' % (' '*8, OS.path.basename(file), status)
+
+class ProgressEyecandy (object):
+  """
+  Provide amusement while something big and complicated is happening.
+
+  This is an abstract class.  Subclasses must provide a method `progress'
+  returning a pair (CURRENT, MAX) indicating the current progress through the
+  operation.
+  """
+
+  def __init__(me, what, silentp = False):
+    """
+    Initialize a progress meter.
+
+    WHAT is a prefix string to be written before the progress eyecandy
+    itself.
+    """
+    me._what = what
+    me._silentp = silentp
+    me._spinner = 0
+    me._start = T.time()
+
+  def _fmt_time(me, t):
+    "Format T as a time, in (maybe hours) minutes and seconds."
+    s, t = t % 60, int(t/60)
+    m, h = t % 60, int(t/60)
+    if h > 0:
+      return '%d:%02d:%02d' % (h, m, s)
+    else:
+      return '%02d:%02d' % (m, s)
+
+  def show(me):
+    "Show the current level of progress."
+
+    ## If we're not showing pointless frippery, don't bother at all.
+    if not STATUS.eyecandyp:
+      return
+
+    ## Update the spinner index.
+    me._spinner = (me._spinner + 1)%4
+
+    ## Fetch the current progress information.  Note that we always fetch
+    ## both the current and maximum levels, because both might change if an
+    ## operation revises its idea of how much work needs doing.
+    cur, max = me.progress()
+
+    ## If we couldn't get progress information, display something vaguely
+    ## amusing anyway.
+    if cur is None or max is None:
+      STATUS.set('%s %c [unknown progress]' %
+                 (me._what, r'/-\|'[me._spinner]))
+      return
+
+    ## Work out -- well, guess -- the time remaining.
+    if cur:
+      t = T.time()
+      eta = me._fmt_time((t - me._start)*(max - cur)/cur)
+    else:
+      eta = '???'
+
+    ## Set the status bar.
+    n = 40*cur/max
+    STATUS.set('%s %c [%s%s] %3d%% (%s)' % \
+               (me._what,
+                r'/-\|'[me._spinner],
+                '='*n, ' '*(40 - n),
+                100*cur/max,
+                eta))
+
+  def done(me, win = True):
+    "Show a completion notice, or a failure if WIN is false."
+    if not win:
+      STATUS.set('%s FAILED!' % me._what)
+    elif not me._silentp:
+      STATUS.set('%s done (%s)' %
+                 (me._what,
+                  me._fmt_time(T.time() - me._start)))
+    else:
+      return
+    STATUS.commit()
+
+###--------------------------------------------------------------------------
+### Timeout handling.
+
+KILLSWITCH = TH.Event()
+
+def timeout(t0, t1):
+  """
+  Enforce a time limit on the program.
+
+  Sleep for T0 seconds and then set KILLSWITCH.  Sleep for a further T1
+  seconds; then complain and terminate the whole process immediately, with
+  exit status 3.
+
+  NOTE(review): this blocks its caller, so it is presumably run in a thread
+  of its own -- confirm against the call site.
+  """
+  T.sleep(t0)
+  KILLSWITCH.set()
+  T.sleep(t1)
+  moan('dying messily due to timeout')
+  ## Use `_exit' rather than raising `SystemExit': no cleanup is wanted.
+  OS._exit(3)
+
+###--------------------------------------------------------------------------
+### Parsing utilities.
+
+## Allow hyphens in identifiers.
+IDCHARS = P.alphanums + '-_'
+P.Keyword.setDefaultKeywordChars(IDCHARS)
+
+## Some common kinds of tokens.
+Name = P.Word(IDCHARS)
+Num = P.Word(P.nums).setParseAction(lambda toks: map(int, toks))
+String = P.QuotedString('"', '\\')
+
+## Handy abbreviations for constructed parser elements.
+## K(k): match the keyword K, contributing nothing to the parse results.
+def K(k): return P.Keyword(k).suppress()
+## D(d): match the literal (delimiter) D, contributing nothing to the
+## parse results.
+def D(d): return P.Literal(d).suppress()
+## R(p): match zero or more repetitions of P; the parse action wraps the
+## matched tokens up as a single item in the results.
+##R = P.ZeroOrMore
+def R(p): return P.ZeroOrMore(p).setParseAction(lambda s, l, t: [t])
+O = P.Optional
+
+###--------------------------------------------------------------------------
+### Format identification and conversion.
+
+class IdentificationFailure (Exception):
+  """
+  Raised by an identification function which can't identify a file.
+
+  `FileCategory.identify' catches this and carries on.
+  """
+  pass
+
+class FileCategory (object):
+  """
+  A FileCategory represents a class of files.
+
+  For example, it's sensible to consider audio, or image files as a
+  category.  A file category knows how to recognize member files from
+  MIME content types.
+  """
+
+  def __init__(me, name, mime_pats, ident):
+    """
+    Construct a new category.
+
+    The PATS are a list of `fnmatch' patterns to be compared with a MIME
+    type.  The IDENT is a function which produces an identification object
+    given a file's name and first-guess MIME type.  The object is passed to a
+    Format's `check' method to see whether a file needs re-encoding, and to
+    `convert' to assist with the conversion.
+
+    An identification object must have an attribute `mime' which is a set of
+    possible MIME types accumulated for the object.
+    """
+    me.name = name
+    me._mime_pats = mime_pats
+    me._ident = ident
+    CATEGORYMAP[name] = me
+
+  def identify(me, file, mime):
+    """
+    Attempt to identify FILE, given its apparent MIME type.
+
+    If identification succeeds, return an identification object which can be
+    used by associated file formats; otherwise return None.
+    """
+    for p in me._mime_pats:
+      if not FN.fnmatchcase(mime, p):
+        continue
+      try:
+        return me._ident(file, mime)
+      except IdentificationFailure:
+        pass
+    return None
+
+class BaseFormat (object):
+  """
+  A BaseFormat object represents a particular encoding and parameters.
+
+  The object can verify (the `check' method) whether a particular file
+  matches its requirements, and if necessary (`convert') re-encode a file.
+
+  Subclasses should define the following methods.
+
+  check(ID)
+          Answer whether the file identified by ID is acceptable according to
+          the receiver's parameters.
+
+  convert(MASTER, ID, TARGET)
+          Convert the file MASTER, which has been identified as ID, according
+          to the receiver's parameters, writing the output to TARGET.
+
+  Subclasses should also provide these attributes.
+
+  CATEGORY
+          A FileCategory object for the category of files that this format
+          lives within.
+
+  EXT     A file extension to be applied to encoded output files.
+
+  NAME    A user-facing name for the format.
+
+  PROPS   A parser element to parse a property definition.  It should produce
+          a pair NAME, VALUE to be stored in a dictionary.
+
+  Subclasses for different kinds of file may introduce more subclass
+  protocol.
+  """
+
+  def fixup(me, path):
+    """
+    Post-encoding fixups.
+
+    The default implementation does nothing with PATH.
+    """
+    pass
+
+FORMATMAP = {}
+CATEGORYMAP = {}
+
+def defformat(name, cls):
+  "Define a format NAME using class CLS."
+  if not hasattr(cls, 'NAME'):
+    raise ValueError, 'abstract class'
+  if not hasattr(cls, 'CATEGORY'):
+    raise ValueError, 'no category'
+  FORMATMAP[name] = cls
+
+class FormatParser (P.ParserElement):
+  """
+  Parse a format specifier:
+
+  format-spec ::= string [format-properties]
+  format-properties ::= `{' format-property (`,' format-property)* `}'
+
+  The syntax of a format-property is determined by the PROPS attribute on the
+  named format and its superclasses.
+
+  The result of a successful parse is an instance of the class registered
+  for the format in FORMATMAP, constructed with the parsed properties as
+  keyword arguments.
+  """
+
+  ## We cache the parser elements we generate to avoid enormous consing.
+  ## The cache maps a format name to its property-list parser, or to None
+  ## if the format takes no properties.
+  CACHE = {}
+
+  def parseImpl(me, s, loc, actp = True):
+    """
+    Parse a format specifier from the string S starting at index LOC.
+
+    Return a pair (NEWLOC, FORMAT).  Raise `P.ParseException' if the format
+    name isn't registered in FORMATMAP.
+    """
+
+    ## Firstly, determine the format name.
+    loc, r = Name._parse(s, loc, actp)
+    fmt = r[0]
+
+    ## Look up the format class.
+    try: fcls = FORMATMAP[fmt]
+    except KeyError:
+      raise P.ParseException(s, loc, "Unknown format `%s'" % fmt)
+
+    ## Fetch the property-list parser from the cache, if possible; else
+    ## construct it.
+    try:
+      pp = me.CACHE[fmt]
+    except KeyError:
+
+      ## Collect the distinct PROPS parsers from the class and its
+      ## superclasses, in method-resolution order, as alternatives.
+      ## NOTE(review): if the first PROPS found is itself an alternation,
+      ## `|=' may extend that class attribute in place rather than building
+      ## a new element -- confirm against the pyparsing version in use.
+      seen = set()
+      prop = None
+      for c in fcls.mro():
+        try: p = c.PROPS
+        except AttributeError: continue
+        if p in seen: continue
+        if prop is None: prop = p
+        else: prop |= p
+        seen.add(p)
+      if prop is None:
+        pp = me.CACHE[fmt] = None
+      else:
+        ## Each property parses to a (NAME, VALUE) pair; gather the pairs
+        ## into a dictionary.  The braced list as a whole is optional.
+        props = P.delimitedList(prop)
+        props.setParseAction(lambda s, l, t: dict(t.asList()))
+        pp = me.CACHE[fmt] = O(D('{') - props - D('}'))
+
+    ## Parse the properties.
+    if pp is None:
+      pd = {}
+    else:
+      loc, r = pp._parse(s, loc, actp)
+      if r: pd = r[0]
+      else: pd = {}
+
+    ## Construct the format object and return it.
+    return loc, fcls(**pd)
+
+Format = FormatParser()
+
+def prop(kw, pval, tag = None):
+  if tag is None: tag = kw
+  if pval is None:
+    p = K(kw)
+    p.setParseAction(lambda s, l, t: (tag, True))
+  else:
+    p = K(kw) + D('=') + pval
+    p.setParseAction(lambda s, l, t: (tag, t[0]))
+  return p
+
+###--------------------------------------------------------------------------
+### Policies and actions.
+
+class Action (object):
+  """
+  An Action object represents a conversion action to be performed.
+
+  This class isn't intended to be instantiated directly.  It exists to define
+  some protocol common to all Action objects.
+
+  Action objects have the following attributes.
+
+  master        The name of the master (source) file.
+
+  target        The name of the target (destination) file.
+
+  PRIORITY      The priority of the action, for deciding which of two actions
+                to perform.  Higher priorities are more likely to win.
+
+  Converting an Action to a string describes the action in a simple
+  user-readable manner.  The `perform' method actually carries the action
+  out.
+  """
+
+  PRIORITY = 0
+
+  def __init__(me, master):
+    "Stash the MASTER file name for later."
+    me.master = master
+
+  def choose(me, him):
+    "Choose either ME or HIM and return one."
+    if him is None or me.PRIORITY > him.PRIORITY:
+      return me
+    else:
+      return him
+
+class CopyAction (Action):
+  """
+  An Action object for simply copying a file.
+
+  Actually we try to hardlink it first, falling back to a copy later.  This
+  is both faster and more efficient with regard to disk space.
+  """
+
+  ## Copying is good.  Linking is really good, but we can't tell the
+  ## difference at this stage.
+  PRIORITY = 10
+
+  def __init__(me, master, targetdir):
+    """
+    Initialize a CopyAction, from MASTER to the TARGETDIR directory.
+
+    The target file has the same base name as MASTER.
+    """
+    Action.__init__(me, master)
+    me.target = OS.path.join(targetdir, OS.path.basename(master))
+
+  def __str__(me):
+    "Return a short description of the action."
+    return 'copy/link'
+
+  def perform(me):
+    """
+    Actually perform a CopyAction.
+
+    Try to make a hard link.  If that fails because the target is on a
+    different filesystem (EXDEV), copy the file instead; any other error is
+    propagated to the caller.
+    """
+    try:
+      STATUS.set(filestatus(me.master, 'link'))
+      OS.link(me.master, me.target)
+    except OSError, err:
+      if err.errno != E.EXDEV:
+        raise
+      STATUS.set(filestatus(me.master, 'copy'))
+      ## Copy to a temporary name and rename it into place, so that the
+      ## target name never refers to a partially written file.
+      new = me.target + '.new'
+      SH.copyfile(me.master, new)
+      OS.rename(new, me.target)
+    STATUS.commit()
+
+class ConvertAction (Action):
+  """
+  An Action object for converting a file to a given format.
+
+  Additional attributes:
+
+  id            The identification object for the master file.
+
+  format        The format to which we're meant to convert the master.
+  """
+
+  def __init__(me, master, targetdir, id, format):
+    """
+    Initialize a ConvertAction.
+
+    The target lives in TARGETDIR, and is named after MASTER with its
+    extension replaced by the FORMAT's EXT.
+    """
+    Action.__init__(me, master)
+    stem, ext = OS.path.splitext(OS.path.basename(master))
+    me.target = OS.path.join(targetdir, stem + '.' + format.EXT)
+    me.id = id
+    me.format = format
+
+  def __str__(me):
+    "Return a short description of the action, naming the target format."
+    return 'convert to %s' % me.format.NAME
+
+  def perform(me):
+    "Actually perform a ConvertAction."
+    ## The action itself serves as the status text, via `__str__'.
+    STATUS.set(filestatus(me.master, me))
+    me.format.convert(me.master, me.id, me.target)
+
+Policy = P.Forward()
+
+class FormatPolicy (object):
+  """
+  A FormatPolicy object represents a set of rules for how to convert files.
+
+  This base class has no behaviour of its own: it only documents the
+  protocol which policy classes implement.
+
+  Given a master file, the FormatPolicy will identify it and return a list of
+  actions to be performed.  The methods required of a FormatPolicy are:
+
+  setcategory(CAT)
+          Store CAT as the policy's category.  Check that this is consistent
+          with the policy as stored.
+
+  actions(MASTER, TARGETDIR, ID, COHORT)
+          Given a MASTER file, identified as ID, a target directory
+          TARGETDIR, and a list COHORT of (FILE, ID) pairs for other files
+          of the same category in the same directory, return a list of
+          actions to be performed to get the target directory into the right
+          form.  The list might be empty if the policy object /rejects/ the
+          file.
+  """
+
+class AndPolicy (FormatPolicy):
+  """
+  A FormatPolicy which does the union of a bunch of other policies.
+
+  Each subsidiary policy is invoked in turn.  The highest-priority action for
+  each target file is returned.
+  """
+
+  def __init__(me, policies):
+    me._policies = policies
+
+  def setcategory(me, cat):
+    me.cat = cat
+    for p in me._policies:
+      p.setcategory(cat)
+
+  def actions(me, master, targetdir, id, cohort):
+    tmap = {}
+    for p in me._policies:
+      for a in p.actions(master, targetdir, id, cohort):
+        if a.target in tmap:
+          tmap[a.target] = a.choose(tmap.get(a.target))
+        else:
+          tmap[a.target] = a
+    return tmap.values()
+
+And = K('and') - D('{') - R(Policy) - D('}')
+And.setParseAction(lambda s, l, t: AndPolicy(t[0]))
+
+class OrPolicy (FormatPolicy):
+  """
+  A FormatPolicy which tries other policies and uses the first that accepts.
+
+  Each subsidiary policy is invoked in turn.  If any accepts, the actions it
+  proposes are turned and no further policies are invoked.  If none accepts
+  then the file is rejected.
+  """
+
+  def __init__(me, policies):
+    me._policies = policies
+
+  def setcategory(me, cat):
+    me.cat = cat
+    for p in me._policies:
+      p.setcategory(cat)
+
+  def actions(me, master, targetdir, id, cohort):
+    for p in me._policies:
+      aa = p.actions(master, targetdir, id, cohort)
+      if aa:
+        return aa
+    else:
+      return []
+
+Or = K('or') - D('{') - R(Policy) - D('}')
+Or.setParseAction(lambda s, l, t: OrPolicy(t[0]))
+
+class AcceptPolicy (FormatPolicy):
+  """
+  A FormatPolicy which copies files in a particular format.
+
+  If all of the files in a cohort are recognized as being in a particular
+  format (including this one), then accept it with a CopyAction; otherwise
+  reject.
+  """
+
+  def __init__(me, format):
+    me._format = format
+
+  def setcategory(me, cat):
+    if me._format.CATEGORY is not cat:
+      raise ValueError, \
+            "Accept format `%s' has category `%s', not `%s'" % \
+            (me._format.__class__.__name__,
+             me._format.CATEGORY.name, cat.name)
+    me.cat = cat
+
+  def actions(me, master, targetdir, id, cohort):
+    if me._format.check(id) and \
+       all(me._format.check(cid) for f, cid in cohort):
+      return [CopyAction(master, targetdir)]
+    else:
+      return []
+
+Accept = K('accept') - Format
+Accept.setParseAction(lambda s, l, t: AcceptPolicy(t[0]))
+
+class ConvertPolicy (FormatPolicy):
+  """
+  A FormatPolicy which copies files in a particular format or converts if
+  necessary.
+  """
+  def __init__(me, format):
+    me._format = format
+
+  def setcategory(me, cat):
+    if me._format.CATEGORY is not cat:
+      raise ValueError, \
+            "Accept format `%s' has category `%s', not `%s'" % \
+            (me._format.__class__.__name__,
+             me._format.CATEGORY.name, cat.name)
+    me.cat = cat
+
+  def actions(me, master, targetdir, id, cohort):
+    if me._format.check(id):
+      return [CopyAction(master, targetdir)]
+    else:
+      return [ConvertAction(master, targetdir, id, me._format)]
+
+Convert = K('convert') - Format
+Convert.setParseAction(lambda s, l, t: ConvertPolicy(t[0]))
+
+Policy << (And | Or | Accept | Convert)
+
+###--------------------------------------------------------------------------
+### Audio handling, based on GStreamer.
+
+def make_element(factory, name = None, **props):
+  "Return a new element from the FACTORY with the given NAME and PROPS."
+  elt = GS.element_factory_make(factory, name)
+  elt.set_properties(**props)
+  return elt
+
+class GStreamerProgressEyecandy (ProgressEyecandy):
+  """
+  Provide amusement while GStreamer is busy doing something.
+
+  The GStreamerProgressEyecandy object is a context manager.  Wrap it round
+  your GStreamer loop to provide progress information for an operation.
+  """
+
+  def __init__(me, what, elt, **kw):
+    """
+    Initialize a progress meter.
+
+    WHAT is a prefix string to be written before the progress eyecandy
+    itself.  ELT is a GStreamer element to interrogate to find the progress
+    information.
+    """
+    me._elt = elt
+    ProgressEyecandy.__init__(me, what, **kw)
+
+  def _update(me):
+    "Called by GLib main event loop to update the eyecandy."
+    me.show()
+    return True
+
+  def _timer(me):
+    """
+    Update the progress meter.
+
+    This is called periodically by the GLib main event-processing loop.
+    """
+    me.show()
+    return True
+
+  def progress(me):
+    "Return the current progress as a pair (CURRENT, MAX)."
+
+    ## Fetch the current progress information.  We get the duration each
+    ## time, because (particularly with VBR-encoded MP3 inputs) the estimated
+    ## duration can change as we progress.  Hopefully it settles down fairly
+    ## soon.
+    try:
+      t, hunoz = me._elt.query_position(GS.FORMAT_TIME)
+      end, hukairz = me._elt.query_duration(GS.FORMAT_TIME)
+      return t, end
+    except GS.QueryError:
+      return None, None
+
+  def __enter__(me):
+    "Enter context: attach progress meter display."
+
+    ## If we're not showing pointless frippery, don't bother at all.
+    if not STATUS.eyecandyp:
+      return
+
+    ## Update regularly.  The pipeline runs asynchronously.
+    me._id = G.timeout_add(200, me._update)
+
+  def __exit__(me, ty, val, tb):
+    "Leave context: remove display and report completion or failure."
+
+    ## If we're not showing pointless frippery, there's nothing to remove.
+    if STATUS.eyecandyp:
+      G.source_remove(me._id)
+
+    ## Report completion anyway.
+    me.done(ty is None)
+
+    ## As you were.
+    return False
+
+class AudioIdentifier (object):
+  """
+  Analyses and identifies an audio file.
+
+  Important properties are:
+
+  cap     A capabilities structure describing the audio file data.  The most
+          interesting thing in here is probably its name, which is a MIME
+          type describing the data.
+
+  dcap    A capabilities structure describing the decoded audio data.  This
+          is of interest during conversion.
+
+  tags    A dictionary containing metadata tags from the file.  These are in
+          GStreamer's encoding-independent format.
+
+  bitrate An approximation to the stream's bitrate, in kilobits per second.
+          This might be slow to work out for some files so it's computed on
+          demand.
+  """
+
+  def __init__(me, file, mime):
+    "Initialize the object suitably for identifying FILE."
+
+    ## Make some initial GStreamer objects.  We'll want the pipeline later if
+    ## we need to analyse a poorly tagged MP3 stream, so save it away.
+    me._pipe = GS.Pipeline()
+    me._file = file
+    bus = me._pipe.get_bus()
+    bus.add_signal_watch()
+    loop = G.MainLoop()
+
+    ## The basic recognition kit is based around `decodebin'.  We must keep
+    ## it happy by giving it sinks for the streams it's found, which it
+    ## announces asynchronously.
+    source = make_element('filesrc', 'file', location = file)
+    decoder = make_element('decodebin', 'decode')
+    sink = make_element('fakesink')
+    def decoder_pad_arrived(elt, pad):
+      if pad.get_caps()[0].get_name().startswith('audio/'):
+        elt.link_pads(pad.get_name(), sink, 'sink')
+    dpaid = decoder.connect('pad-added', decoder_pad_arrived)
+    me._pipe.add(source, decoder, sink)
+    GS.element_link_many(source, decoder)
+
+    ## Arrange to collect tags from the pipeline's bus as they're reported.
+    ## If we reuse the pipeline later, we'll want different bus-message
+    ## handling, so make sure we can take the signal handler away.
+    tags = {}
+    fail = []
+    def bus_message(bus, msg):
+      if msg.type == GS.MESSAGE_ERROR:
+        fail[:] = (ValueError, msg.structure['debug'], None)
+        loop.quit()
+      elif msg.type == GS.MESSAGE_STATE_CHANGED:
+        if msg.structure['new-state'] == GS.STATE_PAUSED and \
+               msg.src == me._pipe:
+          loop.quit()
+      elif msg.type == GS.MESSAGE_TAG:
+        tags.update(msg.structure)
+    bmid = bus.connect('message', bus_message)
+
+    ## We want to identify the kind of stream this is.  (Hmm.  The MIME type
+    ## recognizer has already done this work, but GStreamer is probably more
+    ## reliable.)  The `decodebin' has a `typefind' element inside which will
+    ## announce the identified media type.  All we need to do is find it and
+    ## attach a signal handler.  (Note that the handler might be run in the
+    ## thread context of the pipeline element, but Python's GIL will keep
+    ## things from being too awful.)
+    me.cap = None
+    me.dcap = None
+    for e in decoder.elements():
+      if e.get_factory().get_name() == 'typefind':
+        tfelt = e
+        break
+    else:
+      assert False, 'failed to find typefind element'
+
+    ## Crank up most of the heavy machinery.  The message handler will stop
+    ## the loop when things seem to be sufficiently well underway.
+    me._pipe.set_state(GS.STATE_PAUSED)
+    loop.run()
+    bus.disconnect(bmid)
+    decoder.disconnect(dpaid)
+    if fail:
+      me._pipe.set_state(GS.STATE_NULL)
+      raise fail[0], fail[1], fail[2]
+
+    ## Store the collected tags.
+    me.tags = tags
+
+    ## Gather the capabilities.  The `typefind' element knows the input data
+    ## type.  The 'decodebin' knows the raw data type.
+    me.cap = tfelt.get_pad('src').get_negotiated_caps()[0]
+    me.mime = set([mime, me.cap.get_name()])
+    me.dcap = sink.get_pad('sink').get_negotiated_caps()[0]
+
+    ## If we found a plausible bitrate then stash it.  Otherwise note that we
+    ## failed.  If anybody asks then we'll work it out then.
+    if 'nominal-bitrate' in tags:
+      me._bitrate = tags['nominal-bitrate']/1000
+    elif 'bitrate' in tags and tags['bitrate'] >= 80000:
+      me._bitrate = tags['bitrate']/1000
+    else:
+      me._bitrate = None
+
+    ## The bitrate computation wants the file size.  Ideally we'd want the
+    ## total size of the frames' contents, but that seems hard to dredge
+    ## out.  If the framing overhead is small, this should be close enough
+    ## for our purposes.
+    me._bytes = OS.stat(file).st_size
+
+  def __del__(me):
+    "Close the pipeline down so we don't leak file descriptors."
+    me._pipe.set_state(GS.STATE_NULL)
+
+  @property
+  def bitrate(me):
+    """
+    Return the approximate bit-rate of the input file.
+
+    This might take a while if we have to work it out the hard way.
+    """
+
+    ## If we already know the answer then just return it.
+    if me._bitrate is not None:
+      return me._bitrate
+
+    ## Make up a new main loop.
+    loop = G.MainLoop()
+
+    ## Watch for bus messages.  We'll stop when we reach the end of the
+    ## stream: then we'll have a clear idea of how long the track was.
+    fail = []
+    def bus_message(bus, msg):
+      if msg.type == GS.MESSAGE_ERROR:
+        fail[:] = (ValueError, msg.structure['debug'], None)
+        loop.quit()
+      elif msg.type == GS.MESSAGE_EOS:
+        loop.quit()
+    bus = me._pipe.get_bus()
+    bmid = bus.connect('message', bus_message)
+
+    ## Get everything moving, and keep the user amused while we work.
+    me._pipe.set_state(GS.STATE_PLAYING)
+    with GStreamerProgressEyecandy(filestatus(file, 'measure bitrate') %
+                                   me._pipe,
+                                   silentp = True):
+      loop.run()
+    bus.disconnect(bmid)
+    if fail:
+      me._pipe.set_state(GS.STATE_NULL)
+      raise fail[0], fail[1], fail[2]
+
+    ## Now we should be able to find out our position accurately and work out
+    ## a bitrate.  Cache it in case anybody asks again.
+    t, hukairz = me._pipe.query_position(GS.FORMAT_TIME)
+    me._bitrate = int(8*me._bytes*1e6/t)
+
+    ## Done.
+    return me._bitrate
+
+class AudioFormat (BaseFormat):
+  """
+  An AudioFormat is a kind of Format specialized for audio files.
+
+  Format checks are done on an AudioIdentifier object.
+
+  The methods here refer to `me.MIMETYPES', `me.NAME', `me.encoder_chain'
+  and `me.fixup', none of which is defined in this class.  (Presumably
+  BaseFormat supplies a default `fixup'; the others come from concrete
+  subclasses -- confirm against BaseFormat.)
+  """
+
+  ## The only property is an optional numeric `bitrate'; see `__init__'.
+  PROPS = prop('bitrate', Num)
+
+  ## libmagic reports `application/ogg' for Ogg Vorbis files.  We've switched
+  ## to GIO now, which reports either `audio/ogg' or `audio/x-vorbis+ogg'
+  ## depending on how thorough it's trying to be.  Still, it doesn't do any
+  ## harm here; the main risk is picking up Ogg Theora files by accident, and
+  ## we'll probably be able to extract the audio from them anyway.
+  CATEGORY = FileCategory('audio', ['audio/*', 'application/ogg'],
+                          AudioIdentifier)
+
+  def __init__(me, bitrate = None):
+    """
+    Construct an object, requiring an approximate bitrate.
+
+    If BITRATE is `None' (the default) then `check' accepts any bitrate.
+    """
+    me.bitrate = bitrate
+
+  def check(me, id):
+    """
+    Return whether the AudioIdentifier ID is suitable for our purposes.
+
+    Subclasses can either override this method or provide a property
+    `MIMETYPES', which is a list (other thing that implements `__contains__')
+    of GStreamer MIME types matching this format.
+
+    Note that the implementation below intersects `MIMETYPES' with the set
+    `id.mime' using `&', so in practice `MIMETYPES' must be a set.  The
+    result is true if the intersection is nonempty and either we have no
+    bitrate requirement or the file's bitrate is at most sqrt(2) times ours.
+    """
+    return id.mime & me.MIMETYPES and \
+           (me.bitrate is None or id.bitrate <= me.bitrate * sqrt(2))
+
+  def encoder(me):
+    """
+    Constructs a GStreamer element to encode audio input.
+
+    Subclasses can either override this method (or replace `encode'
+    entirely), or provide a method `encoder_chain' which returns a list of
+    elements to be linked together in sequence.  The first element in the
+    chain must have a pad named `sink' and the last must have a pad named
+    `src'.
+
+    The elements are collected into a bin, whose own `sink' and `src' pads
+    are ghosts of those two pads, so the caller can treat it as one element.
+    """
+    elts = me.encoder_chain()
+    bin = GS.Bin()
+    bin.add(*elts)
+    GS.element_link_many(*elts)
+    bin.add_pad(GS.GhostPad('sink', elts[0].get_pad('sink')))
+    bin.add_pad(GS.GhostPad('src', elts[-1].get_pad('src')))
+    return bin
+
+  def convert(me, master, id, target):
+    """
+    Encode audio from MASTER, already identified as ID, writing it to TARGET.
+
+    See `encoder' for subclasses' responsibilities.
+
+    The output is written to TARGET with `.new' appended, passed to
+    `me.fixup', and only then renamed to TARGET.  Raises ValueError if
+    GStreamer reports an error on the pipeline's bus.
+    """
+
+    ## Construct the necessary equipment.
+    pipe = GS.Pipeline()
+    bus = pipe.get_bus()
+    bus.add_signal_watch()
+    loop = G.MainLoop()
+
+    ## Make sure that there isn't anything in the way of our output.  We're
+    ## going to write to a scratch file so that we don't get confused by
+    ## half-written rubbish left by a crashed program.
+    new = target + '.new'
+    try:
+      OS.unlink(new)
+    except OSError, err:
+      if err.errno != E.ENOENT:
+        raise
+
+    ## Piece together our pipeline.  The annoying part is that the
+    ## `decodebin' doesn't have any source pads yet, so our chain is in two
+    ## halves for now.
+    source = make_element('filesrc', 'source', location = master)
+    decoder = make_element('decodebin', 'decode')
+    convert = make_element('audioconvert', 'convert')
+    encoder = me.encoder()
+    sink = make_element('filesink', 'sink', location = new)
+    pipe.add(source, decoder, convert, encoder, sink)
+    GS.element_link_many(source, decoder)
+    GS.element_link_many(convert, encoder, sink)
+
+    ## Some decoders (e.g., the AC3 decoder) include channel-position
+    ## indicators in their output caps.  The Vorbis encoder interferes with
+    ## this, and you end up with a beautifully encoded mono signal from a
+    ## stereo source.  From a quick butchers at the `vorbisenc' source, I
+    ## /think/ that this is only a problem with stereo signals: mono signals
+    ## are mono already, and `vorbisenc' accepts channel positions if there
+    ## are more than two channels.
+    ##
+    ## So we have this bodge.  We already collected the decoded audio caps
+    ## during identification.  So if we see 2-channel audio with channel
+    ## positions, we strip the positions off forcibly by adding a filter.
+    if id.dcap.get_name().startswith('audio/x-raw-') and \
+       id.dcap.has_field('channels') and \
+       id.dcap['channels'] == 2 and \
+       id.dcap.has_field('channel-positions'):
+      dcap = GS.Caps()
+      c = id.dcap.copy()
+      c.remove_field('channel-positions')
+      dcap.append(c)
+    else:
+      dcap = None
+
+    ## Hook onto the `decodebin' so we can link together the two halves of
+    ## our encoding chain.  For now, we'll hope that there's only one audio
+    ## stream in there, and just throw everything else away.
+    def decoder_pad_arrived(elt, pad):
+      if pad.get_caps()[0].get_name().startswith('audio/'):
+        if dcap:
+          elt.link_pads_filtered(pad.get_name(), convert, 'sink', dcap)
+        else:
+          elt.link_pads(pad.get_name(), convert, 'sink')
+    decoder.connect('pad-added', decoder_pad_arrived)
+
+    ## Watch the bus for completion messages.  On error, `fail' is filled in
+    ## with the three arguments for the `raise' statement below.
+    fail = []
+    def bus_message(bus, msg):
+      if msg.type == GS.MESSAGE_ERROR:
+        fail[:] = (ValueError, msg.structure['debug'], None)
+        loop.quit()
+      elif msg.type == GS.MESSAGE_EOS:
+        loop.quit()
+    bmid = bus.connect('message', bus_message)
+
+    ## Get everything ready and let it go.
+    pipe.set_state(GS.STATE_PLAYING)
+    with GStreamerProgressEyecandy(filestatus(master,
+                                              'convert to %s' % me.NAME),
+                                   pipe):
+      loop.run()
+    pipe.set_state(GS.STATE_NULL)
+    if fail:
+      raise fail[0], fail[1], fail[2]
+
+    ## Fix up the output file if we have to.
+    me.fixup(new)
+
+    ## We're done.
+    OS.rename(new, target)
+
+class OggVorbisFormat (AudioFormat):
+  "AudioFormat object for Ogg Vorbis."
+
+  ## From http://en.wikipedia.org/wiki/Vorbis
+  QMAP = [(-1,  45), ( 0,  64), ( 1,  80), ( 2,  96),
+          ( 3, 112), ( 4, 128), ( 5, 160), ( 6, 192),
+          ( 7, 224), ( 8, 256), ( 9, 320), (10, 500)]
+
+  NAME = 'Ogg Vorbis'
+  MIMETYPES = set(['application/ogg', 'audio/x-vorbis', 'audio/ogg',
+                   'audio/x-vorbis+ogg'])
+  EXT = 'ogg'
+
+  def encoder_chain(me):
+    for q, br in me.QMAP:
+      if br >= me.bitrate:
+        break
+    else:
+      raise ValueError, 'no suitable quality setting found'
+    return [make_element('vorbisenc',
+                         quality = q/10.0),
+            make_element('oggmux')]
+
+defformat('ogg-vorbis', OggVorbisFormat)
+
+class MP3Format (AudioFormat):
+  "AudioFormat object for MP3."
+
+  NAME = 'MP3'
+  MIMETYPES = set(['audio/mpeg'])
+  EXT = 'mp3'
+
+  def encoder_chain(me):
+    return [make_element('lame',
+                         vbr_mean_bitrate = me.bitrate,
+                         vbr = 4),
+            make_element('xingmux'),
+            make_element('id3v2mux')]
+
+  def fixup(me, path):
+    """
+    Fix up MP3 files.
+
+    GStreamer produces ID3v2 tags, but not ID3v1.  This seems unnecessarily
+    unkind to stupid players.
+    """
+    tag = E3.Tag()
+    tag.link(path)
+    tag.setTextEncoding(E3.UTF_8_ENCODING)
+    try:
+      tag.update(E3.ID3_V1_1)
+    except (UnicodeEncodeError, E3.tag.GenreException):
+      pass
+
+defformat('mp3', MP3Format)
+
+###--------------------------------------------------------------------------
+### Image handling, based on the Python Imaging Library.
+
+class ImageIdentifier (object):
+  """
+  Analyses and identifies an image file.
+
+  Simply leaves an Image object in the `img' property which can be inspected.
+  """
+
+  def __init__(me, file, mime):
+
+    ## Get PIL to open the file.  It will magically work out what kind of
+    ## file it is.
+    try:
+      me.img = I.open(file)
+    except IOError, exc:
+
+      ## Unhelpful thing to raise on identification failure.  We can
+      ## distinguish this from an actual I/O error because it doesn't have an
+      ## `errno'.
+      if exc.errno is None:
+        raise IdentificationFailure
+      raise
+
+    me.mime = set([mime])
+
+class ImageFormat (BaseFormat):
+  """
+  An ImageFormat is a kind of Format specialized for image files.
+
+  Subclasses don't need to provide anything other than the properties
+  required by all concrete Format subclasses.  However, there is a
+  requirement that the `NAME' property match PIL's `format' name for the
+  format.
+  """
+
+  PROPS = prop('size', Num)
+  CATEGORY = FileCategory('image', ['image/*'], ImageIdentifier)
+
+  def __init__(me, size = None, **kw):
+    """
+    Initialize an ImageFormat object.
+
+    Additional keywords are used when encoding, and may be recognized by
+    enhanced `check' methods in subclasses.
+    """
+    me._size = size
+    me._props = kw
+
+  def check(me, id):
+    "Check whether the ImageIdentifier ID matches our requirements."
+    return id.img.format == me.NAME and \
+           (me._size is None or
+            (id.img.size[0] <= me._size and
+             id.img.size[1] <= me._size))
+
+  def convert(me, master, id, target):
+    "Encode the file MASTER, identified as ID, writing the result to TARGET."
+
+    ## Write to a scratch file.
+    new = target + '.new'
+
+    ## The ImageIdentifier already contains a copy of the open file.  It
+    ## would be wasteful not to use it.
+    img = id.img
+    STATUS.set(filestatus(master, 'convert to %s' % me.NAME))
+
+    ## If there's a stated maximum size then scale the image down to match.
+    ## But thumbnailing clobbers the original, so take a copy.
+    if me._size is not None and \
+           (img.size[0] > me._size or img.size[1] > me._size):
+      img = img.copy()
+      img.thumbnail((me._size, me._size), I.ANTIALIAS)
+
+    ## Write the output image.
+    img.save(new, me.NAME, **me._props)
+
+    ## Fix it up if necessary.
+    me.fixup(new)
+
+    ## We're done.
+    OS.rename(new, target)
+    STATUS.commit()
+
+class JPEGFormat (ImageFormat):
+  """
+  Image format for JPEG (actually JFIF) files.
+
+  Interesting properties to set:
+
+  optimize
+          If present, take a second pass to select optimal encoder settings.
+
+  progression
+          If present, make a progressive file.
+
+  quality Integer from 1--100 (worst to best); default is 75.
+  """
+  EXT = 'jpg'
+  NAME = 'JPEG'
+  PROPS = prop('optimize', None) \
+    | prop('progressive', None, 'progression') \
+    | prop('quality', Num)
+
+defformat('jpeg', JPEGFormat)
+
+class PNGFormat (ImageFormat):
+  """
+  Image format for PNG files.
+
+  Interesting properties:
+
+  optimize
+          If present, make a special effort to minimize the output file.
+  """
+  EXT = 'png'
+  NAME = 'PNG'
+  PROPS = prop('optimize', None)
+
+defformat('png', PNGFormat)
+
+class BMPFormat (ImageFormat):
+  """
+  Image format for Windows BMP files, as used by RockBox.
+
+  No additional properties.
+  """
+  NAME = 'BMP'
+  EXT = 'bmp'
+
+defformat('bmp', BMPFormat)
+
+###--------------------------------------------------------------------------
+### The directory grobbler.
+
+class Grobbler (object):
+  """
+  The directory grobbler copies a directory tree, converting files.
+  """
+
+  def __init__(me, policies, noact = False):
+    """
+    Create a new Grobbler, working with the given POLICIES.
+    """
+    me._pmap = {}
+    me._noact = noact
+    for p in policies:
+      me._pmap.setdefault(p.cat, []).append(p)
+    me._dirs = []
+
+  def _grobble_file(me, master, targetdir, cohorts):
+    """
+    Convert MASTER, writing the result to TARGETDIR.
+
+    The COHORTS are actually (CAT, ID, COHORT) triples, where a COHORT is a
+    list of (FILENAME, ID) pairs.
+
+    Since this function might convert the MASTER file, the caller doesn't
+    know the name of the output files, so we return then as a list.
+    """
+
+    done = set()
+    st_m = OS.stat(master)
+
+    ## Work through each category listed and apply its policy.
+    for cat, id, cohort in cohorts:
+
+      ## Go through the category's policies and see if any match.  If we fail
+      ## here, see if there are more categories to try.
+      for pol in me._pmap[cat]:
+        acts = pol.actions(master, targetdir, id, cohort)
+        if acts: break
+      else:
+        continue
+
+      ## Work through the targets one by one.
+      for a in acts:
+        done.add(a.target)
+
+        ## Find out whether the target file already exists and is up-to-date
+        ## with respect to the master.  (Caution here with low-resolution
+        ## timestamps.)  If it's OK, then just move on.
+        try:
+          st_t = OS.stat(a.target)
+          if st_m.st_mtime < st_t.st_mtime or \
+                 (st_m.st_ino, st_m.st_dev) == (st_t.st_ino, st_t.st_dev):
+            continue
+        except OSError, err:
+          if err.errno not in (E.ENOENT, E.ENOTDIR):
+            raise
+
+        ## We have real work to do.  If there's a current status message,
+        ## it's the containing directory so flush it so that people know
+        ## where we are.
+        STATUS.commit()
+
+        ## Remove the target.  (A hardlink will fail if the target already
+        ## exists.)
+        if not me._noact:
+          try:
+            OS.unlink(a.target)
+          except OSError, err:
+            if err.errno not in (E.ENOENT, E.ENOTDIR):
+              raise
+
+        ## Do whatever it is we decided to do.
+        if me._noact:
+          STATUS.commit(filestatus(master, a))
+        else:
+          a.perform()
+
+    ## We're done.  Return the names of the targets.
+    return list(done)
+
+  @contextmanager
+  def _wrap(me, masterfile):
+    """
+    Handle exceptions found while trying to convert a particular file or
+    directory.
+    """
+
+    try:
+      yield masterfile
+
+    ## Something bad happened.  Report the error, but continue.  (This list
+    ## of exceptions needs a lot of work.)
+    except (IOError, OSError), exc:
+      STATUS.clear()
+      STATUS.commit(filestatus(masterfile, 'failed (%s)' % exc))
+      me._broken.append((masterfile, exc))
+
+  def _grobble_dir(me, master, target):
+    """
+    Recursively convert files in MASTER, writing them to TARGET.
+    """
+
+    ## Make sure the TARGET exists and is a directory.  It's a fundamental
+    ## assumption of this program that the entire TARGET tree is disposable,
+    ## so if something exists but isn't a directory, we should kill it.
+    if OS.path.isdir(target):
+      pass
+    else:
+      if OS.path.exists(target):
+        STATUS.commit(filestatus(target, 'clear nondirectory'))
+        if not me._noact:
+          OS.unlink(target)
+      STATUS.commit(filestatus(target, 'create directory'))
+      if not me._noact:
+        OS.mkdir(target)
+
+    ## Keep a list of things in the target.  As we convert files, we'll check
+    ## them off.  Anything left over is rubbish and needs to be deleted.
+    checklist = {}
+    try:
+      for i in OS.listdir(target):
+        checklist[i] = False
+    except OSError, err:
+      if err.errno not in (E.ENOENT, E.ENOTDIR):
+        raise
+
+    ## Keep track of the files in each category.
+    catmap = {}
+    todo = []
+    done = []
+
+    ## Work through the master files.
+    for f in sorted(OS.listdir(master)):
+
+      ## If the killswitch has been pulled then stop.  The whole idea is that
+      ## we want to cause a clean shutdown if possible, so we don't want to
+      ## do it in the middle of encoding because the encoding effort will
+      ## have been wasted.  This is the only place we need to check.  If
+      ## we've exited the loop, then clearing old files will probably be
+      ## fast, and we'll either end up here when the recursive call returns
+      ## or we'll be in the same boat as before, clearing old files, only up
+      ## a level.  If worst comes to worst, we'll be killed forcibly
+      ## somewhere inside `SH.rmtree', and that can continue where it left
+      ## off.
+      if KILLSWITCH.is_set():
+        return
+
+      ## Do something with the file.
+      with me._wrap(OS.path.join(master, f)) as masterfile:
+
+        ## If it's a directory then grobble it recursively.  Keep the user
+        ## amused by telling him where we are in the tree.
+        if OS.path.isdir(masterfile):
+          me._dirs.append(f)
+          STATUS.set('/'.join(me._dirs))
+          try:
+            done += me._grobble_dir(masterfile, OS.path.join(target, f))
+          finally:
+            me._dirs.pop()
+            STATUS.set('/'.join(me._dirs))
+
+        ## Otherwise it's a file.  Work out what kind, and stash it under
+        ## the appropriate categories.  Later, we'll apply policy to the
+        ## files, by category, and work out what to do with them all.
+        else:
+          gf = GIO.File(masterfile)
+          mime = gf.query_info('standard::content-type').get_content_type()
+          cats = []
+          for cat in me._pmap.iterkeys():
+            id = cat.identify(masterfile, mime)
+            if id is None: continue
+            catmap.setdefault(cat, []).append((masterfile, id))
+            cats.append((cat, id))
+          if not cats:
+            catmap.setdefault(None, []).append((masterfile, id))
+          todo.append((masterfile, cats))
+
+    ## Work through the categorized files to see what actions to do for
+    ## them.
+    for masterfile, cats in todo:
+      with me._wrap(masterfile):
+        done += me._grobble_file(masterfile, target,
+                                 [(cat, id, catmap[cat])
+                                  for cat, id in cats])
+
+    ## Check the results off the list so that we don't clear it later.
+    for f in done:
+      checklist[OS.path.basename(f)] = True
+
+    ## Maybe there's stuff in the target which isn't accounted for.  Delete
+    ## it: either the master has changed, or the policy for this target has
+    ## changed.  Either way, the old files aren't wanted.
+    for f in checklist:
+      if not checklist[f]:
+        STATUS.commit(filestatus(f, 'clear bogus file'))
+        if not me._noact:
+          bogus = OS.path.join(target, f)
+          try:
+            if OS.path.isdir(bogus):
+              SH.rmtree(bogus)
+            else:
+              OS.unlink(bogus)
+          except OSError, err:
+            if err.errno != E.ENOENT:
+              raise
+
+    ## Return the target name, so that it can be checked off.
+    return [target]
+
+  def grobble(me, master, target):
+    """
+    Convert MASTER, writing a directory tree TARGET.
+
+    Returns a list of files which couldn't be converted.
+    """
+    try:
+      me._broken = []
+      me._grobble_dir(master, target)
+      return me._broken
+    finally:
+      del me._broken
+
+###--------------------------------------------------------------------------
+### Remaining parsing machinery.
+
+Type = K('type') - Name - D('{') - R(Policy) - D('}')
+def build_type(s, l, t):
+  try:
+    cat = CATEGORYMAP[t[0]]
+  except KeyError:
+    raise P.ParseException(s, loc, "Unknown category `%s'" % t[0])
+  pols = t[1]
+  if len(pols) == 1: pol = pols[0]
+  else: pol = AndPolicy(pols)
+  pol.setcategory(cat)
+  return pol
+Type.setParseAction(build_type)
+
+TARGETS = []
+class TargetJob (object):
+  def __init__(me, targetdir, policies):
+    me.targetdir = targetdir
+    me.policies = policies
+  def perform(me):
+    TARGETS.append(me)
+
+Target = K('target') - String - D('{') - R(Type) - D('}')
+def build_target(s, l, t):
+  return TargetJob(t[0], t[1])
+Target.setParseAction(build_target)
+
+VARS = { 'master': None }
+class VarsJob (object):
+  def __init__(me, vars):
+    me.vars = vars
+  def perform(me):
+    for k, v in me.vars:
+      VARS[k] = v
+
+Var = prop('master', String)
+Vars = K('vars') - D('{') - R(Var) - D('}')
+def build_vars(s, l, t):
+  return VarsJob(t[0])
+Vars.setParseAction(build_vars)
+
+TopLevel = Vars | Target
+Config = R(TopLevel)
+Config.ignore(P.pythonStyleComment)
+
+###--------------------------------------------------------------------------
+### Command-line interface.
+
+QUIS = OS.path.basename(SYS.argv[0])
+
+def moan(msg):
+  "Report a warning message to the user."
+  SYS.stderr.write('%s: %s\n' % (QUIS, msg))
+
+def die(msg):
+  "Report a fatal error message to the user."
+  moan(msg)
+  SYS.exit(1)
+
+def parse_opts(args):
+  """
+  Parse command-line arguments in ARGS.
+
+  Returns a Grobbler object and the MASTER and TARGET directories to be
+  grobbled.
+  """
+
+  ## Build the option parser object.
+  op = OP.OptionParser(prog = QUIS, version = VERSION,
+                       usage = '%prog [-t TIMEOUT] CONFIG',
+                       description = """\
+Convert a directory tree of files according to the configuration file
+CONFIG.
+""")
+
+  ## Timeout handling.
+  def cb_time(opt, ostr, arg, op):
+    m = RX.match(r'\s*(\d+)\s*([dhms]?)\s*', arg)
+    if not m:
+      raise OP.OptionValueerror, 'bad time value `%s\'' % arg
+    t, u = m.groups()
+    t = int(t) * { '': 1, 's': 1, 'm': 60, 'h': 3600, 'd': 86400 }[u]
+    setattr(op.values, opt.dest, t)
+  op.add_option('-t', '--timeout', type = 'string', metavar = 'SECS',
+                dest = 'timeout',
+                help = 'stop processing nicely after SECS',
+                action = 'callback', callback = cb_time)
+  op.add_option('-T', '--timeout-nasty', type = 'string', metavar = 'SECS',
+                dest = 'timeout_nasty',
+                help = 'stop processing unpleasantly after further SECS',
+                action = 'callback', callback = cb_time)
+
+  ## Other options.
+  op.add_option('-i', '--interactive', action = 'store_true', dest = 'tty',
+                help = 'provide progress information')
+  op.add_option('-n', '--no-act', action = 'store_true', dest = 'noact',
+                help = 'don\'t actually modify the filesystem')
+
+  ## Ready to rock.
+  op.set_defaults(formats = [], noact = False,
+                  timeout = None, timeout_nasty = 300)
+  opts, args = op.parse_args(args)
+
+  ## Check that we got the non-option arguments that we want.
+  if len(args) != 1:
+    op.error('wrong number of arguments')
+
+  ## Act on the options.
+  if opts.tty:
+    STATUS.eyecandyp = True
+  if opts.timeout is not None:
+    to = TH.Thread(target = timeout,
+                   args = (opts.timeout, opts.timeout_nasty))
+    to.daemon = True
+    to.start()
+
+  ## Parse the configuration file.
+  with open(args[0]) as conf:
+    jobs, = Config.parseFile(conf, True)
+  for j in jobs:
+    j.perform()
+
+  return opts
+
+if __name__ == '__main__':
+  opts = parse_opts(SYS.argv[1:])
+  if 'master' not in VARS:
+    die("no master directory set")
+  broken = []
+  for t in TARGETS:
+    g = Grobbler(t.policies, opts.noact)
+    b = g.grobble(VARS['master'], t.targetdir)
+    broken += b
+  if broken:
+    moan('failed to convert some files:')
+    for file, exc in broken:
+      moan('%s: %s' % (file, exc))
+    SYS.exit(1)
+
+  ## This is basically a successful completion: we did what we were asked to
+  ## do.  It seems polite to report a message, though.
+  ##
+  ## Why don't we have a nonzero exit status?  The idea would be that a
+  ## calling script would be interested that we used up all of our time, and
+  ## not attempt to convert some other directory as well.  But that doesn't
+  ## quite work.  Such a script would need to account correctly for time we
+  ## had spent even if we complete successfully.  And if the script is having
+  ## to watch the clock itself, it can do that without our help here.
+  if KILLSWITCH.is_set():
+    moan('killed by timeout')
+
+###----- That's all, folks --------------------------------------------------
diff --git a/make-multidisc-playlists/m3u-extinf b/make-multidisc-playlists/m3u-extinf

new file mode 100755 (executable)

index 0000000..46f33ff
--- /dev/null
+++ b/make-multidisc-playlists/m3u-extinf
@@ -0,0 +1,13 @@
+#! /usr/bin/python
+
+from sys import argv
+import mutagen as MG
+__import__(MG.__name__, fromlist = ['mp3', 'easyid3'])
+
+f, = argv[1:]
+
+t = MG.File(f)
+if type(t) == MG.mp3.MP3:
+  t = MG.mp3.MP3(f, ID3 = MG.easyid3.EasyID3)
+
+print (u'#EXTINF %d,%s - %s' % (-1, t['artist'][0], t['title'][0])).encode('utf-8')
diff --git a/make-multidisc-playlists/make-multidisc-playlists b/make-multidisc-playlists/make-multidisc-playlists

new file mode 100755 (executable)

index 0000000..e1e7a55
--- /dev/null
+++ b/make-multidisc-playlists/make-multidisc-playlists
@@ -0,0 +1,20 @@
+#! /bin/sh -e
+
+cd $HOME/jb
+find . -type f -name '?-??. *' -print | sed 's:/[^/]*$::' | uniq |
+while read d; do
+  (cd "$d"
+   p=$(echo "$d" | sed 's:^.*/\([^/]*\)/\([^/]*\)$:\1 - \2.m3u:')
+   updatep=nil
+   for f in ?-??.*; do
+     if [ ! "$p" -nt "$f" ]; then updatep=t; break; fi
+   done
+   case $updatep in nil) continue ;; esac
+   { echo "#EXTM3U"
+     for f in ?-??.*; do m3u-extinf "$f"; echo "$f"; done
+   } >"$p.new"
+   rm -f *.m3u
+   mv "$p".new "$p"
+   echo "$d/$p")
+done
+  
diff --git a/misc/ab-chop b/misc/ab-chop

new file mode 100755 (executable)

index 0000000..e0b8aa3
--- /dev/null
+++ b/misc/ab-chop
@@ -0,0 +1,240 @@
+#! /usr/bin/python
+###
+### A simple program for doing blind A/B audio comparisons
+###
+### (c) 2010 Mark Wooding
+###
+
+###----- Licensing notice ---------------------------------------------------
+###
+### This program is free software; you can redistribute it and/or modify
+### it under the terms of the GNU General Public License as published by
+### the Free Software Foundation; either version 2 of the License, or
+### (at your option) any later version.
+###
+### This program is distributed in the hope that it will be useful,
+### but WITHOUT ANY WARRANTY; without even the implied warranty of
+### MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+### GNU General Public License for more details.
+###
+### You should have received a copy of the GNU General Public License
+### along with this program; if not, write to the Free Software Foundation,
+### Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+###----- Usage --------------------------------------------------------------
+###
+### The command line syntax is:
+###
+###      ab-chop INPUT CAPS OUTPUT PIPELINE...
+###
+### This means that we should read INPUT, decode it (using a GStreamer
+### `decodebin', so it should be able to handle most things you care to throw
+### at it), and then re-encode it according to each PIPELINE in turn, decode
+### /that/ again, and stash the resulting raw PCM data.  When we've finished,
+### we line up the PCM data streams side-by-side, chop them into chunks, and
+### then stitch chunks from randomly chosen streams together to make a new
+### PCM stream.  Finally, we encode that mixed-up stream as FLAC, and write
+### it to OUTPUT.  It also writes a file OUTPUT.sequence which is a list of
+### numbers indicating which pipeline each chunk of the original came from.
+###
+### The motivation is that we want to test encoder quality.  So you take a
+### reference source (as good as you can find), and use that as your INPUT.
+### You then write GStreamer pipeline fragments for the encoders you want to
+### compare; say `identity' if you want the unmodified original reference to
+### be mixed in.
+###
+### The only tricky bit is the CAPS, which is a GStreamer capabilities string
+### describing the raw PCM format to use as an intermediate representation.
+### (This is far too low-level and cumbersome for real use, but it's OK for
+### now.)  You need to say something like
+###
+###   audio/x-raw-int,width=16,rate=44100,channels=2,depth=16,
+###     endianness=1234,signed=true
+###
+### for standard CD audio.
+
+###--------------------------------------------------------------------------
+### External dependencies.
+
+## Standard Python libraries.
+import sys as SYS
+import os as OS
+import shutil as SH
+import fnmatch as FN
+import random as R
+
+SR = R.SystemRandom()
+
+## GObject and GStreamer.
+import gobject as G
+import gst as GS
+
+###--------------------------------------------------------------------------
+### GStreamer utilities.
+
+def link_on_demand(src, sink, sinkpad = None, cap = None):
+  """
+  Link SINK to SRC when a pad appears.
+
+  More precisely, when SRC reports that a pad with media type matching the
+  `fnmatch' pattern CAP has appeared, link the pad of SINK named SINKPAD (or
+  some sensible pad by default).
+  """
+  def _link(src, srcpad):
+    if cap is None or FN.fnmatchcase(srcpad.get_caps()[0].get_name(), cap):
+      src.link_pads(srcpad.get_name(), sink, sinkpad)
+  src.connect('pad-added', _link)
+
+def make_element(factory, name = None, **props):
+  """
+  Return an element made by FACTORY with properties specified by PROPS.
+  """
+  elt = GS.element_factory_make(factory, name)
+  elt.set_properties(**props)
+  return elt
+
+def dump_pipeline(pipe, indent = 0):
+  done = {}
+  q = []
+  for e in pipe.iterate_sources():
+    q = [e]
+    while q:
+      e, q = q[0], q[1:]
+      if e in done:
+        continue
+      done[e] = True
+      print
+      print '%s%s %s' % ('  '*indent, type(e).__name__, e.get_name())
+      for p in e.pads():
+        c = p.get_negotiated_caps()
+        peer = p.get_peer()
+        print '%s  Pad %s %s (%s)' % \
+              ('  '*(indent + 1),
+               p.get_name(),
+               peer and ('<-> %s.%s' % (peer.get_parent().get_name(),
+                                        peer.get_name()))
+                    or 'unconnected',
+               c and c.to_string() or 'no-negotiated-caps')
+        if peer:
+          q.append(peer.get_parent())
+        if isinstance(e, GS.Bin):
+          dump_pipeline(e, indent + 1)
+
+def run_pipe(pipe, what):
+  """
+  Run a GStreamer pipeline PIPE until it finishes.
+  """
+  loop = G.MainLoop()
+  bus = pipe.get_bus()
+  bus.add_signal_watch()
+  def _bus_message(bus, msg):
+    if msg.type == GS.MESSAGE_ERROR:
+      SYS.stderr.write('error from pipeline: %s\n' % msg)
+      SYS.exit(1)
+    elif msg.type == GS.MESSAGE_STATE_CHANGED and \
+         msg.src == pipe and \
+         msg.structure['new-state'] == GS.STATE_PAUSED:
+      dump_pipeline(pipe)
+    elif msg.type == GS.MESSAGE_EOS:
+      loop.quit()
+  bus.connect('message', _bus_message)
+
+  pipe.set_state(GS.STATE_PLAYING)
+  loop.run()
+  GS.DEBUG_BIN_TO_DOT_FILE(pipe, 3, what)
+  pipe.set_state(GS.STATE_NULL)
+
+###--------------------------------------------------------------------------
+### Main program.
+
+## Read the command line arguments.
+input = SYS.argv[1]
+caps = GS.caps_from_string(SYS.argv[2])
+output = SYS.argv[3]
+
+## We want a temporary place to keep things.  This provokes a warning, but
+## `mkdir' is atomic and sane so it's not a worry.
+tmp = OS.tmpnam()
+OS.mkdir(tmp)
+try:
+
+  ## First step: produce raw PCM files from the original source and the
+  ## requested encoders.
+  q = 0
+  temps = []
+  for i in SYS.argv[4:]:
+    temp = OS.path.join(tmp, '%d.raw' % q)
+    temps.append(temp)
+    pipe = GS.Pipeline()
+    origin = make_element('filesrc', location = input)
+    decode_1 = make_element('decodebin')
+    convert_1 = make_element('audioconvert')
+    encode = GS.parse_bin_from_description(i, True)
+    decode_2 = make_element('decodebin')
+    convert_2 = make_element('audioconvert')
+    target = make_element('filesink', location = temp)
+    pipe.add(origin, decode_1, convert_1, encode,
+             decode_2, convert_2, target)
+    origin.link(decode_1)
+    link_on_demand(decode_1, convert_1)
+    ##convert_1.link(encode, GS.caps_from_string('audio/x-raw-float, channels=2'))
+    convert_1.link(encode)
+    encode.link(decode_2)
+    link_on_demand(decode_2, convert_2)
+    convert_2.link(target, caps)
+
+    run_pipe(pipe, 'input-%d' % q)
+    del pipe
+    print 'done %s' % i
+    q += 1
+  step = 1763520
+  lens = [OS.stat(i).st_size for i in temps]
+  blocks = (max(*lens) + step - 1)//step
+  while True:
+    seq = []
+    done = {}
+    for i in xrange(blocks):
+      j = SR.randrange(q)
+      done[j] = True
+      seq.append(j)
+    ok = True
+    for i in xrange(q):
+      if i not in done:
+        ok = False
+        break
+    if ok:
+      break
+  ff = [open(i, 'rb') for i in temps]
+  mix = OS.path.join(tmp, 'mix.raw')
+  out = open(mix, 'wb')
+  pos = 0
+  for i in seq:
+    f = ff[i]
+    f.seek(pos)
+    buf = f.read(step)
+    out.write(buf)
+    if len(buf) < step:
+      break
+    pos += step
+  out.close()
+  for f in ff:
+    f.close()
+
+  f = open(output + '.sequence', 'w')
+  f.write(', '.join([str(i) for i in seq]) + '\n')
+  f.close()
+
+  pipe = GS.Pipeline()
+  origin = make_element('filesrc', location = mix)
+  convert = make_element('audioconvert')
+  encode = make_element('flacenc', quality = 8)
+  target = make_element('filesink', location = output)
+  pipe.add(origin, convert, encode, target)
+  origin.link(convert, caps)
+  GS.element_link_many(convert, encode, target)
+
+  run_pipe(pipe, 'output')
+  del pipe
+  print 'all done'
+finally:
+  SH.rmtree(tmp)
diff --git a/misc/catalogue b/misc/catalogue

new file mode 100755 (executable)

index 0000000..56f3024
--- /dev/null
+++ b/misc/catalogue
@@ -0,0 +1,60 @@
+#! /bin/sh -e
+
+case $# in 0) set -- . ;; esac
+
+parse_vorbis_comment () {
+  tag=$1
+  
+  while read value; do
+  case "$value" in
+    artist=* | album=* | musicbrainz_albumid=*)
+      label=${value%%=*}
+      eval $label=\${value#*=}
+      eval have_$label=t
+      ;;
+  esac
+  done <<TAG
+$tag
+TAG
+}
+
+## Print `ALBUMID ARTIST | ALBUM' for a fully tagged file, then skip its directory.
+skip=
+find "$@" \( -name '*.flac' -o -name '*.ogg' -o -name '*.mp3' \) -print | \
+while IFS= read -r file; do
+  have_artist=nil have_album=nil have_musicbrainz_albumid=nil
+  ## A directory is skipped once reported; an empty `skip' must match nothing,
+  ## since a bare `/*' pattern would swallow every absolute pathname.
+  case "$skip" in ?*) case "$file" in "$skip"/*) continue ;; esac ;; esac
+  case "$file" in
+    *.flac)
+      parse_vorbis_comment "$(
+       metaflac --list --block-type=VORBIS_COMMENT "$file" |
+       sed -n '/^.*comment\[[0-9]*\]: /s///p')"
+      ;;
+    *.ogg)
+      parse_vorbis_comment "$(vorbiscomment "$file")"
+      ;;
+    *.mp3)
+      tag="$(id3v2 --list "$file")"
+      while read -r fourcc rest; do
+       rest=${rest#*): }
+       case "$fourcc,$rest" in
+         TPE1,*) artist=$rest have_artist=t ;;
+         TALB,*) album=$rest have_album=t ;;
+         TXXX,"(MusicBrainz Album Id): "*)
+           musicbrainz_albumid=${rest#*): }
+           have_musicbrainz_albumid=t
+           ;;
+       esac
+      done <<TAG
+$tag
+TAG
+  esac
+  case $have_artist,$have_album,$have_musicbrainz_albumid in
+    t,t,t)
+      echo "$musicbrainz_albumid $artist | $album"
+      skip=${file%/*}
+      ;;
+  esac
+done
diff --git a/misc/missing-replay-gain b/misc/missing-replay-gain

new file mode 100755 (executable)

index 0000000..a1a9f1f
--- /dev/null
+++ b/misc/missing-replay-gain
@@ -0,0 +1,30 @@
+#! /bin/sh -e
+
+dir=none
+find /mnt/jb/master -type f -print | sort | while read line; do
+  ok=t
+  case "$line" in "$dir"/*/*) ;; "$dir"/*) continue ;; esac
+  case "$line" in
+    *.flac)
+      if ! metaflac --list "$line" | grep -iq replaygain; then
+       ok=nil
+      fi
+      ;;
+    *.ogg)
+      if ! vorbiscomment -l "$line" | grep -iq replaygain; then
+       ok=nil
+      fi
+      ;;
+    *.mp3)
+      if ! mp3gain -s i -s c "$line" | grep -q "mp3 gain change"; then
+       ok=nil
+      fi
+      ;;
+  esac
+  case $ok in
+    nil)
+      dir=${line%/*}
+      echo "$dir"
+      ;;
+  esac
+done
diff --git a/unsorted/diff-flacdirs b/unsorted/diff-flacdirs

new file mode 100755 (executable)

index 0000000..e5e4d20
--- /dev/null
+++ b/unsorted/diff-flacdirs
@@ -0,0 +1,5 @@
+#! /bin/bash -e
+sums () {
+  metaflac --show-md5sum "$@" | awk -F: '{ print $2, $1 }'
+}
+wdiff -n <(sums "$1"/*.flac) <(sums "$2"/*.flac) | colordiff
diff --git a/unsorted/diff-flacraw b/unsorted/diff-flacraw

new file mode 100755 (executable)

index 0000000..7c0463f
--- /dev/null
+++ b/unsorted/diff-flacraw
@@ -0,0 +1,7 @@
+#! /bin/bash -e
+dump () {
+  flac --decode --output-name=- \
+        --force-raw-format --sign=signed --endian=little \
+        "$1" | xxd
+}
+colordiff -u <(dump "$1") <(dump "$2")
author	Mark Wooding <mdw@distorted.org.uk>
	Sat, 13 Feb 2016 18:39:07 +0000 (18:39 +0000)
committer	Mark Wooding <mdw@distorted.org.uk>
	Sat, 13 Feb 2016 18:39:07 +0000 (18:39 +0000)
coverart/chkimgsz	[new file with mode: 0755]	patch \| blob
coverart/coverart	[new file with mode: 0755]	patch \| blob
flaccrip/cat-prefix	[new file with mode: 0755]	patch \| blob
flaccrip/flaccrip-arfetch	[new file with mode: 0755]	patch \| blob
flaccrip/flaccrip-check	[new file with mode: 0755]	patch \| blob
flaccrip/flaccrip-compute	[new file with mode: 0755]	patch \| blob
flaccrip/flaccrip-decode	[new file with mode: 0755]	patch \| blob
flaccrip/flaccrip-discid	[new file with mode: 0755]	patch \| blob
flaccrip/flaccrip-guessoffset	[new file with mode: 0755]	patch \| blob
flaccrip/flaccrip-offset	[new file with mode: 0755]	patch \| blob
flaccrip/flaccrip-slide	[new file with mode: 0755]	patch \| blob
flaccrip/flaccrip-toc	[new file with mode: 0755]	patch \| blob
flaccrip/flaccrip-trackoffsets	[new file with mode: 0755]	patch \| blob
flaccrip/offset-album	[new file with mode: 0755]	patch \| blob
gremlin/gremlin	[new file with mode: 0755]	patch \| blob
make-multidisc-playlists/m3u-extinf	[new file with mode: 0755]	patch \| blob
make-multidisc-playlists/make-multidisc-playlists	[new file with mode: 0755]	patch \| blob
misc/ab-chop	[new file with mode: 0755]	patch \| blob
misc/catalogue	[new file with mode: 0755]	patch \| blob
misc/missing-replay-gain	[new file with mode: 0755]	patch \| blob
unsorted/diff-flacdirs	[new file with mode: 0755]	patch \| blob
unsorted/diff-flacraw	[new file with mode: 0755]	patch \| blob