From: Mark Wooding Date: Sat, 13 Feb 2016 18:39:07 +0000 (+0000) Subject: Initial import of unaltered files. X-Git-Tag: 0.1.0-pre0~1 X-Git-Url: https://git.distorted.org.uk/~mdw/autoys/commitdiff_plain/583b7e4aa8b685ebf609cf0867a62689d5e37c96 Initial import of unaltered files. These programs have been collected from various machines with a view to turning them into something that someone else might actually be able to use. Some won't make the cut and will be deleted; others may remain as simple scripts; and some will grow proper documentation and packaging. --- 583b7e4aa8b685ebf609cf0867a62689d5e37c96 diff --git a/coverart/chkimgsz b/coverart/chkimgsz new file mode 100755 index 0000000..60b4460 --- /dev/null +++ b/coverart/chkimgsz @@ -0,0 +1,11 @@ +#! /usr/bin/python + +from PIL import Image as I +from sys import argv, stdin + +minx, miny = map(int, argv[1:]) + +for line in stdin: + if line and line[-1] == '\n': line = line[:-1] + x, y = I.open(line).size + if x < minx or y < miny: print '%s (%dx%d)' % (line, x, y) diff --git a/coverart/coverart b/coverart/coverart new file mode 100755 index 0000000..789b06d --- /dev/null +++ b/coverart/coverart @@ -0,0 +1,483 @@ +#! 
/usr/bin/python +# -*- coding: utf-8 -*- + +import sys as SYS +import os as OS +from cStringIO import StringIO + +import gobject as G +import gtk as GTK +GDK = GTK.gdk +import cairo as XR + +import urllib as U +import urllib2 as U2 +import json as JS + +THUMBSZ = 96 + +class ImageCache (object): + + THRESH = 128*1024*1024 + + def __init__(me): + me._total = 0 + me._first = me._last = None + + def add(me, img): + me._total += img.size + while me._first and me._total > me.THRESH: + me._first.evict() + img._prev = me._last + img._next = None + if me._last: + me._last._next = img + else: + me._first = img + me._last = img + + def rm(me, img): + if img._prev: + img._prev._next = img._next + else: + me._first = img._next + if img._next: + img._next._prev = img._prev + else: + img._last = img._prev + me._total -= img.size + +CACHE = ImageCache() + +class CacheableImage (object): + + def __init__(me): + me._pixbuf = None + me._prev = me._next = None + me._thumb = None + + @property + def pixbuf(me): + if not me._pixbuf: + me._pixbuf = me._acquire() + me.size = me._pixbuf.get_pixels_array().nbytes + CACHE.add(me) + return me._pixbuf + + def evict(me): + me._pixbuf = None + CACHE.rm(me) + + def flush(me): + me.evict() + me._thumb = None + + @property + def thumbnail(me): + if not me._thumb: + me._thumb = Thumbnail(me) + return me._thumb + +class Thumbnail (object): + + def __init__(me, img): + pix = img.pixbuf + wd, ht = pix.get_width(), pix.get_height() + m = max(wd, ht) + if m <= THUMBSZ: + me.pixbuf = pix + else: + twd, tht = [(x*THUMBSZ + m//2)//m for x in [wd, ht]] + me.pixbuf = pix.scale_simple(twd, tht, GDK.INTERP_HYPER) + +class NullImage (CacheableImage): + + MAP = {} + + def __init__(me, size, text): + CacheableImage.__init__(me) + me._size = size + me._text = text + + @staticmethod + def get(cls, size): + try: + return cls.MAP[size] + except KeyError: + img = cls.MAP[size] = cls(size) + return img + + def _acquire(me): + + surf = XR.ImageSurface(XR.FORMAT_ARGB32, 
me._size, me._size) + xr = XR.Context(surf) + + xr.set_source_rgb(0.3, 0.3, 0.3) + xr.paint() + + xr.move_to(me._size/2.0, me._size/2.0) + xr.select_font_face('sans-serif', + XR.FONT_SLANT_NORMAL, XR.FONT_WEIGHT_BOLD) + xb, yb, wd, ht, xa, ya = xr.text_extents(me._text) + m = max(wd, ht) + z = me._size/float(m) * 2.0/3.0 + xr.scale(z, z) + + xr.set_source_rgb(0.8, 0.8, 0.8) + xr.move_to(3.0*m/4.0 - wd/2.0 - xb, 3.0*m/4.0 - ht/2.0 - yb) + xr.show_text(me._text) + + surf.flush() + pix = GDK.pixbuf_new_from_data(surf.get_data(), + GDK.COLORSPACE_RGB, True, 8, + me._size, me._size, surf.get_stride()) + return pix + +class FileImage (CacheableImage): + + def __init__(me, file): + CacheableImage.__init__(me) + me._file = file + + def _acquire(me): + return GDK.pixbuf_new_from_file(me._file) + +def fetch_url(url): + out = StringIO() + with U.urlopen(url) as u: + while True: + stuff = u.read(16384) + if not stuff: + break + out.write(stuff) + return out.getvalue() + +def fix_background(w): + style = w.get_style().copy() + style.base[GTK.STATE_NORMAL] = BLACK + style.bg[GTK.STATE_NORMAL] = BLACK + style.text[GTK.STATE_NORMAL] = WHITE + w.set_style(style) + +class BaseCoverViewer (object): + + def __init__(me): + me.scr = GTK.ScrolledWindow() + me.scr.set_policy(GTK.POLICY_AUTOMATIC, GTK.POLICY_AUTOMATIC) + me.iv = GTK.IconView() + me.iv.connect('item-activated', + lambda iv, p: me.activate(me._frompath(p))) + me.iv.connect('selection-changed', me._select) + me.iv.set_pixbuf_column(0) + me.iv.set_text_column(1) + me.iv.set_orientation(GTK.ORIENTATION_VERTICAL) + me.iv.set_item_width(THUMBSZ + 32) + fix_background(me.iv) + me.scr.add(me.iv) + me.reset() + + def reset(me): + me.list = GTK.ListStore(GDK.Pixbuf, G.TYPE_STRING, G.TYPE_PYOBJECT) + me.iv.set_model(me.list) + me.iv.unselect_all() + + def add(me, item): + item.it = me.list.append([item.img.thumbnail.pixbuf, + item.text, + item]) + + def _frompath(me, path): + return me.list[path][2] + + def _select(me, iv): + sel = 
me.iv.get_selected_items() + if len(sel) != 1: + me.select(None) + else: + me.select(me._frompath(sel[0])) + +class SearchCover (object): + def __init__(me, img): + me.img = img + pix = img.pixbuf + me.text = '%d×%d*' % (pix.get_width(), pix.get_height()) + +class SearchViewer (BaseCoverViewer): + + def __init__(me, chooser): + BaseCoverViewer.__init__(me) + me._chooser = chooser + + def switch(me, current): + me.reset() + if current: + cov = SearchCover(current) + me.add(cov) + me.iv.select_path(me.list.get_path(cov.it)) + + def activate(me, cov): + me._chooser.activated(cov) + + def select(me, cov): + me._chooser.selected(cov) + +class RemoteImage (CacheableImage): + + ERRIMG = NullImage(256, '!') + + def __init__(me, url, ref = None): + CacheableImage.__init__(me) + me._url = url + me._ref = ref + me._data = None + + def _fetch(me): + if me._data: + return + d = StringIO() + rq = U2.Request(me._url) + if me._ref: + rq.add_header('Referer', me._ref) + rs = U2.urlopen(rq) + while True: + stuff = rs.read(16384) + if not stuff: + break + d.write(stuff) + me._data = d.getvalue() + ld = GDK.PixbufLoader() + try: + o = 0 + n = len(me._data) + while True: + if o >= n: + raise ValueError, 'not going to work' + l = min(n, o + 16384) + ld.write(me._data[o:l]) + o = l + f = ld.get_format() + if f: + break + me._format = f + if 'image/gif' in f['mime_types']: + raise ValueError, 'boycotting GIF image' + finally: + try: + ld.close() + except G.GError: + pass + + def _acquire(me): + try: + me._fetch() + ld = GDK.PixbufLoader() + try: + ld.write(me._data) + finally: + ld.close() + return ld.get_pixbuf() + except Exception, e: + print e + return me.ERRIMG.pixbuf + + @property + def ext(me): + exts = me._format['extensions'] + for i in ['jpg']: + if i in exts: + return i + return exts[0] + +class SearchImage (RemoteImage): + + def __init__(me, url, ref, tburl): + RemoteImage.__init__(me, url, ref) + me._tburl = tburl + + @property + def thumbnail(me): + if not me._thumb: + 
me._thumb = Thumbnail(RemoteImage(me._tburl)) + return me._thumb + +class SearchResult (SearchCover): + + def __init__(me, r): + w = int(r['width']) + h = int(r['height']) + url = r['unescapedUrl'] + ref = r['originalContextUrl'] + tburl = r['tbUrl'] + me.img = SearchImage(url, ref, tburl) + me.text = '%d×%d' % (w, h) + +class SearchFail (Exception): + pass + +class CoverChooser (object): + + SEARCHURL = \ + 'http://ajax.googleapis.com/ajax/services/search/images?v=1.0&rsz=8&q=' + + def __init__(me): + me.win = GTK.Window() + box = GTK.VBox() + top = GTK.HBox() + me.query = GTK.Entry() + top.pack_start(me.query, True, True, 2) + srch = GTK.Button('_Search') + srch.set_flags(GTK.CAN_DEFAULT) + srch.connect('clicked', me.search) + top.pack_start(srch, False, False, 2) + box.pack_start(top, False, False, 2) + me.sv = SearchViewer(me) + panes = GTK.HPaned() + panes.pack1(me.sv.scr, False, True) + scr = GTK.ScrolledWindow() + scr.set_policy(GTK.POLICY_AUTOMATIC, GTK.POLICY_AUTOMATIC) + me.img = GTK.Image() + evb = GTK.EventBox() + evb.add(me.img) + fix_background(evb) + scr.add_with_viewport(evb) + panes.pack2(scr, True, True) + panes.set_position(THUMBSZ + 64) + box.pack_start(panes, True, True, 0) + me.win.add(box) + me.win.connect('destroy', me.destroyed) + me.win.set_default_size(800, 550) + srch.grab_default() + + def update(me, view, which, dir, current): + me.view = view + me.dir = dir + me.which = which + me.current = current + me.img.clear() + me.sv.switch(current) + me.query.set_text(me.makequery(dir)) + me.win.show_all() + + def search(me, w): + q = me.query.get_text() + try: + try: + rq = U2.Request(me.SEARCHURL + U.quote_plus(q), + None, + { 'Referer': + 'http://www.distorted.org.uk/~mdw/coverart' }) + rs = U2.urlopen(rq) + except U2.URLError, e: + raise SearchFail(e.reason) + result = JS.load(rs) + if result['responseStatus'] != 200: + raise SearchFail('%s (status = %d)' % + (result['responseDetails'], + result['responseStatus'])) + d = 
result['responseData'] + me.sv.switch(me.current) + for r in d['results']: + try: + me.sv.add(SearchResult(r)) + except (U2.URLError, U2.HTTPError): + pass + except SearchFail, e: + print e.args[0] + + def makequery(me, path): + bits = path.split(OS.path.sep) + return ' '.join(['"%s"' % p for p in bits[-2:]]) + + def selected(me, cov): + if cov: + me.img.set_from_pixbuf(cov.img.pixbuf) + else: + me.img.clear() + + def activated(me, cov): + if isinstance(cov, SearchCover): + me.view.replace(me.which, cov.img) + + def destroyed(me, w): + global CHOOSER + CHOOSER = None + +CHOOSER = None + +class ViewCover (object): + + NULLIMG = NullImage(THUMBSZ, '?') + + def __init__(me, dir, path, leaf): + me.text = dir + me.path = path + me.leaf = leaf + if me.leaf: + me.img = me.covimg = FileImage(OS.path.join(me.path, me.leaf)) + else: + me.img = me.NULLIMG + me.covimg = None + +class MainViewer (BaseCoverViewer): + + ITERATTR = 'vit' + + def __init__(me, root): + BaseCoverViewer.__init__(me) + me.root = root + me.walk('') + + def walk(me, dir): + leafp = True + b = OS.path.join(me.root, dir) + imgfile = None + for l in sorted(OS.listdir(b)): + if OS.path.isdir(OS.path.join(b, l)): + leafp = False + me.walk(OS.path.join(dir, l)) + else: + base, ext = OS.path.splitext(l) + if base == 'cover' and ext in ['.jpg', '.png', '.gif']: + imgfile = l + if leafp: + me.add(ViewCover(dir, OS.path.join(me.root, dir), imgfile)) + + def select(me, cov): + pass + + def activate(me, cov): + global CHOOSER + if not CHOOSER: + CHOOSER = CoverChooser() + CHOOSER.update(me, cov, cov.text, cov.covimg) + + def replace(me, cov, img): + leaf = 'cover.%s' % img.ext + out = OS.path.join(cov.path, leaf) + new = out + '.new' + with open(new, 'wb') as f: + f.write(img._data) + OS.rename(new, out) + if cov.leaf not in [None, leaf]: + OS.unlink(OS.path.join(cov.path, cov.leaf)) + ncov = ViewCover(cov.text, cov.path, leaf) + ncov.it = cov.it + me.list[ncov.it] = [ncov.img.thumbnail.pixbuf, ncov.text, ncov] + 
me.activate(ncov) + +ROOT = SYS.argv[1] + +LOOP = G.MainLoop() + +BLACK = GDK.Color(0, 0, 0) +WHITE = GDK.Color(65535, 65535, 65535) + +WIN = GTK.Window() +VIEW = MainViewer(ROOT) +WIN.add(VIEW.scr) +WIN.set_default_size(814, 660) +WIN.set_title('coverart') +WIN.connect('destroy', lambda _: LOOP.quit()) +WIN.show_all() + +LOOP.run() diff --git a/flaccrip/cat-prefix b/flaccrip/cat-prefix new file mode 100755 index 0000000..c3850cc --- /dev/null +++ b/flaccrip/cat-prefix @@ -0,0 +1,78 @@ +#! /usr/bin/tcc -run +/* -*-c-*- */ + +/* Some of the scripts in the `flaccrip' suite want to chop streams of PCM + * data about. Once upon a time, they used dd(1) for this, but dd does + * entirely the wrong thing on short reads, and short reads happen quite + * regularly on pipes. + * + * The requirements on this program are that it copy exactly the first N + * bytes from stdin to stdout, without reading anything else from stdin or + * writing anything else to stdout. (That's why it doesn't use stdio to do + * the copying: stdio buffering will read too much from stdin, which will + * cause stream corruption later.) + * + * As a special bonus, it's quite paranoid about error checking, and does way + * more work on dealing with short reads and writes than is necessary for + * working on pipes. It will fail miserably if either stdin or stdout is + * non-blocking. + */ + +#include +#include +#include +#include + +#include +#include + +int main(int argc, char *argv[]) +{ + unsigned long n; + size_t nn, nleft; + ssize_t sz; + char buf[40960], *p; + + if (argc != 2) { + fprintf(stderr, "Usage: %s LENGTH\n", argv[0]); + exit(1); + } + n = strtoul(argv[1], 0, 0); + + while (n) { + nn = nleft = n > sizeof(buf) ? 
sizeof(buf) : n; + p = buf; + while (nleft) { + sz = read(STDIN_FILENO, p, nleft); + if (sz < 0) { + if (errno == EINTR) + continue; + fprintf(stderr, "%s: read error: %s", argv[0], strerror(errno)); + exit(1); + } else if (!sz) { + fprintf(stderr, "%s: unexpected eof", argv[0]); + exit(1); + } + nleft -= sz; p += sz; + } + + nleft = nn; + p = buf; + while (nleft) { + sz = write(STDOUT_FILENO, p, nleft); + if (sz < 0) { + if (errno == EINTR) + continue; + fprintf(stderr, "%s: write error: %s", argv[0], strerror(errno)); + exit(1); + } else if (!sz) { + fprintf(stderr, "%s: empty write", argv[0]); + exit(1); + } + nleft -= sz; p += sz; + } + + n -= nn; + } + return (0); +} diff --git a/flaccrip/flaccrip-arfetch b/flaccrip/flaccrip-arfetch new file mode 100755 index 0000000..ddb2c12 --- /dev/null +++ b/flaccrip/flaccrip-arfetch @@ -0,0 +1,13 @@ +#! /bin/bash + +set -e +: ${JBDIR=/mnt/jb} +: ${ACCURATERIP_SERVER=http://www.accuraterip.com/accuraterip} + +case $# in + 0) set -- $($JBDIR/bin/flaccrip-discid -a .) ;; + 1) ;; + *) echo >&2 "Usage: $0 ACCURATERIP-ID"; exit 1 ;; +esac +id=$(echo "$1" | sed 's:...-.....\(.\)\(.\)\(.\).*:\3/\2/\1/dBAR-&:') +curl -fsS $ACCURATERIP_SERVER/$id.bin | $JBDIR/bin/flaccrip-decode diff --git a/flaccrip/flaccrip-check b/flaccrip/flaccrip-check new file mode 100755 index 0000000..bbc093a --- /dev/null +++ b/flaccrip/flaccrip-check @@ -0,0 +1,121 @@ +#! 
/bin/bash + +set -e +: ${JBDIR=/mnt/jb} + +force=nil +while getopts fo:i: opt; do + case "$opt" in + o) offset=$OPTARG ;; + f) force=t ;; + i) id=$OPTARG ;; + *) exit 1 ;; + esac +done +shift $((OPTIND - 1)) + +dir=$1; shift; cd "$dir" +case $# in + 0) + set $(ls | sed ' + /^\([0-9][0-9]\)[-.\ ].*\.flac$/!d + s//\1/ + ') + ;; +esac + +case "${offset+t}" in + t) ;; + *) if [ -r .offset ]; then read offset <.offset; else offset=0; fi ;; +esac + +: ${id=$($JBDIR/bin/flaccrip-discid -a .)} +ntr=$(echo "$id" | sed 's:^0*\([1-9][0-9]*\)-.*$:\1:') +ar=$($JBDIR/bin/flaccrip-arfetch $id) +npress=0 +while read type rest; do + case "$type" in + H) npress=$((npress + 1)) ;; + *) ;; + esac +done <&2 "$0: no AccurateRip record found" + exit 1 + ;; + *) + echo "Found $npress pressings" + ;; +esac + +cks="" ntrack=$# ngood=0 +for t in "$@"; do + while :; do case "$t" in 0*) t=${t#0} ;; *) break ;; esac; done + l=$((t - 1)) h=$((t + 1)) + tt=$(printf %02d "$t") + ll=$(printf %02d "$l") + hh=$(printf %02d "$h") + flags="" + unset before after + + if ((t == 1)); then + flags="${flags+$flags }-f" + elif [ -f "$ll"[-.\ ]*.flac ]; then + before=$(echo "$ll"[-.\ ]*.flac) + elif ((offset < 0)); then + echo >&2 "$0: warning: -ve offset, but track $l missing; using silence" + fi + + if ((t == ntr)); then + flags="${flags+$flags }-l" + elif [ -f "$hh"[-.\ ]*.flac ]; then + after=$(echo "$hh"[-.\ ]*.flac) + elif ((offset > 0)); then + echo >&2 "$0: warning: +ve offset, but track $h missing; using silence" + fi + + ck=$($JBDIR/bin/flaccrip-offset -o"$offset" \ + $flags ${before+-b "$before"} ${after+-a "$after"} "$tt"[-.\ ]*.flac | + $JBDIR/bin/flaccrip-compute) + + press=0 found=nil + while read arty art arconf arck; do + case "$arty" in + H) + press=$((press + 1)) + ;; + T) + case $art,$arck in + $t,$ck) + found=t + printf "Track %2d (%s): match pressing %d; confidence %d\n" \ + $t $ck $press $arconf + ngood=$((ngood + 1)) + break + ;; + esac + ;; + esac + done < +#include +#include 
+#include + +#include + +int main(int argc, char *argv[]) +{ + unsigned long ck = 0, t = 0; + unsigned long ns = 0; + unsigned long x; + unsigned f = 0; +#define F_ROLLING 1u + + for (;;) { + int opt = getopt(argc, argv, "b:r"); + if (opt < 0) break; + switch (opt) { + case 'b': ns = strtoul(optarg, 0, 0); break; + case 'r': f |= F_ROLLING; break; + default: exit(1); + } + } + for (;;) { + unsigned char b[4]; + if (!fread(b, 4, 1, stdin)) break; + x = (b[0] << 0) | (b[1] << 8) | (b[2] << 16) | (b[3] << 24); + ck += x*++ns; t += x; + } + if (ferror(stdin)) { + fprintf(stderr, "%s: read error: %s\n", argv[0], strerror(errno)); + exit(1); + } + printf("%08lx", ck & 0xffffffff); + if (f & F_ROLLING) printf(" %lu", t & 0xffffffff); + putchar('\n'); + return (0); +} diff --git a/flaccrip/flaccrip-decode b/flaccrip/flaccrip-decode new file mode 100755 index 0000000..bf29f34 --- /dev/null +++ b/flaccrip/flaccrip-decode @@ -0,0 +1,16 @@ +#! /usr/bin/python + +from struct import unpack +from sys import stdin + +p = 0 +while True: + h = stdin.read(13) + if h == '': + break + ntr, da, db, cid = unpack('&2 "Usage: $0 [-acCm] [DIRECTORY]" + exit 1 + ;; +esac + +###-------------------------------------------------------------------------- +### Main work. + +## Initial setup. +cddbck=0 +cddbtracks="" +nt=0 nat=0 +da=0 db=0 +mbtracks="" + +## Wander through the table of contents picking up unconsidered trifles. +while read type offset; do + + ## Bump the track numbers here. Most things want 1-based numbering, so + ## this is right. Don't bump for the end marker. Those who care + ## (AccurateRip) will sort it out for themselves. + case "$type" in + T) nt=$((nt + 1)) nat=$((nat + 1));; + D) nt=$((nt + 1)) ;; + esac + + ## Update the CDDB state. This is common to several formats. 
+ case "$type" in + [TD]) + o=$((offset + 150)) + s=$((o/75)) + cddbtracks="${cddbtracks:+$cddbtracks }$o" + while :; do + case "$s" in + ?*) cddbck=$((cddbck + ${s:0:1})); s=${s#?} ;; + *) break ;; + esac + done + ;; + E) + final=$offset + ;; + esac + + ## Update other bits of information. + case "$type" in + T) + da=$((da + offset)) + db=$((db + nat*(offset > 0 ? offset : 1))) + mbtracks="$mbtracks$(printf "%08X" $((offset + 150)))" + ;; + D) + mbfinal=$((offset - 11250)) + ;; + E) + da=$((da + offset)) + db=$((db + (nat + 1)*(offset > 0 ? offset : 1))) + case "${mbfinal+t}" in + t) ;; + *) mbfinal=$((offset + 150)) ;; + esac + ;; + esac +done + +## Sort out the CDDB id. +cddbid=$(printf "%02x%04x%02x" $((cddbck%255)) $((final/75)) $nt) + +###-------------------------------------------------------------------------- +### Produce the answer. + +case "$format" in + cddb) + echo "$cddbid" + ;; + cddb-tracks) + echo "$cddbid $nt $cddbtracks $((final/75 + 2))" + ;; + accuraterip) + printf "%03d-%08x-%08x-%s\n" $nat $da $db $cddbid + ;; + musicbrainz) + mb=$(printf "%02X%02X%08X%s" 1 $nat $mbfinal $mbtracks) + for ((i = nat; i < 99; i++)); do + mb="${mb}00000000" + done + printf "%s" $mb | + openssl dgst -sha1 -binary | + openssl base64 | tr '+/=' '._-' + ;; +esac diff --git a/flaccrip/flaccrip-guessoffset b/flaccrip/flaccrip-guessoffset new file mode 100755 index 0000000..bca4cda --- /dev/null +++ b/flaccrip/flaccrip-guessoffset @@ -0,0 +1,112 @@ +#! /bin/bash + +set -e +: ${JBDIR=/mnt/jb} + +while getopts i: opt; do + case "$opt" in + i) id=$OPTARG ;; + *) exit 1 ;; + esac +done +shift $((OPTIND - 1)) + +dir=$1; shift; cd "$dir" + +case $# in + 0) + set $(ls | sed ' + /^\([0-9][0-9]\)[-. 
].*\.flac$/!d + s//\1/ + ') + ;; +esac + +: ${id=$($JBDIR/bin/flaccrip-discid -a .)} +ntr=$(echo "$id" | sed 's:^0*\([1-9][0-9]*\)-.*$:\1:') +ar=$($JBDIR/bin/flaccrip-arfetch $id) +npress=0 +while read type rest; do + case "$type" in + H) npress=$((npress + 1)) ;; + *) ;; + esac +done <&2 "$0: no AccurateRip record found: this isn't going to work" + exit 1 + ;; + *) + echo "Found $npress pressings" + ;; +esac + +for t in "$@"; do + while :; do case "$t" in 0*) t=${t#0} ;; *) break ;; esac; done + l=$((t - 1)) h=$((t + 1)) + tt=$(printf %02d "$t") + ll=$(printf %02d "$l") + hh=$(printf %02d "$h") + flags="" + unset before after + + if ((t == 1)); then + flags="${flags+$flags }-f" + elif [ -f "$ll"[-.\ ]*.flac ]; then + before=$(echo "$ll"[-.\ ]*.flac) + else + echo >&2 "$0: warning: track $l missing; using silence" + fi + + if ((t == ntr)); then + flags="${flags+$flags }-l" + elif [ -f "$hh"[-.\ ]*.flac ]; then + after=$(echo "$hh"[-.\ ]*.flac) + else + echo >&2 "$0: warning: track $h missing; using silence" + fi + + echo "T $t" + + cks="" + while read arty art arconf arck; do + case "$arty,$art" in + T,$t) + cks="${cks:+$cks }$arck" + echo "C $arck $arconf" + ;; + esac + done < c) MC[m] = c; +} + +END { + for (m in MM) + print m " " MM[m] " " MC[m]; +}' | +sort -rn -k2 -k3 diff --git a/flaccrip/flaccrip-offset b/flaccrip/flaccrip-offset new file mode 100755 index 0000000..2fb7e53 --- /dev/null +++ b/flaccrip/flaccrip-offset @@ -0,0 +1,75 @@ +#! 
/bin/bash + +set -e +: ${JBDIR=/mnt/jb} + +zero_start=0 trim_end=0 offset=0 +while getopts flF:L:o:b:a: opt; do + case "$opt" in + f) zero_start=$((588 * 5 - 1)) ;; + l) trim_end=$((588 * 5)) ;; + F) zero_start=$OPTARG ;; + L) trim_end=$OPTARG ;; + o) offset=$OPTARG ;; + b) before=$OPTARG ;; + a) after=$OPTARG ;; + *) exit 1 ;; + esac +done + +shift $((OPTIND - 1)) +case $# in + 1) ;; + *) echo >&2 "missing file argument"; exit 1 ;; +esac +file=$1 + +len=$(metaflac --show-total-samples "$file") + +if ((offset < 0)); then + ## A negative offset ends |offset| samples short of the end of FILE. + start=0 + stop=$((len + offset)) +elif ((offset >= 0)); then + start=$offset + stop=$len +fi + +{ + if ((offset < 0)); then + if [ "${before+set}" ]; then + n=$(metaflac --show-total-samples "$before") + flac -d -s -o- --force-raw-format --sign=signed --endian=little \ + --skip=$((n + offset)) --until=$n \ + "$before" + else + $JBDIR/bin/cat-prefix $((4 * -offset)) 0)); then + if [ "${after+set}" ]; then + flac -d -s -o- --force-raw-format --sign=signed --endian=little \ + --skip=0 --until=$offset \ + "$after" + else + $JBDIR/bin/cat-prefix $((4 * offset)) 0)); then + $JBDIR/bin/cat-prefix $((4 * zero_start)) /dev/null + len=$((len - zero_start)) + fi + if ((trim_end > 0)); then + $JBDIR/bin/cat-prefix $((4 * (len - trim_end))) + $JBDIR/bin/cat-prefix $((4 * trim_end)) /dev/null + else + cat + fi +} diff --git a/flaccrip/flaccrip-slide b/flaccrip/flaccrip-slide new file mode 100755 index 0000000..c5e8323 --- /dev/null +++ b/flaccrip/flaccrip-slide @@ -0,0 +1,134 @@ +#! /usr/bin/tcc -run +/* -*-c-*- */ + +/* A simple program to compute AccurateRip checksums for a sliding window + * over a stream. The algorithm is based on an observation by Jon Lund + * Steffensen (http://jonls.dk/2009/10/calculating-accuraterip-checksums/). 
+ * + * The basic checksum is c = SUM_i (i + 1) S_i, where 0 <= i < n ranges over + * the sample numbers, and S_i is the data for the sample point, expressed as + * a single element of Z/2^{32}Z (a cheesy notational device which avoids me + * having to write `(mod 2^{32})' everywhere). + * + * Steffensen's observation is this: if T_i = S_{i+1} for 0 <= i < n - 1 then + * we can compute the checksum c' over the T_i given only a small quantity of + * data. Indeed, + * + * c' - c = SUM_{0<=i<n} (i + 1) (T_i - S_i) + * = n T_{n-1} - SUM_{0<=i<n} S_i + * + * so it is enough to carry the plain sum of the window's samples along with + * the checksum; that sum changes by T_{n-1} - S_0 at each step. + */ + +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include <unistd.h> + +int main(int argc, char *argv[]) +{ + /* `i0' must start at zero: `-i' is optional, and the checksum update + * multiplies by it unconditionally. + */ + unsigned long ns, ck, tot, i0 = 0, o = 0, x, y; + FILE *fp, *fs; + unsigned long *tv; + int i; + const char *quis = argv[0]; + unsigned char b[4]; + unsigned f = 0; +#define F_DEBUG 1u + + for (;;) { + int opt = getopt(argc, argv, "di:"); + if (opt == EOF) break; + switch (opt) { + case 'd': f |= F_DEBUG; break; + case 'i': i0 = strtoul(optarg, 0, 0); break; + default: exit(1); + } + } + argv += optind; argc -= optind; + + /* Five fixed arguments, and at least one target checksum to look for. */ + if (argc < 6) { + fprintf(stderr, "Usage: %s [-d] [-i I0] NSAMPLES CHECKSUM SUM " + "PREFIX SUFFIX TARGET ...\n", quis); + exit(1); + } + ns = strtoul(argv[0], 0, 0); + ck = strtoul(argv[1], 0, 16); + tot = strtoul(argv[2], 0, 0); + if ((fp = fopen(argv[3], "rb")) == 0) { + fprintf(stderr, "%s: open %s: %s\n", quis, argv[3], strerror(errno)); + exit(1); + } + if ((fs = fopen(argv[4], "rb")) == 0) { + fprintf(stderr, "%s: open %s: %s\n", quis, argv[4], strerror(errno)); + exit(1); + } + argv += 5; argc -= 5; + + /* The remaining arguments are the target checksums, in hex. */ + if ((tv = malloc(argc * sizeof(*tv))) == 0) { + fprintf(stderr, "%s: malloc: %s\n", quis, strerror(errno)); + exit(1); + } + for (i = 0; i < argc; i++) + tv[i] = strtoul(argv[i], 0, 16); + + for (;;) { + + if (f & F_DEBUG) { + fprintf(stderr, "%s: DEBUG: offset = %lu, ck = %08lx, tot = %lu\n", + quis, o, ck, tot); + } + + ck &= 0xffffffff; + for (i = 0; i < argc; i++) { + if (ck == tv[i]) { + printf("%lu %08lx\n", o, ck); + break; + } + } + + if (!fread(b, 4, 1, fp)) { + if 
(ferror(fp)) { + fprintf(stderr, "%s: read prefix: %s\n", quis, strerror(errno)); + exit(1); + } + break; + } + x = (b[0] << 0) | (b[1] << 8) | (b[2] << 16) | (b[3] << 24); + + if (!fread(b, 4, 1, fs)) { + if (ferror(fs)) { + fprintf(stderr, "%s: read suffix: %s\n", quis, strerror(errno)); + exit(1); + } + break; + } + y = (b[0] << 0) | (b[1] << 8) | (b[2] << 16) | (b[3] << 24); + + if (f & F_DEBUG) + fprintf(stderr, "%s: DEBUG: prefix = %08lx, suffix = %08lx\n", + quis, x, y); + + ck += ns*y - tot - i0*x; + tot += y - x; + o++; + } + + return (0); +} diff --git a/flaccrip/flaccrip-toc b/flaccrip/flaccrip-toc new file mode 100755 index 0000000..46e1790 --- /dev/null +++ b/flaccrip/flaccrip-toc @@ -0,0 +1,84 @@ +#! /bin/bash + +set -e + +###-------------------------------------------------------------------------- +### Table of contents representation. +### +### We need a simple table of contents representation. A toc file consists +### of a number of records, one per line; each record is a number of +### blank-delimited fields. Fields don't contain whitespace, so no quoting +### is needed. The first field of each record is a type which explains how +### to decode the rest. +### +### `T' START Audio track starts at START frames. +### +### `D' START Data track, starts at START frames. +### +### `E' END Leadout at END frames. + +###-------------------------------------------------------------------------- +### Command line options. + +hidden=0 +data=none +while getopts "d:h:" opt; do + case "$opt" in + h) hidden=$OPTARG ;; + d) data=$OPTARG ;; + *) exit 1 ;; + esac +done +shift $((OPTIND - 1)) + +case "$#" in + 1) ;; + *) echo >&2 "Usage: $0 [-d DATA] [-h HIDDEN] DIR|CD-DEVICE"; exit 1 ;; +esac +source=$1 + +###-------------------------------------------------------------------------- +### Work out what to do. + +if [ -d "$source" ]; then + + ## Intuit what's going on from a directory full of FLAC files. 
+ cd "$source" + for i in [0-9][0-9][-.\ ]*.flac; do + metaflac --show-total-samples "$i" + done | { + tn=1 tot=$hidden + while read samples; do + frames=$((samples/588)) + echo "T $tot" + tot=$((tot + frames)) + done + case "$data" in + none) + ;; + *) + tot=$((tot + 11400)) + echo "D $tot" + tot=$((tot + data)) + ;; + esac + echo "E $tot" + } + +elif [ -b "$source" ]; then + + ## Read a table of contents from a CD. + wodim dev="$source" -toc | sed ' + /^track:/ !d + s/^track:\(.*\) lba: *\([0-9][0-9]*\) (.*mode: *\([-0-9][0-9]*\) *$/\1 \3 \2/ + ' | while read track mode offset; do + case "$track,$mode" in + lout,-1) echo "E $offset" ;; + *,0 | *,-1) echo "T $offset" ;; + *,1 | *,2) echo "D $offset" ;; + esac + done + +else + echo >&2 "$0: don't know how to read a toc from $source" +fi diff --git a/flaccrip/flaccrip-trackoffsets b/flaccrip/flaccrip-trackoffsets new file mode 100755 index 0000000..0520eb4 --- /dev/null +++ b/flaccrip/flaccrip-trackoffsets @@ -0,0 +1,98 @@ +#! /bin/bash + +set -e +: ${JBDIR=/mnt/jb} + +i0=0 zero_start=0 trim_end=0 span=$((588*5 - 1)) debug="" +while getopts flF:L:i:b:a:s:d opt; do + case "$opt" in + f) zero_start=$((588*5 - 1)) ;; + l) trim_end=$((588*5)) ;; + F) zero_start=$OPTARG ;; + L) trim_end=$OPTARG ;; + b) before=$OPTARG ;; + a) after=$OPTARG ;; + s) span=$OPTARG ;; + d) debug=-d ;; + *) exit 1 ;; + esac +done +shift $((OPTIND - 1)) + +case $# in + 0 | 1) + echo >&2 "usage: $0 [-OPTIONS] FILE TARGET..." 
+ exit 1 + ;; +esac +file=$1; shift + +ns=$(metaflac --show-total-samples "$file") + +read ck tot < <( + { + if [ "${before+set}" ]; then + n=$(metaflac --show-total-samples "$before") + flac -d -s -o- --force-raw-format --sign=signed --endian=little \ + --skip=$((n - span)) \ + "$before" + else + $JBDIR/bin/cat-prefix $((4 * span)) 0)); then + $JBDIR/bin/cat-prefix $((4 * zero_start)) /dev/null + fi + cat + } | $JBDIR/bin/flaccrip-compute -r +) + +while read off ck; do + echo M $((off - span)) "$ck" +done < <( + $JBDIR/bin/flaccrip-slide \ + -i "$zero_start" $debug \ + $((ns - trim_end)) $ck $tot \ + <( + if ((span > zero_start)); then + tail=$((span - zero_start)) + if [ "${before+set}" ]; then + n=$(metaflac --show-total-samples "$before") + flac -d -s -o- --force-raw-format --sign=signed --endian=little \ + --skip=$((n - tail)) \ + "$before" + else + $JBDIR/bin/cat-prefix $((4 * tail)) trim_end)); then + flac -d -s -o- --force-raw-format --sign=signed --endian=little \ + --skip=$((ns - span - trim_end)) \ + "$file" + head=$((span - trim_end)) + if [ "${after+set}" ]; then + flac -d -s -o- --force-raw-format --sign=signed --endian=little \ + --until=$head \ + "$after" + else + $JBDIR/bin/cat-prefix $((4 * head)) &2 "Usage: $0 OFFSET SOURCEDIR TARGETDIR"; exit 1 ;; +esac + +offset=$1 sourcedir=$2 targetdir=$3 +mkdir "$targetdir" +tmp="$targetdir"/_tmp; mkdir "$tmp" +trap 'rm -rf "$targetdir"' EXIT INT TERM + +lens="" +for i in "$sourcedir"/*.flac; do + base=${i##*/} + metaflac --export-tags-to="$tmp/${base%.flac}.tags" "$i" + len=$(metaflac --show-total-samples "$i") + lens="${lens:+$lens }$len" +done + +{ + if ((offset < 0)); then + $JBDIR/bin/cat-prefix $((4 * -offset)) 0)); then + $JBDIR/bin/cat-prefix $((4 * offset)) 0)); then + $JBDIR/bin/cat-prefix $((4 * offset)) >/dev/null + fi + set -- $lens + for i in "$sourcedir"/*.flac; do + base=${i##*/} + len=$1; shift + $JBDIR/bin/cat-prefix $((4 * len)) >"$tmp"/"${base%.flac}.pcm" + flac --force-raw-format 
--sign=signed --endian=little --channels=2 \ + --bps=16 --sample-rate=44100 --best -o "$targetdir"/"$base" \ + "$tmp"/"${base%.flac}.pcm" + metaflac --remove-all-tags \ + --import-tags-from="$tmp"/"${base%.flac}.tags" \ + "$targetdir"/"$base" + done + if ((offset < 0)); then + $JBDIR/bin/cat-prefix $((4 * -offset)) >/dev/null + fi +} + +for i in "$sourcedir"/* "$sourcedir"/.[!.]*; do + if [ ! -e "$i" ]; then continue; fi + case "$i" in + *.flac) ;; + *) cp -r "$i" "$targetdir"/ ;; + esac +done + +rm -rf "$tmp" +trap '' EXIT INT TERM diff --git a/gremlin/gremlin b/gremlin/gremlin new file mode 100755 index 0000000..c9734d5 --- /dev/null +++ b/gremlin/gremlin @@ -0,0 +1,1660 @@ +#! /usr/bin/python +### +### Convert a directory tree of audio files +### +### (c) 2010 Mark Wooding +### + +###----- Licensing notice --------------------------------------------------- +### +### This program is free software; you can redistribute it and/or modify +### it under the terms of the GNU General Public License as published by +### the Free Software Foundation; either version 2 of the License, or +### (at your option) any later version. +### +### This program is distributed in the hope that it will be useful, +### but WITHOUT ANY WARRANTY; without even the implied warranty of +### MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +### GNU General Public License for more details. +### +### You should have received a copy of the GNU General Public License +### along with this program; if not, write to the Free Software Foundation, +### Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +###-------------------------------------------------------------------------- +### External dependencies. + +## Language features. +from __future__ import with_statement + +## Standard Python libraries. 
def charwidth(s):
    """
    Return the width of S, in character cells.

    This is the number of backspaces needed to overprint S.  If `stdout' has
    no encoding, or S can't be decoded using it, then just count the items of
    S.  Otherwise decode S and add up a width for each character: zero for
    format, enclosing and nonspacing marks and for code points in the range
    U+1160--U+11FF; two for East Asian full-width and wide characters; and
    one for anything else.

    Tab characters get no special treatment.
    """

    ## Without an output encoding there's nothing clever to be done.
    enc = SYS.stdout.encoding
    if enc is None:
        return len(s)

    ## Decode the string so that we can look at real characters.  If that
    ## doesn't work, fall back to the simple answer.
    try:
        text = s.decode(enc)
    except UnicodeError:
        return len(s)

    ## Work out how many cells a single character occupies.  The rules are
    ## mostly taken from http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c
    def cells(ch):
        if UD.category(ch) in ('Cf', 'Me', 'Mn'):
            return 0
        if 0x1160 <= ord(ch) <= 0x11ff:
            return 0
        if UD.east_asian_width(ch) in ('F', 'W'):
            return 2
        return 1

    ## Done.
    return sum(cells(ch) for ch in text)
class ProgressEyecandy (object):
    """
    Provide amusement while something big and complicated is happening.

    This is an abstract class.  Subclasses must provide a method `progress'
    returning a pair (CURRENT, MAX) indicating the current progress through
    the operation.  Either may be None if the progress isn't known.
    """

    ## Width of the progress bar, in character cells.
    _BARWIDTH = 40

    def __init__(me, what, silentp = False):
        """
        Initialize a progress meter.

        WHAT is a prefix string to be written before the progress eyecandy
        itself.  If SILENTP is true then `done' doesn't report successful
        completion (failures are still reported).
        """
        me._what = what
        me._silentp = silentp
        me._spinner = 0
        me._start = T.time()

    def _fmt_time(me, t):
        "Format T (seconds) as a time, in (maybe hours) minutes and seconds."
        s, t = t % 60, int(t/60)
        m, h = t % 60, int(t/60)
        if h > 0:
            return '%d:%02d:%02d' % (h, m, s)
        else:
            return '%02d:%02d' % (m, s)

    def show(me):
        "Show the current level of progress."

        ## If we're not showing pointless frippery, don't bother at all.
        if not STATUS.eyecandyp:
            return

        ## Update the spinner index.
        me._spinner = (me._spinner + 1)%4
        spin = r'/-\|'[me._spinner]

        ## Fetch the current progress information.  Note that we always
        ## fetch both the current and maximum levels, because both might
        ## change if an operation revises its idea of how much work needs
        ## doing.
        cur, end = me.progress()

        ## If we couldn't get progress information, display something
        ## vaguely amusing anyway.  A maximum which isn't positive is no use
        ## for drawing a bar -- we'd divide by zero or draw rubbish -- so
        ## treat that as unknown too.
        if cur is None or end is None or end <= 0:
            STATUS.set('%s %c [unknown progress]' % (me._what, spin))
            return

        ## Work out -- well, guess -- the time remaining.
        if cur:
            elapsed = T.time() - me._start
            eta = me._fmt_time(elapsed*(end - cur)/cur)
        else:
            eta = '???'

        ## Set the status bar.  Use explicit integer division, and keep the
        ## bar within its box even if the current level overshoots the
        ## (possibly estimated) maximum.
        n = max(0, min(me._BARWIDTH, int(me._BARWIDTH*cur//end)))
        STATUS.set('%s %c [%s%s] %3d%% (%s)' % \
                     (me._what,
                      spin,
                      '='*n, ' '*(me._BARWIDTH - n),
                      int(100*cur//end),
                      eta))

    def done(me, win = True):
        "Show a completion notice, or a failure if WIN is false."
        if not win:
            STATUS.set('%s FAILED!' % me._what)
        elif not me._silentp:
            STATUS.set('%s done (%s)' %
                       (me._what,
                        me._fmt_time(T.time() - me._start)))
        else:
            return
        STATUS.commit()
class FileCategory (object):
    """
    A FileCategory represents a class of files.

    For example, it's sensible to consider audio, or image files as a
    category.  A file category knows how to recognize member files from
    MIME content types.
    """

    def __init__(me, name, mime_pats, ident):
        """
        Construct a new category called NAME and register it.

        The MIME_PATS are a list of `fnmatch' patterns to be compared with a
        MIME type.  The IDENT is a function which produces an identification
        object given a file's name and first-guess MIME type.

        The new category is recorded in `CATEGORYMAP' under its NAME.
        """
        me._ident = ident
        me._mime_pats = mime_pats
        me.name = name
        CATEGORYMAP[name] = me

    def identify(me, file, mime):
        """
        Attempt to identify FILE, given its apparent MIME type.

        The identification function is tried once for each pattern which
        matches MIME, until it succeeds.  If identification succeeds, return
        the identification object; otherwise return None.
        """
        for pat in me._mime_pats:
            if FN.fnmatchcase(mime, pat):
                try:
                    return me._ident(file, mime)
                except IdentificationFailure:
                    ## Not one of ours after all.  Keep looking.
                    pass
        return None
def defformat(name, cls):
    """
    Define a format NAME using class CLS.

    CLS must have both `NAME' and `CATEGORY' attributes; otherwise a
    ValueError is raised and nothing is registered.  On success, CLS is
    stored in `FORMATMAP' under NAME.
    """
    for attr, complaint in (('NAME', 'abstract class'),
                            ('CATEGORY', 'no category')):
        if not hasattr(cls, attr):
            raise ValueError(complaint)
    FORMATMAP[name] = cls
def prop(kw, pval, tag = None):
    """
    Return a parser element for a format property introduced by keyword KW.

    If PVAL is None then the property is a flag: the bare keyword is matched
    and the element produces the pair (TAG, True).  Otherwise PVAL is a
    parser element for the property's value: the element matches KW, an `='
    sign and then PVAL, and produces the pair (TAG, VALUE) where VALUE is
    the first token of the match.

    TAG defaults to KW.
    """
    key = kw if tag is None else tag
    if pval is None:
        elt = K(kw)
        elt.setParseAction(lambda s, l, t: (key, True))
    else:
        elt = K(kw) + D('=') + pval
        elt.setParseAction(lambda s, l, t: (key, t[0]))
    return elt
class ConvertAction (Action):
    """
    An Action object for converting a file to a given format.

    Additional attributes:

    id          The identification object for the master file.

    format      The format to which we're meant to convert the master.
    """

    def __init__(me, master, targetdir, id, format):
        """
        Initialize a ConvertAction.

        The target file lives in TARGETDIR, and is named after MASTER with
        its extension replaced by the FORMAT's `EXT'.
        """
        Action.__init__(me, master)
        leaf = OS.path.basename(master)
        stem = OS.path.splitext(leaf)[0]
        me.id = id
        me.format = format
        me.target = OS.path.join(targetdir, stem + '.' + format.EXT)

    def __str__(me):
        "Describe the action, mentioning the target format's name."
        return 'convert to %s' % me.format.NAME

    def perform(me):
        "Actually perform a ConvertAction."
        STATUS.set(filestatus(me.master, me))
        me.format.convert(me.master, me.id, me.target)
class AndPolicy (FormatPolicy):
    """
    A FormatPolicy which does the union of a bunch of other policies.

    Each subsidiary policy is invoked in turn.  The highest-priority action
    for each target file is returned.
    """

    def __init__(me, policies):
        "Remember the list of subsidiary POLICIES."
        me._policies = policies

    def setcategory(me, cat):
        "Store CAT as our category, and pass it on to the subsidiaries."
        me.cat = cat
        for policy in me._policies:
            policy.setcategory(cat)

    def actions(me, master, targetdir, id, cohort):
        """
        Collect the actions proposed by all of the subsidiary policies.

        If several actions share a target, they are compared using the
        actions' `choose' method and only the winner is kept.
        """
        best = {}
        for policy in me._policies:
            for act in policy.actions(master, targetdir, id, cohort):
                try:
                    rival = best[act.target]
                except KeyError:
                    best[act.target] = act
                else:
                    best[act.target] = act.choose(rival)
        return best.values()
class ConvertPolicy (FormatPolicy):
    """
    A FormatPolicy which copies files in a particular format or converts if
    necessary.
    """

    def __init__(me, format):
        "Remember the target FORMAT."
        me._format = format

    def setcategory(me, cat):
        """
        Store CAT as the policy's category.

        Raise ValueError if our format belongs to a different category.
        """
        if me._format.CATEGORY is not cat:
            ## This message used to claim to be about an `Accept' format,
            ## which pointed at the wrong policy clause.
            raise ValueError(
                "Convert format `%s' has category `%s', not `%s'" %
                (me._format.__class__.__name__,
                 me._format.CATEGORY.name, cat.name))
        me.cat = cat

    def actions(me, master, targetdir, id, cohort):
        """
        Return a single action for MASTER.

        If the file identified by ID already satisfies the format's `check'
        then it is simply copied; otherwise it is converted.  The COHORT is
        not consulted.
        """
        if me._format.check(id):
            return [CopyAction(master, targetdir)]
        else:
            return [ConvertAction(master, targetdir, id, me._format)]
class AudioIdentifier (object):
    """
    Analyses and identifies an audio file.

    Important properties are:

    cap         A capabilities structure describing the audio file data.
                The most interesting thing in here is probably its name,
                which is a MIME type describing the data.

    dcap        A capabilities structure describing the decoded audio data.
                This is of interest during conversion.

    mime        A set containing the first-guess MIME type and the one
                GStreamer came up with.

    tags        A dictionary containing metadata tags from the file.  These
                are in GStreamer's encoding-independent format.

    bitrate     An approximation to the stream's bitrate, in kilobits per
                second.  This might be slow to work out for some files so
                it's computed on demand.
    """

    def __init__(me, file, mime):
        """
        Initialize the object suitably for identifying FILE.

        MIME is the caller's first guess at the file's MIME type.  Raises
        ValueError, carrying GStreamer's debug text, if the pipeline reports
        an error.
        """

        ## Make some initial GStreamer objects.  We'll want the pipeline
        ## later if we need to analyse a poorly tagged MP3 stream, so save
        ## it away.
        me._pipe = GS.Pipeline()
        me._file = file
        bus = me._pipe.get_bus()
        bus.add_signal_watch()
        loop = G.MainLoop()

        ## The basic recognition kit is based around `decodebin'.  We must
        ## keep it happy by giving it sinks for the streams it's found,
        ## which it announces asynchronously.
        source = make_element('filesrc', 'file', location = file)
        decoder = make_element('decodebin', 'decode')
        sink = make_element('fakesink')
        def decoder_pad_arrived(elt, pad):
            if pad.get_caps()[0].get_name().startswith('audio/'):
                elt.link_pads(pad.get_name(), sink, 'sink')
        dpaid = decoder.connect('pad-added', decoder_pad_arrived)
        me._pipe.add(source, decoder, sink)
        GS.element_link_many(source, decoder)

        ## Arrange to collect tags from the pipeline's bus as they're
        ## reported.  If we reuse the pipeline later, we'll want different
        ## bus-message handling, so make sure we can take the signal handler
        ## away.
        tags = {}
        fail = []
        def bus_message(bus, msg):
            if msg.type == GS.MESSAGE_ERROR:
                fail[:] = [msg.structure['debug']]
                loop.quit()
            elif msg.type == GS.MESSAGE_STATE_CHANGED:
                if msg.structure['new-state'] == GS.STATE_PAUSED and \
                      msg.src == me._pipe:
                    loop.quit()
            elif msg.type == GS.MESSAGE_TAG:
                tags.update(msg.structure)
        bmid = bus.connect('message', bus_message)

        ## We want to identify the kind of stream this is.  (Hmm.  The MIME
        ## type recognizer has already done this work, but GStreamer is
        ## probably more reliable.)  The `decodebin' has a `typefind'
        ## element inside which will announce the identified media type.
        ## All we need to do is find it.
        me.cap = None
        me.dcap = None
        for e in decoder.elements():
            if e.get_factory().get_name() == 'typefind':
                tfelt = e
                break
        else:
            ## Raise explicitly, so that the check survives `python -O'.
            raise AssertionError('failed to find typefind element')

        ## Crank up most of the heavy machinery.  The message handler will
        ## stop the loop when things seem to be sufficiently well underway.
        me._pipe.set_state(GS.STATE_PAUSED)
        loop.run()
        bus.disconnect(bmid)
        decoder.disconnect(dpaid)
        if fail:
            me._pipe.set_state(GS.STATE_NULL)
            raise ValueError(fail[0])

        ## Store the collected tags.
        me.tags = tags

        ## Gather the capabilities.  The `typefind' element knows the input
        ## data type.  The sink after the `decodebin' knows the raw data
        ## type.
        me.cap = tfelt.get_pad('src').get_negotiated_caps()[0]
        me.mime = set([mime, me.cap.get_name()])
        me.dcap = sink.get_pad('sink').get_negotiated_caps()[0]

        ## If we found a plausible bitrate then stash it.  Otherwise note
        ## that we failed.  If anybody asks then we'll work it out then.
        if 'nominal-bitrate' in tags:
            me._bitrate = tags['nominal-bitrate']//1000
        elif 'bitrate' in tags and tags['bitrate'] >= 80000:
            me._bitrate = tags['bitrate']//1000
        else:
            me._bitrate = None

        ## The bitrate computation wants the file size.  Ideally we'd want
        ## the total size of the frames' contents, but that seems hard to
        ## dredge out.  If the framing overhead is small, this should be
        ## close enough for our purposes.
        me._bytes = OS.stat(file).st_size

    def __del__(me):
        "Close the pipeline down so we don't leak file descriptors."
        ## The pipeline might be missing if construction failed early.
        pipe = getattr(me, '_pipe', None)
        if pipe is not None:
            pipe.set_state(GS.STATE_NULL)

    @property
    def bitrate(me):
        """
        Return the approximate bit-rate of the input file.

        This might take a while if we have to work it out the hard way, by
        playing the whole stream through.  Raises ValueError, carrying
        GStreamer's debug text, if the pipeline reports an error.
        """

        ## If we already know the answer then just return it.
        if me._bitrate is not None:
            return me._bitrate

        ## Make up a new main loop.
        loop = G.MainLoop()

        ## Watch for bus messages.  We'll stop when we reach the end of the
        ## stream: then we'll have a clear idea of how long the track was.
        fail = []
        def bus_message(bus, msg):
            if msg.type == GS.MESSAGE_ERROR:
                fail[:] = [msg.structure['debug']]
                loop.quit()
            elif msg.type == GS.MESSAGE_EOS:
                loop.quit()
        bus = me._pipe.get_bus()
        bmid = bus.connect('message', bus_message)

        ## Get everything moving, and keep the user amused while we work.
        ## The meter wants the label and the pipeline as separate arguments,
        ## and the label is built from the file name we saved earlier.
        me._pipe.set_state(GS.STATE_PLAYING)
        with GStreamerProgressEyecandy(filestatus(me._file,
                                                  'measure bitrate'),
                                       me._pipe,
                                       silentp = True):
            loop.run()
        bus.disconnect(bmid)
        if fail:
            me._pipe.set_state(GS.STATE_NULL)
            raise ValueError(fail[0])

        ## Now we should be able to find out our position accurately and
        ## work out a bitrate.  Cache it in case anybody asks again.
        t, hukairz = me._pipe.query_position(GS.FORMAT_TIME)
        me._bitrate = int(8*me._bytes*1e6/t)

        ## Done.
        return me._bitrate
+ + Subclasses can either override this method or provide a property + `MIMETYPES', which is a list (other thing that implements `__contains__') + of GStreamer MIME types matching this format. + """ + return id.mime & me.MIMETYPES and \ + (me.bitrate is None or id.bitrate <= me.bitrate * sqrt(2)) + + def encoder(me): + """ + Constructs a GStreamer element to encode audio input. + + Subclasses can either override this method (or replace `encode' + entirely), or provide a method `encoder_chain' which returns a list of + elements to be linked together in sequence. The first element in the + chain must have a pad named `sink' and the last must have a pad named + `src'. + """ + elts = me.encoder_chain() + bin = GS.Bin() + bin.add(*elts) + GS.element_link_many(*elts) + bin.add_pad(GS.GhostPad('sink', elts[0].get_pad('sink'))) + bin.add_pad(GS.GhostPad('src', elts[-1].get_pad('src'))) + return bin + + def convert(me, master, id, target): + """ + Encode audio from MASTER, already identified as ID, writing it to TARGET. + + See `encoder' for subclasses' responsibilities. + """ + + ## Construct the necessary equipment. + pipe = GS.Pipeline() + bus = pipe.get_bus() + bus.add_signal_watch() + loop = G.MainLoop() + + ## Make sure that there isn't anything in the way of our output. We're + ## going to write to a scratch file so that we don't get confused by + ## half-written rubbish left by a crashed program. + new = target + '.new' + try: + OS.unlink(new) + except OSError, err: + if err.errno != E.ENOENT: + raise + + ## Piece together our pipeline. The annoying part is that the + ## `decodebin' doesn't have any source pads yet, so our chain is in two + ## halves for now. 
+ source = make_element('filesrc', 'source', location = master) + decoder = make_element('decodebin', 'decode') + convert = make_element('audioconvert', 'convert') + encoder = me.encoder() + sink = make_element('filesink', 'sink', location = new) + pipe.add(source, decoder, convert, encoder, sink) + GS.element_link_many(source, decoder) + GS.element_link_many(convert, encoder, sink) + + ## Some decoders (e.g., the AC3 decoder) include channel-position + ## indicators in their output caps. The Vorbis encoder interferes with + ## this, and you end up with a beautifully encoded mono signal from a + ## stereo source. From a quick butchers at the `vorbisenc' source, I + ## /think/ that this is only a problem with stereo signals: mono signals + ## are mono already, and `vorbisenc' accepts channel positions if there + ## are more than two channels. + ## + ## So we have this bodge. We already collected the decoded audio caps + ## during identification. So if we see 2-channel audio with channel + ## positions, we strip the positions off forcibly by adding a filter. + if id.dcap.get_name().startswith('audio/x-raw-') and \ + id.dcap.has_field('channels') and \ + id.dcap['channels'] == 2 and \ + id.dcap.has_field('channel-positions'): + dcap = GS.Caps() + c = id.dcap.copy() + c.remove_field('channel-positions') + dcap.append(c) + else: + dcap = None + + ## Hook onto the `decodebin' so we can link together the two halves of + ## our encoding chain. For now, we'll hope that there's only one audio + ## stream in there, and just throw everything else away. + def decoder_pad_arrived(elt, pad): + if pad.get_caps()[0].get_name().startswith('audio/'): + if dcap: + elt.link_pads_filtered(pad.get_name(), convert, 'sink', dcap) + else: + elt.link_pads(pad.get_name(), convert, 'sink') + decoder.connect('pad-added', decoder_pad_arrived) + + ## Watch the bus for completion messages. 
class OggVorbisFormat (AudioFormat):
    "AudioFormat object for Ogg Vorbis."

    ## Pairs (QUALITY, BITRATE), in ascending order.
    ## From http://en.wikipedia.org/wiki/Vorbis
    QMAP = [(-1,  45), ( 0,  64), ( 1,  80), ( 2,  96),
            ( 3, 112), ( 4, 128), ( 5, 160), ( 6, 192),
            ( 7, 224), ( 8, 256), ( 9, 320), (10, 500)]

    NAME = 'Ogg Vorbis'
    MIMETYPES = set(['application/ogg', 'audio/x-vorbis', 'audio/ogg',
                     'audio/x-vorbis+ogg'])
    EXT = 'ogg'

    def encoder_chain(me):
        """
        Return the list of elements for encoding Ogg Vorbis.

        The encoder quality is taken from the first `QMAP' entry whose
        bitrate is at least the one we were asked for.  Raises ValueError
        if there is no such entry.
        """
        quality = None
        for q, ceiling in me.QMAP:
            if ceiling >= me.bitrate:
                quality = q
                break
        if quality is None:
            raise ValueError('no suitable quality setting found')
        encoder = make_element('vorbisenc', quality = quality/10.0)
        muxer = make_element('oggmux')
        return [encoder, muxer]
class ImageIdentifier (object):
  """
  Analyses and identifies an image file.

  The opened image is left in the `img' attribute for inspection, and the
  MIME type supplied by the caller is kept, as a one-element set, in `mime'.
  """

  def __init__(me, file, mime):
    """
    Open FILE as an image, remembering the MIME type it was found under.

    Raises IdentificationFailure if the imaging library doesn't recognize
    the file; genuine I/O errors are propagated unchanged.
    """

    ## Let the imaging library work out what kind of file this is.
    try:
      img = I.open(file)
    except IOError as exc:

      ## A real I/O error carries an `errno'; pass those on untouched.
      if exc.errno is not None:
        raise

      ## Otherwise this is just the library's unhelpful way of saying that
      ## it couldn't identify the file.
      raise IdentificationFailure

    me.img = img
    me.mime = set([mime])
class Grobbler (object):
  """
  The directory grobbler copies a directory tree, converting files.

  Policies are filed by their category (`cat' attribute).  Each file in the
  master tree is offered to every category for identification, and the
  policies of the categories which recognize it decide what ends up in the
  target tree.  Anything in the target tree which isn't accounted for is
  deleted.
  """

  def __init__(me, policies, noact = False):
    """
    Create a new Grobbler, working with the given POLICIES.

    If NOACT is true then the filesystem is not modified: the actions which
    would have been taken are reported through `STATUS' instead.
    """
    me._pmap = {}
    me._noact = noact
    for p in policies:
      me._pmap.setdefault(p.cat, []).append(p)

    ## Stack of directory names below the master root, for status display.
    me._dirs = []

  def _grobble_file(me, master, targetdir, cohorts):
    """
    Convert MASTER, writing the result to TARGETDIR.

    The COHORTS are actually (CAT, ID, COHORT) triples, where a COHORT is a
    list of (FILENAME, ID) pairs.

    Since this function might convert the MASTER file, the caller doesn't
    know the name of the output files, so we return them as a list.
    """

    done = set()
    st_m = OS.stat(master)

    ## Work through each category listed and apply its policy.
    for cat, id, cohort in cohorts:

      ## Go through the category's policies and see if any match.  If we fail
      ## here, see if there are more categories to try.
      for pol in me._pmap[cat]:
        acts = pol.actions(master, targetdir, id, cohort)
        if acts: break
      else:
        continue

      ## Work through the targets one by one.
      for a in acts:
        done.add(a.target)

        ## Find out whether the target file already exists and is up-to-date
        ## with respect to the master.  (Caution here with low-resolution
        ## timestamps.)  If it's OK, then just move on.  A target which is
        ## the very same file as the master (same inode and device) is also
        ## left alone.
        try:
          st_t = OS.stat(a.target)
          if st_m.st_mtime < st_t.st_mtime or \
             (st_m.st_ino, st_m.st_dev) == (st_t.st_ino, st_t.st_dev):
            continue
        except OSError as err:
          if err.errno not in (E.ENOENT, E.ENOTDIR):
            raise

        ## We have real work to do.  If there's a current status message,
        ## it's the containing directory so flush it so that people know
        ## where we are.
        STATUS.commit()

        ## Remove the target.  (A hardlink will fail if the target already
        ## exists.)
        if not me._noact:
          try:
            OS.unlink(a.target)
          except OSError as err:
            if err.errno not in (E.ENOENT, E.ENOTDIR):
              raise

        ## Do whatever it is we decided to do.
        if me._noact:
          STATUS.commit(filestatus(master, a))
        else:
          a.perform()

    ## We're done.  Return the names of the targets.
    return list(done)

  @contextmanager
  def _wrap(me, masterfile):
    """
    Handle exceptions found while trying to convert a particular file or
    directory.

    IOError and OSError are reported and recorded in `me._broken' as
    (MASTERFILE, EXCEPTION) pairs; anything else propagates.
    """

    try:
      yield masterfile

    ## Something bad happened.  Report the error, but continue.  (This list
    ## of exceptions needs a lot of work.)
    except (IOError, OSError) as exc:
      STATUS.clear()
      STATUS.commit(filestatus(masterfile, 'failed (%s)' % exc))
      me._broken.append((masterfile, exc))

  def _grobble_dir(me, master, target):
    """
    Recursively convert files in MASTER, writing them to TARGET.

    Returns a list containing TARGET, so that the caller can check it off
    its own list of wanted files.
    """

    ## Make sure the TARGET exists and is a directory.  It's a fundamental
    ## assumption of this program that the entire TARGET tree is disposable,
    ## so if something exists but isn't a directory, we should kill it.
    if not OS.path.isdir(target):
      if OS.path.exists(target):
        STATUS.commit(filestatus(target, 'clear nondirectory'))
        if not me._noact:
          OS.unlink(target)
      STATUS.commit(filestatus(target, 'create directory'))
      if not me._noact:
        OS.mkdir(target)

    ## Keep a list of things in the target.  As we convert files, we'll check
    ## them off.  Anything left over is rubbish and needs to be deleted.
    ## (The target might not exist if we're not really doing anything.)
    checklist = {}
    try:
      for i in OS.listdir(target):
        checklist[i] = False
    except OSError as err:
      if err.errno not in (E.ENOENT, E.ENOTDIR):
        raise

    ## Keep track of the files in each category.
    catmap = {}
    todo = []
    done = []

    ## Work through the master files.
    for f in sorted(OS.listdir(master)):

      ## If the killswitch has been pulled then stop.  The whole idea is that
      ## we want to cause a clean shutdown if possible, so we don't want to
      ## do it in the middle of encoding because the encoding effort will
      ## have been wasted.  This is the only place we need to check.  If
      ## we've exited the loop, then clearing old files will probably be
      ## fast, and we'll either end up here when the recursive call returns
      ## or we'll be in the same boat as before, clearing old files, only up
      ## a level.  If worst comes to worst, we'll be killed forcibly
      ## somewhere inside `SH.rmtree', and that can continue where it left
      ## off.
      ##
      ## We must still return a list: our caller may be a recursive
      ## invocation which appends our result to its own `done' list, and a
      ## bare `return' would make that fail with a TypeError instead of
      ## shutting down cleanly.
      if KILLSWITCH.is_set():
        return [target]

      ## Do something with the file.
      with me._wrap(OS.path.join(master, f)) as masterfile:

        ## If it's a directory then grobble it recursively.  Keep the user
        ## amused by telling him where we are in the tree.
        if OS.path.isdir(masterfile):
          me._dirs.append(f)
          STATUS.set('/'.join(me._dirs))
          try:
            done += me._grobble_dir(masterfile, OS.path.join(target, f))
          finally:
            me._dirs.pop()
            STATUS.set('/'.join(me._dirs))

        ## Otherwise it's a file.  Work out what kind, and stash it under
        ## the appropriate categories.  Later, we'll apply policy to the
        ## files, by category, and work out what to do with them all.
        else:
          gf = GIO.File(masterfile)
          mime = gf.query_info('standard::content-type').get_content_type()
          cats = []
          for cat in me._pmap:
            id = cat.identify(masterfile, mime)
            if id is None: continue
            catmap.setdefault(cat, []).append((masterfile, id))
            cats.append((cat, id))

          ## Nobody recognized it.  File it under `None' with no
          ## identification (explicitly, rather than relying on whatever the
          ## loop above happened to leave behind).
          if not cats:
            catmap.setdefault(None, []).append((masterfile, None))
          todo.append((masterfile, cats))

    ## Work through the categorized files to see what actions to do for
    ## them.
    for masterfile, cats in todo:
      with me._wrap(masterfile):
        done += me._grobble_file(masterfile, target,
                                 [(cat, id, catmap[cat])
                                  for cat, id in cats])

    ## Check the results off the list so that we don't clear it later.
    for f in done:
      checklist[OS.path.basename(f)] = True

    ## Maybe there's stuff in the target which isn't accounted for.  Delete
    ## it: either the master has changed, or the policy for this target has
    ## changed.  Either way, the old files aren't wanted.
    for f in checklist:
      if not checklist[f]:
        STATUS.commit(filestatus(f, 'clear bogus file'))
        if not me._noact:
          bogus = OS.path.join(target, f)
          try:
            if OS.path.isdir(bogus):
              SH.rmtree(bogus)
            else:
              OS.unlink(bogus)
          except OSError as err:
            if err.errno != E.ENOENT:
              raise

    ## Return the target name, so that it can be checked off.
    return [target]

  def grobble(me, master, target):
    """
    Convert MASTER, writing a directory tree TARGET.

    Returns a list of (FILE, EXCEPTION) pairs for the files which couldn't
    be converted.
    """
    try:
      me._broken = []
      me._grobble_dir(master, target)
      return me._broken
    finally:
      del me._broken
QUIS = OS.path.basename(SYS.argv[0])

def moan(msg):
  "Report a warning message to the user."
  SYS.stderr.write('%s: %s\n' % (QUIS, msg))

def die(msg):
  "Report a fatal error message to the user, and exit with status 1."
  moan(msg)
  SYS.exit(1)

def parse_time(arg):
  """
  Convert the time specification ARG into a number of seconds.

  ARG is a decimal integer optionally followed by a unit: `s' (seconds, the
  default), `m' (minutes), `h' (hours) or `d' (days).  Whitespace around
  the pieces is ignored.  Raises `OptionValueError' if ARG doesn't have
  this form.
  """

  ## Anchor the pattern at the end: `match' only anchors at the start, so
  ## without this, trailing rubbish such as `10x' would be quietly accepted.
  m = RX.match(r'\s*(\d+)\s*([dhms]?)\s*$', arg)
  if not m:
    raise OP.OptionValueError('bad time value `%s\'' % arg)
  t, u = m.groups()
  return int(t) * { '': 1, 's': 1, 'm': 60, 'h': 3600, 'd': 86400 }[u]

def parse_opts(args):
  """
  Parse command-line arguments in ARGS.

  Returns the parsed options object.  As side effects, this enables
  progress display and starts the timeout thread if requested, and reads
  the configuration file named on the command line, performing each of the
  jobs found there.
  """

  ## Build the option parser object.
  op = OP.OptionParser(prog = QUIS, version = VERSION,
                       usage = '%prog [-t TIMEOUT] CONFIG',
                       description = """\
Convert a directory tree of files according to the configuration file
CONFIG.
""")

  ## Timeout handling.
  def cb_time(opt, ostr, arg, op):
    setattr(op.values, opt.dest, parse_time(arg))
  op.add_option('-t', '--timeout', type = 'string', metavar = 'SECS',
                dest = 'timeout',
                help = 'stop processing nicely after SECS',
                action = 'callback', callback = cb_time)
  op.add_option('-T', '--timeout-nasty', type = 'string', metavar = 'SECS',
                dest = 'timeout_nasty',
                help = 'stop processing unpleasantly after further SECS',
                action = 'callback', callback = cb_time)

  ## Other options.
  op.add_option('-i', '--interactive', action = 'store_true', dest = 'tty',
                help = 'provide progress information')
  op.add_option('-n', '--no-act', action = 'store_true', dest = 'noact',
                help = 'don\'t actually modify the filesystem')

  ## Ready to rock.
  op.set_defaults(formats = [], noact = False,
                  timeout = None, timeout_nasty = 300)
  opts, args = op.parse_args(args)

  ## Check that we got the non-option arguments that we want.
  if len(args) != 1:
    op.error('wrong number of arguments')

  ## Act on the options.
  if opts.tty:
    STATUS.eyecandyp = True
  if opts.timeout is not None:
    to = TH.Thread(target = timeout,
                   args = (opts.timeout, opts.timeout_nasty))
    to.daemon = True
    to.start()

  ## Parse the configuration file.
  with open(args[0]) as conf:
    jobs, = Config.parseFile(conf, True)
  for j in jobs:
    j.perform()

  return opts

if __name__ == '__main__':
  opts = parse_opts(SYS.argv[1:])

  ## `VARS' starts out with `master' set to `None', so checking whether the
  ## key is present can't tell us whether the configuration set it.
  if VARS.get('master') is None:
    die("no master directory set")
  broken = []
  for t in TARGETS:
    g = Grobbler(t.policies, opts.noact)
    b = g.grobble(VARS['master'], t.targetdir)
    broken += b
  if broken:
    moan('failed to convert some files:')
    for file, exc in broken:
      moan('%s: %s' % (file, exc))
    SYS.exit(1)

  ## This is basically a successful completion: we did what we were asked to
  ## do.  It seems polite to report a message, though.
  ##
  ## Why don't we have a nonzero exit status?  The idea would be that a
  ## calling script would be interested that we used up all of our time, and
  ## not attempt to convert some other directory as well.  But that doesn't
  ## quite work.  Such a script would need to account correctly for time we
  ## had spent even if we complete successfully.  And if the script is having
  ## to watch the clock itself, it can do that without our help here.
  if KILLSWITCH.is_set():
    moan('killed by timeout')

###----- That's all, folks --------------------------------------------------
#! /usr/bin/python
##
## Print an `#EXTINF' playlist line for the single audio file named on the
## command line, built from the first `artist' and `title' tag values.

from sys import argv
import mutagen as MG

## Importing the package doesn't load its submodules, so ask for the ones we
## refer to as attributes below.
__import__(MG.__name__, fromlist = ['mp3', 'easyid3'])

## Exactly one argument is expected; the unpacking fails otherwise.
path, = argv[1:]

## Let Mutagen work out the file type.  Plain MP3 files are reopened with
## the `easy' ID3 interface so that the tags can be looked up by the same
## simple names as for other formats.
track = MG.File(path)
if type(track) == MG.mp3.MP3:
  track = MG.mp3.MP3(path, ID3 = MG.easyid3.EasyID3)

## The duration is always written as -1.
artist = track['artist'][0]
title = track['title'][0]
line = u'#EXTINF %d,%s - %s' % (-1, artist, title)
print(line.encode('utf-8'))
+### +### You should have received a copy of the GNU General Public License +### along with this program; if not, write to the Free Software Foundation, +### Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +###----- Usage -------------------------------------------------------------- +### +### The command line syntax is: +### +### ab-chop INPUT CAPS OUTPUT PIPELINE... +### +### This means that we should read INPUT, decode it (using a GStreamer +### `decodebin', so it should be able to handle most things you care to throw +### at it), and then re-encode it according to each PIPELINE in turn, decode +### /that/ again, and stash the resulting raw PCM data. When we've finished, +### we line up the PCM data streams side-by-side, chop them into chunks, and +### then stitch chunks from randomly chosen streams together to make a new +### PCM stream. Finally, we encode that mixed-up stream as FLAC, and write +### it to OUTPUT. It also writes a file OUTPUT.sequence which is a list of +### numbers indicating which pipeline each chunk of the original came from. +### +### The motivation is that we want to test encoder quality. So you take a +### reference source (as good as you can find), and use that as your INPUT. +### You then write GStreamer pipeline fragments for the encoders you want to +### compare; say `identity' if you want the unmodified original reference to +### be mixed in. +### +### The only tricky bit is the CAPS, which is a GStreamer capabilities string +### describing the raw PCM format to use as an intermediate representation. +### (This is far too low-level and cumbersome for real use, but it's OK for +### now.) You need to say something like +### +### audio/x-raw-int,width=16,rate=44100,channels=2,depth=16, +### endianness=1234,signed=true +### +### for standard CD audio. + +###-------------------------------------------------------------------------- +### External dependencies. + +## Standard Python libraries. 
def link_on_demand(src, sink, sinkpad = None, cap = None):
  """
  Arrange to link SRC to SINK when SRC grows a suitable pad.

  A handler is attached to SRC's `pad-added' signal.  When a new pad
  appears, and either CAP is `None' or the name of the pad's first caps
  structure matches the `fnmatch' pattern CAP (case-sensitively), the new
  pad is linked to the pad of SINK named SINKPAD.  SINKPAD is handed to
  `link_pads' as given, so the default of `None' leaves the choice of pad
  to that method.
  """

  def pad_added(element, newpad):
    ## Ignore pads carrying the wrong kind of media, if we've been told what
    ## kind we want.
    if cap is not None:
      media = newpad.get_caps()[0].get_name()
      if not FN.fnmatchcase(media, cap):
        return
    element.link_pads(newpad.get_name(), sink, sinkpad)

  src.connect('pad-added', pad_added)
###--------------------------------------------------------------------------
### Main program.

## Read the command line arguments.  We need at least one encoder pipeline,
## or there's nothing to compare.
if len(SYS.argv) < 5:
  SYS.stderr.write('usage: %s INPUT CAPS OUTPUT PIPELINE...\n' %
                   OS.path.basename(SYS.argv[0]))
  SYS.exit(1)
input = SYS.argv[1]
caps = GS.caps_from_string(SYS.argv[2])
output = SYS.argv[3]

## We want a temporary place to keep things.  This provokes a warning, but
## `mkdir' is atomic and sane so it's not a worry.
tmp = OS.tmpnam()
OS.mkdir(tmp)
try:

  ## First step: produce raw PCM files from the original source and the
  ## requested encoders.  Each pipeline decodes the input, runs it through
  ## the encoder under test, decodes the result again, and writes raw PCM in
  ## the requested format to a numbered scratch file.
  q = 0
  temps = []
  for i in SYS.argv[4:]:
    temp = OS.path.join(tmp, '%d.raw' % q)
    temps.append(temp)
    pipe = GS.Pipeline()
    origin = make_element('filesrc', location = input)
    decode_1 = make_element('decodebin')
    convert_1 = make_element('audioconvert')
    encode = GS.parse_bin_from_description(i, True)
    decode_2 = make_element('decodebin')
    convert_2 = make_element('audioconvert')
    target = make_element('filesink', location = temp)
    pipe.add(origin, decode_1, convert_1, encode,
             decode_2, convert_2, target)
    origin.link(decode_1)
    link_on_demand(decode_1, convert_1)
    convert_1.link(encode)
    encode.link(decode_2)
    link_on_demand(decode_2, convert_2)
    convert_2.link(target, caps)

    run_pipe(pipe, 'input-%d' % q)
    del pipe
    print('done %s' % i)
    q += 1

  ## Second step: decide which stream each chunk is going to come from.
  ## The chunk size is in bytes.  (Use `max' on the list itself: spreading
  ## the list into separate arguments fails if there's only one stream.)
  step = 1763520
  lens = [OS.stat(i).st_size for i in temps]
  blocks = (max(lens) + step - 1)//step

  ## We insist that every stream is used at least once, and keep trying
  ## random sequences until we find one which does that.  This can't
  ## possibly work if there are fewer chunks than streams, so give up now
  ## rather than spinning forever.
  if blocks < q:
    SYS.stderr.write('input too short: %d chunk(s) for %d pipelines\n' %
                     (blocks, q))
    SYS.exit(1)
  while True:
    seq = []
    done = {}
    for i in xrange(blocks):
      j = SR.randrange(q)
      done[j] = True
      seq.append(j)
    ok = True
    for i in xrange(q):
      if i not in done:
        ok = False
        break
    if ok:
      break

  ## Third step: stitch the chosen chunks together.  Every stream is read
  ## at the same offset, so the mixture stays lined up with the original.
  ## Stop early if the chosen stream runs out.
  mix = OS.path.join(tmp, 'mix.raw')
  ff = [open(i, 'rb') for i in temps]
  try:
    out = open(mix, 'wb')
    try:
      pos = 0
      for i in seq:
        f = ff[i]
        f.seek(pos)
        buf = f.read(step)
        out.write(buf)
        if len(buf) < step:
          break
        pos += step
    finally:
      out.close()
  finally:
    for f in ff:
      f.close()

  ## Record which pipeline each chunk came from.
  f = open(output + '.sequence', 'w')
  try:
    f.write(', '.join([str(i) for i in seq]) + '\n')
  finally:
    f.close()

  ## Final step: encode the mixed-up stream as FLAC.
  pipe = GS.Pipeline()
  origin = make_element('filesrc', location = mix)
  convert = make_element('audioconvert')
  encode = make_element('flacenc', quality = 8)
  target = make_element('filesink', location = output)
  pipe.add(origin, convert, encode, target)
  origin.link(convert, caps)
  GS.element_link_many(convert, encode, target)

  run_pipe(pipe, 'output')
  del pipe
  print('all done')
finally:
  SH.rmtree(tmp)
a/misc/catalogue b/misc/catalogue new file mode 100755 index 0000000..56f3024 --- /dev/null +++ b/misc/catalogue @@ -0,0 +1,60 @@ +#! /bin/sh -e + +case $# in 0) set -- . ;; esac + +parse_vorbis_comment () { + tag=$1 + + while read value; do + case "$value" in + artist=* | album=* | musicbrainz_albumid=*) + label=${value%%=*} + eval $label=\${value#*=} + eval have_$label=t + ;; + esac + done <