mdw@git.distorted.org.uk Git - autoys/blob - gremlin/gremlin.in

   1 #! @PYTHON@
   2 ###
   3 ### Convert a directory tree of audio files
   4 ###
   5 ### (c) 2010 Mark Wooding
   6 ###
   7
   8 ###----- Licensing notice ---------------------------------------------------
   9 ###
  10 ### This file is part of the `autoys' audio tools collection.
  11 ###
  12 ### `autoys' is free software; you can redistribute it and/or modify
  13 ### it under the terms of the GNU General Public License as published by
  14 ### the Free Software Foundation; either version 2 of the License, or
  15 ### (at your option) any later version.
  16 ###
  17 ### `autoys' is distributed in the hope that it will be useful,
  18 ### but WITHOUT ANY WARRANTY; without even the implied warranty of
  19 ### MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  20 ### GNU General Public License for more details.
  21 ###
  22 ### You should have received a copy of the GNU General Public License
  23 ### along with `autoys'; if not, write to the Free Software Foundation,
  24 ### Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
  25
  26 ###--------------------------------------------------------------------------
  27 ### External dependencies.
  28
  29 ## Language features.
  30 from __future__ import with_statement
  31
  32 ## Standard Python libraries.
  33 import sys as SYS
  34 import os as OS
  35 import errno as E
  36 import time as T
  37 import unicodedata as UD
  38 import fnmatch as FN
  39 import re as RX
  40 import shutil as SH
  41 import optparse as OP
  42 import threading as TH
  43 import shlex as L
  44 from math import sqrt
  45 from contextlib import contextmanager
  46
  47 ## eyeD3 tag fettling.
  48 import eyed3 as E3
  49
  50 ## Gstreamer.  It picks up command-line arguments -- most notably `--help' --
  51 ## and processes them itself.  Of course, its help is completely wrong.  This
  52 ## kludge is due to Jonas Wagner.
  53 _argv, SYS.argv = SYS.argv, []
  54 import gobject as G
  55 import gio as GIO
  56 import gst as GS
  57 SYS.argv = _argv
  58
  59 ## Python Imaging.
  60 from PIL import Image as I
  61
  62 ## Python parsing.
  63 import pyparsing as P
  64
  65 ###--------------------------------------------------------------------------
  66 ### Special initialization.
  67
## The program's version string.  `@VERSION@' is a placeholder, presumably
## substituted when this template is turned into the installed script.
VERSION = '@VERSION@'

## GLib.  Switch on thread support before anything else gets going: as noted
## in `AudioIdentifier', GStreamer may run our handlers in other threads.
G.threads_init()
  72
  73 ###--------------------------------------------------------------------------
  74 ### Eyecandy progress reports.
  75
  76 def charwidth(s):
  77   """
  78   Return the width of S, in characters.
  79
  80   Specifically, this is the number of backspace characters required to
  81   overprint the string S.  If the current encoding for `stdout' appears to be
  82   Unicode then do a complicated Unicode thing; otherwise assume that
  83   characters take up one cell each.
  84
  85   None of this handles tab characters in any kind of useful way.  Sorry.
  86   """
  87
  88   ## If there's no encoding for stdout then we're doing something stupid.
  89   if SYS.stdout.encoding is None: return len(s)
  90
  91   ## Turn the string into Unicode so we can hack on it properly.  Maybe that
  92   ## won't work out, in which case fall back to being stupid.
  93   try: u = s.decode(SYS.stdout.encoding)
  94   except UnicodeError: return len(s)
  95
  96   ## Our main problem is combining characters, but we should also try to
  97   ## handle wide (mostly Asian) characters, and zero-width ones.  This hack
  98   ## is taken mostly from http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c
  99   w = 0
 100   for ch in u:
 101     cd = ord(ch)
 102     if UD.category(ch) in ['Cf', 'Me', 'Mn'] or \
 103           0x1160 <= cd <= 0x11ff: pass
 104     elif UD.east_asian_width(ch) in ['F', 'W']: w += 2
 105     else: w += 1
 106
 107   ## Done.
 108   return w
 109
 110 class StatusLine (object):
 111   """
 112   Maintains a status line containing ephemeral progress information.
 113
 114   The status line isn't especially important, but it keeps interactive users
 115   amused.
 116
 117   There should be only one status line object in your program; otherwise
 118   they'll interfere with each other and get confused.
 119
 120   The update algorithm (in `set') is fairly careful to do the right thing
 121   with long status `lines', and to work properly in an Emacs `shell' buffer.
 122   """
 123
 124   def __init__(me):
 125     "Initialize the status line."
 126     me._last = ''
 127     me._lastlen = 0
 128     me.eyecandyp = OS.isatty(SYS.stdout.fileno())
 129
 130   def set(me, line):
 131     """
 132     Set the status line contents to LINE, replacing what was there before.
 133
 134     This only produces actual output if stdout is interactive.
 135     """
 136     n = len(line)
 137
 138     ## Eyecandy update.
 139     if me.eyecandyp:
 140
 141       ## If the old line was longer, we need to clobber its tail, so work out
 142       ## what that involves.
 143       if n < me._lastlen:
 144         b = charwidth(me._last[n:])
 145         pre = '\b'*b + ' '*b
 146       else:
 147         pre = ''
 148
 149       ## Now figure out the length of the common prefix between what we had
 150       ## before and what we have now.  This reduces the amount of I/O done,
 151       ## which keeps network traffic down on SSH links, and keeps down the
 152       ## amount of work slow terminal emulators like Emacs have to do.
 153       i = 0
 154       m = min(n, me._lastlen)
 155       while i < m and line[i] == me._last[i]:
 156         i += 1
 157
 158       ## Actually do the output, all in one syscall.
 159       b = charwidth(me._last[i:])
 160       SYS.stdout.write(pre + '\b'*b + line[i:])
 161       SYS.stdout.flush()
 162
 163     ## Update our idea of what's gone on.
 164     me._lastlen = n
 165     me._last = line
 166
 167   def clear(me):
 168     "Clear the status line.  Just like set('')."
 169     me.set('')
 170
 171   def commit(me, line = None):
 172     """
 173     Commit the current status line, and maybe the string LINE.
 174
 175     If the current status line is nonempty, then commit it to the transcript.
 176     If LINE is not None, then commit that to the transcript too.
 177
 178     After all of this, we clear the status line to get back to a clean state.
 179     """
 180     if me._last:
 181       if me.eyecandyp:
 182         SYS.stdout.write('\n')
 183       else:
 184         SYS.stdout.write(me._last + '\n')
 185     if line is not None:
 186       SYS.stdout.write(line + '\n')
 187     me._lastlen = 0
 188     me._last = ''
 189
## The program's one and only status line.  Everything which reports
## progress goes through this object.
STATUS = StatusLine()
 191
 192 def filestatus(file, status):
 193   return '%s%s: %s' % (' '*8, OS.path.basename(file), status)
 194
 195 class ProgressEyecandy (object):
 196   """
 197   Provide amusement while something big and complicated is happening.
 198
 199   This is an abstract class.  Subclasses must provide a method `progress'
 200   returning a pair (CURRENT, MAX) indicating the current progress through the
 201   operation.
 202   """
 203
 204   def __init__(me, what, silentp = False):
 205     """
 206     Initialize a progress meter.
 207
 208     WHAT is a prefix string to be written before the progress eyecandy
 209     itself.
 210     """
 211     me._what = what
 212     me._silentp = silentp
 213     me._spinner = 0
 214     me._start = T.time()
 215
 216   def _fmt_time(me, t):
 217     "Format T as a time, in (maybe hours) minutes and seconds."
 218     s, t = t % 60, int(t/60)
 219     m, h = t % 60, int(t/60)
 220     if h > 0:
 221       return '%d:%02d:%02d' % (h, m, s)
 222     else:
 223       return '%02d:%02d' % (m, s)
 224
 225   def show(me):
 226     "Show the current level of progress."
 227
 228     ## If we're not showing pointless frippery, don't bother at all.
 229     if not STATUS.eyecandyp:
 230       return
 231
 232     ## Update the spinner index.
 233     me._spinner = (me._spinner + 1)%4
 234
 235     ## Fetch the current progress information.  Note that we always fetch
 236     ## both the current and maximum levels, because both might change if an
 237     ## operation revises its idea of how much work needs doing.
 238     cur, max = me.progress()
 239
 240     ## If we couldn't get progress information, display something vaguely
 241     ## amusing anyway.
 242     if cur is None or max is None:
 243       STATUS.set('%s %c [unknown progress]' %
 244                  (me._what, r'/-\|'[me._spinner]))
 245       return
 246
 247     ## Work out -- well, guess -- the time remaining.
 248     if cur:
 249       t = T.time()
 250       eta = me._fmt_time((t - me._start)*(max - cur)/cur)
 251     else:
 252       eta = '???'
 253
 254     ## Set the status bar.
 255     n = 40*cur/max
 256     STATUS.set('%s %c [%s%s] %3d%% (%s)' % \
 257                (me._what,
 258                 r'/-\|'[me._spinner],
 259                 '='*n, ' '*(40 - n),
 260                 100*cur/max,
 261                 eta))
 262
 263   def done(me, win = True):
 264     "Show a completion notice, or a failure if WIN is false."
 265     if not win:
 266       STATUS.set('%s FAILED!' % me._what)
 267     elif not me._silentp:
 268       STATUS.set('%s done (%s)' %
 269                  (me._what,
 270                   me._fmt_time(T.time() - me._start)))
 271     else:
 272       return
 273     STATUS.commit()
 274
 275 ###--------------------------------------------------------------------------
 276 ### Timeout handling.
 277
## Event which `timeout' sets once the first stage of the time limit has run
## out.
KILLSWITCH = TH.Event()

def timeout(t0, t1):
  """
  Enforce a time limit, in two stages.

  Wait for T0 seconds and then set `KILLSWITCH'.  Wait for a further T1
  seconds and, if we're still running, complain and kill the whole process
  with exit status 3.

  This blocks for the whole of both waits, so presumably it's intended to be
  run in a thread of its own.
  """
  T.sleep(t0)
  KILLSWITCH.set()
  T.sleep(t1)

  ## `moan' isn't defined in this part of the file; presumably it reports the
  ## message to the user.
  moan('dying messily due to timeout')

  ## Leave immediately, without going through the interpreter's usual
  ## shutdown procedure.
  OS._exit(3)
 286
 287 ###--------------------------------------------------------------------------
 288 ### Parsing utilities.
 289
## Allow hyphens in identifiers.  The keyword characters must match, so that
## a keyword isn't recognized when it's only the start of a longer
## hyphenated name.
IDCHARS = P.alphanums + '-_'
P.Keyword.setDefaultKeywordChars(IDCHARS)

## Some common kinds of tokens: `Name' is a word of identifier characters;
## `Num' is a string of digits, converted to an integer; and `String' is
## enclosed in double quotes, with backslash as the escape character.
Name = P.Word(IDCHARS)
Num = P.Word(P.nums).setParseAction(lambda toks: map(int, toks))
String = P.QuotedString('"', '\\')

## Handy abbreviations for constructed parser elements.
##
## K(k)         Match the keyword K, producing no token.
## D(d)         Match the literal delimiter D, producing no token.
## R(p)         Match zero or more of P, gathering the results into a single
##              list-like token.
## O            Shorthand for pyparsing's `Optional'.
def K(k): return P.Keyword(k).suppress()
def D(d): return P.Literal(d).suppress()
def R(p): return P.ZeroOrMore(p).setParseAction(lambda s, l, t: [t])
O = P.Optional
 304
 305 ###--------------------------------------------------------------------------
 306 ### Format identification and conversion.
 307
 308 class IdentificationFailure (Exception):
 309   pass
 310
 311 class FileCategory (object):
 312   """
 313   A FileCategory represents a class of files.
 314
 315   For example, it's sensible to consider audio, or image files as a
 316   category.  A file category knows how to recognize member files from
 317   MIME content types.
 318   """
 319
 320   def __init__(me, name, mime_pats, ident):
 321     """
 322     Construct a new category.
 323
 324     The PATS are a list of `fnmatch' patterns to be compared with a MIME
 325     type.  The IDENT is a function which produces an identification object
 326     given a file's name and first-guess MIME type.  The object is passed to a
 327     Format's `check' method to see whether a file needs re-encoding, and to
 328     `convert' to assist with the conversion.
 329
 330     An identification object must have an attribute `mime' which is a set of
 331     possible MIME types accumulated for the object.
 332     """
 333     me.name = name
 334     me._mime_pats = mime_pats
 335     me._ident = ident
 336     CATEGORYMAP[name] = me
 337
 338   def identify(me, file, mime):
 339     """
 340     Attempt to identify FILE, given its apparent MIME type.
 341
 342     If identification succeeds, return an identification object which can be
 343     used by associated file formats; otherwise return None.
 344     """
 345     for p in me._mime_pats:
 346       if not FN.fnmatchcase(mime, p):
 347         continue
 348       try:
 349         return me._ident(file, mime)
 350       except IdentificationFailure:
 351         pass
 352     return None
 353
 354 class BaseFormat (object):
 355   """
 356   A BaseFormat object represents a particular encoding and parameters.
 357
 358   The object can verify (the `check' method) whether a particular file
 359   matches its requirements, and if necessary (`encode') re-encode a file.
 360
 361   Subclasses should define the following methods.
 362
 363   check(ID)
 364           Answer whether the file identified by ID is acceptable according to
 365           the receiver's parameters.
 366
 367   convert(MASTER, ID, TARGET)
 368           Convert the file MASTER, which has been identified as ID, according
 369           to the receiver's parameters, writing the output to TARGET.
 370
 371   Subclasses should also provide these attributes.
 372
 373   CATEGORY
 374           A FileCategory object for the category of files that this format
 375           lives within.
 376
 377   EXT     A file extension to be applied to encoded output files.
 378
 379   NAME    A user-facing name for the format.
 380
 381   PROPS   A parser element to parse a property definition.  It should produce
 382           a pair NAME, VALUE to be stored in a dictionary.
 383
 384   Subclasses for different kinds of file may introduce more subclass
 385   protocol.
 386   """
 387
 388   def fixup(me, path):
 389     """Post-encoding fixups."""
 390     pass
 391
 392 FORMATMAP = {}
 393 CATEGORYMAP = {}
 394
 395 def defformat(name, cls):
 396   "Define a format NAME using class CLS."
 397   if not hasattr(cls, 'NAME'):
 398     raise ValueError, 'abstract class'
 399   if not hasattr(cls, 'CATEGORY'):
 400     raise ValueError, 'no category'
 401   FORMATMAP[name] = cls
 402
class FormatParser (P.ParserElement):
  """
  Parse a format specifier:

  format-spec ::= name [format-properties]
  format-properties ::= `{' format-property (`,' format-property)* `}'

  The name must be one registered in `FORMATMAP'.  The syntax of a
  format-property is determined by the PROPS attribute on the named format
  and its superclasses.

  The result of a successful parse is an instance of the format class,
  constructed with the parsed properties as keyword arguments.
  """

  ## We cache the parser elements we generate to avoid enormous consing.  The
  ## cache maps format names to property-list parsers, or to None for a
  ## format which has no properties at all.
  CACHE = {}

  def parseImpl(me, s, loc, actp = True):
    """
    Parse a format specifier from the string S, starting at index LOC.

    Return a pair (NEWLOC, FORMAT).  Raise `ParseException' if the format
    name isn't known.  ACTP is passed on to the subsidiary parsers.
    """

    ## Firstly, determine the format name.
    loc, r = Name._parse(s, loc, actp)
    fmt = r[0]

    ## Look up the format class.
    try: fcls = FORMATMAP[fmt]
    except KeyError:
      raise P.ParseException(s, loc, "Unknown format `%s'" % fmt)

    ## Fetch the property-list parser from the cache, if possible; else
    ## construct it.
    try:
      pp = me.CACHE[fmt]
    except KeyError:

      ## Collect the distinct PROPS elements from the class and all of its
      ## superclasses, and combine them into a set of alternatives.  A
      ## class which merely inherits its PROPS shows us the same object
      ## again, which is why we need to track the ones we've `seen'.
      seen = set()
      prop = None
      for c in fcls.mro():
        try: p = c.PROPS
        except AttributeError: continue
        if p in seen: continue
        if prop is None: prop = p
        else: prop |= p
        seen.add(p)

      ## Wrap the alternatives up as an optional, brace-enclosed,
      ## comma-separated list, which yields a dictionary built from the
      ## individual properties' (NAME, VALUE) pairs.
      if prop is None:
        pp = me.CACHE[fmt] = None
      else:
        props = P.delimitedList(prop)
        props.setParseAction(lambda s, l, t: dict(t.asList()))
        pp = me.CACHE[fmt] = O(D('{') - props - D('}'))

    ## Parse the properties.  The property list is optional, so we might not
    ## get anything back.
    if pp is None:
      pd = {}
    else:
      loc, r = pp._parse(s, loc, actp)
      if r: pd = r[0]
      else: pd = {}

    ## Construct the format object and return it.
    return loc, fcls(**pd)

## The parser element for format specifiers, for use in larger grammars.
Format = FormatParser()
 461
 462 def prop(kw, pval, tag = None):
 463   if tag is None: tag = kw
 464   if pval is None:
 465     p = K(kw)
 466     p.setParseAction(lambda s, l, t: (tag, True))
 467   else:
 468     p = K(kw) + D('=') + pval
 469     p.setParseAction(lambda s, l, t: (tag, t[0]))
 470   return p
 471
 472 ###--------------------------------------------------------------------------
 473 ### Policies and actions.
 474
 475 class Action (object):
 476   """
 477   An Action object represents a conversion action to be performed.
 478
 479   This class isn't intended to be instantiated directly.  It exists to define
 480   some protocol common to all Action objects.
 481
 482   Action objects have the following attributes.
 483
 484   master        The name of the master (source) file.
 485
 486   target        The name of the target (destination) file.
 487
 488   PRIORITY      The priority of the action, for deciding which of two actions
 489                 to perform.  Higher priorities are more likely to win.
 490
 491   Converting an Action to a string describes the action in a simple
 492   user-readable manner.  The `perform' method actually carries the action
 493   out.
 494   """
 495
 496   PRIORITY = 0
 497
 498   def __init__(me, master):
 499     "Stash the MASTER file name for later."
 500     me.master = master
 501
 502   def choose(me, him):
 503     "Choose either ME or HIM and return one."
 504     if him is None or me.PRIORITY > him.PRIORITY:
 505       return me
 506     else:
 507       return him
 508
 509 class CopyAction (Action):
 510   """
 511   An Action object for simply copying a file.
 512
 513   Actually we try to hardlink it first, falling back to a copy later.  This
 514   is both faster and more efficient with regard to disk space.
 515   """
 516
 517   ## Copying is good.  Linking is really good, but we can't tell the
 518   ## difference at this stage.
 519   PRIORITY = 10
 520
 521   def __init__(me, master, targetdir):
 522     "Initialize a CopyAction, from MASTER to the TARGETDIR directory."
 523     Action.__init__(me, master)
 524     me.target = OS.path.join(targetdir, OS.path.basename(master))
 525
 526   def __str__(me):
 527     return 'copy/link'
 528
 529   def perform(me):
 530     "Actually perform a CopyAction."
 531     try:
 532       STATUS.set(filestatus(me.master, 'link'))
 533       OS.link(me.master, me.target)
 534     except OSError, err:
 535       if err.errno != E.EXDEV:
 536         raise
 537       STATUS.set(filestatus(me.master, 'copy'))
 538       new = me.target + '.new'
 539       SH.copyfile(me.master, new)
 540       OS.rename(new, me.target)
 541     STATUS.commit()
 542
 543 class ConvertAction (Action):
 544   """
 545   An Action object for converting a file to a given format.
 546
 547   Additional attributes:
 548
 549   id            The identification object for the master file.
 550
 551   format        The format to which we're meant to conver the master.
 552   """
 553
 554   def __init__(me, master, targetdir, id, format):
 555     "Initialize a ConvertAction."
 556     Action.__init__(me, master)
 557     stem, ext = OS.path.splitext(OS.path.basename(master))
 558     me.target = OS.path.join(targetdir, stem + '.' + format.EXT)
 559     me.id = id
 560     me.format = format
 561
 562   def __str__(me):
 563     return 'convert to %s' % me.format.NAME
 564
 565   def perform(me):
 566     "Acually perform a ConvertAction."
 567     STATUS.set(filestatus(me.master, me))
 568     me.format.convert(me.master, me.id, me.target)
 569
## Policies can contain other policies, so the grammar is recursive.  Declare
## the parser element now; it's filled in below, once all of the kinds of
## policy have been defined.
Policy = P.Forward()
 571
 572 class FormatPolicy (object):
 573   """
 574   A FormatPolicy object represents a set of rules for how to convert files.
 575
 576   Given a master file, the FormatPolicy will identify it and return a list of
 577   actions to be performed.  The methods required of a FormatPolicy are:
 578
 579   setcategory(CAT)
 580           Store CAT as the policy's category.  Check that this is consistent
 581           with the policy as stored.
 582
 583   actions(MASTER, TARGETDIR, ID, COHORT)
 584           Given a MASTER file, identified as ID, a target directory
 585           TARGETDIR, and a list COHORT of (FILE, ID) pairs for other files
 586           of the same category in the same directory, return a list of
 587           actions to be performed to get the target directory into the right
 588           form.  The list might be empty if the policy object /rejects/ the
 589           file.
 590   """
 591
 592 class AndPolicy (FormatPolicy):
 593   """
 594   A FormatPolicy which does the union of a bunch of other policies.
 595
 596   Each subsidiary policy is invoked in turn.  The highest-priority action for
 597   each target file is returned.
 598   """
 599
 600   def __init__(me, policies):
 601     me._policies = policies
 602
 603   def setcategory(me, cat):
 604     me.cat = cat
 605     for p in me._policies:
 606       p.setcategory(cat)
 607
 608   def actions(me, master, targetdir, id, cohort):
 609     tmap = {}
 610     for p in me._policies:
 611       for a in p.actions(master, targetdir, id, cohort):
 612         if a.target in tmap:
 613           tmap[a.target] = a.choose(tmap.get(a.target))
 614         else:
 615           tmap[a.target] = a
 616     return tmap.values()
 617
## Syntax: `and { POLICY ... }'.  The subsidiary policies arrive gathered
## into a single list (see `R'), which is what AndPolicy wants.
And = K('and') - D('{') - R(Policy) - D('}')
And.setParseAction(lambda s, l, t: AndPolicy(t[0]))
 620
 621 class OrPolicy (FormatPolicy):
 622   """
 623   A FormatPolicy which tries other policies and uses the first that accepts.
 624
 625   Each subsidiary policy is invoked in turn.  If any accepts, the actions it
 626   proposes are turned and no further policies are invoked.  If none accepts
 627   then the file is rejected.
 628   """
 629
 630   def __init__(me, policies):
 631     me._policies = policies
 632
 633   def setcategory(me, cat):
 634     me.cat = cat
 635     for p in me._policies:
 636       p.setcategory(cat)
 637
 638   def actions(me, master, targetdir, id, cohort):
 639     for p in me._policies:
 640       aa = p.actions(master, targetdir, id, cohort)
 641       if aa:
 642         return aa
 643     else:
 644       return []
 645
## Syntax: `or { POLICY ... }'.  The subsidiary policies arrive gathered into
## a single list (see `R'), which is what OrPolicy wants.
Or = K('or') - D('{') - R(Policy) - D('}')
Or.setParseAction(lambda s, l, t: OrPolicy(t[0]))
 648
 649 class AcceptPolicy (FormatPolicy):
 650   """
 651   A FormatPolicy which copies files in a particular format.
 652
 653   If all of the files in a cohort are recognized as being in a particular
 654   format (including this one), then accept it with a CopyAction; otherwise
 655   reject.
 656   """
 657
 658   def __init__(me, format):
 659     me._format = format
 660
 661   def setcategory(me, cat):
 662     if me._format.CATEGORY is not cat:
 663       raise ValueError, \
 664             "Accept format `%s' has category `%s', not `%s'" % \
 665             (me._format.__class__.__name__,
 666              me._format.CATEGORY.name, cat.name)
 667     me.cat = cat
 668
 669   def actions(me, master, targetdir, id, cohort):
 670     if me._format.check(id) and \
 671        all(me._format.check(cid) for f, cid in cohort):
 672       return [CopyAction(master, targetdir)]
 673     else:
 674       return []
 675
## Syntax: `accept FORMAT'.  The `Format' parser has already constructed the
## format object, so just wrap it in a policy.
Accept = K('accept') - Format
Accept.setParseAction(lambda s, l, t: AcceptPolicy(t[0]))
 678
 679 class ConvertPolicy (FormatPolicy):
 680   """
 681   A FormatPolicy which copies files in a particular format or converts if
 682   necessary.
 683   """
 684   def __init__(me, format):
 685     me._format = format
 686
 687   def setcategory(me, cat):
 688     if me._format.CATEGORY is not cat:
 689       raise ValueError, \
 690             "Accept format `%s' has category `%s', not `%s'" % \
 691             (me._format.__class__.__name__,
 692              me._format.CATEGORY.name, cat.name)
 693     me.cat = cat
 694
 695   def actions(me, master, targetdir, id, cohort):
 696     if me._format.check(id):
 697       return [CopyAction(master, targetdir)]
 698     else:
 699       return [ConvertAction(master, targetdir, id, me._format)]
 700
## Syntax: `convert FORMAT'.  The `Format' parser has already constructed the
## format object, so just wrap it in a policy.
Convert = K('convert') - Format
Convert.setParseAction(lambda s, l, t: ConvertPolicy(t[0]))

## Now that all of the kinds of policy are defined, fill in the forward
## declaration made earlier.
Policy << (And | Or | Accept | Convert)
 705
 706 ###--------------------------------------------------------------------------
 707 ### Audio handling, based on GStreamer.
 708
 709 def make_element(factory, name = None, **props):
 710   "Return a new element from the FACTORY with the given NAME and PROPS."
 711   elt = GS.element_factory_make(factory, name)
 712   elt.set_properties(**props)
 713   return elt
 714
 715 class GStreamerProgressEyecandy (ProgressEyecandy):
 716   """
 717   Provide amusement while GStreamer is busy doing something.
 718
 719   The GStreamerProgressEyecandy object is a context manager.  Wrap it round
 720   your GStreamer loop to provide progress information for an operation.
 721   """
 722
 723   def __init__(me, what, elt, **kw):
 724     """
 725     Initialize a progress meter.
 726
 727     WHAT is a prefix string to be written before the progress eyecandy
 728     itself.  ELT is a GStreamer element to interrogate to find the progress
 729     information.
 730     """
 731     me._elt = elt
 732     ProgressEyecandy.__init__(me, what, **kw)
 733
 734   def _update(me):
 735     "Called by GLib main event loop to update the eyecandy."
 736     me.show()
 737     return True
 738
 739   def _timer(me):
 740     """
 741     Update the progress meter.
 742
 743     This is called periodically by the GLib main event-processing loop.
 744     """
 745     me.show()
 746     return True
 747
 748   def progress(me):
 749     "Return the current progress as a pair (CURRENT, MAX)."
 750
 751     ## Fetch the current progress information.  We get the duration each
 752     ## time, because (particularly with VBR-encoded MP3 inputs) the estimated
 753     ## duration can change as we progress.  Hopefully it settles down fairly
 754     ## soon.
 755     try:
 756       t, hunoz = me._elt.query_position(GS.FORMAT_TIME)
 757       end, hukairz = me._elt.query_duration(GS.FORMAT_TIME)
 758       return t, end
 759     except GS.QueryError:
 760       return None, None
 761
 762   def __enter__(me):
 763     "Enter context: attach progress meter display."
 764
 765     ## If we're not showing pointless frippery, don't bother at all.
 766     if not STATUS.eyecandyp:
 767       return
 768
 769     ## Update regularly.  The pipeline runs asynchronously.
 770     me._id = G.timeout_add(200, me._update)
 771
 772   def __exit__(me, ty, val, tb):
 773     "Leave context: remove display and report completion or failure."
 774
 775     ## If we're not showing pointless frippery, there's nothing to remove.
 776     if STATUS.eyecandyp:
 777       G.source_remove(me._id)
 778
 779     ## Report completion anyway.
 780     me.done(ty is None)
 781
 782     ## As you were.
 783     return False
 784
 785 class AudioIdentifier (object):
 786   """
 787   Analyses and identifies an audio file.
 788
 789   Important properties are:
 790
 791   cap     A capabilities structure describing the audio file data.  The most
 792           interesting thing in here is probably its name, which is a MIME
 793           type describing the data.
 794
 795   dcap    A capabilities structure describing the decoded audio data.  This
 796           is of interest during conversion.
 797
 798   tags    A dictionary containing metadata tags from the file.  These are in
 799           GStreamer's encoding-independent format.
 800
 801   bitrate An approximation to the stream's bitrate, in kilobits per second.
 802           This might be slow to work out for some files so it's computed on
 803           demand.
 804   """
 805
 806   def __init__(me, file, mime):
 807     "Initialize the object suitably for identifying FILE."
 808
 809     ## Make some initial GStreamer objects.  We'll want the pipeline later if
 810     ## we need to analyse a poorly tagged MP3 stream, so save it away.
 811     me._pipe = GS.Pipeline()
 812     me._file = file
 813     bus = me._pipe.get_bus()
 814     bus.add_signal_watch()
 815     loop = G.MainLoop()
 816
 817     ## The basic recognition kit is based around `decodebin'.  We must keep
 818     ## it happy by giving it sinks for the streams it's found, which it
 819     ## announces asynchronously.
 820     source = make_element('filesrc', 'file', location = file)
 821     decoder = make_element('decodebin', 'decode')
 822     sink = make_element('fakesink')
 823     def decoder_pad_arrived(elt, pad):
 824       if pad.get_caps()[0].get_name().startswith('audio/'):
 825         elt.link_pads(pad.get_name(), sink, 'sink')
 826     dpaid = decoder.connect('pad-added', decoder_pad_arrived)
 827     me._pipe.add(source, decoder, sink)
 828     GS.element_link_many(source, decoder)
 829
 830     ## Arrange to collect tags from the pipeline's bus as they're reported.
 831     ## If we reuse the pipeline later, we'll want different bus-message
 832     ## handling, so make sure we can take the signal handler away.
 833     tags = {}
 834     fail = []
 835     def bus_message(bus, msg):
 836       if msg.type == GS.MESSAGE_ERROR:
 837         fail[:] = (ValueError, msg.structure['debug'], None)
 838         loop.quit()
 839       elif msg.type == GS.MESSAGE_STATE_CHANGED:
 840         if msg.structure['new-state'] == GS.STATE_PAUSED and \
 841                msg.src == me._pipe:
 842           loop.quit()
 843       elif msg.type == GS.MESSAGE_TAG:
 844         tags.update(msg.structure)
 845     bmid = bus.connect('message', bus_message)
 846
 847     ## We want to identify the kind of stream this is.  (Hmm.  The MIME type
 848     ## recognizer has already done this work, but GStreamer is probably more
 849     ## reliable.)  The `decodebin' has a `typefind' element inside which will
 850     ## announce the identified media type.  All we need to do is find it and
 851     ## attach a signal handler.  (Note that the handler might be run in the
 852     ## thread context of the pipeline element, but Python's GIL will keep
 853     ## things from being too awful.)
 854     me.cap = None
 855     me.dcap = None
 856     for e in decoder.elements():
 857       if e.get_factory().get_name() == 'typefind':
 858         tfelt = e
 859         break
 860     else:
 861       assert False, 'failed to find typefind element'
 862
 863     ## Crank up most of the heavy machinery.  The message handler will stop
 864     ## the loop when things seem to be sufficiently well underway.
 865     me._pipe.set_state(GS.STATE_PAUSED)
 866     loop.run()
 867     bus.disconnect(bmid)
 868     decoder.disconnect(dpaid)
 869     if fail:
 870       me._pipe.set_state(GS.STATE_NULL)
 871       raise fail[0], fail[1], fail[2]
 872
 873     ## Store the collected tags.
 874     me.tags = tags
 875
 876     ## Gather the capabilities.  The `typefind' element knows the input data
 877     ## type.  The 'decodebin' knows the raw data type.
 878     me.cap = tfelt.get_pad('src').get_negotiated_caps()[0]
 879     me.mime = set([mime, me.cap.get_name()])
 880     me.dcap = sink.get_pad('sink').get_negotiated_caps()[0]
 881
 882     ## If we found a plausible bitrate then stash it.  Otherwise note that we
 883     ## failed.  If anybody asks then we'll work it out then.
 884     if 'nominal-bitrate' in tags:
 885       me._bitrate = tags['nominal-bitrate']/1000
 886     elif 'bitrate' in tags and tags['bitrate'] >= 80000:
 887       me._bitrate = tags['bitrate']/1000
 888     else:
 889       me._bitrate = None
 890
 891     ## The bitrate computation wants the file size.  Ideally we'd want the
 892     ## total size of the frames' contents, but that seems hard to dredge
 893     ## out.  If the framing overhead is small, this should be close enough
 894     ## for our purposes.
 895     me._bytes = OS.stat(file).st_size
 896
 897   def __del__(me):
 898     "Close the pipeline down so we don't leak file descriptors."
 899     me._pipe.set_state(GS.STATE_NULL)
 900
 901   @property
 902   def bitrate(me):
 903     """
 904     Return the approximate bit-rate of the input file.
 905
 906     This might take a while if we have to work it out the hard way.
 907     """
 908
 909     ## If we already know the answer then just return it.
 910     if me._bitrate is not None:
 911       return me._bitrate
 912
 913     ## Make up a new main loop.
 914     loop = G.MainLoop()
 915
 916     ## Watch for bus messages.  We'll stop when we reach the end of the
 917     ## stream: then we'll have a clear idea of how long the track was.
 918     fail = []
 919     def bus_message(bus, msg):
 920       if msg.type == GS.MESSAGE_ERROR:
 921         fail[:] = (ValueError, msg.structure['debug'], None)
 922         loop.quit()
 923       elif msg.type == GS.MESSAGE_EOS:
 924         loop.quit()
 925     bus = me._pipe.get_bus()
 926     bmid = bus.connect('message', bus_message)
 927
 928     ## Get everything moving, and keep the user amused while we work.
 929     me._pipe.set_state(GS.STATE_PLAYING)
 930     with GStreamerProgressEyecandy(filestatus(file, 'measure bitrate') %
 931                                    me._pipe,
 932                                    silentp = True):
 933       loop.run()
 934     bus.disconnect(bmid)
 935     if fail:
 936       me._pipe.set_state(GS.STATE_NULL)
 937       raise fail[0], fail[1], fail[2]
 938
 939     ## Now we should be able to find out our position accurately and work out
 940     ## a bitrate.  Cache it in case anybody asks again.
 941     t, hukairz = me._pipe.query_position(GS.FORMAT_TIME)
 942     me._bitrate = int(8*me._bytes*1e6/t)
 943
 944     ## Done.
 945     return me._bitrate
 946
 947 class AudioFormat (BaseFormat):
 948   """
 949   An AudioFormat is a kind of Format specialized for audio files.
 950
 951   Format checks are done on an AudioIdentifier object.
 952   """
 953
 954   PROPS = prop('bitrate', Num)
 955
 956   ## libmagic reports `application/ogg' for Ogg Vorbis files.  We've switched
 957   ## to GIO now, which reports either `audio/ogg' or `audio/x-vorbis+ogg'
 958   ## depending on how thorough it's trying to be.  Still, it doesn't do any
 959   ## harm here; the main risk is picking up Ogg Theora files by accident, and
 960   ## we'll probably be able to extract the audio from them anyway.
 961   CATEGORY = FileCategory('audio', ['audio/*', 'application/ogg'],
 962                           AudioIdentifier)
 963
 964   def __init__(me, bitrate = None):
 965     "Construct an object, requiring an approximate bitrate."
 966     me.bitrate = bitrate
 967
 968   def check(me, id):
 969     """
 970     Return whether the AudioIdentifier ID is suitable for our purposes.
 971
 972     Subclasses can either override this method or provide a property
 973     `MIMETYPES', which is a list (other thing that implements `__contains__')
 974     of GStreamer MIME types matching this format.
 975     """
 976     return id.mime & me.MIMETYPES and \
 977            (me.bitrate is None or id.bitrate <= me.bitrate * sqrt(2))
 978
 979   def encoder(me):
 980     """
 981     Constructs a GStreamer element to encode audio input.
 982
 983     Subclasses can either override this method (or replace `encode'
 984     entirely), or provide a method `encoder_chain' which returns a list of
 985     elements to be linked together in sequence.  The first element in the
 986     chain must have a pad named `sink' and the last must have a pad named
 987     `src'.
 988     """
 989     elts = me.encoder_chain()
 990     bin = GS.Bin()
 991     bin.add(*elts)
 992     GS.element_link_many(*elts)
 993     bin.add_pad(GS.GhostPad('sink', elts[0].get_pad('sink')))
 994     bin.add_pad(GS.GhostPad('src', elts[-1].get_pad('src')))
 995     return bin
 996
 997   def convert(me, master, id, target):
 998     """
 999     Encode audio from MASTER, already identified as ID, writing it to TARGET.
1000
1001     See `encoder' for subclasses' responsibilities.
1002     """
1003
1004     ## Construct the necessary equipment.
1005     pipe = GS.Pipeline()
1006     bus = pipe.get_bus()
1007     bus.add_signal_watch()
1008     loop = G.MainLoop()
1009
1010     ## Make sure that there isn't anything in the way of our output.  We're
1011     ## going to write to a scratch file so that we don't get confused by
1012     ## half-written rubbish left by a crashed program.
1013     new = target + '.new'
1014     try:
1015       OS.unlink(new)
1016     except OSError, err:
1017       if err.errno != E.ENOENT:
1018         raise
1019
1020     ## Piece together our pipeline.  The annoying part is that the
1021     ## `decodebin' doesn't have any source pads yet, so our chain is in two
1022     ## halves for now.
1023     source = make_element('filesrc', 'source', location = master)
1024     decoder = make_element('decodebin', 'decode')
1025     convert = make_element('audioconvert', 'convert')
1026     encoder = me.encoder()
1027     sink = make_element('filesink', 'sink', location = new)
1028     pipe.add(source, decoder, convert, encoder, sink)
1029     GS.element_link_many(source, decoder)
1030     GS.element_link_many(convert, encoder, sink)
1031
1032     ## Some decoders (e.g., the AC3 decoder) include channel-position
1033     ## indicators in their output caps.  The Vorbis encoder interferes with
1034     ## this, and you end up with a beautifully encoded mono signal from a
1035     ## stereo source.  From a quick butchers at the `vorbisenc' source, I
1036     ## /think/ that this is only a problem with stereo signals: mono signals
1037     ## are mono already, and `vorbisenc' accepts channel positions if there
1038     ## are more than two channels.
1039     ##
1040     ## So we have this bodge.  We already collected the decoded audio caps
1041     ## during identification.  So if we see 2-channel audio with channel
1042     ## positions, we strip the positions off forcibly by adding a filter.
1043     if id.dcap.get_name().startswith('audio/x-raw-') and \
1044        id.dcap.has_field('channels') and \
1045        id.dcap['channels'] == 2 and \
1046        id.dcap.has_field('channel-positions'):
1047       dcap = GS.Caps()
1048       c = id.dcap.copy()
1049       c.remove_field('channel-positions')
1050       dcap.append(c)
1051     else:
1052       dcap = None
1053
1054     ## Hook onto the `decodebin' so we can link together the two halves of
1055     ## our encoding chain.  For now, we'll hope that there's only one audio
1056     ## stream in there, and just throw everything else away.
1057     def decoder_pad_arrived(elt, pad):
1058       if pad.get_caps()[0].get_name().startswith('audio/'):
1059         if dcap:
1060           elt.link_pads_filtered(pad.get_name(), convert, 'sink', dcap)
1061         else:
1062           elt.link_pads(pad.get_name(), convert, 'sink')
1063     decoder.connect('pad-added', decoder_pad_arrived)
1064
1065     ## Watch the bus for completion messages.
1066     fail = []
1067     def bus_message(bus, msg):
1068       if msg.type == GS.MESSAGE_ERROR:
1069         fail[:] = (ValueError, msg.structure['debug'], None)
1070         loop.quit()
1071       elif msg.type == GS.MESSAGE_EOS:
1072         loop.quit()
1073     bmid = bus.connect('message', bus_message)
1074
1075     ## Get everything ready and let it go.
1076     pipe.set_state(GS.STATE_PLAYING)
1077     with GStreamerProgressEyecandy(filestatus(master,
1078                                               'convert to %s' % me.NAME),
1079                                    pipe):
1080       loop.run()
1081     pipe.set_state(GS.STATE_NULL)
1082     if fail:
1083       raise fail[0], fail[1], fail[2]
1084
1085     ## Fix up the output file if we have to.
1086     me.fixup(new)
1087
1088     ## We're done.
1089     OS.rename(new, target)
1090
class OggVorbisFormat (AudioFormat):
  "AudioFormat object for Ogg Vorbis."

  ## Map `vorbisenc' quality levels (times ten) to nominal bitrates, in
  ## increasing order.  From https://en.wikipedia.org/wiki/Vorbis
  QMAP = [(-1,  45), ( 0,  64), ( 1,  80), ( 2,  96),
          ( 3, 112), ( 4, 128), ( 5, 160), ( 6, 192),
          ( 7, 224), ( 8, 256), ( 9, 320), (10, 500)]

  NAME = 'Ogg Vorbis'
  MIMETYPES = set(['application/ogg', 'audio/x-vorbis', 'audio/ogg',
                   'audio/x-vorbis+ogg'])
  EXT = 'ogg'

  def encoder_chain(me):
    """
    Return the list of elements needed to encode Ogg Vorbis.

    If a bitrate was requested, choose the lowest quality setting in `QMAP'
    whose nominal bitrate is at least that high.  Raises `ValueError' if the
    requested bitrate is higher than anything in the table.
    """
    encprops = {}
    if me.bitrate is not None:

      ## The `else' clause here belongs to the `for' loop, not to the `if':
      ## we only give up once the whole table has been searched without
      ## finding a high enough bitrate.
      for q, br in me.QMAP:
        if br >= me.bitrate:
          break
      else:
        raise ValueError('no suitable quality setting found')
      encprops['quality'] = q/10.0
    return [make_element('vorbisenc', **encprops),
            make_element('oggmux')]

defformat('ogg-vorbis', OggVorbisFormat)
1117
class MP3Format (AudioFormat):
  "AudioFormat object for MP3."

  NAME = 'MP3'
  MIMETYPES = set(['audio/mpeg'])
  EXT = 'mp3'

  def encoder_chain(me):
    """
    Return the list of elements needed to encode MP3.

    The `lame' encoder is given the requested bitrate, if there is one, as
    its `vbr_mean_bitrate'.
    """
    lameprops = { 'vbr': 4 }
    if me.bitrate is not None:
      lameprops['vbr_mean_bitrate'] = me.bitrate
    return [make_element('lame', **lameprops),
            make_element('xingmux'),
            make_element('id3v2mux')]

  def fixup(me, path):
    """
    Fix up the MP3 file at PATH.

    GStreamer produces ID3v2 tags, but not ID3v1.  This seems unnecessarily
    unkind to stupid players, so rewrite the tag in both forms.
    """
    audio = E3.load(path)
    if audio is None or audio.tag is None:
      return

    ## Saving is done on a best-effort basis: a tag which can't be
    ## represented in one of the versions is simply left out.
    for version in [E3.id3.ID3_V2_3, E3.id3.ID3_V1]:
      try:
        audio.tag.save(version = version)
      except (UnicodeEncodeError,
              E3.id3.GenreException,
              E3.id3.TagException):
        pass

defformat('mp3', MP3Format)
1151
1152 ###--------------------------------------------------------------------------
1153 ### Image handling, based on the Python Imaging Library.
1154
1155 class ImageIdentifier (object):
1156   """
1157   Analyses and identifies an image file.
1158
1159   Simply leaves an Image object in the `img' property which can be inspected.
1160   """
1161
1162   def __init__(me, file, mime):
1163
1164     ## Get PIL to open the file.  It will magically work out what kind of
1165     ## file it is.
1166     try:
1167       me.img = I.open(file)
1168     except IOError, exc:
1169
1170       ## Unhelpful thing to raise on identification failure.  We can
1171       ## distinguish this from an actual I/O error because it doesn't have an
1172       ## `errno'.
1173       if exc.errno is None:
1174         raise IdentificationFailure
1175       raise
1176
1177     me.mime = set([mime])
1178
class ImageFormat (BaseFormat):
  """
  A Format specialized for image files.

  Subclasses need only supply the properties required of every concrete
  Format subclass, with the extra constraint that `NAME' must be the name
  which PIL uses for the format: it's compared against an image's `format'
  and handed to `save'.
  """

  PROPS = prop('size', Num)
  CATEGORY = FileCategory('image', ['image/*'], ImageIdentifier)

  def __init__(me, size = None, **kw):
    """
    Initialize an ImageFormat object.

    SIZE, if given, is the largest acceptable width or height.  Any other
    keywords are passed on when encoding, and may be recognized by enhanced
    `check' methods in subclasses.
    """
    me._size = size
    me._props = kw

  def check(me, id):
    "Check whether the ImageIdentifier ID matches our requirements."
    img = id.img

    ## It has to be the right kind of file.
    if img.format != me.NAME:
      return False

    ## And, if we have a size limit, it has to fit in both dimensions.
    limit = me._size
    if limit is None:
      return True
    return img.size[0] <= limit and img.size[1] <= limit

  def convert(me, master, id, target):
    "Encode the file MASTER, identified as ID, writing the result to TARGET."

    STATUS.set(filestatus(master, 'convert to %s' % me.NAME))

    ## The ImageIdentifier already holds the opened image, so use that
    ## rather than opening the file again.
    img = id.img

    ## If the image is bigger than the stated maximum size then scale it
    ## down to fit.  Thumbnailing works in place, so operate on a copy
    ## rather than clobbering the identifier's image.
    limit = me._size
    if limit is not None and max(img.size[0], img.size[1]) > limit:
      img = img.copy()
      img.thumbnail((limit, limit), I.ANTIALIAS)

    ## Write the output image to a scratch file, and fix it up if necessary.
    new = target + '.new'
    img.save(new, me.NAME, **me._props)
    me.fixup(new)

    ## We're done: move the result into place.
    OS.rename(new, target)
    STATUS.commit()
1236
class JPEGFormat (ImageFormat):
  """
  Image format for JPEG (actually JFIF) files.

  Properties which may usefully be set:

  optimize
          If present, make a second pass to choose optimal encoder settings.

  progressive
          If present, write a progressive file.

  quality Integer from 1--100 (worst to best); the default is 75.
  """
  NAME = 'JPEG'
  EXT = 'jpg'
  PROPS = (prop('optimize', None) |
           prop('progressive', None, 'progression') |
           prop('quality', Num))

defformat('jpeg', JPEGFormat)
1258
class PNGFormat (ImageFormat):
  """
  Image format for PNG files.

  Properties which may usefully be set:

  optimize
          If present, work especially hard to make the output file small.
  """
  NAME = 'PNG'
  EXT = 'png'
  PROPS = prop('optimize', None)

defformat('png', PNGFormat)
1273
class BMPFormat (ImageFormat):
  """
  Image format for Windows BMP files, as used by RockBox.

  There are no properties beyond those of `ImageFormat'.
  """
  EXT = 'bmp'
  NAME = 'BMP'

defformat('bmp', BMPFormat)
1284
1285 ###--------------------------------------------------------------------------
1286 ### Remaining parsing machinery.
1287
Type = K('type') - Name - D('{') - R(Policy) - D('}')
def build_type(s, l, t):
  """
  Parse action for `Type': build the policy for one file category.

  S is the string being parsed, L is the location of the match, and T holds
  the matched tokens: the category name followed by the list of policies.
  Several policies are combined into an `AndPolicy'.  The resulting policy
  has its category set and is returned.

  Raises `ParseException' if the category name isn't in `CATEGORYMAP'.
  """
  try:
    cat = CATEGORYMAP[t[0]]
  except KeyError:
    ## Report the failure at the location we were given.
    raise P.ParseException(s, l, "Unknown category `%s'" % t[0])
  pols = t[1]
  if len(pols) == 1: pol = pols[0]
  else: pol = AndPolicy(pols)
  pol.setcategory(cat)
  return pol
Type.setParseAction(build_type)
1300
## The targets collected from the configuration file, in order.
TARGETS = []

class TargetJob (object):
  """
  A configuration job describing a target directory.

  The `targetdir' attribute holds the directory, and `policies' the policies
  to be applied to it.
  """

  def __init__(me, targetdir, policies):
    "Remember the TARGETDIR and its POLICIES."
    me.targetdir, me.policies = targetdir, policies

  def perform(me):
    "Add this job to the global `TARGETS' list."
    TARGETS.append(me)
1308
Target = K('target') - String - D('{') - R(Type) - D('}')

def build_target(s, l, t):
  ## Parse action for `Target': the tokens T are the target directory name
  ## followed by the list of per-category policies.
  targetdir, policies = t[0], t[1]
  return TargetJob(targetdir, policies)

Target.setParseAction(build_target)
1313
## Global variables set by the configuration file.
VARS = { 'master': None }

class VarsJob (object):
  """
  A configuration job which assigns global variables.

  The `vars' attribute is an iterable of (NAME, VALUE) pairs.
  """

  def __init__(me, vars):
    "Remember the assignments VARS to be made."
    me.vars = vars

  def perform(me):
    "Store each of our assignments in the global `VARS' dictionary."
    for name, value in me.vars:
      VARS[name] = value
1321
Var = prop('master', String)
Vars = K('vars') - D('{') - R(Var) - D('}')

def build_vars(s, l, t):
  ## Parse action for `Vars': the only token in T is the list of variable
  ## assignments.
  assignments = t[0]
  return VarsJob(assignments)

Vars.setParseAction(build_vars)

## A configuration file is a sequence of `vars' and `target' blocks, with
## Python-style comments ignored.
TopLevel = Vars | Target
Config = R(TopLevel)
Config.ignore(P.pythonStyleComment)
1331
1332 ###--------------------------------------------------------------------------
1333 ### The directory grobbler.
1334
def grobble(master, targets, noact = False):
  """
  Work through the MASTER directory, writing converted files to TARGETS.

  The TARGETS are a list of `TargetJob' objects, each describing a target
  directory and a policy to apply to it.

  If NOACT is true, then don't actually do anything permanent to the
  filesystem.

  Returns a list of (MASTERFILE, EXCEPTION) pairs, one for each file or
  directory which failed with an I/O error.
  """

  ## Transform the targets into a more convenient data structure: a list,
  ## parallel to TARGETS, of dictionaries mapping each category to the list
  ## of policies for that category.
  tpolmap = []
  for t in targets:
    pmap = {}
    tpolmap.append(pmap)
    for p in t.policies: pmap.setdefault(p.cat, []).append(p)

  ## Keep track of the current position in the master tree.
  dirs = []

  ## And the files which haven't worked.
  broken = []

  def grobble_file(master, pmap, targetdir, cohorts):
    ## Convert MASTER, writing the result to TARGETDIR.
    ##
    ## The COHORTS are actually (CAT, ID, COHORT) triples, where a COHORT is
    ## a list of (FILENAME, ID) pairs.
    ##
    ## Since this function might convert the MASTER file, the caller doesn't
    ## know the name of the output files, so we return them as a list.

    done = set()
    st_m = OS.stat(master)

    ## Work through each category listed and apply its policy.
    for cat, id, cohort in cohorts:

      ## Go through the category's policies and see if any match.  If we fail
      ## here, see if there are more categories to try.
      for pol in pmap[cat]:
        acts = pol.actions(master, targetdir, id, cohort)
        if acts: break
      else:
        continue

      ## Work through the targets one by one.
      for a in acts:
        done.add(a.target)

        ## Find out whether the target file already exists and is up-to-date
        ## with respect to the master.  (Caution here with low-resolution
        ## timestamps.)  If it's OK, then just move on.  A target which is
        ## the very same file as the master (same inode and device) is also
        ## left alone.
        try:
          st_t = OS.stat(a.target)
          if st_m.st_mtime < st_t.st_mtime or \
                 (st_m.st_ino, st_m.st_dev) == (st_t.st_ino, st_t.st_dev):
            continue
        except OSError, err:
          if err.errno not in (E.ENOENT, E.ENOTDIR):
            raise

        ## We have real work to do.  If there's a current status message,
        ## it's the containing directory so flush it so that people know
        ## where we are.
        STATUS.commit()

        ## Remove the target.  (A hardlink will fail if the target already
        ## exists.)
        if not noact:
          try:
            OS.unlink(a.target)
          except OSError, err:
            if err.errno not in (E.ENOENT, E.ENOTDIR):
              raise

        ## Do whatever it is we decided to do.  In no-act mode, just report
        ## what the action would have been.
        if noact:
          STATUS.commit(filestatus(master, a))
        else:
          a.perform()

    ## We're done.  Return the names of the targets.
    return list(done)

  @contextmanager
  def wrap(masterfile):
    ## Handle exceptions found while trying to convert a particular file or
    ## directory.  The body of the `with' statement is given MASTERFILE
    ## back again as its value.

    try:
      yield masterfile

    ## Something bad happened.  Report the error, but continue.  (This list
    ## of exceptions needs a lot of work.)  The failure is recorded in
    ## `broken' so that it can be reported at the end.
    except (IOError, OSError), exc:
      STATUS.clear()
      STATUS.commit(filestatus(masterfile, 'failed (%s)' % exc))
      broken.append((masterfile, exc))

  def grobble_dir(master, targets):
    ## Recursively convert files in MASTER, writing them to the TARGETS.
    ##
    ## Here, the TARGETS are a list of target directory names, in the same
    ## order as the policy maps in `tpolmap'.

    ## Keep track of the subdirectories we encounter, because we'll need to
    ## do all of those in one go at the end.
    subdirs = set()

    ## Work through each target directory in turn.
    for target, pmap in zip(targets, tpolmap):

      ## Make sure the TARGET exists and is a directory.  It's a fundamental
      ## assumption of this program that the entire TARGET tree is
      ## disposable, so if something exists but isn't a directory, we should
      ## kill it.
      if OS.path.isdir(target):
        pass
      else:
        if OS.path.exists(target):
          STATUS.commit(filestatus(target, 'clear nondirectory'))
          if not noact:
            OS.unlink(target)
        STATUS.commit(filestatus(target, 'create directory'))
        if not noact:
          OS.mkdir(target)

      ## Keep a list of things in the target.  As we convert files, we'll
      ## check them off.  Anything left over is rubbish and needs to be
      ## deleted.  (The directory might not exist if we're in no-act mode,
      ## so a failure to find it is ignored.)
      checklist = {}
      try:
        for i in OS.listdir(target):
          checklist[i] = False
      except OSError, err:
        if err.errno not in (E.ENOENT, E.ENOTDIR):
          raise

      ## Keep track of the files in each category: `catmap' maps a category
      ## to its cohort of (FILENAME, ID) pairs, `todo' lists the files to
      ## be processed with their (CAT, ID) pairs, and `done' collects the
      ## names of target files which are accounted for.
      catmap = {}
      todo = []
      done = []

      ## Work through the master files.
      for f in sorted(OS.listdir(master)):

        ## If the killswitch has been pulled then stop.  The whole idea is
        ## that we want to cause a clean shutdown if possible, so we don't
        ## want to do it in the middle of encoding because the encoding
        ## effort will have been wasted.  This is the only place we need to
        ## check.  If we've exited the loop, then clearing old files will
        ## probably be fast, and we'll either end up here when the recursive
        ## call returns or we'll be in the same boat as before, clearing old
        ## files, only up a level.  If worst comes to worst, we'll be killed
        ## forcibly somewhere inside `SH.rmtree', and that can continue where
        ## it left off.
        if KILLSWITCH.is_set():
          return

        ## Do something with the file.
        with wrap(OS.path.join(master, f)) as masterfile:

          ## If it's a directory then prepare to grobble it recursively, but
          ## don't do that yet.
          if OS.path.isdir(masterfile):
            subdirs.add(f)
            done.append(OS.path.join(target, f))

          ## Otherwise it's a file.  Work out what kind, and stash it under
          ## the appropriate categories.  Later, we'll apply policy to the
          ## files, by category, and work out what to do with them all.
          else:
            gf = GIO.File(masterfile)
            mime = gf.query_info('standard::content-type').get_content_type()
            cats = []
            for cat in pmap.iterkeys():
              id = cat.identify(masterfile, mime)
              if id is None: continue
              catmap.setdefault(cat, []).append((masterfile, id))
              cats.append((cat, id))

            ## Files which no category recognized are collected under the
            ## key `None'.
            if not cats:
              catmap.setdefault(None, []).append((masterfile, id))
            todo.append((masterfile, cats))

      ## Work through the categorized files to see what actions to do for
      ## them.
      for masterfile, cats in todo:
        with wrap(masterfile):
          done += grobble_file(masterfile, pmap, target,
                               [(cat, id, catmap[cat]) for cat, id in cats])

      ## Check the results off the list so that we don't clear it later.
      for f in done:
        checklist[OS.path.basename(f)] = True

      ## Maybe there's stuff in the target which isn't accounted for.  Delete
      ## it: either the master has changed, or the policy for this target has
      ## changed.  Either way, the old files aren't wanted.
      for f in checklist:
        if not checklist[f]:
          STATUS.commit(filestatus(f, 'clear bogus file'))
          if not noact:
            bogus = OS.path.join(target, f)
            try:
              if OS.path.isdir(bogus):
                SH.rmtree(bogus)
              else:
                OS.unlink(bogus)
            except OSError, err:
              if err.errno != E.ENOENT:
                raise

    ## If there are subdirectories which want processing then do those.
    ## Keep the user amused by telling him where we are in the tree.
    for d in sorted(subdirs):
      dirs.append(d)
      STATUS.set('/'.join(dirs))
      with wrap(OS.path.join(master, d)) as masterdir:
        try:
          grobble_dir(masterdir,
                      [OS.path.join(target, d) for target in targets])
        finally:
          dirs.pop()
          STATUS.set('/'.join(dirs))

  ## Right.  We're ready to go.
  grobble_dir(master, [t.targetdir for t in targets])
  return broken
1562
1563 ###--------------------------------------------------------------------------
1564 ### Command-line interface.
1565
## The name we were invoked under, for use in messages.
QUIS = OS.path.basename(SYS.argv[0])

def moan(msg):
  "Write the warning MSG to standard error, prefixed by the program name."
  line = '%s: %s\n' % (QUIS, msg)
  SYS.stderr.write(line)
1571
def die(msg):
  """
  Report the fatal error MSG to the user and give up.

  The message is reported using `moan', and then the program exits with
  status 1.
  """
  moan(msg)
  SYS.exit(1)
1576
def parse_opts(args):
  """
  Parse command-line arguments in ARGS.

  Exactly one non-option argument is required, naming the configuration
  file.  The file is parsed, and each of the jobs found in it is performed.
  If a timeout was requested, a daemon thread running `timeout' is started
  to enforce it.

  Returns the options object built by the option parser.
  """

  ## Build the option parser object.
  op = OP.OptionParser(prog = QUIS, version = VERSION,
                       usage = '%prog [-in] [-t TIMEOUT] [-T TIMEOUT] '
                               'CONFIG',
                       description = """\
Convert a directory tree of files according to the configuration file
CONFIG.
""")

  ## Timeout handling.  A time is a number optionally followed by a unit
  ## letter: `d', `h', `m' or `s'; the default unit is seconds.
  def cb_time(opt, ostr, arg, op):
    m = RX.match(r'\s*(\d+)\s*([dhms]?)\s*', arg)
    if not m:
      raise OP.OptionValueError('bad time value `%s\'' % arg)
    t, u = m.groups()
    t = int(t) * { '': 1, 's': 1, 'm': 60, 'h': 3600, 'd': 86400 }[u]
    setattr(op.values, opt.dest, t)
  op.add_option('-t', '--timeout', type = 'string', metavar = 'SECS',
                dest = 'timeout',
                help = 'stop processing nicely after SECS',
                action = 'callback', callback = cb_time)
  op.add_option('-T', '--timeout-nasty', type = 'string', metavar = 'SECS',
                dest = 'timeout_nasty',
                help = 'stop processing unpleasantly after further SECS',
                action = 'callback', callback = cb_time)

  ## Other options.
  op.add_option('-i', '--interactive', action = 'store_true', dest = 'tty',
                help = 'provide progress information')
  op.add_option('-n', '--no-act', action = 'store_true', dest = 'noact',
                help = 'don\'t actually modify the filesystem')

  ## Ready to rock.
  op.set_defaults(formats = [], noact = False,
                  timeout = None, timeout_nasty = 300)
  opts, args = op.parse_args(args)

  ## Check that we got the non-option arguments that we want.
  if len(args) != 1:
    op.error('wrong number of arguments')

  ## Act on the options.  The timeout thread is a daemon so that it doesn't
  ## keep the program alive once the main thread has finished.
  if opts.tty:
    STATUS.eyecandyp = True
  if opts.timeout is not None:
    to = TH.Thread(target = timeout,
                   args = (opts.timeout, opts.timeout_nasty))
    to.daemon = True
    to.start()

  ## Parse the configuration file, and perform the jobs it describes.
  with open(args[0]) as conf:
    jobs, = Config.parseFile(conf, True)
  for j in jobs:
    j.perform()

  return opts
1642
if __name__ == '__main__':
  opts = parse_opts(SYS.argv[1:])

  ## The `master' variable starts out as `None', so checking whether the key
  ## is present isn't enough: we have to check that the configuration file
  ## actually gave it a value.
  if VARS.get('master') is None:
    die("no master directory set")

  ## Do the work, and report anything which went wrong.
  broken = grobble(VARS['master'], TARGETS, opts.noact)
  if broken:
    moan('failed to convert some files:')
    for path, exc in broken:
      moan('%s: %s' % (path, exc))
    SYS.exit(1)

  ## This is basically a successful completion: we did what we were asked to
  ## do.  It seems polite to report a message, though.
  ##
  ## Why don't we have a nonzero exit status?  The idea would be that a
  ## calling script would be interested that we used up all of our time, and
  ## not attempt to convert some other directory as well.  But that doesn't
  ## quite work.  Such a script would need to account correctly for time we
  ## had spent even if we complete successfully.  And if the script is having
  ## to watch the clock itself, it can do that without our help here.
  if KILLSWITCH.is_set():
    moan('killed by timeout')
1665
1666 ###----- That's all, folks --------------------------------------------------