3 ### Convert a directory tree of audio files
5 ### (c) 2010 Mark Wooding
8 ###----- Licensing notice ---------------------------------------------------
10 ### This file is part of the `autoys' audio tools collection.
12 ### `autoys' is free software; you can redistribute it and/or modify
13 ### it under the terms of the GNU General Public License as published by
14 ### the Free Software Foundation; either version 2 of the License, or
15 ### (at your option) any later version.
17 ### `autoys' is distributed in the hope that it will be useful,
18 ### but WITHOUT ANY WARRANTY; without even the implied warranty of
19 ### MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 ### GNU General Public License for more details.
22 ### You should have received a copy of the GNU General Public License
23 ### along with `autoys'; if not, write to the Free Software Foundation,
24 ### Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
26 ###--------------------------------------------------------------------------
27 ### External dependencies.
30 from __future__ import with_statement
32 ## Standard Python libraries.
42 import threading as TH
43 import unicodedata as UD
44 from math import sqrt, ceil
45 from contextlib import contextmanager
47 ## eyeD3 tag fettling.
52 gi.require_version('GLib', '2.0'); from gi.repository import GLib as G
53 gi.require_version('Gio', '2.0'); from gi.repository import Gio as GIO
54 gi.require_version('Gst', '1.0'); from gi.repository import Gst as GS
58 from PIL import Image as I
63 ###--------------------------------------------------------------------------
64 ### Special initialization.
71 ###--------------------------------------------------------------------------
72 ### Eyecandy progress reports.
76 Return the width of S, in characters.
78 Specifically, this is the number of backspace characters required to
79 overprint the string S. If the current encoding for `stdout' appears to be
80 Unicode then do a complicated Unicode thing; otherwise assume that
81 characters take up one cell each.
83 None of this handles tab characters in any kind of useful way. Sorry.
86 ## If there's no encoding for stdout then we're doing something stupid.
87 if SYS.stdout.encoding is None: return len(s)
89 ## Turn the string into Unicode so we can hack on it properly. Maybe that
90 ## won't work out, in which case fall back to being stupid.
91 try: u = s.decode(SYS.stdout.encoding)
92 except UnicodeError: return len(s)
94 ## Our main problem is combining characters, but we should also try to
95 ## handle wide (mostly Asian) characters, and zero-width ones. This hack
96 ## is taken mostly from http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c
100 if UD.category(ch) in ['Cf', 'Me', 'Mn'] or \
101 0x1160 <= cd <= 0x11ff: pass
102 elif UD.east_asian_width(ch) in ['F', 'W']: w += 2
108 class StatusLine (object):
110 Maintains a status line containing ephemeral progress information.
112 The status line isn't especially important, but it keeps interactive users
115 There should be only one status line object in your program; otherwise
116 they'll interfere with each other and get confused.
118 The update algorithm (in `set') is fairly careful to do the right thing
119 with long status `lines', and to work properly in an Emacs `shell' buffer.
123 "Initialize the status line."
126 me.eyecandyp = OS.isatty(SYS.stdout.fileno())
130 Set the status line contents to LINE, replacing what was there before.
132 This only produces actual output if stdout is interactive.
139 ## If the old line was longer, we need to clobber its tail, so work out
140 ## what that involves.
142 b = charwidth(me._last[n:])
147 ## Now figure out the length of the common prefix between what we had
148 ## before and what we have now. This reduces the amount of I/O done,
149 ## which keeps network traffic down on SSH links, and keeps down the
150 ## amount of work slow terminal emulators like Emacs have to do.
152 m = min(n, me._lastlen)
153 while i < m and line[i] == me._last[i]:
156 ## Actually do the output, all in one syscall.
157 b = charwidth(me._last[i:])
158 SYS.stdout.write(pre + '\b'*b + line[i:])
161 ## Update our idea of what's gone on.
166 "Clear the status line. Just like set('')."
169 def commit(me, line = None):
171 Commit the current status line, and maybe the string LINE.
173 If the current status line is nonempty, then commit it to the transcript.
174 If LINE is not None, then commit that to the transcript too.
176 After all of this, we clear the status line to get back to a clean state.
180 SYS.stdout.write('\n')
182 SYS.stdout.write(me._last + '\n')
184 SYS.stdout.write(line + '\n')
188 STATUS = StatusLine()
def filestatus(file, status):
  """
  Return a status-line string describing STATUS for FILE.

  Only the final path component of FILE is shown, indented by eight spaces
  and followed by a colon and STATUS (formatted with `%s').
  """
  indent = ' '*8
  leaf = OS.path.basename(file)
  return '%s%s: %s' % (indent, leaf, status)
193 class ProgressEyecandy (object):
195 Provide amusement while something big and complicated is happening.
197 This is an abstract class. Subclasses must provide a method `progress'
198 returning a pair (CURRENT, MAX) indicating the current progress through the
202 def __init__(me, what, silentp = False):
204 Initialize a progress meter.
206 WHAT is a prefix string to be written before the progress eyecandy
210 me._silentp = silentp
214 def _fmt_time(me, t):
215 "Format T as a time, in (maybe hours) minutes and seconds."
216 s, t = t % 60, int(t/60)
217 m, h = t % 60, int(t/60)
219 return '%d:%02d:%02d' % (h, m, s)
221 return '%02d:%02d' % (m, s)
224 "Show the current level of progress."
226 ## If we're not showing pointless frippery, don't bother at all.
227 if not STATUS.eyecandyp:
230 ## Update the spinner index.
231 me._spinner = (me._spinner + 1)%4
233 ## Fetch the current progress information. Note that we always fetch
234 ## both the current and maximum levels, because both might change if an
235 ## operation revises its idea of how much work needs doing.
236 cur, max = me.progress()
238 ## If we couldn't get progress information, display something vaguely
240 if cur is None or max is None:
241 STATUS.set('%s %c [unknown progress]' %
242 (me._what, r'/-\|'[me._spinner]))
245 ## Work out -- well, guess -- the time remaining.
248 eta = me._fmt_time(ceil((t - me._start)*(max - cur)/cur))
252 ## Set the status bar.
254 STATUS.set('%s %c [%s%s] %3d%% (%s)' % \
256 r'/-\|'[me._spinner],
261 def done(me, win = True):
262 "Show a completion notice, or a failure if WIN is false."
264 STATUS.set('%s FAILED!' % me._what)
265 elif not me._silentp:
266 STATUS.set('%s done (%s)' %
268 me._fmt_time(T.time() - me._start)))
273 ###--------------------------------------------------------------------------
274 ### Timeout handling.
276 KILLSWITCH = TH.Event()
282 moan('dying messily due to timeout')
285 ###--------------------------------------------------------------------------
286 ### Parsing utilities.
## Allow hyphens in identifiers.  The same character set is installed as
## pyparsing's default keyword characters, so that keyword matching and
## `Name' agree about where an identifier ends.
IDCHARS = P.alphanums + '-_'
P.Keyword.setDefaultKeywordChars(IDCHARS)

## Some common kinds of tokens.  `Name' is a run of identifier characters;
## `Num' is a run of digits, converted to integers by its parse action;
## `String' is delimited by double quotes, with backslash given as the
## second argument to `QuotedString'.
Name = P.Word(IDCHARS)
Num = P.Word(P.nums).setParseAction(lambda toks: map(int, toks))
String = P.QuotedString('"', '\\')
297 ## Handy abbreviations for constructed parser elements.
def K(k):
  "Return a parser element matching the keyword K, omitted from the results."
  keyword = P.Keyword(k)
  return keyword.suppress()
def D(d):
  "Return a parser element matching the literal D, omitted from the results."
  delim = P.Literal(d)
  return delim.suppress()
def R(p):
  """
  Return a parser element matching zero or more repetitions of P.

  The parse action wraps the matched tokens in a one-element list, so the
  whole repetition is delivered to the enclosing parser as a single item.
  """
  def wrap(s, l, t):
    return [t]
  repeated = P.ZeroOrMore(p)
  return repeated.setParseAction(wrap)
303 ###--------------------------------------------------------------------------
304 ### Format identification and conversion.
306 class IdentificationFailure (Exception):
309 class FileCategory (object):
311 A FileCategory represents a class of files.
313 For example, it's sensible to consider audio, or image files as a
314 category. A file category knows how to recognize member files from
318 def __init__(me, name, mime_pats, ident):
320 Construct a new category.
322 The PATS are a list of `fnmatch' patterns to be compared with a MIME
323 type. The IDENT is a function which produces an identification object
324 given a file's name and first-guess MIME type. The object is passed to a
325 Format's `check' method to see whether a file needs re-encoding, and to
326 `convert' to assist with the conversion.
328 An identification object must have an attribute `mime' which is a set of
329 possible MIME types accumulated for the object.
332 me._mime_pats = mime_pats
334 CATEGORYMAP[name] = me
336 def identify(me, file, mime):
338 Attempt to identify FILE, given its apparent MIME type.
340 If identification succeeds, return an identification object which can be
341 used by associated file formats; otherwise return None.
343 for p in me._mime_pats:
344 if not FN.fnmatchcase(mime, p):
347 return me._ident(file, mime)
348 except IdentificationFailure:
352 class BaseFormat (object):
354 A BaseFormat object represents a particular encoding and parameters.
356 The object can verify (the `check' method) whether a particular file
357 matches its requirements, and if necessary (`encode') re-encode a file.
359 Subclasses should define the following methods.
362 Answer whether the file identified by ID is acceptable according to
363 the receiver's parameters.
365 convert(MASTER, ID, TARGET)
366 Convert the file MASTER, which has been identified as ID, according
367 to the receiver's parameters, writing the output to TARGET.
369 Subclasses should also provide these attributes.
372 A FileCategory object for the category of files that this format
375 EXT A file extension to be applied to encoded output files.
377 NAME A user-facing name for the format.
379 PROPS A parser element to parse a property definition. It should produce
380 a pair NAME, VALUE to be stored in a dictionary.
382 Subclasses for different kinds of file may introduce more subclass
387 """Post-encoding fixups."""
def defformat(name, cls):
  """
  Define a format NAME using class CLS.

  CLS must carry both a `NAME' and a `CATEGORY' attribute.  If either is
  missing, raise ValueError (`abstract class' or `no category' respectively)
  and leave FORMATMAP untouched.  Otherwise store CLS in FORMATMAP under
  NAME.
  """
  ## Check `NAME' before `CATEGORY', so a class lacking both is reported as
  ## abstract.  Exceptions are raised in call form, which is valid in both
  ## Python 2 and Python 3.
  for attr, complaint in [('NAME', 'abstract class'),
                          ('CATEGORY', 'no category')]:
    if not hasattr(cls, attr):
      raise ValueError(complaint)
  FORMATMAP[name] = cls
401 class FormatParser (P.ParserElement):
403 Parse a format specifier:
405 format-spec ::= string [format-properties]
406 format-properties ::= `{' format-property (`,' format-property)* `}'
408 The syntax of a format-property is determined by the PROPS attribute on the
409 named format and its superclasses.
414 ## We cache the parser elements we generate to avoid enormous consing.
417 def parseImpl(me, s, loc, actp = True):
419 ## Firstly, determine the format name.
420 loc, r = Name._parse(s, loc, actp)
423 ## Look up the format class.
424 try: fcls = FORMATMAP[fmt]
426 raise P.ParseException(s, loc, "Unknown format `%s'" % fmt)
428 ## Fetch the property-list parser from the cache, if possible; else
437 except AttributeError: continue
438 if p in seen: continue
439 if prop is None: prop = p
443 pp = me.CACHE[fmt] = None
445 props = P.delimitedList(prop)
446 props.setParseAction(lambda s, l, t: dict(t.asList()))
447 pp = me.CACHE[fmt] = O(D('{') - props - D('}'))
449 ## Parse the properties.
453 loc, r = pp._parse(s, loc, actp)
457 ## Construct the format object and return it.
458 return loc, fcls(**pd)
460 Format = FormatParser()
462 def prop(kw, pval, tag = None):
463 if tag is None: tag = kw
466 p.setParseAction(lambda s, l, t: (tag, True))
468 p = K(kw) + D('=') + pval
469 p.setParseAction(lambda s, l, t: (tag, t[0]))
472 ###--------------------------------------------------------------------------
473 ### Policies and actions.
475 class Action (object):
477 An Action object represents a conversion action to be performed.
479 This class isn't intended to be instantiated directly. It exists to define
480 some protocol common to all Action objects.
482 Action objects have the following attributes.
484 master The name of the master (source) file.
486 target The name of the target (destination) file.
488 PRIORITY The priority of the action, for deciding which of two actions
489 to perform. Higher priorities are more likely to win.
491 Converting an Action to a string describes the action in a simple
492 user-readable manner. The `perform' method actually carries the action
498 def __init__(me, master):
499 "Stash the MASTER file name for later."
503 "Choose either ME or HIM and return one."
504 if him is None or me.PRIORITY > him.PRIORITY:
509 class CopyAction (Action):
511 An Action object for simply copying a file.
513 Actually we try to hardlink it first, falling back to a copy later. This
514 is both faster and more efficient with regard to disk space.
517 ## Copying is good. Linking is really good, but we can't tell the
518 ## difference at this stage.
def __init__(me, master, targetdir):
  """
  Set up a CopyAction from MASTER into the directory TARGETDIR.

  The target keeps the master's file name: it is TARGETDIR joined with the
  final path component of MASTER.
  """
  Action.__init__(me, master)
  leaf = OS.path.basename(master)
  me.target = OS.path.join(targetdir, leaf)
530 "Actually perform a CopyAction."
532 STATUS.set(filestatus(me.master, 'link'))
533 OS.link(me.master, me.target)
535 if err.errno != E.EXDEV:
537 STATUS.set(filestatus(me.master, 'copy'))
538 new = me.target + '.new'
539 SH.copyfile(me.master, new)
540 OS.rename(new, me.target)
543 class ConvertAction (Action):
545 An Action object for converting a file to a given format.
547 Additional attributes:
549 id The identification object for the master file.
551 format The format to which we're meant to convert the master.
554 def __init__(me, master, targetdir, id, format):
555 "Initialize a ConvertAction."
556 Action.__init__(me, master)
557 stem, ext = OS.path.splitext(OS.path.basename(master))
558 me.target = OS.path.join(targetdir, stem + '.' + format.EXT)
563 return 'convert to %s' % me.format.NAME
566 "Actually perform a ConvertAction."
567 STATUS.set(filestatus(me.master, me))
568 me.format.convert(me.master, me.id, me.target)
572 class FormatPolicy (object):
574 A FormatPolicy object represents a set of rules for how to convert files.
576 Given a master file, the FormatPolicy will identify it and return a list of
577 actions to be performed. The methods required of a FormatPolicy are:
580 Store CAT as the policy's category. Check that this is consistent
581 with the policy as stored.
583 actions(MASTER, TARGETDIR, ID, COHORT)
584 Given a MASTER file, identified as ID, a target directory
585 TARGETDIR, and a list COHORT of (FILE, ID) pairs for other files
586 of the same category in the same directory, return a list of
587 actions to be performed to get the target directory into the right
588 form. The list might be empty if the policy object /rejects/ the
592 class AndPolicy (FormatPolicy):
594 A FormatPolicy which does the union of a bunch of other policies.
596 Each subsidiary policy is invoked in turn. The highest-priority action for
597 each target file is returned.
def __init__(me, policies):
  "Remember the subsidiary POLICIES; `actions' consults each of them in turn."
  me._policies = policies
603 def setcategory(me, cat):
605 for p in me._policies:
608 def actions(me, master, targetdir, id, cohort):
610 for p in me._policies:
611 for a in p.actions(master, targetdir, id, cohort):
613 tmap[a.target] = a.choose(tmap.get(a.target))
## Grammar: `and' `{' POLICY* `}'.  The keyword and the braces are suppressed
## (see `K' and `D'), so the only token left is the list of subsidiary
## policies gathered by `R'; that list becomes the AndPolicy's argument.
And = K('and') - D('{') - R(Policy) - D('}')
And.setParseAction(lambda s, l, t: AndPolicy(t[0]))
621 class OrPolicy (FormatPolicy):
623 A FormatPolicy which tries other policies and uses the first that accepts.
625 Each subsidiary policy is invoked in turn. If any accepts, the actions it
626 proposes are returned and no further policies are invoked. If none accepts
627 then the file is rejected.
def __init__(me, policies):
  "Remember the subsidiary POLICIES; `actions' tries each of them in turn."
  me._policies = policies
633 def setcategory(me, cat):
635 for p in me._policies:
638 def actions(me, master, targetdir, id, cohort):
639 for p in me._policies:
640 aa = p.actions(master, targetdir, id, cohort)
## Grammar: `or' `{' POLICY* `}'.  The keyword and the braces are suppressed
## (see `K' and `D'), so the only token left is the list of subsidiary
## policies gathered by `R'; that list becomes the OrPolicy's argument.
Or = K('or') - D('{') - R(Policy) - D('}')
Or.setParseAction(lambda s, l, t: OrPolicy(t[0]))
649 class AcceptPolicy (FormatPolicy):
651 A FormatPolicy which copies files in a particular format.
653 If all of the files in a cohort are recognized as being in a particular
654 format (including this one), then accept it with a CopyAction; otherwise
658 def __init__(me, format):
661 def setcategory(me, cat):
662 if me._format.CATEGORY is not cat:
664 "Accept format `%s' has category `%s', not `%s'" % \
665 (me._format.__class__.__name__,
666 me._format.CATEGORY.name, cat.name)
669 def actions(me, master, targetdir, id, cohort):
670 if me._format.check(id) and \
671 all(me._format.check(cid) for f, cid in cohort):
672 return [CopyAction(master, targetdir)]
## Grammar: `accept' FORMAT-SPEC.  The keyword is suppressed, so the single
## remaining token is the format object produced by the `Format' parser.
Accept = K('accept') - Format
Accept.setParseAction(lambda s, l, t: AcceptPolicy(t[0]))
679 class ConvertPolicy (FormatPolicy):
681 A FormatPolicy which copies files in a particular format or converts if
684 def __init__(me, format):
687 def setcategory(me, cat):
688 if me._format.CATEGORY is not cat:
690 "Accept format `%s' has category `%s', not `%s'" % \
691 (me._format.__class__.__name__,
692 me._format.CATEGORY.name, cat.name)
695 def actions(me, master, targetdir, id, cohort):
696 if me._format.check(id):
697 return [CopyAction(master, targetdir)]
699 return [ConvertAction(master, targetdir, id, me._format)]
## Grammar: `convert' FORMAT-SPEC.  The keyword is suppressed, so the single
## remaining token is the format object produced by the `Format' parser.
Convert = K('convert') - Format
Convert.setParseAction(lambda s, l, t: ConvertPolicy(t[0]))
## Now that all four alternatives exist, fill in `Policy', which the `and'
## and `or' rules already refer to recursively.  (Presumably `Policy' is a
## pyparsing forward declaration made earlier -- not visible here; confirm.)
Policy << (And | Or | Accept | Convert)
706 ###--------------------------------------------------------------------------
707 ### Audio handling, based on GStreamer.
def make_element(factory, name = None, **props):
  """
  Return a new element from the FACTORY with the given NAME and PROPS.

  FACTORY is the name of a GStreamer element factory and NAME is the name
  for the new element (None by default).  Any further keyword arguments are
  set as properties on the new element.  Raise ValueError if the factory
  fails to produce an element.
  """
  elt = GS.ElementFactory.make(factory, name)
  if elt is None:
    ## Call-form `raise' is valid in both Python 2 and Python 3.
    raise ValueError('failed to make `%s\' element' % factory)
  elt.set_properties(**props)
  return elt
716 def link_elements(elts):
717 "Link the elements ELTS together, in order."
720 if e0 is not None: e0.link(e1)
723 def bin_children(bin):
724 "Iterate over the (direct) children of a BIN."
725 iter = bin.iterate_elements()
727 rc, elt = iter.next()
728 if rc == GS.IteratorResult.DONE: break
729 elif rc != GS.IteratorResult.OK:
730 raise ValueError, 'iteration failed (%s)' % rc
733 class GStreamerProgressEyecandy (ProgressEyecandy):
735 Provide amusement while GStreamer is busy doing something.
737 The GStreamerProgressEyecandy object is a context manager. Wrap it round
738 your GStreamer loop to provide progress information for an operation.
741 def __init__(me, what, elt, **kw):
743 Initialize a progress meter.
745 WHAT is a prefix string to be written before the progress eyecandy
746 itself. ELT is a GStreamer element to interrogate to find the progress
750 ProgressEyecandy.__init__(me, what, **kw)
753 "Called by GLib main event loop to update the eyecandy."
759 Update the progress meter.
761 This is called periodically by the GLib main event-processing loop.
767 "Return the current progress as a pair (CURRENT, MAX)."
769 ## Fetch the current progress information. We get the duration each
770 ## time, because (particularly with VBR-encoded MP3 inputs) the estimated
771 ## duration can change as we progress. Hopefully it settles down fairly
773 ok, t = me._elt.query_position(GS.Format.TIME)
774 if ok: ok, end = me._elt.query_duration(GS.Format.TIME)
776 else: return None, None
779 "Enter context: attach progress meter display."
781 ## If we're not showing pointless frippery, don't bother at all.
782 if not STATUS.eyecandyp:
785 ## Update regularly. The pipeline runs asynchronously.
786 me._id = G.timeout_add(100, me._update)
788 def __exit__(me, ty, val, tb):
789 "Leave context: remove display and report completion or failure."
791 ## If we're not showing pointless frippery, there's nothing to remove.
793 G.source_remove(me._id)
795 ## Report completion anyway.
801 class AudioIdentifier (object):
803 Analyses and identifies an audio file.
805 Important properties are:
807 cap A capabilities structure describing the audio file data. The most
808 interesting thing in here is probably its name, which is a MIME
809 type describing the data.
811 dcap A capabilities structure describing the decoded audio data. This
812 is of interest during conversion.
814 tags A dictionary containing metadata tags from the file. These are in
815 GStreamer's encoding-independent format.
817 bitrate An approximation to the stream's bitrate, in kilobits per second.
818 This might be slow to work out for some files so it's computed on
822 def _prepare_pipeline(me):
826 ## The basic recognition kit is based around `decodebin'. We must keep
827 ## it happy by giving it sinks for the streams it's found, which it
828 ## announces asynchronously.
829 source = make_element('filesrc', 'file', location = me._file)
830 decoder = make_element('decodebin', 'decode')
831 sink = make_element('fakesink')
832 def decoder_pad_arrived(elt, pad):
833 if pad.get_current_caps()[0].get_name().startswith('audio/'):
834 elt.link_pads(pad.get_name(), sink, 'sink')
835 decoder.connect('pad-added', decoder_pad_arrived)
836 for i in [source, decoder, sink]: pipe.add(i)
837 link_elements([source, decoder])
840 return pipe, bus, decoder, sink
842 def __init__(me, file, mime):
843 "Initialize the object suitably for identifying FILE."
846 pipe, bus, decoder, sink = me._prepare_pipeline()
848 ## Make some initial GStreamer objects. We'll want the pipeline later if
849 ## we need to analyse a poorly tagged MP3 stream, so save it away.
852 ## Arrange to collect tags from the pipeline's bus as they're reported.
855 def bus_message(bus, msg):
856 ty, s = msg.type, msg.get_structure()
857 if ty == GS.MessageType.ERROR:
858 fail[:] = (ValueError, s['debug'], None)
860 elif ty == GS.MessageType.STATE_CHANGED:
861 if s['new-state'] == GS.State.PAUSED and \
864 elif ty == GS.MessageType.TAG:
866 for i in xrange(tt.n_tags()):
867 t = tt.nth_tag_name(i)
868 if tt.get_tag_size(t) != 1: continue
869 v = tt.get_value_index(t, 0)
871 bmid = bus.connect('message', bus_message)
873 ## We want to identify the kind of stream this is. (Hmm. The MIME type
874 ## recognizer has already done this work, but GStreamer is probably more
875 ## reliable.) The `decodebin' has a `typefind' element inside which will
876 ## announce the identified media type. All we need to do is find it and
877 ## attach a signal handler. (Note that the handler might be run in the
878 ## thread context of the pipeline element, but Python's GIL will keep
879 ## things from being too awful.)
882 for e in bin_children(decoder):
883 if e.get_factory().get_name() == 'typefind':
887 assert False, 'failed to find typefind element'
889 ## Crank up most of the heavy machinery. The message handler will stop
890 ## the loop when things seem to be sufficiently well underway.
891 bus.add_signal_watch()
892 pipe.set_state(GS.State.PAUSED)
895 bus.remove_signal_watch()
897 pipe.set_state(GS.State.NULL)
898 raise fail[0], fail[1], fail[2]
900 ## Store the collected tags.
903 ## Gather the capabilities. The `typefind' element knows the input data
904 ## type. The 'decodebin' knows the raw data type.
905 me.cap = tfelt.get_static_pad('src').get_allowed_caps()[0]
906 me.mime = set([mime, me.cap.get_name()])
907 me.dcap = sink.get_static_pad('sink').get_allowed_caps()[0]
909 ## If we found a plausible bitrate then stash it. Otherwise note that we
910 ## failed. If anybody asks then we'll work it out then.
911 if 'nominal-bitrate' in tags:
912 me._bitrate = tags['nominal-bitrate']/1000
913 elif 'bitrate' in tags and tags['bitrate'] >= 80000:
914 me._bitrate = tags['bitrate']/1000
916 ok, n = pipe.query_duration(GS.Format.BYTES)
917 if ok: ok, t = pipe.query_duration(GS.Format.TIME)
918 if ok: me._bitrate = int((8e6*n)/t)
919 else: me._bitrate = None
920 pipe.set_state(GS.State.NULL)
925 Return the approximate bit-rate of the input file.
927 This might take a while if we have to work it out the hard way.
930 ## If we already know the answer then just return it.
931 if me._bitrate is not None:
934 ## Make up a new pipeline and main loop.
935 pipe, bus, _, _ = me._prepare_pipeline()
938 ## Watch for bus messages. We'll stop when we reach the end of the
939 ## stream: then we'll have a clear idea of how long the track was.
941 def bus_message(bus, msg):
942 ty, s = msg.type, msg.get_structure()
943 if ty == GS.MessageType.ERROR:
944 fail[:] = (ValueError, s['debug'], None)
946 elif ty == GS.MessageType.EOS:
949 bmid = bus.connect('message', bus_message)
951 ## Get everything moving, and keep the user amused while we work.
952 bus.add_signal_watch()
953 pipe.set_state(GS.State.PLAYING)
954 with GStreamerProgressEyecandy(filestatus(me._file, 'measure bitrate'),
955 pipe, silentp = True):
957 bus.remove_signal_watch()
960 pipe.set_state(GS.State.NULL)
961 raise fail[0], fail[1], fail[2]
964 ## The bitrate computation wants the file size. Ideally we'd want the
965 ## total size of the frames' contents, but that seems hard to dredge
966 ## out. If the framing overhead is small, this should be close enough
968 bytes = OS.stat(me._file).st_size
970 ## Now we should be able to find out our position accurately and work out
971 ## a bitrate. Cache it in case anybody asks again.
972 ok, t = pipe.query_position(GS.Format.TIME)
973 assert ok, 'failed to discover bitrate'
974 me._bitrate = int(8*bytes*1e6/t)
975 pipe.set_state(GS.State.NULL)
980 class AudioFormat (BaseFormat):
982 An AudioFormat is a kind of Format specialized for audio files.
984 Format checks are done on an AudioIdentifier object.
987 PROPS = prop('bitrate', Num)
989 ## libmagic reports `application/ogg' for Ogg Vorbis files. We've switched
990 ## to GIO now, which reports either `audio/ogg' or `audio/x-vorbis+ogg'
991 ## depending on how thorough it's trying to be. Still, it doesn't do any
992 ## harm here; the main risk is picking up Ogg Theora files by accident, and
993 ## we'll probably be able to extract the audio from them anyway.
994 CATEGORY = FileCategory('audio', ['audio/*', 'application/ogg'],
997 def __init__(me, bitrate = None):
998 "Construct an object, requiring an approximate bitrate."
1003 Return whether the AudioIdentifier ID is suitable for our purposes.
1005 Subclasses can either override this method or provide a property
1006 `MIMETYPES', which is a list (other thing that implements `__contains__')
1007 of GStreamer MIME types matching this format.
1009 return id.mime & me.MIMETYPES and \
1010 (me.bitrate is None or id.bitrate <= me.bitrate * sqrt(2))
1014 Constructs a GStreamer element to encode audio input.
1016 Subclasses can either override this method (or replace `encode'
1017 entirely), or provide a method `encoder_chain' which returns a list of
1018 elements to be linked together in sequence. The first element in the
1019 chain must have a pad named `sink' and the last must have a pad named
1022 elts = me.encoder_chain()
1024 for i in elts: bin.add(i)
1026 bin.add_pad(GS.GhostPad('sink', elts[0].get_static_pad('sink')))
1027 bin.add_pad(GS.GhostPad('src', elts[-1].get_static_pad('src')))
1030 def convert(me, master, id, target):
1032 Encode audio from MASTER, already identified as ID, writing it to TARGET.
1034 See `encoder' for subclasses' responsibilities.
1037 ## Construct the necessary equipment.
1038 pipe = GS.Pipeline()
1039 bus = pipe.get_bus()
1042 ## Make sure that there isn't anything in the way of our output. We're
1043 ## going to write to a scratch file so that we don't get confused by
1044 ## half-written rubbish left by a crashed program.
1045 new = target + '.new'
1048 except OSError, err:
1049 if err.errno != E.ENOENT:
1052 ## Piece together our pipeline. The annoying part is that the
1053 ## `decodebin' doesn't have any source pads yet, so our chain is in two
1055 source = make_element('filesrc', 'source', location = master)
1056 decoder = make_element('decodebin', 'decode')
1057 convert = make_element('audioconvert', 'convert')
1058 encoder = me.encoder()
1059 sink = make_element('filesink', 'sink', location = new)
1060 for i in [source, decoder, convert, encoder, sink]: pipe.add(i)
1061 link_elements([source, decoder])
1062 link_elements([convert, encoder, sink])
1064 ## Some decoders (e.g., the AC3 decoder) include channel-position
1065 ## indicators in their output caps. The Vorbis encoder interferes with
1066 ## this, and you end up with a beautifully encoded mono signal from a
1067 ## stereo source. From a quick butchers at the `vorbisenc' source, I
1068 ## /think/ that this is only a problem with stereo signals: mono signals
1069 ## are mono already, and `vorbisenc' accepts channel positions if there
1070 ## are more than two channels.
1072 ## So we have this bodge. We already collected the decoded audio caps
1073 ## during identification. So if we see 2-channel audio with channel
1074 ## positions, we strip the positions off forcibly by adding a filter.
1075 if id.dcap.get_name().startswith('audio/x-raw-') and \
1076 id.dcap.has_field('channels') and \
1077 id.dcap['channels'] == 2 and \
1078 id.dcap.has_field('channel-positions'):
1081 c.remove_field('channel-positions')
1086 ## Hook onto the `decodebin' so we can link together the two halves of
1087 ## our encoding chain. For now, we'll hope that there's only one audio
1088 ## stream in there, and just throw everything else away.
1089 def decoder_pad_arrived(elt, pad):
1090 if pad.get_current_caps()[0].get_name().startswith('audio/'):
1092 elt.link_pads_filtered(pad.get_name(), convert, 'sink', dcap)
1094 elt.link_pads(pad.get_name(), convert, 'sink')
1095 decoder.connect('pad-added', decoder_pad_arrived)
1097 ## Watch the bus for completion messages.
1099 def bus_message(bus, msg):
1100 if msg.type == GS.MessageType.ERROR:
1101 fail[:] = (ValueError, msg.get_structure()['debug'], None)
1103 elif msg.type == GS.MessageType.EOS:
1105 bmid = bus.connect('message', bus_message)
1107 ## Get everything ready and let it go.
1108 bus.add_signal_watch()
1109 pipe.set_state(GS.State.PLAYING)
1110 with GStreamerProgressEyecandy(filestatus(master,
1111 'convert to %s' % me.NAME),
1114 pipe.set_state(GS.State.NULL)
1115 bus.remove_signal_watch()
1116 bus.disconnect(bmid)
1118 raise fail[0], fail[1], fail[2]
1120 ## Fix up the output file if we have to.
1124 OS.rename(new, target)
1126 class OggVorbisFormat (AudioFormat):
1127 "AudioFormat object for Ogg Vorbis."
1129 ## From https://en.wikipedia.org/wiki/Vorbis
1130 QMAP = [(-1, 45), ( 0, 64), ( 1, 80), ( 2, 96),
1131 ( 3, 112), ( 4, 128), ( 5, 160), ( 6, 192),
1132 ( 7, 224), ( 8, 256), ( 9, 320), (10, 500)]
1135 MIMETYPES = set(['application/ogg', 'audio/x-vorbis', 'audio/ogg',
1136 'audio/x-vorbis+ogg'])
1139 def encoder_chain(me):
1141 if me.bitrate is not None:
1142 for q, br in me.QMAP:
1143 if br >= me.bitrate:
1146 raise ValueError, 'no suitable quality setting found'
1147 encprops['quality'] = q/10.0
1148 return [make_element('vorbisenc', **encprops),
1149 make_element('oggmux')]
1151 defformat('ogg-vorbis', OggVorbisFormat)
1153 class MP3Format (AudioFormat):
1154 "AudioFormat object for MP3."
1157 MIMETYPES = set(['audio/mpeg'])
def encoder_chain(me):
  """
  Return the list of elements which encode audio as MP3.

  If a bitrate was requested then `lamemp3enc' is told to target that
  bitrate; otherwise it is told to target a quality level.  The encoder is
  followed by `xingmux' and `id3v2mux' elements, in that order.
  """
  ## Collect all of the encoder properties in one dictionary.  `quality' is
  ## set here and nowhere else: passing it as an explicit keyword argument
  ## alongside `**encprops' makes the call fail with a TypeError (duplicate
  ## keyword argument) whenever the dictionary also has a `quality' entry,
  ## which is what happened when no bitrate was requested.
  encprops = { 'quality': 4 }
  if me.bitrate is not None:
    encprops['bitrate'] = me.bitrate
    encprops['target'] = 'bitrate'
  else:
    encprops['target'] = 'quality'
  return [make_element('lamemp3enc', **encprops),
          make_element('xingmux'),
          make_element('id3v2mux')]
1172 def fixup(me, path):
1176 GStreamer produces ID3v2 tags, but not ID3v1. This seems unnecessarily
1177 unkind to stupid players.
1180 if f is None: return
1182 if t is None: return
1183 for v in [E3.id3.ID3_V2_3, E3.id3.ID3_V1]:
1184 try: f.tag.save(version = v)
1185 except (UnicodeEncodeError,
1186 E3.id3.GenreException,
1187 E3.id3.TagException):
1190 defformat('mp3', MP3Format)
1192 ###--------------------------------------------------------------------------
1193 ### Image handling, based on the Python Imaging Library.
class ImageIdentifier (object):
  """
  Analyses and identifies an image file.

  Simply leaves an Image object in the `img' property which can be inspected.
  The MIME type passed to the constructor is kept, as a one-element set, in
  the `mime' property.
  """

  def __init__(me, file, mime):
    """
    Open FILE as an image and remember its MIME type.

    Raises `IdentificationFailure' if opening the image fails with an
    `IOError' which carries no `errno'; any other `IOError' is passed on to
    the caller unchanged.
    """

    ## Get PIL to open the file.  It will magically work out what kind of
    ## image it is.
    try:
      me.img = I.open(file)
    except IOError as exc:

      ## Unhelpful thing to raise on identification failure.  We can
      ## distinguish this from an actual I/O error because it doesn't have an
      ## `errno'.
      if exc.errno is None:
        raise IdentificationFailure
      raise

    me.mime = set([mime])
1219 class ImageFormat (BaseFormat):
1221 An ImageFormat is a kind of Format specialized for image files.
1223 Subclasses don't need to provide anything other than the properties
1224 required by all concrete Format subclasses. However, there is a
1225 requirement that the `NAME' property match PIL's `format' name for the
1229 PROPS = prop('size', Num)
1230 CATEGORY = FileCategory('image', ['image/*'], ImageIdentifier)
1232 def __init__(me, size = None, **kw):
1234 Initialize an ImageFormat object.
1236 Additional keywords are used when encoding, and may be recognized by
1237 enhanced `check' methods in subclasses.
1243 "Check whether the ImageIdentifier ID matches our requirements."
1244 return id.img.format == me.NAME and \
1245 (me._size is None or
1246 (id.img.size[0] <= me._size and
1247 id.img.size[1] <= me._size))
1249 def convert(me, master, id, target):
1250 "Encode the file MASTER, identified as ID, writing the result to TARGET."
1252 ## Write to a scratch file.
1253 new = target + '.new'
1255 ## The ImageIdentifier already contains a copy of the open file. It
1256 ## would be wasteful not to use it.
1258 STATUS.set(filestatus(master, 'convert to %s' % me.NAME))
1260 ## If there's a stated maximum size then scale the image down to match.
1261 ## But thumbnailing clobbers the original, so take a copy.
1262 if me._size is not None and \
1263 (img.size[0] > me._size or img.size[1] > me._size):
1265 img.thumbnail((me._size, me._size), I.ANTIALIAS)
1267 ## Write the output image.
1268 img.save(new, me.NAME, **me._props)
1270 ## Fix it up if necessary.
1274 OS.rename(new, target)
1277 class JPEGFormat (ImageFormat):
1279 Image format for JPEG (actually JFIF) files.
1281 Interesting properties to set:
1284 If present, take a second pass to select optimal encoder settings.
1287 If present, make a progressive file.
1289 quality Integer from 1--100 (worst to best); default is 75.
1293 PROPS = prop('optimize', None) \
1294 | prop('progressive', None, 'progression') \
1295 | prop('quality', Num)
1297 defformat('jpeg', JPEGFormat)
1299 class PNGFormat (ImageFormat):
1301 Image format for PNG files.
1303 Interesting properties:
1306 If present, make a special effort to minimize the output file.
1310 PROPS = prop('optimize', None)
1312 defformat('png', PNGFormat)
1314 class BMPFormat (ImageFormat):
1316 Image format for Windows BMP files, as used by RockBox.
1318 No additional properties.
1323 defformat('bmp', BMPFormat)
1325 ###--------------------------------------------------------------------------
1326 ### Remaining parsing machinery.
1328 Type = K('type') - Name - D('{') - R(Policy) - D('}')
def build_type(s, l, t):
  """
  Parse action for `Type': attach a category to the parsed policies.

  S is the text being parsed, L is the location of the match within it, and
  T holds the matched tokens: T[0] names the category and T[1] holds the
  policies.  A single policy is used as it stands; several are combined into
  an `AndPolicy'.  The category is then set on the result, which is returned.

  Raises `ParseException' if T[0] isn't a key of `CATEGORYMAP'.
  """
  try:
    cat = CATEGORYMAP[t[0]]
  except KeyError:

    ## Report the problem at the match location L.  (Naming a nonexistent
    ## variable here would turn an unknown category into a `NameError'
    ## rather than a proper parse error.)
    raise P.ParseException(s, l, "Unknown category `%s'" % t[0])
  pols = t[1]
  if len(pols) == 1: pol = pols[0]
  else: pol = AndPolicy(pols)
  pol.setcategory(cat)
  return pol
1339 Type.setParseAction(build_type)
1342 class TargetJob (object):
1343 def __init__(me, targetdir, policies):
1344 me.targetdir = targetdir
1345 me.policies = policies
1349 Target = K('target') - String - D('{') - R(Type) - D('}')
def build_target(s, l, t):
  """
  Parse action for `Target': make a `TargetJob' from the matched tokens.

  T[0] is passed as the job's target directory and T[1] as its policies; S
  and L are not used.
  """
  targetdir, policies = t[0], t[1]
  return TargetJob(targetdir, policies)
1352 Target.setParseAction(build_target)
1354 VARS = { 'master': None }
1355 class VarsJob (object):
1356 def __init__(me, vars):
1359 for k, v in me.vars:
1362 Var = prop('master', String)
1363 Vars = K('vars') - D('{') - R(Var) - D('}')
def build_vars(s, l, t):
  """
  Parse action for `Vars': make a `VarsJob' from the matched tokens.

  T[0] is handed to the job as its variable settings; S and L are not used.
  """
  settings = t[0]
  return VarsJob(settings)
1366 Vars.setParseAction(build_vars)
1368 TopLevel = Vars | Target
1369 Config = R(TopLevel)
1370 Config.ignore(P.pythonStyleComment)
1372 ###--------------------------------------------------------------------------
1373 ### The directory grobbler.
1375 def grobble(master, targets, noact = False):
1377 Work through the MASTER directory, writing converted files to TARGETS.
1379 The TARGETS are a list of `TargetJob' objects, each describing a target
1380 directory and a policy to apply to it.
1382 If NOACT is true, then don't actually do anything permanent to the
1386 ## Transform the targets into a more convenient data structure.
1390 tpolmap.append(pmap)
1391 for p in t.policies: pmap.setdefault(p.cat, []).append(p)
1393 ## Keep track of the current position in the master tree.
1396 ## And the files which haven't worked.
1399 def grobble_file(master, pmap, targetdir, cohorts):
1400 ## Convert MASTER, writing the result to TARGETDIR.
1402 ## The COHORTS are actually (CAT, ID, COHORT) triples, where a COHORT is
1403 ## a list of (FILENAME, ID) pairs.
1405 ## Since this function might convert the MASTER file, the caller doesn't
1406 ## know the name of the output files, so we return them as a list.
1409 st_m = OS.stat(master)
1411 ## Work through each category listed and apply its policy.
1412 for cat, id, cohort in cohorts:
1414 ## Go through the category's policies and see if any match. If we fail
1415 ## here, see if there are more categories to try.
1416 for pol in pmap[cat]:
1417 acts = pol.actions(master, targetdir, id, cohort)
1422 ## Work through the targets one by one.
1426 ## Find out whether the target file already exists and is up-to-date
1427 ## with respect to the master. (Caution here with low-resolution
1428 ## timestamps.) If it's OK, then just move on.
1430 st_t = OS.stat(a.target)
1431 if st_m.st_mtime < st_t.st_mtime or \
1432 (st_m.st_ino, st_m.st_dev) == (st_t.st_ino, st_t.st_dev):
1434 except OSError, err:
1435 if err.errno not in (E.ENOENT, E.ENOTDIR):
1438 ## We have real work to do. If there's a current status message,
1439 ## it's the containing directory so flush it so that people know
1443 ## Remove the target. (A hardlink will fail if the target already
1448 except OSError, err:
1449 if err.errno not in (E.ENOENT, E.ENOTDIR):
1452 ## Do whatever it is we decided to do.
1454 STATUS.commit(filestatus(master, a))
1458 ## We're done. Return the names of the targets.
1462 def wrap(masterfile):
1463 ## Handle exceptions found while trying to convert a particular file or
1469 ## Something bad happened. Report the error, but continue. (This list
1470 ## of exceptions needs a lot of work.)
1471 except (IOError, OSError), exc:
1473 STATUS.commit(filestatus(masterfile, 'failed (%s)' % exc))
1474 broken.append((masterfile, exc))
1476 def grobble_dir(master, targets):
1477 ## Recursively convert files in MASTER, writing them to the TARGETS.
1479 ## Keep track of the subdirectories we encounter, because we'll need to
1480 ## do all of those in one go at the end.
1483 ## Work through each target directory in turn.
1484 for target, pmap in zip(targets, tpolmap):
1486 ## Make sure the TARGET exists and is a directory. It's a fundamental
1487 ## assumption of this program that the entire TARGET tree is
1488 ## disposable, so if something exists but isn't a directory, we should
1490 if OS.path.isdir(target):
1493 if OS.path.exists(target):
1494 STATUS.commit(filestatus(target, 'clear nondirectory'))
1497 STATUS.commit(filestatus(target, 'create directory'))
1501 ## Keep a list of things in the target. As we convert files, we'll
1502 ## check them off. Anything left over is rubbish and needs to be
1506 for i in OS.listdir(target):
1507 checklist[i] = False
1508 except OSError, err:
1509 if err.errno not in (E.ENOENT, E.ENOTDIR):
1512 ## Keep track of the files in each category.
1517 ## Work through the master files.
1518 for f in sorted(OS.listdir(master)):
1520 ## If the killswitch has been pulled then stop. The whole idea is
1521 ## that we want to cause a clean shutdown if possible, so we don't
1522 ## want to do it in the middle of encoding because the encoding
1523 ## effort will have been wasted. This is the only place we need to
1524 ## check. If we've exited the loop, then clearing old files will
1525 ## probably be fast, and we'll either end up here when the recursive
1526 ## call returns or we'll be in the same boat as before, clearing old
1527 ## files, only up a level. If worst comes to worst, we'll be killed
1528 ## forcibly somewhere inside `SH.rmtree', and that can continue where
1530 if KILLSWITCH.is_set():
1533 ## Do something with the file.
1534 with wrap(OS.path.join(master, f)) as masterfile:
1536 ## If it's a directory then prepare to grobble it recursively, but
1537 ## don't do that yet.
1538 if OS.path.isdir(masterfile):
1540 done.append(OS.path.join(target, f))
1542 ## Otherwise it's a file. Work out what kind, and stash it under
1543 ## the appropriate categories. Later, we'll apply policy to the
1544 ## files, by category, and work out what to do with them all.
1546 mime = GIO.file_new_for_path(masterfile) \
1547 .query_info('standard::content-type', 0) \
1550 for cat in pmap.iterkeys():
1551 id = cat.identify(masterfile, mime)
1552 if id is None: continue
1553 catmap.setdefault(cat, []).append((masterfile, id))
1554 cats.append((cat, id))
1556 catmap.setdefault(None, []).append((masterfile, id))
1557 todo.append((masterfile, cats))
1559 ## Work through the categorized files to see what actions to do for
1561 for masterfile, cats in todo:
1562 with wrap(masterfile):
1563 done += grobble_file(masterfile, pmap, target,
1564 [(cat, id, catmap[cat]) for cat, id in cats])
1566 ## Check the results off the list so that we don't clear it later.
1568 checklist[OS.path.basename(f)] = True
1570 ## Maybe there's stuff in the target which isn't accounted for. Delete
1571 ## it: either the master has changed, or the policy for this target has
1572 ## changed. Either way, the old files aren't wanted.
1574 if not checklist[f]:
1575 STATUS.commit(filestatus(f, 'clear bogus file'))
1577 bogus = OS.path.join(target, f)
1579 if OS.path.isdir(bogus):
1583 except OSError, err:
1584 if err.errno != E.ENOENT:
1587 ## If there are subdirectories which want processing then do those.
1588 ## Keep the user amused by telling him where we are in the tree.
1589 for d in sorted(subdirs):
1591 STATUS.set('/'.join(dirs))
1592 with wrap(OS.path.join(master, d)) as masterdir:
1594 grobble_dir(masterdir,
1595 [OS.path.join(target, d) for target in targets])
1598 STATUS.set('/'.join(dirs))
1600 ## Right. We're ready to go.
1601 grobble_dir(master, [t.targetdir for t in targets])
1604 ###--------------------------------------------------------------------------
1605 ### Command-line interface.
1607 QUIS = OS.path.basename(SYS.argv[0])
1610 "Report a warning message to the user."
1611 SYS.stderr.write('%s: %s\n' % (QUIS, msg))
1614 "Report a fatal error message to the user."
1618 def parse_opts(args):
1620 Parse command-line arguments in ARGS.
1622 Returns a Grobbler object and the MASTER and TARGET directories to be
1626 ## Build the option parser object.
1627 op = OP.OptionParser(prog = QUIS, version = VERSION,
1628 usage = '%prog [-in] [-t TIMEOUT] [-T TIMEOUT] '
1631 Convert a directory tree of files according to the configuration file
1635 ## Timeout handling.
def cb_time(opt, ostr, arg, op):
  """
  Option callback: parse the duration ARG and store it as seconds.

  ARG is a decimal integer optionally followed by a unit: `s' for seconds
  (the default), `m' for minutes, `h' for hours or `d' for days.  Whitespace
  around either part is ignored.  The result is stored in the parser OP's
  values under the destination of the option OPT; OSTR is not used.

  Raises `OptionValueError' if ARG doesn't have this form.
  """

  ## Anchor the pattern at the end, so that trailing junk (as in `10x') is
  ## reported rather than quietly ignored.
  m = RX.match(r'\s*(\d+)\s*([dhms]?)\s*$', arg)
  if not m:
    raise OP.OptionValueError('bad time value `%s\'' % arg)
  t, u = m.groups()
  t = int(t) * { '': 1, 's': 1, 'm': 60, 'h': 3600, 'd': 86400 }[u]
  setattr(op.values, opt.dest, t)
1643 op.add_option('-t', '--timeout', type = 'string', metavar = 'SECS',
1645 help = 'stop processing nicely after SECS',
1646 action = 'callback', callback = cb_time)
1647 op.add_option('-T', '--timeout-nasty', type = 'string', metavar = 'SECS',
1648 dest = 'timeout_nasty',
1649 help = 'stop processing unpleasantly after further SECS',
1650 action = 'callback', callback = cb_time)
1653 op.add_option('-i', '--interactive', action = 'store_true', dest = 'tty',
1654 help = 'provide progress information')
1655 op.add_option('-n', '--no-act', action = 'store_true', dest = 'noact',
1656 help = 'don\'t actually modify the filesystem')
1659 op.set_defaults(formats = [], noact = False,
1660 timeout = None, timeout_nasty = 300)
1661 opts, args = op.parse_args(args)
1663 ## Check that we got the non-option arguments that we want.
1665 op.error('wrong number of arguments')
1667 ## Act on the options.
1669 STATUS.eyecandyp = True
1670 if opts.timeout is not None:
1671 to = TH.Thread(target = timeout,
1672 args = (opts.timeout, opts.timeout_nasty))
1676 ## Parse the configuration file.
1677 with open(args[0]) as conf:
1678 jobs, = Config.parseFile(conf, True)
if __name__ == '__main__':
  opts = parse_opts(SYS.argv[1:])

  ## `VARS' starts out with a `master' key whose value is `None', so a
  ## membership test can never fail: check the value instead.
  if VARS.get('master') is None:
    die("no master directory set")
  broken = grobble(VARS['master'], TARGETS, opts.noact)

  ## Report each master file which couldn't be converted, together with the
  ## error which stopped it.
  if broken:
    moan('failed to convert some files:')
    for path, exc in broken:
      moan('%s: %s' % (path, exc))
1695 ## This is basically a successful completion: we did what we were asked to
1696 ## do. It seems polite to report a message, though.
1698 ## Why don't we have a nonzero exit status? The idea would be that a
1699 ## calling script would be interested that we used up all of our time, and
1700 ## not attempt to convert some other directory as well. But that doesn't
1701 ## quite work. Such a script would need to account correctly for time we
1702 ## had spent even if we complete successfully. And if the script is having
1703 ## to watch the clock itself, it can do that without our help here.
1704 if KILLSWITCH.is_set():
1705 moan('killed by timeout')
1707 ###----- That's all, folks --------------------------------------------------