3 ### Convert a directory tree of audio files
5 ### (c) 2010 Mark Wooding
8 ###----- Licensing notice ---------------------------------------------------
10 ### This file is part of the `autoys' audio tools collection.
12 ### `autoys' is free software; you can redistribute it and/or modify
13 ### it under the terms of the GNU General Public License as published by
14 ### the Free Software Foundation; either version 2 of the License, or
15 ### (at your option) any later version.
17 ### `autoys' is distributed in the hope that it will be useful,
18 ### but WITHOUT ANY WARRANTY; without even the implied warranty of
19 ### MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 ### GNU General Public License for more details.
22 ### You should have received a copy of the GNU General Public License
23 ### along with `autoys'; if not, write to the Free Software Foundation,
24 ### Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
26 ###--------------------------------------------------------------------------
27 ### External dependencies.
30 from __future__ import with_statement
32 ## Standard Python libraries.
37 import unicodedata as UD
42 import threading as TH
44 from math import sqrt, ceil
45 from contextlib import contextmanager
47 ## eyeD3 tag fettling.
50 ## Gstreamer. It picks up command-line arguments -- most notably `--help' --
51 ## and processes them itself. Of course, its help is completely wrong. This
52 ## kludge is due to Jonas Wagner.
53 _argv, SYS.argv = SYS.argv, []
60 from PIL import Image as I
65 ###--------------------------------------------------------------------------
66 ### Special initialization.
73 ###--------------------------------------------------------------------------
74 ### Eyecandy progress reports.
78 Return the width of S, in characters.
80 Specifically, this is the number of backspace characters required to
81 overprint the string S. If the current encoding for `stdout' appears to be
82 Unicode then do a complicated Unicode thing; otherwise assume that
83 characters take up one cell each.
85 None of this handles tab characters in any kind of useful way. Sorry.
88 ## If there's no encoding for stdout then we're doing something stupid.
89 if SYS.stdout.encoding is None: return len(s)
91 ## Turn the string into Unicode so we can hack on it properly. Maybe that
92 ## won't work out, in which case fall back to being stupid.
93 try: u = s.decode(SYS.stdout.encoding)
94 except UnicodeError: return len(s)
96 ## Our main problem is combining characters, but we should also try to
97 ## handle wide (mostly Asian) characters, and zero-width ones. This hack
98 ## is taken mostly from http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c
102 if UD.category(ch) in ['Cf', 'Me', 'Mn'] or \
103 0x1160 <= cd <= 0x11ff: pass
104 elif UD.east_asian_width(ch) in ['F', 'W']: w += 2
110 class StatusLine (object):
112 Maintains a status line containing ephemeral progress information.
114 The status line isn't especially important, but it keeps interactive users
117 There should be only one status line object in your program; otherwise
118 they'll interfere with each other and get confused.
120 The update algorithm (in `set') is fairly careful to do the right thing
121 with long status `lines', and to work properly in an Emacs `shell' buffer.
125 "Initialize the status line."
128 me.eyecandyp = OS.isatty(SYS.stdout.fileno())
132 Set the status line contents to LINE, replacing what was there before.
134 This only produces actual output if stdout is interactive.
141 ## If the old line was longer, we need to clobber its tail, so work out
142 ## what that involves.
144 b = charwidth(me._last[n:])
149 ## Now figure out the length of the common prefix between what we had
150 ## before and what we have now. This reduces the amount of I/O done,
151 ## which keeps network traffic down on SSH links, and keeps down the
152 ## amount of work slow terminal emulators like Emacs have to do.
154 m = min(n, me._lastlen)
155 while i < m and line[i] == me._last[i]:
158 ## Actually do the output, all in one syscall.
159 b = charwidth(me._last[i:])
160 SYS.stdout.write(pre + '\b'*b + line[i:])
163 ## Update our idea of what's gone on.
168 "Clear the status line. Just like set('')."
171 def commit(me, line = None):
173 Commit the current status line, and maybe the string LINE.
175 If the current status line is nonempty, then commit it to the transcript.
176 If LINE is not None, then commit that to the transcript too.
178 After all of this, we clear the status line to get back to a clean state.
182 SYS.stdout.write('\n')
184 SYS.stdout.write(me._last + '\n')
186 SYS.stdout.write(line + '\n')
190 STATUS = StatusLine()
def filestatus(file, status):
    """
    Return a status-line string describing STATUS for FILE.

    Only the final path component of FILE is shown, indented by eight spaces
    and followed by a colon and STATUS.  STATUS is formatted with `%s', so
    any object with a string conversion will do.
    """
    indent = ' '*8
    leaf = OS.path.basename(file)
    return '%s%s: %s' % (indent, leaf, status)
195 class ProgressEyecandy (object):
197 Provide amusement while something big and complicated is happening.
199 This is an abstract class. Subclasses must provide a method `progress'
200 returning a pair (CURRENT, MAX) indicating the current progress through the
204 def __init__(me, what, silentp = False):
206 Initialize a progress meter.
208 WHAT is a prefix string to be written before the progress eyecandy
212 me._silentp = silentp
216 def _fmt_time(me, t):
217 "Format T as a time, in (maybe hours) minutes and seconds."
218 s, t = t % 60, int(t/60)
219 m, h = t % 60, int(t/60)
221 return '%d:%02d:%02d' % (h, m, s)
223 return '%02d:%02d' % (m, s)
226 "Show the current level of progress."
228 ## If we're not showing pointless frippery, don't bother at all.
229 if not STATUS.eyecandyp:
232 ## Update the spinner index.
233 me._spinner = (me._spinner + 1)%4
235 ## Fetch the current progress information. Note that we always fetch
236 ## both the current and maximum levels, because both might change if an
237 ## operation revises its idea of how much work needs doing.
238 cur, max = me.progress()
240 ## If we couldn't get progress information, display something vaguely
242 if cur is None or max is None:
243 STATUS.set('%s %c [unknown progress]' %
244 (me._what, r'/-\|'[me._spinner]))
247 ## Work out -- well, guess -- the time remaining.
250 eta = me._fmt_time(ceil((t - me._start)*(max - cur)/cur))
254 ## Set the status bar.
256 STATUS.set('%s %c [%s%s] %3d%% (%s)' % \
258 r'/-\|'[me._spinner],
263 def done(me, win = True):
264 "Show a completion notice, or a failure if WIN is false."
266 STATUS.set('%s FAILED!' % me._what)
267 elif not me._silentp:
268 STATUS.set('%s done (%s)' %
270 me._fmt_time(T.time() - me._start)))
275 ###--------------------------------------------------------------------------
276 ### Timeout handling.
278 KILLSWITCH = TH.Event()
284 moan('dying messily due to timeout')
287 ###--------------------------------------------------------------------------
288 ### Parsing utilities.
## Allow hyphens in identifiers.
IDCHARS = P.alphanums + '-_'
P.Keyword.setDefaultKeywordChars(IDCHARS)

## Some common kinds of tokens.  The parse action for `Num' builds a real
## list rather than calling `map': under Python 3 `map' returns a lazy
## iterator, whereas a list comprehension gives the same list of integers
## under both Python 2 and Python 3.
Name = P.Word(IDCHARS)
Num = P.Word(P.nums).setParseAction(lambda toks: [int(tok) for tok in toks])
String = P.QuotedString('"', '\\')
299 ## Handy abbreviations for constructed parser elements.
def K(k):
    "Return a parser element matching the keyword K, suppressed from output."
    keyword = P.Keyword(k)
    return keyword.suppress()
def D(d):
    "Return a parser element matching the literal D, suppressed from output."
    literal = P.Literal(d)
    return literal.suppress()
def R(p):
    """
    Return a parser element matching zero or more repetitions of P.

    The matched tokens are wrapped in a single-element list by the parse
    action, rather than being returned as they are.
    """
    def gather(string, loc, toks):
        return [toks]
    repeated = P.ZeroOrMore(p)
    return repeated.setParseAction(gather)
305 ###--------------------------------------------------------------------------
306 ### Format identification and conversion.
308 class IdentificationFailure (Exception):
311 class FileCategory (object):
313 A FileCategory represents a class of files.
315 For example, it's sensible to consider audio, or image files as a
316 category. A file category knows how to recognize member files from
320 def __init__(me, name, mime_pats, ident):
322 Construct a new category.
324 The PATS are a list of `fnmatch' patterns to be compared with a MIME
325 type. The IDENT is a function which produces an identification object
326 given a file's name and first-guess MIME type. The object is passed to a
327 Format's `check' method to see whether a file needs re-encoding, and to
328 `convert' to assist with the conversion.
330 An identification object must have an attribute `mime' which is a set of
331 possible MIME types accumulated for the object.
334 me._mime_pats = mime_pats
336 CATEGORYMAP[name] = me
338 def identify(me, file, mime):
340 Attempt to identify FILE, given its apparent MIME type.
342 If identification succeeds, return an identification object which can be
343 used by associated file formats; otherwise return None.
345 for p in me._mime_pats:
346 if not FN.fnmatchcase(mime, p):
349 return me._ident(file, mime)
350 except IdentificationFailure:
354 class BaseFormat (object):
356 A BaseFormat object represents a particular encoding and parameters.
358 The object can verify (the `check' method) whether a particular file
359 matches its requirements, and if necessary (`encode') re-encode a file.
361 Subclasses should define the following methods.
364 Answer whether the file identified by ID is acceptable according to
365 the receiver's parameters.
367 convert(MASTER, ID, TARGET)
368 Convert the file MASTER, which has been identified as ID, according
369 to the receiver's parameters, writing the output to TARGET.
371 Subclasses should also provide these attributes.
374 A FileCategory object for the category of files that this format
377 EXT A file extension to be applied to encoded output files.
379 NAME A user-facing name for the format.
381 PROPS A parser element to parse a property definition. It should produce
382 a pair NAME, VALUE to be stored in a dictionary.
384 Subclasses for different kinds of file may introduce more subclass
389 """Post-encoding fixups."""
def defformat(name, cls):
    """
    Define a format NAME using class CLS.

    CLS must carry both a `NAME' and a `CATEGORY' attribute.  A class lacking
    `NAME' is rejected with ValueError (`abstract class'); one lacking
    `CATEGORY' is rejected with ValueError (`no category').  Otherwise CLS is
    stored in FORMATMAP under the key NAME.
    """
    ## Use the call form of `raise': unlike the old `raise E, V' statement,
    ## it is accepted by both Python 2 and Python 3.
    if not hasattr(cls, 'NAME'):
        raise ValueError('abstract class')
    if not hasattr(cls, 'CATEGORY'):
        raise ValueError('no category')
    FORMATMAP[name] = cls
403 class FormatParser (P.ParserElement):
405 Parse a format specifier:
407 format-spec ::= string [format-properties]
408 format-properties ::= `{' format-property (`,' format-property)* `}'
410 The syntax of a format-property is determined by the PROPS attribute on the
411 named format and its superclasses.
416 ## We cache the parser elements we generate to avoid enormous consing.
419 def parseImpl(me, s, loc, actp = True):
421 ## Firstly, determine the format name.
422 loc, r = Name._parse(s, loc, actp)
425 ## Look up the format class.
426 try: fcls = FORMATMAP[fmt]
428 raise P.ParseException(s, loc, "Unknown format `%s'" % fmt)
430 ## Fetch the property-list parser from the cache, if possible; else
439 except AttributeError: continue
440 if p in seen: continue
441 if prop is None: prop = p
445 pp = me.CACHE[fmt] = None
447 props = P.delimitedList(prop)
448 props.setParseAction(lambda s, l, t: dict(t.asList()))
449 pp = me.CACHE[fmt] = O(D('{') - props - D('}'))
451 ## Parse the properties.
455 loc, r = pp._parse(s, loc, actp)
459 ## Construct the format object and return it.
460 return loc, fcls(**pd)
462 Format = FormatParser()
464 def prop(kw, pval, tag = None):
465 if tag is None: tag = kw
468 p.setParseAction(lambda s, l, t: (tag, True))
470 p = K(kw) + D('=') + pval
471 p.setParseAction(lambda s, l, t: (tag, t[0]))
474 ###--------------------------------------------------------------------------
475 ### Policies and actions.
477 class Action (object):
479 An Action object represents a conversion action to be performed.
481 This class isn't intended to be instantiated directly. It exists to define
482 some protocol common to all Action objects.
484 Action objects have the following attributes.
486 master The name of the master (source) file.
488 target The name of the target (destination) file.
490 PRIORITY The priority of the action, for deciding which of two actions
491 to perform. Higher priorities are more likely to win.
493 Converting an Action to a string describes the action in a simple
494 user-readable manner. The `perform' method actually carries the action
500 def __init__(me, master):
501 "Stash the MASTER file name for later."
505 "Choose either ME or HIM and return one."
506 if him is None or me.PRIORITY > him.PRIORITY:
511 class CopyAction (Action):
513 An Action object for simply copying a file.
515 Actually we try to hardlink it first, falling back to a copy later. This
516 is both faster and more efficient with regard to disk space.
519 ## Copying is good. Linking is really good, but we can't tell the
520 ## difference at this stage.
523 def __init__(me, master, targetdir):
524 "Initialize a CopyAction, from MASTER to the TARGETDIR directory."
525 Action.__init__(me, master)
526 me.target = OS.path.join(targetdir, OS.path.basename(master))
532 "Actually perform a CopyAction."
534 STATUS.set(filestatus(me.master, 'link'))
535 OS.link(me.master, me.target)
537 if err.errno != E.EXDEV:
539 STATUS.set(filestatus(me.master, 'copy'))
540 new = me.target + '.new'
541 SH.copyfile(me.master, new)
542 OS.rename(new, me.target)
545 class ConvertAction (Action):
547 An Action object for converting a file to a given format.
549 Additional attributes:
551 id The identification object for the master file.
553 format The format to which we're meant to convert the master.
556 def __init__(me, master, targetdir, id, format):
557 "Initialize a ConvertAction."
558 Action.__init__(me, master)
559 stem, ext = OS.path.splitext(OS.path.basename(master))
560 me.target = OS.path.join(targetdir, stem + '.' + format.EXT)
565 return 'convert to %s' % me.format.NAME
568 "Actually perform a ConvertAction."
569 STATUS.set(filestatus(me.master, me))
570 me.format.convert(me.master, me.id, me.target)
574 class FormatPolicy (object):
576 A FormatPolicy object represents a set of rules for how to convert files.
578 Given a master file, the FormatPolicy will identify it and return a list of
579 actions to be performed. The methods required of a FormatPolicy are:
582 Store CAT as the policy's category. Check that this is consistent
583 with the policy as stored.
585 actions(MASTER, TARGETDIR, ID, COHORT)
586 Given a MASTER file, identified as ID, a target directory
587 TARGETDIR, and a list COHORT of (FILE, ID) pairs for other files
588 of the same category in the same directory, return a list of
589 actions to be performed to get the target directory into the right
590 form. The list might be empty if the policy object /rejects/ the
594 class AndPolicy (FormatPolicy):
596 A FormatPolicy which does the union of a bunch of other policies.
598 Each subsidiary policy is invoked in turn. The highest-priority action for
599 each target file is returned.
602 def __init__(me, policies):
603 me._policies = policies
605 def setcategory(me, cat):
607 for p in me._policies:
610 def actions(me, master, targetdir, id, cohort):
612 for p in me._policies:
613 for a in p.actions(master, targetdir, id, cohort):
615 tmap[a.target] = a.choose(tmap.get(a.target))
620 And = K('and') - D('{') - R(Policy) - D('}')
621 And.setParseAction(lambda s, l, t: AndPolicy(t[0]))
623 class OrPolicy (FormatPolicy):
625 A FormatPolicy which tries other policies and uses the first that accepts.
627 Each subsidiary policy is invoked in turn. If any accepts, the actions it
628 proposes are returned and no further policies are invoked. If none accepts
629 then the file is rejected.
632 def __init__(me, policies):
633 me._policies = policies
635 def setcategory(me, cat):
637 for p in me._policies:
640 def actions(me, master, targetdir, id, cohort):
641 for p in me._policies:
642 aa = p.actions(master, targetdir, id, cohort)
648 Or = K('or') - D('{') - R(Policy) - D('}')
649 Or.setParseAction(lambda s, l, t: OrPolicy(t[0]))
651 class AcceptPolicy (FormatPolicy):
653 A FormatPolicy which copies files in a particular format.
655 If all of the files in a cohort are recognized as being in a particular
656 format (including this one), then accept it with a CopyAction; otherwise
660 def __init__(me, format):
663 def setcategory(me, cat):
664 if me._format.CATEGORY is not cat:
666 "Accept format `%s' has category `%s', not `%s'" % \
667 (me._format.__class__.__name__,
668 me._format.CATEGORY.name, cat.name)
671 def actions(me, master, targetdir, id, cohort):
672 if me._format.check(id) and \
673 all(me._format.check(cid) for f, cid in cohort):
674 return [CopyAction(master, targetdir)]
678 Accept = K('accept') - Format
679 Accept.setParseAction(lambda s, l, t: AcceptPolicy(t[0]))
681 class ConvertPolicy (FormatPolicy):
683 A FormatPolicy which copies files in a particular format or converts if
686 def __init__(me, format):
689 def setcategory(me, cat):
690 if me._format.CATEGORY is not cat:
692 "Accept format `%s' has category `%s', not `%s'" % \
693 (me._format.__class__.__name__,
694 me._format.CATEGORY.name, cat.name)
697 def actions(me, master, targetdir, id, cohort):
698 if me._format.check(id):
699 return [CopyAction(master, targetdir)]
701 return [ConvertAction(master, targetdir, id, me._format)]
703 Convert = K('convert') - Format
704 Convert.setParseAction(lambda s, l, t: ConvertPolicy(t[0]))
706 Policy << (And | Or | Accept | Convert)
708 ###--------------------------------------------------------------------------
709 ### Audio handling, based on GStreamer.
711 def make_element(factory, name = None, **props):
712 "Return a new element from the FACTORY with the given NAME and PROPS."
713 elt = GS.element_factory_make(factory, name)
714 elt.set_properties(**props)
def link_elements(elts):
    """
    Link the elements ELTS together, in order.

    ELTS is unpacked into the positional arguments of GStreamer's
    `element_link_many'.
    """
    chain = tuple(elts)
    GS.element_link_many(*chain)
def bin_children(bin):
    """
    Iterate over the (direct) children of a BIN.

    This is a generator: BIN's `elements' method isn't called until the
    first child is requested.
    """
    children = bin.elements()
    for child in children:
        yield child
725 class GStreamerProgressEyecandy (ProgressEyecandy):
727 Provide amusement while GStreamer is busy doing something.
729 The GStreamerProgressEyecandy object is a context manager. Wrap it round
730 your GStreamer loop to provide progress information for an operation.
733 def __init__(me, what, elt, **kw):
735 Initialize a progress meter.
737 WHAT is a prefix string to be written before the progress eyecandy
738 itself. ELT is a GStreamer element to interrogate to find the progress
742 ProgressEyecandy.__init__(me, what, **kw)
745 "Called by GLib main event loop to update the eyecandy."
751 Update the progress meter.
753 This is called periodically by the GLib main event-processing loop.
759 "Return the current progress as a pair (CURRENT, MAX)."
761 ## Fetch the current progress information. We get the duration each
762 ## time, because (particularly with VBR-encoded MP3 inputs) the estimated
763 ## duration can change as we progress. Hopefully it settles down fairly
766 t, hunoz = me._elt.query_position(GS.FORMAT_TIME)
767 end, hukairz = me._elt.query_duration(GS.FORMAT_TIME)
769 except GS.QueryError:
773 "Enter context: attach progress meter display."
775 ## If we're not showing pointless frippery, don't bother at all.
776 if not STATUS.eyecandyp:
779 ## Update regularly. The pipeline runs asynchronously.
780 me._id = G.timeout_add(200, me._update)
782 def __exit__(me, ty, val, tb):
783 "Leave context: remove display and report completion or failure."
785 ## If we're not showing pointless frippery, there's nothing to remove.
787 G.source_remove(me._id)
789 ## Report completion anyway.
795 class AudioIdentifier (object):
797 Analyses and identifies an audio file.
799 Important properties are:
801 cap A capabilities structure describing the audio file data. The most
802 interesting thing in here is probably its name, which is a MIME
803 type describing the data.
805 dcap A capabilities structure describing the decoded audio data. This
806 is of interest during conversion.
808 tags A dictionary containing metadata tags from the file. These are in
809 GStreamer's encoding-independent format.
811 bitrate An approximation to the stream's bitrate, in kilobits per second.
812 This might be slow to work out for some files so it's computed on
816 def __init__(me, file, mime):
817 "Initialize the object suitably for identifying FILE."
819 ## Make some initial GStreamer objects. We'll want the pipeline later if
820 ## we need to analyse a poorly tagged MP3 stream, so save it away.
821 me._pipe = GS.Pipeline()
823 bus = me._pipe.get_bus()
824 bus.add_signal_watch()
827 ## The basic recognition kit is based around `decodebin'. We must keep
828 ## it happy by giving it sinks for the streams it's found, which it
829 ## announces asynchronously.
830 source = make_element('filesrc', 'file', location = file)
831 decoder = make_element('decodebin', 'decode')
832 sink = make_element('fakesink')
833 def decoder_pad_arrived(elt, pad):
834 if pad.get_caps()[0].get_name().startswith('audio/'):
835 elt.link_pads(pad.get_name(), sink, 'sink')
836 dpaid = decoder.connect('pad-added', decoder_pad_arrived)
837 for i in [source, decoder, sink]: me._pipe.add(i)
838 link_elements([source, decoder])
840 ## Arrange to collect tags from the pipeline's bus as they're reported.
841 ## If we reuse the pipeline later, we'll want different bus-message
842 ## handling, so make sure we can take the signal handler away.
845 def bus_message(bus, msg):
846 if msg.type == GS.MESSAGE_ERROR:
847 fail[:] = (ValueError, msg.structure['debug'], None)
849 elif msg.type == GS.MESSAGE_STATE_CHANGED:
850 if msg.structure['new-state'] == GS.STATE_PAUSED and \
853 elif msg.type == GS.MESSAGE_TAG:
854 tags.update(msg.structure)
855 bmid = bus.connect('message', bus_message)
857 ## We want to identify the kind of stream this is. (Hmm. The MIME type
858 ## recognizer has already done this work, but GStreamer is probably more
859 ## reliable.) The `decodebin' has a `typefind' element inside which will
860 ## announce the identified media type. All we need to do is find it and
861 ## attach a signal handler. (Note that the handler might be run in the
862 ## thread context of the pipeline element, but Python's GIL will keep
863 ## things from being too awful.)
866 for e in bin_children(decoder):
867 if e.get_factory().get_name() == 'typefind':
871 assert False, 'failed to find typefind element'
873 ## Crank up most of the heavy machinery. The message handler will stop
874 ## the loop when things seem to be sufficiently well underway.
875 me._pipe.set_state(GS.STATE_PAUSED)
878 decoder.disconnect(dpaid)
880 me._pipe.set_state(GS.STATE_NULL)
881 raise fail[0], fail[1], fail[2]
883 ## Store the collected tags.
886 ## Gather the capabilities. The `typefind' element knows the input data
887 ## type. The 'decodebin' knows the raw data type.
888 me.cap = tfelt.get_pad('src').get_negotiated_caps()[0]
889 me.mime = set([mime, me.cap.get_name()])
890 me.dcap = sink.get_pad('sink').get_negotiated_caps()[0]
892 ## If we found a plausible bitrate then stash it. Otherwise note that we
893 ## failed. If anybody asks then we'll work it out then.
894 if 'nominal-bitrate' in tags:
895 me._bitrate = tags['nominal-bitrate']/1000
896 elif 'bitrate' in tags and tags['bitrate'] >= 80000:
897 me._bitrate = tags['bitrate']/1000
901 ## The bitrate computation wants the file size. Ideally we'd want the
902 ## total size of the frames' contents, but that seems hard to dredge
903 ## out. If the framing overhead is small, this should be close enough
905 me._bytes = OS.stat(file).st_size
908 "Close the pipeline down so we don't leak file descriptors."
909 me._pipe.set_state(GS.STATE_NULL)
914 Return the approximate bit-rate of the input file.
916 This might take a while if we have to work it out the hard way.
919 ## If we already know the answer then just return it.
920 if me._bitrate is not None:
923 ## Make up a new main loop.
926 ## Watch for bus messages. We'll stop when we reach the end of the
927 ## stream: then we'll have a clear idea of how long the track was.
929 def bus_message(bus, msg):
930 if msg.type == GS.MESSAGE_ERROR:
931 fail[:] = (ValueError, msg.structure['debug'], None)
933 elif msg.type == GS.MESSAGE_EOS:
935 bus = me._pipe.get_bus()
936 bmid = bus.connect('message', bus_message)
938 ## Get everything moving, and keep the user amused while we work.
939 me._pipe.set_state(GS.STATE_PLAYING)
940 with GStreamerProgressEyecandy(filestatus(file, 'measure bitrate') %
946 me._pipe.set_state(GS.STATE_NULL)
947 raise fail[0], fail[1], fail[2]
949 ## Now we should be able to find out our position accurately and work out
950 ## a bitrate. Cache it in case anybody asks again.
951 t, hukairz = me._pipe.query_position(GS.FORMAT_TIME)
952 me._bitrate = int(8*me._bytes*1e6/t)
957 class AudioFormat (BaseFormat):
959 An AudioFormat is a kind of Format specialized for audio files.
961 Format checks are done on an AudioIdentifier object.
964 PROPS = prop('bitrate', Num)
966 ## libmagic reports `application/ogg' for Ogg Vorbis files. We've switched
967 ## to GIO now, which reports either `audio/ogg' or `audio/x-vorbis+ogg'
968 ## depending on how thorough it's trying to be. Still, it doesn't do any
969 ## harm here; the main risk is picking up Ogg Theora files by accident, and
970 ## we'll probably be able to extract the audio from them anyway.
971 CATEGORY = FileCategory('audio', ['audio/*', 'application/ogg'],
974 def __init__(me, bitrate = None):
975 "Construct an object, requiring an approximate bitrate."
980 Return whether the AudioIdentifier ID is suitable for our purposes.
982 Subclasses can either override this method or provide a property
983 `MIMETYPES', which is a set (it gets intersected with ID's `mime' set)
984 of GStreamer MIME types matching this format.
986 return id.mime & me.MIMETYPES and \
987 (me.bitrate is None or id.bitrate <= me.bitrate * sqrt(2))
991 Constructs a GStreamer element to encode audio input.
993 Subclasses can either override this method (or replace `encode'
994 entirely), or provide a method `encoder_chain' which returns a list of
995 elements to be linked together in sequence. The first element in the
996 chain must have a pad named `sink' and the last must have a pad named
999 elts = me.encoder_chain()
1001 for i in elts: bin.add(i)
1003 bin.add_pad(GS.GhostPad('sink', elts[0].get_pad('sink')))
1004 bin.add_pad(GS.GhostPad('src', elts[-1].get_pad('src')))
1007 def convert(me, master, id, target):
1009 Encode audio from MASTER, already identified as ID, writing it to TARGET.
1011 See `encoder' for subclasses' responsibilities.
1014 ## Construct the necessary equipment.
1015 pipe = GS.Pipeline()
1016 bus = pipe.get_bus()
1017 bus.add_signal_watch()
1020 ## Make sure that there isn't anything in the way of our output. We're
1021 ## going to write to a scratch file so that we don't get confused by
1022 ## half-written rubbish left by a crashed program.
1023 new = target + '.new'
1026 except OSError, err:
1027 if err.errno != E.ENOENT:
1030 ## Piece together our pipeline. The annoying part is that the
1031 ## `decodebin' doesn't have any source pads yet, so our chain is in two
1033 source = make_element('filesrc', 'source', location = master)
1034 decoder = make_element('decodebin', 'decode')
1035 convert = make_element('audioconvert', 'convert')
1036 encoder = me.encoder()
1037 sink = make_element('filesink', 'sink', location = new)
1038 for i in [source, decoder, convert, encoder, sink]: pipe.add(i)
1039 link_elements([source, decoder])
1040 link_elements([convert, encoder, sink])
1042 ## Some decoders (e.g., the AC3 decoder) include channel-position
1043 ## indicators in their output caps. The Vorbis encoder interferes with
1044 ## this, and you end up with a beautifully encoded mono signal from a
1045 ## stereo source. From a quick butchers at the `vorbisenc' source, I
1046 ## /think/ that this is only a problem with stereo signals: mono signals
1047 ## are mono already, and `vorbisenc' accepts channel positions if there
1048 ## are more than two channels.
1050 ## So we have this bodge. We already collected the decoded audio caps
1051 ## during identification. So if we see 2-channel audio with channel
1052 ## positions, we strip the positions off forcibly by adding a filter.
1053 if id.dcap.get_name().startswith('audio/x-raw-') and \
1054 id.dcap.has_field('channels') and \
1055 id.dcap['channels'] == 2 and \
1056 id.dcap.has_field('channel-positions'):
1059 c.remove_field('channel-positions')
1064 ## Hook onto the `decodebin' so we can link together the two halves of
1065 ## our encoding chain. For now, we'll hope that there's only one audio
1066 ## stream in there, and just throw everything else away.
1067 def decoder_pad_arrived(elt, pad):
1068 if pad.get_caps()[0].get_name().startswith('audio/'):
1070 elt.link_pads_filtered(pad.get_name(), convert, 'sink', dcap)
1072 elt.link_pads(pad.get_name(), convert, 'sink')
1073 decoder.connect('pad-added', decoder_pad_arrived)
1075 ## Watch the bus for completion messages.
1077 def bus_message(bus, msg):
1078 if msg.type == GS.MESSAGE_ERROR:
1079 fail[:] = (ValueError, msg.structure['debug'], None)
1081 elif msg.type == GS.MESSAGE_EOS:
1083 bmid = bus.connect('message', bus_message)
1085 ## Get everything ready and let it go.
1086 pipe.set_state(GS.STATE_PLAYING)
1087 with GStreamerProgressEyecandy(filestatus(master,
1088 'convert to %s' % me.NAME),
1091 pipe.set_state(GS.STATE_NULL)
1093 raise fail[0], fail[1], fail[2]
1095 ## Fix up the output file if we have to.
1099 OS.rename(new, target)
1101 class OggVorbisFormat (AudioFormat):
1102 "AudioFormat object for Ogg Vorbis."
1104 ## From https://en.wikipedia.org/wiki/Vorbis
1105 QMAP = [(-1, 45), ( 0, 64), ( 1, 80), ( 2, 96),
1106 ( 3, 112), ( 4, 128), ( 5, 160), ( 6, 192),
1107 ( 7, 224), ( 8, 256), ( 9, 320), (10, 500)]
1110 MIMETYPES = set(['application/ogg', 'audio/x-vorbis', 'audio/ogg',
1111 'audio/x-vorbis+ogg'])
1114 def encoder_chain(me):
1116 if me.bitrate is not None:
1117 for q, br in me.QMAP:
1118 if br >= me.bitrate:
1121 raise ValueError, 'no suitable quality setting found'
1122 encprops['quality'] = q/10.0
1123 return [make_element('vorbisenc', **encprops),
1124 make_element('oggmux')]
1126 defformat('ogg-vorbis', OggVorbisFormat)
1128 class MP3Format (AudioFormat):
1129 "AudioFormat object for MP3."
1132 MIMETYPES = set(['audio/mpeg'])
1135 def encoder_chain(me):
1137 if me.bitrate is not None: encprops['vbr_mean_bitrate'] = me.bitrate
1138 return [make_element('lame', vbr = 4, **encprops),
1139 make_element('xingmux'),
1140 make_element('id3v2mux')]
1142 def fixup(me, path):
1146 GStreamer produces ID3v2 tags, but not ID3v1. This seems unnecessarily
1147 unkind to stupid players.
1150 if f is None: return
1152 if t is None: return
1153 for v in [E3.id3.ID3_V2_3, E3.id3.ID3_V1]:
1154 try: f.tag.save(version = v)
1155 except (UnicodeEncodeError,
1156 E3.id3.GenreException,
1157 E3.id3.TagException):
1160 defformat('mp3', MP3Format)
1162 ###--------------------------------------------------------------------------
1163 ### Image handling, based on the Python Imaging Library.
1165 class ImageIdentifier (object):
1167 Analyses and identifies an image file.
1169 Simply leaves an Image object in the `img' property which can be inspected.
1172 def __init__(me, file, mime):
1174 ## Get PIL to open the file. It will magically work out what kind of
1177 me.img = I.open(file)
1178 except IOError, exc:
1180 ## Unhelpful thing to raise on identification failure. We can
1181 ## distinguish this from an actual I/O error because it doesn't have an
1183 if exc.errno is None:
1184 raise IdentificationFailure
1187 me.mime = set([mime])
1189 class ImageFormat (BaseFormat):
1191 An ImageFormat is a kind of Format specialized for image files.
1193 Subclasses don't need to provide anything other than the properties
1194 required by all concrete Format subclasses. However, there is a
1195 requirement that the `NAME' property match PIL's `format' name for the
1199 PROPS = prop('size', Num)
1200 CATEGORY = FileCategory('image', ['image/*'], ImageIdentifier)
1202 def __init__(me, size = None, **kw):
1204 Initialize an ImageFormat object.
1206 Additional keywords are used when encoding, and may be recognized by
1207 enhanced `check' methods in subclasses.
1213 "Check whether the ImageIdentifier ID matches our requirements."
1214 return id.img.format == me.NAME and \
1215 (me._size is None or
1216 (id.img.size[0] <= me._size and
1217 id.img.size[1] <= me._size))
1219 def convert(me, master, id, target):
1220 "Encode the file MASTER, identified as ID, writing the result to TARGET."
1222 ## Write to a scratch file.
1223 new = target + '.new'
1225 ## The ImageIdentifier already contains a copy of the open file. It
1226 ## would be wasteful not to use it.
1228 STATUS.set(filestatus(master, 'convert to %s' % me.NAME))
1230 ## If there's a stated maximum size then scale the image down to match.
1231 ## But thumbnailing clobbers the original, so take a copy.
1232 if me._size is not None and \
1233 (img.size[0] > me._size or img.size[1] > me._size):
1235 img.thumbnail((me._size, me._size), I.ANTIALIAS)
1237 ## Write the output image.
1238 img.save(new, me.NAME, **me._props)
1240 ## Fix it up if necessary.
1244 OS.rename(new, target)
1247 class JPEGFormat (ImageFormat):
1249 Image format for JPEG (actually JFIF) files.
1251 Interesting properties to set:
1254 If present, take a second pass to select optimal encoder settings.
1257 If present, make a progressive file.
1259 quality Integer from 1--100 (worst to best); default is 75.
1263 PROPS = prop('optimize', None) \
1264 | prop('progressive', None, 'progression') \
1265 | prop('quality', Num)
1267 defformat('jpeg', JPEGFormat)
1269 class PNGFormat (ImageFormat):
1271 Image format for PNG files.
1273 Interesting properties:
1276 If present, make a special effort to minimize the output file.
1280 PROPS = prop('optimize', None)
1282 defformat('png', PNGFormat)
1284 class BMPFormat (ImageFormat):
1286 Image format for Windows BMP files, as used by RockBox.
1288 No additional properties.
1293 defformat('bmp', BMPFormat)
1295 ###--------------------------------------------------------------------------
1296 ### Remaining parsing machinery.
Type = K('type') - Name - D('{') - R(Policy) - D('}')
def build_type(s, l, t):
  """
  Parse action for a `type' block: build the policy for one file category.

  S is the text being parsed, L is the location of the match, and T holds the
  matched tokens: T[0] is the category name and T[1] the policies found in
  the block body.  Returns a single policy object with its category set; if
  the body did not contain exactly one policy, the policies are combined
  using `AndPolicy'.

  Raises `P.ParseException' if the category name is not in `CATEGORYMAP'.
  """

  ## Look up the category.  Report failures as parse errors at the location
  ## we were given, so that the user sees where the bad name is.
  try:
    cat = CATEGORYMAP[t[0]]
  except KeyError:
    raise P.ParseException(s, l, "Unknown category `%s'" % t[0])

  ## Collect the policies.
  ## NOTE(review): assumes `R(...)' delivers the repeated items as a single
  ## grouped token, as `build_target' also relies on for its T[1] -- confirm.
  pols = t[1]
  if len(pols) == 1: pol = pols[0]
  else: pol = AndPolicy(pols)
  pol.setcategory(cat)
  return pol
Type.setParseAction(build_type)
class TargetJob (object):
  "A parsed `target' block: a target directory and the policies for it."
  def __init__(me, targetdir, policies):
    "Remember the TARGETDIR and the POLICIES which apply to it."
    me.targetdir, me.policies = targetdir, policies
Target = K('target') - String - D('{') - R(Type) - D('}')
def build_target(s, l, t):
  """
  Parse action for a `target' block: wrap it up as a `TargetJob'.

  The tokens T hold the target directory name followed by the policies
  produced from the `type' blocks in the body.
  """
  targetdir, policies = t[0], t[1]
  return TargetJob(targetdir, policies)
Target.setParseAction(build_target)
1324 VARS = { 'master': None }
1325 class VarsJob (object):
1326 def __init__(me, vars):
1329 for k, v in me.vars:
Var = prop('master', String)
Vars = K('vars') - D('{') - R(Var) - D('}')
def build_vars(s, l, t):
  """
  Parse action for a `vars' block: wrap its settings up as a `VarsJob'.

  The first token in T holds the settings found in the block body.
  """
  settings = t[0]
  return VarsJob(settings)
Vars.setParseAction(build_vars)
## A top-level item in the configuration is either a `vars' block or a
## `target' block.
TopLevel = Vars | Target

## The configuration as a whole is built from top-level items using `R'.
## NOTE(review): `R' is defined elsewhere; presumably it means repetition --
## confirm.
Config = R(TopLevel)

## Skip over `#'-style comments anywhere in the configuration.
Config.ignore(P.pythonStyleComment)
1342 ###--------------------------------------------------------------------------
1343 ### The directory grobbler.
1345 def grobble(master, targets, noact = False):
1347 Work through the MASTER directory, writing converted files to TARGETS.
1349 The TARGETS are a list of `TargetJob' objects, each describing a target
1350 directory and a policy to apply to it.
1352 If NOACT is true, then don't actually do anything permanent to the
1356 ## Transform the targets into a more convenient data structure.
1360 tpolmap.append(pmap)
1361 for p in t.policies: pmap.setdefault(p.cat, []).append(p)
1363 ## Keep track of the current position in the master tree.
1366 ## And the files which haven't worked.
1369 def grobble_file(master, pmap, targetdir, cohorts):
1370 ## Convert MASTER, writing the result to TARGETDIR.
1372 ## The COHORTS are actually (CAT, ID, COHORT) triples, where a COHORT is
1373 ## a list of (FILENAME, ID) pairs.
1375 ## Since this function might convert the MASTER file, the caller doesn't
## know the name of the output files, so we return them as a list.
1379 st_m = OS.stat(master)
1381 ## Work through each category listed and apply its policy.
1382 for cat, id, cohort in cohorts:
1384 ## Go through the category's policies and see if any match. If we fail
1385 ## here, see if there are more categories to try.
1386 for pol in pmap[cat]:
1387 acts = pol.actions(master, targetdir, id, cohort)
1392 ## Work through the targets one by one.
1396 ## Find out whether the target file already exists and is up-to-date
1397 ## with respect to the master. (Caution here with low-resolution
1398 ## timestamps.) If it's OK, then just move on.
1400 st_t = OS.stat(a.target)
1401 if st_m.st_mtime < st_t.st_mtime or \
1402 (st_m.st_ino, st_m.st_dev) == (st_t.st_ino, st_t.st_dev):
1404 except OSError, err:
1405 if err.errno not in (E.ENOENT, E.ENOTDIR):
1408 ## We have real work to do. If there's a current status message,
1409 ## it's the containing directory so flush it so that people know
1413 ## Remove the target. (A hardlink will fail if the target already
1418 except OSError, err:
1419 if err.errno not in (E.ENOENT, E.ENOTDIR):
1422 ## Do whatever it is we decided to do.
1424 STATUS.commit(filestatus(master, a))
1428 ## We're done. Return the names of the targets.
1432 def wrap(masterfile):
1433 ## Handle exceptions found while trying to convert a particular file or
1439 ## Something bad happened. Report the error, but continue. (This list
1440 ## of exceptions needs a lot of work.)
1441 except (IOError, OSError), exc:
1443 STATUS.commit(filestatus(masterfile, 'failed (%s)' % exc))
1444 broken.append((masterfile, exc))
1446 def grobble_dir(master, targets):
1447 ## Recursively convert files in MASTER, writing them to the TARGETS.
1449 ## Keep track of the subdirectories we encounter, because we'll need to
1450 ## do all of those in one go at the end.
1453 ## Work through each target directory in turn.
1454 for target, pmap in zip(targets, tpolmap):
1456 ## Make sure the TARGET exists and is a directory. It's a fundamental
1457 ## assumption of this program that the entire TARGET tree is
1458 ## disposable, so if something exists but isn't a directory, we should
1460 if OS.path.isdir(target):
1463 if OS.path.exists(target):
1464 STATUS.commit(filestatus(target, 'clear nondirectory'))
1467 STATUS.commit(filestatus(target, 'create directory'))
1471 ## Keep a list of things in the target. As we convert files, we'll
1472 ## check them off. Anything left over is rubbish and needs to be
1476 for i in OS.listdir(target):
1477 checklist[i] = False
1478 except OSError, err:
1479 if err.errno not in (E.ENOENT, E.ENOTDIR):
1482 ## Keep track of the files in each category.
1487 ## Work through the master files.
1488 for f in sorted(OS.listdir(master)):
1490 ## If the killswitch has been pulled then stop. The whole idea is
1491 ## that we want to cause a clean shutdown if possible, so we don't
1492 ## want to do it in the middle of encoding because the encoding
1493 ## effort will have been wasted. This is the only place we need to
1494 ## check. If we've exited the loop, then clearing old files will
1495 ## probably be fast, and we'll either end up here when the recursive
1496 ## call returns or we'll be in the same boat as before, clearing old
1497 ## files, only up a level. If worst comes to worst, we'll be killed
1498 ## forcibly somewhere inside `SH.rmtree', and that can continue where
1500 if KILLSWITCH.is_set():
1503 ## Do something with the file.
1504 with wrap(OS.path.join(master, f)) as masterfile:
1506 ## If it's a directory then prepare to grobble it recursively, but
1507 ## don't do that yet.
1508 if OS.path.isdir(masterfile):
1510 done.append(OS.path.join(target, f))
1512 ## Otherwise it's a file. Work out what kind, and stash it under
1513 ## the appropriate categories. Later, we'll apply policy to the
1514 ## files, by category, and work out what to do with them all.
1516 mime = GIO.File(masterfile) \
1517 .query_info('standard::content-type') \
1520 for cat in pmap.iterkeys():
1521 id = cat.identify(masterfile, mime)
1522 if id is None: continue
1523 catmap.setdefault(cat, []).append((masterfile, id))
1524 cats.append((cat, id))
1526 catmap.setdefault(None, []).append((masterfile, id))
1527 todo.append((masterfile, cats))
1529 ## Work through the categorized files to see what actions to do for
1531 for masterfile, cats in todo:
1532 with wrap(masterfile):
1533 done += grobble_file(masterfile, pmap, target,
1534 [(cat, id, catmap[cat]) for cat, id in cats])
1536 ## Check the results off the list so that we don't clear it later.
1538 checklist[OS.path.basename(f)] = True
1540 ## Maybe there's stuff in the target which isn't accounted for. Delete
1541 ## it: either the master has changed, or the policy for this target has
1542 ## changed. Either way, the old files aren't wanted.
1544 if not checklist[f]:
1545 STATUS.commit(filestatus(f, 'clear bogus file'))
1547 bogus = OS.path.join(target, f)
1549 if OS.path.isdir(bogus):
1553 except OSError, err:
1554 if err.errno != E.ENOENT:
1557 ## If there are subdirectories which want processing then do those.
1558 ## Keep the user amused by telling him where we are in the tree.
1559 for d in sorted(subdirs):
1561 STATUS.set('/'.join(dirs))
1562 with wrap(OS.path.join(master, d)) as masterdir:
1564 grobble_dir(masterdir,
1565 [OS.path.join(target, d) for target in targets])
1568 STATUS.set('/'.join(dirs))
1570 ## Right. We're ready to go.
1571 grobble_dir(master, [t.targetdir for t in targets])
1574 ###--------------------------------------------------------------------------
1575 ### Command-line interface.
1577 QUIS = OS.path.basename(SYS.argv[0])
1580 "Report a warning message to the user."
1581 SYS.stderr.write('%s: %s\n' % (QUIS, msg))
1584 "Report a fatal error message to the user."
1588 def parse_opts(args):
1590 Parse command-line arguments in ARGS.
1592 Returns a Grobbler object and the MASTER and TARGET directories to be
1596 ## Build the option parser object.
1597 op = OP.OptionParser(prog = QUIS, version = VERSION,
1598 usage = '%prog [-in] [-t TIMEOUT] [-T TIMEOUT] '
1601 Convert a directory tree of files according to the configuration file
1605 ## Timeout handling.
def cb_time(opt, ostr, arg, op):
  """
  Option callback: parse a duration and store it as a number of seconds.

  OPT, OSTR, ARG and OP are the option object, the option string as seen on
  the command line, the option's argument, and the option parser, as passed
  to callbacks by `optparse'.  ARG must be a decimal integer, optionally
  followed by a unit letter: `s' for seconds (the default), `m' for minutes,
  `h' for hours, or `d' for days.  Whitespace around either part is
  permitted.  The result is stored in `op.values' under the option's `dest'
  name.

  Raises `OP.OptionValueError' if ARG is not a valid duration.
  """

  ## Anchor the pattern at the end: otherwise trailing rubbish (e.g., `10x')
  ## would be silently ignored rather than reported.
  m = RX.match(r'\s*(\d+)\s*([dhms]?)\s*$', arg)
  if not m:
    raise OP.OptionValueError('bad time value `%s\'' % arg)

  ## Scale the number according to the unit.
  t, u = m.groups()
  t = int(t) * { '': 1, 's': 1, 'm': 60, 'h': 3600, 'd': 86400 }[u]
  setattr(op.values, opt.dest, t)
1613 op.add_option('-t', '--timeout', type = 'string', metavar = 'SECS',
1615 help = 'stop processing nicely after SECS',
1616 action = 'callback', callback = cb_time)
1617 op.add_option('-T', '--timeout-nasty', type = 'string', metavar = 'SECS',
1618 dest = 'timeout_nasty',
1619 help = 'stop processing unpleasantly after further SECS',
1620 action = 'callback', callback = cb_time)
1623 op.add_option('-i', '--interactive', action = 'store_true', dest = 'tty',
1624 help = 'provide progress information')
1625 op.add_option('-n', '--no-act', action = 'store_true', dest = 'noact',
1626 help = 'don\'t actually modify the filesystem')
1629 op.set_defaults(formats = [], noact = False,
1630 timeout = None, timeout_nasty = 300)
1631 opts, args = op.parse_args(args)
1633 ## Check that we got the non-option arguments that we want.
1635 op.error('wrong number of arguments')
1637 ## Act on the options.
1639 STATUS.eyecandyp = True
1640 if opts.timeout is not None:
1641 to = TH.Thread(target = timeout,
1642 args = (opts.timeout, opts.timeout_nasty))
1646 ## Parse the configuration file.
1647 with open(args[0]) as conf:
1648 jobs, = Config.parseFile(conf, True)
if __name__ == '__main__':
  opts = parse_opts(SYS.argv[1:])

  ## `VARS' starts out with a `master' entry of `None', so a simple
  ## membership test can never fail.  Check the value instead to find out
  ## whether the configuration actually set a master directory.
  if VARS.get('master') is None:
    die("no master directory set")

  ## Do the work, and report any files which couldn't be converted.
  broken = grobble(VARS['master'], TARGETS, opts.noact)
  if broken:
    moan('failed to convert some files:')
    for path, exc in broken:
      moan('%s: %s' % (path, exc))
    ## NOTE(review): a nonzero exit status on conversion failure is assumed
    ## here, by contrast with the timeout case below -- confirm.
    SYS.exit(1)

  ## This is basically a successful completion: we did what we were asked to
  ## do.  It seems polite to report a message, though.
  ##
  ## Why don't we have a nonzero exit status?  The idea would be that a
  ## calling script would be interested that we used up all of our time, and
  ## not attempt to convert some other directory as well.  But that doesn't
  ## quite work.  Such a script would need to account correctly for time we
  ## had spent even if we complete successfully.  And if the script is having
  ## to watch the clock itself, it can do that without our help here.
  if KILLSWITCH.is_set():
    moan('killed by timeout')
1677 ###----- That's all, folks --------------------------------------------------