gremlin/gremlin.in: Clear `measure bitrate' progress when it's done.
[autoys] / gremlin / gremlin.in
1 #! @PYTHON@
2 ###
3 ### Convert a directory tree of audio files
4 ###
5 ### (c) 2010 Mark Wooding
6 ###
7
8 ###----- Licensing notice ---------------------------------------------------
9 ###
10 ### This file is part of the `autoys' audio tools collection.
11 ###
12 ### `autoys' is free software; you can redistribute it and/or modify
13 ### it under the terms of the GNU General Public License as published by
14 ### the Free Software Foundation; either version 2 of the License, or
15 ### (at your option) any later version.
16 ###
17 ### `autoys' is distributed in the hope that it will be useful,
18 ### but WITHOUT ANY WARRANTY; without even the implied warranty of
19 ### MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 ### GNU General Public License for more details.
21 ###
22 ### You should have received a copy of the GNU General Public License
23 ### along with `autoys'; if not, write to the Free Software Foundation,
24 ### Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
25
26 ###--------------------------------------------------------------------------
27 ### External dependencies.
28
29 ## Language features.
30 from __future__ import with_statement
31
32 ## Standard Python libraries.
33 import sys as SYS
34 import os as OS
35 import errno as E
36 import time as T
37 import unicodedata as UD
38 import fnmatch as FN
39 import re as RX
40 import shutil as SH
41 import optparse as OP
42 import threading as TH
43 import shlex as L
44 from math import sqrt, ceil
45 from contextlib import contextmanager
46
47 ## eyeD3 tag fettling.
48 import eyed3 as E3
49
50 ## Gstreamer.
51 import gi
52 gi.require_version('GLib', '2.0'); from gi.repository import GLib as G
53 gi.require_version('Gio', '2.0'); from gi.repository import Gio as GIO
54 gi.require_version('Gst', '1.0'); from gi.repository import Gst as GS
55 GS.init([])
56
57 ## Python Imaging.
58 from PIL import Image as I
59
60 ## Python parsing.
61 import pyparsing as P
62
63 ###--------------------------------------------------------------------------
64 ### Special initialization.
65
66 VERSION = '@VERSION@'
67
68 ## GLib.
69 G.threads_init()
70
71 ###--------------------------------------------------------------------------
72 ### Eyecandy progress reports.
73
74 def charwidth(s):
75 """
76 Return the width of S, in characters.
77
78 Specifically, this is the number of backspace characters required to
79 overprint the string S. If the current encoding for `stdout' appears to be
80 Unicode then do a complicated Unicode thing; otherwise assume that
81 characters take up one cell each.
82
83 None of this handles tab characters in any kind of useful way. Sorry.
84 """
85
86 ## If there's no encoding for stdout then we're doing something stupid.
87 if SYS.stdout.encoding is None: return len(s)
88
89 ## Turn the string into Unicode so we can hack on it properly. Maybe that
90 ## won't work out, in which case fall back to being stupid.
91 try: u = s.decode(SYS.stdout.encoding)
92 except UnicodeError: return len(s)
93
94 ## Our main problem is combining characters, but we should also try to
95 ## handle wide (mostly Asian) characters, and zero-width ones. This hack
96 ## is taken mostly from http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c
97 w = 0
98 for ch in u:
99 cd = ord(ch)
100 if UD.category(ch) in ['Cf', 'Me', 'Mn'] or \
101 0x1160 <= cd <= 0x11ff: pass
102 elif UD.east_asian_width(ch) in ['F', 'W']: w += 2
103 else: w += 1
104
105 ## Done.
106 return w
107
108 class StatusLine (object):
109 """
110 Maintains a status line containing ephemeral progress information.
111
112 The status line isn't especially important, but it keeps interactive users
113 amused.
114
115 There should be only one status line object in your program; otherwise
116 they'll interfere with each other and get confused.
117
118 The update algorithm (in `set') is fairly careful to do the right thing
119 with long status `lines', and to work properly in an Emacs `shell' buffer.
120 """
121
122 def __init__(me):
123 "Initialize the status line."
124 me._last = ''
125 me._lastlen = 0
126 me.eyecandyp = OS.isatty(SYS.stdout.fileno())
127
128 def set(me, line):
129 """
130 Set the status line contents to LINE, replacing what was there before.
131
132 This only produces actual output if stdout is interactive.
133 """
134 n = len(line)
135
136 ## Eyecandy update.
137 if me.eyecandyp:
138
139 ## If the old line was longer, we need to clobber its tail, so work out
140 ## what that involves.
141 if n < me._lastlen:
142 b = charwidth(me._last[n:])
143 pre = '\b'*b + ' '*b
144 else:
145 pre = ''
146
147 ## Now figure out the length of the common prefix between what we had
148 ## before and what we have now. This reduces the amount of I/O done,
149 ## which keeps network traffic down on SSH links, and keeps down the
150 ## amount of work slow terminal emulators like Emacs have to do.
151 i = 0
152 m = min(n, me._lastlen)
153 while i < m and line[i] == me._last[i]:
154 i += 1
155
156 ## Actually do the output, all in one syscall.
157 b = charwidth(me._last[i:])
158 SYS.stdout.write(pre + '\b'*b + line[i:])
159 SYS.stdout.flush()
160
161 ## Update our idea of what's gone on.
162 me._lastlen = n
163 me._last = line
164
165 def clear(me):
166 "Clear the status line. Just like set('')."
167 me.set('')
168
169 def commit(me, line = None):
170 """
171 Commit the current status line, and maybe the string LINE.
172
173 If the current status line is nonempty, then commit it to the transcript.
174 If LINE is not None, then commit that to the transcript too.
175
176 After all of this, we clear the status line to get back to a clean state.
177 """
178 if me._last:
179 if me.eyecandyp:
180 SYS.stdout.write('\n')
181 else:
182 SYS.stdout.write(me._last + '\n')
183 if line is not None:
184 SYS.stdout.write(line + '\n')
185 me._lastlen = 0
186 me._last = ''
187
188 STATUS = StatusLine()
189
190 def filestatus(file, status):
191 return '%s%s: %s' % (' '*8, OS.path.basename(file), status)
192
193 class ProgressEyecandy (object):
194 """
195 Provide amusement while something big and complicated is happening.
196
197 This is an abstract class. Subclasses must provide a method `progress'
198 returning a pair (CURRENT, MAX) indicating the current progress through the
199 operation.
200 """
201
202 def __init__(me, what, silentp = False):
203 """
204 Initialize a progress meter.
205
206 WHAT is a prefix string to be written before the progress eyecandy
207 itself.
208 """
209 me._what = what
210 me._silentp = silentp
211 me._spinner = 0
212 me._start = T.time()
213
214 def _fmt_time(me, t):
215 "Format T as a time, in (maybe hours) minutes and seconds."
216 s, t = t % 60, int(t/60)
217 m, h = t % 60, int(t/60)
218 if h > 0:
219 return '%d:%02d:%02d' % (h, m, s)
220 else:
221 return '%02d:%02d' % (m, s)
222
223 def show(me):
224 "Show the current level of progress."
225
226 ## If we're not showing pointless frippery, don't bother at all.
227 if not STATUS.eyecandyp:
228 return
229
230 ## Update the spinner index.
231 me._spinner = (me._spinner + 1)%4
232
233 ## Fetch the current progress information. Note that we always fetch
234 ## both the current and maximum levels, because both might change if an
235 ## operation revises its idea of how much work needs doing.
236 cur, max = me.progress()
237
238 ## If we couldn't get progress information, display something vaguely
239 ## amusing anyway.
240 if cur is None or max is None:
241 STATUS.set('%s %c [unknown progress]' %
242 (me._what, r'/-\|'[me._spinner]))
243 return
244
245 ## Work out -- well, guess -- the time remaining.
246 if cur:
247 t = T.time()
248 eta = me._fmt_time(ceil((t - me._start)*(max - cur)/cur))
249 else:
250 eta = '???'
251
252 ## Set the status bar.
253 n = 40*cur/max
254 STATUS.set('%s %c [%s%s] %3d%% (%s)' % \
255 (me._what,
256 r'/-\|'[me._spinner],
257 '='*n, ' '*(40 - n),
258 100*cur/max,
259 eta))
260
261 def done(me, win = True):
262 "Show a completion notice, or a failure if WIN is false."
263 if not win:
264 STATUS.set('%s FAILED!' % me._what)
265 elif not me._silentp:
266 STATUS.set('%s done (%s)' %
267 (me._what,
268 me._fmt_time(T.time() - me._start)))
269 else:
270 return
271 STATUS.commit()
272
273 ###--------------------------------------------------------------------------
274 ### Timeout handling.
275
276 KILLSWITCH = TH.Event()
277
278 def timeout(t0, t1):
279 T.sleep(t0)
280 KILLSWITCH.set()
281 T.sleep(t1)
282 moan('dying messily due to timeout')
283 OS._exit(3)
284
285 ###--------------------------------------------------------------------------
286 ### Parsing utilities.
287
288 ## Allow hyphens in identifiers.
289 IDCHARS = P.alphanums + '-_'
290 P.Keyword.setDefaultKeywordChars(IDCHARS)
291
292 ## Some common kinds of tokens.
293 Name = P.Word(IDCHARS)
294 Num = P.Word(P.nums).setParseAction(lambda toks: map(int, toks))
295 String = P.QuotedString('"', '\\')
296
297 ## Handy abbreviations for constructed parser elements.
298 def K(k): return P.Keyword(k).suppress()
299 def D(d): return P.Literal(d).suppress()
300 def R(p): return P.ZeroOrMore(p).setParseAction(lambda s, l, t: [t])
301 O = P.Optional
302
303 ###--------------------------------------------------------------------------
304 ### Format identification and conversion.
305
306 class IdentificationFailure (Exception):
307 pass
308
309 class FileCategory (object):
310 """
311 A FileCategory represents a class of files.
312
313 For example, it's sensible to consider audio, or image files as a
314 category. A file category knows how to recognize member files from
315 MIME content types.
316 """
317
318 def __init__(me, name, mime_pats, ident):
319 """
320 Construct a new category.
321
322 The PATS are a list of `fnmatch' patterns to be compared with a MIME
323 type. The IDENT is a function which produces an identification object
324 given a file's name and first-guess MIME type. The object is passed to a
325 Format's `check' method to see whether a file needs re-encoding, and to
326 `convert' to assist with the conversion.
327
328 An identification object must have an attribute `mime' which is a set of
329 possible MIME types accumulated for the object.
330 """
331 me.name = name
332 me._mime_pats = mime_pats
333 me._ident = ident
334 CATEGORYMAP[name] = me
335
336 def identify(me, file, mime):
337 """
338 Attempt to identify FILE, given its apparent MIME type.
339
340 If identification succeeds, return an identification object which can be
341 used by associated file formats; otherwise return None.
342 """
343 for p in me._mime_pats:
344 if not FN.fnmatchcase(mime, p):
345 continue
346 try:
347 return me._ident(file, mime)
348 except IdentificationFailure:
349 pass
350 return None
351
352 class BaseFormat (object):
353 """
354 A BaseFormat object represents a particular encoding and parameters.
355
356 The object can verify (the `check' method) whether a particular file
357 matches its requirements, and if necessary (`encode') re-encode a file.
358
359 Subclasses should define the following methods.
360
361 check(ID)
362 Answer whether the file identified by ID is acceptable according to
363 the receiver's parameters.
364
365 convert(MASTER, ID, TARGET)
366 Convert the file MASTER, which has been identified as ID, according
367 to the receiver's parameters, writing the output to TARGET.
368
369 Subclasses should also provide these attributes.
370
371 CATEGORY
372 A FileCategory object for the category of files that this format
373 lives within.
374
375 EXT A file extension to be applied to encoded output files.
376
377 NAME A user-facing name for the format.
378
379 PROPS A parser element to parse a property definition. It should produce
380 a pair NAME, VALUE to be stored in a dictionary.
381
382 Subclasses for different kinds of file may introduce more subclass
383 protocol.
384 """
385
386 def fixup(me, path):
387 """Post-encoding fixups."""
388 pass
389
390 FORMATMAP = {}
391 CATEGORYMAP = {}
392
393 def defformat(name, cls):
394 "Define a format NAME using class CLS."
395 if not hasattr(cls, 'NAME'):
396 raise ValueError, 'abstract class'
397 if not hasattr(cls, 'CATEGORY'):
398 raise ValueError, 'no category'
399 FORMATMAP[name] = cls
400
401 class FormatParser (P.ParserElement):
402 """
403 Parse a format specifier:
404
405 format-spec ::= string [format-properties]
406 format-properties ::= `{' format-property (`,' format-property)* `}'
407
408 The syntax of a format-property is determined by the PROPS attribute on the
409 named format and its superclasses.
410 """
411
412 name = 'format-spec'
413
414 ## We cache the parser elements we generate to avoid enormous consing.
415 CACHE = {}
416
417 def parseImpl(me, s, loc, actp = True):
418
419 ## Firstly, determine the format name.
420 loc, r = Name._parse(s, loc, actp)
421 fmt = r[0]
422
423 ## Look up the format class.
424 try: fcls = FORMATMAP[fmt]
425 except KeyError:
426 raise P.ParseException(s, loc, "Unknown format `%s'" % fmt)
427
428 ## Fetch the property-list parser from the cache, if possible; else
429 ## construct it.
430 try:
431 pp = me.CACHE[fmt]
432 except KeyError:
433 seen = set()
434 prop = None
435 for c in fcls.mro():
436 try: p = c.PROPS
437 except AttributeError: continue
438 if p in seen: continue
439 if prop is None: prop = p
440 else: prop |= p
441 seen.add(p)
442 if prop is None:
443 pp = me.CACHE[fmt] = None
444 else:
445 props = P.delimitedList(prop)
446 props.setParseAction(lambda s, l, t: dict(t.asList()))
447 pp = me.CACHE[fmt] = O(D('{') - props - D('}'))
448
449 ## Parse the properties.
450 if pp is None:
451 pd = {}
452 else:
453 loc, r = pp._parse(s, loc, actp)
454 if r: pd = r[0]
455 else: pd = {}
456
457 ## Construct the format object and return it.
458 return loc, fcls(**pd)
459
460 Format = FormatParser()
461
462 def prop(kw, pval, tag = None):
463 if tag is None: tag = kw
464 if pval is None:
465 p = K(kw)
466 p.setParseAction(lambda s, l, t: (tag, True))
467 else:
468 p = K(kw) + D('=') + pval
469 p.setParseAction(lambda s, l, t: (tag, t[0]))
470 return p
471
472 ###--------------------------------------------------------------------------
473 ### Policies and actions.
474
475 class Action (object):
476 """
477 An Action object represents a conversion action to be performed.
478
479 This class isn't intended to be instantiated directly. It exists to define
480 some protocol common to all Action objects.
481
482 Action objects have the following attributes.
483
484 master The name of the master (source) file.
485
486 target The name of the target (destination) file.
487
488 PRIORITY The priority of the action, for deciding which of two actions
489 to perform. Higher priorities are more likely to win.
490
491 Converting an Action to a string describes the action in a simple
492 user-readable manner. The `perform' method actually carries the action
493 out.
494 """
495
496 PRIORITY = 0
497
498 def __init__(me, master):
499 "Stash the MASTER file name for later."
500 me.master = master
501
502 def choose(me, him):
503 "Choose either ME or HIM and return one."
504 if him is None or me.PRIORITY > him.PRIORITY:
505 return me
506 else:
507 return him
508
509 class CopyAction (Action):
510 """
511 An Action object for simply copying a file.
512
513 Actually we try to hardlink it first, falling back to a copy later. This
514 is both faster and more efficient with regard to disk space.
515 """
516
517 ## Copying is good. Linking is really good, but we can't tell the
518 ## difference at this stage.
519 PRIORITY = 10
520
521 def __init__(me, master, targetdir):
522 "Initialize a CopyAction, from MASTER to the TARGETDIR directory."
523 Action.__init__(me, master)
524 me.target = OS.path.join(targetdir, OS.path.basename(master))
525
526 def __str__(me):
527 return 'copy/link'
528
529 def perform(me):
530 "Actually perform a CopyAction."
531 try:
532 STATUS.set(filestatus(me.master, 'link'))
533 OS.link(me.master, me.target)
534 except OSError, err:
535 if err.errno != E.EXDEV:
536 raise
537 STATUS.set(filestatus(me.master, 'copy'))
538 new = me.target + '.new'
539 SH.copyfile(me.master, new)
540 OS.rename(new, me.target)
541 STATUS.commit()
542
543 class ConvertAction (Action):
544 """
545 An Action object for converting a file to a given format.
546
547 Additional attributes:
548
549 id The identification object for the master file.
550
551 format The format to which we're meant to conver the master.
552 """
553
554 def __init__(me, master, targetdir, id, format):
555 "Initialize a ConvertAction."
556 Action.__init__(me, master)
557 stem, ext = OS.path.splitext(OS.path.basename(master))
558 me.target = OS.path.join(targetdir, stem + '.' + format.EXT)
559 me.id = id
560 me.format = format
561
562 def __str__(me):
563 return 'convert to %s' % me.format.NAME
564
565 def perform(me):
566 "Acually perform a ConvertAction."
567 STATUS.set(filestatus(me.master, me))
568 me.format.convert(me.master, me.id, me.target)
569
570 Policy = P.Forward()
571
572 class FormatPolicy (object):
573 """
574 A FormatPolicy object represents a set of rules for how to convert files.
575
576 Given a master file, the FormatPolicy will identify it and return a list of
577 actions to be performed. The methods required of a FormatPolicy are:
578
579 setcategory(CAT)
580 Store CAT as the policy's category. Check that this is consistent
581 with the policy as stored.
582
583 actions(MASTER, TARGETDIR, ID, COHORT)
584 Given a MASTER file, identified as ID, a target directory
585 TARGETDIR, and a list COHORT of (FILE, ID) pairs for other files
586 of the same category in the same directory, return a list of
587 actions to be performed to get the target directory into the right
588 form. The list might be empty if the policy object /rejects/ the
589 file.
590 """
591
592 class AndPolicy (FormatPolicy):
593 """
594 A FormatPolicy which does the union of a bunch of other policies.
595
596 Each subsidiary policy is invoked in turn. The highest-priority action for
597 each target file is returned.
598 """
599
600 def __init__(me, policies):
601 me._policies = policies
602
603 def setcategory(me, cat):
604 me.cat = cat
605 for p in me._policies:
606 p.setcategory(cat)
607
608 def actions(me, master, targetdir, id, cohort):
609 tmap = {}
610 for p in me._policies:
611 for a in p.actions(master, targetdir, id, cohort):
612 if a.target in tmap:
613 tmap[a.target] = a.choose(tmap.get(a.target))
614 else:
615 tmap[a.target] = a
616 return tmap.values()
617
618 And = K('and') - D('{') - R(Policy) - D('}')
619 And.setParseAction(lambda s, l, t: AndPolicy(t[0]))
620
621 class OrPolicy (FormatPolicy):
622 """
623 A FormatPolicy which tries other policies and uses the first that accepts.
624
625 Each subsidiary policy is invoked in turn. If any accepts, the actions it
626 proposes are turned and no further policies are invoked. If none accepts
627 then the file is rejected.
628 """
629
630 def __init__(me, policies):
631 me._policies = policies
632
633 def setcategory(me, cat):
634 me.cat = cat
635 for p in me._policies:
636 p.setcategory(cat)
637
638 def actions(me, master, targetdir, id, cohort):
639 for p in me._policies:
640 aa = p.actions(master, targetdir, id, cohort)
641 if aa:
642 return aa
643 else:
644 return []
645
646 Or = K('or') - D('{') - R(Policy) - D('}')
647 Or.setParseAction(lambda s, l, t: OrPolicy(t[0]))
648
649 class AcceptPolicy (FormatPolicy):
650 """
651 A FormatPolicy which copies files in a particular format.
652
653 If all of the files in a cohort are recognized as being in a particular
654 format (including this one), then accept it with a CopyAction; otherwise
655 reject.
656 """
657
658 def __init__(me, format):
659 me._format = format
660
661 def setcategory(me, cat):
662 if me._format.CATEGORY is not cat:
663 raise ValueError, \
664 "Accept format `%s' has category `%s', not `%s'" % \
665 (me._format.__class__.__name__,
666 me._format.CATEGORY.name, cat.name)
667 me.cat = cat
668
669 def actions(me, master, targetdir, id, cohort):
670 if me._format.check(id) and \
671 all(me._format.check(cid) for f, cid in cohort):
672 return [CopyAction(master, targetdir)]
673 else:
674 return []
675
676 Accept = K('accept') - Format
677 Accept.setParseAction(lambda s, l, t: AcceptPolicy(t[0]))
678
679 class ConvertPolicy (FormatPolicy):
680 """
681 A FormatPolicy which copies files in a particular format or converts if
682 necessary.
683 """
684 def __init__(me, format):
685 me._format = format
686
687 def setcategory(me, cat):
688 if me._format.CATEGORY is not cat:
689 raise ValueError, \
690 "Accept format `%s' has category `%s', not `%s'" % \
691 (me._format.__class__.__name__,
692 me._format.CATEGORY.name, cat.name)
693 me.cat = cat
694
695 def actions(me, master, targetdir, id, cohort):
696 if me._format.check(id):
697 return [CopyAction(master, targetdir)]
698 else:
699 return [ConvertAction(master, targetdir, id, me._format)]
700
701 Convert = K('convert') - Format
702 Convert.setParseAction(lambda s, l, t: ConvertPolicy(t[0]))
703
704 Policy << (And | Or | Accept | Convert)
705
706 ###--------------------------------------------------------------------------
707 ### Audio handling, based on GStreamer.
708
709 def make_element(factory, name = None, **props):
710 "Return a new element from the FACTORY with the given NAME and PROPS."
711 elt = GS.ElementFactory.make(factory, name)
712 if elt is None: raise ValueError, 'failed to make `%s\' element' % factory
713 elt.set_properties(**props)
714 return elt
715
716 def link_elements(elts):
717 "Link the elements ELTS together, in order."
718 e0 = None
719 for e1 in elts:
720 if e0 is not None: e0.link(e1)
721 e0 = e1
722
723 def bin_children(bin):
724 "Iterate over the (direct) children of a BIN."
725 iter = bin.iterate_elements()
726 while True:
727 rc, elt = iter.next()
728 if rc == GS.IteratorResult.DONE: break
729 elif rc != GS.IteratorResult.OK:
730 raise ValueError, 'iteration failed (%s)' % rc
731 else: yield elt
732
733 class GStreamerProgressEyecandy (ProgressEyecandy):
734 """
735 Provide amusement while GStreamer is busy doing something.
736
737 The GStreamerProgressEyecandy object is a context manager. Wrap it round
738 your GStreamer loop to provide progress information for an operation.
739 """
740
741 def __init__(me, what, elt, **kw):
742 """
743 Initialize a progress meter.
744
745 WHAT is a prefix string to be written before the progress eyecandy
746 itself. ELT is a GStreamer element to interrogate to find the progress
747 information.
748 """
749 me._elt = elt
750 ProgressEyecandy.__init__(me, what, **kw)
751
752 def _update(me):
753 "Called by GLib main event loop to update the eyecandy."
754 me.show()
755 return True
756
757 def _timer(me):
758 """
759 Update the progress meter.
760
761 This is called periodically by the GLib main event-processing loop.
762 """
763 me.show()
764 return True
765
766 def progress(me):
767 "Return the current progress as a pair (CURRENT, MAX)."
768
769 ## Fetch the current progress information. We get the duration each
770 ## time, because (particularly with VBR-encoded MP3 inputs) the estimated
771 ## duration can change as we progress. Hopefully it settles down fairly
772 ## soon.
773 ok, t = me._elt.query_position(GS.Format.TIME)
774 if ok: ok, end = me._elt.query_duration(GS.Format.TIME)
775 if ok: return t, end
776 else: return None, None
777
778 def __enter__(me):
779 "Enter context: attach progress meter display."
780
781 ## If we're not showing pointless frippery, don't bother at all.
782 if not STATUS.eyecandyp:
783 return
784
785 ## Update regularly. The pipeline runs asynchronously.
786 me._id = G.timeout_add(100, me._update)
787
788 def __exit__(me, ty, val, tb):
789 "Leave context: remove display and report completion or failure."
790
791 ## If we're not showing pointless frippery, there's nothing to remove.
792 if STATUS.eyecandyp:
793 G.source_remove(me._id)
794
795 ## Report completion anyway.
796 me.done(ty is None)
797
798 ## As you were.
799 return False
800
801 class AudioIdentifier (object):
802 """
803 Analyses and identifies an audio file.
804
805 Important properties are:
806
807 cap A capabilities structure describing the audio file data. The most
808 interesting thing in here is probably its name, which is a MIME
809 type describing the data.
810
811 dcap A capabilities structure describing the decoded audio data. This
812 is of interest during conversion.
813
814 tags A dictionary containing metadata tags from the file. These are in
815 GStreamer's encoding-independent format.
816
817 bitrate An approximation to the stream's bitrate, in kilobits per second.
818 This might be slow to work out for some files so it's computed on
819 demand.
820 """
821
822 def _prepare_pipeline(me):
823 pipe = GS.Pipeline()
824 bus = pipe.get_bus()
825
826 ## The basic recognition kit is based around `decodebin'. We must keep
827 ## it happy by giving it sinks for the streams it's found, which it
828 ## announces asynchronously.
829 source = make_element('filesrc', 'file', location = me._file)
830 decoder = make_element('decodebin', 'decode')
831 sink = make_element('fakesink')
832 def decoder_pad_arrived(elt, pad):
833 if pad.get_current_caps()[0].get_name().startswith('audio/'):
834 elt.link_pads(pad.get_name(), sink, 'sink')
835 decoder.connect('pad-added', decoder_pad_arrived)
836 for i in [source, decoder, sink]: pipe.add(i)
837 link_elements([source, decoder])
838
839 ## Done.
840 return pipe, bus, decoder, sink
841
842 def __init__(me, file, mime):
843 "Initialize the object suitably for identifying FILE."
844
845 me._file = file
846 pipe, bus, decoder, sink = me._prepare_pipeline()
847
848 ## Make some initial GStreamer objects. We'll want the pipeline later if
849 ## we need to analyse a poorly tagged MP3 stream, so save it away.
850 loop = G.MainLoop()
851
852 ## Arrange to collect tags from the pipeline's bus as they're reported.
853 tags = {}
854 fail = []
855 def bus_message(bus, msg):
856 ty, s = msg.type, msg.get_structure()
857 if ty == GS.MessageType.ERROR:
858 fail[:] = (ValueError, s['debug'], None)
859 loop.quit()
860 elif ty == GS.MessageType.STATE_CHANGED:
861 if s['new-state'] == GS.State.PAUSED and \
862 msg.src == pipe:
863 loop.quit()
864 elif ty == GS.MessageType.TAG:
865 tt = s['taglist']
866 for i in xrange(tt.n_tags()):
867 t = tt.nth_tag_name(i)
868 if tt.get_tag_size(t) != 1: continue
869 v = tt.get_value_index(t, 0)
870 tags[t] = v
871 bmid = bus.connect('message', bus_message)
872
873 ## We want to identify the kind of stream this is. (Hmm. The MIME type
874 ## recognizer has already done this work, but GStreamer is probably more
875 ## reliable.) The `decodebin' has a `typefind' element inside which will
876 ## announce the identified media type. All we need to do is find it and
877 ## attach a signal handler. (Note that the handler might be run in the
878 ## thread context of the pipeline element, but Python's GIL will keep
879 ## things from being too awful.)
880 me.cap = None
881 me.dcap = None
882 for e in bin_children(decoder):
883 if e.get_factory().get_name() == 'typefind':
884 tfelt = e
885 break
886 else:
887 assert False, 'failed to find typefind element'
888
889 ## Crank up most of the heavy machinery. The message handler will stop
890 ## the loop when things seem to be sufficiently well underway.
891 bus.add_signal_watch()
892 pipe.set_state(GS.State.PAUSED)
893 loop.run()
894 bus.disconnect(bmid)
895 bus.remove_signal_watch()
896 if fail:
897 pipe.set_state(GS.State.NULL)
898 raise fail[0], fail[1], fail[2]
899
900 ## Store the collected tags.
901 me.tags = tags
902
903 ## Gather the capabilities. The `typefind' element knows the input data
904 ## type. The 'decodebin' knows the raw data type.
905 me.cap = tfelt.get_static_pad('src').get_allowed_caps()[0]
906 me.mime = set([mime, me.cap.get_name()])
907 me.dcap = sink.get_static_pad('sink').get_allowed_caps()[0]
908
909 ## If we found a plausible bitrate then stash it. Otherwise note that we
910 ## failed. If anybody asks then we'll work it out then.
911 if 'nominal-bitrate' in tags:
912 me._bitrate = tags['nominal-bitrate']/1000
913 elif 'bitrate' in tags and tags['bitrate'] >= 80000:
914 me._bitrate = tags['bitrate']/1000
915 else:
916 me._bitrate = None
917 pipe.set_state(GS.State.NULL)
918
919 @property
920 def bitrate(me):
921 """
922 Return the approximate bit-rate of the input file.
923
924 This might take a while if we have to work it out the hard way.
925 """
926
927 ## If we already know the answer then just return it.
928 if me._bitrate is not None:
929 return me._bitrate
930
931 ## Make up a new pipeline and main loop.
932 pipe, bus, _, _ = me._prepare_pipeline()
933 loop = G.MainLoop()
934
935 ## Watch for bus messages. We'll stop when we reach the end of the
936 ## stream: then we'll have a clear idea of how long the track was.
937 fail = []
938 def bus_message(bus, msg):
939 ty, s = msg.type, msg.get_structure()
940 if ty == GS.MessageType.ERROR:
941 fail[:] = (ValueError, s['debug'], None)
942 loop.quit()
943 elif ty == GS.MessageType.EOS:
944 loop.quit()
945 bus = pipe.get_bus()
946 bmid = bus.connect('message', bus_message)
947
948 ## Get everything moving, and keep the user amused while we work.
949 bus.add_signal_watch()
950 pipe.set_state(GS.State.PLAYING)
951 with GStreamerProgressEyecandy(filestatus(me._file, 'measure bitrate'),
952 pipe, silentp = True):
953 loop.run()
954 bus.remove_signal_watch()
955 bus.disconnect(bmid)
956 if fail:
957 pipe.set_state(GS.State.NULL)
958 raise fail[0], fail[1], fail[2]
959 STATUS.clear()
960
961 ## The bitrate computation wants the file size. Ideally we'd want the
962 ## total size of the frames' contents, but that seems hard to dredge
963 ## out. If the framing overhead is small, this should be close enough
964 ## for our purposes.
965 bytes = OS.stat(me._file).st_size
966
967 ## Now we should be able to find out our position accurately and work out
968 ## a bitrate. Cache it in case anybody asks again.
969 ok, t = pipe.query_position(GS.Format.TIME)
970 assert ok, 'failed to discover bitrate'
971 me._bitrate = int(8*bytes*1e6/t)
972 pipe.set_state(GS.State.NULL)
973
974 ## Done.
975 return me._bitrate
976
977 class AudioFormat (BaseFormat):
978 """
979 An AudioFormat is a kind of Format specialized for audio files.
980
981 Format checks are done on an AudioIdentifier object.
982 """
983
984 PROPS = prop('bitrate', Num)
985
986 ## libmagic reports `application/ogg' for Ogg Vorbis files. We've switched
987 ## to GIO now, which reports either `audio/ogg' or `audio/x-vorbis+ogg'
988 ## depending on how thorough it's trying to be. Still, it doesn't do any
989 ## harm here; the main risk is picking up Ogg Theora files by accident, and
990 ## we'll probably be able to extract the audio from them anyway.
991 CATEGORY = FileCategory('audio', ['audio/*', 'application/ogg'],
992 AudioIdentifier)
993
994 def __init__(me, bitrate = None):
995 "Construct an object, requiring an approximate bitrate."
996 me.bitrate = bitrate
997
998 def check(me, id):
999 """
1000 Return whether the AudioIdentifier ID is suitable for our purposes.
1001
1002 Subclasses can either override this method or provide a property
1003 `MIMETYPES', which is a list (other thing that implements `__contains__')
1004 of GStreamer MIME types matching this format.
1005 """
1006 return id.mime & me.MIMETYPES and \
1007 (me.bitrate is None or id.bitrate <= me.bitrate * sqrt(2))
1008
1009 def encoder(me):
1010 """
1011 Constructs a GStreamer element to encode audio input.
1012
1013 Subclasses can either override this method (or replace `encode'
1014 entirely), or provide a method `encoder_chain' which returns a list of
1015 elements to be linked together in sequence. The first element in the
1016 chain must have a pad named `sink' and the last must have a pad named
1017 `src'.
1018 """
1019 elts = me.encoder_chain()
1020 bin = GS.Bin()
1021 for i in elts: bin.add(i)
1022 link_elements(elts)
1023 bin.add_pad(GS.GhostPad('sink', elts[0].get_static_pad('sink')))
1024 bin.add_pad(GS.GhostPad('src', elts[-1].get_static_pad('src')))
1025 return bin
1026
1027 def convert(me, master, id, target):
1028 """
1029 Encode audio from MASTER, already identified as ID, writing it to TARGET.
1030
1031 See `encoder' for subclasses' responsibilities.
1032 """
1033
1034 ## Construct the necessary equipment.
1035 pipe = GS.Pipeline()
1036 bus = pipe.get_bus()
1037 loop = G.MainLoop()
1038
1039 ## Make sure that there isn't anything in the way of our output. We're
1040 ## going to write to a scratch file so that we don't get confused by
1041 ## half-written rubbish left by a crashed program.
1042 new = target + '.new'
1043 try:
1044 OS.unlink(new)
1045 except OSError, err:
1046 if err.errno != E.ENOENT:
1047 raise
1048
1049 ## Piece together our pipeline. The annoying part is that the
1050 ## `decodebin' doesn't have any source pads yet, so our chain is in two
1051 ## halves for now.
1052 source = make_element('filesrc', 'source', location = master)
1053 decoder = make_element('decodebin', 'decode')
1054 convert = make_element('audioconvert', 'convert')
1055 encoder = me.encoder()
1056 sink = make_element('filesink', 'sink', location = new)
1057 for i in [source, decoder, convert, encoder, sink]: pipe.add(i)
1058 link_elements([source, decoder])
1059 link_elements([convert, encoder, sink])
1060
1061 ## Some decoders (e.g., the AC3 decoder) include channel-position
1062 ## indicators in their output caps. The Vorbis encoder interferes with
1063 ## this, and you end up with a beautifully encoded mono signal from a
1064 ## stereo source. From a quick butchers at the `vorbisenc' source, I
1065 ## /think/ that this is only a problem with stereo signals: mono signals
1066 ## are mono already, and `vorbisenc' accepts channel positions if there
1067 ## are more than two channels.
1068 ##
1069 ## So we have this bodge. We already collected the decoded audio caps
1070 ## during identification. So if we see 2-channel audio with channel
1071 ## positions, we strip the positions off forcibly by adding a filter.
1072 if id.dcap.get_name().startswith('audio/x-raw-') and \
1073 id.dcap.has_field('channels') and \
1074 id.dcap['channels'] == 2 and \
1075 id.dcap.has_field('channel-positions'):
1076 dcap = GS.Caps()
1077 c = id.dcap.copy()
1078 c.remove_field('channel-positions')
1079 dcap.append(c)
1080 else:
1081 dcap = None
1082
1083 ## Hook onto the `decodebin' so we can link together the two halves of
1084 ## our encoding chain. For now, we'll hope that there's only one audio
1085 ## stream in there, and just throw everything else away.
1086 def decoder_pad_arrived(elt, pad):
1087 if pad.get_current_caps()[0].get_name().startswith('audio/'):
1088 if dcap:
1089 elt.link_pads_filtered(pad.get_name(), convert, 'sink', dcap)
1090 else:
1091 elt.link_pads(pad.get_name(), convert, 'sink')
1092 decoder.connect('pad-added', decoder_pad_arrived)
1093
1094 ## Watch the bus for completion messages.
1095 fail = []
1096 def bus_message(bus, msg):
1097 if msg.type == GS.MessageType.ERROR:
1098 fail[:] = (ValueError, msg.get_structure()['debug'], None)
1099 loop.quit()
1100 elif msg.type == GS.MessageType.EOS:
1101 loop.quit()
1102 bmid = bus.connect('message', bus_message)
1103
1104 ## Get everything ready and let it go.
1105 bus.add_signal_watch()
1106 pipe.set_state(GS.State.PLAYING)
1107 with GStreamerProgressEyecandy(filestatus(master,
1108 'convert to %s' % me.NAME),
1109 pipe):
1110 loop.run()
1111 pipe.set_state(GS.State.NULL)
1112 bus.remove_signal_watch()
1113 bus.disconnect(bmid)
1114 if fail:
1115 raise fail[0], fail[1], fail[2]
1116
1117 ## Fix up the output file if we have to.
1118 me.fixup(new)
1119
1120 ## We're done.
1121 OS.rename(new, target)
1122
1123 class OggVorbisFormat (AudioFormat):
1124 "AudioFormat object for Ogg Vorbis."
1125
1126 ## From https://en.wikipedia.org/wiki/Vorbis
1127 QMAP = [(-1, 45), ( 0, 64), ( 1, 80), ( 2, 96),
1128 ( 3, 112), ( 4, 128), ( 5, 160), ( 6, 192),
1129 ( 7, 224), ( 8, 256), ( 9, 320), (10, 500)]
1130
1131 NAME = 'Ogg Vorbis'
1132 MIMETYPES = set(['application/ogg', 'audio/x-vorbis', 'audio/ogg',
1133 'audio/x-vorbis+ogg'])
1134 EXT = 'ogg'
1135
1136 def encoder_chain(me):
1137 encprops = {}
1138 if me.bitrate is not None:
1139 for q, br in me.QMAP:
1140 if br >= me.bitrate:
1141 break
1142 else:
1143 raise ValueError, 'no suitable quality setting found'
1144 encprops['quality'] = q/10.0
1145 return [make_element('vorbisenc', **encprops),
1146 make_element('oggmux')]
1147
1148 defformat('ogg-vorbis', OggVorbisFormat)
1149
1150 class MP3Format (AudioFormat):
1151 "AudioFormat object for MP3."
1152
1153 NAME = 'MP3'
1154 MIMETYPES = set(['audio/mpeg'])
1155 EXT = 'mp3'
1156
1157 def encoder_chain(me):
1158 encprops = {}
1159 if me.bitrate is not None:
1160 encprops['bitrate'] = me.bitrate
1161 encprops['target'] = 'bitrate'
1162 else:
1163 encprops['quality'] = 4
1164 encprops['target'] = 'quality'
1165 return [make_element('lamemp3enc', quality = 4, **encprops),
1166 make_element('xingmux'),
1167 make_element('id3v2mux')]
1168
1169 def fixup(me, path):
1170 """
1171 Fix up MP3 files.
1172
1173 GStreamer produces ID3v2 tags, but not ID3v1. This seems unnecessarily
1174 unkind to stupid players.
1175 """
1176 f = E3.load(path)
1177 if f is None: return
1178 t = f.tag
1179 if t is None: return
1180 for v in [E3.id3.ID3_V2_3, E3.id3.ID3_V1]:
1181 try: f.tag.save(version = v)
1182 except (UnicodeEncodeError,
1183 E3.id3.GenreException,
1184 E3.id3.TagException):
1185 pass
1186
1187 defformat('mp3', MP3Format)
1188
1189 ###--------------------------------------------------------------------------
1190 ### Image handling, based on the Python Imaging Library.
1191
1192 class ImageIdentifier (object):
1193 """
1194 Analyses and identifies an image file.
1195
1196 Simply leaves an Image object in the `img' property which can be inspected.
1197 """
1198
1199 def __init__(me, file, mime):
1200
1201 ## Get PIL to open the file. It will magically work out what kind of
1202 ## file it is.
1203 try:
1204 me.img = I.open(file)
1205 except IOError, exc:
1206
1207 ## Unhelpful thing to raise on identification failure. We can
1208 ## distinguish this from an actual I/O error because it doesn't have an
1209 ## `errno'.
1210 if exc.errno is None:
1211 raise IdentificationFailure
1212 raise
1213
1214 me.mime = set([mime])
1215
1216 class ImageFormat (BaseFormat):
1217 """
1218 An ImageFormat is a kind of Format specialized for image files.
1219
1220 Subclasses don't need to provide anything other than the properties
1221 required by all concrete Format subclasses. However, there is a
1222 requirement that the `NAME' property match PIL's `format' name for the
1223 format.
1224 """
1225
1226 PROPS = prop('size', Num)
1227 CATEGORY = FileCategory('image', ['image/*'], ImageIdentifier)
1228
1229 def __init__(me, size = None, **kw):
1230 """
1231 Initialize an ImageFormat object.
1232
1233 Additional keywords are used when encoding, and may be recognized by
1234 enhanced `check' methods in subclasses.
1235 """
1236 me._size = size
1237 me._props = kw
1238
1239 def check(me, id):
1240 "Check whether the ImageIdentifier ID matches our requirements."
1241 return id.img.format == me.NAME and \
1242 (me._size is None or
1243 (id.img.size[0] <= me._size and
1244 id.img.size[1] <= me._size))
1245
1246 def convert(me, master, id, target):
1247 "Encode the file MASTER, identified as ID, writing the result to TARGET."
1248
1249 ## Write to a scratch file.
1250 new = target + '.new'
1251
1252 ## The ImageIdentifier already contains a copy of the open file. It
1253 ## would be wasteful not to use it.
1254 img = id.img
1255 STATUS.set(filestatus(master, 'convert to %s' % me.NAME))
1256
1257 ## If there's a stated maximum size then scale the image down to match.
1258 ## But thumbnailing clobbers the original, so take a copy.
1259 if me._size is not None and \
1260 (img.size[0] > me._size or img.size[1] > me._size):
1261 img = img.copy()
1262 img.thumbnail((me._size, me._size), I.ANTIALIAS)
1263
1264 ## Write the output image.
1265 img.save(new, me.NAME, **me._props)
1266
1267 ## Fix it up if necessary.
1268 me.fixup(new)
1269
1270 ## We're done.
1271 OS.rename(new, target)
1272 STATUS.commit()
1273
1274 class JPEGFormat (ImageFormat):
1275 """
1276 Image format for JPEG (actually JFIF) files.
1277
1278 Interesting properties to set:
1279
1280 optimize
1281 If present, take a second pass to select optimal encoder settings.
1282
1283 progressive
1284 If present, make a progressive file.
1285
1286 quality Integer from 1--100 (worst to best); default is 75.
1287 """
1288 EXT = 'jpg'
1289 NAME = 'JPEG'
1290 PROPS = prop('optimize', None) \
1291 | prop('progressive', None, 'progression') \
1292 | prop('quality', Num)
1293
1294 defformat('jpeg', JPEGFormat)
1295
1296 class PNGFormat (ImageFormat):
1297 """
1298 Image format for PNG files.
1299
1300 Interesting properties:
1301
1302 optimize
1303 If present, make a special effort to minimize the output file.
1304 """
1305 EXT = 'png'
1306 NAME = 'PNG'
1307 PROPS = prop('optimize', None)
1308
1309 defformat('png', PNGFormat)
1310
1311 class BMPFormat (ImageFormat):
1312 """
1313 Image format for Windows BMP files, as used by RockBox.
1314
1315 No additional properties.
1316 """
1317 NAME = 'BMP'
1318 EXT = 'bmp'
1319
1320 defformat('bmp', BMPFormat)
1321
1322 ###--------------------------------------------------------------------------
1323 ### Remaining parsing machinery.
1324
1325 Type = K('type') - Name - D('{') - R(Policy) - D('}')
1326 def build_type(s, l, t):
1327 try:
1328 cat = CATEGORYMAP[t[0]]
1329 except KeyError:
1330 raise P.ParseException(s, loc, "Unknown category `%s'" % t[0])
1331 pols = t[1]
1332 if len(pols) == 1: pol = pols[0]
1333 else: pol = AndPolicy(pols)
1334 pol.setcategory(cat)
1335 return pol
1336 Type.setParseAction(build_type)
1337
1338 TARGETS = []
1339 class TargetJob (object):
1340 def __init__(me, targetdir, policies):
1341 me.targetdir = targetdir
1342 me.policies = policies
1343 def perform(me):
1344 TARGETS.append(me)
1345
1346 Target = K('target') - String - D('{') - R(Type) - D('}')
1347 def build_target(s, l, t):
1348 return TargetJob(t[0], t[1])
1349 Target.setParseAction(build_target)
1350
1351 VARS = { 'master': None }
1352 class VarsJob (object):
1353 def __init__(me, vars):
1354 me.vars = vars
1355 def perform(me):
1356 for k, v in me.vars:
1357 VARS[k] = v
1358
1359 Var = prop('master', String)
1360 Vars = K('vars') - D('{') - R(Var) - D('}')
1361 def build_vars(s, l, t):
1362 return VarsJob(t[0])
1363 Vars.setParseAction(build_vars)
1364
1365 TopLevel = Vars | Target
1366 Config = R(TopLevel)
1367 Config.ignore(P.pythonStyleComment)
1368
1369 ###--------------------------------------------------------------------------
1370 ### The directory grobbler.
1371
1372 def grobble(master, targets, noact = False):
1373 """
1374 Work through the MASTER directory, writing converted files to TARGETS.
1375
1376 The TARGETS are a list of `TargetJob' objects, each describing a target
1377 directory and a policy to apply to it.
1378
1379 If NOACT is true, then don't actually do anything permanent to the
1380 filesystem.
1381 """
1382
1383 ## Transform the targets into a more convenient data structure.
1384 tpolmap = []
1385 for t in targets:
1386 pmap = {}
1387 tpolmap.append(pmap)
1388 for p in t.policies: pmap.setdefault(p.cat, []).append(p)
1389
1390 ## Keep track of the current position in the master tree.
1391 dirs = []
1392
1393 ## And the files which haven't worked.
1394 broken = []
1395
1396 def grobble_file(master, pmap, targetdir, cohorts):
1397 ## Convert MASTER, writing the result to TARGETDIR.
1398 ##
1399 ## The COHORTS are actually (CAT, ID, COHORT) triples, where a COHORT is
1400 ## a list of (FILENAME, ID) pairs.
1401 ##
1402 ## Since this function might convert the MASTER file, the caller doesn't
1403 ## know the name of the output files, so we return then as a list.
1404
1405 done = set()
1406 st_m = OS.stat(master)
1407
1408 ## Work through each category listed and apply its policy.
1409 for cat, id, cohort in cohorts:
1410
1411 ## Go through the category's policies and see if any match. If we fail
1412 ## here, see if there are more categories to try.
1413 for pol in pmap[cat]:
1414 acts = pol.actions(master, targetdir, id, cohort)
1415 if acts: break
1416 else:
1417 continue
1418
1419 ## Work through the targets one by one.
1420 for a in acts:
1421 done.add(a.target)
1422
1423 ## Find out whether the target file already exists and is up-to-date
1424 ## with respect to the master. (Caution here with low-resolution
1425 ## timestamps.) If it's OK, then just move on.
1426 try:
1427 st_t = OS.stat(a.target)
1428 if st_m.st_mtime < st_t.st_mtime or \
1429 (st_m.st_ino, st_m.st_dev) == (st_t.st_ino, st_t.st_dev):
1430 continue
1431 except OSError, err:
1432 if err.errno not in (E.ENOENT, E.ENOTDIR):
1433 raise
1434
1435 ## We have real work to do. If there's a current status message,
1436 ## it's the containing directory so flush it so that people know
1437 ## where we are.
1438 STATUS.commit()
1439
1440 ## Remove the target. (A hardlink will fail if the target already
1441 ## exists.)
1442 if not noact:
1443 try:
1444 OS.unlink(a.target)
1445 except OSError, err:
1446 if err.errno not in (E.ENOENT, E.ENOTDIR):
1447 raise
1448
1449 ## Do whatever it is we decided to do.
1450 if noact:
1451 STATUS.commit(filestatus(master, a))
1452 else:
1453 a.perform()
1454
1455 ## We're done. Return the names of the targets.
1456 return list(done)
1457
1458 @contextmanager
1459 def wrap(masterfile):
1460 ## Handle exceptions found while trying to convert a particular file or
1461 ## directory.
1462
1463 try:
1464 yield masterfile
1465
1466 ## Something bad happened. Report the error, but continue. (This list
1467 ## of exceptions needs a lot of work.)
1468 except (IOError, OSError), exc:
1469 STATUS.clear()
1470 STATUS.commit(filestatus(masterfile, 'failed (%s)' % exc))
1471 broken.append((masterfile, exc))
1472
1473 def grobble_dir(master, targets):
1474 ## Recursively convert files in MASTER, writing them to the TARGETS.
1475
1476 ## Keep track of the subdirectories we encounter, because we'll need to
1477 ## do all of those in one go at the end.
1478 subdirs = set()
1479
1480 ## Work through each target directory in turn.
1481 for target, pmap in zip(targets, tpolmap):
1482
1483 ## Make sure the TARGET exists and is a directory. It's a fundamental
1484 ## assumption of this program that the entire TARGET tree is
1485 ## disposable, so if something exists but isn't a directory, we should
1486 ## kill it.
1487 if OS.path.isdir(target):
1488 pass
1489 else:
1490 if OS.path.exists(target):
1491 STATUS.commit(filestatus(target, 'clear nondirectory'))
1492 if not noact:
1493 OS.unlink(target)
1494 STATUS.commit(filestatus(target, 'create directory'))
1495 if not noact:
1496 OS.mkdir(target)
1497
1498 ## Keep a list of things in the target. As we convert files, we'll
1499 ## check them off. Anything left over is rubbish and needs to be
1500 ## deleted.
1501 checklist = {}
1502 try:
1503 for i in OS.listdir(target):
1504 checklist[i] = False
1505 except OSError, err:
1506 if err.errno not in (E.ENOENT, E.ENOTDIR):
1507 raise
1508
1509 ## Keep track of the files in each category.
1510 catmap = {}
1511 todo = []
1512 done = []
1513
1514 ## Work through the master files.
1515 for f in sorted(OS.listdir(master)):
1516
1517 ## If the killswitch has been pulled then stop. The whole idea is
1518 ## that we want to cause a clean shutdown if possible, so we don't
1519 ## want to do it in the middle of encoding because the encoding
1520 ## effort will have been wasted. This is the only place we need to
1521 ## check. If we've exited the loop, then clearing old files will
1522 ## probably be fast, and we'll either end up here when the recursive
1523 ## call returns or we'll be in the same boat as before, clearing old
1524 ## files, only up a level. If worst comes to worst, we'll be killed
1525 ## forcibly somewhere inside `SH.rmtree', and that can continue where
1526 ## it left off.
1527 if KILLSWITCH.is_set():
1528 return
1529
1530 ## Do something with the file.
1531 with wrap(OS.path.join(master, f)) as masterfile:
1532
1533 ## If it's a directory then prepare to grobble it recursively, but
1534 ## don't do that yet.
1535 if OS.path.isdir(masterfile):
1536 subdirs.add(f)
1537 done.append(OS.path.join(target, f))
1538
1539 ## Otherwise it's a file. Work out what kind, and stash it under
1540 ## the appropriate categories. Later, we'll apply policy to the
1541 ## files, by category, and work out what to do with them all.
1542 else:
1543 mime = GIO.file_new_for_path(masterfile) \
1544 .query_info('standard::content-type', 0) \
1545 .get_content_type()
1546 cats = []
1547 for cat in pmap.iterkeys():
1548 id = cat.identify(masterfile, mime)
1549 if id is None: continue
1550 catmap.setdefault(cat, []).append((masterfile, id))
1551 cats.append((cat, id))
1552 if not cats:
1553 catmap.setdefault(None, []).append((masterfile, id))
1554 todo.append((masterfile, cats))
1555
1556 ## Work through the categorized files to see what actions to do for
1557 ## them.
1558 for masterfile, cats in todo:
1559 with wrap(masterfile):
1560 done += grobble_file(masterfile, pmap, target,
1561 [(cat, id, catmap[cat]) for cat, id in cats])
1562
1563 ## Check the results off the list so that we don't clear it later.
1564 for f in done:
1565 checklist[OS.path.basename(f)] = True
1566
1567 ## Maybe there's stuff in the target which isn't accounted for. Delete
1568 ## it: either the master has changed, or the policy for this target has
1569 ## changed. Either way, the old files aren't wanted.
1570 for f in checklist:
1571 if not checklist[f]:
1572 STATUS.commit(filestatus(f, 'clear bogus file'))
1573 if not noact:
1574 bogus = OS.path.join(target, f)
1575 try:
1576 if OS.path.isdir(bogus):
1577 SH.rmtree(bogus)
1578 else:
1579 OS.unlink(bogus)
1580 except OSError, err:
1581 if err.errno != E.ENOENT:
1582 raise
1583
1584 ## If there are subdirectories which want processing then do those.
1585 ## Keep the user amused by telling him where we are in the tree.
1586 for d in sorted(subdirs):
1587 dirs.append(d)
1588 STATUS.set('/'.join(dirs))
1589 with wrap(OS.path.join(master, d)) as masterdir:
1590 try:
1591 grobble_dir(masterdir,
1592 [OS.path.join(target, d) for target in targets])
1593 finally:
1594 dirs.pop()
1595 STATUS.set('/'.join(dirs))
1596
1597 ## Right. We're ready to go.
1598 grobble_dir(master, [t.targetdir for t in targets])
1599 return broken
1600
1601 ###--------------------------------------------------------------------------
1602 ### Command-line interface.
1603
1604 QUIS = OS.path.basename(SYS.argv[0])
1605
1606 def moan(msg):
1607 "Report a warning message to the user."
1608 SYS.stderr.write('%s: %s\n' % (QUIS, msg))
1609
1610 def die(msg):
1611 "Report a fatal error message to the user."
1612 moan(msg)
1613 SYS.exit(1)
1614
1615 def parse_opts(args):
1616 """
1617 Parse command-line arguments in ARGS.
1618
1619 Returns a Grobbler object and the MASTER and TARGET directories to be
1620 grobbled.
1621 """
1622
1623 ## Build the option parser object.
1624 op = OP.OptionParser(prog = QUIS, version = VERSION,
1625 usage = '%prog [-in] [-t TIMEOUT] [-T TIMEOUT] '
1626 'CONFIG',
1627 description = """\
1628 Convert a directory tree of files according to the configuration file
1629 CONFIG.
1630 """)
1631
1632 ## Timeout handling.
1633 def cb_time(opt, ostr, arg, op):
1634 m = RX.match(r'\s*(\d+)\s*([dhms]?)\s*', arg)
1635 if not m:
1636 raise OP.OptionValueerror, 'bad time value `%s\'' % arg
1637 t, u = m.groups()
1638 t = int(t) * { '': 1, 's': 1, 'm': 60, 'h': 3600, 'd': 86400 }[u]
1639 setattr(op.values, opt.dest, t)
1640 op.add_option('-t', '--timeout', type = 'string', metavar = 'SECS',
1641 dest = 'timeout',
1642 help = 'stop processing nicely after SECS',
1643 action = 'callback', callback = cb_time)
1644 op.add_option('-T', '--timeout-nasty', type = 'string', metavar = 'SECS',
1645 dest = 'timeout_nasty',
1646 help = 'stop processing unpleasantly after further SECS',
1647 action = 'callback', callback = cb_time)
1648
1649 ## Other options.
1650 op.add_option('-i', '--interactive', action = 'store_true', dest = 'tty',
1651 help = 'provide progress information')
1652 op.add_option('-n', '--no-act', action = 'store_true', dest = 'noact',
1653 help = 'don\'t actually modify the filesystem')
1654
1655 ## Ready to rock.
1656 op.set_defaults(formats = [], noact = False,
1657 timeout = None, timeout_nasty = 300)
1658 opts, args = op.parse_args(args)
1659
1660 ## Check that we got the non-option arguments that we want.
1661 if len(args) != 1:
1662 op.error('wrong number of arguments')
1663
1664 ## Act on the options.
1665 if opts.tty:
1666 STATUS.eyecandyp = True
1667 if opts.timeout is not None:
1668 to = TH.Thread(target = timeout,
1669 args = (opts.timeout, opts.timeout_nasty))
1670 to.daemon = True
1671 to.start()
1672
1673 ## Parse the configuration file.
1674 with open(args[0]) as conf:
1675 jobs, = Config.parseFile(conf, True)
1676 for j in jobs:
1677 j.perform()
1678
1679 return opts
1680
1681 if __name__ == '__main__':
1682 opts = parse_opts(SYS.argv[1:])
1683 if 'master' not in VARS:
1684 die("no master directory set")
1685 broken = grobble(VARS['master'], TARGETS, opts.noact)
1686 if broken:
1687 moan('failed to convert some files:')
1688 for file, exc in broken:
1689 moan('%s: %s' % (file, exc))
1690 SYS.exit(1)
1691
1692 ## This is basically a successful completion: we did what we were asked to
1693 ## do. It seems polite to report a message, though.
1694 ##
1695 ## Why don't we have a nonzero exit status? The idea would be that a
1696 ## calling script would be interested that we used up all of our time, and
1697 ## not attempt to convert some other directory as well. But that doesn't
1698 ## quite work. Such a script would need to account correctly for time we
1699 ## had spent even if we complete successfully. And if the script is having
1700 ## to watch the clock itself, it can do that without our help here.
1701 if KILLSWITCH.is_set():
1702 moan('killed by timeout')
1703
1704 ###----- That's all, folks --------------------------------------------------