gremlin/gremlin.in: Attach a name to the FormatSpec parser.
[autoys] / gremlin / gremlin.in
1 #! @PYTHON@
2 ###
3 ### Convert a directory tree of audio files
4 ###
5 ### (c) 2010 Mark Wooding
6 ###
7
8 ###----- Licensing notice ---------------------------------------------------
9 ###
10 ### This file is part of the `autoys' audio tools collection.
11 ###
12 ### `autoys' is free software; you can redistribute it and/or modify
13 ### it under the terms of the GNU General Public License as published by
14 ### the Free Software Foundation; either version 2 of the License, or
15 ### (at your option) any later version.
16 ###
17 ### `autoys' is distributed in the hope that it will be useful,
18 ### but WITHOUT ANY WARRANTY; without even the implied warranty of
19 ### MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 ### GNU General Public License for more details.
21 ###
22 ### You should have received a copy of the GNU General Public License
23 ### along with `autoys'; if not, write to the Free Software Foundation,
24 ### Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
25
26 ###--------------------------------------------------------------------------
27 ### External dependencies.
28
29 ## Language features.
30 from __future__ import with_statement
31
32 ## Standard Python libraries.
33 import sys as SYS
34 import os as OS
35 import errno as E
36 import time as T
37 import unicodedata as UD
38 import fnmatch as FN
39 import re as RX
40 import shutil as SH
41 import optparse as OP
42 import threading as TH
43 import shlex as L
44 from math import sqrt, ceil
45 from contextlib import contextmanager
46
47 ## eyeD3 tag fettling.
48 import eyed3 as E3
49
50 ## Gstreamer. It picks up command-line arguments -- most notably `--help' --
51 ## and processes them itself. Of course, its help is completely wrong. This
52 ## kludge is due to Jonas Wagner.
53 _argv, SYS.argv = SYS.argv, []
54 import gobject as G
55 import gio as GIO
56 import gst as GS
57 SYS.argv = _argv
58
59 ## Python Imaging.
60 from PIL import Image as I
61
62 ## Python parsing.
63 import pyparsing as P
64
65 ###--------------------------------------------------------------------------
66 ### Special initialization.
67
68 VERSION = '@VERSION@'
69
70 ## GLib.
71 G.threads_init()
72
73 ###--------------------------------------------------------------------------
74 ### Eyecandy progress reports.
75
def charwidth(s):
  """
  Return the width of S, in character cells.

  Specifically, this is the number of backspace characters required to
  overprint the string S.  S may be a byte string, in which case it is
  decoded using the encoding of `stdout', or an already-decoded text string,
  which is measured directly.  If a byte string can't be decoded -- because
  `stdout' has no encoding, the encoding is unknown, or the bytes are
  malformed -- then assume that each byte takes up one cell.

  None of this handles tab characters in any kind of useful way.  Sorry.
  """

  if isinstance(s, bytes):

    ## If there's no encoding for stdout then we're doing something stupid.
    enc = getattr(SYS.stdout, 'encoding', None)
    if enc is None: return len(s)

    ## Turn the string into Unicode so we can hack on it properly.  Maybe
    ## that won't work out, in which case fall back to being stupid.  An
    ## unrecognized codec name is reported as `LookupError'.
    try: u = s.decode(enc)
    except (UnicodeError, LookupError): return len(s)

  else:

    ## Already text: there's nothing to decode.
    u = s

  ## Our main problem is combining characters, but we should also try to
  ## handle wide (mostly Asian) characters, and zero-width ones.  This hack
  ## is taken mostly from http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c
  w = 0
  for ch in u:
    if UD.category(ch) in ('Cf', 'Me', 'Mn') or \
          0x1160 <= ord(ch) <= 0x11ff:
      continue
    if UD.east_asian_width(ch) in ('F', 'W'): w += 2
    else: w += 1

  ## Done.
  return w
109
class StatusLine (object):
  """
  Maintains a status line containing ephemeral progress information.

  The status line isn't especially important, but it keeps interactive users
  amused.

  There should be only one status line object in your program; otherwise
  they'll interfere with each other and get confused.

  The update algorithm (in `set') is fairly careful to do the right thing
  with long status `lines', and to work properly in an Emacs `shell' buffer.

  Attributes:

  eyecandyp     True if `stdout' is a terminal, so that overprinting tricks
                are worthwhile.

  _last         The text most recently passed to `set'.

  _lastlen      The length of `_last'.
  """

  def __init__(me):
    "Initialize the status line."
    me._last = ''
    me._lastlen = 0

    ## Eyecandy only makes sense on a terminal.  If `stdout' has been
    ## replaced by something without a usable file descriptor (a string
    ## buffer, say) then it certainly isn't one, so just turn eyecandy off
    ## rather than falling over.
    try: me.eyecandyp = OS.isatty(SYS.stdout.fileno())
    except (AttributeError, ValueError, EnvironmentError):
      me.eyecandyp = False

  def set(me, line):
    """
    Set the status line contents to LINE, replacing what was there before.

    This only produces actual output if stdout is interactive.
    """
    n = len(line)

    ## Eyecandy update.
    if me.eyecandyp:

      ## If the old line was longer, we need to clobber its tail, so work out
      ## what that involves.
      if n < me._lastlen:
        b = charwidth(me._last[n:])
        pre = '\b'*b + ' '*b
      else:
        pre = ''

      ## Now figure out the length of the common prefix between what we had
      ## before and what we have now.  This reduces the amount of I/O done,
      ## which keeps network traffic down on SSH links, and keeps down the
      ## amount of work slow terminal emulators like Emacs have to do.
      i = 0
      m = min(n, me._lastlen)
      while i < m and line[i] == me._last[i]:
        i += 1

      ## Actually do the output, all in one syscall.
      b = charwidth(me._last[i:])
      SYS.stdout.write(pre + '\b'*b + line[i:])
      SYS.stdout.flush()

    ## Update our idea of what's gone on.
    me._lastlen = n
    me._last = line

  def clear(me):
    "Clear the status line.  Just like set('')."
    me.set('')

  def commit(me, line = None):
    """
    Commit the current status line, and maybe the string LINE.

    If the current status line is nonempty, then commit it to the transcript.
    If LINE is not None, then commit that to the transcript too.

    After all of this, we clear the status line to get back to a clean state.
    """
    if me._last:

      ## In eyecandy mode the text is already on the terminal, so we only
      ## need to move on to a fresh line; otherwise nothing has been written
      ## yet and we must print the whole thing.
      if me.eyecandyp:
        SYS.stdout.write('\n')
      else:
        SYS.stdout.write(me._last + '\n')
    if line is not None:
      SYS.stdout.write(line + '\n')
    me._lastlen = 0
    me._last = ''

STATUS = StatusLine()
191
def filestatus(file, status):
  """
  Return a status-line string reporting STATUS for FILE.

  Only the final path component of FILE is shown, indented by eight spaces.
  """
  indent = ' '*8
  leaf = OS.path.basename(file)
  return '%s%s: %s' % (indent, leaf, status)
194
class ProgressEyecandy (object):
  """
  Provide amusement while something big and complicated is happening.

  This is an abstract class.  Subclasses must provide a method `progress'
  returning a pair (CURRENT, MAX) indicating the current progress through the
  operation.  Either may be None if the progress isn't known.
  """

  ## Width of the progress bar, in characters.
  _BARWIDTH = 40

  def __init__(me, what, silentp = False):
    """
    Initialize a progress meter.

    WHAT is a prefix string to be written before the progress eyecandy
    itself.  If SILENTP is true then `done' says nothing on success.
    """
    me._what = what
    me._silentp = silentp
    me._spinner = 0
    me._start = T.time()

  def _fmt_time(me, t):
    "Format T (in seconds) as a time, in (maybe hours) minutes and seconds."
    secs, mins = t % 60, int(t/60)
    mins, hours = mins % 60, int(mins/60)
    if hours > 0:
      return '%d:%02d:%02d' % (hours, mins, secs)
    else:
      return '%02d:%02d' % (mins, secs)

  def show(me):
    "Show the current level of progress."

    ## If we're not showing pointless frippery, don't bother at all.
    if not STATUS.eyecandyp:
      return

    ## Update the spinner index.
    me._spinner = (me._spinner + 1)%4
    spin = r'/-\|'[me._spinner]

    ## Fetch the current progress information.  Note that we always fetch
    ## both the current and maximum levels, because both might change if an
    ## operation revises its idea of how much work needs doing.
    cur, end = me.progress()

    ## If we couldn't get progress information, display something vaguely
    ## amusing anyway.  A nonpositive maximum is no use either: we'd only
    ## end up dividing by zero or drawing a negative-length bar.
    if cur is None or end is None or end <= 0:
      STATUS.set('%s %c [unknown progress]' % (me._what, spin))
      return

    ## Work out -- well, guess -- the time remaining.  If we've overshot
    ## the maximum then there's nothing left to wait for.
    if cur > 0:
      elapsed = T.time() - me._start
      eta = me._fmt_time(ceil(elapsed*max(0, end - cur)/cur))
    else:
      eta = '???'

    ## Set the status bar.  Use explicit floor division so that the bar
    ## length is always an integer, and clamp it so that the bar never
    ## escapes from its brackets.
    n = min(me._BARWIDTH, max(0, int(me._BARWIDTH*cur//end)))
    STATUS.set('%s %c [%s%s] %3d%% (%s)' % \
                 (me._what,
                  spin,
                  '='*n, ' '*(me._BARWIDTH - n),
                  int(100*cur//end),
                  eta))

  def done(me, win = True):
    "Show a completion notice, or a failure if WIN is false."
    if not win:
      STATUS.set('%s FAILED!' % me._what)
    elif not me._silentp:
      STATUS.set('%s done (%s)' %
                 (me._what,
                  me._fmt_time(T.time() - me._start)))
    else:
      return
    STATUS.commit()
274
275 ###--------------------------------------------------------------------------
276 ### Timeout handling.
277
## Event set by `timeout' once the first grace period has expired.
KILLSWITCH = TH.Event()

def timeout(t0, t1):
  """
  Enforce a two-stage time limit on the program.

  Sleep for T0 seconds and then set KILLSWITCH.  Sleep for a further T1
  seconds and, if we're still running after that, complain (using `moan')
  and exit the process immediately with status 3.

  This blocks its caller for the whole time, so presumably it's meant to be
  run in a thread of its own -- confirm against the caller.
  """
  T.sleep(t0)
  KILLSWITCH.set()
  T.sleep(t1)
  moan('dying messily due to timeout')

  ## Use `_exit' rather than `exit': the latter only raises an exception.
  OS._exit(3)
286
287 ###--------------------------------------------------------------------------
288 ### Parsing utilities.
289
## Allow hyphens in identifiers.
IDCHARS = P.alphanums + '-_'
P.Keyword.setDefaultKeywordChars(IDCHARS)

## Some common kinds of tokens.
Name = P.Word(IDCHARS)
Num = P.Word(P.nums)
Num.setParseAction(lambda toks: [int(tok) for tok in toks])
String = P.QuotedString('"', '\\')

## Handy abbreviations for constructed parser elements.
def K(k):
  "Return a parser matching the keyword K, contributing no tokens."
  return P.Keyword(k).suppress()

def D(d):
  "Return a parser matching the literal delimiter D, contributing no tokens."
  return P.Literal(d).suppress()

def R(p):
  "Return a parser for zero or more of P, yielding the matches as one item."
  rep = P.ZeroOrMore(p)
  rep.setParseAction(lambda s, l, t: [t])
  return rep

O = P.Optional
304
305 ###--------------------------------------------------------------------------
306 ### Format identification and conversion.
307
class IdentificationFailure (Exception):
  """
  Reports that a file couldn't be identified.

  `FileCategory.identify' catches this from its identification function.
  """
310
class FileCategory (object):
  """
  A FileCategory represents a class of files.

  Audio files, say, or image files, each make a sensible category.  A
  category recognizes its members by matching their MIME content types.
  """

  def __init__(me, name, mime_pats, ident):
    """
    Construct a new category called NAME and register it in CATEGORYMAP.

    MIME_PATS is a list of `fnmatch' patterns to be compared with a file's
    MIME type.  IDENT is a function which, given a file's name and
    first-guess MIME type, produces an identification object.  That object
    is handed to a Format's `check' method to decide whether the file needs
    re-encoding, and to `convert' to help with the conversion.

    An identification object must have an attribute `mime' which is a set of
    possible MIME types accumulated for the object.
    """
    me.name = name
    me._mime_pats = mime_pats
    me._ident = ident
    CATEGORYMAP[name] = me

  def identify(me, file, mime):
    """
    Try to identify FILE, whose apparent MIME type is MIME.

    Return an identification object for the use of associated file formats
    if MIME matches one of our patterns and the identification function
    succeeds; otherwise return None.
    """
    for pat in me._mime_pats:
      if FN.fnmatchcase(mime, pat):
        try:
          return me._ident(file, mime)
        except IdentificationFailure:
          ## No luck this time; see whether another pattern matches.
          continue
    return None
353
class BaseFormat (object):
  """
  A BaseFormat object represents a particular encoding and parameters.

  Such an object can decide (the `check' method) whether a given file already
  meets its requirements and, if it doesn't, re-encode the file (`convert').

  Subclasses are expected to define these methods.

  check(ID)
        Answer whether the file identified by ID is acceptable according to
        the receiver's parameters.

  convert(MASTER, ID, TARGET)
        Convert the file MASTER, which has been identified as ID, according
        to the receiver's parameters, writing the output to TARGET.

  Subclasses are also expected to provide these attributes.

  CATEGORY
        A FileCategory object for the category of files that this format
        lives within.

  EXT   A file extension to be applied to encoded output files.

  NAME  A user-facing name for the format.

  PROPS A parser element to parse a property definition.  It should produce
        a pair NAME, VALUE to be stored in a dictionary.

  Subclasses for different kinds of file may introduce more subclass
  protocol.
  """

  def fixup(me, path):
    "Post-encoding fixups for the file at PATH: by default, do nothing."
    return None
391
## Registries: format classes and FileCategory objects, each by name.
FORMATMAP = {}
CATEGORYMAP = {}

def defformat(name, cls):
  """
  Define a format NAME using class CLS.

  CLS must have `NAME' and `CATEGORY' attributes; raise `ValueError' if
  either is missing.
  """
  for attr, complaint in [('NAME', 'abstract class'),
                          ('CATEGORY', 'no category')]:
    if not hasattr(cls, attr):
      raise ValueError(complaint)
  FORMATMAP[name] = cls
402
class FormatParser (P.ParserElement):
  """
  Parse a format specifier:

        format-spec ::= name [format-properties]
        format-properties ::= `{' format-property (`,' format-property)* `}'

  The NAME must be a key in FORMATMAP.  The syntax of a format-property is
  determined by the PROPS attribute on the named format and its
  superclasses.  The result of a successful parse is an instance of the
  named format's class, constructed with the parsed properties as keyword
  arguments.
  """

  name = 'format-spec'

  ## We cache the parser elements we generate to avoid enormous consing.
  ## The cache maps a format name to its property-list parser, or to None if
  ## the format accepts no properties at all.
  CACHE = {}

  def parseImpl(me, s, loc, actp = True):
    """
    Parse a format specifier from S starting at LOC.

    Return a pair (LOC, FORMAT) of the position following the specifier and
    the format object constructed from it.  Raise `ParseException' if the
    format name isn't known.  ACTP says whether parse actions should be run.
    """

    ## Firstly, determine the format name.
    loc, r = Name._parse(s, loc, actp)
    fmt = r[0]

    ## Look up the format class.
    try: fcls = FORMATMAP[fmt]
    except KeyError:
      raise P.ParseException(s, loc, "Unknown format `%s'" % fmt)

    ## Fetch the property-list parser from the cache, if possible; else
    ## construct it.
    try:
      pp = me.CACHE[fmt]
    except KeyError:

      ## Collect the distinct PROPS parsers along the class's MRO.  A class
      ## which doesn't define its own PROPS reports its parent's, hence the
      ## `seen' set.  Combine them with `|' rather than `|=': the latter
      ## appends in place if the left-hand side is already a `MatchFirst',
      ## and that would scribble on the class's own PROPS attribute.
      seen = set()
      alts = None
      for c in fcls.mro():
        try: p = c.PROPS
        except AttributeError: continue
        if p in seen: continue
        if alts is None: alts = p
        else: alts = alts | p
        seen.add(p)
      if alts is None:
        pp = me.CACHE[fmt] = None
      else:
        props = P.delimitedList(alts)
        props.setParseAction(lambda s, l, t: dict(t.asList()))
        pp = me.CACHE[fmt] = O(D('{') - props - D('}'))

    ## Parse the properties.
    if pp is None:
      pd = {}
    else:
      loc, r = pp._parse(s, loc, actp)
      if r: pd = r[0]
      else: pd = {}

    ## Construct the format object and return it.
    return loc, fcls(**pd)

Format = FormatParser()
463
def prop(kw, pval, tag = None):
  """
  Return a parser element for a format property introduced by keyword KW.

  If PVAL is None then the property is a bare flag, written just `KW', and
  its value is True.  Otherwise the property is written `KW = VALUE', where
  PVAL is a parser for the VALUE.  Either way, the element's parse action
  yields a pair (TAG, VALUE); TAG defaults to KW.
  """
  if tag is None:
    tag = kw
  if pval is None:
    elt = K(kw)
    action = lambda s, l, t: (tag, True)
  else:
    elt = K(kw) + D('=') + pval
    action = lambda s, l, t: (tag, t[0])
  elt.setParseAction(action)
  return elt
473
474 ###--------------------------------------------------------------------------
475 ### Policies and actions.
476
class Action (object):
  """
  An Action object represents a conversion action to be performed.

  Don't instantiate this class directly: it's here to establish the protocol
  which all Action objects share.

  Action objects have the following attributes.

  master        The name of the master (source) file.

  target        The name of the target (destination) file.

  PRIORITY      The priority of the action, for deciding which of two actions
                to perform.  Higher priorities are more likely to win.

  Converting an Action to a string describes the action in a simple
  user-readable manner.  The `perform' method actually carries the action
  out.
  """

  PRIORITY = 0

  def __init__(me, master):
    "Remember the name of the MASTER file."
    me.master = master

  def choose(me, him):
    """
    Return whichever of ME and HIM ought to be performed.

    ME wins if HIM is None or ME has strictly higher priority; otherwise HIM
    wins, so ties go to HIM.
    """
    if him is not None and me.PRIORITY <= him.PRIORITY:
      return him
    return me
510
class CopyAction (Action):
  """
  An Action object for simply copying a file.

  In fact a hardlink is attempted first, and a real copy is made only if the
  link fails because master and target are on different devices.  Linking is
  both faster and more efficient with regard to disk space.
  """

  ## Copying is good.  Linking is really good, but we can't tell the
  ## difference at this stage.
  PRIORITY = 10

  def __init__(me, master, targetdir):
    "Initialize a CopyAction, from MASTER to the TARGETDIR directory."
    Action.__init__(me, master)
    leaf = OS.path.basename(master)
    me.target = OS.path.join(targetdir, leaf)

  def __str__(me):
    return 'copy/link'

  def perform(me):
    "Actually perform a CopyAction."
    STATUS.set(filestatus(me.master, 'link'))
    try:
      OS.link(me.master, me.target)
    except OSError as err:

      ## Only a cross-device link is worth retrying as a copy.
      if err.errno != E.EXDEV:
        raise

      ## Copy to a scratch name and rename it into place.
      STATUS.set(filestatus(me.master, 'copy'))
      scratch = me.target + '.new'
      SH.copyfile(me.master, scratch)
      OS.rename(scratch, me.target)
    STATUS.commit()
544
class ConvertAction (Action):
  """
  An Action object for converting a file to a given format.

  Additional attributes:

  id            The identification object for the master file.

  format        The format to which we're meant to convert the master.
  """

  def __init__(me, master, targetdir, id, format):
    """
    Initialize a ConvertAction.

    The target lives in TARGETDIR, and is named after MASTER with its
    extension replaced by FORMAT's `EXT'.
    """
    Action.__init__(me, master)
    me.id = id
    me.format = format
    stem = OS.path.splitext(OS.path.basename(master))[0]
    me.target = OS.path.join(targetdir, stem + '.' + format.EXT)

  def __str__(me):
    return 'convert to %s' % me.format.NAME

  def perform(me):
    "Actually perform a ConvertAction."
    fmt = me.format
    STATUS.set(filestatus(me.master, me))
    fmt.convert(me.master, me.id, me.target)
571
## Forward declaration of the policy grammar.  It's filled in, using `<<',
## once the individual kinds of policy have been defined.
Policy = P.Forward()

class FormatPolicy (object):
  """
  A FormatPolicy object represents a set of rules for how to convert files.

  This class defines no behaviour of its own: it only documents the protocol
  which policy classes implement.

  Given a master file, the FormatPolicy will identify it and return a list of
  actions to be performed.  The methods required of a FormatPolicy are:

  setcategory(CAT)
        Store CAT as the policy's category.  Check that this is consistent
        with the policy as stored.

  actions(MASTER, TARGETDIR, ID, COHORT)
        Given a MASTER file, identified as ID, a target directory
        TARGETDIR, and a list COHORT of (FILE, ID) pairs for other files
        of the same category in the same directory, return a list of
        actions to be performed to get the target directory into the right
        form.  The list might be empty if the policy object /rejects/ the
        file.
  """
593
class AndPolicy (FormatPolicy):
  """
  A FormatPolicy which does the union of a bunch of other policies.

  Every subsidiary policy gets its say, in order.  If several of them
  propose actions for the same target file then only the one preferred by
  `choose' is kept.
  """

  def __init__(me, policies):
    me._policies = policies

  def setcategory(me, cat):
    me.cat = cat
    for sub in me._policies:
      sub.setcategory(cat)

  def actions(me, master, targetdir, id, cohort):
    ## Keep the preferred action for each target file.
    best = {}
    for sub in me._policies:
      for act in sub.actions(master, targetdir, id, cohort):
        rival = best.get(act.target)
        if rival is None: best[act.target] = act
        else: best[act.target] = act.choose(rival)
    return list(best.values())

And = K('and') - D('{') - R(Policy) - D('}')
And.setParseAction(lambda s, l, t: AndPolicy(t[0]))
622
class OrPolicy (FormatPolicy):
  """
  A FormatPolicy which tries other policies and uses the first that accepts.

  The subsidiary policies are consulted in order.  As soon as one of them
  proposes some actions, those actions are returned and the remaining
  policies aren't consulted.  If none of them accepts, the file is rejected.
  """

  def __init__(me, policies):
    me._policies = policies

  def setcategory(me, cat):
    me.cat = cat
    for sub in me._policies:
      sub.setcategory(cat)

  def actions(me, master, targetdir, id, cohort):
    for sub in me._policies:
      proposed = sub.actions(master, targetdir, id, cohort)
      if proposed:
        return proposed

    ## Nobody wanted it.
    return []

Or = K('or') - D('{') - R(Policy) - D('}')
Or.setParseAction(lambda s, l, t: OrPolicy(t[0]))
650
class AcceptPolicy (FormatPolicy):
  """
  A FormatPolicy which copies files in a particular format.

  The file is accepted, with a CopyAction, only if it and every other file
  in its cohort pass the format's `check'; otherwise it's rejected.
  """

  def __init__(me, format):
    me._format = format

  def setcategory(me, cat):
    fmt = me._format
    if fmt.CATEGORY is not cat:
      raise ValueError(
        "Accept format `%s' has category `%s', not `%s'" %
        (fmt.__class__.__name__, fmt.CATEGORY.name, cat.name))
    me.cat = cat

  def actions(me, master, targetdir, id, cohort):
    fmt = me._format

    ## Check the master first, and then the rest of the cohort, giving up
    ## at the first failure.
    if not fmt.check(id):
      return []
    for f, cid in cohort:
      if not fmt.check(cid):
        return []
    return [CopyAction(master, targetdir)]

Accept = K('accept') - Format
Accept.setParseAction(lambda s, l, t: AcceptPolicy(t[0]))
680
class ConvertPolicy (FormatPolicy):
  """
  A FormatPolicy which copies files in a particular format or converts if
  necessary.

  A file which already passes the format's `check' is copied with a
  CopyAction; anything else is converted with a ConvertAction.  This policy
  never rejects a file.
  """

  def __init__(me, format):
    me._format = format

  def setcategory(me, cat):
    """
    Store CAT as the policy's category.

    Raise `ValueError' if CAT isn't the category of our format.
    """
    if me._format.CATEGORY is not cat:
      ## This message used to say `Accept format', having been copied from
      ## AcceptPolicy; that pointed users at the wrong policy.
      raise ValueError(
        "Convert format `%s' has category `%s', not `%s'" %
        (me._format.__class__.__name__,
         me._format.CATEGORY.name, cat.name))
    me.cat = cat

  def actions(me, master, targetdir, id, cohort):
    "Return a one-element list: copy MASTER if it's suitable, else convert."
    if me._format.check(id):
      return [CopyAction(master, targetdir)]
    else:
      return [ConvertAction(master, targetdir, id, me._format)]

Convert = K('convert') - Format
Convert.setParseAction(lambda s, l, t: ConvertPolicy(t[0]))

Policy << (And | Or | Accept | Convert)
707
708 ###--------------------------------------------------------------------------
709 ### Audio handling, based on GStreamer.
710
def make_element(factory, name = None, **props):
  """
  Make and return a new GStreamer element.

  The element is made by the factory named FACTORY, and is called NAME.  Any
  further keyword arguments PROPS are set as properties on the new element.
  """
  element = GS.element_factory_make(factory, name)
  element.set_properties(**props)
  return element
716
class GStreamerProgressEyecandy (ProgressEyecandy):
  """
  Provide amusement while GStreamer is busy doing something.

  The GStreamerProgressEyecandy object is a context manager.  Wrap it round
  your GStreamer loop to provide progress information for an operation.
  """

  def __init__(me, what, elt, **kw):
    """
    Initialize a progress meter.

    WHAT is a prefix string to be written before the progress eyecandy
    itself.  ELT is a GStreamer element to interrogate to find the progress
    information.  Other keyword arguments are passed on to
    `ProgressEyecandy'.
    """
    me._elt = elt

    ## The GLib timeout source driving our updates, if there is one.
    me._id = None
    ProgressEyecandy.__init__(me, what, **kw)

  def _update(me):
    """
    Update the progress meter.

    This is called periodically by the GLib main event-processing loop.
    Returning true keeps the timeout alive.
    """
    me.show()
    return True

  ## An older name for the same thing.
  _timer = _update

  def progress(me):
    "Return the current progress as a pair (CURRENT, MAX)."

    ## Fetch the current progress information.  We get the duration each
    ## time, because (particularly with VBR-encoded MP3 inputs) the estimated
    ## duration can change as we progress.  Hopefully it settles down fairly
    ## soon.
    try:
      t, hunoz = me._elt.query_position(GS.FORMAT_TIME)
      end, hukairz = me._elt.query_duration(GS.FORMAT_TIME)
      return t, end
    except GS.QueryError:
      return None, None

  def __enter__(me):
    """
    Enter context: attach progress meter display.

    Return the progress meter itself, so that it can be bound using `as'.
    """

    ## Update regularly.  The pipeline runs asynchronously.  If we're not
    ## showing pointless frippery, don't bother at all.
    if STATUS.eyecandyp:
      me._id = G.timeout_add(200, me._update)
    return me

  def __exit__(me, ty, val, tb):
    "Leave context: remove display and report completion or failure."

    ## If we never attached a timeout, there's nothing to remove.
    if me._id is not None:
      G.source_remove(me._id)
      me._id = None

    ## Report completion anyway.
    me.done(ty is None)

    ## As you were: don't swallow any exception.
    return False
786
class AudioIdentifier (object):
  """
  Analyses and identifies an audio file.

  Important properties are:

  cap   A capabilities structure describing the audio file data.  The most
        interesting thing in here is probably its name, which is a MIME
        type describing the data.

  dcap  A capabilities structure describing the decoded audio data.  This
        is of interest during conversion.

  tags  A dictionary containing metadata tags from the file.  These are in
        GStreamer's encoding-independent format.

  mime  A set of possible MIME types for the file: the caller's first guess
        and the type reported by GStreamer.

  bitrate An approximation to the stream's bitrate, in kilobits per second.
        This might be slow to work out for some files so it's computed on
        demand.
  """

  def __init__(me, file, mime):
    """
    Initialize the object suitably for identifying FILE.

    MIME is the caller's first guess at the file's MIME type.  Raise
    `ValueError' if GStreamer reports an error while examining the file.
    """

    ## Make some initial GStreamer objects.  We'll want the pipeline later if
    ## we need to analyse a poorly tagged MP3 stream, so save it away.
    me._pipe = GS.Pipeline()
    me._file = file
    bus = me._pipe.get_bus()
    bus.add_signal_watch()
    loop = G.MainLoop()

    ## The basic recognition kit is based around `decodebin'.  We must keep
    ## it happy by giving it sinks for the streams it's found, which it
    ## announces asynchronously.
    source = make_element('filesrc', 'file', location = file)
    decoder = make_element('decodebin', 'decode')
    sink = make_element('fakesink')
    def decoder_pad_arrived(elt, pad):
      if pad.get_caps()[0].get_name().startswith('audio/'):
        elt.link_pads(pad.get_name(), sink, 'sink')
    dpaid = decoder.connect('pad-added', decoder_pad_arrived)
    me._pipe.add(source, decoder, sink)
    GS.element_link_many(source, decoder)

    ## Arrange to collect tags from the pipeline's bus as they're reported.
    ## If we reuse the pipeline later, we'll want different bus-message
    ## handling, so make sure we can take the signal handler away.  The
    ## `fail' list collects the debug text of an error message, if any.
    tags = {}
    fail = []
    def bus_message(bus, msg):
      if msg.type == GS.MESSAGE_ERROR:
        fail[:] = [msg.structure['debug']]
        loop.quit()
      elif msg.type == GS.MESSAGE_STATE_CHANGED:
        if msg.structure['new-state'] == GS.STATE_PAUSED and \
              msg.src == me._pipe:
          loop.quit()
      elif msg.type == GS.MESSAGE_TAG:
        tags.update(msg.structure)
    bmid = bus.connect('message', bus_message)

    ## We want to identify the kind of stream this is.  (Hmm.  The MIME type
    ## recognizer has already done this work, but GStreamer is probably more
    ## reliable.)  The `decodebin' has a `typefind' element inside which will
    ## announce the identified media type.  All we need to do is find it and
    ## attach a signal handler.  (Note that the handler might be run in the
    ## thread context of the pipeline element, but Python's GIL will keep
    ## things from being too awful.)
    me.cap = None
    me.dcap = None
    for e in decoder.elements():
      if e.get_factory().get_name() == 'typefind':
        tfelt = e
        break
    else:
      ## Raise explicitly: a plain `assert' vanishes under `-O', and then
      ## we'd trip over the unbound `tfelt' later instead.
      raise AssertionError('failed to find typefind element')

    ## Crank up most of the heavy machinery.  The message handler will stop
    ## the loop when things seem to be sufficiently well underway.
    me._pipe.set_state(GS.STATE_PAUSED)
    loop.run()
    bus.disconnect(bmid)
    decoder.disconnect(dpaid)
    if fail:
      me._pipe.set_state(GS.STATE_NULL)
      raise ValueError(fail[0])

    ## Store the collected tags.
    me.tags = tags

    ## Gather the capabilities.  The `typefind' element knows the input data
    ## type.  The 'decodebin' knows the raw data type.
    me.cap = tfelt.get_pad('src').get_negotiated_caps()[0]
    me.mime = set([mime, me.cap.get_name()])
    me.dcap = sink.get_pad('sink').get_negotiated_caps()[0]

    ## If we found a plausible bitrate then stash it.  Otherwise note that we
    ## failed.  If anybody asks then we'll work it out then.
    if 'nominal-bitrate' in tags:
      me._bitrate = tags['nominal-bitrate']//1000
    elif 'bitrate' in tags and tags['bitrate'] >= 80000:
      me._bitrate = tags['bitrate']//1000
    else:
      me._bitrate = None

    ## The bitrate computation wants the file size.  Ideally we'd want the
    ## total size of the frames' contents, but that seems hard to dredge
    ## out.  If the framing overhead is small, this should be close enough
    ## for our purposes.
    me._bytes = OS.stat(file).st_size

  def __del__(me):
    "Close the pipeline down so we don't leak file descriptors."
    me._pipe.set_state(GS.STATE_NULL)

  @property
  def bitrate(me):
    """
    Return the approximate bit-rate of the input file.

    This might take a while if we have to work it out the hard way, which
    involves running the whole file through the pipeline.  Raise
    `ValueError' if GStreamer reports an error while doing so.
    """

    ## If we already know the answer then just return it.
    if me._bitrate is not None:
      return me._bitrate

    ## Make up a new main loop.
    loop = G.MainLoop()

    ## Watch for bus messages.  We'll stop when we reach the end of the
    ## stream: then we'll have a clear idea of how long the track was.
    fail = []
    def bus_message(bus, msg):
      if msg.type == GS.MESSAGE_ERROR:
        fail[:] = [msg.structure['debug']]
        loop.quit()
      elif msg.type == GS.MESSAGE_EOS:
        loop.quit()
    bus = me._pipe.get_bus()
    bmid = bus.connect('message', bus_message)

    ## Get everything moving, and keep the user amused while we work.  Note
    ## that the status text names our own file, and that the pipeline is the
    ## element which the progress meter interrogates.
    me._pipe.set_state(GS.STATE_PLAYING)
    with GStreamerProgressEyecandy(filestatus(me._file, 'measure bitrate'),
                                   me._pipe,
                                   silentp = True):
      loop.run()
    bus.disconnect(bmid)
    if fail:
      me._pipe.set_state(GS.STATE_NULL)
      raise ValueError(fail[0])

    ## Now we should be able to find out our position accurately and work out
    ## a bitrate.  Cache it in case anybody asks again.  GStreamer times are
    ## in nanoseconds, so 8*BYTES*1e9/T is in bits per second; we want
    ## kilobits per second.
    t, hukairz = me._pipe.query_position(GS.FORMAT_TIME)
    me._bitrate = int(8*me._bytes*1e6/t)

    ## Done.
    return me._bitrate
948
949 class AudioFormat (BaseFormat):
950 """
951 An AudioFormat is a kind of Format specialized for audio files.
952
953 Format checks are done on an AudioIdentifier object.
954 """
955
956 PROPS = prop('bitrate', Num)
957
958 ## libmagic reports `application/ogg' for Ogg Vorbis files. We've switched
959 ## to GIO now, which reports either `audio/ogg' or `audio/x-vorbis+ogg'
960 ## depending on how thorough it's trying to be. Still, it doesn't do any
961 ## harm here; the main risk is picking up Ogg Theora files by accident, and
962 ## we'll probably be able to extract the audio from them anyway.
963 CATEGORY = FileCategory('audio', ['audio/*', 'application/ogg'],
964 AudioIdentifier)
965
966 def __init__(me, bitrate = None):
967 "Construct an object, requiring an approximate bitrate."
968 me.bitrate = bitrate
969
def check(me, id):
  """
  Return whether the AudioIdentifier ID is suitable for our purposes.

  ID is suitable if at least one of its candidate MIME types is acceptable
  and, if we've been given a bitrate, ID's bitrate is no more than about
  sqrt(2) times ours.

  Subclasses can either override this method or provide a property
  `MIMETYPES', which is a list (or other thing that implements
  `__contains__') of GStreamer MIME types matching this format.
  """

  ## Test membership one type at a time, rather than intersecting sets, so
  ## that `MIMETYPES' really can be any old container, as promised.
  if not any(m in me.MIMETYPES for m in id.mime):
    return False

  ## Only ask for the file's bitrate if we have to: it can be slow to find.
  return me.bitrate is None or id.bitrate <= me.bitrate * sqrt(2)
980
def encoder(me):
  """
  Construct and return a GStreamer element to encode audio input.

  The default implementation calls a method `encoder_chain', which
  subclasses should provide, to get a list of elements; these are linked
  together in sequence and wrapped up in a bin.  The first element in the
  chain must have a pad named `sink' and the last must have a pad named
  `src': these are exposed as the bin's own pads.

  Alternatively, subclasses can override this method, or replace `encode'
  entirely.
  """
  chain = me.encoder_chain()
  wrapper = GS.Bin()
  wrapper.add(*chain)
  GS.element_link_many(*chain)

  ## Expose the two ends of the chain as pads on the bin.
  for padname, elt in [('sink', chain[0]), ('src', chain[-1])]:
    wrapper.add_pad(GS.GhostPad(padname, elt.get_pad(padname)))
  return wrapper
998
def convert(me, master, id, target):
  """
  Encode audio from MASTER, already identified as ID, writing it to TARGET.

  The audio is decoded and re-encoded by a GStreamer pipeline whose
  encoding stage comes from `encoder'.  Output goes to a scratch file
  `TARGET.new', which is handed to `fixup' and then renamed to TARGET once
  everything has worked.

  Raises `ValueError', carrying GStreamer's debug text, if the pipeline
  reports an error, and `OSError' if an old scratch file can't be removed.

  See `encoder' for subclasses' responsibilities.
  """

  ## Construct the necessary equipment.
  pipe = GS.Pipeline()
  bus = pipe.get_bus()
  loop = G.MainLoop()

  ## Make sure that there isn't anything in the way of our output.  We're
  ## going to write to a scratch file so that we don't get confused by
  ## half-written rubbish left by a crashed program.
  new = target + '.new'
  try:
    OS.unlink(new)
  except OSError as err:
    if err.errno != E.ENOENT:
      raise

  ## Piece together our pipeline.  The annoying part is that the
  ## `decodebin' doesn't have any source pads yet, so our chain is in two
  ## halves for now.
  source = make_element('filesrc', 'source', location = master)
  decoder = make_element('decodebin', 'decode')
  conv = make_element('audioconvert', 'convert')
  encoder = me.encoder()
  sink = make_element('filesink', 'sink', location = new)
  pipe.add(source, decoder, conv, encoder, sink)
  GS.element_link_many(source, decoder)
  GS.element_link_many(conv, encoder, sink)

  ## Some decoders (e.g., the AC3 decoder) include channel-position
  ## indicators in their output caps.  The Vorbis encoder interferes with
  ## this, and you end up with a beautifully encoded mono signal from a
  ## stereo source.  From a quick butchers at the `vorbisenc' source, I
  ## /think/ that this is only a problem with stereo signals: mono signals
  ## are mono already, and `vorbisenc' accepts channel positions if there
  ## are more than two channels.
  ##
  ## So we have this bodge.  We already collected the decoded audio caps
  ## during identification.  So if we see 2-channel audio with channel
  ## positions, we strip the positions off forcibly by adding a filter.
  if id.dcap.get_name().startswith('audio/x-raw-') and \
     id.dcap.has_field('channels') and \
     id.dcap['channels'] == 2 and \
     id.dcap.has_field('channel-positions'):
    dcap = GS.Caps()
    c = id.dcap.copy()
    c.remove_field('channel-positions')
    dcap.append(c)
  else:
    dcap = None

  ## Hook onto the `decodebin' so we can link together the two halves of
  ## our encoding chain.  For now, we'll hope that there's only one audio
  ## stream in there, and just throw everything else away.
  def decoder_pad_arrived(elt, pad):
    if pad.get_caps()[0].get_name().startswith('audio/'):
      if dcap:
        elt.link_pads_filtered(pad.get_name(), conv, 'sink', dcap)
      else:
        elt.link_pads(pad.get_name(), conv, 'sink')
  decoder.connect('pad-added', decoder_pad_arrived)

  ## Watch the bus for completion messages.  On error, remember an
  ## exception to raise once the pipeline has been shut down; the most
  ## recent error wins.
  fail = []
  def bus_message(bus, msg):
    if msg.type == GS.MESSAGE_ERROR:
      fail[:] = [ValueError(msg.structure['debug'])]
      loop.quit()
    elif msg.type == GS.MESSAGE_EOS:
      loop.quit()
  bus.add_signal_watch()
  bmid = bus.connect('message', bus_message)

  ## Get everything ready and let it go.  Whatever happens, shut the
  ## pipeline down again and release the bus watch and handler: every
  ## `add_signal_watch' needs a matching `remove_signal_watch', or we leak
  ## a watch for each file we convert.
  try:
    pipe.set_state(GS.STATE_PLAYING)
    with GStreamerProgressEyecandy(filestatus(master,
                                              'convert to %s' % me.NAME),
                                   pipe):
      loop.run()
  finally:
    pipe.set_state(GS.STATE_NULL)
    bus.disconnect(bmid)
    bus.remove_signal_watch()
  if fail:
    raise fail[0]

  ## Fix up the output file if we have to.
  me.fixup(new)

  ## We're done.
  OS.rename(new, target)
1092
class OggVorbisFormat (AudioFormat):
  """
  Audio format which encodes Vorbis audio in an Ogg container.

  A requested bitrate is translated into a `vorbisenc' quality setting by
  way of the `QMAP' table.
  """

  NAME = 'Ogg Vorbis'
  EXT = 'ogg'
  MIMETYPES = set(['application/ogg', 'audio/x-vorbis', 'audio/ogg',
                   'audio/x-vorbis+ogg'])

  ## Pairs (QUALITY, BITRATE), in ascending order, giving the nominal
  ## bitrate for each quality level.  From
  ## https://en.wikipedia.org/wiki/Vorbis
  QMAP = [(-1,  45), ( 0,  64), ( 1,  80), ( 2,  96),
          ( 3, 112), ( 4, 128), ( 5, 160), ( 6, 192),
          ( 7, 224), ( 8, 256), ( 9, 320), (10, 500)]

  def encoder_chain(me):
    """
    Return the list of elements which encode to Ogg Vorbis.

    If a bitrate was requested, pick the first quality level in `QMAP'
    whose nominal bitrate is at least that large, and pass it to the
    encoder scaled down by a factor of ten.  Raises `ValueError' if the
    table has no such level.
    """
    props = {}
    if me.bitrate is not None:
      quality = next((q for q, br in me.QMAP if br >= me.bitrate), None)
      if quality is None:
        raise ValueError('no suitable quality setting found')
      props['quality'] = quality/10.0
    return [make_element('vorbisenc', **props),
            make_element('oggmux')]

defformat('ogg-vorbis', OggVorbisFormat)
1119
class MP3Format (AudioFormat):
  """
  Audio format which encodes MP3 files using the `lame' element.
  """

  EXT = 'mp3'
  NAME = 'MP3'
  MIMETYPES = set(['audio/mpeg'])

  def encoder_chain(me):
    """
    Return the list of elements which encode to MP3.

    A requested bitrate is passed to `lame' as its `vbr_mean_bitrate'
    property; the encoder's output is run through `xingmux' and
    `id3v2mux'.
    """
    extra = {}
    if me.bitrate is not None:
      extra['vbr_mean_bitrate'] = me.bitrate
    lame = make_element('lame', vbr = 4, **extra)
    return [lame, make_element('xingmux'), make_element('id3v2mux')]

  def fixup(me, path):
    """
    Fix up the freshly written MP3 file at PATH.

    GStreamer produces ID3v2 tags, but not ID3v1, which seems unnecessarily
    unkind to stupid players.  So load the file's tag and write it back,
    first as ID3v2.3 and then as ID3v1.  If the file can't be loaded, or
    has no tag, then there's nothing to do.  A version which can't be
    written is quietly skipped.
    """
    audio = E3.load(path)
    if audio is None or audio.tag is None:
      return
    for version in (E3.id3.ID3_V2_3, E3.id3.ID3_V1):
      try:
        audio.tag.save(version = version)
      except (UnicodeEncodeError,
              E3.id3.GenreException,
              E3.id3.TagException):
        ## Best effort only: not every tag can be written in every version.
        pass

defformat('mp3', MP3Format)
1153
1154 ###--------------------------------------------------------------------------
1155 ### Image handling, based on the Python Imaging Library.
1156
class ImageIdentifier (object):
  """
  Identifies an image file by getting PIL to open it.

  The opened Image object is left in the `img' attribute for inspection,
  and the file's MIME type, as a one-element set, in `mime'.
  """

  def __init__(me, file, mime):
    """
    Open the image FILE, whose MIME type is MIME.

    Raises `IdentificationFailure' if PIL doesn't recognize the file;
    genuine I/O errors are passed on unchanged.
    """

    ## PIL works out for itself what kind of file this is.  Unhelpfully, it
    ## raises `IOError' when it can't; but such an error has no `errno', and
    ## that's how we tell it apart from a real I/O failure.
    try:
      img = I.open(file)
    except IOError as exc:
      if exc.errno is not None:
        raise
      raise IdentificationFailure

    me.img = img
    me.mime = set([mime])
1180
class ImageFormat (BaseFormat):
  """
  A Format specialized for image files, using PIL to do the work.

  Concrete subclasses need only provide the properties required of every
  concrete Format subclass; but their `NAME' must be PIL's `format' name
  for the format, because it is compared against the format of identified
  images and handed to PIL when saving.
  """

  PROPS = prop('size', Num)
  CATEGORY = FileCategory('image', ['image/*'], ImageIdentifier)

  def __init__(me, size = None, **kw):
    """
    Initialize an ImageFormat object.

    SIZE, if given, is the largest acceptable width or height.  Any other
    keywords are kept and passed to PIL when an image is saved; they may
    also be recognized by enhanced `check' methods in subclasses.
    """
    me._props = kw
    me._size = size

  def check(me, id):
    "Return whether the image identified by ID already meets our needs."
    img = id.img

    ## It must already be in our format...
    if img.format != me.NAME:
      return False

    ## ... and, if there's a size limit, fit within it both ways.
    limit = me._size
    if limit is None:
      return True
    return img.size[0] <= limit and img.size[1] <= limit

  def convert(me, master, id, target):
    "Encode the file MASTER, identified as ID, writing the result to TARGET."

    ## The output goes to a scratch file first.
    scratch = target + '.new'

    ## The identifier has already opened the image, so use its copy rather
    ## than opening the file again.
    picture = id.img
    STATUS.set(filestatus(master, 'convert to %s' % me.NAME))

    ## Scale the image down if it's over the stated maximum size.
    ## Thumbnailing works in place, so do it to a copy and leave the
    ## identifier's image alone.
    limit = me._size
    if limit is not None and \
       (picture.size[0] > limit or picture.size[1] > limit):
      picture = picture.copy()
      picture.thumbnail((limit, limit), I.ANTIALIAS)

    ## Write the image, let the subclass tidy it, and move it into place.
    picture.save(scratch, me.NAME, **me._props)
    me.fixup(scratch)
    OS.rename(scratch, target)
    STATUS.commit()
1238
class JPEGFormat (ImageFormat):
  """
  Image format which writes JPEG (strictly speaking, JFIF) files.

  Properties which can usefully be set:

  optimize      If present, the encoder takes a second pass to select
                optimal settings.

  progressive   If present, the output is a progressive file.

  quality       An integer from 1 (worst) to 100 (best); the default
                is 75.
  """
  NAME = 'JPEG'
  EXT = 'jpg'
  PROPS = (prop('optimize', None) |
           prop('progressive', None, 'progression') |
           prop('quality', Num))

defformat('jpeg', JPEGFormat)
1260
class PNGFormat (ImageFormat):
  """
  Image format which writes PNG files.

  Properties which can usefully be set:

  optimize      If present, extra effort is spent making the output file
                as small as possible.
  """
  NAME = 'PNG'
  EXT = 'png'
  PROPS = prop('optimize', None)

defformat('png', PNGFormat)
1275
class BMPFormat (ImageFormat):
  """
  Image format which writes Windows BMP files, as used by RockBox.

  There are no additional properties to set.
  """
  EXT = 'bmp'
  NAME = 'BMP'

defformat('bmp', BMPFormat)
1286
1287 ###--------------------------------------------------------------------------
1288 ### Remaining parsing machinery.
1289
## A `type' block names a file category and lists the policies for it.
## NOTE(review): `K', `D' and `R' are defined elsewhere; presumably keyword,
## delimiter and repetition helpers -- confirm.
Type = K('type') - Name - D('{') - R(Policy) - D('}')
def build_type(s, l, t):
  """
  Parse action for `Type': return the policy for a parsed `type' block.

  S is the string being parsed and L the location of the match within it;
  of the tokens T, T[0] is the category name and T[1] the collection of
  policies found in the block.

  A lone policy is used as it stands; otherwise the policies are combined
  into an `AndPolicy'.  Either way, the policy is told its category before
  being returned.

  Raises `ParseException', located at L, if the category name isn't in
  `CATEGORYMAP'.
  """
  try:
    cat = CATEGORYMAP[t[0]]
  except KeyError:
    ## Report the problem at the location we were given, so that the
    ## message points at the offending block.
    raise P.ParseException(s, l, "Unknown category `%s'" % t[0])
  pols = t[1]
  if len(pols) == 1: pol = pols[0]
  else: pol = AndPolicy(pols)
  pol.setcategory(cat)
  return pol
Type.setParseAction(build_type)
1302
## The target jobs performed so far, in the order they were performed.
TARGETS = []

class TargetJob (object):
  """
  A job describing one target directory and the policies to apply to it.

  The directory is held in `targetdir' and the policies in `policies'.
  """

  def __init__(me, targetdir, policies):
    "Remember the TARGETDIR and its POLICIES."
    me.targetdir, me.policies = targetdir, policies

  def perform(me):
    "Add this job to the global `TARGETS' list."
    TARGETS.append(me)
1310
## A `target' block names a target directory and holds `type' blocks.
Target = K('target') - String - D('{') - R(Type) - D('}')
def build_target(s, l, t):
  """
  Parse action for `Target': make a `TargetJob' from the tokens T.

  T[0] is the target directory and T[1] the policies built from the
  block's `type' entries.  S and L are not used.
  """
  targetdir, policies = t[0], t[1]
  return TargetJob(targetdir, policies)
Target.setParseAction(build_target)
1315
## Global configuration variables.  Only `master' is known initially, and
## it starts out unset.
VARS = { 'master': None }

class VarsJob (object):
  """
  A job which assigns configuration variables.

  The `vars' attribute holds the (NAME, VALUE) pairs to be assigned.
  """

  def __init__(me, vars):
    "Remember VARS, an iterable of (NAME, VALUE) pairs."
    me.vars = vars

  def perform(me):
    "Store each of our pairs in the global `VARS' dictionary, in order."
    for name, value in me.vars:
      VARS[name] = value
1323
## A `vars' block holds variable settings; `master' is the only variable.
Var = prop('master', String)
Vars = K('vars') - D('{') - R(Var) - D('}')
def build_vars(s, l, t):
  """
  Parse action for `Vars': make a `VarsJob' from the tokens T.

  T[0] holds the settings found in the block.  S and L are not used.
  """
  settings = t[0]
  return VarsJob(settings)
Vars.setParseAction(build_vars)
1329
## Each top-level item in a configuration file is either a `vars' block or
## a `target' block.
TopLevel = Vars | Target

## NOTE(review): `R' is defined elsewhere; presumably it makes this a
## repeated sequence of top-level items, collected into one group --
## confirm.
Config = R(TopLevel)

## Skip `#'-to-end-of-line comments anywhere in the configuration.
Config.ignore(P.pythonStyleComment)
1333
1334 ###--------------------------------------------------------------------------
1335 ### The directory grobbler.
1336
def grobble(master, targets, noact = False):
  """
  Work through the MASTER directory, writing converted files to TARGETS.

  The TARGETS are a list of `TargetJob' objects, each describing a target
  directory and the policies to apply to it.

  If NOACT is true, then don't actually do anything permanent to the
  filesystem: just report what would have been done.

  Returns a list of (FILENAME, EXCEPTION) pairs, one for each master file
  or directory which couldn't be processed because of an I/O error.
  """

  ## Transform the targets into a more convenient data structure: for each
  ## target, a map from category to the list of that category's policies.
  tpolmap = []
  for t in targets:
    pmap = {}
    tpolmap.append(pmap)
    for p in t.policies: pmap.setdefault(p.cat, []).append(p)

  ## Keep track of the current position in the master tree.
  dirs = []

  ## And the files which haven't worked.
  broken = []

  def grobble_file(master, pmap, targetdir, cohorts):
    ## Convert MASTER, writing the result to TARGETDIR.
    ##
    ## The COHORTS are actually (CAT, ID, COHORT) triples, where a COHORT is
    ## a list of (FILENAME, ID) pairs.
    ##
    ## Since this function might convert the MASTER file, the caller doesn't
    ## know the name of the output files, so we return them as a list.

    done = set()
    st_m = OS.stat(master)

    ## Work through each category listed and apply its policy.
    for cat, ident, cohort in cohorts:

      ## Go through the category's policies and see if any match.  If we fail
      ## here, see if there are more categories to try.
      for pol in pmap[cat]:
        acts = pol.actions(master, targetdir, ident, cohort)
        if acts: break
      else:
        continue

      ## Work through the targets one by one.
      for a in acts:
        done.add(a.target)

        ## Find out whether the target file already exists and is up-to-date
        ## with respect to the master.  (Caution here with low-resolution
        ## timestamps.)  If it's OK, then just move on.
        try:
          st_t = OS.stat(a.target)
          if st_m.st_mtime < st_t.st_mtime or \
             (st_m.st_ino, st_m.st_dev) == (st_t.st_ino, st_t.st_dev):
            continue
        except OSError as err:
          if err.errno not in (E.ENOENT, E.ENOTDIR):
            raise

        ## We have real work to do.  If there's a current status message,
        ## it's the containing directory so flush it so that people know
        ## where we are.
        STATUS.commit()

        ## Remove the target.  (A hardlink will fail if the target already
        ## exists.)
        if not noact:
          try:
            OS.unlink(a.target)
          except OSError as err:
            if err.errno not in (E.ENOENT, E.ENOTDIR):
              raise

        ## Do whatever it is we decided to do.
        if noact:
          STATUS.commit(filestatus(master, a))
        else:
          a.perform()

    ## We're done.  Return the names of the targets.
    return list(done)

  @contextmanager
  def wrap(masterfile):
    ## Handle exceptions found while trying to convert a particular file or
    ## directory.

    try:
      yield masterfile

    ## Something bad happened.  Report the error, but continue.  (This list
    ## of exceptions needs a lot of work.)
    except (IOError, OSError) as exc:
      STATUS.clear()
      STATUS.commit(filestatus(masterfile, 'failed (%s)' % exc))
      broken.append((masterfile, exc))

  def grobble_dir(master, targets):
    ## Recursively convert files in MASTER, writing them to the TARGETS.

    ## Keep track of the subdirectories we encounter, because we'll need to
    ## do all of those in one go at the end.
    subdirs = set()

    ## Work through each target directory in turn.
    for target, pmap in zip(targets, tpolmap):

      ## Make sure the TARGET exists and is a directory.  It's a fundamental
      ## assumption of this program that the entire TARGET tree is
      ## disposable, so if something exists but isn't a directory, we should
      ## kill it.
      if OS.path.isdir(target):
        pass
      else:
        if OS.path.exists(target):
          STATUS.commit(filestatus(target, 'clear nondirectory'))
          if not noact:
            OS.unlink(target)
        STATUS.commit(filestatus(target, 'create directory'))
        if not noact:
          OS.mkdir(target)

      ## Keep a list of things in the target.  As we convert files, we'll
      ## check them off.  Anything left over is rubbish and needs to be
      ## deleted.
      checklist = {}
      try:
        for i in OS.listdir(target):
          checklist[i] = False
      except OSError as err:
        if err.errno not in (E.ENOENT, E.ENOTDIR):
          raise

      ## Keep track of the files in each category.
      catmap = {}
      todo = []
      done = []

      ## Work through the master files.
      for f in sorted(OS.listdir(master)):

        ## If the killswitch has been pulled then stop.  The whole idea is
        ## that we want to cause a clean shutdown if possible, so we don't
        ## want to do it in the middle of encoding because the encoding
        ## effort will have been wasted.  This is the only place we need to
        ## check.  If we've exited the loop, then clearing old files will
        ## probably be fast, and we'll either end up here when the recursive
        ## call returns or we'll be in the same boat as before, clearing old
        ## files, only up a level.  If worst comes to worst, we'll be killed
        ## forcibly somewhere inside `SH.rmtree', and that can continue where
        ## it left off.
        if KILLSWITCH.is_set():
          return

        ## Do something with the file.
        with wrap(OS.path.join(master, f)) as masterfile:

          ## If it's a directory then prepare to grobble it recursively, but
          ## don't do that yet.
          if OS.path.isdir(masterfile):
            subdirs.add(f)
            done.append(OS.path.join(target, f))

          ## Otherwise it's a file.  Work out what kind, and stash it under
          ## the appropriate categories.  Later, we'll apply policy to the
          ## files, by category, and work out what to do with them all.
          else:
            gf = GIO.File(masterfile)
            mime = gf.query_info('standard::content-type').get_content_type()
            cats = []
            for cat in pmap:
              ident = cat.identify(masterfile, mime)
              if ident is None: continue
              catmap.setdefault(cat, []).append((masterfile, ident))
              cats.append((cat, ident))

            ## A file which nobody recognizes is filed under `None', with no
            ## identification.  Say so explicitly rather than reusing the
            ## loop variable, which is unbound if this target has no
            ## policies at all.
            if not cats:
              catmap.setdefault(None, []).append((masterfile, None))
            todo.append((masterfile, cats))

      ## Work through the categorized files to see what actions to do for
      ## them.
      for masterfile, cats in todo:
        with wrap(masterfile):
          done += grobble_file(masterfile, pmap, target,
                               [(cat, ident, catmap[cat])
                                for cat, ident in cats])

      ## Check the results off the list so that we don't clear it later.
      for f in done:
        checklist[OS.path.basename(f)] = True

      ## Maybe there's stuff in the target which isn't accounted for.  Delete
      ## it: either the master has changed, or the policy for this target has
      ## changed.  Either way, the old files aren't wanted.
      for f in checklist:
        if not checklist[f]:
          STATUS.commit(filestatus(f, 'clear bogus file'))
          if not noact:
            bogus = OS.path.join(target, f)
            try:
              if OS.path.isdir(bogus):
                SH.rmtree(bogus)
              else:
                OS.unlink(bogus)
            except OSError as err:
              if err.errno != E.ENOENT:
                raise

    ## If there are subdirectories which want processing then do those.
    ## Keep the user amused by telling him where we are in the tree.
    for d in sorted(subdirs):
      dirs.append(d)
      STATUS.set('/'.join(dirs))
      with wrap(OS.path.join(master, d)) as masterdir:
        try:
          grobble_dir(masterdir,
                      [OS.path.join(target, d) for target in targets])
        finally:
          dirs.pop()
          STATUS.set('/'.join(dirs))

  ## Right.  We're ready to go.
  grobble_dir(master, [t.targetdir for t in targets])
  return broken
1564
1565 ###--------------------------------------------------------------------------
1566 ### Command-line interface.
1567
## The name this program was invoked under, for labelling diagnostics.
QUIS = OS.path.basename(SYS.argv[0])

def moan(msg):
  "Write the warning MSG to standard error, labelled with the program name."
  line = '%s: %s\n' % (QUIS, msg)
  SYS.stderr.write(line)
1573
def die(msg):
  "Report the fatal error MSG using `moan', and exit with status 1."
  moan(msg)
  raise SystemExit(1)
1578
def parse_opts(args):
  """
  Parse command-line arguments in ARGS, and act on them.

  Besides parsing, this function enables progress eyecandy if `-i' was
  given; starts the timeout thread if `-t' was given; and reads the
  configuration file named by the single non-option argument, performing
  each of the jobs it describes.

  Returns the options object built by the option parser.  Problems with
  the command line are reported through the option parser, which exits the
  program.
  """

  ## Build the option parser object.
  op = OP.OptionParser(prog = QUIS, version = VERSION,
                       usage = '%prog [-in] [-t TIMEOUT] [-T TIMEOUT] '
                         'CONFIG',
                       description = """\
Convert a directory tree of files according to the configuration file
CONFIG.
""")

  ## Timeout handling.  A time is a nonnegative integer with an optional
  ## unit suffix: `s', `m', `h' or `d'.  The pattern is anchored at the end
  ## so that trailing rubbish (e.g., `5x') is rejected rather than ignored.
  units = { '': 1, 's': 1, 'm': 60, 'h': 3600, 'd': 86400 }
  def cb_time(opt, ostr, arg, op):
    m = RX.match(r'\s*(\d+)\s*([dhms]?)\s*$', arg)
    if not m:
      raise OP.OptionValueError('bad time value `%s\'' % arg)
    t, u = m.groups()
    setattr(op.values, opt.dest, int(t) * units[u])
  op.add_option('-t', '--timeout', type = 'string', metavar = 'SECS',
                dest = 'timeout',
                help = 'stop processing nicely after SECS',
                action = 'callback', callback = cb_time)
  op.add_option('-T', '--timeout-nasty', type = 'string', metavar = 'SECS',
                dest = 'timeout_nasty',
                help = 'stop processing unpleasantly after further SECS',
                action = 'callback', callback = cb_time)

  ## Other options.
  op.add_option('-i', '--interactive', action = 'store_true', dest = 'tty',
                help = 'provide progress information')
  op.add_option('-n', '--no-act', action = 'store_true', dest = 'noact',
                help = 'don\'t actually modify the filesystem')

  ## Ready to rock.
  op.set_defaults(formats = [], noact = False,
                  timeout = None, timeout_nasty = 300)
  opts, args = op.parse_args(args)

  ## Check that we got the non-option arguments that we want.
  if len(args) != 1:
    op.error('wrong number of arguments')

  ## Act on the options.  The timeout runs in a daemon thread so that it
  ## doesn't keep the program alive once the real work is finished.
  if opts.tty:
    STATUS.eyecandyp = True
  if opts.timeout is not None:
    to = TH.Thread(target = timeout,
                   args = (opts.timeout, opts.timeout_nasty))
    to.daemon = True
    to.start()

  ## Parse the configuration file, and carry out the jobs it describes.
  with open(args[0]) as conf:
    jobs, = Config.parseFile(conf, True)
    for j in jobs:
      j.perform()

  return opts
1644
if __name__ == '__main__':
  opts = parse_opts(SYS.argv[1:])

  ## `VARS' always has a `master' entry, which is `None' until the
  ## configuration sets it; so test the value, not whether the key exists.
  if VARS.get('master') is None:
    die("no master directory set")

  ## Do the work, and report anything which couldn't be converted.
  broken = grobble(VARS['master'], TARGETS, opts.noact)
  if broken:
    moan('failed to convert some files:')
    for path, exc in broken:
      moan('%s: %s' % (path, exc))
    SYS.exit(1)

  ## This is basically a successful completion: we did what we were asked to
  ## do.  It seems polite to report a message, though.
  ##
  ## Why don't we have a nonzero exit status?  The idea would be that a
  ## calling script would be interested that we used up all of our time, and
  ## not attempt to convert some other directory as well.  But that doesn't
  ## quite work.  Such a script would need to account correctly for time we
  ## had spent even if we complete successfully.  And if the script is having
  ## to watch the clock itself, it can do that without our help here.
  if KILLSWITCH.is_set():
    moan('killed by timeout')
1667
1668 ###----- That's all, folks --------------------------------------------------