Initial import of unaltered files.
[autoys] / gremlin / gremlin
1 #! /usr/bin/python
2 ###
3 ### Convert a directory tree of audio files
4 ###
5 ### (c) 2010 Mark Wooding
6 ###
7
8 ###----- Licensing notice ---------------------------------------------------
9 ###
10 ### This program is free software; you can redistribute it and/or modify
11 ### it under the terms of the GNU General Public License as published by
12 ### the Free Software Foundation; either version 2 of the License, or
13 ### (at your option) any later version.
14 ###
15 ### This program is distributed in the hope that it will be useful,
16 ### but WITHOUT ANY WARRANTY; without even the implied warranty of
17 ### MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 ### GNU General Public License for more details.
19 ###
20 ### You should have received a copy of the GNU General Public License
21 ### along with this program; if not, write to the Free Software Foundation,
22 ### Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
23
24 ###--------------------------------------------------------------------------
25 ### External dependencies.
26
27 ## Language features.
28 from __future__ import with_statement
29
30 ## Standard Python libraries.
31 import sys as SYS
32 import os as OS
33 import errno as E
34 import time as T
35 import unicodedata as UD
36 import fnmatch as FN
37 import re as RX
38 import shutil as SH
39 import optparse as OP
40 import threading as TH
41 import shlex as L
42 from math import sqrt
43 from contextlib import contextmanager
44
45 ## eyeD3 tag fettling.
46 import eyeD3 as E3
47
48 ## Gstreamer. It picks up command-line arguments -- most notably `--help' --
49 ## and processes them itself. Of course, its help is completely wrong. This
50 ## kludge is due to Jonas Wagner.
51 _argv, SYS.argv = SYS.argv, []
52 import gobject as G
53 import gio as GIO
54 import gst as GS
55 SYS.argv = _argv
56
57 ## Python Imaging.
58 from PIL import Image as I
59
60 ## Python parsing.
61 import pyparsing as P
62
63 ###--------------------------------------------------------------------------
64 ### Special initialization.
65
66 VERSION = '1.0.0~pre'
67
68 ## GLib.
69 G.threads_init()
70
71 ###--------------------------------------------------------------------------
72 ### Eyecandy progress reports.
73
def charwidth(s):
    """
    Return the width of S, in terminal character cells.

    Specifically, this is the number of backspace characters required to
    overprint the string S.

    S may be a byte string or a Unicode string.  A byte string is decoded
    using the encoding of `stdout'; if `stdout' has no encoding, or the
    string can't be decoded, we assume one cell per item and return
    `len(S)'.  A Unicode string is measured directly.

    Combining and format characters (and Hangul medial/final jamo) count as
    zero cells; East Asian wide and fullwidth characters count as two; all
    other characters count as one.

    None of this handles tab characters in any kind of useful way.
    """

    ## Text which is already Unicode needs no decoding.  (Calling `decode'
    ## on it would round-trip through ASCII and fail for exactly the
    ## characters we care about.)
    if isinstance(s, type(u'')):
        u = s
    else:

        ## If there's no encoding for stdout then we can't do anything clever.
        encoding = getattr(SYS.stdout, 'encoding', None)
        if encoding is None:
            return len(s)

        ## Turn the string into Unicode so we can examine it properly.  Maybe
        ## that won't work out, in which case fall back to counting items.
        try:
            u = s.decode(encoding)
        except (UnicodeError, LookupError):
            return len(s)

    ## Our main problem is combining characters, but we also handle wide
    ## (mostly Asian) characters, and zero-width ones.  This is taken mostly
    ## from http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c
    w = 0
    for ch in u:
        if UD.category(ch) in ('Cf', 'Me', 'Mn') or \
           0x1160 <= ord(ch) <= 0x11ff:
            continue
        if UD.east_asian_width(ch) in ('F', 'W'):
            w += 2
        else:
            w += 1

    ## Done.
    return w
108
class StatusLine(object):
    """
    Maintains a status line containing ephemeral progress information.

    The status line isn't especially important, but it keeps interactive
    users amused.

    There should be only one status line object in your program; otherwise
    they'll interfere with each other and get confused.

    The update algorithm (in `set') is fairly careful to do the right thing
    with long status `lines', and to work properly in an Emacs `shell'
    buffer.

    Attributes:

    eyecandyp       True if `stdout' is a terminal, so that overprinting
                    updates are worth emitting.
    """

    def __init__(me):
        "Initialize the status line."
        me._last = ''
        me._lastlen = 0

        ## `stdout' might have been replaced by something which has no file
        ## descriptor at all (or it might be closed).  That's certainly not
        ## an interactive terminal, so quietly turn the eyecandy off rather
        ## than failing.
        try:
            me.eyecandyp = OS.isatty(SYS.stdout.fileno())
        except (AttributeError, ValueError, EnvironmentError):
            me.eyecandyp = False

    def set(me, line):
        """
        Set the status line contents to LINE, replacing what was there before.

        This only produces actual output if stdout is interactive; the new
        contents are remembered either way.
        """
        n = len(line)

        ## Eyecandy update.
        if me.eyecandyp:

            ## If the old line was longer, we need to clobber its tail: back
            ## up over it and overwrite it with spaces.
            if n < me._lastlen:
                b = charwidth(me._last[n:])
                pre = '\b'*b + ' '*b
            else:
                pre = ''

            ## Now figure out the length of the common prefix between what we
            ## had before and what we have now.  This reduces the amount of
            ## I/O done, which keeps network traffic down on SSH links, and
            ## keeps down the amount of work slow terminal emulators have to
            ## do.
            i = len(OS.path.commonprefix([line, me._last]))

            ## Actually do the output, all in one write: back up to the end
            ## of the common prefix and print the part which changed.
            b = charwidth(me._last[i:])
            SYS.stdout.write(pre + '\b'*b + line[i:])
            SYS.stdout.flush()

        ## Update our idea of what's gone on.
        me._lastlen = n
        me._last = line

    def clear(me):
        "Clear the status line.  Just like set('')."
        me.set('')

    def commit(me, line = None):
        """
        Commit the current status line, and maybe the string LINE.

        If the current status line is nonempty, then commit it to the
        transcript: on a terminal it's already visible, so just end the line;
        otherwise write it out in full.  If LINE is not None, then commit
        that to the transcript too.

        After all of this, we reset the status line to a clean state.
        """
        if me._last:
            if me.eyecandyp:
                SYS.stdout.write('\n')
            else:
                SYS.stdout.write(me._last + '\n')
        if line is not None:
            SYS.stdout.write(line + '\n')
        me._lastlen = 0
        me._last = ''

## The one and only status line.
STATUS = StatusLine()
193
def filestatus(file, status):
    """
    Return a status-line string reporting STATUS for FILE.

    Only the final path component of FILE is shown, indented by eight
    spaces.
    """
    indent = ' '*8
    leaf = OS.path.basename(file)
    return '%s%s: %s' % (indent, leaf, status)
196
class ProgressEyecandy(object):
    """
    Provide amusement while something big and complicated is happening.

    This is an abstract class.  Subclasses must provide a method `progress'
    returning a pair (CURRENT, MAX) indicating the current progress through
    the operation.  Either may be None if the progress isn't known.
    """

    ## Width of the progress bar, in characters.
    _BARWIDTH = 40

    def __init__(me, what, silentp = False):
        """
        Initialize a progress meter.

        WHAT is a prefix string to be written before the progress eyecandy
        itself.  If SILENTP is true then successful completion isn't
        reported by `done' (failures still are).
        """
        me._what = what
        me._silentp = silentp
        me._spinner = 0
        me._start = T.time()

    def _fmt_time(me, t):
        "Format T (seconds) as a time, in (maybe hours) minutes and seconds."
        mins, s = divmod(t, 60)
        h, m = divmod(int(mins), 60)
        if h > 0:
            return '%d:%02d:%02d' % (h, m, s)
        else:
            return '%02d:%02d' % (m, s)

    def _status_text(me, cur, total):
        """
        Return the status-line text for progress CUR out of TOTAL.

        If either is None, or TOTAL isn't positive (so that no sensible
        fraction can be computed), report unknown progress rather than
        dividing by zero.  CUR is clamped to the range 0 to TOTAL so that an
        overshoot, or an `unknown' negative position, can't distort the bar.
        """
        spin = r'/-\|'[me._spinner]

        ## If we couldn't get usable progress information, display something
        ## vaguely amusing anyway.
        if cur is None or total is None or total <= 0:
            return '%s %c [unknown progress]' % (me._what, spin)

        ## Work out -- well, guess -- the time remaining, by assuming that
        ## the rest will go at the same average rate as the work so far.
        if cur > 0:
            elapsed = T.time() - me._start
            eta = me._fmt_time(elapsed*max(total - cur, 0)/cur)
        else:
            eta = '???'

        ## Build the bar.
        frac = min(max(cur, 0), total)
        n = int(me._BARWIDTH*frac//total)
        return '%s %c [%s%s] %3d%% (%s)' % \
               (me._what, spin,
                '='*n, ' '*(me._BARWIDTH - n),
                100*frac//total,
                eta)

    def show(me):
        "Show the current level of progress."

        ## If we're not showing pointless frippery, don't bother at all.
        if not STATUS.eyecandyp:
            return

        ## Update the spinner index.
        me._spinner = (me._spinner + 1)%4

        ## Fetch the current progress information.  Note that we always fetch
        ## both the current and maximum levels, because both might change if
        ## an operation revises its idea of how much work needs doing.
        cur, total = me.progress()

        ## Set the status bar.
        STATUS.set(me._status_text(cur, total))

    def done(me, win = True):
        "Show a completion notice, or a failure if WIN is false."
        if not win:
            STATUS.set('%s FAILED!' % me._what)
        elif not me._silentp:
            STATUS.set('%s done (%s)' %
                       (me._what,
                        me._fmt_time(T.time() - me._start)))
        else:
            return
        STATUS.commit()
275 STATUS.commit()
276
277 ###--------------------------------------------------------------------------
278 ### Timeout handling.
279
## Event which is set once the first timeout period has expired.
KILLSWITCH = TH.Event()

def timeout(t0, t1):
    """
    Enforce a two-stage timeout.

    Sleep for T0 seconds and then set `KILLSWITCH'.  Then sleep for a
    further T1 seconds; if we're still running after that, complain and exit
    the whole process immediately with status 3.

    NOTE(review): this blocks its caller for the whole period, so presumably
    it's meant to be run in a thread of its own -- confirm against the
    caller.
    """

    ## First stage: raise the flag.
    T.sleep(t0)
    KILLSWITCH.set()

    ## Second stage: nobody took the hint, so bail out without any cleanup.
    T.sleep(t1)
    moan('dying messily due to timeout')
    OS._exit(3)
288
289 ###--------------------------------------------------------------------------
290 ### Parsing utilities.
291
## Allow hyphens (and underscores) in identifiers and keywords.
IDCHARS = P.alphanums + '-_'
P.Keyword.setDefaultKeywordChars(IDCHARS)

## Some common kinds of tokens: a name, a decimal integer (converted to an
## `int'), and a double-quoted string with backslash escapes.
Name = P.Word(IDCHARS)
Num = P.Word(P.nums).setParseAction(lambda toks: [int(tok) for tok in toks])
String = P.QuotedString('"', '\\')

## Handy abbreviations for constructed parser elements.

def K(k):
    "Return a parser element matching the keyword K, producing no token."
    return P.Keyword(k).suppress()

def D(d):
    "Return a parser element matching the literal D, producing no token."
    return P.Literal(d).suppress()

def R(p):
    """
    Return a parser element matching zero or more repetitions of P.

    Unlike a plain `ZeroOrMore', the matched tokens are wrapped up as a
    single item in the result.
    """
    return P.ZeroOrMore(p).setParseAction(lambda s, l, t: [t])

O = P.Optional
307
308 ###--------------------------------------------------------------------------
309 ### Format identification and conversion.
310
class IdentificationFailure(Exception):
    """
    Raised by an identification function which can't make sense of a file.

    `FileCategory.identify' catches this and carries on.
    """
313
class FileCategory(object):
    """
    A FileCategory represents a class of files.

    For example, it's sensible to consider audio, or image files as a
    category.  A file category knows how to recognize member files from
    MIME content types.
    """

    def __init__(me, name, mime_pats, ident):
        """
        Construct a new category, and register it in `CATEGORYMAP' as NAME.

        The MIME_PATS are a list of `fnmatch' patterns to be compared with a
        MIME type.  The IDENT is a function which produces an identification
        object given a file's name and first-guess MIME type.  The object is
        passed to a Format's `check' method to see whether a file needs
        re-encoding, and to `convert' to assist with the conversion.

        An identification object must have an attribute `mime' which is a
        set of possible MIME types accumulated for the object.
        """
        me.name, me._mime_pats, me._ident = name, mime_pats, ident
        CATEGORYMAP[name] = me

    def identify(me, file, mime):
        """
        Attempt to identify FILE, given its apparent MIME type.

        Each of the category's patterns which matches MIME (case-sensitively)
        gets a chance: the identification function is called, and if it
        raises `IdentificationFailure' we move on to the next pattern.

        If identification succeeds, return an identification object which
        can be used by associated file formats; otherwise return None.
        """
        for pat in me._mime_pats:
            if FN.fnmatchcase(mime, pat):
                try:
                    return me._ident(file, mime)
                except IdentificationFailure:
                    pass
        return None
356
class BaseFormat(object):
    """
    A BaseFormat object represents a particular encoding and parameters.

    The object can verify (the `check' method) whether a particular file
    matches its requirements, and if necessary (`convert') re-encode a file.

    Subclasses should define the following methods.

    check(ID)
            Answer whether the file identified by ID is acceptable according
            to the receiver's parameters.

    convert(MASTER, ID, TARGET)
            Convert the file MASTER, which has been identified as ID,
            according to the receiver's parameters, writing the output to
            TARGET.

    Subclasses should also provide these attributes.

    CATEGORY
            A FileCategory object for the category of files that this format
            lives within.

    EXT     A file extension to be applied to encoded output files.

    NAME    A user-facing name for the format.

    PROPS   A parser element to parse a property definition.  It should
            produce a pair NAME, VALUE to be stored in a dictionary.

    Subclasses for different kinds of file may introduce more subclass
    protocol.
    """

    def fixup(me, path):
        """
        Post-encoding fixups.

        The default does nothing with PATH; subclasses may override.
        """
        return None
394
## Registries: format classes and file categories, both indexed by name.
FORMATMAP = {}
CATEGORYMAP = {}

def defformat(name, cls):
    """
    Define a format NAME using class CLS.

    CLS must have both a `NAME' and a `CATEGORY' attribute; otherwise
    `ValueError' is raised and nothing is registered.
    """
    for attr, complaint in [('NAME', 'abstract class'),
                            ('CATEGORY', 'no category')]:
        if not hasattr(cls, attr):
            raise ValueError(complaint)
    FORMATMAP[name] = cls
405
class FormatParser(P.ParserElement):
    """
    Parse a format specifier:

    format-spec ::= string [format-properties]
    format-properties ::= `{' format-property (`,' format-property)* `}'

    The syntax of a format-property is determined by the PROPS attribute on
    the named format and its superclasses.  The result of a successful parse
    is an instance of the named format class, constructed with the parsed
    properties as keyword arguments.
    """

    ## We cache the parser elements we generate to avoid enormous consing.
    ## The cache maps a format name to its property-list parser, or to None
    ## if the format has no properties at all.
    CACHE = {}

    def _props_parser(me, fmt, fcls):
        """
        Return the property-list parser for the format FMT, with class FCLS.

        The parser is fetched from the cache if possible, and otherwise
        constructed and cached.  The result is None if neither the class nor
        any of its superclasses defines PROPS.
        """
        try:
            return me.CACHE[fmt]
        except KeyError:
            pass

        ## Collect the alternatives from the class and its ancestors.  A
        ## class which doesn't override PROPS shows its parent's, so take
        ## care to include each one only once.
        seen = set()
        alts = None
        for c in fcls.mro():
            try: p = c.PROPS
            except AttributeError: continue
            if p in seen: continue
            if alts is None: alts = p
            else: alts |= p
            seen.add(p)

        ## Build the actual parser, and remember it for next time.
        if alts is None:
            pp = None
        else:
            props = P.delimitedList(alts)
            props.setParseAction(lambda s, l, t: dict(t.asList()))
            pp = O(D('{') - props - D('}'))
        me.CACHE[fmt] = pp
        return pp

    def parseImpl(me, s, loc, actp = True):

        ## Firstly, determine the format name.
        loc, r = Name._parse(s, loc, actp)
        fmt = r[0]

        ## Look up the format class.
        try: fcls = FORMATMAP[fmt]
        except KeyError:
            raise P.ParseException(s, loc, "Unknown format `%s'" % fmt)

        ## Parse the properties, if this format can have any.
        pd = {}
        pp = me._props_parser(fmt, fcls)
        if pp is not None:
            loc, r = pp._parse(s, loc, actp)
            if r: pd = r[0]

        ## Construct the format object and return it.
        return loc, fcls(**pd)

Format = FormatParser()
464
def prop(kw, pval, tag = None):
    """
    Return a parser element for a format property introduced by keyword KW.

    If PVAL is None then the property is a simple flag: the keyword alone is
    matched, and the value is True.  Otherwise the syntax is `KW = value',
    where the value is parsed by the element PVAL.

    Either way, the element produces a pair (TAG, VALUE); TAG defaults to
    KW.
    """
    if tag is None:
        tag = kw
    if pval is None:
        p = K(kw)
        action = lambda s, l, t: (tag, True)
    else:
        p = K(kw) + D('=') + pval
        action = lambda s, l, t: (tag, t[0])
    p.setParseAction(action)
    return p
474
475 ###--------------------------------------------------------------------------
476 ### Policies and actions.
477
class Action(object):
    """
    An Action object represents a conversion action to be performed.

    This class isn't intended to be instantiated directly.  It exists to
    define some protocol common to all Action objects.

    Action objects have the following attributes.

    master  The name of the master (source) file.

    target  The name of the target (destination) file.

    PRIORITY
            The priority of the action, for deciding which of two actions to
            perform.  Higher priorities are more likely to win.

    Converting an Action to a string describes the action in a simple
    user-readable manner.  The `perform' method actually carries the action
    out.
    """

    PRIORITY = 0

    def __init__(me, master):
        "Stash the MASTER file name for later."
        me.master = master

    def choose(me, him):
        """
        Choose either ME or HIM and return one.

        HIM may be None, in which case ME wins by default.  Otherwise the
        action with the strictly higher priority wins, with ties going to
        HIM.
        """
        if him is not None and me.PRIORITY <= him.PRIORITY:
            return him
        return me
511
class CopyAction(Action):
    """
    An Action object for simply copying a file.

    Actually we try to hardlink it first, falling back to a copy later.
    This is both faster and more efficient with regard to disk space.
    """

    ## Copying is good.  Linking is really good, but we can't tell the
    ## difference at this stage.
    PRIORITY = 10

    def __init__(me, master, targetdir):
        "Initialize a CopyAction, from MASTER to the TARGETDIR directory."
        Action.__init__(me, master)
        leaf = OS.path.basename(master)
        me.target = OS.path.join(targetdir, leaf)

    def __str__(me):
        return 'copy/link'

    def perform(me):
        """
        Actually perform a CopyAction.

        Try to make a hard link.  If that fails because the master and
        target are on different filesystems, copy the file to a scratch name
        and rename it into place instead.  Any other error is propagated.
        """
        try:
            STATUS.set(filestatus(me.master, 'link'))
            OS.link(me.master, me.target)
        except OSError:
            ## (Fetch the exception this way because it works the same in
            ## every Python version.)
            err = SYS.exc_info()[1]
            if err.errno != E.EXDEV:
                raise
            STATUS.set(filestatus(me.master, 'copy'))
            scratch = me.target + '.new'
            SH.copyfile(me.master, scratch)
            OS.rename(scratch, me.target)
        STATUS.commit()
545
class ConvertAction(Action):
    """
    An Action object for converting a file to a given format.

    Additional attributes:

    id      The identification object for the master file.

    format  The format to which we're meant to convert the master.
    """

    def __init__(me, master, targetdir, id, format):
        """
        Initialize a ConvertAction.

        The target lives in TARGETDIR, and is named after MASTER with its
        extension replaced by the FORMAT's `EXT'.
        """
        Action.__init__(me, master)
        stem = OS.path.splitext(OS.path.basename(master))[0]
        me.target = OS.path.join(targetdir, '.'.join((stem, format.EXT)))
        me.id = id
        me.format = format

    def __str__(me):
        return 'convert to %s' % me.format.NAME

    def perform(me):
        "Actually perform a ConvertAction."
        STATUS.set(filestatus(me.master, me))
        me.format.convert(me.master, me.id, me.target)
572
## The policy grammar is recursive, so declare the top-level element now; its
## definition is filled in once all of the alternatives exist.
Policy = P.Forward()
574
class FormatPolicy(object):
    """
    A FormatPolicy object represents a set of rules for how to convert files.

    Given a master file, the FormatPolicy will identify it and return a list
    of actions to be performed.  This class defines no behaviour of its own:
    the methods required of a FormatPolicy are as follows.

    setcategory(CAT)
            Store CAT as the policy's category.  Check that this is
            consistent with the policy as stored.

    actions(MASTER, TARGETDIR, ID, COHORT)
            Given a MASTER file, identified as ID, a target directory
            TARGETDIR, and a list COHORT of (FILE, ID) pairs for other files
            of the same category in the same directory, return a list of
            actions to be performed to get the target directory into the
            right form.  The list might be empty if the policy object
            /rejects/ the file.
    """
594
class AndPolicy(FormatPolicy):
    """
    A FormatPolicy which does the union of a bunch of other policies.

    Each subsidiary policy is invoked in turn.  The highest-priority action
    for each target file is returned.
    """

    def __init__(me, policies):
        "Initialize the policy, given a list of subsidiary POLICIES."
        me._policies = policies

    def setcategory(me, cat):
        "Store CAT as our category, and pass it on to the subsidiaries."
        me.cat = cat
        for sub in me._policies:
            sub.setcategory(cat)

    def actions(me, master, targetdir, id, cohort):
        "Return the winning action for each target the subsidiaries propose."

        ## Map each target file name to the best action seen so far.
        best = {}
        for sub in me._policies:
            for act in sub.actions(master, targetdir, id, cohort):
                if act.target not in best:
                    best[act.target] = act
                else:
                    best[act.target] = act.choose(best.get(act.target))
        return best.values()

And = K('and') - D('{') - R(Policy) - D('}')
And.setParseAction(lambda s, l, t: AndPolicy(t[0]))
623
class OrPolicy(FormatPolicy):
    """
    A FormatPolicy which tries other policies and uses the first that
    accepts.

    Each subsidiary policy is invoked in turn.  If any accepts, the actions
    it proposes are returned and no further policies are invoked.  If none
    accepts then the file is rejected.
    """

    def __init__(me, policies):
        "Initialize the policy, given a list of subsidiary POLICIES."
        me._policies = policies

    def setcategory(me, cat):
        "Store CAT as our category, and pass it on to the subsidiaries."
        me.cat = cat
        for sub in me._policies:
            sub.setcategory(cat)

    def actions(me, master, targetdir, id, cohort):
        "Return the actions of the first subsidiary to propose any."
        for sub in me._policies:
            proposed = sub.actions(master, targetdir, id, cohort)
            if proposed:
                return proposed

        ## Nobody wanted it.
        return []

Or = K('or') - D('{') - R(Policy) - D('}')
Or.setParseAction(lambda s, l, t: OrPolicy(t[0]))
651
class AcceptPolicy(FormatPolicy):
    """
    A FormatPolicy which copies files in a particular format.

    If all of the files in a cohort are recognized as being in a particular
    format (including this one), then accept it with a CopyAction; otherwise
    reject.
    """

    def __init__(me, format):
        "Initialize the policy to accept files matching FORMAT."
        me._format = format

    def setcategory(me, cat):
        """
        Store CAT as our category.

        Raise `ValueError' if our format belongs to some other category.
        """
        fmt = me._format
        if fmt.CATEGORY is not cat:
            raise ValueError(
                "Accept format `%s' has category `%s', not `%s'" %
                (fmt.__class__.__name__, fmt.CATEGORY.name, cat.name))
        me.cat = cat

    def actions(me, master, targetdir, id, cohort):
        "Propose copying MASTER if it and its whole COHORT pass the check."
        fmt = me._format

        ## The master itself must be acceptable...
        if not fmt.check(id):
            return []

        ## ... and so must everything else alongside it.
        for _, cid in cohort:
            if not fmt.check(cid):
                return []
        return [CopyAction(master, targetdir)]

Accept = K('accept') - Format
Accept.setParseAction(lambda s, l, t: AcceptPolicy(t[0]))
681
class ConvertPolicy(FormatPolicy):
    """
    A FormatPolicy which copies files in a particular format or converts if
    necessary.

    Unlike AcceptPolicy, this never rejects a file, and pays no attention to
    the rest of the cohort.
    """

    def __init__(me, format):
        "Initialize the policy to produce files matching FORMAT."
        me._format = format

    def setcategory(me, cat):
        """
        Store CAT as our category.

        Raise `ValueError' if our format belongs to some other category.
        """
        if me._format.CATEGORY is not cat:
            ## (The message names the `convert' policy, so that the user is
            ## pointed at the right part of the configuration.)
            raise ValueError(
                "Convert format `%s' has category `%s', not `%s'" %
                (me._format.__class__.__name__,
                 me._format.CATEGORY.name, cat.name))
        me.cat = cat

    def actions(me, master, targetdir, id, cohort):
        "Propose copying MASTER if it passes the check, or converting if not."
        if me._format.check(id):
            return [CopyAction(master, targetdir)]
        else:
            return [ConvertAction(master, targetdir, id, me._format)]

Convert = K('convert') - Format
Convert.setParseAction(lambda s, l, t: ConvertPolicy(t[0]))

## Now that all of the alternatives exist, complete the recursive grammar.
Policy << (And | Or | Accept | Convert)
708
709 ###--------------------------------------------------------------------------
710 ### Audio handling, based on GStreamer.
711
def make_element(factory, name = None, **props):
    """
    Return a new element from the FACTORY with the given NAME and PROPS.

    The PROPS are keyword arguments, set as properties on the new element.
    """
    element = GS.element_factory_make(factory, name)
    element.set_properties(**props)
    return element
717
class GStreamerProgressEyecandy(ProgressEyecandy):
    """
    Provide amusement while GStreamer is busy doing something.

    The GStreamerProgressEyecandy object is a context manager.  Wrap it
    round your GStreamer loop to provide progress information for an
    operation.
    """

    def __init__(me, what, elt, **kw):
        """
        Initialize a progress meter.

        WHAT is a prefix string to be written before the progress eyecandy
        itself.  ELT is a GStreamer element to interrogate to find the
        progress information.  Other keyword arguments are passed on to
        ProgressEyecandy.
        """
        me._elt = elt
        ProgressEyecandy.__init__(me, what, **kw)

    def _update(me):
        """
        Called by GLib main event loop to update the eyecandy.

        Returns True, so that the timer keeps running.
        """
        me.show()
        return True

    def _timer(me):
        """
        Update the progress meter.

        This is called periodically by the GLib main event-processing loop.
        Returns True, so that the timer keeps running.
        """
        me.show()
        return True

    def progress(me):
        """
        Return the current progress as a pair (CURRENT, MAX).

        Both are None if the element can't answer the queries.
        """

        ## Fetch the current progress information.  We get the duration each
        ## time, because (particularly with VBR-encoded MP3 inputs) the
        ## estimated duration can change as we progress.  Hopefully it
        ## settles down fairly soon.
        try:
            pos, _posfmt = me._elt.query_position(GS.FORMAT_TIME)
            dur, _durfmt = me._elt.query_duration(GS.FORMAT_TIME)
        except GS.QueryError:
            return None, None
        return pos, dur

    def __enter__(me):
        "Enter context: attach progress meter display."

        ## Only bother if we're showing pointless frippery.  The pipeline
        ## runs asynchronously, so arrange to update regularly.
        if STATUS.eyecandyp:
            me._id = G.timeout_add(200, me._update)

    def __exit__(me, ty, val, tb):
        "Leave context: remove display and report completion or failure."

        ## If we're not showing pointless frippery, there's nothing to
        ## remove.
        if STATUS.eyecandyp:
            G.source_remove(me._id)

        ## Report completion anyway: we won if there was no exception.
        me.done(ty is None)

        ## As you were: don't swallow any exception.
        return False
787
class AudioIdentifier(object):
    """
    Analyses and identifies an audio file.

    Important properties are:

    cap     A capabilities structure describing the audio file data.  The
            most interesting thing in here is probably its name, which is a
            MIME type describing the data.

    dcap    A capabilities structure describing the decoded audio data.
            This is of interest during conversion.

    mime    A set of MIME types for the file: the caller's first guess,
            and the one GStreamer decided on.

    tags    A dictionary containing metadata tags from the file.  These are
            in GStreamer's encoding-independent format.

    bitrate An approximation to the stream's bitrate, in kilobits per
            second.  This might be slow to work out for some files so it's
            computed on demand.
    """

    def __init__(me, file, mime):
        """
        Initialize the object suitably for identifying FILE.

        MIME is the caller's first guess at the file's MIME type.  Raises
        `ValueError' if the GStreamer pipeline reports an error.
        """

        ## Make some initial GStreamer objects.  We'll want the pipeline
        ## later if we need to analyse a poorly tagged MP3 stream, so save it
        ## away.
        me._pipe = GS.Pipeline()
        me._file = file
        bus = me._pipe.get_bus()
        bus.add_signal_watch()
        loop = G.MainLoop()

        ## The basic recognition kit is based around `decodebin'.  We must
        ## keep it happy by giving it sinks for the streams it's found, which
        ## it announces asynchronously.
        source = make_element('filesrc', 'file', location = file)
        decoder = make_element('decodebin', 'decode')
        sink = make_element('fakesink')
        def decoder_pad_arrived(elt, pad):
            if pad.get_caps()[0].get_name().startswith('audio/'):
                elt.link_pads(pad.get_name(), sink, 'sink')
        dpaid = decoder.connect('pad-added', decoder_pad_arrived)
        me._pipe.add(source, decoder, sink)
        GS.element_link_many(source, decoder)

        ## Arrange to collect tags from the pipeline's bus as they're
        ## reported.  If we reuse the pipeline later, we'll want different
        ## bus-message handling, so make sure we can take the signal handler
        ## away.
        tags = {}
        fail = []
        def bus_message(bus, msg):
            if msg.type == GS.MESSAGE_ERROR:
                fail[:] = (ValueError, msg.structure['debug'])
                loop.quit()
            elif msg.type == GS.MESSAGE_STATE_CHANGED:
                if msg.structure['new-state'] == GS.STATE_PAUSED and \
                   msg.src == me._pipe:
                    loop.quit()
            elif msg.type == GS.MESSAGE_TAG:
                tags.update(msg.structure)
        bmid = bus.connect('message', bus_message)

        ## We want to identify the kind of stream this is.  (Hmm.  The MIME
        ## type recognizer has already done this work, but GStreamer is
        ## probably more reliable.)  The `decodebin' has a `typefind' element
        ## inside which will announce the identified media type.  All we need
        ## to do is find it.
        me.cap = None
        me.dcap = None
        for e in decoder.elements():
            if e.get_factory().get_name() == 'typefind':
                tfelt = e
                break
        else:
            assert False, 'failed to find typefind element'

        ## Crank up most of the heavy machinery.  The message handler will
        ## stop the loop when things seem to be sufficiently well underway.
        me._pipe.set_state(GS.STATE_PAUSED)
        loop.run()
        bus.disconnect(bmid)
        decoder.disconnect(dpaid)
        if fail:
            me._pipe.set_state(GS.STATE_NULL)
            raise fail[0](fail[1])

        ## Store the collected tags.
        me.tags = tags

        ## Gather the capabilities.  The `typefind' element knows the input
        ## data type.  The `decodebin' knows the raw data type.
        me.cap = tfelt.get_pad('src').get_negotiated_caps()[0]
        me.mime = set([mime, me.cap.get_name()])
        me.dcap = sink.get_pad('sink').get_negotiated_caps()[0]

        ## If we found a plausible bitrate then stash it.  Otherwise note
        ## that we failed.  If anybody asks then we'll work it out then.
        if 'nominal-bitrate' in tags:
            me._bitrate = tags['nominal-bitrate']/1000
        elif 'bitrate' in tags and tags['bitrate'] >= 80000:
            me._bitrate = tags['bitrate']/1000
        else:
            me._bitrate = None

        ## The bitrate computation wants the file size.  Ideally we'd want
        ## the total size of the frames' contents, but that seems hard to
        ## dredge out.  If the framing overhead is small, this should be
        ## close enough for our purposes.
        me._bytes = OS.stat(file).st_size

    def __del__(me):
        "Close the pipeline down so we don't leak file descriptors."
        me._pipe.set_state(GS.STATE_NULL)

    @property
    def bitrate(me):
        """
        Return the approximate bit-rate of the input file.

        This might take a while if we have to work it out the hard way, by
        playing the whole stream through to find out how long it is.  The
        answer is cached.  Raises `ValueError' if the pipeline reports an
        error.
        """

        ## If we already know the answer then just return it.
        if me._bitrate is not None:
            return me._bitrate

        ## Make up a new main loop.
        loop = G.MainLoop()

        ## Watch for bus messages.  We'll stop when we reach the end of the
        ## stream: then we'll have a clear idea of how long the track was.
        fail = []
        def bus_message(bus, msg):
            if msg.type == GS.MESSAGE_ERROR:
                fail[:] = (ValueError, msg.structure['debug'])
                loop.quit()
            elif msg.type == GS.MESSAGE_EOS:
                loop.quit()
        bus = me._pipe.get_bus()
        bmid = bus.connect('message', bus_message)

        ## Get everything moving, and keep the user amused while we work.
        ## The progress meter wants a description for the file we were given
        ## at construction time, and the pipeline to interrogate.
        me._pipe.set_state(GS.STATE_PLAYING)
        with GStreamerProgressEyecandy(filestatus(me._file,
                                                  'measure bitrate'),
                                       me._pipe,
                                       silentp = True):
            loop.run()
        bus.disconnect(bmid)
        if fail:
            me._pipe.set_state(GS.STATE_NULL)
            raise fail[0](fail[1])

        ## Now we should be able to find out our position accurately and
        ## work out a bitrate.  The position is in nanoseconds, so the factor
        ## of 1e6 gives an answer in kilobits per second.  Cache it in case
        ## anybody asks again.
        t, hukairz = me._pipe.query_position(GS.FORMAT_TIME)
        me._bitrate = int(8*me._bytes*1e6/t)

        ## Done.
        return me._bitrate
949
950 class AudioFormat (BaseFormat):
951 """
952 An AudioFormat is a kind of Format specialized for audio files.
953
954 Format checks are done on an AudioIdentifier object.
955 """
956
957 PROPS = prop('bitrate', Num)
958
959 ## libmagic reports `application/ogg' for Ogg Vorbis files. We've switched
960 ## to GIO now, which reports either `audio/ogg' or `audio/x-vorbis+ogg'
961 ## depending on how thorough it's trying to be. Still, it doesn't do any
962 ## harm here; the main risk is picking up Ogg Theora files by accident, and
963 ## we'll probably be able to extract the audio from them anyway.
964 CATEGORY = FileCategory('audio', ['audio/*', 'application/ogg'],
965 AudioIdentifier)
966
def __init__(me, bitrate = None):
    """
    Construct an object, requiring an approximate bitrate.

    A BITRATE of None (the default) means that no bitrate limit is imposed
    by `check'.
    """
    me.bitrate = bitrate
970
def check(me, id):
    """
    Return whether the AudioIdentifier ID is suitable for our purposes.

    The file is suitable if any of its possible MIME types is one of ours,
    and either we have no bitrate requirement or the file's bitrate is no
    more than a factor of sqrt(2) above it.  The result is always a proper
    boolean.

    Subclasses can either override this method or provide a property
    `MIMETYPES', which is a list (or other thing that implements
    `__contains__') of GStreamer MIME types matching this format.
    """

    ## Use membership tests rather than set intersection, so that
    ## `MIMETYPES' really can be a list as promised.
    if not any(m in me.MIMETYPES for m in id.mime):
        return False

    ## Only ask for the file's bitrate if we must: it can be slow.
    return bool(me.bitrate is None or
                id.bitrate <= me.bitrate * sqrt(2))
981
def encoder(me):
    """
    Constructs a GStreamer element to encode audio input.

    Subclasses can either override this method, or provide a method
    `encoder_chain' which returns a list of elements to be linked together
    in sequence.  The first element in the chain must have a pad named
    `sink' and the last must have a pad named `src'.

    The chain is wrapped up in a bin whose `sink' and `src' pads are ghosts
    of the corresponding pads at the two ends of the chain.
    """
    chain = me.encoder_chain()
    head, tail = chain[0], chain[-1]

    ## Put the elements in a bin and string them together.
    ebin = GS.Bin()
    ebin.add(*chain)
    GS.element_link_many(*chain)

    ## Expose the ends of the chain as the bin's own pads.
    ebin.add_pad(GS.GhostPad('sink', head.get_pad('sink')))
    ebin.add_pad(GS.GhostPad('src', tail.get_pad('src')))
    return ebin
999
1000 def convert(me, master, id, target):
1001 """
1002 Encode audio from MASTER, already identified as ID, writing it to TARGET.
1003
1004 See `encoder' for subclasses' responsibilities.
1005 """
1006
1007 ## Construct the necessary equipment.
1008 pipe = GS.Pipeline()
1009 bus = pipe.get_bus()
1010 bus.add_signal_watch()
1011 loop = G.MainLoop()
1012
1013 ## Make sure that there isn't anything in the way of our output. We're
1014 ## going to write to a scratch file so that we don't get confused by
1015 ## half-written rubbish left by a crashed program.
1016 new = target + '.new'
1017 try:
1018 OS.unlink(new)
1019 except OSError, err:
1020 if err.errno != E.ENOENT:
1021 raise
1022
1023 ## Piece together our pipeline. The annoying part is that the
1024 ## `decodebin' doesn't have any source pads yet, so our chain is in two
1025 ## halves for now.
1026 source = make_element('filesrc', 'source', location = master)
1027 decoder = make_element('decodebin', 'decode')
1028 convert = make_element('audioconvert', 'convert')
1029 encoder = me.encoder()
1030 sink = make_element('filesink', 'sink', location = new)
1031 pipe.add(source, decoder, convert, encoder, sink)
1032 GS.element_link_many(source, decoder)
1033 GS.element_link_many(convert, encoder, sink)
1034
1035 ## Some decoders (e.g., the AC3 decoder) include channel-position
1036 ## indicators in their output caps. The Vorbis encoder interferes with
1037 ## this, and you end up with a beautifully encoded mono signal from a
1038 ## stereo source. From a quick butchers at the `vorbisenc' source, I
1039 ## /think/ that this is only a problem with stereo signals: mono signals
1040 ## are mono already, and `vorbisenc' accepts channel positions if there
1041 ## are more than two channels.
1042 ##
1043 ## So we have this bodge. We already collected the decoded audio caps
1044 ## during identification. So if we see 2-channel audio with channel
1045 ## positions, we strip the positions off forcibly by adding a filter.
1046 if id.dcap.get_name().startswith('audio/x-raw-') and \
1047 id.dcap.has_field('channels') and \
1048 id.dcap['channels'] == 2 and \
1049 id.dcap.has_field('channel-positions'):
1050 dcap = GS.Caps()
1051 c = id.dcap.copy()
1052 c.remove_field('channel-positions')
1053 dcap.append(c)
1054 else:
1055 dcap = None
1056
1057 ## Hook onto the `decodebin' so we can link together the two halves of
1058 ## our encoding chain. For now, we'll hope that there's only one audio
1059 ## stream in there, and just throw everything else away.
1060 def decoder_pad_arrived(elt, pad):
1061 if pad.get_caps()[0].get_name().startswith('audio/'):
1062 if dcap:
1063 elt.link_pads_filtered(pad.get_name(), convert, 'sink', dcap)
1064 else:
1065 elt.link_pads(pad.get_name(), convert, 'sink')
1066 decoder.connect('pad-added', decoder_pad_arrived)
1067
1068 ## Watch the bus for completion messages.
1069 fail = []
1070 def bus_message(bus, msg):
1071 if msg.type == GS.MESSAGE_ERROR:
1072 fail[:] = (ValueError, msg.structure['debug'], None)
1073 loop.quit()
1074 elif msg.type == GS.MESSAGE_EOS:
1075 loop.quit()
1076 bmid = bus.connect('message', bus_message)
1077
1078 ## Get everything ready and let it go.
1079 pipe.set_state(GS.STATE_PLAYING)
1080 with GStreamerProgressEyecandy(filestatus(master,
1081 'convert to %s' % me.NAME),
1082 pipe):
1083 loop.run()
1084 pipe.set_state(GS.STATE_NULL)
1085 if fail:
1086 raise fail[0], fail[1], fail[2]
1087
1088 ## Fix up the output file if we have to.
1089 me.fixup(new)
1090
1091 ## We're done.
1092 OS.rename(new, target)
1093
class OggVorbisFormat (AudioFormat):
  "AudioFormat subclass which encodes Ogg Vorbis."

  NAME = 'Ogg Vorbis'
  EXT = 'ogg'
  MIMETYPES = set(['application/ogg', 'audio/x-vorbis', 'audio/ogg',
                   'audio/x-vorbis+ogg'])

  ## Pairs (QUALITY, BITRATE), in order of increasing bitrate, relating the
  ## encoder's quality settings to bitrates.  From
  ## http://en.wikipedia.org/wiki/Vorbis
  QMAP = [(-1,  45), ( 0,  64), ( 1,  80), ( 2,  96),
          ( 3, 112), ( 4, 128), ( 5, 160), ( 6, 192),
          ( 7, 224), ( 8, 256), ( 9, 320), (10, 500)]

  def encoder_chain(me):
    """
    Return the list of elements which encode Vorbis audio into an Ogg file.

    The quality chosen is that of the first `QMAP' entry whose bitrate is
    at least our target bitrate, divided by ten for `vorbisenc'.  Raises
    ValueError if no entry is high enough.
    """
    fits = [q for q, br in me.QMAP if br >= me.bitrate]
    if not fits:
      raise ValueError('no suitable quality setting found')
    return [make_element('vorbisenc',
                         quality = fits[0]/10.0),
            make_element('oggmux')]

defformat('ogg-vorbis', OggVorbisFormat)
1118
class MP3Format (AudioFormat):
  "AudioFormat subclass which encodes MP3."

  NAME = 'MP3'
  EXT = 'mp3'
  MIMETYPES = set(['audio/mpeg'])

  def encoder_chain(me):
    """
    Return the list of elements which encode and tag MP3 audio.

    The `lame' encoder is given our target bitrate as its mean bitrate, and
    is followed by `xingmux' and `id3v2mux'.
    """
    lame = make_element('lame',
                        vbr_mean_bitrate = me.bitrate,
                        vbr = 4)
    return [lame, make_element('xingmux'), make_element('id3v2mux')]

  def fixup(me, path):
    """
    Fix up the MP3 file PATH by adding an ID3v1.1 tag.

    GStreamer produces ID3v2 tags, but not ID3v1.  This seems unnecessarily
    unkind to stupid players.
    """
    tag = E3.Tag()
    tag.link(path)
    tag.setTextEncoding(E3.UTF_8_ENCODING)
    try:
      tag.update(E3.ID3_V1_1)
    except (UnicodeEncodeError, E3.tag.GenreException):
      ## This is a best-effort nicety: if the tag can't be written as
      ## ID3v1 then the file simply goes without.
      pass

defformat('mp3', MP3Format)
1149
1150 ###--------------------------------------------------------------------------
1151 ### Image handling, based on the Python Imaging Library.
1152
1153 class ImageIdentifier (object):
1154 """
1155 Analyses and identifies an image file.
1156
1157 Simply leaves an Image object in the `img' property which can be inspected.
1158 """
1159
1160 def __init__(me, file, mime):
1161
1162 ## Get PIL to open the file. It will magically work out what kind of
1163 ## file it is.
1164 try:
1165 me.img = I.open(file)
1166 except IOError, exc:
1167
1168 ## Unhelpful thing to raise on identification failure. We can
1169 ## distinguish this from an actual I/O error because it doesn't have an
1170 ## `errno'.
1171 if exc.errno is None:
1172 raise IdentificationFailure
1173 raise
1174
1175 me.mime = set([mime])
1176
class ImageFormat (BaseFormat):
  """
  An ImageFormat is a kind of Format specialized for image files.

  Subclasses need only provide the properties required of every concrete
  Format subclass; but the `NAME' property must be the name which PIL uses
  for the format, because it's compared against an image's `format' and
  passed to PIL when saving.
  """

  PROPS = prop('size', Num)
  CATEGORY = FileCategory('image', ['image/*'], ImageIdentifier)

  def __init__(me, size = None, **kw):
    """
    Initialize an ImageFormat object.

    SIZE, if not None, is the largest width or height we allow.  Additional
    keywords are passed to PIL when encoding, and may be recognized by
    enhanced `check' methods in subclasses.
    """
    me._size = size
    me._props = kw

  def check(me, id):
    "Check whether the ImageIdentifier ID matches our requirements."

    ## It must be in our format already...
    if id.img.format != me.NAME:
      return False

    ## ... and, if we have a size limit, fit within it both ways.
    limit = me._size
    if limit is None:
      return True
    return id.img.size[0] <= limit and id.img.size[1] <= limit

  def convert(me, master, id, target):
    "Encode the file MASTER, identified as ID, writing the result to TARGET."

    ## We write to a scratch file, and rename it into place at the end.
    scratch = target + '.new'
    STATUS.set(filestatus(master, 'convert to %s' % me.NAME))

    ## The ImageIdentifier already holds the opened image, so use that
    ## rather than opening the file again.
    img = id.img

    ## If there's a stated maximum size and the image exceeds it, then
    ## scale the image down to fit.  Thumbnailing works in place, so
    ## operate on a copy rather than clobbering the identifier's image.
    limit = me._size
    oversize = limit is not None and \
               (img.size[0] > limit or img.size[1] > limit)
    if oversize:
      img = img.copy()
      img.thumbnail((limit, limit), I.ANTIALIAS)

    ## Write the output image, and fix it up if necessary.
    img.save(scratch, me.NAME, **me._props)
    me.fixup(scratch)

    ## We're done.
    OS.rename(scratch, target)
    STATUS.commit()
1234
class JPEGFormat (ImageFormat):
  """
  ImageFormat subclass for JPEG (strictly speaking, JFIF) files.

  Interesting properties to set:

  optimize
        If present, take a second pass to select optimal encoder settings.

  progression
        If present, make a progressive file.

  quality
        Integer from 1--100 (worst to best); default is 75.
  """
  NAME = 'JPEG'
  EXT = 'jpg'
  PROPS = prop('optimize', None) \
        | prop('progressive', None, 'progression') \
        | prop('quality', Num)

defformat('jpeg', JPEGFormat)
1256
class PNGFormat (ImageFormat):
  """
  ImageFormat subclass for PNG files.

  Interesting properties to set:

  optimize
        If present, make a special effort to minimize the output file.
  """
  NAME = 'PNG'
  EXT = 'png'
  PROPS = prop('optimize', None)

defformat('png', PNGFormat)
1271
class BMPFormat (ImageFormat):
  """
  ImageFormat subclass for Windows BMP files, as used by RockBox.

  There are no additional properties.
  """
  EXT = 'bmp'
  NAME = 'BMP'

defformat('bmp', BMPFormat)
1282
1283 ###--------------------------------------------------------------------------
1284 ### The directory grobbler.
1285
1286 class Grobbler (object):
1287 """
1288 The directory grobbler copies a directory tree, converting files.
1289 """
1290
1291 def __init__(me, policies, noact = False):
1292 """
1293 Create a new Grobbler, working with the given POLICIES.
1294 """
1295 me._pmap = {}
1296 me._noact = noact
1297 for p in policies:
1298 me._pmap.setdefault(p.cat, []).append(p)
1299 me._dirs = []
1300
1301 def _grobble_file(me, master, targetdir, cohorts):
1302 """
1303 Convert MASTER, writing the result to TARGETDIR.
1304
1305 The COHORTS are actually (CAT, ID, COHORT) triples, where a COHORT is a
1306 list of (FILENAME, ID) pairs.
1307
1308 Since this function might convert the MASTER file, the caller doesn't
1309 know the name of the output files, so we return then as a list.
1310 """
1311
1312 done = set()
1313 st_m = OS.stat(master)
1314
1315 ## Work through each category listed and apply its policy.
1316 for cat, id, cohort in cohorts:
1317
1318 ## Go through the category's policies and see if any match. If we fail
1319 ## here, see if there are more categories to try.
1320 for pol in me._pmap[cat]:
1321 acts = pol.actions(master, targetdir, id, cohort)
1322 if acts: break
1323 else:
1324 continue
1325
1326 ## Work through the targets one by one.
1327 for a in acts:
1328 done.add(a.target)
1329
1330 ## Find out whether the target file already exists and is up-to-date
1331 ## with respect to the master. (Caution here with low-resolution
1332 ## timestamps.) If it's OK, then just move on.
1333 try:
1334 st_t = OS.stat(a.target)
1335 if st_m.st_mtime < st_t.st_mtime or \
1336 (st_m.st_ino, st_m.st_dev) == (st_t.st_ino, st_t.st_dev):
1337 continue
1338 except OSError, err:
1339 if err.errno not in (E.ENOENT, E.ENOTDIR):
1340 raise
1341
1342 ## We have real work to do. If there's a current status message,
1343 ## it's the containing directory so flush it so that people know
1344 ## where we are.
1345 STATUS.commit()
1346
1347 ## Remove the target. (A hardlink will fail if the target already
1348 ## exists.)
1349 if not me._noact:
1350 try:
1351 OS.unlink(a.target)
1352 except OSError, err:
1353 if err.errno not in (E.ENOENT, E.ENOTDIR):
1354 raise
1355
1356 ## Do whatever it is we decided to do.
1357 if me._noact:
1358 STATUS.commit(filestatus(master, a))
1359 else:
1360 a.perform()
1361
1362 ## We're done. Return the names of the targets.
1363 return list(done)
1364
1365 @contextmanager
1366 def _wrap(me, masterfile):
1367 """
1368 Handle exceptions found while trying to convert a particular file or
1369 directory.
1370 """
1371
1372 try:
1373 yield masterfile
1374
1375 ## Something bad happened. Report the error, but continue. (This list
1376 ## of exceptions needs a lot of work.)
1377 except (IOError, OSError), exc:
1378 STATUS.clear()
1379 STATUS.commit(filestatus(masterfile, 'failed (%s)' % exc))
1380 me._broken.append((masterfile, exc))
1381
1382 def _grobble_dir(me, master, target):
1383 """
1384 Recursively convert files in MASTER, writing them to TARGET.
1385 """
1386
1387 ## Make sure the TARGET exists and is a directory. It's a fundamental
1388 ## assumption of this program that the entire TARGET tree is disposable,
1389 ## so if something exists but isn't a directory, we should kill it.
1390 if OS.path.isdir(target):
1391 pass
1392 else:
1393 if OS.path.exists(target):
1394 STATUS.commit(filestatus(target, 'clear nondirectory'))
1395 if not me._noact:
1396 OS.unlink(target)
1397 STATUS.commit(filestatus(target, 'create directory'))
1398 if not me._noact:
1399 OS.mkdir(target)
1400
1401 ## Keep a list of things in the target. As we convert files, we'll check
1402 ## them off. Anything left over is rubbish and needs to be deleted.
1403 checklist = {}
1404 try:
1405 for i in OS.listdir(target):
1406 checklist[i] = False
1407 except OSError, err:
1408 if err.errno not in (E.ENOENT, E.ENOTDIR):
1409 raise
1410
1411 ## Keep track of the files in each category.
1412 catmap = {}
1413 todo = []
1414 done = []
1415
1416 ## Work through the master files.
1417 for f in sorted(OS.listdir(master)):
1418
1419 ## If the killswitch has been pulled then stop. The whole idea is that
1420 ## we want to cause a clean shutdown if possible, so we don't want to
1421 ## do it in the middle of encoding because the encoding effort will
1422 ## have been wasted. This is the only place we need to check. If
1423 ## we've exited the loop, then clearing old files will probably be
1424 ## fast, and we'll either end up here when the recursive call returns
1425 ## or we'll be in the same boat as before, clearing old files, only up
1426 ## a level. If worst comes to worst, we'll be killed forcibly
1427 ## somewhere inside `SH.rmtree', and that can continue where it left
1428 ## off.
1429 if KILLSWITCH.is_set():
1430 return
1431
1432 ## Do something with the file.
1433 with me._wrap(OS.path.join(master, f)) as masterfile:
1434
1435 ## If it's a directory then grobble it recursively. Keep the user
1436 ## amused by telling him where we are in the tree.
1437 if OS.path.isdir(masterfile):
1438 me._dirs.append(f)
1439 STATUS.set('/'.join(me._dirs))
1440 try:
1441 done += me._grobble_dir(masterfile, OS.path.join(target, f))
1442 finally:
1443 me._dirs.pop()
1444 STATUS.set('/'.join(me._dirs))
1445
1446 ## Otherwise it's a file. Work out what kind, and stash it under
1447 ## the appropriate categories. Later, we'll apply policy to the
1448 ## files, by category, and work out what to do with them all.
1449 else:
1450 gf = GIO.File(masterfile)
1451 mime = gf.query_info('standard::content-type').get_content_type()
1452 cats = []
1453 for cat in me._pmap.iterkeys():
1454 id = cat.identify(masterfile, mime)
1455 if id is None: continue
1456 catmap.setdefault(cat, []).append((masterfile, id))
1457 cats.append((cat, id))
1458 if not cats:
1459 catmap.setdefault(None, []).append((masterfile, id))
1460 todo.append((masterfile, cats))
1461
1462 ## Work through the categorized files to see what actions to do for
1463 ## them.
1464 for masterfile, cats in todo:
1465 with me._wrap(masterfile):
1466 done += me._grobble_file(masterfile, target,
1467 [(cat, id, catmap[cat])
1468 for cat, id in cats])
1469
1470 ## Check the results off the list so that we don't clear it later.
1471 for f in done:
1472 checklist[OS.path.basename(f)] = True
1473
1474 ## Maybe there's stuff in the target which isn't accounted for. Delete
1475 ## it: either the master has changed, or the policy for this target has
1476 ## changed. Either way, the old files aren't wanted.
1477 for f in checklist:
1478 if not checklist[f]:
1479 STATUS.commit(filestatus(f, 'clear bogus file'))
1480 if not me._noact:
1481 bogus = OS.path.join(target, f)
1482 try:
1483 if OS.path.isdir(bogus):
1484 SH.rmtree(bogus)
1485 else:
1486 OS.unlink(bogus)
1487 except OSError, err:
1488 if err.errno != E.ENOENT:
1489 raise
1490
1491 ## Return the target name, so that it can be checked off.
1492 return [target]
1493
1494 def grobble(me, master, target):
1495 """
1496 Convert MASTER, writing a directory tree TARGET.
1497
1498 Returns a list of files which couldn't be converted.
1499 """
1500 try:
1501 me._broken = []
1502 me._grobble_dir(master, target)
1503 return me._broken
1504 finally:
1505 del me._broken
1506
1507 ###--------------------------------------------------------------------------
1508 ### Remaining parsing machinery.
1509
Type = K('type') - Name - D('{') - R(Policy) - D('}')
def build_type(s, l, t):
  """
  Parse action for `Type': build the policy for one category.

  S is the string being parsed, L the location of the match within it, and
  T the matched tokens: the category name and the list of policies.  The
  category is looked up in `CATEGORYMAP'; several policies are combined
  into an AndPolicy.  Returns the policy, with its category set.  Raises
  ParseException, at location L, if the category name isn't known.
  """
  try:
    cat = CATEGORYMAP[t[0]]
  except KeyError:
    raise P.ParseException(s, l, "Unknown category `%s'" % t[0])
  pols = t[1]
  if len(pols) == 1: pol = pols[0]
  else: pol = AndPolicy(pols)
  pol.setcategory(cat)
  return pol
Type.setParseAction(build_type)
1522
## The TargetJob objects which have been performed, in order.
TARGETS = []
class TargetJob (object):
  """
  A job from the configuration file describing one target directory.

  It simply holds the TARGETDIR and POLICIES it was given, as attributes
  `targetdir' and `policies'.
  """
  def __init__(me, targetdir, policies):
    me.targetdir, me.policies = targetdir, policies
  def perform(me):
    "Record this job by appending it to the global `TARGETS' list."
    TARGETS.append(me)
1530
Target = K('target') - String - D('{') - R(Type) - D('}')
def build_target(s, l, t):
  "Parse action for `Target': wrap the directory and policies in a TargetJob."
  targetdir = t[0]
  policies = t[1]
  return TargetJob(targetdir, policies)
Target.setParseAction(build_target)
1535
## Global variable settings collected from the configuration file.
VARS = { 'master': None }
class VarsJob (object):
  """
  A job from the configuration file which sets global variables.

  VARS is an iterable of (NAME, VALUE) pairs, kept in the `vars' attribute.
  """
  def __init__(me, vars):
    me.vars = vars
  def perform(me):
    "Store each of our (NAME, VALUE) pairs in the global `VARS' dictionary."
    VARS.update((name, value) for name, value in me.vars)
1543
Var = prop('master', String)
Vars = K('vars') - D('{') - R(Var) - D('}')
def build_vars(s, l, t):
  "Parse action for `Vars': wrap the variable settings in a VarsJob."
  settings = t[0]
  return VarsJob(settings)
Vars.setParseAction(build_vars)

## A configuration file is a sequence of `vars' and `target' blocks, with
## Python-style comments ignored.
TopLevel = Vars | Target
Config = R(TopLevel)
Config.ignore(P.pythonStyleComment)
1553
1554 ###--------------------------------------------------------------------------
1555 ### Command-line interface.
1556
## The name we were invoked under, for use in messages.
QUIS = OS.path.basename(SYS.argv[0])

def moan(msg):
  "Write the warning MSG to standard error, prefixed by the program name."
  line = '%s: %s\n' % (QUIS, msg)
  SYS.stderr.write(line)
1562
def die(msg):
  "Report the fatal error MSG to the user, and exit with status 1."
  moan(msg)
  raise SystemExit(1)
1567
def parse_opts(args):
  """
  Parse command-line arguments in ARGS.

  ARGS is the list of arguments, without the program name.  Exactly one
  non-option argument is wanted: the name of the configuration file.

  As side effects: progress eyecandy is switched on if `-i' was given; a
  daemon thread running `timeout' is started if `-t' was given; and the
  configuration file is parsed and each of the jobs it describes is
  performed.

  Returns the option values object.
  """

  ## Build the option parser object.
  op = OP.OptionParser(prog = QUIS, version = VERSION,
                       usage = '%prog [-t TIMEOUT] CONFIG',
                       description = """\
Convert a directory tree of files according to the configuration file
CONFIG.
""")

  ## Timeout handling.  A time is a nonnegative integer, optionally followed
  ## by a unit: `s', `m', `h', or `d'; the default is seconds.  The whole
  ## argument must match, so that trailing rubbish is rejected rather than
  ## quietly ignored.
  def cb_time(opt, ostr, arg, op):
    m = RX.match(r'\s*(\d+)\s*([dhms]?)\s*$', arg)
    if not m:
      raise OP.OptionValueError('bad time value `%s\'' % arg)
    t, u = m.groups()
    t = int(t) * { '': 1, 's': 1, 'm': 60, 'h': 3600, 'd': 86400 }[u]
    setattr(op.values, opt.dest, t)
  op.add_option('-t', '--timeout', type = 'string', metavar = 'SECS',
                dest = 'timeout',
                help = 'stop processing nicely after SECS',
                action = 'callback', callback = cb_time)
  op.add_option('-T', '--timeout-nasty', type = 'string', metavar = 'SECS',
                dest = 'timeout_nasty',
                help = 'stop processing unpleasantly after further SECS',
                action = 'callback', callback = cb_time)

  ## Other options.
  op.add_option('-i', '--interactive', action = 'store_true', dest = 'tty',
                help = 'provide progress information')
  op.add_option('-n', '--no-act', action = 'store_true', dest = 'noact',
                help = 'don\'t actually modify the filesystem')

  ## Ready to rock.
  op.set_defaults(formats = [], noact = False,
                  timeout = None, timeout_nasty = 300)
  opts, args = op.parse_args(args)

  ## Check that we got the non-option arguments that we want.
  if len(args) != 1:
    op.error('wrong number of arguments')

  ## Act on the options.
  if opts.tty:
    STATUS.eyecandyp = True
  if opts.timeout is not None:
    to = TH.Thread(target = timeout,
                   args = (opts.timeout, opts.timeout_nasty))
    to.daemon = True
    to.start()

  ## Parse the configuration file.
  with open(args[0]) as conf:
    jobs, = Config.parseFile(conf, True)
  for j in jobs:
    j.perform()

  return opts
1632
if __name__ == '__main__':
  opts = parse_opts(SYS.argv[1:])

  ## The `master' variable always has an entry, which stays None unless the
  ## configuration file sets it; so check the value, not just the key.
  if VARS.get('master') is None:
    die("no master directory set")

  ## Grobble each target in turn, collecting the failures as we go.
  broken = []
  for t in TARGETS:
    g = Grobbler(t.policies, opts.noact)
    b = g.grobble(VARS['master'], t.targetdir)
    broken += b
  if broken:
    moan('failed to convert some files:')
    for path, exc in broken:
      moan('%s: %s' % (path, exc))
    SYS.exit(1)

  ## This is basically a successful completion: we did what we were asked to
  ## do.  It seems polite to report a message, though.
  ##
  ## Why don't we have a nonzero exit status?  The idea would be that a
  ## calling script would be interested that we used up all of our time, and
  ## not attempt to convert some other directory as well.  But that doesn't
  ## quite work.  Such a script would need to account correctly for time we
  ## had spent even if we complete successfully.  And if the script is having
  ## to watch the clock itself, it can do that without our help here.
  if KILLSWITCH.is_set():
    moan('killed by timeout')
1659
1660 ###----- That's all, folks --------------------------------------------------