gremlin/: Add a manpage! Approximately ready for release now.
[autoys] / gremlin / gremlin.in
1 #! @PYTHON@
2 ###
3 ### Convert a directory tree of audio files
4 ###
5 ### (c) 2010 Mark Wooding
6 ###
7
8 ###----- Licensing notice ---------------------------------------------------
9 ###
10 ### This program is free software; you can redistribute it and/or modify
11 ### it under the terms of the GNU General Public License as published by
12 ### the Free Software Foundation; either version 2 of the License, or
13 ### (at your option) any later version.
14 ###
15 ### This program is distributed in the hope that it will be useful,
16 ### but WITHOUT ANY WARRANTY; without even the implied warranty of
17 ### MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 ### GNU General Public License for more details.
19 ###
20 ### You should have received a copy of the GNU General Public License
21 ### along with this program; if not, write to the Free Software Foundation,
22 ### Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
23
24 ###--------------------------------------------------------------------------
25 ### External dependencies.
26
27 ## Language features.
28 from __future__ import with_statement
29
30 ## Standard Python libraries.
31 import sys as SYS
32 import os as OS
33 import errno as E
34 import time as T
35 import unicodedata as UD
36 import fnmatch as FN
37 import re as RX
38 import shutil as SH
39 import optparse as OP
40 import threading as TH
41 import shlex as L
42 from math import sqrt
43 from contextlib import contextmanager
44
45 ## eyeD3 tag fettling.
46 import eyeD3 as E3
47
48 ## Gstreamer. It picks up command-line arguments -- most notably `--help' --
49 ## and processes them itself. Of course, its help is completely wrong. This
50 ## kludge is due to Jonas Wagner.
51 _argv, SYS.argv = SYS.argv, []
52 import gobject as G
53 import gio as GIO
54 import gst as GS
55 SYS.argv = _argv
56
57 ## Python Imaging.
58 from PIL import Image as I
59
60 ## Python parsing.
61 import pyparsing as P
62
63 ###--------------------------------------------------------------------------
64 ### Special initialization.
65
## The program's version string.  `@VERSION@' looks like a placeholder to be
## substituted when this template is processed by the build -- TODO confirm.
VERSION = '@VERSION@'

## GLib.  Initialize its threading support up front.
G.threads_init()
70
71 ###--------------------------------------------------------------------------
72 ### Eyecandy progress reports.
73
def charwidth(s):
  """
  Return the width of S, in character cells.

  Specifically, this is the number of backspace characters required to
  overprint the string S.  If the current encoding for `stdout' appears to be
  usable then do a complicated Unicode thing; otherwise assume that
  characters take up one cell each.

  S may be an encoded byte string or already-decoded Unicode text.  Byte
  strings are decoded using the encoding of `stdout'; if that fails for any
  reason (undecodable bytes, or an encoding name Python doesn't know) then
  fall back to returning `len(S)'.

  None of this handles tab characters in any kind of useful way.  Sorry.
  """

  ## If there's no encoding for stdout then we're doing something stupid.
  enc = SYS.stdout.encoding
  if enc is None: return len(s)

  ## Get hold of the string as Unicode so we can hack on it properly.  Text
  ## which is already Unicode is used as it stands: trying to `decode' it
  ## would force an ASCII round trip and fail on exactly the characters we
  ## care about.  Otherwise decode, and if that won't work out, fall back to
  ## being stupid.
  if isinstance(s, type(u'')):
    u = s
  else:
    try: u = s.decode(enc)
    except (UnicodeError, LookupError): return len(s)

  ## Our main problem is combining characters, but we should also try to
  ## handle wide (mostly Asian) characters, and zero-width ones.  This hack
  ## is taken mostly from http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c
  w = 0
  for ch in u:

    ## Format controls, enclosing and nonspacing marks, and the code points
    ## 0x1160--0x11ff occupy no cells at all.
    if UD.category(ch) in ('Cf', 'Me', 'Mn') or \
       0x1160 <= ord(ch) <= 0x11ff:
      continue

    ## Fullwidth and wide characters take two cells; everything else one.
    if UD.east_asian_width(ch) in ('F', 'W'): w += 2
    else: w += 1

  ## Done.
  return w
107
class StatusLine (object):
  """
  Maintains a status line containing ephemeral progress information.

  The status line isn't especially important, but it keeps interactive users
  amused.

  There should be only one status line object in your program; otherwise
  they'll interfere with each other and get confused.

  The update algorithm (in `set') is fairly careful to do the right thing
  with long status `lines', and to work properly in an Emacs `shell' buffer.

  Attributes:

  eyecandyp     True if stdout is a terminal, so that in-place updates are
                worth writing.

  _last, _lastlen
                The most recent status line and its length.
  """

  def __init__(me):
    "Start with an empty status line, noting whether stdout is a terminal."
    me._last = ''
    me._lastlen = 0
    me.eyecandyp = OS.isatty(SYS.stdout.fileno())

  def set(me, line):
    """
    Set the status line contents to LINE, replacing what was there before.

    This only produces actual output if stdout is interactive; either way,
    LINE is remembered as the current status.
    """
    n = len(line)

    ## Eyecandy update.
    if me.eyecandyp:
      old = me._last

      ## If the new line is shorter than the old one, the old tail has to be
      ## rubbed out: back up over it, and overwrite it with spaces.
      if n >= me._lastlen:
        wipe = ''
      else:
        k = charwidth(old[n:])
        wipe = '\b'*k + ' '*k

      ## Find how much of the start of the line is unchanged.  Leaving it
      ## alone reduces the amount of I/O done, which keeps network traffic
      ## down on SSH links, and keeps down the amount of work slow terminal
      ## emulators like Emacs have to do.
      limit = min(n, me._lastlen)
      same = 0
      while same < limit and line[same] == old[same]:
        same += 1

      ## Back up to the first difference and write the new tail, all in one
      ## go.
      back = charwidth(old[same:])
      SYS.stdout.write(wipe + '\b'*back + line[same:])
      SYS.stdout.flush()

    ## Update our idea of what's gone on.
    me._lastlen = n
    me._last = line

  def clear(me):
    "Clear the status line.  Just like set('')."
    me.set('')

  def commit(me, line = None):
    """
    Commit the current status line, and maybe the string LINE.

    If the current status line is nonempty, then commit it to the transcript:
    on a terminal it's already showing, so a newline suffices; otherwise it
    has never been written and is output now.  If LINE is not None, then
    commit that to the transcript too.

    After all of this, the status line is reset to empty.
    """
    out = SYS.stdout
    if me._last:
      if me.eyecandyp: out.write('\n')
      else: out.write(me._last + '\n')
    if line is not None:
      out.write(line + '\n')
    me._lastlen = 0
    me._last = ''
187
## The program's single shared status line.
STATUS = StatusLine()
189
def filestatus(file, status):
  """
  Return a status-line string reporting STATUS for FILE.

  The result is indented by eight spaces and mentions only FILE's base name.
  """
  indent = ' '*8
  leaf = OS.path.basename(file)
  return '%s%s: %s' % (indent, leaf, status)
192
class ProgressEyecandy (object):
  """
  Provide amusement while something big and complicated is happening.

  This is an abstract class.  Subclasses must provide a method `progress'
  returning a pair (CURRENT, MAX) indicating the current progress through the
  operation; either may be None if the progress isn't known.
  """

  def __init__(me, what, silentp = False):
    """
    Initialize a progress meter.

    WHAT is a prefix string to be written before the progress eyecandy
    itself.  If SILENTP is true then `done' says nothing about successful
    completion (failures are still reported).
    """
    me._what = what
    me._silentp = silentp
    me._spinner = 0
    me._start = T.time()

  def _fmt_time(me, t):
    "Format T (in seconds) as a time, in (maybe hours) minutes and seconds."
    s, t = t % 60, int(t/60)
    m, h = t % 60, int(t/60)
    if h > 0:
      return '%d:%02d:%02d' % (h, m, s)
    else:
      return '%02d:%02d' % (m, s)

  def _bar(me, cur, end, width = 40):
    """
    Return a pair (BAR, PERCENT) describing progress CUR out of END.

    BAR is a string exactly WIDTH characters long, and PERCENT is an integer.
    Both are clamped to their sensible ranges, so a CUR which has run past
    END can't make the bar overflow.  END must be positive.
    """
    n = min(width, max(0, int(width*cur//end)))
    pct = min(100, max(0, int(100*cur//end)))
    return '='*n + ' '*(width - n), pct

  def show(me):
    "Show the current level of progress."

    ## If we're not showing pointless frippery, don't bother at all.
    if not STATUS.eyecandyp:
      return

    ## Update the spinner.
    me._spinner = (me._spinner + 1)%4
    spin = r'/-\|'[me._spinner]

    ## Fetch the current progress information.  Note that we always fetch
    ## both the current and maximum levels, because both might change if an
    ## operation revises its idea of how much work needs doing.
    cur, end = me.progress()

    ## If we couldn't get progress information, display something vaguely
    ## amusing anyway.  A maximum which isn't positive is no use for working
    ## out a proportion, so treat that as unknown too rather than dividing
    ## by it.
    if cur is None or end is None or end <= 0:
      STATUS.set('%s %c [unknown progress]' % (me._what, spin))
      return

    ## Work out -- well, guess -- the time remaining.  If we've overshot the
    ## expected maximum then there's nothing left to wait for.
    if cur > 0:
      left = (T.time() - me._start)*(end - cur)/cur
      eta = me._fmt_time(max(0, left))
    else:
      eta = '???'

    ## Set the status bar.
    bar, pct = me._bar(cur, end)
    STATUS.set('%s %c [%s] %3d%% (%s)' % (me._what, spin, bar, pct, eta))

  def done(me, win = True):
    "Show a completion notice, or a failure if WIN is false."
    if not win:
      STATUS.set('%s FAILED!' % me._what)
    elif not me._silentp:
      STATUS.set('%s done (%s)' %
                 (me._what,
                  me._fmt_time(T.time() - me._start)))
    else:
      ## Silent success: leave the status line alone.
      return
    STATUS.commit()
272
273 ###--------------------------------------------------------------------------
274 ### Timeout handling.
275
## Set once the first stage of the timeout expires.  Presumably polled by
## the main program so that it can stop tidily -- confirm against callers.
KILLSWITCH = TH.Event()

def timeout(t0, t1):
  """
  Enforce a two-stage time limit on the program.

  Wait T0 seconds and then set KILLSWITCH.  If the process is still running
  T1 seconds after that, complain and terminate it immediately with exit
  status 3.  This blocks for the whole time, so it presumably wants to be
  run in a thread of its own.
  """

  ## Stage one: ask nicely.
  T.sleep(t0)
  KILLSWITCH.set()

  ## Stage two: stop asking.
  T.sleep(t1)
  moan('dying messily due to timeout')
  OS._exit(3)
284
285 ###--------------------------------------------------------------------------
286 ### Parsing utilities.
287
288 ## Allow hyphens in identifiers.
IDCHARS = P.alphanums + '-_'
P.Keyword.setDefaultKeywordChars(IDCHARS)

## Some common kinds of tokens: identifiers, decimal integers (delivered as
## Python integers), and double-quoted strings with backslash escapes.
Name = P.Word(IDCHARS)
Num = P.Word(P.nums)
Num.setParseAction(lambda toks: [int(t) for t in toks])
String = P.QuotedString('"', '\\')

## Handy abbreviations for constructed parser elements.
def K(k):
  "Return a parser element matching the keyword K, contributing no tokens."
  return P.Keyword(k).suppress()

def D(d):
  "Return a parser element matching the literal D, contributing no tokens."
  return P.Literal(d).suppress()

def R(p):
  "Return an element matching zero or more P, as a single list-valued token."
  return P.ZeroOrMore(p).setParseAction(lambda s, l, t: [t])

O = P.Optional
302
303 ###--------------------------------------------------------------------------
304 ### Format identification and conversion.
305
class IdentificationFailure (Exception):
  "Raised by an identification function which can't identify a file."
308
class FileCategory (object):
  """
  A FileCategory represents a class of files.

  For example, it's sensible to consider audio, or image files as a
  category.  A file category knows how to recognize member files from
  MIME content types.
  """

  def __init__(me, name, mime_pats, ident):
    """
    Construct a new category called NAME, and register it in CATEGORYMAP.

    MIME_PATS is a list of `fnmatch' patterns to be compared with a MIME
    type.  IDENT is a function which produces an identification object
    given a file's name and first-guess MIME type.  The object is passed to a
    Format's `check' method to see whether a file needs re-encoding, and to
    `convert' to assist with the conversion.

    An identification object must have an attribute `mime' which is a set of
    possible MIME types accumulated for the object.
    """
    me.name = name
    me._mime_pats = mime_pats
    me._ident = ident
    CATEGORYMAP[name] = me

  def identify(me, file, mime):
    """
    Attempt to identify FILE, given its apparent MIME type.

    Each pattern which matches MIME (case-sensitively) gets a chance: the
    identification function is called, and the first object it returns is
    the answer.  An IdentificationFailure moves us on to the next pattern.
    If nothing works, return None.
    """
    for pat in me._mime_pats:
      if FN.fnmatchcase(mime, pat):
        try: return me._ident(file, mime)
        except IdentificationFailure: pass
    return None
351
class BaseFormat (object):
  """
  A BaseFormat object represents a particular encoding and parameters.

  The object can verify (the `check' method) whether a particular file
  matches its requirements, and if necessary (`convert') re-encode a file.

  Subclasses should define the following methods.

  check(ID)
        Answer whether the file identified by ID is acceptable according to
        the receiver's parameters.

  convert(MASTER, ID, TARGET)
        Convert the file MASTER, which has been identified as ID, according
        to the receiver's parameters, writing the output to TARGET.

  Subclasses should also provide these attributes.

  CATEGORY
        A FileCategory object for the category of files that this format
        lives within.

  EXT   A file extension to be applied to encoded output files.

  NAME  A user-facing name for the format.

  PROPS A parser element to parse a property definition.  It should produce
        a pair NAME, VALUE to be stored in a dictionary.

  Subclasses for different kinds of file may introduce more subclass
  protocol.
  """

  def fixup(me, path):
    "Post-encoding fixups for the file at PATH: by default, do nothing."
    return None
389
## Registries: FORMATMAP maps format names to format classes (see
## `defformat'), and CATEGORYMAP maps category names to FileCategory objects
## (which register themselves when constructed).
FORMATMAP = {}
CATEGORYMAP = {}

def defformat(name, cls):
  """
  Define a format NAME using class CLS.

  CLS must have both a `NAME' and a `CATEGORY' attribute; otherwise raise
  ValueError and leave the registry alone.
  """
  for attr, complaint in [('NAME', 'abstract class'),
                          ('CATEGORY', 'no category')]:
    if not hasattr(cls, attr):
      raise ValueError(complaint)
  FORMATMAP[name] = cls
400
class FormatParser (P.ParserElement):
  """
  Parse a format specifier:

        format-spec ::= name [format-properties]
        format-properties ::= `{' format-property (`,' format-property)* `}'

  The name must have been registered in FORMATMAP.  The syntax of a
  format-property is determined by the PROPS attribute on the named format
  and its superclasses.  The result of a successful parse is an instance of
  the format class, constructed with the properties as keyword arguments.
  """

  ## We cache the parser elements we generate to avoid enormous consing.
  ## The keys are format names; a value of None means that the format has no
  ## properties at all.
  CACHE = {}

  def parseImpl(me, s, loc, actp = True):
    """
    Parse a format specifier from S starting at LOC.

    Return a pair (LOC, FORMAT) giving the position following the specifier
    and the newly constructed format object.  Raise `P.ParseException' if
    the format name isn't known.
    """

    ## Firstly, determine the format name.
    loc, r = Name._parse(s, loc, actp)
    fmt = r[0]

    ## Look up the format class.
    try: fcls = FORMATMAP[fmt]
    except KeyError:
      raise P.ParseException(s, loc, "Unknown format `%s'" % fmt)

    ## Fetch the property-list parser from the cache, if possible; else
    ## construct it.
    try:
      pp = me.CACHE[fmt]
    except KeyError:

      ## Collect the distinct PROPS parsers from the class and its
      ## superclasses, and form their alternation.  Always build a fresh
      ## alternation with `|': the in-place `|=' operator would extend an
      ## existing alternation object, and the object in question is some
      ## class's own PROPS attribute, which isn't ours to modify.
      seen = set()
      prop = None
      for c in fcls.mro():
        try: p = c.PROPS
        except AttributeError: continue
        if p in seen: continue
        if prop is None: prop = p
        else: prop = prop | p
        seen.add(p)

      ## Wrap the alternation up as an optional brace-enclosed list which
      ## delivers a dictionary.
      if prop is None:
        pp = me.CACHE[fmt] = None
      else:
        props = P.delimitedList(prop)
        props.setParseAction(lambda s, l, t: dict(t.asList()))
        pp = me.CACHE[fmt] = O(D('{') - props - D('}'))

    ## Parse the properties.
    if pp is None:
      pd = {}
    else:
      loc, r = pp._parse(s, loc, actp)
      if r: pd = r[0]
      else: pd = {}

    ## Construct the format object and return it.
    return loc, fcls(**pd)

Format = FormatParser()
459
def prop(kw, pval, tag = None):
  """
  Return a parser element for a format property introduced by keyword KW.

  If PVAL is None then the property is a simple flag: the keyword alone
  yields the pair (TAG, True).  Otherwise the syntax is `KW = PVAL', where
  PVAL is a parser element for the value, and the result is the pair (TAG,
  VALUE).  TAG defaults to KW.
  """
  if tag is None: tag = kw
  if pval is not None:
    p = K(kw) + D('=') + pval
    action = lambda s, l, t: (tag, t[0])
  else:
    p = K(kw)
    action = lambda s, l, t: (tag, True)
  p.setParseAction(action)
  return p
469
470 ###--------------------------------------------------------------------------
471 ### Policies and actions.
472
class Action (object):
  """
  An Action object represents a conversion action to be performed.

  This class isn't intended to be instantiated directly.  It exists to define
  some protocol common to all Action objects.

  Action objects have the following attributes.

  master        The name of the master (source) file.

  target        The name of the target (destination) file.

  PRIORITY      The priority of the action, for deciding which of two actions
                to perform.  Higher priorities are more likely to win.

  Converting an Action to a string describes the action in a simple
  user-readable manner.  The `perform' method actually carries the action
  out.
  """

  PRIORITY = 0

  def __init__(me, master):
    "Stash the MASTER file name for later."
    me.master = master

  def choose(me, him):
    """
    Choose either ME or HIM and return one.

    HIM may be None, in which case the choice is easy.  Otherwise the action
    with the higher PRIORITY wins, and HIM wins ties.
    """
    if him is not None and me.PRIORITY <= him.PRIORITY:
      return him
    return me
506
507 class CopyAction (Action):
508 """
509 An Action object for simply copying a file.
510
511 Actually we try to hardlink it first, falling back to a copy later. This
512 is both faster and more efficient with regard to disk space.
513 """
514
515 ## Copying is good. Linking is really good, but we can't tell the
516 ## difference at this stage.
517 PRIORITY = 10
518
519 def __init__(me, master, targetdir):
520 "Initialize a CopyAction, from MASTER to the TARGETDIR directory."
521 Action.__init__(me, master)
522 me.target = OS.path.join(targetdir, OS.path.basename(master))
523
524 def __str__(me):
525 return 'copy/link'
526
527 def perform(me):
528 "Actually perform a CopyAction."
529 try:
530 STATUS.set(filestatus(me.master, 'link'))
531 OS.link(me.master, me.target)
532 except OSError, err:
533 if err.errno != E.EXDEV:
534 raise
535 STATUS.set(filestatus(me.master, 'copy'))
536 new = me.target + '.new'
537 SH.copyfile(me.master, new)
538 OS.rename(new, me.target)
539 STATUS.commit()
540
class ConvertAction (Action):
  """
  An Action object for converting a file to a given format.

  Additional attributes:

  id            The identification object for the master file.

  format        The format to which we're meant to convert the master.
  """

  def __init__(me, master, targetdir, id, format):
    """
    Initialize a ConvertAction.

    The target is named after MASTER, but lives in TARGETDIR and has its
    extension replaced by FORMAT's `EXT'.
    """
    Action.__init__(me, master)
    stem = OS.path.splitext(OS.path.basename(master))[0]
    me.target = OS.path.join(targetdir, '%s.%s' % (stem, format.EXT))
    me.id = id
    me.format = format

  def __str__(me):
    return 'convert to %s' % me.format.NAME

  def perform(me):
    "Actually perform a ConvertAction, by asking the format to `convert'."
    STATUS.set(filestatus(me.master, me))
    me.format.convert(me.master, me.id, me.target)
## Policies nest, so the grammar is recursive.  This is a forward
## declaration; the definition is filled in once all of the individual kinds
## of policy have been defined.
Policy = P.Forward()

class FormatPolicy (object):
  """
  A FormatPolicy object represents a set of rules for how to convert files.

  Given a master file, the FormatPolicy will identify it and return a list of
  actions to be performed.  The methods required of a FormatPolicy are:

  setcategory(CAT)
        Store CAT as the policy's category.  Check that this is consistent
        with the policy as stored.

  actions(MASTER, TARGETDIR, ID, COHORT)
        Given a MASTER file, identified as ID, a target directory
        TARGETDIR, and a list COHORT of (FILE, ID) pairs for other files
        of the same category in the same directory, return a list of
        actions to be performed to get the target directory into the right
        form.  The list might be empty if the policy object /rejects/ the
        file.
  """
589
class AndPolicy (FormatPolicy):
  """
  A FormatPolicy which does the union of a bunch of other policies.

  Each subsidiary policy is invoked in turn.  The highest-priority action for
  each target file is returned.
  """

  def __init__(me, policies):
    "Initialize the policy from a list of subsidiary POLICIES."
    me._policies = policies

  def setcategory(me, cat):
    "Store CAT as the category, and pass it on to the subsidiary policies."
    me.cat = cat
    for p in me._policies:
      p.setcategory(cat)

  def actions(me, master, targetdir, id, cohort):
    "Return the winning action for each target proposed by any policy."

    ## Keep the best action seen so far for each target file name.  An
    ## action's `choose' method copes with there being no incumbent.
    best = {}
    for p in me._policies:
      for a in p.actions(master, targetdir, id, cohort):
        best[a.target] = a.choose(best.get(a.target))
    return best.values()

And = K('and') - D('{') - R(Policy) - D('}')
And.setParseAction(lambda s, l, t: AndPolicy(t[0]))
618
class OrPolicy (FormatPolicy):
  """
  A FormatPolicy which tries other policies and uses the first that accepts.

  Each subsidiary policy is invoked in turn.  If any accepts, the actions it
  proposes are returned and no further policies are invoked.  If none accepts
  then the file is rejected.
  """

  def __init__(me, policies):
    "Initialize the policy from a list of subsidiary POLICIES."
    me._policies = policies

  def setcategory(me, cat):
    "Store CAT as the category, and pass it on to the subsidiary policies."
    me.cat = cat
    for p in me._policies:
      p.setcategory(cat)

  def actions(me, master, targetdir, id, cohort):
    "Return the actions of the first policy to propose any, or an empty list."
    for p in me._policies:
      proposed = p.actions(master, targetdir, id, cohort)
      if proposed: return proposed
    return []

Or = K('or') - D('{') - R(Policy) - D('}')
Or.setParseAction(lambda s, l, t: OrPolicy(t[0]))
646
class AcceptPolicy (FormatPolicy):
  """
  A FormatPolicy which copies files in a particular format.

  If all of the files in a cohort are recognized as being in a particular
  format (including this one), then accept it with a CopyAction; otherwise
  reject.
  """

  def __init__(me, format):
    "Initialize the policy to accept files matching FORMAT."
    me._format = format

  def setcategory(me, cat):
    "Store CAT as the category; raise ValueError if the format isn't in it."
    fmt = me._format
    if fmt.CATEGORY is not cat:
      raise ValueError(
        "Accept format `%s' has category `%s', not `%s'" %
        (fmt.__class__.__name__, fmt.CATEGORY.name, cat.name))
    me.cat = cat

  def actions(me, master, targetdir, id, cohort):
    "Propose copying MASTER if it and its whole COHORT pass the format check."
    ok = me._format.check
    if not ok(id): return []
    for f, cid in cohort:
      if not ok(cid): return []
    return [CopyAction(master, targetdir)]

Accept = K('accept') - Format
Accept.setParseAction(lambda s, l, t: AcceptPolicy(t[0]))
676
class ConvertPolicy (FormatPolicy):
  """
  A FormatPolicy which copies files in a particular format or converts if
  necessary.

  Unlike AcceptPolicy, this policy never rejects a file, and pays no
  attention to the rest of the cohort.
  """

  def __init__(me, format):
    "Initialize the policy to deliver files matching FORMAT."
    me._format = format

  def setcategory(me, cat):
    """
    Store CAT as the policy's category.

    Raise ValueError if the policy's format belongs to some other category.
    """
    if me._format.CATEGORY is not cat:
      raise ValueError(
        "Convert format `%s' has category `%s', not `%s'" %
        (me._format.__class__.__name__,
         me._format.CATEGORY.name, cat.name))
    me.cat = cat

  def actions(me, master, targetdir, id, cohort):
    """
    Return a one-element list containing the action needed for MASTER.

    If the file identified by ID already satisfies the format then it's
    copied; otherwise it's converted.
    """
    if me._format.check(id):
      return [CopyAction(master, targetdir)]
    else:
      return [ConvertAction(master, targetdir, id, me._format)]

Convert = K('convert') - Format
Convert.setParseAction(lambda s, l, t: ConvertPolicy(t[0]))

## Now that all of the kinds of policy exist, tie the recursive knot.
Policy << (And | Or | Accept | Convert)
703
704 ###--------------------------------------------------------------------------
705 ### Audio handling, based on GStreamer.
706
def make_element(factory, name = None, **props):
  """
  Return a new element from the FACTORY with the given NAME and PROPS.

  FACTORY names a GStreamer element factory; the keyword arguments PROPS are
  set as properties on the new element.
  """
  element = GS.element_factory_make(factory, name)
  element.set_properties(**props)
  return element
712
class GStreamerProgressEyecandy (ProgressEyecandy):
  """
  Provide amusement while GStreamer is busy doing something.

  The GStreamerProgressEyecandy object is a context manager.  Wrap it round
  your GStreamer loop to provide progress information for an operation.
  """

  def __init__(me, what, elt, **kw):
    """
    Initialize a progress meter.

    WHAT is a prefix string to be written before the progress eyecandy
    itself.  ELT is a GStreamer element to interrogate to find the progress
    information.  Other keyword arguments are passed on to ProgressEyecandy.
    """
    me._elt = elt
    ProgressEyecandy.__init__(me, what, **kw)

  def _update(me):
    """
    Called by GLib main event loop to update the eyecandy.

    Returns true, so that the timeout stays installed.
    """
    me.show()
    return True

  def _timer(me):
    """
    Update the progress meter.

    This does the same job as `_update'; nothing in this class uses it.
    """
    me.show()
    return True

  def progress(me):
    """
    Return the current progress as a pair (CURRENT, MAX).

    These are the element's position and duration, as times; if either query
    fails then return (None, None).
    """

    ## Fetch the current progress information.  We get the duration each
    ## time, because (particularly with VBR-encoded MP3 inputs) the estimated
    ## duration can change as we progress.  Hopefully it settles down fairly
    ## soon.
    try:
      pos, _ = me._elt.query_position(GS.FORMAT_TIME)
      dur, _ = me._elt.query_duration(GS.FORMAT_TIME)
    except GS.QueryError:
      return None, None
    return pos, dur

  def __enter__(me):
    "Enter context: attach progress meter display."

    ## Update regularly, since the pipeline runs asynchronously -- but only
    ## if we're showing pointless frippery at all.
    if STATUS.eyecandyp:
      me._id = G.timeout_add(200, me._update)

  def __exit__(me, ty, val, tb):
    "Leave context: remove display and report completion or failure."

    ## If we're not showing pointless frippery, there's nothing to remove.
    if STATUS.eyecandyp:
      G.source_remove(me._id)

    ## Report completion anyway: we won if there's no exception in flight.
    me.done(ty is None)

    ## As you were: don't swallow any exception.
    return False
782
class AudioIdentifier (object):
  """
  Analyses and identifies an audio file.

  Important properties are:

  cap   A capabilities structure describing the audio file data.  The most
        interesting thing in here is probably its name, which is a MIME
        type describing the data.

  dcap  A capabilities structure describing the decoded audio data.  This
        is of interest during conversion.

  mime  A set of MIME types for the file: the caller's first guess, and the
        name from `cap'.

  tags  A dictionary containing metadata tags from the file.  These are in
        GStreamer's encoding-independent format.

  bitrate An approximation to the stream's bitrate, in kilobits per second.
        This might be slow to work out for some files so it's computed on
        demand.
  """

  def __init__(me, file, mime):
    """
    Initialize the object suitably for identifying FILE.

    MIME is the caller's first guess at the file's MIME type.  Raise
    ValueError if the pipeline reports an error while examining the file.
    """

    ## Make some initial GStreamer objects.  We'll want the pipeline later if
    ## we need to analyse a poorly tagged MP3 stream, so save it away.
    me._pipe = GS.Pipeline()
    me._file = file
    bus = me._pipe.get_bus()
    bus.add_signal_watch()
    loop = G.MainLoop()

    ## The basic recognition kit is based around `decodebin'.  We must keep
    ## it happy by giving it sinks for the streams it's found, which it
    ## announces asynchronously.
    source = make_element('filesrc', 'file', location = file)
    decoder = make_element('decodebin', 'decode')
    sink = make_element('fakesink')
    def decoder_pad_arrived(elt, pad):
      if pad.get_caps()[0].get_name().startswith('audio/'):
        elt.link_pads(pad.get_name(), sink, 'sink')
    dpaid = decoder.connect('pad-added', decoder_pad_arrived)
    me._pipe.add(source, decoder, sink)
    GS.element_link_many(source, decoder)

    ## Arrange to collect tags from the pipeline's bus as they're reported.
    ## If we reuse the pipeline later, we'll want different bus-message
    ## handling, so make sure we can take the signal handler away.  A
    ## failure is recorded in `fail' as an exception to be raised once the
    ## main loop has stopped.
    tags = {}
    fail = []
    def bus_message(bus, msg):
      if msg.type == GS.MESSAGE_ERROR:
        fail[:] = [ValueError(msg.structure['debug'])]
        loop.quit()
      elif msg.type == GS.MESSAGE_STATE_CHANGED:
        if msg.structure['new-state'] == GS.STATE_PAUSED and \
           msg.src == me._pipe:
          loop.quit()
      elif msg.type == GS.MESSAGE_TAG:
        tags.update(msg.structure)
    bmid = bus.connect('message', bus_message)

    ## We want to identify the kind of stream this is.  (Hmm.  The MIME type
    ## recognizer has already done this work, but GStreamer is probably more
    ## reliable.)  The `decodebin' has a `typefind' element inside which will
    ## announce the identified media type.  All we need to do is find it.
    me.cap = None
    me.dcap = None
    for e in decoder.elements():
      if e.get_factory().get_name() == 'typefind':
        tfelt = e
        break
    else:
      ## Raise explicitly rather than using `assert', so that the check
      ## still happens when Python is run with optimization.
      raise AssertionError('failed to find typefind element')

    ## Crank up most of the heavy machinery.  The message handler will stop
    ## the loop when things seem to be sufficiently well underway.
    me._pipe.set_state(GS.STATE_PAUSED)
    loop.run()
    bus.disconnect(bmid)
    decoder.disconnect(dpaid)
    if fail:
      me._pipe.set_state(GS.STATE_NULL)
      raise fail[0]

    ## Store the collected tags.
    me.tags = tags

    ## Gather the capabilities.  The `typefind' element knows the input data
    ## type.  The 'decodebin' knows the raw data type.
    me.cap = tfelt.get_pad('src').get_negotiated_caps()[0]
    me.mime = set([mime, me.cap.get_name()])
    me.dcap = sink.get_pad('sink').get_negotiated_caps()[0]

    ## If we found a plausible bitrate then stash it.  Otherwise note that we
    ## failed.  If anybody asks then we'll work it out then.
    if 'nominal-bitrate' in tags:
      me._bitrate = tags['nominal-bitrate']//1000
    elif 'bitrate' in tags and tags['bitrate'] >= 80000:
      me._bitrate = tags['bitrate']//1000
    else:
      me._bitrate = None

    ## The bitrate computation wants the file size.  Ideally we'd want the
    ## total size of the frames' contents, but that seems hard to dredge
    ## out.  If the framing overhead is small, this should be close enough
    ## for our purposes.
    me._bytes = OS.stat(file).st_size

  def __del__(me):
    "Close the pipeline down so we don't leak file descriptors."
    me._pipe.set_state(GS.STATE_NULL)

  @property
  def bitrate(me):
    """
    Return the approximate bit-rate of the input file.

    This might take a while if we have to work it out the hard way, which
    involves playing the stream through to the end.  Raise ValueError if the
    pipeline reports an error on the way.
    """

    ## If we already know the answer then just return it.
    if me._bitrate is not None:
      return me._bitrate

    ## Make up a new main loop.
    loop = G.MainLoop()

    ## Watch for bus messages.  We'll stop when we reach the end of the
    ## stream: then we'll have a clear idea of how long the track was.
    fail = []
    def bus_message(bus, msg):
      if msg.type == GS.MESSAGE_ERROR:
        fail[:] = [ValueError(msg.structure['debug'])]
        loop.quit()
      elif msg.type == GS.MESSAGE_EOS:
        loop.quit()
    bus = me._pipe.get_bus()
    bmid = bus.connect('message', bus_message)

    ## Get everything moving, and keep the user amused while we work.  The
    ## progress meter wants the status prefix and the element to interrogate
    ## as separate arguments.
    me._pipe.set_state(GS.STATE_PLAYING)
    with GStreamerProgressEyecandy(filestatus(me._file, 'measure bitrate'),
                                   me._pipe,
                                   silentp = True):
      loop.run()
    bus.disconnect(bmid)
    if fail:
      me._pipe.set_state(GS.STATE_NULL)
      raise fail[0]

    ## Now we should be able to find out our position accurately and work out
    ## a bitrate.  Cache it in case anybody asks again.  (The scale factor
    ## gives kilobits per second if the position is in nanoseconds, which is
    ## presumably what GStreamer's time format delivers.)
    t, hukairz = me._pipe.query_position(GS.FORMAT_TIME)
    me._bitrate = int(8*me._bytes*1e6/t)

    ## Done.
    return me._bitrate
944
945 class AudioFormat (BaseFormat):
946 """
947 An AudioFormat is a kind of Format specialized for audio files.
948
949 Format checks are done on an AudioIdentifier object.
950 """
951
952 PROPS = prop('bitrate', Num)
953
954 ## libmagic reports `application/ogg' for Ogg Vorbis files. We've switched
955 ## to GIO now, which reports either `audio/ogg' or `audio/x-vorbis+ogg'
956 ## depending on how thorough it's trying to be. Still, it doesn't do any
957 ## harm here; the main risk is picking up Ogg Theora files by accident, and
958 ## we'll probably be able to extract the audio from them anyway.
959 CATEGORY = FileCategory('audio', ['audio/*', 'application/ogg'],
960 AudioIdentifier)
961
def __init__(me, bitrate = None):
  """
  Construct a format object with an optional approximate BITRATE.

  The default of None means that `check' places no limit on the bitrate.
  """
  me.bitrate = bitrate
965
def check(me, id):
  """
  Return whether the AudioIdentifier ID is suitable for our purposes.

  The file is suitable if one of its MIME types is among ours, and either we
  have no bitrate requirement or its bitrate is no more than sqrt(2) times
  ours.  The result is only guaranteed to be true or false in the Boolean
  sense: a MIME-type mismatch yields an empty set.

  Subclasses can either override this method or provide a property
  `MIMETYPES', which is a set of GStreamer MIME types matching this format:
  it's intersected with ID's `mime' set.
  """

  ## Check the MIME types first: if they don't match then we mustn't go on
  ## to ask about the bitrate, which might be slow to work out.
  common = id.mime & me.MIMETYPES
  if not common:
    return common

  ## Now see whether the bitrate is tolerable.
  return me.bitrate is None or id.bitrate <= me.bitrate * sqrt(2)
976
def encoder(me):
  """
  Constructs a GStreamer element to encode audio input.

  Subclasses can either override this method (or replace `convert'
  entirely), or provide a method `encoder_chain' which returns a list of
  elements to be linked together in sequence.  The first element in the
  chain must have a pad named `sink' and the last must have a pad named
  `src'.

  The chain is wrapped up in a bin, with the pads at its two ends exposed as
  the bin's own `sink' and `src' pads.
  """
  chain = me.encoder_chain()
  first, last = chain[0], chain[-1]

  ## Put the elements in a bin, and join them together.
  wrapper = GS.Bin()
  wrapper.add(*chain)
  GS.element_link_many(*chain)

  ## Expose the ends of the chain.
  wrapper.add_pad(GS.GhostPad('sink', first.get_pad('sink')))
  wrapper.add_pad(GS.GhostPad('src', last.get_pad('src')))
  return wrapper
994
def convert(me, master, id, target):
  """
  Encode audio from MASTER, already identified as ID, writing it to TARGET.

  The encoded output is written to a scratch file `TARGET.new', handed to
  `fixup', and only then renamed to TARGET, so a failed conversion doesn't
  leave a half-written TARGET behind.

  Raises `ValueError', carrying GStreamer's debug text, if the pipeline
  reports an error.  An `OSError' propagates if a stale scratch file can't
  be removed or the final rename fails.

  See `encoder' for subclasses' responsibilities.
  """

  ## Construct the necessary equipment.
  pipe = GS.Pipeline()
  bus = pipe.get_bus()
  loop = G.MainLoop()

  ## Make sure that there isn't anything in the way of our output.  We're
  ## going to write to a scratch file so that we don't get confused by
  ## half-written rubbish left by a crashed program.
  new = target + '.new'
  try:
    OS.unlink(new)
  except OSError as err:
    if err.errno != E.ENOENT:
      raise

  ## Piece together our pipeline.  The annoying part is that the
  ## `decodebin' doesn't have any source pads yet, so our chain is in two
  ## halves for now.
  source = make_element('filesrc', 'source', location = master)
  decoder = make_element('decodebin', 'decode')
  conv = make_element('audioconvert', 'convert')
  encoder = me.encoder()
  sink = make_element('filesink', 'sink', location = new)
  pipe.add(source, decoder, conv, encoder, sink)
  GS.element_link_many(source, decoder)
  GS.element_link_many(conv, encoder, sink)

  ## Some decoders (e.g., the AC3 decoder) include channel-position
  ## indicators in their output caps.  The Vorbis encoder interferes with
  ## this, and you end up with a beautifully encoded mono signal from a
  ## stereo source.  From a quick butchers at the `vorbisenc' source, I
  ## /think/ that this is only a problem with stereo signals: mono signals
  ## are mono already, and `vorbisenc' accepts channel positions if there
  ## are more than two channels.
  ##
  ## So we have this bodge.  We already collected the decoded audio caps
  ## during identification.  So if we see 2-channel audio with channel
  ## positions, we strip the positions off forcibly by adding a filter.
  if id.dcap.get_name().startswith('audio/x-raw-') and \
     id.dcap.has_field('channels') and \
     id.dcap['channels'] == 2 and \
     id.dcap.has_field('channel-positions'):
    dcap = GS.Caps()
    c = id.dcap.copy()
    c.remove_field('channel-positions')
    dcap.append(c)
  else:
    dcap = None

  ## Hook onto the `decodebin' so we can link together the two halves of
  ## our encoding chain.  For now, we'll hope that there's only one audio
  ## stream in there, and just throw everything else away.
  def decoder_pad_arrived(elt, pad):
    if pad.get_caps()[0].get_name().startswith('audio/'):
      if dcap:
        elt.link_pads_filtered(pad.get_name(), conv, 'sink', dcap)
      else:
        elt.link_pads(pad.get_name(), conv, 'sink')
  decoder.connect('pad-added', decoder_pad_arrived)

  ## Watch the bus for completion messages.  A failure is remembered as a
  ## ready-made exception, to be raised once the pipeline has been shut
  ## down.
  fail = []
  def bus_message(bus, msg):
    if msg.type == GS.MESSAGE_ERROR:
      fail[:] = [ValueError(msg.structure['debug'])]
      loop.quit()
    elif msg.type == GS.MESSAGE_EOS:
      loop.quit()
  bus.add_signal_watch()
  bmid = bus.connect('message', bus_message)

  ## Get everything ready and let it go.  Whatever happens, shut the
  ## pipeline down again and take our watch off the bus: a signal watch
  ## which is never removed stays attached to the main context, and we may
  ## well be called a great many times in one run.
  try:
    pipe.set_state(GS.STATE_PLAYING)
    with GStreamerProgressEyecandy(filestatus(master,
                                              'convert to %s' % me.NAME),
                                   pipe):
      loop.run()
  finally:
    pipe.set_state(GS.STATE_NULL)
    bus.disconnect(bmid)
    bus.remove_signal_watch()
  if fail:
    raise fail[0]

  ## Fix up the output file if we have to.
  me.fixup(new)

  ## We're done.
  OS.rename(new, target)
1088
class OggVorbisFormat (AudioFormat):
  """
  AudioFormat object for Ogg Vorbis.

  Audio is encoded by `vorbisenc' and wrapped up by `oggmux'.  If a target
  bitrate was requested, it is translated into a Vorbis quality setting
  using `QMAP'.
  """

  NAME = 'Ogg Vorbis'
  EXT = 'ogg'
  MIMETYPES = set(['application/ogg', 'audio/x-vorbis', 'audio/ogg',
                   'audio/x-vorbis+ogg'])

  ## Pairs (QUALITY, BITRATE), in ascending order, relating Vorbis quality
  ## levels to nominal bitrates.  From https://en.wikipedia.org/wiki/Vorbis
  QMAP = [(-1,  45), ( 0,  64), ( 1,  80), ( 2,  96),
          ( 3, 112), ( 4, 128), ( 5, 160), ( 6, 192),
          ( 7, 224), ( 8, 256), ( 9, 320), (10, 500)]

  def encoder_chain(me):
    """
    Return the list of elements making up the Vorbis encoder.

    With a target bitrate, choose the lowest quality level in `QMAP' whose
    nominal bitrate is at least the target, and hand a tenth of that level
    to `vorbisenc' as its `quality'.  Raises `ValueError' if even the
    highest level falls short of the target.
    """
    props = {}
    if me.bitrate is not None:
      enough = [q for q, br in me.QMAP if br >= me.bitrate]
      if not enough:
        raise ValueError('no suitable quality setting found')
      props['quality'] = enough[0]/10.0
    return [make_element('vorbisenc', **props),
            make_element('oggmux')]

defformat('ogg-vorbis', OggVorbisFormat)
1115
class MP3Format (AudioFormat):
  """
  AudioFormat object for MP3.

  Audio is encoded by `lame' (with `vbr' set to 4), then passed through
  `xingmux' and `id3v2mux'.
  """

  NAME = 'MP3'
  EXT = 'mp3'
  MIMETYPES = set(['audio/mpeg'])

  def encoder_chain(me):
    """
    Return the list of elements making up the MP3 encoder.

    A target bitrate, if there is one, is handed to `lame' as its
    `vbr_mean_bitrate'.
    """
    props = dict(vbr = 4)
    if me.bitrate is not None:
      props['vbr_mean_bitrate'] = me.bitrate
    return [make_element('lame', **props),
            make_element('xingmux'),
            make_element('id3v2mux')]

  def fixup(me, path):
    """
    Fix up the MP3 file at PATH by adding an ID3v1.1 tag.

    GStreamer produces ID3v2 tags, but not ID3v1.  This seems unnecessarily
    unkind to stupid players.

    This is best-effort only: if the tag can't be written because of an
    encoding problem or an unacceptable genre, the file is left as it is.
    """
    tag = E3.Tag()
    tag.link(path)
    tag.setTextEncoding(E3.UTF_8_ENCODING)
    try:
      tag.update(E3.ID3_V1_1)
    except (UnicodeEncodeError, E3.tag.GenreException):
      ## Too bad: the stupid players will have to manage without.
      pass

defformat('mp3', MP3Format)
1146
1147 ###--------------------------------------------------------------------------
1148 ### Image handling, based on the Python Imaging Library.
1149
class ImageIdentifier (object):
  """
  Analyses and identifies an image file.

  On success, the `img' property holds the PIL Image object for the file,
  which can be inspected, and `mime' is a one-element set containing the
  MIME type we were told about.
  """

  def __init__(me, file, mime):
    """
    Open FILE as an image, remembering MIME as its content type.

    Raises `IdentificationFailure' if PIL can't work out what kind of file
    it is; genuine I/O errors are propagated unchanged.
    """

    ## Get PIL to open the file.  It will magically work out what kind of
    ## file it is.
    try:
      img = I.open(file)
    except IOError as exc:

      ## PIL unhelpfully raises `IOError' on identification failure too.
      ## We can tell the two apart because a real I/O error has an `errno'
      ## and an identification failure doesn't.
      if exc.errno is not None:
        raise
      raise IdentificationFailure

    me.img = img
    me.mime = set([mime])
1173
class ImageFormat (BaseFormat):
  """
  An ImageFormat is a kind of Format specialized for image files.

  Subclasses need only supply the properties which every concrete Format
  subclass must have.  There is one extra rule, though: the `NAME'
  property must be the same as PIL's `format' name for the format, because
  we compare the two in `check' and pass `NAME' to PIL when saving.
  """

  PROPS = prop('size', Num)
  CATEGORY = FileCategory('image', ['image/*'], ImageIdentifier)

  def __init__(me, size = None, **kw):
    """
    Initialize an ImageFormat object.

    SIZE, if not None, is the largest acceptable width or height.  Any
    additional keywords are passed to PIL when encoding, and may be
    recognized by enhanced `check' methods in subclasses.
    """
    me._size = size
    me._props = kw

  def check(me, id):
    "Check whether the ImageIdentifier ID matches our requirements."

    ## Wrong format: no good.
    if id.img.format != me.NAME:
      return False

    ## Right format, and no size limit to worry about.
    if me._size is None:
      return True

    ## Both dimensions have to fit.
    width, height = id.img.size
    return width <= me._size and height <= me._size

  def convert(me, master, id, target):
    "Encode the file MASTER, identified as ID, writing the result to TARGET."

    ## We write to a scratch file, and move it into place at the end.
    scratch = target + '.new'

    ## The ImageIdentifier already has the image open, so there's no sense
    ## in opening it again.
    picture = id.img
    STATUS.set(filestatus(master, 'convert to %s' % me.NAME))

    ## Scale the image down if it's bigger than the stated maximum size.
    ## Thumbnailing works in place, so do it to a copy rather than
    ## clobbering the identifier's image.
    limit = me._size
    if limit is not None and max(picture.size[0], picture.size[1]) > limit:
      picture = picture.copy()
      picture.thumbnail((limit, limit), I.ANTIALIAS)

    ## Write the output image, and fix it up if necessary.
    picture.save(scratch, me.NAME, **me._props)
    me.fixup(scratch)

    ## We're done.
    OS.rename(scratch, target)
    STATUS.commit()
1231
class JPEGFormat (ImageFormat):
  """
  Image format for JPEG (actually JFIF) files.

  Interesting properties to set:

  optimize
        If present, take a second pass to select optimal encoder settings.

  progressive
        If present, make a progressive file.

  quality
        Integer from 1--100 (worst to best); default is 75.
  """
  NAME = 'JPEG'
  EXT = 'jpg'
  PROPS = (prop('optimize', None) |
           prop('progressive', None, 'progression') |
           prop('quality', Num))

defformat('jpeg', JPEGFormat)
1253
class PNGFormat (ImageFormat):
  """
  Image format for PNG files.

  Interesting properties to set:

  optimize
        If present, make a special effort to minimize the output file.
  """
  NAME = 'PNG'
  EXT = 'png'
  PROPS = prop('optimize', None)

defformat('png', PNGFormat)
1268
class BMPFormat (ImageFormat):
  """
  Image format for Windows BMP files, as used by RockBox.

  There are no additional properties to set.
  """
  EXT = 'bmp'
  NAME = 'BMP'

defformat('bmp', BMPFormat)
1279
1280 ###--------------------------------------------------------------------------
1281 ### Remaining parsing machinery.
1282
Type = K('type') - Name - D('{') - R(Policy) - D('}')
def build_type(s, l, t):
  """
  Parse action for `Type': turn a parsed `type' clause into a policy.

  S is the string being parsed, L the location of the match within it, and
  T the matched tokens: T[0] names a file category and T[1] holds the
  policies found between the braces.

  Returns the sole policy if there is exactly one, or an `AndPolicy'
  combining them otherwise; either way the policy has had its category
  set.  Raises `ParseException' if the category name isn't known.
  """
  try:
    cat = CATEGORYMAP[t[0]]
  except KeyError:
    ## Report the failure at the location we were given.
    raise P.ParseException(s, l, "Unknown category `%s'" % t[0])
  pols = t[1]
  if len(pols) == 1: pol = pols[0]
  else: pol = AndPolicy(pols)
  pol.setcategory(cat)
  return pol
Type.setParseAction(build_type)
1295
## The target jobs which have been performed so far, in order.
TARGETS = []

class TargetJob (object):
  """
  A job, from the configuration file, describing one target directory.

  The `targetdir' property is the name of the directory, and `policies'
  holds the policies which are to be applied to it.
  """

  def __init__(me, targetdir, policies):
    "Remember TARGETDIR and the POLICIES to apply to it."
    me.targetdir, me.policies = targetdir, policies

  def perform(me):
    "Add this job to the end of the `TARGETS' list."
    TARGETS.append(me)
1303
Target = K('target') - String - D('{') - R(Type) - D('}')

def build_target(s, l, t):
  """
  Parse action for `Target': make a `TargetJob' from the matched tokens.

  T[0] is the target directory name and T[1] the policies for it.
  """
  targetdir, policies = t[0], t[1]
  return TargetJob(targetdir, policies)

Target.setParseAction(build_target)
1308
## Variables which the configuration file can set.  Note that `master' is
## always present here; it is simply None until somebody sets it.
VARS = { 'master': None }

class VarsJob (object):
  """
  A job, from the configuration file, which sets some variables.

  The `vars' property is a sequence of (NAME, VALUE) pairs.
  """

  def __init__(me, vars):
    "Remember the (NAME, VALUE) pairs in VARS for later."
    me.vars = vars

  def perform(me):
    "Store each of our variables in the `VARS' dictionary."
    VARS.update((name, value) for name, value in me.vars)
1316
## A `vars' block holds variable settings; `master' is the only variable.
Var = prop('master', String)
Vars = K('vars') - D('{') - R(Var) - D('}')

def build_vars(s, l, t):
  """
  Parse action for `Vars': make a `VarsJob' from the matched tokens.

  T[0] holds the variable settings found between the braces.
  """
  settings = t[0]
  return VarsJob(settings)

Vars.setParseAction(build_vars)

## A configuration file is a sequence of `vars' and `target' blocks, with
## Python-style comments allowed.
TopLevel = Vars | Target
Config = R(TopLevel)
Config.ignore(P.pythonStyleComment)
1326
1327 ###--------------------------------------------------------------------------
1328 ### The directory grobbler.
1329
def grobble(master, targets, noact = False):
  """
  Work through the MASTER directory, writing converted files to TARGETS.

  The TARGETS are a list of `TargetJob' objects, each describing a target
  directory and the policies to apply to it.

  If NOACT is true, then don't actually do anything permanent to the
  filesystem.

  Returns a list of (MASTERFILE, EXCEPTION) pairs, one for each master file
  or directory whose processing failed with an `IOError' or `OSError'.
  """

  ## Transform the targets into a more convenient data structure: for each
  ## target, in order, a dictionary mapping each category to the list of
  ## policies which apply to it.
  tpolmap = []
  for t in targets:
    pmap = {}
    tpolmap.append(pmap)
    for p in t.policies: pmap.setdefault(p.cat, []).append(p)

  ## Keep track of the current position in the master tree.
  dirs = []

  ## And the files which haven't worked.
  broken = []

  def grobble_file(master, pmap, targetdir, cohorts):
    ## Convert MASTER, writing the result to TARGETDIR.
    ##
    ## The COHORTS are actually (CAT, ID, COHORT) triples, where a COHORT is
    ## a list of (FILENAME, ID) pairs.
    ##
    ## Since this function might convert the MASTER file, the caller doesn't
    ## know the name of the output files, so we return them as a list.

    done = set()
    st_m = OS.stat(master)

    ## Work through each category listed and apply its policy.
    for cat, id, cohort in cohorts:

      ## Go through the category's policies and see if any match.  If we fail
      ## here, see if there are more categories to try.
      for pol in pmap[cat]:
        acts = pol.actions(master, targetdir, id, cohort)
        if acts: break
      else:
        continue

      ## Work through the targets one by one.
      for a in acts:
        done.add(a.target)

        ## Find out whether the target file already exists and is up-to-date
        ## with respect to the master.  (Caution here with low-resolution
        ## timestamps.)  If it's OK, then just move on.  A target which is
        ## the very same file as the master (a hardlink) is OK too.
        try:
          st_t = OS.stat(a.target)
          if st_m.st_mtime < st_t.st_mtime or \
             (st_m.st_ino, st_m.st_dev) == (st_t.st_ino, st_t.st_dev):
            continue
        except OSError as err:
          if err.errno not in (E.ENOENT, E.ENOTDIR):
            raise

        ## We have real work to do.  If there's a current status message,
        ## it's the containing directory so flush it so that people know
        ## where we are.
        STATUS.commit()

        ## Remove the target.  (A hardlink will fail if the target already
        ## exists.)
        if not noact:
          try:
            OS.unlink(a.target)
          except OSError as err:
            if err.errno not in (E.ENOENT, E.ENOTDIR):
              raise

        ## Do whatever it is we decided to do.
        if noact:
          STATUS.commit(filestatus(master, a))
        else:
          a.perform()

    ## We're done.  Return the names of the targets.
    return list(done)

  @contextmanager
  def wrap(masterfile):
    ## Handle exceptions found while trying to convert a particular file or
    ## directory.

    try:
      yield masterfile

    ## Something bad happened.  Report the error, but continue.  (This list
    ## of exceptions needs a lot of work.)
    except (IOError, OSError) as exc:
      STATUS.clear()
      STATUS.commit(filestatus(masterfile, 'failed (%s)' % exc))
      broken.append((masterfile, exc))

  def grobble_dir(master, targets):
    ## Recursively convert files in MASTER, writing them to the TARGETS.
    ## Here, the TARGETS are directory names, in the same order as the
    ## entries of `tpolmap'.

    ## Keep track of the subdirectories we encounter, because we'll need to
    ## do all of those in one go at the end.
    subdirs = set()

    ## Work through each target directory in turn.
    for target, pmap in zip(targets, tpolmap):

      ## Make sure the TARGET exists and is a directory.  It's a fundamental
      ## assumption of this program that the entire TARGET tree is
      ## disposable, so if something exists but isn't a directory, we should
      ## kill it.
      if not OS.path.isdir(target):
        if OS.path.exists(target):
          STATUS.commit(filestatus(target, 'clear nondirectory'))
          if not noact:
            OS.unlink(target)
        STATUS.commit(filestatus(target, 'create directory'))
        if not noact:
          OS.mkdir(target)

      ## Keep a list of things in the target.  As we convert files, we'll
      ## check them off.  Anything left over is rubbish and needs to be
      ## deleted.  (The directory might not exist if we're not acting.)
      checklist = {}
      try:
        for i in OS.listdir(target):
          checklist[i] = False
      except OSError as err:
        if err.errno not in (E.ENOENT, E.ENOTDIR):
          raise

      ## Keep track of the files in each category.
      catmap = {}
      todo = []
      done = []

      ## Work through the master files.
      for f in sorted(OS.listdir(master)):

        ## If the killswitch has been pulled then stop.  The whole idea is
        ## that we want to cause a clean shutdown if possible, so we don't
        ## want to do it in the middle of encoding because the encoding
        ## effort will have been wasted.  This is the only place we need to
        ## check.  If we've exited the loop, then clearing old files will
        ## probably be fast, and we'll either end up here when the recursive
        ## call returns or we'll be in the same boat as before, clearing old
        ## files, only up a level.  If worst comes to worst, we'll be killed
        ## forcibly somewhere inside `SH.rmtree', and that can continue where
        ## it left off.
        if KILLSWITCH.is_set():
          return

        ## Do something with the file.
        with wrap(OS.path.join(master, f)) as masterfile:

          ## If it's a directory then prepare to grobble it recursively, but
          ## don't do that yet.
          if OS.path.isdir(masterfile):
            subdirs.add(f)
            done.append(OS.path.join(target, f))

          ## Otherwise it's a file.  Work out what kind, and stash it under
          ## the appropriate categories.  Later, we'll apply policy to the
          ## files, by category, and work out what to do with them all.
          else:
            gf = GIO.File(masterfile)
            mime = gf.query_info('standard::content-type').get_content_type()
            cats = []
            for cat in pmap:
              id = cat.identify(masterfile, mime)
              if id is None: continue
              catmap.setdefault(cat, []).append((masterfile, id))
              cats.append((cat, id))

            ## Nobody recognized the file, so there's no identification to
            ## record.  Say so explicitly rather than reusing the loop
            ## variable, which isn't even bound if this target has no
            ## policies at all.
            if not cats:
              catmap.setdefault(None, []).append((masterfile, None))
            todo.append((masterfile, cats))

      ## Work through the categorized files to see what actions to do for
      ## them.
      for masterfile, cats in todo:
        with wrap(masterfile):
          done += grobble_file(masterfile, pmap, target,
                               [(cat, id, catmap[cat]) for cat, id in cats])

      ## Check the results off the list so that we don't clear it later.
      for f in done:
        checklist[OS.path.basename(f)] = True

      ## Maybe there's stuff in the target which isn't accounted for.  Delete
      ## it: either the master has changed, or the policy for this target has
      ## changed.  Either way, the old files aren't wanted.
      for f in checklist:
        if not checklist[f]:
          STATUS.commit(filestatus(f, 'clear bogus file'))
          if not noact:
            bogus = OS.path.join(target, f)
            try:
              if OS.path.isdir(bogus):
                SH.rmtree(bogus)
              else:
                OS.unlink(bogus)
            except OSError as err:
              if err.errno != E.ENOENT:
                raise

    ## If there are subdirectories which want processing then do those.
    ## Keep the user amused by telling him where we are in the tree.
    for d in sorted(subdirs):
      dirs.append(d)
      STATUS.set('/'.join(dirs))
      with wrap(OS.path.join(master, d)) as masterdir:
        try:
          grobble_dir(masterdir, [OS.path.join(t, d) for t in targets])
        finally:
          dirs.pop()
          STATUS.set('/'.join(dirs))

  ## Right.  We're ready to go.
  grobble_dir(master, [t.targetdir for t in targets])
  return broken
1557
1558 ###--------------------------------------------------------------------------
1559 ### Command-line interface.
1560
## The name we were invoked as, for use in messages.
QUIS = OS.path.basename(SYS.argv[0])

def moan(msg):
  """
  Report a warning message to the user.

  MSG is written to standard error, on a line of its own, prefixed by the
  program name.
  """
  line = '%s: %s\n' % (QUIS, msg)
  SYS.stderr.write(line)
1566
def die(msg):
  """
  Report a fatal error message to the user, and give up.

  MSG is reported using `moan'; then the program exits with status 1, by
  way of `SystemExit'.
  """
  moan(msg)
  raise SystemExit(1)
1571
def parse_opts(args):
  """
  Parse command-line arguments in ARGS.

  As well as parsing the options, this acts on them: it switches on the
  progress eyecandy if that was requested, starts the timeout thread if a
  timeout was given, and reads the configuration file named by the single
  non-option argument, performing each of the jobs found there.

  Returns the option values object.  Problems with the command line are
  reported through the option parser's usual error handling.
  """

  ## Build the option parser object.
  op = OP.OptionParser(prog = QUIS, version = VERSION,
                       usage = '%prog [-in] [-t TIMEOUT] [-T TIMEOUT] '
                               'CONFIG',
                       description = """\
Convert a directory tree of files according to the configuration file
CONFIG.
""")

  ## Timeout handling.  A time is a nonnegative integer followed by an
  ## optional unit -- `d', `h', `m' or `s' -- defaulting to seconds.  The
  ## pattern is anchored at the end so that trailing rubbish is rejected
  ## rather than quietly ignored.
  def cb_time(opt, ostr, arg, op):
    m = RX.match(r'\s*(\d+)\s*([dhms]?)\s*$', arg)
    if not m:
      raise OP.OptionValueError('bad time value `%s\'' % arg)
    t, u = m.groups()
    t = int(t) * { '': 1, 's': 1, 'm': 60, 'h': 3600, 'd': 86400 }[u]
    setattr(op.values, opt.dest, t)
  op.add_option('-t', '--timeout', type = 'string', metavar = 'SECS',
                dest = 'timeout',
                help = 'stop processing nicely after SECS',
                action = 'callback', callback = cb_time)
  op.add_option('-T', '--timeout-nasty', type = 'string', metavar = 'SECS',
                dest = 'timeout_nasty',
                help = 'stop processing unpleasantly after further SECS',
                action = 'callback', callback = cb_time)

  ## Other options.
  op.add_option('-i', '--interactive', action = 'store_true', dest = 'tty',
                help = 'provide progress information')
  op.add_option('-n', '--no-act', action = 'store_true', dest = 'noact',
                help = 'don\'t actually modify the filesystem')

  ## Ready to rock.
  op.set_defaults(formats = [], noact = False,
                  timeout = None, timeout_nasty = 300)
  opts, args = op.parse_args(args)

  ## Check that we got the non-option arguments that we want.
  if len(args) != 1:
    op.error('wrong number of arguments')

  ## Act on the options.  The timeout thread is a daemon so that it doesn't
  ## keep the process alive once the main thread has finished.
  if opts.tty:
    STATUS.eyecandyp = True
  if opts.timeout is not None:
    to = TH.Thread(target = timeout,
                   args = (opts.timeout, opts.timeout_nasty))
    to.daemon = True
    to.start()

  ## Parse the configuration file, and do what it says.
  with open(args[0]) as conf:
    jobs, = Config.parseFile(conf, True)
  for j in jobs:
    j.perform()

  return opts
1637
if __name__ == '__main__':
  opts = parse_opts(SYS.argv[1:])

  ## The `master' variable is always present in `VARS' -- it starts out as
  ## None -- so we have to check its value, not merely that it's there.
  if VARS.get('master') is None:
    die("no master directory set")

  ## Do the work, and report anything which didn't convert.
  broken = grobble(VARS['master'], TARGETS, opts.noact)
  if broken:
    moan('failed to convert some files:')
    for file, exc in broken:
      moan('%s: %s' % (file, exc))
    SYS.exit(1)

  ## This is basically a successful completion: we did what we were asked to
  ## do.  It seems polite to report a message, though.
  ##
  ## Why don't we have a nonzero exit status?  The idea would be that a
  ## calling script would be interested that we used up all of our time, and
  ## not attempt to convert some other directory as well.  But that doesn't
  ## quite work.  Such a script would need to account correctly for time we
  ## had spent even if we complete successfully.  And if the script is having
  ## to watch the clock itself, it can do that without our help here.
  if KILLSWITCH.is_set():
    moan('killed by timeout')
1660
1661 ###----- That's all, folks --------------------------------------------------