gremlin/gremlin.in: Fix licensing notice.
[autoys] / gremlin / gremlin.in
CommitLineData
d4a7d7b5 1#! @PYTHON@
583b7e4a
MW
2###
3### Convert a directory tree of audio files
4###
5### (c) 2010 Mark Wooding
6###
7
8###----- Licensing notice ---------------------------------------------------
9###
9e3a516f
MW
10### This file is part of the `autoys' audio tools collection.
11###
12### `autoys' is free software; you can redistribute it and/or modify
583b7e4a
MW
13### it under the terms of the GNU General Public License as published by
14### the Free Software Foundation; either version 2 of the License, or
15### (at your option) any later version.
16###
9e3a516f 17### `autoys' is distributed in the hope that it will be useful,
583b7e4a
MW
18### but WITHOUT ANY WARRANTY; without even the implied warranty of
19### MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20### GNU General Public License for more details.
21###
22### You should have received a copy of the GNU General Public License
9e3a516f 23### along with `autoys'; if not, write to the Free Software Foundation,
583b7e4a
MW
24### Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
25
26###--------------------------------------------------------------------------
27### External dependencies.
28
29## Language features.
30from __future__ import with_statement
31
32## Standard Python libraries.
33import sys as SYS
34import os as OS
35import errno as E
36import time as T
37import unicodedata as UD
38import fnmatch as FN
39import re as RX
40import shutil as SH
41import optparse as OP
42import threading as TH
43import shlex as L
44from math import sqrt
45from contextlib import contextmanager
46
47## eyeD3 tag fettling.
48import eyeD3 as E3
49
50## Gstreamer. It picks up command-line arguments -- most notably `--help' --
51## and processes them itself. Of course, its help is completely wrong. This
52## kludge is due to Jonas Wagner.
53_argv, SYS.argv = SYS.argv, []
54import gobject as G
55import gio as GIO
56import gst as GS
57SYS.argv = _argv
58
59## Python Imaging.
60from PIL import Image as I
61
62## Python parsing.
63import pyparsing as P
64
65###--------------------------------------------------------------------------
66### Special initialization.
67
d4a7d7b5 68VERSION = '@VERSION@'
583b7e4a
MW
69
70## GLib.
71G.threads_init()
72
73###--------------------------------------------------------------------------
74### Eyecandy progress reports.
75
def charwidth(s):
  """
  Return the width of S, in character cells.

  Specifically, this is the number of backspace characters required to
  overprint the string S.  If the current encoding for `stdout' appears to be
  usable then do a complicated Unicode thing; otherwise assume that
  characters take up one cell each.

  S may be either an encoded byte string, which is decoded using the
  encoding of `stdout', or a string which is already Unicode, which is
  measured as it stands.

  None of this handles tab characters in any kind of useful way.  Sorry.
  """

  ## If there's no encoding for stdout then we're doing something stupid.
  enc = SYS.stdout.encoding
  if enc is None: return len(s)

  ## Turn the string into Unicode so we can hack on it properly.  Text which
  ## is already Unicode mustn't be decoded again: doing so would force an
  ## implicit ASCII encoding first, which fails for exactly the strings we
  ## care about.  If decoding doesn't work out -- because the bytes are bad
  ## or the codec is unknown -- then fall back to being stupid.
  if isinstance(s, type(u'')):
    u = s
  else:
    try: u = s.decode(enc)
    except (UnicodeError, LookupError): return len(s)

  ## Our main problem is combining characters, but we should also try to
  ## handle wide (mostly Asian) characters, and zero-width ones.  This hack
  ## is taken mostly from http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c
  w = 0
  for ch in u:
    cd = ord(ch)
    if UD.category(ch) in ['Cf', 'Me', 'Mn'] or \
          0x1160 <= cd <= 0x11ff:
      ## Format controls, combining marks, and Hangul medial/final jamo
      ## occupy no cells of their own.
      pass
    elif UD.east_asian_width(ch) in ['F', 'W']:
      w += 2
    else:
      w += 1

  ## Done.
  return w
109
class StatusLine (object):
  """
  Maintains a status line containing ephemeral progress information.

  The status line isn't especially important, but it keeps interactive users
  amused.

  There should be only one status line object in your program; otherwise
  they'll interfere with each other and get confused.

  The update algorithm (in `set') is fairly careful to do the right thing
  with long status `lines', and to work properly in an Emacs `shell' buffer.

  Attributes:

  eyecandyp     True if stdout is a terminal, so that overprinting the
                status line is worthwhile.
  """

  def __init__(me):
    """
    Initialize the status line.

    If stdout has no usable file descriptor (e.g., because it's been
    replaced by an in-memory stream) then it certainly isn't a terminal, so
    quietly disable the eyecandy rather than failing.
    """
    me._last = ''
    me._lastlen = 0
    try: me.eyecandyp = OS.isatty(SYS.stdout.fileno())
    except (AttributeError, ValueError, EnvironmentError):
      me.eyecandyp = False

  def set(me, line):
    """
    Set the status line contents to LINE, replacing what was there before.

    This only produces actual output if stdout is interactive.
    """
    n = len(line)

    ## Eyecandy update.
    if me.eyecandyp:

      ## If the old line was longer, we need to clobber its tail, so work out
      ## what that involves.
      if n < me._lastlen:
        b = charwidth(me._last[n:])
        pre = '\b'*b + ' '*b
      else:
        pre = ''

      ## Now figure out the length of the common prefix between what we had
      ## before and what we have now.  This reduces the amount of I/O done,
      ## which keeps network traffic down on SSH links, and keeps down the
      ## amount of work slow terminal emulators like Emacs have to do.
      i = 0
      m = min(n, me._lastlen)
      while i < m and line[i] == me._last[i]:
        i += 1

      ## Actually do the output, all in one syscall.  Back up over the part
      ## of the old line which differs, and overwrite it with the new tail.
      b = charwidth(me._last[i:])
      SYS.stdout.write(pre + '\b'*b + line[i:])
      SYS.stdout.flush()

    ## Update our idea of what's gone on.
    me._lastlen = n
    me._last = line

  def clear(me):
    "Clear the status line.  Just like set('')."
    me.set('')

  def commit(me, line = None):
    """
    Commit the current status line, and maybe the string LINE.

    If the current status line is nonempty, then commit it to the transcript.
    If LINE is not None, then commit that to the transcript too.

    After all of this, we clear the status line to get back to a clean state.
    """
    if me._last:
      if me.eyecandyp:
        ## The text is already on the terminal: just finish the line.
        SYS.stdout.write('\n')
      else:
        ## Nothing has been written yet, so write the whole thing now.
        SYS.stdout.write(me._last + '\n')
    if line is not None:
      SYS.stdout.write(line + '\n')
    me._lastlen = 0
    me._last = ''

STATUS = StatusLine()
191
def filestatus(file, status):
  """
  Return a status-line string reporting STATUS against FILE.

  Only the base name of FILE is shown, indented by eight spaces.
  """
  indent = ' '*8
  leaf = OS.path.basename(file)
  return '%s%s: %s' % (indent, leaf, status)
194
class ProgressEyecandy (object):
  """
  Provide amusement while something big and complicated is happening.

  This is an abstract class.  Subclasses must provide a method `progress'
  returning a pair (CURRENT, MAX) indicating the current progress through the
  operation.  Either may be None if the progress isn't known.
  """

  def __init__(me, what, silentp = False):
    """
    Initialize a progress meter.

    WHAT is a prefix string to be written before the progress eyecandy
    itself.  If SILENTP is true then `done' doesn't report successful
    completion (though it still reports failure).
    """
    me._what = what
    me._silentp = silentp
    me._spinner = 0
    me._start = T.time()

  def _fmt_time(me, t):
    "Format T (in seconds) as a time, in (maybe hours) minutes and seconds."
    s, t = t % 60, int(t/60)
    m, h = t % 60, int(t/60)
    if h > 0:
      return '%d:%02d:%02d' % (h, m, s)
    else:
      return '%02d:%02d' % (m, s)

  def show(me):
    "Show the current level of progress."

    ## If we're not showing pointless frippery, don't bother at all.
    if not STATUS.eyecandyp:
      return

    ## Update the spinner index.
    me._spinner = (me._spinner + 1)%4
    spin = r'/-\|'[me._spinner]

    ## Fetch the current progress information.  Note that we always fetch
    ## both the current and maximum levels, because both might change if an
    ## operation revises its idea of how much work needs doing.
    cur, end = me.progress()

    ## If we couldn't get progress information, display something vaguely
    ## amusing anyway.  A maximum which isn't positive is no use to us
    ## either: we'd only end up dividing by zero or drawing a negative bar.
    if cur is None or end is None or end <= 0:
      STATUS.set('%s %c [unknown progress]' % (me._what, spin))
      return

    ## Work out -- well, guess -- the time remaining.  If we've overshot the
    ## supposed maximum then there's nothing left to wait for.
    if cur > 0:
      elapsed = T.time() - me._start
      eta = me._fmt_time(elapsed*max(end - cur, 0)/cur)
    else:
      eta = '???'

    ## Set the status bar.  Force the bar length to be an integer within the
    ## space available, whatever kind of numbers `progress' gave us.
    n = min(max(int(40*cur/end), 0), 40)
    STATUS.set('%s %c [%s%s] %3d%% (%s)' % \
                 (me._what,
                  spin,
                  '='*n, ' '*(40 - n),
                  100*cur/end,
                  eta))

  def done(me, win = True):
    "Show a completion notice, or a failure if WIN is false."
    if not win:
      STATUS.set('%s FAILED!' % me._what)
    elif not me._silentp:
      STATUS.set('%s done (%s)' %
                 (me._what,
                  me._fmt_time(T.time() - me._start)))
    else:
      ## Silent success: leave the status line alone.
      return
    STATUS.commit()
274
275###--------------------------------------------------------------------------
276### Timeout handling.
277
KILLSWITCH = TH.Event()

def timeout(t0, t1):
  """
  Enforce a time limit on the program.

  Sleep for T0 seconds and then set the `KILLSWITCH' event.  After a
  further T1 seconds, complain and exit the process immediately with
  status 3.
  """

  ## First stage: raise the flag.
  T.sleep(t0)
  KILLSWITCH.set()

  ## Second stage: give up and die without any further ceremony.
  T.sleep(t1)
  moan('dying messily due to timeout')
  OS._exit(3)
286
287###--------------------------------------------------------------------------
288### Parsing utilities.
289
## Allow hyphens in identifiers.
IDCHARS = P.alphanums + '-_'
P.Keyword.setDefaultKeywordChars(IDCHARS)

## Some common kinds of tokens.
Name = P.Word(IDCHARS)
Num = P.Word(P.nums)
Num.setParseAction(lambda toks: [int(tok) for tok in toks])
String = P.QuotedString('"', '\\')

## Handy abbreviations for constructed parser elements.
def K(k):
  "Return an element matching the keyword K, suppressing its token."
  return P.Keyword(k).suppress()

def D(d):
  "Return an element matching the literal delimiter D, suppressing it."
  return P.Literal(d).suppress()

def R(p):
  "Return an element matching zero or more of P, as a single grouped item."
  rep = P.ZeroOrMore(p)
  rep.setParseAction(lambda s, l, t: [t])
  return rep

O = P.Optional
304
305###--------------------------------------------------------------------------
306### Format identification and conversion.
307
class IdentificationFailure (Exception):
  "Raised by an identification function which can't cope with a file."

class FileCategory (object):
  """
  A FileCategory represents a class of files.

  For example, it's sensible to consider audio, or image files as a
  category.  A file category knows how to recognize member files from
  MIME content types.
  """

  def __init__(me, name, mime_pats, ident):
    """
    Construct a new category called NAME, and register it in `CATEGORYMAP'.

    The MIME_PATS are a list of `fnmatch' patterns to be compared with a
    MIME type.  The IDENT is a function which produces an identification
    object given a file's name and first-guess MIME type.  The object is
    passed to a Format's `check' method to see whether a file needs
    re-encoding, and to `convert' to assist with the conversion.

    An identification object must have an attribute `mime' which is a set of
    possible MIME types accumulated for the object.
    """
    me.name = name
    me._mime_pats = mime_pats
    me._ident = ident
    CATEGORYMAP[name] = me

  def identify(me, file, mime):
    """
    Attempt to identify FILE, given its apparent MIME type.

    If identification succeeds, return an identification object which can be
    used by associated file formats; otherwise return None.
    """
    for pat in me._mime_pats:
      if FN.fnmatchcase(mime, pat):
        ## This pattern matches, so have a go.  If the identifier gives up
        ## then carry on with the remaining patterns.
        try: return me._ident(file, mime)
        except IdentificationFailure: pass
    return None
353
class BaseFormat (object):
  """
  A BaseFormat object represents a particular encoding and parameters.

  The object can verify (the `check' method) whether a particular file
  matches its requirements, and if necessary (`convert') re-encode a file.

  Subclasses should define the following methods.

  check(ID)
        Answer whether the file identified by ID is acceptable according to
        the receiver's parameters.

  convert(MASTER, ID, TARGET)
        Convert the file MASTER, which has been identified as ID, according
        to the receiver's parameters, writing the output to TARGET.

  Subclasses should also provide these attributes.

  CATEGORY
        A FileCategory object for the category of files that this format
        lives within.

  EXT   A file extension to be applied to encoded output files.

  NAME  A user-facing name for the format.

  PROPS A parser element to parse a property definition.  It should produce
        a pair NAME, VALUE to be stored in a dictionary.

  Subclasses for different kinds of file may introduce more subclass
  protocol.
  """

  def fixup(me, path):
    "Post-encoding fixups for PATH.  The default is to do nothing."
    return None

391
FORMATMAP = {}
CATEGORYMAP = {}

def defformat(name, cls):
  """
  Define a format NAME using class CLS.

  CLS must have both `NAME' and `CATEGORY' attributes; raise ValueError if
  either is missing.
  """
  for attr, complaint in [('NAME', 'abstract class'),
                          ('CATEGORY', 'no category')]:
    if not hasattr(cls, attr):
      raise ValueError(complaint)
  FORMATMAP[name] = cls
402
class FormatParser (P.ParserElement):
  """
  Parse a format specifier:

  format-spec ::= string [format-properties]
  format-properties ::= `{' format-property (`,' format-property)* `}'

  The syntax of a format-property is determined by the PROPS attribute on the
  named format and its superclasses.

  The result of a successful parse is an instance of the named format's
  class, constructed with the parsed properties as keyword arguments.
  """

  ## We cache the parser elements we generate to avoid enormous consing.
  CACHE = {}

  def _propparser(me, fcls):
    """
    Build and return the property-list parser for the format class FCLS.

    Return None if neither FCLS nor any of its superclasses has a `PROPS'
    attribute.
    """

    ## Collect the distinct `PROPS' elements along the method-resolution
    ## order.  A class which doesn't define `PROPS' itself reports the one
    ## it inherits, so take care not to include anything twice.
    seen = set()
    alts = None
    for c in fcls.mro():
      try: p = c.PROPS
      except AttributeError: continue
      if p in seen: continue
      if alts is None: alts = p
      else: alts |= p
      seen.add(p)

    ## If there were none at all then there's nothing to parse.
    if alts is None: return None

    ## Otherwise, wrap the alternatives up as an optional braced list which
    ## produces a dictionary.
    props = P.delimitedList(alts)
    props.setParseAction(lambda s, l, t: dict(t.asList()))
    return O(D('{') - props - D('}'))

  def parseImpl(me, s, loc, actp = True):
    "Parse a format specifier from S at LOC; return the new LOC and format."

    ## Firstly, determine the format name.
    loc, toks = Name._parse(s, loc, actp)
    fmt = toks[0]

    ## Look up the format class.
    if fmt not in FORMATMAP:
      raise P.ParseException(s, loc, "Unknown format `%s'" % fmt)
    fcls = FORMATMAP[fmt]

    ## Fetch the property-list parser from the cache, if possible; else
    ## construct it.
    if fmt in me.CACHE:
      pp = me.CACHE[fmt]
    else:
      pp = me.CACHE[fmt] = me._propparser(fcls)

    ## Parse the properties.
    pd = {}
    if pp is not None:
      loc, r = pp._parse(s, loc, actp)
      if r: pd = r[0]

    ## Construct the format object and return it.
    return loc, fcls(**pd)

Format = FormatParser()
461
def prop(kw, pval, tag = None):
  """
  Return a parser element for a format property introduced by keyword KW.

  If PVAL is None then the property is a simple flag: the keyword alone
  produces the pair (TAG, True).  Otherwise the syntax is `KW = PVAL', and
  the element produces the pair (TAG, VALUE), where VALUE is the first token
  produced by PVAL.  TAG defaults to KW.
  """
  if tag is None: tag = kw
  if pval is None:
    p = K(kw)
    action = lambda s, l, t: (tag, True)
  else:
    p = K(kw) + D('=') + pval
    action = lambda s, l, t: (tag, t[0])
  p.setParseAction(action)
  return p
471
472###--------------------------------------------------------------------------
473### Policies and actions.
474
class Action (object):
  """
  An Action object represents a conversion action to be performed.

  This class isn't intended to be instantiated directly.  It exists to define
  some protocol common to all Action objects.

  Action objects have the following attributes.

  master        The name of the master (source) file.

  target        The name of the target (destination) file.

  PRIORITY      The priority of the action, for deciding which of two actions
                to perform.  Higher priorities are more likely to win.

  Converting an Action to a string describes the action in a simple
  user-readable manner.  The `perform' method actually carries the action
  out.
  """

  PRIORITY = 0

  def __init__(me, master):
    "Stash the MASTER file name for later."
    me.master = master

  def choose(me, him):
    """
    Choose either ME or HIM and return one.

    HIM may be None, in which case ME wins by default.  Otherwise ME wins
    only if its priority is strictly higher.
    """
    if him is not None and me.PRIORITY <= him.PRIORITY:
      return him
    return me
508
class CopyAction (Action):
  """
  An Action object for simply copying a file.

  Actually we try to hardlink it first, falling back to a copy later.  This
  is both faster and more efficient with regard to disk space.
  """

  ## Copying is good.  Linking is really good, but we can't tell the
  ## difference at this stage.
  PRIORITY = 10

  def __init__(me, master, targetdir):
    "Initialize a CopyAction, from MASTER to the TARGETDIR directory."
    Action.__init__(me, master)
    leaf = OS.path.basename(master)
    me.target = OS.path.join(targetdir, leaf)

  def __str__(me):
    return 'copy/link'

  def perform(me):
    """
    Actually perform a CopyAction.

    Try to make a hard link.  If that fails because the master and target
    are on different filesystems then copy the file to a scratch name
    alongside the target and rename it into place.  Any other failure is
    propagated to the caller.
    """
    try:
      STATUS.set(filestatus(me.master, 'link'))
      OS.link(me.master, me.target)
    except OSError as err:
      if err.errno != E.EXDEV:
        raise
      STATUS.set(filestatus(me.master, 'copy'))
      scratch = me.target + '.new'
      SH.copyfile(me.master, scratch)
      OS.rename(scratch, me.target)
    STATUS.commit()
542
class ConvertAction (Action):
  """
  An Action object for converting a file to a given format.

  Additional attributes:

  id            The identification object for the master file.

  format        The format to which we're meant to convert the master.
  """

  def __init__(me, master, targetdir, id, format):
    """
    Initialize a ConvertAction.

    The target is in TARGETDIR, and has the same name as MASTER but with
    its extension replaced by the FORMAT's `EXT'.
    """
    Action.__init__(me, master)
    stem = OS.path.splitext(OS.path.basename(master))[0]
    me.target = OS.path.join(targetdir, stem + '.' + format.EXT)
    me.id = id
    me.format = format

  def __str__(me):
    return 'convert to %s' % me.format.NAME

  def perform(me):
    "Actually perform a ConvertAction."
    STATUS.set(filestatus(me.master, me))
    me.format.convert(me.master, me.id, me.target)
569
## The policy grammar is recursive, so declare it now and fill it in once
## all of the individual policy kinds have been defined.
Policy = P.Forward()

class FormatPolicy (object):
  """
  A FormatPolicy object represents a set of rules for how to convert files.

  Given a master file, the FormatPolicy will identify it and return a list of
  actions to be performed.  The methods required of a FormatPolicy are:

  setcategory(CAT)
        Store CAT as the policy's category.  Check that this is consistent
        with the policy as stored.

  actions(MASTER, TARGETDIR, ID, COHORT)
        Given a MASTER file, identified as ID, a target directory
        TARGETDIR, and a list COHORT of (FILE, ID) pairs for other files
        of the same category in the same directory, return a list of
        actions to be performed to get the target directory into the right
        form.  The list might be empty if the policy object /rejects/ the
        file.
  """
591
class AndPolicy (FormatPolicy):
  """
  A FormatPolicy which does the union of a bunch of other policies.

  Each subsidiary policy is invoked in turn.  The highest-priority action for
  each target file is returned.
  """

  def __init__(me, policies):
    "Initialize the policy, given a list of subsidiary POLICIES."
    me._policies = policies

  def setcategory(me, cat):
    "Store CAT as our category, and pass it on to the subsidiary policies."
    me.cat = cat
    for pol in me._policies:
      pol.setcategory(cat)

  def actions(me, master, targetdir, id, cohort):
    "Return the winning action for each target proposed by any subpolicy."
    best = {}
    for pol in me._policies:
      for act in pol.actions(master, targetdir, id, cohort):
        tgt = act.target
        ## If somebody's already laid claim to this target then let the
        ## two actions fight it out.
        if tgt in best:
          act = act.choose(best[tgt])
        best[tgt] = act
    return best.values()

And = K('and') - D('{') - R(Policy) - D('}')
And.setParseAction(lambda s, l, t: AndPolicy(t[0]))
620
class OrPolicy (FormatPolicy):
  """
  A FormatPolicy which tries other policies and uses the first that accepts.

  Each subsidiary policy is invoked in turn.  If any accepts, the actions it
  proposes are returned and no further policies are invoked.  If none accepts
  then the file is rejected.
  """

  def __init__(me, policies):
    "Initialize the policy, given a list of subsidiary POLICIES."
    me._policies = policies

  def setcategory(me, cat):
    "Store CAT as our category, and pass it on to the subsidiary policies."
    me.cat = cat
    for pol in me._policies:
      pol.setcategory(cat)

  def actions(me, master, targetdir, id, cohort):
    "Return the actions of the first subpolicy to propose any, or nothing."
    for pol in me._policies:
      proposed = pol.actions(master, targetdir, id, cohort)
      if proposed:
        return proposed
    return []

Or = K('or') - D('{') - R(Policy) - D('}')
Or.setParseAction(lambda s, l, t: OrPolicy(t[0]))
648
class AcceptPolicy (FormatPolicy):
  """
  A FormatPolicy which copies files in a particular format.

  If all of the files in a cohort are recognized as being in a particular
  format (including this one), then accept it with a CopyAction; otherwise
  reject.
  """

  def __init__(me, format):
    "Initialize the policy, to accept files matching FORMAT."
    me._format = format

  def setcategory(me, cat):
    "Store CAT as our category; raise ValueError if our format disagrees."
    fmt = me._format
    if fmt.CATEGORY is not cat:
      raise ValueError(
        "Accept format `%s' has category `%s', not `%s'" %
        (fmt.__class__.__name__, fmt.CATEGORY.name, cat.name))
    me.cat = cat

  def actions(me, master, targetdir, id, cohort):
    "Return a CopyAction if MASTER and its whole COHORT match; else nothing."
    fmt = me._format

    ## The master file itself must be acceptable...
    if not fmt.check(id):
      return []

    ## ... and so must all of its friends.
    for f, cid in cohort:
      if not fmt.check(cid):
        return []

    return [CopyAction(master, targetdir)]

Accept = K('accept') - Format
Accept.setParseAction(lambda s, l, t: AcceptPolicy(t[0]))
678
class ConvertPolicy (FormatPolicy):
  """
  A FormatPolicy which copies files in a particular format or converts if
  necessary.
  """

  def __init__(me, format):
    "Initialize the policy, to produce files matching FORMAT."
    me._format = format

  def setcategory(me, cat):
    """
    Store CAT as the policy's category.

    Raise ValueError if our format belongs to a different category.
    """
    if me._format.CATEGORY is not cat:
      ## Take care to describe this as a `convert' policy: the user needs
      ## to be able to find the offending clause.
      raise ValueError(
        "Convert format `%s' has category `%s', not `%s'" %
        (me._format.__class__.__name__,
         me._format.CATEGORY.name, cat.name))
    me.cat = cat

  def actions(me, master, targetdir, id, cohort):
    """
    Return a list containing the single action needed for MASTER.

    If the file identified by ID already satisfies our format then it's
    simply copied to TARGETDIR; otherwise it's converted.  The COHORT is
    ignored.
    """
    if me._format.check(id):
      return [CopyAction(master, targetdir)]
    else:
      return [ConvertAction(master, targetdir, id, me._format)]

Convert = K('convert') - Format
Convert.setParseAction(lambda s, l, t: ConvertPolicy(t[0]))

Policy << (And | Or | Accept | Convert)
705
706###--------------------------------------------------------------------------
707### Audio handling, based on GStreamer.
708
def make_element(factory, name = None, **props):
  """
  Return a new element from the FACTORY with the given NAME and PROPS.

  The PROPS are keyword arguments, set as properties on the new element.
  """
  element = GS.element_factory_make(factory, name)
  element.set_properties(**props)
  return element
714
class GStreamerProgressEyecandy (ProgressEyecandy):
  """
  Provide amusement while GStreamer is busy doing something.

  The GStreamerProgressEyecandy object is a context manager.  Wrap it round
  your GStreamer loop to provide progress information for an operation.
  """

  def __init__(me, what, elt, **kw):
    """
    Initialize a progress meter.

    WHAT is a prefix string to be written before the progress eyecandy
    itself.  ELT is a GStreamer element to interrogate to find the progress
    information.  Other keyword arguments are passed on to
    `ProgressEyecandy'.
    """
    me._elt = elt
    ProgressEyecandy.__init__(me, what, **kw)

  def _update(me):
    "Called by GLib main event loop to update the eyecandy."
    me.show()
    ## Returning true keeps the timeout source alive.
    return True

  def _timer(me):
    """
    Update the progress meter.

    This is called periodically by the GLib main event-processing loop.
    """
    me.show()
    return True

  def progress(me):
    "Return the current progress as a pair (CURRENT, MAX)."

    ## Fetch the current progress information.  We get the duration each
    ## time, because (particularly with VBR-encoded MP3 inputs) the estimated
    ## duration can change as we progress.  Hopefully it settles down fairly
    ## soon.
    try:
      now, hunoz = me._elt.query_position(GS.FORMAT_TIME)
      total, hukairz = me._elt.query_duration(GS.FORMAT_TIME)
    except GS.QueryError:
      return None, None
    return now, total

  def __enter__(me):
    "Enter context: attach progress meter display."

    ## Update regularly, but only if we're showing pointless frippery at
    ## all.  The pipeline runs asynchronously.
    if STATUS.eyecandyp:
      me._id = G.timeout_add(200, me._update)

  def __exit__(me, ty, val, tb):
    "Leave context: remove display and report completion or failure."

    ## If we're not showing pointless frippery, there's nothing to remove.
    if STATUS.eyecandyp:
      G.source_remove(me._id)

    ## Report completion anyway: we won if there was no exception.
    me.done(ty is None)

    ## As you were: don't swallow any exception.
    return False
784
class AudioIdentifier (object):
  """
  Analyses and identifies an audio file.

  Important properties are:

  cap           A capabilities structure describing the audio file data.  The
                most interesting thing in here is probably its name, which is
                a MIME type describing the data.

  dcap          A capabilities structure describing the decoded audio data.
                This is of interest during conversion.

  mime          A set of possible MIME types for the file: the first guess
                we were given, and the name of `cap'.

  tags          A dictionary containing metadata tags from the file.  These
                are in GStreamer's encoding-independent format.

  bitrate       An approximation to the stream's bitrate, in kilobits per
                second.  This might be slow to work out for some files so
                it's computed on demand.
  """

  def __init__(me, file, mime):
    """
    Initialize the object suitably for identifying FILE.

    MIME is the first-guess MIME type for the file.  Raise ValueError if
    GStreamer reports an error while examining the file.
    """

    ## Make some initial GStreamer objects.  We'll want the pipeline later if
    ## we need to analyse a poorly tagged MP3 stream, so save it away.
    me._pipe = GS.Pipeline()
    me._file = file
    bus = me._pipe.get_bus()
    bus.add_signal_watch()
    loop = G.MainLoop()

    ## The basic recognition kit is based around `decodebin'.  We must keep
    ## it happy by giving it sinks for the streams it's found, which it
    ## announces asynchronously.
    source = make_element('filesrc', 'file', location = file)
    decoder = make_element('decodebin', 'decode')
    sink = make_element('fakesink')
    def decoder_pad_arrived(elt, pad):
      if pad.get_caps()[0].get_name().startswith('audio/'):
        elt.link_pads(pad.get_name(), sink, 'sink')
    dpaid = decoder.connect('pad-added', decoder_pad_arrived)
    me._pipe.add(source, decoder, sink)
    GS.element_link_many(source, decoder)

    ## Arrange to collect tags from the pipeline's bus as they're reported.
    ## If we reuse the pipeline later, we'll want different bus-message
    ## handling, so make sure we can take the signal handler away.
    tags = {}
    fail = []
    def bus_message(bus, msg):
      if msg.type == GS.MESSAGE_ERROR:
        fail[:] = (ValueError, msg.structure['debug'], None)
        loop.quit()
      elif msg.type == GS.MESSAGE_STATE_CHANGED:
        if msg.structure['new-state'] == GS.STATE_PAUSED and \
              msg.src == me._pipe:
          loop.quit()
      elif msg.type == GS.MESSAGE_TAG:
        tags.update(msg.structure)
    bmid = bus.connect('message', bus_message)

    ## We want to identify the kind of stream this is.  (Hmm.  The MIME type
    ## recognizer has already done this work, but GStreamer is probably more
    ## reliable.)  The `decodebin' has a `typefind' element inside which
    ## knows the identified media type.  All we need to do is find it, so
    ## that we can ask it about its capabilities later.  This is a real
    ## runtime check, so don't use `assert', which can be compiled away.
    me.cap = None
    me.dcap = None
    for e in decoder.elements():
      if e.get_factory().get_name() == 'typefind':
        tfelt = e
        break
    else:
      raise AssertionError('failed to find typefind element')

    ## Crank up most of the heavy machinery.  The message handler will stop
    ## the loop when things seem to be sufficiently well underway.
    me._pipe.set_state(GS.STATE_PAUSED)
    loop.run()
    bus.disconnect(bmid)
    decoder.disconnect(dpaid)
    if fail:
      me._pipe.set_state(GS.STATE_NULL)
      raise fail[0](fail[1])

    ## Store the collected tags.
    me.tags = tags

    ## Gather the capabilities.  The `typefind' element knows the input data
    ## type.  The 'decodebin' knows the raw data type.
    me.cap = tfelt.get_pad('src').get_negotiated_caps()[0]
    me.mime = set([mime, me.cap.get_name()])
    me.dcap = sink.get_pad('sink').get_negotiated_caps()[0]

    ## If we found a plausible bitrate then stash it.  Otherwise note that we
    ## failed.  If anybody asks then we'll work it out then.
    if 'nominal-bitrate' in tags:
      me._bitrate = tags['nominal-bitrate']/1000
    elif 'bitrate' in tags and tags['bitrate'] >= 80000:
      me._bitrate = tags['bitrate']/1000
    else:
      me._bitrate = None

    ## The bitrate computation wants the file size.  Ideally we'd want the
    ## total size of the frames' contents, but that seems hard to dredge
    ## out.  If the framing overhead is small, this should be close enough
    ## for our purposes.
    me._bytes = OS.stat(file).st_size

  def __del__(me):
    "Close the pipeline down so we don't leak file descriptors."
    me._pipe.set_state(GS.STATE_NULL)

  @property
  def bitrate(me):
    """
    Return the approximate bit-rate of the input file.

    This might take a while if we have to work it out the hard way.  Raise
    ValueError if GStreamer reports an error while playing the stream.
    """

    ## If we already know the answer then just return it.
    if me._bitrate is not None:
      return me._bitrate

    ## Make up a new main loop.
    loop = G.MainLoop()

    ## Watch for bus messages.  We'll stop when we reach the end of the
    ## stream: then we'll have a clear idea of how long the track was.
    fail = []
    def bus_message(bus, msg):
      if msg.type == GS.MESSAGE_ERROR:
        fail[:] = (ValueError, msg.structure['debug'], None)
        loop.quit()
      elif msg.type == GS.MESSAGE_EOS:
        loop.quit()
    bus = me._pipe.get_bus()
    bmid = bus.connect('message', bus_message)

    ## Get everything moving, and keep the user amused while we work.  The
    ## progress meter wants a description and the element to interrogate,
    ## as separate arguments.
    me._pipe.set_state(GS.STATE_PLAYING)
    with GStreamerProgressEyecandy(filestatus(me._file, 'measure bitrate'),
                                   me._pipe,
                                   silentp = True):
      loop.run()
    bus.disconnect(bmid)
    if fail:
      me._pipe.set_state(GS.STATE_NULL)
      raise fail[0](fail[1])

    ## Now we should be able to find out our position accurately and work out
    ## a bitrate.  Cache it in case anybody asks again.
    t, hukairz = me._pipe.query_position(GS.FORMAT_TIME)
    me._bitrate = int(8*me._bytes*1e6/t)

    ## Done.
    return me._bitrate
946
947class AudioFormat (BaseFormat):
948 """
949 An AudioFormat is a kind of Format specialized for audio files.
950
951 Format checks are done on an AudioIdentifier object.
952 """
953
954 PROPS = prop('bitrate', Num)
955
956 ## libmagic reports `application/ogg' for Ogg Vorbis files. We've switched
957 ## to GIO now, which reports either `audio/ogg' or `audio/x-vorbis+ogg'
958 ## depending on how thorough it's trying to be. Still, it doesn't do any
959 ## harm here; the main risk is picking up Ogg Theora files by accident, and
960 ## we'll probably be able to extract the audio from them anyway.
961 CATEGORY = FileCategory('audio', ['audio/*', 'application/ogg'],
962 AudioIdentifier)
963
964 def __init__(me, bitrate = None):
965 "Construct an object, requiring an approximate bitrate."
966 me.bitrate = bitrate
967
968 def check(me, id):
969 """
970 Return whether the AudioIdentifier ID is suitable for our purposes.
971
972 Subclasses can either override this method or provide a property
973 `MIMETYPES', which is a list (other thing that implements `__contains__')
974 of GStreamer MIME types matching this format.
975 """
976 return id.mime & me.MIMETYPES and \
977 (me.bitrate is None or id.bitrate <= me.bitrate * sqrt(2))
978
979 def encoder(me):
980 """
981 Constructs a GStreamer element to encode audio input.
982
983 Subclasses can either override this method (or replace `encode'
984 entirely), or provide a method `encoder_chain' which returns a list of
985 elements to be linked together in sequence. The first element in the
986 chain must have a pad named `sink' and the last must have a pad named
987 `src'.
988 """
989 elts = me.encoder_chain()
990 bin = GS.Bin()
991 bin.add(*elts)
992 GS.element_link_many(*elts)
993 bin.add_pad(GS.GhostPad('sink', elts[0].get_pad('sink')))
994 bin.add_pad(GS.GhostPad('src', elts[-1].get_pad('src')))
995 return bin
996
def convert(me, master, id, target):
  """
  Encode audio from MASTER, already identified as ID, writing it to TARGET.

  The output is first written to a scratch file named TARGET with `.new'
  appended; once encoding has finished, `fixup' is called on the scratch
  file, which is then renamed to TARGET.

  Raises `ValueError' (carrying GStreamer's debug text) if the pipeline
  reports an error, and `OSError' if a stale scratch file can't be removed
  or the final rename fails.

  See `encoder' for subclasses' responsibilities.
  """

  ## Construct the necessary equipment.
  pipe = GS.Pipeline()
  bus = pipe.get_bus()
  loop = G.MainLoop()

  ## Make sure that there isn't anything in the way of our output.  We're
  ## going to write to a scratch file so that we don't get confused by
  ## half-written rubbish left by a crashed program.
  new = target + '.new'
  try:
    OS.unlink(new)
  except OSError as err:
    if err.errno != E.ENOENT:
      raise

  ## Piece together our pipeline.  The annoying part is that the
  ## `decodebin' doesn't have any source pads yet, so our chain is in two
  ## halves for now.
  source = make_element('filesrc', 'source', location = master)
  decoder = make_element('decodebin', 'decode')
  conv = make_element('audioconvert', 'convert')
  encoder = me.encoder()
  sink = make_element('filesink', 'sink', location = new)
  pipe.add(source, decoder, conv, encoder, sink)
  GS.element_link_many(source, decoder)
  GS.element_link_many(conv, encoder, sink)

  ## Some decoders (e.g., the AC3 decoder) include channel-position
  ## indicators in their output caps.  The Vorbis encoder interferes with
  ## this, and you end up with a beautifully encoded mono signal from a
  ## stereo source.  From a quick butchers at the `vorbisenc' source, I
  ## /think/ that this is only a problem with stereo signals: mono signals
  ## are mono already, and `vorbisenc' accepts channel positions if there
  ## are more than two channels.
  ##
  ## So we have this bodge.  We already collected the decoded audio caps
  ## during identification.  So if we see 2-channel audio with channel
  ## positions, we strip the positions off forcibly by adding a filter.
  if id.dcap.get_name().startswith('audio/x-raw-') and \
     id.dcap.has_field('channels') and \
     id.dcap['channels'] == 2 and \
     id.dcap.has_field('channel-positions'):
    dcap = GS.Caps()
    c = id.dcap.copy()
    c.remove_field('channel-positions')
    dcap.append(c)
  else:
    dcap = None

  ## Hook onto the `decodebin' so we can link together the two halves of
  ## our encoding chain.  For now, we'll hope that there's only one audio
  ## stream in there, and just throw everything else away.
  def decoder_pad_arrived(elt, pad):
    if pad.get_caps()[0].get_name().startswith('audio/'):
      if dcap:
        elt.link_pads_filtered(pad.get_name(), conv, 'sink', dcap)
      else:
        elt.link_pads(pad.get_name(), conv, 'sink')
  decoder.connect('pad-added', decoder_pad_arrived)

  ## Watch the bus for completion messages.  The most recent error's debug
  ## text, if any, ends up as the only item in `fail'.
  fail = []
  def bus_message(bus, msg):
    if msg.type == GS.MESSAGE_ERROR:
      fail[:] = [msg.structure['debug']]
      loop.quit()
    elif msg.type == GS.MESSAGE_EOS:
      loop.quit()
  bus.add_signal_watch()
  bmid = bus.connect('message', bus_message)

  ## Get everything ready and let it go.  Whatever happens -- including the
  ## main loop being interrupted -- shut the pipeline down again and undo
  ## the bus watch and handler, so that we don't leave one behind for every
  ## file we convert.
  try:
    pipe.set_state(GS.STATE_PLAYING)
    with GStreamerProgressEyecandy(filestatus(master,
                                              'convert to %s' % me.NAME),
                                   pipe):
      loop.run()
  finally:
    pipe.set_state(GS.STATE_NULL)
    bus.disconnect(bmid)
    bus.remove_signal_watch()
  if fail:
    raise ValueError(fail[0])

  ## Fix up the output file if we have to.
  me.fixup(new)

  ## We're done.
  OS.rename(new, target)
1090
class OggVorbisFormat (AudioFormat):
  """
  AudioFormat object for Ogg Vorbis.

  If a bitrate was requested, the encoder's `quality' property is chosen
  from `QMAP'; otherwise the encoder is left with its default settings.
  """

  NAME = 'Ogg Vorbis'
  EXT = 'ogg'
  MIMETYPES = set(['application/ogg', 'audio/x-vorbis', 'audio/ogg',
                   'audio/x-vorbis+ogg'])

  ## Pairs (QUALITY, BITRATE), in ascending order, relating quality levels
  ## to bitrates.  From https://en.wikipedia.org/wiki/Vorbis
  QMAP = [(-1,  45), ( 0,  64), ( 1,  80), ( 2,  96),
          ( 3, 112), ( 4, 128), ( 5, 160), ( 6, 192),
          ( 7, 224), ( 8, 256), ( 9, 320), (10, 500)]

  def encoder_chain(me):
    """
    Return the list of elements for encoding: a `vorbisenc' and an `oggmux'.

    With a bitrate set, use the first quality level in `QMAP' whose bitrate
    is at least the one requested, scaled down by a factor of ten.  Raises
    `ValueError' if the request exceeds every bitrate in the table.
    """
    props = {}
    wanted = me.bitrate
    if wanted is not None:
      adequate = [q for q, br in me.QMAP if br >= wanted]
      if not adequate:
        raise ValueError('no suitable quality setting found')
      props['quality'] = adequate[0]/10.0
    vorbis = make_element('vorbisenc', **props)
    mux = make_element('oggmux')
    return [vorbis, mux]

defformat('ogg-vorbis', OggVorbisFormat)
1117
class MP3Format (AudioFormat):
  """
  AudioFormat object for MP3.

  Encoding is done by `lame', followed by `xingmux' and `id3v2mux'; the
  finished file is then given an ID3v1.1 tag by `fixup'.
  """

  EXT = 'mp3'
  NAME = 'MP3'
  MIMETYPES = set(['audio/mpeg'])

  def encoder_chain(me):
    """
    Return the list of elements for encoding MP3.

    The requested bitrate, if there is one, is passed to `lame' as its
    `vbr_mean_bitrate' property.
    """
    props = { 'vbr': 4 }
    if me.bitrate is not None:
      props['vbr_mean_bitrate'] = me.bitrate
    lame = make_element('lame', **props)
    return [lame, make_element('xingmux'), make_element('id3v2mux')]

  def fixup(me, path):
    """
    Fix up the MP3 file at PATH.

    GStreamer produces ID3v2 tags, but not ID3v1.  This seems unnecessarily
    unkind to stupid players, so write an ID3v1.1 tag too.  If the tag can't
    be written because of a text-encoding or genre problem, then quietly
    do without it.
    """
    id3 = E3.Tag()
    id3.link(path)
    id3.setTextEncoding(E3.UTF_8_ENCODING)
    try:
      id3.update(E3.ID3_V1_1)
    except (UnicodeEncodeError, E3.tag.GenreException):
      ## Best effort only: the ID3v2 tag is already in place.
      pass

defformat('mp3', MP3Format)
1148
1149###--------------------------------------------------------------------------
1150### Image handling, based on the Python Imaging Library.
1151
class ImageIdentifier (object):
  """
  Analyses and identifies an image file.

  The opened Image object is left in the `img' property for inspection, and
  the `mime' property is a set containing the single MIME type provided by
  the caller.
  """

  def __init__(me, file, mime):
    """
    Open FILE as an image, remembering MIME as its type.

    Raises `IdentificationFailure' if PIL can't make sense of the file;
    genuine I/O errors are propagated unchanged.
    """

    ## Get PIL to open the file.  It will magically work out what kind of
    ## file it is.
    try:
      me.img = I.open(file)
    except IOError as exc:

      ## PIL unhelpfully raises `IOError' on identification failure too.  We
      ## can tell the two apart because a real I/O error has an `errno'.
      if exc.errno is not None:
        raise
      raise IdentificationFailure

    me.mime = set([mime])
1175
class ImageFormat (BaseFormat):
  """
  An ImageFormat is a kind of Format specialized for image files.

  Subclasses don't need to provide anything other than the properties
  required by all concrete Format subclasses.  However, there is a
  requirement that the `NAME' property match PIL's `format' name for the
  format, because `NAME' is both compared against an image's `format' and
  handed to PIL when saving.
  """

  PROPS = prop('size', Num)
  CATEGORY = FileCategory('image', ['image/*'], ImageIdentifier)

  def __init__(me, size = None, **kw):
    """
    Initialize an ImageFormat object.

    SIZE, if not `None', is the largest width or height wanted.  Additional
    keywords are passed to PIL when encoding, and may be recognized by
    enhanced `check' methods in subclasses.
    """
    me._props = kw
    me._size = size

  def check(me, id):
    "Check whether the ImageIdentifier ID matches our requirements."

    ## The wrong format is never acceptable.
    if id.img.format != me.NAME:
      return False

    ## The right format is fine as long as it fits within any size limit.
    limit = me._size
    if limit is None:
      return True
    return id.img.size[0] <= limit and id.img.size[1] <= limit

  def convert(me, master, id, target):
    "Encode the file MASTER, identified as ID, writing the result to TARGET."

    ## We write to a scratch file, and rename it into place at the end.
    scratch = target + '.new'

    ## The ImageIdentifier already holds the opened image, so reuse it
    ## rather than opening the file again.
    picture = id.img
    STATUS.set(filestatus(master, 'convert to %s' % me.NAME))

    ## If there's a stated maximum size and the image exceeds it in either
    ## dimension then scale it down.  Thumbnailing works in place, so do it
    ## to a copy rather than clobbering the identifier's image.
    limit = me._size
    if limit is not None:
      if picture.size[0] > limit or picture.size[1] > limit:
        picture = picture.copy()
        picture.thumbnail((limit, limit), I.ANTIALIAS)

    ## Write the output image, and fix it up if necessary.
    picture.save(scratch, me.NAME, **me._props)
    me.fixup(scratch)

    ## We're done.
    OS.rename(scratch, target)
    STATUS.commit()
1233
class JPEGFormat (ImageFormat):
  """
  Image format for JPEG (actually JFIF) files.

  Interesting properties to set:

  optimize
        If present, take a second pass to select optimal encoder settings.

  progressive
        If present, make a progressive file.

  quality
        Integer from 1--100 (worst to best); default is 75.
  """
  NAME = 'JPEG'
  EXT = 'jpg'

  ## NOTE(review): the third argument to `prop' is presumably the keyword
  ## name handed to the encoder -- confirm against `prop'.
  PROPS = (prop('optimize', None) |
           prop('progressive', None, 'progression') |
           prop('quality', Num))

defformat('jpeg', JPEGFormat)
1255
class PNGFormat (ImageFormat):
  """
  Image format for PNG files.

  Interesting properties to set:

  optimize
        If present, make a special effort to minimize the output file.
  """
  NAME = 'PNG'
  EXT = 'png'
  PROPS = prop('optimize', None)

defformat('png', PNGFormat)
1270
class BMPFormat (ImageFormat):
  """
  Image format for Windows BMP files, as used by RockBox.

  There are no additional properties to set.
  """
  EXT = 'bmp'
  NAME = 'BMP'

defformat('bmp', BMPFormat)
1281
1282###--------------------------------------------------------------------------
e0361afb
MW
1283### Remaining parsing machinery.
1284
Type = K('type') - Name - D('{') - R(Policy) - D('}')
def build_type(s, l, t):
  """
  Parse action for `Type': return the policy for a `type NAME { ... }' block.

  S is the input string, L the location of the match within it, and T the
  matched tokens: the category name, followed by the list of policies.  A
  single policy is used as it is; several are combined using `AndPolicy'.
  Either way, the result has its category set to the one named.

  Raises `ParseException', reported at L, if the category name is unknown.
  """
  try:
    cat = CATEGORYMAP[t[0]]
  except KeyError:
    ## Report the failure at the location we were given.
    raise P.ParseException(s, l, "Unknown category `%s'" % t[0])
  pols = t[1]
  if len(pols) == 1: pol = pols[0]
  else: pol = AndPolicy(pols)
  pol.setcategory(cat)
  return pol
Type.setParseAction(build_type)
1297
## The targets collected from the configuration, in the order in which their
## jobs were performed.
TARGETS = []

class TargetJob (object):
  """
  A configuration job describing one target directory.

  The `targetdir' and `policies' properties hold the values given to the
  constructor.
  """

  def __init__(me, targetdir, policies):
    "Remember the TARGETDIR and the POLICIES to be applied to it."
    me.targetdir, me.policies = targetdir, policies

  def perform(me):
    "Carry out the job, by adding ourselves to the `TARGETS' list."
    TARGETS.append(me)
1305
def build_target(s, l, t):
  """
  Parse action for `Target': return a `TargetJob' for the matched tokens.

  The first token is the target directory; the second is the list of
  per-type policies.
  """
  targetdir, policies = t[0], t[1]
  return TargetJob(targetdir, policies)

Target = K('target') - String - D('{') - R(Type) - D('}')
Target.setParseAction(build_target)
1310
## Variables settable from the configuration file.  The master directory
## starts out unset.
VARS = { 'master': None }

class VarsJob (object):
  """
  A configuration job which sets variables.

  The `vars' property is an iterable of (NAME, VALUE) pairs.
  """

  def __init__(me, vars):
    "Remember the (NAME, VALUE) pairs VARS for later."
    me.vars = vars

  def perform(me):
    "Carry out the job, by storing each of our pairs in `VARS'."
    for name, value in me.vars:
      VARS[name] = value
1318
def build_vars(s, l, t):
  "Parse action for `Vars': return a `VarsJob' for the first matched token."
  settings = t[0]
  return VarsJob(settings)

## A `vars { ... }' block, containing settings for the known variables.
Var = prop('master', String)
Vars = K('vars') - D('{') - R(Var) - D('}')
Vars.setParseAction(build_vars)

## The configuration file as a whole is a sequence of top-level blocks, with
## Python-style comments ignored.
TopLevel = Vars | Target
Config = R(TopLevel)
Config.ignore(P.pythonStyleComment)
1328
1329###--------------------------------------------------------------------------
583b7e4a
MW
1330### The directory grobbler.
1331
def grobble(master, targets, noact = False):
  """
  Work through the MASTER directory, writing converted files to TARGETS.

  The TARGETS are a list of `TargetJob' objects, each describing a target
  directory and a policy to apply to it.

  If NOACT is true, then don't actually do anything permanent to the
  filesystem.

  Returns a list of (FILE, EXCEPTION) pairs, one for each master file or
  directory which couldn't be processed because of an `IOError' or
  `OSError'.
  """

  ## Transform the targets into a more convenient data structure: for each
  ## target, a map from category to the list of policies for that category.
  tpolmap = []
  for t in targets:
    pmap = {}
    tpolmap.append(pmap)
    for p in t.policies: pmap.setdefault(p.cat, []).append(p)

  ## Keep track of the current position in the master tree.
  dirs = []

  ## And the files which haven't worked.
  broken = []

  def grobble_file(master, pmap, targetdir, cohorts):
    ## Convert MASTER, writing the result to TARGETDIR.
    ##
    ## The COHORTS are actually (CAT, ID, COHORT) triples, where a COHORT is
    ## a list of (FILENAME, ID) pairs.
    ##
    ## Since this function might convert the MASTER file, the caller doesn't
    ## know the name of the output files, so we return them as a list.

    done = set()
    st_m = OS.stat(master)

    ## Work through each category listed and apply its policy.
    for cat, id, cohort in cohorts:

      ## Go through the category's policies and see if any match.  If we fail
      ## here, see if there are more categories to try.
      for pol in pmap[cat]:
        acts = pol.actions(master, targetdir, id, cohort)
        if acts: break
      else:
        continue

      ## Work through the targets one by one.
      for a in acts:
        done.add(a.target)

        ## Find out whether the target file already exists and is up-to-date
        ## with respect to the master.  (Caution here with low-resolution
        ## timestamps.)  If it's OK, then just move on.
        try:
          st_t = OS.stat(a.target)
          if st_m.st_mtime < st_t.st_mtime or \
             (st_m.st_ino, st_m.st_dev) == (st_t.st_ino, st_t.st_dev):
            continue
        except OSError as err:
          if err.errno not in (E.ENOENT, E.ENOTDIR):
            raise

        ## We have real work to do.  If there's a current status message,
        ## it's the containing directory so flush it so that people know
        ## where we are.
        STATUS.commit()

        ## Remove the target.  (A hardlink will fail if the target already
        ## exists.)
        if not noact:
          try:
            OS.unlink(a.target)
          except OSError as err:
            if err.errno not in (E.ENOENT, E.ENOTDIR):
              raise

        ## Do whatever it is we decided to do.
        if noact:
          STATUS.commit(filestatus(master, a))
        else:
          a.perform()

    ## We're done.  Return the names of the targets.
    return list(done)

  @contextmanager
  def wrap(masterfile):
    ## Handle exceptions found while trying to convert a particular file or
    ## directory.

    try:
      yield masterfile

    ## Something bad happened.  Report the error, but continue.  (This list
    ## of exceptions needs a lot of work.)
    except (IOError, OSError) as exc:
      STATUS.clear()
      STATUS.commit(filestatus(masterfile, 'failed (%s)' % exc))
      broken.append((masterfile, exc))

  def grobble_dir(master, targets):
    ## Recursively convert files in MASTER, writing them to the TARGETS.

    ## Keep track of the subdirectories we encounter, because we'll need to
    ## do all of those in one go at the end.
    subdirs = set()

    ## Work through each target directory in turn.
    for target, pmap in zip(targets, tpolmap):

      ## Make sure the TARGET exists and is a directory.  It's a fundamental
      ## assumption of this program that the entire TARGET tree is
      ## disposable, so if something exists but isn't a directory, we should
      ## kill it.
      if not OS.path.isdir(target):
        if OS.path.exists(target):
          STATUS.commit(filestatus(target, 'clear nondirectory'))
          if not noact:
            OS.unlink(target)
        STATUS.commit(filestatus(target, 'create directory'))
        if not noact:
          OS.mkdir(target)

      ## Keep a list of things in the target.  As we convert files, we'll
      ## check them off.  Anything left over is rubbish and needs to be
      ## deleted.  (The directory might not exist if we're not acting.)
      checklist = {}
      try:
        for i in OS.listdir(target):
          checklist[i] = False
      except OSError as err:
        if err.errno not in (E.ENOENT, E.ENOTDIR):
          raise

      ## Keep track of the files in each category.
      catmap = {}
      todo = []
      done = []

      ## Work through the master files.
      for f in sorted(OS.listdir(master)):

        ## If the killswitch has been pulled then stop.  The whole idea is
        ## that we want to cause a clean shutdown if possible, so we don't
        ## want to do it in the middle of encoding because the encoding
        ## effort will have been wasted.  This is the only place we need to
        ## check.  If we've exited the loop, then clearing old files will
        ## probably be fast, and we'll either end up here when the recursive
        ## call returns or we'll be in the same boat as before, clearing old
        ## files, only up a level.  If worst comes to worst, we'll be killed
        ## forcibly somewhere inside `SH.rmtree', and that can continue where
        ## it left off.
        if KILLSWITCH.is_set():
          return

        ## Do something with the file.
        with wrap(OS.path.join(master, f)) as masterfile:

          ## If it's a directory then prepare to grobble it recursively, but
          ## don't do that yet.
          if OS.path.isdir(masterfile):
            subdirs.add(f)
            done.append(OS.path.join(target, f))

          ## Otherwise it's a file.  Work out what kind, and stash it under
          ## the appropriate categories.  Later, we'll apply policy to the
          ## files, by category, and work out what to do with them all.
          else:
            gf = GIO.File(masterfile)
            mime = gf.query_info('standard::content-type').get_content_type()
            cats = []
            for cat in pmap:
              ident = cat.identify(masterfile, mime)
              if ident is None: continue
              catmap.setdefault(cat, []).append((masterfile, ident))
              cats.append((cat, ident))

            ## Nothing recognized the file, so there's no identification to
            ## record for it.  Say so explicitly, rather than relying on
            ## whatever was left over from the loop above.
            if not cats:
              catmap.setdefault(None, []).append((masterfile, None))
            todo.append((masterfile, cats))

      ## Work through the categorized files to see what actions to do for
      ## them.
      for masterfile, cats in todo:
        with wrap(masterfile):
          done += grobble_file(masterfile, pmap, target,
                               [(cat, ident, catmap[cat])
                                for cat, ident in cats])

      ## Check the results off the list so that we don't clear it later.
      for f in done:
        checklist[OS.path.basename(f)] = True

      ## Maybe there's stuff in the target which isn't accounted for.  Delete
      ## it: either the master has changed, or the policy for this target has
      ## changed.  Either way, the old files aren't wanted.
      for f in checklist:
        if not checklist[f]:
          STATUS.commit(filestatus(f, 'clear bogus file'))
          if not noact:
            bogus = OS.path.join(target, f)
            try:
              if OS.path.isdir(bogus):
                SH.rmtree(bogus)
              else:
                OS.unlink(bogus)
            except OSError as err:
              if err.errno != E.ENOENT:
                raise

    ## If there are subdirectories which want processing then do those.
    ## Keep the user amused by telling him where we are in the tree.
    for d in sorted(subdirs):
      dirs.append(d)
      STATUS.set('/'.join(dirs))
      with wrap(OS.path.join(master, d)) as masterdir:
        try:
          grobble_dir(masterdir,
                      [OS.path.join(target, d) for target in targets])
        finally:
          dirs.pop()
          STATUS.set('/'.join(dirs))

  ## Right.  We're ready to go.
  grobble_dir(master, [t.targetdir for t in targets])
  return broken
583b7e4a
MW
1559
1560###--------------------------------------------------------------------------
1561### Command-line interface.
1562
## The name we were invoked under, for use in diagnostics.
QUIS = OS.path.basename(SYS.argv[0])

def moan(msg):
  "Write the warning MSG to standard error, prefixed by the program name."
  line = '%s: %s\n' % (QUIS, msg)
  SYS.stderr.write(line)

def die(msg):
  "Report the fatal error MSG as `moan' does, and exit with status 1."
  moan(msg)
  raise SystemExit(1)
1573
def parse_opts(args):
  """
  Parse command-line arguments in ARGS.

  As side effects: turn on progress eyecandy if `-i' was given; start the
  timeout thread if `-t' was given; and parse the configuration file named
  by the single non-option argument, performing each job found in it.

  Returns the parsed options object.  Its interesting attributes are
  `noact', `tty', `timeout' and `timeout_nasty'.
  """

  ## Build the option parser object.
  op = OP.OptionParser(prog = QUIS, version = VERSION,
                       usage = '%prog [-in] [-t TIMEOUT] [-T TIMEOUT] '
                               'CONFIG',
                       description = """\
Convert a directory tree of files according to the configuration file
CONFIG.
""")

  ## Timeout handling.  A time is a decimal number with an optional unit
  ## suffix -- `s', `m', `h', or `d' -- and nothing else; the default unit is
  ## seconds.
  def cb_time(opt, ostr, arg, parser):
    m = RX.match(r'\s*(\d+)\s*([dhms]?)\s*$', arg)
    if not m:
      raise OP.OptionValueError("bad time value `%s'" % arg)
    t, u = m.groups()
    t = int(t) * { '': 1, 's': 1, 'm': 60, 'h': 3600, 'd': 86400 }[u]
    setattr(parser.values, opt.dest, t)
  op.add_option('-t', '--timeout', type = 'string', metavar = 'SECS',
                dest = 'timeout',
                help = 'stop processing nicely after SECS',
                action = 'callback', callback = cb_time)
  op.add_option('-T', '--timeout-nasty', type = 'string', metavar = 'SECS',
                dest = 'timeout_nasty',
                help = 'stop processing unpleasantly after further SECS',
                action = 'callback', callback = cb_time)

  ## Other options.
  op.add_option('-i', '--interactive', action = 'store_true', dest = 'tty',
                help = 'provide progress information')
  op.add_option('-n', '--no-act', action = 'store_true', dest = 'noact',
                help = 'don\'t actually modify the filesystem')

  ## Ready to rock.
  op.set_defaults(formats = [], noact = False,
                  timeout = None, timeout_nasty = 300)
  opts, args = op.parse_args(args)

  ## Check that we got the non-option arguments that we want.
  if len(args) != 1:
    op.error('wrong number of arguments')

  ## Act on the options.
  if opts.tty:
    STATUS.eyecandyp = True
  if opts.timeout is not None:
    to = TH.Thread(target = timeout,
                   args = (opts.timeout, opts.timeout_nasty))
    to.daemon = True
    to.start()

  ## Parse the configuration file, and carry out the jobs it describes.
  with open(args[0]) as conf:
    jobs, = Config.parseFile(conf, True)
  for j in jobs:
    j.perform()

  return opts
1639
if __name__ == '__main__':
  opts = parse_opts(SYS.argv[1:])

  ## `VARS' always has a `master' entry, which stays `None' unless the
  ## configuration file set it; so check the value, not whether the key is
  ## there.
  if VARS.get('master') is None:
    die("no master directory set")
  broken = grobble(VARS['master'], TARGETS, opts.noact)
  if broken:
    moan('failed to convert some files:')
    for file, exc in broken:
      moan('%s: %s' % (file, exc))
    SYS.exit(1)

  ## This is basically a successful completion: we did what we were asked to
  ## do.  It seems polite to report a message, though.
  ##
  ## Why don't we have a nonzero exit status?  The idea would be that a
  ## calling script would be interested that we used up all of our time, and
  ## not attempt to convert some other directory as well.  But that doesn't
  ## quite work.  Such a script would need to account correctly for time we
  ## had spent even if we complete successfully.  And if the script is having
  ## to watch the clock itself, it can do that without our help here.
  if KILLSWITCH.is_set():
    moan('killed by timeout')
1662
1663###----- That's all, folks --------------------------------------------------