gremlin/gremlin.in: Fix things for new GI-based GStreamer etc. bindings.
[autoys] / gremlin / gremlin.in
CommitLineData
d4a7d7b5 1#! @PYTHON@
583b7e4a
MW
2###
3### Convert a directory tree of audio files
4###
5### (c) 2010 Mark Wooding
6###
7
8###----- Licensing notice ---------------------------------------------------
9###
9e3a516f
MW
10### This file is part of the `autoys' audio tools collection.
11###
12### `autoys' is free software; you can redistribute it and/or modify
583b7e4a
MW
13### it under the terms of the GNU General Public License as published by
14### the Free Software Foundation; either version 2 of the License, or
15### (at your option) any later version.
16###
9e3a516f 17### `autoys' is distributed in the hope that it will be useful,
583b7e4a
MW
18### but WITHOUT ANY WARRANTY; without even the implied warranty of
19### MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20### GNU General Public License for more details.
21###
22### You should have received a copy of the GNU General Public License
9e3a516f 23### along with `autoys'; if not, write to the Free Software Foundation,
583b7e4a
MW
24### Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
25
26###--------------------------------------------------------------------------
27### External dependencies.
28
29## Language features.
30from __future__ import with_statement
31
32## Standard Python libraries.
33import sys as SYS
34import os as OS
35import errno as E
36import time as T
37import unicodedata as UD
38import fnmatch as FN
39import re as RX
40import shutil as SH
41import optparse as OP
42import threading as TH
43import shlex as L
00beb9e5 44from math import sqrt, ceil
583b7e4a
MW
45from contextlib import contextmanager
46
47## eyeD3 tag fettling.
608b936e 48import eyed3 as E3
583b7e4a 49
3bf73acf
MW
50## Gstreamer.
51import gi
52gi.require_version('GLib', '2.0'); from gi.repository import GLib as G
53gi.require_version('Gio', '2.0'); from gi.repository import Gio as GIO
54gi.require_version('Gst', '1.0'); from gi.repository import Gst as GS
55GS.init([])
583b7e4a
MW
56
57## Python Imaging.
58from PIL import Image as I
59
60## Python parsing.
61import pyparsing as P
62
63###--------------------------------------------------------------------------
64### Special initialization.
65
## The package version.  The `@VERSION@' marker is presumably substituted
## when this template is turned into the real script -- TODO confirm.
VERSION = '@VERSION@'

## GLib.  Set up GLib's threading support before we do anything else with
## it.
G.threads_init()
70
71###--------------------------------------------------------------------------
72### Eyecandy progress reports.
73
def charwidth(s):
  """
  Return the width of S, in character cells.

  Specifically, this is the number of backspace characters required to
  overprint the string S.  S may be a byte string, in which case it is
  decoded using the encoding of `stdout', or a string which is already
  Unicode text, which is measured directly.

  If `stdout' has no encoding, or a byte string can't be decoded, then give
  up and assume that each element of S takes up one cell.

  None of this handles tab characters in any kind of useful way.  Sorry.
  """

  ## If there's no encoding for stdout then we're doing something stupid.
  enc = SYS.stdout.encoding
  if enc is None: return len(s)

  ## Turn a byte string into Unicode so we can hack on it properly.  Maybe
  ## that won't work out, in which case fall back to being stupid.  Text
  ## which is already Unicode needs no conversion: trying to `decode' it
  ## anyway would either fail outright or bounce it through ASCII.
  if isinstance(s, bytes):
    try: u = s.decode(enc)
    except UnicodeError: return len(s)
  else:
    u = s

  ## Our main problem is combining characters, but we should also try to
  ## handle wide (mostly Asian) characters, and zero-width ones.  This hack
  ## is taken mostly from http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c
  w = 0
  for ch in u:

    ## Format and combining characters, and the code points 0x1160--0x11ff,
    ## occupy no cells of their own.
    if UD.category(ch) in ('Cf', 'Me', 'Mn') or \
       0x1160 <= ord(ch) <= 0x11ff:
      continue

    ## Full-width and wide characters take two cells; everything else takes
    ## one.
    if UD.east_asian_width(ch) in ('F', 'W'): w += 2
    else: w += 1

  ## Done.
  return w
107
class StatusLine (object):
  """
  Keeps track of a single line of ephemeral progress information.

  The status line isn't especially important, but it keeps interactive users
  amused.

  A program should only ever have one of these; several would scribble over
  each other and get confused.

  The update algorithm (in `set') is fairly careful to do the right thing
  with long status `lines', and to work properly in an Emacs `shell' buffer.
  """

  def __init__(me):
    "Start with an empty status line; notice whether stdout is a terminal."
    me._last = ''
    me._lastlen = 0
    me.eyecandyp = OS.isatty(SYS.stdout.fileno())

  def set(me, line):
    """
    Replace the status line's contents with LINE.

    Output is only actually produced if stdout is interactive; otherwise the
    new contents are merely remembered (see `commit').
    """
    newlen = len(line)

    ## Eyecandy update.
    if me.eyecandyp:
      old, oldlen = me._last, me._lastlen

      ## If the old line was longer, its tail needs clobbering: back up over
      ## it and then overwrite it with spaces.
      erase = ''
      if newlen < oldlen:
        tail = charwidth(old[newlen:])
        erase = '\b'*tail + ' '*tail

      ## Find the length of the common prefix of the old and new lines.
      ## There's no need to redraw that part, which keeps the traffic down
      ## on SSH links and spares slow terminal emulators like Emacs.
      same = 0
      limit = min(newlen, oldlen)
      while same < limit and line[same] == old[same]:
        same += 1

      ## Back up to the end of the common prefix, and write out the rest,
      ## all in one go.
      back = charwidth(old[same:])
      SYS.stdout.write(erase + '\b'*back + line[same:])
      SYS.stdout.flush()

    ## Remember what's there now.
    me._lastlen = newlen
    me._last = line

  def clear(me):
    "Empty the status line.  This is exactly `set('')'."
    me.set('')

  def commit(me, line = None):
    """
    Make the current status line, and maybe LINE, a permanent record.

    A nonempty status line is committed to the transcript: if it's already
    on the terminal then a newline suffices; otherwise it's written out in
    full.  Then LINE, if it isn't None, is written on a line of its own.

    Finally, the status line is reset to empty.
    """
    if me._last:
      if me.eyecandyp: out = '\n'
      else: out = me._last + '\n'
      SYS.stdout.write(out)
    if line is not None:
      SYS.stdout.write(line + '\n')
    me._lastlen = 0
    me._last = ''
187
## The program's one and only status line.
STATUS = StatusLine()
189
def filestatus(file, status):
  """
  Return a status-line string reporting STATUS for FILE.

  Only the last component of FILE's name is shown, indented by eight spaces.
  """
  indent = ' '*8
  leaf = OS.path.basename(file)
  return '%s%s: %s' % (indent, leaf, status)
192
class ProgressEyecandy (object):
  """
  Provide amusement while something big and complicated is happening.

  This is an abstract class.  Subclasses must provide a method `progress'
  returning a pair (CURRENT, MAX) indicating the current progress through the
  operation.  Either may be None if the progress can't be determined.
  """

  def __init__(me, what, silentp = False):
    """
    Initialize a progress meter.

    WHAT is a prefix string to be written before the progress eyecandy
    itself.  If SILENTP is true then `done' keeps quiet about successful
    completion, and only reports failures.
    """
    me._what = what
    me._silentp = silentp
    me._spinner = 0
    me._start = T.time()

  def _fmt_time(me, t):
    "Format T (in seconds) as a time, in (maybe hours) minutes and seconds."
    s, t = t % 60, int(t/60)
    m, h = t % 60, int(t/60)
    if h > 0:
      return '%d:%02d:%02d' % (h, m, s)
    else:
      return '%02d:%02d' % (m, s)

  def _fill(me, cur, end, width = 40):
    """
    Return how many cells of a WIDTH-cell bar are filled at CUR out of END.

    The answer is an integer, rounded down, and clamped to lie between 0 and
    WIDTH so that the bar always fits between its brackets.  END must be
    nonzero.
    """
    return min(width, max(0, int(width*cur//end)))

  def show(me):
    "Show the current level of progress."

    ## If we're not showing pointless frippery, don't bother at all.
    if not STATUS.eyecandyp:
      return

    ## Update the spinner index.
    me._spinner = (me._spinner + 1)%4
    spin = r'/-\|'[me._spinner]

    ## Fetch the current progress information.  Note that we always fetch
    ## both the current and maximum levels, because both might change if an
    ## operation revises its idea of how much work needs doing.
    cur, end = me.progress()

    ## If we couldn't get progress information, display something vaguely
    ## amusing anyway.  A maximum which isn't positive is no use either:
    ## we'd only end up dividing by zero below.
    if cur is None or end is None or end <= 0:
      STATUS.set('%s %c [unknown progress]' % (me._what, spin))
      return

    ## Work out -- well, guess -- the time remaining.  If we've overshot the
    ## estimated maximum then claim that there's nothing left to do, rather
    ## than reporting a nonsensical negative time.
    if cur:
      elapsed = T.time() - me._start
      eta = me._fmt_time(ceil(elapsed*max(0, end - cur)/cur))
    else:
      eta = '???'

    ## Set the status bar.  Use explicit integer arithmetic here, because
    ## the bar length is used as a repeat count.
    n = me._fill(cur, end)
    STATUS.set('%s %c [%s%s] %3d%% (%s)' % \
                 (me._what,
                  spin,
                  '='*n, ' '*(40 - n),
                  int(100*cur//end),
                  eta))

  def done(me, win = True):
    "Show a completion notice, or a failure if WIN is false."
    if not win:
      STATUS.set('%s FAILED!' % me._what)
    elif not me._silentp:
      STATUS.set('%s done (%s)' %
                 (me._what,
                  me._fmt_time(T.time() - me._start)))
    else:
      return
    STATUS.commit()
272
273###--------------------------------------------------------------------------
274### Timeout handling.
275
## An event which is set when the first stage of the timeout expires.
KILLSWITCH = TH.Event()

def timeout(t0, t1):
  """
  Enforce a two-stage time limit on the program.

  Sleep for T0 seconds and then set `KILLSWITCH'.  Sleep for a further T1
  seconds and then complain and exit immediately with status 3.  This
  function blocks, and never returns.
  """

  ## First stage: raise the flag.
  T.sleep(t0)
  KILLSWITCH.set()

  ## Second stage: pull the plug.
  T.sleep(t1)
  moan('dying messily due to timeout')
  OS._exit(3)
284
285###--------------------------------------------------------------------------
286### Parsing utilities.
287
## Allow hyphens in identifiers.
IDCHARS = P.alphanums + '-_'
P.Keyword.setDefaultKeywordChars(IDCHARS)

## Some common kinds of tokens: identifiers, decimal integers (converted to
## Python integers), and double-quoted strings with backslash escapes.
Name = P.Word(IDCHARS)
Num = P.Word(P.nums)
Num.setParseAction(lambda toks: [int(tok) for tok in toks])
String = P.QuotedString('"', '\\')

## Handy abbreviations for constructed parser elements.
def K(k):
  "Return a parser matching the keyword K, producing no tokens."
  return P.Keyword(k).suppress()

def D(d):
  "Return a parser matching the delimiter D, producing no tokens."
  return P.Literal(d).suppress()

def R(p):
  "Return a parser matching P repeatedly, yielding the matches as one token."
  rep = P.ZeroOrMore(p)
  rep.setParseAction(lambda s, l, t: [t])
  return rep

O = P.Optional
302
303###--------------------------------------------------------------------------
304### Format identification and conversion.
305
class IdentificationFailure (Exception):
  "Raised by an identification function which can't identify its file."
308
class FileCategory (object):
  """
  A FileCategory represents a class of files.

  For example, it's sensible to consider audio, or image files as a
  category.  A file category knows how to recognize member files from
  MIME content types.
  """

  def __init__(me, name, mime_pats, ident):
    """
    Construct a new category called NAME, and register it in `CATEGORYMAP'.

    The MIME_PATS are a list of `fnmatch' patterns to be compared with a
    MIME type.  The IDENT is a function which produces an identification
    object given a file's name and first-guess MIME type.  The object is
    passed to a Format's `check' method to see whether a file needs
    re-encoding, and to `convert' to assist with the conversion.

    An identification object must have an attribute `mime' which is a set of
    possible MIME types accumulated for the object.
    """
    me.name = name
    me._mime_pats = mime_pats
    me._ident = ident
    CATEGORYMAP[name] = me

  def identify(me, file, mime):
    """
    Attempt to identify FILE, given its apparent MIME type.

    If identification succeeds, return an identification object which can be
    used by associated file formats; otherwise return None.
    """
    for pat in me._mime_pats:
      if FN.fnmatchcase(mime, pat):

        ## This pattern matches, so have a go.  If identification fails,
        ## carry on with the remaining patterns.
        try: return me._ident(file, mime)
        except IdentificationFailure: pass

    ## Nothing worked.
    return None
351
class BaseFormat (object):
  """
  A BaseFormat object represents a particular encoding and parameters.

  The object can verify (the `check' method) whether a particular file
  matches its requirements, and if necessary (`convert') re-encode a file.

  Subclasses should define the following methods.

  check(ID)
        Answer whether the file identified by ID is acceptable according to
        the receiver's parameters.

  convert(MASTER, ID, TARGET)
        Convert the file MASTER, which has been identified as ID, according
        to the receiver's parameters, writing the output to TARGET.

  Subclasses should also provide these attributes.

  CATEGORY
        A FileCategory object for the category of files that this format
        lives within.

  EXT   A file extension to be applied to encoded output files.

  NAME  A user-facing name for the format.

  PROPS A parser element to parse a property definition.  It should produce
        a pair NAME, VALUE to be stored in a dictionary.

  Subclasses for different kinds of file may introduce more subclass
  protocol.
  """

  def fixup(me, path):
    "Post-encoding fixups for the file PATH.  By default, there are none."
    return None
389
## Maps from user-facing names to format classes and to FileCategory
## objects respectively.
FORMATMAP = {}
CATEGORYMAP = {}

def defformat(name, cls):
  """
  Define a format NAME using class CLS.

  Raise ValueError if CLS lacks a `NAME' attribute (it's abstract) or a
  `CATEGORY' attribute.
  """
  for attr, complaint in [('NAME', 'abstract class'),
                          ('CATEGORY', 'no category')]:
    if not hasattr(cls, attr):
      raise ValueError(complaint)
  FORMATMAP[name] = cls
400
class FormatParser (P.ParserElement):
  """
  Parse a format specifier:

  format-spec ::= string [format-properties]
  format-properties ::= `{' format-property (`,' format-property)* `}'

  The syntax of a format-property is determined by the PROPS attribute on the
  named format and its superclasses.

  The result of a successful parse is an instance of the named format's
  class, constructed with the parsed properties as keyword arguments.
  """

  name = 'format-spec'

  ## We cache the parser elements we generate to avoid enormous consing.
  ## The keys are format names; a value of None means that the format has no
  ## properties at all.
  CACHE = {}

  def _propparser(me, fcls):
    """
    Build and return the property-list parser for the format class FCLS.

    The parser accepts an optional brace-enclosed, comma-separated list of
    properties, and produces a dictionary.  Return None if neither FCLS nor
    any of its superclasses has a `PROPS' attribute.
    """

    ## Collect the alternatives from the class and its ancestors.  Classes
    ## inherit `PROPS' from their superclasses, so we see the same parser
    ## several times: `seen' makes sure that we only include each one once.
    seen = set()
    alts = None
    for c in fcls.mro():
      try: p = c.PROPS
      except AttributeError: continue
      if p in seen: continue
      if alts is None: alts = p
      else: alts |= p
      seen.add(p)

    ## If there's nothing to parse then say so.
    if alts is None: return None

    ## Wrap the alternatives up into a property list.
    plist = P.delimitedList(alts)
    plist.setParseAction(lambda s, l, t: dict(t.asList()))
    return O(D('{') - plist - D('}'))

  def parseImpl(me, s, loc, actp = True):
    "Parse a format specifier from S starting at LOC."

    ## Firstly, determine the format name.
    loc, toks = Name._parse(s, loc, actp)
    fmt = toks[0]

    ## Look up the format class.
    try: fcls = FORMATMAP[fmt]
    except KeyError:
      raise P.ParseException(s, loc, "Unknown format `%s'" % fmt)

    ## Fetch the property-list parser from the cache, constructing it first
    ## if this is a format we've not seen before.
    if fmt not in me.CACHE:
      me.CACHE[fmt] = me._propparser(fcls)
    pp = me.CACHE[fmt]

    ## Parse the properties.
    pd = {}
    if pp is not None:
      loc, toks = pp._parse(s, loc, actp)
      if toks: pd = toks[0]

    ## Construct the format object and return it.
    return loc, fcls(**pd)

Format = FormatParser()
461
def prop(kw, pval, tag = None):
  """
  Return a parser element for a format property introduced by keyword KW.

  If PVAL is None then the property is a simple flag: the keyword stands
  alone, and the value is True.  Otherwise PVAL is a parser element for the
  property's value, which follows the keyword and an `=' sign.

  Either way, the parser produces a pair (TAG, VALUE); TAG defaults to KW.
  """
  if tag is None: tag = kw

  ## A flag, with nothing more to parse.
  if pval is None:
    flag = K(kw)
    flag.setParseAction(lambda s, l, t: (tag, True))
    return flag

  ## A property with a value.  The keyword and `=' are suppressed, so the
  ## value is the first token.
  valued = K(kw) + D('=') + pval
  valued.setParseAction(lambda s, l, t: (tag, t[0]))
  return valued
471
472###--------------------------------------------------------------------------
473### Policies and actions.
474
class Action (object):
  """
  An Action object represents a conversion action to be performed.

  This class isn't intended to be instantiated directly.  It exists to define
  some protocol common to all Action objects.

  Action objects have the following attributes.

  master        The name of the master (source) file.

  target        The name of the target (destination) file.

  PRIORITY      The priority of the action, for deciding which of two actions
                to perform.  Higher priorities are more likely to win.

  Converting an Action to a string describes the action in a simple
  user-readable manner.  The `perform' method actually carries the action
  out.
  """

  PRIORITY = 0

  def __init__(me, master):
    "Remember the MASTER file name for later."
    me.master = master

  def choose(me, him):
    """
    Choose either ME or HIM and return one.

    ME wins if HIM is None, or if ME has strictly higher priority; so HIM
    wins ties.
    """
    if him is not None and me.PRIORITY <= him.PRIORITY:
      return him
    return me
508
class CopyAction (Action):
  """
  An Action object for simply copying a file.

  Actually we try to hardlink it first, falling back to a copy later.  This
  is both faster and more efficient with regard to disk space.
  """

  ## Copying is good.  Linking is really good, but we can't tell the
  ## difference at this stage.
  PRIORITY = 10

  def __init__(me, master, targetdir):
    "Initialize a CopyAction, from MASTER to the TARGETDIR directory."
    Action.__init__(me, master)
    leaf = OS.path.basename(master)
    me.target = OS.path.join(targetdir, leaf)

  def __str__(me):
    return 'copy/link'

  def perform(me):
    """
    Actually perform a CopyAction.

    Try to make a hard link.  If that fails because the master and target
    are on different filesystems then copy the file to a temporary name
    alongside the target and rename it into place.  Any other error is
    propagated to the caller.
    """
    src, dest = me.master, me.target
    try:
      STATUS.set(filestatus(src, 'link'))
      OS.link(src, dest)
    except OSError as err:
      if err.errno != E.EXDEV: raise
      STATUS.set(filestatus(src, 'copy'))
      scratch = dest + '.new'
      SH.copyfile(src, scratch)
      OS.rename(scratch, dest)
    STATUS.commit()
542
class ConvertAction (Action):
  """
  An Action object for converting a file to a given format.

  Additional attributes:

  id            The identification object for the master file.

  format        The format to which we're meant to convert the master.
  """

  def __init__(me, master, targetdir, id, format):
    """
    Initialize a ConvertAction.

    The target is in TARGETDIR, and has the MASTER's name with its extension
    replaced by the FORMAT's `EXT'.
    """
    Action.__init__(me, master)
    stem = OS.path.splitext(OS.path.basename(master))[0]
    me.target = OS.path.join(targetdir, stem + '.' + format.EXT)
    me.id = id
    me.format = format

  def __str__(me):
    return 'convert to %s' % me.format.NAME

  def perform(me):
    "Actually perform a ConvertAction."
    STATUS.set(filestatus(me.master, me))
    me.format.convert(me.master, me.id, me.target)
569
## The grammar for policies is recursive, so declare it here and fill it in
## once the individual kinds of policy have been defined.
Policy = P.Forward()

class FormatPolicy (object):
  """
  A FormatPolicy object represents a set of rules for how to convert files.

  Given a master file, the FormatPolicy will identify it and return a list of
  actions to be performed.  The methods required of a FormatPolicy are:

  setcategory(CAT)
        Store CAT as the policy's category.  Check that this is consistent
        with the policy as stored.

  actions(MASTER, TARGETDIR, ID, COHORT)
        Given a MASTER file, identified as ID, a target directory
        TARGETDIR, and a list COHORT of (FILE, ID) pairs for other files
        of the same category in the same directory, return a list of
        actions to be performed to get the target directory into the right
        form.  The list might be empty if the policy object /rejects/ the
        file.
  """
591
class AndPolicy (FormatPolicy):
  """
  A FormatPolicy which does the union of a bunch of other policies.

  Each subsidiary policy is invoked in turn.  The highest-priority action for
  each target file is returned.
  """

  def __init__(me, policies):
    "Initialize the policy, given a list of subsidiary POLICIES."
    me._policies = policies

  def setcategory(me, cat):
    "Store CAT as the category, and pass it on to the subsidiary policies."
    me.cat = cat
    for p in me._policies:
      p.setcategory(cat)

  def actions(me, master, targetdir, id, cohort):
    "Return the winning action for each target the subsidiaries propose."

    ## Maintain a map from target names to the best action seen so far.  An
    ## action's `choose' method copes with there being no rival (yet).
    best = {}
    for p in me._policies:
      for act in p.actions(master, targetdir, id, cohort):
        best[act.target] = act.choose(best.get(act.target))
    return best.values()

## Syntax: `and { POLICY ... }'.
And = K('and') - D('{') - R(Policy) - D('}')
And.setParseAction(lambda s, l, t: AndPolicy(t[0]))
620
class OrPolicy (FormatPolicy):
  """
  A FormatPolicy which tries other policies and uses the first that accepts.

  Each subsidiary policy is invoked in turn.  If any accepts, the actions it
  proposes are returned and no further policies are invoked.  If none accepts
  then the file is rejected.
  """

  def __init__(me, policies):
    "Initialize the policy, given a list of subsidiary POLICIES."
    me._policies = policies

  def setcategory(me, cat):
    "Store CAT as the category, and pass it on to the subsidiary policies."
    me.cat = cat
    for p in me._policies:
      p.setcategory(cat)

  def actions(me, master, targetdir, id, cohort):
    "Return the actions of the first subsidiary policy to propose any."
    for p in me._policies:
      proposed = p.actions(master, targetdir, id, cohort)
      if proposed:
        return proposed

    ## Nobody wanted it.
    return []

## Syntax: `or { POLICY ... }'.
Or = K('or') - D('{') - R(Policy) - D('}')
Or.setParseAction(lambda s, l, t: OrPolicy(t[0]))
648
class AcceptPolicy (FormatPolicy):
  """
  A FormatPolicy which copies files in a particular format.

  If all of the files in a cohort are recognized as being in a particular
  format (including this one), then accept it with a CopyAction; otherwise
  reject.
  """

  def __init__(me, format):
    "Initialize the policy, given the FORMAT to accept."
    me._format = format

  def setcategory(me, cat):
    "Store CAT as the category; raise ValueError if the format isn't in it."
    fmt = me._format
    if fmt.CATEGORY is not cat:
      raise ValueError(
        "Accept format `%s' has category `%s', not `%s'" %
        (fmt.__class__.__name__, fmt.CATEGORY.name, cat.name))
    me.cat = cat

  def actions(me, master, targetdir, id, cohort):
    "Return a CopyAction if MASTER and all its COHORT are acceptable."
    ok = me._format.check
    if not ok(id):
      return []
    if not all(ok(cid) for f, cid in cohort):
      return []
    return [CopyAction(master, targetdir)]

## Syntax: `accept FORMAT'.
Accept = K('accept') - Format
Accept.setParseAction(lambda s, l, t: AcceptPolicy(t[0]))
678
class ConvertPolicy (FormatPolicy):
  """
  A FormatPolicy which copies files in a particular format or converts if
  necessary.
  """

  def __init__(me, format):
    "Initialize the policy, given the FORMAT to convert to."
    me._format = format

  def setcategory(me, cat):
    "Store CAT as the category; raise ValueError if the format isn't in it."
    if me._format.CATEGORY is not cat:

      ## Be careful to blame the right kind of policy, so that the user
      ## looks in the right place for the mistake.
      raise ValueError(
        "Convert format `%s' has category `%s', not `%s'" %
        (me._format.__class__.__name__,
         me._format.CATEGORY.name, cat.name))
    me.cat = cat

  def actions(me, master, targetdir, id, cohort):
    """
    Return a single action to get MASTER into the target directory.

    If the file is already acceptable then copy it; otherwise convert it.
    The COHORT is ignored.
    """
    if me._format.check(id):
      return [CopyAction(master, targetdir)]
    else:
      return [ConvertAction(master, targetdir, id, me._format)]

## Syntax: `convert FORMAT'.
Convert = K('convert') - Format
Convert.setParseAction(lambda s, l, t: ConvertPolicy(t[0]))

## Now we can say what a policy looks like.
Policy << (And | Or | Accept | Convert)
705
706###--------------------------------------------------------------------------
707### Audio handling, based on GStreamer.
708
def make_element(factory, name = None, **props):
  """
  Return a new element from the FACTORY with the given NAME and PROPS.

  Raise ValueError if the element can't be made.
  """
  elt = GS.ElementFactory.make(factory, name)
  if elt is None:
    raise ValueError('failed to make `%s\' element' % factory)
  elt.set_properties(**props)
  return elt
715
0dbdd41c
MW
def link_elements(elts):
  """
  Link the elements ELTS together, in order.

  Each element is linked to its successor.  An empty or single-element
  list needs no links, and is left alone.  Raise ValueError if any of the
  links can't be made.
  """
  e0 = None
  for e1 in elts:

    ## The `link' method reports failure by returning false rather than
    ## raising an exception, so check its answer: otherwise a broken
    ## pipeline goes unnoticed until it mysteriously fails to do anything.
    if e0 is not None and not e0.link(e1):
      raise ValueError('failed to link `%s\' to `%s\'' %
                       (e0.get_name(), e1.get_name()))
    e0 = e1
0dbdd41c 722
2101727d
MW
def bin_children(bin):
  """
  Iterate over the (direct) children of a BIN.

  Raise ValueError if the underlying iterator reports anything other than
  success or completion.
  """
  it = bin.iterate_elements()
  while True:
    rc, elt = it.next()
    if rc == GS.IteratorResult.OK:
      yield elt
    elif rc == GS.IteratorResult.DONE:
      return
    else:
      raise ValueError('iteration failed (%s)' % rc)
2101727d 732
583b7e4a
MW
class GStreamerProgressEyecandy (ProgressEyecandy):
  """
  Provide amusement while GStreamer is busy doing something.

  The GStreamerProgressEyecandy object is a context manager.  Wrap it round
  your GStreamer loop to provide progress information for an operation.
  """

  def __init__(me, what, elt, **kw):
    """
    Initialize a progress meter.

    WHAT is a prefix string to be written before the progress eyecandy
    itself.  ELT is a GStreamer element to interrogate to find the progress
    information.  Other keyword arguments are passed on to the superclass.
    """
    me._elt = elt
    ProgressEyecandy.__init__(me, what, **kw)

  def _update(me):
    """
    Called by GLib main event loop to update the eyecandy.

    Return true, so that the timer keeps on running.
    """
    me.show()
    return True

  def _timer(me):
    """
    Update the progress meter.

    This is called periodically by the GLib main event-processing loop.
    """
    me.show()
    return True

  def progress(me):
    "Return the current progress as a pair (CURRENT, MAX)."

    ## Fetch the current progress information.  We get the duration each
    ## time, because (particularly with VBR-encoded MP3 inputs) the estimated
    ## duration can change as we progress.  Hopefully it settles down fairly
    ## soon.  If either query fails then we know nothing.
    fmt = GS.Format.TIME
    ok, now = me._elt.query_position(fmt)
    if not ok: return None, None
    ok, end = me._elt.query_duration(fmt)
    if not ok: return None, None
    return now, end

  def __enter__(me):
    "Enter context: attach progress meter display."

    ## Update regularly, because the pipeline runs asynchronously -- but
    ## only if we're showing pointless frippery at all.
    if STATUS.eyecandyp:
      me._id = G.timeout_add(200, me._update)

  def __exit__(me, ty, val, tb):
    "Leave context: remove display and report completion or failure."

    ## If we're not showing pointless frippery, there's nothing to remove.
    if STATUS.eyecandyp:
      G.source_remove(me._id)

    ## Report completion anyway.  We won if there was no exception.
    me.done(ty is None)

    ## As you were: let any exception propagate.
    return False
800
class AudioIdentifier (object):
  """
  Analyses and identifies an audio file.

  Important properties are:

  cap   A capabilities structure describing the audio file data.  The most
        interesting thing in here is probably its name, which is a MIME
        type describing the data.

  dcap  A capabilities structure describing the decoded audio data.  This
        is of interest during conversion.

  mime  A set of possible MIME types for the file: the caller's first guess
        and the name of `cap'.

  tags  A dictionary containing metadata tags from the file.  These are in
        GStreamer's encoding-independent format.

  bitrate An approximation to the stream's bitrate, in kilobits per second.
        This might be slow to work out for some files so it's computed on
        demand.
  """

  def __init__(me, file, mime):
    """
    Initialize the object suitably for identifying FILE.

    MIME is the caller's first guess at the file's MIME type.  Raise
    ValueError if GStreamer reports an error while examining the file.
    """

    ## Make some initial GStreamer objects.  We'll want the pipeline later if
    ## we need to analyse a poorly tagged MP3 stream, so save it away.
    me._pipe = GS.Pipeline()
    me._file = file
    bus = me._pipe.get_bus()
    loop = G.MainLoop()

    ## The basic recognition kit is based around `decodebin'.  We must keep
    ## it happy by giving it sinks for the streams it's found, which it
    ## announces asynchronously.
    source = make_element('filesrc', 'file', location = file)
    decoder = make_element('decodebin', 'decode')
    sink = make_element('fakesink')
    def decoder_pad_arrived(elt, pad):
      if pad.get_current_caps()[0].get_name().startswith('audio/'):
        elt.link_pads(pad.get_name(), sink, 'sink')
    dpaid = decoder.connect('pad-added', decoder_pad_arrived)
    for i in [source, decoder, sink]: me._pipe.add(i)
    link_elements([source, decoder])

    ## Arrange to collect tags from the pipeline's bus as they're reported.
    ## The handler also stops the main loop once the pipeline as a whole has
    ## reached the paused state, or if something goes wrong; in the latter
    ## case, `fail' holds the exception to raise.
    tags = {}
    fail = []
    def bus_message(bus, msg):
      ty, s = msg.type, msg.get_structure()
      if ty == GS.MessageType.ERROR:
        fail[:] = [ValueError(s['debug'])]
        loop.quit()
      elif ty == GS.MessageType.STATE_CHANGED:
        if s['new-state'] == GS.State.PAUSED and \
           msg.src == me._pipe:
          loop.quit()
      elif ty == GS.MessageType.TAG:
        tt = s['taglist']
        for i in xrange(tt.n_tags()):
          t = tt.nth_tag_name(i)

          ## Only single-valued tags are of any interest.
          if tt.get_tag_size(t) != 1: continue
          tags[t] = tt.get_value_index(t, 0)
    bmid = bus.connect('message', bus_message)

    ## We want to identify the kind of stream this is.  (Hmm.  The MIME type
    ## recognizer has already done this work, but GStreamer is probably more
    ## reliable.)  The `decodebin' has a `typefind' element inside which
    ## knows the identified media type.  All we need to do is find it, so
    ## that we can ask it once the pipeline is ready.
    me.cap = None
    me.dcap = None
    for e in bin_children(decoder):
      if e.get_factory().get_name() == 'typefind':
        tfelt = e
        break
    else:
      assert False, 'failed to find typefind element'

    ## Crank up most of the heavy machinery.  The message handler will stop
    ## the loop when things seem to be sufficiently well underway.
    bus.add_signal_watch()
    me._pipe.set_state(GS.State.PAUSED)
    loop.run()
    bus.disconnect(bmid)
    decoder.disconnect(dpaid)
    bus.remove_signal_watch()
    if fail:
      me._pipe.set_state(GS.State.NULL)
      raise fail[0]

    ## Store the collected tags.
    me.tags = tags

    ## Gather the capabilities.  The `typefind' element knows the input data
    ## type.  The 'decodebin' knows the raw data type.
    me.cap = tfelt.get_static_pad('src').get_allowed_caps()[0]
    me.mime = set([mime, me.cap.get_name()])
    me.dcap = sink.get_static_pad('sink').get_allowed_caps()[0]

    ## If we found a plausible bitrate then stash it.  Otherwise note that we
    ## failed.  If anybody asks then we'll work it out then.  (The division
    ## is explicitly an integer one, so the answer is a whole number.)
    if 'nominal-bitrate' in tags:
      me._bitrate = tags['nominal-bitrate']//1000
    elif 'bitrate' in tags and tags['bitrate'] >= 80000:
      me._bitrate = tags['bitrate']//1000
    else:
      me._bitrate = None

    ## The bitrate computation wants the file size.  Ideally we'd want the
    ## total size of the frames' contents, but that seems hard to dredge
    ## out.  If the framing overhead is small, this should be close enough
    ## for our purposes.
    me._bytes = OS.stat(file).st_size

  def __del__(me):
    "Close the pipeline down so we don't leak file descriptors."
    me._pipe.set_state(GS.State.NULL)

  @property
  def bitrate(me):
    """
    Return the approximate bit-rate of the input file.

    This might take a while if we have to work it out the hard way, by
    playing the whole file through to find out how long it is.  The answer
    is cached.  Raise ValueError if GStreamer reports an error.
    """

    ## If we already know the answer then just return it.
    if me._bitrate is not None:
      return me._bitrate

    ## Make up a new main loop.
    loop = G.MainLoop()

    ## Watch for bus messages.  We'll stop when we reach the end of the
    ## stream: then we'll have a clear idea of how long the track was.
    fail = []
    def bus_message(bus, msg):
      ty, s = msg.type, msg.get_structure()
      if ty == GS.MessageType.ERROR:
        fail[:] = [ValueError(s['debug'])]
        loop.quit()
      elif ty == GS.MessageType.EOS:
        loop.quit()
    bus = me._pipe.get_bus()
    bmid = bus.connect('message', bus_message)

    ## Get everything moving, and keep the user amused while we work.  The
    ## progress meter wants to know which file we're working on, and which
    ## element to interrogate about how far it's got.
    bus.add_signal_watch()
    me._pipe.set_state(GS.State.PLAYING)
    with GStreamerProgressEyecandy(filestatus(me._file, 'measure bitrate'),
                                   me._pipe,
                                   silentp = True):
      loop.run()
    bus.remove_signal_watch()
    bus.disconnect(bmid)
    if fail:
      me._pipe.set_state(GS.State.NULL)
      raise fail[0]

    ## Now we should be able to find out our position accurately and work out
    ## a bitrate.  Cache it in case anybody asks again.  GStreamer measures
    ## time in nanoseconds, so this comes out in kilobits per second.
    ok, t = me._pipe.query_position(GS.Format.TIME)
    assert ok, 'failed to discover bitrate'
    me._bitrate = int(8*me._bytes*1e6/t)

    ## Done.
    return me._bitrate
971
class AudioFormat (BaseFormat):
  """
  An AudioFormat is a kind of Format specialized for audio files.

  Format checks are done on an AudioIdentifier object.

  The methods here use `NAME', `MIMETYPES' and `encoder_chain', which
  concrete subclasses are expected to provide.  `fixup' is called on the
  freshly encoded file; it is presumably inherited from BaseFormat.
  """

  PROPS = prop('bitrate', Num)

  ## libmagic reports `application/ogg' for Ogg Vorbis files.  We've switched
  ## to GIO now, which reports either `audio/ogg' or `audio/x-vorbis+ogg'
  ## depending on how thorough it's trying to be.  Still, it doesn't do any
  ## harm here; the main risk is picking up Ogg Theora files by accident, and
  ## we'll probably be able to extract the audio from them anyway.
  CATEGORY = FileCategory('audio', ['audio/*', 'application/ogg'],
                          AudioIdentifier)

  def __init__(me, bitrate = None):
    """
    Construct an object, optionally limited to an approximate BITRATE.

    If BITRATE is None (the default) then no bitrate limit is applied by
    `check'.
    """
    me.bitrate = bitrate

  def check(me, id):
    """
    Return whether the AudioIdentifier ID is suitable for our purposes.

    The result is true if ID's MIME types overlap ours and, when a bitrate
    limit was given, ID's bitrate is at most sqrt(2) times that limit.

    Subclasses can either override this method or provide a property
    `MIMETYPES', which must be a set of GStreamer MIME types matching this
    format: it is intersected with `id.mime' using `&', so a plain list
    will not do.
    """
    return id.mime & me.MIMETYPES and \
           (me.bitrate is None or id.bitrate <= me.bitrate * sqrt(2))

  def encoder(me):
    """
    Constructs a GStreamer element to encode audio input.

    Subclasses can either override this method (or replace `encode'
    entirely), or provide a method `encoder_chain' which returns a list of
    elements to be linked together in sequence.  The first element in the
    chain must have a pad named `sink' and the last must have a pad named
    `src'.

    The elements are wrapped in a bin whose ghost pads `sink' and `src'
    stand in for the pads at the two ends of the chain.
    """
    elts = me.encoder_chain()
    bin = GS.Bin()
    for i in elts: bin.add(i)
    link_elements(elts)
    bin.add_pad(GS.GhostPad('sink', elts[0].get_static_pad('sink')))
    bin.add_pad(GS.GhostPad('src', elts[-1].get_static_pad('src')))
    return bin

  def convert(me, master, id, target):
    """
    Encode audio from MASTER, already identified as ID, writing it to TARGET.

    The output is written to `TARGET.new' first, passed to `fixup', and only
    then renamed to TARGET.  If GStreamer reports an error on the bus, a
    ValueError carrying the message's `debug' field is raised instead.

    See `encoder' for subclasses' responsibilities.
    """

    ## Construct the necessary equipment.
    pipe = GS.Pipeline()
    bus = pipe.get_bus()
    loop = G.MainLoop()

    ## Make sure that there isn't anything in the way of our output.  We're
    ## going to write to a scratch file so that we don't get confused by
    ## half-written rubbish left by a crashed program.
    new = target + '.new'
    try:
      OS.unlink(new)
    except OSError, err:
      if err.errno != E.ENOENT:
        raise

    ## Piece together our pipeline.  The annoying part is that the
    ## `decodebin' doesn't have any source pads yet, so our chain is in two
    ## halves for now.
    source = make_element('filesrc', 'source', location = master)
    decoder = make_element('decodebin', 'decode')
    convert = make_element('audioconvert', 'convert')
    encoder = me.encoder()
    sink = make_element('filesink', 'sink', location = new)
    for i in [source, decoder, convert, encoder, sink]: pipe.add(i)
    link_elements([source, decoder])
    link_elements([convert, encoder, sink])

    ## Some decoders (e.g., the AC3 decoder) include channel-position
    ## indicators in their output caps.  The Vorbis encoder interferes with
    ## this, and you end up with a beautifully encoded mono signal from a
    ## stereo source.  From a quick butchers at the `vorbisenc' source, I
    ## /think/ that this is only a problem with stereo signals: mono signals
    ## are mono already, and `vorbisenc' accepts channel positions if there
    ## are more than two channels.
    ##
    ## So we have this bodge.  We already collected the decoded audio caps
    ## during identification.  So if we see 2-channel audio with channel
    ## positions, we strip the positions off forcibly by adding a filter.
    ##
    ## NOTE(review): GStreamer 1.0 appears to name raw audio plain
    ## `audio/x-raw', which the prefix test below would never match --
    ## confirm whether this bodge can still trigger with the GI bindings.
    if id.dcap.get_name().startswith('audio/x-raw-') and \
       id.dcap.has_field('channels') and \
       id.dcap['channels'] == 2 and \
       id.dcap.has_field('channel-positions'):
      dcap = GS.Caps()
      c = id.dcap.copy()
      c.remove_field('channel-positions')
      dcap.append(c)
    else:
      dcap = None

    ## Hook onto the `decodebin' so we can link together the two halves of
    ## our encoding chain.  For now, we'll hope that there's only one audio
    ## stream in there, and just throw everything else away.
    def decoder_pad_arrived(elt, pad):
      if pad.get_current_caps()[0].get_name().startswith('audio/'):
        if dcap:
          elt.link_pads_filtered(pad.get_name(), convert, 'sink', dcap)
        else:
          elt.link_pads(pad.get_name(), convert, 'sink')
    decoder.connect('pad-added', decoder_pad_arrived)

    ## Watch the bus for completion messages.  An error is recorded in
    ## `fail' (as arguments for a three-argument `raise') rather than raised
    ## from inside the callback; either way we stop the main loop.
    fail = []
    def bus_message(bus, msg):
      if msg.type == GS.MessageType.ERROR:
        fail[:] = (ValueError, msg.get_structure()['debug'], None)
        loop.quit()
      elif msg.type == GS.MessageType.EOS:
        loop.quit()
    bmid = bus.connect('message', bus_message)

    ## Get everything ready and let it go.
    bus.add_signal_watch()
    pipe.set_state(GS.State.PLAYING)
    with GStreamerProgressEyecandy(filestatus(master,
                                              'convert to %s' % me.NAME),
                                   pipe):
      loop.run()

    ## The loop has stopped: shut the pipeline down and detach from the bus
    ## before deciding whether the conversion worked.
    pipe.set_state(GS.State.NULL)
    bus.remove_signal_watch()
    bus.disconnect(bmid)
    if fail:
      raise fail[0], fail[1], fail[2]

    ## Fix up the output file if we have to.
    me.fixup(new)

    ## We're done.
    OS.rename(new, target)
1117
class OggVorbisFormat (AudioFormat):
  "AudioFormat object for Ogg Vorbis."

  NAME = 'Ogg Vorbis'
  EXT = 'ogg'
  MIMETYPES = set(['application/ogg', 'audio/x-vorbis', 'audio/ogg',
                   'audio/x-vorbis+ogg'])

  ## Pairs (QUALITY, BITRATE), in increasing order of both.
  ## From https://en.wikipedia.org/wiki/Vorbis
  QMAP = [(-1,  45), ( 0,  64), ( 1,  80), ( 2,  96),
          ( 3, 112), ( 4, 128), ( 5, 160), ( 6, 192),
          ( 7, 224), ( 8, 256), ( 9, 320), (10, 500)]

  def encoder_chain(me):
    """
    Return the elements needed to encode and mux an Ogg Vorbis stream.

    If a bitrate was requested, choose the first `QMAP' entry whose bitrate
    is at least that large and pass its quality, divided by ten, to
    `vorbisenc'.  Raise ValueError if no entry is large enough.
    """
    props = {}
    wanted = me.bitrate
    if wanted is not None:
      fits = [q for q, rate in me.QMAP if rate >= wanted]
      if not fits:
        raise ValueError('no suitable quality setting found')
      props['quality'] = fits[0]/10.0
    enc = make_element('vorbisenc', **props)
    mux = make_element('oggmux')
    return [enc, mux]

defformat('ogg-vorbis', OggVorbisFormat)
1144
class MP3Format (AudioFormat):
  "AudioFormat object for MP3."

  NAME = 'MP3'
  MIMETYPES = set(['audio/mpeg'])
  EXT = 'mp3'

  def encoder_chain(me):
    """
    Return the elements needed to encode and tag an MP3 stream.

    The `lamemp3enc' element is always given `quality = 4'.  If a bitrate
    was requested, it is also given that bitrate and told to target it;
    otherwise it is told to target the quality setting.
    """
    ## Put `quality' in the dictionary exactly once.  Passing it both as an
    ## explicit keyword and via `**encprops' is a TypeError (duplicate
    ## keyword argument), which used to happen whenever no bitrate was set.
    encprops = { 'quality': 4 }
    if me.bitrate is not None:
      encprops['bitrate'] = me.bitrate
      encprops['target'] = 'bitrate'
    else:
      encprops['target'] = 'quality'
    return [make_element('lamemp3enc', **encprops),
            make_element('xingmux'),
            make_element('id3v2mux')]

  def fixup(me, path):
    """
    Fix up MP3 files.

    GStreamer produces ID3v2 tags, but not ID3v1.  This seems unnecessarily
    unkind to stupid players.

    Reload the file at PATH with eyeD3 and save its tag as both ID3v2.3 and
    ID3v1.  Do nothing if the file can't be loaded or has no tag.
    """
    f = E3.load(path)
    if f is None: return
    t = f.tag
    if t is None: return
    for v in [E3.id3.ID3_V2_3, E3.id3.ID3_V1]:
      ## Best effort only: a tag which can't be represented in one of the
      ## versions shouldn't cause the whole conversion to fail.
      try: t.save(version = v)
      except (UnicodeEncodeError,
              E3.id3.GenreException,
              E3.id3.TagException):
        pass

defformat('mp3', MP3Format)
1183
1184###--------------------------------------------------------------------------
1185### Image handling, based on the Python Imaging Library.
1186
class ImageIdentifier (object):
  """
  Analyses and identifies an image file.

  The opened Image object is left in the `img' property for inspection, and
  the MIME type passed in is kept, as a one-element set, in `mime'.
  """

  def __init__(me, file, mime):
    "Open FILE as an image; raise IdentificationFailure if it isn't one."

    ## Let PIL work out for itself what kind of file this is.
    try:
      img = I.open(file)
    except IOError as exc:

      ## A genuine I/O error carries an `errno', and we pass it on
      ## unchanged.  PIL unhelpfully raises the same exception class,
      ## without an `errno', if it simply doesn't recognize the file.
      if exc.errno is not None:
        raise
      raise IdentificationFailure

    me.img = img
    me.mime = set([mime])
1210
class ImageFormat (BaseFormat):
  """
  An ImageFormat is a kind of Format specialized for image files.

  Subclasses need only supply the properties required of every concrete
  Format subclass.  There is one extra requirement: the `NAME' property must
  be the same as PIL's `format' name for the format.
  """

  PROPS = prop('size', Num)
  CATEGORY = FileCategory('image', ['image/*'], ImageIdentifier)

  def __init__(me, size = None, **kw):
    """
    Initialize an ImageFormat object.

    SIZE, if given, is the largest acceptable width or height.  Any other
    keywords are handed to PIL when encoding, and may be recognized by
    enhanced `check' methods in subclasses.
    """
    me._props = kw
    me._size = size

  def check(me, id):
    "Check whether the ImageIdentifier ID matches our requirements."
    if id.img.format != me.NAME:
      return False
    limit = me._size
    if limit is None:
      return True
    dims = id.img.size
    return dims[0] <= limit and dims[1] <= limit

  def convert(me, master, id, target):
    "Encode the file MASTER, identified as ID, writing the result to TARGET."

    ## We write to a scratch file and rename it into place at the end.
    scratch = target + '.new'

    ## Reuse the image which the identifier has already opened.
    picture = id.img
    STATUS.set(filestatus(master, 'convert to %s' % me.NAME))

    ## Scale the image down if it's bigger than the stated maximum.  Making
    ## a thumbnail modifies the image in place, so work on a copy.
    limit = me._size
    if limit is not None:
      if picture.size[0] > limit or picture.size[1] > limit:
        picture = picture.copy()
        picture.thumbnail((limit, limit), I.ANTIALIAS)

    ## Write the output, and fix it up if necessary.
    picture.save(scratch, me.NAME, **me._props)
    me.fixup(scratch)

    ## All done: move the result into place.
    OS.rename(scratch, target)
    STATUS.commit()
1268
class JPEGFormat (ImageFormat):
  """
  Image format for JPEG (actually JFIF) files.

  Properties which can usefully be set:

  optimize
        If present, take a second pass to select optimal encoder settings.

  progressive
        If present, make a progressive file.

  quality
        Integer from 1--100 (worst to best); default is 75.
  """
  NAME = 'JPEG'
  EXT = 'jpg'
  PROPS = prop('optimize', None) \
        | prop('progressive', None, 'progression') \
        | prop('quality', Num)

defformat('jpeg', JPEGFormat)
1290
class PNGFormat (ImageFormat):
  """
  Image format for PNG files.

  Properties which can usefully be set:

  optimize
        If present, make a special effort to minimize the output file.
  """
  NAME = 'PNG'
  EXT = 'png'
  PROPS = prop('optimize', None)

defformat('png', PNGFormat)
1305
class BMPFormat (ImageFormat):
  """
  Image format for Windows BMP files, as used by RockBox.

  This format defines no properties of its own.
  """
  EXT = 'bmp'
  NAME = 'BMP'

defformat('bmp', BMPFormat)
1316
1317###--------------------------------------------------------------------------
e0361afb
MW
1318### Remaining parsing machinery.
1319
Type = K('type') - Name - D('{') - R(Policy) - D('}')
def build_type(s, l, t):
  """
  Parse action for `Type': attach a file category to the parsed policies.

  S is the string being parsed, L the location of the match, and T the
  parsed tokens: T[0] is the category name and T[1] the list of policies.
  Return a single policy -- the only one given, or an AndPolicy combining
  them all -- with its category set.  Raise ParseException if the category
  name is unknown.
  """
  try:
    cat = CATEGORYMAP[t[0]]
  except KeyError:
    ## Report the failure at the location we were given.  (This used to
    ## mention an undefined name `loc', and so raised NameError instead.)
    raise P.ParseException(s, l, "Unknown category `%s'" % t[0])
  pols = t[1]
  if len(pols) == 1: pol = pols[0]
  else: pol = AndPolicy(pols)
  pol.setcategory(cat)
  return pol
Type.setParseAction(build_type)
1332
## The list of TargetJob objects which have been performed so far.
TARGETS = []

class TargetJob (object):
  """
  A job, from the configuration file, describing one target directory.

  The target directory name is held in `targetdir', and the policies to be
  applied to it in `policies'.
  """

  def __init__(me, targetdir, policies):
    "Remember the TARGETDIR and its POLICIES."
    me.targetdir, me.policies = targetdir, policies

  def perform(me):
    "Add this job to the end of the global `TARGETS' list."
    TARGETS.append(me)
1340
Target = K('target') - String - D('{') - R(Type) - D('}')
def build_target(s, l, t):
  "Parse action for `Target': make a TargetJob from the tokens T."
  targetdir, policies = t[0], t[1]
  return TargetJob(targetdir, policies)
Target.setParseAction(build_target)
1345
## Variables which can be set from the configuration file.
VARS = { 'master': None }

class VarsJob (object):
  """
  A job, from the configuration file, which sets global variables.

  The `vars' property holds (NAME, VALUE) pairs.
  """

  def __init__(me, vars):
    "Remember the VARS, a sequence of (NAME, VALUE) pairs."
    me.vars = vars

  def perform(me):
    "Store each of the job's variables in the global `VARS' dictionary."
    for name, value in me.vars:
      VARS[name] = value
1353
Var = prop('master', String)
Vars = K('vars') - D('{') - R(Var) - D('}')
def build_vars(s, l, t):
  "Parse action for `Vars': make a VarsJob from the first token in T."
  settings = t[0]
  return VarsJob(settings)
Vars.setParseAction(build_vars)

## A configuration file is a sequence of `vars' and `target' stanzas, with
## Python-style comments ignored.
TopLevel = Vars | Target
Config = R(TopLevel)
Config.ignore(P.pythonStyleComment)
1363
1364###--------------------------------------------------------------------------
583b7e4a
MW
1365### The directory grobbler.
1366
def grobble(master, targets, noact = False):
  """
  Work through the MASTER directory, writing converted files to TARGETS.

  The TARGETS are a list of `TargetJob' objects, each describing a target
  directory and a policy to apply to it.

  If NOACT is true, then don't actually do anything permanent to the
  filesystem.

  Return a list of (FILENAME, EXCEPTION) pairs describing the master files
  and directories which couldn't be processed because of an I/O error.
  """

  ## Transform the targets into a more convenient data structure: for each
  ## target, a dictionary mapping each category to a list of its policies.
  tpolmap = []
  for t in targets:
    pmap = {}
    tpolmap.append(pmap)
    for p in t.policies: pmap.setdefault(p.cat, []).append(p)

  ## Keep track of the current position in the master tree.
  dirs = []

  ## And the files which haven't worked.
  broken = []

  def grobble_file(master, pmap, targetdir, cohorts):
    ## Convert MASTER, writing the result to TARGETDIR.
    ##
    ## The COHORTS are actually (CAT, ID, COHORT) triples, where a COHORT is
    ## a list of (FILENAME, ID) pairs.
    ##
    ## Since this function might convert the MASTER file, the caller doesn't
    ## know the name of the output files, so we return them as a list.

    done = set()
    st_m = OS.stat(master)

    ## Work through each category listed and apply its policy.
    for cat, id, cohort in cohorts:

      ## Go through the category's policies and see if any match.  If we fail
      ## here, see if there are more categories to try.
      for pol in pmap[cat]:
        acts = pol.actions(master, targetdir, id, cohort)
        if acts: break
      else:
        continue

      ## Work through the targets one by one.
      for a in acts:
        done.add(a.target)

        ## Find out whether the target file already exists and is up-to-date
        ## with respect to the master.  (Caution here with low-resolution
        ## timestamps.)  If it's OK, then just move on.
        try:
          st_t = OS.stat(a.target)
          if st_m.st_mtime < st_t.st_mtime or \
             (st_m.st_ino, st_m.st_dev) == (st_t.st_ino, st_t.st_dev):
            continue
        except OSError as err:
          if err.errno not in (E.ENOENT, E.ENOTDIR):
            raise

        ## We have real work to do.  If there's a current status message,
        ## it's the containing directory so flush it so that people know
        ## where we are.
        STATUS.commit()

        ## Remove the target.  (A hardlink will fail if the target already
        ## exists.)
        if not noact:
          try:
            OS.unlink(a.target)
          except OSError as err:
            if err.errno not in (E.ENOENT, E.ENOTDIR):
              raise

        ## Do whatever it is we decided to do.
        if noact:
          STATUS.commit(filestatus(master, a))
        else:
          a.perform()

    ## We're done.  Return the names of the targets.
    return list(done)

  @contextmanager
  def wrap(masterfile):
    ## Handle exceptions found while trying to convert a particular file or
    ## directory.

    try:
      yield masterfile

    ## Something bad happened.  Report the error, but continue.  (This list
    ## of exceptions needs a lot of work.)
    except (IOError, OSError) as exc:
      STATUS.clear()
      STATUS.commit(filestatus(masterfile, 'failed (%s)' % exc))
      broken.append((masterfile, exc))

  def grobble_dir(master, targets):
    ## Recursively convert files in MASTER, writing them to the TARGETS.

    ## Keep track of the subdirectories we encounter, because we'll need to
    ## do all of those in one go at the end.
    subdirs = set()

    ## Work through each target directory in turn.
    for target, pmap in zip(targets, tpolmap):

      ## Make sure the TARGET exists and is a directory.  It's a fundamental
      ## assumption of this program that the entire TARGET tree is
      ## disposable, so if something exists but isn't a directory, we should
      ## kill it.
      if OS.path.isdir(target):
        pass
      else:
        if OS.path.exists(target):
          STATUS.commit(filestatus(target, 'clear nondirectory'))
          if not noact:
            OS.unlink(target)
        STATUS.commit(filestatus(target, 'create directory'))
        if not noact:
          OS.mkdir(target)

      ## Keep a list of things in the target.  As we convert files, we'll
      ## check them off.  Anything left over is rubbish and needs to be
      ## deleted.
      checklist = {}
      try:
        for i in OS.listdir(target):
          checklist[i] = False
      except OSError as err:
        if err.errno not in (E.ENOENT, E.ENOTDIR):
          raise

      ## Keep track of the files in each category.
      catmap = {}
      todo = []
      done = []

      ## Work through the master files.
      for f in sorted(OS.listdir(master)):

        ## If the killswitch has been pulled then stop.  The whole idea is
        ## that we want to cause a clean shutdown if possible, so we don't
        ## want to do it in the middle of encoding because the encoding
        ## effort will have been wasted.  This is the only place we need to
        ## check.  If we've exited the loop, then clearing old files will
        ## probably be fast, and we'll either end up here when the recursive
        ## call returns or we'll be in the same boat as before, clearing old
        ## files, only up a level.  If worst comes to worst, we'll be killed
        ## forcibly somewhere inside `SH.rmtree', and that can continue where
        ## it left off.
        if KILLSWITCH.is_set():
          return

        ## Do something with the file.
        with wrap(OS.path.join(master, f)) as masterfile:

          ## If it's a directory then prepare to grobble it recursively, but
          ## don't do that yet.
          if OS.path.isdir(masterfile):
            subdirs.add(f)
            done.append(OS.path.join(target, f))

          ## Otherwise it's a file.  Work out what kind, and stash it under
          ## the appropriate categories.  Later, we'll apply policy to the
          ## files, by category, and work out what to do with them all.
          else:
            mime = GIO.file_new_for_path(masterfile) \
                      .query_info('standard::content-type', 0) \
                      .get_content_type()
            cats = []
            for cat in pmap:
              id = cat.identify(masterfile, mime)
              if id is None: continue
              catmap.setdefault(cat, []).append((masterfile, id))
              cats.append((cat, id))

            ## Nothing recognized the file, so file it under `None' with no
            ## identification.  (Say `None' explicitly: `id' isn't even
            ## bound here if the target has no categories at all.)
            if not cats:
              catmap.setdefault(None, []).append((masterfile, None))
            todo.append((masterfile, cats))

      ## Work through the categorized files to see what actions to do for
      ## them.
      for masterfile, cats in todo:
        with wrap(masterfile):
          done += grobble_file(masterfile, pmap, target,
                               [(cat, id, catmap[cat]) for cat, id in cats])

      ## Check the results off the list so that we don't clear it later.
      for f in done:
        checklist[OS.path.basename(f)] = True

      ## Maybe there's stuff in the target which isn't accounted for.  Delete
      ## it: either the master has changed, or the policy for this target has
      ## changed.  Either way, the old files aren't wanted.
      for f in checklist:
        if not checklist[f]:
          STATUS.commit(filestatus(f, 'clear bogus file'))
          if not noact:
            bogus = OS.path.join(target, f)
            try:
              if OS.path.isdir(bogus):
                SH.rmtree(bogus)
              else:
                OS.unlink(bogus)
            except OSError as err:
              if err.errno != E.ENOENT:
                raise

    ## If there are subdirectories which want processing then do those.
    ## Keep the user amused by telling him where we are in the tree.
    for d in sorted(subdirs):
      dirs.append(d)
      STATUS.set('/'.join(dirs))
      with wrap(OS.path.join(master, d)) as masterdir:
        try:
          grobble_dir(masterdir,
                      [OS.path.join(target, d) for target in targets])
        finally:
          dirs.pop()
          STATUS.set('/'.join(dirs))

  ## Right.  We're ready to go.
  grobble_dir(master, [t.targetdir for t in targets])
  return broken
583b7e4a
MW
1595
1596###--------------------------------------------------------------------------
1597### Command-line interface.
1598
## The name we were invoked under, for use in messages.
QUIS = OS.path.basename(SYS.argv[0])

def moan(msg):
  "Write the warning MSG to standard error, tagged with the program name."
  line = '%s: %s\n' % (QUIS, msg)
  SYS.stderr.write(line)
1604
def die(msg):
  "Report the fatal error MSG to the user, and exit with status 1."
  moan(msg)
  raise SystemExit(1)
1609
def parse_opts(args):
  """
  Parse command-line arguments in ARGS.

  ARGS must contain, apart from options, exactly one argument: the name of
  the configuration file.  That file is parsed, and each job found in it is
  performed.  If a timeout was requested, a daemon thread is started to
  enforce it.

  Returns the option-values object produced by the option parser.
  """

  ## Build the option parser object.
  op = OP.OptionParser(prog = QUIS, version = VERSION,
                       usage = '%prog [-in] [-t TIMEOUT] [-T TIMEOUT] '
                               'CONFIG',
                       description = """\
Convert a directory tree of files according to the configuration file
CONFIG.
""")

  ## Timeout handling.
  def cb_time(opt, ostr, arg, op):
    ## Option callback: parse ARG as a number with an optional unit suffix
    ## (`d', `h', `m' or `s') and store the result, in seconds, as the
    ## option's value.
    ##
    ## NOTE(review): the pattern isn't anchored at the end, so trailing junk
    ## after the number and unit is silently ignored.
    m = RX.match(r'\s*(\d+)\s*([dhms]?)\s*', arg)
    if not m:
      raise OP.OptionValueError('bad time value `%s\'' % arg)
    t, u = m.groups()
    t = int(t) * { '': 1, 's': 1, 'm': 60, 'h': 3600, 'd': 86400 }[u]
    setattr(op.values, opt.dest, t)
  op.add_option('-t', '--timeout', type = 'string', metavar = 'SECS',
                dest = 'timeout',
                help = 'stop processing nicely after SECS',
                action = 'callback', callback = cb_time)
  op.add_option('-T', '--timeout-nasty', type = 'string', metavar = 'SECS',
                dest = 'timeout_nasty',
                help = 'stop processing unpleasantly after further SECS',
                action = 'callback', callback = cb_time)

  ## Other options.
  op.add_option('-i', '--interactive', action = 'store_true', dest = 'tty',
                help = 'provide progress information')
  op.add_option('-n', '--no-act', action = 'store_true', dest = 'noact',
                help = 'don\'t actually modify the filesystem')

  ## Ready to rock.
  op.set_defaults(formats = [], noact = False,
                  timeout = None, timeout_nasty = 300)
  opts, args = op.parse_args(args)

  ## Check that we got the non-option arguments that we want.
  if len(args) != 1:
    op.error('wrong number of arguments')

  ## Act on the options.
  if opts.tty:
    STATUS.eyecandyp = True
  if opts.timeout is not None:
    to = TH.Thread(target = timeout,
                   args = (opts.timeout, opts.timeout_nasty))
    to.daemon = True
    to.start()

  ## Parse the configuration file.
  with open(args[0]) as conf:
    jobs, = Config.parseFile(conf, True)
  for j in jobs:
    j.perform()

  return opts
1675
if __name__ == '__main__':
  opts = parse_opts(SYS.argv[1:])

  ## `VARS' always has a `master' entry, initially `None', so check the
  ## value rather than whether the key is present.
  if VARS.get('master') is None:
    die("no master directory set")
  broken = grobble(VARS['master'], TARGETS, opts.noact)
  if broken:
    moan('failed to convert some files:')
    for file, exc in broken:
      moan('%s: %s' % (file, exc))
    SYS.exit(1)

  ## This is basically a successful completion: we did what we were asked to
  ## do.  It seems polite to report a message, though.
  ##
  ## Why don't we have a nonzero exit status?  The idea would be that a
  ## calling script would be interested that we used up all of our time, and
  ## not attempt to convert some other directory as well.  But that doesn't
  ## quite work.  Such a script would need to account correctly for time we
  ## had spent even if we complete successfully.  And if the script is having
  ## to watch the clock itself, it can do that without our help here.
  if KILLSWITCH.is_set():
    moan('killed by timeout')
1698
1699###----- That's all, folks --------------------------------------------------