gremlin/gremlin.in: Use `locale.getpreferredencoding'.
[autoys] / gremlin / gremlin.in
CommitLineData
d4a7d7b5 1#! @PYTHON@
583b7e4a
MW
2###
3### Convert a directory tree of audio files
4###
5### (c) 2010 Mark Wooding
6###
7
8###----- Licensing notice ---------------------------------------------------
9###
9e3a516f
MW
10### This file is part of the `autoys' audio tools collection.
11###
12### `autoys' is free software; you can redistribute it and/or modify
583b7e4a
MW
13### it under the terms of the GNU General Public License as published by
14### the Free Software Foundation; either version 2 of the License, or
15### (at your option) any later version.
16###
9e3a516f 17### `autoys' is distributed in the hope that it will be useful,
583b7e4a
MW
18### but WITHOUT ANY WARRANTY; without even the implied warranty of
19### MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20### GNU General Public License for more details.
21###
22### You should have received a copy of the GNU General Public License
9e3a516f 23### along with `autoys'; if not, write to the Free Software Foundation,
583b7e4a
MW
24### Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
25
26###--------------------------------------------------------------------------
27### External dependencies.
28
29## Language features.
30from __future__ import with_statement
31
32## Standard Python libraries.
583b7e4a 33import errno as E
583b7e4a 34import fnmatch as FN
17fc1e1b 35import locale as LC
61e37835
MW
36import optparse as OP
37import os as OS
583b7e4a 38import re as RX
61e37835
MW
39import sys as SYS
40import time as T
41import shlex as L
583b7e4a 42import shutil as SH
583b7e4a 43import threading as TH
61e37835 44import unicodedata as UD
00beb9e5 45from math import sqrt, ceil
583b7e4a
MW
46from contextlib import contextmanager
47
48## eyeD3 tag fettling.
608b936e 49import eyed3 as E3
583b7e4a 50
3bf73acf
MW
51## Gstreamer.
52import gi
53gi.require_version('GLib', '2.0'); from gi.repository import GLib as G
54gi.require_version('Gio', '2.0'); from gi.repository import Gio as GIO
55gi.require_version('Gst', '1.0'); from gi.repository import Gst as GS
56GS.init([])
583b7e4a
MW
57
58## Python Imaging.
59from PIL import Image as I
60
61## Python parsing.
62import pyparsing as P
63
64###--------------------------------------------------------------------------
65### Special initialization.
66
d4a7d7b5 67VERSION = '@VERSION@'
583b7e4a
MW
68
69## GLib.
70G.threads_init()
71
72###--------------------------------------------------------------------------
73### Eyecandy progress reports.
74
17fc1e1b
MW
DEFAULT_ENCODING = None

def charwidth(s):
  """
  Return the width of S, in character cells.

  Specifically, this is the number of backspace characters required to
  overprint the string S.  S may be an encoded byte string, in which case it
  is decoded using the locale's preferred encoding (falling back to one cell
  per item of S if the decoding fails); or it may already be Unicode text, in
  which case it's measured as it stands.

  Combining and formatting characters count for nothing; East Asian wide and
  fullwidth characters count double; everything else takes one cell.

  None of this handles tab characters in any kind of useful way.  Sorry.
  """

  global DEFAULT_ENCODING

  ## Figure out the default encoding.  This is done once and cached.
  if DEFAULT_ENCODING is None: DEFAULT_ENCODING = LC.getpreferredencoding()

  ## Get hold of Unicode text so we can hack on it properly.  Text which is
  ## already Unicode must not be decoded again: that would either fail
  ## outright or force us into the stupid fallback for non-ASCII text.
  if isinstance(s, type(u'')):
    u = s
  else:
    try: u = s.decode(DEFAULT_ENCODING)
    except UnicodeError: return len(s)

  ## Our main problem is combining characters, but we should also try to
  ## handle wide (mostly Asian) characters, and zero-width ones.  This hack
  ## is taken mostly from http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c
  w = 0
  for ch in u:
    if UD.category(ch) in ('Cf', 'Me', 'Mn') or \
       0x1160 <= ord(ch) <= 0x11ff:
      continue
    if UD.east_asian_width(ch) in ('F', 'W'): w += 2
    else: w += 1

  ## Done.
  return w
112
class StatusLine (object):
  """
  Maintains a status line containing ephemeral progress information.

  The status line isn't especially important, but it keeps interactive users
  amused.

  There should be only one status line object in your program; otherwise
  they'll interfere with each other and get confused.

  The update algorithm (in `set') is fairly careful to do the right thing
  with long status `lines', and to work properly in an Emacs `shell' buffer.
  """

  def __init__(me):
    "Start with an empty status line; note whether stdout is a terminal."
    me._last = ''
    me._lastlen = 0
    me.eyecandyp = OS.isatty(SYS.stdout.fileno())

  def set(me, line):
    """
    Replace the status line's contents with LINE.

    Output is only actually produced if stdout is interactive; either way,
    LINE is remembered as the current contents.
    """
    newlen = len(line)

    if me.eyecandyp:
      old, oldlen = me._last, me._lastlen

      ## If the old line was longer then its tail needs rubbing out: back up
      ## over it and overwrite it with spaces.
      erase = ''
      if newlen < oldlen:
        tail = charwidth(old[newlen:])
        erase = '\b'*tail + ' '*tail

      ## Find how much of the old line can stay as it is.  Leaving the common
      ## prefix alone keeps the amount of output down, which is kind to SSH
      ## links and to slow terminal emulators such as Emacs.
      limit = min(newlen, oldlen)
      same = 0
      while same < limit and line[same] == old[same]:
        same += 1

      ## Back up to the end of the common prefix and write the rest, all in
      ## one go.
      back = charwidth(old[same:])
      SYS.stdout.write(erase + '\b'*back + line[same:])
      SYS.stdout.flush()

    ## Remember what's on the line now.
    me._lastlen = newlen
    me._last = line

  def clear(me):
    "Clear the status line.  Just like set('')."
    me.set('')

  def commit(me, line = None):
    """
    Commit the current status line, and maybe the string LINE.

    A nonempty status line is committed to the transcript: on a terminal it's
    already showing, so a newline suffices; otherwise the line is written out
    in full.  If LINE is not None then it's written to the transcript as
    well.

    Afterwards the status line is empty again.
    """
    if me._last:
      SYS.stdout.write('\n' if me.eyecandyp else me._last + '\n')
    if line is not None:
      SYS.stdout.write(line + '\n')
    me._lastlen = 0
    me._last = ''
192
193STATUS = StatusLine()
194
def filestatus(file, status):
  """
  Return a status-line string reporting STATUS for FILE.

  The result is indented by eight spaces, and shows only the final component
  of FILE's name.
  """
  indent = ' '*8
  leaf = OS.path.basename(file)
  return '%s%s: %s' % (indent, leaf, status)
197
class ProgressEyecandy (object):
  """
  Provide amusement while something big and complicated is happening.

  This is an abstract class.  Subclasses must provide a method `progress'
  returning a pair (CURRENT, MAX) indicating the current progress through the
  operation; either may be None if the progress isn't known.
  """

  def __init__(me, what, silentp = False):
    """
    Initialize a progress meter.

    WHAT is a prefix string to be written before the progress eyecandy
    itself.  If SILENTP is true then `done' says nothing about a successful
    completion (failures are reported regardless).
    """
    me._what = what
    me._silentp = silentp
    me._spinner = 0
    me._start = T.time()

  def _fmt_time(me, t):
    "Format T as a time, in (maybe hours) minutes and seconds."
    s, t = t % 60, int(t/60)
    m, h = t % 60, int(t/60)
    if h > 0:
      return '%d:%02d:%02d' % (h, m, s)
    else:
      return '%02d:%02d' % (m, s)

  def _meter(me, cur, end):
    """
    Return a pair (CELLS, PERCENT) describing progress CUR out of END.

    CELLS is the number of cells of the 40-cell bar to fill in, clamped so
    that the bar can't burst out of its brackets if CUR overshoots END;
    PERCENT is the honest percentage, which may exceed 100.  END must be
    positive.
    """
    cells = int(40*cur//end)
    return min(max(cells, 0), 40), int(100*cur//end)

  def show(me):
    "Show the current level of progress."

    ## If we're not showing pointless frippery, don't bother at all.
    if not STATUS.eyecandyp:
      return

    ## Update the spinner index.
    me._spinner = (me._spinner + 1)%4
    spin = r'/-\|'[me._spinner]

    ## Fetch the current progress information.  Note that we always fetch
    ## both the current and maximum levels, because both might change if an
    ## operation revises its idea of how much work needs doing.
    cur, end = me.progress()

    ## If we couldn't get progress information, display something vaguely
    ## amusing anyway.  A nonpositive maximum (or a negative position) is no
    ## better than not knowing, and would otherwise have us dividing by zero.
    if cur is None or end is None or end <= 0 or cur < 0:
      STATUS.set('%s %c [unknown progress]' % (me._what, spin))
      return

    ## Work out -- well, guess -- the time remaining.  If we've overshot the
    ## estimated maximum then there's nothing left rather than negative time.
    if cur:
      elapsed = T.time() - me._start
      eta = me._fmt_time(ceil(elapsed*max(end - cur, 0)/cur))
    else:
      eta = '???'

    ## Set the status bar.
    n, pc = me._meter(cur, end)
    STATUS.set('%s %c [%s%s] %3d%% (%s)' % \
                 (me._what, spin, '='*n, ' '*(40 - n), pc, eta))

  def done(me, win = True):
    "Show a completion notice, or a failure if WIN is false."
    if not win:
      STATUS.set('%s FAILED!' % me._what)
    elif not me._silentp:
      STATUS.set('%s done (%s)' %
                 (me._what,
                  me._fmt_time(T.time() - me._start)))
    else:
      return
    STATUS.commit()
277
278###--------------------------------------------------------------------------
279### Timeout handling.
280
KILLSWITCH = TH.Event()

def timeout(t0, t1):
  """
  Enforce a time limit on the program.

  Wait T0 seconds and then set the KILLSWITCH event.  If we're still here T1
  seconds after that, then complain and exit immediately with status 3.
  """

  ## First stage: raise the flag.
  T.sleep(t0)
  KILLSWITCH.set()

  ## Second stage: give up entirely.
  T.sleep(t1)
  moan('dying messily due to timeout')
  OS._exit(3)
289
290###--------------------------------------------------------------------------
291### Parsing utilities.
292
## Allow hyphens in identifiers.
IDCHARS = P.alphanums + '-_'
P.Keyword.setDefaultKeywordChars(IDCHARS)

## Some common kinds of tokens: identifiers, decimal integers (converted to
## Python integers), and double-quoted strings with backslash escapes.
Name = P.Word(IDCHARS)
Num = P.Word(P.nums).setParseAction(lambda toks: [int(t) for t in toks])
String = P.QuotedString('"', '\\')
301
## Handy abbreviations for constructed parser elements.

def K(k):
  "Return a parser element matching the keyword K and discarding it."
  return P.Keyword(k).suppress()

def D(d):
  "Return a parser element matching the literal D and discarding it."
  return P.Literal(d).suppress()

def R(p):
  "Return a parser element for zero or more P, gathered as a single token."
  def gather(s, l, t): return [t]
  return P.ZeroOrMore(p).setParseAction(gather)

O = P.Optional
307
308###--------------------------------------------------------------------------
309### Format identification and conversion.
310
class IdentificationFailure (Exception):
  """
  Raised by an identification function which can't identify its file.

  `FileCategory.identify' catches this and carries on as if the file simply
  didn't match.
  """
313
class FileCategory (object):
  """
  A FileCategory represents a class of files.

  For example, it's sensible to consider audio, or image files as a
  category.  A file category knows how to recognize member files from
  MIME content types.
  """

  def __init__(me, name, mime_pats, ident):
    """
    Construct a new category called NAME, and register it in CATEGORYMAP.

    The MIME_PATS are a list of `fnmatch' patterns to be compared with a MIME
    type.  The IDENT is a function which produces an identification object
    given a file's name and first-guess MIME type.  The object is passed to a
    Format's `check' method to see whether a file needs re-encoding, and to
    `convert' to assist with the conversion.

    An identification object must have an attribute `mime' which is a set of
    possible MIME types accumulated for the object.
    """
    me.name = name
    me._mime_pats = mime_pats
    me._ident = ident
    CATEGORYMAP[name] = me

  def identify(me, file, mime):
    """
    Attempt to identify FILE, given its apparent MIME type.

    The identification function is tried for each pattern which MIME matches
    (case-sensitively) until one succeeds, and the identification object it
    produces is returned.  If no pattern matches, or every attempt reports an
    IdentificationFailure, return None.
    """
    for pat in me._mime_pats:
      if FN.fnmatchcase(mime, pat):
        try: return me._ident(file, mime)
        except IdentificationFailure: pass
    return None
356
class BaseFormat (object):
  """
  A BaseFormat object represents a particular encoding and parameters.

  The object can verify (the `check' method) whether a particular file
  matches its requirements, and if necessary (`convert') re-encode a file.

  Subclasses should define the following methods.

  check(ID)
        Answer whether the file identified by ID is acceptable according to
        the receiver's parameters.

  convert(MASTER, ID, TARGET)
        Convert the file MASTER, which has been identified as ID, according
        to the receiver's parameters, writing the output to TARGET.

  Subclasses should also provide these attributes.

  CATEGORY
        A FileCategory object for the category of files that this format
        lives within.

  EXT   A file extension to be applied to encoded output files.

  NAME  A user-facing name for the format.

  PROPS A parser element to parse a property definition.  It should produce
        a pair NAME, VALUE to be stored in a dictionary.

  Subclasses for different kinds of file may introduce more subclass
  protocol.
  """

  def fixup(me, path):
    """
    Post-encoding fixups for the file at PATH.

    The default implementation does nothing.
    """
    return None
394
FORMATMAP = {}
CATEGORYMAP = {}

def defformat(name, cls):
  """
  Define a format NAME using class CLS.

  CLS must have `NAME' and `CATEGORY' attributes; a ValueError is raised if
  either is missing.  Otherwise CLS is recorded in FORMATMAP under NAME.
  """
  for attr, complaint in [('NAME', 'abstract class'),
                          ('CATEGORY', 'no category')]:
    if not hasattr(cls, attr): raise ValueError(complaint)
  FORMATMAP[name] = cls
405
class FormatParser (P.ParserElement):
  """
  Parse a format specifier:

        format-spec ::= string [format-properties]
        format-properties ::= `{' format-property (`,' format-property)* `}'

  The syntax of a format-property is determined by the PROPS attribute on the
  named format and its superclasses.  The result of a successful parse is an
  instance of the named format's class, constructed with the parsed
  properties as keyword arguments.
  """

  name = 'format-spec'

  ## We cache the parser elements we generate to avoid enormous consing.  The
  ## cache maps format names to property-list parsers, or to None for formats
  ## which have no properties at all.
  CACHE = {}

  def parseImpl(me, s, loc, actp = True):
    "Parse a format specifier from S at LOC; return the new LOC and format."

    ## Firstly, determine the format name.
    loc, r = Name._parse(s, loc, actp)
    fmt = r[0]

    ## Look up the format class.
    if fmt not in FORMATMAP:
      raise P.ParseException(s, loc, "Unknown format `%s'" % fmt)
    fcls = FORMATMAP[fmt]

    ## Fetch the property-list parser from the cache, if possible; else
    ## construct it by combining the distinct PROPS elements found along the
    ## class's method resolution order.
    if fmt in me.CACHE:
      pp = me.CACHE[fmt]
    else:
      seen = set()
      prop = None
      for c in fcls.mro():
        try: p = c.PROPS
        except AttributeError: continue
        if p in seen: continue
        if prop is None: prop = p
        else: prop |= p
        seen.add(p)
      if prop is None:
        pp = None
      else:
        props = P.delimitedList(prop)
        props.setParseAction(lambda s, l, t: dict(t.asList()))
        pp = O(D('{') - props - D('}'))
      me.CACHE[fmt] = pp

    ## Parse the properties.  The property list is optional, so we might
    ## come away with nothing even if the format has properties.
    pd = {}
    if pp is not None:
      loc, r = pp._parse(s, loc, actp)
      if r: pd = r[0]

    ## Construct the format object and return it.
    return loc, fcls(**pd)

Format = FormatParser()
466
def prop(kw, pval, tag = None):
  """
  Return a parser element for a format property introduced by keyword KW.

  If PVAL is None then the property is a simple flag: the keyword alone
  yields the pair (TAG, True).  Otherwise the property is written `KW =
  VALUE', where VALUE is parsed by PVAL, and yields the pair (TAG, VALUE).
  TAG defaults to KW.
  """
  if tag is None: tag = kw
  if pval is not None:
    p = K(kw) + D('=') + pval
    p.setParseAction(lambda s, l, t: (tag, t[0]))
  else:
    p = K(kw)
    p.setParseAction(lambda s, l, t: (tag, True))
  return p
476
477###--------------------------------------------------------------------------
478### Policies and actions.
479
class Action (object):
  """
  An Action object represents a conversion action to be performed.

  This class isn't intended to be instantiated directly.  It exists to define
  some protocol common to all Action objects.

  Action objects have the following attributes.

  master        The name of the master (source) file.

  target        The name of the target (destination) file.

  PRIORITY      The priority of the action, for deciding which of two actions
                to perform.  Higher priorities are more likely to win.

  Converting an Action to a string describes the action in a simple
  user-readable manner.  The `perform' method actually carries the action
  out.
  """

  PRIORITY = 0

  def __init__(me, master):
    "Stash the MASTER file name for later."
    me.master = master

  def choose(me, him):
    """
    Choose either ME or HIM and return one.

    HIM may be None, in which case ME wins by default.  Otherwise the one
    with the strictly higher PRIORITY wins, and HIM wins ties.
    """
    if him is not None and me.PRIORITY <= him.PRIORITY:
      return him
    return me
513
class CopyAction (Action):
  """
  An Action object for simply copying a file.

  Actually we try to hardlink it first, falling back to a copy later.  This
  is both faster and more efficient with regard to disk space.
  """

  ## Copying is good.  Linking is really good, but we can't tell the
  ## difference at this stage.
  PRIORITY = 10

  ## Error codes from `link' which mean that a hard link can't be made here,
  ## but a copy might still work: the target is on a different filesystem;
  ## the filesystem doesn't do hard links; or the master has too many links
  ## already.
  _NOLINK = (E.EXDEV, E.EPERM, E.EMLINK)

  def __init__(me, master, targetdir):
    "Initialize a CopyAction, from MASTER to the TARGETDIR directory."
    Action.__init__(me, master)
    me.target = OS.path.join(targetdir, OS.path.basename(master))

  def __str__(me):
    return 'copy/link'

  def perform(me):
    """
    Actually perform a CopyAction.

    Try to hard-link the master to the target.  If linking can't work here
    then copy the master to a temporary file alongside the target and rename
    it into place, so that the target never appears half-written.  Any other
    OSError from the link attempt, and any error from the copy, is
    propagated.
    """
    try:
      STATUS.set(filestatus(me.master, 'link'))
      OS.link(me.master, me.target)
    except OSError as err:
      if err.errno not in me._NOLINK:
        raise
      STATUS.set(filestatus(me.master, 'copy'))
      new = me.target + '.new'
      try:
        SH.copyfile(me.master, new)
        OS.rename(new, me.target)
      except BaseException:
        ## Don't leave a partial temporary file lying about.  It might not
        ## have been created at all, so failure to remove it isn't
        ## interesting; the original problem is what we want to report.
        try: OS.unlink(new)
        except OSError: pass
        raise
    STATUS.commit()
547
class ConvertAction (Action):
  """
  An Action object for converting a file to a given format.

  Additional attributes:

  id            The identification object for the master file.

  format        The format to which we're meant to convert the master.
  """

  def __init__(me, master, targetdir, id, format):
    """
    Initialize a ConvertAction.

    The target file lives in TARGETDIR, and is named after MASTER with its
    extension replaced by the FORMAT's `EXT'.
    """
    Action.__init__(me, master)
    stem = OS.path.splitext(OS.path.basename(master))[0]
    me.id = id
    me.format = format
    me.target = OS.path.join(targetdir, stem + '.' + format.EXT)

  def __str__(me):
    return 'convert to %s' % me.format.NAME

  def perform(me):
    "Actually perform a ConvertAction."
    STATUS.set(filestatus(me.master, me))
    me.format.convert(me.master, me.id, me.target)
574
575Policy = P.Forward()
576
class FormatPolicy (object):
  """
  A FormatPolicy object represents a set of rules for how to convert files.

  Given a master file, the FormatPolicy will identify it and return a list of
  actions to be performed.  This class defines no behaviour of its own; the
  methods required of a FormatPolicy are:

  setcategory(CAT)
        Store CAT as the policy's category.  Check that this is consistent
        with the policy as stored.

  actions(MASTER, TARGETDIR, ID, COHORT)
        Given a MASTER file, identified as ID, a target directory
        TARGETDIR, and a list COHORT of (FILE, ID) pairs for other files
        of the same category in the same directory, return a list of
        actions to be performed to get the target directory into the right
        form.  The list might be empty if the policy object /rejects/ the
        file.
  """
596
class AndPolicy (FormatPolicy):
  """
  A FormatPolicy which does the union of a bunch of other policies.

  Each subsidiary policy is invoked in turn.  The highest-priority action for
  each target file is returned.
  """

  def __init__(me, policies):
    "Initialize the policy, given its list of subsidiary POLICIES."
    me._policies = policies

  def setcategory(me, cat):
    "Store CAT as the category, and pass it on to the subsidiary policies."
    me.cat = cat
    for pol in me._policies: pol.setcategory(cat)

  def actions(me, master, targetdir, id, cohort):
    "Return the winning action for each target the subsidiaries propose."

    ## Keep the best action seen so far for each target.  An action's
    ## `choose' method copes with there being no rival yet.
    best = {}
    for pol in me._policies:
      for act in pol.actions(master, targetdir, id, cohort):
        best[act.target] = act.choose(best.get(act.target))
    return best.values()

And = K('and') - D('{') - R(Policy) - D('}')
And.setParseAction(lambda s, l, t: AndPolicy(t[0]))
625
class OrPolicy (FormatPolicy):
  """
  A FormatPolicy which tries other policies and uses the first that accepts.

  Each subsidiary policy is invoked in turn.  If any accepts, the actions it
  proposes are returned and no further policies are invoked.  If none accepts
  then the file is rejected.
  """

  def __init__(me, policies):
    "Initialize the policy, given its list of subsidiary POLICIES."
    me._policies = policies

  def setcategory(me, cat):
    "Store CAT as the category, and pass it on to the subsidiary policies."
    me.cat = cat
    for pol in me._policies: pol.setcategory(cat)

  def actions(me, master, targetdir, id, cohort):
    "Return the actions of the first subsidiary policy to propose any."
    for pol in me._policies:
      acts = pol.actions(master, targetdir, id, cohort)
      if acts: return acts
    return []

Or = K('or') - D('{') - R(Policy) - D('}')
Or.setParseAction(lambda s, l, t: OrPolicy(t[0]))
653
class AcceptPolicy (FormatPolicy):
  """
  A FormatPolicy which copies files in a particular format.

  If all of the files in a cohort are recognized as being in a particular
  format (including this one), then accept it with a CopyAction; otherwise
  reject.
  """

  def __init__(me, format):
    "Initialize the policy to accept files matching FORMAT."
    me._format = format

  def setcategory(me, cat):
    "Store CAT as the category; it must be the format's own category."
    fmt = me._format
    if fmt.CATEGORY is not cat:
      raise ValueError(
        "Accept format `%s' has category `%s', not `%s'" %
        (fmt.__class__.__name__, fmt.CATEGORY.name, cat.name))
    me.cat = cat

  def actions(me, master, targetdir, id, cohort):
    "Propose copying MASTER if it and all of its COHORT pass the check."
    ok = me._format.check
    if not ok(id): return []
    for f, cid in cohort:
      if not ok(cid): return []
    return [CopyAction(master, targetdir)]

Accept = K('accept') - Format
Accept.setParseAction(lambda s, l, t: AcceptPolicy(t[0]))
683
class ConvertPolicy (FormatPolicy):
  """
  A FormatPolicy which copies files in a particular format or converts if
  necessary.
  """

  def __init__(me, format):
    "Initialize the policy to produce files matching FORMAT."
    me._format = format

  def setcategory(me, cat):
    """
    Store CAT as the policy's category.

    Raise ValueError if CAT isn't the category to which our format belongs.
    """
    if me._format.CATEGORY is not cat:
      ## This is the `convert' policy complaining, so say so, rather than
      ## blaming an `accept' policy which might not even exist.
      raise ValueError(
        "Convert format `%s' has category `%s', not `%s'" %
        (me._format.__class__.__name__,
         me._format.CATEGORY.name, cat.name))
    me.cat = cat

  def actions(me, master, targetdir, id, cohort):
    """
    Return a single action for MASTER.

    If the file already satisfies the format's check then just copy it;
    otherwise convert it.  The COHORT is not consulted.
    """
    if me._format.check(id):
      return [CopyAction(master, targetdir)]
    else:
      return [ConvertAction(master, targetdir, id, me._format)]

Convert = K('convert') - Format
Convert.setParseAction(lambda s, l, t: ConvertPolicy(t[0]))

Policy << (And | Or | Accept | Convert)
710
711###--------------------------------------------------------------------------
712### Audio handling, based on GStreamer.
713
def make_element(factory, name = None, **props):
  """
  Return a new element from the FACTORY with the given NAME and PROPS.

  Raise ValueError if the element can't be made.
  """
  elt = GS.ElementFactory.make(factory, name)
  if elt is None:
    raise ValueError('failed to make `%s\' element' % factory)
  elt.set_properties(**props)
  return elt
720
0dbdd41c
MW
def link_elements(elts):
  """
  Link the elements ELTS together, in order.

  Each element is linked to its successor by calling its `link' method.
  Raise ValueError if a link can't be made, rather than leaving a broken
  chain to be discovered later.  Having fewer than two elements is fine:
  there's simply nothing to do.
  """
  e0 = None
  for e1 in elts:
    if e0 is not None and not e0.link(e1):
      raise ValueError('failed to link %r to %r' % (e0, e1))
    e0 = e1
0dbdd41c 727
2101727d
MW
def bin_children(bin):
  """
  Iterate over the (direct) children of a BIN.

  This is a generator yielding the elements reported by the bin's element
  iterator.  Raise ValueError if the iterator reports anything other than
  success or completion.
  """
  it = bin.iterate_elements()
  while True:
    rc, elt = it.next()
    if rc == GS.IteratorResult.DONE:
      return
    if rc != GS.IteratorResult.OK:
      raise ValueError('iteration failed (%s)' % rc)
    yield elt
2101727d 737
583b7e4a
MW
class GStreamerProgressEyecandy (ProgressEyecandy):
  """
  Provide amusement while GStreamer is busy doing something.

  The GStreamerProgressEyecandy object is a context manager.  Wrap it round
  your GStreamer loop to provide progress information for an operation.
  """

  def __init__(me, what, elt, **kw):
    """
    Initialize a progress meter.

    WHAT is a prefix string to be written before the progress eyecandy
    itself.  ELT is a GStreamer element to interrogate to find the progress
    information.  Other keyword arguments are passed on to ProgressEyecandy.
    """
    me._elt = elt
    ProgressEyecandy.__init__(me, what, **kw)

  def _update(me):
    """
    Called by GLib main event loop to update the eyecandy.

    Returns true, so that the timeout source stays installed.
    """
    me.show()
    return True

  def _timer(me):
    """
    Update the progress meter.

    This is called periodically by the GLib main event-processing loop.
    """
    me.show()
    return True

  def progress(me):
    """
    Return the current progress as a pair (CURRENT, MAX).

    Both are (None, None) if the element won't tell us either its position
    or its duration.
    """

    ## Fetch the current progress information.  We get the duration each
    ## time, because (particularly with VBR-encoded MP3 inputs) the estimated
    ## duration can change as we progress.  Hopefully it settles down fairly
    ## soon.
    ok, pos = me._elt.query_position(GS.Format.TIME)
    if not ok: return None, None
    ok, end = me._elt.query_duration(GS.Format.TIME)
    if not ok: return None, None
    return pos, end

  def __enter__(me):
    "Enter context: attach progress meter display."

    ## Update regularly, since the pipeline runs asynchronously -- unless
    ## we're not showing pointless frippery, in which case don't bother.
    if STATUS.eyecandyp:
      me._id = G.timeout_add(100, me._update)

  def __exit__(me, ty, val, tb):
    "Leave context: remove display and report completion or failure."

    ## The timer was only installed if we're showing pointless frippery.
    if STATUS.eyecandyp:
      G.source_remove(me._id)

    ## Report completion anyway: it's a win if no exception is in flight.
    me.done(ty is None)

    ## As you were: don't swallow any exception.
    return False
805
806class AudioIdentifier (object):
807 """
808 Analyses and identifies an audio file.
809
810 Important properties are:
811
812 cap A capabilities structure describing the audio file data. The most
813 interesting thing in here is probably its name, which is a MIME
814 type describing the data.
815
816 dcap A capabilities structure describing the decoded audio data. This
817 is of interest during conversion.
818
819 tags A dictionary containing metadata tags from the file. These are in
820 GStreamer's encoding-independent format.
821
822 bitrate An approximation to the stream's bitrate, in kilobits per second.
823 This might be slow to work out for some files so it's computed on
824 demand.
825 """
826
2a2c54c4
MW
827 def _prepare_pipeline(me):
828 pipe = GS.Pipeline()
829 bus = pipe.get_bus()
583b7e4a
MW
830
831 ## The basic recognition kit is based around `decodebin'. We must keep
832 ## it happy by giving it sinks for the streams it's found, which it
833 ## announces asynchronously.
2a2c54c4 834 source = make_element('filesrc', 'file', location = me._file)
583b7e4a
MW
835 decoder = make_element('decodebin', 'decode')
836 sink = make_element('fakesink')
837 def decoder_pad_arrived(elt, pad):
3bf73acf 838 if pad.get_current_caps()[0].get_name().startswith('audio/'):
583b7e4a 839 elt.link_pads(pad.get_name(), sink, 'sink')
2a2c54c4
MW
840 decoder.connect('pad-added', decoder_pad_arrived)
841 for i in [source, decoder, sink]: pipe.add(i)
0dbdd41c 842 link_elements([source, decoder])
583b7e4a 843
2a2c54c4
MW
844 ## Done.
845 return pipe, bus, decoder, sink
846
847 def __init__(me, file, mime):
848 "Initialize the object suitably for identifying FILE."
849
850 me._file = file
851 pipe, bus, decoder, sink = me._prepare_pipeline()
852
853 ## Make some initial GStreamer objects. We'll want the pipeline later if
854 ## we need to analyse a poorly tagged MP3 stream, so save it away.
855 loop = G.MainLoop()
856
583b7e4a 857 ## Arrange to collect tags from the pipeline's bus as they're reported.
583b7e4a
MW
858 tags = {}
859 fail = []
860 def bus_message(bus, msg):
3bf73acf
MW
861 ty, s = msg.type, msg.get_structure()
862 if ty == GS.MessageType.ERROR:
863 fail[:] = (ValueError, s['debug'], None)
583b7e4a 864 loop.quit()
3bf73acf
MW
865 elif ty == GS.MessageType.STATE_CHANGED:
866 if s['new-state'] == GS.State.PAUSED and \
2a2c54c4 867 msg.src == pipe:
583b7e4a 868 loop.quit()
3bf73acf
MW
869 elif ty == GS.MessageType.TAG:
870 tt = s['taglist']
871 for i in xrange(tt.n_tags()):
872 t = tt.nth_tag_name(i)
873 if tt.get_tag_size(t) != 1: continue
874 v = tt.get_value_index(t, 0)
875 tags[t] = v
583b7e4a
MW
876 bmid = bus.connect('message', bus_message)
877
878 ## We want to identify the kind of stream this is. (Hmm. The MIME type
879 ## recognizer has already done this work, but GStreamer is probably more
880 ## reliable.) The `decodebin' has a `typefind' element inside which will
881 ## announce the identified media type. All we need to do is find it and
882 ## attach a signal handler. (Note that the handler might be run in the
883 ## thread context of the pipeline element, but Python's GIL will keep
884 ## things from being too awful.)
885 me.cap = None
886 me.dcap = None
2101727d 887 for e in bin_children(decoder):
583b7e4a
MW
888 if e.get_factory().get_name() == 'typefind':
889 tfelt = e
890 break
891 else:
892 assert False, 'failed to find typefind element'
893
894 ## Crank up most of the heavy machinery. The message handler will stop
895 ## the loop when things seem to be sufficiently well underway.
3bf73acf 896 bus.add_signal_watch()
2a2c54c4 897 pipe.set_state(GS.State.PAUSED)
583b7e4a
MW
898 loop.run()
899 bus.disconnect(bmid)
3bf73acf 900 bus.remove_signal_watch()
583b7e4a 901 if fail:
2a2c54c4 902 pipe.set_state(GS.State.NULL)
583b7e4a
MW
903 raise fail[0], fail[1], fail[2]
904
905 ## Store the collected tags.
906 me.tags = tags
907
908 ## Gather the capabilities. The `typefind' element knows the input data
909 ## type. The 'decodebin' knows the raw data type.
3bf73acf 910 me.cap = tfelt.get_static_pad('src').get_allowed_caps()[0]
583b7e4a 911 me.mime = set([mime, me.cap.get_name()])
3bf73acf 912 me.dcap = sink.get_static_pad('sink').get_allowed_caps()[0]
583b7e4a
MW
913
914 ## If we found a plausible bitrate then stash it. Otherwise note that we
915 ## failed. If anybody asks then we'll work it out then.
916 if 'nominal-bitrate' in tags:
917 me._bitrate = tags['nominal-bitrate']/1000
918 elif 'bitrate' in tags and tags['bitrate'] >= 80000:
919 me._bitrate = tags['bitrate']/1000
920 else:
487d44e5
MW
921 ok, n = pipe.query_duration(GS.Format.BYTES)
922 if ok: ok, t = pipe.query_duration(GS.Format.TIME)
923 if ok: me._bitrate = int((8e6*n)/t)
924 else: me._bitrate = None
2a2c54c4 925 pipe.set_state(GS.State.NULL)
583b7e4a
MW
926
927 @property
928 def bitrate(me):
929 """
930 Return the approximate bit-rate of the input file.
931
932 This might take a while if we have to work it out the hard way.
933 """
934
935 ## If we already know the answer then just return it.
936 if me._bitrate is not None:
937 return me._bitrate
938
2a2c54c4
MW
939 ## Make up a new pipeline and main loop.
940 pipe, bus, _, _ = me._prepare_pipeline()
583b7e4a
MW
941 loop = G.MainLoop()
942
943 ## Watch for bus messages. We'll stop when we reach the end of the
944 ## stream: then we'll have a clear idea of how long the track was.
945 fail = []
946 def bus_message(bus, msg):
3bf73acf
MW
947 ty, s = msg.type, msg.get_structure()
948 if ty == GS.MessageType.ERROR:
949 fail[:] = (ValueError, s['debug'], None)
583b7e4a 950 loop.quit()
3bf73acf 951 elif ty == GS.MessageType.EOS:
583b7e4a 952 loop.quit()
2a2c54c4 953 bus = pipe.get_bus()
583b7e4a
MW
954 bmid = bus.connect('message', bus_message)
955
956 ## Get everything moving, and keep the user amused while we work.
3bf73acf 957 bus.add_signal_watch()
2a2c54c4
MW
958 pipe.set_state(GS.State.PLAYING)
959 with GStreamerProgressEyecandy(filestatus(me._file, 'measure bitrate'),
960 pipe, silentp = True):
583b7e4a 961 loop.run()
3bf73acf 962 bus.remove_signal_watch()
583b7e4a
MW
963 bus.disconnect(bmid)
964 if fail:
2a2c54c4 965 pipe.set_state(GS.State.NULL)
583b7e4a 966 raise fail[0], fail[1], fail[2]
f3ebed77 967 STATUS.clear()
583b7e4a 968
2a2c54c4
MW
969 ## The bitrate computation wants the file size. Ideally we'd want the
970 ## total size of the frames' contents, but that seems hard to dredge
971 ## out. If the framing overhead is small, this should be close enough
972 ## for our purposes.
973 bytes = OS.stat(me._file).st_size
974
583b7e4a
MW
975 ## Now we should be able to find out our position accurately and work out
976 ## a bitrate. Cache it in case anybody asks again.
3bf73acf
MW
977 ok, t = pipe.query_position(GS.Format.TIME)
978 assert ok, 'failed to discover bitrate'
2a2c54c4
MW
979 me._bitrate = int(8*bytes*1e6/t)
980 pipe.set_state(GS.State.NULL)
583b7e4a
MW
981
982 ## Done.
983 return me._bitrate
984
985class AudioFormat (BaseFormat):
986 """
987 An AudioFormat is a kind of Format specialized for audio files.
988
989 Format checks are done on an AudioIdentifier object.
990 """
991
992 PROPS = prop('bitrate', Num)
993
994 ## libmagic reports `application/ogg' for Ogg Vorbis files. We've switched
995 ## to GIO now, which reports either `audio/ogg' or `audio/x-vorbis+ogg'
996 ## depending on how thorough it's trying to be. Still, it doesn't do any
997 ## harm here; the main risk is picking up Ogg Theora files by accident, and
998 ## we'll probably be able to extract the audio from them anyway.
999 CATEGORY = FileCategory('audio', ['audio/*', 'application/ogg'],
1000 AudioIdentifier)
1001
1002 def __init__(me, bitrate = None):
1003 "Construct an object, requiring an approximate bitrate."
1004 me.bitrate = bitrate
1005
1006 def check(me, id):
1007 """
1008 Return whether the AudioIdentifier ID is suitable for our purposes.
1009
1010 Subclasses can either override this method or provide a property
1011 `MIMETYPES', which is a list (other thing that implements `__contains__')
1012 of GStreamer MIME types matching this format.
1013 """
1014 return id.mime & me.MIMETYPES and \
1015 (me.bitrate is None or id.bitrate <= me.bitrate * sqrt(2))
1016
1017 def encoder(me):
1018 """
1019 Constructs a GStreamer element to encode audio input.
1020
1021 Subclasses can either override this method (or replace `encode'
1022 entirely), or provide a method `encoder_chain' which returns a list of
1023 elements to be linked together in sequence. The first element in the
1024 chain must have a pad named `sink' and the last must have a pad named
1025 `src'.
1026 """
1027 elts = me.encoder_chain()
1028 bin = GS.Bin()
0bf1fa38 1029 for i in elts: bin.add(i)
0dbdd41c 1030 link_elements(elts)
3bf73acf
MW
1031 bin.add_pad(GS.GhostPad('sink', elts[0].get_static_pad('sink')))
1032 bin.add_pad(GS.GhostPad('src', elts[-1].get_static_pad('src')))
583b7e4a
MW
1033 return bin
1034
1035 def convert(me, master, id, target):
1036 """
1037 Encode audio from MASTER, already identified as ID, writing it to TARGET.
1038
1039 See `encoder' for subclasses' responsibilities.
1040 """
1041
1042 ## Construct the necessary equipment.
1043 pipe = GS.Pipeline()
1044 bus = pipe.get_bus()
583b7e4a
MW
1045 loop = G.MainLoop()
1046
1047 ## Make sure that there isn't anything in the way of our output. We're
1048 ## going to write to a scratch file so that we don't get confused by
1049 ## half-written rubbish left by a crashed program.
1050 new = target + '.new'
1051 try:
1052 OS.unlink(new)
1053 except OSError, err:
1054 if err.errno != E.ENOENT:
1055 raise
1056
1057 ## Piece together our pipeline. The annoying part is that the
1058 ## `decodebin' doesn't have any source pads yet, so our chain is in two
1059 ## halves for now.
1060 source = make_element('filesrc', 'source', location = master)
1061 decoder = make_element('decodebin', 'decode')
1062 convert = make_element('audioconvert', 'convert')
1063 encoder = me.encoder()
1064 sink = make_element('filesink', 'sink', location = new)
0bf1fa38 1065 for i in [source, decoder, convert, encoder, sink]: pipe.add(i)
0dbdd41c
MW
1066 link_elements([source, decoder])
1067 link_elements([convert, encoder, sink])
583b7e4a
MW
1068
1069 ## Some decoders (e.g., the AC3 decoder) include channel-position
1070 ## indicators in their output caps. The Vorbis encoder interferes with
1071 ## this, and you end up with a beautifully encoded mono signal from a
1072 ## stereo source. From a quick butchers at the `vorbisenc' source, I
1073 ## /think/ that this is only a problem with stereo signals: mono signals
1074 ## are mono already, and `vorbisenc' accepts channel positions if there
1075 ## are more than two channels.
1076 ##
1077 ## So we have this bodge. We already collected the decoded audio caps
1078 ## during identification. So if we see 2-channel audio with channel
1079 ## positions, we strip the positions off forcibly by adding a filter.
1080 if id.dcap.get_name().startswith('audio/x-raw-') and \
1081 id.dcap.has_field('channels') and \
1082 id.dcap['channels'] == 2 and \
1083 id.dcap.has_field('channel-positions'):
1084 dcap = GS.Caps()
1085 c = id.dcap.copy()
1086 c.remove_field('channel-positions')
1087 dcap.append(c)
1088 else:
1089 dcap = None
1090
1091 ## Hook onto the `decodebin' so we can link together the two halves of
1092 ## our encoding chain. For now, we'll hope that there's only one audio
1093 ## stream in there, and just throw everything else away.
1094 def decoder_pad_arrived(elt, pad):
3bf73acf 1095 if pad.get_current_caps()[0].get_name().startswith('audio/'):
583b7e4a
MW
1096 if dcap:
1097 elt.link_pads_filtered(pad.get_name(), convert, 'sink', dcap)
1098 else:
1099 elt.link_pads(pad.get_name(), convert, 'sink')
1100 decoder.connect('pad-added', decoder_pad_arrived)
1101
1102 ## Watch the bus for completion messages.
1103 fail = []
1104 def bus_message(bus, msg):
3bf73acf
MW
1105 if msg.type == GS.MessageType.ERROR:
1106 fail[:] = (ValueError, msg.get_structure()['debug'], None)
583b7e4a 1107 loop.quit()
3bf73acf 1108 elif msg.type == GS.MessageType.EOS:
583b7e4a
MW
1109 loop.quit()
1110 bmid = bus.connect('message', bus_message)
1111
1112 ## Get everything ready and let it go.
3bf73acf
MW
1113 bus.add_signal_watch()
1114 pipe.set_state(GS.State.PLAYING)
583b7e4a
MW
1115 with GStreamerProgressEyecandy(filestatus(master,
1116 'convert to %s' % me.NAME),
1117 pipe):
1118 loop.run()
3bf73acf
MW
1119 pipe.set_state(GS.State.NULL)
1120 bus.remove_signal_watch()
1121 bus.disconnect(bmid)
583b7e4a
MW
1122 if fail:
1123 raise fail[0], fail[1], fail[2]
1124
1125 ## Fix up the output file if we have to.
1126 me.fixup(new)
1127
1128 ## We're done.
1129 OS.rename(new, target)
1130
class OggVorbisFormat (AudioFormat):
  "AudioFormat object for Ogg Vorbis."

  NAME = 'Ogg Vorbis'
  EXT = 'ogg'
  MIMETYPES = set(['application/ogg', 'audio/x-vorbis', 'audio/ogg',
                   'audio/x-vorbis+ogg'])

  ## Pairs (QUALITY, BITRATE), in ascending order.
  ## From https://en.wikipedia.org/wiki/Vorbis
  QMAP = [(-1,  45), ( 0,  64), ( 1,  80), ( 2,  96),
          ( 3, 112), ( 4, 128), ( 5, 160), ( 6, 192),
          ( 7, 224), ( 8, 256), ( 9, 320), (10, 500)]

  def encoder_chain(me):
    """
    Return the list of elements which encode and mux Ogg Vorbis.

    If a bitrate was requested, choose the first `QMAP' entry whose bitrate
    is at least that, and set the encoder's `quality' to a tenth of the
    entry's quality number.  Raises ValueError if no entry is big enough.
    """
    props = {}
    wanted = me.bitrate
    if wanted is not None:
      fits = [q for q, br in me.QMAP if br >= wanted]
      if not fits:
        raise ValueError('no suitable quality setting found')
      props['quality'] = fits[0]/10.0
    vorbis = make_element('vorbisenc', **props)
    mux = make_element('oggmux')
    return [vorbis, mux]

defformat('ogg-vorbis', OggVorbisFormat)
1157
class MP3Format (AudioFormat):
  "AudioFormat object for MP3."

  NAME = 'MP3'
  MIMETYPES = set(['audio/mpeg'])
  EXT = 'mp3'

  def encoder_chain(me):
    """
    Return the list of elements which encode, frame and tag an MP3 stream.

    The `lamemp3enc' element is always given `quality = 4'.  If a bitrate
    was requested then it's passed on and the encoder's `target' is set to
    `bitrate'; otherwise the `target' is `quality'.
    """

    ## Collect all of the encoder properties in one dictionary.  The
    ## `quality' setting must be given only here: passing it as an explicit
    ## keyword as well as in the dictionary is a TypeError.
    encprops = { 'quality': 4 }
    if me.bitrate is not None:
      encprops['bitrate'] = me.bitrate
      encprops['target'] = 'bitrate'
    else:
      encprops['target'] = 'quality'
    return [make_element('lamemp3enc', **encprops),
            make_element('xingmux'),
            make_element('id3v2mux')]

  def fixup(me, path):
    """
    Fix up MP3 files.

    GStreamer produces ID3v2 tags, but not ID3v1.  This seems unnecessarily
    unkind to stupid players.  So load the file at PATH and save its tag
    again as both ID3v2.3 and ID3v1.  Do nothing if the file can't be
    loaded or has no tag.
    """
    f = E3.load(path)
    if f is None: return
    t = f.tag
    if t is None: return
    for v in [E3.id3.ID3_V2_3, E3.id3.ID3_V1]:

      ## This is a best-effort affair: a tag which can't be written in one
      ## of the versions is simply left out.
      try: t.save(version = v)
      except (UnicodeEncodeError,
              E3.id3.GenreException,
              E3.id3.TagException):
        pass

defformat('mp3', MP3Format)
1196
1197###--------------------------------------------------------------------------
1198### Image handling, based on the Python Imaging Library.
1199
class ImageIdentifier (object):
  """
  Analyses and identifies an image file.

  The opened Image object is left in the `img' attribute for inspection,
  and the `mime' attribute is a set containing just the MIME type given to
  the constructor.
  """

  def __init__(me, file, mime):
    """
    Open FILE as an image, and remember its MIME type.

    Raises IdentificationFailure if the file isn't recognized as an image;
    genuine I/O errors are propagated unchanged.
    """

    ## Get PIL to open the file.  It will magically work out what kind of
    ## file it is.
    try:
      img = I.open(file)
    except IOError as exc:

      ## An IOError is an unhelpful thing to raise on identification
      ## failure.  It can be told apart from an actual I/O error because
      ## it doesn't have an `errno'.
      if exc.errno is not None: raise
      raise IdentificationFailure

    me.img = img
    me.mime = set([mime])
1223
class ImageFormat (BaseFormat):
  """
  An ImageFormat is a kind of Format specialized for image files.

  Subclasses needn't provide anything beyond the properties required of
  all concrete Format subclasses.  However, the `NAME' property must match
  PIL's `format' name for the format, because it is compared against the
  image's `format' and passed to `save'.
  """

  PROPS = prop('size', Num)
  CATEGORY = FileCategory('image', ['image/*'], ImageIdentifier)

  def __init__(me, size = None, **kw):
    """
    Initialize an ImageFormat object.

    SIZE, if given, is the largest acceptable width or height.  Additional
    keywords are passed to the image's `save' method when encoding, and may
    be recognized by enhanced `check' methods in subclasses.
    """
    me._size = size
    me._props = kw

  def check(me, id):
    "Check whether the ImageIdentifier ID matches our requirements."
    if id.img.format != me.NAME: return False
    limit = me._size
    if limit is None: return True
    wd, ht = id.img.size
    return wd <= limit and ht <= limit

  def convert(me, master, id, target):
    "Encode the file MASTER, identified as ID, writing the result to TARGET."

    ## The output goes to a scratch file first.
    scratch = target + '.new'

    ## The ImageIdentifier already holds the opened image.  It would be
    ## wasteful not to use it.
    img = id.img
    STATUS.set(filestatus(master, 'convert to %s' % me.NAME))

    ## If there's a stated maximum size and the image exceeds it in either
    ## dimension, then scale the image down to fit.  Thumbnailing clobbers
    ## the image it's applied to, so work on a copy.
    limit = me._size
    if limit is not None and max(img.size[0], img.size[1]) > limit:
      img = img.copy()
      img.thumbnail((limit, limit), I.ANTIALIAS)

    ## Write the output image, and fix it up if necessary.
    img.save(scratch, me.NAME, **me._props)
    me.fixup(scratch)

    ## We're done.
    OS.rename(scratch, target)
    STATUS.commit()
1281
class JPEGFormat (ImageFormat):
  """
  Image format for JPEG (actually JFIF) files.

  Properties which may usefully be set:

  optimize
        If present, take a second pass to select optimal encoder settings.

  progressive
        If present, make a progressive file.

  quality
        Integer from 1--100 (worst to best); default is 75.
  """
  NAME = 'JPEG'
  EXT = 'jpg'
  PROPS = (prop('optimize', None)
           | prop('progressive', None, 'progression')
           | prop('quality', Num))

defformat('jpeg', JPEGFormat)
1303
class PNGFormat (ImageFormat):
  """
  Image format for PNG files.

  Properties which may usefully be set:

  optimize
        If present, make a special effort to minimize the output file.
  """
  NAME = 'PNG'
  EXT = 'png'
  PROPS = prop('optimize', None)

defformat('png', PNGFormat)
1318
class BMPFormat (ImageFormat):
  """
  Image format for Windows BMP files, as used by RockBox.

  There are no properties beyond those of ImageFormat.
  """
  EXT = 'bmp'
  NAME = 'BMP'

defformat('bmp', BMPFormat)
1329
1330###--------------------------------------------------------------------------
e0361afb
MW
1331### Remaining parsing machinery.
1332
Type = K('type') - Name - D('{') - R(Policy) - D('}')
def build_type(s, l, t):
  """
  Parse action for `Type': return the policy for one file category.

  S is the string being parsed, L the location of the match within it, and
  T the matched tokens: T[0] is the category name and T[1] the sequence of
  policies given in the braces.  A single policy is used as it stands;
  otherwise they're combined using `AndPolicy'.  Either way, the result has
  its category set before it's returned.

  Raises `ParseException' if the category name isn't in `CATEGORYMAP'.
  """
  try:
    cat = CATEGORYMAP[t[0]]
  except KeyError:
    ## Report the failure at the location we were given.
    raise P.ParseException(s, l, "Unknown category `%s'" % t[0])
  pols = t[1]
  if len(pols) == 1: pol = pols[0]
  else: pol = AndPolicy(pols)
  pol.setcategory(cat)
  return pol
Type.setParseAction(build_type)
1345
## The target jobs which have been performed so far.
TARGETS = []

class TargetJob (object):
  """
  A job describing a target directory and the policies to apply to it.

  Performing the job simply records it in the global `TARGETS' list.
  """

  def __init__(me, targetdir, policies):
    "Remember the TARGETDIR and POLICIES for later."
    me.targetdir, me.policies = targetdir, policies

  def perform(me):
    "Add this job to the end of `TARGETS'."
    TARGETS.append(me)
1353
Target = K('target') - String - D('{') - R(Type) - D('}')
def build_target(s, l, t):
  "Parse action for `Target': make a `TargetJob' from the matched tokens."
  targetdir, policies = t[0], t[1]
  return TargetJob(targetdir, policies)
Target.setParseAction(build_target)
1358
## Global settings.  The `master' entry starts out as None.
VARS = { 'master': None }

class VarsJob (object):
  """
  A job which assigns variables.

  The VARS given to the constructor is an iterable of (KEY, VALUE) pairs;
  performing the job stores each of them in the global `VARS' dictionary.
  """

  def __init__(me, vars):
    "Remember the pairs VARS for later."
    me.vars = vars

  def perform(me):
    "Store each of our (KEY, VALUE) pairs in `VARS'."
    VARS.update((key, value) for key, value in me.vars)
1366
Var = prop('master', String)
Vars = K('vars') - D('{') - R(Var) - D('}')
def build_vars(s, l, t):
  "Parse action for `Vars': make a `VarsJob' from the first matched token."
  settings = t[0]
  return VarsJob(settings)
Vars.setParseAction(build_vars)

## The configuration as a whole is a sequence of `vars' and `target'
## blocks, with Python-style comments ignored.
TopLevel = Vars | Target
Config = R(TopLevel)
Config.ignore(P.pythonStyleComment)
1376
1377###--------------------------------------------------------------------------
583b7e4a
MW
1378### The directory grobbler.
1379
def grobble(master, targets, noact = False):
  """
  Work through the MASTER directory, writing converted files to TARGETS.

  The TARGETS are a list of `TargetJob' objects, each describing a target
  directory and the policies to apply to it.

  If NOACT is true, then don't actually do anything permanent to the
  filesystem.

  Returns a list of (MASTERFILE, EXCEPTION) pairs, one for each file or
  directory which failed with an IOError or OSError.
  """

  ## Transform the targets into a more convenient data structure: for each
  ## target, a dictionary mapping each category to a list of its policies.
  tpolmap = []
  for t in targets:
    pmap = {}
    tpolmap.append(pmap)
    for p in t.policies: pmap.setdefault(p.cat, []).append(p)

  ## Keep track of the current position in the master tree.
  dirs = []

  ## And the files which haven't worked.
  broken = []

  def grobble_file(master, pmap, targetdir, cohorts):
    ## Convert MASTER, writing the result to TARGETDIR.
    ##
    ## The COHORTS are actually (CAT, ID, COHORT) triples, where a COHORT is
    ## a list of (FILENAME, ID) pairs.
    ##
    ## Since this function might convert the MASTER file, the caller doesn't
    ## know the name of the output files, so we return them as a list.

    done = set()
    st_m = OS.stat(master)

    ## Work through each category listed and apply its policy.
    for cat, id, cohort in cohorts:

      ## Go through the category's policies and see if any match.  If we fail
      ## here, see if there are more categories to try.
      for pol in pmap[cat]:
        acts = pol.actions(master, targetdir, id, cohort)
        if acts: break
      else:
        continue

      ## Work through the targets one by one.
      for a in acts:
        done.add(a.target)

        ## Find out whether the target file already exists and is up-to-date
        ## with respect to the master.  (Caution here with low-resolution
        ## timestamps.)  If it's OK, then just move on.  A target which is
        ## the very same file as the master is fine too.
        try:
          st_t = OS.stat(a.target)
          if st_m.st_mtime < st_t.st_mtime or \
             (st_m.st_ino, st_m.st_dev) == (st_t.st_ino, st_t.st_dev):
            continue
        except OSError as err:
          if err.errno not in (E.ENOENT, E.ENOTDIR):
            raise

        ## We have real work to do.  If there's a current status message,
        ## it's the containing directory so flush it so that people know
        ## where we are.
        STATUS.commit()

        ## Remove the target.  (A hardlink will fail if the target already
        ## exists.)
        if not noact:
          try:
            OS.unlink(a.target)
          except OSError as err:
            if err.errno not in (E.ENOENT, E.ENOTDIR):
              raise

        ## Do whatever it is we decided to do.
        if noact:
          STATUS.commit(filestatus(master, a))
        else:
          a.perform()

    ## We're done.  Return the names of the targets.
    return list(done)

  @contextmanager
  def wrap(masterfile):
    ## Handle exceptions found while trying to convert a particular file or
    ## directory.

    try:
      yield masterfile

    ## Something bad happened.  Report the error, but continue.  (This list
    ## of exceptions needs a lot of work.)
    except (IOError, OSError) as exc:
      STATUS.clear()
      STATUS.commit(filestatus(masterfile, 'failed (%s)' % exc))
      broken.append((masterfile, exc))

  def grobble_dir(master, targets):
    ## Recursively convert files in MASTER, writing them to the TARGETS.
    ##
    ## Here, the TARGETS are directory names, in the same order as the
    ## policy maps in `tpolmap'.

    ## Keep track of the subdirectories we encounter, because we'll need to
    ## do all of those in one go at the end.
    subdirs = set()

    ## Work through each target directory in turn.
    for target, pmap in zip(targets, tpolmap):

      ## Make sure the TARGET exists and is a directory.  It's a fundamental
      ## assumption of this program that the entire TARGET tree is
      ## disposable, so if something exists but isn't a directory, we should
      ## kill it.
      if OS.path.isdir(target):
        pass
      else:
        if OS.path.exists(target):
          STATUS.commit(filestatus(target, 'clear nondirectory'))
          if not noact:
            OS.unlink(target)
        STATUS.commit(filestatus(target, 'create directory'))
        if not noact:
          OS.mkdir(target)

      ## Keep a list of things in the target.  As we convert files, we'll
      ## check them off.  Anything left over is rubbish and needs to be
      ## deleted.  (The directory might not exist if we're not acting.)
      checklist = {}
      try:
        for i in OS.listdir(target):
          checklist[i] = False
      except OSError as err:
        if err.errno not in (E.ENOENT, E.ENOTDIR):
          raise

      ## Keep track of the files in each category.
      catmap = {}
      todo = []
      done = []

      ## Work through the master files.
      for f in sorted(OS.listdir(master)):

        ## If the killswitch has been pulled then stop.  The whole idea is
        ## that we want to cause a clean shutdown if possible, so we don't
        ## want to do it in the middle of encoding because the encoding
        ## effort will have been wasted.  This is the only place we need to
        ## check.  If we've exited the loop, then clearing old files will
        ## probably be fast, and we'll either end up here when the recursive
        ## call returns or we'll be in the same boat as before, clearing old
        ## files, only up a level.  If worst comes to worst, we'll be killed
        ## forcibly somewhere inside `SH.rmtree', and that can continue where
        ## it left off.
        if KILLSWITCH.is_set():
          return

        ## Do something with the file.
        with wrap(OS.path.join(master, f)) as masterfile:

          ## If it's a directory then prepare to grobble it recursively, but
          ## don't do that yet.
          if OS.path.isdir(masterfile):
            subdirs.add(f)
            done.append(OS.path.join(target, f))

          ## Otherwise it's a file.  Work out what kind, and stash it under
          ## the appropriate categories.  Later, we'll apply policy to the
          ## files, by category, and work out what to do with them all.
          else:
            mime = GIO.file_new_for_path(masterfile) \
                      .query_info('standard::content-type', 0) \
                      .get_content_type()
            cats = []
            for cat in pmap:
              id = cat.identify(masterfile, mime)
              if id is None: continue
              catmap.setdefault(cat, []).append((masterfile, id))
              cats.append((cat, id))

            ## Nothing identified the file, so there's no identifier to
            ## record.  Say so explicitly rather than relying on whatever
            ## the loop variable was left holding: it's not even set if
            ## this target has no policies at all.
            if not cats:
              catmap.setdefault(None, []).append((masterfile, None))
            todo.append((masterfile, cats))

      ## Work through the categorized files to see what actions to do for
      ## them.
      for masterfile, cats in todo:
        with wrap(masterfile):
          done += grobble_file(masterfile, pmap, target,
                               [(cat, id, catmap[cat]) for cat, id in cats])

      ## Check the results off the list so that we don't clear it later.
      for f in done:
        checklist[OS.path.basename(f)] = True

      ## Maybe there's stuff in the target which isn't accounted for.  Delete
      ## it: either the master has changed, or the policy for this target has
      ## changed.  Either way, the old files aren't wanted.
      for f in checklist:
        if not checklist[f]:
          STATUS.commit(filestatus(f, 'clear bogus file'))
          if not noact:
            bogus = OS.path.join(target, f)
            try:
              if OS.path.isdir(bogus):
                SH.rmtree(bogus)
              else:
                OS.unlink(bogus)
            except OSError as err:
              if err.errno != E.ENOENT:
                raise

    ## If there are subdirectories which want processing then do those.
    ## Keep the user amused by telling him where we are in the tree.
    for d in sorted(subdirs):
      dirs.append(d)
      STATUS.set('/'.join(dirs))
      with wrap(OS.path.join(master, d)) as masterdir:
        try:
          grobble_dir(masterdir,
                      [OS.path.join(target, d) for target in targets])
        finally:
          dirs.pop()
          STATUS.set('/'.join(dirs))

  ## Right.  We're ready to go.
  grobble_dir(master, [t.targetdir for t in targets])
  return broken
583b7e4a
MW
1608
1609###--------------------------------------------------------------------------
1610### Command-line interface.
1611
## The name we were invoked under, for labelling messages.
QUIS = OS.path.basename(SYS.argv[0])

def moan(msg):
  "Write the warning message MSG to standard error, labelled with `QUIS'."
  line = '%s: %s\n' % (QUIS, msg)
  SYS.stderr.write(line)

def die(msg):
  "Report the fatal error message MSG, and exit with status 1."
  moan(msg)
  SYS.exit(1)
1622
def parse_opts(args):
  """
  Parse command-line arguments in ARGS.

  Exactly one non-option argument is required: the name of a configuration
  file.  The file is parsed, and each job found in it is performed.  If a
  timeout was requested then a daemon thread running `timeout' is started.

  Returns the option-values object.
  """

  ## Build the option parser object.
  op = OP.OptionParser(prog = QUIS, version = VERSION,
                       usage = '%prog [-in] [-t TIMEOUT] [-T TIMEOUT] '
                       'CONFIG',
                       description = """\
Convert a directory tree of files according to the configuration file
CONFIG.
""")

  ## Timeout handling.  A time is a decimal number optionally followed by
  ## a unit letter; the whole argument must match, so that trailing junk
  ## isn't silently ignored.
  def cb_time(opt, ostr, arg, op):
    m = RX.match(r'\s*(\d+)\s*([dhms]?)\s*$', arg)
    if not m:
      raise OP.OptionValueError('bad time value `%s\'' % arg)
    t, u = m.groups()
    t = int(t) * { '': 1, 's': 1, 'm': 60, 'h': 3600, 'd': 86400 }[u]
    setattr(op.values, opt.dest, t)
  op.add_option('-t', '--timeout', type = 'string', metavar = 'SECS',
                dest = 'timeout',
                help = 'stop processing nicely after SECS',
                action = 'callback', callback = cb_time)
  op.add_option('-T', '--timeout-nasty', type = 'string', metavar = 'SECS',
                dest = 'timeout_nasty',
                help = 'stop processing unpleasantly after further SECS',
                action = 'callback', callback = cb_time)

  ## Other options.
  op.add_option('-i', '--interactive', action = 'store_true', dest = 'tty',
                help = 'provide progress information')
  op.add_option('-n', '--no-act', action = 'store_true', dest = 'noact',
                help = 'don\'t actually modify the filesystem')

  ## Ready to rock.
  op.set_defaults(formats = [], noact = False,
                  timeout = None, timeout_nasty = 300)
  opts, args = op.parse_args(args)

  ## Check that we got the non-option arguments that we want.
  if len(args) != 1:
    op.error('wrong number of arguments')

  ## Act on the options.
  if opts.tty:
    STATUS.eyecandyp = True
  if opts.timeout is not None:
    to = TH.Thread(target = timeout,
                   args = (opts.timeout, opts.timeout_nasty))
    to.daemon = True
    to.start()

  ## Parse the configuration file.
  with open(args[0]) as conf:
    jobs, = Config.parseFile(conf, True)
  for j in jobs:
    j.perform()

  return opts
1688
if __name__ == '__main__':
  opts = parse_opts(SYS.argv[1:])

  ## The `master' variable is always present in `VARS', initially as None,
  ## so a membership test can't tell whether the configuration set it.
  if VARS.get('master') is None:
    die("no master directory set")
  broken = grobble(VARS['master'], TARGETS, opts.noact)
  if broken:
    moan('failed to convert some files:')
    for file, exc in broken:
      moan('%s: %s' % (file, exc))
    SYS.exit(1)

  ## This is basically a successful completion: we did what we were asked to
  ## do.  It seems polite to report a message, though.
  ##
  ## Why don't we have a nonzero exit status?  The idea would be that a
  ## calling script would be interested that we used up all of our time, and
  ## not attempt to convert some other directory as well.  But that doesn't
  ## quite work.  Such a script would need to account correctly for time we
  ## had spent even if we complete successfully.  And if the script is having
  ## to watch the clock itself, it can do that without our help here.
  if KILLSWITCH.is_set():
    moan('killed by timeout')
1711
1712###----- That's all, folks --------------------------------------------------